예제 #1
0
    def run(self):
        """Rebuild the database from scratch and log every stage.

        Drops and recreates all tables registered on ``Base.metadata``, then
        populates the taxa, gene, uniprot, go-term and go-annotation tables in
        that order.  Each progress message is written as a one-column row to
        ``createdb.log`` inside the directory ``self.config.log['data']`` and
        is echoed to stdout.  A database summary is printed at the end.
        """
        log_path = os.path.join(self.config.log['data'], 'createdb.log')

        ## prepare a log file
        # The original mode string 'wa' is rejected by Python 3 (a mode may
        # name only one of read/write/append); 'w' truncates as intended.
        # The with-block guarantees the log is flushed and closed even when
        # one of the population stages raises.
        with open(log_path, 'w') as fid:
            writer = csv.writer(fid)

            def push_out(line):
                """Record ``line`` in the log file and echo it to stdout."""
                writer.writerow([line])
                print(line)

            def push_report(timeStr, addedStr):
                """Log the timing/count summary strings returned by a stage."""
                push_out(timeStr)
                push_out(addedStr)

            push_out(sys.argv[0])
            push_out(time.asctime())
            push_out("Getting ready to create database...")

            ## connect to the database and start from an empty schema
            session, engine = db_connect(verbose=False)
            Base.metadata.drop_all(engine)
            Base.metadata.create_all(engine)

            push_out("Creating database with...")
            for t in Base.metadata.sorted_tables:
                push_out("\t"+t.name)

            ## determine file sizes
            timeStart = time.time()
            push_out("determining filesizes...")
            idmapCount, geneInfoCount = get_file_sizes()

            push_out("extracting taxa list...")
            totalAnnotations = get_total_annotations()
            elapsed = time.gmtime(time.time() - timeStart)
            push_out("...extraction time: %s" % time.strftime('%H:%M:%S', elapsed))

            ## taxa table
            push_out("Populating the database taxa table")
            timeStr, addedStr = populate_taxon_table(engine)
            push_report(timeStr, addedStr)

            ## gene table
            push_out("Populating the database with %s genes" % (geneInfoCount))
            timeStr, addedStr = populate_gene_table(geneInfoCount, session, engine)
            push_report(timeStr, addedStr)

            ## uniprot table
            push_out("Populating the database with %s uniprot entries" % (idmapCount))
            timeStr, addedStr = populate_uniprot_table(idmapCount, session, engine)
            push_report(timeStr, addedStr)

            ## populate the go-terms
            push_out("Populating the database with for go terms...")
            timeStr, addedStr = populate_go_terms(engine)
            push_report(timeStr, addedStr)

            ## populate the go-annotations
            push_out("Populating the database with for go annotations...")
            timeStr, addedStr, ignored = populate_go_annotations(totalAnnotations, session, engine)
            push_report(timeStr, addedStr)
            # ignored[0] / ignored[1] are reported as the uniprot / gene counts
            push_out("There were %s uniprot annotations ignored" % str(ignored[0]))
            push_out("There were %s gene annotations ignored" % str(ignored[1]))

            print_db_summary()
예제 #2
0
http://www.sqlalchemy.org/trac/wiki/UsageRecipes/SchemaDisplay
"""

### make imports
import sys, os, re
from DatabaseTables import Base, Taxon, Gene, Uniprot
from DatabaseTools import db_connect

# Optional dependency: schema-graph rendering is only enabled when
# sqlalchemy_schemadisplay can be imported.  Catch ImportError specifically so
# that KeyboardInterrupt / SystemExit are not silently swallowed.
try:
    from sqlalchemy_schemadisplay import create_schema_graph
    createGraph = True
except ImportError:
    createGraph = False

## connect to the database
session, engine = db_connect(verbose=False)

## test the 'Taxon' table
# Look up a single known taxon and check that its id and name round-trip.
testID = '7227'
query = session.query(Taxon).filter_by(ncbi_id=testID).first()

# No matching row: nothing further can be checked, so stop here.
if query is None:
    print("ERROR: init taxon not found.")
    sys.exit()

if int(query.ncbi_id) != int(testID):
    print("ERROR: Bad match to taxon id")
    # Single pre-formatted argument keeps the output identical whether
    # print is the Python 2 statement or the Python 3 function.
    print("%s %s" % (query.ncbi_id, testID))

if query.name != "Drosophila melanogaster":
    print("ERROR: Bad match to taxon name")
예제 #3
0
http://www.sqlalchemy.org/trac/wiki/UsageRecipes/SchemaDisplay
"""

### make imports
import sys,os,re
from DatabaseTables import Base,Taxon,Gene,Uniprot
from DatabaseTools import db_connect

# Optional dependency: schema-graph rendering is only enabled when
# sqlalchemy_schemadisplay can be imported.  Catch ImportError specifically so
# that KeyboardInterrupt / SystemExit are not silently swallowed.
try:
    from sqlalchemy_schemadisplay import create_schema_graph
    createGraph = True
except ImportError:
    createGraph = False

## connect to the database
session, engine = db_connect(verbose=False)

## test the 'Taxon' table
# Look up a single known taxon and check that its id and name round-trip.
testID = '7227'
query = session.query(Taxon).filter_by(ncbi_id=testID).first()

# Identity test against None; no matching row means nothing further can be
# checked, so stop here.
if query is None:
    print("ERROR: init taxon not found.")
    sys.exit()

if int(query.ncbi_id) != int(testID):
    print("ERROR: Bad match to taxon id")
    print(query.ncbi_id, testID)

if query.name != "Drosophila melanogaster":
    print("ERROR: Bad match to taxon name")