def __processGffFilesNew(self, newOrganismDirs):
    """Build the BLAST, GBrowse and Chado data for each new organism directory.

    For every directory name in ``newOrganismDirs`` (resolved relative to
    ``NEW_GENOMIC_DATA_DIR``) the top-level files are scanned for one file of
    each of the extensions ``.faa``, ``.ffn``, ``.gff`` and ``.gbk``:

    * ``.faa`` / ``.ffn`` files are handed to ``GenomeDBUtil.runFormatDB``
      (as protein / non-protein respectively).
    * When both a ``.gff`` and a ``.gbk`` file are present, the GenBank record
      supplies the organism name and accession, the GFF file is rewritten to
      ``<gff>.sorted.prepared``, loaded into a SQLite database with
      ``bp_seqfeature_load.pl``, the organism is added to Chado, and a new
      GBrowse entry is created.

    Progress is recorded through ``self.report.addLogEntry``.

    :param newOrganismDirs: iterable of directory names located under
        ``NEW_GENOMIC_DATA_DIR``.
    :raises StopIteration: if an organism directory cannot be walked
        (``os.walk`` yields nothing for it).
    :raises IndexError: if the GFF file exposes no ``gff_id`` limits.
    """
    for newOrganism in newOrganismDirs:
        # start by creating the BLAST database
        newOrganism = os.path.join(NEW_GENOMIC_DATA_DIR, newOrganism)
        # Single-argument parenthesised form prints the same text whether
        # or not print is a statement.
        print(newOrganism)

        # Only the files directly inside the organism directory are considered.
        organismFiles = next(os.walk(newOrganism))[2]

        # Remember one file per extension of interest; a later file with the
        # same extension replaces an earlier one until all four are found.
        found = {'.faa': None, '.ffn': None, '.gff': None, '.gbk': None}
        for organismFile in organismFiles:
            extension = os.path.splitext(organismFile)[1]
            if extension in found:
                found[extension] = organismFile
            if all(found.values()):
                break

        faa = found['.faa']
        ffn = found['.ffn']
        gff = found['.gff']
        gbk = found['.gbk']

        if faa:
            GenomeDBUtil.runFormatDB(os.path.basename(faa), newOrganism, protein=True)
            self.report.addLogEntry('Ran formatdb successully on ' + faa)

        if ffn:
            GenomeDBUtil.runFormatDB(os.path.basename(ffn), newOrganism, protein=False)
            self.report.addLogEntry('Ran formatdb successully on ' + ffn)

        # process the gff and genbank files for creating the databases
        if not (gff and gbk):
            continue

        # create the sqlite database for GBrowse and create the configuration
        # file for GBrowse hook up
        dbName = os.path.join(
            newOrganism,
            os.path.splitext(os.path.basename(gff))[0] + '.db')
        gffPath = os.path.join(newOrganism, gff)
        gbkPath = os.path.join(newOrganism, gbk)

        # Close the GenBank file as soon as the record has been parsed.
        parser = GenBank.RecordParser()
        with open(gbkPath) as gbkHandle:
            record = parser.parse(gbkHandle)
        organismName = record.organism
        accession = record.accession[0]
        self.report.addLogEntry('Found organism name ' + organismName)

        # create a brand new GBrowse configuration file
        examiner = GFFExaminer()
        with open(gffPath) as gffHandle:
            gffIds = examiner.available_limits(gffHandle)['gff_id']
        # The first component of the first reported gff_id key is used as the
        # GBrowse landmark.
        landmark = list(gffIds)[0][0]

        preparedGff = gffPath + ".sorted.prepared"
        gffRewriter = GFFRewriter(filename=gffPath, outfile=preparedGff,
                                  accession=accession)
        # NOTE: a call to gffRewriter.addUnknownCvTerms(...) with the default
        # database's USER / PASSWORD / NAME settings is currently disabled.
        gffRewriter.addColor({
            'user': settings.DATABASES['default']['USER'],
            'password': settings.DATABASES['default']['PASSWORD'],
            'db': 'MyGO'
        })
        error = gffRewriter.getError()
        print(error)

        # Everything downstream works on the rewritten GFF file.
        args = ['-a', 'DBI::SQLite', '-c', '-f', '-d', dbName, preparedGff]
        runProgram('bp_seqfeature_load.pl', args)
        self.report.addLogEntry('Successfully created sqlite database for ' + str(preparedGff))

        organismDir = os.path.basename(newOrganism)

        # now edit the record in Chado by first adding the organism and then
        # bulk loading the information from gff3
        organismId = GenomeDBUtil.addOrganismToChado(preparedGff, organismName)
        GenomeDBUtil.createNewGBrowseEntry(landmark, dbName, organismDir,
                                           organismName, organismId)
        # Logged only after the entry has actually been created, so a failure
        # above does not leave a misleading success message in the report.
        self.report.addLogEntry('Added new GBrowse entry for ' + organismName)