def test_schema(self):
    "Test schema"
    sp_hbb1 = testutil.datafile('sp_hbb1')
    # Round 1: build a second BLAST DB from the same FASTA file, map the
    # stored sp42 resource onto it, and register mapping + schema.
    sp2 = seqdb.BlastDB(sp_hbb1)
    sp2.__doc__ = 'another sp'
    worldbase.Bio.Seq.sp2 = sp2
    sp = worldbase.Bio.Seq.Swissprot.sp42()
    forward_map = mapping.Mapping(sourceDB=sp, targetDB=sp2)
    forward_map.__doc__ = 'sp -> sp2'
    worldbase.Bio.Seq.testmap = forward_map
    worldbaseSchema.Bio.Seq.testmap = metabase.OneToManyRelation(sp, sp2)
    worldbase.commit()
    worldbase.clear_cache()
    # Round 2: a third DB mapped onto the (reloaded) sp2 resource.
    sp3 = seqdb.BlastDB(sp_hbb1)
    sp3.__doc__ = 'sp number 3'
    worldbase.Bio.Seq.sp3 = sp3
    sp2 = worldbase.Bio.Seq.sp2()
    second_map = mapping.Mapping(sourceDB=sp3, targetDB=sp2)
    second_map.__doc__ = 'sp3 -> sp2'
    worldbase.Bio.Seq.testmap2 = second_map
    worldbaseSchema.Bio.Seq.testmap2 = metabase.OneToManyRelation(sp3, sp2)
    # Only resources touched since the last commit should be cached.
    cached = sorted(worldbase._mdb.resourceCache.keys())
    assert cached == ['Bio.Seq.sp2', 'Bio.Seq.sp3', 'Bio.Seq.testmap2']
    worldbase.commit()
    # Every expected resource ID must appear in the storage graph.
    g = worldbase._mdb.writer.storage.graph
    expected = set(['Bio.Annotation.annoDB', 'Bio.Seq.Swissprot.sp42',
                    'Bio.Seq.sp2', 'Bio.Seq.sp3'])
    found = set(g.keys())
    self.EQ(len(expected - found), 0)
def test_schema(self):
    "Test schema"
    sp_hbb1 = testutil.datafile('sp_hbb1')
    # First mapping: the stored sp42 database onto a freshly built copy.
    another = seqdb.BlastDB(sp_hbb1)
    another.__doc__ = 'another sp'
    worldbase.Bio.Seq.sp2 = another
    swissprot = worldbase.Bio.Seq.Swissprot.sp42()
    testmap = mapping.Mapping(sourceDB=swissprot, targetDB=another)
    testmap.__doc__ = 'sp -> sp2'
    worldbase.Bio.Seq.testmap = testmap
    worldbase.schema.Bio.Seq.testmap = \
        metabase.OneToManyRelation(swissprot, another)
    worldbase.commit()
    worldbase.clear_cache()
    # Second mapping: a third copy onto the reloaded sp2 resource.
    sp3 = seqdb.BlastDB(sp_hbb1)
    sp3.__doc__ = 'sp number 3'
    worldbase.Bio.Seq.sp3 = sp3
    reloaded = worldbase.Bio.Seq.sp2()
    testmap2 = mapping.Mapping(sourceDB=sp3, targetDB=reloaded)
    testmap2.__doc__ = 'sp3 -> sp2'
    worldbase.Bio.Seq.testmap2 = testmap2
    worldbase.schema.Bio.Seq.testmap2 = \
        metabase.OneToManyRelation(sp3, reloaded)
    # The resource cache should hold exactly the uncommitted resources.
    pending = worldbase._mdb.resourceCache.keys()
    pending.sort()
    assert pending == ['Bio.Seq.sp2', 'Bio.Seq.sp3', 'Bio.Seq.testmap2']
    worldbase.commit()
    # All expected IDs must be present in the storage graph.
    graph_keys = set(worldbase._mdb.writer.storage.graph.keys())
    expected = set(['Bio.Annotation.annoDB', 'Bio.Seq.Swissprot.sp42',
                    'Bio.Seq.sp2', 'Bio.Seq.sp3'])
    self.EQ(len(expected - graph_keys), 0)
def setUp(self, **kwargs):
    TestBase.setUp(self)
    dnaseq = testutil.datafile('dnaseq.fasta')
    tryannot = testutil.tempdatafile('tryannot')
    db = seqdb.BlastDB(dnaseq)
    try:
        db.__doc__ = 'little dna'
        worldbase.Bio.Test.dna = db
        # Three hand-made annotation slices; negative coords denote the
        # reverse strand in pygr's convention.
        slices = {1: ('seq1', 5, 10, 'fred'),
                  2: ('seq1', -60, -50, 'bob'),
                  3: ('seq2', -20, -10, 'mary')}
        annoDB = seqdb.AnnotationDB(slices, db,
                                    sliceAttrDict=dict(id=0, start=1,
                                                       stop=2, name=3))
        annoDB.__doc__ = 'trivial annotation'
        worldbase.Bio.Test.annoDB = annoDB
        nlmsa = cnestedlist.NLMSA(tryannot, 'w', pairwiseMode=True,
                                  bidirectional=False)
        try:
            for annID in annoDB:
                nlmsa.addAnnotation(annoDB[annID])
            nlmsa.build(verbose=False)
            nlmsa.__doc__ = 'trivial map'
            worldbase.Bio.Test.map = nlmsa
            worldbaseSchema.Bio.Test.map = metabase.ManyToManyRelation(
                db, annoDB, bindAttrs=('exons',))
            worldbase.commit()
            worldbase.clear_cache()
        finally:
            nlmsa.close()
    finally:
        db.close()
def setUp(self):
    TestBase.setUp(self)
    populate_swissprot()       # save some data
    worldbase.commit()         # finally save everything to metabase
    worldbase.clear_cache()    # force all requests to reload
    # Resource IDs the XMLRPC test server will serve.
    served = ['Bio.Seq.Swissprot.sp42', 'Bio.Seq.frag', 'Bio.Seq.spmap',
              'Bio.Annotation.annoDB', 'Bio.Annotation.map']
    self.server = testutil.TestXMLRPCServer(served, self.tempdir.path)
def close(self,commitData=True): """Close method which performs updating the docstrings and creating the schema objects.""" print "# Finalizing mapping schema(s)..." if self._closed == True: return # UPDATE DOC STRING FOR forward MAPPING self.Mf.__doc__ = "Mapping resource (forward) between annotations %s and %s" % (self.Mf.sourceDB._persistent_id, self.Mf.targetDB._persistent_id) #UPDATE OUR METABASE WITH THE RESOURCE STRING FOR THE MAPPING worldbase.add_resource(self.resourceString+"_forward", self.Mf) # FOR forward MAPPING forward_bindAttrs = (self.forwardAttr, self.inverseAttr) # self.inverseAttr is either None or set to an appropriate inverse attribute relationF = metabase.OneToManyRelation(self.Mf.sourceDB, self.Mf.targetDB, bindAttrs=forward_bindAttrs) relationF.__doc__ = "Mapping schema (forward) between annotations %s and %s" % (self.Mf.sourceDB._persistent_id, self.Mf.targetDB._persistent_id) # UPDATE OUR SCHEMA WITH THE RESOURCE STRING FOR THE MAPPING worldbase.add_schema(self.resourceString+"_forward", relationF) # HANDLE REVERSE MAPPING AND SCHEMA relationR = None if self.Mr != None: # UPDATE DOC STRING FOR reverse MAPPING self.Mr.__doc__ = "Mapping resource (reverse) between annotations %s and %s" % (self.Mr.sourceDB._persistent_id, self.Mr.targetDB._persistent_id) #UPDATE OUR METABASE WITH THE RESOURCE STRING FOR THE MAPPING worldbase.add_resource(self.resourceString+"_reverse", self.Mr) # FOR reverse MAPPING reverse_bindAttrs = (self.reverseAttr, None) relationR = metabase.OneToManyRelation(self.Mr.sourceDB, self.Mr.targetDB, bindAttrs=reverse_bindAttrs) relationR.__doc__ = "Mapping schema (reverse) between annotations %s and %s" % (self.Mr.sourceDB._persistent_id, # Use self.Mf for consistent self.Mr.targetDB._persistent_id) # doc strings with forward mapping # UPDATE OUR SCHEMA WITH THE RESOURCE STRING FOR THE MAPPING worldbase.add_schema(self.resourceString+"_reverse", relationR) if(commitData==True): print "# Committing to worldbase: (1) %s (2) 
%s" % (str(self.Mf.__doc__),str(self.Mr.__doc__)) worldbase.commit() print "# Closing mapping object(s)" # FLUSH MAPPING(S) TO PERSISTENT STORAGE self.Mf.close() if self.Mr: self.Mr.close() # Set closed flag self._closed = True
def test_download(self):
    "Downloading of gzipped file using worldbase"
    url = SourceURL('http://www.doe-mbi.ucla.edu/~leec/test.gz')
    url.__doc__ = 'test download'
    worldbase.add_resource('Bio.Test.Download1', url)
    worldbase.commit()
    # Requesting the resource performs the actual download.
    fpath = worldbase.Bio.Test.Download1()
    digest = testutil.get_file_md5(fpath)
    self.assertEqual(digest.hexdigest(),
                     'f95656496c5182d6cff9a56153c9db73')
    os.remove(fpath)
def test_download(self):
    "Downloading of gzipped file using pygr.Data"
    download_url = SourceURL('http://www.doe-mbi.ucla.edu/~leec/test.gz')
    download_url.__doc__ = 'test download'
    worldbase.add_resource('Bio.Test.Download1', download_url)
    worldbase.commit()    # performs the download
    # Verify the downloaded file's checksum, then clean up.
    local_path = worldbase.Bio.Test.Download1()
    md5sum = testutil.get_file_md5(local_path)
    self.assertEqual(md5sum.hexdigest(),
                     'f95656496c5182d6cff9a56153c9db73')
    os.remove(local_path)
def save_NLMSA_downloaders(url, fileFilter=lambda x: x.endswith(".txt.gz"),
                           resourceStem='Bio.MSA.UCSC.',
                           fileDocumenter=None, fileNamer=None):
    'save NLMSA downloader / builder objects for a set of downloadable textdump files'
    if fileDocumenter is None:
        fileDocumenter = lambda x: 'NLMSA alignment ' + x
    if fileNamer is None:
        # default resource naming: strip .gz suffix, prepend the stem
        fileNamer = lambda x: resourceStem + x[:-3]
    from pygr.nlmsa_utils import NLMSABuilder
    from pygr.downloader import SourceURL
    d = catalog_downloads(url, fileFilter, fileNamer, fileDocumenter,
                          SourceURL)
    # Also register a builder for each download, keyed without .txt suffix.
    for resID, o in d.items():
        nlmsa = NLMSABuilder(o)
        nlmsa.__doc__ = fileDocumenter(resID)
        d[resID[:-4]] = nlmsa
    from pygr import worldbase
    worldbase.add_resource(d)
    worldbase.commit()
    return d    # just in case the user wants to see what was saved
def all_vs_all_blast_save():
    """
    Creates the blast files used during testing.
    Must be called before running the tests
    """
    tempdir = testutil.TempDir("blast-test")
    testutil.change_pygrdatapath(tempdir.path)
    sp_hbb1 = testutil.datafile("sp_hbb1")
    all_vs_all = testutil.tempdatafile("all_vs_all")
    sp = seqdb.BlastDB(sp_hbb1)
    msa = cnestedlist.NLMSA(all_vs_all, mode="w", pairwiseMode=True,
                            bidirectional=False)
    # get strong homologs, save alignment in msa for every sequence
    reader = islice(sp.iteritems(), None)
    for id, s in reader:
        sp.blast(s, msa, expmax=1e-10, verbose=False)
    # done constructing the alignment, so build the alignment db indexes
    msa.build(saveSeqDict=True)
    db = msa.seqDict.dicts.keys()[0]
    result = {}
    for k in db.values():
        edges = msa[k].edges(minAlignSize=12, pIdentityMin=0.5)
        for t in edges:
            assert len(t[0]) >= 12
        # Deduplicate (source, target, pIdentity) triples and sort them.
        # (Replaces the original dict(map(lambda ...)) dedupe; sorted
        # output is identical.)
        result[repr(k)] = sorted(set(
            (str(t[0]), str(t[1]), t[2].pIdentity(trapOverflow=False))
            for t in edges))
    # save it into worldbase
    data = testutil.TestData()
    data.__doc__ = "sp_allvall"
    data.result = result
    worldbase.Bio.Blast = data
    worldbase.commit()
def setUp(self, **kwargs):
    TestBase.setUp(self)
    dnaseq = testutil.datafile('dnaseq.fasta')
    tryannot = testutil.tempdatafile('tryannot')
    dna_db = seqdb.BlastDB(dnaseq)
    try:
        dna_db.__doc__ = 'little dna'
        worldbase.Bio.Test.dna = dna_db
        # Three annotation slices; negative coordinates mark the
        # reverse strand in pygr's convention.
        annoDB = seqdb.AnnotationDB({1: ('seq1', 5, 10, 'fred'),
                                     2: ('seq1', -60, -50, 'bob'),
                                     3: ('seq2', -20, -10, 'mary')},
                                    dna_db,
                                    sliceAttrDict=dict(id=0, start=1,
                                                       stop=2, name=3))
        annoDB.__doc__ = 'trivial annotation'
        worldbase.Bio.Test.annoDB = annoDB
        nlmsa = cnestedlist.NLMSA(tryannot, 'w', pairwiseMode=True,
                                  bidirectional=False)
        try:
            for annID in annoDB:
                nlmsa.addAnnotation(annoDB[annID])
            nlmsa.build()
            nlmsa.__doc__ = 'trivial map'
            worldbase.Bio.Test.map = nlmsa
            worldbase.schema.Bio.Test.map = metabase.ManyToManyRelation(
                dna_db, annoDB, bindAttrs=('exons', ))
            worldbase.commit()
            worldbase.clear_cache()
        finally:
            nlmsa.close()
    finally:
        dna_db.close()
def test_xmlrpc(self):
    "Test XMLRPC"
    worldbase.clear_cache()    # force all future requests to reload
    worldbase.update("http://localhost:%s" % self.server.port)  # from XMLRPC
    # run all our tests
    for check in (check_match, check_dir, check_dir_noargs,
                  check_dir_download, check_dir_re, check_bind,
                  check_bind2):
        check(self)
    # test readonly checks: a write through XMLRPC must be rejected
    sb_hbb1 = testutil.datafile('sp_hbb1')
    sp2 = seqdb.BlastDB(sb_hbb1)
    sp2.__doc__ = 'another sp'
    try:
        worldbase.Bio.Seq.sp2 = sp2
        worldbase.commit()
        msg = 'failed to catch bad attempt to write to XMLRPC server'
        raise KeyError(msg)
    except ValueError:
        pass
def save_NLMSA_downloaders(url, fileFilter=lambda x: x.endswith(".txt.gz"),
                           resourceStem='Bio.MSA.UCSC.',
                           fileDocumenter=None, fileNamer=None):
    '''save NLMSA downloader / builder objects for a set of downloadable textdump files'''
    if fileDocumenter is None:
        fileDocumenter = lambda x: 'NLMSA alignment ' + x
    if fileNamer is None:
        # a default resource naming function: drop the .gz suffix
        fileNamer = lambda x: resourceStem + x[:-3]
    from pygr.nlmsa_utils import NLMSABuilder
    from pygr.downloader import SourceURL
    catalog = catalog_downloads(url, fileFilter, fileNamer,
                                fileDocumenter, SourceURL)
    for res_id, source in catalog.items():
        builder = NLMSABuilder(source)
        builder.__doc__ = fileDocumenter(res_id)
        catalog[res_id[:-4]] = builder    # also key without .txt suffix
    from pygr import worldbase
    worldbase.add_resource(catalog)
    worldbase.commit()
    # just in case the user wants to see what was saved
    return catalog
def test_xmlrpc(self):
    "Test XMLRPC"
    worldbase.clear_cache()    # force all future requests to reload
    # Add our test XMLRPC resource.
    worldbase.update("http://localhost:%s" % self.server.port)
    # run the whole battery of checks against the server
    check_match(self)
    check_dir(self)
    check_dir_noargs(self)
    check_dir_download(self)
    check_dir_re(self)
    check_bind(self)
    check_bind2(self)
    # readonly check: writing through the XMLRPC metabase must fail
    readonly_probe = seqdb.BlastDB(testutil.datafile('sp_hbb1'))
    readonly_probe.__doc__ = 'another sp'
    try:
        worldbase.Bio.Seq.sp2 = readonly_probe
        worldbase.commit()
    except ValueError:
        pass    # expected: server refused the write
    else:
        raise KeyError('failed to catch bad attempt to write to '
                       'XMLRPC server')
print 'added', n, 'records' ### create slicedb = sqlgraph.SQLTable('annotations', serverInfo=SQLiteServerInfo(sql_file)) annodb = annotation.AnnotationDB(slicedb, genome, annotationType='sql:', sliceAttrDict=dict(id='seq_id')) ### save from pygr import worldbase genome.__doc__ = 'Campy genome' worldbase.Bio.campy.genome = genome annodb.__doc__ = 'Campy gene annotations from NCBI (PTT)' worldbase.Bio.campy.genes = annodb nlmsa = cnestedlist.NLMSA('genes_map', 'w', pairwiseMode=True) for v in annodb.itervalues(): nlmsa.addAnnotation(v) nlmsa.build(saveSeqDict=False) nlmsa.__doc__ = 'Campy gene mapping from NCBI' worldbase.Bio.campy.gene_map = nlmsa worldbase.commit()
def main(): """Build an annotation from the given gff file """ usage = """Build and save the annotations defined in the given gff files Saves an annotationDB (representing the file itself) and creates a mapping in the form genome[chromosome][100:200].officialGenes""" parser = optparse.OptionParser("%prog [options] data1.gff [data2.gff ...]\n"+usage) parser.add_option("--genome_resource", '-g', dest="genome_resource", type="string", help="""The pygr resource for the genome, eg, 'Bio.Seq.Genome.TRICA.triCas3'""") parser.add_option("--annotationDB_resource", '-a', dest="annotationDB_resource", type="string", help="""Where to save the created annotationDB. eg, Bio.Annotation.TRICA.triCas3.officialGenes""") parser.add_option("--save_pathstem", '-p', dest="pathstem", type="string", help="""The file to save the exon resource to, eg, '/home/baldig/projects/genomics/pygrdata/annotations/fly/triCas3_official_genes'""") parser.add_option("--map_resource", '-m', dest="map_resource", type="string", help="""the resource to save the annotationDB->Genome map, saved both to worldbase and to worldbase.schema, eg, 'Bio.Annotation.TRICA.triCas3.BeetleBase.officialGenesMap""") parser.add_option("--bind_attribute", '-b', dest="bind_attribute", type="string", help="""The attribute to access annotationDB from genome region, eg, 'officialGenes' would be accessible via triCas3['ChLG2'][100:200].officialGenes Default is not to bind an attribute to genome""") (opts, args) = parser.parse_args() if len(args) < 1: parser.print_help() print 'Please specify at least one gff file to read' sys.exit(-1) if None in [opts.genome_resource, opts.annotationDB_resource, opts.pathstem, opts.map_resource]: parser.print_help() print 'Required options: genome_resource, annotationDB_resource, pathstem, map_resource' sys.exit(-1) print '# Loading original genome db' genome = worldbase(opts.genome_resource) annotDB = annotation.AnnotationDB(None, genome, opts.bind_attribute, filename=opts.pathstem + '_annotDB', 
mode='c', verbose=False) nlmsa = cnestedlist.NLMSA(opts.pathstem, 'w', pairwiseMode=True, bidirectional=False) index = 0 # unique ID used in annotationD for filename in args: print '# adding to annotationDB from %s' % filename fileIn = open(filename) for row in read_for_pygr(fileIn): curAnnot = annotDB.new_annotation(index, row) nlmsa.addAnnotation(curAnnot) index += 1 annotDB.close() # Flush annotation data to disk print '# building NLMSA from all gff files' nlmsa.build(saveSeqDict=True) print '# saving annotationDB and NLMSA to worldbase as %s and %s' % (opts.annotationDB_resource, opts.map_resource) annotDB.__doc__ = 'Combined gff annotationDB from files %s on genome %s' % (', '.join(args), opts.genome_resource) nlmsa.__doc__ = 'Mapping of %s, from gff files %s onto genome %s' % (opts.annotationDB_resource, ', '.join(args), opts.genome_resource) worldbase.add_resource(opts.annotationDB_resource, annotDB) worldbase.add_resource(opts.map_resource, nlmsa) if opts.bind_attribute: print '# saving worldbase schema with bindAttrs=(%s)' % opts.bind_attribute genome_annotDB_relation = metabase.ManyToManyRelation(genome, annotDB, bindAttrs=(opts.bind_attribute,)) genome_annotDB_relation.__doc__ = 'GFF based mapping from %s to genome %s' % (opts.annotationDB_resource, opts.genome_resource) worldbase.add_schema('%s' % opts.map_resource, genome_annotDB_relation) print '# committing worldbase resources' worldbase.commit()
def setUp(self, *args, **kwargs):
    """Populate the metabase with swissprot test data and reset the cache."""
    TestBase.setUp(self, *args, **kwargs)
    populate_swissprot()
    worldbase.commit()         # finally save everything
    worldbase.clear_cache()    # force all requests to reload
def main(): """ Load the given csv file into an sqlite table, saving an annotationDB and an NLMSA version of the original file """ parser = optparse.OptionParser("%prog [options] infile.csv\n"+main.__doc__) parser.add_option("--datapath", '-p', dest="datapath", type="string", default='/home/shared/pygrdata/annotations/HUMAN/hg18', help="""Sets the datafile path. Default=%default""") parser.add_option("--table_name", '-t', dest="table_name", type="string", help="""The resource table's name and data stem, e.g., refGene => datapath/refGene.sqlite """) parser.add_option("--genome", '-g', dest="genome_resource", type="string", default='hg18', help="""The pygr resource for the genome, default=%default""") parser.add_option("--save_resource", '-r', dest="save_resource", type="string", help="""Where to save the created annotationDB and NLMSA. eg, Bio.Annotation.HUMAN.hg18.MotifMap.M0001""") parser.add_option("--bind_attribute", '-b', dest="bind_attribute", type="string", help="""The attribute to access annotationDB from genome region, eg, 'officialGenes' would be accessible via triCas3['ChLG2'][100:200].officialGenes Default is not to bind an attribute to genome""") parser.add_option("--slice_attrs", '-s', dest="slice_attrs", type="string", default='dict(id="chromosome", start="start", stop="stop", orientation="orientation")', help="""dictionary providing aliases in csv file for id, start, stop, etc. 
default=%default'""") parser.add_option("--bed_format", dest="bed_format", action='store_true', help="""csv file is in BED file format, without headers.""") opts, args = parser.parse_args() if len(args) < 1: parser.print_help() print 'Please specify at least one csv file to read' sys.exit(-1) if None in [opts.save_resource, opts.table_name]: parser.print_help() print 'Required options: save_resource, table_name' sys.exit(-1) fileIn = open(args[0]) if not opts.bed_format: reader = csv.DictReader(fileIn, delimiter='\t') else: fileIn = itertools.ifilter(bedCommentFilter, fileIn) reader = csv.DictReader(fileIn, delimiter='\t', fieldnames=['chromosome', 'start', 'stop'], restkey='junkData') fieldnames = reader.fieldnames print fieldnames print '# Loading genome %s' % opts.genome_resource genome = getGenome(opts.genome_resource) opts.table_name = opts.table_name.replace('.','_') # SQL interprets . as membership tablePath = os.path.join(opts.datapath,opts.table_name + '.sqlite') print '# Creating sqlite table for %s at %s' % (opts.table_name, tablePath) dataTable = convertBedToSQLite(reader, opts.table_name, fieldNames=fieldnames) print '# Making AnnotationDB and NLMSA...' annotDB = annotation.AnnotationDB(dataTable, genome, annotationType=opts.table_name+':', sliceAttrDict=eval(opts.slice_attrs)) annotDB.__doc__ = 'AnnotationDB for %s on %s' % (opts.table_name, opts.genome_resource) msaName = os.path.join(opts.datapath, opts.table_name + '_') annotMap = makeNLMSA([annotDB], dataPath=msaName) print '# Saving results to worldbase as %s and %s...' % (opts.save_resource, opts.save_resource+'_db') worldbase.add_resource(opts.save_resource, annotMap) worldbase.add_resource(opts.save_resource+'_db', annotDB) worldbase.commit()
def main(): """Build an annotation from the given gff file """ usage = """Build and save the annotations defined in the given gff files Saves an annotationDB (representing the file itself) and creates a mapping in the form genome[chromosome][100:200].officialGenes""" parser = optparse.OptionParser("%prog [options] data1.gff [data2.gff ...]\n"+usage) parser.add_option("--genome_resource", '-g', dest="genome_resource", type="string", help="""The pygr resource for the genome, eg, 'Bio.Seq.Genome.TRICA.triCas3'""") #parser.add_option("--annotationDB_resource", '-a', dest="annotationDB_resource", type="string", #help="""Where to save the created annotationDB. eg, #Bio.Annotation.TRICA.triCas3.officialGenes""") parser.add_option("--sqlDB_resource", '-s', dest="sqlDB_resource", type="string", help="""Where to save the created sqlDB and a unique file name eg, Bio.Annotation.TRICA.triCas3.features_sqlDB,gffDB_v1""") parser.add_option("--save_pathstem", '-p', dest="pathstem", type="string", help="""The file to save the resource to, eg, '/home/baldig/projects/genomics/pygrdata/annotations/fly/triCas3_official_genes'""") parser.add_option("--map_resource", '-m', dest="map_resource", type="string", help="""the resource to save the annotationDB->Genome map, saved both to worldbase and to worldbase.schema, eg, 'Bio.Annotation.TRICA.triCas3.BeetleBase.officialGenesMap""") parser.add_option("--bind_attribute", '-b', dest="bind_attribute", type="string", help="""The attribute to access annotationDB from genome region, eg, 'officialGenes' would be accessible via triCas3['ChLG2'][100:200].officialGenes Default is not to bind an attribute to genome""") (opts, args) = parser.parse_args() if len(args) < 1: parser.print_help() print 'Please specify at least one gff file to read' sys.exit(-1) if None in [opts.genome_resource, opts.pathstem, opts.map_resource]: parser.print_help() print 'Required options: genome_resource, sqlDB_resource, pathstem, map_resource' sys.exit(-1) if 
opts.sqlDB_resource.count(',') != 1: parser.print_help() print 'Error: sqlDB_resource must be comma separated string with exactly one comma.' else: opts.sqlDB_resource = opts.sqlDB_resource.split(',') try : w = worldbase(opts.sqlDB_resource[0]) parser.print_help() print "Warning: sqlDB_resource already exists. Please select a new name." exit(-1) except WorldbaseNotFoundError: pass print '# Loading original genome db' genome = worldbase(opts.genome_resource) #annotDB = annotation.AnnotationDB(None, genome, opts.bind_attribute, #filename=opts.pathstem + '_annotDB', mode='c', verbose=False) sqlDB = sqlgraph.SQLiteServerInfo('%s/%s.sqlite' %(opts.pathstem,opts.sqlDB_resource[1])) gff2lite = simpleGFF2PygrSQLite(sqlDB) nlmsa = cnestedlist.NLMSA(opts.pathstem, 'w', pairwiseMode=True, bidirectional=False) for filename in args: print '# adding to sqlDB from %s' % filename gff2lite.update(filename) tableNames = gff2lite.getTableNames() for table in tableNames: #for row in read_for_pygr(fileIn): #curAnnot = annotDB.new_annotation(index, row) #nlmsa.addAnnotation(curAnnot) #index += 1 #annotDB.close() # Flush annotation data to disk print '# building NLMSA from all gff files' nlmsa.build(saveSeqDict=True) print '# saving annotationDB and NLMSA to worldbase as %s and %s' % (opts.annotationDB_resource, opts.map_resource) annotDB.__doc__ = 'Combined gff annotationDB from files %s on genome %s' % (', '.join(args), opts.genome_resource) nlmsa.__doc__ = 'Mapping of %s, from gff files %s onto genome %s' % (opts.annotationDB_resource, ', '.join(args), opts.genome_resource) worldbase.add_resource(opts.annotationDB_resource, annotDB) worldbase.add_resource(opts.map_resource, nlmsa) if opts.bind_attribute: print '# saving worldbase schema with bindAttrs=(%s)' % opts.bind_attribute genome_annotDB_relation = metabase.ManyToManyRelation(genome, annotDB, bindAttrs=(opts.bind_attribute,)) genome_annotDB_relation.__doc__ = 'GFF based mapping from %s to genome %s' % 
(opts.annotationDB_resource, opts.genome_resource) worldbase.add_schema('%s' % opts.map_resource, genome_annotDB_relation) print '# committing worldbase resources' worldbase.commit() if __name__ == "__main__": main()
def setUp(self):
    """Set up some testing sequences and features.
    """
    print ("# Setting annotation databases, nlmsa and committing to "
           "worldbase")
    tuple_attrdict = dict(id=0, start=1, stop=2, orientation=3)
    self.genome = worldbase("Bio.Seq.Genome.HUMAN.hg18")

    # --- annotation db1: five test slices on chr1 ---
    self.annodb1 = annotation.AnnotationDB({}, self.genome,
                                           sliceAttrDict=tuple_attrdict)
    self.annodb1._persistent_id = 'foo1_db'
    self.seq_id = "chr1"
    self.annot1 = self.annodb1.new_annotation('A1',
                                              (self.seq_id, 200, 300, 1))
    self.annot2 = self.annodb1.new_annotation('B1',
                                              (self.seq_id, 100, 150, 1))
    self.annot3 = self.annodb1.new_annotation('C1',
                                              (self.seq_id, 50, 75, -1))
    self.annot4 = self.annodb1.new_annotation('D1',
                                              (self.seq_id, 400, 500, 1))
    self.annot5 = self.annodb1.new_annotation('E1',
                                              (self.seq_id, 600, 700, 1))
    # Nested list over annodb1 -- these are our "features".
    self.nlmsa1 = cnestedlist.NLMSA(pathstem='test.mapping.foo1',
                                    mode='w', pairwiseMode=True)
    for annot_id in self.annodb1:
        self.nlmsa1.addAnnotation(self.annodb1[annot_id])
    self.nlmsa1.build()

    # --- annotation db2: five test slices on chr2 ---
    self.annodb2 = annotation.AnnotationDB({}, self.genome,
                                           sliceAttrDict=tuple_attrdict)
    self.annodb2._persistent_id = 'foo2_db'
    self.seq_id2 = "chr2"
    self.annot6 = self.annodb2.new_annotation('A2',
                                              (self.seq_id2, 200, 300, 1))
    self.annot7 = self.annodb2.new_annotation('B2',
                                              (self.seq_id2, 100, 150, 1))
    self.annot8 = self.annodb2.new_annotation('C2',
                                              (self.seq_id2, 50, 75, -1))
    self.annot9 = self.annodb2.new_annotation('D2',
                                              (self.seq_id2, 400, 500, 1))
    self.annot10 = self.annodb2.new_annotation('E2',
                                               (self.seq_id2, 600, 700, 1))
    # Nested list over annodb2 -- these are our "features".
    self.nlmsa2 = cnestedlist.NLMSA(pathstem='test.mapping.foo2',
                                    mode='w', pairwiseMode=True)
    for annot_id in self.annodb2:
        self.nlmsa2.addAnnotation(self.annodb2[annot_id])
    self.nlmsa2.build()

    # --- register everything with worldbase (update WORLDBASEPATH) ---
    self.annodb1.__doc__ = 'annodb1 db'
    self.nlmsa1.__doc__ = 'annodb1 nlmsa'
    self.annodb2.__doc__ = 'annodb2 db'
    self.nlmsa2.__doc__ = 'annodb2 nlmsa'
    worldbase.add_resource('Test.Annotations.annodb1_db', self.annodb1)
    worldbase.add_resource('Test.Annotations.annodb2_db', self.annodb2)
    worldbase.add_resource('Test.Annotations.annodb1', self.nlmsa1)
    worldbase.add_resource('Test.Annotations.annodb2', self.nlmsa2)
    worldbase.commit()