def test_multiblast_parser_long(self):
    """Testing multiblast parser with long input.

    Reads ``multiblast_long_output.txt`` into an in-memory pairwise NLMSA
    using the ``sp_all_hbb`` sequence file, then compares the per-sequence
    results with the rows of ``multiblast_long_correct.txt``.
    """
    longerFile = testutil.datafile("sp_all_hbb")
    sp_all_hbb = seqdb.SequenceFileDB(longerFile)
    try:
        filename = testutil.datafile("multiblast_long_output.txt")
        multiblast_output = open(filename, "r")
        try:
            al = cnestedlist.NLMSA("blasthits", "memory", pairwiseMode=True,
                                   bidirectional=False)
            al = blast.read_blast_alignment(multiblast_output, sp_all_hbb,
                                            self.prot, al)
        finally:
            multiblast_output.close()
        al.build()

        results = []
        for seq in sp_all_hbb.values():
            try:
                results.append(al[seq])
            except KeyError:
                # Sequences missing from the alignment are skipped.
                pass

        # open() instead of the Python-2-only file() builtin, matching the
        # way the BLAST report is opened above.
        correctfile = open(testutil.datafile("multiblast_long_correct.txt"),
                           "r")
        try:
            correct = []
            for line in correctfile:
                t = line.split()
                correct.append((t[0], t[1], float(t[2])))
        finally:
            correctfile.close()

        check_results(results, correct, pair_identity_tuple)
    finally:
        # Release the sequence file once the comparison is finished.
        sp_all_hbb.close()
def trash_intermediate_files(self):
    """Delete the ``.seqlen`` and ``.pureseq`` files next to dnaseq.fasta.

    Each file is unlinked in its own ``try`` block so that a failure on
    one of them (for example because it does not exist) does not prevent
    the other from being removed.
    """
    for suffix in ('seqlen', 'pureseq'):
        path = testutil.datafile('dnaseq.fasta.' + suffix)
        try:
            os.unlink(path)
        except OSError:
            # Best-effort cleanup: ignore files that cannot be removed.
            pass
def setUp(self):
    """Open the DNA and protein test databases, skipping without BLAST.

    Sets ``self.dna`` (from ``hbb1_mouse.fa``) and ``self.prot`` (from
    ``sp_hbb1``).

    Raises:
        SkipTest: when ``testutil.blast_enabled()`` returns a false value.
    """
    if not testutil.blast_enabled():
        # Call-style raise is valid on both Python 2 and Python 3; the
        # comma form is Python-2-only.
        raise SkipTest("no BLAST installed")

    hbb1_mouse = testutil.datafile('hbb1_mouse.fa')
    sp_hbb1 = testutil.datafile('sp_hbb1')

    self.dna = seqdb.SequenceFileDB(hbb1_mouse)
    self.prot = seqdb.SequenceFileDB(sp_hbb1)
def setUp(self):
    """Open the four sequence files used by this test class.

    Sets ``self.dna``, ``self.dna_rc``, ``self.prot`` and ``self.gapping``.
    """
    # Resolve all data-file paths first...
    dna_path = testutil.datafile("hbb1_mouse.fa")
    dna_rc_path = testutil.datafile("hbb1_mouse_rc.fa")
    prot_path = testutil.datafile("sp_hbb1")
    gapping_path = testutil.datafile("gapping.fa")

    # ...then open each one as a SequenceFileDB.
    open_db = seqdb.SequenceFileDB
    self.dna = open_db(dna_path)
    self.dna_rc = open_db(dna_rc_path)
    self.prot = open_db(prot_path)
    self.gapping = open_db(gapping_path)
def test_do_gunzip(self):
    """Check do_gunzip() output for test.gz against a known MD5 digest."""
    archive = testutil.datafile("test.gz")
    target = testutil.tempdatafile("test4.out")

    do_gunzip(archive, target)

    digest = testutil.get_file_md5(target).hexdigest()
    self.assertEqual(digest, "1db5a21a01ba465fd26c3203d6589b0e")
def test_blastx_parser(self):
    """Parse a stored blastx report and compare it with expected tuples.

    Each result is reduced to ``(len(src), len(dest), len(src.sequence),
    pIdentity)`` before being compared with the reference list.
    """
    report = open(testutil.datafile("blastx_output.txt"), "r")
    query_id = "gi|171854975|dbj|AB364477.1|"
    queries = {query_id: self.dna[query_id]}
    try:
        hits = blast.read_blast_alignment(
            report,
            queries,
            blast.BlastIDIndex(self.prot),
            translateSrc=True,
        )
    finally:
        report.close()

    expected = [
        (143, 143, 429, 0.53146853146853146),
        (143, 145, 429, 0.28275862068965518),
        (143, 145, 429, 0.28965517241379313),
        (143, 145, 429, 0.29655172413793102),
        (143, 145, 429, 0.30344827586206896),
        (144, 144, 432, 0.4513888888888889),
        (144, 144, 432, 0.4513888888888889),
        (145, 145, 435, 0.45517241379310347),
        (145, 145, 435, 0.51034482758620692),
        (146, 142, 438, 0.35616438356164382),
        (146, 146, 438, 0.4589041095890411),
        (146, 146, 438, 0.46575342465753422),
        (146, 146, 438, 0.4726027397260274),
        (146, 146, 438, 0.4726027397260274),
        (146, 146, 438, 0.4863013698630137),
        (146, 146, 438, 0.59589041095890416),
        (146, 146, 438, 0.62328767123287676),
        (146, 146, 438, 0.66438356164383561),
        (146, 146, 438, 0.74657534246575341),
        (146, 146, 438, 0.91095890410958902),
        (146, 146, 438, 0.97945205479452058),
    ]

    def summarize(t):
        # Reduce one (src, dest, edge) triple to a comparable tuple.
        return (len(t[0]), len(t[1]), len(t[0].sequence), t[2].pIdentity())

    check_results([hits], expected, summarize)
def setUp(self,**kwargs):
    """Register a small DNA db, annotation db and NLMSA map in worldbase.

    After the base-class setup, the three resources are stored under
    ``Bio.Test`` (``dna``, ``annoDB``, ``map``), a many-to-many schema
    relation is bound as ``exons``, and the result is committed before the
    cache is cleared.  The NLMSA and the BlastDB are closed afterwards.
    """
    TestBase.setUp(self)
    fasta_path = testutil.datafile('dnaseq.fasta')
    nlmsa_path = testutil.tempdatafile('tryannot')

    dna_db = seqdb.BlastDB(fasta_path)
    try:
        dna_db.__doc__ = 'little dna'
        worldbase.Bio.Test.dna = dna_db

        # id -> (sequence id, start, stop, name)
        slice_info = {
            1: ('seq1', 5, 10, 'fred'),
            2: ('seq1', -60, -50, 'bob'),
            3: ('seq2', -20, -10, 'mary'),
        }
        annots = seqdb.AnnotationDB(
            slice_info, dna_db,
            sliceAttrDict=dict(id=0, start=1, stop=2, name=3))
        annots.__doc__ = 'trivial annotation'
        worldbase.Bio.Test.annoDB = annots

        annot_map = cnestedlist.NLMSA(nlmsa_path, 'w', pairwiseMode=True,
                                      bidirectional=False)
        try:
            for key in annots:
                annot_map.addAnnotation(annots[key])
            annot_map.build(verbose=False)
            annot_map.__doc__ = 'trivial map'
            worldbase.Bio.Test.map = annot_map
            worldbaseSchema.Bio.Test.map = metabase.ManyToManyRelation(
                dna_db, annots, bindAttrs=('exons',))
            worldbase.commit()
            worldbase.clear_cache()
        finally:
            annot_map.close()
    finally:
        dna_db.close()
def test_do_unzip(self):
    """Check do_unzip(singleFile=True) on test.zip against a known MD5."""
    archive = testutil.datafile('test.zip')
    extracted = testutil.tempdatafile('test2.out')

    do_unzip(archive, extracted, singleFile=True)

    checksum = testutil.get_file_md5(extracted)
    self.assertEqual(checksum.hexdigest(),
                     '12ada4c51ccb4c7277c16f1a3c000b90')
def setUp(self, **kwargs):
    """Store a DNA db, an annotation db and their NLMSA map in pygrData.

    Resources are saved as ``Bio.Test.dna``, ``Bio.Test.annoDB`` and
    ``Bio.Test.map``; the map gets a many-to-many schema relation bound as
    ``exons``.  The metabase is committed and its cache cleared, then the
    NLMSA and the BlastDB are closed.
    """
    TestBase.setUp(self)
    fasta_path = testutil.datafile("dnaseq.fasta")
    map_path = testutil.tempdatafile("tryannot")

    dna = seqdb.BlastDB(fasta_path)
    try:
        dna.__doc__ = "little dna"
        self.pygrData.Bio.Test.dna = dna

        # id -> (sequence id, start, stop, name)
        rows = {
            1: ("seq1", 5, 10, "fred"),
            2: ("seq1", -60, -50, "bob"),
            3: ("seq2", -20, -10, "mary"),
        }
        column_index = dict(id=0, start=1, stop=2, name=3)
        annotations = seqdb.AnnotationDB(rows, dna,
                                         sliceAttrDict=column_index)
        annotations.__doc__ = "trivial annotation"
        self.pygrData.Bio.Test.annoDB = annotations

        msa = cnestedlist.NLMSA(map_path, "w", pairwiseMode=True,
                                bidirectional=False)
        try:
            for ann_id in annotations:
                msa.addAnnotation(annotations[ann_id])
            msa.build()
            msa.__doc__ = "trivial map"
            self.pygrData.Bio.Test.map = msa
            self.schema.Bio.Test.map = metabase.ManyToManyRelation(
                dna, annotations, bindAttrs=("exons",))
            self.metabase.commit()
            self.metabase.clear_cache()
        finally:
            msa.close()
    finally:
        dna.close()
def test_run_unzip(self):
    """Check uncompress_file() on test.zip against a known MD5 digest."""
    source = testutil.datafile('test.zip')
    destination = testutil.tempdatafile('test.out')

    uncompress_file(source, newpath=destination, singleFile=True)

    digest = testutil.get_file_md5(destination).hexdigest()
    self.assertEqual(digest, '12ada4c51ccb4c7277c16f1a3c000b90')
def setUp(self, **kwargs):
    """Populate ``self.pygrData.Bio.Test`` with dna, annoDB and map.

    Runs the base-class setup, saves the three resources and the ``exons``
    schema binding, commits the metabase and clears its cache.  Both the
    NLMSA and the BlastDB are closed on the way out, even on error.
    """
    TestBase.setUp(self)
    seq_file = testutil.datafile('dnaseq.fasta')
    annot_file = testutil.tempdatafile('tryannot')
    test_ns = self.pygrData.Bio.Test

    blast_db = seqdb.BlastDB(seq_file)
    try:
        blast_db.__doc__ = 'little dna'
        test_ns.dna = blast_db

        # id -> (sequence id, start, stop, name)
        intervals = {1: ('seq1', 5, 10, 'fred'),
                     2: ('seq1', -60, -50, 'bob'),
                     3: ('seq2', -20, -10, 'mary')}
        anno_db = seqdb.AnnotationDB(
            intervals, blast_db,
            sliceAttrDict=dict(id=0, start=1, stop=2, name=3))
        anno_db.__doc__ = 'trivial annotation'
        test_ns.annoDB = anno_db

        alignment = cnestedlist.NLMSA(annot_file, 'w', pairwiseMode=True,
                                      bidirectional=False)
        try:
            for ident in anno_db:
                alignment.addAnnotation(anno_db[ident])
            alignment.build()
            alignment.__doc__ = 'trivial map'
            test_ns.map = alignment
            self.schema.Bio.Test.map = metabase.ManyToManyRelation(
                blast_db, anno_db, bindAttrs=('exons', ))
            self.metabase.commit()
            self.metabase.clear_cache()
        finally:
            alignment.close()
    finally:
        blast_db.close()
def test_do_gunzip(self):
    """Gunzip test.gz via do_gunzip() and compare the output's MD5."""
    gz_path = testutil.datafile('test.gz')
    out_path = testutil.tempdatafile('test4.out')

    do_gunzip(gz_path, out_path)

    md5 = testutil.get_file_md5(out_path)
    self.assertEqual(md5.hexdigest(), '1db5a21a01ba465fd26c3203d6589b0e')
def test_schema(self):
    """Save two mappings with schema relations and inspect the cache.

    After the first commit and cache clear, only the resources touched
    afterwards are expected in the resource cache; the writer's storage
    graph must contain at least the four expected database IDs.
    """
    fasta = testutil.datafile('sp_hbb1')
    seq_ns = worldbase.Bio.Seq
    schema_ns = worldbase.schema.Bio.Seq

    # First round: sp -> sp2 mapping, then commit and clear the cache.
    second_db = seqdb.BlastDB(fasta)
    second_db.__doc__ = 'another sp'
    seq_ns.sp2 = second_db
    first_db = seq_ns.Swissprot.sp42()
    relation = mapping.Mapping(sourceDB=first_db, targetDB=second_db)
    relation.__doc__ = 'sp -> sp2'
    seq_ns.testmap = relation
    schema_ns.testmap = metabase.OneToManyRelation(first_db, second_db)
    worldbase.commit()
    worldbase.clear_cache()

    # Second round: sp3 -> (reloaded) sp2 mapping.
    third_db = seqdb.BlastDB(fasta)
    third_db.__doc__ = 'sp number 3'
    seq_ns.sp3 = third_db
    second_db = seq_ns.sp2()
    relation = mapping.Mapping(sourceDB=third_db, targetDB=second_db)
    relation.__doc__ = 'sp3 -> sp2'
    seq_ns.testmap2 = relation
    schema_ns.testmap2 = metabase.OneToManyRelation(third_db, second_db)

    cached_ids = sorted(worldbase._mdb.resourceCache.keys())
    assert cached_ids == ['Bio.Seq.sp2', 'Bio.Seq.sp3', 'Bio.Seq.testmap2']

    worldbase.commit()
    graph = worldbase._mdb.writer.storage.graph
    expected = set([
        'Bio.Annotation.annoDB',
        'Bio.Seq.Swissprot.sp42',
        'Bio.Seq.sp2',
        'Bio.Seq.sp3',
    ])
    missing = expected - set(graph.keys())
    self.EQ(len(missing), 0)
def test_basic_iadd(self):
    """``+=`` adds a sequence under a prefix taken from its db filepath."""
    fasta = testutil.datafile('dnaseq.fasta')
    first_db = SequenceFileDB(fasta)
    try:
        first_seq = first_db['seq1']

        self.db += first_seq

        assert first_seq in self.db
        first_name = (~self.db)[first_seq]
        assert first_name == 'dnaseq.seq1', first_name

        # Repeat with a second db whose filepath has been overridden.
        second_db = SequenceFileDB(fasta)
        try:
            second_db.filepath = 'foo'
            second_seq = second_db['seq1']

            self.db += second_seq

            second_name = (~self.db)[second_seq]
            assert second_name == 'foo.seq1', second_name
        finally:
            second_db.close()
    finally:
        first_db.close()
def test_schema(self):
    """Save two mappings through pygr.Data and check cache and graph.

    The resource cache is listed after the first save/clear cycle, and the
    writer's storage graph is checked for the four expected database IDs
    after the second save.
    """
    fasta = testutil.datafile('sp_hbb1')

    # First mapping: sp -> sp2.
    other_sp = seqdb.BlastDB(fasta)
    other_sp.__doc__ = 'another sp'
    pygr.Data.Bio.Seq.sp2 = other_sp
    swissprot = pygr.Data.Bio.Seq.Swissprot.sp42()
    seq_map = mapping.Mapping(sourceDB=swissprot, targetDB=other_sp)
    seq_map.__doc__ = 'sp -> sp2'
    pygr.Data.Bio.Seq.testmap = seq_map
    pygr.Data.schema.Bio.Seq.testmap = \
        pygr.Data.OneToManyRelation(swissprot, other_sp)
    pygr.Data.save()
    pygr.Data.clear_cache()

    # Second mapping: sp3 -> sp2 (sp2 is fetched again after the clear).
    third_sp = seqdb.BlastDB(fasta)
    third_sp.__doc__ = 'sp number 3'
    pygr.Data.Bio.Seq.sp3 = third_sp
    other_sp = pygr.Data.Bio.Seq.sp2()
    seq_map = mapping.Mapping(sourceDB=third_sp, targetDB=other_sp)
    seq_map.__doc__ = 'sp3 -> sp2'
    pygr.Data.Bio.Seq.testmap2 = seq_map
    pygr.Data.schema.Bio.Seq.testmap2 = \
        pygr.Data.OneToManyRelation(third_sp, other_sp)

    # List all cached resources, in sorted order.
    cached = sorted(pygr.Data.getResource.resourceCache.keys())
    assert cached == ['Bio.Seq.sp2', 'Bio.Seq.sp3', 'Bio.Seq.testmap2']

    pygr.Data.save()
    graph = pygr.Data.getResource.writer.storage.graph
    wanted = set(['Bio.Annotation.annoDB', 'Bio.Seq.Swissprot.sp42',
                  'Bio.Seq.sp2', 'Bio.Seq.sp3'])
    present = set(graph.keys())
    self.EQ(len(wanted - present), 0)
def test_schema(self):
    """Register two mappings plus schema relations, then verify state.

    Checks the sorted resource-cache keys after the first commit/clear
    cycle and that the storage graph holds the four expected IDs after
    the second commit.
    """
    data_path = testutil.datafile('sp_hbb1')

    # sp -> sp2
    db_b = seqdb.BlastDB(data_path)
    db_b.__doc__ = 'another sp'
    worldbase.Bio.Seq.sp2 = db_b
    db_a = worldbase.Bio.Seq.Swissprot.sp42()
    link = mapping.Mapping(sourceDB=db_a, targetDB=db_b)
    link.__doc__ = 'sp -> sp2'
    worldbase.Bio.Seq.testmap = link
    worldbaseSchema.Bio.Seq.testmap = \
        metabase.OneToManyRelation(db_a, db_b)
    worldbase.commit()
    worldbase.clear_cache()

    # sp3 -> sp2, with sp2 re-requested from worldbase
    db_c = seqdb.BlastDB(data_path)
    db_c.__doc__ = 'sp number 3'
    worldbase.Bio.Seq.sp3 = db_c
    db_b = worldbase.Bio.Seq.sp2()
    link = mapping.Mapping(sourceDB=db_c, targetDB=db_b)
    link.__doc__ = 'sp3 -> sp2'
    worldbase.Bio.Seq.testmap2 = link
    worldbaseSchema.Bio.Seq.testmap2 = \
        metabase.OneToManyRelation(db_c, db_b)

    cache_keys = sorted(worldbase._mdb.resourceCache.keys())
    assert cache_keys == ['Bio.Seq.sp2', 'Bio.Seq.sp3', 'Bio.Seq.testmap2']

    worldbase.commit()
    storage_graph = worldbase._mdb.writer.storage.graph
    required = set(['Bio.Annotation.annoDB', 'Bio.Seq.Swissprot.sp42',
                    'Bio.Seq.sp2', 'Bio.Seq.sp3'])
    absent = required - set(storage_graph.keys())
    self.EQ(len(absent), 0)
def test_schema(self):
    """Store two mappings via ``self.pygrData`` and check the metabase.

    The metabase resource cache must list exactly sp2, sp3 and testmap2
    after the first commit/clear cycle; the writer's storage graph must
    include the four expected database IDs after the second commit.
    """
    sp_file = testutil.datafile("sp_hbb1")
    seq_ns = self.pygrData.Bio.Seq

    # Round one: sp -> sp2, committed and flushed from the cache.
    sp_two = seqdb.BlastDB(sp_file)
    sp_two.__doc__ = "another sp"
    seq_ns.sp2 = sp_two
    sp_one = seq_ns.Swissprot.sp42()
    mapper = mapping.Mapping(sourceDB=sp_one, targetDB=sp_two)
    mapper.__doc__ = "sp -> sp2"
    seq_ns.testmap = mapper
    self.schema.Bio.Seq.testmap = metabase.OneToManyRelation(sp_one, sp_two)
    self.metabase.commit()
    self.metabase.clear_cache()

    # Round two: sp3 -> sp2, where sp2 is requested again.
    sp_three = seqdb.BlastDB(sp_file)
    sp_three.__doc__ = "sp number 3"
    seq_ns.sp3 = sp_three
    sp_two = seq_ns.sp2()
    mapper = mapping.Mapping(sourceDB=sp_three, targetDB=sp_two)
    mapper.__doc__ = "sp3 -> sp2"
    seq_ns.testmap2 = mapper
    self.schema.Bio.Seq.testmap2 = metabase.OneToManyRelation(sp_three,
                                                              sp_two)

    in_cache = sorted(self.metabase.resourceCache.keys())
    assert in_cache == ["Bio.Seq.sp2", "Bio.Seq.sp3", "Bio.Seq.testmap2"]

    self.metabase.commit()
    stored = set(self.metabase.writer.storage.graph.keys())
    needed = set([
        "Bio.Annotation.annoDB",
        "Bio.Seq.Swissprot.sp42",
        "Bio.Seq.sp2",
        "Bio.Seq.sp3",
    ])
    self.EQ(len(needed - stored), 0)
def test_run_gunzip(self):
    """Check uncompress_file() on test.gz against a known MD5 digest."""
    compressed = testutil.datafile('test.gz')
    restored = testutil.tempdatafile('test3.out')

    uncompress_file(compressed, newpath=restored)

    md5sum = testutil.get_file_md5(restored)
    self.assertEqual(md5sum.hexdigest(),
                     '1db5a21a01ba465fd26c3203d6589b0e')
def test_headerfile_create_with_trypath(self):
    """Build a PrefixUnionDict from a header file using ``trypath``."""
    header_path = testutil.datafile('prefixUnionDict-1.txt')
    search_dirs = [os.path.dirname(header_path)]

    pud = PrefixUnionDict(filename=header_path, trypath=search_dirs)
    try:
        assert len(pud) == 2, pud.prefixDict
    finally:
        close_pud_dicts(pud)
def test_headerfile_create(self):
    """Build a PrefixUnionDict from a header file and check its contents."""
    header_path = testutil.datafile('prefixUnionDict-1.txt')

    pud = PrefixUnionDict(filename=header_path)
    try:
        assert len(pud) == 2
        assert 'a.seq1' in pud
    finally:
        close_pud_dicts(pud)
def test_inverse_add_behavior(self):
    """Inverse lookup of a sequence from another db must not raise."""
    fasta = testutil.datafile('dnaseq.fasta')
    other_db = SequenceFileDB(fasta)
    try:
        foreign_seq = other_db['seq1']
        # The lookup itself is the test; its result is not inspected.
        (~self.db)[foreign_seq]
    finally:
        # Always release the sequence file, whether or not the lookup
        # raised.
        other_db.close()
def test_funny_key2(self):
    """Check handling of an ID that contains multiple separators."""
    fasta = testutil.datafile('funnyseq.fasta')
    funny_db = SequenceFileDB(fasta)
    try:
        union = PrefixUnionDict({'prefix': funny_db})
        # Only the prefix must be split off; the lookup must not raise.
        union['prefix.seq.2.even.longer']
    finally:
        funny_db.close()
def test_cache(self):
    """Sequence slice cache mechanics.

    Walks through the life cycle of a ``cacheHint`` entry: the entry
    first holds only interval coordinates, gains a third element once the
    sequence string has been accessed, and disappears from ``db._cache``
    after the owner object is deleted.

    NOTE(review): the checks depend on object lifetimes (``del`` and
    ``gc.collect()``), so statement order matters here.
    """
    dnaseq = testutil.datafile('dnaseq.fasta')
    db = SequenceFileDB(dnaseq)

    try:
        # create cache components
        cacheDict = {}
        cacheHint = db.cacheHint

        # get seq1
        seq1 = db['seq1']

        # _cache is only created on first cache attempt
        assert not hasattr(db, '_cache')

        # build an 'owner' object
        class AnonymousOwner(object):
            pass
        owner = AnonymousOwner()

        # save seq1 in cache
        cacheDict['seq1'] = (seq1.start, seq1.stop)
        cacheHint(cacheDict, owner)
        del cacheDict                   # 'owner' now holds reference

        # peek into _cache and assert that only the ival coordinates
        # are stored
        v = db._cache.values()[0]
        assert len(v['seq1']) == 2
        del v

        # force a cache access & check that now we've stored actual string
        ival = str(seq1[5:10])
        v = db._cache.values()[0]
        # ...check that we've stored actual string
        assert len(v['seq1']) == 3

        # again force cache access, this time to the stored sequence string
        ival = str(seq1[5:10])

        # now, eliminate all references to the cache proxy dict
        del owner

        # trash unused objects - not strictly necessary, because there are
        # no islands of circular references & so all objects are already
        # deallocated, but that's implementation dependent.
        gc.collect()

        # ok, cached values should now be gone.
        v = db._cache.values()
        assert len(v) == 0
    finally:
        # always release the sequence file, even when an assertion fails
        db.close()
def test_multiblast_long(self):
    """Run an all-vs-all BLAST of sp_all_hbb into an in-memory NLMSA.

    Multi-sequence BLAST against a long db, intended to assess thread
    safety (see issue 79).  Only successful completion is checked.
    """
    query_db = seqdb.SequenceFileDB(testutil.datafile('sp_all_hbb'))

    mapper = blast.BlastMapping(self.prot, verbose=False)
    hits = cnestedlist.NLMSA('blasthits', 'memory', pairwiseMode=True,
                             bidirectional=False)

    # No single query sequence: every sequence in query_db is searched.
    mapper(None, hits, queryDB=query_db)

    # Construct the alignment indexes.
    hits.build()
def test_headerfile_create_fail(self):
    """Opening ``prefixUnionDict-3.txt`` must raise IOError.

    Raises:
        AssertionError: if the PrefixUnionDict is created without error.
    """
    header = testutil.datafile('prefixUnionDict-3.txt')
    try:
        db = PrefixUnionDict(filename=header)
    except IOError:
        # Expected outcome: nothing was opened, nothing to clean up.
        return

    # The constructor unexpectedly succeeded.  ``db`` is guaranteed to be
    # bound here, so cleanup cannot fail with a NameError, and the explicit
    # raise still fires when asserts are stripped with -O.
    close_pud_dicts(db)
    raise AssertionError("should not reach this point")
def test_nlmsaslice_cache(self): "NLMSASlice sequence caching & removal" # set up sequences dnaseq = testutil.datafile('dnaseq.fasta') db = SequenceFileDB(dnaseq, autoGC=-1) # use pure WeakValueDict... try: gc.collect() assert len( db._weakValueDict) == 0, '_weakValueDict should be empty' seq1, seq2 = db['seq1'], db['seq2'] assert len(db._weakValueDict)==2, \ '_weakValueDict should have 2 seqs' # build referencing NLMSA mymap = NLMSA('test', 'memory', db, pairwiseMode=True) mymap += seq1 mymap[seq1] += seq2 mymap.build() # check: no cache assert not hasattr(db, '_cache'), 'should be no cache yet' seq1, seq2 = db['seq1'], db['seq2'] # re-retrieve # now retrieve a NLMSASlice, forcing entry of seq into cache ival = seq1[5:10] x = mymap[ival] assert len(db._cache.values()) != 0 n1 = len(db._cache) assert n1 == 1, "should be exactly one cache entry, not %d" % \ (n1, ) # ok, now trash referencing arguments & make sure of cleanup del x gc.collect() assert len(db._cache.values()) == 0 n2 = len(db._cache) assert n2 == 0, '%d objects remain; cache memory leak!' % n2 # FAIL because of __dealloc__ error in cnestedlist.NLMSASlice. # Drop our references, the cache should empty. del mymap, ival, seq1, seq2 gc.collect() # check that db._weakValueDict cache is empty assert len( db._weakValueDict) == 0, '_weakValueDict should be empty' finally: db.close()
def test_multiblast_long(self):
    """Run an all-vs-all BLAST of sp_all_hbb; skip when BLAST is absent.

    Only successful completion of the search and of the index build is
    checked.

    Raises:
        SkipTest: when ``testutil.blast_enabled()`` returns a false value.
    """
    blast_available = testutil.blast_enabled()
    if not blast_available:
        raise SkipTest("no BLAST installed")

    long_path = testutil.datafile("sp_all_hbb")
    all_hbb = seqdb.SequenceFileDB(long_path)

    search = blast.BlastMapping(self.prot, verbose=False)
    alignment = cnestedlist.NLMSA("blasthits", "memory", pairwiseMode=True,
                                  bidirectional=False)

    # All vs all: no single query sequence, the whole db is the query.
    search(None, alignment, queryDB=all_hbb)

    # Construct the alignment indexes.
    alignment.build()
def test_headerfile_create_conflict(self):
    """Passing both a header file and a prefixDict must raise TypeError."""
    sub_database = SequenceFileDB(self.dbfile)
    try:
        header_path = testutil.datafile('prefixUnionDict-1.txt')
        conflicting = {'foo': sub_database}
        try:
            db = PrefixUnionDict(filename=header_path,
                                 prefixDict=conflicting)
            assert 0, "should not get here"
        except TypeError:
            # Expected: the two arguments conflict.
            pass
    finally:
        sub_database.close()
def test_nlmsaslice_cache(self): "NLMSASlice sequence caching & removal" # set up sequences dnaseq = testutil.datafile('dnaseq.fasta') db = SequenceFileDB(dnaseq, autoGC=-1) # use pure WeakValueDict... try: gc.collect() assert len(db._weakValueDict)==0, '_weakValueDict should be empty' seq1, seq2 = db['seq1'], db['seq2'] assert len(db._weakValueDict)==2, \ '_weakValueDict should have 2 seqs' # build referencing NLMSA mymap = NLMSA('test', 'memory', db, pairwiseMode=True) mymap += seq1 mymap[seq1] += seq2 mymap.build() # check: no cache assert not hasattr(db, '_cache'), 'should be no cache yet' seq1, seq2 = db['seq1'], db['seq2'] # re-retrieve # now retrieve a NLMSASlice, forcing entry of seq into cache ival = seq1[5:10] x = mymap[ival] assert len(db._cache.values()) != 0 n1 = len(db._cache) assert n1 == 1, "should be exactly one cache entry, not %d" % \ (n1, ) # ok, now trash referencing arguments & make sure of cleanup del x gc.collect() assert len(db._cache.values()) == 0 n2 = len(db._cache) assert n2 == 0, '%d objects remain; cache memory leak!' % n2 # FAIL because of __dealloc__ error in cnestedlist.NLMSASlice. # Drop our references, the cache should empty. del mymap, ival, seq1, seq2 gc.collect() # check that db._weakValueDict cache is empty assert len(db._weakValueDict)==0, '_weakValueDict should be empty' finally: db.close()
def test_blastp_parser(self):
    """Parse a stored blastp report for HBB1_XENLA and verify the hits."""
    query_id = "HBB1_XENLA"
    report = open(testutil.datafile("blastp_output.txt"), "r")
    queries = {query_id: self.prot[query_id]}
    subject_index = blast.BlastIDIndex(self.prot)
    try:
        parsed = blast.read_blast_alignment(report, queries, subject_index)
        # Fetch the hits while the report file is still open.
        hits = parsed[self.prot[query_id]]
    finally:
        report.close()

    check_results([hits], blastp_correct_results, pair_identity_tuple)
def test_headerfile_create_conflict(self):
    """A non-empty prefixDict combined with a header file is a conflict.

    The PrefixUnionDict constructor is expected to raise TypeError.
    """
    member_db = SequenceFileDB(self.dbfile)
    try:
        pud_header = testutil.datafile('prefixUnionDict-1.txt')
        try:
            db = PrefixUnionDict(
                filename=pud_header,
                prefixDict={'foo': member_db},
            )
            assert 0, "should not get here"
        except TypeError:
            pass  # the conflict was detected as expected
    finally:
        member_db.close()
def test_multiblast_parser(self):
    """Parse a stored multi-query BLAST report and verify every hit set."""
    report_path = testutil.datafile("multiblast_output.txt")
    report = open(report_path, "r")
    try:
        hits = cnestedlist.NLMSA("blasthits", "memory", pairwiseMode=True,
                                 bidirectional=False)
        subject_index = blast.BlastIDIndex(self.prot)
        hits = blast.read_blast_alignment(report, self.prot,
                                          subject_index, hits)
    finally:
        report.close()
    hits.build()

    # One alignment slice per protein in the database.
    per_sequence = []
    for protein in self.prot.values():
        per_sequence.append(hits[protein])

    check_results(per_sequence, correct_multiblast_results,
                  pair_identity_tuple)
def test_no_db_info(self):
    """A db with no filepath and no persistent id gets a 'noname' prefix."""
    fasta = testutil.datafile('dnaseq.fasta')
    anonymous_db = SequenceFileDB(fasta)
    try:
        added = anonymous_db['seq1']

        # Strip every piece of identifying information from the db.
        assert getattr(anonymous_db, '_persistent_id', None) is None
        del anonymous_db.filepath

        self.db += added

        assigned = (~self.db)[added]
        assert assigned == 'noname0.seq1'
    finally:
        anonymous_db.close()
def test_iadd_db_twice(self):
    """Adding the same sequence twice must leave its name unchanged."""
    fasta = testutil.datafile('dnaseq.fasta')
    source_db = SequenceFileDB(fasta)
    try:
        sequence = source_db['seq1']
        inverse = lambda: (~self.db)[sequence]

        self.db += sequence
        before = inverse()

        # The second addition should do nothing...
        self.db += sequence
        after = inverse()

        # ...leaving the sequence with the same name.
        assert before == after
    finally:
        source_db.close()
def test_inverse_noadd_behavior(self):
    """With addAll=False, inverse lookup of a foreign seq raises KeyError.

    Compare with test_inverse_add_behavior.
    """
    union = SeqPrefixUnionDict(addAll=False)
    fasta = testutil.datafile('dnaseq.fasta')
    foreign_db = SequenceFileDB(fasta)
    try:
        foreign_seq = foreign_db['seq1']
        try:
            name = (~union)[foreign_seq]
            assert 0, "should not get here"
        except KeyError:
            # Expected: the sequence was never added to the union.
            pass
    finally:
        foreign_db.close()
def populate_swissprot():
    """Populate the current worldbase with swissprot data.

    Stores a BlastDB, a sequence fragment, a self-mapping with its schema
    relation, an annotation db and an NLMSA map (with its schema relation)
    under the ``Bio.Seq`` and ``Bio.Annotation`` namespaces.
    """
    # Build a BlastDB out of the sequences.
    fasta = testutil.datafile('sp_hbb1')
    swissprot = seqdb.BlastDB(fasta)
    swissprot.__doc__ = 'little swissprot'
    worldbase.Bio.Seq.Swissprot.sp42 = swissprot

    # Also store a fragment of one sequence.
    globin = swissprot['HBB1_TORMA']
    fragment = globin[10:35]
    fragment.__doc__ = 'fragment'
    worldbase.Bio.Seq.frag = fragment

    # Build a mapping from the db onto itself.
    self_map = mapping.Mapping(sourceDB=swissprot, targetDB=swissprot)
    partner = swissprot['PRCA_ANAVA']
    self_map[globin] = partner
    self_map.__doc__ = 'map sp to itself'
    worldbase.Bio.Seq.spmap = self_map
    worldbase.schema.Bio.Seq.spmap = metabase.OneToManyRelation(
        swissprot, swissprot, bindAttrs=('buddy', ))

    # Create an annotation database holding a single annotation.
    slice_layout = dict(id=0, start=1, stop=2)
    annotations = seqdb.AnnotationDB({1: ('HBB1_TORMA', 10, 50)},
                                     swissprot, sliceAttrDict=slice_layout)
    first_annotation = annotations[1]

    # Generate the names where the NLMSA will be stored.
    scratch = testutil.TempDir('exonAnnot')
    nlmsa_path = scratch.subfile('cnested')

    annotation_map = cnestedlist.NLMSA(nlmsa_path, 'w', pairwiseMode=True,
                                       bidirectional=False)
    annotation_map.addAnnotation(first_annotation)
    annotation_map.build()

    # Save both resources and bind the annotations as the 'exons'
    # attribute.
    annotations.__doc__ = 'a little annotation db'
    annotation_map.__doc__ = 'a little map'
    worldbase.Bio.Annotation.annoDB = annotations
    worldbase.Bio.Annotation.map = annotation_map
    worldbase.schema.Bio.Annotation.map = metabase.ManyToManyRelation(
        swissprot, annotations, bindAttrs=('exons', ))
def test_iadd_duplicate_seqdb(self):
    """Adding seqs from two dbs opened on the same file raises ValueError."""
    fasta = testutil.datafile('dnaseq.fasta')
    first_db = SequenceFileDB(fasta)
    try:
        second_db = SequenceFileDB(fasta)
        try:
            first_seq = first_db['seq1']
            second_seq = second_db['seq1']

            self.db += first_seq
            try:
                # Second db opened on the same file: must be rejected.
                self.db += second_seq
                assert 0, "should never reach this point"
            except ValueError:
                pass
        finally:
            second_db.close()
    finally:
        first_db.close()
def test_generic_build(self):
    """GenericBuilder construction of the BlastDB.

    A pickled GenericBuilder is expected to unpickle into a database with
    the 24 sequence IDs of ``sp_hbb1``.
    """
    fasta = testutil.datafile('sp_hbb1')
    builder = GenericBuilder('BlastDB', fasta)
    pickled = pickle.dumps(builder)
    db = pickle.loads(pickled)  # force construction of the BlastDB

    self.EQ(len(db), 24)

    actual_ids = sorted(db)
    wanted_ids = sorted([
        'HBB0_PAGBO', 'HBB1_ANAMI', 'HBB1_CYGMA', 'HBB1_IGUIG',
        'HBB1_MOUSE', 'HBB1_ONCMY', 'HBB1_PAGBO', 'HBB1_RAT',
        'HBB1_SPHPU', 'HBB1_TAPTE', 'HBB1_TORMA', 'HBB1_TRICR',
        'HBB1_UROHA', 'HBB1_VAREX', 'HBB1_XENBO', 'HBB1_XENLA',
        'HBB1_XENTR', 'MYG_DIDMA', 'MYG_ELEMA', 'MYG_ERIEU',
        'MYG_ESCGI', 'MYG_GALCR', 'PRCA_ANASP', 'PRCA_ANAVA',
    ])
    self.EQ(wanted_ids, actual_ids)
def test_headerfile_write(self):
    """Write a header file and check it reloads with the same contents."""
    source_header = testutil.datafile('prefixUnionDict-2.txt')

    original = PrefixUnionDict(filename=source_header)
    try:
        assert len(original) == 4
        for key in ('a.seq1', 'b.seq1'):
            assert key in original

        written_header = testutil.tempdatafile('prefixUnionDict-write.txt')
        original.writeHeaderFile(written_header)
    finally:
        close_pud_dicts(original)

    # Reload from the freshly written header, searching the source
    # header's directory.
    search_dirs = [os.path.dirname(source_header)]
    reloaded = PrefixUnionDict(filename=written_header, trypath=search_dirs)
    try:
        assert len(reloaded) == 4
        for key in ('a.seq1', 'b.seq1'):
            assert key in reloaded
    finally:
        close_pud_dicts(reloaded)
def test_xmlrpc(self):
    """Run the shared checks against the XMLRPC server, then test writes.

    Saving a new resource is expected to raise ValueError; if it does not,
    a KeyError is raised to fail the test.
    """
    pygr.Data.clear_cache()  # force all requests to reload
    server_url = "http://localhost:%s" % self.server.port
    pygr.Data.update(server_url)

    for check in (check_match, check_dir, check_dir_noargs,
                  check_dir_download, check_dir_re, check_bind,
                  check_bind2):
        check(self)

    fasta = testutil.datafile('sp_hbb1')
    extra_db = seqdb.BlastDB(fasta)
    extra_db.__doc__ = 'another sp'
    try:
        pygr.Data.Bio.Seq.sp2 = extra_db
        pygr.Data.save()
        failure = 'failed to catch bad attempt to write to XMLRPC server'
        raise KeyError(failure)
    except ValueError:
        pass
def test_xmlrpc(self):
    """Run the shared checks against the XMLRPC resource, then test writes.

    Committing a new resource is expected to raise ValueError; if it does
    not, a KeyError is raised to fail the test.
    """
    # Force all future requests to reload.
    worldbase.clear_cache()

    # Add our test XMLRPC resource.
    port = self.server.port
    worldbase.update("http://localhost:%s" % port)

    # Run all our tests.
    shared_checks = [
        check_match,
        check_dir,
        check_dir_noargs,
        check_dir_download,
        check_dir_re,
        check_bind,
        check_bind2,
    ]
    for run_check in shared_checks:
        run_check(self)

    # Test the read-only checks.
    sp_path = testutil.datafile('sp_hbb1')
    new_db = seqdb.BlastDB(sp_path)
    new_db.__doc__ = 'another sp'
    try:
        worldbase.Bio.Seq.sp2 = new_db
        worldbase.commit()
        raise KeyError(
            'failed to catch bad attempt to write to XMLRPC server')
    except ValueError:
        pass
def setUp(self):
    """Create ``self.seqdb`` and wrap it in ``self.db`` under 'prefix'."""
    fasta_path = testutil.datafile('dnaseq.fasta')
    # dnaseq.fasta contains 'seq1', 'seq2'
    self.seqdb = SequenceFileDB(fasta_path)
    members = {'prefix': self.seqdb}
    self.db = SeqPrefixUnionDict(members)
def setUp(self):
    """Record the path of the dnaseq.fasta data file in ``self.dbfile``."""
    fasta_path = testutil.datafile('dnaseq.fasta')
    self.dbfile = fasta_path
def setUp(self):
    """Open dnaseq.fasta as ``self.db`` and empty its _weakValueDict."""
    fasta_path = testutil.datafile('dnaseq.fasta')
    database = SequenceFileDB(fasta_path)  # contains 'seq1', 'seq2'
    self.db = database
    # Start every test with a cleared cache.
    database._weakValueDict.clear()
def setUp(self):
    """Open hbb1_mouse.fa as ``self.dna`` and get its translation db."""
    fasta_path = testutil.datafile('hbb1_mouse.fa')
    dna_db = seqdb.SequenceFileDB(fasta_path)
    self.dna = dna_db
    self.tdb = translationDB.get_translation_db(dna_db)