def test_multiblast_parser_long(self):
        """Parse a long multi-query BLAST report and compare with stored results.

        The report ``multiblast_long_output.txt`` is read into an in-memory
        pairwise NLMSA.  Every sequence of ``sp_all_hbb`` is then looked up in
        that alignment and the collected hits are checked against the
        ``(id, id, float)`` rows stored in ``multiblast_long_correct.txt``.
        """
        longerFile = testutil.datafile("sp_all_hbb")
        sp_all_hbb = seqdb.SequenceFileDB(longerFile)

        filename = testutil.datafile("multiblast_long_output.txt")
        multiblast_output = open(filename, "r")
        try:
            al = cnestedlist.NLMSA("blasthits", "memory", pairwiseMode=True, bidirectional=False)
            al = blast.read_blast_alignment(multiblast_output, sp_all_hbb, self.prot, al)
        finally:
            multiblast_output.close()
        al.build()

        results = []
        for seq in sp_all_hbb.values():
            try:
                results.append(al[seq])
            except KeyError:
                # al[seq] raised KeyError -- presumably this sequence has no
                # hits in the report; it is simply left out of the results.
                pass

        # Use open(), as for the report above, rather than the Python-2-only
        # file() constructor.
        correctfile = open(testutil.datafile("multiblast_long_correct.txt"), "r")
        try:
            correct = []
            for line in correctfile:
                t = line.split()
                # each row: two whitespace-separated ids followed by a float
                correct.append((t[0], t[1], float(t[2])))
        finally:
            correctfile.close()
        check_results(results, correct, pair_identity_tuple)
 def trash_intermediate_files(self):
     """Delete the intermediate files derived from ``dnaseq.fasta``.

     Each file is removed in its own ``try`` so that a failure to delete
     the ``.seqlen`` file (e.g. it does not exist) does not prevent the
     ``.pureseq`` file from being deleted.  ``OSError`` is ignored for
     both: this is best-effort cleanup.
     """
     paths = [
         testutil.datafile('dnaseq.fasta.seqlen'),
         testutil.datafile('dnaseq.fasta.pureseq'),
     ]
     for path in paths:
         try:
             os.unlink(path)
         except OSError:
             # best effort: the file may never have been created
             pass
Example #3
0
 def trash_intermediate_files(self):
     """Remove the ``.seqlen`` and ``.pureseq`` files built from ``dnaseq.fasta``.

     The two deletions are attempted independently: an ``OSError`` while
     unlinking one file (for instance because it is missing) is ignored
     and the other file is still unlinked.
     """
     seqlen = testutil.datafile('dnaseq.fasta.seqlen')
     pureseq = testutil.datafile('dnaseq.fasta.pureseq')
     for leftover in (seqlen, pureseq):
         try:
             os.unlink(leftover)
         except OSError:
             # nothing to clean up for this file; carry on with the next
             pass
Example #4
0
    def setUp(self):
        """Open the DNA and protein test databases, skipping when BLAST is absent.

        Sets ``self.dna`` (from ``hbb1_mouse.fa``) and ``self.prot`` (from
        ``sp_hbb1``).

        Raises:
            SkipTest: if ``testutil.blast_enabled()`` returns a false value.
        """
        if not testutil.blast_enabled():
            # Instantiate the exception explicitly; the
            # ``raise Class, "message"`` statement form is Python-2-only.
            raise SkipTest("no BLAST installed")

        hbb1_mouse = testutil.datafile('hbb1_mouse.fa')
        sp_hbb1 = testutil.datafile('sp_hbb1')

        self.dna = seqdb.SequenceFileDB(hbb1_mouse)
        self.prot = seqdb.SequenceFileDB(sp_hbb1)
    def setUp(self):
        "Open the four sequence databases used by the tests"
        # Resolve every data file first, then open the databases in the
        # same order.
        data_names = ("hbb1_mouse.fa", "hbb1_mouse_rc.fa", "sp_hbb1", "gapping.fa")
        paths = [testutil.datafile(name) for name in data_names]
        dna_path, dna_rc_path, prot_path, gapping_path = paths

        open_db = seqdb.SequenceFileDB
        self.dna = open_db(dna_path)
        self.dna_rc = open_db(dna_rc_path)
        self.prot = open_db(prot_path)
        self.gapping = open_db(gapping_path)
Example #6
0
 def test_do_gunzip(self):
     "do_gunzip output must have the reference MD5 digest"
     archive = testutil.datafile("test.gz")
     extracted = testutil.tempdatafile("test4.out")
     do_gunzip(archive, extracted)
     digest = testutil.get_file_md5(extracted).hexdigest()
     self.assertEqual(digest, "1db5a21a01ba465fd26c3203d6589b0e")
    def test_blastx_parser(self):
        "Parse stored blastx output and compare lengths and identities of the hits"
        report = open(testutil.datafile("blastx_output.txt"), "r")
        query_id = "gi|171854975|dbj|AB364477.1|"
        queries = {query_id: self.dna[query_id]}
        try:
            protein_index = blast.BlastIDIndex(self.prot)
            hits = blast.read_blast_alignment(report, queries, protein_index, translateSrc=True)
        finally:
            report.close()

        expected = [
            (143, 143, 429, 0.53146853146853146),
            (143, 145, 429, 0.28275862068965518),
            (143, 145, 429, 0.28965517241379313),
            (143, 145, 429, 0.29655172413793102),
            (143, 145, 429, 0.30344827586206896),
            (144, 144, 432, 0.4513888888888889),
            (144, 144, 432, 0.4513888888888889),
            (145, 145, 435, 0.45517241379310347),
            (145, 145, 435, 0.51034482758620692),
            (146, 142, 438, 0.35616438356164382),
            (146, 146, 438, 0.4589041095890411),
            (146, 146, 438, 0.46575342465753422),
            (146, 146, 438, 0.4726027397260274),
            (146, 146, 438, 0.4726027397260274),
            (146, 146, 438, 0.4863013698630137),
            (146, 146, 438, 0.59589041095890416),
            (146, 146, 438, 0.62328767123287676),
            (146, 146, 438, 0.66438356164383561),
            (146, 146, 438, 0.74657534246575341),
            (146, 146, 438, 0.91095890410958902),
            (146, 146, 438, 0.97945205479452058),
        ]

        def summarize(t):
            # lengths of the first two items, length of the first item's
            # .sequence, and pIdentity() of the third item
            return (len(t[0]), len(t[1]), len(t[0].sequence), t[2].pIdentity())

        check_results([hits], expected, summarize)
Example #8
0
    def setUp(self, **kwargs):
        """Register a DNA database, an annotation DB and their map in worldbase.

        ``kwargs`` is accepted but not used in this method.  The NLMSA and
        the DNA database are closed again before returning, whether or not
        an exception occurred.
        """
        TestBase.setUp(self)
        fasta_path = testutil.datafile('dnaseq.fasta')
        nlmsa_path = testutil.tempdatafile('tryannot')

        dna_db = seqdb.BlastDB(fasta_path)
        try:
            dna_db.__doc__ = 'little dna'
            worldbase.Bio.Test.dna = dna_db

            # annotation id -> (sequence id, start, stop, name); the column
            # positions are declared through sliceAttrDict
            annotations = seqdb.AnnotationDB(
                {
                    1: ('seq1', 5, 10, 'fred'),
                    2: ('seq1', -60, -50, 'bob'),
                    3: ('seq2', -20, -10, 'mary'),
                },
                dna_db,
                sliceAttrDict=dict(id=0, start=1, stop=2, name=3),
            )
            annotations.__doc__ = 'trivial annotation'
            worldbase.Bio.Test.annoDB = annotations

            annot_map = cnestedlist.NLMSA(nlmsa_path, 'w', pairwiseMode=True,
                                          bidirectional=False)
            try:
                for ann_id in annotations:
                    annot_map.addAnnotation(annotations[ann_id])

                annot_map.build(verbose=False)
                annot_map.__doc__ = 'trivial map'
                worldbase.Bio.Test.map = annot_map
                worldbaseSchema.Bio.Test.map = metabase.ManyToManyRelation(
                    dna_db, annotations, bindAttrs=('exons',))
                worldbase.commit()
                worldbase.clear_cache()
            finally:
                annot_map.close()
        finally:
            dna_db.close()
Example #9
0
 def test_do_unzip(self):
     'do_unzip with singleFile=True must produce the reference MD5 digest'
     archive = testutil.datafile('test.zip')
     extracted = testutil.tempdatafile('test2.out')
     do_unzip(archive, extracted, singleFile=True)
     digest = testutil.get_file_md5(extracted).hexdigest()
     self.assertEqual(digest, '12ada4c51ccb4c7277c16f1a3c000b90')
Example #10
0
    def setUp(self, **kwargs):
        """Store a DNA database, an annotation DB and their map via self.pygrData.

        ``kwargs`` is accepted but not used in this method.  Both the NLMSA
        and the DNA database are closed in ``finally`` clauses.
        """
        TestBase.setUp(self)
        fasta_path = testutil.datafile("dnaseq.fasta")
        nlmsa_path = testutil.tempdatafile("tryannot")

        dna_db = seqdb.BlastDB(fasta_path)
        try:
            dna_db.__doc__ = "little dna"
            self.pygrData.Bio.Test.dna = dna_db

            # annotation id -> (sequence id, start, stop, name), with the
            # tuple positions given by sliceAttrDict
            annotations = seqdb.AnnotationDB(
                {
                    1: ("seq1", 5, 10, "fred"),
                    2: ("seq1", -60, -50, "bob"),
                    3: ("seq2", -20, -10, "mary"),
                },
                dna_db,
                sliceAttrDict=dict(id=0, start=1, stop=2, name=3),
            )
            annotations.__doc__ = "trivial annotation"
            self.pygrData.Bio.Test.annoDB = annotations

            annot_map = cnestedlist.NLMSA(nlmsa_path, "w", pairwiseMode=True, bidirectional=False)
            try:
                for ann_id in annotations:
                    annot_map.addAnnotation(annotations[ann_id])

                annot_map.build()
                annot_map.__doc__ = "trivial map"
                self.pygrData.Bio.Test.map = annot_map
                self.schema.Bio.Test.map = metabase.ManyToManyRelation(
                    dna_db, annotations, bindAttrs=("exons",)
                )
                self.metabase.commit()
                self.metabase.clear_cache()
            finally:
                annot_map.close()
        finally:
            dna_db.close()
Example #11
0
 def test_run_unzip(self):
     'uncompress_file on a zip archive must produce the reference MD5 digest'
     archive = testutil.datafile('test.zip')
     extracted = testutil.tempdatafile('test.out')
     uncompress_file(archive, newpath=extracted, singleFile=True)
     digest = testutil.get_file_md5(extracted).hexdigest()
     self.assertEqual(digest, '12ada4c51ccb4c7277c16f1a3c000b90')
Example #12
0
    def setUp(self, **kwargs):
        """Populate self.pygrData with a DNA db, an annotation db and an NLMSA.

        ``kwargs`` is accepted but not used in this method.  The NLMSA and
        the DNA database are always closed before the method returns.
        """
        TestBase.setUp(self)
        fasta_path = testutil.datafile('dnaseq.fasta')
        nlmsa_path = testutil.tempdatafile('tryannot')

        dna_db = seqdb.BlastDB(fasta_path)
        try:
            dna_db.__doc__ = 'little dna'
            self.pygrData.Bio.Test.dna = dna_db

            # rows are (sequence id, start, stop, name) keyed by annotation
            # id; sliceAttrDict names the tuple positions
            annotations = seqdb.AnnotationDB(
                {
                    1: ('seq1', 5, 10, 'fred'),
                    2: ('seq1', -60, -50, 'bob'),
                    3: ('seq2', -20, -10, 'mary'),
                },
                dna_db,
                sliceAttrDict=dict(id=0, start=1, stop=2, name=3),
            )
            annotations.__doc__ = 'trivial annotation'
            self.pygrData.Bio.Test.annoDB = annotations

            annot_map = cnestedlist.NLMSA(nlmsa_path, 'w', pairwiseMode=True,
                                          bidirectional=False)
            try:
                for ann_id in annotations:
                    annot_map.addAnnotation(annotations[ann_id])

                annot_map.build()
                annot_map.__doc__ = 'trivial map'
                self.pygrData.Bio.Test.map = annot_map
                self.schema.Bio.Test.map = metabase.ManyToManyRelation(
                    dna_db, annotations, bindAttrs=('exons', ))
                self.metabase.commit()
                self.metabase.clear_cache()
            finally:
                annot_map.close()
        finally:
            dna_db.close()
Example #13
0
 def test_do_gunzip(self):
     'do_gunzip must write a file with the reference MD5 digest'
     gz_path = testutil.datafile('test.gz')
     out_path = testutil.tempdatafile('test4.out')
     do_gunzip(gz_path, out_path)
     md5 = testutil.get_file_md5(out_path)
     actual = md5.hexdigest()
     self.assertEqual(actual, '1db5a21a01ba465fd26c3203d6589b0e')
Example #14
0
    def test_schema(self):
        "Store mappings with schema relations, then check the cache and stored graph"
        blast_path = testutil.datafile('sp_hbb1')

        # Round one: map the stored Swissprot db onto a second db and commit.
        sp2_db = seqdb.BlastDB(blast_path)
        sp2_db.__doc__ = 'another sp'
        worldbase.Bio.Seq.sp2 = sp2_db
        stored_sp = worldbase.Bio.Seq.Swissprot.sp42()
        seq_map = mapping.Mapping(sourceDB=stored_sp, targetDB=sp2_db)
        seq_map.__doc__ = 'sp -> sp2'
        worldbase.Bio.Seq.testmap = seq_map
        worldbase.schema.Bio.Seq.testmap = metabase.OneToManyRelation(
            stored_sp, sp2_db)
        worldbase.commit()

        worldbase.clear_cache()

        # Round two: a third db mapped onto sp2 as fetched back from
        # worldbase (the local names are deliberately rebound).
        sp3_db = seqdb.BlastDB(blast_path)
        sp3_db.__doc__ = 'sp number 3'
        worldbase.Bio.Seq.sp3 = sp3_db
        sp2_db = worldbase.Bio.Seq.sp2()
        seq_map = mapping.Mapping(sourceDB=sp3_db, targetDB=sp2_db)
        seq_map.__doc__ = 'sp3 -> sp2'
        worldbase.Bio.Seq.testmap2 = seq_map
        worldbase.schema.Bio.Seq.testmap2 = metabase.OneToManyRelation(
            sp3_db, sp2_db)

        cached_ids = sorted(worldbase._mdb.resourceCache.keys())
        assert cached_ids == ['Bio.Seq.sp2', 'Bio.Seq.sp3', 'Bio.Seq.testmap2']
        worldbase.commit()

        graph = worldbase._mdb.writer.storage.graph
        expected = set([
            'Bio.Annotation.annoDB',
            'Bio.Seq.Swissprot.sp42',
            'Bio.Seq.sp2',
            'Bio.Seq.sp3',
        ])
        found = set(graph.keys())
        missing = expected - found
        self.EQ(len(missing), 0)
    def test_basic_iadd(self):
        "+= adds a sequence to self.db; inverse lookup yields a '<prefix>.seq1' name"
        fasta = testutil.datafile('dnaseq.fasta')
        first_db = SequenceFileDB(fasta)
        try:
            seq_a = first_db['seq1']

            self.db += seq_a

            assert seq_a in self.db
            name_a = (~self.db)[seq_a]
            assert name_a == 'dnaseq.seq1', name_a

            # Same file opened again, but under a different filepath.
            second_db = SequenceFileDB(fasta)
            try:
                # Overwrite the filepath; the name expected below then
                # starts with 'foo'.
                second_db.filepath = 'foo'
                seq_b = second_db['seq1']

                self.db += seq_b
                name_b = (~self.db)[seq_b]
                assert name_b == 'foo.seq1', name_b
            finally:
                second_db.close()
        finally:
            first_db.close()
Example #16
0
    def test_schema(self):
        "Save mappings with schema relations in pygr.Data and check cache and graph"
        blast_path = testutil.datafile('sp_hbb1')

        # Round one: map the stored Swissprot db onto a second db and save.
        sp2_db = seqdb.BlastDB(blast_path)
        sp2_db.__doc__ = 'another sp'
        pygr.Data.Bio.Seq.sp2 = sp2_db
        stored_sp = pygr.Data.Bio.Seq.Swissprot.sp42()
        seq_map = mapping.Mapping(sourceDB=stored_sp, targetDB=sp2_db)
        seq_map.__doc__ = 'sp -> sp2'
        pygr.Data.Bio.Seq.testmap = seq_map
        pygr.Data.schema.Bio.Seq.testmap = pygr.Data.OneToManyRelation(
            stored_sp, sp2_db)
        pygr.Data.save()

        pygr.Data.clear_cache()

        # Round two: a third db mapped onto sp2 as fetched back from
        # pygr.Data (the local names are deliberately rebound).
        sp3_db = seqdb.BlastDB(blast_path)
        sp3_db.__doc__ = 'sp number 3'
        pygr.Data.Bio.Seq.sp3 = sp3_db
        sp2_db = pygr.Data.Bio.Seq.sp2()
        seq_map = mapping.Mapping(sourceDB=sp3_db, targetDB=sp2_db)
        seq_map.__doc__ = 'sp3 -> sp2'
        pygr.Data.Bio.Seq.testmap2 = seq_map
        pygr.Data.schema.Bio.Seq.testmap2 = pygr.Data.OneToManyRelation(
            sp3_db, sp2_db)

        # Names of all cached resources, in sorted order.
        cached_ids = sorted(pygr.Data.getResource.resourceCache.keys())
        assert cached_ids == ['Bio.Seq.sp2', 'Bio.Seq.sp3', 'Bio.Seq.testmap2']
        pygr.Data.save()

        graph = pygr.Data.getResource.writer.storage.graph
        expected = set([
            'Bio.Annotation.annoDB',
            'Bio.Seq.Swissprot.sp42',
            'Bio.Seq.sp2',
            'Bio.Seq.sp3',
        ])
        found = set(graph.keys())
        missing = expected - found
        self.EQ(len(missing), 0)
Example #17
0
    def test_basic_iadd(self):
        "Sequences added with += are found in self.db and named '<prefix>.seq1'"
        fasta_path = testutil.datafile('dnaseq.fasta')
        db_one = SequenceFileDB(fasta_path)
        try:
            added = db_one['seq1']

            self.db += added

            assert added in self.db
            first_name = (~self.db)[added]
            assert first_name == 'dnaseq.seq1', first_name

            # Second database on the same file, given another filepath.
            db_two = SequenceFileDB(fasta_path)
            try:
                # With filepath replaced by 'foo', the name checked below
                # is expected to start with 'foo'.
                db_two.filepath = 'foo'
                added_again = db_two['seq1']

                self.db += added_again
                second_name = (~self.db)[added_again]
                assert second_name == 'foo.seq1', second_name
            finally:
                db_two.close()
        finally:
            db_one.close()
Example #18
0
    def test_schema(self):
        "Commit mappings with worldbaseSchema relations; check cache and stored graph"
        blast_path = testutil.datafile('sp_hbb1')

        # Round one: map the stored Swissprot db onto a second db and commit.
        sp2_db = seqdb.BlastDB(blast_path)
        sp2_db.__doc__ = 'another sp'
        worldbase.Bio.Seq.sp2 = sp2_db
        stored_sp = worldbase.Bio.Seq.Swissprot.sp42()
        seq_map = mapping.Mapping(sourceDB=stored_sp, targetDB=sp2_db)
        seq_map.__doc__ = 'sp -> sp2'
        worldbase.Bio.Seq.testmap = seq_map
        worldbaseSchema.Bio.Seq.testmap = metabase.OneToManyRelation(
            stored_sp, sp2_db)
        worldbase.commit()

        worldbase.clear_cache()

        # Round two: a third db mapped onto sp2 as fetched back from
        # worldbase (the local names are deliberately rebound).
        sp3_db = seqdb.BlastDB(blast_path)
        sp3_db.__doc__ = 'sp number 3'
        worldbase.Bio.Seq.sp3 = sp3_db
        sp2_db = worldbase.Bio.Seq.sp2()
        seq_map = mapping.Mapping(sourceDB=sp3_db, targetDB=sp2_db)
        seq_map.__doc__ = 'sp3 -> sp2'
        worldbase.Bio.Seq.testmap2 = seq_map
        worldbaseSchema.Bio.Seq.testmap2 = metabase.OneToManyRelation(
            sp3_db, sp2_db)

        cached_ids = sorted(worldbase._mdb.resourceCache.keys())
        assert cached_ids == ['Bio.Seq.sp2', 'Bio.Seq.sp3', 'Bio.Seq.testmap2']
        worldbase.commit()

        graph = worldbase._mdb.writer.storage.graph
        expected = set([
            'Bio.Annotation.annoDB',
            'Bio.Seq.Swissprot.sp42',
            'Bio.Seq.sp2',
            'Bio.Seq.sp3',
        ])
        found = set(graph.keys())
        missing = expected - found
        self.EQ(len(missing), 0)
Example #19
0
    def test_schema(self):
        "Commit mappings with schema relations via self.metabase; check cache and graph"
        blast_path = testutil.datafile("sp_hbb1")

        # Round one: map the stored Swissprot db onto a second db and commit.
        sp2_db = seqdb.BlastDB(blast_path)
        sp2_db.__doc__ = "another sp"
        self.pygrData.Bio.Seq.sp2 = sp2_db
        stored_sp = self.pygrData.Bio.Seq.Swissprot.sp42()
        seq_map = mapping.Mapping(sourceDB=stored_sp, targetDB=sp2_db)
        seq_map.__doc__ = "sp -> sp2"
        self.pygrData.Bio.Seq.testmap = seq_map
        self.schema.Bio.Seq.testmap = metabase.OneToManyRelation(stored_sp, sp2_db)
        self.metabase.commit()

        self.metabase.clear_cache()

        # Round two: a third db mapped onto sp2 as fetched back through
        # self.pygrData (the local names are deliberately rebound).
        sp3_db = seqdb.BlastDB(blast_path)
        sp3_db.__doc__ = "sp number 3"
        self.pygrData.Bio.Seq.sp3 = sp3_db
        sp2_db = self.pygrData.Bio.Seq.sp2()
        seq_map = mapping.Mapping(sourceDB=sp3_db, targetDB=sp2_db)
        seq_map.__doc__ = "sp3 -> sp2"
        self.pygrData.Bio.Seq.testmap2 = seq_map
        self.schema.Bio.Seq.testmap2 = metabase.OneToManyRelation(sp3_db, sp2_db)

        cached_ids = sorted(self.metabase.resourceCache.keys())
        assert cached_ids == ["Bio.Seq.sp2", "Bio.Seq.sp3", "Bio.Seq.testmap2"]
        self.metabase.commit()

        graph = self.metabase.writer.storage.graph
        expected = set([
            "Bio.Annotation.annoDB",
            "Bio.Seq.Swissprot.sp42",
            "Bio.Seq.sp2",
            "Bio.Seq.sp3",
        ])
        found = set(graph.keys())
        missing = expected - found
        self.EQ(len(missing), 0)
Example #20
0
 def test_run_gunzip(self):
     'uncompress_file on a gzip file must produce the reference MD5 digest'
     archive = testutil.datafile('test.gz')
     extracted = testutil.tempdatafile('test3.out')
     uncompress_file(archive, newpath=extracted)
     digest = testutil.get_file_md5(extracted).hexdigest()
     self.assertEqual(digest, '1db5a21a01ba465fd26c3203d6589b0e')
Example #21
0
 def test_headerfile_create_with_trypath(self):
     "Build a PrefixUnionDict from a header file, passing the header's directory as trypath"
     header_path = testutil.datafile('prefixUnionDict-1.txt')
     search_dirs = [os.path.dirname(header_path)]
     pud = PrefixUnionDict(filename=header_path, trypath=search_dirs)
     try:
         # on failure, show the prefix dictionary that was actually built
         assert len(pud) == 2, pud.prefixDict
     finally:
         close_pud_dicts(pud)
Example #22
0
 def test_headerfile_create(self):
     "Build a PrefixUnionDict from a header file and check its size and one key"
     pud = PrefixUnionDict(filename=testutil.datafile('prefixUnionDict-1.txt'))
     try:
         assert len(pud) == 2
         assert 'a.seq1' in pud
     finally:
         close_pud_dicts(pud)
 def test_headerfile_create(self):
     "A PrefixUnionDict read from 'prefixUnionDict-1.txt' has length 2 and contains 'a.seq1'"
     header_path = testutil.datafile('prefixUnionDict-1.txt')
     union = PrefixUnionDict(filename=header_path)
     try:
         assert len(union) == 2
         assert 'a.seq1' in union
     finally:
         # always release whatever the union opened
         close_pud_dicts(union)
 def test_headerfile_create_with_trypath(self):
     "A PrefixUnionDict created with an explicit trypath has length 2"
     header_path = testutil.datafile('prefixUnionDict-1.txt')
     header_dir = os.path.dirname(header_path)
     union = PrefixUnionDict(filename=header_path, trypath=[header_dir])
     try:
         assert len(union) == 2, union.prefixDict
     finally:
         close_pud_dicts(union)
Example #25
0
    def test_inverse_add_behavior(self):
        "Look up a sequence from a separate database through the inverse of self.db"
        fasta = testutil.datafile('dnaseq.fasta')
        other_db = SequenceFileDB(fasta)
        try:
            foreign_seq = other_db['seq1']

            # Only the lookup is exercised; its result is not checked.
            found_name = (~self.db)[foreign_seq]
        finally:
            other_db.close()
Example #26
0
 def test_funny_key2(self):
     "Look up an ID that itself contains several '.' separators"
     fasta = testutil.datafile('funnyseq.fasta')
     # NOTE(review): the previous comment said this file contains 'seq1',
     # 'seq2', which does not match the ID looked up below -- confirm.
     member_db = SequenceFileDB(fasta)
     try:
         union = PrefixUnionDict({'prefix': member_db})
         # Only the lookup is exercised; its result is not inspected.
         found = union['prefix.seq.2.even.longer']
     finally:
         member_db.close()
 def test_funny_key2(self):
     "Fetch 'prefix.seq.2.even.longer': an ID with more than one separator"
     source_db = SequenceFileDB(testutil.datafile('funnyseq.fasta'))
     # NOTE(review): an earlier comment claimed the file holds 'seq1' and
     # 'seq2'; the key used below suggests otherwise -- verify.
     try:
         pud = PrefixUnionDict({'prefix': source_db})
         # The test passes as long as this lookup does not raise.
         hit = pud['prefix.seq.2.even.longer']
     finally:
         source_db.close()
    def test_inverse_add_behavior(self):
        "Inverse lookup on self.db of a sequence taken from another database"
        source_db = SequenceFileDB(testutil.datafile('dnaseq.fasta'))
        try:
            outsider = source_db['seq1']

            # No assertion follows: the lookup not raising is the test.
            label = (~self.db)[outsider]
        finally:
            source_db.close()
Example #29
0
    def test_cache(self):
        """Sequence slice cache mechanics.

        Registers a cache hint for 'seq1' under an owner object, checks what
        db._cache holds before and after a slice of the sequence is
        converted to a string, and finally checks that the cache is empty
        once the owner object has been deleted.
        """

        dnaseq = testutil.datafile('dnaseq.fasta')
        db = SequenceFileDB(dnaseq)

        try:
            # create cache components
            cacheDict = {}
            cacheHint = db.cacheHint

            # get seq1
            seq1 = db['seq1']

            # _cache is only created on first cache attempt
            assert not hasattr(db, '_cache')

            # build an 'owner' object
            class AnonymousOwner(object):
                pass

            owner = AnonymousOwner()

            # save seq1 in cache
            cacheDict['seq1'] = (seq1.start, seq1.stop)
            cacheHint(cacheDict, owner)
            del cacheDict  # 'owner' now holds reference

            # peek into _cache and assert that only the ival coordinates
            # are stored
            # NOTE(review): indexing the result of .values() requires it to
            # be a list (Python 2 dict behaviour) -- confirm _cache's type.
            v = db._cache.values()[0]
            assert len(v['seq1']) == 2
            del v  # do not keep our own reference to the cache entry

            # force a cache access & check that now we've stored actual string
            ival = str(seq1[5:10])
            v = db._cache.values()[0]
            # ...check that we've stored actual string
            assert len(v['seq1']) == 3

            # again force cache access, this time to the stored sequence string
            ival = str(seq1[5:10])

            # now, eliminate all references to the cache proxy dict
            del owner

            # trash unused objects - not strictly necessary, because there are
            # no islands of circular references & so all objects are already
            # deallocated, but that's implementation dependent.
            gc.collect()

            # ok, cached values should now be gone.
            v = db._cache.values()
            assert len(v) == 0
        finally:
            db.close()
Example #30
0
    def test_multiblast_long(self):
        "All-vs-all BLAST of a long db into an in-memory NLMSA (thread safety, see issue 79)"
        query_db = seqdb.SequenceFileDB(testutil.datafile('sp_all_hbb'))
        mapper = blast.BlastMapping(self.prot, verbose=False)
        hits = cnestedlist.NLMSA('blasthits', 'memory',
                                 pairwiseMode=True, bidirectional=False)
        # all vs all
        mapper(None, hits, queryDB=query_db)
        # construct the alignment indexes
        hits.build()
 def test_headerfile_create_fail(self):
     "Creating a PrefixUnionDict from 'prefixUnionDict-3.txt' is expected to raise IOError"
     bad_header = testutil.datafile('prefixUnionDict-3.txt')
     try:
         pud = PrefixUnionDict(filename=bad_header)
         assert 0, "should not reach this point"
     except IOError:
         # the expected outcome
         pass
     except AssertionError:
         # construction unexpectedly succeeded: close it, then fail
         close_pud_dicts(pud)
         raise
Example #32
0
 def test_headerfile_create_fail(self):
     "PrefixUnionDict(filename=...) on 'prefixUnionDict-3.txt' must fail with IOError"
     header_path = testutil.datafile('prefixUnionDict-3.txt')
     try:
         union = PrefixUnionDict(filename=header_path)
         assert 0, "should not reach this point"
     except IOError:
         pass  # this is what the test wants to see
     except AssertionError:
         # no IOError was raised; release the union before re-raising
         close_pud_dicts(union)
         raise
    def test_cache(self):
        """Sequence slice cache mechanics.

        A cache hint for 'seq1' is registered under an owner object; the
        test inspects db._cache before and after a slice is converted to a
        string, then deletes the owner and checks that db._cache is empty.
        """

        dnaseq = testutil.datafile('dnaseq.fasta')
        db = SequenceFileDB(dnaseq)

        try:
            # create cache components
            cacheDict = {}
            cacheHint = db.cacheHint

            # get seq1
            seq1 = db['seq1']

            # _cache is only created on first cache attempt
            assert not hasattr(db, '_cache')

            # build an 'owner' object
            class AnonymousOwner(object):
                pass
            owner = AnonymousOwner()

            # save seq1 in cache
            cacheDict['seq1'] = (seq1.start, seq1.stop)
            cacheHint(cacheDict, owner)
            del cacheDict                   # 'owner' now holds reference

            # peek into _cache and assert that only the ival coordinates
            # are stored
            # NOTE(review): .values()[0] only works when .values() returns a
            # list (Python 2 dict behaviour) -- confirm _cache's type.
            v = db._cache.values()[0]
            assert len(v['seq1']) == 2
            del v  # drop our own reference to the cache entry

            # force a cache access & check that now we've stored actual string
            ival = str(seq1[5:10])
            v = db._cache.values()[0]
            # ...check that we've stored actual string
            assert len(v['seq1']) == 3

            # again force cache access, this time to the stored sequence string
            ival = str(seq1[5:10])

            # now, eliminate all references to the cache proxy dict
            del owner

            # trash unused objects - not strictly necessary, because there are
            # no islands of circular references & so all objects are already
            # deallocated, but that's implementation dependent.
            gc.collect()

            # ok, cached values should now be gone.
            v = db._cache.values()
            assert len(v) == 0
        finally:
            db.close()
Example #34
0
    def test_nlmsaslice_cache(self):
        """NLMSASlice sequence caching & removal.

        Builds an in-memory pairwise NLMSA over two sequences, takes a slice
        of it and checks that db._cache gains exactly one entry; then
        deletes the slice and the remaining references and checks that
        db._cache and db._weakValueDict are both empty again.
        """

        # set up sequences
        dnaseq = testutil.datafile('dnaseq.fasta')

        db = SequenceFileDB(dnaseq, autoGC=-1)  # use pure WeakValueDict...
        try:
            gc.collect()
            assert len(
                db._weakValueDict) == 0, '_weakValueDict should be empty'
            seq1, seq2 = db['seq1'], db['seq2']
            assert len(db._weakValueDict)==2, \
                    '_weakValueDict should have 2 seqs'

            # build referencing NLMSA
            mymap = NLMSA('test', 'memory', db, pairwiseMode=True)
            mymap += seq1
            mymap[seq1] += seq2
            mymap.build()

            # check: no cache
            assert not hasattr(db, '_cache'), 'should be no cache yet'

            seq1, seq2 = db['seq1'], db['seq2']  # re-retrieve
            # now retrieve a NLMSASlice, forcing entry of seq into cache
            ival = seq1[5:10]
            x = mymap[ival]

            assert len(db._cache.values()) != 0

            n1 = len(db._cache)
            assert n1 == 1, "should be exactly one cache entry, not %d" % \
                    (n1, )

            # ok, now trash referencing arguments & make sure of cleanup
            del x
            gc.collect()

            assert len(db._cache.values()) == 0

            n2 = len(db._cache)
            assert n2 == 0, '%d objects remain; cache memory leak!' % n2
            # FAIL because of __dealloc__ error in cnestedlist.NLMSASlice.

            # Drop our references, the cache should empty.
            del mymap, ival, seq1, seq2
            gc.collect()
            # check that db._weakValueDict cache is empty
            assert len(
                db._weakValueDict) == 0, '_weakValueDict should be empty'
        finally:
            db.close()
    def test_multiblast_long(self):
        "All-vs-all BLAST of a long db into an in-memory NLMSA; skipped without BLAST"
        blast_available = testutil.blast_enabled()
        if not blast_available:
            raise SkipTest("no BLAST installed")

        query_db = seqdb.SequenceFileDB(testutil.datafile("sp_all_hbb"))
        mapper = blast.BlastMapping(self.prot, verbose=False)
        hits = cnestedlist.NLMSA(
            "blasthits", "memory", pairwiseMode=True, bidirectional=False
        )
        # all vs all
        mapper(None, hits, queryDB=query_db)
        # construct the alignment indexes
        hits.build()
Example #36
0
 def test_headerfile_create_conflict(self):
     "Passing both a header file and a non-empty prefixDict is expected to raise TypeError"
     member_db = SequenceFileDB(self.dbfile)
     try:
         header_path = testutil.datafile('prefixUnionDict-1.txt')
         prefixes = {'foo': member_db}
         try:
             pud = PrefixUnionDict(filename=header_path, prefixDict=prefixes)
             assert 0, "should not get here"
         except TypeError:
             # the expected outcome
             pass
     finally:
         member_db.close()
    def test_nlmsaslice_cache(self):
        """NLMSASlice sequence caching & removal.

        An in-memory pairwise NLMSA is built over two sequences; retrieving
        a slice from it must create exactly one db._cache entry, and
        deleting the slice and the other references must leave db._cache
        and db._weakValueDict empty.
        """

        # set up sequences
        dnaseq = testutil.datafile('dnaseq.fasta')

        db = SequenceFileDB(dnaseq, autoGC=-1) # use pure WeakValueDict...
        try:
            gc.collect()
            assert len(db._weakValueDict)==0, '_weakValueDict should be empty'
            seq1, seq2 = db['seq1'], db['seq2']
            assert len(db._weakValueDict)==2, \
                    '_weakValueDict should have 2 seqs'

            # build referencing NLMSA
            mymap = NLMSA('test', 'memory', db, pairwiseMode=True)
            mymap += seq1
            mymap[seq1] += seq2
            mymap.build()

            # check: no cache
            assert not hasattr(db, '_cache'), 'should be no cache yet'

            seq1, seq2 = db['seq1'], db['seq2'] # re-retrieve
            # now retrieve a NLMSASlice, forcing entry of seq into cache
            ival = seq1[5:10]
            x = mymap[ival]

            assert len(db._cache.values()) != 0

            n1 = len(db._cache)
            assert n1 == 1, "should be exactly one cache entry, not %d" % \
                    (n1, )

            # ok, now trash referencing arguments & make sure of cleanup
            del x
            gc.collect()

            assert len(db._cache.values()) == 0


            n2 = len(db._cache)
            assert n2 == 0, '%d objects remain; cache memory leak!' % n2
            # FAIL because of __dealloc__ error in cnestedlist.NLMSASlice.

            # Drop our references, the cache should empty.
            del mymap, ival, seq1, seq2
            gc.collect()
            # check that db._weakValueDict cache is empty
            assert len(db._weakValueDict)==0, '_weakValueDict should be empty'
        finally:
            db.close()
    def test_blastp_parser(self):
        "Parse stored blastp output and check the hits found for HBB1_XENLA"
        report = open(testutil.datafile("blastp_output.txt"), "r")

        queries = {"HBB1_XENLA": self.prot["HBB1_XENLA"]}
        id_index = blast.BlastIDIndex(self.prot)
        try:
            parsed = blast.read_blast_alignment(report, queries, id_index)
            # fetch the alignment slice for the query sequence
            hits = parsed[self.prot["HBB1_XENLA"]]
        finally:
            report.close()

        check_results([hits], blastp_correct_results, pair_identity_tuple)
Example #39
0
 def test_headerfile_create_conflict(self):
     "A header file combined with a non-empty prefixDict must raise TypeError"
     sub_db = SequenceFileDB(self.dbfile)
     try:
         header_path = testutil.datafile('prefixUnionDict-1.txt')
         try:
             union = PrefixUnionDict(prefixDict={'foo': sub_db},
                                     filename=header_path)
             assert 0, "should not get here"
         except TypeError:
             pass  # conflict detected, as expected
     finally:
         sub_db.close()
    def test_multiblast_parser(self):
        "Parse stored multi-query BLAST output and check the hits of every protein"
        report = open(testutil.datafile("multiblast_output.txt"), "r")

        try:
            hits = cnestedlist.NLMSA("blasthits", "memory", pairwiseMode=True, bidirectional=False)
            id_index = blast.BlastIDIndex(self.prot)
            hits = blast.read_blast_alignment(report, self.prot, id_index, hits)
        finally:
            report.close()
        hits.build()

        # one alignment slice per sequence of the protein database
        per_query = []
        for seq in self.prot.values():
            per_query.append(hits[seq])

        check_results(per_query, correct_multiblast_results, pair_identity_tuple)
Example #41
0
    def test_no_db_info(self):
        "A sequence whose db has no _persistent_id and no filepath is named 'noname0.seq1'"
        fasta = testutil.datafile('dnaseq.fasta')
        anon_db = SequenceFileDB(fasta)
        try:
            seq = anon_db['seq1']

            # Make sure the database carries neither a persistent id nor
            # a filepath before it is added.
            assert getattr(anon_db, '_persistent_id', None) is None
            del anon_db.filepath

            self.db += seq
            assigned = (~self.db)[seq]
            assert assigned == 'noname0.seq1'
        finally:
            anon_db.close()
Example #42
0
    def test_iadd_db_twice(self):
        "Adding the same sequence twice must leave its name unchanged"
        fasta = testutil.datafile('dnaseq.fasta')
        source_db = SequenceFileDB(fasta)
        try:
            seq = source_db['seq1']

            self.db += seq
            before = (~self.db)[seq]

            # The second += should do nothing...
            self.db += seq
            after = (~self.db)[seq]
            # ...leaving the sequence with the same name.
            assert before == after
        finally:
            source_db.close()
    def test_no_db_info(self):
        "With _persistent_id unset and filepath deleted, the name is 'noname0.seq1'"
        source_db = SequenceFileDB(testutil.datafile('dnaseq.fasta'))
        try:
            added = source_db['seq1']

            # The db must offer no persistent id, and loses its filepath.
            persistent_id = getattr(source_db, '_persistent_id', None)
            assert persistent_id is None
            del source_db.filepath

            self.db += added
            label = (~self.db)[added]
            assert label == 'noname0.seq1'
        finally:
            source_db.close()
    def test_iadd_db_twice(self):
        "A repeated += of one sequence keeps the name from the first addition"
        origin_db = SequenceFileDB(testutil.datafile('dnaseq.fasta'))
        try:
            added = origin_db['seq1']

            self.db += added
            first_name = (~self.db)[added]

            self.db += added  # should do nothing...
            second_name = (~self.db)[added]
            assert first_name == second_name  # ...so the name is unchanged
        finally:
            origin_db.close()
Example #45
0
    def test_inverse_noadd_behavior(self):
        "With addAll=False, inverse lookup of a sequence never added must raise KeyError"
        # counterpart of test_inverse_add_behavior
        union = SeqPrefixUnionDict(addAll=False)
        fasta = testutil.datafile('dnaseq.fasta')
        source_db = SequenceFileDB(fasta)
        try:
            outsider = source_db['seq1']

            try:
                label = (~union)[outsider]
                assert 0, "should not get here"
            except KeyError:
                # the expected outcome
                pass
        finally:
            source_db.close()
Example #46
0
def populate_swissprot():
    """Populate the current worldbase with swissprot data.

    Stores, in this order: a BlastDB built from the 'sp_hbb1' data file,
    an interval of one of its sequences, a Mapping from that database to
    itself, an AnnotationDB holding a single annotation, and an NLMSA
    containing that annotation.  Schema relations are registered for the
    mapping (attribute 'buddy') and for the annotation map (attribute
    'exons').  Every stored object gets its ``__doc__`` set before it is
    assigned into worldbase.
    """
    # build BlastDB out of the sequences
    swissprot = seqdb.BlastDB(testutil.datafile('sp_hbb1'))
    swissprot.__doc__ = 'little swissprot'
    worldbase.Bio.Seq.Swissprot.sp42 = swissprot

    # also store a fragment (an interval of one sequence)
    hbb_seq = swissprot['HBB1_TORMA']
    fragment = hbb_seq[10:35]
    fragment.__doc__ = 'fragment'
    worldbase.Bio.Seq.frag = fragment

    # build a mapping from the database to itself
    self_map = mapping.Mapping(sourceDB=swissprot, targetDB=swissprot)
    self_map[hbb_seq] = swissprot['PRCA_ANAVA']
    self_map.__doc__ = 'map sp to itself'
    worldbase.Bio.Seq.spmap = self_map

    # register the mapping's schema, bound as the 'buddy' attribute
    buddy_relation = metabase.OneToManyRelation(
        swissprot, swissprot, bindAttrs=('buddy', ))
    worldbase.schema.Bio.Seq.spmap = buddy_relation

    # create an annotation database with a single annotation
    slice_info = {1: ('HBB1_TORMA', 10, 50)}
    annotations = seqdb.AnnotationDB(
        slice_info, swissprot, sliceAttrDict=dict(id=0, start=1, stop=2))
    exon = annotations[1]

    # generate the names where these will be stored
    annot_dir = testutil.TempDir('exonAnnot')
    nlmsa_path = annot_dir.subfile('cnested')
    exon_map = cnestedlist.NLMSA(
        nlmsa_path, 'w', pairwiseMode=True, bidirectional=False)
    exon_map.addAnnotation(exon)
    exon_map.build()

    # store the annotation db and its map, bound as the 'exons' attribute
    annotations.__doc__ = 'a little annotation db'
    exon_map.__doc__ = 'a little map'
    worldbase.Bio.Annotation.annoDB = annotations
    worldbase.Bio.Annotation.map = exon_map
    exons_relation = metabase.ManyToManyRelation(
        swissprot, annotations, bindAttrs=('exons', ))
    worldbase.schema.Bio.Annotation.map = exons_relation
Example #47
0
    def test_iadd_duplicate_seqdb(self):
        """Check that adding 'seq1' from a second db raises ValueError.

        Two SequenceFileDB objects are opened on the same data file.  After
        'seq1' from the first one has been added to ``self.db``, adding
        'seq1' from the second one has to fail with ValueError.
        """
        fasta_path = testutil.datafile('dnaseq.fasta')
        first_db = SequenceFileDB(fasta_path)
        try:
            second_db = SequenceFileDB(fasta_path)
            try:
                seq_a = first_db['seq1']
                seq_b = second_db['seq1']

                self.db += seq_a
                try:
                    self.db += seq_b
                except ValueError:
                    # Expected outcome.
                    pass
                else:
                    assert 0, "should never reach this point"
            finally:
                second_db.close()
        finally:
            first_db.close()
Example #48
0
    def test_generic_build(self):
        """GenericBuilder construction of the BlastDB

        A GenericBuilder for 'BlastDB' is pickled and unpickled; the result
        is expected to have 24 entries whose sorted identifiers equal the
        list below.
        """
        builder = GenericBuilder('BlastDB', testutil.datafile('sp_hbb1'))
        pickled = pickle.dumps(builder)
        # unpickling forces construction of the BlastDB
        db = pickle.loads(pickled)
        self.EQ(len(db), 24)

        found = sorted(db)
        expected = sorted([
            'HBB0_PAGBO', 'HBB1_ANAMI', 'HBB1_CYGMA', 'HBB1_IGUIG',
            'HBB1_MOUSE', 'HBB1_ONCMY', 'HBB1_PAGBO', 'HBB1_RAT',
            'HBB1_SPHPU', 'HBB1_TAPTE', 'HBB1_TORMA', 'HBB1_TRICR',
            'HBB1_UROHA', 'HBB1_VAREX', 'HBB1_XENBO', 'HBB1_XENLA',
            'HBB1_XENTR', 'MYG_DIDMA', 'MYG_ELEMA', 'MYG_ERIEU',
            'MYG_ESCGI', 'MYG_GALCR', 'PRCA_ANASP', 'PRCA_ANAVA',
        ])

        self.EQ(expected, found)
Example #49
0
    def test_headerfile_write(self):
        """Write a PrefixUnionDict header file and read it back.

        The dictionary loaded from 'prefixUnionDict-2.txt' is written out
        with writeHeaderFile(); a second dictionary loaded from that output
        must have the same size and contain the same two checked ids.
        """
        header = testutil.datafile('prefixUnionDict-2.txt')
        original = PrefixUnionDict(filename=header)
        try:
            assert len(original) == 4
            for seq_id in ('a.seq1', 'b.seq1'):
                assert seq_id in original

            output = testutil.tempdatafile('prefixUnionDict-write.txt')
            original.writeHeaderFile(output)
        finally:
            close_pud_dicts(original)

        # The directory of the original header is passed as the trypath
        # for the reloaded dictionary.
        search_path = [os.path.dirname(header)]
        reloaded = PrefixUnionDict(filename=output, trypath=search_path)
        try:
            assert len(reloaded) == 4
            for seq_id in ('a.seq1', 'b.seq1'):
                assert seq_id in reloaded
        finally:
            close_pud_dicts(reloaded)
Example #50
0
    def test_xmlrpc(self):
        """Test XMLRPC

        Points pygr.Data at the local test server, runs the shared check_*
        helpers, then verifies that trying to save a new resource fails
        with ValueError.
        """
        pygr.Data.clear_cache()  # force all requests to reload
        server_url = "http://localhost:%s" % self.server.port
        pygr.Data.update(server_url)

        # Run every shared check against this test case.
        for check in (check_match, check_dir, check_dir_noargs,
                      check_dir_download, check_dir_re, check_bind,
                      check_bind2):
            check(self)

        new_db = seqdb.BlastDB(testutil.datafile('sp_hbb1'))
        new_db.__doc__ = 'another sp'
        msg = 'failed to catch bad attempt to write to XMLRPC server'
        try:
            pygr.Data.Bio.Seq.sp2 = new_db
            pygr.Data.save()
        except ValueError:
            # Expected: the write attempt was rejected.
            pass
        else:
            raise KeyError(msg)
Example #51
0
    def test_xmlrpc(self):
        """Test XMLRPC

        Adds the local test XMLRPC server to worldbase, runs the shared
        check_* helpers, then tests the read-only checks: committing a new
        resource has to fail with ValueError.
        """
        worldbase.clear_cache()  # force all future requests to reload
        # Add our test XMLRPC resource.
        server_url = "http://localhost:%s" % self.server.port
        worldbase.update(server_url)

        # run all our tests
        shared_checks = [check_match, check_dir, check_dir_noargs,
                         check_dir_download, check_dir_re, check_bind,
                         check_bind2]
        for run_check in shared_checks:
            run_check(self)

        # test readonly checks
        extra_db = seqdb.BlastDB(testutil.datafile('sp_hbb1'))
        extra_db.__doc__ = 'another sp'
        msg = 'failed to catch bad attempt to write to XMLRPC server'
        try:
            worldbase.Bio.Seq.sp2 = extra_db
            worldbase.commit()
        except ValueError:
            # Expected: the write attempt was rejected.
            pass
        else:
            raise KeyError(msg)
Example #52
0
 def setUp(self):
     """Open the 'dnaseq.fasta' db and wrap it in a SeqPrefixUnionDict.

     The sequence db is kept as ``self.seqdb`` and registered in
     ``self.db`` under the prefix 'prefix'.
     """
     fasta_path = testutil.datafile('dnaseq.fasta')
     # contains 'seq1', 'seq2'
     self.seqdb = SequenceFileDB(fasta_path)
     prefix_map = {'prefix': self.seqdb}
     self.db = SeqPrefixUnionDict(prefix_map)
Example #53
0
 def setUp(self):
     """Store what testutil.datafile gives for 'dnaseq.fasta' as dbfile."""
     fasta_file = testutil.datafile('dnaseq.fasta')
     self.dbfile = fasta_file
Example #54
0
    def setUp(self):
        """Test setup

        Opens the 'dnaseq.fasta' sequence db as ``self.db`` and empties its
        ``_weakValueDict``.
        """
        # contains 'seq1', 'seq2'
        self.db = SequenceFileDB(testutil.datafile('dnaseq.fasta'))

        # clear the cache
        cache = self.db._weakValueDict
        cache.clear()
Example #55
0
 def setUp(self):
     """Open the 'hbb1_mouse.fa' sequence db and get a translation db for it.

     The sequence db is stored as ``self.dna`` and the object returned by
     ``translationDB.get_translation_db`` as ``self.tdb``.
     """
     hbb1_mouse = testutil.datafile('hbb1_mouse.fa')
     self.dna = seqdb.SequenceFileDB(hbb1_mouse)
     self.tdb = translationDB.get_translation_db(self.dna)