Example #1
0
    def setUp(self):
        """Open the DNA and protein sequence databases used by the tests.

        Sets ``self.dna`` (from ``hbb1_mouse.fa``) and ``self.prot`` (from
        ``sp_hbb1``).

        Raises:
            SkipTest: if ``testutil.blast_enabled()`` reports no BLAST.
        """
        if not testutil.blast_enabled():
            # Call form works on both Python 2 and 3; the old
            # ``raise SkipTest, "msg"`` statement is a SyntaxError on Python 3.
            raise SkipTest("no BLAST installed")

        hbb1_mouse = testutil.datafile('hbb1_mouse.fa')
        sp_hbb1 = testutil.datafile('sp_hbb1')

        self.dna = seqdb.SequenceFileDB(hbb1_mouse)
        self.prot = seqdb.SequenceFileDB(sp_hbb1)
        # NOTE(review): other tests in this file read self.dna_rc and
        # self.gapping, which are not assigned here -- confirm they are
        # provided by another fixture/class.
    def test_blastp(self):
        """Run blastp for HBB1_XENLA against the protein DB and check hits."""
        if not testutil.blast_enabled():
            raise SkipTest("no BLAST installed")

        mapping = blast.BlastMapping(self.prot, verbose=False)
        query = self.prot["HBB1_XENLA"]
        hits = mapping[query]

        # The relaxed checker receives a one-element list of result sets.
        check_results_relaxed_blastp(
            [hits],
            blastp_correct_results,
            pair_identity_tuple,
            allowedLengthDiff=2,
        )
    def test_seq_without_db(self):
        """Check that a sequence with no associated DB works as a query."""
        if not testutil.blast_enabled():
            raise SkipTest("no BLAST installed")

        mapping = blast.BlastMapping(self.prot, verbose=False)

        # Copy the residues into a standalone Sequence built from a string.
        db_seq = self.prot["HBB1_XENLA"]
        detached = sequence.Sequence(str(db_seq), "HBB1_XENLA_no_db")

        alignment = mapping(seq=detached)
        hits = alignment[detached]
        assert len(hits)
    def test_tblastn_no_blastx(self):
        """BlastMapping on self.prot must raise ValueError for a self.dna query."""
        if not testutil.blast_enabled():
            raise SkipTest("no BLAST installed")

        mapping = blast.BlastMapping(self.prot)
        try:
            mapping[self.dna["gi|171854975|dbj|AB364477.1|"]]
        except ValueError:
            # Expected: the lookup was rejected.
            pass
        else:
            raise AssertionError("failed to trap blastx in BlastMapping")
    def test_blastx_no_blastp(self):
        """BlastxMapping on self.prot must raise ValueError for a self.prot query."""
        if not testutil.blast_enabled():
            raise SkipTest("no BLAST installed")

        mapping = blast.BlastxMapping(self.prot, verbose=False)

        try:
            mapping(self.prot["HBB1_MOUSE"])
        except ValueError:
            # Expected: the call was rejected.
            pass
        else:
            raise AssertionError("failed to trap blastp in BlastxMapping")
    def test_blastx_rc(self):
        """Testing blastx with negative frames.

        The same expected table is checked against the results for three
        entries of ``self.dna_rc``, in a fixed order.
        """
        if not testutil.blast_enabled():
            raise SkipTest("no BLAST installed")

        mapping = blast.BlastxMapping(self.prot, verbose=False)

        # Expected rows, in the layout produced by ``summarize`` below.
        expected = [
            (143, 143, 429, 0.53146853146853146),
            (143, 145, 429, 0.28275862068965518),
            (143, 145, 429, 0.28965517241379313),
            (143, 145, 429, 0.29655172413793102),
            (143, 145, 429, 0.30344827586206896),
            (144, 144, 432, 0.4513888888888889),
            (144, 144, 432, 0.4513888888888889),
            (145, 145, 435, 0.45517241379310347),
            (145, 145, 435, 0.51034482758620692),
            (146, 142, 438, 0.35616438356164382),
            (146, 146, 438, 0.4589041095890411),
            (146, 146, 438, 0.46575342465753422),
            (146, 146, 438, 0.4726027397260274),
            (146, 146, 438, 0.4726027397260274),
            (146, 146, 438, 0.4863013698630137),
            (146, 146, 438, 0.59589041095890416),
            (146, 146, 438, 0.62328767123287676),
            (146, 146, 438, 0.66438356164383561),
            (146, 146, 438, 0.74657534246575341),
            (146, 146, 438, 0.91095890410958902),
            (146, 146, 438, 0.97945205479452058),
        ]

        def summarize(t):
            # (len of item 0, len of item 1, len of item 0's full sequence,
            #  percent identity reported by item 2)
            return (len(t[0]), len(t[1]), len(t[0].sequence), t[2].pIdentity())

        for seq_id in ("hbb1_mouse_RC", "hbb1_mouse_RC_2", "hbb1_mouse_RC_3"):
            hits = mapping[self.dna_rc[seq_id]]
            check_results_relaxed_blastx(
                hits,
                expected,
                summarize,
                allowedLengthDiff=2,
            )
    def test_tblastn(self):
        """tblastn test: query HBB1_XENLA from self.prot against self.dna."""
        if not testutil.blast_enabled():
            raise SkipTest("no BLAST installed")

        mapping = blast.BlastMapping(self.dna, verbose=False)
        expected = [(144, 144, 432, 0.451)]

        def summarize(t):
            # Items 0 and 1 are swapped relative to the blastx summary:
            # item 1 supplies the first length and the full-sequence length.
            return (len(t[1]), len(t[0]), len(t[1].sequence), t[2].pIdentity())

        hits = mapping[self.prot["HBB1_XENLA"]]
        check_results_relaxed_blastx([hits], expected, summarize)
    def test_translation_db_in_results_of_db_search(self):
        """
        Check that a BlastxMapping run with ``queryDB=self.dna`` returns
        results whose ``seqDict.dicts`` contains the annodb of the
        translation DB for ``self.dna``.
        """
        if not testutil.blast_enabled():
            raise SkipTest("no BLAST installed")

        mapping = blast.BlastxMapping(self.prot, verbose=False)
        alignment = mapping(queryDB=self.dna)

        translation_db = translationDB.get_translation_db(self.dna)
        known_dicts = alignment.seqDict.dicts
        assert translation_db.annodb in known_dicts
    def test_multiblast_long(self):
        """Testing multi sequence blast with the longer sp_all_hbb query DB."""
        if not testutil.blast_enabled():
            raise SkipTest("no BLAST installed")

        query_db = seqdb.SequenceFileDB(testutil.datafile("sp_all_hbb"))
        mapping = blast.BlastMapping(self.prot, verbose=False)

        # In-memory pairwise alignment that collects the hits.
        alignment = cnestedlist.NLMSA(
            "blasthits",
            "memory",
            pairwiseMode=True,
            bidirectional=False,
        )
        # Whole query DB at once; no single query sequence is given.
        mapping(None, alignment, queryDB=query_db)
        # Construct the alignment indexes.
        alignment.build()
    def test_non_consumable_results(self):
        """Check that BlastxMapping results can be iterated more than once."""
        if not testutil.blast_enabled():
            raise SkipTest("no BLAST installed")

        mapping = blast.BlastxMapping(self.prot, verbose=False)
        hits = mapping[self.dna["gi|171854975|dbj|AB364477.1|"]]

        # Materialize twice: a one-shot iterator would be empty the second time.
        first_pass = list(hits)
        second_pass = list(hits)

        assert len(first_pass), first_pass
        assert first_pass == second_pass, "BlastxMapping.__getitem__ should return list"
    def test_megablast(self):
        """Test megablast with a copy of a self.dna sequence as the query.

        Returns without asserting anything if the lookup raises OSError.
        """
        if not testutil.blast_enabled():
            raise SkipTest("no BLAST installed")

        mapping = blast.MegablastMapping(self.dna, verbose=False)

        # Must use a copy of the sequence to get "self matches" from NLMSA.
        residues = str(self.dna["gi|171854975|dbj|AB364477.1|"])
        query_copy = seqdb.Sequence(residues, "foo")

        try:
            hits = mapping[query_copy]
        except OSError:
            # Silently ignore missing RepeatMasker / megablast.
            return

        edge_lengths = [(len(edge[0]), len(edge[1])) for edge in hits.edges()]
        assert edge_lengths == [(444, 444)]
    def test_translation_db_in_results_of_seq_search(self):
        """
        Check that a BlastxMapping run with a single ``seq=`` query from
        ``self.dna`` returns results whose ``seqDict.dicts`` contains the
        annodb of the translation DB for ``self.dna``.
        """
        if not testutil.blast_enabled():
            raise SkipTest("no BLAST installed")

        mapping = blast.BlastxMapping(self.prot, verbose=False)

        single_query = self.dna["gi|171854975|dbj|AB364477.1|"]
        alignment = mapping(seq=single_query)

        translation_db = translationDB.get_translation_db(self.dna)
        known_dicts = alignment.seqDict.dicts
        assert translation_db.annodb in known_dicts
    def test_maskEnd(self):
        """
        Regression test for a minor cnestedlist bug in which maskEnd is
        used to clip the end to the mask region.

        The test only performs the lookups; it passes if nothing raises.
        """
        if not testutil.blast_enabled():
            raise SkipTest("no BLAST installed")

        seq_db = self.gapping
        mapping = blast.BlastMapping(seq_db)
        ungapped_seq = seq_db["ungapped"]
        gapped_seq = seq_db["gapped"]
        hits = mapping[gapped_seq]

        # Result is unused; the lookup itself is what is being exercised.
        hits[ungapped_seq]
    def test_multiblast_single(self):
        """Test multi-sequence BLAST results, for BLASTs run one by one.

        Hits are collected into one alignment and compared with the output
        of ``get_multiblast_results``.
        """
        if not testutil.blast_enabled():
            raise SkipTest("no BLAST installed")

        mapping = blast.BlastMapping(self.prot, verbose=False)
        alignment = cnestedlist.NLMSA(
            "blasthits",
            "memory",
            pairwiseMode=True,
            bidirectional=False,
        )

        # All vs all, one query sequence per call.
        for query in self.prot.values():
            mapping(query, alignment)

        # Construct the alignment indexes.
        alignment.build()

        one_by_one = [alignment[query] for query in self.prot.values()]
        all_at_once = self.get_multiblast_results()
        # Strict check must work here even on live BLAST output.
        check_results(one_by_one, all_at_once, pair_identity_tuple)
    def test_translated_seqs_in_results(self):
        """
        Check that every slice yielded by BlastxMapping.__getitem__ has a
        ``seq.id`` that is present in the annodb of the translation DB for
        ``self.dna``.
        """
        if not testutil.blast_enabled():
            raise SkipTest("no BLAST installed")

        mapping = blast.BlastxMapping(self.prot, verbose=False)
        hits = mapping[self.dna["gi|171854975|dbj|AB364477.1|"]]

        annodb = translationDB.get_translation_db(self.dna).annodb

        for hit in hits:
            assert hit.seq.id in annodb, "%s not in annodb!" % hit.seq.id
    def test_no_bidirectional(self):
        """Check that the gapped[0:40] -> ungapped[0:40] edge appears exactly once."""
        if not testutil.blast_enabled():
            raise SkipTest("no BLAST installed")

        seq_db = self.gapping
        gapped = seq_db["gapped"]
        ungapped = seq_db["ungapped"]

        mapping = blast.BlastMapping(seq_db)
        alignment = mapping(queryDB=seq_db)
        alignment[gapped]  # extra lookup; its result is not used

        match_count = 0
        for src, dest, _edge in alignment[gapped].edges():
            if not (src == gapped[0:40] and dest == ungapped[0:40]):
                continue
            # A second sighting of the same edge is a failure.
            assert match_count == 0, "BLAST results should not be bidirectional"
            match_count += 1

        assert match_count == 1, "should have found this match exactly once!"
    def get_multiblast_results(self):
        """Return the all-vs-all BLAST results for ``self.prot``.

        The reformatted results are stored in the module-level
        ``_multiblast_results`` so the time-consuming run happens only while
        that global is still falsy.

        Raises:
            SkipTest: if ``testutil.blast_enabled()`` reports no BLAST.
        """
        global _multiblast_results

        if not testutil.blast_enabled():
            raise SkipTest("no BLAST installed")

        # Cache hit: reuse what an earlier call stored.
        if _multiblast_results:
            return _multiblast_results

        logger.info("running expensive multiblast")
        mapping = blast.BlastMapping(self.prot, verbose=False)
        alignment = cnestedlist.NLMSA(
            "blasthits",
            "memory",
            pairwiseMode=True,
            bidirectional=False,
        )

        # All vs all in a single call.
        mapping(al=alignment, queryDB=self.prot)

        # Construct the alignment indexes.
        alignment.build()
        per_query = [alignment[query] for query in self.prot.values()]
        _multiblast_results = reformat_results(per_query, pair_identity_tuple)

        return _multiblast_results