def setUp(self):
    """Open the two sequence databases used by these tests.

    Sets ``self.dna`` from the ``hbb1_mouse.fa`` data file and
    ``self.prot`` from the ``sp_hbb1`` data file.

    Raises:
        SkipTest: if ``testutil.blast_enabled()`` is false.
    """
    if not testutil.blast_enabled():
        # Call form of raise: valid on Python 2 and 3, and consistent with
        # the per-test guards in this file.
        raise SkipTest("no BLAST installed")
    hbb1_mouse = testutil.datafile('hbb1_mouse.fa')
    sp_hbb1 = testutil.datafile('sp_hbb1')
    self.dna = seqdb.SequenceFileDB(hbb1_mouse)
    self.prot = seqdb.SequenceFileDB(sp_hbb1)
def test_blastp(self):
    "Query self.prot with HBB1_XENLA and compare against the expected blastp hits"
    if not testutil.blast_enabled():
        raise SkipTest("no BLAST installed")
    mapping = blast.BlastMapping(self.prot, verbose=False)
    hits = mapping[self.prot["HBB1_XENLA"]]
    # Relaxed comparison: lengths may differ from the expected values by up to 2.
    check_results_relaxed_blastp(
        [hits],
        blastp_correct_results,
        pair_identity_tuple,
        allowedLengthDiff=2,
    )
def test_seq_without_db(self):
    "A sequence that belongs to no database must still work as a query"
    if not testutil.blast_enabled():
        raise SkipTest("no BLAST installed")
    mapping = blast.BlastMapping(self.prot, verbose=False)
    db_seq = self.prot["HBB1_XENLA"]
    # Build a free-standing Sequence from the same residues under a new id.
    orphan = sequence.Sequence(str(db_seq), "HBB1_XENLA_no_db")
    alignment = mapping(seq=orphan)
    hits = alignment[orphan]
    assert len(hits)
def test_tblastn_no_blastx(self):
    # Indexing a BlastMapping built on self.prot with a self.dna sequence
    # must raise ValueError; any other outcome fails the test.
    if not testutil.blast_enabled():
        raise SkipTest("no BLAST installed")
    mapping = blast.BlastMapping(self.prot)
    try:
        mapping[self.dna["gi|171854975|dbj|AB364477.1|"]]
    except ValueError:
        return  # the expected rejection
    raise AssertionError("failed to trap blastx in BlastMapping")
def test_blastx_no_blastp(self):
    # Calling a BlastxMapping with a sequence from self.prot must raise
    # ValueError; any other outcome fails the test.
    if not testutil.blast_enabled():
        raise SkipTest("no BLAST installed")
    mapping = blast.BlastxMapping(self.prot, verbose=False)
    try:
        mapping(self.prot["HBB1_MOUSE"])
    except ValueError:
        return  # the expected rejection
    raise AssertionError("failed to trap blastp in BlastxMapping")
def test_blastx_rc(self):
    "Check blastx hits in negative frames for the three hbb1_mouse_RC queries"
    if not testutil.blast_enabled():
        raise SkipTest("no BLAST installed")
    mapping = blast.BlastxMapping(self.prot, verbose=False)
    # Expected 4-tuples in the layout produced by summarize() below; the same
    # expectations are applied to every query.
    expected = [
        (143, 143, 429, 0.53146853146853146),
        (143, 145, 429, 0.28275862068965518),
        (143, 145, 429, 0.28965517241379313),
        (143, 145, 429, 0.29655172413793102),
        (143, 145, 429, 0.30344827586206896),
        (144, 144, 432, 0.4513888888888889),
        (144, 144, 432, 0.4513888888888889),
        (145, 145, 435, 0.45517241379310347),
        (145, 145, 435, 0.51034482758620692),
        (146, 142, 438, 0.35616438356164382),
        (146, 146, 438, 0.4589041095890411),
        (146, 146, 438, 0.46575342465753422),
        (146, 146, 438, 0.4726027397260274),
        (146, 146, 438, 0.4726027397260274),
        (146, 146, 438, 0.4863013698630137),
        (146, 146, 438, 0.59589041095890416),
        (146, 146, 438, 0.62328767123287676),
        (146, 146, 438, 0.66438356164383561),
        (146, 146, 438, 0.74657534246575341),
        (146, 146, 438, 0.91095890410958902),
        (146, 146, 438, 0.97945205479452058),
    ]

    def summarize(t):
        # (len of item 0, len of item 1, len of item 0's parent sequence,
        #  identity fraction reported by item 2)
        return (len(t[0]), len(t[1]), len(t[0].sequence), t[2].pIdentity())

    for seq_id in ("hbb1_mouse_RC", "hbb1_mouse_RC_2", "hbb1_mouse_RC_3"):
        hits = mapping[self.dna_rc[seq_id]]
        check_results_relaxed_blastx(
            hits,
            expected,
            summarize,
            allowedLengthDiff=2,
        )
def test_tblastn(self):
    "Search self.dna with the HBB1_XENLA protein query (tblastn)"
    if not testutil.blast_enabled():
        raise SkipTest("no BLAST installed")
    mapping = blast.BlastMapping(self.dna, verbose=False)
    expected = [(144, 144, 432, 0.451)]
    hit = mapping[self.prot["HBB1_XENLA"]]

    def summarize(t):
        # Items 0 and 1 are swapped relative to the blastx tests' key function.
        return (len(t[1]), len(t[0]), len(t[1].sequence), t[2].pIdentity())

    check_results_relaxed_blastx([hit], expected, summarize)
def test_translation_db_in_results_of_db_search(self):
    """
    The alignment returned by a BlastxMapping run over a whole query
    database must include that database's translation annodb among
    its seqDict.dicts.
    """
    if not testutil.blast_enabled():
        raise SkipTest("no BLAST installed")
    mapping = blast.BlastxMapping(self.prot, verbose=False)
    alignment = mapping(queryDB=self.dna)
    annodb = translationDB.get_translation_db(self.dna).annodb
    assert annodb in alignment.seqDict.dicts
def test_multiblast_long(self):
    "Multi-sequence BLAST using the longer sp_all_hbb file as the query DB"
    if not testutil.blast_enabled():
        raise SkipTest("no BLAST installed")
    query_db = seqdb.SequenceFileDB(testutil.datafile("sp_all_hbb"))
    mapping = blast.BlastMapping(self.prot, verbose=False)
    msa = cnestedlist.NLMSA(
        "blasthits",
        "memory",
        pairwiseMode=True,
        bidirectional=False,
    )
    # No single query sequence: the whole query DB is searched and the
    # hits are collected into msa.
    mapping(None, msa, queryDB=query_db)
    msa.build()  # construct the alignment indexes
def test_non_consumable_results(self):
    # Iterating the result of BlastxMapping.__getitem__ twice must yield
    # the same non-empty contents both times.
    if not testutil.blast_enabled():
        raise SkipTest("no BLAST installed")
    mapping = blast.BlastxMapping(self.prot, verbose=False)
    query = self.dna["gi|171854975|dbj|AB364477.1|"]
    hits = mapping[query]
    first_pass = list(hits)
    second_pass = list(hits)
    assert len(first_pass), first_pass
    assert first_pass == second_pass, "BlastxMapping.__getitem__ should return list"
def test_megablast(self):
    """Megablast a copy of a self.dna sequence and expect one 444/444 edge."""
    if not testutil.blast_enabled():
        raise SkipTest("no BLAST installed")
    mapping = blast.MegablastMapping(self.dna, verbose=False)
    # must use copy of sequence to get "self matches" from NLMSA...
    residues = str(self.dna["gi|171854975|dbj|AB364477.1|"])
    query = seqdb.Sequence(residues, "foo")
    try:
        hits = mapping[query]
    except OSError:
        # silently ignore missing RepeatMasker, megablast
        return
    edge_lengths = [(len(edge[0]), len(edge[1])) for edge in hits.edges()]
    assert edge_lengths == [(444, 444)]
def test_translation_db_in_results_of_seq_search(self):
    """
    The alignment returned by a BlastxMapping run on a single query
    sequence must include the translation annodb of that sequence's
    database among its seqDict.dicts.
    """
    if not testutil.blast_enabled():
        raise SkipTest("no BLAST installed")
    mapping = blast.BlastxMapping(self.prot, verbose=False)
    query = self.dna["gi|171854975|dbj|AB364477.1|"]
    alignment = mapping(seq=query)
    annodb = translationDB.get_translation_db(self.dna).annodb
    assert annodb in alignment.seqDict.dicts
def test_maskEnd(self):
    """
    Regression test for a minor cnestedlist bug in which maskEnd is
    used to clip the end to the mask region.
    """
    if not testutil.blast_enabled():
        raise SkipTest("no BLAST installed")
    gap_db = self.gapping
    mapping = blast.BlastMapping(gap_db)
    ungapped_seq = gap_db["ungapped"]
    gapped_seq = gap_db["gapped"]
    hits = mapping[gapped_seq]
    # No assertion: the test passes as long as this lookup does not raise.
    hits[ungapped_seq]
def test_multiblast_single(self):
    "BLASTing each sequence one at a time must reproduce the batch multiblast results"
    if not testutil.blast_enabled():
        raise SkipTest("no BLAST installed")
    mapping = blast.BlastMapping(self.prot, verbose=False)
    msa = cnestedlist.NLMSA(
        "blasthits",
        "memory",
        pairwiseMode=True,
        bidirectional=False,
    )
    # all vs all, one query per call, accumulating into the same alignment
    for query in self.prot.values():
        mapping(query, msa)
    msa.build()  # construct the alignment indexes
    one_by_one = [msa[query] for query in self.prot.values()]
    batch = self.get_multiblast_results()
    # Strict check must work here even on live BLAST output
    check_results(one_by_one, batch, pair_identity_tuple)
def test_translated_seqs_in_results(self):
    """
    Every slice returned by BlastxMapping.__getitem__ must belong to a
    sequence whose id is present in the query DB's translation annodb.
    """
    if not testutil.blast_enabled():
        raise SkipTest("no BLAST installed")
    mapping = blast.BlastxMapping(self.prot, verbose=False)
    query = self.dna["gi|171854975|dbj|AB364477.1|"]
    hits = mapping[query]
    annodb = translationDB.get_translation_db(self.dna).annodb
    for hit in hits:
        seq_id = hit.seq.id
        assert seq_id in annodb, "%s not in annodb!" % seq_id
def test_no_bidirectional(self):
    # All-vs-all BLAST of self.gapping against itself: the match between the
    # first 40 positions of "gapped" and "ungapped" must appear exactly once
    # among the edges of the "gapped" slice.
    if not testutil.blast_enabled():
        raise SkipTest("no BLAST installed")
    db = self.gapping
    gapped = db["gapped"]
    ungapped = db["ungapped"]
    blastmap = blast.BlastMapping(db)
    al = blastmap(queryDB=db)
    # The sub-intervals being searched for do not change between iterations.
    gapped_head = gapped[0:40]
    ungapped_head = ungapped[0:40]
    found_once = False
    for src, dest, _edge in al[gapped].edges():
        if src == gapped_head and dest == ungapped_head:
            assert not found_once, "BLAST results should not be bidirectional"
            found_once = True
    assert found_once, "should have found this match exactly once!"
def get_multiblast_results(self):
    """Return the all-vs-all BLAST results for self.prot, computing them on demand.

    The reformatted results are kept in the module-level
    ``_multiblast_results`` so the time-consuming run happens only once.

    Raises:
        SkipTest: if ``testutil.blast_enabled()`` is false.
    """
    global _multiblast_results
    if not testutil.blast_enabled():
        raise SkipTest("no BLAST installed")
    if _multiblast_results:
        # Already computed by an earlier caller.
        return _multiblast_results
    logger.info("running expensive multiblast")
    mapping = blast.BlastMapping(self.prot, verbose=False)
    msa = cnestedlist.NLMSA(
        "blasthits",
        "memory",
        pairwiseMode=True,
        bidirectional=False,
    )
    mapping(al=msa, queryDB=self.prot)  # all vs all
    msa.build()  # construct the alignment indexes
    slices = [msa[seq] for seq in self.prot.values()]
    _multiblast_results = reformat_results(slices, pair_identity_tuple)
    return _multiblast_results