def test_progress_with_guide_tree(self):
    """progressive_align accepts a guide tree given as a string or a tree"""
    tree_obj = make_tree(treestring=self.treestring)
    # the same tree in either form should produce a 42 column alignment
    for guide in (self.treestring, tree_obj):
        app = align_app.progressive_align(model="nucleotide", guide_tree=guide)
        result = app(self.seqs)
        self.assertEqual(len(result), 42)

    # tip names containing underscores are handled too
    underscored = "(Bandicoot:0.4,FlyingFox:0.05,(Rhesus_macaque:0.06,Human:0.0):0.04);"
    app = align_app.progressive_align(model="nucleotide", guide_tree=underscored)
    renamed = self.seqs.to_dict()
    renamed["Rhesus macaque"] = renamed.pop("Rhesus")
    result = app(make_unaligned_seqs(renamed))
    self.assertEqual(len(result), 42)

    # constructing the app with a tree lacking branch lengths is an error
    with self.assertRaises(ValueError):
        align_app.progressive_align(
            model="nucleotide",
            guide_tree="(Bandicoot,FlyingFox,(Rhesus_macaque,Human));",
        )
def test_progressive_align_codon(self):
    """codon models, named specifically or generically, align to 42 columns"""
    for model_name in ("GY94", "codon"):
        app = align_app.progressive_align(model=model_name)
        result = app(self.seqs)
        self.assertEqual(len(result), 42)
def test_progressive_align_protein(self):
    """protein models, named specifically or generically, align to 14 columns"""
    translated = self.seqs.get_translation()
    for model_name in ("WG01", "protein"):
        app = align_app.progressive_align(
            model=model_name, guide_tree=self.treestring
        )
        result = app(translated)
        self.assertEqual(len(result), 14)
def test_with_genetic_code(self):
    """handles genetic code argument"""
    # the 'TGA' codon is a sense codon in vertebrate mitochondrial (gc="2")
    aligner = align_app.progressive_align(model="GY94", gc="2")
    self.assertIn("TGA", aligner._model.get_motifs())

    # but a stop codon in the standard nuclear, so it is not a motif
    aligner = align_app.progressive_align(model="codon")
    self.assertNotIn("TGA", aligner._model.get_motifs())

    # supplying a genetic code together with a nucleotide model must fail
    with self.assertRaises(TypeError):
        align_app.progressive_align(model="nucleotide", gc="2")
def test_progress_with_guide_tree(self):
    """progressive_align accepts a guide tree given as a string or a tree"""
    tree_obj = make_tree(treestring=self.treestring)
    # the same tree in either form should produce a 42 column alignment
    for guide in (self.treestring, tree_obj):
        app = align_app.progressive_align(model="nucleotide", guide_tree=guide)
        result = app(self.seqs)
        self.assertEqual(len(result), 42)
def test_progressive_align_protein_moltype(self):
    """protein moltype sequences align when no guide tree is supplied"""
    from cogent3 import load_aligned_seqs

    loaded = load_aligned_seqs("data/nexus_aa.nxs", moltype="protein")
    # strip the gaps, then keep a five sequence subset
    ungapped = loaded.degap()
    subset = ungapped.take_seqs(["Rat", "Cow", "Human", "Mouse", "Whale"])
    for model_name in ("WG01", "protein"):
        app = align_app.progressive_align(model=model_name)
        result = app(subset)
        self.assertNotIsInstance(result, NotCompleted)
def _get_all_composables(tmp_dir_name):
    """Return a list with one instance of each composable app.

    The writer apps (write_db, write_json, write_seqs) are constructed
    with ``tmp_dir_name`` as their first argument and ``create=True``.
    """
    hky85 = evo.model("HKY85")
    general_nuc = evo.model("GN")
    hypothesis = evo.hypothesis(hky85, general_nuc)
    bootstrap_reps = 100
    return [
        align.align_to_ref(),
        align.progressive_align(model="GY94"),
        evo.ancestral_states(),
        evo.bootstrap(hyp=hypothesis, num_reps=bootstrap_reps),
        evo.hypothesis(hky85, general_nuc),
        evo.model("GN"),
        evo.tabulate_stats(),
        sample.fixed_length(100),
        sample.min_length(100),
        io.write_db(tmp_dir_name, create=True),
        io.write_json(tmp_dir_name, create=True),
        io.write_seqs(tmp_dir_name, create=True),
        sample.omit_bad_seqs(),
        sample.omit_degenerates(),
        sample.omit_duplicated(),
        sample.take_codon_positions(1),
        sample.take_named_seqs(),
        sample.trim_stop_codons(gc=1),
        translate.select_translatable(),
        tree.quick_tree(),
        tree.scale_branches(),
        tree.uniformize_tree(),
    ]
def test_pickle_progressive_align(self):
    """the output of progressive_align can be pickled and unpickled"""
    import pickle

    app = align_app.progressive_align(model="codon")
    result = app(self.seqs)
    # round trip the alignment result and check what comes back is truthy
    restored = pickle.loads(pickle.dumps(result))
    self.assertTrue(restored)
def test_progressive_align_nuc(self):
    """progressive alignment with nuc models gives the expected alignment"""
    expected = {
        "Rhesus": "GCCAGCTCATTACAGCATGAGAACAG---TTTGTTACTCACT",
        "Human": "GCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACT",
        "Bandicoot": "NACTCATTAATGCTTGAAACCAGCAG---TTTATTGTCCAAC",
        "FlyingFox": "GCCAGCTCTTTACAGCATGAGAACAG---TTTATTATACACT",
    }
    app = align_app.progressive_align(model="TN93", distance="TN93")
    result = app(self.seqs)
    self.assertEqual(result.to_dict(), expected)

    # a second app built with the same arguments: check the alignment
    # length and that its moltype matches the one recorded on the app
    app = align_app.progressive_align(model="TN93", distance="TN93")
    result = app(self.seqs)
    self.assertEqual(len(result), 42)
    self.assertEqual(result.moltype, app._moltype)
def test_progressive_fails(self):
    """should return a NotCompleted result rather than an alignment"""
    # Bandicoot has an in-frame stop codon (it starts with TGA)
    seqs = make_unaligned_seqs(
        data={"Human": "GCCTCA", "Rhesus": "GCCAGCTCA", "Bandicoot": "TGATCATTA"},
        moltype="dna",
    )
    aligner = align_app.progressive_align(model="codon")
    got = aligner(seqs)
    # assertTrue(type(got), NotCompleted) treated NotCompleted as the failure
    # message and could never fail; check the returned type explicitly
    self.assertIsInstance(got, NotCompleted)
def _get_all_composable_apps():
    """Return a list with one instance of each composable app.

    The sequence writer is constructed with the current working directory.
    """
    apps = [align.align_to_ref(), align.progressive_align(model="GY94")]
    apps.extend((sample.fixed_length(100), sample.min_length(100)))
    apps.append(io.write_seqs(os.getcwd()))
    apps.extend(
        (
            sample.omit_bad_seqs(),
            sample.omit_degenerates(),
            sample.take_codon_positions(1),
            sample.take_named_seqs(),
            sample.trim_stop_codons(gc=1),
        )
    )
    return apps
def test_composite_pickleable(self):
    """individual apps and a composed process can all be pickled"""
    import pickle

    from cogent3.app import align, evo, io, sample, translate, tree

    # each app is pickled immediately after it is constructed
    loader = io.load_aligned(moltype="dna")
    pickle.dumps(loader)
    translatable = translate.select_translatable()
    pickle.dumps(translatable)
    aligner = align.progressive_align("nucleotide")
    pickle.dumps(aligner)
    no_degenerates = sample.omit_degenerates(moltype="dna")
    pickle.dumps(no_degenerates)
    truncate = sample.fixed_length(1000, random=True)
    pickle.dumps(truncate)
    hky85 = evo.model("HKY85")
    pickle.dumps(hky85)
    quick = tree.quick_tree()
    pickle.dumps(quick)

    # the composed process (quick_tree is not part of it) must pickle too
    process = loader + translatable + aligner + no_degenerates + truncate + hky85
    pickle.dumps(process)
def test_est_dist_pair_slow(self):
    """pairwise distances from fast_slow_dist on align_to_ref alignments"""
    mouse_human = ("Mouse", "Human")
    human_opossum = ("Human", "Opossum")
    # each scenario: sequences, align_to_ref keyword arguments, the pair
    # examined, then per slow_calc (check symmetry?, check non-negative?)
    scenarios = (
        (self.seqs3, {}, mouse_human,
         (("GTR", True, True), ("TN93", True, True))),
        (self.seqs3, {"ref_seq": "Human"}, mouse_human,
         (("GTR", True, False), ("TN93", True, True))),
        (self.seqs3, {"ref_seq": "Mouse"}, mouse_human,
         (("GTR", False, True), ("TN93", False, True))),
        (self.seqs4, {}, human_opossum,
         (("GTR", False, True), ("TN93", False, True))),
        (self.seqs4, {"ref_seq": "Human"}, human_opossum,
         (("GTR", False, True), ("TN93", False, True))),
        (self.seqs4, {"ref_seq": "Opossum"}, human_opossum,
         (("GTR", False, True), ("TN93", False, True))),
    )
    for seqs, ref_kwargs, pair, checks in scenarios:
        # one alignment per scenario, shared by both distance calculators
        to_ref = align.align_to_ref(**ref_kwargs)
        aligned = to_ref(seqs)
        for calc, symmetric, non_negative in checks:
            calculator = dist_app.fast_slow_dist(slow_calc=calc)
            dists = calculator(aligned).to_dict()
            if symmetric:
                # the distance must not depend on the order of the names
                assert_allclose(dists[pair[::-1]], dists[pair])
            if non_negative:
                self.assertTrue(dists[pair] >= 0)

    # progressive alignment with a two taxon guide tree; the result is
    # discarded, so this only exercises the call
    guide = "(Human:0.2,Bandicoot:0.2)"
    progressive = align.progressive_align(model="WG01", guide_tree=guide)
    progressive(self.seqs5)
def test_est_dist_pair_slow(self):
    """pairwise distances from fast_slow_dist on align_to_ref alignments"""
    mouse_human = ("Mouse", "Human")
    human_opossum = ("Human", "Opossum")
    # each scenario: sequences, align_to_ref keyword arguments, the pair
    # examined, then per slow_calc (check symmetry?, check non-negative?)
    scenarios = (
        (self.seqs3, {}, mouse_human,
         (("GTR", True, True), ("TN93", True, True))),
        (self.seqs3, {"ref_seq": "Human"}, mouse_human,
         (("GTR", True, False), ("TN93", True, True))),
        (self.seqs3, {"ref_seq": "Mouse"}, mouse_human,
         (("GTR", False, True), ("TN93", False, True))),
        (self.seqs4, {}, human_opossum,
         (("GTR", False, True), ("TN93", False, True))),
        (self.seqs4, {"ref_seq": "Human"}, human_opossum,
         (("GTR", False, True), ("TN93", False, True))),
        (self.seqs4, {"ref_seq": "Opossum"}, human_opossum,
         (("GTR", False, True), ("TN93", False, True))),
    )
    for seqs, ref_kwargs, pair, checks in scenarios:
        # one alignment per scenario, shared by both distance calculators
        to_ref = align.align_to_ref(**ref_kwargs)
        aligned = to_ref(seqs)
        for calc, symmetric, non_negative in checks:
            calculator = dist_app.fast_slow_dist(slow_calc=calc)
            dists = calculator(aligned).to_dict()
            if symmetric:
                # the distance must not depend on the order of the names
                assert_allclose(dists[pair[::-1]], dists[pair])
            if non_negative:
                self.assertTrue(0 <= dists[pair])

    # now as a process
    pipeline = align.align_to_ref() + dist_app.fast_slow_dist(
        fast_calc="hamming", moltype="dna"
    )
    result = pipeline(self.seqs1)
    self.assertEqual(result[("Human", "Rhesus")], 1)

    # progressive alignment with a two taxon guide tree; the result is
    # discarded, so this only exercises the call
    guide = "(Human:0.2,Bandicoot:0.2)"
    progressive = align.progressive_align(model="WG01", guide_tree=guide)
    progressive(self.seqs5)