def setUp(self):
    """populates _model_results with F81, HKY85 and GTR results when empty"""
    if self._model_results:
        # already populated, nothing to do
        return

    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    # create all the model apps first, then apply them in the same order
    apps = [
        evo_app.model(
            sm_name, opt_args={"max_evaluations": 25, "limit_action": "ignore"}
        )
        for sm_name in ("F81", "HKY85", "GTR")
    ]
    fitted = [app(aln) for app in apps]
    for fit in fitted:
        # each result is stored under its own name
        self._model_results[fit.name] = fit
def test_model_tree_unique_trees(self):
    """unique_trees=True allows a different tree for each alignment"""
    # an explicit tree combined with unique_trees=True is expected to raise
    with self.assertRaises(AssertionError):
        evo_app.model("GN", tree="(a,b,c)", unique_trees=True)

    # the two alignments differ only in the name of their first sequence
    first_seq = "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG"
    common = {
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    human_aln = make_aligned_seqs(data={"Human": first_seq, **common}, moltype="dna")
    dog_aln = make_aligned_seqs(data={"Dog": first_seq, **common}, moltype="dna")

    per_aln_tree = evo_app.model(
        "GN",
        unique_trees=True,
        opt_args={"max_evaluations": 2, "limit_action": "ignore"},
    )
    for aln in (human_aln, dog_aln):
        self.assertIsInstance(per_aln_tree(aln), model_result)

    # but the second one fails if unique_trees=False
    shared_tree = evo_app.model(
        "GN",
        unique_trees=False,
        opt_args={"max_evaluations": 2, "limit_action": "ignore"},
    )
    self.assertIsInstance(shared_tree(human_aln), model_result)
    self.assertIsInstance(shared_tree(dog_aln), NotCompleted)
def test_model_collection_init_sequential(self):
    """model collection uses the preceding model to initialise the next one"""
    settings = dict(max_evaluations=15, limit_action="ignore")
    f81, hky85, gtr = (
        evo_app.model(sm_name, opt_args=settings)
        for sm_name in ("F81", "HKY85", "GTR")
    )

    def lnL_strictly_increasing(fits):
        """True when lnL rises from F81 to HKY85 to GTR"""
        return fits["F81"].lf.lnL < fits["HKY85"].lf.lnL < fits["GTR"].lf.lnL

    # sequential=True: model3 is initialised from model2, model2 from model1
    sequential = evo_app.model_collection(f81, hky85, gtr, sequential=True)
    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    fits = sequential(aln)
    self.assertTrue(lnL_strictly_increasing(fits))

    # can be set to False, in which case all models start at defaults
    # NOTE(review): this case is built with evo_app.hypothesis rather than
    # evo_app.model_collection -- confirm that is intentional
    independent = evo_app.hypothesis(f81, hky85, gtr, sequential=False)
    fits = independent(aln)
    self.assertFalse(lnL_strictly_increasing(fits))
    self.assertIsInstance(fits, model_collection_result)
def _get_all_composables(tmp_dir_name):
    """Return a list of app instances built from align, evo, sample, io,
    translate and tree.

    The io writers are created with ``tmp_dir_name`` as their first argument
    and ``create=True``.
    """
    null_model = evo.model("HKY85")
    alt_model = evo.model("GN")
    hyp_app = evo.hypothesis(null_model, alt_model)
    num_reps = 100
    return [
        align.align_to_ref(),
        align.progressive_align(model="GY94"),
        evo.ancestral_states(),
        evo.bootstrap(hyp=hyp_app, num_reps=num_reps),
        evo.hypothesis(null_model, alt_model),
        evo.model("GN"),
        evo.tabulate_stats(),
        sample.fixed_length(100),
        sample.min_length(100),
        io.write_db(tmp_dir_name, create=True),
        io.write_json(tmp_dir_name, create=True),
        io.write_seqs(tmp_dir_name, create=True),
        sample.omit_bad_seqs(),
        sample.omit_degenerates(),
        sample.omit_duplicated(),
        sample.take_codon_positions(1),
        sample.take_named_seqs(),
        sample.trim_stop_codons(gc=1),
        translate.select_translatable(),
        tree.quick_tree(),
        tree.scale_branches(),
        tree.uniformize_tree(),
    ]
def test_roundtrip_hypothesis_result(self):
    """nested members keep the correct type after a json roundtrip"""
    from cogent3.app import evo as evo_app
    from cogent3.evolve.parameter_controller import AlignmentLikelihoodFunction

    def check_members(hyp_result, expected_type):
        """every [model][1..3] member is an instance of expected_type"""
        for position in range(1, 4):
            for sm_name in ("F81", "GTR"):
                self.assertIsInstance(hyp_result[sm_name][position], expected_type)

    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    settings = dict(max_evaluations=10, limit_action="ignore")
    null = evo_app.model("F81", split_codons=True, opt_args=settings)
    alt = evo_app.model("GTR", split_codons=True, opt_args=settings)
    hyp = evo_app.hypothesis(null, alt)
    result = hyp(aln)
    self.assertIsInstance(result["F81"][1], AlignmentLikelihoodFunction)

    # straight after deserialising, the members are plain dicts
    restored = deserialise_object(result.to_json())
    check_members(restored, dict)

    # but after invoking deserialised_values
    restored.deserialised_values()
    check_members(restored, AlignmentLikelihoodFunction)
def test_bootstrap_composability(self):
    """bootstrap can be added between load_db and write_db"""
    null = evo_app.model("F81")
    alt = evo_app.model("HKY85")
    hyp = evo_app.hypothesis(null, alt)
    with TemporaryDirectory(dir=".") as dirname:
        out_path = join(dirname, "delme.tinydb")
        # only checks that the apps can be added together
        reader_and_strapper = io.load_db() + evo_app.bootstrap(hyp, num_reps=2)
        _ = reader_and_strapper + io.write_db(out_path)
def test_model_param_rules(self):
    """length rule gets an upper bound of 50 unless it is constant"""
    non_independent_rule = {
        "par_name": "length",
        "edge": "Mouse",
        "is_independent": False,
    }
    bounded = evo_app.model("GN", param_rules=[non_independent_rule])
    self.assertEqual(bounded._param_rules[0].get("upper"), 50)

    constant_rule = {"par_name": "length", "edge": "Mouse", "is_constant": True}
    unbounded = evo_app.model("GN", param_rules=[constant_rule])
    # no "upper" key is expected on a constant rule
    self.assertEqual(unbounded._param_rules[0].get("upper"), None)
def test_bstrap_parallel(self):
    """bootstrap with parallel=True returns a bootstrap_result"""
    brca1 = load_aligned_seqs(join(data_dir, "brca1.fasta"), moltype="dna")
    # first three sequences only, dropping every column that has a gap
    aln = brca1.take_seqs(brca1.names[:3]).omit_gap_pos(allowed_gap_frac=0)
    settings = dict(max_evaluations=20, limit_action="ignore")
    null = evo_app.model("F81", opt_args=settings)
    alt = evo_app.model("HKY85", opt_args=settings)
    strapper = evo_app.bootstrap(
        evo_app.hypothesis(null, alt), num_reps=2, parallel=True
    )
    self.assertIsInstance(strapper(aln), evo_app.bootstrap_result)
def test_hypothesis_str(self):
    """str(hypothesis) gives the expected representation"""
    null = evo_app.model("HKY85")
    alt = evo_app.model("HKY85", name="hky85-max-het", time_het="max")
    got = str(evo_app.hypothesis(null, alt))

    # expected text for the single alternate model
    alt_text = (
        "model(type='model', sm='HKY85', tree=None, "
        "name='hky85-max-het', sm_args=None, lf_args=None, "
        "time_het='max', param_rules=None, opt_args=None, "
        "split_codons=False, show_progress=False, verbose=False)"
    )
    expected = (
        "hypothesis(type='hypothesis', null='HKY85', "
        "alternates=(" + alt_text + ",), init_alt=None)"
    )
    self.assertEqual(got, expected)
def test_alt_hyp_fail_error(self):
    """when the alternate fails, the returned object's origin is 'model'"""
    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGA",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGA",
        "Opossum": "TGACCAGTGAAAGTGGCGGCGGTGGCTGA",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    newick = "(Mouse,Human,Opossum)"
    null = evo_app.model("F81", tree=newick)
    alt = evo_app.model("MG94HKY", tree=newick)
    outcome = evo_app.hypothesis(null, alt)(aln)
    self.assertEqual(outcome.origin, "model")
def test_hyp_split_codon_select_models(self):
    """hypothesis_result selects the best model when split_codons is used"""
    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    settings = dict(max_evaluations=10, limit_action="ignore")
    null = evo_app.model("F81", split_codons=True, opt_args=settings)
    alt = evo_app.model("GTR", split_codons=True, opt_args=settings)
    hyp_result = evo_app.hypothesis(null, alt)(aln)
    best = hyp_result.select_models()
    # lnL of the first selected model
    assert_allclose(best[0].lnL, -85.00043312185628)
def test_model_tree(self):
    """tree argument may be a newick string, a tree object or None"""
    newick = "(a,b,c)"
    candidates = (newick, make_tree(treestring=newick), None)
    for tree in candidates:
        app = evo_app.model("HKY85", tree=tree)
        if tree is None:
            expected_type = type(None)
        else:
            # string and tree inputs should both end up as a tree object
            expected_type = type(make_tree(treestring=newick))
        self.assertIsInstance(app._tree, expected_type)
def test_bstrap(self):
    """exercising bootstrap with simple hypothesis

    Checks that the null distribution is non-empty and holds floats, and
    that the result survives a json roundtrip as a bootstrap_result.
    """
    aln = load_aligned_seqs(join(data_dir, "brca1.fasta"), moltype="dna")
    aln = aln.take_seqs(aln.names[:3])
    aln = aln.omit_gap_pos(allowed_gap_frac=0)
    opt_args = dict(max_evaluations=20, limit_action="ignore")
    m1 = evo_app.model("F81", opt_args=opt_args)
    m2 = evo_app.model("HKY85", opt_args=opt_args)
    hyp = evo_app.hypothesis(m1, m2)
    strapper = evo_app.bootstrap(hyp, num_reps=2, parallel=False)
    result = strapper(aln)

    null_values = list(result.null_dist)
    # assertTrue(expr, msg) treats a second argument as the failure message,
    # so emptiness and element type are each asserted explicitly
    self.assertTrue(null_values)
    for value in null_values:
        # isinstance also accepts float subclasses such as numpy.float64
        self.assertIsInstance(value, float)

    serialised = result.to_json()
    got = deserialise_object(serialised)
    self.assertIsInstance(got, evo_app.bootstrap_result)
def test_roundtrip_model_result2(self):
    """split codon model_result members have correct type after roundtrip"""
    from cogent3.app import evo as evo_app
    from cogent3.evolve.parameter_controller import AlignmentLikelihoodFunction

    def check_positions(obj, expected_type):
        """members 1, 2 and 3 are instances of expected_type"""
        for position in range(1, 4):
            self.assertIsInstance(obj[position], expected_type)

    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    settings = dict(max_evaluations=10, limit_action="ignore")
    app = evo_app.model("F81", split_codons=True, opt_args=settings)
    result = app(aln)

    restored = deserialise_object(result.to_json())
    check_positions(restored, dict)
    # after accessing attribute, should be automatically inflated
    _ = restored.lf
    check_positions(restored, AlignmentLikelihoodFunction)

    # or after using the deserialise method
    restored = deserialise_object(result.to_json())
    restored.deserialised_values()
    check_positions(restored, AlignmentLikelihoodFunction)
def test_pvalue(self):
    """hypothesis result pvalue lies within [0, 1]"""
    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    null = evo_app.model(
        "F81", opt_args={"max_evaluations": 25, "limit_action": "ignore"}
    )
    alt = evo_app.model(
        "HKY85", opt_args={"max_evaluations": 25, "limit_action": "ignore"}
    )
    outcome = evo_app.hypothesis(null, alt)(aln)
    pvalue = outcome.pvalue
    self.assertTrue(0 <= pvalue <= 1)
def test_hyp_init(self):
    """hypothesis works with and without a user supplied init_alt function"""
    settings = dict(max_evaluations=25, limit_action="ignore")
    null = evo_app.model("F81", opt_args=settings)
    alt = evo_app.model("HKY85", opt_args=settings)

    # defaults to using null for init
    default_hyp = evo_app.hypothesis(null, alt)
    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    self.assertEqual(default_hyp(aln).df, 1)

    # user specified function, here one that returns its first argument
    custom_hyp = evo_app.hypothesis(null, alt, init_alt=lambda x, y: x)
    self.assertEqual(custom_hyp(aln).df, 1)
def test_model_hypothesis_result_repr(self):
    """__repr__ and _repr_html_ of result objects return strings"""
    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    null = evo_app.model(
        "F81", opt_args={"max_evaluations": 25, "limit_action": "ignore"}
    )
    alt = evo_app.model(
        "HKY85", opt_args={"max_evaluations": 25, "limit_action": "ignore"}
    )
    hyp_result = evo_app.hypothesis(null, alt)(aln)
    # the hypothesis result first, then its null model result
    self.assertIsInstance(repr(hyp_result), str)
    self.assertIsInstance(hyp_result._repr_html_(), str)
    null_result = hyp_result.null
    self.assertIsInstance(repr(null_result), str)
    self.assertIsInstance(null_result._repr_html_(), str)
def test_model_str(self):
    """str(model) gives the expected representation"""
    app = evo_app.model("HKY85", time_het="max")
    # the expected text is "model(" + comma separated name=value pairs + ")"
    fields = (
        "type='model'",
        "sm='HKY85'",
        "tree=None",
        "name=None",
        "sm_args=None",
        "lf_args=None",
        "time_het='max'",
        "param_rules=None",
        "opt_args=None",
        "split_codons=False",
        "show_progress=False",
        "verbose=False",
    )
    expected = "model(" + ", ".join(fields) + ")"
    self.assertEqual(str(app), expected)
def test_model_moltype_mismatch(self):
    """incompatible model and alignment moltypes give origin 'model'"""
    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGA",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGA",
        "Opossum": "TGACCAGTGAAAGTGGCGGCGGTGGCTGA",
    }
    dna_aln = make_aligned_seqs(data=seqs, moltype="dna")
    # JTT92 is applied to an alignment created with moltype="dna"
    app = evo_app.model("JTT92", tree="(Mouse,Human,Opossum)")
    outcome = app(dna_aln)
    self.assertEqual(outcome.origin, "model")
def test_model_collection_result(self):
    """model_collection_result members survive a json roundtrip"""
    from cogent3.app import evo as evo_app
    from cogent3.evolve.parameter_controller import (
        AlignmentLikelihoodFunction,
    )

    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    settings = dict(max_evaluations=10, limit_action="ignore")
    apps = (
        evo_app.model("F81", split_codons=True, opt_args=settings),
        evo_app.model("GTR", split_codons=True, opt_args=settings),
    )

    def check_members(collection, expected_type):
        """every [model name][1..3] member is an instance of expected_type"""
        for app in apps:
            for position in range(1, 4):
                self.assertIsInstance(collection[app.name][position], expected_type)

    collection = model_collection_result(name="collection", source="blah")
    for app in apps:
        collection[app.name] = app(aln)
    check_members(collection, AlignmentLikelihoodFunction)

    # straight after deserialising, the members are plain dicts
    restored = deserialise_object(collection.to_json())
    check_members(restored, dict)

    # but after invoking deserialised_values
    restored.deserialised_values()
    check_members(restored, AlignmentLikelihoodFunction)
def test_model_result_tree_discrete_time(self):
    """tree attribute can be accessed on a BH model result"""
    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    app = evo_app.model(
        "BH", opt_args={"max_evaluations": 25, "limit_action": "ignore"}
    )
    fit = app(aln)
    # NOTE(review): nothing is asserted about the tree (e.g. that lengths are
    # paralinear); this only checks that the attribute access does not raise
    _ = fit.tree
def test_model_hypothesis_result_repr(self):
    """result reprs are strings and the displayed p-value is formatted"""
    import re

    def displayed_pvalue(hyp_result):
        """last whitespace separated token on the 5th line of str(result)"""
        return str(hyp_result).splitlines()[4].split()[-1]

    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    null = evo_app.model(
        "F81", opt_args={"max_evaluations": 25, "limit_action": "ignore"}
    )
    alt = evo_app.model(
        "HKY85", opt_args={"max_evaluations": 25, "limit_action": "ignore"}
    )
    hyp_result = evo_app.hypothesis(null, alt)(aln)
    # check the p-val formatted as %.4f
    self.assertIsNotNone(re.search(r"\d\.\d+", displayed_pvalue(hyp_result)))
    self.assertIsInstance(repr(hyp_result), str)
    self.assertIsInstance(hyp_result._repr_html_(), str)
    null_result = hyp_result.null
    self.assertIsInstance(repr(null_result), str)
    self.assertIsInstance(null_result._repr_html_(), str)

    # second case: the displayed p-value should be in exponent notation
    # NOTE(review): this path is relative to the current working directory
    primates = load_aligned_seqs("data/primate_brca1.fasta", moltype="dna")
    subset = primates.take_seqs(["Human", "Rhesus", "Galago"])
    aln = subset[2::3].omit_gap_pos()
    null = evo_app.model(
        "F81", opt_args={"max_evaluations": 25, "limit_action": "ignore"}
    )
    alt = evo_app.model(
        "HKY85", opt_args={"max_evaluations": 100, "limit_action": "ignore"}
    )
    hyp_result = evo_app.hypothesis(null, alt)(aln)
    self.assertIsNotNone(
        re.search(r"[0-9\.]+e-\d+", displayed_pvalue(hyp_result))
    )
def test_discrete_time_model(self):
    """model app completes with the BH submodel"""
    from cogent3.app.composable import NotCompleted

    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    app = evo_app.model(
        "BH", opt_args={"max_evaluations": 100, "limit_action": "ignore"}
    )
    outcome = app(aln)
    # the app must not have returned a NotCompleted
    self.assertNotIsInstance(outcome, NotCompleted)
def test_ancestral(self):
    """ancestral_states app works on a GN model result"""
    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    fitter = evo_app.model(
        "GN", opt_args={"max_evaluations": 25, "limit_action": "ignore"}
    )
    reconstruct = evo_app.ancestral_states()
    states = reconstruct(fitter(aln))
    # one row per alignment position, 4 columns, each row summing to 1
    self.assertEqual(states["root"].shape, (len(aln), 4))
    assert_allclose(states["root"].row_sum(), 1)
def test_model_name_lf_name(self):
    """model_result.name equals the name of its lf"""
    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    settings = {"max_evaluations": 5, "limit_action": "ignore"}
    app = evo_app.model("F81", name="blah", show_progress=False, opt_args=settings)
    fit = app(aln)
    self.assertEqual(fit.name, fit.lf.name)
def test_model_result_repr_split_pos_model(self):
    """repr does not fail for a split codon model_result"""
    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    settings = {"max_evaluations": 55, "limit_action": "ignore"}
    app = evo_app.model(
        "F81", split_codons=True, show_progress=False, opt_args=settings
    )
    fit = app(aln)
    # only checks that building the repr does not raise
    _ = repr(fit)
def test_model_result_alignment(self):
    """model_result.alignment holds the data the model was applied to"""
    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    settings = {"max_evaluations": 5, "limit_action": "ignore"}
    app = evo_app.model("F81", show_progress=False, opt_args=settings)
    fit = app(aln)
    # the returned alignment, as a dict, should equal the input data
    self.assertEqual(fit.alignment.to_dict(), seqs)
def test_model_str(self):
    """correct str representation

    The lines of str(model) are joined with single spaces before comparing
    against the expected one-line text.
    """
    model = evo_app.model("HKY85", time_het="max")
    got = " ".join(str(model).splitlines())
    expect = (
        "model(type='model', sm='HKY85', tree=None, unique_trees=False, "
        "name=None, sm_args=None, lf_args=None, "
        "time_het='max', param_rules=None, "
        "opt_args=None, split_codons=False, "
        "show_progress=False, verbose=False)"
    )
    # assertEqual shows both values on failure, so no print is needed
    self.assertEqual(got, expect)
def test_repr(self):
    """repr returns a str for populated and empty model_result"""
    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    settings = {"max_evaluations": 1, "limit_action": "ignore"}
    app = evo_app.model("F81", show_progress=False, opt_args=settings)
    populated = app(aln)
    self.assertIsInstance(repr(populated), str)

    # no values set
    empty = model_result(source="blah")
    self.assertIsInstance(repr(empty), str)
def test_model_summed_branch_lengths(self):
    """total_length matches the total length of the lf annotated tree"""
    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    app = evo_app.model(
        "F81", opt_args={"max_evaluations": 25, "limit_action": "ignore"}
    )
    fit = app(aln)
    # first with default arguments, then with length_as="paralinear"
    for length_kwargs in ({}, {"length_as": "paralinear"}):
        annotated = fit.lf.get_annotated_tree(**length_kwargs)
        assert_allclose(
            fit.total_length(**length_kwargs), annotated.total_length()
        )