def _get_all_composables(tmp_dir_name):
    """Return a list holding one freshly built instance of each app under test.

    ``tmp_dir_name`` is handed to the three ``io.write_*`` apps as their
    first argument; each of those is constructed with ``create=True``.
    """
    null_model = evo.model("HKY85")
    alt_model = evo.model("GN")
    # the bootstrap app is constructed from a hypothesis app of its own
    bootstrap_hyp = evo.hypothesis(null_model, alt_model)
    bootstrap_reps = 100
    return [
        align.align_to_ref(),
        align.progressive_align(model="GY94"),
        evo.ancestral_states(),
        evo.bootstrap(hyp=bootstrap_hyp, num_reps=bootstrap_reps),
        # a separate hypothesis instance, distinct from the one given to bootstrap
        evo.hypothesis(null_model, alt_model),
        evo.model("GN"),
        evo.tabulate_stats(),
        sample.fixed_length(100),
        sample.min_length(100),
        io.write_db(tmp_dir_name, create=True),
        io.write_json(tmp_dir_name, create=True),
        io.write_seqs(tmp_dir_name, create=True),
        sample.omit_bad_seqs(),
        sample.omit_degenerates(),
        sample.omit_duplicated(),
        sample.take_codon_positions(1),
        sample.take_named_seqs(),
        sample.trim_stop_codons(gc=1),
        translate.select_translatable(),
        tree.quick_tree(),
        tree.scale_branches(),
        tree.uniformize_tree(),
    ]
def test_roundtrip_hypothesis_result(self):
    """nested items retain the correct type after roundtrip"""
    from cogent3.app import evo as evo_app
    from cogent3.evolve.parameter_controller import AlignmentLikelihoodFunction

    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    opt_args = {"max_evaluations": 10, "limit_action": "ignore"}
    null = evo_app.model("F81", split_codons=True, opt_args=opt_args)
    alt = evo_app.model("GTR", split_codons=True, opt_args=opt_args)
    result = evo_app.hypothesis(null, alt)(aln)
    self.assertIsInstance(result["F81"][1], AlignmentLikelihoodFunction)

    got_obj = deserialise_object(result.to_json())

    # every (model name, index) pair inspected below; indices run 1..3
    nested_keys = [(sm, i) for i in range(1, 4) for sm in ("F81", "GTR")]

    # straight after deserialising, the nested entries are still plain dicts
    for sm, i in nested_keys:
        self.assertIsInstance(got_obj[sm][i], dict)

    # but after invoking deserialised_values they are likelihood functions
    got_obj.deserialised_values()
    for sm, i in nested_keys:
        self.assertIsInstance(got_obj[sm][i], AlignmentLikelihoodFunction)
def test_model_collection_init_sequential(self):
    """model collection uses preceding model to initialise function"""

    def lnl_strictly_increasing(res):
        # F81 < HKY85 < GTR, compared on the fitted log-likelihoods
        return res["F81"].lf.lnL < res["HKY85"].lf.lnL < res["GTR"].lf.lnL

    opt_args = {"max_evaluations": 15, "limit_action": "ignore"}
    f81 = evo_app.model("F81", opt_args=opt_args)
    hky85 = evo_app.model("HKY85", opt_args=opt_args)
    gtr = evo_app.model("GTR", opt_args=opt_args)
    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }

    # sequential=True: model3 is initialised from model2, model2 from model1
    sequential_coll = evo_app.model_collection(f81, hky85, gtr, sequential=True)
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    result = sequential_coll(aln)
    self.assertTrue(lnl_strictly_increasing(result))

    # can be set to False, in which case all models start at defaults
    # NOTE(review): this second collection is built with evo_app.hypothesis,
    # not evo_app.model_collection -- confirm that is intentional
    independent_coll = evo_app.hypothesis(f81, hky85, gtr, sequential=False)
    result = independent_coll(aln)
    self.assertFalse(lnl_strictly_increasing(result))

    self.assertIsInstance(result, model_collection_result)
def test_bootstrap_composability(self):
    """can be composed with load_db and write_db"""
    hyp = evo_app.hypothesis(evo_app.model("F81"), evo_app.model("HKY85"))
    with TemporaryDirectory(dir=".") as dirname:
        out_path = join(dirname, "delme.tinydb")
        # the test only checks that composing with + succeeds;
        # the resulting process is never called
        process = io.load_db() + evo_app.bootstrap(hyp, num_reps=2)
        _ = process + io.write_db(out_path)
def test_hyp_init(self):
    """uses user specified init_alt function, or not"""
    opt_args = {"max_evaluations": 25, "limit_action": "ignore"}
    null = evo_app.model("F81", opt_args=opt_args)
    alt = evo_app.model("HKY85", opt_args=opt_args)
    aln = make_aligned_seqs(
        data={
            "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
            "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
            "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
        },
        moltype="dna",
    )

    # without init_alt: defaults to using null for init
    default_hyp = evo_app.hypothesis(null, alt)
    self.assertEqual(default_hyp(aln).df, 1)

    # with a user specified function that returns its first argument
    custom_hyp = evo_app.hypothesis(null, alt, init_alt=lambda x, y: x)
    self.assertEqual(custom_hyp(aln).df, 1)
def test_bstrap_parallel(self):
    """exercising bootstrap with parallel"""
    brca1 = load_aligned_seqs(join(data_dir, "brca1.fasta"), moltype="dna")
    # keep the first three sequences, then drop every column containing a gap
    aln = brca1.take_seqs(brca1.names[:3]).omit_gap_pos(allowed_gap_frac=0)

    opt_args = {"max_evaluations": 20, "limit_action": "ignore"}
    hyp = evo_app.hypothesis(
        evo_app.model("F81", opt_args=opt_args),
        evo_app.model("HKY85", opt_args=opt_args),
    )
    strapper = evo_app.bootstrap(hyp, num_reps=2, parallel=True)
    self.assertIsInstance(strapper(aln), evo_app.bootstrap_result)
def test_hypothesis_str(self):
    """correct str representation"""
    null = evo_app.model("HKY85")
    alt = evo_app.model("HKY85", name="hky85-max-het", time_het="max")
    hyp = evo_app.hypothesis(null, alt)

    # expected text for the single alternate model, embedded in full below
    alt_repr = (
        "model(type='model', sm='HKY85', tree=None, name='hky85-max-het', "
        "sm_args=None, lf_args=None, time_het='max', param_rules=None, "
        "opt_args=None, split_codons=False, show_progress=False, verbose=False)"
    )
    expect = (
        "hypothesis(type='hypothesis', null='HKY85', alternates=("
        + alt_repr
        + ",), init_alt=None)"
    )
    self.assertEqual(str(hyp), expect)
def test_alt_hyp_fail_error(self):
    """if alt fails NotCompleted.origin should be model"""
    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGA",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGA",
        "Opossum": "TGACCAGTGAAAGTGGCGGCGGTGGCTGA",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    topology = "(Mouse,Human,Opossum)"
    null = evo_app.model("F81", tree=topology)
    # MG94HKY is the alternate, the model the docstring expects to fail
    alt = evo_app.model("MG94HKY", tree=topology)
    outcome = evo_app.hypothesis(null, alt)(aln)
    self.assertEqual(outcome.origin, "model")
def test_model_hypothesis_result_repr(self):
    """result objects __repr__ and _repr_html_ methods work correctly"""
    # NOTE(review): a second method with this exact name is defined later in
    # the visible source; if both sit in the same class the later one replaces
    # this one and these p-value format checks never run -- confirm
    import re

    def displayed_pvalue(res):
        # last whitespace-separated field on the fifth line of str(res)
        return str(res).splitlines()[4].split()[-1]

    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    null = evo_app.model(
        "F81", opt_args={"max_evaluations": 25, "limit_action": "ignore"}
    )
    alt = evo_app.model(
        "HKY85", opt_args={"max_evaluations": 25, "limit_action": "ignore"}
    )
    result = evo_app.hypothesis(null, alt)(aln)

    # check the p-val formatted as %.4f
    self.assertIsNotNone(re.search(r"\d\.\d+", displayed_pvalue(result)))
    for obj in (result, result.null):
        self.assertIsInstance(obj.__repr__(), str)
        self.assertIsInstance(obj._repr_html_(), str)

    # second alignment: the displayed p-value must be in exponent notation
    primates = load_aligned_seqs("data/primate_brca1.fasta", moltype="dna")
    subset = primates.take_seqs(["Human", "Rhesus", "Galago"])
    aln = subset[2::3].omit_gap_pos()
    null = evo_app.model(
        "F81", opt_args={"max_evaluations": 25, "limit_action": "ignore"}
    )
    alt = evo_app.model(
        "HKY85", opt_args={"max_evaluations": 100, "limit_action": "ignore"}
    )
    result = evo_app.hypothesis(null, alt)(aln)
    self.assertIsNotNone(re.search(r"[0-9\.]+e-\d+", displayed_pvalue(result)))
def test_hyp_split_codon_select_models(self):
    """hypothesis_result selects the best model when split_codons is used"""
    opt_args = {"max_evaluations": 10, "limit_action": "ignore"}
    null = evo_app.model("F81", split_codons=True, opt_args=opt_args)
    alt = evo_app.model("GTR", split_codons=True, opt_args=opt_args)
    hyp = evo_app.hypothesis(null, alt)
    aln = make_aligned_seqs(
        data={
            "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
            "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
            "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
        },
        moltype="dna",
    )
    best_models = hyp(aln).select_models()
    # hard-coded reference log-likelihood for the first selected model
    assert_allclose(best_models[0].lnL, -85.00043312185628)
def test_bstrap(self):
    """exercising bootstrap with simple hypothesis"""
    aln = load_aligned_seqs(join(data_dir, "brca1.fasta"), moltype="dna")
    aln = aln.take_seqs(aln.names[:3])
    aln = aln.omit_gap_pos(allowed_gap_frac=0)
    opt_args = dict(max_evaluations=20, limit_action="ignore")
    m1 = evo_app.model("F81", opt_args=opt_args)
    m2 = evo_app.model("HKY85", opt_args=opt_args)
    hyp = evo_app.hypothesis(m1, m2)
    strapper = evo_app.bootstrap(hyp, num_reps=2, parallel=False)
    result = strapper(aln)

    # The original call was assertTrue(<set of types>, {float}); the second
    # positional argument of assertTrue is the failure message, so the element
    # types were never compared. Keep the non-empty check and add a real type
    # check. issubclass is used so float subclasses are accepted as well.
    value_types = {type(v) for v in result.null_dist}
    self.assertTrue(value_types, "null_dist is empty")
    self.assertTrue(
        all(issubclass(t, float) for t in value_types),
        f"null_dist holds non-float values: {value_types}",
    )

    # roundtrip through json; local renamed so it doesn't shadow the json module
    serialised = result.to_json()
    got = deserialise_object(serialised)
    self.assertIsInstance(got, evo_app.bootstrap_result)
def test_pvalue(self):
    """hypothesis test p-value property"""
    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    null = evo_app.model(
        "F81", opt_args={"max_evaluations": 25, "limit_action": "ignore"}
    )
    alt = evo_app.model(
        "HKY85", opt_args={"max_evaluations": 25, "limit_action": "ignore"}
    )
    result = evo_app.hypothesis(null, alt)(aln)
    # the p-value must lie in the closed interval [0, 1]
    pvalue = result.pvalue
    self.assertTrue(0 <= pvalue <= 1)
def test_model_hypothesis_result_repr(self):
    """result objects __repr__ and _repr_html_ methods work correctly"""
    # NOTE(review): a method with this exact name is also defined earlier in
    # the visible source; if both sit in the same class this definition
    # replaces the earlier one -- confirm and rename one of them if so
    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    null = evo_app.model(
        "F81", opt_args={"max_evaluations": 25, "limit_action": "ignore"}
    )
    alt = evo_app.model(
        "HKY85", opt_args={"max_evaluations": 25, "limit_action": "ignore"}
    )
    result = evo_app.hypothesis(null, alt)(aln)
    # both the hypothesis result and its null member must render as strings
    for obj in (result, result.null):
        self.assertIsInstance(obj.__repr__(), str)
        self.assertIsInstance(obj._repr_html_(), str)
def test_unique_models(self):
    """hypothesis raises ValueError if models not unique"""
    # both models are HKY85 and neither is given an explicit name
    plain = evo_app.model("HKY85")
    max_het = evo_app.model("HKY85", time_het="max")
    with self.assertRaises(ValueError):
        evo_app.hypothesis(plain, max_het)