Beispiel #1
0
def _get_all_composables(tmp_dir_name):
    """Build one instance of each composable app used by the tests.

    Parameters
    ----------
    tmp_dir_name
        path handed to the ``io`` writer apps, which are constructed
        with ``create=True``

    Returns
    -------
    list of app instances, grouped below by the module they come from
    """
    null_model = evo.model("HKY85")
    alt_model = evo.model("GN")
    lrt = evo.hypothesis(null_model, alt_model)
    replicates = 100

    # groups are constructed in the same order as they appear in the result
    aligners = (
        align.align_to_ref(),
        align.progressive_align(model="GY94"),
    )
    evolution = (
        evo.ancestral_states(),
        evo.bootstrap(hyp=lrt, num_reps=replicates),
        evo.hypothesis(null_model, alt_model),
        evo.model("GN"),
        evo.tabulate_stats(),
    )
    length_filters = (
        sample.fixed_length(100),
        sample.min_length(100),
    )
    writers = (
        io.write_db(tmp_dir_name, create=True),
        io.write_json(tmp_dir_name, create=True),
        io.write_seqs(tmp_dir_name, create=True),
    )
    seq_filters = (
        sample.omit_bad_seqs(),
        sample.omit_degenerates(),
        sample.omit_duplicated(),
        sample.take_codon_positions(1),
        sample.take_named_seqs(),
        sample.trim_stop_codons(gc=1),
    )
    translators = (translate.select_translatable(),)
    tree_apps = (
        tree.quick_tree(),
        tree.scale_branches(),
        tree.uniformize_tree(),
    )
    return [
        *aligners,
        *evolution,
        *length_filters,
        *writers,
        *seq_filters,
        *translators,
        *tree_apps,
    ]
Beispiel #2
0
    def test_roundtrip_hypothesis_result(self):
        """nested items get their correct type back after a JSON roundtrip"""
        from cogent3.app import evo as evo_app
        from cogent3.evolve.parameter_controller import AlignmentLikelihoodFunction

        seqs = {
            "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
            "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
            "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
        }
        alignment = make_aligned_seqs(data=seqs, moltype="dna")
        settings = {"max_evaluations": 10, "limit_action": "ignore"}
        null = evo_app.model("F81", split_codons=True, opt_args=settings)
        alt = evo_app.model("GTR", split_codons=True, opt_args=settings)
        lrt = evo_app.hypothesis(null, alt)
        fitted = lrt(alignment)
        self.assertIsInstance(fitted["F81"][1], AlignmentLikelihoodFunction)

        restored = deserialise_object(fitted.to_json())
        # every (model name, index) pair, in the order they are checked
        members = [(name, index) for index in range(1, 4) for name in ("F81", "GTR")]

        # straight after deserialising, the nested members are plain dicts
        for name, index in members:
            self.assertIsInstance(restored[name][index], dict)

        # but after invoking deserialised_values they are likelihood functions
        restored.deserialised_values()
        for name, index in members:
            self.assertIsInstance(restored[name][index], AlignmentLikelihoodFunction)
Beispiel #3
0
    def test_model_collection_init_sequential(self):
        """model collection uses preceding model to initialise function"""

        def lnLs_strictly_increase(fits):
            # F81 < HKY85 < GTR in log-likelihood
            return fits["F81"].lf.lnL < fits["HKY85"].lf.lnL < fits["GTR"].lf.lnL

        settings = {"max_evaluations": 15, "limit_action": "ignore"}
        f81 = evo_app.model("F81", opt_args=settings)
        hky85 = evo_app.model("HKY85", opt_args=settings)
        gtr = evo_app.model("GTR", opt_args=settings)
        # sequential=True: GTR is initialised from HKY85, which is
        # initialised from F81
        collection = evo_app.model_collection(f81, hky85, gtr, sequential=True)
        seqs = {
            "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
            "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
            "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
        }
        alignment = make_aligned_seqs(data=seqs, moltype="dna")
        fits = collection(alignment)
        self.assertTrue(lnLs_strictly_increase(fits))

        # sequential=False: all models start at defaults
        collection = evo_app.hypothesis(f81, hky85, gtr, sequential=False)
        fits = collection(alignment)
        self.assertFalse(lnLs_strictly_increase(fits))

        self.assertIsInstance(fits, model_collection_result)
Beispiel #4
0
 def test_bootstrap_composability(self):
     """bootstrap can be composed between load_db and write_db"""
     null = evo_app.model("F81")
     alt = evo_app.model("HKY85")
     lrt = evo_app.hypothesis(null, alt)
     with TemporaryDirectory(dir=".") as tmp_dir:
         db_path = join(tmp_dir, "delme.tinydb")
         loader = io.load_db()
         strapper = evo_app.bootstrap(lrt, num_reps=2)
         load_and_strap = loader + strapper
         writer = io.write_db(db_path)
         # only the act of composing is exercised; the composed app is unused
         _ = load_and_strap + writer
Beispiel #5
0
    def test_hyp_init(self):
        """hypothesis works with and without a user supplied init_alt"""

        def first_arg(x, y):
            # user specified init_alt: hands back its first argument unchanged
            return x

        settings = {"max_evaluations": 25, "limit_action": "ignore"}
        null = evo_app.model("F81", opt_args=settings)
        alt = evo_app.model("HKY85", opt_args=settings)

        # no init_alt given, so the default initialisation applies
        default_hyp = evo_app.hypothesis(null, alt)
        seqs = {
            "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
            "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
            "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
        }
        alignment = make_aligned_seqs(data=seqs, moltype="dna")
        outcome = default_hyp(alignment)
        self.assertEqual(outcome.df, 1)

        # user specified function
        custom_hyp = evo_app.hypothesis(null, alt, init_alt=first_arg)
        outcome = custom_hyp(alignment)
        self.assertEqual(outcome.df, 1)
Beispiel #6
0
 def test_bstrap_parallel(self):
     """bootstrap runs with parallel=True and returns a bootstrap_result"""
     brca1_path = join(data_dir, "brca1.fasta")
     alignment = load_aligned_seqs(brca1_path, moltype="dna")
     # keep the first three sequences, then drop every column with a gap
     first_three = alignment.names[:3]
     alignment = alignment.take_seqs(first_three)
     alignment = alignment.omit_gap_pos(allowed_gap_frac=0)

     settings = {"max_evaluations": 20, "limit_action": "ignore"}
     null = evo_app.model("F81", opt_args=settings)
     alt = evo_app.model("HKY85", opt_args=settings)
     lrt = evo_app.hypothesis(null, alt)
     parallel_strapper = evo_app.bootstrap(lrt, num_reps=2, parallel=True)
     outcome = parallel_strapper(alignment)
     self.assertIsInstance(outcome, evo_app.bootstrap_result)
Beispiel #7
0
 def test_hypothesis_str(self):
     """str(hypothesis) gives the expected constructor-style text"""
     null = evo_app.model("HKY85")
     alt = evo_app.model("HKY85", name="hky85-max-het", time_het="max")
     observed = str(evo_app.hypothesis(null, alt))
     # the null is shown by name, the alternate by its full representation
     expected = (
         "hypothesis(type='hypothesis', null='HKY85', alternates=("
         "model(type='model', sm='HKY85', tree=None, name='hky85-max-het', "
         "sm_args=None, lf_args=None, time_het='max', param_rules=None, "
         "opt_args=None, split_codons=False, show_progress=False, "
         "verbose=False),), init_alt=None)"
     )
     self.assertEqual(observed, expected)
Beispiel #8
0
 def test_alt_hyp_fail_error(self):
     """if alt fails NotCompleted.origin should be model"""
     topology = "(Mouse,Human,Opossum)"
     seqs = {
         "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGA",
         "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGA",
         "Opossum": "TGACCAGTGAAAGTGGCGGCGGTGGCTGA",
     }
     alignment = make_aligned_seqs(data=seqs, moltype="dna")
     null = evo_app.model("F81", tree=topology)
     alt = evo_app.model("MG94HKY", tree=topology)
     lrt = evo_app.hypothesis(null, alt)
     outcome = lrt(alignment)
     # the failure is expected to be attributed to the model app
     self.assertEqual(outcome.origin, "model")
Beispiel #9
0
    def test_model_hypothesis_result_repr(self):
        """result objects __repr__ and _repr_html_ methods work correctly"""
        import re

        def settings(evaluations):
            # a fresh optimiser settings dict for each model
            return dict(max_evaluations=evaluations, limit_action="ignore")

        def pvalue_text(outcome):
            # last whitespace-separated field on the fifth line of str(outcome)
            return str(outcome).splitlines()[4].split()[-1]

        seqs = {
            "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
            "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
            "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
        }
        alignment = make_aligned_seqs(data=seqs, moltype="dna")
        null = evo_app.model("F81", opt_args=settings(25))
        alt = evo_app.model("HKY85", opt_args=settings(25))
        lrt = evo_app.hypothesis(null, alt)
        outcome = lrt(alignment)
        # the p-value is displayed in plain decimal form
        shown = pvalue_text(outcome)
        self.assertIsNotNone(re.search(r"\d\.\d+", shown))
        self.assertIsInstance(outcome.__repr__(), str)
        self.assertIsInstance(outcome._repr_html_(), str)
        self.assertIsInstance(outcome.null.__repr__(), str)
        self.assertIsInstance(outcome.null._repr_html_(), str)

        # second data set: the p-value is displayed in exponent form
        alignment = load_aligned_seqs("data/primate_brca1.fasta", moltype="dna")
        subset = alignment.take_seqs(["Human", "Rhesus", "Galago"])
        alignment = subset[2::3].omit_gap_pos()
        null = evo_app.model("F81", opt_args=settings(25))
        alt = evo_app.model("HKY85", opt_args=settings(100))
        lrt = evo_app.hypothesis(null, alt)
        outcome = lrt(alignment)
        shown = pvalue_text(outcome)
        self.assertIsNotNone(re.search(r"[0-9\.]+e-\d+", shown))
Beispiel #10
0
 def test_hyp_split_codon_select_models(self):
     """hypothesis_result selects the best model when split_codons is used"""
     seqs = {
         "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
         "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
         "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
     }
     alignment = make_aligned_seqs(data=seqs, moltype="dna")
     settings = {"max_evaluations": 10, "limit_action": "ignore"}
     null = evo_app.model("F81", split_codons=True, opt_args=settings)
     alt = evo_app.model("GTR", split_codons=True, opt_args=settings)
     outcome = evo_app.hypothesis(null, alt)(alignment)
     selected = outcome.select_models()
     # lnL of the first selected model is pinned to a reference value
     top = selected[0]
     assert_allclose(top.lnL, -85.00043312185628)
Beispiel #11
0
 def test_bstrap(self):
     """exercising bootstrap with simple hypothesis

     Runs a two-replicate, non-parallel bootstrap of an F81 vs HKY85
     hypothesis, checks the null distribution holds floats, and checks the
     result survives a JSON roundtrip as a bootstrap_result.
     """
     aln = load_aligned_seqs(join(data_dir, "brca1.fasta"), moltype="dna")
     aln = aln.take_seqs(aln.names[:3])
     aln = aln.omit_gap_pos(allowed_gap_frac=0)
     opt_args = dict(max_evaluations=20, limit_action="ignore")
     m1 = evo_app.model("F81", opt_args=opt_args)
     m2 = evo_app.model("HKY85", opt_args=opt_args)
     hyp = evo_app.hypothesis(m1, m2)
     strapper = evo_app.bootstrap(hyp, num_reps=2, parallel=False)
     result = strapper(aln)
     null_values = list(result.null_dist)
     # The previous assertTrue(set_of_types, {float}) treated {float} as the
     # failure message, so it only verified the distribution was non-empty.
     # Keep that check and actually verify each value is a float
     # (assertIsInstance also accepts float subclasses).
     self.assertTrue(null_values)
     for value in null_values:
         self.assertIsInstance(value, float)
     # avoid naming the local "json", which would shadow the stdlib module name
     serialised = result.to_json()
     got = deserialise_object(serialised)
     self.assertIsInstance(got, evo_app.bootstrap_result)
Beispiel #12
0
 def test_pvalue(self):
     """hypothesis result pvalue property lies within [0, 1]"""
     seqs = {
         "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
         "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
         "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
     }
     alignment = make_aligned_seqs(data=seqs, moltype="dna")
     # each model gets its own optimiser settings dict
     null, alt = [
         evo_app.model(
             name, opt_args=dict(max_evaluations=25, limit_action="ignore")
         )
         for name in ("F81", "HKY85")
     ]
     outcome = evo_app.hypothesis(null, alt)(alignment)
     pvalue = outcome.pvalue
     self.assertTrue(0 <= pvalue <= 1)
Beispiel #13
0
 def test_model_hypothesis_result_repr(self):
     """__repr__ and _repr_html_ of result objects both return str"""

     def settings():
         # a fresh optimiser settings dict for each model
         return dict(max_evaluations=25, limit_action="ignore")

     seqs = {
         "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
         "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
         "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
     }
     alignment = make_aligned_seqs(data=seqs, moltype="dna")
     null = evo_app.model("F81", opt_args=settings())
     alt = evo_app.model("HKY85", opt_args=settings())
     lrt = evo_app.hypothesis(null, alt)
     outcome = lrt(alignment)
     # the hypothesis result itself
     self.assertIsInstance(outcome.__repr__(), str)
     self.assertIsInstance(outcome._repr_html_(), str)
     # and the nested null model result
     self.assertIsInstance(outcome.null.__repr__(), str)
     self.assertIsInstance(outcome.null._repr_html_(), str)
Beispiel #14
0
 def test_unique_models(self):
     """hypothesis raises ValueError if models not unique"""
     # both models are built from "HKY85" and neither is given a distinct name
     model1 = evo_app.model("HKY85")
     model2 = evo_app.model("HKY85", time_het="max")
     with self.assertRaises(ValueError):
         # construction alone must raise, so the result is not bound
         evo_app.hypothesis(model1, model2)