예제 #1
0
 def test_model_tree(self):
     """model app accepts a newick string, a tree object, or None as tree"""
     newick = "(a,b,c)"
     candidates = [newick, make_tree(treestring=newick), None]
     for candidate in candidates:
         app = evo_app.model("HKY85", tree=candidate)
         # whatever form was supplied, the stored tree must have the class
         # of a parsed tree -- or of None when no tree was given
         if candidate is None:
             expected = None
         else:
             expected = make_tree(treestring=newick)
         self.assertIsInstance(app._tree, expected.__class__)
예제 #2
0
파일: test_tree.py 프로젝트: wjjmjh/cogent3
    def test_scale_tree_lengths(self):
        """correctly scales tree lengths"""

        def edge_lengths(t):
            # map of edge name -> length for every edge except the root
            return {e.name: e.length for e in t.get_edge_vector(include_root=False)}

        # asking for both conversions at once must be rejected
        with self.assertRaises(AssertionError):
            _ = tree_app.scale_branches(nuc_to_codon=True, codon_to_nuc=True)

        # nucleotide -> codon is expected to divide each length by 3
        tree = make_tree(treestring="(a:3,b:6,c:9)")
        scale_to_codon = tree_app.scale_branches(nuc_to_codon=True)
        d = scale_to_codon(tree)
        self.assertEqual(edge_lengths(d), {"a": 1.0, "b": 2.0, "c": 3.0})

        # codon -> nucleotide applied to that result restores the originals
        scale_from_codon = tree_app.scale_branches(codon_to_nuc=True)
        d = scale_from_codon(d)
        self.assertEqual(edge_lengths(d), {"a": 3.0, "b": 6.0, "c": 9.0})

        # scalar=0.5 is expected to double every length of the original tree
        by_scalar = tree_app.scale_branches(scalar=0.5)
        d = by_scalar(tree)
        self.assertEqual(edge_lengths(d), {"a": 6.0, "b": 12.0, "c": 18.0})

        # handle case where a length is not defined, setting to minimum
        min_length = tree_app.scale_branches(min_length=66)
        tree = make_tree(treestring="(a:3,b:6,c)")
        new = min_length(tree)
        self.assertEqual(edge_lengths(new), {"a": 3.0, "b": 6.0, "c": 66.0})
예제 #3
0
파일: test_tree.py 프로젝트: wjjmjh/cogent3
 def test_uniformize_tree(self):
     """equivalent topologies should be the same"""
     a = make_tree(treestring="(a,(b,c),(d,e))")
     b = make_tree(treestring="(e,d,(a,(b,c)))")
     make_uniform = tree_app.uniformize_tree(
         root_at="c", ordered_names=list("abcde")
     )
     u_a = make_uniform(a).get_newick()
     u_b = make_uniform(b).get_newick()
     # assertEqual (rather than assertTrue on ==) shows both newick strings
     # in the failure message
     self.assertEqual(u_a, u_b)
     # but different ones different
     c = make_tree(treestring="(e,c,(a,(b,d)))")
     u_c = make_uniform(c).get_newick()
     self.assertNotEqual(u_a, u_c)
예제 #4
0
 def test_style_edges(self):
     """style_edges accepts an edge of the tree and rejects an unknown name"""
     dendrogram = Dendrogram(tree=make_tree(treestring="(a,b,(c,(d,e)e1)e2)"))
     # "a" appears in the tree string, so styling it must not raise
     dendrogram.style_edges("a", line={"color": "magenta"})
     # "foo" does not appear in the tree string
     with self.assertRaises(ValueError):
         dendrogram.style_edges("foo", line={"color": "magenta"})
예제 #5
0
 def test_progress_with_guide_tree(self):
     """progressive align works when the guide tree is supplied"""
     tree = make_tree(treestring=self.treestring)
     # the guide tree may be given as a newick string or as a tree object
     for guide in (self.treestring, tree):
         aligner = align_app.progressive_align(model="nucleotide", guide_tree=guide)
         aln = aligner(self.seqs)
         self.assertEqual(len(aln), 42)

     # even if it has underscores in name
     underscored = (
         "(Bandicoot:0.4,FlyingFox:0.05,(Rhesus_macaque:0.06,"
         "Human:0.0):0.04);"
     )
     aligner = align_app.progressive_align(
         model="nucleotide", guide_tree=underscored
     )
     # rename the sequence so it carries a space where the tree has "_"
     renamed = self.seqs.to_dict()
     renamed["Rhesus macaque"] = renamed.pop("Rhesus")
     aln = aligner(make_unaligned_seqs(renamed))
     self.assertEqual(len(aln), 42)

     # guide tree with no lengths raises value error
     no_lengths = "(Bandicoot,FlyingFox,(Rhesus_macaque,Human));"
     with self.assertRaises(ValueError):
         _ = align_app.progressive_align(model="nucleotide", guide_tree=no_lengths)
예제 #6
0
    def test_species_tree(self):
        """should match the one used by ensembl"""
        comp = Compara(
            ["human", "rat", "dog", "platypus"],
            release=ENSEMBL_RELEASE,
            account=account,
        )

        # sub-tree should have correct species
        sub_species = comp.get_species_tree(just_members=True)
        self.assertEqual(
            set(sub_species.get_tip_names()),
            {
                # restored from a censored "H**o sapiens"; matches the
                # Homo_sapiens tip of the expected topology below
                "Homo sapiens",
                "Rattus norvegicus",
                "Canis lupus familiaris",
                "Ornithorhynchus anatinus",
            },
        )
        # topology should match current topology belief
        expect = make_tree(
            treestring="(((Homo_sapiens,Rattus_norvegicus),"
            "Canis_lupus_familiaris),Ornithorhynchus_anatinus)",
            underscore_unmunge=True,
        )
        self.assertTrue(sub_species.same_topology(expect))

        # returned full tree should match download from ensembl
        # but taxon names are customised in what they put up on
        # the web-site, so need a better test.
        sptree = comp.get_species_tree(just_members=False)
        expect = load_tree("data/ensembl_all_species.nh", underscore_unmunge=True)
        # assertGreater reports both counts if the comparison fails
        self.assertGreater(
            len(sptree.get_tip_names()), len(expect.get_tip_names())
        )
예제 #7
0
    def test_length_attr_valid(self):
        """x values are valid when a custom length attribute is used"""
        newick = (
            "((a:0.1,b:0.25):0.1,(c:0.02,d:0.03, (e:0.035, f:0.04):0.15):0.3 , g:0.3)"
        )
        geom = SquareTreeGeometry(make_tree(treestring=newick), length_attr="custom")
        geom.params["custom"] = 1

        # each non-root node gets twice its parent's 'custom' value
        # (falling back to 1 when the parent has none)
        for node in geom.preorder():
            if not node.is_root():
                node.params["custom"] = node.parent.params.get("custom", 1) * 2
        geom.propagate_properties()

        # .x attribute is cumulative from the root, which we have set to 1
        # for 'custom', e.g. a.x == 2 + 4 == 6
        names = ["root", "a", "b", "c", "d", "e", "f", "g"]
        actual_vals = [geom.get_node_matching_name(name).x for name in names]

        # Root x resets to 0 so any assigned value to root is always discarded
        expected_vals = [0, 6, 6, 6, 6, 14, 14, 2]

        assert_allclose(actual_vals, expected_vals)
예제 #8
0
    def test_roundtrip_model_result(self):
        """model_result.to_json enables roundtrip and lazy evaluation"""
        seqs = {
            "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
            "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
            "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
        }
        aln = make_aligned_seqs(data=seqs, moltype="dna")
        tree = make_tree(tip_names=aln.names)
        lf = get_model("HKY85").make_likelihood_function(tree)
        lf.set_alignment(aln)
        # give every edge its own starting value for kappa
        for edge, start in zip(aln.names, (2, 3, 4)):
            lf.set_param_rule("kappa", edge=edge, init=start)

        result = model_result(name="test")
        result[1] = lf
        # the stored object is the very same lf and the summaries agree with it
        self.assertIs(result[1], lf)
        self.assertEqual(result.nfp, lf.nfp)
        self.assertEqual(result.lnL, lf.lnL)

        got_obj = deserialise_object(result.to_json())
        # lazy evaluation means initially, the value is a dict
        self.assertIsInstance(got_obj[1], dict)
        # and properties match original
        for attr in ("lnL", "nfp", "DLC"):
            self.assertEqual(getattr(got_obj, attr), getattr(result, attr))
        # when we ask for the lf attribute, it's no longer a dict
        self.assertNotIsInstance(got_obj.lf, dict)
        self.assertEqual(got_obj.lf.nfp, got_obj.nfp)
예제 #9
0
    def test_build_phylogeny(self):
        """quick_tree on a distance matrix gives the expected topology"""
        from cogent3 import make_tree

        # both orientations of every pair are listed
        pairwise = {
            ("DogFaced", "FlyingFox"): 0.05,
            ("DogFaced", "FreeTaile"): 0.14,
            ("DogFaced", "LittleBro"): 0.16,
            ("DogFaced", "TombBat"): 0.15,
            ("FlyingFox", "DogFaced"): 0.05,
            ("FlyingFox", "FreeTaile"): 0.12,
            ("FlyingFox", "LittleBro"): 0.13,
            ("FlyingFox", "TombBat"): 0.14,
            ("FreeTaile", "DogFaced"): 0.14,
            ("FreeTaile", "FlyingFox"): 0.12,
            ("FreeTaile", "LittleBro"): 0.09,
            ("FreeTaile", "TombBat"): 0.1,
            ("LittleBro", "DogFaced"): 0.16,
            ("LittleBro", "FlyingFox"): 0.13,
            ("LittleBro", "FreeTaile"): 0.09,
            ("LittleBro", "TombBat"): 0.12,
            ("TombBat", "DogFaced"): 0.15,
            ("TombBat", "FlyingFox"): 0.14,
            ("TombBat", "FreeTaile"): 0.1,
            ("TombBat", "LittleBro"): 0.12,
        }
        matrix = DistanceMatrix(pairwise)
        got = matrix.quick_tree(show_progress=False)
        expected_newick = "((TombBat,(DogFaced,FlyingFox)),LittleBro,FreeTaile)"
        expect = make_tree(treestring=expected_newick)
        self.assertTrue(expect.same_topology(got))
예제 #10
0
 def test_deserialise_likelihood_function(self):
     """likelihood functions are rebuilt correctly from their rich dict"""

     def check_roundtrip(lf):
         # rebuild from the rich dict, then compare the string forms of the
         # motif prob and alignment definitions against the original
         got = deserialise_likelihood_function(lf.to_rich_dict())
         self.assertEqual(str(lf.defn_for["mprobs"]), str(got.defn_for["mprobs"]))
         self.assertEqual(
             str(lf.defn_for["alignment"].assignments),
             str(got.defn_for["alignment"].assignments),
         )

     path = os.path.join(os.getcwd(), "data", "brca1_5.paml")
     data = load_aligned_seqs(filename=path)
     midpoint = len(data) // 2
     first_half, second_half = data[:midpoint], data[midpoint:]
     tree = make_tree(tip_names=data.names)

     # tests multiple alignments, one per named locus
     lf = get_model("HKY85").make_likelihood_function(
         tree, loci=["1st-half", "2nd-half"]
     )
     lf.set_alignment([first_half, second_half])
     check_roundtrip(lf)

     # tests single alignment
     lf = get_model("HKY85").make_likelihood_function(tree)
     lf.set_alignment(first_half)
     check_roundtrip(lf)
예제 #11
0
def make_trees(filename):
    """Parse a file of (score, tree) lines into a scored tree collection.

    Each non-blank line holds a score, whitespace, then a newick tree string.
    Scores are either all positive probabilities (0 < score <= 1) or all
    non-positive log likelihoods; the two kinds must not be mixed.

    Parameters
    ----------
    filename
        path of the file to read

    Returns
    -------
    A WeightedTreeCollection when the scores are probabilities, a
    LogLikelihoodScoredTreeCollection when they are log likelihoods, or a
    plain list when the file has no scored lines. The collection is built
    from (score, tree) tuples sorted in descending order.

    Raises
    ------
    ValueError
        if a score is greater than 1
    AssertionError
        if probabilities and log likelihoods are mixed in one file
    """
    from cogent3 import make_tree

    trees = []
    klass = list
    # the context manager closes the file even when a line fails to parse
    with open(filename, "r") as infile:
        # expect score, tree
        for line in infile:
            fields = line.split(None, 1)
            if not fields:
                # blank line, e.g. a trailing newline at the end of the file
                continue
            lnL = float(fields[0])
            if lnL > 1:
                raise ValueError("likelihoods expected, not %s" % lnL)
            elif lnL > 0:
                assert klass in [list, WeightedTreeCollection]
                klass = WeightedTreeCollection
            else:
                assert klass in [list, LogLikelihoodScoredTreeCollection]
                klass = LogLikelihoodScoredTreeCollection
            tree = make_tree(treestring=fields[1])
            trees.append((lnL, tree))
    trees.sort(reverse=True)
    return klass(trees)
예제 #12
0
 def test_limited_wls(self):
     """exercise wls when constrained by one or more start trees"""
     init = make_tree(treestring="((a,c),b,d)")
     # a single start tree
     reconstructed = wls(self.dists, start=init, show_progress=False)
     self.assertEqual(len(reconstructed.get_tip_names()), 6)

     # a list of start trees
     init2 = make_tree(treestring="((a,d),b,c)")
     reconstructed = wls(self.dists, start=[init, init2], show_progress=False)
     self.assertEqual(len(reconstructed.get_tip_names()), 6)

     # a start tree containing the tip "z" must be rejected
     init3 = make_tree(treestring="((a,d),b,z)")
     with self.assertRaises(Exception):
         wls(self.dists, start=[init, init3])

     # if start tree has all seq names, should raise an error
     # (the tree is built outside the context so only wls is under test)
     complete = make_tree(treestring="((a,c),b,(d,(e,f)))")
     with self.assertRaises(Exception):
         wls(self.dists, start=[complete])
 def test_set_constant_lengths(self):
     """a constant length rule keeps the lengths supplied on the tree"""
     tree = make_tree(treestring="((a:1,b:2):3,(c:4,d:5):6,e:7);")
     lf = self.model.make_likelihood_function(tree)
     lf.set_param_rule("length", is_constant=True)
     lf.set_alignment(self.al)
     # spot-check two tips against the lengths in the newick string
     for edge, expected in (("b", 2), ("d", 5)):
         self.assertEqual(lf.get_param_value("length", edge), expected)
예제 #14
0
 def _est_dist_pair_slow(self, aln):
     """returns the estimated distance between the two sequences in aln

     The distance is obtained by optimising a likelihood function built
     from self._sm on a tree whose tips are the two sequence names.
     """
     names = aln.names
     assert len(names) == 2
     pair_tree = make_tree(tip_names=names)
     lf = self._sm.make_likelihood_function(pair_tree)
     lf.set_alignment(aln)
     # length is not independent per edge, so one edge's value is doubled
     # below to give the distance between the pair
     lf.set_param_rule("length", is_independent=False)
     lf.optimise(max_restarts=0, show_progress=False)
     one_branch = lf.get_param_value("length", edge=names[0])
     return 2 * one_branch
예제 #15
0
def _est_simulations():
    """Simulates alignments under known models, refits them and prints both.

    Two rounds are run on the same 4 taxon tree: first a Jukes Cantor model,
    then a Kimura style model with a separate kappa value on every edge. In
    each round the generating likelihood function, the simulated alignment
    and the refitted likelihood function are printed. Nothing is asserted
    or returned.
    """
    # specify the 4 taxon tree, and a 'dummy' alignment
    t = make_tree(treestring="(a:0.4,b:0.3,(c:0.15,d:0.2)edge.0:0.1)root;")

    # how long the simulated alignments should be
    # at 1000000 the estimates get nice and close
    length_of_align = 10000

    #########################
    #
    # For a Jukes Cantor model
    #
    #########################

    sm = substitution_model.TimeReversibleNucleotide()
    lf = sm.make_likelihood_function(t)
    lf.set_constant_lengths()
    lf.set_name("True JC model")
    print(lf)
    simulated = lf.simulate_alignment(sequence_length=length_of_align)
    print(simulated)

    new_lf = sm.make_likelihood_function(t)
    # set_alignment is used for its effect on new_lf; its return value must
    # not be bound back to new_lf, or the calls below would run on it instead
    new_lf.set_alignment(simulated)
    new_lf.optimise(tolerance=1.0)
    new_lf.optimise(local=True)
    # labelled as the estimate, so it is distinguishable from the generating
    # model when printed
    new_lf.set_name("Estimated JC model")
    print(new_lf)

    #########################
    #
    # a Kimura model
    #
    #########################

    # has a ts/tv term, different values for every edge
    sm = substitution_model.TimeReversibleNucleotide(
        predicates={"kappa": "transition"}
    )
    lf = sm.make_likelihood_function(t)
    lf.set_constant_lengths()
    # NOTE(review): these rules pass edge_name=..., other callers use
    # edge=... -- confirm which keyword set_param_rule accepts
    lf.set_param_rule("kappa", is_constant=True, value=4.0, edge_name="a")
    lf.set_param_rule("kappa", is_constant=True, value=0.5, edge_name="b")
    lf.set_param_rule("kappa", is_constant=True, value=0.2, edge_name="c")
    lf.set_param_rule("kappa", is_constant=True, value=3.0, edge_name="d")
    lf.set_param_rule("kappa", is_constant=True, value=2.0, edge_name="edge.0")
    lf.set_name("True Kappa model")
    print(lf)
    simulated = lf.simulate_alignment(sequence_length=length_of_align)
    print(simulated)

    new_lf = sm.make_likelihood_function(t)
    new_lf.set_param_rule("kappa", is_independent=True)
    new_lf.set_alignment(simulated)
    new_lf.optimise(tolerance=1.0)
    new_lf.optimise(local=True)
    new_lf.set_name("Estimated Kappa model")
    print(new_lf)
예제 #16
0
 def test_UPGMA_cluster(self):
     """upgma on a pairwise distance dict gives the expected topology"""
     # sorted so we can make a stable comparison
     cluster = upgma(self.pairwise_distances).sorted()
     expected_newick = (
         "(((a:0.5,b:0.5)edge.1:1.75,c:2.25)edge.0:5.875,"
         "(d:1.0,e:1.0)edge.2:7.125)root;"
     )
     expect = make_tree(treestring=expected_newick)
     self.assertTrue(cluster.same_topology(expect))
예제 #17
0
 def test_tip_font(self):
     """tip_font can be set by union, by attribute, and read by key"""
     dendrogram = Dendrogram(tree=make_tree(treestring="(a,b,(c,(d,e)e1)e2)"))
     # in-place union with a plain dict
     dendrogram.tip_font |= {"size": 18}
     self.assertEqual(dendrogram.tip_font.size, 18)
     # direct attribute assignment overrides the earlier value
     dendrogram.tip_font.size = 10
     self.assertEqual(dendrogram.tip_font.size, 10)
     # a value set as an attribute is readable as a key
     dendrogram.tip_font.color = "red"
     self.assertEqual(dendrogram.tip_font["color"], "red")
 def setUp(self):
     """creates the alignment, tree and substitution model fixtures"""
     # length all edges 1 except c=2.  b&d transitions all other
     # transversions
     seqs = {"a": "tata", "b": "tgtc", "c": "gcga", "d": "gaac", "e": "gagc"}
     self.al = make_aligned_seqs(data=seqs)
     self.tree = make_tree(treestring="((a,b),(c,d),e);")
     model_class = cogent3.evolve.substitution_model.TimeReversibleNucleotide
     self.model = model_class(equal_motif_probs=True, model_gaps=True)
예제 #19
0
    def test_ml(self):
        """exercise the ML tree estimation"""
        from numpy.testing import assert_allclose

        path = os.path.join(data_path, "brca1.fasta")
        # four taxa only, and no alignment column may contain a gap
        aln = (
            load_aligned_seqs(path, moltype="dna")
            .take_seqs(["Human", "Mouse", "Rat", "Dog"])
            .omit_gap_pos(allowed_gap_frac=0)
        )
        estimator = ML(get_model("JC69"), aln)
        lnL, tree = estimator.trex(a=3, k=1, show_progress=False)
        assert_allclose(lnL, -8882.217502905267)
        expect = make_tree("(Mouse,Rat,(Human,Dog));")
        self.assertTrue(tree.same_topology(expect))
예제 #20
0
 def test_min_max_x_y(self):
     """min/max of x and y agree with the extremes over all nodes"""
     geom = CircularTreeGeometry(make_tree(treestring="(A:1,B:2,C:3)"))
     geom.propagate_properties()
     # same check on each axis: the larger absolute bound must equal the
     # largest absolute coordinate found among the nodes
     for axis in ("x", "y"):
         bounds = [getattr(geom, f"min_{axis}"), getattr(geom, f"max_{axis}")]
         got = max(abs(bound) for bound in bounds)
         expect = max(abs(getattr(node, axis)) for node in geom.postorder())
         assert_allclose(got, expect)
예제 #21
0
 def test_progress_with_guide_tree(self):
     """progressive align works when the guide tree is supplied"""
     tree = make_tree(treestring=self.treestring)
     # first with the newick string itself, then with the parsed tree
     for guide in (self.treestring, tree):
         aligner = align_app.progressive_align(model="nucleotide", guide_tree=guide)
         aln = aligner(self.seqs)
         self.assertEqual(len(aln), 42)
예제 #22
0
def deserialise_tree(data):
    """returns a cogent3 PhyloNode instance

    Parameters
    ----------
    data
        mapping with a "newick" tree string and an "edge_attributes"
        mapping of edge name to a dict of params. Any other keys (e.g.
        "version") are ignored. The mapping is left unmodified.

    Raises
    ------
    KeyError
        if "newick" or "edge_attributes" is missing
    """
    # read the keys rather than pop them, so the caller's dict stays intact
    newick = data["newick"]
    edge_attr = data["edge_attributes"]
    # we load tree using make_tree, then populate edge attributes
    tree = cogent3.make_tree(treestring=newick)
    for edge in tree.get_edge_vector():
        # edges without an entry keep whatever params make_tree gave them
        edge.params.update(edge_attr.get(edge.name, {}))
    return tree
예제 #23
0
 def test_pairwise_clock(self):
     """a local clock over tips a and b gives them equal lengths"""
     aln = make_aligned_seqs(data={"a": "agct", "b": "ggct"})
     pair_tree = make_tree(treestring="(a,b);")
     dinuc_class = cogent3.evolve.substitution_model.TimeReversibleDinucleotide
     dinuc = dinuc_class(
         equal_motif_probs=True, model_gaps=True, mprob_model="tuple"
     )
     lf = dinuc.make_likelihood_function(pair_tree)
     lf.set_local_clock("a", "b")
     lf.set_alignment(aln)
     lf.optimise(local=True)
     by_edge = lf.get_param_value_dict(["edge"], params=["length"])
     self.assertAlmostEqual(lf.get_log_likelihood(), -10.1774488956)
     lengths = by_edge["length"]
     self.assertEqual(lengths["a"], lengths["b"])
예제 #24
0
 def test_dendro_shape(self):
     """exercising using different values of shape parameter"""
     tree = make_tree(treestring="(a:0.1,b:0.1,(c:0.05,(d:0.01,e:0.02):0.01):0.1)")
     for style in ("square", "angular", "circular", "radial"):
         dnd = Dendrogram(tree, style=style)
         # the figure attribute should be a dict
         fig = dnd.figure
         self.assertIsInstance(fig, UnionDict)
         # should have a layout and a data key; assertIn names the missing
         # key in the failure message
         self.assertIn("layout", fig)
         self.assertIn("data", fig)
         # data traces should be of type "scatter"
         self.assertEqual({tr.type for tr in fig.data}, {"scatter"})
예제 #25
0
파일: distance.py 프로젝트: mr-c/cogent3
    def _make_pair_alignment(self, seqs, opt_kwargs):
        """returns an alignment of seqs taken from the Viterbi path

        A likelihood function for unaligned sequences is built from
        self._sm; opt_kwargs are passed to its optimise method, which is
        only called when self._rigorous_align is set.
        """
        tree = make_tree(tip_names=seqs.names)
        lf = self._sm.make_likelihood_function(tree, aligned=False)
        lf.set_sequences(seqs.named_seqs)

        # allow user to modify the lf config
        if self._modify_lf:
            lf = self._modify_lf(lf)

        if self._rigorous_align:
            lf.optimise(**opt_kwargs)
        viterbi_path = lf.get_log_likelihood().edge.get_viterbi_path()
        return viterbi_path.get_alignment()
예제 #26
0
    def test_getting_node_mprobs(self):
        """motif probability vectors at tree nodes match hand calculations"""
        tree = make_tree(treestring="(a:.2,b:.2,(c:.1,d:.1):.1)")
        aln = make_aligned_seqs(data=dict.fromkeys("abcd", "TGTG"))

        # one forward-only predicate per direction of change
        T, C, A, G = "T", "C", "A", "G"
        aX = MotifChange(T, G, forward_only=True).aliased("aX")
        bX = MotifChange(G, T, forward_only=True).aliased("bX")
        edX = MotifChange(C, A, forward_only=True).aliased("edX")
        cX = MotifChange(A, C, forward_only=True).aliased("cX")
        sm = NonReversibleNucleotide(
            predicates=[aX, bX, edX, cX], equal_motif_probs=True
        )

        lf = sm.make_likelihood_function(tree)
        rules = [
            ("aX", "a", 8.0),
            ("bX", "b", 8.0),
            ("edX", "edge.0", 2.0),
            ("cX", "c", 0.5),
            ("edX", "d", 4.0),
        ]
        for par_name, edge, value in rules:
            lf.set_param_rule(par_name, edge=edge, value=value)
        lf.set_alignment(aln)

        # we construct the hand calc variants; a, b and edge.0 start from
        # the equal root probabilities, c and d start from edge.0's vector
        mprobs = ones(4, float) * 0.25
        a = dot(mprobs, make_p(0.2, (0, 3), 8))
        b = dot(mprobs, make_p(0.2, (3, 0), 8))
        e = dot(mprobs, make_p(0.1, (1, 2), 2))
        c = dot(e, make_p(0.1, (2, 1), 0.5))
        d = dot(e, make_p(0.1, (1, 2), 4))

        prob_vectors = lf.get_motif_probs_by_node()
        expected_by_node = [("a", a), ("b", b), ("c", c), ("d", d), ("edge.0", e)]
        for node, expected in expected_by_node:
            self.assertFloatEqual(prob_vectors[node].array, expected)
예제 #27
0
 def test_geometry(self):
     """geometry get_edge_names agrees with the tree's get_edge_names"""
     tree = make_tree(treestring="(a,b,(c,(d,e)e1)e2)")
     geom = SquareTreeGeometry(tree)
     tips = {"tip1name": "d", "tip2name": "c"}
     series = [
         {**tips, "clade": True, "stem": False},
         {**tips, "clade": True, "stem": True},
         {**tips, "clade": False, "stem": True},
         {**tips, "clade": True, "stem": False, "outgroup_name": "e"},
         {**tips, "clade": False, "stem": True, "outgroup_name": "e"},
     ]
     # NOTE(review): the [-1:] slice means only the final entry is exercised;
     # confirm whether that is intentional before widening to all of series
     for case in series[-1:]:
         expect = tree.get_edge_names(**case)
         got = geom.get_edge_names(**case)
         self.assertEqual(got, expect)
예제 #28
0
    def fit(self, aln, initialise=None, construct=True, **opt_args):
        """Fits the model to aln and returns a model_result.

        A NotCompleted is returned instead when the alignment and the
        substitution model pair protein with dna or rna moltypes. When no
        tree is stored (or unique trees are in use) one is made from the
        alignment names, which must number exactly 3. With split codons a
        likelihood function is fitted per codon position and stored under
        keys 1, 2 and 3; otherwise a single one is stored under self.name.
        initialise, construct and opt_args are forwarded to self._fit_aln.
        """
        aln_label = aln.moltype.label
        sm_label = self._sm.moltype.label
        incompatible = ({"protein", "dna"}, {"protein", "rna"})
        if {aln_label, sm_label} in incompatible:
            msg = (
                f"substitution model moltype '{sm_label}' and"
                f" alignment moltype '{aln_label}' are incompatible"
            )
            return NotCompleted("ERROR", self, msg, source=aln)

        evaluation_limit = opt_args.get("max_evaluations")
        if self._tree is None or self._unique_trees:
            num_seqs = len(aln.names)
            assert num_seqs == 3, "to model more than 3, you must provide a tree"
            self._tree = make_tree(tip_names=aln.names)

        result = model_result(
            name=self.name,
            stat=sum,
            source=aln.info.source,
            evaluation_limit=evaluation_limit,
        )

        if self._split_codons:
            # totals are accumulated across the three per-position fits
            num_evals = 0
            elapsed_time = 0
            for position in (1, 2, 3):
                lf = self._fit_aln(
                    aln[position - 1 :: 3],
                    identifier=position,
                    initialise=initialise,
                    construct=construct,
                    **opt_args,
                )
                result[position] = lf
                num_evals += lf.calculator.evaluations
                elapsed_time += lf.calculator.elapsed_time

            result.num_evaluations = num_evals
            result.elapsed_time = elapsed_time
            return result

        lf = self._fit_aln(
            aln, initialise=initialise, construct=construct, **opt_args
        )
        result[self.name] = lf
        result.num_evaluations = lf.calculator.evaluations
        result.elapsed_time = lf.calculator.elapsed_time
        return result
예제 #29
0
파일: distance.py 프로젝트: tla256/cogent3
    def _doset(self, sequence_names, dist_opt_args, aln_opt_args, ui):
        """Estimates length, plus any requested params, for the named seqs.

        Returns a dict with a "length" entry and one entry per name in
        self._est_params. When not three-way, "length" is twice the first
        edge's length; otherwise it is the per-edge mapping of lengths.
        Every other param takes the first value found for it.
        """

        def first_value(mapping):
            return list(mapping.values())[0]

        # slice the alignment
        seqs = self._seq_collection.take_seqs(sequence_names)
        if not self._do_pair_align:
            align = seqs
            ui.display("", progress=0.0)
        else:
            ui.display("Aligning", progress=0.0)
            align = self._make_pair_alignment(seqs, aln_opt_args)
            ui.display("", progress=0.5)
        # note that we may want to consider removing the redundant gaps

        # make the parameter controller on a tree of the requested names
        tree = make_tree(tip_names=sequence_names)
        lf = self._sm.make_likelihood_function(tree)
        if not self._threeway:
            lf.set_param_rule("length", is_independent=False)

        if self._motif_probs:
            lf.set_motif_probs(self._motif_probs)

        lf.set_alignment(align)

        # allow user modification of lf using the modify_lf
        if self._modify_lf:
            lf = self._modify_lf(lf)

        lf.optimise(**dist_opt_args)

        # get the statistics
        wanted = ["length"] + self._est_params
        stats_dict = lf.get_param_value_dict(["edge"], params=wanted)

        lengths = stats_dict["length"]
        if self._threeway:
            result = {"length": lengths}
        else:
            # if two-way, grab first distance only
            result = {"length": first_value(lengths) * 2.0}

        # include any other params requested
        result.update(
            (param, first_value(stats_dict[param])) for param in self._est_params
        )
        return result
 def test_complex_parameter_rules(self):
     """kappa is shared by edges b and d while differing on edge a"""
     # This test has many local minima and so does not cope
     # with changes to optimiser details.
     model_class = cogent3.evolve.substitution_model.TimeReversibleNucleotide
     model = model_class(
         equal_motif_probs=True, model_gaps=True, predicates={"kappa": "transition"}
     )
     lf = model.make_likelihood_function(self.tree)
     # independent everywhere first, then not independent on b and d
     lf.set_param_rule(par_name="kappa", is_independent=True)
     lf.set_param_rule(par_name="kappa", is_independent=False, edges=["b", "d"])
     fixed_lengths = make_tree(treestring="((a:1,b:1):1,(c:2,d:1):1,e:1);")
     lf.set_constant_lengths(fixed_lengths)
     lf.set_alignment(self.al)
     lf.optimise(local=True)
     by_edge = lf.get_param_value_dict(["edge"], params=["kappa"])
     self.assertAlmostEqual(lf.get_log_likelihood(), -27.3252, 3)
     self.assertEqual(by_edge["kappa"]["b"], by_edge["kappa"]["d"])
     self.assertNotEqual(by_edge["kappa"]["a"], by_edge["kappa"]["b"])