Beispiel #1
0
    def test_calc_number_subs(self):
        """expected_number_subs equals length in case 1 and differs in case 2"""
        freqs = array([0.1, 0.2, 0.3, 0.4])
        freqs_diag = diag([0.1, 0.2, 0.3, 0.4])

        def calibrated(rates):
            """Build a rate matrix from ``rates`` and the motif frequencies.

            Columns are scaled by the frequencies, the diagonal is set so
            every row sums to zero, and the result is divided by the
            frequency-weighted sum of the row totals.
            """
            q = dot(rates, freqs_diag)
            # row totals are taken before the diagonal is filled in
            row_totals = np.sum(q, axis=1)
            q -= diag(row_totals)
            q /= np.dot(freqs, row_totals)
            return q

        # case 1, symmetric input rates: ENS is expected to match length
        symmetric = array(
            [[0, 2, 1, 1], [2, 0, 1, 1], [1, 1, 0, 2], [1, 1, 2, 0]],
            dtype=float,
        )
        length = 0.1
        ens = expm.expected_number_subs(freqs, calibrated(symmetric), length)
        assert_allclose(ens, length)

        # case 2, length != ENS (input rates are not symmetric)
        asymmetric = array(
            [[0, 1, 1, 1], [2, 0, 1, 1], [1, 1, 0, 40], [1, 1, 1, 0]],
            dtype=float,
        )
        length = 0.2
        ens = expm.expected_number_subs(freqs, calibrated(asymmetric), length)
        self.assertNotAlmostEqual(ens, length)
    def get_lengths_as_ens(self, motif_probs=None):
        """returns {edge.name: ens, ...}, ens being the expected number of
        substitutions on that edge

        When ``self.model`` is an instance of ``substitution_model.Stationary``
        the "length" parameter values are returned unchanged (for a stationary
        Markov process, ENS is just branch length). Otherwise each value is
        computed by ``expected_number_subs`` from the edge's motif probs,
        rate matrix and length.

        Parameters
        ----------
        motif_probs : dict or DictArray
            an item for each edge of the tree. Computed via
            ``self.get_motif_probs_by_node()`` if not provided.

        Returns
        -------
        dict
            keyed by every name from ``self.tree.get_node_names()`` except
            "root"
        """
        if motif_probs is None:
            motif_probs = self.get_motif_probs_by_node()

        edge_names = self.tree.get_node_names()
        # NOTE(review): removes in place; presumably get_node_names() returns
        # a fresh list that always contains "root" -- confirm
        edge_names.remove("root")

        branch_lengths = {}
        for name in edge_names:
            branch_lengths[name] = self.get_param_value("length", edge=name)

        if isinstance(self.model, substitution_model.Stationary):
            return branch_lengths

        ens = {}
        for name in edge_names:
            rate_matrix = self.get_rate_matrix_for_edge(name)
            ens[name] = expected_number_subs(
                motif_probs[name], rate_matrix, branch_lengths[name]
            )

        return ens