Example #1
0
def get_heuristics(M, R):
    """
    Build a short text report of time-independent saturation heuristics.
    None of the heuristics depends on time or on the information variant.
    A greater Shannon or logical entropy of the stationary distribution
    suggests less saturation, while a greater expected rate
    or a greater spectral rate suggests more saturation.
    @param M: pure mutation rate matrix
    @param R: mutation-selection balance rate matrix
    @return: multiline string
    """
    # stationary distributions by the default method
    M_distn = mrate.R_to_distn(M)
    R_distn = mrate.R_to_distn(R)
    # the same distributions by a second method, used only as a sanity check
    M_check = mrate.R_to_distn_nonspectral(M)
    R_check = mrate.R_to_distn_nonspectral(R)
    for distn, check in ((M_distn, M_check), (R_distn, R_check)):
        if not np.allclose(distn, check):
            raise ValueError(
                    'internal stationary distribution calculation error')
    # entropy signs are positive when the mutation process has more entropy
    M_shannon = -sum(x * math.log(x) for x in M_distn)
    R_shannon = -sum(x * math.log(x) for x in R_distn)
    shannon_sign = np.sign(M_shannon - R_shannon)
    M_logical = 1 - sum(x * x for x in M_distn)
    R_logical = 1 - sum(x * x for x in R_distn)
    logical_sign = np.sign(M_logical - R_logical)
    # rate signs are positive when the mutation process has the smaller rate
    M_expected = mrate.Q_to_expected_rate(M)
    R_expected = mrate.Q_to_expected_rate(R)
    expected_sign = np.sign(R_expected - M_expected)
    M_spectral = 1 / mrate.R_to_relaxation_time(M)
    R_spectral = 1 / mrate.R_to_relaxation_time(R)
    spectral_sign = np.sign(R_spectral - M_spectral)
    # each paragraph is a headline, a shared tail, and a verdict line
    paragraphs = (
        ('Greater Shannon entropy of the stationary distribution',
            shannon_sign),
        ('Greater logical entropy of the stationary distribution',
            logical_sign),
        ('Smaller expected rate', expected_sign),
        ('Smaller spectral rate', spectral_sign),
    )
    out = StringIO()
    for headline, sign in paragraphs:
        print >> out, headline,
        print >> out, 'suggests more information about divergence time.'
        print >> out, _heuristic_helper(sign)
        print >> out
    return out.getvalue().strip()
Example #2
0
def get_mi_asymptotics(M, R):
    """
    Describe which process is more informative at extreme times.
    The comparison near time zero uses the Shannon entropies
    of the stationary distributions.
    The comparison at large times uses the spectral gaps,
    and mentions the Cheeger bounds when they do not overlap.
    @param M: pure mutation rate matrix
    @param R: mutation-selection balance rate matrix
    @return: multiline string
    """
    out = StringIO()
    M_distn = mrate.R_to_distn(M)
    R_distn = mrate.R_to_distn(R)
    # small times: compare the entropies of the stationary distributions
    M_entropy = -np.dot(np.log(M_distn), M_distn)
    R_entropy = -np.dot(np.log(R_distn), R_distn)
    if np.allclose(M_entropy, R_entropy):
        print >> out, 'There is not enough difference between the'
        print >> out, 'Shannon entropies of the stationary distributions'
        print >> out, 'to determine which process'
        print >> out, 'is more informative at times near zero'
    else:
        if R_entropy < M_entropy:
            winner = '* pure mutation'
        else:
            winner = '* mutation-selection balance'
        print >> out, 'At small enough times'
        print >> out, winner,
        print >> out, 'will be more informative'
        print >> out, 'because its stationary distribution has greater',
        print >> out, 'Shannon entropy.'
    print >> out
    # large times: the gap is the second smallest eigenvalue modulus
    M_moduli = [abs(x) for x in scipy.linalg.eigvals(M)]
    M_moduli.sort()
    M_gap = M_moduli[1]
    R_moduli = [abs(x) for x in scipy.linalg.eigvals(R)]
    R_moduli.sort()
    R_gap = R_moduli[1]
    M_low, M_mid, M_high = cheeger.get_cheeger_bounds(M, M_distn)
    R_low, R_mid, R_high = cheeger.get_cheeger_bounds(R, R_distn)
    if np.allclose(M_gap, R_gap):
        print >> out, 'There is not enough difference between the'
        print >> out, 'spectral gaps to determine which process'
        print >> out, 'is more informative at times near infinity'
    else:
        if R_gap < M_gap:
            winner = '* mutation-selection balance'
        else:
            winner = '* pure mutation'
        print >> out, 'At large enough times'
        print >> out, winner,
        print >> out, 'will be more informative'
        print >> out, 'because it has a smaller spectral gap.'
        # the bounds agree with the gaps only if the two ranges are disjoint
        if (R_high < M_low) or (M_high < R_low):
            print >> out, 'And also because of the isoperimetric bounds.'
    print >> out
    return out.getvalue().strip()
Example #3
0
def get_response_content(fs):
    """
    Search greedily for selection that does not shorten the relaxation time.
    A random GTR mutation matrix is sampled, and then for at most 20 rounds
    the stationary distribution is nudged a small distance toward
    a target found by a simplex search over Halpern-Bruno selection.
    The search stops early when the optimizer reports a positive objective.
    @param fs: user input whose nstates attribute is the number of states
    @return: multiline string tracing the search
    """
    np.set_printoptions(linewidth=200)
    out = StringIO()
    nstates = fs.nstates
    step = 0.001
    # sample a random mutation process and verify its stationary distribution
    exch = sample_symmetric_rate_matrix(nstates)
    distn = sample_distribution(nstates)
    M = mrate.to_gtr_halpern_bruno(exch, distn)
    if not np.allclose(distn, mrate.R_to_distn(M)):
        raise ValueError('stationary distribution error')
    print >> out, 't:', step
    print >> out
    print >> out, 'initial GTR matrix:'
    print >> out, M
    print >> out
    # repeatedly apply a small amount of Halpern-Bruno selection
    R = M
    v_current = distn
    for round_index in range(20):
        # report the state of the search before this round
        print >> out, v_current
        print >> out, mrate.R_to_relaxation_time(R)
        print >> out
        objective = MyOpt(R, step)
        initial_guess = [1.0] * (nstates - 1)
        xopt, fopt, niters, funcalls, warnflag = scipy.optimize.fmin(
                objective, initial_guess,
                disp=0, full_output=1, ftol=0.000001)
        if fopt > 0:
            print >> out, 'failed to increase relaxation time'
            print >> out
            break
        # move a fraction of the way toward the optimizer's distribution
        v_target = X_to_distn(xopt)
        v_next = (1 - step) * v_current + step * v_target
        print >> out, v_next - v_current
        print >> out
        # rebuild the rate matrix around the new stationary distribution
        R = mrate.to_gtr_halpern_bruno(R, v_next)
        if not np.allclose(v_next, mrate.R_to_distn(R)):
            raise ValueError('stationary distribution error')
        v_current = v_next
    print >> out, 'final rate matrix:'
    print >> out, R
    print >> out
    return out.getvalue()
Example #4
0
 def __call__(self, X):
     """
     @param X: a vector to be converted into a finite distribution
     """
     v_target = X_to_distn(X)
     v_new = (1 - self.t) * self.v + self.t * v_target
     R = mrate.to_gtr_halpern_bruno(self.M, v_new)
     if not np.allclose(v_new, mrate.R_to_distn(R)):
         print v_new
         print mrate.R_to_distn(R)
         raise ValueError('stationary distribution error')
     r_sel = mrate.R_to_relaxation_time(R)
     # we want to minimize this
     return self.r_mut - r_sel
Example #5
0
def get_asymptotic_variance_b(R, t):
    """
    Compute the asymptotic variance as a sum of two separate parts.
    With f = p[i] * P[i, j], the first part accumulates -(df/dt)^2 / f
    and the second part accumulates the second derivative of f.
    The second part is the one that is expected to be zero.
    @param R: rate matrix
    @param t: time
    @return: -1 / (first part + second part)
    """
    # non-spectral summaries
    nstates = len(R)
    P = scipy.linalg.expm(R * t)
    p = mrate.R_to_distn(R)
    # spectral summaries
    S = mrate.symmetrized(R)
    w, U = np.linalg.eigh(S)
    states = range(nstates)
    # exp(t * w[k]) does not depend on the pair of states
    exp_tw = [math.exp(t * w[k]) for k in states]
    first_part = 0
    second_part = 0
    for i in states:
        for j in states:
            scale = (p[i] * p[j])**.5
            # spectral sums for the first and second derivatives of f
            d1 = 0
            d2 = 0
            for k in states:
                d1 += U[i, k] * U[j, k] * w[k] * exp_tw[k]
                d2 += U[i, k] * U[j, k] * w[k] * w[k] * exp_tw[k]
            d1 *= scale
            d2 *= scale
            first_part -= (d1 * d1) / (p[i] * P[i, j])
            second_part += d2
    return -1 / (first_part + second_part)
Example #6
0
def get_mutual_information_diff_b(R, t):
    """
    This is a more symmetrized version.
    Note that two of the three terms are probably structurally zero.
    All three terms are still computed so that they can be inspected,
    but only the middle one is returned.
    @param R: rate matrix
    @param t: time
    @return: the term that involves the log of the spectral sum G
    """
    # get spectral summaries
    S = mrate.symmetrized(R)
    w, U = np.linalg.eigh(S)
    # G[i, j] = sum_k U[i, k] * U[j, k] * exp(t * w[k])
    # and G_diff is its elementwise derivative with respect to t.
    # Forming them as matrix products gives them the dtype of U,
    # so an integer-valued R can no longer truncate the entries.
    exp_tw = np.exp(t * w)
    G = np.dot(U * exp_tw, U.T)
    G_diff = np.dot(U * (w * exp_tw), U.T)
    # outer product of the eigenvector paired with the last eigenvalue
    last = U[:, -1]
    B = np.outer(last, last)
    # term_a and term_c are the two terms suspected to vanish;
    # they are deliberately kept for inspection while debugging
    term_a = np.sum(B * G_diff)
    term_b = np.sum(B * G_diff * np.log(G))
    term_c = -np.sum(B * G_diff * np.log(B))
    return term_b
Example #7
0
def get_ll_ratio_wrong(R, t):
    """
    Reproduce a calculation that is believed to be incorrect.
    This is an attempt to reconstruct a buggy result from an email.
    The sender was probably trying to compute the mutual information
    but did something wrong, and this function is a guess
    at exactly what they were doing.
    @param R: rate matrix
    @param t: time
    @return: difference of the logs of the two expected likelihoods
    """
    nstates = len(R)
    # transition matrix and stationary distribution
    P = scipy.linalg.expm(R * t)
    p = mrate.R_to_distn(R)
    # Both expectations weight each pair by its joint probability at time t;
    # they differ only in the likelihood that is being averaged.
    likelihood_t = 0
    likelihood_inf = 0
    for i in range(nstates):
        for j in range(nstates):
            if not (p[i] and P[i, j]):
                continue
            weight = p[i] * P[i, j]
            likelihood_t += weight * p[i] * P[i, j]
            likelihood_inf += weight * p[i] * p[j]
    return math.log(likelihood_t) - math.log(likelihood_inf)
Example #8
0
def get_rate_matrix_summary(Q):
    """
    Describe a rate matrix as a sequence of headed sections.
    The sections are the matrix itself, its detailed balance error,
    its stationary distribution, its expected rate, its relaxation time,
    and the product of the expected rate and the relaxation time.
    @param Q: rate matrix
    @return: multiline string with trailing whitespace removed
    """
    out = StringIO()
    distn = mrate.R_to_distn(Q)
    expected_rate = mrate.Q_to_expected_rate(Q)
    relaxation_time = mrate.R_to_relaxation_time(Q)
    print >> out, 'rate matrix:'
    print >> out, Q
    print >> out
    print >> out, 'this should be near zero for detailed balance:'
    print >> out, get_detailed_balance_error(Q)
    print >> out
    # the remaining sections share one layout: heading, value, blank line
    sections = (
        ('computed stationary distribution:', distn),
        ('expected rate:', expected_rate),
        ('relaxation time', relaxation_time),
        ('(expected rate) * (relaxation time):',
            expected_rate * relaxation_time),
    )
    for heading, value in sections:
        print >> out, heading
        print >> out, value
        print >> out
    return out.getvalue().rstrip()
Example #9
0
 def __init__(self, Q):
     """
     @param Q: rate matrix
     """
     # define intermediate variables
     v = mrate.R_to_distn(Q)
     n = len(v)
     psi = np.sqrt(v)
     c_low, c_mid, c_high = cheeger.get_cheeger_bounds(Q, v)
     # define member variables to summarize the rate matrix
     self.rate_matrix = Q
     self.exch_matrix = Q / v
     if not np.allclose(self.exch_matrix, self.exch_matrix.T):
         print self.exch_matrix
         raise ValueError('expected symmetry')
     self.sim_sym_matrix = np.outer(psi, 1 / psi) * Q
     if not np.allclose(self.sim_sym_matrix, self.sim_sym_matrix.T):
         print self.sim_sym_matrix
         raise ValueError('expected symmetry')
     self.distn = v
     self.distn_shannon_entropy = -ndot(np.log(v), v)
     self.distn_logical_entropy = ndot(v, 1 - v)
     self.expected_rate = -ndot(np.diag(Q), v)
     self.spectrum = scipy.linalg.eigvalsh(self.sim_sym_matrix)
     self.spectral_gap = -self.spectrum[-2]
     self.isoperimetric_low = c_low
     self.isoperimetric_constant = c_mid
     self.isoperimetric_high = c_high
     self.trace_bound_high = -sum(np.diag(Q)) / (n - 1)
Example #10
0
def get_mutual_information_small_approx_d(R, t):
    """
    This is an approximation for small times.
    This uses all of the off-diagonal entries of the mutual information
    and also uses an approximation of the off-diagonal entries.
    """
    n = len(R)
    v = mrate.R_to_distn(R)
    S = mrate.symmetrized(R)
    w, U = np.linalg.eigh(S)
    accum_diag_a = 0
    accum_diag_b = 0
    accum_diag_c = 0
    accum_diag_d = 0
    for i in range(n):
        a = 0
        b = 0
        for k in range(n):
            prefix = U[i, k] * U[i, k]
            a += prefix * math.exp(t * w[k])
        for k in range(n - 1):
            prefix = U[i, k] * U[i, k]
            b += prefix * math.exp(t * w[k])
        x1 = v[i] * v[i]
        x2 = v[i] * b
        y1 = math.log(a)
        y2 = -math.log(v[i])
        accum_diag_a += x1 * y1
        accum_diag_b += x1 * y2
        accum_diag_c += x2 * y1
        accum_diag_d += x2 * y2
    accum_a = 0
    accum_b = 0
    accum_c = 0
    accum_d = 0
    for i in range(n):
        for j in range(n):
            if i != j:
                prefix = (v[i] * v[j])**.5
                a = 0
                for k in range(n):
                    a += U[i, k] * U[j, k] * math.exp(t * w[k])
                b = 0
                for k in range(n - 1):
                    b += U[i, k] * U[j, k] * math.exp(t * w[k])
                x1 = v[i] * v[j]
                x2 = prefix * b
                y1 = math.log(a)
                y2 = -math.log(prefix)
                accum_a += x1 * y1
                accum_b += x1 * y2
                accum_c += x2 * y1
                accum_d += x2 * y2
    terms = [
        accum_diag_a, accum_diag_b, accum_diag_c, accum_diag_d, accum_a,
        accum_b, accum_c, accum_d
    ]
    for term in terms:
        print term
    return sum(terms)
Example #11
0
def get_mutual_information_small_approx_b(R, t):
    """
    Approximate the mutual information for small times.
    Only the diagonal entries contribute,
    and their contribution is split into four partial sums
    in order to check a decomposition.
    @param R: rate matrix
    @param t: time
    @return: sum of the four partial sums
    """
    nstates = len(R)
    v = mrate.R_to_distn(R)
    S = mrate.symmetrized(R)
    w, U = np.linalg.eigh(S)
    # exp(t * w[k]) is shared by every state
    exp_tw = [math.exp(t * w[k]) for k in range(nstates)]
    sum_x1y1 = 0
    sum_x1y2 = 0
    sum_x2y1 = 0
    sum_x2y2 = 0
    for i in range(nstates):
        # spectral sum over all eigenpairs
        full = 0
        for k in range(nstates):
            full += U[i, k] * U[i, k] * exp_tw[k]
        # spectral sum that leaves out the last eigenpair
        partial = 0
        for k in range(nstates - 1):
            partial += U[i, k] * U[i, k] * exp_tw[k]
        x1 = v[i] * v[i]
        x2 = v[i] * partial
        y1 = math.log(full)
        y2 = -math.log(v[i])
        sum_x1y1 += x1 * y1
        sum_x1y2 += x1 * y2
        sum_x2y1 += x2 * y1
        sum_x2y2 += x2 * y2
    return sum_x1y1 + sum_x1y2 + sum_x2y1 + sum_x2y2
Example #12
0
def get_mutual_information_stable(R, t):
    """
    This is meant to become a more stable function.
    The transition matrix is rebuilt from the eigendecomposition
    of the symmetrized rate matrix instead of from a matrix exponential.
    The scaled variant is still under construction,
    so for now only the plain accumulated value is returned.
    @param R: rate matrix
    @param t: time
    @return: sum over state pairs of v[i] * P[i, j] * log(P[i, j] / v[j])
    """
    #FIXME under construction
    n = len(R)
    v = mrate.R_to_distn(R)
    S = mrate.symmetrized(R)
    w, U = np.linalg.eigh(S)
    # exp(t * w[k]) does not depend on the pair of states
    exp_tw = [math.exp(t * w[k]) for k in range(n)]
    # Use an explicit float array so that an integer-valued R
    # cannot cause the accumulated entries to be truncated.
    P = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            # this factor undoes the symmetrization for entry (i, j)
            a = (v[j] / v[i])**0.5
            for k in range(n):
                b = U[i, k] * U[j, k]
                P[i, j] += a * b * exp_tw[k]
    # compute the unscaled part of log(X(i,j)/(X(i)*X(j)))
    accum = 0
    for i in range(n):
        for j in range(n):
            if v[i] and P[i, j]:
                coeff = v[i] * P[i, j]
                # the problem is that the following log is nearly zero
                value = coeff * math.log(P[i, j] / v[j])
                accum += np.real(value)
    return accum
Example #13
0
def get_p_id_deriv_ratio(R, t):
    """
    Get (second derivative of p_identity) divided by (first derivative of p_id)
    Here p_identity is the sum over states i of v[i] * P[i, i]
    where v is the stationary distribution and P is the transition matrix.
    @param R: rate matrix
    @param t: time
    @return: ratio of the second derivative to the first derivative
    """
    # symmetrize the rate matrix
    v = mrate.R_to_distn(R)
    lam = np.diag(np.sqrt(v))
    rlam = np.diag(np.reciprocal(np.sqrt(v)))
    S = ndot(lam, -R, rlam)
    # eigendecompose the symmetrized rate matrix
    # this should satisfy R = ndot(rlam, V, np.diag(-W), V.T, lam)
    W, V = scipy.linalg.eigh(S)
    # get the two derivatives of P with respect to time
    decay = np.exp(-W * t)
    P_dt = ndot(rlam, V, np.diag(-W * decay), V.T, lam)
    P_dtt = ndot(rlam, V, np.diag(W * W * decay), V.T, lam)
    # Get the two derivatives of expected identity.
    # Each is a single sum over the states; summing every term once
    # instead of once per state leaves the ratio unchanged.
    e_dt = np.dot(v, np.diag(P_dt))
    e_dtt = np.dot(v, np.diag(P_dtt))
    return e_dtt / e_dt
Example #14
0
def get_asymptotic_variance_e(R, t):
    """
    Try to mitigate the damage of the aggressive approximation.
    The next step is to try to simplify this complicated correction.
    But I have not been able to do this.
    @param R: rate matrix
    @param t: time
    @return: 1 / (aggressive approximation - correction)
    """
    n = len(R)
    p = mrate.R_to_distn(R)
    # get spectral summaries
    S = mrate.symmetrized(R)
    w, U = np.linalg.eigh(S)
    # Every spectral sum below leaves out the last eigenpair;
    # eigh sorts ascending so that is the largest eigenvalue
    # (presumably the zero eigenvalue of the rate matrix -- confirm).
    exp_tw = [math.exp(t * w[k]) for k in range(n - 1)]
    # the aggressive approximation
    accum = 0
    for k in range(n - 1):
        accum += w[k] * w[k] * math.exp(2 * t * w[k])
    # the correction
    accum_b = 0
    for i in range(n):
        for j in range(n):
            prefix = (p[i] * p[j])**-.5
            a = 0
            b = 0
            for k in range(n - 1):
                a += U[i, k] * U[j, k] * exp_tw[k]
                b += U[i, k] * U[j, k] * w[k] * exp_tw[k]
            suffix = a * b * b
            accum_b += prefix * suffix
    return 1 / (accum - accum_b)
Example #15
0
def get_expected_ll_ratio(R, t):
    """
    Compute the expected log likelihood ratio.
    This is also the mutual information between two observations
    of a finite-state continuous-time Markov process at equilibrium
    where the observations are separated by time t.
    @param R: rate matrix
    @param t: time
    @return: expected log likelihood ratio
    """
    #FIXME redundant function
    nstates = len(R)
    # transition matrix and stationary distribution
    P = scipy.linalg.expm(R * t)
    p = mrate.R_to_distn(R)
    total = 0
    for i in range(nstates):
        for j in range(nstates):
            # pairs with zero joint probability contribute nothing
            if not (p[i] and P[i, j]):
                continue
            joint = p[i] * P[i, j]
            # The ratio of joint to product of marginals is
            # (p[i] * P[i, j]) / (p[i] * p[j]), and the p[i] cancels.
            term = joint * math.log(P[i, j] / p[j])
            if not np.allclose(np.imag(term), 0):
                raise ValueError('rogue imaginary number')
            total += np.real(term)
    return total
Example #16
0
def get_mutual_information_diff_c(R, t):
    """
    This is a more symmetrized version.
    Some structurally zero terms have been removed.
    @param R: rate matrix
    @param t: time
    @return: sum over entries of B * G_diff * log(G)
    """
    # get spectral summaries
    S = mrate.symmetrized(R)
    w, U = np.linalg.eigh(S)
    # outer product of the eigenvector paired with the last eigenvalue
    last = U[:, -1]
    B = np.outer(last, last)
    # G[i, j] = sum_k U[i, k] * U[j, k] * exp(t * w[k])
    # and G_diff is its elementwise derivative with respect to t.
    # Forming them as matrix products gives them the dtype of U,
    # so an integer-valued R can no longer truncate the entries.
    exp_tw = np.exp(t * w)
    G = np.dot(U * exp_tw, U.T)
    G_diff = np.dot(U * (w * exp_tw), U.T)
    return np.sum(B * G_diff * np.log(G))
Example #17
0
 def __init__(self, Q):
     """
     Store a rate matrix together with a few derived summaries.
     The summaries are the relaxation time, the smallest stationary
     probability, the number of states, the negative reciprocal of the
     relaxation time, and two key time points.
     @param Q: rate matrix
     """
     relaxation_time = mrate.R_to_relaxation_time(Q)
     smallest_prob = min(mrate.R_to_distn(Q))
     self.Q = Q
     self.relaxation_time = relaxation_time
     self.p = smallest_prob
     self.N = len(Q)
     self.lam = -1 / relaxation_time
     # the helper returns the two time points as a pair
     (self.time_to_uniformity,
             self.time_to_usefulness) = ctmcmitaylor.get_key_time_points(
                     self.lam, self.p, self.N)
Example #18
0
 def __init__(self, M, t):
     """
     Remember the mutation process and precompute two of its summaries.
     @param M: mutation matrix
     @param t: the distance to go in the requested direction
     """
     self.M, self.t = M, t
     # stationary distribution of the mutation process
     self.v = mrate.R_to_distn(M)
     # relaxation time of the mutation process
     self.r_mut = mrate.R_to_relaxation_time(M)
Example #19
0
def get_gtr_pollock(R, t):
    """
    Compute the negative time derivative of the expected identity.
    This does not depend on f81 assumptions,
    and it does not use any explicitly spectral method.
    It relies on the equation
    (d/dt)sum(p_i P^t_ii) = sum(p_i (Q P^t)_ii)
    and on the scipy expm function.
    @param R: rate matrix
    @param t: time
    @return: negative of sum(p_i (R P^t)_ii)
    """
    distn = mrate.R_to_distn(R)
    P = scipy.linalg.expm(R * t)
    # diagonal of the derivative of the transition matrix
    diag_of_derivative = np.diag(np.dot(R, P))
    return -np.dot(distn, diag_of_derivative)
Example #20
0
def get_pinsker_lower_bound_mi(R, t):
    """
    Compute a Pinsker-style lower bound of the mutual information.
    The bound is half of the squared sum of absolute differences between
    the joint distribution at time t and the joint distribution
    at time infinity.
    @param R: rate matrix
    @param t: time
    @return: 0.5 * (sum of absolute entrywise differences)^2
    """
    # define the transition matrix
    P = scipy.linalg.expm(R * t)
    # define the stationary distribution
    p = mrate.R_to_distn(R)
    # define the joint probability matrix at times t and infinity
    J_t = np.dot(np.diag(p), P)
    J_inf = np.outer(p, p)
    # the exponent applies to the sum, not to the factor of one half
    l1_distance = np.sum(abs(J_t - J_inf))
    return 0.5 * l1_distance**2
Example #21
0
def get_detailed_balance_error(Q):
    """
    Measure how far a rate matrix is from satisfying detailed balance.
    The error for a pair of states is the difference between
    the stationary flux in one direction and in the other.
    @param Q: a rate matrix
    @return: a number that should be near zero if detailed balance is satisfied
    """
    p = mrate.R_to_distn(Q)
    states = range(len(Q))
    # largest absolute flux imbalance over all ordered pairs of states
    return max(
            abs(p[i] * Q[i, j] - p[j] * Q[j, i])
            for i in states for j in states)
Example #22
0
def get_mutual_information_b(R, t):
    """
    Compute the mutual information as a sum of six partial sums.
    This uses some cancellation.
    @param R: rate matrix
    @param t: time
    @return: sum of the three diagonal and three off-diagonal partial sums
    """
    nstates = len(R)
    v = mrate.R_to_distn(R)
    S = mrate.symmetrized(R)
    w, U = np.linalg.eigh(S)
    states = range(nstates)
    # exp(t * w[k]) is shared by every entry
    exp_tw = [math.exp(t * w[k]) for k in states]
    # partial sums over the diagonal entries
    diag_terms = [0, 0, 0]
    for i in states:
        # spectral sum over all eigenpairs
        full = 0
        for k in states:
            full += U[i, k] * U[i, k] * exp_tw[k]
        # spectral sum that leaves out the last eigenpair
        partial = 0
        for k in range(nstates - 1):
            partial += U[i, k] * U[i, k] * exp_tw[k]
        x1 = v[i] * v[i]
        x2 = v[i] * partial
        y1 = math.log(full)
        y2 = -math.log(v[i])
        diag_terms[0] += x1 * y1
        diag_terms[1] += x1 * y2
        diag_terms[2] += x2 * y1
    # partial sums over the off-diagonal entries
    offdiag_terms = [0, 0, 0]
    for i in states:
        for j in states:
            if i == j:
                continue
            root = (v[i] * v[j])**.5
            full = 0
            for k in states:
                full += U[i, k] * U[j, k] * exp_tw[k]
            partial = 0
            for k in range(nstates - 1):
                partial += U[i, k] * U[j, k] * exp_tw[k]
            x1 = v[i] * v[j]
            x2 = root * partial
            y1 = math.log(full)
            y2 = -math.log(root)
            offdiag_terms[0] += x1 * y1
            offdiag_terms[1] += x1 * y2
            offdiag_terms[2] += x2 * y1
    return sum(diag_terms + offdiag_terms)
Example #23
0
def get_input_matrix(fs):
    """
    Build and validate a mutation rate matrix from user input.
    The strict lower triangle of the exchangeability matrix is read
    from fs.lowtri, where the line at index i must hold i+1 values.
    The stationary weights are read from fs.distn_weights
    and a scaling factor is read from fs.scale.
    A ValueError is raised if any of the validation steps fails.
    @param fs: user input
    @return: M
    """
    # parse the strict lower triangular part of the exchangeability matrix
    lowtri_rows = []
    for row_index, line in enumerate(fs.lowtri):
        tokens = line.split()
        nexpected = row_index + 1
        if len(tokens) != nexpected:
            raise ValueError(
                    'expected %d values on line "%s"' % (nexpected, line))
        row = [float(token) for token in tokens]
        for value in row:
            if value < 0:
                raise ValueError('exchangeabilities must be nonnegative')
        lowtri_rows.append(row)
    # parse the stationary weights and normalize them to a distribution
    distn_weights = [float(weight) for weight in fs.distn_weights]
    for weight in distn_weights:
        if weight <= 0:
            raise ValueError('stationary weights must be positive')
    total_weight = sum(distn_weights)
    distn = [weight / total_weight for weight in distn_weights]
    # mirror the lower triangle to fill in the symmetric matrix
    nstates = len(lowtri_rows) + 1
    S = np.zeros((nstates, nstates))
    for row_index, row in enumerate(lowtri_rows):
        for col, value in enumerate(row):
            S[row_index + 1, col] = value
            S[col, row_index + 1] = value
    # the matrix and the weights must imply the same number of states
    if len(S) != len(distn_weights):
        raise ValueError('the inputs do not agree on the state space size')
    if nstates < 2:
        raise ValueError('at least two states are required')
    if not MatrixUtil.is_symmetric_irreducible(S):
        raise ValueError('exchangeability is not irreducible')
    # scale the columns by the distribution and then fix the diagonal
    # so that every row of the rate matrix sums to zero
    M = S * distn * fs.scale
    M -= np.diag(np.sum(M, axis=1))
    if not MatrixUtil.is_symmetric_irreducible(np.sign(M)):
        raise ValueError(
                'mutation rate matrix is not sign symmetric irreducible')
    # the requested distribution should be recovered from the rate matrix
    if not np.allclose(mrate.R_to_distn(M), distn):
        raise ValueError(
                'internal mut stationary distribution computation error')
    return M
Example #24
0
def get_mutual_information_diff_approx_c(R, t):
    """
    This is an approximation for large times.
    It has been rewritten using orthogonality.
    It has also been rewritten using orthonormality.
    After those rewrites only the eigenvalues are needed.
    @param R: rate matrix
    @param t: time
    @return: sum of w[k] * exp(2 * t * w[k]) over all but the last eigenvalue
    """
    n = len(R)
    S = mrate.symmetrized(R)
    # eigvalsh returns the eigenvalues in ascending order,
    # so the loop below leaves out the largest one
    w = np.linalg.eigvalsh(S)
    accum = 0
    for k in range(n - 1):
        accum += w[k] * math.exp(2 * t * w[k])
    return accum
Example #25
0
def get_mutual_information_approx_b(R, t):
    """
    This is an approximation for large times.
    It has been rewritten using orthogonality.
    @param R: rate matrix
    @param t: time
    @return: sum over state pairs and over all but the last eigenpair
             of (U[i, k] * U[j, k])^2 * exp(2 * t * w[k]) / 2
    """
    n = len(R)
    S = mrate.symmetrized(R)
    w, U = np.linalg.eigh(S)
    # exp(2 * t * w[k]) does not depend on the pair of states
    exp_2tw = [math.exp(2 * t * w[k]) for k in range(n - 1)]
    accum = 0
    for i in range(n):
        for j in range(n):
            for k in range(n - 1):
                accum += ((U[i, k] * U[j, k])**2) * exp_2tw[k] / 2
    return accum
Example #26
0
def get_row_based_plb_mi(R, t):
    """
    Row based pinsker lower bound of mutual information.
    For each row of the transition matrix, half of the squared sum of
    absolute differences from the stationary distribution is computed,
    and these values are averaged using the stationary probabilities.
    @param R: rate matrix
    @param t: time
    @return: stationary-weighted sum of the per-row bounds
    """
    # define the transition matrix
    P = scipy.linalg.expm(R * t)
    # define the stationary distribution
    p = mrate.R_to_distn(R)
    # weight the bound for each row by the probability of its initial state
    return sum(x * 0.5 * np.sum(abs(row - p))**2 for x, row in zip(p, P))
Example #27
0
def get_asymptotic_variance_d(R, t):
    """
    Use a very aggressive approximation.
    Only the eigenvalues of the symmetrized rate matrix are used.
    @param R: rate matrix
    @param t: time
    @return: reciprocal of the sum of w[k]^2 * exp(2 * t * w[k])
             over all but the last eigenvalue
    """
    n = len(R)
    S = mrate.symmetrized(R)
    # eigvalsh returns the eigenvalues in ascending order,
    # so the loop below leaves out the largest one
    w = np.linalg.eigvalsh(S)
    # compute the asymptotic variance approximation
    accum = 0
    for k in range(n - 1):
        accum += w[k] * w[k] * math.exp(2 * t * w[k])
    return 1 / accum
Example #28
0
def get_mutual_information_small_approx_c(R, t):
    """
    Approximate the mutual information for small times.
    This is an even more aggressive approximation
    which keeps a single term per state.
    @param R: rate matrix
    @param t: time
    @return: sum over states of -v[i] * log(v[i]) * (spectral sum for i)
    """
    nstates = len(R)
    v = mrate.R_to_distn(R)
    S = mrate.symmetrized(R)
    w, U = np.linalg.eigh(S)
    # exp(t * w[k]) is shared by every state
    exp_tw = [math.exp(t * w[k]) for k in range(nstates)]
    total = 0
    for i in range(nstates):
        # spectral sum for the diagonal entry of state i
        spectral_sum = 0
        for k in range(nstates):
            spectral_sum += U[i, k] * U[i, k] * exp_tw[k]
        total += -v[i] * math.log(v[i]) * spectral_sum
    return total
Example #29
0
def get_mutual_information_diff(R, t):
    """
    Accumulate a derivative-weighted sum over all pairs of states.
    Each pair with nonzero stationary and transition probability contributes
    p[i] * P_diff[i, j] * (1 + log(P[i, j]) - log(p[j]))
    where P_diff is the derivative of the transition matrix.
    @param R: rate matrix
    @param t: time
    @return: the accumulated sum
    """
    nstates = len(R)
    # transition matrix, its derivative, and the stationary distribution
    P = scipy.linalg.expm(R * t)
    P_diff = mrate.expm_diff_spectral(R, t)
    p = mrate.R_to_distn(R)
    total = 0
    for i in range(nstates):
        for j in range(nstates):
            # skip the pairs for which a log would be undefined
            if not (p[i] and P[i, j]):
                continue
            weight = p[i] * P_diff[i, j]
            log_part = 1 + math.log(P[i, j]) - math.log(p[j])
            total += weight * log_part
    return total
Example #30
0
def get_mutual_information_small_approx(R, t):
    """
    Approximate the mutual information for small times.
    Only the diagonal entries of the spectral sum contribute.
    @param R: rate matrix
    @param t: time
    @return: sum over states of v[i] * a * log(a / v[i])
             where a is the spectral sum for state i
    """
    nstates = len(R)
    v = mrate.R_to_distn(R)
    S = mrate.symmetrized(R)
    w, U = np.linalg.eigh(S)
    # exp(t * w[k]) is shared by every state
    exp_tw = [math.exp(t * w[k]) for k in range(nstates)]
    total = 0
    for i in range(nstates):
        spectral_sum = 0
        for k in range(nstates):
            spectral_sum += (U[i, k]**2) * exp_tw[k]
        total += v[i] * spectral_sum * math.log(spectral_sum / v[i])
    return total