Example #1
0
def get_heuristics(M, R):
    """
    Return a multiline string with some heuristics.
    The heuristics are independent of time and of the information variant.
    Each one compares the pure mutation process
    to the mutation-selection balance process.
    Greater stationary distribution shannon entropy suggests less saturation.
    Greater stationary distribution logical entropy suggests less saturation.
    Greater expected rate suggests more saturation.
    Greater spectral rate suggests more saturation.
    @param M: pure mutation rate matrix
    @param R: mutation-selection balance rate matrix
    @return: multiline string
    @raise ValueError: if the two stationary distribution methods disagree
    """
    def shannon_entropy(distn):
        return -sum(p * math.log(p) for p in distn)

    def logical_entropy(distn):
        return 1 - sum(p * p for p in distn)

    # stationary distributions, computed two ways as a consistency check
    M_distn, R_distn = [mrate.R_to_distn(X) for X in (M, R)]
    M_check, R_check = [mrate.R_to_distn_nonspectral(X) for X in (M, R)]
    for distn, check in ((M_distn, M_check), (R_distn, R_check)):
        if not np.allclose(distn, check):
            raise ValueError(
                    'internal stationary distribution calculation error')
    # Entropy signs are positive when M has the greater entropy.
    M_shannon = shannon_entropy(M_distn)
    R_shannon = shannon_entropy(R_distn)
    shannon_sign = np.sign(M_shannon - R_shannon)
    M_logical = logical_entropy(M_distn)
    R_logical = logical_entropy(R_distn)
    logical_sign = np.sign(M_logical - R_logical)
    # Rate signs are positive when M has the smaller rate.
    M_expected, R_expected = [mrate.Q_to_expected_rate(X) for X in (M, R)]
    expected_sign = np.sign(R_expected - M_expected)
    # the spectral rate is the reciprocal of the relaxation time
    M_spectral, R_spectral = [
            1 / mrate.R_to_relaxation_time(X) for X in (M, R)]
    spectral_sign = np.sign(R_spectral - M_spectral)
    # report the heuristics, one paragraph per criterion
    criteria = (
            ('Greater Shannon entropy of the stationary distribution',
                shannon_sign),
            ('Greater logical entropy of the stationary distribution',
                logical_sign),
            ('Smaller expected rate', expected_sign),
            ('Smaller spectral rate', spectral_sign),
            )
    out = StringIO()
    for criterion, sign in criteria:
        print >> out, criterion,
        print >> out, 'suggests more information about divergence time.'
        print >> out, _heuristic_helper(sign)
        print >> out
    return out.getvalue().strip()
Example #2
0
def get_rate_matrix_summary(Q):
    """
    Describe a rate matrix as a multiline string.
    The report shows the matrix itself, its detailed balance error,
    its stationary distribution, its expected rate, its relaxation time,
    and the product of the expected rate and the relaxation time.
    @param Q: rate matrix
    @return: multiline string with trailing whitespace removed
    """
    stationary = mrate.R_to_distn(Q)
    expected_rate = mrate.Q_to_expected_rate(Q)
    relaxation_time = mrate.R_to_relaxation_time(Q)
    # each section is a heading line, a value, and a blank separator line
    sections = [
            ('rate matrix:', Q),
            ('this should be near zero for detailed balance:',
                get_detailed_balance_error(Q)),
            ('computed stationary distribution:', stationary),
            ('expected rate:', expected_rate),
            ('relaxation time', relaxation_time),
            ('(expected rate) * (relaxation time):',
                expected_rate * relaxation_time),
            ]
    out = StringIO()
    for heading, value in sections:
        print >> out, heading
        print >> out, value
        print >> out
    # trailing blank lines are dropped here
    return out.getvalue().rstrip()
Example #3
0
def do_weighted_square(fs, to_gtr):
    """
    Summarize a weighted 4-state square process before and after selection.
    The mutation process has off-diagonal weights 9 and 1
    and is scaled by dividing by its expected rate.
    The target distribution gives fs.p_mid to each of the last two states
    and splits the remaining probability equally over the first two.
    @param fs: form data; only p_mid is read here
    @param to_gtr: callable taking (rate matrix, target distribution)
    @return: multiline string with trailing whitespace removed
    """
    out = StringIO()

    def emit(title, rate_matrix):
        # a title, the rate matrix summary, and blank separator lines
        print >> out, title
        print >> out
        print >> out, get_rate_matrix_summary(rate_matrix)
        print >> out
        print >> out

    # define the mutation rate matrix
    weights = np.array([
        [0, 9, 0, 1],
        [9, 0, 1, 0],
        [0, 1, 0, 9],
        [1, 0, 9, 0]], dtype=float)
    row_sums = np.sum(weights, axis=1)
    M = weights - np.diag(row_sums)
    M /= mrate.Q_to_expected_rate(M)
    emit('*** mutation rate matrix (4-state square) ***', M)
    # kill the last two states by natural selection
    p_mid = fs.p_mid
    p_other = (1 - 2*p_mid)/2
    p_target = (p_other, p_other, p_mid, p_mid)
    emit('*** mutation-selection balance ***', to_gtr(M, p_target))
    return out.getvalue().rstrip()
Example #4
0
def do_funkily_weighted_square(fs, to_gtr):
    """
    Summarize an unweighted 4-state square process with an uneven target.
    The mutation process has unit off-diagonal weights
    and is scaled by dividing by its expected rate.
    The target distribution is (big, 0.3, fs.p_mid, fs.p_mid)
    where the big entry is whatever probability remains.
    @param fs: form data; only p_mid is read here
    @param to_gtr: callable taking (rate matrix, target distribution)
    @return: multiline string with trailing whitespace removed
    """
    out = StringIO()

    def emit(title, rate_matrix):
        # a title, the rate matrix summary, and blank separator lines
        print >> out, title
        print >> out
        print >> out, get_rate_matrix_summary(rate_matrix)
        print >> out
        print >> out

    # define the mutation rate matrix
    adjacency = np.array([
        [0, 1, 0, 1],
        [1, 0, 1, 0],
        [0, 1, 0, 1],
        [1, 0, 1, 0]], dtype=float)
    row_sums = np.sum(adjacency, axis=1)
    M = adjacency - np.diag(row_sums)
    M /= mrate.Q_to_expected_rate(M)
    emit('*** mutation rate matrix (4-state square) ***', M)
    # Use funky weights: one big, one medium, and two small entries.
    p_small = fs.p_mid
    p_medium = 0.3
    p_big = 1.0 - (p_medium + 2*p_small)
    p_target = (p_big, p_medium, p_small, p_small)
    emit('*** mutation-selection balance ***', to_gtr(M, p_target))
    return out.getvalue().rstrip()
Example #5
0
 def test_small_variance(self):
     """
     a = .1
     b = .2
     c = .7
     R = np.array([
         [-(b+c), b, c],
         [a, -(a+c), c],
         [a, b, -(a+b)]])
     """
     n = 4
     v = sample_distribution(n)
     S = sample_symmetric_rate_matrix(n)
     R = mrate.to_gtr_halpern_bruno(S, v)
     t = 0.0000001
     total_rate = mrate.Q_to_expected_rate(R)
     var = get_ml_variance(R, t)
     print 'time:', t
     print 'variance:', var
     print 'total rate:', total_rate
     print 'variance per time:', var / t
     print 'reciprocal of total rate:', 1 / total_rate
     print 'total rate times time:', total_rate * t
     print '(reciprocal of total rate) times time:', t / total_rate
     print
Example #6
0
def do_mut_hyper_2_3(fs, to_gtr):
    """
    Summarize an 8-state cube process, a balance process, and a reference.
    The target distribution gives fs.p_mid to the last state
    and splits the remaining probability equally over the other seven.
    The reference process copies the off-diagonal rates among
    the first seven states and is scaled by its expected rate.
    @param fs: form data; only p_mid is read here
    @param to_gtr: callable taking (rate matrix, target distribution)
    @return: multiline string with trailing whitespace removed
    """
    out = StringIO()

    def emit(title, rate_matrix):
        # a title, the rate matrix summary, and blank separator lines
        print >> out, title
        print >> out
        print >> out, get_rate_matrix_summary(rate_matrix)
        print >> out
        print >> out

    # define the cube mutation rate matrix
    M = mrate.get_sparse_sequence_rate_matrix(2, 3)
    emit('*** mutation rate matrix (8-state cube) ***', M)
    # kill the last state by natural selection
    p_other = (1 - fs.p_mid)/7
    p_target = [p_other]*7 + [fs.p_mid]
    emit('*** mutation-selection balance ***', to_gtr(M, p_target))
    # define a reference mutation rate matrix on the first seven states
    full = mrate.get_sparse_sequence_rate_matrix(2, 3)
    nkept = 7
    reference = np.zeros((nkept, nkept))
    for row in range(nkept):
        for col in range(nkept):
            if row == col:
                continue
            reference[row, col] = full[row, col]
    # set the diagonal so that rows sum to zero, then rescale
    reference -= np.diag(np.sum(reference, axis=1))
    reference /= mrate.Q_to_expected_rate(reference)
    emit('*** reference mutation rate matrix (corner removed) ***',
            reference)
    return out.getvalue().rstrip()
Example #7
0
def get_statistic_ratios(Q_mut, Q_sels):
    """
    Compare rate statistics of balance processes to the mutation process.
    ER_ratios divides each expected rate by the mutation expected rate.
    ER_NSR_ratios does the same for the reciprocal relaxation time.
    NSR_ratios is the elementwise quotient ER_NSR_ratios / ER_ratios.
    @param Q_mut: mutation rate matrix
    @param Q_sels: mutations-selection balance rate matrices
    @return: ER_ratios, NSR_ratios, ER_NSR_ratios
    """
    # expected rates relative to the mutation process
    mut_expected = mrate.Q_to_expected_rate(Q_mut)
    sel_expected = [mrate.Q_to_expected_rate(Q) for Q in Q_sels]
    ER_ratios = [rate / mut_expected for rate in sel_expected]
    # reciprocal relaxation times relative to the mutation process
    mut_spectral = 1 / mrate.R_to_relaxation_time(Q_mut)
    sel_spectral = [1 / mrate.R_to_relaxation_time(Q) for Q in Q_sels]
    ER_NSR_ratios = [rate / mut_spectral for rate in sel_spectral]
    # quotient of the two families of ratios
    NSR_ratios = []
    for spectral_ratio, expected_ratio in zip(ER_NSR_ratios, ER_ratios):
        NSR_ratios.append(spectral_ratio / expected_ratio)
    return ER_ratios, NSR_ratios, ER_NSR_ratios
Example #8
0
def get_response_content(fs):
    """
    Report on a barbell mutation process and a derived balance process.
    The balance process is mrate.to_gtr_halpern_bruno applied to
    the barbell matrix with a uniform three-state target distribution.
    Detailed balance residuals are listed for both processes.
    @param fs: form data; only p_mid is read here
    @return: multiline string
    """
    np.set_printoptions(linewidth=200)
    out = StringIO()
    # define the barbell mutation rate matrix
    M, p = get_barbell_rate_matrix(fs.p_mid)
    nstates = len(p)

    def emit(heading, value):
        # a heading line, a value, and a blank separator line
        print >> out, heading
        print >> out, value
        print >> out

    def emit_balance_residuals(distn, rate_matrix):
        # one residual per ordered pair of states, including i == j
        print >> out, 'all of these should be zero for detailed balance:'
        for i in range(nstates):
            for j in range(nstates):
                print >> out, (
                        distn[i] * rate_matrix[i, j] -
                        distn[j] * rate_matrix[j, i])
        print >> out

    emit('barbell mutation matrix:', M)
    emit_balance_residuals(p, M)
    emit('expected rate of the barbell mutation matrix:',
            mrate.Q_to_expected_rate(M))
    p_target = np.array([1 / 3.] * 3)
    emit('target stationary distribution:', p_target)
    Q = mrate.to_gtr_halpern_bruno(M, p_target)
    emit('mutation-selection balance rate matrix:', Q)
    v = mrate.R_to_distn(Q)
    emit('computed stationary distribution:', v)
    emit('expected rate of the mutation-selection balance rate matrix:',
            mrate.Q_to_expected_rate(Q))
    emit_balance_residuals(v, Q)
    return out.getvalue()
Example #9
0
 def __str__(self):
     """
     Format the summary as headed sections separated by blank lines.
     @return: multiline string with trailing whitespace removed
     """
     # each section is a heading line, a value, and a blank separator line
     sections = (
             ('rate matrix:', self.Q),
             ('relaxation time:', self.relaxation_time),
             ('min stationary probability:', self.p),
             ('expected rate:', mrate.Q_to_expected_rate(self.Q)),
             ('time to uniform-over-entries taylor h_2 bound:',
                 self.time_to_uniformity),
             ('time to informativeness for weak inequality:',
                 self.time_to_usefulness),
             )
     out = StringIO()
     for heading, value in sections:
         print >> out, heading
         print >> out, value
         print >> out
     return out.getvalue().rstrip()
Example #10
0
def get_response_content(fs):
    """
    Build an html report comparing two processes at a given time.
    The pure mutation process M and the mutation-selection balance
    process R come from get_input_matrices.
    The report states which process is more informative under the
    selected information variant, then lists heuristics, an input summary,
    time-dependent information calculations, and an html summary table.
    @param fs: form data; t, info_mut and info_fis are read here
    @return: html text
    @raise ValueError: if neither information variant is selected
    """
    M, R = get_input_matrices(fs)
    M_v = mrate.R_to_distn(M)
    R_v = mrate.R_to_distn(R)
    t = fs.t
    mi_mut = ctmcmi.get_mutual_information(M, t)
    mi_bal = ctmcmi.get_mutual_information(R, t)
    fi_mut = divtime.get_fisher_information(M, t)
    fi_bal = divtime.get_fisher_information(R, t)
    # The sign is positive when pure mutation is the more informative.
    if fs.info_mut:
        information_sign = np.sign(mi_mut - mi_bal)
    elif fs.info_fis:
        information_sign = np.sign(fi_mut - fi_bal)
    else:
        # without this the sign would be unbound further down
        raise ValueError('no information variant has been selected')
    out = StringIO()

    def emit(label, value):
        # a label line, a value, and a blank separator line
        print >> out, label
        print >> out, value
        print >> out

    print >> out, '<html>'
    print >> out, '<body>'
    print >> out
    print >> out, '<pre>'
    print >> out, 'Explicitly computed answer',
    print >> out, '(not a heuristic but may be numerically imprecise):'
    if information_sign == 1:
        print >> out, '* pure mutation',
        print >> out, 'is more informative'
    elif information_sign == -1:
        print >> out, '* the balance of mutation and selection',
        print >> out, 'is more informative'
    else:
        print >> out, '  the information contents of the two processes',
        print >> out, 'are numerically indistinguishable'
    print >> out
    print >> out
    if fs.info_mut:
        print >> out, 'Mutual information properties',
        print >> out, 'at very small and very large times:'
        print >> out
        print >> out, get_mi_asymptotics(M, R)
        print >> out
        print >> out
    print >> out, 'Heuristics without regard to time or to the selected',
    print >> out, 'information variant (Fisher vs. mutual information):'
    print >> out
    print >> out, get_heuristics(M, R)
    print >> out
    print >> out
    print >> out, 'Input summary:'
    print >> out
    emit('mutation rate matrix:', M)
    emit('mutation process stationary distribution:', M_v)
    emit('mutation-selection balance rate matrix:', R)
    emit('mutation-selection balance stationary distribution:', R_v)
    emit('mutation process expected rate:', mrate.Q_to_expected_rate(M))
    emit('mutation-selection balance expected rate:',
            mrate.Q_to_expected_rate(R))
    print >> out
    print >> out, 'The following information calculations',
    print >> out, 'depend on t = %s:' % t
    print >> out
    emit('log(ratio(E(L))) for pure mutation:',
            ctmcmi.get_ll_ratio_wrong(M, t))
    emit('log(ratio(E(L))) for mut-sel balance:',
            ctmcmi.get_ll_ratio_wrong(R, t))
    emit('mutual information for pure mutation:', mi_mut)
    emit('mutual information for mut-sel balance:', mi_bal)
    emit('pinsker lower bound mi for pure mutation:',
            ctmcmi.get_pinsker_lower_bound_mi(M, t))
    emit('pinsker lower bound mi for mut-sel balance:',
            ctmcmi.get_pinsker_lower_bound_mi(R, t))
    emit('row based pinsker lower bound mi for pure mutation:',
            ctmcmi.get_row_based_plb_mi(M, t))
    emit('row based pinsker lower bound mi for mut-sel balance:',
            ctmcmi.get_row_based_plb_mi(R, t))
    emit('row based hellinger lower bound mi for pure mutation:',
            ctmcmi.get_row_based_hellinger_lb_mi(M, t))
    emit('row based hellinger lower bound mi for mut-sel balance:',
            ctmcmi.get_row_based_hellinger_lb_mi(R, t))
    emit('Fisher information for pure mutation:', fi_mut)
    emit('Fisher information for mut-sel balance:', fi_bal)
    print >> out, '</pre>'
    #
    # create the summaries
    summaries = (RateMatrixSummary(M), RateMatrixSummary(R))
    print >> out, get_html_table(summaries)
    print >> out
    # close the elements in the reverse of the order they were opened
    print >> out, '</body>'
    print >> out, '</html>'
    return out.getvalue()
Example #11
0
def get_time_point_summary(Q_mut, Q_sels, t):
    """
    Compute summary statistics at a single time point.
    Five functions of each mutation-selection rate matrix are compared
    against the mutual information of that matrix.
    The returned sequence has 22 entries in this order:
    t;
    the mutation mutual information;
    the max, high, mean, low and min selection mutual information;
    the Pearson correlation of each of the five functions with the
    selection mutual information;
    for each function, the proportion of matrices for which the function
    and the mutual information fall on the same side of the
    mutation mutual information;
    for each function, the mutual information between those two
    side indicators.
    The high and low entries are taken nsels // 20 positions in from
    the top and bottom of the sorted values, but at least one position,
    so that they coincide with the max and min for fewer than 40 matrices.
    @param Q_mut: the mutation rate matrix
    @param Q_sels: sequence of mutation-selection rate matrices
    @param t: the time point under consideration
    @return: a sequence of statistics
    """
    # First compute the mutual information for mut and mut-sel.
    nsels = len(Q_sels)
    mi_mut = ctmcmi.get_mutual_information(Q_mut, t)
    mi_sels = [ctmcmi.get_mutual_information(Q, t) for Q in Q_sels]
    mi_signs = [1 if mi_sel > mi_mut else -1 for mi_sel in mi_sels]
    # Now compute some other functions
    v0 = [ctmcmi.get_mutual_information_small_approx_c(Q, t) for Q in Q_sels]
    v1 = [ctmcmi.get_mutual_information_small_approx(Q, t) for Q in Q_sels]
    v2 = [ctmcmi.get_mutual_information_approx_c(Q, t) for Q in Q_sels]
    v3 = [math.exp(-2*t/mrate.R_to_relaxation_time(Q)) for Q in Q_sels]
    v4 = [math.exp(-t*mrate.Q_to_expected_rate(Q)) for Q in Q_sels]
    functions = (v0, v1, v2, v3, v4)
    # side of the mutation mutual information on which each value falls
    function_signs = [
            [1 if value > mi_mut else -1 for value in v] for v in functions]
    # Now that we have computed all of the vectors at this time point,
    # we can compute the statistics that we want to report.
    statistics = [t, mi_mut]
    # add the mutual information statistics
    sorted_mi = sorted(mi_sels)
    # At least one, because an offset of zero would index the wrong end
    # of the sorted list for both the high and the low statistic.
    n_extreme = max(1, nsels // 20)
    statistics.append(sorted_mi[-1])
    statistics.append(sorted_mi[-n_extreme])
    statistics.append(sum(sorted_mi) / nsels)
    statistics.append(sorted_mi[n_extreme-1])
    statistics.append(sorted_mi[0])
    # add the correlations
    for v in functions:
        r, _ = scipy.stats.pearsonr(v, mi_sels)
        statistics.append(r)
    # add the sign proportions
    for v_signs in function_signs:
        total = sum(1 for a, b in zip(mi_signs, v_signs) if a == b)
        statistics.append(float(total) / nsels)
    # add the informativenesses
    for v_signs in function_signs:
        informativeness = 0
        for pair in ((1, 1), (1, -1), (-1, 1), (-1, -1)):
            v_value, m_value = pair
            v_marginal_count = sum(1 for x in v_signs if x == v_value)
            m_marginal_count = sum(1 for x in mi_signs if x == m_value)
            joint_count = sum(1 for x in zip(v_signs, mi_signs) if x == pair)
            # an empty cell contributes nothing; skipping it avoids log(0)
            if joint_count:
                joint_prob = joint_count / float(nsels)
                a = math.log(joint_prob)
                b = math.log(v_marginal_count / float(nsels))
                c = math.log(m_marginal_count / float(nsels))
                informativeness += joint_prob * (a - b - c)
        statistics.append(informativeness)
    # return the statistics
    return statistics
Example #12
0
def process(fs):
    """
    Sample a rate matrix, simplify it, and report on both processes.
    The rate matrix R is built from a sampled symmetric matrix S
    and a sampled distribution v.
    The simplified rate matrix Q is chosen by the flags on fs;
    when more than one flag is set the last matching block below wins.
    @param fs: form data; nstates, t, parent_indep, child_indep
    and bipartitioned are read here
    @return: multiline string with trailing whitespace removed
    @raise ValueError: if no simplification has been selected
    """
    # Without a selected simplification Q would never be assigned,
    # so fail here with a clear message instead of further down.
    if not (fs.parent_indep or fs.child_indep or fs.bipartitioned):
        raise ValueError('no simplification has been selected')
    nstates = fs.nstates
    np.set_printoptions(linewidth=200)
    t = fs.t
    ### sample a random time
    ##time_mu = 0.01
    ##t = random.expovariate(1 / time_mu)
    # Sample a rate matrix.
    # Use a trick by Robert Kern to left and right multiply by diagonals.
    # http://mail.scipy.org/pipermail/numpy-discussion/2007-March/
    # 026809.html
    S = MatrixUtil.sample_pos_sym_matrix(nstates)
    v = mrate.sample_distn(nstates)
    R = (v**-0.5)[:,np.newaxis] * S * (v**0.5)
    R -= np.diag(np.sum(R, axis=1))
    # Construct a parent-independent process
    # with the same max rate and stationary distribution
    # as the sampled process.
    if fs.parent_indep:
        Q = np.outer(np.ones(nstates), v)
        Q -= np.diag(np.sum(Q, axis=1))
        pi_rescaling_factor = max(np.diag(R) / np.diag(Q))
        Q *= pi_rescaling_factor
        Z = msimpl.get_fast_meta_f81_autobarrier(Q)
    # Construct a child-independent process
    # with the same expected rate
    # as the sampled process
    if fs.child_indep:
        C = np.outer(1/v, np.ones(nstates))
        C -= np.diag(np.sum(C, axis=1))
        ci_rescaling_factor = np.max(R / C)
        #expected_rate = -ndot(np.diag(R), v)
        #ci_rescaling_factor = expected_rate / (nstates*(nstates-1))
        #ci_rescaling_factor = expected_rate / (nstates*nstates)
        C *= ci_rescaling_factor
        Q = C
    if fs.bipartitioned:
        Q = msimpl.get_fast_meta_f81_autobarrier(R)
    # Check that the mutual information of the
    # parent independent process is smaller.
    out = StringIO()
    print >> out, 'sampled symmetric part of the rate matrix S:'
    print >> out, S
    print >> out
    print >> out, 'sampled stationary distribution v:'
    print >> out, v
    print >> out
    print >> out, 'shannon entropy of stationary distribution v:'
    print >> out, -np.dot(np.log(v), v)
    print >> out
    print >> out, 'sqrt stationary distribution:'
    print >> out, np.sqrt(v)
    print >> out
    print >> out, 'implied rate matrix R:'
    print >> out, R
    print >> out
    print >> out, 'eigenvalues of R:', scipy.linalg.eigvals(R)
    print >> out
    # the second smallest eigenvalue magnitude is reported as the rate
    print >> out, 'relaxation rate of R:',
    print >> out, sorted(np.abs(scipy.linalg.eigvals(R)))[1]
    print >> out
    print >> out, 'expected rate of R:', mrate.Q_to_expected_rate(R)
    print >> out
    print >> out, 'cheeger bounds of R:', get_cheeger_bounds(R, v)
    print >> out
    print >> out, 'randomization rate of R:', get_randomization_rate(R, v)
    print >> out
    candidates = [get_randomization_candidate(R, v, i) for i in range(nstates)]
    if np.allclose(get_randomization_rate(R, v), candidates):
        print >> out, 'all candidates are equal to this rate'
    else:
        print >> out, 'not all candidates are equal to this rate'
    print >> out
    print >> out, 'simplified rate matrix Q:'
    print >> out, Q
    print >> out
    qv = mrate.R_to_distn(Q)
    print >> out, 'stationary distribution of Q:'
    print >> out, qv
    print >> out
    print >> out, 'ratio qv/v:'
    print >> out, qv / v
    print >> out
    print >> out, 'shannon entropy of stationary distribution of Q:'
    print >> out, -np.dot(np.log(qv), qv)
    print >> out
    if fs.parent_indep:
        print >> out, 'parent independent rescaling factor:'
        print >> out, pi_rescaling_factor
        print >> out
    if fs.child_indep:
        print >> out, 'child independent rescaling factor:'
        print >> out, ci_rescaling_factor
        print >> out
    print >> out, 'eigenvalues of Q:', scipy.linalg.eigvals(Q)
    print >> out
    print >> out, 'relaxation rate of Q:',
    print >> out, sorted(np.abs(scipy.linalg.eigvals(Q)))[1]
    print >> out
    print >> out, 'expected rate of Q:', mrate.Q_to_expected_rate(Q)
    print >> out
    # NOTE(review): the Q statistics below use the sampled v, not qv;
    # presumably intentional, but confirm.
    print >> out, 'cheeger bounds of Q:', get_cheeger_bounds(Q, v)
    print >> out
    print >> out, 'randomization rate of Q:', get_randomization_rate(Q, v)
    print >> out
    candidates = [get_randomization_candidate(Q, v, i) for i in range(nstates)]
    if np.allclose(get_randomization_rate(Q, v), candidates):
        print >> out, 'all candidates are equal to this rate'
    else:
        print >> out, 'warning: not all candidates are equal to this rate'
    print >> out
    print >> out, 'E(rate) of Q divided by logical entropy:',
    print >> out, mrate.Q_to_expected_rate(Q) / ndot(v, 1-v)
    print >> out
    print >> out, 'symmetric matrix similar to Q:'
    # from here on S is the similarity transform of Q, not the sampled S
    S = ndot(np.diag(np.sqrt(v)), Q, np.diag(1/np.sqrt(v)))
    print >> out, S
    print >> out
    print >> out, 'eigendecomposition of the similar matrix:'
    W, V = scipy.linalg.eigh(S)
    print >> out, V
    print >> out, np.diag(W)
    print >> out, V.T
    print >> out
    #
    print >> out, 'time:', t
    print >> out
    print >> out, 'stationary distn logical entropy:', ndot(v, 1-v)
    print >> out
    #
    P_by_hand = get_pi_transition_matrix(Q, v, t)
    print >> out, 'simplified-process transition matrix computed by hand:'
    print >> out, P_by_hand
    print >> out
    print >> out, 'simplified-process transition matrix computed by expm:'
    print >> out, scipy.linalg.expm(Q*t)
    print >> out
    #
    print >> out, 'simplified-process m.i. by hand:'
    print >> out, get_pi_mi(Q, v, t)
    print >> out
    print >> out, 'simplified-process m.i. by expm:'
    print >> out, ctmcmi.get_expected_ll_ratio(Q, t)
    print >> out
    #
    print >> out, 'original process m.i. by expm:'
    print >> out, ctmcmi.get_expected_ll_ratio(R, t)
    print >> out
    #
    print >> out, 'stationary distn Shannon entropy:'
    print >> out, -ndot(v, np.log(v))
    print >> out
    #
    if fs.parent_indep:
        print >> out, 'approximate simplified process m.i. 2nd order approx:'
        print >> out, get_pi_mi_t2_approx(Q, v, t)
        print >> out
        print >> out, 'approximate simplified process m.i. "better" approx:'
        print >> out, get_pi_mi_t2_diag_approx(Q, v, t)
        print >> out
        print >> out, '"f81-ization plus barrier" of pure f81-ization:'
        print >> out, Z
        print >> out
    #
    return out.getvalue().rstrip()
Example #13
0
def sample_row():
    """
    Sample one table row describing a random pair of 4-state processes.
    The mutation process M is built from sampled exchangeabilities
    and a sampled distribution; the balance process R is
    mrate.to_gtr_halpern_bruno applied to M and a second sampled distribution.
    The row holds 16 definition entries (six exchangeabilities,
    two distributions, the info type, the time),
    then 12 summary differences, then a label.
    The label says which process has more of the sampled information type.
    @return: a list of row entries
    """
    n = 4
    # sample the exchangeability into the strict lower triangle
    S = np.zeros((n, n))
    for i in range(1, n):
        for j in range(i):
            S[i, j] = random.expovariate(1)
    # sample the mutation stationary distribution
    mdistn = np.array([random.expovariate(1) for k in range(n)])
    mdistn /= np.sum(mdistn)
    # sample the mutation selection balance stationary distribution
    bdistn = np.array([random.expovariate(1) for k in range(n)])
    bdistn /= np.sum(bdistn)
    # sample the time
    t = random.expovariate(1)
    # sample the info type
    infotype = random.choice(('infotype.mi', 'infotype.fi'))
    # Compute some intermediate variables
    # from which the summary statistics and the label are computed.
    S = S + S.T
    M = S * mdistn
    M -= np.diag(np.sum(M, axis=1))
    R = mrate.to_gtr_halpern_bruno(M, bdistn)
    h_mut = -sum(p * log(p) for p in mdistn)
    h_bal = -sum(p * log(p) for p in bdistn)
    l_mut = 1.0 - sum(p * p for p in mdistn)
    l_bal = 1.0 - sum(p * p for p in bdistn)
    er_mut = mrate.Q_to_expected_rate(M)
    er_bal = mrate.Q_to_expected_rate(R)
    sr_mut = 1 / mrate.R_to_relaxation_time(M)
    sr_bal = 1 / mrate.R_to_relaxation_time(R)
    mi_mut = ctmcmi.get_mutual_information(M, t)
    mi_bal = ctmcmi.get_mutual_information(R, t)
    fi_mut = divtime.get_fisher_information(M, t)
    fi_bal = divtime.get_fisher_information(R, t)
    # compute the summary statistics as balance minus mutation
    summary_entries = [
        h_bal - h_mut,
        l_bal - l_mut,
        log(h_bal) - log(h_mut),
        log(l_bal) - log(l_mut),
        er_bal - er_mut,
        sr_bal - sr_mut,
        log(er_bal) - log(er_mut),
        log(sr_bal) - log(sr_mut),
        mi_bal - mi_mut,
        fi_bal - fi_mut,
        math.log(mi_bal) - math.log(mi_mut),
        math.log(fi_bal) - math.log(fi_mut),
    ]
    # get the definition entries
    definition_entries = [S[i, j] for i in range(1, n) for j in range(i)]
    definition_entries.extend(mdistn)
    definition_entries.extend(bdistn)
    definition_entries.extend([infotype, t])
    # define the label from the information type that was sampled
    if infotype == 'infotype.mi':
        info_mut, info_bal = mi_mut, mi_bal
    else:
        info_mut, info_bal = fi_mut, fi_bal
    if info_mut > info_bal:
        label = 'mut.is.better'
    elif info_mut < info_bal:
        label = 'bal.is.better'
    else:
        label = 'indistinguishable'
    # return the row
    return definition_entries + summary_entries + [label]