Example #1
def get_response_content(fs):
    M, R = get_input_matrices(fs)
    # create the R table string and scripts
    headers = [
        't', 'mi.true.mut', 'mi.true.mutsel', 'mi.analog.mut',
        'mi.analog.mutsel'
    ]
    npoints = 100
    t_low = 0.0
    t_high = 5.0
    t_incr = (t_high - t_low) / (npoints - 1)
    t_values = [t_low + t_incr * i for i in range(npoints)]
    # get the data for the R table
    arr = []
    for t in t_values:
        mi_mut = ctmcmi.get_mutual_information(M, t)
        mi_mutsel = ctmcmi.get_mutual_information(R, t)
        mi_analog_mut = ctmcmi.get_ll_ratio_wrong(M, t)
        mi_analog_mutsel = ctmcmi.get_ll_ratio_wrong(R, t)
        row = [t, mi_mut, mi_mutsel, mi_analog_mut, mi_analog_mutsel]
        arr.append(row)
    # get the R table
    table_string = RUtil.get_table_string(arr, headers)
    # get the R script
    script = get_ggplot()
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter(
        table_string, script, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
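All of these examples lean on ctmcmi.get_mutual_information(R, t), which is not shown in this listing. Below is a minimal self-contained sketch of that quantity, the mutual information between the chain state at time 0 and at time t for a stationary process; the function name and the use of scipy.linalg.expm are illustrative assumptions, not the actual ctmcmi implementation. The stationary distribution pi is passed in explicitly here, whereas the project computes it with mrate.R_to_distn.

import numpy as np
import scipy.linalg

def mutual_information_sketch(R, pi, t):
    # Joint distribution of (X_0, X_t): J_ij = pi_i * P_ij(t).
    P = scipy.linalg.expm(R * t)
    J = pi[:, np.newaxis] * P
    # I(X_0; X_t) = sum_ij J_ij * log(J_ij / (pi_i * pi_j)).
    ref = np.outer(pi, pi)
    mask = J > 0
    return float(np.sum(J[mask] * np.log(J[mask] / ref[mask])))

# toy usage with a symmetric 2-state generator
R = np.array([[-1.0, 1.0], [1.0, -1.0]])
pi = np.array([0.5, 0.5])
print(mutual_information_sketch(R, pi, 1.0))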
Example #2
def get_response_content(fs):
    M, R = get_input_matrices(fs)
    # create the R table string and scripts
    headers = [
            't',
            'mi.true.mut',
            'mi.true.mutsel',
            'mi.analog.mut',
            'mi.analog.mutsel']
    npoints = 100
    t_low = 0.0
    t_high = 5.0
    t_incr = (t_high - t_low) / (npoints - 1)
    t_values = [t_low + t_incr*i for i in range(npoints)]
    # get the data for the R table
    arr = []
    for t in t_values:
        mi_mut = ctmcmi.get_mutual_information(M, t)
        mi_mutsel = ctmcmi.get_mutual_information(R, t)
        mi_analog_mut = ctmcmi.get_ll_ratio_wrong(M, t)
        mi_analog_mutsel = ctmcmi.get_ll_ratio_wrong(R, t)
        row = [t, mi_mut, mi_mutsel, mi_analog_mut, mi_analog_mutsel]
        arr.append(row)
    # get the R table
    table_string = RUtil.get_table_string(arr, headers)
    # get the R script
    script = get_ggplot()
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter(
            table_string, script, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
Example #3
 def test_large_variance(self):
     n = 4
     v = sample_distribution(n)
     S = sample_symmetric_rate_matrix(n)
     R = mrate.to_gtr_halpern_bruno(S, v)
     """
     a = .1
     b = .2
     c = .7
     R = np.array([
         [-(b+c), b, c],
         [a, -(a+c), c],
         [a, b, -(a+b)]])
     """
     t = 2.0
     dt = 0.0000001
     rtime = mrate.R_to_relaxation_time(R)
     var_a = get_ml_variance(R, t)
     var_b = get_ml_variance(R, t+dt)
     var_slope = (var_b - var_a) / dt
     deriv_ratio = get_p_id_deriv_ratio(R, t)
     clever_ratio = get_ml_variance_ratio(R, t)
     print 'time:', t
     print 'variance:', var_a
     print 'variance slope:', var_slope
     print 'var_slope / var_a:', var_slope / var_a
     print 'var_slope / var_a [clever]:', clever_ratio
     print 'log variance:', math.log(var_a)
     print 'relaxation time:', rtime
     print '2 / relaxation_time:', 2 / rtime
     print "p_id(t)'' / p_id(t)':", deriv_ratio
     print
     print '--- new attempt ---'
     print 'mutual information:', ctmcmi.get_mutual_information(R, t)
     print 'reciprocal of MI:', 1.0 / ctmcmi.get_mutual_information(R, t)
     print 'asymptotic variance:', get_asymptotic_variance(R, t)
     print 'asymptotic variance (ver. 2):', get_asymptotic_variance_b(R, t)
     print 'asymptotic variance (ver. 3):', get_asymptotic_variance_c(R, t)
     print 'AV approx (ver. 4):', get_asymptotic_variance_d(R, t)
     print 'AV approx (ver. 5):', get_asymptotic_variance_e(R, t)
     print
     print '--- another thing ---'
     fi_slow = get_fisher_info_known_distn(R, v, t)
     fi_fast = get_fisher_info_known_distn_fast(R, v, t)
     print 'slow asymptotic variance:', 1 / fi_slow
     print 'fast asymptotic variance:', 1 / fi_fast
     print
Example #4
 def test_large_variance(self):
     n = 4
     v = sample_distribution(n)
     S = sample_symmetric_rate_matrix(n)
     R = mrate.to_gtr_halpern_bruno(S, v)
     """
     a = .1
     b = .2
     c = .7
     R = np.array([
         [-(b+c), b, c],
         [a, -(a+c), c],
         [a, b, -(a+b)]])
     """
     t = 2.0
     dt = 0.0000001
     rtime = mrate.R_to_relaxation_time(R)
     var_a = get_ml_variance(R, t)
     var_b = get_ml_variance(R, t + dt)
     var_slope = (var_b - var_a) / dt
     deriv_ratio = get_p_id_deriv_ratio(R, t)
     clever_ratio = get_ml_variance_ratio(R, t)
     print 'time:', t
     print 'variance:', var_a
     print 'variance slope:', var_slope
     print 'var_slope / var_a:', var_slope / var_a
     print 'var_slope / var_a [clever]:', clever_ratio
     print 'log variance:', math.log(var_a)
     print 'relaxation time:', rtime
     print '2 / relaxation_time:', 2 / rtime
     print "p_id(t)'' / p_id(t)':", deriv_ratio
     print
     print '--- new attempt ---'
     print 'mutual information:', ctmcmi.get_mutual_information(R, t)
     print 'reciprocal of MI:', 1.0 / ctmcmi.get_mutual_information(R, t)
     print 'asymptotic variance:', get_asymptotic_variance(R, t)
     print 'asymptotic variance (ver. 2):', get_asymptotic_variance_b(R, t)
     print 'asymptotic variance (ver. 3):', get_asymptotic_variance_c(R, t)
     print 'AV approx (ver. 4):', get_asymptotic_variance_d(R, t)
     print 'AV approx (ver. 5):', get_asymptotic_variance_e(R, t)
     print
     print '--- another thing ---'
     fi_slow = get_fisher_info_known_distn(R, v, t)
     fi_fast = get_fisher_info_known_distn_fast(R, v, t)
     print 'slow asymptotic variance:', 1 / fi_slow
     print 'fast asymptotic variance:', 1 / fi_fast
     print
Example #5
def get_time_point_summary(Q_mut, Q_sels, t):
    """
    @param Q_mut: the mutation rate matrix
    @param Q_sels: sequence of mutation-selection rate matrices
    @param t: the time point under consideration
    @return: a list of signs, and a sequence of statistics
    """
    # Compute the following statistics at this time point:
    # t
    # mutation MI
    # selection MI max
    # selection MI high
    # selection MI mean
    # selection MI low
    # selection MI min
    # proportion
    #
    # First compute the mutual information for mut and mut-sel.
    nsels = len(Q_sels)
    mi_mut = ctmcmi.get_mutual_information(Q_mut, t)
    mi_sels = [ctmcmi.get_mutual_information(Q, t) for Q in Q_sels]
    mi_signs = [1 if mi_sel > mi_mut else -1 for mi_sel in mi_sels]
    # Now that we have computed all of the vectors at this time point,
    # we can compute the statistics that we want to report.
    statistics = []
    statistics.append(t)
    statistics.append(mi_mut)
    # add the mutual information statistics
    sorted_mi = sorted(mi_sels)
    n_extreme = nsels / 20  # size of each 5% tail (Python 2 integer division; degenerates if nsels < 20)
    statistics.append(sorted_mi[-1])
    statistics.append(sorted_mi[-n_extreme])
    statistics.append(sum(sorted_mi) / nsels)
    statistics.append(sorted_mi[n_extreme-1])
    statistics.append(sorted_mi[0])
    # add the proportion
    statistics.append(sum(1 for x in mi_signs if x == 1) / float(nsels))
    # return the statistics
    return mi_signs, statistics
Example #6
def get_time_point_summary(Q_mut, Q_sels, t):
    """
    @param Q_mut: the mutation rate matrix
    @param Q_sels: sequence of mutation-selection rate matrices
    @param t: the time point under consideration
    @return: a list of signs, and a sequence of statistics
    """
    # Compute the following statistics at this time point:
    # t
    # mutation MI
    # selection MI max
    # selection MI high
    # selection MI mean
    # selection MI low
    # selection MI min
    # proportion
    #
    # First compute the mutual information for mut and mut-sel.
    nsels = len(Q_sels)
    mi_mut = ctmcmi.get_mutual_information(Q_mut, t)
    mi_sels = [ctmcmi.get_mutual_information(Q, t) for Q in Q_sels]
    mi_signs = [1 if mi_sel > mi_mut else -1 for mi_sel in mi_sels]
    # Now that we have computed all of the vectors at this time point,
    # we can compute the statistics that we want to report.
    statistics = []
    statistics.append(t)
    statistics.append(mi_mut)
    # add the mutual information statistics
    sorted_mi = sorted(mi_sels)
    n_extreme = nsels / 20  # size of each 5% tail (Python 2 integer division; degenerates if nsels < 20)
    statistics.append(sorted_mi[-1])
    statistics.append(sorted_mi[-n_extreme])
    statistics.append(sum(sorted_mi) / nsels)
    statistics.append(sorted_mi[n_extreme - 1])
    statistics.append(sorted_mi[0])
    # add the proportion
    statistics.append(sum(1 for x in mi_signs if x == 1) / float(nsels))
    # return the statistics
    return mi_signs, statistics
Example #7
def get_time_point_summary(Q_mut, Q_sels, t):
    """
    @param Q_mut: the mutation rate matrix
    @param Q_sels: sequence of mutation-selection rate matrices
    @param t: the time point under consideration
    @return: a sequence of statistics
    """
    # Compute the following statistics at this time point:
    # t
    # mutation MI
    # selection MI max
    # selection MI high
    # selection MI mean
    # selection MI low
    # selection MI min
    # correlation fn 1
    # correlation fn 2
    # correlation fn 3
    # correlation fn 4
    # correlation fn 5
    # proportion sign agreement fn 1
    # proportion sign agreement fn 2
    # proportion sign agreement fn 3
    # proportion sign agreement fn 4
    # proportion sign agreement fn 5
    # informativeness fn 1
    # informativeness fn 2
    # informativeness fn 3
    # informativeness fn 4
    # informativeness fn 5
    #
    # First compute the mutual information for mut and mut-sel.
    nsels = len(Q_sels)
    mi_mut = ctmcmi.get_mutual_information(Q_mut, t)
    mi_sels = [ctmcmi.get_mutual_information(Q, t) for Q in Q_sels]
    mi_signs = [1 if mi_sel > mi_mut else -1 for mi_sel in mi_sels]
    # Now compute some other functions
    v0 = [ctmcmi.get_mutual_information_small_approx_c(Q, t) for Q in Q_sels]
    v1 = [ctmcmi.get_mutual_information_small_approx(Q, t) for Q in Q_sels]
    v2 = [ctmcmi.get_mutual_information_approx_c(Q, t) for Q in Q_sels]
    v3 = [math.exp(-2*t/mrate.R_to_relaxation_time(Q)) for Q in Q_sels]
    v4 = [math.exp(-t*mrate.Q_to_expected_rate(Q)) for Q in Q_sels]
    # Now that we have computed all of the vectors at this time point,
    # we can compute the statistics that we want to report.
    statistics = []
    statistics.append(t)
    statistics.append(mi_mut)
    # add the mutual information statistics
    sorted_mi = sorted(mi_sels)
    n_extreme = nsels / 20  # size of each 5% tail (Python 2 integer division; degenerates if nsels < 20)
    statistics.append(sorted_mi[-1])
    statistics.append(sorted_mi[-n_extreme])
    statistics.append(sum(sorted_mi) / nsels)
    statistics.append(sorted_mi[n_extreme-1])
    statistics.append(sorted_mi[0])
    # add the correlations
    for v in (v0, v1, v2, v3, v4):
        r, p = scipy.stats.stats.pearsonr(v, mi_sels)
        statistics.append(r)
    # add the sign proportions
    for v in (v0, v1, v2, v3, v4):
        v_signs = [1 if value > mi_mut else -1 for value in v]
        total = sum(1 for a, b in zip(mi_signs, v_signs) if a == b)
        p = float(total) / nsels
        statistics.append(p)
    # add the informativenesses
    for v in (v0, v1, v2, v3, v4):
        v_signs = [1 if value > mi_mut else -1 for value in v]
        informativeness = 0
        for pair in ((1, 1), (1, -1), (-1, 1), (-1, -1)):
            v_value, m_value = pair
            v_marginal_count = sum(1 for x in v_signs if x == v_value)
            m_marginal_count = sum(1 for x in mi_signs if x == m_value)
            joint_count = sum(1 for x in zip(v_signs, mi_signs) if x == pair)
            if joint_count:
                joint_prob = joint_count / float(nsels)
                a = math.log(joint_prob)
                b = math.log(v_marginal_count / float(nsels))
                c = math.log(m_marginal_count / float(nsels))
                informativeness += joint_prob * (a - b - c)
        statistics.append(informativeness)
    # return the statistics
    return statistics
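The informativeness loop above is the mutual information (in nats) between two ±1 sign vectors: the signs of the candidate statistic relative to mi_mut and the signs of the exact mutual information comparison. An equivalent standalone helper might look like the following sketch (the function name is invented for illustration).

import math

def sign_mutual_information(v_signs, m_signs):
    # Mutual information between two equal-length sequences of +1/-1 labels.
    n = float(len(v_signs))
    mi = 0.0
    for pair in ((1, 1), (1, -1), (-1, 1), (-1, -1)):
        v_value, m_value = pair
        joint_count = sum(1 for x in zip(v_signs, m_signs) if x == pair)
        if joint_count:
            p_joint = joint_count / n
            p_v = sum(1 for x in v_signs if x == v_value) / n
            p_m = sum(1 for x in m_signs if x == m_value) / n
            mi += p_joint * (math.log(p_joint) - math.log(p_v) - math.log(p_m))
    return mi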
Example #8
def get_response_content(fs):
    out = StringIO()
    np.set_printoptions(linewidth=200)
    # get the user defined variables
    n = fs.nstates
    t = fs.divtime
    #h = fs.delta
    # sample a random rate matrix
    v = divtime.sample_distribution(n)
    S = divtime.sample_symmetric_rate_matrix(n)
    R = mrate.to_gtr_halpern_bruno(S, v)
    # get some properties of the rate matrix
    distn = mrate.R_to_distn(R)
    spectrum = np.linalg.eigvalsh(mrate.symmetrized(R))
    #spectrum, U = np.linalg.eigh(mrate.symmetrized(R))
    #spectrum = np.linalg.eigvals(R)
    # report some information about the mutual information curve
    mi = ctmcmi.get_mutual_information(R, t)
    mi_diff = ctmcmi.get_mutual_information_diff(R, t)
    mi_diff_b = ctmcmi.get_mutual_information_diff_b(R, t)
    mi_diff_c = ctmcmi.get_mutual_information_diff_c(R, t)
    print >> out, 'arbitrary large-ish divergence time:'
    print >> out, t
    print >> out
    print >> out, 'randomly sampled reversible rate matrix:'
    print >> out, R
    print >> out
    print >> out, 'stationary distribution:'
    print >> out, distn
    print >> out
    print >> out, 'spectrum of the rate matrix:'
    print >> out, spectrum
    print >> out
    print >> out, 'mutual information at t = %f:' % t
    print >> out, mi
    print >> out
    print >> out, 'mutual information at t = %f (ver. 2):' % t
    print >> out, ctmcmi.get_mutual_information_b(R, t)
    print >> out
    print >> out, 'large t approximation of MI at t = %f:' % t
    print >> out, ctmcmi.get_mutual_information_approx(R, t)
    print >> out
    print >> out, 'large t approximation of MI at t = %f (ver. 2):' % t
    print >> out, ctmcmi.get_mutual_information_approx_b(R, t)
    print >> out
    print >> out, 'large t approximation of MI at t = %f (ver. 3):' % t
    print >> out, ctmcmi.cute_MI_alternate(R, t)
    print >> out
    print >> out, 'large t approximation of MI at t = %f (ver. 4):' % t
    print >> out, ctmcmi.get_mutual_information_approx_c(R, t)
    print >> out
    print >> out, 'small t approximation of MI at t = %f:' % t
    print >> out, ctmcmi.get_mutual_information_small_approx(R, t)
    print >> out
    print >> out, 'small t approximation of MI at t = %f (ver. 2):' % t
    print >> out, ctmcmi.get_mutual_information_small_approx_b(R, t)
    print >> out
    print >> out, 'small t approximation of MI at t = %f (ver. 3):' % t
    print >> out, ctmcmi.get_mutual_information_small_approx_c(R, t)
    print >> out
    print >> out, 'small t approximation of MI at t = %f (ver. 4):' % t
    print >> out, ctmcmi.get_mutual_information_small_approx_d(R, t)
    print >> out
    print >> out, 'mutual information diff at t = %f:' % t
    print >> out, mi_diff
    print >> out
    print >> out, 'mutual information diff at t = %f (ver. 2):' % t
    print >> out, mi_diff_b
    print >> out
    print >> out, 'mutual information diff at t = %f (ver. 3):' % t
    print >> out, mi_diff_c
    print >> out
    print >> out, 'large t approximation of MI diff at t = %f:' % t
    print >> out, ctmcmi.get_mutual_information_diff_approx(R, t)
    print >> out
    print >> out, 'large t approximation of MI diff at t = %f: (ver. 2)' % t
    print >> out, ctmcmi.get_mutual_information_diff_approx_b(R, t)
    print >> out
    print >> out, 'large t approximation of MI diff at t = %f: (ver. 4)' % t
    print >> out, ctmcmi.get_mutual_information_diff_approx_c(R, t)
    print >> out
    print >> out, 'log of mutual information at t = %f:' % t
    print >> out, math.log(mi)
    print >> out
    #print >> out, 'estimated derivative',
    #print >> out, 'of log of mutual information at t = %f:' % t
    #print >> out, (math.log(mi_c) - math.log(mi_a)) / (2*h)
    #print >> out
    print >> out, 'estimated derivative of log of MI',
    print >> out, 'at t = %f:' % t
    print >> out, mi_diff / mi
    print >> out
    print >> out, 'large t approximation of derivative of log of MI',
    print >> out, 'at t = %f:' % t
    print >> out, ctmcmi.get_mutual_information_diff_approx(
        R, t) / ctmcmi.get_mutual_information_approx(R, t)
    print >> out
    print >> out, 'large t approximation of derivative of log of MI',
    print >> out, 'at t = %f (ver. 2):' % t
    print >> out, ctmcmi.get_mutual_information_diff_approx_b(
        R, t) / ctmcmi.get_mutual_information_approx_b(R, t)
    print >> out
    print >> out, 'twice the relevant eigenvalue:'
    print >> out, 2 * spectrum[-2]
    print >> out
    print >> out
    #print >> out, 'estimated derivative',
    #print >> out, 'of mutual information at t = %f:' % t
    #print >> out, (mi_c - mi_a) / (2*h)
    #print >> out
    #print >> out, '(estimated derivative of mutual information) /',
    #print >> out, '(mutual information) at t = %f:' % t
    #print >> out, (mi_c - mi_a) / (2*h*mi_b)
    #print >> out
    return out.getvalue()
Example #9
 def __call__(self):
     """
     Look for a counterexample.
     """
     n = self.nstates
     # sample a random rate and time and stationary distribution
     r = random.expovariate(1)
     t = random.expovariate(1)
     v = np.random.exponential(1, n)
     v /= np.sum(v)
     # construct the F81 rate matrix
     R = r * np.outer(np.ones(n), v)
     R -= np.diag(np.sum(R, axis=1))
     # get some information criterion values
     mi_general = ctmcmi.get_mutual_information(R, t)
     fi_general = divtime.get_fisher_information(R, t)
     pollock_general = get_gtr_pollock(R, t)
     mi_f81 = get_f81_mi(r, v, t)
     fi_f81 = get_f81_fi(r, v, t)
     pollock_f81 = get_f81_pollock(r, v, t)
     if n == 2:
         fi_f81_2state = get_f81_fi_2state(r, v, t)
     # check for contradictions
     try:
         if not np.allclose(mi_general, mi_f81):
             raise Contradiction('mutual information')
         if not np.allclose(fi_general, fi_f81):
             raise Contradiction('fisher information')
         if not np.allclose(pollock_general, pollock_f81):
             raise Contradiction('neg slope identity proportion')
         if n == 2:
             if not np.allclose(fi_general, fi_f81_2state):
                 raise Contradiction('fisher information (2-state)')
     except Contradiction as e:
         out = StringIO()
         print >> out, 'found', str(e), 'contradiction'
         print >> out
         print >> out, 'GTR mutual information:'
         print >> out, mi_general
         print >> out
         print >> out, 'F81 mutual information:'
         print >> out, mi_f81
         print >> out
         print >> out, 'GTR Fisher information:'
         print >> out, fi_general
         print >> out
         print >> out, 'F81 Fisher information:'
         print >> out, fi_f81
         print >> out
         if n == 2:
             print >> out, 'F81 2-state Fisher information:'
             print >> out, fi_f81_2state
             print >> out
         print >> out, 'GTR neg slope identity proportion:'
         print >> out, pollock_general
         print >> out
         print >> out, 'F81 neg slope identity proportion:'
         print >> out, pollock_f81
         print >> out
         self.counterexample = out.getvalue()
         return True
     return False
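The F81 helpers compared against here (get_f81_mi and friends) are not part of this listing. For the F81 generator constructed above, effectively R = r * (outer(ones, v) - I), the matrix exponential has the closed form P(t) = exp(-r*t) * I + (1 - exp(-r*t)) * outer(ones, v), and the mutual information follows from the usual joint-versus-marginals formula. The sketch below shows that closed form under those assumptions; it is not the project's own implementation.

import math
import numpy as np

def f81_transition_matrix(r, v, t):
    # Closed form for F81: P_ij(t) = e^(-rt) * delta_ij + (1 - e^(-rt)) * v_j.
    v = np.asarray(v, dtype=float)
    decay = math.exp(-r * t)
    return decay * np.eye(len(v)) + (1.0 - decay) * np.outer(np.ones(len(v)), v)

def f81_mutual_information_sketch(r, v, t):
    # I(X_0; X_t) with joint J_ij = v_i * P_ij(t); illustrative, not get_f81_mi itself.
    v = np.asarray(v, dtype=float)
    P = f81_transition_matrix(r, v, t)
    J = v[:, np.newaxis] * P
    ref = np.outer(v, v)
    mask = J > 0
    return float(np.sum(J[mask] * np.log(J[mask] / ref[mask])))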
Example #10
def get_response_content(fs):
    M = get_input_matrix(fs)
    # create the R table string and scripts
    headers = ['t']
    if fs.show_entropy:
        headers.append('ub.entropy')
    headers.extend([
            'ub.jc.spectral',
            'ub.f81.spectral',
            'mutual.information',
            'lb.2.state.spectral',
            'lb.2.state',
            'lb.f81',
            ])
    npoints = 100
    t_low = fs.start_time
    t_high = fs.stop_time
    t_incr = (t_high - t_low) / (npoints - 1)
    t_values = [t_low + t_incr*i for i in range(npoints)]
    # define some extra stuff
    v = mrate.R_to_distn(M)
    entropy = -np.dot(v, np.log(v))
    n = len(M)
    gap = sorted(abs(x) for x in np.linalg.eigvals(M))[1]
    print 'stationary distn:', v
    print 'entropy:', entropy
    print 'spectral gap:', gap
    M_slow_jc = gap * (1.0 / n) * (np.ones((n,n)) - n*np.eye(n))
    M_slow_f81 = gap * np.outer(np.ones(n), v)
    M_slow_f81 -= np.diag(np.sum(M_slow_f81, axis=1))
    M_f81 = msimpl.get_fast_f81(M)
    M_2state = msimpl.get_fast_two_state_autobarrier(M)
    M_2state_spectral = -gap * M_2state / np.trace(M_2state)
    # get the data for the R table
    arr = []
    for u in t_values:
        # experiment with log time
        #t = math.exp(u)
        t = u
        mi_slow_jc = ctmcmi.get_mutual_information(M_slow_jc, t)
        mi_slow_f81 = ctmcmi.get_mutual_information(M_slow_f81, t)
        mi_mut = ctmcmi.get_mutual_information(M, t)
        mi_2state_spectral = ctmcmi.get_mutual_information(M_2state_spectral, t)
        mi_f81 = ctmcmi.get_mutual_information(M_f81, t)
        mi_2state = ctmcmi.get_mutual_information(M_2state, t)
        row = [u]
        if fs.show_entropy:
            row.append(entropy)
        row.extend([mi_slow_jc, mi_slow_f81,
                mi_mut, mi_2state_spectral, mi_2state, mi_f81])
        arr.append(row)
    # get the R table
    table_string = RUtil.get_table_string(arr, headers)
    # get the R script
    script = get_ggplot()
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter(
            table_string, script, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
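The comparison matrices above are built so that their slowest decay rate matches the spectral gap of M: the matrix (1.0 / n) * (J - n*I), with J the all-ones matrix, has eigenvalues 0 and -1 (the latter with multiplicity n - 1), so multiplying by gap yields a Jukes-Cantor-style generator whose nonzero eigenvalues are all exactly -gap; the rescaled F81 and two-state matrices are constructed in the same spirit. A quick numerical check of that claim (a standalone sketch, not part of the original script):

import numpy as np

n = 4
gap = 0.37  # placeholder value; in the script this is the spectral gap of the input matrix M
M_slow_jc = gap * (1.0 / n) * (np.ones((n, n)) - n * np.eye(n))
eigs = sorted(abs(x) for x in np.linalg.eigvals(M_slow_jc))
# the smallest nonzero eigenvalue magnitude should equal gap
print(abs(eigs[1] - gap) < 1e-12)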
Example #11
def get_response_content(fs):
    M, R = get_input_matrices(fs)
    M_v = mrate.R_to_distn(M)
    R_v = mrate.R_to_distn(R)
    t = fs.t
    mi_mut = ctmcmi.get_mutual_information(M, t)
    mi_bal = ctmcmi.get_mutual_information(R, t)
    fi_mut = divtime.get_fisher_information(M, t)
    fi_bal = divtime.get_fisher_information(R, t)
    if fs.info_mut:
        information_sign = np.sign(mi_mut - mi_bal)
    elif fs.info_fis:
        information_sign = np.sign(fi_mut - fi_bal)
    out = StringIO()
    print >> out, '<html>'
    print >> out, '<body>'
    print >> out
    print >> out, '<pre>'
    print >> out, 'Explicitly computed answer',
    print >> out, '(not a heuristic but may be numerically imprecise):'
    if information_sign == 1:
        print >> out, '* pure mutation',
        print >> out, 'is more informative'
    elif information_sign == -1:
        print >> out, '* the balance of mutation and selection',
        print >> out, 'is more informative'
    else:
        print >> out, '  the information contents of the two processes',
        print >> out, 'are numerically indistinguishable'
    print >> out
    print >> out
    if fs.info_mut:
        print >> out, 'Mutual information properties',
        print >> out, 'at very small and very large times:'
        print >> out
        print >> out, get_mi_asymptotics(M, R)
        print >> out
        print >> out
    print >> out, 'Heuristics without regard to time or to the selected',
    print >> out, 'information variant (Fisher vs. mutual information):'
    print >> out
    print >> out, get_heuristics(M, R)
    print >> out
    print >> out
    print >> out, 'Input summary:'
    print >> out
    print >> out, 'mutation rate matrix:'
    print >> out, M
    print >> out
    print >> out, 'mutation process stationary distribution:'
    print >> out, M_v
    print >> out
    print >> out, 'mutation-selection balance rate matrix:'
    print >> out, R
    print >> out
    print >> out, 'mutation-selection balance stationary distribution:'
    print >> out, R_v
    print >> out
    print >> out, 'mutation process expected rate:'
    print >> out, mrate.Q_to_expected_rate(M)
    print >> out
    print >> out, 'mutation-selection balance expected rate:'
    print >> out, mrate.Q_to_expected_rate(R)
    print >> out
    print >> out
    print >> out, 'The following information calculations',
    print >> out, 'depend on t = %s:' % t
    print >> out
    print >> out, 'log(ratio(E(L))) for pure mutation:'
    print >> out, ctmcmi.get_ll_ratio_wrong(M, t)
    print >> out
    print >> out, 'log(ratio(E(L))) for mut-sel balance:'
    print >> out, ctmcmi.get_ll_ratio_wrong(R, t)
    print >> out
    print >> out, 'mutual information for pure mutation:'
    print >> out, mi_mut
    print >> out
    print >> out, 'mutual information for mut-sel balance:'
    print >> out, mi_bal
    print >> out
    print >> out, 'pinsker lower bound mi for pure mutation:'
    print >> out, ctmcmi.get_pinsker_lower_bound_mi(M, t)
    print >> out
    print >> out, 'pinsker lower bound mi for mut-sel balance:'
    print >> out, ctmcmi.get_pinsker_lower_bound_mi(R, t)
    print >> out
    print >> out, 'row based pinsker lower bound mi for pure mutation:'
    print >> out, ctmcmi.get_row_based_plb_mi(M, t)
    print >> out
    print >> out, 'row based pinsker lower bound mi for mut-sel balance:'
    print >> out, ctmcmi.get_row_based_plb_mi(R, t)
    print >> out
    print >> out, 'row based hellinger lower bound mi for pure mutation:'
    print >> out, ctmcmi.get_row_based_hellinger_lb_mi(M, t)
    print >> out
    print >> out, 'row based hellinger lower bound mi for mut-sel balance:'
    print >> out, ctmcmi.get_row_based_hellinger_lb_mi(R, t)
    print >> out
    print >> out, 'Fisher information for pure mutation:'
    print >> out, fi_mut
    print >> out
    print >> out, 'Fisher information for mut-sel balance:'
    print >> out, fi_bal
    print >> out
    print >> out, '</pre>'
    #
    # create the summaries
    summaries = (RateMatrixSummary(M), RateMatrixSummary(R))
    print >> out, get_html_table(summaries)
    print >> out
    print >> out, '</body>'
    print >> out, '</html>'
    return out.getvalue()
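The Pinsker lower bounds reported above come from ctmcmi helpers that are not shown here. One standard bound of that flavor follows from Pinsker's inequality, KL(P || Q) >= 2 * TV(P, Q)^2 in nats, applied to the joint distribution of (X_0, X_t) versus the product of its marginals; whether get_pinsker_lower_bound_mi (or the row-based variants) uses exactly this form is an assumption. A generic sketch:

import numpy as np
import scipy.linalg

def pinsker_lower_bound_mi_sketch(R, pi, t):
    # Generic Pinsker-style lower bound on I(X_0; X_t); not the project's own helper.
    J = pi[:, np.newaxis] * scipy.linalg.expm(R * t)  # joint distribution
    ref = np.outer(pi, pi)                            # product of the marginals
    tv = 0.5 * np.sum(np.abs(J - ref))                # total variation distance
    return 2.0 * tv ** 2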
Example #12
def get_response_content(fs):
    out = StringIO()
    np.set_printoptions(linewidth=200)
    # get the user defined variables
    n = fs.nstates
    # sample a random rate matrix
    v = divtime.sample_distribution(n)
    S = divtime.sample_symmetric_rate_matrix(n)
    R = mrate.to_gtr_halpern_bruno(S, v)
    # get some properties of the rate matrix and its re-symmetrization
    S = mrate.symmetrized(R)
    distn = mrate.R_to_distn(R)
    w, U = np.linalg.eigh(S)
    D = np.diag(U.T[-1])**2
    D_inv = np.diag(np.reciprocal(U.T[-1]))**2
    for t in (1.0, 2.0):
        P = scipy.linalg.expm(R * t)
        M = ndot(D**.5, scipy.linalg.expm(S * t), D**.5)
        M_star = ndot(D_inv**.5, scipy.linalg.expm(S * t), D_inv**.5)
        M_star_log = np.log(M_star)
        M_star_log_w, M_star_log_U = np.linalg.eigh(M_star_log)
        E = M * np.log(M_star)
        E_w, E_U = np.linalg.eigh(E)
        print >> out, 't:'
        print >> out, t
        print >> out
        print >> out, 'randomly sampled rate matrix R'
        print >> out, R
        print >> out
        print >> out, 'symmetrized matrix S'
        print >> out, S
        print >> out
        print >> out, 'stationary distribution diagonal D'
        print >> out, D
        print >> out
        print >> out, 'R = D^-1/2 S D^1/2'
        print >> out, ndot(D_inv**.5, S, D**.5)
        print >> out
        print >> out, 'probability matrix e^(R*t) = P'
        print >> out, P
        print >> out
        print >> out, 'P = D^-1/2 e^(S*t) D^1/2'
        print >> out, ndot(D_inv**.5, scipy.linalg.expm(S * t), D**.5)
        print >> out
        print >> out, 'pairwise distribution matrix M'
        print >> out, 'M = D^1/2 e^(S*t) D^1/2'
        print >> out, M
        print >> out
        print >> out, 'sum of entries of M'
        print >> out, np.sum(M)
        print >> out
        print >> out, 'M_star = D^-1/2 e^(S*t) D^-1/2'
        print >> out, M_star
        print >> out
        print >> out, 'entrywise logarithm logij(M_star)'
        print >> out, np.log(M_star)
        print >> out
        print >> out, 'Hadamard product M o logij(M_star) = E'
        print >> out, E
        print >> out
        print >> out, 'spectrum of M:'
        print >> out, np.linalg.eigvalsh(M)
        print >> out
        print >> out, 'spectrum of logij(M_star):'
        print >> out, M_star_log_w
        print >> out
        print >> out, 'corresponding eigenvectors of logij(M_star) as columns:'
        print >> out, M_star_log_U
        print >> out
        print >> out, 'spectrum of E:'
        print >> out, E_w
        print >> out
        print >> out, 'corresponding eigenvectors of E as columns:'
        print >> out, E_U
        print >> out
        print >> out, 'entrywise square roots of stationary distribution:'
        print >> out, np.sqrt(v)
        print >> out
        print >> out, 'sum of entries of E:'
        print >> out, np.sum(E)
        print >> out
        print >> out, 'mutual information:'
        print >> out, ctmcmi.get_mutual_information(R, t)
        print >> out
        print >> out
    return out.getvalue()
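The reason the 'sum of entries of E' line reproduces the mutual information: with D = diag(v), the entries of M = D^1/2 e^(S*t) D^1/2 are v_i * P_ij(t), the joint distribution of the state at times 0 and t, while the entries of M_star = D^-1/2 e^(S*t) D^-1/2 are P_ij(t) / v_j, the joint divided by the product of the marginals. So sum(M o logij(M_star)) = sum_ij J_ij * log(J_ij / (v_i * v_j)), which is I(X_0; X_t). A small self-contained check of that identity, using an arbitrary reversible generator rather than the project's sampling helpers:

import numpy as np
import scipy.linalg

# an arbitrary reversible generator: X is a symmetric exchangeability matrix, v its stationary distn
v = np.array([0.1, 0.2, 0.3, 0.4])
X = np.array([
    [0.0, 1.0, 2.0, 1.0],
    [1.0, 0.0, 1.0, 3.0],
    [2.0, 1.0, 0.0, 1.0],
    [1.0, 3.0, 1.0, 0.0]])
R = X * v                      # R_ij = X_ij * v_j for i != j
R -= np.diag(R.sum(axis=1))    # make the row sums zero

t = 1.5
D_half = np.diag(np.sqrt(v))
D_half_inv = np.diag(1.0 / np.sqrt(v))
S = D_half.dot(R).dot(D_half_inv)                                   # symmetrized matrix
M = D_half.dot(scipy.linalg.expm(S * t)).dot(D_half)                # entries v_i * P_ij(t)
M_star = D_half_inv.dot(scipy.linalg.expm(S * t)).dot(D_half_inv)   # entries P_ij(t) / v_j
E = M * np.log(M_star)

# direct mutual information from the joint distribution
P = scipy.linalg.expm(R * t)
J = v[:, np.newaxis] * P
mi_direct = np.sum(J * np.log(J / np.outer(v, v)))
print(abs(np.sum(E) - mi_direct) < 1e-9)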
Example #13
def get_response_content(fs):
    out = StringIO()
    np.set_printoptions(linewidth=200)
    # get the user defined variables
    n = fs.nstates
    # sample a random rate matrix
    v = divtime.sample_distribution(n)
    S = divtime.sample_symmetric_rate_matrix(n)
    R = mrate.to_gtr_halpern_bruno(S, v)
    # get some properties of the rate matrix and its re-symmetrization
    S = mrate.symmetrized(R)
    distn = mrate.R_to_distn(R)
    w, U = np.linalg.eigh(S)
    D = np.diag(U.T[-1])**2
    D_inv = np.diag(np.reciprocal(U.T[-1]))**2
    for t in (1.0, 2.0):
        P = scipy.linalg.expm(R*t)
        M = ndot(D**.5, scipy.linalg.expm(S*t), D**.5)
        M_star = ndot(D_inv**.5, scipy.linalg.expm(S*t), D_inv**.5)
        M_star_log = np.log(M_star)
        M_star_log_w, M_star_log_U = np.linalg.eigh(M_star_log)
        E = M * np.log(M_star)
        E_w, E_U = np.linalg.eigh(E)
        print >> out, 't:'
        print >> out, t
        print >> out
        print >> out, 'randomly sampled rate matrix R'
        print >> out, R
        print >> out
        print >> out, 'symmetrized matrix S'
        print >> out, S
        print >> out
        print >> out, 'stationary distribution diagonal D'
        print >> out, D
        print >> out
        print >> out, 'R = D^-1/2 S D^1/2'
        print >> out, ndot(D_inv**.5, S, D**.5)
        print >> out
        print >> out, 'probability matrix e^(R*t) = P'
        print >> out, P
        print >> out
        print >> out, 'P = D^-1/2 e^(S*t) D^1/2'
        print >> out, ndot(D_inv**.5, scipy.linalg.expm(S*t), D**.5)
        print >> out
        print >> out, 'pairwise distribution matrix M'
        print >> out, 'M = D^1/2 e^(S*t) D^1/2'
        print >> out, M
        print >> out
        print >> out, 'sum of entries of M'
        print >> out, np.sum(M)
        print >> out
        print >> out, 'M_star = D^-1/2 e^(S*t) D^-1/2'
        print >> out, M_star
        print >> out
        print >> out, 'entrywise logarithm logij(M_star)'
        print >> out, np.log(M_star)
        print >> out
        print >> out, 'Hadamard product M o logij(M_star) = E'
        print >> out, E
        print >> out
        print >> out, 'spectrum of M:'
        print >> out, np.linalg.eigvalsh(M)
        print >> out
        print >> out, 'spectrum of logij(M_star):'
        print >> out, M_star_log_w
        print >> out
        print >> out, 'corresponding eigenvectors of logij(M_star) as columns:'
        print >> out, M_star_log_U
        print >> out
        print >> out, 'spectrum of E:'
        print >> out, E_w
        print >> out
        print >> out, 'corresponding eigenvectors of E as columns:'
        print >> out, E_U
        print >> out
        print >> out, 'entrywise square roots of stationary distribution:'
        print >> out, np.sqrt(v)
        print >> out
        print >> out, 'sum of entries of E:'
        print >> out, np.sum(E)
        print >> out
        print >> out, 'mutual information:'
        print >> out, ctmcmi.get_mutual_information(R, t)
        print >> out
        print >> out
    return out.getvalue()
Example #14
def sample_row():
    n = 4
    # sample the exchangeability
    S = np.zeros((n, n))
    S[1, 0] = random.expovariate(1)
    S[2, 0] = random.expovariate(1)
    S[2, 1] = random.expovariate(1)
    S[3, 0] = random.expovariate(1)
    S[3, 1] = random.expovariate(1)
    S[3, 2] = random.expovariate(1)
    # sample the mutation stationary distribution
    mdistn = np.array([random.expovariate(1) for i in range(n)])
    mdistn /= np.sum(mdistn)
    # sample the mutation selection balance stationary distribution
    bdistn = np.array([random.expovariate(1) for i in range(n)])
    bdistn /= np.sum(bdistn)
    # sample the time
    t = random.expovariate(1)
    # sample the info type
    infotype = random.choice(('infotype.mi', 'infotype.fi'))
    # Compute some intermediate variables
    # from which the summary statistics and the label are computed.
    S = S + S.T
    M = S * mdistn
    M -= np.diag(np.sum(M, axis=1))
    R = mrate.to_gtr_halpern_bruno(M, bdistn)
    shannon_ent_mut = -sum(p * log(p) for p in mdistn)
    shannon_ent_bal = -sum(p * log(p) for p in bdistn)
    logical_ent_mut = 1.0 - sum(p * p for p in mdistn)
    logical_ent_bal = 1.0 - sum(p * p for p in bdistn)
    expected_rate_mut = mrate.Q_to_expected_rate(M)
    expected_rate_bal = mrate.Q_to_expected_rate(R)
    spectral_rate_mut = 1 / mrate.R_to_relaxation_time(M)
    spectral_rate_bal = 1 / mrate.R_to_relaxation_time(R)
    mi_mut = ctmcmi.get_mutual_information(M, t)
    mi_bal = ctmcmi.get_mutual_information(R, t)
    fi_mut = divtime.get_fisher_information(M, t)
    fi_bal = divtime.get_fisher_information(R, t)
    # compute the summary statistics
    summary_entries = [
        shannon_ent_bal - shannon_ent_mut,
        logical_ent_bal - logical_ent_mut,
        log(shannon_ent_bal) - log(shannon_ent_mut),
        log(logical_ent_bal) - log(logical_ent_mut),
        expected_rate_bal - expected_rate_mut,
        spectral_rate_bal - spectral_rate_mut,
        log(expected_rate_bal) - log(expected_rate_mut),
        log(spectral_rate_bal) - log(spectral_rate_mut),
        mi_bal - mi_mut,
        fi_bal - fi_mut,
        math.log(mi_bal) - math.log(mi_mut),
        math.log(fi_bal) - math.log(fi_mut),
    ]
    # get the definition entries
    definition_entries = [
        S[1, 0],
        S[2, 0],
        S[2, 1],
        S[3, 0],
        S[3, 1],
        S[3, 2],
        mdistn[0],
        mdistn[1],
        mdistn[2],
        mdistn[3],
        bdistn[0],
        bdistn[1],
        bdistn[2],
        bdistn[3],
        infotype,
        t,
    ]
    # define the label
    if infotype == 'infotype.mi' and mi_mut > mi_bal:
        label = 'mut.is.better'
    elif infotype == 'infotype.mi' and mi_mut < mi_bal:
        label = 'bal.is.better'
    elif infotype == 'infotype.fi' and fi_mut > fi_bal:
        label = 'mut.is.better'
    elif infotype == 'infotype.fi' and fi_mut < fi_bal:
        label = 'bal.is.better'
    else:
        label = 'indistinguishable'
    # return the row
    return definition_entries + summary_entries + [label]
Example #15
 def __call__(self):
     """
     Look for a counterexample.
     """
     n = self.nstates
     # sample a random rate and time and stationary distribution
     r = random.expovariate(1)
     t = random.expovariate(1)
     v = np.random.exponential(1, n)
     v /= np.sum(v)
     # construct the F81 rate matrix
     R = r * np.outer(np.ones(n), v)
     R -= np.diag(np.sum(R, axis=1))
     # get some information criterion values
     mi_general = ctmcmi.get_mutual_information(R, t)
     fi_general = divtime.get_fisher_information(R, t)
     pollock_general = get_gtr_pollock(R, t)
     mi_f81 = get_f81_mi(r, v, t)
     fi_f81 = get_f81_fi(r, v, t)
     pollock_f81 = get_f81_pollock(r, v, t)
     if n == 2:
         fi_f81_2state = get_f81_fi_2state(r, v, t)
     # check for contradictions
     try:
         if not np.allclose(mi_general, mi_f81):
             raise Contradiction('mutual information')
         if not np.allclose(fi_general, fi_f81):
             raise Contradiction('fisher information')
         if not np.allclose(pollock_general, pollock_f81):
             raise Contradiction('neg slope identity proportion')
         if n == 2:
             if not np.allclose(fi_general, fi_f81_2state):
                 raise Contradiction('fisher information (2-state)')
     except Contradiction as e:
         out = StringIO()
         print >> out, 'found', str(e), 'contradiction'
         print >> out
         print >> out, 'GTR mutual information:'
         print >> out, mi_general
         print >> out
         print >> out, 'F81 mutual information:'
         print >> out, mi_f81
         print >> out
         print >> out, 'GTR Fisher information:'
         print >> out, fi_general
         print >> out
         print >> out, 'F81 Fisher information:'
         print >> out, fi_f81
         print >> out
         if n == 2:
             print >> out, 'F81 2-state Fisher information:'
             print >> out, fi_f81_2state
             print >> out
         print >> out, 'GTR neg slope identity proportion:'
         print >> out, pollock_general
         print >> out
         print >> out, 'F81 neg slope identity proportion:'
         print >> out, pollock_f81
         print >> out
         self.counterexample = out.getvalue()
         return True
     return False
Example #16
def sample_row():
    n = 4
    # sample the exchangeability
    S = np.zeros((n, n))
    S[1,0] = random.expovariate(1)
    S[2,0] = random.expovariate(1)
    S[2,1] = random.expovariate(1)
    S[3,0] = random.expovariate(1)
    S[3,1] = random.expovariate(1)
    S[3,2] = random.expovariate(1)
    # sample the mutation stationary distribution
    mdistn = np.array([random.expovariate(1) for i in range(n)])
    mdistn /= np.sum(mdistn)
    # sample the mutation selection balance stationary distribution
    bdistn = np.array([random.expovariate(1) for i in range(n)])
    bdistn /= np.sum(bdistn)
    # sample the time
    t = random.expovariate(1)
    # sample the info type
    infotype = random.choice(('infotype.mi', 'infotype.fi'))
    # Compute some intermediate variables
    # from which the summary statistics and the label are computed.
    S = S + S.T
    M = S * mdistn
    M -= np.diag(np.sum(M, axis=1))
    R = mrate.to_gtr_halpern_bruno(M, bdistn)
    shannon_ent_mut = -sum(p*log(p) for p in mdistn)
    shannon_ent_bal = -sum(p*log(p) for p in bdistn)
    logical_ent_mut = 1.0 - sum(p*p for p in mdistn)
    logical_ent_bal = 1.0 - sum(p*p for p in bdistn)
    expected_rate_mut = mrate.Q_to_expected_rate(M)
    expected_rate_bal = mrate.Q_to_expected_rate(R)
    spectral_rate_mut = 1 / mrate.R_to_relaxation_time(M)
    spectral_rate_bal = 1 / mrate.R_to_relaxation_time(R)
    mi_mut = ctmcmi.get_mutual_information(M, t)
    mi_bal = ctmcmi.get_mutual_information(R, t)
    fi_mut = divtime.get_fisher_information(M, t)
    fi_bal = divtime.get_fisher_information(R, t)
    # compute the summary statistics
    summary_entries = [
            shannon_ent_bal - shannon_ent_mut,
            logical_ent_bal - logical_ent_mut,
            log(shannon_ent_bal) - log(shannon_ent_mut),
            log(logical_ent_bal) - log(logical_ent_mut),
            expected_rate_bal - expected_rate_mut,
            spectral_rate_bal - spectral_rate_mut,
            log(expected_rate_bal) - log(expected_rate_mut),
            log(spectral_rate_bal) - log(spectral_rate_mut),
            mi_bal - mi_mut,
            fi_bal - fi_mut,
            math.log(mi_bal) - math.log(mi_mut),
            math.log(fi_bal) - math.log(fi_mut),
            ]
    # get the definition entries
    definition_entries = [
            S[1,0], S[2,0], S[2,1], S[3,0], S[3,1], S[3,2],
            mdistn[0], mdistn[1], mdistn[2], mdistn[3],
            bdistn[0], bdistn[1], bdistn[2], bdistn[3],
            infotype,
            t,
            ]
    # define the label
    if infotype == 'infotype.mi' and mi_mut > mi_bal:
        label = 'mut.is.better'
    elif infotype == 'infotype.mi' and mi_mut < mi_bal:
        label = 'bal.is.better'
    elif infotype == 'infotype.fi' and fi_mut > fi_bal:
        label = 'mut.is.better'
    elif infotype == 'infotype.fi' and fi_mut < fi_bal:
        label = 'bal.is.better'
    else:
        label = 'indistinguishable'
    # return the row
    return definition_entries + summary_entries + [label]
Example #17
def get_response_content(fs):
    out = StringIO()
    np.set_printoptions(linewidth=200)
    # get the user defined variables
    n = fs.nstates
    t = fs.divtime
    #h = fs.delta
    # sample a random rate matrix
    v = divtime.sample_distribution(n)
    S = divtime.sample_symmetric_rate_matrix(n)
    R = mrate.to_gtr_halpern_bruno(S, v)
    # get some properties of the rate matrix
    distn = mrate.R_to_distn(R)
    spectrum = np.linalg.eigvalsh(mrate.symmetrized(R))
    #spectrum, U = np.linalg.eigh(mrate.symmetrized(R))
    #spectrum = np.linalg.eigvals(R)
    # report some information about the mutual information curve
    mi = ctmcmi.get_mutual_information(R, t)
    mi_diff = ctmcmi.get_mutual_information_diff(R, t)
    mi_diff_b = ctmcmi.get_mutual_information_diff_b(R, t)
    mi_diff_c = ctmcmi.get_mutual_information_diff_c(R, t)
    print >> out, 'arbitrary large-ish divergence time:'
    print >> out, t
    print >> out
    print >> out, 'randomly sampled reversible rate matrix:'
    print >> out, R
    print >> out
    print >> out, 'stationary distribution:'
    print >> out, distn
    print >> out
    print >> out, 'spectrum of the rate matrix:'
    print >> out, spectrum
    print >> out
    print >> out, 'mutual information at t = %f:' % t
    print >> out, mi
    print >> out
    print >> out, 'mutual information at t = %f (ver. 2):' % t
    print >> out, ctmcmi.get_mutual_information_b(R, t)
    print >> out
    print >> out, 'large t approximation of MI at t = %f:' % t
    print >> out, ctmcmi.get_mutual_information_approx(R, t)
    print >> out
    print >> out, 'large t approximation of MI at t = %f (ver. 2):' % t
    print >> out, ctmcmi.get_mutual_information_approx_b(R, t)
    print >> out
    print >> out, 'large t approximation of MI at t = %f (ver. 3):' % t
    print >> out, ctmcmi.cute_MI_alternate(R, t)
    print >> out
    print >> out, 'large t approximation of MI at t = %f (ver. 4):' % t
    print >> out, ctmcmi.get_mutual_information_approx_c(R, t)
    print >> out
    print >> out, 'small t approximation of MI at t = %f:' % t
    print >> out, ctmcmi.get_mutual_information_small_approx(R, t)
    print >> out
    print >> out, 'small t approximation of MI at t = %f (ver. 2):' % t
    print >> out, ctmcmi.get_mutual_information_small_approx_b(R, t)
    print >> out
    print >> out, 'small t approximation of MI at t = %f (ver. 3):' % t
    print >> out, ctmcmi.get_mutual_information_small_approx_c(R, t)
    print >> out
    print >> out, 'small t approximation of MI at t = %f (ver. 4):' % t
    print >> out, ctmcmi.get_mutual_information_small_approx_d(R, t)
    print >> out
    print >> out, 'mutual information diff at t = %f:' % t
    print >> out, mi_diff
    print >> out
    print >> out, 'mutual information diff at t = %f (ver. 2):' % t
    print >> out, mi_diff_b
    print >> out
    print >> out, 'mutual information diff at t = %f (ver. 3):' % t
    print >> out, mi_diff_c
    print >> out
    print >> out, 'large t approximation of MI diff at t = %f:' % t
    print >> out, ctmcmi.get_mutual_information_diff_approx(R, t)
    print >> out
    print >> out, 'large t approximation of MI diff at t = %f: (ver. 2)' % t
    print >> out, ctmcmi.get_mutual_information_diff_approx_b(R, t)
    print >> out
    print >> out, 'large t approximation of MI diff at t = %f: (ver. 4)' % t
    print >> out, ctmcmi.get_mutual_information_diff_approx_c(R, t)
    print >> out
    print >> out, 'log of mutual information at t = %f:' % t
    print >> out, math.log(mi)
    print >> out
    #print >> out, 'estimated derivative',
    #print >> out, 'of log of mutual information at t = %f:' % t
    #print >> out, (math.log(mi_c) - math.log(mi_a)) / (2*h)
    #print >> out
    print >> out, 'estimated derivative of log of MI',
    print >> out, 'at t = %f:' % t
    print >> out, mi_diff / mi
    print >> out
    print >> out, 'large t approximation of derivative of log of MI',
    print >> out, 'at t = %f:' % t
    print >> out, ctmcmi.get_mutual_information_diff_approx(R,
            t) / ctmcmi.get_mutual_information_approx(R, t)
    print >> out
    print >> out, 'large t approximation of derivative of log of MI',
    print >> out, 'at t = %f (ver. 2):' % t
    print >> out, ctmcmi.get_mutual_information_diff_approx_b(R,
            t) / ctmcmi.get_mutual_information_approx_b(R, t)
    print >> out
    print >> out, 'twice the relevant eigenvalue:'
    print >> out, 2 * spectrum[-2]
    print >> out
    print >> out
    #print >> out, 'estimated derivative',
    #print >> out, 'of mutual information at t = %f:' % t
    #print >> out, (mi_c - mi_a) / (2*h)
    #print >> out
    #print >> out, '(estimated derivative of mutual information) /',
    #print >> out, '(mutual information) at t = %f:' % t
    #print >> out, (mi_c - mi_a) / (2*h*mi_b)
    #print >> out
    return out.getvalue()
Example #18
def get_response_content(fs):
    M, R = get_input_matrices(fs)
    M_v = mrate.R_to_distn(M)
    R_v = mrate.R_to_distn(R)
    t = fs.t
    mi_mut = ctmcmi.get_mutual_information(M, t)
    mi_bal = ctmcmi.get_mutual_information(R, t)
    fi_mut = divtime.get_fisher_information(M, t)
    fi_bal = divtime.get_fisher_information(R, t)
    if fs.info_mut:
        information_sign = np.sign(mi_mut - mi_bal)
    elif fs.info_fis:
        information_sign = np.sign(fi_mut - fi_bal)
    out = StringIO()
    print >> out, '<html>'
    print >> out, '<body>'
    print >> out
    print >> out, '<pre>'
    print >> out, 'Explicitly computed answer',
    print >> out, '(not a heuristic but may be numerically imprecise):'
    if information_sign == 1:
        print >> out, '* pure mutation',
        print >> out, 'is more informative'
    elif information_sign == -1:
        print >> out, '* the balance of mutation and selection',
        print >> out, 'is more informative'
    else:
        print >> out, '  the information contents of the two processes',
        print >> out, 'are numerically indistinguishable'
    print >> out
    print >> out
    if fs.info_mut:
        print >> out, 'Mutual information properties',
        print >> out, 'at very small and very large times:'
        print >> out
        print >> out, get_mi_asymptotics(M, R)
        print >> out
        print >> out
    print >> out, 'Heuristics without regard to time or to the selected',
    print >> out, 'information variant (Fisher vs. mutual information):'
    print >> out
    print >> out, get_heuristics(M, R)
    print >> out
    print >> out
    print >> out, 'Input summary:'
    print >> out
    print >> out, 'mutation rate matrix:'
    print >> out, M
    print >> out
    print >> out, 'mutation process stationary distribution:'
    print >> out, M_v
    print >> out
    print >> out, 'mutation-selection balance rate matrix:'
    print >> out, R
    print >> out
    print >> out, 'mutation-selection balance stationary distribution:'
    print >> out, R_v
    print >> out
    print >> out, 'mutation process expected rate:'
    print >> out, mrate.Q_to_expected_rate(M)
    print >> out
    print >> out, 'mutation-selection balance expected rate:'
    print >> out, mrate.Q_to_expected_rate(R)
    print >> out
    print >> out
    print >> out, 'The following information calculations',
    print >> out, 'depend on t = %s:' % t
    print >> out
    print >> out, 'log(ratio(E(L))) for pure mutation:'
    print >> out, ctmcmi.get_ll_ratio_wrong(M, t)
    print >> out
    print >> out, 'log(ratio(E(L))) for mut-sel balance:'
    print >> out, ctmcmi.get_ll_ratio_wrong(R, t)
    print >> out
    print >> out, 'mutual information for pure mutation:'
    print >> out, mi_mut
    print >> out
    print >> out, 'mutual information for mut-sel balance:'
    print >> out, mi_bal
    print >> out
    print >> out, 'pinsker lower bound mi for pure mutation:'
    print >> out, ctmcmi.get_pinsker_lower_bound_mi(M, t)
    print >> out
    print >> out, 'pinsker lower bound mi for mut-sel balance:'
    print >> out, ctmcmi.get_pinsker_lower_bound_mi(R, t)
    print >> out
    print >> out, 'row based pinsker lower bound mi for pure mutation:'
    print >> out, ctmcmi.get_row_based_plb_mi(M, t)
    print >> out
    print >> out, 'row based pinsker lower bound mi for mut-sel balance:'
    print >> out, ctmcmi.get_row_based_plb_mi(R, t)
    print >> out
    print >> out, 'row based hellinger lower bound mi for pure mutation:'
    print >> out, ctmcmi.get_row_based_hellinger_lb_mi(M, t)
    print >> out
    print >> out, 'row based hellinger lower bound mi for mut-sel balance:'
    print >> out, ctmcmi.get_row_based_hellinger_lb_mi(R, t)
    print >> out
    print >> out, 'Fisher information for pure mutation:'
    print >> out, fi_mut
    print >> out
    print >> out, 'Fisher information for mut-sel balance:'
    print >> out, fi_bal
    print >> out
    print >> out, '</pre>'
    #
    # create the summaries
    summaries = (RateMatrixSummary(M), RateMatrixSummary(R))
    print >> out, get_html_table(summaries)
    print >> out
    print >> out, '</body>'
    print >> out, '</html>'
    return out.getvalue()
Example #19
def get_time_point_summary(Q_mut, Q_sels, t):
    """
    @param Q_mut: the mutation rate matrix
    @param Q_sels: sequence of mutation-selection rate matrices
    @param t: the time point under consideration
    @return: a sequence of statistics
    """
    # Compute the following statistics at this time point:
    # t
    # mutation MI
    # selection MI max
    # selection MI high
    # selection MI mean
    # selection MI low
    # selection MI min
    # correlation fn 1
    # correlation fn 2
    # correlation fn 3
    # correlation fn 4
    # correlation fn 5
    # proportion sign agreement fn 1
    # proportion sign agreement fn 2
    # proportion sign agreement fn 3
    # proportion sign agreement fn 4
    # proportion sign agreement fn 5
    # informativeness fn 1
    # informativeness fn 2
    # informativeness fn 3
    # informativeness fn 4
    # informativeness fn 5
    # mutual information proportion
    #
    # First compute the mutual information for mut and mut-sel.
    nsels = len(Q_sels)
    mi_mut = ctmcmi.get_mutual_information(Q_mut, t)
    mi_sels = [ctmcmi.get_mutual_information(Q, t) for Q in Q_sels]
    mi_signs = [1 if mi_sel > mi_mut else -1 for mi_sel in mi_sels]
    # Now compute some other functions
    v0 = [ctmcmi.get_mutual_information_small_approx_c(Q, t) for Q in Q_sels]
    v1 = [ctmcmi.get_mutual_information_small_approx(Q, t) for Q in Q_sels]
    v2 = [ctmcmi.get_mutual_information_approx_c(Q, t) for Q in Q_sels]
    v3 = [math.exp(-2*t/mrate.R_to_relaxation_time(Q)) for Q in Q_sels]
    v4 = [math.exp(-t*mrate.Q_to_expected_rate(Q)) for Q in Q_sels]
    # Now that we have computed all of the vectors at this time point,
    # we can compute the statistics that we want to report.
    statistics = []
    statistics.append(t)
    statistics.append(mi_mut)
    # add the mutual information statistics
    sorted_mi = sorted(mi_sels)
    n_extreme = nsels / 20  # size of each 5% tail (Python 2 integer division; degenerates if nsels < 20)
    statistics.append(sorted_mi[-1])
    statistics.append(sorted_mi[-n_extreme])
    statistics.append(sum(sorted_mi) / nsels)
    statistics.append(sorted_mi[n_extreme-1])
    statistics.append(sorted_mi[0])
    # add the correlations
    for v in (v0, v1, v2, v3, v4):
        r, p = scipy.stats.stats.pearsonr(v, mi_sels)
        statistics.append(r)
    # add the sign proportions
    for v in (v0, v1, v2, v3, v4):
        v_signs = [1 if value > mi_mut else -1 for value in v]
        total = sum(1 for a, b in zip(mi_signs, v_signs) if a == b)
        p = float(total) / nsels
        statistics.append(p)
    # add the informativenesses
    for v in (v0, v1, v2, v3, v4):
        v_signs = [1 if value > mi_mut else -1 for value in v]
        informativeness = 0
        for pair in ((1, 1), (1, -1), (-1, 1), (-1, -1)):
            v_value, m_value = pair
            v_marginal_count = sum(1 for x in v_signs if x == v_value)
            m_marginal_count = sum(1 for x in mi_signs if x == m_value)
            joint_count = sum(1 for x in zip(v_signs, mi_signs) if x == pair)
            if joint_count:
                joint_prob = joint_count / float(nsels)
                a = math.log(joint_prob)
                b = math.log(v_marginal_count / float(nsels))
                c = math.log(m_marginal_count / float(nsels))
                informativeness += joint_prob * (a - b - c)
        statistics.append(informativeness)
    # add the mutual information sign proportion
    statistics.append(sum(1 for x in mi_signs if x == 1) / float(nsels))
    return statistics