def get_response_content(fs):
    """Plot true mutual information and its log-likelihood-ratio analog.

    Curves are drawn for both the pure mutation process and the
    mutation-selection balance process over a fixed time grid.
    @param fs: collection of user-specified settings
    @return: image data in the requested format
    """
    Q_mut, Q_mutsel = get_input_matrices(fs)
    # column names for the R table
    column_names = [
            't',
            'mi.true.mut',
            'mi.true.mutsel',
            'mi.analog.mut',
            'mi.analog.mutsel']
    # evenly spaced time points on the closed interval [0.0, 5.0]
    nsamples = 100
    t_begin = 0.0
    t_end = 5.0
    delta = (t_end - t_begin) / (nsamples - 1)
    # build one table row per time point
    data_rows = []
    for k in range(nsamples):
        t = t_begin + delta * k
        data_rows.append([
            t,
            ctmcmi.get_mutual_information(Q_mut, t),
            ctmcmi.get_mutual_information(Q_mutsel, t),
            ctmcmi.get_ll_ratio_wrong(Q_mut, t),
            ctmcmi.get_ll_ratio_wrong(Q_mutsel, t)])
    # convert the rows into an R table string
    table_string = RUtil.get_table_string(data_rows, column_names)
    # get the R script that draws the plot
    script = get_ggplot()
    # run the R plotter to create the image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter(
            table_string, script, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
def get_response_content(fs): M, R = get_input_matrices(fs) # create the R table string and scripts headers = [ 't', 'mi.true.mut', 'mi.true.mutsel', 'mi.analog.mut', 'mi.analog.mutsel'] npoints = 100 t_low = 0.0 t_high = 5.0 t_incr = (t_high - t_low) / (npoints - 1) t_values = [t_low + t_incr*i for i in range(npoints)] # get the data for the R table arr = [] for t in t_values: mi_mut = ctmcmi.get_mutual_information(M, t) mi_mutsel = ctmcmi.get_mutual_information(R, t) mi_analog_mut = ctmcmi.get_ll_ratio_wrong(M, t) mi_analog_mutsel = ctmcmi.get_ll_ratio_wrong(R, t) row = [t, mi_mut, mi_mutsel, mi_analog_mut, mi_analog_mutsel] arr.append(row) # get the R table table_string = RUtil.get_table_string(arr, headers) # get the R script script = get_ggplot() # create the R plot image device_name = Form.g_imageformat_to_r_function[fs.imageformat] retcode, r_out, r_err, image_data = RUtil.run_plotter( table_string, script, device_name) if retcode: raise RUtil.RError(r_err) return image_data
def test_large_variance(self):
    # Print a battery of variance and information statistics for a
    # randomly sampled reversible (Halpern-Bruno) rate matrix.
    # This is exploratory: it prints values for eyeballing rather
    # than asserting anything.
    n = 4
    v = sample_distribution(n)
    S = sample_symmetric_rate_matrix(n)
    R = mrate.to_gtr_halpern_bruno(S, v)
    # A disabled hardcoded 3-state example, kept as a dead string.
    """
    a = .1
    b = .2
    c = .7
    R = np.array([
        [-(b+c), b, c],
        [a, -(a+c), c],
        [a, b, -(a+b)]])
    """
    t = 2.0
    # small step for the forward finite-difference slope estimate
    dt = 0.0000001
    rtime = mrate.R_to_relaxation_time(R)
    var_a = get_ml_variance(R, t)
    var_b = get_ml_variance(R, t+dt)
    # forward-difference estimate of the variance derivative
    var_slope = (var_b - var_a) / dt
    deriv_ratio = get_p_id_deriv_ratio(R, t)
    clever_ratio = get_ml_variance_ratio(R, t)
    print 'time:', t
    print 'variance:', var_a
    print 'variance slope:', var_slope
    print 'var_slope / var_a:', var_slope / var_a
    print 'var_slope / var_a [clever]:', clever_ratio
    print 'log variance:', math.log(var_a)
    print 'relaxation time:', rtime
    print '2 / relaxation_time:', 2 / rtime
    print "p_id(t)'' / p_id(t)':", deriv_ratio
    print
    print '--- new attempt ---'
    print 'mutual information:', ctmcmi.get_mutual_information(R, t)
    print 'reciprocal of MI:', 1.0 / ctmcmi.get_mutual_information(R, t)
    print 'asymptotic variance:', get_asymptotic_variance(R, t)
    print 'asymptotic variance (ver. 2):', get_asymptotic_variance_b(R, t)
    print 'asymptotic variance (ver. 3):', get_asymptotic_variance_c(R, t)
    print 'AV approx (ver. 4):', get_asymptotic_variance_d(R, t)
    print 'AV approx (ver. 5):', get_asymptotic_variance_e(R, t)
    print
    print '--- another thing ---'
    # compare the slow and fast Fisher information implementations
    fi_slow = get_fisher_info_known_distn(R, v, t)
    fi_fast = get_fisher_info_known_distn_fast(R, v, t)
    print 'slow asymptotic variance:', 1 / fi_slow
    print 'fast asymptotic variance:', 1 / fi_fast
    print
def test_large_variance(self):
    # Exploratory duplicate of the large-variance diagnostic:
    # samples a reversible (Halpern-Bruno) rate matrix and prints
    # variance, relaxation-time, and information statistics.
    n = 4
    v = sample_distribution(n)
    S = sample_symmetric_rate_matrix(n)
    R = mrate.to_gtr_halpern_bruno(S, v)
    # A disabled hardcoded 3-state example, kept as a dead string.
    """
    a = .1
    b = .2
    c = .7
    R = np.array([
        [-(b+c), b, c],
        [a, -(a+c), c],
        [a, b, -(a+b)]])
    """
    t = 2.0
    # small step for the forward finite-difference slope estimate
    dt = 0.0000001
    rtime = mrate.R_to_relaxation_time(R)
    var_a = get_ml_variance(R, t)
    var_b = get_ml_variance(R, t + dt)
    # forward-difference estimate of the variance derivative
    var_slope = (var_b - var_a) / dt
    deriv_ratio = get_p_id_deriv_ratio(R, t)
    clever_ratio = get_ml_variance_ratio(R, t)
    print 'time:', t
    print 'variance:', var_a
    print 'variance slope:', var_slope
    print 'var_slope / var_a:', var_slope / var_a
    print 'var_slope / var_a [clever]:', clever_ratio
    print 'log variance:', math.log(var_a)
    print 'relaxation time:', rtime
    print '2 / relaxation_time:', 2 / rtime
    print "p_id(t)'' / p_id(t)':", deriv_ratio
    print
    print '--- new attempt ---'
    print 'mutual information:', ctmcmi.get_mutual_information(R, t)
    print 'reciprocal of MI:', 1.0 / ctmcmi.get_mutual_information(R, t)
    print 'asymptotic variance:', get_asymptotic_variance(R, t)
    print 'asymptotic variance (ver. 2):', get_asymptotic_variance_b(R, t)
    print 'asymptotic variance (ver. 3):', get_asymptotic_variance_c(R, t)
    print 'AV approx (ver. 4):', get_asymptotic_variance_d(R, t)
    print 'AV approx (ver. 5):', get_asymptotic_variance_e(R, t)
    print
    print '--- another thing ---'
    # compare the slow and fast Fisher information implementations
    fi_slow = get_fisher_info_known_distn(R, v, t)
    fi_fast = get_fisher_info_known_distn_fast(R, v, t)
    print 'slow asymptotic variance:', 1 / fi_slow
    print 'fast asymptotic variance:', 1 / fi_fast
    print
def get_time_point_summary(Q_mut, Q_sels, t):
    """
    @param Q_mut: the mutation rate matrix
    @param Q_sels: sequence of mutation-selection rate matrices
    @param t: the time point under consideration
    @return: a list of signs, and a sequence of statistics
    """
    # Compute the following statistics at this time point:
    #   t
    #   mutation MI
    #   selection MI max
    #   selection MI high
    #   selection MI mean
    #   selection MI low
    #   selection MI min
    #   proportion
    #
    # First compute the mutual information for mut and mut-sel.
    nsels = len(Q_sels)
    mi_mut = ctmcmi.get_mutual_information(Q_mut, t)
    mi_sels = [ctmcmi.get_mutual_information(Q, t) for Q in Q_sels]
    # sign is +1 when the mut-sel MI exceeds the pure mutation MI
    mi_signs = [1 if mi_sel > mi_mut else -1 for mi_sel in mi_sels]
    # Now that we have computed all of the vectors at this time point,
    # we can compute the statistics that we want to report.
    statistics = []
    statistics.append(t)
    statistics.append(mi_mut)
    # add the mutual information statistics
    sorted_mi = sorted(mi_sels)
    # BUG FIX: clamp the 5th/95th percentile rank to at least one element.
    # The original nsels / 20 is zero when nsels < 20, which silently made
    # sorted_mi[-n_extreme] the minimum (sorted_mi[-0] is sorted_mi[0]) and
    # sorted_mi[n_extreme-1] the maximum.  Floor division keeps the original
    # Python 2 integer semantics for nsels >= 20.
    n_extreme = max(nsels // 20, 1)
    statistics.append(sorted_mi[-1])
    statistics.append(sorted_mi[-n_extreme])
    statistics.append(sum(sorted_mi) / nsels)
    statistics.append(sorted_mi[n_extreme-1])
    statistics.append(sorted_mi[0])
    # add the proportion of samples where mut-sel beats pure mutation
    statistics.append(sum(1 for x in mi_signs if x == 1) / float(nsels))
    # return the statistics
    return mi_signs, statistics
def get_time_point_summary(Q_mut, Q_sels, t):
    """
    @param Q_mut: the mutation rate matrix
    @param Q_sels: sequence of mutation-selection rate matrices
    @param t: the time point under consideration
    @return: a list of signs, and a sequence of statistics
    """
    # Compute the following statistics at this time point:
    #   t
    #   mutation MI
    #   selection MI max
    #   selection MI high
    #   selection MI mean
    #   selection MI low
    #   selection MI min
    #   proportion
    #
    # First compute the mutual information for mut and mut-sel.
    nsels = len(Q_sels)
    mi_mut = ctmcmi.get_mutual_information(Q_mut, t)
    mi_sels = [ctmcmi.get_mutual_information(Q, t) for Q in Q_sels]
    # sign is +1 when the mut-sel MI exceeds the pure mutation MI
    mi_signs = [1 if mi_sel > mi_mut else -1 for mi_sel in mi_sels]
    # Now that we have computed all of the vectors at this time point,
    # we can compute the statistics that we want to report.
    statistics = []
    statistics.append(t)
    statistics.append(mi_mut)
    # add the mutual information statistics
    sorted_mi = sorted(mi_sels)
    # BUG FIX: clamp the 5th/95th percentile rank to at least one element.
    # The original nsels / 20 is zero when nsels < 20, which silently made
    # sorted_mi[-n_extreme] the minimum (sorted_mi[-0] is sorted_mi[0]) and
    # sorted_mi[n_extreme - 1] the maximum.  Floor division keeps the
    # original Python 2 integer semantics for nsels >= 20.
    n_extreme = max(nsels // 20, 1)
    statistics.append(sorted_mi[-1])
    statistics.append(sorted_mi[-n_extreme])
    statistics.append(sum(sorted_mi) / nsels)
    statistics.append(sorted_mi[n_extreme - 1])
    statistics.append(sorted_mi[0])
    # add the proportion of samples where mut-sel beats pure mutation
    statistics.append(sum(1 for x in mi_signs if x == 1) / float(nsels))
    # return the statistics
    return mi_signs, statistics
def get_time_point_summary(Q_mut, Q_sels, t):
    """
    @param Q_mut: the mutation rate matrix
    @param Q_sels: sequence of mutation-selection rate matrices
    @param t: the time point under consideration
    @return: a sequence of statistics
    """
    # Compute the following statistics at this time point:
    #   t
    #   mutation MI
    #   selection MI max
    #   selection MI high
    #   selection MI mean
    #   selection MI low
    #   selection MI min
    #   correlation fn 1
    #   correlation fn 2
    #   correlation fn 3
    #   correlation fn 4
    #   correlation fn 5
    #   proportion sign agreement fn 1
    #   proportion sign agreement fn 2
    #   proportion sign agreement fn 3
    #   proportion sign agreement fn 4
    #   proportion sign agreement fn 5
    #   informativeness fn 1
    #   informativeness fn 2
    #   informativeness fn 3
    #   informativeness fn 4
    #   informativeness fn 5
    #
    # First compute the mutual information for mut and mut-sel.
    nsels = len(Q_sels)
    mi_mut = ctmcmi.get_mutual_information(Q_mut, t)
    mi_sels = [ctmcmi.get_mutual_information(Q, t) for Q in Q_sels]
    # sign is +1 when the mut-sel MI exceeds the pure mutation MI
    mi_signs = [1 if mi_sel > mi_mut else -1 for mi_sel in mi_sels]
    # Now compute five cheaper proxy functions whose agreement
    # with the true MI we want to quantify.
    v0 = [ctmcmi.get_mutual_information_small_approx_c(Q, t) for Q in Q_sels]
    v1 = [ctmcmi.get_mutual_information_small_approx(Q, t) for Q in Q_sels]
    v2 = [ctmcmi.get_mutual_information_approx_c(Q, t) for Q in Q_sels]
    v3 = [math.exp(-2*t/mrate.R_to_relaxation_time(Q)) for Q in Q_sels]
    v4 = [math.exp(-t*mrate.Q_to_expected_rate(Q)) for Q in Q_sels]
    # Now that we have computed all of the vectors at this time point,
    # we can compute the statistics that we want to report.
    statistics = []
    statistics.append(t)
    statistics.append(mi_mut)
    # add the mutual information statistics
    sorted_mi = sorted(mi_sels)
    # BUG FIX: clamp the 5th/95th percentile rank to at least one element.
    # The original nsels / 20 is zero when nsels < 20, which silently made
    # sorted_mi[-n_extreme] the minimum (sorted_mi[-0] is sorted_mi[0]) and
    # sorted_mi[n_extreme-1] the maximum.  Floor division keeps the original
    # Python 2 integer semantics for nsels >= 20.
    n_extreme = max(nsels // 20, 1)
    statistics.append(sorted_mi[-1])
    statistics.append(sorted_mi[-n_extreme])
    statistics.append(sum(sorted_mi) / nsels)
    statistics.append(sorted_mi[n_extreme-1])
    statistics.append(sorted_mi[0])
    # add the correlations of each proxy with the true MI
    for v in (v0, v1, v2, v3, v4):
        r, p = scipy.stats.stats.pearsonr(v, mi_sels)
        statistics.append(r)
    # add the sign proportions: how often each proxy agrees with the
    # true MI about which process is more informative
    for v in (v0, v1, v2, v3, v4):
        v_signs = [1 if value > mi_mut else -1 for value in v]
        total = sum(1 for a, b in zip(mi_signs, v_signs) if a == b)
        p = float(total) / nsels
        statistics.append(p)
    # add the informativenesses: the mutual information between the
    # proxy sign and the true MI sign, accumulated over the four
    # joint sign outcomes
    for v in (v0, v1, v2, v3, v4):
        v_signs = [1 if value > mi_mut else -1 for value in v]
        informativeness = 0
        for pair in ((1, 1), (1, -1), (-1, 1), (-1, -1)):
            v_value, m_value = pair
            v_marginal_count = sum(1 for x in v_signs if x == v_value)
            m_marginal_count = sum(1 for x in mi_signs if x == m_value)
            joint_count = sum(1 for x in zip(v_signs, mi_signs) if x == pair)
            # skip empty cells; their contribution to MI is zero in the limit
            if joint_count:
                joint_prob = joint_count / float(nsels)
                a = math.log(joint_prob)
                b = math.log(v_marginal_count / float(nsels))
                c = math.log(m_marginal_count / float(nsels))
                informativeness += joint_prob * (a - b - c)
        statistics.append(informativeness)
    # return the statistics
    return statistics
def get_response_content(fs):
    # Report many exact and approximate mutual information quantities
    # for a randomly sampled reversible rate matrix at one user-chosen
    # divergence time, as a plain text dump.
    out = StringIO()
    np.set_printoptions(linewidth=200)
    # get the user defined variables
    n = fs.nstates
    t = fs.divtime
    #h = fs.delta
    # sample a random rate matrix
    v = divtime.sample_distribution(n)
    S = divtime.sample_symmetric_rate_matrix(n)
    R = mrate.to_gtr_halpern_bruno(S, v)
    # get some properties of the rate matrix
    distn = mrate.R_to_distn(R)
    spectrum = np.linalg.eigvalsh(mrate.symmetrized(R))
    #spectrum, U = np.linalg.eigh(mrate.symmetrized(R))
    #spectrum = np.linalg.eigvals(R)
    # report some information about the mutual information curve
    mi = ctmcmi.get_mutual_information(R, t)
    mi_diff = ctmcmi.get_mutual_information_diff(R, t)
    mi_diff_b = ctmcmi.get_mutual_information_diff_b(R, t)
    mi_diff_c = ctmcmi.get_mutual_information_diff_c(R, t)
    print >> out, 'arbitrary large-ish divergence time:'
    print >> out, t
    print >> out
    print >> out, 'randomly sampled reversible rate matrix:'
    print >> out, R
    print >> out
    print >> out, 'stationary distribution:'
    print >> out, distn
    print >> out
    print >> out, 'spectrum of the rate matrix:'
    print >> out, spectrum
    print >> out
    print >> out, 'mutual information at t = %f:' % t
    print >> out, mi
    print >> out
    print >> out, 'mutual information at t = %f (ver. 2):' % t
    print >> out, ctmcmi.get_mutual_information_b(R, t)
    print >> out
    # several independent large-t approximations of the MI
    print >> out, 'large t approximation of MI at t = %f:' % t
    print >> out, ctmcmi.get_mutual_information_approx(R, t)
    print >> out
    print >> out, 'large t approximation of MI at t = %f (ver. 2):' % t
    print >> out, ctmcmi.get_mutual_information_approx_b(R, t)
    print >> out
    print >> out, 'large t approximation of MI at t = %f (ver. 3):' % t
    print >> out, ctmcmi.cute_MI_alternate(R, t)
    print >> out
    print >> out, 'large t approximation of MI at t = %f (ver. 4):' % t
    print >> out, ctmcmi.get_mutual_information_approx_c(R, t)
    print >> out
    # several independent small-t approximations of the MI
    print >> out, 'small t approximation of MI at t = %f:' % t
    print >> out, ctmcmi.get_mutual_information_small_approx(R, t)
    print >> out
    print >> out, 'small t approximation of MI at t = %f (ver. 2):' % t
    print >> out, ctmcmi.get_mutual_information_small_approx_b(R, t)
    print >> out
    print >> out, 'small t approximation of MI at t = %f (ver. 3):' % t
    print >> out, ctmcmi.get_mutual_information_small_approx_c(R, t)
    print >> out
    print >> out, 'small t approximation of MI at t = %f (ver. 4):' % t
    print >> out, ctmcmi.get_mutual_information_small_approx_d(R, t)
    print >> out
    # derivatives of the MI with respect to time
    print >> out, 'mutual information diff at t = %f:' % t
    print >> out, mi_diff
    print >> out
    print >> out, 'mutual information diff at t = %f (ver. 2):' % t
    print >> out, mi_diff_b
    print >> out
    print >> out, 'mutual information diff at t = %f (ver. 3):' % t
    print >> out, mi_diff_c
    print >> out
    print >> out, 'large t approximation of MI diff at t = %f:' % t
    print >> out, ctmcmi.get_mutual_information_diff_approx(R, t)
    print >> out
    print >> out, 'large t approximation of MI diff at t = %f: (ver. 2)' % t
    print >> out, ctmcmi.get_mutual_information_diff_approx_b(R, t)
    print >> out
    print >> out, 'large t approximation of MI diff at t = %f: (ver. 4)' % t
    print >> out, ctmcmi.get_mutual_information_diff_approx_c(R, t)
    print >> out
    print >> out, 'log of mutual information at t = %f:' % t
    print >> out, math.log(mi)
    print >> out
    #print >> out, 'estimated derivative',
    #print >> out, 'of log of mutual information at t = %f:' % t
    #print >> out, (math.log(mi_c) - math.log(mi_a)) / (2*h)
    #print >> out
    print >> out, 'estimated derivative of log of MI',
    print >> out, 'at t = %f:' % t
    print >> out, mi_diff / mi
    print >> out
    print >> out, 'large t approximation of derivative of log of MI',
    print >> out, 'at t = %f:' % t
    print >> out, ctmcmi.get_mutual_information_diff_approx(
            R, t) / ctmcmi.get_mutual_information_approx(R, t)
    print >> out
    print >> out, 'large t approximation of derivative of log of MI',
    print >> out, 'at t = %f (ver. 2):' % t
    print >> out, ctmcmi.get_mutual_information_diff_approx_b(
            R, t) / ctmcmi.get_mutual_information_approx_b(R, t)
    print >> out
    # spectrum is sorted ascending, so spectrum[-2] is the
    # second largest eigenvalue of the symmetrized rate matrix
    print >> out, 'twice the relevant eigenvalue:'
    print >> out, 2 * spectrum[-2]
    print >> out
    print >> out
    #print >> out, 'estimated derivative',
    #print >> out, 'of mutual information at t = %f:' % t
    #print >> out, (mi_c - mi_a) / (2*h)
    #print >> out
    #print >> out, '(estimated derivative of mutual information) /',
    #print >> out, '(mutual information) at t = %f:' % t
    #print >> out, (mi_c - mi_a) / (2*h*mi_b)
    #print >> out
    return out.getvalue()
def __call__(self):
    """
    Look for a counterexample.

    Compare mutual information, Fisher information, and the
    negative-slope identity-proportion criterion between a general
    implementation and F81-specific formulas on one random F81 process.
    If any pair disagrees, store a report in self.counterexample
    and return True; otherwise return False.
    """
    n = self.nstates
    # sample a random rate and time and stationary distribution
    r = random.expovariate(1)
    t = random.expovariate(1)
    v = np.random.exponential(1, n)
    v /= np.sum(v)
    # construct the F81 rate matrix
    R = r * np.outer(np.ones(n), v)
    R -= np.diag(np.sum(R, axis=1))
    # get some information criterion values
    mi_general = ctmcmi.get_mutual_information(R, t)
    fi_general = divtime.get_fisher_information(R, t)
    pollock_general = get_gtr_pollock(R, t)
    mi_f81 = get_f81_mi(r, v, t)
    fi_f81 = get_f81_fi(r, v, t)
    pollock_f81 = get_f81_pollock(r, v, t)
    # a closed form specific to two-state processes
    if n == 2:
        fi_f81_2state = get_f81_fi_2state(r, v, t)
    # check for contradictions; the exception carries the name
    # of the first criterion that disagrees
    try:
        if not np.allclose(mi_general, mi_f81):
            raise Contradiction('mutual information')
        if not np.allclose(fi_general, fi_f81):
            raise Contradiction('fisher information')
        if not np.allclose(pollock_general, pollock_f81):
            raise Contradiction('neg slope identity proportion')
        if n == 2:
            if not np.allclose(fi_general, fi_f81_2state):
                raise Contradiction('fisher information (2-state)')
    except Contradiction as e:
        # build a human readable report of the disagreement
        out = StringIO()
        print >> out, 'found', str(e), 'contradiction'
        print >> out
        print >> out, 'GTR mutual information:'
        print >> out, mi_general
        print >> out
        print >> out, 'F81 mutual information:'
        print >> out, mi_f81
        print >> out
        print >> out, 'GTR Fisher information:'
        print >> out, fi_general
        print >> out
        print >> out, 'F81 Fisher information:'
        print >> out, fi_f81
        print >> out
        if n == 2:
            print >> out, 'F81 2-state Fisher information:'
            print >> out, fi_f81_2state
            print >> out
        print >> out, 'GTR neg slope identity proportion:'
        print >> out, pollock_general
        print >> out
        print >> out, 'F81 neg slope identity proportion:'
        print >> out, pollock_f81
        print >> out
        self.counterexample = out.getvalue()
        return True
    return False
def get_response_content(fs): M = get_input_matrix(fs) # create the R table string and scripts headers = ['t'] if fs.show_entropy: headers.append('ub.entropy') headers.extend([ 'ub.jc.spectral', 'ub.f81.spectral', 'mutual.information', 'lb.2.state.spectral', 'lb.2.state', 'lb.f81', ]) npoints = 100 t_low = fs.start_time t_high = fs.stop_time t_incr = (t_high - t_low) / (npoints - 1) t_values = [t_low + t_incr*i for i in range(npoints)] # define some extra stuff v = mrate.R_to_distn(M) entropy = -np.dot(v, np.log(v)) n = len(M) gap = sorted(abs(x) for x in np.linalg.eigvals(M))[1] print 'stationary distn:', v print 'entropy:', entropy print 'spectral gap:', gap M_slow_jc = gap * (1.0 / n) * (np.ones((n,n)) - n*np.eye(n)) M_slow_f81 = gap * np.outer(np.ones(n), v) M_slow_f81 -= np.diag(np.sum(M_slow_f81, axis=1)) M_f81 = msimpl.get_fast_f81(M) M_2state = msimpl.get_fast_two_state_autobarrier(M) M_2state_spectral = -gap * M_2state / np.trace(M_2state) # get the data for the R table arr = [] for u in t_values: # experiment with log time #t = math.exp(u) t = u mi_slow_jc = ctmcmi.get_mutual_information(M_slow_jc, t) mi_slow_f81 = ctmcmi.get_mutual_information(M_slow_f81, t) mi_mut = ctmcmi.get_mutual_information(M, t) mi_2state_spectral = ctmcmi.get_mutual_information(M_2state_spectral, t) mi_f81 = ctmcmi.get_mutual_information(M_f81, t) mi_2state = ctmcmi.get_mutual_information(M_2state, t) row = [u] if fs.show_entropy: row.append(entropy) row.extend([mi_slow_jc, mi_slow_f81, mi_mut, mi_2state_spectral, mi_2state, mi_f81]) arr.append(row) # get the R table table_string = RUtil.get_table_string(arr, headers) # get the R script script = get_ggplot() # create the R plot image device_name = Form.g_imageformat_to_r_function[fs.imageformat] retcode, r_out, r_err, image_data = RUtil.run_plotter( table_string, script, device_name) if retcode: raise RUtil.RError(r_err) return image_data
def get_response_content(fs):
    """Report which process is more informative, with supporting detail.

    Compares mutual information and Fisher information between the pure
    mutation process M and the mutation-selection balance process R at a
    user-specified time, and returns an html report.
    """
    M, R = get_input_matrices(fs)
    M_v = mrate.R_to_distn(M)
    R_v = mrate.R_to_distn(R)
    t = fs.t
    # compute the information criteria for both processes
    mi_mut = ctmcmi.get_mutual_information(M, t)
    mi_bal = ctmcmi.get_mutual_information(R, t)
    fi_mut = divtime.get_fisher_information(M, t)
    fi_bal = divtime.get_fisher_information(R, t)
    # the sign says which process wins under the selected criterion
    if fs.info_mut:
        information_sign = np.sign(mi_mut - mi_bal)
    elif fs.info_fis:
        information_sign = np.sign(fi_mut - fi_bal)
    out = StringIO()
    print >> out, '<html>'
    print >> out, '<body>'
    print >> out
    print >> out, '<pre>'
    print >> out, 'Explicitly computed answer',
    print >> out, '(not a heuristic but may be numerically imprecise):'
    if information_sign == 1:
        print >> out, '* pure mutation',
        print >> out, 'is more informative'
    elif information_sign == -1:
        print >> out, '* the balance of mutation and selection',
        print >> out, 'is more informative'
    else:
        print >> out, ' the information contents of the two processes',
        print >> out, 'are numerically indistinguishable'
    print >> out
    print >> out
    if fs.info_mut:
        print >> out, 'Mutual information properties',
        print >> out, 'at very small and very large times:'
        print >> out
        print >> out, get_mi_asymptotics(M, R)
        print >> out
        print >> out
    print >> out, 'Heuristics without regard to time or to the selected',
    print >> out, 'information variant (Fisher vs. mutual information):'
    print >> out
    print >> out, get_heuristics(M, R)
    print >> out
    print >> out
    print >> out, 'Input summary:'
    print >> out
    print >> out, 'mutation rate matrix:'
    print >> out, M
    print >> out
    print >> out, 'mutation process stationary distribution:'
    print >> out, M_v
    print >> out
    print >> out, 'mutation-selection balance rate matrix:'
    print >> out, R
    print >> out
    print >> out, 'mutation-selection balance stationary distribution:'
    print >> out, R_v
    print >> out
    print >> out, 'mutation process expected rate:'
    print >> out, mrate.Q_to_expected_rate(M)
    print >> out
    print >> out, 'mutation-selection balance expected rate:'
    print >> out, mrate.Q_to_expected_rate(R)
    print >> out
    print >> out
    print >> out, 'The following information calculations',
    print >> out, 'depend on t = %s:' % t
    print >> out
    print >> out, 'log(ratio(E(L))) for pure mutation:'
    print >> out, ctmcmi.get_ll_ratio_wrong(M, t)
    print >> out
    print >> out, 'log(ratio(E(L))) for mut-sel balance:'
    print >> out, ctmcmi.get_ll_ratio_wrong(R, t)
    print >> out
    print >> out, 'mutual information for pure mutation:'
    print >> out, mi_mut
    print >> out
    print >> out, 'mutual information for mut-sel balance:'
    print >> out, mi_bal
    print >> out
    print >> out, 'pinsker lower bound mi for pure mutation:'
    print >> out, ctmcmi.get_pinsker_lower_bound_mi(M, t)
    print >> out
    print >> out, 'pinsker lower bound mi for mut-sel balance:'
    print >> out, ctmcmi.get_pinsker_lower_bound_mi(R, t)
    print >> out
    print >> out, 'row based pinsker lower bound mi for pure mutation:'
    print >> out, ctmcmi.get_row_based_plb_mi(M, t)
    print >> out
    print >> out, 'row based pinsker lower bound mi for mut-sel balance:'
    print >> out, ctmcmi.get_row_based_plb_mi(R, t)
    print >> out
    print >> out, 'row based hellinger lower bound mi for pure mutation:'
    print >> out, ctmcmi.get_row_based_hellinger_lb_mi(M, t)
    print >> out
    print >> out, 'row based hellinger lower bound mi for mut-sel balance:'
    print >> out, ctmcmi.get_row_based_hellinger_lb_mi(R, t)
    print >> out
    print >> out, 'Fisher information for pure mutation:'
    print >> out, fi_mut
    print >> out
    print >> out, 'Fisher information for mut-sel balance:'
    print >> out, fi_bal
    print >> out
    print >> out, '</pre>'
    #
    # create the summaries
    summaries = (RateMatrixSummary(M), RateMatrixSummary(R))
    print >> out, get_html_table(summaries)
    print >> out
    # BUG FIX: close the tags opened at the top of the document;
    # the original printed '<html>' and '<body>' again here,
    # producing malformed html with unclosed elements.
    print >> out, '</body>'
    print >> out, '</html>'
    return out.getvalue()
def get_response_content(fs):
    # Print matrices and spectra relating a sampled reversible rate
    # matrix R, its symmetrization S, and the pairwise joint
    # distribution matrix, at a couple of fixed divergence times.
    out = StringIO()
    np.set_printoptions(linewidth=200)
    # get the user defined variables
    n = fs.nstates
    # sample a random rate matrix
    v = divtime.sample_distribution(n)
    S = divtime.sample_symmetric_rate_matrix(n)
    R = mrate.to_gtr_halpern_bruno(S, v)
    # get some properties of the rate matrix and its re-symmetrization
    S = mrate.symmetrized(R)
    distn = mrate.R_to_distn(R)
    w, U = np.linalg.eigh(S)
    # D has the squared entries of the last eigenvector of S
    # on its diagonal; D_inv has the squared reciprocals
    D = np.diag(U.T[-1])**2
    D_inv = np.diag(np.reciprocal(U.T[-1]))**2
    for t in (1.0, 2.0):
        P = scipy.linalg.expm(R * t)
        M = ndot(D**.5, scipy.linalg.expm(S * t), D**.5)
        M_star = ndot(D_inv**.5, scipy.linalg.expm(S * t), D_inv**.5)
        M_star_log = np.log(M_star)
        M_star_log_w, M_star_log_U = np.linalg.eigh(M_star_log)
        # E is the entrywise (Hadamard) product of M and log(M_star)
        E = M * np.log(M_star)
        E_w, E_U = np.linalg.eigh(E)
        print >> out, 't:'
        print >> out, t
        print >> out
        print >> out, 'randomly sampled rate matrix R'
        print >> out, R
        print >> out
        print >> out, 'symmetrized matrix S'
        print >> out, S
        print >> out
        print >> out, 'stationary distribution diagonal D'
        print >> out, D
        print >> out
        print >> out, 'R = D^-1/2 S D^1/2'
        print >> out, ndot(D_inv**.5, S, D**.5)
        print >> out
        print >> out, 'probability matrix e^(R*t) = P'
        print >> out, P
        print >> out
        print >> out, 'P = D^-1/2 e^(S*t) D^1/2'
        print >> out, ndot(D_inv**.5, scipy.linalg.expm(S * t), D**.5)
        print >> out
        print >> out, 'pairwise distribution matrix M'
        print >> out, 'M = D^1/2 e^(S*t) D^1/2'
        print >> out, M
        print >> out
        print >> out, 'sum of entries of M'
        print >> out, np.sum(M)
        print >> out
        print >> out, 'M_star = D^-1/2 e^(S*t) D^-1/2'
        print >> out, M_star
        print >> out
        print >> out, 'entrywise logarithm logij(M_star)'
        print >> out, np.log(M_star)
        print >> out
        print >> out, 'Hadamard product M o logij(M_star) = E'
        print >> out, E
        print >> out
        print >> out, 'spectrum of M:'
        print >> out, np.linalg.eigvalsh(M)
        print >> out
        print >> out, 'spectrum of logij(M_star):'
        print >> out, M_star_log_w
        print >> out
        print >> out, 'corresponding eigenvectors of logij(M_star) as columns:'
        print >> out, M_star_log_U
        print >> out
        print >> out, 'spectrum of E:'
        print >> out, E_w
        print >> out
        print >> out, 'corresponding eigenvectors of E as columns:'
        print >> out, E_U
        print >> out
        print >> out, 'entrywise square roots of stationary distribution:'
        print >> out, np.sqrt(v)
        print >> out
        print >> out, 'sum of entries of E:'
        print >> out, np.sum(E)
        print >> out
        print >> out, 'mutual information:'
        print >> out, ctmcmi.get_mutual_information(R, t)
        print >> out
        print >> out
    return out.getvalue()
def get_response_content(fs):
    # Near-duplicate diagnostic: print matrices and spectra relating a
    # sampled reversible rate matrix R, its symmetrization S, and the
    # pairwise joint distribution matrix, at fixed divergence times.
    out = StringIO()
    np.set_printoptions(linewidth=200)
    # get the user defined variables
    n = fs.nstates
    # sample a random rate matrix
    v = divtime.sample_distribution(n)
    S = divtime.sample_symmetric_rate_matrix(n)
    R = mrate.to_gtr_halpern_bruno(S, v)
    # get some properties of the rate matrix and its re-symmetrization
    S = mrate.symmetrized(R)
    distn = mrate.R_to_distn(R)
    w, U = np.linalg.eigh(S)
    # D has the squared entries of the last eigenvector of S
    # on its diagonal; D_inv has the squared reciprocals
    D = np.diag(U.T[-1])**2
    D_inv = np.diag(np.reciprocal(U.T[-1]))**2
    for t in (1.0, 2.0):
        P = scipy.linalg.expm(R*t)
        M = ndot(D**.5, scipy.linalg.expm(S*t), D**.5)
        M_star = ndot(D_inv**.5, scipy.linalg.expm(S*t), D_inv**.5)
        M_star_log = np.log(M_star)
        M_star_log_w, M_star_log_U = np.linalg.eigh(M_star_log)
        # E is the entrywise (Hadamard) product of M and log(M_star)
        E = M * np.log(M_star)
        E_w, E_U = np.linalg.eigh(E)
        print >> out, 't:'
        print >> out, t
        print >> out
        print >> out, 'randomly sampled rate matrix R'
        print >> out, R
        print >> out
        print >> out, 'symmetrized matrix S'
        print >> out, S
        print >> out
        print >> out, 'stationary distribution diagonal D'
        print >> out, D
        print >> out
        print >> out, 'R = D^-1/2 S D^1/2'
        print >> out, ndot(D_inv**.5, S, D**.5)
        print >> out
        print >> out, 'probability matrix e^(R*t) = P'
        print >> out, P
        print >> out
        print >> out, 'P = D^-1/2 e^(S*t) D^1/2'
        print >> out, ndot(D_inv**.5, scipy.linalg.expm(S*t), D**.5)
        print >> out
        print >> out, 'pairwise distribution matrix M'
        print >> out, 'M = D^1/2 e^(S*t) D^1/2'
        print >> out, M
        print >> out
        print >> out, 'sum of entries of M'
        print >> out, np.sum(M)
        print >> out
        print >> out, 'M_star = D^-1/2 e^(S*t) D^-1/2'
        print >> out, M_star
        print >> out
        print >> out, 'entrywise logarithm logij(M_star)'
        print >> out, np.log(M_star)
        print >> out
        print >> out, 'Hadamard product M o logij(M_star) = E'
        print >> out, E
        print >> out
        print >> out, 'spectrum of M:'
        print >> out, np.linalg.eigvalsh(M)
        print >> out
        print >> out, 'spectrum of logij(M_star):'
        print >> out, M_star_log_w
        print >> out
        print >> out, 'corresponding eigenvectors of logij(M_star) as columns:'
        print >> out, M_star_log_U
        print >> out
        print >> out, 'spectrum of E:'
        print >> out, E_w
        print >> out
        print >> out, 'corresponding eigenvectors of E as columns:'
        print >> out, E_U
        print >> out
        print >> out, 'entrywise square roots of stationary distribution:'
        print >> out, np.sqrt(v)
        print >> out
        print >> out, 'sum of entries of E:'
        print >> out, np.sum(E)
        print >> out
        print >> out, 'mutual information:'
        print >> out, ctmcmi.get_mutual_information(R, t)
        print >> out
        print >> out
    return out.getvalue()
def sample_row(): n = 4 # sample the exchangeability S = np.zeros((n, n)) S[1, 0] = random.expovariate(1) S[2, 0] = random.expovariate(1) S[2, 1] = random.expovariate(1) S[3, 0] = random.expovariate(1) S[3, 1] = random.expovariate(1) S[3, 2] = random.expovariate(1) # sample the mutation stationary distribution mdistn = np.array([random.expovariate(1) for i in range(n)]) mdistn /= np.sum(mdistn) # sample the mutation selection balance stationary distribution bdistn = np.array([random.expovariate(1) for i in range(n)]) bdistn /= np.sum(bdistn) # sample the time t = random.expovariate(1) # sample the info type infotype = random.choice(('infotype.mi', 'infotype.fi')) # Compute some intermediate variables # from which the summary statistics and the label are computed. S = S + S.T M = S * mdistn M -= np.diag(np.sum(M, axis=1)) R = mrate.to_gtr_halpern_bruno(M, bdistn) shannon_ent_mut = -sum(p * log(p) for p in mdistn) shannon_ent_bal = -sum(p * log(p) for p in bdistn) logical_ent_mut = 1.0 - sum(p * p for p in mdistn) logical_ent_bal = 1.0 - sum(p * p for p in bdistn) expected_rate_mut = mrate.Q_to_expected_rate(M) expected_rate_bal = mrate.Q_to_expected_rate(R) spectral_rate_mut = 1 / mrate.R_to_relaxation_time(M) spectral_rate_bal = 1 / mrate.R_to_relaxation_time(R) mi_mut = ctmcmi.get_mutual_information(M, t) mi_bal = ctmcmi.get_mutual_information(R, t) fi_mut = divtime.get_fisher_information(M, t) fi_bal = divtime.get_fisher_information(R, t) # compute the summary statistics summary_entries = [ shannon_ent_bal - shannon_ent_mut, logical_ent_bal - logical_ent_mut, log(shannon_ent_bal) - log(shannon_ent_mut), log(logical_ent_bal) - log(logical_ent_mut), expected_rate_bal - expected_rate_mut, spectral_rate_bal - spectral_rate_mut, log(expected_rate_bal) - log(expected_rate_mut), log(spectral_rate_bal) - log(spectral_rate_mut), mi_bal - mi_mut, fi_bal - fi_mut, math.log(mi_bal) - math.log(mi_mut), math.log(fi_bal) - math.log(fi_mut), ] # get the definition entries 
definition_entries = [ S[1, 0], S[2, 0], S[2, 1], S[3, 0], S[3, 1], S[3, 2], mdistn[0], mdistn[1], mdistn[2], mdistn[3], bdistn[0], bdistn[1], bdistn[2], bdistn[3], infotype, t, ] # define the label if infotype == 'infotype.mi' and mi_mut > mi_bal: label = 'mut.is.better' elif infotype == 'infotype.mi' and mi_mut < mi_bal: label = 'bal.is.better' elif infotype == 'infotype.fi' and fi_mut > fi_bal: label = 'mut.is.better' elif infotype == 'infotype.fi' and fi_mut < fi_bal: label = 'bal.is.better' else: label = 'indistinguishable' # return the row return definition_entries + summary_entries + [label]
def sample_row(): n = 4 # sample the exchangeability S = np.zeros((n, n)) S[1,0] = random.expovariate(1) S[2,0] = random.expovariate(1) S[2,1] = random.expovariate(1) S[3,0] = random.expovariate(1) S[3,1] = random.expovariate(1) S[3,2] = random.expovariate(1) # sample the mutation stationary distribution mdistn = np.array([random.expovariate(1) for i in range(n)]) mdistn /= np.sum(mdistn) # sample the mutation selection balance stationary distribution bdistn = np.array([random.expovariate(1) for i in range(n)]) bdistn /= np.sum(bdistn) # sample the time t = random.expovariate(1) # sample the info type infotype = random.choice(('infotype.mi', 'infotype.fi')) # Compute some intermediate variables # from which the summary statistics and the label are computed. S = S + S.T M = S * mdistn M -= np.diag(np.sum(M, axis=1)) R = mrate.to_gtr_halpern_bruno(M, bdistn) shannon_ent_mut = -sum(p*log(p) for p in mdistn) shannon_ent_bal = -sum(p*log(p) for p in bdistn) logical_ent_mut = 1.0 - sum(p*p for p in mdistn) logical_ent_bal = 1.0 - sum(p*p for p in bdistn) expected_rate_mut = mrate.Q_to_expected_rate(M) expected_rate_bal = mrate.Q_to_expected_rate(R) spectral_rate_mut = 1 / mrate.R_to_relaxation_time(M) spectral_rate_bal = 1 / mrate.R_to_relaxation_time(R) mi_mut = ctmcmi.get_mutual_information(M, t) mi_bal = ctmcmi.get_mutual_information(R, t) fi_mut = divtime.get_fisher_information(M, t) fi_bal = divtime.get_fisher_information(R, t) # compute the summary statistics summary_entries = [ shannon_ent_bal - shannon_ent_mut, logical_ent_bal - logical_ent_mut, log(shannon_ent_bal) - log(shannon_ent_mut), log(logical_ent_bal) - log(logical_ent_mut), expected_rate_bal - expected_rate_mut, spectral_rate_bal - spectral_rate_mut, log(expected_rate_bal) - log(expected_rate_mut), log(spectral_rate_bal) - log(spectral_rate_mut), mi_bal - mi_mut, fi_bal - fi_mut, math.log(mi_bal) - math.log(mi_mut), math.log(fi_bal) - math.log(fi_mut), ] # get the definition entries definition_entries = 
[ S[1,0], S[2,0], S[2,1], S[3,0], S[3,1], S[3,2], mdistn[0], mdistn[1], mdistn[2], mdistn[3], bdistn[0], bdistn[1], bdistn[2], bdistn[3], infotype, t, ] # define the label if infotype == 'infotype.mi' and mi_mut > mi_bal: label = 'mut.is.better' elif infotype == 'infotype.mi' and mi_mut < mi_bal: label = 'bal.is.better' elif infotype == 'infotype.fi' and fi_mut > fi_bal: label = 'mut.is.better' elif infotype == 'infotype.fi' and fi_mut < fi_bal: label = 'bal.is.better' else: label = 'indistinguishable' # return the row return definition_entries + summary_entries + [label]
def get_response_content(fs):
    """
    Build a plain-text diagnostic report about a random rate matrix.
    A random reversible (GTR / Halpern-Bruno) rate matrix is sampled
    and the mutual information curve at the user-supplied divergence
    time is reported, together with many alternative computations and
    large-t / small-t approximations for cross-checking.
    @param fs: user input object providing nstates and divtime
    @return: the report as a multi-line string
    """
    out = StringIO()
    np.set_printoptions(linewidth=200)
    # get the user defined variables
    n = fs.nstates
    t = fs.divtime
    #h = fs.delta
    # sample a random rate matrix
    v = divtime.sample_distribution(n)
    S = divtime.sample_symmetric_rate_matrix(n)
    R = mrate.to_gtr_halpern_bruno(S, v)
    # get some properties of the rate matrix
    distn = mrate.R_to_distn(R)
    spectrum = np.linalg.eigvalsh(mrate.symmetrized(R))
    #spectrum, U = np.linalg.eigh(mrate.symmetrized(R))
    #spectrum = np.linalg.eigvals(R)
    # report some information about the mutual information curve
    mi = ctmcmi.get_mutual_information(R, t)
    mi_diff = ctmcmi.get_mutual_information_diff(R, t)
    mi_diff_b = ctmcmi.get_mutual_information_diff_b(R, t)
    mi_diff_c = ctmcmi.get_mutual_information_diff_c(R, t)
    print >> out, 'arbitrary large-ish divergence time:'
    print >> out, t
    print >> out
    print >> out, 'randomly sampled reversible rate matrix:'
    print >> out, R
    print >> out
    print >> out, 'stationary distribution:'
    print >> out, distn
    print >> out
    print >> out, 'spectrum of the rate matrix:'
    print >> out, spectrum
    print >> out
    # Mutual information, computed two independent ways.
    print >> out, 'mutual information at t = %f:' % t
    print >> out, mi
    print >> out
    print >> out, 'mutual information at t = %f (ver. 2):' % t
    print >> out, ctmcmi.get_mutual_information_b(R, t)
    print >> out
    # Large-t approximations of the mutual information.
    print >> out, 'large t approximation of MI at t = %f:' % t
    print >> out, ctmcmi.get_mutual_information_approx(R, t)
    print >> out
    print >> out, 'large t approximation of MI at t = %f (ver. 2):' % t
    print >> out, ctmcmi.get_mutual_information_approx_b(R, t)
    print >> out
    print >> out, 'large t approximation of MI at t = %f (ver. 3):' % t
    print >> out, ctmcmi.cute_MI_alternate(R, t)
    print >> out
    print >> out, 'large t approximation of MI at t = %f (ver. 4):' % t
    print >> out, ctmcmi.get_mutual_information_approx_c(R, t)
    print >> out
    # Small-t approximations of the mutual information.
    print >> out, 'small t approximation of MI at t = %f:' % t
    print >> out, ctmcmi.get_mutual_information_small_approx(R, t)
    print >> out
    print >> out, 'small t approximation of MI at t = %f (ver. 2):' % t
    print >> out, ctmcmi.get_mutual_information_small_approx_b(R, t)
    print >> out
    print >> out, 'small t approximation of MI at t = %f (ver. 3):' % t
    print >> out, ctmcmi.get_mutual_information_small_approx_c(R, t)
    print >> out
    print >> out, 'small t approximation of MI at t = %f (ver. 4):' % t
    print >> out, ctmcmi.get_mutual_information_small_approx_d(R, t)
    print >> out
    # Time derivative of the mutual information, three ways.
    print >> out, 'mutual information diff at t = %f:' % t
    print >> out, mi_diff
    print >> out
    print >> out, 'mutual information diff at t = %f (ver. 2):' % t
    print >> out, mi_diff_b
    print >> out
    print >> out, 'mutual information diff at t = %f (ver. 3):' % t
    print >> out, mi_diff_c
    print >> out
    # Large-t approximations of the derivative.
    # NOTE(review): the labels jump from '(ver. 2)' to '(ver. 4)' --
    # presumably a ver. 3 variant was removed; confirm before relabeling.
    print >> out, 'large t approximation of MI diff at t = %f:' % t
    print >> out, ctmcmi.get_mutual_information_diff_approx(R, t)
    print >> out
    print >> out, 'large t approximation of MI diff at t = %f: (ver. 2)' % t
    print >> out, ctmcmi.get_mutual_information_diff_approx_b(R, t)
    print >> out
    print >> out, 'large t approximation of MI diff at t = %f: (ver. 4)' % t
    print >> out, ctmcmi.get_mutual_information_diff_approx_c(R, t)
    print >> out
    # Log of MI and the derivative of its log, with approximations
    # compared against twice the second-largest eigenvalue.
    print >> out, 'log of mutual information at t = %f:' % t
    print >> out, math.log(mi)
    print >> out
    #print >> out, 'estimated derivative',
    #print >> out, 'of log of mutual information at t = %f:' % t
    #print >> out, (math.log(mi_c) - math.log(mi_a)) / (2*h)
    #print >> out
    print >> out, 'estimated derivative of log of MI',
    print >> out, 'at t = %f:' % t
    print >> out, mi_diff / mi
    print >> out
    print >> out, 'large t approximation of derivative of log of MI',
    print >> out, 'at t = %f:' % t
    print >> out, ctmcmi.get_mutual_information_diff_approx(R, t) / ctmcmi.get_mutual_information_approx(R, t)
    print >> out
    print >> out, 'large t approximation of derivative of log of MI',
    print >> out, 'at t = %f (ver. 2):' % t
    print >> out, ctmcmi.get_mutual_information_diff_approx_b(R, t) / ctmcmi.get_mutual_information_approx_b(R, t)
    print >> out
    print >> out, 'twice the relevant eigenvalue:'
    print >> out, 2 * spectrum[-2]
    print >> out
    print >> out
    #print >> out, 'estimated derivative',
    #print >> out, 'of mutual information at t = %f:' % t
    #print >> out, (mi_c - mi_a) / (2*h)
    #print >> out
    #print >> out, '(estimated derivative of mutual information) /',
    #print >> out, '(mutual information) at t = %f:' % t
    #print >> out, (mi_c - mi_a) / (2*h*mi_b)
    #print >> out
    return out.getvalue()
def get_time_point_summary(Q_mut, Q_sels, t):
    """
    Summarize information statistics at a single time point.
    The returned sequence is, in order:
    t; mutation MI; selection MI max / high / mean / low / min;
    five Pearson correlations of proxy functions vs. selection MI;
    five proportions of sign agreement with the mutation-MI threshold;
    five informativeness values (MI between sign indicators);
    and the proportion of selection matrices whose MI exceeds
    the mutation MI.
    @param Q_mut: the mutation rate matrix
    @param Q_sels: sequence of mutation-selection rate matrices
    @param t: the time point under consideration
    @return: a sequence of statistics
    """
    # First compute the mutual information for mut and mut-sel,
    # and the sign of each selection MI relative to the mutation MI.
    nsels = len(Q_sels)
    mi_mut = ctmcmi.get_mutual_information(Q_mut, t)
    mi_sels = [ctmcmi.get_mutual_information(Q, t) for Q in Q_sels]
    mi_signs = [1 if mi_sel > mi_mut else -1 for mi_sel in mi_sels]
    # Proxy functions whose agreement with the true MI is evaluated:
    # two small-t approximations, a large-t approximation, a spectral
    # decay proxy, and an expected-rate decay proxy.
    v0 = [ctmcmi.get_mutual_information_small_approx_c(Q, t) for Q in Q_sels]
    v1 = [ctmcmi.get_mutual_information_small_approx(Q, t) for Q in Q_sels]
    v2 = [ctmcmi.get_mutual_information_approx_c(Q, t) for Q in Q_sels]
    v3 = [math.exp(-2*t/mrate.R_to_relaxation_time(Q)) for Q in Q_sels]
    v4 = [math.exp(-t*mrate.Q_to_expected_rate(Q)) for Q in Q_sels]
    # Now that we have computed all of the vectors at this time point,
    # we can compute the statistics that we want to report.
    statistics = []
    statistics.append(t)
    statistics.append(mi_mut)
    # Add the mutual information summary statistics.
    # BUGFIX: keep at least one sample in each tail.  With the old
    # 'nsels / 20' integer division, nsels < 20 gave n_extreme == 0,
    # so sorted_mi[-0] reported the MINIMUM as the 'high' statistic and
    # sorted_mi[0-1] reported the MAXIMUM as the 'low' statistic.
    n_extreme = max(1, nsels // 20)
    sorted_mi = sorted(mi_sels)
    statistics.append(sorted_mi[-1])                # max
    statistics.append(sorted_mi[-n_extreme])        # high (~95th pctile)
    statistics.append(sum(sorted_mi) / nsels)       # mean
    statistics.append(sorted_mi[n_extreme-1])       # low (~5th pctile)
    statistics.append(sorted_mi[0])                 # min
    # Add the Pearson correlation of each proxy with the true MI values.
    for v in (v0, v1, v2, v3, v4):
        r, p = scipy.stats.stats.pearsonr(v, mi_sels)
        statistics.append(r)
    # Add the proportion of sign agreement for each proxy,
    # thresholding both the proxy and the true MI at mi_mut.
    for v in (v0, v1, v2, v3, v4):
        v_signs = [1 if value > mi_mut else -1 for value in v]
        total = sum(1 for a, b in zip(mi_signs, v_signs) if a == b)
        statistics.append(float(total) / nsels)
    # Add the informativeness of each proxy: the discrete mutual
    # information between the proxy sign indicator and the MI sign
    # indicator; zero-count cells contribute nothing.
    for v in (v0, v1, v2, v3, v4):
        v_signs = [1 if value > mi_mut else -1 for value in v]
        informativeness = 0
        for pair in ((1, 1), (1, -1), (-1, 1), (-1, -1)):
            v_value, m_value = pair
            v_marginal_count = sum(1 for x in v_signs if x == v_value)
            m_marginal_count = sum(1 for x in mi_signs if x == m_value)
            joint_count = sum(1 for x in zip(v_signs, mi_signs) if x == pair)
            if joint_count:
                joint_prob = joint_count / float(nsels)
                a = math.log(joint_prob)
                b = math.log(v_marginal_count / float(nsels))
                c = math.log(m_marginal_count / float(nsels))
                informativeness += joint_prob * (a - b - c)
        statistics.append(informativeness)
    # Add the proportion of selection matrices whose MI
    # exceeds the mutation MI.
    statistics.append(sum(1 for x in mi_signs if x == 1) / float(nsels))
    return statistics