def do_mut_hyper_2_3(fs, to_gtr):
    """
    Write a three-section text report about the 8-state cube process.
    Each section is a heading followed by a rate matrix summary.
    The sections are the mutation rate matrix,
    the matrix returned by to_gtr for a target distribution that puts
    fs.p_mid on the last state and splits the rest evenly,
    and a rescaled 7-state reference matrix built from the
    rates among the first seven states.
    The fs argument must provide the attribute p_mid.
    The to_gtr argument is called as to_gtr(rate_matrix, p_target).
    The returned report string has trailing whitespace removed.
    """
    out = StringIO()

    def write_section(title, rate_matrix):
        # heading, blank line, matrix summary, then two blank lines
        print >> out, title
        print >> out
        print >> out, get_rate_matrix_summary(rate_matrix)
        print >> out
        print >> out

    # the mutation process on the full cube
    cube = mrate.get_sparse_sequence_rate_matrix(2, 3)
    write_section('*** mutation rate matrix (8-state cube) ***', cube)

    # the last state gets p_mid and the other seven share the remainder
    p_other = (1 - fs.p_mid)/7
    p_target = [p_other]*7 + [fs.p_mid]
    balance = to_gtr(cube, p_target)
    write_section('*** mutation-selection balance ***', balance)

    # Reference process: copy the off-diagonal rates among the
    # first seven states, set each diagonal entry so that the row
    # sums to zero, and divide by the expected rate.
    full = mrate.get_sparse_sequence_rate_matrix(2, 3)
    nkept = 7
    reference = np.zeros((nkept, nkept))
    for row in range(nkept):
        for col in range(nkept):
            if row == col:
                continue
            reference[row, col] = full[row, col]
    reference -= np.diag(np.sum(reference, axis=1))
    reference /= mrate.Q_to_expected_rate(reference)
    write_section(
            '*** reference mutation rate matrix (corner removed) ***',
            reference)

    return out.getvalue().rstrip()
def do_mut_hyper_2_3_square(fs, to_gtr):
    """
    Write a three-section text report comparing the cube to a square.
    Each section is a heading followed by a rate matrix summary.
    The sections are the 8-state cube mutation rate matrix,
    the matrix returned by to_gtr for a target distribution that puts
    fs.p_mid on each of the last four states and splits the remainder
    evenly among the first four,
    and the 4-state square mutation rate matrix used as a reference.
    The fs argument must provide the attribute p_mid.
    The to_gtr argument is called as to_gtr(rate_matrix, p_target).
    The returned report string has trailing whitespace removed.
    """
    out = StringIO()

    def write_section(title, rate_matrix):
        # heading, blank line, matrix summary, then two blank lines
        print >> out, title
        print >> out
        print >> out, get_rate_matrix_summary(rate_matrix)
        print >> out
        print >> out

    # the mutation process on the full cube
    cube = mrate.get_sparse_sequence_rate_matrix(2, 3)
    write_section('*** mutation rate matrix (8-state cube) ***', cube)

    # each of the last four states gets p_mid;
    # the first four states share what is left
    p_other = (1 - 4*fs.p_mid)/4
    p_target = [p_other]*4 + [fs.p_mid]*4
    balance = to_gtr(cube, p_target)
    write_section('*** mutation-selection balance ***', balance)

    # the reference process lives on the square
    square = mrate.get_sparse_sequence_rate_matrix(2, 2)
    write_section('*** reference mutation rate matrix (square) ***', square)

    return out.getvalue().rstrip()
def do_mut_hyper_2_2(fs, to_gtr):
    """
    Write a three-section text report comparing the square to a path.
    Each section is a heading followed by a rate matrix summary.
    The sections are the 4-state square mutation rate matrix,
    the matrix returned by to_gtr for a target distribution that puts
    fs.p_mid on the last state and splits the rest evenly,
    and the 3-state path rate matrix used as a reference.
    The fs argument must provide the attribute p_mid.
    The to_gtr argument is called as to_gtr(rate_matrix, p_target).
    The returned report string has trailing whitespace removed.
    """
    out = StringIO()

    def write_section(title, rate_matrix):
        # heading, blank line, matrix summary, then two blank lines
        print >> out, title
        print >> out
        print >> out, get_rate_matrix_summary(rate_matrix)
        print >> out
        print >> out

    # the mutation process on the square
    square = mrate.get_sparse_sequence_rate_matrix(2, 2)
    write_section('*** mutation rate matrix (4-state square) ***', square)

    # the last state gets p_mid and the other three share the remainder
    p_other = (1 - fs.p_mid)/3
    p_target = (p_other,)*3 + (fs.p_mid,)
    balance = to_gtr(square, p_target)
    write_section('*** mutation-selection balance ***', balance)

    # the reference process lives on the path
    path = mrate.get_path_rate_matrix(3)
    write_section(
            '*** reference mutation rate matrix (3-state path) ***', path)

    return out.getvalue().rstrip()
def get_table_string_and_scripts(fs):
    """
    Sample mutation-selection processes and summarize them over time.
    The fs argument provides nresidues, nsites, nselections,
    the variance flags (low_var, medium_var, high_var, really_high_var),
    the skew flags (neg_skew, no_skew, pos_skew),
    and the time grid parameters t_low, t_high and ntimes.
    A ValueError is raised when nresidues**nsites exceeds 256.
    The return value is the pair (table_string, scripts) where
    scripts is a list of three scripts: band, prop and cross.
    """
    nstates = fs.nresidues**fs.nsites
    if nstates > 256:
        raise ValueError('the mutation rate matrix is too big')
    # the mutation process shared by all of the sampled processes
    Q_mut = mrate.get_sparse_sequence_rate_matrix(fs.nresidues, fs.nsites)
    # build one mutation-selection rate matrix per requested sample
    Q_sels = []
    for sample_index in range(fs.nselections):
        # the variance is selected by the flags on fs
        if fs.low_var:
            variance = 0.2
        elif fs.medium_var:
            variance = 1
        elif fs.high_var:
            variance = 5.0
        elif fs.really_high_var:
            variance = 25.0
        scale = math.sqrt(variance)
        # draw one selection parameter per state with the requested skew
        if fs.neg_skew:
            sels = [-random.expovariate(1 / scale) for k in range(nstates)]
        elif fs.no_skew:
            sels = [random.gauss(0, scale) for k in range(nstates)]
        elif fs.pos_skew:
            sels = [random.expovariate(1 / scale) for k in range(nstates)]
        # scale each mutation rate by its Halpern-Bruno coefficient
        Q = np.zeros_like(Q_mut)
        for src in range(nstates):
            for dst in range(nstates):
                if src == dst:
                    continue
                tau = math.exp(-(sels[dst] - sels[src]))
                coeff = math.log(tau) / (1 - 1 / tau)
                Q[src, dst] = Q_mut[src, dst] * coeff
            # the diagonal entry is still zero here,
            # so this makes the finished row sum to zero
            Q[src, src] = -np.sum(Q[src])
        Q_sels.append(Q)
    # evenly spaced time points from t_low to t_high inclusive
    step = (fs.t_high - fs.t_low) / (fs.ntimes - 1)
    times = [fs.t_low + k * step for k in range(fs.ntimes)]
    # each time point summary is a pair (sign list, statistics)
    nsels = len(Q_sels)
    summaries = [get_time_point_summary(Q_mut, Q_sels, t) for t in times]
    mi_sign_lists, time_stats = zip(*summaries)
    # for each selection sample count the sign changes
    # between consecutive time points
    ncrossing_list = [
            sum(1 for a, b in iterutils.pairwise(signs) if a != b)
            for signs in zip(*mi_sign_lists)]
    # get the R scripts
    band_script = get_r_band_script(nsels, time_stats)
    prop_script = get_r_prop_script(nsels, time_stats)
    cross_script = get_r_cross_script(ncrossing_list)
    scripts = [band_script, prop_script, cross_script]
    table_string = RUtil.get_table_string(time_stats, g_time_stats_headers)
    return table_string, scripts
def get_table_string_and_scripts(fs):
    """
    Sample mutation-selection processes and summarize them over time.
    The fs argument provides nresidues, nsites, nselections,
    the variance flags (low_var, medium_var, high_var, really_high_var),
    the skew flags (neg_skew, no_skew, pos_skew),
    and the time grid parameters t_low, t_high and ntimes.
    A ValueError is raised when nresidues ** nsites exceeds 256.
    The return value is the pair (table_string, scripts) where
    scripts is a list of three scripts: band, prop and cross.
    """
    nstates = fs.nresidues ** fs.nsites
    if nstates > 256:
        raise ValueError('the mutation rate matrix is too big')
    # the mutation process shared by all of the sampled processes
    Q_mut = mrate.get_sparse_sequence_rate_matrix(fs.nresidues, fs.nsites)
    # build one mutation-selection rate matrix per requested sample
    Q_sels = []
    for sample_index in range(fs.nselections):
        # the variance is selected by the flags on fs
        if fs.low_var:
            variance = 0.2
        elif fs.medium_var:
            variance = 1
        elif fs.high_var:
            variance = 5.0
        elif fs.really_high_var:
            variance = 25.0
        scale = math.sqrt(variance)
        # draw one selection parameter per state with the requested skew
        if fs.neg_skew:
            sels = [-random.expovariate(1/scale) for k in range(nstates)]
        elif fs.no_skew:
            sels = [random.gauss(0, scale) for k in range(nstates)]
        elif fs.pos_skew:
            sels = [random.expovariate(1/scale) for k in range(nstates)]
        # scale each mutation rate by its Halpern-Bruno coefficient
        Q = np.zeros_like(Q_mut)
        for src in range(nstates):
            for dst in range(nstates):
                if src == dst:
                    continue
                tau = math.exp(-(sels[dst] - sels[src]))
                coeff = math.log(tau) / (1 - 1/tau)
                Q[src, dst] = Q_mut[src, dst] * coeff
            # the diagonal entry is still zero here,
            # so this makes the finished row sum to zero
            Q[src, src] = -np.sum(Q[src])
        Q_sels.append(Q)
    # evenly spaced time points from t_low to t_high inclusive
    step = (fs.t_high - fs.t_low) / (fs.ntimes - 1)
    times = [fs.t_low + k*step for k in range(fs.ntimes)]
    # each time point summary is a pair (sign list, statistics)
    nsels = len(Q_sels)
    summaries = [get_time_point_summary(Q_mut, Q_sels, t) for t in times]
    mi_sign_lists, time_stats = zip(*summaries)
    # for each selection sample count the sign changes
    # between consecutive time points
    ncrossing_list = [
            sum(1 for a, b in iterutils.pairwise(signs) if a != b)
            for signs in zip(*mi_sign_lists)]
    # get the R scripts
    band_script = get_r_band_script(nsels, time_stats)
    prop_script = get_r_prop_script(nsels, time_stats)
    cross_script = get_r_cross_script(ncrossing_list)
    scripts = [band_script, prop_script, cross_script]
    table_string = RUtil.get_table_string(time_stats, g_time_stats_headers)
    return table_string, scripts
def get_table_string_and_scripts(fs):
    """
    Sample mutation-selection processes and summarize them over time.
    The returned scripts are meant to be R scripts that each generate
    one tikz piece for the body of a latex document.
    The fs argument provides nresidues, nsites, nselections,
    the variance flags (low_var, medium_var, high_var, really_high_var),
    the skew flags (neg_skew, no_skew, pos_skew),
    and the time grid parameters t_low, t_high and ntimes.
    A ValueError is raised when nresidues ** nsites exceeds 256.
    The return value is the pair (table_string, scripts) where
    scripts is a list of three scripts: corr, prop and info.
    """
    nstates = fs.nresidues ** fs.nsites
    if nstates > 256:
        raise ValueError("the mutation rate matrix is too big")
    # the mutation process shared by all of the sampled processes
    Q_mut = mrate.get_sparse_sequence_rate_matrix(fs.nresidues, fs.nsites)
    # build one mutation-selection rate matrix per requested sample
    Q_sels = []
    for sample_index in range(fs.nselections):
        # the variance is selected by the flags on fs
        if fs.low_var:
            variance = 0.2
        elif fs.medium_var:
            variance = 1
        elif fs.high_var:
            variance = 5.0
        elif fs.really_high_var:
            variance = 25.0
        scale = math.sqrt(variance)
        # draw one selection parameter per state with the requested skew
        if fs.neg_skew:
            sels = [-random.expovariate(1 / scale) for k in range(nstates)]
        elif fs.no_skew:
            sels = [random.gauss(0, scale) for k in range(nstates)]
        elif fs.pos_skew:
            sels = [random.expovariate(1 / scale) for k in range(nstates)]
        # scale each mutation rate by its Halpern-Bruno coefficient
        Q = np.zeros_like(Q_mut)
        for src in range(nstates):
            for dst in range(nstates):
                if src == dst:
                    continue
                tau = math.exp(-(sels[dst] - sels[src]))
                coeff = math.log(tau) / (1 - 1 / tau)
                Q[src, dst] = Q_mut[src, dst] * coeff
            # the diagonal entry is still zero here,
            # so this makes the finished row sum to zero
            Q[src, src] = -np.sum(Q[src])
        Q_sels.append(Q)
    # evenly spaced time points from t_low to t_high inclusive
    step = (fs.t_high - fs.t_low) / (fs.ntimes - 1)
    times = [fs.t_low + k * step for k in range(fs.ntimes)]
    # one summary per time point
    nsels = len(Q_sels)
    time_stats = []
    for t in times:
        time_stats.append(get_time_point_summary(Q_mut, Q_sels, t))
    # get the R scripts; the mutual information plot is disabled
    # get_r_tikz_mi_plot(nsels, time_stats)
    corr_script = get_r_tikz_corr_plot(nsels, time_stats)
    prop_script = get_r_tikz_prop_plot(nsels, time_stats)
    info_script = get_r_tikz_info_plot(nsels, time_stats)
    scripts = [corr_script, prop_script, info_script]
    table_string = RUtil.get_table_string(time_stats, g_time_stats_headers)
    return table_string, scripts
def get_table_string_and_scripts(fs):
    """
    Sample mutation-selection processes and summarize them over time.
    The returned scripts are meant to be R scripts that each generate
    one tikz piece for the body of a latex document.
    The fs argument provides nresidues, nsites, nselections,
    the variance flags (low_var, medium_var, high_var, really_high_var),
    the skew flags (neg_skew, no_skew, pos_skew),
    and the time grid parameters t_low, t_high and ntimes.
    A ValueError is raised when nresidues ** nsites exceeds 256.
    The return value is the pair (table_string, scripts) where
    scripts is a list of three scripts: corr, prop and info.
    """
    nstates = fs.nresidues ** fs.nsites
    if nstates > 256:
        raise ValueError('the mutation rate matrix is too big')
    # the mutation process shared by all of the sampled processes
    Q_mut = mrate.get_sparse_sequence_rate_matrix(fs.nresidues, fs.nsites)
    # build one mutation-selection rate matrix per requested sample
    Q_sels = []
    for sample_index in range(fs.nselections):
        # the variance is selected by the flags on fs
        if fs.low_var:
            variance = 0.2
        elif fs.medium_var:
            variance = 1
        elif fs.high_var:
            variance = 5.0
        elif fs.really_high_var:
            variance = 25.0
        scale = math.sqrt(variance)
        # draw one selection parameter per state with the requested skew
        if fs.neg_skew:
            sels = [-random.expovariate(1/scale) for k in range(nstates)]
        elif fs.no_skew:
            sels = [random.gauss(0, scale) for k in range(nstates)]
        elif fs.pos_skew:
            sels = [random.expovariate(1/scale) for k in range(nstates)]
        # scale each mutation rate by its Halpern-Bruno coefficient
        Q = np.zeros_like(Q_mut)
        for src in range(nstates):
            for dst in range(nstates):
                if src == dst:
                    continue
                tau = math.exp(-(sels[dst] - sels[src]))
                coeff = math.log(tau) / (1 - 1/tau)
                Q[src, dst] = Q_mut[src, dst] * coeff
            # the diagonal entry is still zero here,
            # so this makes the finished row sum to zero
            Q[src, src] = -np.sum(Q[src])
        Q_sels.append(Q)
    # evenly spaced time points from t_low to t_high inclusive
    step = (fs.t_high - fs.t_low) / (fs.ntimes - 1)
    times = [fs.t_low + k*step for k in range(fs.ntimes)]
    # one summary per time point
    nsels = len(Q_sels)
    time_stats = []
    for t in times:
        time_stats.append(get_time_point_summary(Q_mut, Q_sels, t))
    # get the R scripts; the mutual information plot is disabled
    #get_r_tikz_mi_plot(nsels, time_stats)
    corr_script = get_r_tikz_corr_plot(nsels, time_stats)
    prop_script = get_r_tikz_prop_plot(nsels, time_stats)
    info_script = get_r_tikz_info_plot(nsels, time_stats)
    scripts = [corr_script, prop_script, info_script]
    table_string = RUtil.get_table_string(time_stats, g_time_stats_headers)
    return table_string, scripts
def get_time_stats(fs):
    """
    Sample mutation-selection processes and summarize them over time.
    The fs argument provides nresidues, nsites, nselections,
    the variance flags (low_var, medium_var, high_var, really_high_var),
    the skew flags (neg_skew, no_skew, pos_skew),
    and the time grid parameters t_low, t_high and ntimes.
    A ValueError is raised when nresidues ** nsites exceeds 256.
    The return value is a list with one time point summary
    per time point, in increasing time index order.
    """
    nstates = fs.nresidues ** fs.nsites
    if nstates > 256:
        raise ValueError('the mutation rate matrix is too big')
    # the mutation process shared by all of the sampled processes
    Q_mut = mrate.get_sparse_sequence_rate_matrix(fs.nresidues, fs.nsites)
    # build one mutation-selection rate matrix per requested sample
    Q_sels = []
    for sample_index in range(fs.nselections):
        # the variance is selected by the flags on fs
        if fs.low_var:
            variance = 0.2
        elif fs.medium_var:
            variance = 1
        elif fs.high_var:
            variance = 5.0
        elif fs.really_high_var:
            variance = 25.0
        scale = math.sqrt(variance)
        # draw one selection parameter per state with the requested skew
        if fs.neg_skew:
            sels = [-random.expovariate(1/scale) for k in range(nstates)]
        elif fs.no_skew:
            sels = [random.gauss(0, scale) for k in range(nstates)]
        elif fs.pos_skew:
            sels = [random.expovariate(1/scale) for k in range(nstates)]
        # scale each mutation rate by its Halpern-Bruno coefficient
        Q = np.zeros_like(Q_mut)
        for src in range(nstates):
            for dst in range(nstates):
                if src == dst:
                    continue
                tau = math.exp(-(sels[dst] - sels[src]))
                coeff = math.log(tau) / (1 - 1/tau)
                Q[src, dst] = Q_mut[src, dst] * coeff
            # the diagonal entry is still zero here,
            # so this makes the finished row sum to zero
            Q[src, src] = -np.sum(Q[src])
        Q_sels.append(Q)
    # evenly spaced time points from t_low to t_high inclusive
    step = (fs.t_high - fs.t_low) / (fs.ntimes - 1)
    times = [fs.t_low + k*step for k in range(fs.ntimes)]
    # one summary per time point
    time_stats = []
    for t in times:
        time_stats.append(get_time_point_summary(Q_mut, Q_sels, t))
    return time_stats
def get_time_stats(fs):
    """
    Sample mutation-selection processes and summarize them over time.
    The fs argument provides nresidues, nsites, nselections,
    the variance flags (low_var, medium_var, high_var, really_high_var),
    the skew flags (neg_skew, no_skew, pos_skew),
    and the time grid parameters t_low, t_high and ntimes.
    A ValueError is raised when nresidues**nsites exceeds 256.
    The return value is a list with one time point summary
    per time point, in increasing time index order.
    """
    nstates = fs.nresidues**fs.nsites
    if nstates > 256:
        raise ValueError('the mutation rate matrix is too big')
    # the mutation process shared by all of the sampled processes
    Q_mut = mrate.get_sparse_sequence_rate_matrix(fs.nresidues, fs.nsites)
    # build one mutation-selection rate matrix per requested sample
    Q_sels = []
    for sample_index in range(fs.nselections):
        # the variance is selected by the flags on fs
        if fs.low_var:
            variance = 0.2
        elif fs.medium_var:
            variance = 1
        elif fs.high_var:
            variance = 5.0
        elif fs.really_high_var:
            variance = 25.0
        scale = math.sqrt(variance)
        # draw one selection parameter per state with the requested skew
        if fs.neg_skew:
            sels = [-random.expovariate(1 / scale) for k in range(nstates)]
        elif fs.no_skew:
            sels = [random.gauss(0, scale) for k in range(nstates)]
        elif fs.pos_skew:
            sels = [random.expovariate(1 / scale) for k in range(nstates)]
        # scale each mutation rate by its Halpern-Bruno coefficient
        Q = np.zeros_like(Q_mut)
        for src in range(nstates):
            for dst in range(nstates):
                if src == dst:
                    continue
                tau = math.exp(-(sels[dst] - sels[src]))
                coeff = math.log(tau) / (1 - 1 / tau)
                Q[src, dst] = Q_mut[src, dst] * coeff
            # the diagonal entry is still zero here,
            # so this makes the finished row sum to zero
            Q[src, src] = -np.sum(Q[src])
        Q_sels.append(Q)
    # evenly spaced time points from t_low to t_high inclusive
    step = (fs.t_high - fs.t_low) / (fs.ntimes - 1)
    times = [fs.t_low + k * step for k in range(fs.ntimes)]
    # one summary per time point
    time_stats = []
    for t in times:
        time_stats.append(get_time_point_summary(Q_mut, Q_sels, t))
    return time_stats
def get_response_content(fs):
    """
    Write a text report comparing a mutation process to one
    randomly sampled mutation-selection balance process.
    The report shows the RateProperties of both rate matrices,
    a naive crossing time, and two time bounds after which the
    process with the greater relaxation time is reported to have
    greater mutual information.
    The fs argument provides nresidues and nsites and is also
    passed along to sample_rate_matrix.
    A ValueError is raised when nresidues ** nsites exceeds 256.
    The return value is the report string.
    """
    nstates = fs.nresidues ** fs.nsites
    if nstates > 256:
        raise ValueError('the mutation rate matrix is too big')
    # the mutation process and the single random selection process
    # which is used from here on
    Q_mut = mrate.get_sparse_sequence_rate_matrix(fs.nresidues, fs.nsites)
    Q_sel = sample_rate_matrix(fs, Q_mut)
    # time points are not used by this report
    #incr = (fs.t_high - fs.t_low) / (fs.ntimes - 1)
    #times = [fs.t_low + i*incr for i in range(fs.ntimes)]
    mut_info = RateProperties(Q_mut)
    sel_info = RateProperties(Q_sel)
    # intersection time
    log_term = math.log(2 * nstates - 1)
    lam_gap = abs(mut_info.lam - sel_info.lam)
    x_time = log_term / (2 * lam_gap)
    # upper bound on the judgement time
    T_second_order = max(
            x_time,
            mut_info.time_to_usefulness,
            sel_info.time_to_usefulness)
    # the process with the greater relaxation time eventually wins
    if mut_info.relaxation_time > sel_info.relaxation_time:
        slow_info, fast_info = mut_info, sel_info
        winner = 'mutation'
    else:
        slow_info, fast_info = sel_info, mut_info
        winner = 'mutation-selection balance'
    eventual_winner_name = 'the %s process' % winner
    # a more sophisticated bound; None is reported as solver failure
    third_order_x_time = ctmcmitaylor.get_sophisticated_time_bound(
            -slow_info.lam, -fast_info.lam,
            slow_info.N, fast_info.N,
            slow_info.p, fast_info.p)
    T_third_order = None
    if third_order_x_time is not None:
        T_third_order = max(
                third_order_x_time,
                mut_info.time_to_uniformity,
                sel_info.time_to_uniformity)
    # Naive crossing time.
    # This does not bound the true mutual information doomsday,
    # but it shows a limit of the approach.
    # It is the bound on the spectral taylor approximation
    # given only the second eigenvalues and not the other ones.
    naive_x_time = math.log(nstates - 1) / (2 * lam_gap)
    # write the report
    np.set_printoptions(linewidth=200)
    out = StringIO()

    def heading(title):
        # section title followed by a blank line
        print >> out, title
        print >> out

    def gap():
        # two blank lines between sections
        print >> out
        print >> out

    heading('*** mutation rate matrix info ***')
    print >> out, mut_info
    gap()
    heading('*** mutation-selection balance rate matrix info ***')
    print >> out, sel_info
    gap()
    heading('*** note ***')
    print >> out, 'with the general approach taken here,'
    print >> out, 'we will not find an eigenvalue time bound'
    print >> out, 'smaller than', naive_x_time
    gap()
    heading('*** weak inequality ***')
    print >> out, 'When t >', T_second_order, eventual_winner_name
    print >> out, 'has greater mutual information (MI) and approximate MI.'
    gap()
    heading('*** stronger inequality ***')
    if T_third_order is None:
        print >> out, 'the numerical solver failed to converge'
    else:
        print >> out, 'When t >', T_third_order, eventual_winner_name
        print >> out, 'has greater mutual information (MI) and approximate MI.'
    return out.getvalue()