def get_backward_info(N_diploid, theta, Nr, Ns):
    """
    Compute expectations and variances for the two substitution pathways.
    Here backward is somewhat of a misnomer;
    it is meant as a contrast to forward simulation.
    A single transition matrix P is assembled as the matrix product of
    the mutation, recombination, and selection matrices (in that order)
    and is passed to wfbckcompens to summarize each pathway.
    @param N_diploid: diploid population size
    @param theta: a mutation rate; the expected number of mutation
        events per generation is taken to be theta / 2
    @param Nr: a recombination rate
    @param Ns: a selection value; the selection coefficient used is
        Ns / N_diploid
    @return: (t1, v1), (t2, v2) where the first pair is the result of
        wfbckcompens.get_type_1_info(P) and the second pair is the
        result of wfbckcompens.get_type_2_info(P)
    """
    # set up the state space
    k = 4
    M = multinomstate.get_sorted_states(2 * N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    lmcs = wfengine.get_lmcs(M)
    # compute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # compute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2 * N_diploid) ** 2))
    # Compute the expected number of mutation events per generation.
    # Dividing by a float keeps an integer theta from being truncated
    # when true division is not in effect (Python 2).
    mu = theta / 2.0
    # compute the mutation matrix
    # and the product of mutation and recombination.
    M_prob = linalg.expm(mu * M_rate / float(2 * 2 * N_diploid))
    MR_prob = np.dot(M_prob, R_prob)
    # compute the selection coefficient
    s = Ns / float(N_diploid)
    lps = wfcompens.create_selection(s, M)
    # NOTE(review): the create_genic result is exponentiated here,
    # so it is presumably a matrix of log probabilities -- confirm.
    S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
    # full transition matrix: mutation, then recombination, then selection
    P = np.dot(MR_prob, S_prob)
    # summarize each of the two pathways from the transition matrix
    t1, v1 = wfbckcompens.get_type_1_info(P)
    t2, v2 = wfbckcompens.get_type_2_info(P)
    return (t1, v1), (t2, v2)
def get_backward_info(N_diploid, theta, Nr, Ns):
    """
    Compute expectations and variances for the two substitution pathways.
    Here backward is somewhat of a misnomer;
    it is meant as a contrast to forward simulation.
    A single transition matrix P is assembled as the matrix product of
    the mutation, recombination, and selection matrices (in that order)
    and is passed to wfbckcompens to summarize each pathway.
    @param N_diploid: diploid population size
    @param theta: a mutation rate; the expected number of mutation
        events per generation is taken to be theta / 2
    @param Nr: a recombination rate
    @param Ns: a selection value; the selection coefficient used is
        Ns / N_diploid
    @return: (t1, v1), (t2, v2) where the first pair is the result of
        wfbckcompens.get_type_1_info(P) and the second pair is the
        result of wfbckcompens.get_type_2_info(P)
    """
    # set up the state space
    k = 4
    M = multinomstate.get_sorted_states(2*N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    lmcs = wfengine.get_lmcs(M)
    # compute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # compute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2*N_diploid)**2))
    # Compute the expected number of mutation events per generation.
    # Dividing by a float keeps an integer theta from being truncated
    # when true division is not in effect (Python 2).
    mu = theta / 2.0
    # compute the mutation matrix
    # and the product of mutation and recombination.
    M_prob = linalg.expm(mu * M_rate / float(2*2*N_diploid))
    MR_prob = np.dot(M_prob, R_prob)
    # compute the selection coefficient
    s = Ns / float(N_diploid)
    lps = wfcompens.create_selection(s, M)
    # NOTE(review): the create_genic result is exponentiated here,
    # so it is presumably a matrix of log probabilities -- confirm.
    S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
    # full transition matrix: mutation, then recombination, then selection
    P = np.dot(MR_prob, S_prob)
    # summarize each of the two pathways from the transition matrix
    t1, v1 = wfbckcompens.get_type_1_info(P)
    t2, v2 = wfbckcompens.get_type_2_info(P)
    return (t1, v1), (t2, v2)
def get_plot_array(N_diploid, theta, Nr_values, Ns_values):
    """
    Compute a grid of simulated t statistics, one per (Nr, Ns) pair.
    For each pair a transition matrix P is assembled as the matrix
    product of the mutation, recombination, and selection matrices
    (in that order).  The pathway summaries (t1, v1) and (t2, v2) are
    obtained from wfbckcompens, the probability of each pathway is
    derived from P, and a t statistic for the difference between the
    two waiting times is estimated by sampling waiting times that are
    assumed to be normally distributed.
    The sampling draws from the global numpy random state,
    so the output varies between calls unless that state is seeded.
    @param N_diploid: diploid population size
    @param theta: mutation rate; the expected number of mutation
        events per generation is taken to be theta / 2
    @param Nr_values: recombination rates
    @param Ns_values: selection values
    @return: arr[i][j] gives the t statistic for Nr_values[i]
        and Ns_values[j]
    """
    # Haplotype ordering is AB, Ab, aB, ab = 0, 1, 2, 3;
    # only the two endpoints are referenced below.
    AB, ab = 0, 3
    # initialize the state space
    N_hap = 2 * N_diploid
    k = 4
    M = multinomstate.get_sorted_states(N_hap, k)
    nstates = M.shape[0]
    # compute the inverse map
    T = multinomstate.get_inverse_map(M)
    lmcs = wfengine.get_lmcs(M)
    # precompute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # Compute the expected number of mutation events per generation.
    # Dividing by a float keeps an integer theta from being truncated
    # when true division is not in effect (Python 2).
    mu = theta / 2.0
    # Precompute the mutation matrix
    M_prob = linalg.expm(mu * M_rate / float(2*2*N_diploid))
    # Loop invariants: the identity used by the linear solve below
    # and the number of simulated waiting times per grid cell.
    identity = np.eye(nstates - k)
    nsamples = 500
    arr = []
    for Nr in Nr_values:
        # precompute a recombination probability matrix
        R_prob = linalg.expm(Nr * R_rate / float((2*N_diploid)**2))
        # precompute the product of mutation and recombination.
        MR_prob = np.dot(M_prob, R_prob)
        row = []
        for Ns in Ns_values:
            s = Ns / float(N_diploid)
            lps = wfcompens.create_selection(s, M)
            # NOTE(review): the create_genic result is exponentiated,
            # so it is presumably a matrix of log probabilities -- confirm.
            S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
            P = np.dot(MR_prob, S_prob)
            # summaries of the two pathways; used below as the mean
            # and variance of each pathway's waiting time
            t1, v1 = wfbckcompens.get_type_1_info(P)
            t2, v2 = wfbckcompens.get_type_2_info(P)
            # Reduce P to its first k states using the
            # stochastic complement.
            # NOTE(review): this treats the first k states as the fixed
            # states, in haplotype order -- confirm against the ordering
            # produced by multinomstate.get_sorted_states.
            X = linalg.solve(identity - P[k:, k:], P[k:, :k])
            H = P[:k, :k] + np.dot(P[:k, k:], X)
            # This is not yet the probability of type 2;
            # it is the probability of a direct transition from AB to ab
            # in the reduced chain, given that the chain leaves AB.
            p_direct = H[AB, ab] / (1 - H[AB, AB])
            # The following line is Equation (1) of the Nasrallah manuscript.
            p_t2 = (2*p_direct) / (1 + p_direct)
            p_t1 = 1 - p_t2
            # Just do a simulation,
            # assuming that the wait times are normally distributed.
            # The draw order (binomial, type 1 normals, type 2 normals)
            # determines how the random stream is consumed.
            n1 = np.random.binomial(nsamples, p_t1)
            n2 = nsamples - n1
            X1 = np.random.normal(t1, math.sqrt(v1), n1)
            X2 = np.random.normal(t2, math.sqrt(v2), n2)
            X_pooled = np.hstack((X1, X2))
            # NOTE: if n1 or n2 is zero the corresponding sample is empty
            # and its mean (and therefore the statistic) is nan.
            x = np.mean(X1) - np.mean(X2)
            # standard error from the variance of the pooled sample
            s_pooled = math.sqrt(np.var(X_pooled) / nsamples)
            t_statistic = x / s_pooled
            row.append(t_statistic)
        arr.append(row)
    return arr