def create_mutation_transition_matrix(npop, mutation_ab, mutation_ba):
    """
    Build the transition matrix for a single mutation step.

    The states are indexed by the number of mutants.
    From each state, the number of mutants that revert and the number of
    wild-type individuals that mutate are each given a binomial
    probability, and the product of the two probabilities is accumulated
    onto the resulting mutant count.
    @param npop: total population size
    @param mutation_ab: wild-type to mutant transition probability
    @param mutation_ba: mutant to wild-type transition probability
    @return: an (npop+1) by (npop+1) transition matrix
    """
    StatsUtil.assert_probability(mutation_ab)
    StatsUtil.assert_probability(mutation_ba)
    size = npop + 1
    P = np.zeros((size, size))
    for n_mutant in range(size):
        n_wild = npop - n_mutant
        for n_reverted in range(n_mutant + 1):
            log_p_reverted = StatsUtil.binomial_log_pmf(
                    n_reverted, n_mutant, mutation_ba)
            for n_gained in range(n_wild + 1):
                log_p_gained = StatsUtil.binomial_log_pmf(
                        n_gained, n_wild, mutation_ab)
                # Different (reverted, gained) pairs can reach the same
                # destination, so the probabilities are summed.
                destination = n_mutant + n_gained - n_reverted
                P[n_mutant, destination] += math.exp(
                        log_p_reverted + log_p_gained)
    return P
def create_mutation_transition_matrix(npop, mutation_ab, mutation_ba):
    """
    Build the transition matrix for a single mutation step.

    The states are indexed by the number of mutants.
    From each state, the number of mutants that revert and the number of
    wild-type individuals that mutate are each given a binomial
    probability, and the product of the two probabilities is accumulated
    onto the resulting mutant count.
    @param npop: total population size
    @param mutation_ab: wild-type to mutant transition probability
    @param mutation_ba: mutant to wild-type transition probability
    @return: an (npop+1) by (npop+1) transition matrix
    """
    StatsUtil.assert_probability(mutation_ab)
    StatsUtil.assert_probability(mutation_ba)
    size = npop + 1
    P = np.zeros((size, size))
    for n_mutant in range(size):
        n_wild = npop - n_mutant
        for n_reverted in range(n_mutant + 1):
            log_p_reverted = StatsUtil.binomial_log_pmf(
                    n_reverted, n_mutant, mutation_ba)
            for n_gained in range(n_wild + 1):
                log_p_gained = StatsUtil.binomial_log_pmf(
                        n_gained, n_wild, mutation_ab)
                # Different (reverted, gained) pairs can reach the same
                # destination, so the probabilities are summed.
                destination = n_mutant + n_gained - n_reverted
                P[n_mutant, destination] += math.exp(
                        log_p_reverted + log_p_gained)
    return P
def get_transition_matrix_slow(N_diploid, k, mutation, fit):
    """
    Build a transition matrix over fixed and two-allele population states.

    Mutation probabilities are away from a fixed state.
    Rows for fixed states are filled from the mutation matrix, and rows
    for states carrying exactly two alleles are filled with binomial
    probabilities over the 2*N_diploid gene copies.
    NOTE(review): the first k states produced by gen_states(N, k) are used
    as the fixed states, in allele order -- confirm against gen_states.
    @param N_diploid: diploid population size
    @param k: number of alleles e.g. 4 for A,C,G,T
    @param mutation: k by k matrix of per-generation mutation probabilities
    @param fit: sequence of k fitness values
    @return: a transition matrix
    """
    N = N_diploid * 2
    states = [tuple(s) for s in gen_states(N, k)]
    nstates = len(states)
    s_to_i = dict((s, i) for i, s in enumerate(states))
    P = np.zeros((nstates, nstates))

    def two_allele_index(i, count_i, j, count_j):
        # Index of the state holding count_i copies of allele i,
        # count_j copies of allele j, and no copies of any other allele.
        state = [0] * k
        state[i] = count_i
        state[j] = count_j
        return s_to_i[tuple(state)]

    # Add rows corresponding to transitions from population states
    # for which an allele is currently fixed in the population.
    for i in range(k):
        P[i, i] = mutation[i, i]
        for j in range(k):
            if i != j:
                P[i, two_allele_index(i, N - 1, j, 1)] = mutation[i, j]

    # Add rows corresponding to transitions from polymorphic population states.
    for i, j in combinations(range(k), 2):
        # Express the fitness of allele j relative to allele i as 1 - s.
        # float() prevents truncating integer division under Python 2
        # when the fitness values are given as integers.
        s = 1 - float(fit[j]) / fit[i]
        for h in range(1, N):
            index = two_allele_index(i, h, j, N - h)
            # Only the first returned probability is needed; it is used
            # below as the per-copy probability of sampling allele i.
            pi, _pj = wrightfisher.genic_diallelic(1.0, 1.0 - s, h, N - h)
            # Add entries corresponding to fixation of an allele.
            P[index, i] = math.exp(StatsUtil.binomial_log_pmf(N, N, pi))
            P[index, j] = math.exp(StatsUtil.binomial_log_pmf(0, N, pi))
            # Add entries corresponding to transitions to polymorphic states.
            for hsink in range(1, N):
                sink_index = two_allele_index(i, hsink, j, N - hsink)
                logp = StatsUtil.binomial_log_pmf(hsink, N, pi)
                P[index, sink_index] = math.exp(logp)
    return P
def get_transition_matrix_slow(N_diploid, k, mutation, fit):
    """
    Build a transition matrix over fixed and two-allele population states.

    Mutation probabilities are away from a fixed state.
    Rows for fixed states are filled from the mutation matrix, and rows
    for states carrying exactly two alleles are filled with binomial
    probabilities over the 2*N_diploid gene copies.
    NOTE(review): the first k states produced by gen_states(N, k) are used
    as the fixed states, in allele order -- confirm against gen_states.
    @param N_diploid: diploid population size
    @param k: number of alleles e.g. 4 for A,C,G,T
    @param mutation: k by k matrix of per-generation mutation probabilities
    @param fit: sequence of k fitness values
    @return: a transition matrix
    """
    N = N_diploid * 2
    states = [tuple(s) for s in gen_states(N, k)]
    nstates = len(states)
    s_to_i = dict((s, i) for i, s in enumerate(states))
    P = np.zeros((nstates, nstates))

    def two_allele_index(i, count_i, j, count_j):
        # Index of the state holding count_i copies of allele i,
        # count_j copies of allele j, and no copies of any other allele.
        state = [0] * k
        state[i] = count_i
        state[j] = count_j
        return s_to_i[tuple(state)]

    # Add rows corresponding to transitions from population states
    # for which an allele is currently fixed in the population.
    for i in range(k):
        P[i, i] = mutation[i, i]
        for j in range(k):
            if i != j:
                P[i, two_allele_index(i, N - 1, j, 1)] = mutation[i, j]

    # Add rows corresponding to transitions from polymorphic population states.
    for i, j in combinations(range(k), 2):
        # Express the fitness of allele j relative to allele i as 1 - s.
        # float() prevents truncating integer division under Python 2
        # when the fitness values are given as integers.
        s = 1 - float(fit[j]) / fit[i]
        for h in range(1, N):
            index = two_allele_index(i, h, j, N - h)
            # Only the first returned probability is needed; it is used
            # below as the per-copy probability of sampling allele i.
            pi, _pj = wrightfisher.genic_diallelic(1.0, 1.0 - s, h, N - h)
            # Add entries corresponding to fixation of an allele.
            P[index, i] = math.exp(StatsUtil.binomial_log_pmf(N, N, pi))
            P[index, j] = math.exp(StatsUtil.binomial_log_pmf(0, N, pi))
            # Add entries corresponding to transitions to polymorphic states.
            for hsink in range(1, N):
                sink_index = two_allele_index(i, hsink, j, N - hsink)
                logp = StatsUtil.binomial_log_pmf(hsink, N, pi)
                P[index, sink_index] = math.exp(logp)
    return P
def get_two_allele_distribution(N_big, N_small, f0, f1, f_subsample):
    """
    Compute a subsampled stationary distribution conditional on dimorphism.

    Assumes small genic selection.
    Assumes small mutation.
    The mutational bias does not affect the distribution.
    A chain over the counts 0..N_big of one allele is built, its stationary
    distribution is passed through f_subsample, and the first and last
    entries of the result are dropped before renormalizing.
    @param N_big: total number of alleles in the population
    @param N_small: number of alleles sampled from the population
    @param f0: fitness of allele 0
    @param f1: fitness of allele 1
    @param f_subsample: subsampling function called as f_subsample(v, N_small)
    @return: distribution over all non-fixed population states
    @raise ValueError: if the computed vector is not a left eigenvector
    of the transition matrix with eigenvalue 1
    """
    # construct a transition matrix
    nstates = N_big + 1
    P = np.zeros((nstates, nstates))
    for i in range(nstates):
        if i == 0:
            # a fixed state always moves to the adjacent one-copy state
            P[i, 1] = 1.0
        elif i == N_big:
            P[i, N_big - 1] = 1.0
        else:
            # the sampling probability is only needed for non-fixed states
            p0, _p1 = wrightfisher.genic_diallelic(f0, f1, i, N_big - i)
            for j in range(nstates):
                logp = StatsUtil.binomial_log_pmf(j, N_big, p0)
                P[i, j] = math.exp(logp)
    # find the stationary distribution
    v = MatrixUtil.get_stationary_distribution(P)
    MatrixUtil.assert_distribution(v)
    if not np.allclose(v, np.dot(v, P)):
        raise ValueError('expected a left eigenvector with eigenvalue 1')
    # return the stationary distribution conditional on dimorphism
    distn = f_subsample(v, N_small)
    return distn[1:-1] / np.sum(distn[1:-1])
def get_response_content(fs):
    """
    Report the probability that allele B eventually fixes in the population.

    The transition matrix assumes no mutation or selection or
    recombination; it is pure binomial.
    @param fs: object providing the integer allele counts fs.nB and fs.nb
    @return: a three-line text report, each line ending with a newline
    @raise ValueError: if the population is empty or too large to solve
    """
    npop = fs.nB + fs.nb
    # An empty population would otherwise fail with a division by zero
    # while computing the initial allele frequency.
    if npop < 1:
        raise ValueError('the population must contain at least one allele')
    nstates = npop + 1
    # Check the complexity;
    # solving a system of linear equations takes about n^3 effort.
    if nstates ** 3 > 1e6:
        raise ValueError('sorry this population size is too large')
    # Compute the transition matrix.
    P = np.zeros((nstates, nstates))
    for nB_initial in range(nstates):
        p_B = nB_initial / float(npop)
        for nB_final in range(nstates):
            log_p = StatsUtil.binomial_log_pmf(nB_final, npop, p_B)
            P[nB_initial, nB_final] = math.exp(log_p)
    # Put the puzzle into the form Ax=b
    # so that it can be solved by a generic linear solver.
    A = P - np.eye(nstates)
    b = np.zeros(nstates)
    # Adjust the matrix to disambiguate absorbing states:
    # the intended solution has x[0] = 0 and x[npop] = 1.
    A[0, 0] = 1.0
    A[npop, npop] = 1.0
    b[0] = 0.0
    b[npop] = 1.0
    # Solve Ax=b for x.
    x = linalg.solve(A, b)
    # Assemble the report; each entry becomes one newline-terminated line.
    report_lines = [
            'probability of eventual fixation (as opposed to extinction)',
            'of allele B in the population:',
            str(x[fs.nB]),
            ]
    return '\n'.join(report_lines) + '\n'
def get_expected_transitions_binomial(prandom, nstates, nsteps):
    """
    Compute expected transition counts using binomial probabilities.

    This function is for transition matrices
    defined by their size and a single parameter.
    The number of transitions is weighted by a binomial probability over
    nsteps trials, and the two expectations are split according to
    whether the endpoint states are the same or different.
    @param prandom: the probability of randomization at each step
    @param nstates: the number of states in the chain
    @param nsteps: one fewer than the length of the sequence
    @return: (expected_t_same, expected_t_different)
    """
    # handle corner cases; a single step is checked before prandom
    if nsteps == 1:
        return 0.0, 1.0
    if not nsteps or not prandom:
        return 0.0, float('nan')
    # per-step probability that the state actually changes
    p_stay = prandom / nstates + (1 - prandom)
    p_move = 1.0 - p_stay
    # probability that the two endpoint states are the same
    p_randomized_by_end = 1 - (1 - prandom)**nsteps
    p_same_endpoints = (
            p_randomized_by_end / nstates + (1 - p_randomized_by_end))
    # accumulate the expectations over every possible transition count
    e_same = 0
    e_different = 0
    for ntrans in range(nsteps + 1):
        weight = math.exp(
                StatsUtil.binomial_log_pmf(ntrans, nsteps, p_move))
        p_same = (1 - (1 - nstates)**(1 - ntrans)) / nstates
        e_same += p_same * weight * ntrans
        e_different += (1 - p_same) * weight * ntrans
    # condition each expectation on its endpoint event
    return e_same / p_same_endpoints, e_different / (1 - p_same_endpoints)
def get_expected_transitions_binomial(prandom, nstates, nsteps):
    """
    Compute expected transition counts using binomial probabilities.

    This function is for transition matrices
    defined by their size and a single parameter.
    The number of transitions is weighted by a binomial probability over
    nsteps trials, and the two expectations are split according to
    whether the endpoint states are the same or different.
    @param prandom: the probability of randomization at each step
    @param nstates: the number of states in the chain
    @param nsteps: one fewer than the length of the sequence
    @return: (expected_t_same, expected_t_different)
    """
    # handle corner cases; a single step is checked before prandom
    if nsteps == 1:
        return 0.0, 1.0
    if not nsteps or not prandom:
        return 0.0, float('nan')
    # per-step probability that the state actually changes
    p_stay = prandom / nstates + (1 - prandom)
    p_move = 1.0 - p_stay
    # probability that the two endpoint states are the same
    p_randomized_by_end = 1 - (1 - prandom)**nsteps
    p_same_endpoints = (
            p_randomized_by_end / nstates + (1 - p_randomized_by_end))
    # accumulate the expectations over every possible transition count
    e_same = 0
    e_different = 0
    for ntrans in range(nsteps + 1):
        weight = math.exp(
                StatsUtil.binomial_log_pmf(ntrans, nsteps, p_move))
        p_same = (1 - (1 - nstates)**(1 - ntrans)) / nstates
        e_same += p_same * weight * ntrans
        e_different += (1 - p_same) * weight * ntrans
    # condition each expectation on its endpoint event
    return e_same / p_same_endpoints, e_different / (1 - p_same_endpoints)
def create_drift_selection_transition_matrix(npop, selection_ratio):
    """
    Build the one-generation transition matrix for drift with selection.

    The states are indexed by the number of mutants.
    Each row is filled with binomial probabilities over the next mutant
    count, using npop trials and a success probability equal to the
    selection-weighted fraction of mutants in the current state.
    @param npop: total population size
    @param selection_ratio: a value larger than unity means mutants are fitter
    @return: an (npop+1) by (npop+1) transition matrix
    """
    nstates = npop + 1
    P = np.zeros((nstates, nstates))
    for a in range(nstates):
        # compute the i.i.d probability of picking a mutant;
        # float() prevents truncating integer division under Python 2
        # when selection_ratio is given as an integer
        mutant_weight = float(selection_ratio * a)
        p = mutant_weight / (mutant_weight + (npop - a))
        for b in range(nstates):
            # These are from a binomial distribution
            # with npop trials and p probability of success per trial.
            # (n choose k) p^k (1-p)^(n-k)
            P[a, b] = math.exp(StatsUtil.binomial_log_pmf(b, npop, p))
    return P
def create_drift_selection_transition_matrix(npop, selection_ratio):
    """
    Build the one-generation transition matrix for drift with selection.

    The states are indexed by the number of mutants.
    Each row is filled with binomial probabilities over the next mutant
    count, using npop trials and a success probability equal to the
    selection-weighted fraction of mutants in the current state.
    @param npop: total population size
    @param selection_ratio: a value larger than unity means mutants are fitter
    @return: an (npop+1) by (npop+1) transition matrix
    """
    nstates = npop + 1
    P = np.zeros((nstates, nstates))
    for a in range(nstates):
        # compute the i.i.d probability of picking a mutant;
        # float() prevents truncating integer division under Python 2
        # when selection_ratio is given as an integer
        mutant_weight = float(selection_ratio * a)
        p = mutant_weight / (mutant_weight + (npop - a))
        for b in range(nstates):
            # These are from a binomial distribution
            # with npop trials and p probability of success per trial.
            # (n choose k) p^k (1-p)^(n-k)
            P[a, b] = math.exp(StatsUtil.binomial_log_pmf(b, npop, p))
    return P