Example #1
def create_mutation_transition_matrix(npop, mutation_ab, mutation_ba):
    """
    The states are indexed by the number of mutants.
    @param npop: total population size
    @param mutation_ab: wild-type to mutant transition probability
    @param mutation_ba: mutant to wild-type transition probability
    @return: a transition matrix
    """
    StatsUtil.assert_probability(mutation_ab)
    StatsUtil.assert_probability(mutation_ba)
    nstates = npop + 1
    P = np.zeros((nstates, nstates))
    for a in range(nstates):
        for n_mut_to_wild in range(a + 1):
            ba_observed_n = n_mut_to_wild
            ba_max_n = a
            ba_p_success = mutation_ba
            ba_log_p = StatsUtil.binomial_log_pmf(ba_observed_n, ba_max_n,
                                                  ba_p_success)
            for n_wild_to_mut in range(npop - a + 1):
                ab_observed_n = n_wild_to_mut
                ab_max_n = npop - a
                ab_p_success = mutation_ab
                ab_log_p = StatsUtil.binomial_log_pmf(ab_observed_n, ab_max_n,
                                                      ab_p_success)
                # combine the two independent binomial log-probabilities
                p = math.exp(ba_log_p + ab_log_p)
                b = a + n_wild_to_mut - n_mut_to_wild
                P[a, b] += p
    return P
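
All of these examples lean on StatsUtil.binomial_log_pmf, which is not shown here. The snippet below is a minimal sketch, assuming the helper computes the log of the binomial probability mass function; scipy.stats is used purely for illustration and is not part of the original module. It also illustrates why each row of the matrix above sums to one: the row accumulates the product of two complete binomial distributions.

import numpy as np
from scipy.stats import binom

def binomial_log_pmf(observed_n, max_n, p_success):
    # Presumed behavior: log P(X = observed_n) for X ~ Binomial(max_n, p_success).
    return binom.logpmf(observed_n, max_n, p_success)

# Exponentiated log-probabilities over the full support sum to one.
probs = np.exp([binomial_log_pmf(x, 10, 0.3) for x in range(11)])
assert np.allclose(probs.sum(), 1.0)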
Example #2
def get_transition_matrix_slow(N_diploid, k, mutation, fit):
    """
    Mutation probabilities are away from a fixed state.
    @param N_diploid: diploid population size
    @param k: number of alleles e.g. 4 for A,C,G,T
    @param mutation: k by k matrix of per-generation mutation probabilities
    @param fit: sequence of k fitness values
    @return: a transition matrix
    """
    N = N_diploid * 2
    states = [tuple(s) for s in gen_states(N, k)]
    nstates = len(states)
    s_to_i = dict((s, i) for i, s in enumerate(states))
    P = np.zeros((nstates, nstates))
    # Add rows corresponding to transitions from population states
    # for which an allele is currently fixed in the population.
    for i in range(k):
        P[i, i] = mutation[i, i]
        for j in range(k):
            if i == j:
                continue
            state = [0] * k
            state[i] = N - 1
            state[j] = 1
            P[i, s_to_i[tuple(state)]] = mutation[i, j]
    # Add rows corresponding to transitions from polymorphic population states.
    for i, j in combinations(range(k), 2):
        for h in range(1, N):
            state = [0] * k
            state[i] = h
            state[j] = N - h
            index = s_to_i[tuple(state)]
            # Compute each child's probability of carrying allele i (pi)
            # or allele j (pj), using the relative selection coefficient s.
            #pi, pj = wrightfisher.genic_diallelic(fit[i], fit[j], h, N-h)
            #s = fit[i] - fit[j]
            s = 1 - fit[j] / fit[i]
            pi, pj = wrightfisher.genic_diallelic(1.0, 1.0 - s, h, N - h)
            # Add entries corresponding to fixation of an allele.
            P[index, i] = math.exp(StatsUtil.binomial_log_pmf(N, N, pi))
            P[index, j] = math.exp(StatsUtil.binomial_log_pmf(0, N, pi))
            # Add entries corresponding to transitions to polymorphic states.
            for hsink in range(1, N):
                sink_state = [0] * k
                sink_state[i] = hsink
                sink_state[j] = N - hsink
                sink_index = s_to_i[tuple(sink_state)]
                logp = StatsUtil.binomial_log_pmf(hsink, N, pi)
                P[index, sink_index] = math.exp(logp)
    return P
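
gen_states is not shown in this excerpt. Judging from how the matrix is indexed, it apparently yields the k monomorphic (fixed) states first, followed by every diallelic polymorphic state. The sketch below is an assumption consistent with that indexing, not the original implementation.

from itertools import combinations

def gen_states(N, k):
    # Assumed ordering: the k fixed states come first, so that state index
    # i < k means "allele i is fixed", matching the lookups above.
    for i in range(k):
        state = [0] * k
        state[i] = N
        yield state
    # Then every diallelic polymorphic state.
    for i, j in combinations(range(k), 2):
        for h in range(1, N):
            state = [0] * k
            state[i] = h
            state[j] = N - h
            yield state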
Example #3
def get_two_allele_distribution(N_big, N_small, f0, f1, f_subsample):
    """
    Assumes small genic selection.
    Assumes small mutation.
    The mutational bias does not affect the distribution.
    @param N_big: total number of alleles in the population
    @param N_small: number of alleles sampled from the population
    @param f0: fitness of allele 0
    @param f1: fitness of allele 1
    @param f_subsample: subsampling function
    @return: distribution over all non-fixed population states
    """
    # construct a transition matrix
    nstates = N_big + 1
    P = np.zeros((nstates, nstates))
    for i in range(nstates):
        p0, p1 = wrightfisher.genic_diallelic(f0, f1, i, N_big - i)
        if i == 0:
            P[i, 1] = 1.0
        elif i == N_big:
            P[i, N_big - 1] = 1.0
        else:
            for j in range(nstates):
                logp = StatsUtil.binomial_log_pmf(j, N_big, p0)
                P[i, j] = math.exp(logp)
    # find the stationary distribution
    v = MatrixUtil.get_stationary_distribution(P)
    MatrixUtil.assert_distribution(v)
    if not np.allclose(v, np.dot(v, P)):
        raise ValueError('expected a left eigenvector with eigenvalue 1')
    # return the stationary distribution conditional on dimorphism
    print v
    distn = f_subsample(v, N_small)
    return distn[1:-1] / np.sum(distn[1:-1])
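
MatrixUtil.get_stationary_distribution is another helper that is not shown. A minimal sketch, assuming it returns the left eigenvector of P with eigenvalue one, normalized to a probability distribution:

import numpy as np

def get_stationary_distribution(P):
    # Left eigenvector of P associated with eigenvalue 1, scaled to sum to one.
    w, V = np.linalg.eig(P.T)
    v = np.real(V[:, np.argmin(np.abs(w - 1.0))])
    return v / v.sum()

# For example, a two-state chain:
# get_stationary_distribution(np.array([[0.9, 0.1], [0.2, 0.8]]))
# gives approximately [2/3, 1/3].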
Example #4
def get_response_content(fs):
    npop = fs.nB + fs.nb
    nstates = npop + 1
    # Check the complexity;
    # solving a system of linear equations takes about n^3 effort.
    if nstates ** 3 > 1e6:
        raise ValueError('sorry this population size is too large')
    # Compute the transition matrix.
    # This assumes no mutation or selection or recombination.
    # It is pure binomial.
    P = np.zeros((nstates, nstates))
    for i in range(nstates):
        nB_initial = i
        for j in range(nstates):
            nB_final = j
            log_p = StatsUtil.binomial_log_pmf(
                    nB_final, npop, nB_initial / float(npop))
            P[i, j] = math.exp(log_p)
    # Put the puzzle into the form Ax=b
    # so that it can be solved by a generic linear solver.
    A = P - np.eye(nstates)
    b = np.zeros(nstates)
    # Adjust the matrix to disambiguate absorbing states.
    A[0, 0] = 1.0
    A[npop, npop] = 1.0
    b[0] = 0.0
    b[npop] = 1.0
    # Solve Ax=b for x.
    x = linalg.solve(A, b)
    # Print the solution.
    out = StringIO()
    print >> out, 'probability of eventual fixation (as opposed to extinction)'
    print >> out, 'of allele B in the population:'
    print >> out, x[fs.nB]
    return out.getvalue()
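
The same fixation-probability calculation can be sketched in a self-contained way for a small neutral population, with scipy.stats.binom standing in for StatsUtil.binomial_log_pmf (an assumption about that helper; the fs object above appears to be a form-values object from the surrounding module).

import numpy as np
from scipy import linalg
from scipy.stats import binom

npop = 10
nstates = npop + 1
# Pure-drift binomial transition matrix.
P = np.array([[binom.pmf(j, npop, i / float(npop)) for j in range(nstates)]
              for i in range(nstates)])
A = P - np.eye(nstates)
b = np.zeros(nstates)
A[0, 0] = 1.0          # extinction is absorbing: fixation probability 0
A[npop, npop] = 1.0    # fixation is absorbing: fixation probability 1
b[npop] = 1.0
x = linalg.solve(A, b)
print(x)  # under pure drift, approximately [0.0, 0.1, ..., 1.0]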
Example #5
def get_expected_transitions_binomial(prandom, nstates, nsteps):
    """
    This function is for transition matrices defined by their size and a single parameter.
    Use binomial coefficients to compute transition expectations.
    @param prandom: the probability of randomization at each step
    @param nstates: the number of states in the chain
    @param nsteps: one fewer than the length of the sequence
    @return: (expected_t_same, expected_t_different)
    """
    # handle corner cases
    if not nsteps:
        return 0.0, float('nan')
    if nsteps == 1:
        return 0.0, 1.0
    if not prandom:
        return 0.0, float('nan')
    # precalculate stuff
    p_notrans = prandom / nstates + (1 - prandom)
    p_any_trans = 1.0 - p_notrans
    # precalculate expected probability of each endpoint pair state
    prandom_total = 1 - (1 - prandom)**nsteps
    p_notrans_total = prandom_total / nstates + (1 - prandom_total)
    # initialize expectations
    e_same = 0
    e_different = 0
    # define expectations
    for ntrans in range(nsteps+1):
        log_p_ntrans = StatsUtil.binomial_log_pmf(ntrans, nsteps, p_any_trans)
        p_ntrans = math.exp(log_p_ntrans)
        p_same = (1 - (1 - nstates)**(1 - ntrans))/nstates
        e_same += p_same * p_ntrans * ntrans
        e_different += (1 - p_same) * p_ntrans * ntrans
    e_same /= p_notrans_total
    e_different /= (1 - p_notrans_total)
    return e_same, e_different
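
As a cross-check of the binomial computation, the same conditional expectations can be estimated by simulating the randomizing chain directly. The simulation below is my own construction under the model described in the docstring, not part of the original module.

import random

def simulate_expected_transitions(prandom, nstates, nsteps, nsamples=100000):
    # Estimate E[#state changes | endpoints same] and
    # E[#state changes | endpoints differ] by direct simulation.
    same_sum = same_count = diff_sum = diff_count = 0
    for _ in range(nsamples):
        state = 0
        ntrans = 0
        for _ in range(nsteps):
            if random.random() < prandom:
                new_state = random.randrange(nstates)
            else:
                new_state = state
            if new_state != state:
                ntrans += 1
            state = new_state
        if state == 0:
            same_sum += ntrans
            same_count += 1
        else:
            diff_sum += ntrans
            diff_count += 1
    return same_sum / float(same_count), diff_sum / float(diff_count)

# e.g. simulate_expected_transitions(0.4, 4, 8) should roughly agree with
# get_expected_transitions_binomial(0.4, 4, 8).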
Example #6
def create_drift_selection_transition_matrix(npop, selection_ratio):
    """
    The states are indexed by the number of mutants.
    @param npop: total population size
    @param selection_ratio: a value larger than unity means mutants are fitter
    @return: a transition matrix
    """
    nstates = npop + 1
    P = np.zeros((nstates, nstates))
    for a in range(nstates):
        # compute the i.i.d. probability of picking a mutant
        p = (selection_ratio * a) / (selection_ratio * a + (npop - a))
        for b in range(nstates):
            # These are from a binomial distribution
            # with npop trials and p probability of success per trial.
            # (n choose k) p^k (1-p)^(n-k)
            observed_n = b
            max_n = npop
            p_success = p
            P[a, b] = math.exp(
                StatsUtil.binomial_log_pmf(observed_n, max_n, p_success))
    return P
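
A quick numerical illustration of the selection term, with scipy.stats.binom standing in for StatsUtil.binomial_log_pmf (again an assumption about that helper): when selection_ratio is above one, the expected number of mutants in the next generation exceeds the current count.

import numpy as np
from scipy.stats import binom

npop, a, selection_ratio = 100, 30, 1.2
p = (selection_ratio * a) / (selection_ratio * a + (npop - a))
row = binom.pmf(np.arange(npop + 1), npop, p)
print(row.sum())                          # each row is a proper distribution: 1.0
print(np.dot(row, np.arange(npop + 1)))   # expected mutant count, about 34 > 30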