Ejemplo n.º 1
0
 def __call__(self, X):
     """
     Evaluate the objective function at a parameter vector.
     Positive infinity is returned when theta, ka, or kb is negative.
     @param X: six params defining mutation and selection
     @return: negative log likelihood
     """
     # the number of alleles is fixed
     nalleles = 4
     # the parameter vector must hold exactly six values
     values = X.tolist()
     theta, ka, kb, g0, g1, g2 = values
     # reject negative values among the first three parameters
     if theta < 0 or ka < 0 or kb < 0:
         return float('inf')
     mutation, fitnesses = kaizeng.params_to_mutation_fitness(
         self.N, values)
     # stationary distribution of the transition matrix
     transition = kaizeng.get_transition_matrix(
         self.N, nalleles, mutation, fitnesses)
     stationary = MatrixUtil.get_stationary_distribution(transition)
     # negate the multinomial log pmf of the observed counts
     log_likelihood = StatsUtil.multinomial_log_pmf(
         stationary, self.observed_counts)
     return -log_likelihood
Ejemplo n.º 2
0
 def __call__(self, X):
     """
     Compute the negative log likelihood of the observed counts.
     The result is positive infinity if any of the first three
     params (theta, ka, kb) is below zero.
     @param X: six params defining mutation and selection
     @return: negative log likelihood
     """
     # convert the vector to a list of exactly six values
     param_list = X.tolist()
     theta, ka, kb, g0, g1, g2 = param_list
     # collect the negative entries among theta, ka, and kb
     negatives = [value for value in (theta, ka, kb) if value < 0]
     if negatives:
         return float('inf')
     # this model uses a hardcoded number of alleles
     allele_count = 4
     mutation, fitnesses = kaizeng.params_to_mutation_fitness(
             self.N, param_list)
     # the stationary distribution of the transition matrix
     # provides the multinomial probabilities
     distn = MatrixUtil.get_stationary_distribution(
             kaizeng.get_transition_matrix(
                 self.N, allele_count, mutation, fitnesses))
     return -StatsUtil.multinomial_log_pmf(distn, self.observed_counts)
Ejemplo n.º 3
0
def get_response_content(fs):
    """
    Time the steps of the likelihood computation and refit simulated data.
    A timing message is printed to stdout for each step.
    @param fs: not used by this function
    @return: text showing the array of optimized parameter vectors
    """
    np.set_printoptions(linewidth=200)
    # simulation settings
    nreplicates = 1
    nsites = 50000
    popsize = 15*2
    nalleles = 4
    true_params = (0.002, 1, 1, 0, 0, 0)
    # an alternative setting is (0.008, 1, 1, 0.5, 1, 1.5)
    mutation, fitnesses = kaizeng.params_to_mutation_fitness(
            popsize, true_params)
    # time the construction of the transition matrix
    start = time.time()
    P = kaizeng.get_transition_matrix(popsize, nalleles, mutation, fitnesses)
    print('time to construct transition matrix: %s' % (time.time() - start))
    # time the computation of the stationary distribution
    start = time.time()
    v = MatrixUtil.get_stationary_distribution(P)
    print('time to get stationary distribution: %s' % (time.time() - start))
    # time the sampling of the site counts
    start = time.time()
    counts = np.random.multinomial(nsites, v)
    print('time to sample multinomial counts: %s' % (time.time() - start))
    # time the evaluation of the log pmf; the value itself is discarded
    start = time.time()
    StatsUtil.multinomial_log_pmf(v, counts)
    print('time to get multinomial log pmf: %s' % (time.time() - start))
    # refit the parameters to freshly sampled counts,
    # starting the search at the parameters used for the simulation
    estimates = []
    for _ in range(nreplicates):
        sampled = np.random.multinomial(nsites, v)
        objective = G(popsize, sampled)
        estimates.append(optimize.fmin(objective, np.array(true_params)))
    return '%s\n' % (np.array(estimates),)
Ejemplo n.º 4
0
def get_response_content(fs):
    """
    Report step timings on stdout and return refitted parameter values.
    @param fs: not used by this function
    @return: text showing the array of optimized parameter vectors
    """

    def report(label, started):
        # print the wall time elapsed since the given start time
        print('%s %s' % (label, time.time() - started))

    np.set_printoptions(linewidth=200)
    out = StringIO()
    # fixed simulation settings
    nsamples = 1
    nsites = 50000
    N = 15 * 2
    k = 4
    # the alternative setting (0.008, 1, 1, 0.5, 1, 1.5) is not used
    params = (0.002, 1, 1, 0, 0, 0)
    mutation, fitnesses = kaizeng.params_to_mutation_fitness(N, params)
    # transition matrix
    t0 = time.time()
    P = kaizeng.get_transition_matrix(N, k, mutation, fitnesses)
    report('time to construct transition matrix:', t0)
    # stationary distribution
    t0 = time.time()
    v = MatrixUtil.get_stationary_distribution(P)
    report('time to get stationary distribution:', t0)
    # multinomial sample of site counts
    t0 = time.time()
    counts = np.random.multinomial(nsites, v)
    report('time to sample multinomial counts:', t0)
    # log pmf of the sample; computed only so that it can be timed
    t0 = time.time()
    StatsUtil.multinomial_log_pmf(v, counts)
    report('time to get multinomial log pmf:', t0)
    # fit the parameters to each newly simulated data set
    fitted = []
    for _ in range(nsamples):
        simulated = np.random.multinomial(nsites, v)
        initial_guess = np.array(params)
        fitted.append(optimize.fmin(G(N, simulated), initial_guess))
    out.write('%s\n' % (np.array(fitted),))
    return out.getvalue()
Ejemplo n.º 5
0
def get_response_content(fs):
    """
    Draw an R barplot comparing five allele frequency distributions.
    The table passed to R has one row per configuration:
    the two-allele large N limit, the two-allele distribution,
    the four-allele model with and without mutational bias,
    and the four-allele large N limit with mutational bias.
    @param fs: object with attributes N_big_diploid, with_replacement,
    without_replacement, gamma_0, gamma_1, gamma_2, and imageformat
    @return: the image data produced by the R plotter
    @raise ValueError: if twice N_big_diploid is smaller than 10
    or if neither subsampling option is set
    @raise RUtil.RError: if the R plotter returns a nonzero code
    """
    N_small = 10
    N_big_diploid = fs.N_big_diploid
    N_big_haploid = N_big_diploid * 2
    if N_big_haploid < N_small:
        raise ValueError('use a larger diploid population size')
    # choose the requested subsampling function
    if fs.with_replacement:
        f_subsample = StatsUtil.subsample_pmf_with_replacement
    elif fs.without_replacement:
        f_subsample = StatsUtil.subsample_pmf_without_replacement
    else:
        raise ValueError('subsampling option error')
    k = 4
    g0, g1, g2 = fs.gamma_0, fs.gamma_1, fs.gamma_2
    # the two settings differ only in their second parameter
    unbiased_params = (0.008, 1, 1, g0, g1, g2)
    biased_params = (0.008, 2, 1, g0, g1, g2)
    allele_histograms = np.zeros((2, N_big_haploid + 1))
    for i, params in enumerate((unbiased_params, biased_params)):
        mutation, selection = kaizeng.params_to_mutation_fitness(
                N_big_haploid, params)
        # NOTE(review): the transition matrix is built from the diploid
        # size while the states are enumerated for the haploid size;
        # confirm that get_transition_matrix expects the diploid count.
        P = kaizeng.get_transition_matrix(
                N_big_diploid, k, mutation, selection)
        v = MatrixUtil.get_stationary_distribution(P)
        states = kaizeng.gen_states(N_big_haploid, k)
        for state_index, counts in enumerate(states):
            # use only states whose first two counts are both nonzero
            if counts[0] and counts[1]:
                allele_histograms[i, counts[0]] += v[state_index]
    # Define the r table.
    # There are nine columns each corresponding to an allele frequency.
    # There are five rows each corresponding to a configuration.
    arr = []
    # Use the two allele approximation
    # from mcvean and charlesworth 1999 referred to by zeng 2011.
    # The original author was not sure that this is the right equation.
    # An earlier inline version of this computation was replaced
    # by the call to diallelic_approximation.
    arr.append(diallelic_approximation(N_small, g0, g1).tolist())
    # Use the exact two allele distribution.
    # It is exact only if the scaling of the population size
    # and of the fitnesses is the right one.
    f0 = 1.0
    # Convert the divisor to float so that an integer gamma_1
    # is not floor-divided by the integer population size.
    f1 = 1.0 - g1 / float(N_big_haploid)
    #f0 = 1.0 + gamma / N
    #f1 = 1.0
    #f0 = 1.0 + 1.5 / (4*N)
    #f1 = 1.0 - 1.5 / (4*N)
    h = get_two_allele_distribution(
            N_big_haploid, N_small, f0, f1, f_subsample)
    arr.append(h.tolist())
    # Get frequencies for the other two configurations
    for hist in allele_histograms:
        # Get probabilities conditional on dimorphism.
        hist[0] = 0
        hist[-1] = 0
        hist /= np.sum(hist)
        # Get the subsampled pmf.
        distn = f_subsample(hist, N_small)
        MatrixUtil.assert_distribution(distn)
        # Get probabilities conditional on dimorphism of the sample.
        distn[0] = 0
        distn[-1] = 0
        distn /= np.sum(distn)
        # Add to the table of densities.
        arr.append(distn[1:-1].tolist())
    # Get a large population approximation
    # when there is mutational bias.
    mutation, fitness = kaizeng.params_to_mutation_fitness(
            N_big_haploid, biased_params)
    gammas = np.array([g0, g1, g2, 0])
    h = kaizeng.get_large_population_approximation(
            N_small, k, gammas, mutation)
    arr.append(h.tolist())
    # define the r script one line at a time
    script_lines = [
            'title.string <- "allele 1 vs allele 2"',
            'mdat <- %s' % (RUtil.matrix_to_R_string(arr),),
            ]
    legend = mk_call_str(
            'c',
            '"two-allele large N limit"',
            '"two-allele"',
            '"four-allele without mutational bias"',
            '"four-allele with mutational bias (kappa_{1,2}=2)"',
            '"four-allele with mutational bias, large N limit"',
            )
    colors = mk_call_str(
            'c',
            '"red"',
            '"white"',
            '"black"',
            '"gray"',
            '"blue"',
            )
    script_lines.append(mk_call_str(
            'barplot',
            'mdat',
            'legend.text=' + legend,
            'args.legend = list(x="topleft", bty="n")',
            'names.arg = c(1,2,3,4,5,6,7,8,9)',
            main='title.string',
            xlab='"frequency of allele 1"',
            ylab='"frequency"',
            col=colors,
            beside='TRUE',
            ))
    script = '\n'.join(script_lines).rstrip()
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter_no_table(
            script, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
Ejemplo n.º 6
0
def get_response_content(fs):
    """
    Plot five allele frequency distributions as a grouped R barplot.
    The rows of the plotted table are, in order: the two-allele
    large N limit, the two-allele distribution, the four-allele model
    without and with mutational bias, and the four-allele large N limit
    with mutational bias.
    @param fs: object with attributes N_big_diploid, with_replacement,
    without_replacement, gamma_0, gamma_1, gamma_2, and imageformat
    @return: the image data produced by the R plotter
    @raise ValueError: if twice N_big_diploid is smaller than 10
    or if neither subsampling option is set
    @raise RUtil.RError: if the R plotter returns a nonzero code
    """
    N_small = 10
    N_big_diploid = fs.N_big_diploid
    N_big_haploid = N_big_diploid * 2
    if N_big_haploid < N_small:
        raise ValueError('use a larger diploid population size')
    # select the subsampling function from the two exclusive options
    if fs.with_replacement:
        f_subsample = StatsUtil.subsample_pmf_with_replacement
    elif fs.without_replacement:
        f_subsample = StatsUtil.subsample_pmf_without_replacement
    else:
        raise ValueError('subsampling option error')
    k = 4
    gamma = fs.gamma_1
    # the second setting differs from the first only in its second entry
    params_list = [(0.008, 1, 1, fs.gamma_0, fs.gamma_1, fs.gamma_2),
                   (0.008, 2, 1, fs.gamma_0, fs.gamma_1, fs.gamma_2)]
    allele_histograms = np.zeros((len(params_list), N_big_haploid + 1))
    for i, params in enumerate(params_list):
        mutation, selection = kaizeng.params_to_mutation_fitness(
            N_big_haploid, params)
        # NOTE(review): the diploid size is passed here although the
        # states below are generated for the haploid size; confirm that
        # get_transition_matrix expects the diploid count.
        P = kaizeng.get_transition_matrix(N_big_diploid, k, mutation,
                                          selection)
        v = MatrixUtil.get_stationary_distribution(P)
        for state_index, counts in enumerate(
                kaizeng.gen_states(N_big_haploid, k)):
            # skip states where either of the first two counts is zero
            if not (counts[0] and counts[1]):
                continue
            allele_histograms[i, counts[0]] += v[state_index]
    # Define the r table.
    # There are nine columns each corresponding to an allele frequency.
    # There are five rows each corresponding to a configuration.
    arr = []
    # Use the two allele approximation
    # from mcvean and charlesworth 1999 referred to by zeng 2011.
    # The original author was not sure that this is the right equation.
    # A previous inline implementation has been superseded
    # by the diallelic_approximation function.
    g0 = fs.gamma_0
    g1 = fs.gamma_1
    arr.append(diallelic_approximation(N_small, g0, g1).tolist())
    # Use the exact two allele distribution.
    # It is exact provided that the scaling of the population size
    # and of the fitnesses is understood correctly.
    f0 = 1.0
    # The float conversion avoids integer floor division
    # when gamma_1 is provided as an integer.
    f1 = 1.0 - gamma / float(N_big_haploid)
    #f0 = 1.0 + gamma / N
    #f1 = 1.0
    #f0 = 1.0 + 1.5 / (4*N)
    #f1 = 1.0 - 1.5 / (4*N)
    h = get_two_allele_distribution(N_big_haploid, N_small, f0, f1,
                                    f_subsample)
    arr.append(h.tolist())
    # Get frequencies for the other two configurations
    for hist in allele_histograms:
        # Get probabilities conditional on dimorphism.
        hist[0] = 0
        hist[-1] = 0
        hist /= np.sum(hist)
        # Get the subsampled pmf.
        distn = f_subsample(hist, N_small)
        MatrixUtil.assert_distribution(distn)
        # Get probabilities conditional on dimorphism of the sample.
        distn[0] = 0
        distn[-1] = 0
        distn /= np.sum(distn)
        # Add to the table of densities.
        arr.append(distn[1:-1].tolist())
    # Get a large population approximation
    # when there is mutational bias.
    mutation, fitness = kaizeng.params_to_mutation_fitness(
        N_big_haploid, params_list[1])
    gammas = np.array([fs.gamma_0, fs.gamma_1, fs.gamma_2, 0])
    h = kaizeng.get_large_population_approximation(N_small, k, gammas,
                                                   mutation)
    arr.append(h.tolist())
    # define the r script
    out = StringIO()
    out.write('title.string <- "allele 1 vs allele 2"\n')
    out.write('mdat <- %s\n' % (RUtil.matrix_to_R_string(arr),))
    legend_text = mk_call_str(
        'c',
        '"two-allele large N limit"',
        '"two-allele"',
        '"four-allele without mutational bias"',
        '"four-allele with mutational bias (kappa_{1,2}=2)"',
        '"four-allele with mutational bias, large N limit"',
    )
    bar_colors = mk_call_str(
        'c',
        '"red"',
        '"white"',
        '"black"',
        '"gray"',
        '"blue"',
    )
    barplot_call = mk_call_str(
        'barplot',
        'mdat',
        'legend.text=' + legend_text,
        'args.legend = list(x="topleft", bty="n")',
        'names.arg = c(1,2,3,4,5,6,7,8,9)',
        main='title.string',
        xlab='"frequency of allele 1"',
        ylab='"frequency"',
        col=bar_colors,
        beside='TRUE',
    )
    out.write('%s\n' % (barplot_call,))
    script = out.getvalue().rstrip()
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter_no_table(
        script, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
Ejemplo n.º 7
0
    return out.getvalue()

if __name__ == '__main__':
    # Script entry point.
    # For each simulation setting, sample multinomial site counts from the
    # stationary distribution, refit the parameters to each sample with
    # optimize.fmin, and summarize the estimates per parameter.
    # hardcoded number of alleles
    k = 4
    # number of simulated data sets per setting
    nsamples = 100
    # each setting is [diploid population size, number of sites, params]
    settings_list = [
            [15, 50000, [0.002, 1, 1, 0, 0, 0]],
            [15, 50000, [0.002, 1, 2, 0.4, -1.2, 2]],
            [10, 10000, [0.008, 1, 1, 0.5, 1, 1.5]],
            [6, 5000, [0.01, 1, 2, 0, 0, 0]]]
    for N_diploid, nsites, params in settings_list:
        # the model functions below are given twice the diploid size
        N = 2*N_diploid
        print 'diploid population size = %s, sequence length = %s' % (
                N_diploid, nsites)
        # tab separated row showing the parameter values used as input
        print '\t'.join(str(x) for x in ['Input'] + params)
        mutation, fitnesses = kaizeng.params_to_mutation_fitness(N, params)
        P = kaizeng.get_transition_matrix(N, k, mutation, fitnesses)
        v = MatrixUtil.get_stationary_distribution(P)
        # one list of fitted parameter values per simulated data set
        arr = []
        for i in range(nsamples):
            counts = np.random.multinomial(nsites, v)
            # the search starts at the parameters used for the simulation
            X0 = np.array(params)
            g = G(N, counts)
            Xopt = optimize.fmin(g, X0)
            arr.append(Xopt.tolist())
        means = []
        cis = []
        # zip(*arr) groups the fitted values by parameter
        for mles in zip(*arr):
            means.append(np.mean(mles))
            x = sorted(mles)
            # NOTE(review): this keeps the third smallest and the second
            # largest estimate, which is not symmetric -- confirm the
            # intended interval.
            cis.append([x[2], x[-2]])
Ejemplo n.º 8
0
    return out.getvalue()


if __name__ == '__main__':
    # Script entry point.
    # Every setting is simulated nsamples times; each simulated data set
    # is refitted with optimize.fmin and the fitted values are then
    # summarized separately for each parameter.
    # hardcoded number of alleles
    k = 4
    # number of simulated data sets per setting
    nsamples = 100
    # each setting is [diploid population size, number of sites, params]
    settings_list = [[15, 50000, [0.002, 1, 1, 0, 0, 0]],
                     [15, 50000, [0.002, 1, 2, 0.4, -1.2, 2]],
                     [10, 10000, [0.008, 1, 1, 0.5, 1, 1.5]],
                     [6, 5000, [0.01, 1, 2, 0, 0, 0]]]
    for N_diploid, nsites, params in settings_list:
        # the model functions below are given twice the diploid size
        N = 2 * N_diploid
        print 'diploid population size = %s, sequence length = %s' % (
            N_diploid, nsites)
        # tab separated row showing the parameter values used as input
        print '\t'.join(str(x) for x in ['Input'] + params)
        mutation, fitnesses = kaizeng.params_to_mutation_fitness(N, params)
        P = kaizeng.get_transition_matrix(N, k, mutation, fitnesses)
        v = MatrixUtil.get_stationary_distribution(P)
        # one list of fitted parameter values per simulated data set
        arr = []
        for i in range(nsamples):
            counts = np.random.multinomial(nsites, v)
            # the search starts at the parameters used for the simulation
            X0 = np.array(params)
            g = G(N, counts)
            Xopt = optimize.fmin(g, X0)
            arr.append(Xopt.tolist())
        means = []
        cis = []
        # zip(*arr) groups the fitted values by parameter
        for mles in zip(*arr):
            means.append(np.mean(mles))
            x = sorted(mles)
            # NOTE(review): the lower bound is the third smallest estimate
            # but the upper bound is the second largest -- confirm that
            # this asymmetry is intended.
            cis.append([x[2], x[-2]])
Ejemplo n.º 9
0
def get_response_content(fs):
    """
    Plot allele frequency distributions for three configurations.
    The plotted table has one row for the two-allele distribution
    and one row for each of two four-allele parameter settings.
    @param fs: object whose imageformat attribute selects the R device
    @return: the image data produced by the R plotter
    @raise RUtil.RError: if the R plotter returns a nonzero code
    """
    npop_diploid = 5
    npop_haploid = npop_diploid * 2
    nalleles = 4
    gamma = 1.5
    # the two settings differ only in their second parameter
    settings = (
            (0.008, 1, 1, 0, gamma, 1),
            (0.008, 2, 1, 0, gamma, 1))
    histograms = np.zeros((2, npop_haploid+1))
    for row, params in enumerate(settings):
        mutation, fitnesses = kaizeng.params_to_mutation_fitness(
                npop_haploid, params)
        P = kaizeng.get_transition_matrix(
                npop_diploid, nalleles, mutation, fitnesses)
        stationary = MatrixUtil.get_stationary_distribution(P)
        states = kaizeng.gen_states(npop_haploid, nalleles)
        for index, counts in enumerate(states):
            # use only states whose first two counts are both nonzero
            if counts[0] and counts[1]:
                histograms[row, counts[0]] += stationary[index]
    # Build the table for R.
    # Each of the nine columns corresponds to an allele frequency.
    # Each of the three rows corresponds to a configuration.
    # The first row is the exact two allele distribution,
    # which is exact only if the scaling of the population size
    # and of the fitnesses is the right one.
    f0 = 1.0
    f1 = 1.0 - gamma / npop_haploid
    # alternative scalings that were tried:
    #f0 = 1.0 + gamma / N
    #f1 = 1.0
    #f0 = 1.0 + 1.5 / (4*N)
    #f1 = 1.0 - 1.5 / (4*N)
    table = [get_two_allele_distribution(npop_diploid, f0, f1).tolist()]
    # A row based on the two allele approximation
    # from mcvean and charlesworth 1999 referred to by zeng 2011
    # was disabled in the original code and is not included.
    # The remaining rows are the normalized interior histogram entries.
    for hist in histograms:
        interior = hist[1:-1]
        interior /= np.sum(interior)
        table.append(interior.tolist())
    # define the r script one line at a time
    script_lines = [
            'title.string <- "allele 1 vs allele 2, gamma = 1.5"',
            'mdat <- %s' % (RUtil.matrix_to_R_string(table),),
            ]
    legend = mk_call_str(
            'c',
            '"two-allele"',
            '"four-allele without mutational bias"',
            '"four-allele with mutational bias kappa_{1,2}=2"',
            )
    # red is deliberately left out of the colors
    colors = mk_call_str(
            'c',
            '"white"',
            '"black"',
            '"gray"',
            )
    script_lines.append(mk_call_str(
            'barplot',
            'mdat',
            'legend.text=' + legend,
            'args.legend = list(x="topleft", bty="n")',
            'names.arg = c(1,2,3,4,5,6,7,8,9)',
            main='title.string',
            xlab='"frequency of allele 1"',
            ylab='"frequency"',
            col=colors,
            beside='TRUE',
            ))
    script = '\n'.join(script_lines).rstrip()
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter_no_table(
            script, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
Ejemplo n.º 10
0
def get_response_content(fs):
    """
    Draw a grouped R barplot of three allele frequency distributions.
    The first table row is the two-allele distribution and the other
    two rows come from four-allele models with different parameters.
    @param fs: object whose imageformat attribute selects the R device
    @return: the image data produced by the R plotter
    @raise RUtil.RError: if the R plotter returns a nonzero code
    """
    N_diploid = 5
    N_haploid = N_diploid * 2
    k = 4
    gamma = 1.5
    # compute one histogram per four-allele parameter setting
    four_allele_params = [
        (0.008, 1, 1, 0, gamma, 1),
        (0.008, 2, 1, 0, gamma, 1),
    ]
    allele_histograms = np.zeros((2, N_haploid + 1))
    for i, params in enumerate(four_allele_params):
        mutation, fitnesses = kaizeng.params_to_mutation_fitness(
            N_haploid, params)
        P = kaizeng.get_transition_matrix(N_diploid, k, mutation, fitnesses)
        v = MatrixUtil.get_stationary_distribution(P)
        for state_index, counts in enumerate(kaizeng.gen_states(N_haploid, k)):
            # skip states where either of the first two counts is zero
            if not (counts[0] and counts[1]):
                continue
            allele_histograms[i, counts[0]] += v[state_index]
    # Define the r table.
    # It has nine columns, one per allele frequency,
    # and three rows, one per configuration.
    arr = []
    # Start with the exact two allele distribution.
    # Its exactness depends on using the right scaling
    # of the population size and fitnesses.
    f0 = 1.0
    f1 = 1.0 - gamma / N_haploid
    # alternative scalings that were tried:
    #f0 = 1.0 + gamma / N
    #f1 = 1.0
    #f0 = 1.0 + 1.5 / (4*N)
    #f1 = 1.0 - 1.5 / (4*N)
    two_allele = get_two_allele_distribution(N_diploid, f0, f1)
    arr.append(two_allele.tolist())
    # The two allele approximation
    # from mcvean and charlesworth 1999 referred to by zeng 2011
    # was disabled in the original code and contributes no row.
    # Append the normalized interior entries of each histogram.
    for hist in allele_histograms:
        h = hist[1:-1]
        h /= np.sum(h)
        arr.append(h.tolist())
    # define the r script
    out = StringIO()
    out.write('title.string <- "allele 1 vs allele 2, gamma = 1.5"\n')
    out.write('mdat <- %s\n' % (RUtil.matrix_to_R_string(arr),))
    legend_text = mk_call_str(
        'c',
        '"two-allele"',
        '"four-allele without mutational bias"',
        '"four-allele with mutational bias kappa_{1,2}=2"',
    )
    # red is deliberately left out of the colors
    bar_colors = mk_call_str(
        'c',
        '"white"',
        '"black"',
        '"gray"',
    )
    barplot_call = mk_call_str(
        'barplot',
        'mdat',
        'legend.text=' + legend_text,
        'args.legend = list(x="topleft", bty="n")',
        'names.arg = c(1,2,3,4,5,6,7,8,9)',
        main='title.string',
        xlab='"frequency of allele 1"',
        ylab='"frequency"',
        col=bar_colors,
        beside='TRUE',
    )
    out.write('%s\n' % (barplot_call,))
    script = out.getvalue().rstrip()
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter_no_table(
        script, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data