Python RUtil.matrix_to_R_string Examples

Example #1

0

Show file

File: 20120820a.py Project: argriffing/xgcode

def get_response_content(fs):
    """Plot five estimates of the frequency distribution of allele 1.

    A table with one row per estimate is built and handed to an R
    ``barplot`` call.  The rows, in legend order, are:

    1. ``diallelic_approximation(N_small, gamma_0, gamma_1)``
    2. ``get_two_allele_distribution`` for fitnesses ``1.0`` and
       ``1.0 - gamma_1 / N_big_haploid``
    3. the subsampled four-allele histogram for the first parameter tuple
    4. the subsampled four-allele histogram for the second parameter tuple
    5. ``kaizeng.get_large_population_approximation`` for the second
       parameter tuple

    Args:
        fs: object providing the fields read here: ``N_big_diploid``,
            ``with_replacement``, ``without_replacement``, ``gamma_0``,
            ``gamma_1``, ``gamma_2`` and ``imageformat``.

    Returns:
        The image data returned by ``RUtil.run_plotter_no_table``.

    Raises:
        ValueError: if the haploid population size is smaller than the
            sample size of 10, or if neither subsampling option is set.
        RUtil.RError: if the R plotter returns a nonzero return code.
    """
    N_small = 10
    N_big_diploid = fs.N_big_diploid
    N_big_haploid = N_big_diploid * 2
    if N_big_haploid < N_small:
        raise ValueError('use a larger diploid population size')
    if fs.with_replacement:
        f_subsample = StatsUtil.subsample_pmf_with_replacement
    elif fs.without_replacement:
        f_subsample = StatsUtil.subsample_pmf_without_replacement
    else:
        raise ValueError('subsampling option error')
    k = 4
    gamma = fs.gamma_1
    params_list = [
            (0.008, 1, 1, fs.gamma_0, fs.gamma_1, fs.gamma_2),
            (0.008, 2, 1, fs.gamma_0, fs.gamma_1, fs.gamma_2)]
    allele_histograms = np.zeros((2, N_big_haploid + 1))
    for i, params in enumerate(params_list):
        mutation, selection = kaizeng.params_to_mutation_fitness(
                N_big_haploid, params)
        P = kaizeng.get_transition_matrix(
                N_big_diploid, k, mutation, selection)
        v = MatrixUtil.get_stationary_distribution(P)
        for state_index, counts in enumerate(kaizeng.gen_states(
            N_big_haploid, k)):
            # Only states in which both of the first two counts are nonzero
            # contribute; their mass is binned by the first count.
            if counts[0] and counts[1]:
                allele_histograms[i, counts[0]] += v[state_index]
    # Define the r table.
    # There are nine columns each corresponding to an allele frequency.
    # There are five rows each corresponding to a configuration,
    # in the same order as the legend entries of the barplot below.
    arr = []
    # Use the two allele approximation
    # from mcvean and charlesworth 1999 referred to by zeng 2011.
    # NOTE(review): the original author was not sure that this is
    # the right equation.
    g0 = fs.gamma_0
    g1 = fs.gamma_1
    arr.append(diallelic_approximation(N_small, g0, g1).tolist())
    # Use the exact two allele distribution.
    # Well, it is exact if I understand the right scaling
    # of the population size and fitnesses.
    f0 = 1.0
    # Force float division: N_big_haploid is an int, so an integral gamma
    # would otherwise be truncated by Python 2 integer division.
    f1 = 1.0 - gamma / float(N_big_haploid)
    h = get_two_allele_distribution(
            N_big_haploid, N_small, f0, f1, f_subsample)
    arr.append(h.tolist())
    # Get frequencies for the other two configurations
    for hist in allele_histograms:
        # Get probabilities conditional on dimorphism.
        # Each hist is a row view, so this renormalizes the table in place.
        hist[0] = 0
        hist[-1] = 0
        hist /= np.sum(hist)
        # Get the subsampled pmf.
        distn = f_subsample(hist, N_small)
        MatrixUtil.assert_distribution(distn)
        # Get probabilities conditional on dimorphism of the sample.
        distn[0] = 0
        distn[-1] = 0
        distn /= np.sum(distn)
        # Add to the table of densities.
        arr.append(distn[1:-1].tolist())
    # Get a large population approximation
    # when there is mutational bias.
    # Only the mutation part of the result is needed here.
    params = (0.008, 2, 1, fs.gamma_0, fs.gamma_1, fs.gamma_2)
    mutation, _ = kaizeng.params_to_mutation_fitness(
            N_big_haploid, params)
    gammas = np.array([fs.gamma_0, fs.gamma_1, fs.gamma_2, 0])
    h = kaizeng.get_large_population_approximation(
            N_small, k, gammas, mutation)
    arr.append(h.tolist())
    # define the r script
    out = StringIO()
    print >> out, 'title.string <- "allele 1 vs allele 2"'
    print >> out, 'mdat <-', RUtil.matrix_to_R_string(arr)
    print >> out, mk_call_str(
            'barplot',
            'mdat',
            'legend.text=' + mk_call_str(
                'c',
                '"two-allele large N limit"',
                '"two-allele"',
                '"four-allele without mutational bias"',
                '"four-allele with mutational bias (kappa_{1,2}=2)"',
                '"four-allele with mutational bias, large N limit"',
                ),
            'args.legend = list(x="topleft", bty="n")',
            'names.arg = c(1,2,3,4,5,6,7,8,9)',
            main='title.string',
            xlab='"frequency of allele 1"',
            ylab='"frequency"',
            col=mk_call_str(
                'c',
                '"red"',
                '"white"',
                '"black"',
                '"gray"',
                '"blue"',
                ),
            beside='TRUE',
            )
    script = out.getvalue().rstrip()
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter_no_table(
            script, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data

Example #2

0

Show file

File: 20120820a.py Project: BIGtigr/xgcode

def get_response_content(fs):
    """Build and render the five-row allele frequency comparison plot.

    Each row of the plotted table is one estimate of the distribution of
    the allele 1 count in a sample of size ``N_small`` (10), restricted to
    the nine interior counts.  In legend order the rows come from
    ``diallelic_approximation``, ``get_two_allele_distribution``, the two
    subsampled four-allele histograms (one per parameter tuple), and
    ``kaizeng.get_large_population_approximation``.

    Args:
        fs: object providing the fields read here: ``N_big_diploid``,
            ``with_replacement``, ``without_replacement``, ``gamma_0``,
            ``gamma_1``, ``gamma_2`` and ``imageformat``.

    Returns:
        The image data returned by ``RUtil.run_plotter_no_table``.

    Raises:
        ValueError: if twice ``fs.N_big_diploid`` is smaller than the
            sample size, or if neither subsampling flag is set.
        RUtil.RError: if the R plotter returns a nonzero return code.
    """
    N_small = 10
    N_big_diploid = fs.N_big_diploid
    N_big_haploid = N_big_diploid * 2
    if N_big_haploid < N_small:
        raise ValueError('use a larger diploid population size')
    if fs.with_replacement:
        f_subsample = StatsUtil.subsample_pmf_with_replacement
    elif fs.without_replacement:
        f_subsample = StatsUtil.subsample_pmf_without_replacement
    else:
        raise ValueError('subsampling option error')
    k = 4
    gamma = fs.gamma_1
    params_list = [(0.008, 1, 1, fs.gamma_0, fs.gamma_1, fs.gamma_2),
                   (0.008, 2, 1, fs.gamma_0, fs.gamma_1, fs.gamma_2)]
    allele_histograms = np.zeros((2, N_big_haploid + 1))
    for i, params in enumerate(params_list):
        mutation, selection = kaizeng.params_to_mutation_fitness(
            N_big_haploid, params)
        P = kaizeng.get_transition_matrix(N_big_diploid, k, mutation,
                                          selection)
        v = MatrixUtil.get_stationary_distribution(P)
        for state_index, counts in enumerate(
                kaizeng.gen_states(N_big_haploid, k)):
            # Accumulate, per value of the first count, the mass of the
            # states whose first two counts are both nonzero.
            if counts[0] and counts[1]:
                allele_histograms[i, counts[0]] += v[state_index]
    # Define the r table.
    # There are nine columns each corresponding to an allele frequency.
    # There are five rows each corresponding to a configuration;
    # the row order matches the legend text of the barplot call below.
    arr = []
    # Use the two allele approximation
    # from mcvean and charlesworth 1999 referred to by zeng 2011.
    # NOTE(review): the original author was unsure whether this is
    # the right equation.
    g0 = fs.gamma_0
    g1 = fs.gamma_1
    arr.append(diallelic_approximation(N_small, g0, g1).tolist())
    # Use the exact two allele distribution.
    # Well, it is exact if I understand the right scaling
    # of the population size and fitnesses.
    f0 = 1.0
    # N_big_haploid is an int; dividing by its float value keeps an
    # integral gamma from being truncated under Python 2 division.
    f1 = 1.0 - gamma / float(N_big_haploid)
    h = get_two_allele_distribution(N_big_haploid, N_small, f0, f1,
                                    f_subsample)
    arr.append(h.tolist())
    # Get frequencies for the other two configurations
    for hist in allele_histograms:
        # Get probabilities conditional on dimorphism.
        # hist is a view of one table row and is modified in place.
        hist[0] = 0
        hist[-1] = 0
        hist /= np.sum(hist)
        # Get the subsampled pmf.
        distn = f_subsample(hist, N_small)
        MatrixUtil.assert_distribution(distn)
        # Get probabilities conditional on dimorphism of the sample.
        distn[0] = 0
        distn[-1] = 0
        distn /= np.sum(distn)
        # Add to the table of densities.
        arr.append(distn[1:-1].tolist())
    # Get a large population approximation
    # when there is mutational bias.
    # The fitness half of the returned pair is not used.
    params = (0.008, 2, 1, fs.gamma_0, fs.gamma_1, fs.gamma_2)
    mutation, _ = kaizeng.params_to_mutation_fitness(
        N_big_haploid, params)
    gammas = np.array([fs.gamma_0, fs.gamma_1, fs.gamma_2, 0])
    h = kaizeng.get_large_population_approximation(N_small, k, gammas,
                                                   mutation)
    arr.append(h.tolist())
    # define the r script
    out = StringIO()
    print >> out, 'title.string <- "allele 1 vs allele 2"'
    print >> out, 'mdat <-', RUtil.matrix_to_R_string(arr)
    print >> out, mk_call_str(
        'barplot',
        'mdat',
        'legend.text=' + mk_call_str(
            'c',
            '"two-allele large N limit"',
            '"two-allele"',
            '"four-allele without mutational bias"',
            '"four-allele with mutational bias (kappa_{1,2}=2)"',
            '"four-allele with mutational bias, large N limit"',
        ),
        'args.legend = list(x="topleft", bty="n")',
        'names.arg = c(1,2,3,4,5,6,7,8,9)',
        main='title.string',
        xlab='"frequency of allele 1"',
        ylab='"frequency"',
        col=mk_call_str(
            'c',
            '"red"',
            '"white"',
            '"black"',
            '"gray"',
            '"blue"',
        ),
        beside='TRUE',
    )
    script = out.getvalue().rstrip()
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter_no_table(
        script, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data

Example #3

0

Show file

File: 20120824a.py Project: argriffing/xgcode

def get_response_content(fs):
    """Plot an exact two-allele distribution against a continuous one.

    The first plotted row is the stationary distribution, restricted to
    states 1..N-1 and renormalized, of a matrix obtained from
    ``wfengine.create_genic_diallelic`` after states 0 and N are redirected
    to state 1.  The second row is ``sojourn_kernel`` evaluated at
    ``i / N`` for the same states, also normalized to sum to one.

    Args:
        fs: object providing the fields read here: ``N_diploid``,
            ``gamma`` and ``imageformat``.

    Returns:
        The image data returned by ``RUtil.run_plotter_no_table``.

    Raises:
        ValueError: if ``1 - gamma / N`` is not positive.
        RUtil.RError: if the R plotter returns a nonzero return code.
    """
    N_diploid = fs.N_diploid
    N = N_diploid * 2
    gamma = fs.gamma
    # define the fitnesses and the selection value
    f0 = 1.0
    # N is an int; divide by float(N) so that an integral gamma is not
    # truncated by Python 2 integer division.
    f1 = 1.0 - gamma / float(N)
    if f1 <= 0:
        raise ValueError('the extreme selection caused a non-positive fitness')
    s = 1 - f1 / f0
    # get a wright fisher transition matrix
    # (the engine output is exponentiated, so it is presumably in log space)
    P = np.exp(wfengine.create_genic_diallelic(N_diploid, s))
    # add mutations:
    # states 0 and N no longer stay put but move to state 1 with certainty
    P[0, 0] = 0
    P[0, 1] = 1
    P[N, N] = 0
    P[N, 1] = 1
    # compute the stationary distribution
    v = MatrixUtil.get_stationary_distribution(P)
    # get the distribution over dimorphic states
    h = v[1:-1]
    h /= np.sum(h)
    # look at continuous approximations
    w = np.zeros(N+1)
    for i in range(1, N):
        w[i] = sojourn_kernel(i / float(N), gamma)
    w = w[1:-1]
    w /= np.sum(w)
    # get the array for the R plot
    arr = [h.tolist(), w.tolist()]
    # define the r script
    out = StringIO()
    print >> out, 'title.string <- "allele 1 vs allele 2"'
    print >> out, 'mdat <-', RUtil.matrix_to_R_string(arr)
    print >> out, mk_call_str(
            'barplot',
            'mdat',
            'legend.text=' + mk_call_str(
                'c',
                '"exact discrete distribution"',
                '"continuous approximation"',
                ),
            'args.legend = list(x="topright", bty="n")',
            'names.arg = 1:%s' % (N-1),
            main='title.string',
            xlab='"frequency of allele 1"',
            ylab='"frequency"',
            col=mk_call_str(
                'c',
                '"black"',
                '"red"',
                ),
            beside='TRUE',
            border='NA',
            )
    script = out.getvalue().rstrip()
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter_no_table(
            script, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data

Example #4

0

Show file

File: 20120824a.py Project: BIGtigr/xgcode

def get_response_content(fs):
    """Render a bar plot of exact versus approximate allele frequencies.

    Two rows are plotted over the states 1..N-1, where N is twice
    ``fs.N_diploid``:

    * the renormalized stationary distribution of the matrix built from
      ``wfengine.create_genic_diallelic`` once states 0 and N have been
      redirected to state 1, and
    * the normalized values of ``sojourn_kernel(i / N, gamma)``.

    Args:
        fs: object providing the fields read here: ``N_diploid``,
            ``gamma`` and ``imageformat``.

    Returns:
        The image data returned by ``RUtil.run_plotter_no_table``.

    Raises:
        ValueError: if the fitness ``1 - gamma / N`` is not positive.
        RUtil.RError: if the R plotter returns a nonzero return code.
    """
    N_diploid = fs.N_diploid
    N = N_diploid * 2
    gamma = fs.gamma
    # define the fitnesses and the selection value
    f0 = 1.0
    # Use float division explicitly: with an int N, an integral gamma
    # would be truncated by Python 2 integer division.
    f1 = 1.0 - gamma / float(N)
    if f1 <= 0:
        raise ValueError('the extreme selection caused a non-positive fitness')
    s = 1 - f1 / f0
    # get a wright fisher transition matrix
    # (exponentiated here, so the engine presumably returns logarithms)
    P = np.exp(wfengine.create_genic_diallelic(N_diploid, s))
    # add mutations:
    # rows 0 and N are rewritten to jump to state 1 with probability one
    P[0, 0] = 0
    P[0, 1] = 1
    P[N, N] = 0
    P[N, 1] = 1
    # compute the stationary distribution
    v = MatrixUtil.get_stationary_distribution(P)
    # get the distribution over dimorphic states
    h = v[1:-1]
    h /= np.sum(h)
    # look at continuous approximations
    w = np.zeros(N + 1)
    for i in range(1, N):
        w[i] = sojourn_kernel(i / float(N), gamma)
    w = w[1:-1]
    w /= np.sum(w)
    # get the array for the R plot
    arr = [h.tolist(), w.tolist()]
    # define the r script
    out = StringIO()
    print >> out, 'title.string <- "allele 1 vs allele 2"'
    print >> out, 'mdat <-', RUtil.matrix_to_R_string(arr)
    print >> out, mk_call_str(
        'barplot',
        'mdat',
        'legend.text=' + mk_call_str(
            'c',
            '"exact discrete distribution"',
            '"continuous approximation"',
        ),
        'args.legend = list(x="topright", bty="n")',
        'names.arg = 1:%s' % (N - 1),
        main='title.string',
        xlab='"frequency of allele 1"',
        ylab='"frequency"',
        col=mk_call_str(
            'c',
            '"black"',
            '"red"',
        ),
        beside='TRUE',
        border='NA',
    )
    script = out.getvalue().rstrip()
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter_no_table(
        script, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data

Example #5

0

Show file

File: 20120817b.py Project: argriffing/xgcode

def get_response_content(fs):
    """Plot three allele frequency distributions for a fixed gamma of 1.5.

    The population size (5 diploid, 10 haploid), the number of alleles (4)
    and gamma are hard-coded; the only field read from ``fs`` is
    ``imageformat``.  The plotted table has nine columns and three rows:
    the result of ``get_two_allele_distribution`` followed by one
    four-allele histogram per parameter tuple.

    Args:
        fs: object providing ``imageformat``.

    Returns:
        The image data returned by ``RUtil.run_plotter_no_table``.

    Raises:
        RUtil.RError: if the R plotter returns a nonzero return code.
    """
    diploid_size = 5
    haploid_size = diploid_size * 2
    n_alleles = 4
    gamma = 1.5
    # The two tuples differ only in their second entry (1 versus 2),
    # presumably the kappa_{1,2} named in the legend.
    param_sets = [(0.008, kappa, 1, 0, gamma, 1) for kappa in (1, 2)]
    histograms = np.zeros((len(param_sets), haploid_size + 1))
    for row, param_set in zip(histograms, param_sets):
        mutation, fitnesses = kaizeng.params_to_mutation_fitness(
                haploid_size, param_set)
        transition = kaizeng.get_transition_matrix(
                diploid_size, n_alleles, mutation, fitnesses)
        stationary = MatrixUtil.get_stationary_distribution(transition)
        states = kaizeng.gen_states(haploid_size, n_alleles)
        for state_index, counts in enumerate(states):
            # Skip states where either of the first two counts is zero.
            if not (counts[0] and counts[1]):
                continue
            # row is a view, so this updates the histogram table itself.
            row[counts[0]] += stationary[state_index]
    # First table row: the exact two allele distribution.
    # It is exact only if the scaling of the population size
    # and of the fitnesses is the right one.
    f0 = 1.0
    f1 = 1.0 - gamma / haploid_size
    table = [get_two_allele_distribution(diploid_size, f0, f1).tolist()]
    # NOTE: the two allele approximation from mcvean and charlesworth 1999
    # referred to by zeng 2011 is not included in this plot.
    # Remaining rows: the four-allele histograms, normalized in place
    # over the interior counts.
    for row in histograms:
        interior = row[1:-1]
        interior /= np.sum(interior)
        table.append(interior.tolist())
    # Assemble the R script one statement per line.
    out = StringIO()
    print >> out, 'title.string <- "allele 1 vs allele 2, gamma = 1.5"'
    print >> out, 'mdat <-', RUtil.matrix_to_R_string(table)
    legend_call = mk_call_str(
            'c',
            '"two-allele"',
            '"four-allele without mutational bias"',
            '"four-allele with mutational bias kappa_{1,2}=2"',
            )
    color_call = mk_call_str('c', '"white"', '"black"', '"gray"')
    barplot_call = mk_call_str(
            'barplot',
            'mdat',
            'legend.text=' + legend_call,
            'args.legend = list(x="topleft", bty="n")',
            'names.arg = c(1,2,3,4,5,6,7,8,9)',
            main='title.string',
            xlab='"frequency of allele 1"',
            ylab='"frequency"',
            col=color_call,
            beside='TRUE',
            )
    print >> out, barplot_call
    script = out.getvalue().rstrip()
    # Run the script through the R device selected by the image format.
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    plot_result = RUtil.run_plotter_no_table(script, device_name)
    retcode, r_out, r_err, image_data = plot_result
    if retcode:
        raise RUtil.RError(r_err)
    return image_data

Example #6

0

Show file

File: 20120817b.py Project: BIGtigr/xgcode

def get_response_content(fs):
    """Render the fixed-parameter (gamma = 1.5) allele frequency bar plot.

    Everything except the output image format is hard-coded: a diploid
    population of 5 (10 haploid), 4 alleles and gamma 1.5.  Three rows of
    nine values are plotted: the ``get_two_allele_distribution`` result
    and the two four-allele histograms derived from ``kaizeng``.

    Args:
        fs: object providing ``imageformat``.

    Returns:
        The image data returned by ``RUtil.run_plotter_no_table``.

    Raises:
        RUtil.RError: if the R plotter returns a nonzero return code.
    """
    gamma = 1.5
    n_diploid = 5
    n_haploid = n_diploid * 2
    allele_count = 4
    without_bias = (0.008, 1, 1, 0, gamma, 1)
    with_bias = (0.008, 2, 1, 0, gamma, 1)
    hist_table = np.zeros((2, n_haploid + 1))
    for row_index, params in enumerate((without_bias, with_bias)):
        mutation, fitnesses = kaizeng.params_to_mutation_fitness(
            n_haploid, params)
        trans = kaizeng.get_transition_matrix(
            n_diploid, allele_count, mutation, fitnesses)
        stationary = MatrixUtil.get_stationary_distribution(trans)
        state_iter = kaizeng.gen_states(n_haploid, allele_count)
        for state_index, counts in enumerate(state_iter):
            first, second = counts[0], counts[1]
            # Only states with both of the first two counts nonzero count.
            if first and second:
                hist_table[row_index, first] += stationary[state_index]
    # Collect the rows of the R matrix:
    # nine columns (one per allele frequency), three rows (configurations).
    rows = []
    # The exact two allele distribution comes first; it is exact provided
    # the scaling of population size and fitnesses is understood correctly.
    fitness_0 = 1.0
    fitness_1 = 1.0 - gamma / n_haploid
    two_allele = get_two_allele_distribution(n_diploid, fitness_0, fitness_1)
    rows.append(two_allele.tolist())
    # NOTE: no row is added for the two allele approximation from
    # mcvean and charlesworth 1999 referred to by zeng 2011.
    # Then the two four-allele configurations, each normalized in place
    # over its interior entries.
    for hist in hist_table:
        inner = hist[1:-1]
        inner /= np.sum(inner)
        rows.append(inner.tolist())
    # Write the R script.
    out = StringIO()
    print >> out, 'title.string <- "allele 1 vs allele 2, gamma = 1.5"'
    print >> out, 'mdat <-', RUtil.matrix_to_R_string(rows)
    legend_vector = mk_call_str(
        'c',
        '"two-allele"',
        '"four-allele without mutational bias"',
        '"four-allele with mutational bias kappa_{1,2}=2"',
    )
    color_vector = mk_call_str(
        'c',
        '"white"',
        '"black"',
        '"gray"',
    )
    print >> out, mk_call_str(
        'barplot',
        'mdat',
        'legend.text=' + legend_vector,
        'args.legend = list(x="topleft", bty="n")',
        'names.arg = c(1,2,3,4,5,6,7,8,9)',
        main='title.string',
        xlab='"frequency of allele 1"',
        ylab='"frequency"',
        col=color_vector,
        beside='TRUE',
    )
    script = out.getvalue().rstrip()
    # Produce the image with the R device matching the requested format.
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    outcome = RUtil.run_plotter_no_table(script, device_name)
    retcode, r_out, r_err, image_data = outcome
    if retcode:
        raise RUtil.RError(r_err)
    return image_data