Esempio n. 1
0
def get_response_content(fs):
    """
    Sample one endpoint-conditioned path of mutant counts and plot it in R.
    The per-generation transition matrix is the product of the
    drift/selection matrix and the mutation matrix.
    @param fs: form settings; reads npop, selection_ratio, mutation_ab,
    mutation_ba, nmutants_initial, nmutants_final, ngenerations, imageformat
    @return: the image data returned by RUtil.run_plotter
    @raise RUtil.RError: if the plotter return code is nonzero
    """
    # build each component matrix and check it before moving on
    drift_sel = pgmsinglesite.create_drift_selection_transition_matrix(
        fs.npop, fs.selection_ratio)
    MatrixUtil.assert_transition_matrix(drift_sel)
    mutation = pgmsinglesite.create_mutation_transition_matrix(
        fs.npop, fs.mutation_ab, fs.mutation_ba)
    MatrixUtil.assert_transition_matrix(mutation)
    # sample a path under the combined single-generation process
    combined = np.dot(drift_sel, mutation)
    sampled_path = PathSampler.sample_endpoint_conditioned_path(
        fs.nmutants_initial, fs.nmutants_final, fs.ngenerations, combined)
    # one R table row per position along the path
    rows = [[generation, count]
            for generation, count in enumerate(sampled_path)]
    table_string = RUtil.get_table_string(
        rows, ['generation', 'number.of.mutants'])
    # run R on the table with the ggplot script and the requested device
    plotter_result = RUtil.run_plotter(
        table_string,
        get_ggplot(),
        Form.g_imageformat_to_r_function[fs.imageformat])
    retcode, r_out, r_err, image_data = plotter_result
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
Esempio n. 2
0
def get_r_tikz_mi_plot_script(nsels, time_stats):
    """
    Write R commands plotting mutual information at each time point.
    The y axis limits come from columns 1, 2 and 6 of the transposed
    time_stats; the plotted lines are the my.table columns named by
    g_time_stats_headers[1] through g_time_stats_headers[6].
    @param nsels: the number of mut.sel processes, shown in the plot title
    @param time_stats: a list of stats for each time point
    @return: R script text with one plot call and six lines calls
    """
    # transpose so that each entry is one statistic across all time points
    columns = zip(*time_stats)
    mut_column = columns[1]
    lowest = min(columns[6] + mut_column)
    highest = max(columns[2] + mut_column)
    out = StringIO()
    # an empty plot sets up the axes; the curves are added afterwards
    empty_plot = RUtil.mk_call_str(
        "plot",
        "my.table$t",
        "my.table$mut",
        type='"n"',
        ylim=RUtil.mk_call_str("c", lowest, highest),
        xlab='"time"',
        ylab='"MI"',
        main='"MI for mut process and %d mut.sel processes"' % nsels,
    )
    print >> out, empty_plot
    line_colors = ("red", "blue", "green", "black", "green", "blue")
    # the colors pair up with header indices 1 through 6 in order
    for header_index, color in enumerate(line_colors, 1):
        column_name = g_time_stats_headers[header_index]
        print >> out, RUtil.mk_call_str(
            "lines",
            "my.table$t",
            "my.table$%s" % column_name,
            col='"%s"' % color,
        )
    return out.getvalue()
Esempio n. 3
0
def get_latex_documentbody(fs):
    """
    This is obsolete because I am now using pure R output.
    The latex documentbody should have a bunch of tikz pieces in it.
    Each tikz piece should have been generated from R.
    """
    Q_mut, Q_sels = get_qmut_qsels(fs)
    # compute the statistics
    ER_ratios, NSR_ratios, ER_NSR_ratios = get_statistic_ratios(Q_mut, Q_sels)
    M = zip(*(ER_ratios, NSR_ratios, ER_NSR_ratios))
    column_headers = ('ER.ratio', 'NSR.ratio', 'ER.times.NSR.ratio')
    table_string = RUtil.get_table_string(M, column_headers)
    nsels = len(Q_sels)
    # define the R scripts
    scripts = []
    for name in column_headers:
        scripts.append(get_r_tikz_script(nsels, name))
    # get the tikz codes from R, for each histogram
    retcode, r_out, r_err, tikz_code_list = RUtil.run_plotter_multiple_scripts(
        table_string, scripts, 'tikz', width=3, height=2)
    if retcode:
        raise RUtil.RError(r_err)
    #
    # show some timings
    print 'R did not fail, but here is its stderr:'
    print r_err
    #
    # write the latex code
    out = StringIO()
    #print >> out, '\\pagestyle{empty}'
    for tikz_code in tikz_code_list:
        print >> out, tikz_code
    # return the latex code, consisting mainly of a bunch of tikz plots
    return out.getvalue()
Esempio n. 4
0
def get_response_content(fs):
    """
    Plot a sampled path of mutant counts between two fixed endpoints.
    @param fs: form settings; reads npop, selection_ratio, mutation_ab,
    mutation_ba, nmutants_initial, nmutants_final, ngenerations, imageformat
    @return: the image data returned by RUtil.run_plotter
    @raise RUtil.RError: if the plotter return code is nonzero
    """
    def checked(matrix):
        # assert that the matrix is a transition matrix, then hand it back
        MatrixUtil.assert_transition_matrix(matrix)
        return matrix

    P_drift_selection = checked(
        pgmsinglesite.create_drift_selection_transition_matrix(
            fs.npop, fs.selection_ratio))
    P_mutation = checked(
        pgmsinglesite.create_mutation_transition_matrix(
            fs.npop, fs.mutation_ab, fs.mutation_ba))
    # the path is sampled under the product of the two matrices
    path = PathSampler.sample_endpoint_conditioned_path(
        fs.nmutants_initial,
        fs.nmutants_final,
        fs.ngenerations,
        np.dot(P_drift_selection, P_mutation))
    # the R table pairs each path index with the mutant count at that index
    table_rows = []
    for generation, nmutants in enumerate(path):
        table_rows.append([generation, nmutants])
    r_headers = ['generation', 'number.of.mutants']
    table_string = RUtil.get_table_string(table_rows, r_headers)
    r_script = get_ggplot()
    r_device = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter(
        table_string, r_script, r_device)
    if not retcode:
        return image_data
    raise RUtil.RError(r_err)
Esempio n. 5
0
def get_r_tikz_info_plot(nsels, time_stats):
    """
    Write R commands plotting informativeness with respect to MI.
    The script draws an empty plot whose y axis runs from 0 to log(2)
    and then adds one colored line for each my.table column named by
    g_time_stats_headers[17] through g_time_stats_headers[21].
    The data is read by R from my.table, so neither argument
    affects the generated script.
    @param nsels: unused; kept so that existing callers keep working
    @param time_stats: unused; kept so that existing callers keep working
    @return: R script text with one plot call and five lines calls
    """
    # The original transposed time_stats here and discarded the result;
    # that dead work has been removed.
    out = StringIO()
    ylim = RUtil.mk_call_str("c", 0, math.log(2))
    # type "n" sets up the axes without drawing any points
    print >> out, RUtil.mk_call_str(
        "plot",
        "my.table$t",
        "my.table$info.mi.diag.approx",
        type='"n"',
        ylim=ylim,
        xlab='"time"',
        ylab='"info"',
        main='"informativeness with respect to MI"',
    )
    colors = ("red", "orange", "green", "blue", "black")
    # the colors pair up with header indices 17 through 21 in order
    for plot_index, c in enumerate(colors, 17):
        header = g_time_stats_headers[plot_index]
        print >> out, RUtil.mk_call_str(
            "lines",
            "my.table$t",
            "my.table$%s" % header,
            col='"%s"' % c,
        )
    return out.getvalue()
Esempio n. 6
0
def main(args):
    """
    Build the plots for a range of alignment end positions and write a pdf.
    The end positions are spread evenly between 5 and g_nchar.
    @param args: parsed arguments; reads nlengths, nsamples and outfile
    @raise ValueError: if args.nlengths is less than one
    @raise RUtil.RError: if the plotter return code is nonzero
    """
    nlengths = args.nlengths
    if nlengths < 1:
        raise ValueError('the number of lengths should be at least one')
    # Get the end positions,
    # forcing the first end position to be 5
    # and the last end position to be g_nchar.
    # A single requested length has no spacing to compute,
    # so skip the division that would otherwise be by zero.
    if nlengths > 1:
        incr = (g_nchar - 5) / float(nlengths - 1)
    else:
        incr = 0.0
    stop_positions = [5 + int(i * incr) for i in range(nlengths)]
    # pin the last position exactly, regardless of float rounding
    stop_positions[-1] = g_nchar
    # build the R table and scripts
    # (the original comment says this step runs BEAST)
    table_string, scripts = get_table_string_and_scripts(
        stop_positions, args.nsamples)
    # create the comboscript
    out = StringIO()
    print >> out, 'library(ggplot2)'
    print >> out, 'par(mfrow=c(3,1))'
    for script in scripts:
        print >> out, script
    comboscript = out.getvalue()
    # create the R output image
    device_name = Form.g_imageformat_to_r_function['pdf']
    retcode, r_out, r_err, image_data = RUtil.run_plotter(
        table_string, comboscript, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    # write the image data
    with open(args.outfile, 'wb') as fout:
        fout.write(image_data)
Esempio n. 7
0
def get_response_content(fs):
    """
    Plot mutual information against the log probability ratio.
    The information is evaluated at 101 evenly spaced values
    between fs.x_min and fs.x_max, at the fixed time fs.t.
    @param fs: form settings; reads x_min, x_max, t and imageformat
    @return: the image data returned by RUtil.run_plotter
    @raise RUtil.RError: if the plotter return code is nonzero
    """
    f_info = ctmcmi.get_mutual_info_known_distn
    table_rows = []
    for log_ratio in np.linspace(fs.x_min, fs.x_max, 101):
        # a fresh process object is built for every grid point
        process = evozoo.AlternatingHypercube_d_1(3)
        params = np.array([log_ratio])
        distn = process.get_distn(params)
        Q = process.get_rate_matrix(params)
        table_rows.append([log_ratio, f_info(Q, distn, fs.t)])
    # hand the table to R along with the ggplot script
    table_string = RUtil.get_table_string(
        table_rows, ['log.probability.ratio', 'mutual.information'])
    plotter_result = RUtil.run_plotter(
        table_string,
        get_ggplot(),
        Form.g_imageformat_to_r_function[fs.imageformat])
    retcode, r_out, r_err, image_data = plotter_result
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
Esempio n. 8
0
def get_r_tikz_corr_plot(nsels, time_stats):
    """
    Write R commands plotting correlation with mutual information.
    The script draws an empty plot whose y axis runs from -1 to 1
    and then adds one colored line for each my.table column named by
    g_time_stats_headers[7] through g_time_stats_headers[11].
    The data is read by R from my.table, so neither argument
    affects the generated script.
    @param nsels: unused; kept so that existing callers keep working
    @param time_stats: unused; kept so that existing callers keep working
    @return: R script text with one plot call and five lines calls
    """
    # The original transposed time_stats here and discarded the result;
    # that dead work has been removed.
    out = StringIO()
    ylim = RUtil.mk_call_str("c", -1, 1)
    # type "n" sets up the axes without drawing any points
    print >> out, RUtil.mk_call_str(
        "plot",
        "my.table$t",
        "my.table$corr.mi.diag.approx",
        type='"n"',
        ylim=ylim,
        xlab='"time"',
        ylab='"correlation"',
        main='"correlation with mutual information"',
    )
    colors = ("red", "orange", "green", "blue", "black")
    # the colors pair up with header indices 7 through 11 in order
    for plot_index, c in enumerate(colors, 7):
        header = g_time_stats_headers[plot_index]
        print >> out, RUtil.mk_call_str(
            "lines",
            "my.table$t",
            "my.table$%s" % header,
            col='"%s"' % c,
        )
    return out.getvalue()
Esempio n. 9
0
def get_r_tikz_info_plot(nsels, time_stats):
    """
    Write R commands plotting informativeness with respect to MI.
    An empty plot with a y axis from 0 to log(2) is drawn first.
    Then one colored line is added per my.table column, using the names
    g_time_stats_headers[17] through g_time_stats_headers[21].
    Neither argument affects the script; R reads the data from my.table.
    @param nsels: unused; kept for backward compatibility
    @param time_stats: unused; kept for backward compatibility
    @return: R script text with one plot call and five lines calls
    """
    # The unused transposition of time_stats was dropped;
    # it built a full copy of the table and threw it away.
    y_low = 0
    y_high = math.log(2)
    r_calls = [
        RUtil.mk_call_str(
            'plot',
            'my.table$t',
            'my.table$info.mi.diag.approx',
            type='"n"',
            ylim=RUtil.mk_call_str('c', y_low, y_high),
            xlab='"time"',
            ylab='"info"',
            main='"informativeness with respect to MI"'),
    ]
    colors = ('red', 'orange', 'green', 'blue', 'black')
    plot_indices = (17, 18, 19, 20, 21)
    for c, plot_index in zip(colors, plot_indices):
        header = g_time_stats_headers[plot_index]
        r_calls.append(RUtil.mk_call_str(
            'lines',
            'my.table$t',
            'my.table$%s' % header,
            col='"%s"' % c))
    # emit the calls one per line
    out = StringIO()
    for r_call in r_calls:
        print >> out, r_call
    return out.getvalue()
Esempio n. 10
0
def get_response_content(fs):
    """
    Plot mutual information and its analog for two processes over time.
    Each statistic is evaluated at 100 evenly spaced times from 0 to 5
    for both matrices returned by get_input_matrices.
    @param fs: form settings; passed to get_input_matrices, and
    imageformat is read to choose the R device
    @return: the image data returned by RUtil.run_plotter
    @raise RUtil.RError: if the plotter return code is nonzero
    """
    M, R = get_input_matrices(fs)
    # define the evenly spaced time grid
    npoints = 100
    t_low = 0.0
    t_high = 5.0
    t_incr = (t_high - t_low) / (npoints - 1)
    # fill the R table, one row per time point
    table_rows = []
    for i in range(npoints):
        t = t_low + t_incr * i
        table_rows.append([
            t,
            ctmcmi.get_mutual_information(M, t),
            ctmcmi.get_mutual_information(R, t),
            ctmcmi.get_ll_ratio_wrong(M, t),
            ctmcmi.get_ll_ratio_wrong(R, t),
        ])
    r_headers = [
        't', 'mi.true.mut', 'mi.true.mutsel', 'mi.analog.mut',
        'mi.analog.mutsel'
    ]
    table_string = RUtil.get_table_string(table_rows, r_headers)
    # run R with the ggplot script on the requested device
    r_script = get_ggplot()
    r_device = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter(
        table_string, r_script, r_device)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
Esempio n. 11
0
def get_response_content(fs):
    """
    Plot Fisher information against the log probability ratio.
    The information is evaluated at 101 evenly spaced values
    between fs.x_min and fs.x_max, at the fixed time fs.t.
    @param fs: form settings; reads x_min, x_max, t and imageformat
    @return: the image data returned by RUtil.run_plotter
    @raise ValueError: if fs.x_max does not exceed fs.x_min
    @raise RUtil.RError: if the plotter return code is nonzero
    """
    # the range of logs must be nonempty
    if not (fs.x_max > fs.x_min):
        raise ValueError('check the min and max logs')
    f_info = divtime.get_fisher_info_known_distn_fast
    table_rows = []
    for log_ratio in np.linspace(fs.x_min, fs.x_max, 101):
        # a fresh process object is built for every grid point
        process = evozoo.DistinguishedCornerPairHypercube_d_1(3)
        params = np.array([log_ratio])
        distn = process.get_distn(params)
        Q = process.get_rate_matrix(params)
        table_rows.append([log_ratio, f_info(Q, distn, fs.t)])
    # hand the table to R along with the ggplot script
    table_string = RUtil.get_table_string(
        table_rows, ['log.probability.ratio', 'fisher.information'])
    plotter_result = RUtil.run_plotter(
        table_string,
        get_ggplot(),
        Form.g_imageformat_to_r_function[fs.imageformat])
    retcode, r_out, r_err, image_data = plotter_result
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
Esempio n. 12
0
def get_response_content(fs):
    """
    Make an R plot of Fisher information versus log probability ratio.
    @param fs: form settings; reads x_min, x_max, t and imageformat
    @return: the image data returned by RUtil.run_plotter
    @raise ValueError: if fs.x_max is less than or equal to fs.x_min
    @raise RUtil.RError: if the plotter return code is nonzero
    """
    if fs.x_max <= fs.x_min:
        raise ValueError('check the min and max logs')

    def table_row(x):
        # pair one log ratio with the information of its process at time t
        proc = evozoo.DistinguishedCornerPairHypercube_d_1(3)
        X = np.array([x])
        distn = proc.get_distn(X)
        Q = proc.get_rate_matrix(X)
        return [x, divtime.get_fisher_info_known_distn_fast(Q, distn, fs.t)]

    # 101 evenly spaced log ratios across the requested range
    grid = np.linspace(fs.x_min, fs.x_max, 101)
    arr = [table_row(x) for x in grid]
    headers = ['log.probability.ratio', 'fisher.information']
    table_string = RUtil.get_table_string(arr, headers)
    script = get_ggplot()
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter(
            table_string, script, device_name)
    if not retcode:
        return image_data
    raise RUtil.RError(r_err)
Esempio n. 13
0
def get_response_content(fs):
    """
    Make an R plot comparing true and analog MI for two processes.
    The four statistics are tabulated at 100 evenly spaced
    times from 0 to 5.
    @param fs: form settings; passed to get_input_matrices, and
    imageformat is read to choose the R device
    @return: the image data returned by RUtil.run_plotter
    @raise RUtil.RError: if the plotter return code is nonzero
    """
    M, R = get_input_matrices(fs)

    def table_row(t):
        # the statistics are listed in the same order as the headers
        mi_mut = ctmcmi.get_mutual_information(M, t)
        mi_mutsel = ctmcmi.get_mutual_information(R, t)
        analog_mut = ctmcmi.get_ll_ratio_wrong(M, t)
        analog_mutsel = ctmcmi.get_ll_ratio_wrong(R, t)
        return [t, mi_mut, mi_mutsel, analog_mut, analog_mutsel]

    # define the time grid
    npoints = 100
    t_low = 0.0
    t_high = 5.0
    t_incr = (t_high - t_low) / (npoints - 1)
    t_values = [t_low + t_incr*i for i in range(npoints)]
    arr = [table_row(t) for t in t_values]
    headers = [
            't',
            'mi.true.mut',
            'mi.true.mutsel',
            'mi.analog.mut',
            'mi.analog.mutsel']
    table_string = RUtil.get_table_string(arr, headers)
    # plot the table with the ggplot script on the requested device
    script = get_ggplot()
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter(
            table_string, script, device_name)
    if not retcode:
        return image_data
    raise RUtil.RError(r_err)
Esempio n. 14
0
def get_r_tikz_mi_plot_script(nsels, time_stats):
    """
    At each time point plot mutual information for all matrices.
    The y axis limits are taken from columns 1, 2 and 6 of the
    transposed time_stats.
    @param nsels: the number of mut.sel processes, shown in the plot title
    @param time_stats: a list of stats for each time point
    @return: R script text with one plot call and six lines calls
    """
    # each entry of the transposed table is one statistic over all times
    per_stat = zip(*time_stats)
    mi_mut = per_stat[1]
    mi_min_sels = per_stat[6]
    mi_max_sels = per_stat[2]
    y_range = (min(mi_min_sels + mi_mut), max(mi_max_sels + mi_mut))
    title = '"MI for mut process and %d mut.sel processes"' % nsels
    r_calls = [
        RUtil.mk_call_str(
            'plot',
            'my.table$t',
            'my.table$mut',
            type='"n"',
            ylim=RUtil.mk_call_str('c', y_range[0], y_range[1]),
            xlab='"time"',
            ylab='"MI"',
            main=title),
    ]
    # one line per statistic, in header order 1 through 6
    colors = ('red', 'blue', 'green', 'black', 'green', 'blue')
    for c, plot_index in zip(colors, range(1, 7)):
        r_calls.append(RUtil.mk_call_str(
            'lines',
            'my.table$t',
            'my.table$%s' % g_time_stats_headers[plot_index],
            col='"%s"' % c))
    # emit the calls one per line
    out = StringIO()
    for r_call in r_calls:
        print >> out, r_call
    return out.getvalue()
Esempio n. 15
0
def get_r_tikz_corr_plot(nsels, time_stats):
    """
    Write R commands plotting correlation with mutual information.
    An empty plot with a y axis from -1 to 1 is drawn first.
    Then one colored line is added per my.table column, using the names
    g_time_stats_headers[7] through g_time_stats_headers[11].
    Neither argument affects the script; R reads the data from my.table.
    @param nsels: unused; kept for backward compatibility
    @param time_stats: unused; kept for backward compatibility
    @return: R script text with one plot call and five lines calls
    """
    # The unused transposition of time_stats was dropped;
    # it built a full copy of the table and threw it away.
    y_low = -1
    y_high = 1
    r_calls = [
        RUtil.mk_call_str(
            'plot',
            'my.table$t',
            'my.table$corr.mi.diag.approx',
            type='"n"',
            ylim=RUtil.mk_call_str('c', y_low, y_high),
            xlab='"time"',
            ylab='"correlation"',
            main='"correlation with mutual information"'),
    ]
    colors = ('red', 'orange', 'green', 'blue', 'black')
    plot_indices = (7, 8, 9, 10, 11)
    for c, plot_index in zip(colors, plot_indices):
        header = g_time_stats_headers[plot_index]
        r_calls.append(RUtil.mk_call_str(
            'lines',
            'my.table$t',
            'my.table$%s' % header,
            col='"%s"' % c))
    # emit the calls one per line
    out = StringIO()
    for r_call in r_calls:
        print >> out, r_call
    return out.getvalue()
Esempio n. 16
0
def get_r_tikz_prop_plot(nsels, time_stats):
    """
    Write R commands plotting the proportion of same sign difference as MI.
    An empty plot with a y axis from 0 to 1 is drawn first.
    Then one colored line is added per my.table column, using the names
    g_time_stats_headers[12] through g_time_stats_headers[16].
    Neither argument affects the script; R reads the data from my.table.
    @param nsels: unused; kept for backward compatibility
    @param time_stats: unused; kept for backward compatibility
    @return: R script text with one plot call and five lines calls
    """
    # The unused transposition of time_stats was dropped;
    # it built a full copy of the table and threw it away.
    y_low = 0
    y_high = 1
    r_calls = [
        RUtil.mk_call_str(
            'plot',
            'my.table$t',
            'my.table$prop.mi.diag.approx',
            type='"n"',
            ylim=RUtil.mk_call_str('c', y_low, y_high),
            xlab='"time"',
            ylab='"proportion"',
            main='"proportion of same sign difference as MI"'),
    ]
    colors = ('red', 'orange', 'green', 'blue', 'black')
    plot_indices = (12, 13, 14, 15, 16)
    for c, plot_index in zip(colors, plot_indices):
        header = g_time_stats_headers[plot_index]
        r_calls.append(RUtil.mk_call_str(
            'lines',
            'my.table$t',
            'my.table$%s' % header,
            col='"%s"' % c))
    # emit the calls one per line
    out = StringIO()
    for r_call in r_calls:
        print >> out, r_call
    return out.getvalue()
Esempio n. 17
0
def get_r_tikz_prop_plot(nsels, time_stats):
    """
    Write R commands plotting the proportion of same sign difference as MI.
    The script draws an empty plot whose y axis runs from 0 to 1
    and then adds one colored line for each my.table column named by
    g_time_stats_headers[12] through g_time_stats_headers[16].
    The data is read by R from my.table, so neither argument
    affects the generated script.
    @param nsels: unused; kept so that existing callers keep working
    @param time_stats: unused; kept so that existing callers keep working
    @return: R script text with one plot call and five lines calls
    """
    # The original transposed time_stats here and discarded the result;
    # that dead work has been removed.
    out = StringIO()
    ylim = RUtil.mk_call_str("c", 0, 1)
    # type "n" sets up the axes without drawing any points
    print >> out, RUtil.mk_call_str(
        "plot",
        "my.table$t",
        "my.table$prop.mi.diag.approx",
        type='"n"',
        ylim=ylim,
        xlab='"time"',
        ylab='"proportion"',
        main='"proportion of same sign difference as MI"',
    )
    colors = ("red", "orange", "green", "blue", "black")
    # the colors pair up with header indices 12 through 16 in order
    for plot_index, c in enumerate(colors, 12):
        header = g_time_stats_headers[plot_index]
        print >> out, RUtil.mk_call_str(
            "lines",
            "my.table$t",
            "my.table$%s" % header,
            col='"%s"' % c,
        )
    return out.getvalue()
Esempio n. 18
0
def get_response_content(fs):
    """
    Plot the user-supplied R table with the module's script body.
    @param fs: form settings; reads table and imageformat
    @return: the image data returned by RUtil.run_plotter_concise
    """
    # Constructing the RTable is done only as a check of the input;
    # the resulting object itself is not used.
    table_lines = fs.table.splitlines()
    RUtil.RTable(table_lines)
    # plot the raw table text on the requested device
    r_device = Form.g_imageformat_to_r_function[fs.imageformat]
    return RUtil.run_plotter_concise(fs.table, g_script_body, r_device)
Esempio n. 19
0
def get_latex_documentbody(fs):
    """
    This is obsolete because I am now using pure R output.
    The latex documentbody should have a bunch of tikz pieces in it.
    Each tikz piece should have been generated from R.
    """
    Q_mut, Q_sels = get_qmut_qsels(fs)
    # compute the statistics
    ER_ratios, NSR_ratios, ER_NSR_ratios  = get_statistic_ratios(Q_mut, Q_sels)
    M = zip(*(ER_ratios, NSR_ratios, ER_NSR_ratios))
    column_headers = ('ER.ratio', 'NSR.ratio', 'ER.times.NSR.ratio')
    table_string = RUtil.get_table_string(M, column_headers)
    nsels = len(Q_sels)
    # define the R scripts
    scripts = []
    for name in column_headers:
        scripts.append(get_r_tikz_script(nsels, name))
    # get the tikz codes from R, for each histogram
    retcode, r_out, r_err, tikz_code_list = RUtil.run_plotter_multiple_scripts(
            table_string, scripts, 'tikz',
            width=3, height=2)
    if retcode:
        raise RUtil.RError(r_err)
    #
    # show some timings
    print 'R did not fail, but here is its stderr:'
    print r_err
    #
    # write the latex code
    out = StringIO()
    #print >> out, '\\pagestyle{empty}'
    for tikz_code in tikz_code_list:
        print >> out, tikz_code
    # return the latex code, consisting mainly of a bunch of tikz plots
    return out.getvalue()
Esempio n. 20
0
def get_response_content(fs):
    """
    Make an R plot of mutual information versus log probability ratio.
    @param fs: form settings; reads x_min, x_max, t and imageformat
    @return: the image data returned by RUtil.run_plotter
    @raise RUtil.RError: if the plotter return code is nonzero
    """
    def table_row(x):
        # pair one log ratio with the information of its process at time t
        proc = evozoo.AlternatingHypercube_d_1(3)
        X = np.array([x])
        distn = proc.get_distn(X)
        Q = proc.get_rate_matrix(X)
        return [x, ctmcmi.get_mutual_info_known_distn(Q, distn, fs.t)]

    # 101 evenly spaced log ratios across the requested range
    grid = np.linspace(fs.x_min, fs.x_max, 101)
    arr = [table_row(x) for x in grid]
    headers = ['log.probability.ratio', 'mutual.information']
    table_string = RUtil.get_table_string(arr, headers)
    script = get_ggplot()
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter(
            table_string, script, device_name)
    if not retcode:
        return image_data
    raise RUtil.RError(r_err)
Esempio n. 21
0
def main(args):
    """
    Build the plots for a column-permuted alignment and write a pdf.
    Progress messages go to the 'toplevel.logger' logger, at DEBUG level
    when args.verbose is set and at WARNING level otherwise.
    @param args: parsed arguments; reads verbose, nsamples and outfile
    @raise RUtil.RError: if the plotter return code is nonzero
    """
    # set up the logger with a handler that appends the time to each message
    log = logging.getLogger('toplevel.logger')
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(message)s %(asctime)s'))
    log.addHandler(handler)
    log.setLevel(logging.DEBUG if args.verbose else logging.WARNING)
    log.info('(local) permute columns of the alignment')
    header_seq_pairs = beasttut.get_456_col_permuted_header_seq_pairs()
    log.info('(local) run BEAST serially locally and build the R stuff')
    table_string, scripts = get_table_string_and_scripts(
            g_start_stop_pairs, args.nsamples, header_seq_pairs)
    log.info('(local) create the composite R script')
    # the composite script is a fixed preamble followed by every script
    combo = StringIO()
    for r_line in ['library(ggplot2)', 'par(mfrow=c(3,1))']:
        print >> combo, r_line
    for script in scripts:
        print >> combo, script
    comboscript = combo.getvalue()
    log.info('(local) run R to create the pdf')
    plotter_result = RUtil.run_plotter(
        table_string,
        comboscript,
        Form.g_imageformat_to_r_function['pdf'],
        keep_intermediate=True)
    retcode, r_out, r_err, image_data = plotter_result
    if retcode:
        raise RUtil.RError(r_err)
    log.info('(local) write the .pdf file')
    with open(args.outfile, 'wb') as fout:
        fout.write(image_data)
    log.info('(local) return from toplevel')
Esempio n. 22
0
def get_r_tikz_stub():
    """
    Run the RUtil stub script through the tikz device.
    @return: the tikz code returned by RUtil.run_plotter_no_table
    @raise RUtil.RError: if the plotter return code is nonzero
    """
    plotter_result = RUtil.run_plotter_no_table(RUtil.g_stub, 'tikz')
    retcode, r_out, r_err, tikz_code = plotter_result
    if not retcode:
        return tikz_code
    raise RUtil.RError(r_err)
Esempio n. 23
0
def get_response_content(fs):
    """
    Plot the optimized information of each requested process over time.
    For every time point and every requested process, the OptDep
    objective is minimized from a random start and its negated value
    is tabulated.
    NOTE(review): the objective is presumably the negative of the
    Fisher information -- confirm against OptDep.
    @param fs: form settings; reads one flag per process name in
    g_process_triples, plus start_time, stop_time, d and imageformat
    @return: the image data returned by RUtil.run_plotter
    @raise ValueError: if no process was requested
    @raise RUtil.RError: if the plotter return code is nonzero
    """
    f_info = divtime.get_fisher_info_known_distn_fast
    # keep only the triples whose name is flagged on the form
    selected = [trip for trip in g_process_triples if getattr(fs, trip[0])]
    if not selected:
        raise ValueError('nothing to plot')
    # R column names use dots where the python names use underscores
    headers = ['t'] + [name.replace('_', '.') for name, _, _ in selected]
    # The time points are throttled by both elapsed seconds and count
    # because each one requires an optimization per process.
    time_points = cbreaker.throttled(
        progrid.gen_binary(fs.start_time, fs.stop_time),
        nseconds=5,
        ncount=200)
    table_rows = []
    for t in time_points:
        row = [t]
        for _, _, zoo_class in selected:
            process = zoo_class(fs.d)
            df = process.get_df()
            objective = OptDep(process, t, f_info)
            # scipy.optimize.minimize would be preferable but needs a
            # newer scipy than was packaged for ubuntu at the time;
            # fmin_bfgs sometimes hung or had trouble with maxiter=10K.
            if df:
                X0 = np.random.randn(df)
                xopt = scipy.optimize.fmin(
                    objective, X0, maxiter=10000, maxfun=10000)
            else:
                # nothing to optimize, so evaluate at the empty vector
                xopt = np.array([])
            row.append(-objective(xopt))
        table_rows.append(row)
    # put the rows in order before tabulating
    table_rows.sort()
    table_string = RUtil.get_table_string(table_rows, headers)
    plotter_result = RUtil.run_plotter(
        table_string,
        get_ggplot(),
        Form.g_imageformat_to_r_function[fs.imageformat])
    retcode, r_out, r_err, image_data = plotter_result
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
Esempio n. 24
0
def get_response_content(fs):
    """
    Plot endpoint-conditioned state probabilities for every generation.
    For each interior generation, the weight of state k is the
    probability of reaching k from the initial state times the
    probability of reaching the final state from k, normalized over k.
    @param fs: form settings; reads npop, selection_ratio, mutation_ab,
    mutation_ba, nmutants_initial, nmutants_final, ngenerations, imageformat
    @return: the image data returned by RUtil.run_plotter
    @raise RUtil.RError: if the plotter return code is nonzero
    """
    # build and check the two components of the transition matrix
    drift_sel = pgmsinglesite.create_drift_selection_transition_matrix(
            fs.npop, fs.selection_ratio)
    MatrixUtil.assert_transition_matrix(drift_sel)
    mutation = pgmsinglesite.create_mutation_transition_matrix(
            fs.npop, fs.mutation_ab, fs.mutation_ba)
    MatrixUtil.assert_transition_matrix(mutation)
    P = np.dot(drift_sel, mutation)
    # M[k, g] is the probability of state k at generation g;
    # the first and last columns are pinned to the observed endpoints.
    nstates = fs.npop + 1
    last = fs.ngenerations - 1
    M = np.zeros((nstates, fs.ngenerations))
    M[fs.nmutants_initial, 0] = 1.0
    M[fs.nmutants_final, last] = 1.0
    for g in range(1, last):
        # g steps lead to this generation and (last - g) steps remain
        to_here = np.linalg.matrix_power(P, g)
        from_here = np.linalg.matrix_power(P, last - g)
        weights = np.array(
            [to_here[fs.nmutants_initial, k] * from_here[k, fs.nmutants_final]
             for k in range(nstates)],
            dtype=float)
        weights /= np.sum(weights)
        M[:, g] = weights
    # flatten the matrix into table rows, adding a log probability column
    neg_inf = float('-inf')
    table_rows = []
    for g in range(fs.ngenerations):
        for k in range(nstates):
            p = M[k, g]
            table_rows.append([g, k, p, math.log(p) if p else neg_inf])
    r_headers = [
            'generation',
            'number.of.mutants',
            'probability',
            'log.prob',
            ]
    table_string = RUtil.get_table_string(table_rows, r_headers)
    # run R with the ggplot script on the requested device
    r_script = get_ggplot()
    r_device = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter(
            table_string, r_script, r_device)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
Esempio n. 25
0
def get_response_content(fs):
    """
    Make an R plot of optimized information versus time.
    One curve is tabulated for each process requested on the form.
    @param fs: form settings; reads one flag per process name in
    g_process_triples, plus start_time, stop_time, d and imageformat
    @return: the image data returned by RUtil.run_plotter
    @raise ValueError: if no process was requested
    @raise RUtil.RError: if the plotter return code is nonzero
    """
    def best_info(zoo_class, t):
        # Minimize the OptDep objective from a random starting point
        # and return the negated objective value at the optimum.
        zoo_obj = zoo_class(fs.d)
        df = zoo_obj.get_df()
        opt_dep = OptDep(
            zoo_obj, t, divtime.get_fisher_info_known_distn_fast)
        if not df:
            # there are no free parameters to optimize
            return -opt_dep(np.array([]))
        # fmin is used because scipy.optimize.minimize needs a newer
        # scipy than was packaged for ubuntu at the time, and because
        # fmin_bfgs sometimes hung or had trouble with maxiter=10K.
        X0 = np.random.randn(df)
        xopt = scipy.optimize.fmin(
                opt_dep, X0, maxiter=10000, maxfun=10000)
        return -opt_dep(xopt)

    # collect the processes whose flag is set on the form
    requested_triples = []
    for triple in g_process_triples:
        python_name, desc, zoo_class = triple
        if getattr(fs, python_name):
            requested_triples.append(triple)
    if not requested_triples:
        raise ValueError('nothing to plot')
    # define the R table headers
    headers = ['t']
    for python_name, desc, zoo_class in requested_triples:
        headers.append(python_name.replace('_', '.'))
    # The optimizations are slow, so the time points are throttled
    # by both elapsed seconds and count.
    arr = []
    for t in cbreaker.throttled(
            progrid.gen_binary(fs.start_time, fs.stop_time),
            nseconds=5, ncount=200):
        arr.append([t] + [
            best_info(zoo_class, t)
            for python_name, desc, zoo_class in requested_triples])
    arr.sort()
    # tabulate and plot
    table_string = RUtil.get_table_string(arr, headers)
    script = get_ggplot()
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter(
            table_string, script, device_name)
    if not retcode:
        return image_data
    raise RUtil.RError(r_err)
Esempio n. 26
0
def get_response_content(fs):
    """
    Check the R table and the requested variable names, then run R.
    @param fs: form settings; reads table, var_a and var_b
    @return: the result of RUtil.run_with_table
    @raise ValueError: if var_a or var_b is not a column header
    """
    # parse the R table
    rtable = RUtil.RTable(fs.table.splitlines())
    headers = rtable.headers
    # NOTE(review): the data attribute is read but not used here;
    # the read is kept in case the access itself matters -- confirm.
    data_rows = rtable.data
    Carbone.validate_headers(headers)
    # each requested variable name must be one of the column headers
    required = (
        ('var_a', 'the first variable name is not column header'),
        ('var_b', 'the second variable name is not column header'),
    )
    for attr_name, complaint in required:
        if getattr(fs, attr_name) not in headers:
            raise ValueError(complaint)
    return RUtil.run_with_table(fs.table, fs, get_script_content)
Esempio n. 27
0
def get_response_content(fs):
    """
    Plot mean hitting times side by side for two values of Nr.
    Both panels share the same axis limits, and an outer title
    reports the haploid population size.
    @param fs: form settings; reads scale_to_2N_200, ylogscale, imageformat
    @return: the image data returned by RUtil.run_plotter_no_table
    @raise RUtil.RError: if the plotter return code is nonzero
    """
    # fixed population sizes
    N_diploid = 10
    N_hap = 2 * N_diploid
    # mutation rates and selection coefficients to plot
    plot_density = 2
    theta_values = [0.001, 0.01, 0.1, 1.0]
    Ns_low, Ns_high = 0.0, 3.0
    Ns_values = np.linspace(Ns_low, Ns_high, 3 * plot_density + 1)
    # one array of values per panel
    Nr_left, Nr_right = 0, 5
    arr_left = get_plot_array(N_diploid, Nr_left, theta_values, Ns_values)
    arr_right = get_plot_array(N_diploid, Nr_right, theta_values, Ns_values)
    if fs.scale_to_2N_200:
        # rescale the values as if the haploid population size were 200
        scale = 200 / float(N_hap)
        arr_left = scale * np.array(arr_left)
        arr_right = scale * np.array(arr_right)
        ylab = '"generations * theta * (200 / 2N)"'
    else:
        ylab = '"generations * theta"'
    # the two panels share x and y limits
    both = (arr_left, arr_right)
    xlim = (Ns_low, Ns_high)
    ylim = (np.min(both), np.max(both))
    ylogstr = '"y"' if fs.ylogscale else '""'
    # Lay out the two plots in one row with room for an outer title.
    # http://sphaerula.com/legacy/R/multiplePlotFigure.html
    r_pieces = [
        mk_call_str(
            'par',
            mfrow='c(1,2)',
            oma='c(0,0,2,0)',
        ),
        get_plot('left', Nr_left, arr_left, theta_values,
                 Ns_values, xlim, ylim, ylogstr, ylab),
        # the right panel gets an empty y axis label
        get_plot('right', Nr_right, arr_right, theta_values,
                 Ns_values, xlim, ylim, ylogstr, '""'),
        mk_call_str(
            'title',
            '"mean hitting time, 2N=%s"' % N_hap,
            outer='TRUE',
        ),
    ]
    out = StringIO()
    for piece in r_pieces:
        print >> out, piece
    script = out.getvalue().rstrip()
    # create the R plot image
    r_device = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter_no_table(
        script, r_device)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
Esempio n. 28
0
def main(args):
    """
    Run BEAST on hierarchically nested intervals and write a pdf of plots.
    @param args: parsed command line arguments; reads ntiles, verbose,
    infile, mcmc_id, nsamples, log_id, log_filename, log_logevery,
    tile_width, offset, alignment_id, full_table_out and outfile
    @raise ValueError: if ntiles does not have exactly one set bit
    @raise RUtil.RError: if the R plotter reports a nonzero return code
    """
    # a power of two has exactly one bit set
    if gmpy.popcount(args.ntiles) != 1:
        raise ValueError('the number of tiles should be a power of two')
    # set up the logger
    logger = logging.getLogger('toplevel.logger')
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(message)s %(asctime)s'))
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG if args.verbose else logging.WARNING)
    # the xml comes from the named file, or from stdin when no file is given
    logger.info('(local) read the xml contents')
    if args.infile is not None:
        with open(args.infile) as fin:
            xmldata = fin.read()
    else:
        xmldata = sys.stdin.read()
    logger.info(
        '(local) modify the log filename and chain length xml contents')
    xmldata = beast.set_nsamples(xmldata, args.mcmc_id, args.nsamples)
    xmldata = beast.set_log_filename(xmldata, args.log_id, args.log_filename)
    xmldata = beast.set_log_logevery(xmldata, args.log_id, args.log_logevery)
    logger.info('(local) define the hierarchically nested intervals')
    total_width = args.tile_width * args.ntiles
    slices = beasttiling.gen_hierarchical_slices(
        args.tile_width, args.offset, total_width)
    # shift each slice start by one to get the start position
    start_stop_pairs = tuple((a + 1, b) for a, b in slices)
    logger.info('(local) run BEAST serially locally and build the R stuff')
    table_string, full_table_string, scripts = get_table_strings_and_scripts(
        xmldata, args.alignment_id, start_stop_pairs, args.nsamples)
    # the verbose table is written only when an output path was requested
    if args.full_table_out:
        logger.info('(local) create the verbose R table')
        with open(args.full_table_out, 'w') as fout:
            fout.write(full_table_string)
    logger.info('(local) create the composite R script')
    out = StringIO()
    for line in ['library(ggplot2)', 'par(mfrow=c(3,1))']:
        print >> out, line
    for script in scripts:
        print >> out, script
    comboscript = out.getvalue()
    logger.info('(local) run R to create the pdf')
    device_name = Form.g_imageformat_to_r_function['pdf']
    plotter_result = RUtil.run_plotter(
        table_string, comboscript, device_name, keep_intermediate=True)
    retcode, r_out, r_err, image_data = plotter_result
    if retcode:
        raise RUtil.RError(r_err)
    logger.info('(local) write the .pdf file')
    with open(args.outfile, 'wb') as fout:
        fout.write(image_data)
    logger.info('(local) return from toplevel')
Esempio n. 29
0
def hard_coded_analysis():
    branch_length = 5.0
    sequence_length = 1000
    nsequences = 1000
    estimate_triple_list = []
    column_headers = ('most.info', 'less.info', 'least.info')
    for i in range(nsequences):
        # sample sequence changes at three levels of informativeness
        sequence_changes = sample_sequence_changes(
                branch_length, sequence_length)
        # get a distance estimate for each level of informativeness
        estimate_triple = sample_distance(*sequence_changes)
        estimate_triple_list.append(estimate_triple)
    print RUtil.get_table_string(estimate_triple_list, column_headers)
Esempio n. 30
0
def get_latex_documentbody(fs):
    """
    This is obsolete.
    Run each R script through the tikz device and concatenate the results.
    @param fs: form selections forwarded to get_table_string_and_scripts
    @return: the tikz code of every script, each followed by a newline
    @raise RUtil.RError: if the R plotter reports a nonzero return code
    """
    table_string, scripts = get_table_string_and_scripts(fs)
    out = StringIO()
    for script in scripts:
        plotter_result = RUtil.run_plotter(
            table_string, script, 'tikz', width=5, height=5)
        retcode, r_out, r_err, tikz_code = plotter_result
        # stop at the first script that R fails to plot
        if retcode:
            raise RUtil.RError(r_err)
        print >> out, tikz_code
    return out.getvalue()
Esempio n. 31
0
def get_table_string_and_scripts_from_logs(start_stop_pairs, log_paths,
                                           nsamples):
    """
    This is for analysis of remote execution.
    @param start_stop_pairs: (start, stop) position pairs, one per log
    @param log_paths: log paths paired in order with start_stop_pairs
    @param nsamples: forwarded to read_log and to the script builder
    @return: (R table string, R scripts)
    """
    def summarize(values):
        # each statistic contributes (hpd low, mean, hpd high) to the row
        corr_info = mcmc.Correlation()
        corr_info.analyze(values)
        hpd_low, hpd_high = mcmc.get_hpd_interval(0.95, values)
        return [hpd_low, corr_info.mean, hpd_high]

    # build one R table row per log
    table_rows = []
    sequence_lengths = []
    midpoints = []
    for (start_pos, stop_pos), log_path in zip(start_stop_pairs, log_paths):
        # both endpoints are included in the length
        sequence_length = stop_pos - start_pos + 1
        means, variations, covs = read_log(log_path, nsamples)
        midpoint = (start_pos + stop_pos) / 2.0
        row = [sequence_length, midpoint]
        for values in (means, variations, covs):
            row.extend(summarize(values))
        table_rows.append(row)
        sequence_lengths.append(sequence_length)
        midpoints.append(midpoint)
    table_string = RUtil.get_table_string(table_rows, g_headers)
    scripts = get_ggplot2_scripts(nsamples, sequence_lengths, midpoints)
    return table_string, scripts
Esempio n. 32
0
def get_table_string_and_scripts_par(start_stop_pairs, nsamples):
    """
    Local command-line multi-process only.
    @param start_stop_pairs: a sequence of (start, stop) position pairs
    @param nsamples: forwarded to each worker and to the script builder
    @return: (R table string, R scripts)
    """
    # define the work items before any worker process is started
    start_stop_n_triples = [(a, b, nsamples) for a, b in start_stop_pairs]
    # NOTE: the pool size is fixed at four workers,
    # it is not derived from the number of cores of the machine.
    mypool = Pool(processes=4)
    try:
        # do the multiprocessing
        post_pairs_list = mypool.map(forked_function, start_stop_n_triples)
    finally:
        # map() has returned or raised, so no result is still pending;
        # release the worker processes instead of leaking them.
        mypool.terminate()
        mypool.join()
    # build the array for the R table
    data_arr = []
    sequence_lengths = []
    midpoints = []
    for start_stop_pair, post_pairs in zip(start_stop_pairs, post_pairs_list):
        start_pos, stop_pos = start_stop_pair
        # both endpoints are included in the length
        sequence_length = stop_pos - start_pos + 1
        midpoint = (start_pos + stop_pos) / 2.0
        row = [sequence_length, midpoint]
        # each worker result is a sequence of (corr_info, hpd_interval) pairs
        for corr_info, hpd_interval in post_pairs:
            hpd_low, hpd_high = hpd_interval
            row.extend([hpd_low, corr_info.mean, hpd_high])
        data_arr.append(row)
        sequence_lengths.append(sequence_length)
        midpoints.append(midpoint)
    # build the table string
    table_string = RUtil.get_table_string(data_arr, g_headers)
    # get the scripts
    scripts = get_ggplot2_scripts(nsamples, sequence_lengths, midpoints)
    # return the table string and scripts
    return table_string, scripts
Esempio n. 33
0
def get_table_string_and_scripts(start_stop_pairs, nsamples):
    """
    Command-line only.
    Summarize the value lists of each (start, stop) interval as an R table.
    @param start_stop_pairs: an iterable of (start, stop) position pairs
    @param nsamples: forwarded to get_value_lists and to the script builder
    @return: (R table string, R scripts)
    """
    rows = []
    lengths = []
    mids = []
    for start_pos, stop_pos in start_stop_pairs:
        # both endpoints are included in the length
        length = stop_pos - start_pos + 1
        value_lists = get_value_lists(start_pos, stop_pos, nsamples)
        means, variations, covs = value_lists
        mid = (start_pos + stop_pos) / 2.0
        row = [length, mid]
        # three columns per statistic: hpd low, mean, hpd high
        for values in (means, variations, covs):
            corr_info = mcmc.Correlation()
            corr_info.analyze(values)
            hpd_low, hpd_high = mcmc.get_hpd_interval(0.95, values)
            row += [hpd_low, corr_info.mean, hpd_high]
        rows.append(row)
        lengths.append(length)
        mids.append(mid)
    # the table string is built before the scripts
    table_string = RUtil.get_table_string(rows, g_headers)
    scripts = get_ggplot2_scripts(nsamples, lengths, mids)
    return table_string, scripts
Esempio n. 34
0
def get_table_string_and_scripts_par(start_stop_pairs, nsamples):
    """
    Local command-line multi-process only.
    @param start_stop_pairs: a sequence of (start, stop) position pairs
    @param nsamples: forwarded to each worker and to the script builder
    @return: (R table string, R scripts)
    """
    # define the work items before any worker process is started
    start_stop_n_triples = [(a, b, nsamples) for a, b in start_stop_pairs]
    # NOTE: the pool size is fixed at four workers,
    # it is not derived from the number of cores of the machine.
    mypool = Pool(processes=4)
    try:
        # do the multiprocessing
        post_pairs_list = mypool.map(forked_function, start_stop_n_triples)
    finally:
        # map() has returned or raised, so no result is still pending;
        # release the worker processes instead of leaking them.
        mypool.terminate()
        mypool.join()
    # build the array for the R table
    data_arr = []
    sequence_lengths = []
    midpoints = []
    for start_stop_pair, post_pairs in zip(start_stop_pairs, post_pairs_list):
        start_pos, stop_pos = start_stop_pair
        # both endpoints are included in the length
        sequence_length = stop_pos - start_pos + 1
        midpoint = (start_pos + stop_pos) / 2.0
        row = [sequence_length, midpoint]
        # each worker result is a sequence of (corr_info, hpd_interval) pairs
        for corr_info, hpd_interval in post_pairs:
            hpd_low, hpd_high = hpd_interval
            row.extend([hpd_low, corr_info.mean, hpd_high])
        data_arr.append(row)
        sequence_lengths.append(sequence_length)
        midpoints.append(midpoint)
    # build the table string
    table_string = RUtil.get_table_string(data_arr, g_headers)
    # get the scripts
    scripts = get_ggplot2_scripts(nsamples, sequence_lengths, midpoints)
    # return the table string and scripts
    return table_string, scripts
Esempio n. 35
0
def get_table_string_and_scripts(stop_positions, nsamples):
    """
    Command-line only.
    Summarize the value lists of intervals that all start at position 1.
    @param stop_positions: a sequence of interval stop positions
    @param nsamples: forwarded to get_value_lists
    @return: (R table string, R scripts)
    """
    start_position = 1

    def summarize(values):
        # each statistic contributes (hpd low, mean, hpd high) to the row
        corr_info = mcmc.Correlation()
        corr_info.analyze(values)
        hpd_low, hpd_high = mcmc.get_hpd_interval(0.95, values)
        return [hpd_low, corr_info.mean, hpd_high]

    # one R table row per stop position
    table_rows = []
    for stop_position in stop_positions:
        # both endpoints are included in the length
        row = [stop_position - start_position + 1]
        means, variations, covs = get_value_lists(start_position,
                                                  stop_position, nsamples)
        for values in (means, variations, covs):
            row.extend(summarize(values))
        table_rows.append(row)
    table_string = RUtil.get_table_string(table_rows, g_headers)
    # the scripts only need the lengths, recomputed from the stop positions
    sequence_lengths = [
        stop - start_position + 1 for stop in stop_positions]
    scripts = get_ggplot2_scripts(sequence_lengths)
    return table_string, scripts
Esempio n. 36
0
def test_accumulate():
    """
    Check that RUtil.accumulate with an adding callback yields the total.
    """
    def add(item, sum):
        # a running sum of None is treated as zero
        running = 0 if sum is None else sum
        return item + running

    values = (5, 10, 15, 20, 25, 30, 2, -1)  # these add up to 106
    total = RUtil.accumulate(values, add)
    assert 106 == total
Esempio n. 37
0
def get_table_string_and_scripts(stop_positions, nsamples):
    """
    Command-line only.
    Build the R table and scripts for intervals that start at position 1.
    @param stop_positions: a sequence of interval stop positions
    @param nsamples: forwarded to get_value_lists
    @return: (R table string, R scripts)
    """
    start_position = 1
    rows = []
    for stop_position in stop_positions:
        # both endpoints are included in the length
        length = stop_position - start_position + 1
        value_lists = get_value_lists(
                start_position, stop_position, nsamples)
        means, variations, covs = value_lists
        row = [length]
        # three columns per statistic: hpd low, mean, hpd high
        for values in (means, variations, covs):
            corr_info = mcmc.Correlation()
            corr_info.analyze(values)
            hpd_low, hpd_high = mcmc.get_hpd_interval(0.95, values)
            row += [hpd_low, corr_info.mean, hpd_high]
        rows.append(row)
    table_string = RUtil.get_table_string(rows, g_headers)
    # the x axis of every script is labeled with the sequence lengths
    sequence_lengths = [x - start_position + 1 for x in stop_positions]
    return table_string, get_ggplot2_scripts(sequence_lengths)
Esempio n. 38
0
def get_response_content(fs):
    """
    Combine the R scripts into one three-row figure and plot it.
    @param fs: form selections; imageformat selects the R device
    @return: the image data produced by the R plotter
    @raise RUtil.RError: if the R plotter reports a nonzero return code
    """
    table_string, scripts = get_table_string_and_scripts(fs)
    # stack the plots of the individual scripts in three rows
    out = StringIO()
    print >> out, 'par(mfrow=c(3,1))'
    for script in scripts:
        print >> out, script
    comboscript = out.getvalue()
    # run R on the combined script
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    plotter_result = RUtil.run_plotter(
        table_string, comboscript, device_name)
    retcode, r_out, r_err, image_data = plotter_result
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
Esempio n. 39
0
def get_response_content(fs):
    """
    Plot the expected number of returns to AB for two Nr values.
    @param fs: form selections; only imageformat is read
    @return: the image data produced by the R plotter
    @raise RUtil.RError: if the R plotter reports a nonzero return code
    """
    # fixed population size; the haploid count is twice the diploid count
    N_diploid = 6
    N_hap = 2 * N_diploid
    plot_density = 8
    # mutation rates passed to the plot array builder and to each panel
    theta_values = [0.001, 0.01, 0.1, 1.0]
    # evenly spaced selection coefficients spanning the x axis
    Ns_low, Ns_high = 0.0, 3.0
    Ns_values = np.linspace(Ns_low, Ns_high, 3 * plot_density + 1)
    # one plot array per Nr value
    Nr_values = (0, 5)
    arr_0 = get_plot_array(N_diploid, Nr_values[0], theta_values, Ns_values)
    arr_1 = get_plot_array(N_diploid, Nr_values[1], theta_values, Ns_values)
    # both panels share the same axis limits; the y axis is never log scaled
    xlim = (Ns_low, Ns_high)
    both_arrays = (arr_0, arr_1)
    ylim = (np.min(both_arrays), np.max(both_arrays))
    ylogstr = '""'
    # only the left panel gets a y axis label
    panels = (
        ("left", Nr_values[0], arr_0, '"expected returns to AB"'),
        ("right", Nr_values[1], arr_1, '""'),
    )
    # http://sphaerula.com/legacy/R/multiplePlotFigure.html
    out = StringIO()
    print >> out, mk_call_str("par", mfrow="c(1,2)", oma="c(0,0,2,0)")
    for side, Nr, arr, panel_ylab in panels:
        print >> out, get_plot(
            side, Nr, arr, theta_values, Ns_values,
            xlim, ylim, ylogstr, panel_ylab)
    print >> out, mk_call_str(
        "title",
        '"expected number of returns to AB, 2N=%s"' % N_hap,
        outer="TRUE")
    script = out.getvalue().rstrip()
    # run R on the script to get the image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    plotter_result = RUtil.run_plotter_no_table(script, device_name)
    retcode, r_out, r_err, image_data = plotter_result
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
Esempio n. 40
0
def get_table_string_and_scripts(start_stop_pairs, nsamples):
    """
    Command-line only.
    Build the R table and scripts from the value lists of each interval.
    @param start_stop_pairs: an iterable of (start, stop) position pairs
    @param nsamples: forwarded to get_value_lists and to the script builder
    @return: (R table string, R scripts)
    """
    def summarize(values):
        # each statistic contributes (hpd low, mean, hpd high) to the row
        corr_info = mcmc.Correlation()
        corr_info.analyze(values)
        hpd_low, hpd_high = mcmc.get_hpd_interval(0.95, values)
        return [hpd_low, corr_info.mean, hpd_high]

    # one R table row per interval
    table_rows = []
    sequence_lengths = []
    midpoints = []
    for start_pos, stop_pos in start_stop_pairs:
        # both endpoints are included in the length
        sequence_length = stop_pos - start_pos + 1
        means, variations, covs = get_value_lists(
                start_pos, stop_pos, nsamples)
        midpoint = (start_pos + stop_pos) / 2.0
        row = [sequence_length, midpoint]
        for values in (means, variations, covs):
            row.extend(summarize(values))
        table_rows.append(row)
        sequence_lengths.append(sequence_length)
        midpoints.append(midpoint)
    table_string = RUtil.get_table_string(table_rows, g_headers)
    scripts = get_ggplot2_scripts(nsamples, sequence_lengths, midpoints)
    return table_string, scripts
Esempio n. 41
0
def get_response_content(fs):
    """
    Drop the rows of an R table whose points are flagged as duplicates.
    @param fs: form selections; reads table, axes and radius
    @return: the tab separated table without the flagged rows
    @raise ValueError: if no axes are given, if an axis is not a header,
    or if an axis column is not numeric
    """
    # read and validate the table
    rtable = RUtil.RTable(fs.table.splitlines())
    header_row = rtable.headers
    data_rows = rtable.data
    Carbone.validate_headers(header_row)
    # Map each header to its column index in a data row.
    # NOTE(review): the offset of one presumably skips a leading
    # row label column in each data row; confirm against RUtil.RTable.
    h_to_i = dict((h, i + 1) for i, h in enumerate(header_row))
    # every requested axis must be one of the headers
    axis_headers = fs.axes
    if not axis_headers:
        raise ValueError('no Euclidean axes were provided')
    bad_axes = set(axis_headers) - set(header_row)
    if bad_axes:
        raise ValueError('invalid axes: ' + ', '.join(bad_axes))
    # extract one numeric column per axis
    axis_lists = []
    for h in axis_headers:
        try:
            column = Carbone.get_numeric_column(data_rows, h_to_i[h])
        except Carbone.NumericError:
            raise ValueError('expected the axis column %s '
                             'to be numeric' % h)
        axis_lists.append(column)
    # transpose the columns so that each row is a point
    points = np.array(zip(*axis_lists))
    # ask the helper which row indices count as duplicates
    dup_indices = get_dup_indices(points, fs.radius)
    kept_rows = [
        row for i, row in enumerate(data_rows) if i not in dup_indices]
    # write the header line followed by the surviving rows
    out = StringIO()
    print >> out, '\t'.join(header_row)
    print >> out, '\n'.join('\t'.join(row) for row in kept_rows)
    return out.getvalue()
Esempio n. 42
0
def main(args):
    """
    Run BEAST for a range of stop positions and write the plots to a pdf.
    @param args: parsed command line arguments; reads nlengths,
    nsamples and outfile
    @raise RUtil.RError: if the R plotter reports a nonzero return code
    """
    # Space the stop positions evenly, forcing the first one to be 5
    # and the last one to be g_nchar.
    first_stop = 5
    incr = (g_nchar - first_stop) / float(args.nlengths - 1)
    stop_positions = [
        first_stop + int(i * incr) for i in range(args.nlengths)]
    stop_positions[-1] = g_nchar
    # run BEAST and create the R stuff
    table_string, scripts = get_table_string_and_scripts(
            stop_positions, args.nsamples)
    # stack the plots of the individual scripts in three rows
    out = StringIO()
    for line in ['library(ggplot2)', 'par(mfrow=c(3,1))']:
        print >> out, line
    for script in scripts:
        print >> out, script
    comboscript = out.getvalue()
    # run R on the combined script using the pdf device
    device_name = Form.g_imageformat_to_r_function['pdf']
    plotter_result = RUtil.run_plotter(
        table_string, comboscript, device_name)
    retcode, r_out, r_err, image_data = plotter_result
    if retcode:
        raise RUtil.RError(r_err)
    # write the image data
    with open(args.outfile, 'wb') as fout:
        fout.write(image_data)
Esempio n. 43
0
def get_response_content(fs):
    """
    Annotate each row of an R table with a cluster label.
    Clusters are merged until no more than fs.k of them remain.
    @param fs: form selections; reads table, annotation, axes and k
    @return: the tab separated table with an extra annotation column
    """
    rtable = RUtil.RTable(fs.table.splitlines())
    header_row = rtable.headers
    data_rows = rtable.data
    points = get_rtable_info(rtable, fs.annotation, fs.axes)
    # set up the state of the agglomerative clustering
    cluster_map = agglom.get_initial_cluster_map(points)
    w_ssd_map = agglom.get_initial_w_ssd_map(points)
    b_ssd_map = agglom.get_initial_b_ssd_map(points)
    q = agglom.get_initial_queue(b_ssd_map)
    # merge pairs of clusters until the requested count is reached
    while len(cluster_map) > fs.k:
        pair = agglom.get_pair_fast(cluster_map, q)
        agglom.merge_fast(cluster_map, w_ssd_map, b_ssd_map, q, pair)
    # invert the cluster map to go from a point index to a cluster index
    point_to_cluster = dict(
        (point_index, cluster_index)
        for cluster_index, point_indices in cluster_map.items()
        for point_index in point_indices)
    # the raw labels may be big numbers
    raw_labels = [point_to_cluster[i] for i, _ in enumerate(points)]
    # rename the raw labels to 0, 1, 2, ... in sorted order
    raw_to_label = dict(
        (raw, small) for small, raw in enumerate(sorted(set(raw_labels))))
    labels = [raw_to_label[raw] for raw in raw_labels]
    # the header gets the annotation name and each row gets its label
    lines = ['\t'.join(header_row + [fs.annotation])]
    lines.extend(
        '\t'.join(data_row + [str(label)])
        for label, data_row in zip(labels, data_rows))
    return '\n'.join(lines) + '\n'
Esempio n. 44
0
def get_table_string_and_scripts_from_logs(
        start_stop_pairs, log_paths, nsamples):
    """
    This is for analysis of remote execution.
    Each log is summarized as one row of the R table.
    @param start_stop_pairs: (start, stop) position pairs, one per log
    @param log_paths: log paths paired in order with start_stop_pairs
    @param nsamples: forwarded to read_log and to the script builder
    @return: (R table string, R scripts)
    """
    rows = []
    lengths = []
    mids = []
    for (start_pos, stop_pos), log_path in zip(start_stop_pairs, log_paths):
        # both endpoints are included in the length
        length = stop_pos - start_pos + 1
        means, variations, covs = read_log(log_path, nsamples)
        mid = (start_pos + stop_pos) / 2.0
        row = [length, mid]
        # three columns per statistic: hpd low, mean, hpd high
        for values in (means, variations, covs):
            corr_info = mcmc.Correlation()
            corr_info.analyze(values)
            hpd_low, hpd_high = mcmc.get_hpd_interval(0.95, values)
            row += [hpd_low, corr_info.mean, hpd_high]
        rows.append(row)
        lengths.append(length)
        mids.append(mid)
    # the table string is built before the scripts
    table_string = RUtil.get_table_string(rows, g_headers)
    scripts = get_ggplot2_scripts(nsamples, lengths, mids)
    return table_string, scripts
Esempio n. 45
0
def get_r_tikz_stub():
    """
    Run the RUtil stub script through the tikz device.
    @return: the tikz code produced by R
    @raise RUtil.RError: if the R plotter reports a nonzero return code
    """
    plotter_result = RUtil.run_plotter_no_table(RUtil.g_stub, "tikz")
    retcode, r_out, r_err, tikz_code = plotter_result
    if not retcode:
        return tikz_code
    raise RUtil.RError(r_err)
Esempio n. 46
0
def get_response_content(fs):
    """
    Plot mutual information over time for the requested processes.
    @param fs: form selections; reads one flag per process name,
    log4, log3, start_time, stop_time and imageformat
    @return: the image data produced by the R plotter
    @raise ValueError: if no process was requested
    @raise RUtil.RError: if the R plotter reports a nonzero return code
    """
    f_info = ctmcmi.get_mutual_info_known_distn
    # keep the processes whose flag is set on the form
    requested_triples = [
        (name, desc, zoo_obj)
        for name, desc, zoo_obj in g_process_triples
        if getattr(fs, name)]
    if not requested_triples:
        raise ValueError('nothing to plot')
    # The R table has the time, the optional reference lines,
    # and one column per process with underscores turned into dots.
    headers = ['t']
    if fs.log4:
        headers.append('log.4')
    if fs.log3:
        headers.append('log.3')
    headers.extend(
        name.replace('_', '.') for name, desc, zoo_obj in requested_triples)
    # Spend a lot of time doing the optimizations
    # to construct the points for the R table.
    arr = []
    for t in np.linspace(fs.start_time, fs.stop_time, 101):
        row = [t]
        # the reference values do not depend on the time
        if fs.log4:
            row.append(math.log(4))
        if fs.log3:
            row.append(math.log(3))
        for python_name, desc, zoo_obj in requested_triples:
            X = np.array([])
            rate_matrix = zoo_obj.get_rate_matrix(X)
            distn = zoo_obj.get_distn(X)
            row.append(f_info(rate_matrix, distn, t))
        arr.append(row)
    # get the R table and the R script
    table_string = RUtil.get_table_string(arr, headers)
    script = get_ggplot()
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    plotter_result = RUtil.run_plotter(table_string, script, device_name)
    retcode, r_out, r_err, image_data = plotter_result
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
Esempio n. 47
0
def get_table_string_and_scripts(fs):
    """
    Sample mutation-selection rate matrices and summarize them over time.
    @param fs: form selections; reads nresidues, nsites, nselections,
    the variance flags (low_var, medium_var, high_var, really_high_var),
    the skew flags (neg_skew, no_skew, pos_skew), t_low, t_high and ntimes
    @return: (R table string, a list of three R scripts)
    @raise ValueError: if the state space has more than 256 states,
    if fewer than two time points are requested,
    or if no variance or skew option is selected
    """
    nstates = fs.nresidues**fs.nsites
    if nstates > 256:
        raise ValueError('the mutation rate matrix is too big')
    # Fail before the expensive sampling instead of dividing by zero
    # when the time increment is computed.
    if fs.ntimes < 2:
        raise ValueError('at least two time points are required')
    # get the mutation matrix
    Q_mut = mrate.get_sparse_sequence_rate_matrix(fs.nresidues, fs.nsites)
    # sample a bunch of mutation-selection rate matrices
    Q_sels = []
    for selection_index in range(fs.nselections):
        # sample the selection parameters
        if fs.low_var:
            v = 0.2
        elif fs.medium_var:
            v = 1
        elif fs.high_var:
            v = 5.0
        elif fs.really_high_var:
            v = 25.0
        else:
            # previously this surfaced as an unbound local variable
            raise ValueError('no selection variance option was selected')
        s = math.sqrt(v)
        if fs.neg_skew:
            sels = [-random.expovariate(1 / s) for i in range(nstates)]
        elif fs.no_skew:
            sels = [random.gauss(0, s) for i in range(nstates)]
        elif fs.pos_skew:
            sels = [random.expovariate(1 / s) for i in range(nstates)]
        else:
            # previously this surfaced as an unbound local variable
            raise ValueError('no selection skew option was selected')
        # define the mutation-selection rate matrix using Halpern-Bruno
        Q = np.zeros_like(Q_mut)
        for i in range(nstates):
            for j in range(nstates):
                if i != j:
                    tau = math.exp(-(sels[j] - sels[i]))
                    if tau == 1:
                        # The ratio below is 0/0 here;
                        # its limit as tau approaches 1 is 1.
                        coeff = 1.0
                    else:
                        coeff = math.log(tau) / (1 - 1 / tau)
                    Q[i, j] = Q_mut[i, j] * coeff
        # each diagonal entry makes its row sum to zero
        for i in range(nstates):
            Q[i, i] = -np.sum(Q[i])
        Q_sels.append(Q)
    # Define the time points.
    # The float conversion guards against integer truncation
    # under Python 2 when both time bounds are ints.
    incr = (fs.t_high - fs.t_low) / float(fs.ntimes - 1)
    times = [fs.t_low + i * incr for i in range(fs.ntimes)]
    # compute the statistics
    nsels = len(Q_sels)
    pairs = [get_time_point_summary(Q_mut, Q_sels, t) for t in times]
    mi_sign_lists, time_stats = zip(*pairs)
    ncrossing_list = []
    # look at how the signs change over time for each selection sample
    for signs in zip(*mi_sign_lists):
        count = 0
        for sign_a, sign_b in iterutils.pairwise(signs):
            if sign_a != sign_b:
                count += 1
        ncrossing_list.append(count)
    # get the R scripts
    scripts = [
        get_r_band_script(nsels, time_stats),
        get_r_prop_script(nsels, time_stats),
        get_r_cross_script(ncrossing_list)
    ]
    table_string = RUtil.get_table_string(time_stats, g_time_stats_headers)
    return table_string, scripts
Esempio n. 48
0
def get_response_content(fs):
    """
    Plot the statistic ratios of the sampled selection processes.
    @param fs: form selections; forwarded to get_qmut_qsels,
    imageformat selects the R device
    @return: the image data produced by the R plotter
    @raise RUtil.RError: if the R plotter reports a nonzero return code
    """
    Q_mut, Q_sels = get_qmut_qsels(fs)
    # put the three ratio sequences side by side, one table row per entry
    ratio_columns = get_statistic_ratios(Q_mut, Q_sels)
    ER_ratios, NSR_ratios, ER_NSR_ratios = ratio_columns
    M = zip(ER_ratios, NSR_ratios, ER_NSR_ratios)
    column_headers = ('ER.ratio', 'NSR.ratio', 'ER.times.NSR.ratio')
    table_string = RUtil.get_table_string(M, column_headers)
    # the R script is told how many selection processes there are
    comboscript = get_r_comboscript(len(Q_sels), column_headers)
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    plotter_result = RUtil.run_plotter(
        table_string, comboscript, device_name)
    retcode, r_out, r_err, image_data = plotter_result
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
Esempio n. 49
0
def get_R_tick_cmd(axis, positions):
    """
    Build the R call that draws axis ticks at the given positions.
    @param axis: 1 for x, 2 for y
    @param positions: a sequence of positions
    @return: a single line R command to draw the ticks
    """
    # format the positions as an R vector literal
    at_vector = 'c(%s)' % ', '.join(map(str, positions))
    return RUtil.mk_call_str('axis', axis, at=at_vector)
Esempio n. 50
0
def get_R_tick_cmd(axis, positions):
    """
    Build the R axis call that places ticks at the given positions.
    @param axis: 1 for x, 2 for y
    @param positions: a sequence of positions
    @return: a single line R command to draw the ticks
    """
    # the positions become the elements of an R vector literal
    elements = ', '.join([str(position) for position in positions])
    at_vector = 'c(' + elements + ')'
    return RUtil.mk_call_str('axis', axis, at=at_vector)
Esempio n. 51
0
def get_response_content(fs):
    """
    Tabulate the statistic ratios of the selection processes and plot them.
    @param fs: form selections; forwarded to get_qmut_qsels,
    imageformat selects the R device
    @return: the image data produced by the R plotter
    @raise RUtil.RError: if the R plotter reports a nonzero return code
    """
    Q_mut, Q_sels = get_qmut_qsels(fs)
    # each table row holds one entry from each of the three ratio sequences
    ER_ratios, NSR_ratios, ER_NSR_ratios = get_statistic_ratios(Q_mut, Q_sels)
    column_headers = ('ER.ratio', 'NSR.ratio', 'ER.times.NSR.ratio')
    rows = zip(ER_ratios, NSR_ratios, ER_NSR_ratios)
    table_string = RUtil.get_table_string(rows, column_headers)
    # get the R script
    nsels = len(Q_sels)
    comboscript = get_r_comboscript(nsels, column_headers)
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    plotter_result = RUtil.run_plotter(
        table_string, comboscript, device_name)
    retcode, r_out, r_err, image_data = plotter_result
    if not retcode:
        return image_data
    raise RUtil.RError(r_err)
Esempio n. 52
0
def get_response_content(fs):
    """
    Plot the user supplied R table with the module level script body.
    @param fs: form selections; reads table and imageformat
    @return: the image data produced by the concise R plotter
    """
    # Constructing the table is done only as a check;
    # the parsed table itself is not used.
    RUtil.RTable(fs.table.splitlines())
    device = Form.g_imageformat_to_r_function[fs.imageformat]
    return RUtil.run_plotter_concise(fs.table, g_script_body, device)
Esempio n. 53
0
def get_table_string_and_scripts(fs):
    """
    Sample mutation-selection rate matrices and summarize them over time.
    @param fs: form selections; reads nresidues, nsites, nselections,
    the variance flags (low_var, medium_var, high_var, really_high_var),
    the skew flags (neg_skew, no_skew, pos_skew), t_low, t_high and ntimes
    @return: (R table string, a list of three R scripts)
    @raise ValueError: if the state space has more than 256 states,
    if fewer than two time points are requested,
    or if no variance or skew option is selected
    """
    nstates = fs.nresidues ** fs.nsites
    if nstates > 256:
        raise ValueError('the mutation rate matrix is too big')
    # Fail before the expensive sampling instead of dividing by zero
    # when the time increment is computed.
    if fs.ntimes < 2:
        raise ValueError('at least two time points are required')
    # get the mutation matrix
    Q_mut = mrate.get_sparse_sequence_rate_matrix(fs.nresidues, fs.nsites)
    # sample a bunch of mutation-selection rate matrices
    Q_sels = []
    for selection_index in range(fs.nselections):
        # sample the selection parameters
        if fs.low_var:
            v = 0.2
        elif fs.medium_var:
            v = 1
        elif fs.high_var:
            v = 5.0
        elif fs.really_high_var:
            v = 25.0
        else:
            # previously this surfaced as an unbound local variable
            raise ValueError('no selection variance option was selected')
        s = math.sqrt(v)
        if fs.neg_skew:
            sels = [-random.expovariate(1/s) for i in range(nstates)]
        elif fs.no_skew:
            sels = [random.gauss(0, s) for i in range(nstates)]
        elif fs.pos_skew:
            sels = [random.expovariate(1/s) for i in range(nstates)]
        else:
            # previously this surfaced as an unbound local variable
            raise ValueError('no selection skew option was selected')
        # define the mutation-selection rate matrix using Halpern-Bruno
        Q = np.zeros_like(Q_mut)
        for i in range(nstates):
            for j in range(nstates):
                if i != j:
                    tau = math.exp(-(sels[j] - sels[i]))
                    if tau == 1:
                        # The ratio below is 0/0 here;
                        # its limit as tau approaches 1 is 1.
                        coeff = 1.0
                    else:
                        coeff = math.log(tau) / (1 - 1/tau)
                    Q[i, j] = Q_mut[i, j] * coeff
        # each diagonal entry makes its row sum to zero
        for i in range(nstates):
            Q[i, i] = -np.sum(Q[i])
        Q_sels.append(Q)
    # Define the time points.
    # The float conversion guards against integer truncation
    # under Python 2 when both time bounds are ints.
    incr = (fs.t_high - fs.t_low) / float(fs.ntimes - 1)
    times = [fs.t_low + i*incr for i in range(fs.ntimes)]
    # compute the statistics
    nsels = len(Q_sels)
    pairs = [get_time_point_summary(Q_mut, Q_sels, t) for t in times]
    mi_sign_lists, time_stats = zip(*pairs)
    ncrossing_list = []
    # look at how the signs change over time for each selection sample
    for signs in zip(*mi_sign_lists):
        count = 0
        for sign_a, sign_b in iterutils.pairwise(signs):
            if sign_a != sign_b:
                count += 1
        ncrossing_list.append(count)
    # get the R scripts
    scripts = [
            get_r_band_script(nsels, time_stats),
            get_r_prop_script(nsels, time_stats),
            get_r_cross_script(ncrossing_list)]
    table_string = RUtil.get_table_string(time_stats, g_time_stats_headers)
    return table_string, scripts
Esempio n. 54
0
def get_response_content(fs):
    """
    Tabulate mutual information at 101 time points and plot it with R.
    @param fs: form selections; reads one flag per process name,
    log4, log3, start_time, stop_time and imageformat
    @return: the image data produced by the R plotter
    @raise ValueError: if no process was requested
    @raise RUtil.RError: if the R plotter reports a nonzero return code
    """
    f_info = ctmcmi.get_mutual_info_known_distn
    # a process is requested when the form attribute with its name is set
    requested_triples = []
    for name, desc, zoo_obj in g_process_triples:
        if getattr(fs, name):
            requested_triples.append((name, desc, zoo_obj))
    if not requested_triples:
        raise ValueError('nothing to plot')
    # define the R table headers; R names use dots instead of underscores
    headers = ['t']
    if fs.log4:
        headers.append('log.4')
    if fs.log3:
        headers.append('log.3')
    for name, desc, zoo_obj in requested_triples:
        headers.append(name.replace('_', '.'))
    # Spend a lot of time doing the optimizations
    # to construct the points for the R table.
    times = np.linspace(fs.start_time, fs.stop_time, 101)
    arr = []
    for t in times:
        row = [t]
        # the optional reference values are the same in every row
        if fs.log4:
            row.append(math.log(4))
        if fs.log3:
            row.append(math.log(3))
        for python_name, desc, zoo_obj in requested_triples:
            X = np.array([])
            Q = zoo_obj.get_rate_matrix(X)
            distn = zoo_obj.get_distn(X)
            row.append(f_info(Q, distn, t))
        arr.append(row)
    # create the R table string and get the R script
    table_string = RUtil.get_table_string(arr, headers)
    script = get_ggplot()
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    plotter_result = RUtil.run_plotter(table_string, script, device_name)
    retcode, r_out, r_err, image_data = plotter_result
    if not retcode:
        return image_data
    raise RUtil.RError(r_err)
Esempio n. 55
0
def get_plot_scripts(sequence_lengths):
    """
    Build three R plot scripts that share custom x axis ticks.
    The scripts plot the mean, var and cov columns of my.table
    against the sequence length column.
    @param sequence_lengths: positions of the x axis ticks
    @return: a list of three R scripts
    """
    def with_ticks(plot_cmd):
        # The plots suppress the default x axis,
        # so each one is followed by an explicit tick command.
        out = StringIO()
        print >> out, plot_cmd
        print >> out, get_R_tick_cmd(1, sequence_lengths)
        return out.getvalue().rstrip()

    # the plot for the mean, which also carries the main title
    mean_script = with_ticks(RUtil.mk_call_str(
            'plot',
            'my.table$sequence.length',
            'my.table$mean.mean',
            xlab="''",
            ylab="'mean'",
            xaxt="'n'",
            main="'posterior statistics of rates among branches'",
            ))
    # the plot for the coefficient of variation
    variation_script = with_ticks(RUtil.mk_call_str(
            'plot',
            'my.table$sequence.length',
            'my.table$var.mean',
            xlab="''",
            ylab="'coeff of variation'",
            xaxt="'n'",
            ))
    # the plot for the parent-child correlation, which labels the x axis
    correlation_script = with_ticks(RUtil.mk_call_str(
            'plot',
            'my.table$sequence.length',
            'my.table$cov.mean',
            xlab="'sequence length'",
            ylab="'parent-child correlation'",
            xaxt="'n'",
            ))
    return [mean_script, variation_script, correlation_script]
Esempio n. 56
0
def get_table_string_and_scripts(fs):
    """
    Sample mutation-selection processes and summarize them over a time grid.
    The latex documentbody should have a bunch of tikz pieces in it.
    Each tikz piece should have been generated from R.
    @param fs: form values; this function reads nresidues, nsites,
    nselections, the variance options (low_var, medium_var, high_var,
    really_high_var), the skew options (neg_skew, no_skew, pos_skew)
    and the time grid (t_low, t_high, ntimes)
    @return: a pair (R table string, list of R plot scripts)
    @raise ValueError: if there are more than 256 states,
    or if no variance option or no skew option is selected
    """
    nstates = fs.nresidues ** fs.nsites
    if nstates > 256:
        raise ValueError("the mutation rate matrix is too big")
    # get the mutation matrix
    Q_mut = mrate.get_sparse_sequence_rate_matrix(fs.nresidues, fs.nsites)
    # sample a bunch of mutation-selection rate matrices
    Q_sels = []
    for selection_index in range(fs.nselections):
        sels = _sample_selections(fs, nstates)
        Q_sels.append(_get_mutation_selection_rate_matrix(Q_mut, sels))
    # define the time points
    if fs.ntimes == 1:
        # a single time point has no spacing; just use t_low
        incr = 0.0
    else:
        # float() forces true division, so that integer endpoints
        # do not truncate the increment under Python 2
        incr = float(fs.t_high - fs.t_low) / (fs.ntimes - 1)
    times = [fs.t_low + i * incr for i in range(fs.ntimes)]
    # compute the statistics
    nsels = len(Q_sels)
    time_stats = [get_time_point_summary(Q_mut, Q_sels, t) for t in times]
    # get the R scripts; the mutual information plot is currently left out
    scripts = [
        get_r_tikz_corr_plot(nsels, time_stats),
        get_r_tikz_prop_plot(nsels, time_stats),
        get_r_tikz_info_plot(nsels, time_stats),
    ]
    table_string = RUtil.get_table_string(time_stats, g_time_stats_headers)
    return table_string, scripts


def _sample_selections(fs, nstates):
    """
    Draw one selection value per state according to the form options.
    @param fs: form values providing the variance and skew options
    @param nstates: the number of values to draw
    @return: a list of nstates floats
    @raise ValueError: if no variance option or no skew option is selected
    """
    # the options are checked lazily and in this order
    if fs.low_var:
        v = 0.2
    elif fs.medium_var:
        v = 1
    elif fs.high_var:
        v = 5.0
    elif fs.really_high_var:
        v = 25.0
    else:
        raise ValueError("no selection variance option was selected")
    # s is the standard deviation of every distribution used below;
    # an exponential distribution with rate 1 / s has standard deviation s
    s = math.sqrt(v)
    if fs.neg_skew:
        return [-random.expovariate(1 / s) for i in range(nstates)]
    elif fs.no_skew:
        return [random.gauss(0, s) for i in range(nstates)]
    elif fs.pos_skew:
        return [random.expovariate(1 / s) for i in range(nstates)]
    raise ValueError("no selection skew option was selected")


def _get_halpern_bruno_coeff(delta):
    """
    Compute the factor that rescales a mutation rate under selection.
    This is log(tau) / (1 - 1 / tau) with tau = exp(-delta),
    rewritten as delta / (exp(delta) - 1) so that it stays accurate
    for small delta and never divides by zero.
    @param delta: selection value of the target state minus the source state
    @return: the multiplicative factor as a float
    """
    if delta == 0:
        # removable singularity: the factor tends to 1 as delta tends to 0
        return 1.0
    return delta / math.expm1(delta)


def _get_mutation_selection_rate_matrix(Q_mut, sels):
    """
    Define the mutation-selection rate matrix using Halpern-Bruno.
    @param Q_mut: the mutation rate matrix
    @param sels: one selection value per state
    @return: a rate matrix with the same shape and dtype as Q_mut
    """
    nstates = len(sels)
    # NOTE(review): Q inherits the dtype of Q_mut;
    # this presumably is a floating point dtype -- confirm
    Q = np.zeros_like(Q_mut)
    for i in range(nstates):
        for j in range(nstates):
            if i != j:
                coeff = _get_halpern_bruno_coeff(sels[j] - sels[i])
                Q[i, j] = Q_mut[i, j] * coeff
    # the diagonal is still zero at this point,
    # so each row sum covers only the off-diagonal rates
    for i in range(nstates):
        Q[i, i] = -np.sum(Q[i])
    return Q
Esempio n. 57
0
def get_response_content(fs):
    """
    Scatter plot of entropy against a log-ratio analog.
    Each of 5000 distributions over 4 states is made by normalizing
    independent draws of random.expovariate(1).
    The analog is log(sum of squares / sum of cubes) of the probabilities.
    The plot title lists the sampled distribution whose
    (entropy, analog) pair is nearest to the point (1.0, 0.4).
    @param fs: form values; only imageformat is read
    @return: the image data returned by the R plotter
    @raise RUtil.RError: if the plotter reports a nonzero return code
    """
    nstates = 4
    npoints = 5000
    headers = ['entropy', 'analog']
    rows = []
    nearest_dist = None
    nearest_distn = None
    for point_index in range(npoints):
        draws = [random.expovariate(1) for k in range(nstates)]
        normalizer = sum(draws)
        distn = [w / normalizer for w in draws]
        entropy = -sum(p * math.log(p) for p in distn)
        sum_squares = sum(p*p for p in distn)
        sum_cubes = sum(p*p*p for p in distn)
        analog = math.log(sum_squares / sum_cubes)
        rows.append([entropy, analog])
        # squared distance from the reference point (1.0, 0.4)
        dist = (entropy - 1.0)**2 + (analog - 0.4)**2
        # the strict comparison keeps the earliest sample on ties
        if nearest_dist is None or dist < nearest_dist:
            nearest_dist = dist
            nearest_distn = distn
    # the R table
    table_string = RUtil.get_table_string(rows, headers)
    # the R script is a single plot command followed by a newline
    title = ', '.join(str(p) for p in nearest_distn)
    plot_cmd = RUtil.mk_call_str(
            'plot',
            'my.table$entropy',
            'my.table$analog',
            pch='20',
            main='"%s"' % title)
    script = '%s\n' % (plot_cmd,)
    # run R to get the image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter(
            table_string, script, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
Esempio n. 58
0
def get_r_prop_script(nsels, time_stats):
    """
    Get R code that draws the proportion column of my.table against time.
    The y axis is fixed to the range from 0 to 1.
    The plotted values are taken by R from my.table,
    so neither argument is inspected here; in particular the
    transposed copy of time_stats that used to be built is gone
    because nothing read it.
    @param nsels: not used; kept for interface compatibility
    @param time_stats: a list of stats for each time point; not used
    @return: R code
    """
    ylim = RUtil.mk_call_str('c', 0, 1)
    plot_cmd = RUtil.mk_call_str(
            'plot',
            'my.table$t',
            'my.table$prop.sel.vs.mut',
            type='"l"',
            ylim=ylim,
            xlab='"time"',
            ylab='"proportion"',
            main='"proportion of mut-sel MI greater than mutation MI"')
    # a single command terminated by a newline
    return '%s\n' % (plot_cmd,)
Esempio n. 59
0
def get_response_content(fs):
    """
    Plot the columns of the table from make_table(fs) against time.
    Every header after the first is drawn as its own line,
    paired in order with the colors darkblue and darkred,
    over dotted horizontal guides spaced 0.2 apart.
    The legend describes the two processes by N and mu.
    @param fs: form values; reads nstates_a, mu_a, nstates_b, mu_b,
    legend_placement and imageformat, and is also passed to make_table
    @return: the image data returned by the R plotter
    @raise RUtil.RError: if the plotter reports a nonzero return code
    """
    def r_vector(items):
        # spell out an R c(...) vector from ready-made element strings
        return 'c(%s)' % ', '.join(items)

    # legend labels
    legend_names = (
            'N=%d mu=%f' % (fs.nstates_a, fs.mu_a),
            'N=%d mu=%f' % (fs.nstates_b, fs.mu_b))
    arr, headers = make_table(fs)
    # the y axis ends at the first multiple of 0.2 strictly above
    # the log of the larger number of states
    ymax = math.log(max(fs.nstates_a, fs.nstates_b))
    nfifths = int(math.floor(ymax * 5.0)) + 1
    ylim = RUtil.mk_call_str('c', 0, 0.2 * nfifths)
    # collect the R commands in the order in which they should run
    commands = []
    # an empty frame with the axes and the title
    commands.append(RUtil.mk_call_str(
            'plot',
            'my.table$t',
            'my.table$alpha',
            type='"n"',
            ylim=ylim,
            xlab='"time"',
            ylab='"information"',
            main='"comparison of an information criterion for two processes"',
            ))
    # the horizontal guide lines
    for i in range(nfifths+1):
        commands.append(RUtil.mk_call_str(
                'abline',
                h=0.2*i,
                col='"lightgray"',
                lty='"dotted"'))
    # one curve per process
    colors = ('darkblue', 'darkred')
    for color, header in zip(colors, headers[1:]):
        commands.append(RUtil.mk_call_str(
                'lines',
                'my.table$t',
                'my.table$%s' % header,
                col='"%s"' % color,
                ))
    # the legend uses one solid line type entry per color
    commands.append(RUtil.mk_call_str(
            'legend',
            '"%s"' % fs.legend_placement,
            r_vector('"%s"' % name for name in legend_names),
            col=r_vector('"%s"' % color for color in colors),
            lty=r_vector(['1'] * len(colors)),
            ))
    # each command goes on its own newline-terminated line
    script_body = ''.join('%s\n' % (cmd,) for cmd in commands)
    # create the R plot image
    table_string = RUtil.get_table_string(arr, headers)
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter(
            table_string, script_body, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data