Example #1
0
def get_table_string_and_scripts(start_stop_pairs, nsamples, header_seq_pairs):
    """
    Build one R table row per alignment interval, plus the plotting scripts.
    Command-line only.
    @param start_stop_pairs: (start, stop) interval bounds, one pair per row
    @param nsamples: forwarded to the log analysis and to script generation
    @param header_seq_pairs: forwarded unchanged to get_loganalysis_array
    @return: table string, scripts
    """
    # Positions read from each analysis array, in table column order.
    # NOTE(review): rows 1, 2, 3 are treated as the mean, var and cov
    # statistics and columns 4, 1, 5 as their low, mean and high summaries;
    # confirm against get_loganalysis_array.
    stat_rows = (1, 2, 3)
    summary_cols = (4, 1, 5)
    table_rows = []
    lengths = []
    centers = []
    for first, last in start_stop_pairs:
        # both bounds count towards the interval length
        length = last - first + 1
        center = (first + last) / 2.0
        analysis = get_loganalysis_array(
                first, last, nsamples, header_seq_pairs)
        table_row = [length, center]
        table_row.extend(
                analysis[r][c] for r in stat_rows for c in summary_cols)
        table_rows.append(table_row)
        lengths.append(length)
        centers.append(center)
    # the table is rendered first, then the scripts are generated
    return (
            RUtil.get_table_string(table_rows, g_headers),
            beasttut.get_ggplot2_scripts(nsamples, lengths, centers))
Example #2
0
def get_table_strings_and_scripts(
        xmldata, alignment_id, start_stop_pairs,
        nsamples):
    """
    Build a short and a long R table from per-interval log analyses.
    Command-line only.
    @param xmldata: xml data already adjusted for nsamples and log filename
    @param alignment_id: xml element id
    @param start_stop_pairs: alignment interval bounds
    @param nsamples: an extra parameter for script generation
    @return: short table string, long table string, scripts (for short table)
    """
    short_rows = []
    long_rows = []
    lengths = []
    centers = []
    for first, last in start_stop_pairs:
        # both bounds count towards the interval length
        length = last - first + 1
        center = (first + last) / 2.0
        # restrict the xml to this interval, then analyze it
        labeled = get_loganalysis_labeled_array(
                beast.set_alignment_interval(
                    xmldata, alignment_id, first, last))
        stat_labels, summary_labels, values = labeled
        # label -> position lookups (a repeated label keeps its last position)
        row_of = {label: i for i, label in enumerate(stat_labels)}
        col_of = {label: j for j, label in enumerate(summary_labels)}
        # statistics reported in the short table, in column order
        stat_rows = [
                row_of[name] for name in (
                    'meanRate', 'coefficientOfVariation', 'covariance')]
        # summaries reported for each of those statistics
        mean_col = col_of['mean']
        low_col = col_of['hpdLower']
        high_col = col_of['hpdUpper']
        # short table: length, midpoint, then low/mean/high per statistic
        short_row = [length, center]
        for r in stat_rows:
            short_row.extend(
                    values[r][c] for c in (low_col, mean_col, high_col))
        short_rows.append(short_row)
        # long table: one row per (statistic, summary) cell of the analysis;
        # the labels are wrapped in double quotes for the R table
        for i, stat_label in enumerate(stat_labels):
            for j, summary_label in enumerate(summary_labels):
                long_rows.append([
                        length,
                        center,
                        '"' + stat_label + '"',
                        '"' + summary_label + '"',
                        values[i][j],
                        ])
        lengths.append(length)
        centers.append(center)
    # render both tables; the long table holds quoted labels, so float
    # coercion is switched off for it
    long_headers = [
            'sequence.length',
            'midpoint',
            'statistic.name',
            'posterior.analysis',
            'value',
            ]
    table_string = RUtil.get_table_string(short_rows, g_headers)
    full_table_string = RUtil.get_table_string(
            long_rows, long_headers, force_float=False)
    # the scripts only depend on the interval geometry and nsamples
    scripts = beasttut.get_ggplot2_scripts(nsamples, lengths, centers)
    return table_string, full_table_string, scripts
Example #3
0
def get_table_strings_and_scripts(xmldata, alignment_id, start_stop_pairs,
                                  nsamples):
    """
    Produce the summary table, the exhaustive table and the plot scripts.
    Command-line only.
    @param xmldata: xml data already adjusted for nsamples and log filename
    @param alignment_id: xml element id
    @param start_stop_pairs: alignment interval bounds
    @param nsamples: an extra parameter for script generation
    @return: short table string, long table string, scripts (for short table)
    """

    def index_by_label(labels):
        # map each label to its position; a repeated label keeps the last one
        return {label: pos for pos, label in enumerate(labels)}

    summary_rows = []
    exhaustive_rows = []
    interval_lengths = []
    interval_midpoints = []
    for lo, hi in start_stop_pairs:
        # the interval length counts both end points
        n_sites = hi - lo + 1
        mid = (lo + hi) / 2.0
        clipped_xml = beast.set_alignment_interval(xmldata, alignment_id, lo,
                                                   hi)
        stat_names, summary_names, grid = get_loganalysis_labeled_array(
            clipped_xml)
        stat_pos = index_by_label(stat_names)
        summary_pos = index_by_label(summary_names)
        # rows of interest
        mean_r = stat_pos['meanRate']
        var_r = stat_pos['coefficientOfVariation']
        cov_r = stat_pos['covariance']
        # columns of interest
        mean_c = summary_pos['mean']
        low_c = summary_pos['hpdLower']
        high_c = summary_pos['hpdUpper']
        # summary row: length, midpoint, then low/mean/high per statistic
        wanted_cells = [(r, c)
                        for r in (mean_r, var_r, cov_r)
                        for c in (low_c, mean_c, high_c)]
        summary_rows.append(
            [n_sites, mid] + [grid[r][c] for r, c in wanted_cells])
        # exhaustive rows: every cell of the labeled array, with the labels
        # wrapped in double quotes for the R table
        exhaustive_rows += [
            [n_sites, mid, '"' + stat + '"', '"' + summary + '"', grid[r][c]]
            for r, stat in enumerate(stat_names)
            for c, summary in enumerate(summary_names)
        ]
        interval_lengths.append(n_sites)
        interval_midpoints.append(mid)
    # render the tables; the exhaustive table carries quoted labels, so it
    # is rendered with force_float disabled
    exhaustive_headers = [
        'sequence.length',
        'midpoint',
        'statistic.name',
        'posterior.analysis',
        'value',
    ]
    table_string = RUtil.get_table_string(summary_rows, g_headers)
    full_table_string = RUtil.get_table_string(exhaustive_rows,
                                               exhaustive_headers,
                                               force_float=False)
    scripts = beasttut.get_ggplot2_scripts(nsamples, interval_lengths,
                                           interval_midpoints)
    return table_string, full_table_string, scripts