예제 #1
0
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create the output dir if it doesn't already exist.
    out_dir = opts.output_dir
    try:
        create_dir(out_dir)
    except:
        option_parser.error("Could not create or access output directory "
                            "specified with the -o option.")

    map_f = open(opts.mapping_fp, 'U')
    dm_f = open(opts.distance_matrix_fp, 'U')

    fields = map(strip, opts.fields.split(','))
    fields = [field.strip('"').strip("'") for field in fields]

    color_individual_within_by_field = opts.color_individual_within_by_field
    results = make_distance_boxplots(dm_f, map_f, fields, width=opts.width,
            height=opts.height, suppress_all_within=opts.suppress_all_within,
            suppress_all_between=opts.suppress_all_between,
            suppress_individual_within=opts.suppress_individual_within,
            suppress_individual_between=opts.suppress_individual_between,
            y_min=opts.y_min, y_max=opts.y_max,
            whisker_length=opts.whisker_length, box_width=opts.box_width,
            box_color=opts.box_color,
            color_individual_within_by_field=color_individual_within_by_field,
            sort=opts.sort)

    for field, plot_figure, plot_data, plot_labels, plot_colors in results:
        output_plot_fp = join(out_dir, "%s_Distances.%s" %
                              (field, opts.imagetype))
        plot_figure.savefig(output_plot_fp, format=opts.imagetype,
                            transparent=opts.transparent)

        if not opts.suppress_significance_tests:
            sig_tests_f = open(join(out_dir, "%s_Stats.txt" % field), 'w')
            sig_tests_results = all_pairs_t_test(plot_labels, plot_data,
                    tail_type=opts.tail_type,
                    num_permutations=opts.num_permutations)
            sig_tests_f.write(sig_tests_results)
            sig_tests_f.close()

        if opts.save_raw_data:
            # Write the raw plot data into a tab-delimited file.
            assert(len(plot_labels) == len(plot_data))
            raw_data_fp = join(out_dir, "%s_Distances.txt" % field)
            raw_data_f = open(raw_data_fp, 'w')

            for label, data in zip(plot_labels, plot_data):
                raw_data_f.write(label.replace(" ", "_") + "\t")
                raw_data_f.write("\t".join(map(str, data)))
                raw_data_f.write("\n")
            raw_data_f.close()
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create the output dir if it doesn't already exist.
    try:
        create_dir(opts.output_dir)
    except:
        option_parser.error("Could not create or access output directory " "specified with the -o option.")

    # Parse the distance matrix and mapping file.
    try:
        dist_matrix_header, dist_matrix = parse_distmat(open(opts.distance_matrix_fp, "U"))
    except:
        option_parser.error(
            "This does not look like a valid distance matrix "
            "file. Please supply a valid distance matrix file using the -d "
            "option."
        )
    try:
        mapping, mapping_header, mapping_comments = parse_mapping_file(open(opts.mapping_fp, "U"))
    except QiimeParseError:
        option_parser.error(
            "This does not look like a valid metadata mapping "
            "file. Please supply a valid mapping file using the -m option."
        )

    # Make sure the y_min and y_max options make sense, as they can be either
    # 'auto' or a number.
    y_min = opts.y_min
    y_max = opts.y_max
    try:
        y_min = float(y_min)
    except ValueError:
        if y_min == "auto":
            y_min = None
        else:
            option_parser.error("The --y_min option must be either a number " "or 'auto'.")
    try:
        y_max = float(y_max)
    except ValueError:
        if y_max == "auto":
            y_max = None
        else:
            option_parser.error("The --y_max option must be either a number " "or 'auto'.")

    # Parse the field states that will be compared to every other field state.
    comparison_field_states = opts.comparison_groups
    comparison_field_states = map(strip, comparison_field_states.split(","))
    comparison_field_states = [field_state.strip('"').strip("'") for field_state in comparison_field_states]
    if comparison_field_states is None:
        option_parser.error("You must provide at least one field state to " "compare (using the -c option).")

    # Get distance comparisons between each field state and each of the
    # comparison field states.
    field = opts.field
    comparison_groupings = get_field_state_comparisons(
        dist_matrix_header, dist_matrix, mapping_header, mapping, field, comparison_field_states
    )

    # Grab a list of all field states that had the comparison field states
    # compared against them. These will be plotted along the x-axis.
    field_states = comparison_groupings.keys()

    def custom_comparator(x, y):
        try:
            num_x = float(x)
            num_y = float(y)
            return int(num_x - num_y)
        except:
            if x < y:
                return -1
            elif x > y:
                return 1
            else:
                return 0

    # Sort the field states as numbers if the elements are numbers, else sort
    # them lexically.
    field_states.sort(custom_comparator)

    # If the label type is numeric, get a list of all field states in sorted
    # numeric order. These will be used to determine the spacing of the
    # field state 'points' along the x-axis.
    x_spacing = None
    if opts.label_type == "numeric":
        try:
            x_spacing = sorted(map(float, field_states))
        except:
            option_parser.error(
                "The 'numeric' label type is invalid because "
                "not all field states could be converted into "
                "numbers. Please specify a different label "
                "type."
            )

    # Accumulate the data for each field state 'point' along the x-axis.
    plot_data = []
    plot_x_axis_labels = []
    for field_state in field_states:
        field_state_data = []
        for comp_field_state in comparison_field_states:
            field_state_data.append(comparison_groupings[field_state][comp_field_state])
        plot_data.append(field_state_data)
        plot_x_axis_labels.append(field_state)

    # Plot the data and labels.
    plot_title = "Distance Comparisons"
    plot_x_label = field
    plot_y_label = "Distance"

    # If we are creating a bar chart or box plot, grab a list of good data
    # colors to use.
    plot_type = opts.plot_type
    plot_colors = None
    if plot_type == "bar" or plot_type == "box":
        plot_colors = [matplotlib_rgb_color(data_colors[color].toRGB()) for color in data_color_order]

    assert plot_data, "Error: there is no data to plot!"

    width = opts.width
    height = opts.height
    if width <= 0 or height <= 0:
        option_parser.error("The specified width and height of the image must " "be greater than zero.")

    plot_figure = grouped_distributions(
        opts.plot_type,
        plot_data,
        x_values=x_spacing,
        data_point_labels=plot_x_axis_labels,
        distribution_labels=comparison_field_states,
        distribution_markers=plot_colors,
        x_label=plot_x_label,
        y_label=plot_y_label,
        title=plot_title,
        x_tick_labels_orientation=opts.x_tick_labels_orientation,
        y_min=y_min,
        y_max=y_max,
        whisker_length=opts.whisker_length,
        error_bar_type=opts.error_bar_type,
        distribution_width=opts.distribution_width,
        figure_width=width,
        figure_height=height,
    )

    # Save the plot in the specified format.
    output_plot_fp = join(opts.output_dir, "%s_Distance_Comparisons.%s" % (field, opts.imagetype))
    plot_figure.savefig(output_plot_fp, format=opts.imagetype, transparent=opts.transparent)

    if not opts.suppress_significance_tests:
        sig_tests_f = open(join(opts.output_dir, "%s_Stats.txt" % field), "w")

        # Rearrange the plot data into a format suitable for all_pairs_t_test.
        sig_tests_labels = []
        sig_tests_data = []
        for data_point, data_point_label in zip(plot_data, plot_x_axis_labels):
            for dist, comp_field in zip(data_point, comparison_field_states):
                sig_tests_labels.append("%s vs %s" % (data_point_label, comp_field))
                sig_tests_data.append(dist)

        sig_tests_results = all_pairs_t_test(
            sig_tests_labels, sig_tests_data, tail_type=opts.tail_type, num_permutations=opts.num_permutations
        )
        sig_tests_f.write(sig_tests_results)
        sig_tests_f.close()

    if opts.save_raw_data:
        # Write the raw plot data into a tab-delimited file, where each line
        # has the distances between a comparison group and another field state
        # 'point' along the x-axis.
        assert len(plot_x_axis_labels) == len(plot_data), (
            "The number of " + "labels do not match the number of points along the x-axis."
        )
        raw_data_fp = join(opts.output_dir, "%s_Distance_Comparisons.txt" % field)
        raw_data_f = open(raw_data_fp, "w")

        raw_data_f.write("#ComparisonGroup\tFieldState\tDistances\n")
        for label, data in zip(plot_x_axis_labels, plot_data):
            assert len(comparison_field_states) == len(data), (
                "The "
                + "number of specified comparison groups does not match "
                + "the number of groups found at the current point along "
                + "the x-axis."
            )
            for comp_field_state, comp_grp_data in zip(comparison_field_states, data):
                raw_data_f.write(comp_field_state + "\t" + label + "\t" + "\t".join(map(str, comp_grp_data)) + "\n")
        raw_data_f.close()
예제 #3
0
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create the output dir if it doesn't already exist.
    out_dir = opts.output_dir
    try:
        create_dir(out_dir)
    except:
        option_parser.error("Could not create or access output directory "
                            "specified with the -o option.")

    map_f = open(opts.mapping_fp, 'U')
    dm_f = open(opts.distance_matrix_fp, 'U')

    fields = map(strip, opts.fields.split(','))
    fields = [field.strip('"').strip("'") for field in fields]

    color_individual_within_by_field = opts.color_individual_within_by_field
    results = make_distance_boxplots(
        dm_f,
        map_f,
        fields,
        width=opts.width,
        height=opts.height,
        suppress_all_within=opts.suppress_all_within,
        suppress_all_between=opts.suppress_all_between,
        suppress_individual_within=opts.suppress_individual_within,
        suppress_individual_between=opts.suppress_individual_between,
        y_min=opts.y_min,
        y_max=opts.y_max,
        whisker_length=opts.whisker_length,
        box_width=opts.box_width,
        box_color=opts.box_color,
        color_individual_within_by_field=color_individual_within_by_field,
        sort=opts.sort)

    for field, plot_figure, plot_data, plot_labels, plot_colors in results:
        output_plot_fp = join(out_dir,
                              "%s_Distances.%s" % (field, opts.imagetype))
        plot_figure.savefig(output_plot_fp,
                            format=opts.imagetype,
                            transparent=opts.transparent)

        if not opts.suppress_significance_tests:
            sig_tests_f = open(join(out_dir, "%s_Stats.txt" % field), 'w')
            sig_tests_results = all_pairs_t_test(
                plot_labels,
                plot_data,
                tail_type=opts.tail_type,
                num_permutations=opts.num_permutations)
            sig_tests_f.write(sig_tests_results)
            sig_tests_f.close()

        if opts.save_raw_data:
            # Write the raw plot data into a tab-delimited file.
            assert (len(plot_labels) == len(plot_data))
            raw_data_fp = join(out_dir, "%s_Distances.txt" % field)
            raw_data_f = open(raw_data_fp, 'w')

            for label, data in zip(plot_labels, plot_data):
                raw_data_f.write(label.replace(" ", "_") + "\t")
                raw_data_f.write("\t".join(map(str, data)))
                raw_data_f.write("\n")
            raw_data_f.close()
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create the output dir if it doesn't already exist.
    try:
        create_dir(opts.output_dir)
    except:
        option_parser.error("Could not create or access output directory "
                            "specified with the -o option.")

    # Parse the distance matrix and mapping file.
    try:
        dist_matrix_header, dist_matrix = parse_distmat(
            open(opts.distance_matrix_fp, 'U'))
    except:
        option_parser.error(
            "This does not look like a valid distance matrix "
            "file. Please supply a valid distance matrix file using the -d "
            "option.")
    try:
        mapping, mapping_header, mapping_comments = parse_mapping_file(
            open(opts.mapping_fp, 'U'))
    except QiimeParseError:
        option_parser.error(
            "This does not look like a valid metadata mapping "
            "file. Please supply a valid mapping file using the -m option.")

    # Make sure the y_min and y_max options make sense, as they can be either
    # 'auto' or a number.
    y_min = opts.y_min
    y_max = opts.y_max
    try:
        y_min = float(y_min)
    except ValueError:
        if y_min == 'auto':
            y_min = None
        else:
            option_parser.error("The --y_min option must be either a number "
                                "or 'auto'.")
    try:
        y_max = float(y_max)
    except ValueError:
        if y_max == 'auto':
            y_max = None
        else:
            option_parser.error("The --y_max option must be either a number "
                                "or 'auto'.")

    # Parse the field states that will be compared to every other field state.
    comparison_field_states = opts.comparison_groups
    comparison_field_states = map(strip, comparison_field_states.split(','))
    comparison_field_states = [
        field_state.strip('"').strip("'")
        for field_state in comparison_field_states
    ]
    if comparison_field_states is None:
        option_parser.error("You must provide at least one field state to "
                            "compare (using the -c option).")

    # Get distance comparisons between each field state and each of the
    # comparison field states.
    field = opts.field
    comparison_groupings = get_field_state_comparisons(
        dist_matrix_header, dist_matrix, mapping_header, mapping, field,
        comparison_field_states)

    # Grab a list of all field states that had the comparison field states
    # compared against them. These will be plotted along the x-axis.
    field_states = comparison_groupings.keys()

    def custom_comparator(x, y):
        try:
            num_x = float(x)
            num_y = float(y)
            return int(num_x - num_y)
        except:
            if x < y:
                return -1
            elif x > y:
                return 1
            else:
                return 0

    # Sort the field states as numbers if the elements are numbers, else sort
    # them lexically.
    field_states.sort(custom_comparator)

    # If the label type is numeric, get a list of all field states in sorted
    # numeric order. These will be used to determine the spacing of the
    # field state 'points' along the x-axis.
    x_spacing = None
    if opts.label_type == "numeric":
        try:
            x_spacing = sorted(map(float, field_states))
        except:
            option_parser.error("The 'numeric' label type is invalid because "
                                "not all field states could be converted into "
                                "numbers. Please specify a different label "
                                "type.")

    # Accumulate the data for each field state 'point' along the x-axis.
    plot_data = []
    plot_x_axis_labels = []
    for field_state in field_states:
        field_state_data = []
        for comp_field_state in comparison_field_states:
            field_state_data.append(
                comparison_groupings[field_state][comp_field_state])
        plot_data.append(field_state_data)
        plot_x_axis_labels.append(field_state)

    # Plot the data and labels.
    plot_title = "Distance Comparisons"
    plot_x_label = field
    plot_y_label = "Distance"

    # If we are creating a bar chart or box plot, grab a list of good data
    # colors to use.
    plot_type = opts.plot_type
    plot_colors = None
    if plot_type == "bar" or plot_type == "box":
        plot_colors = [
            matplotlib_rgb_color(data_colors[color].toRGB())
            for color in data_color_order
        ]

    assert plot_data, "Error: there is no data to plot!"

    width = opts.width
    height = opts.height
    if width <= 0 or height <= 0:
        option_parser.error("The specified width and height of the image must "
                            "be greater than zero.")

    plot_figure = generate_comparative_plots(
        opts.plot_type,
        plot_data,
        x_values=x_spacing,
        data_point_labels=plot_x_axis_labels,
        distribution_labels=comparison_field_states,
        distribution_markers=plot_colors,
        x_label=plot_x_label,
        y_label=plot_y_label,
        title=plot_title,
        x_tick_labels_orientation=opts.x_tick_labels_orientation,
        y_min=y_min,
        y_max=y_max,
        whisker_length=opts.whisker_length,
        error_bar_type=opts.error_bar_type,
        distribution_width=opts.distribution_width,
        figure_width=width,
        figure_height=height)

    # Save the plot in the specified format.
    output_plot_fp = join(
        opts.output_dir,
        "%s_Distance_Comparisons.%s" % (field, opts.imagetype))
    plot_figure.savefig(output_plot_fp,
                        format=opts.imagetype,
                        transparent=opts.transparent)

    if not opts.suppress_significance_tests:
        sig_tests_f = open(join(opts.output_dir, "%s_Stats.txt" % field), 'w')

        # Rearrange the plot data into a format suitable for all_pairs_t_test.
        sig_tests_labels = []
        sig_tests_data = []
        for data_point, data_point_label in zip(plot_data, plot_x_axis_labels):
            for dist, comp_field in zip(data_point, comparison_field_states):
                sig_tests_labels.append('%s vs %s' %
                                        (data_point_label, comp_field))
                sig_tests_data.append(dist)

        sig_tests_results = all_pairs_t_test(
            sig_tests_labels,
            sig_tests_data,
            tail_type=opts.tail_type,
            num_permutations=opts.num_permutations)
        sig_tests_f.write(sig_tests_results)
        sig_tests_f.close()

    if opts.save_raw_data:
        # Write the raw plot data into a tab-delimited file, where each line
        # has the distances between a comparison group and another field state
        # 'point' along the x-axis.
        assert (len(plot_x_axis_labels) == len(plot_data)), "The number of " +\
            "labels do not match the number of points along the x-axis."
        raw_data_fp = join(opts.output_dir,
                           "%s_Distance_Comparisons.txt" % field)
        raw_data_f = open(raw_data_fp, 'w')

        raw_data_f.write("#ComparisonGroup\tFieldState\tDistances\n")
        for label, data in zip(plot_x_axis_labels, plot_data):
            assert (len(comparison_field_states) == len(data)), "The " +\
                "number of specified comparison groups does not match " +\
                "the number of groups found at the current point along " +\
                "the x-axis."
            for comp_field_state, comp_grp_data in zip(comparison_field_states,
                                                       data):
                raw_data_f.write(comp_field_state + "\t" + label + "\t" +
                                 "\t".join(map(str, comp_grp_data)) + "\n")
        raw_data_f.close()
예제 #5
0
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create the output dir if it doesn't already exist.
    try:
        create_dir(opts.output_dir)
    except:
        option_parser.error("Could not create or access output directory "
                            "specified with the -o option.")

    # Parse the distance matrix and mapping file.
    try:
        dist_matrix_header, dist_matrix = parse_distmat(
            open(opts.distance_matrix_fp, 'U'))
    except:
        option_parser.error("This does not look like a valid distance matrix "
            "file. Please supply a valid distance matrix file using the -d "
            "option.")

    try:
        mapping, mapping_header, mapping_comments = parse_mapping_file(
            open(opts.mapping_fp, 'U'))
    except QiimeParseError:
        option_parser.error("This does not look like a valid metadata mapping "
            "file. Please supply a valid mapping file using the -m option.")

    fields = opts.fields
    fields = map(strip, fields.split(','))
    fields = [field.strip('"').strip("'") for field in fields]

    if fields is None:
        option_parser.error("You must provide at least one field using the -f "
                            "option.")

    # Make sure each field is in the mapping file.
    for field in fields:
        if field not in mapping_header:
            option_parser.error("The field '%s' is not in the provided "
                "mapping file. Please supply correct fields (using the -f "
                "option) corresponding to fields in the mapping file."
                % field)

    # Make sure the y_min and y_max options make sense, as they can be either
    # 'auto' or a number.
    y_min = opts.y_min
    y_max = opts.y_max
    try:
        y_min = float(y_min)
    except ValueError:
        if y_min == 'auto':
            y_min = None
        else:
            option_parser.error("The --y_min option must be either a number "
                                "or 'auto'.")
    try:
        y_max = float(y_max)
    except ValueError:
        if y_max == 'auto':
            y_max = None
        else:
            option_parser.error("The --y_max option must be either a number "
                                "or 'auto'.")

    # Generate the various boxplots, depending on what the user wanted
    # suppressed. Add them all to one encompassing plot.
    for field in fields:
        plot_data = []
        plot_labels = []

        if not opts.suppress_all_within:
            plot_data.append(get_all_grouped_distances(dist_matrix_header,
                    dist_matrix, mapping_header, mapping, field, within=True))
            plot_labels.append("All within %s" % field)
        if not opts.suppress_all_between:
            plot_data.append(get_all_grouped_distances(dist_matrix_header,
                    dist_matrix, mapping_header, mapping, field, within=False))
            plot_labels.append("All between %s" % field)
        if not opts.suppress_individual_within:
            within_dists = get_grouped_distances(dist_matrix_header,
                    dist_matrix, mapping_header, mapping, field, within=True)
            for grouping in within_dists:
                plot_data.append(grouping[2])
                plot_labels.append("%s vs. %s" % (grouping[0], grouping[1]))
        if not opts.suppress_individual_between:
            between_dists = get_grouped_distances(dist_matrix_header,
                    dist_matrix, mapping_header, mapping, field, within=False)
            for grouping in between_dists:
                plot_data.append(grouping[2])
                plot_labels.append("%s vs. %s" % (grouping[0], grouping[1]))

        # We now have our data and labels ready, so plot them!
        assert (len(plot_data) == len(plot_labels)), "The number " +\
                "of boxplot labels does not match the number of " +\
                "boxplots."
        if plot_data:
            if opts.sort:
                # Sort our plot data in order of increasing median.
                sorted_data = []
                for label, distribution in zip(plot_labels, plot_data):
                    sorted_data.append((label, distribution,
                        median(distribution)))
                sorted_data.sort(key=itemgetter(2))
                plot_labels = []
                plot_data = []
                for label, distribution, median_value in sorted_data:
                    plot_labels.append(label)
                    plot_data.append(distribution)

            width = opts.width
            height = opts.height
            if width is None:
                width = len(plot_data) * opts.box_width + 2
            if width <= 0 or height <= 0:
                option_parser.error("The specified width and height of the "
                                    "image must be greater than zero.")

            plot_figure = generate_box_plots(plot_data,
                    x_tick_labels=plot_labels, title="%s Distances" % field,
                    x_label="Grouping", y_label="Distance",
                    x_tick_labels_orientation='vertical', y_min=y_min,
                    y_max=y_max, whisker_length=opts.whisker_length,
                    box_width=opts.box_width, box_color=opts.box_color,
                    figure_width=width, figure_height=height)

            output_plot_fp = join(opts.output_dir, "%s_Distances.%s"
                                       % (field, opts.imagetype))
            plot_figure.savefig(output_plot_fp, format=opts.imagetype,
                    transparent=opts.transparent)
        else:
            option_parser.error("You have chosen to suppress all plots. At "
                                "least one type of plot must be unsuppressed.")

        if not opts.suppress_significance_tests:
            sig_tests_f = open(join(
                opts.output_dir, "%s_Stats.xls" % field), 'w')
            sig_tests_results = all_pairs_t_test(plot_labels, plot_data,
                    tail_type=opts.tail_type,
                    num_permutations=opts.num_permutations)
            sig_tests_f.write(sig_tests_results)
            sig_tests_f.close()

        if opts.save_raw_data:
            # Write the raw plot data into a tab-delimited file.
            assert(len(plot_labels) == len(plot_data))
            raw_data_fp = join(opts.output_dir, "%s_Distances.xls"
                                    % field)
            raw_data_f = open(raw_data_fp, 'w')

            for label, data in zip(plot_labels, plot_data):
                raw_data_f.write(label.replace(" ", "_") + "\t")
                raw_data_f.write("\t".join(map(str, data)))
                raw_data_f.write("\n")
            raw_data_f.close()