def test_grouped_distributions_empty_marker_list(self):
     """grouped_distributions() should fall back to its predefined
     markers when the caller supplies an empty marker list.

     No assertion is made on the result; the call itself is the check.
     """
     x_values = [1, 4, 10, 11]
     data_point_labels = ["T0", "T1", "T2", "T3"]
     distribution_labels = ["Infants", "Children", "Teens"]
     no_markers = []
     grouped_distributions('scatter', self.ValidTypicalData, x_values,
                           data_point_labels, distribution_labels,
                           no_markers, "x-axis label", "y-axis label",
                           "Test")
Beispiel #2
0
 def test_grouped_distributions_empty_marker_list(self):
     """Passing an empty marker list should make grouped_distributions()
     use its predefined list of markers.

     The test only exercises the call; nothing is asserted on the output.
     """
     positional_args = (
         self.ValidTypicalData,
         [1, 4, 10, 11],
         ["T0", "T1", "T2", "T3"],
         ["Infants", "Children", "Teens"],
         [],  # the empty marker list under test
         "x-axis label",
         "y-axis label",
         "Test",
     )
     grouped_distributions('scatter', *positional_args)
 def test_grouped_distributions_error(self):
     """An unsupported plot type ('pie') must make
     grouped_distributions() raise ValueError."""
     x_values = [1, 4, 10, 11]
     data_point_labels = ["T0", "T1", "T2", "T3"]
     distribution_labels = ["Infants", "Children", "Teens"]
     markers = ['b', 'g', 'y']
     with npt.assert_raises(ValueError):
         grouped_distributions('pie', self.ValidTypicalData, x_values,
                               data_point_labels, distribution_labels,
                               markers, "x-axis label", "y-axis label",
                               "Test")
Beispiel #4
0
 def test_grouped_distributions_error(self):
     """grouped_distributions() is expected to reject the invalid plot
     type 'pie' with a ValueError."""
     remaining_args = (
         self.ValidTypicalData,
         [1, 4, 10, 11],
         ["T0", "T1", "T2", "T3"],
         ["Infants", "Children", "Teens"],
         ['b', 'g', 'y'],
         "x-axis label",
         "y-axis label",
         "Test",
     )
     with npt.assert_raises(ValueError):
         grouped_distributions('pie', *remaining_args)
    def test_grouped_distributions_negative_distribution_width(self):
        """A zero or negative distribution_width must raise ValueError."""
        box_plot_args = (
            'box',
            self.ValidTypicalData,
            [1, 4, 10, 11],
            ["T0", "T1", "T2", "T3"],
            ["Infants", "Children", "Teens"],
            ['b', 'g', 'y'],
            "x-axis label",
            "y-axis label",
            "Test",
        )

        # Same order as before: the zero width first, then a negative one.
        for invalid_width in (0, -42):
            with self.assertRaises(ValueError):
                grouped_distributions(*box_plot_args,
                                      distribution_width=invalid_width)
 def test_grouped_distributions_box(self):
     """A 'box' plot request should yield a figure whose first axes carry
     the requested title, axis labels, and four x ticks at the expected
     positions."""
     x_values = [1, 4, 10, 11]
     data_point_labels = ["T0", "T1", "T2", "T3"]
     distribution_labels = ["Infants", "Children", "Teens"]
     colors = ['b', 'g', 'y']
     figure = grouped_distributions('box', self.ValidTypicalData, x_values,
                                    data_point_labels, distribution_labels,
                                    colors, "x-axis label", "y-axis label",
                                    "Test")
     first_axes = figure.get_axes()[0]
     self.assertEqual(first_axes.get_title(), "Test")
     self.assertEqual(first_axes.get_xlabel(), "x-axis label")
     self.assertEqual(first_axes.get_ylabel(), "y-axis label")
     tick_labels = first_axes.get_xticklabels()
     self.assertEqual(len(tick_labels), 4)
     expected_ticks = [1.075, 1.975, 3.775, 4.075]
     np.testing.assert_allclose(first_axes.get_xticks(), expected_ticks)
Beispiel #7
0
 def test_grouped_distributions_box(self):
     """grouped_distributions('box', ...) should return a figure with the
     given title and axis labels and four correctly placed x ticks."""
     call_args = (
         'box',
         self.ValidTypicalData,
         [1, 4, 10, 11],
         ["T0", "T1", "T2", "T3"],
         ["Infants", "Children", "Teens"],
         ['b', 'g', 'y'],
         "x-axis label",
         "y-axis label",
         "Test",
     )
     plot = grouped_distributions(*call_args)
     axes = plot.get_axes()[0]

     # Title and axis labels are checked first, in the original order.
     self.assertEqual(axes.get_title(), "Test")
     self.assertEqual(axes.get_xlabel(), "x-axis label")
     self.assertEqual(axes.get_ylabel(), "y-axis label")

     # One tick label per data point, at the expected x positions.
     self.assertEqual(len(axes.get_xticklabels()), 4)
     np.testing.assert_allclose(axes.get_xticks(),
                                [1.075, 1.975, 3.775, 4.075])
def main():
    """Plot distance comparisons between field states and save the outputs.

    Reads the command-line options, loads the distance matrix and the
    mapping file, collects the distances between every field state and each
    requested comparison field state, and saves a grouped-distributions
    figure in the output directory. Unless suppressed, the result of
    all_pairs_t_test is written to "<field>_Stats.txt"; if requested, the
    raw distances are written to "<field>_Distance_Comparisons.txt".

    Invalid options are reported through option_parser.error().
    """
    # Imported locally so this function does not depend on a module-level
    # import being present.
    from functools import cmp_to_key

    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create the output dir if it doesn't already exist.
    try:
        create_dir(opts.output_dir)
    except Exception:
        option_parser.error("Could not create or access output directory specified with the -o option.")

    # Parse the distance matrix and mapping file. The files are closed as
    # soon as parsing finishes.
    # NOTE(review): this assumes both parsers consume the whole file before
    # returning -- confirm against their implementations.
    try:
        with open(opts.distance_matrix_fp, "U") as dist_matrix_f:
            dist_matrix_header, dist_matrix = parse_distmat(dist_matrix_f)
    except Exception:
        option_parser.error(
            "This does not look like a valid distance matrix "
            "file. Please supply a valid distance matrix file using the -d "
            "option."
        )
    try:
        with open(opts.mapping_fp, "U") as mapping_f:
            mapping, mapping_header, mapping_comments = parse_mapping_file(mapping_f)
    except QiimeParseError:
        option_parser.error(
            "This does not look like a valid metadata mapping "
            "file. Please supply a valid mapping file using the -m option."
        )

    # Make sure the y_min and y_max options make sense, as they can be either
    # 'auto' or a number.
    y_min = opts.y_min
    y_max = opts.y_max
    try:
        y_min = float(y_min)
    except ValueError:
        if y_min == "auto":
            y_min = None
        else:
            option_parser.error("The --y_min option must be either a number or 'auto'.")
    try:
        y_max = float(y_max)
    except ValueError:
        if y_max == "auto":
            y_max = None
        else:
            option_parser.error("The --y_max option must be either a number or 'auto'.")

    # Parse the field states that will be compared to every other field state.
    # The presence check has to happen before splitting: once the list has
    # been built it can never be None.
    if not opts.comparison_groups:
        option_parser.error("You must provide at least one field state to compare (using the -c option).")
    comparison_field_states = [
        field_state.strip().strip('"').strip("'") for field_state in opts.comparison_groups.split(",")
    ]

    # Get distance comparisons between each field state and each of the
    # comparison field states.
    field = opts.field
    comparison_groupings = get_field_state_comparisons(
        dist_matrix_header, dist_matrix, mapping_header, mapping, field, comparison_field_states
    )

    def custom_comparator(x, y):
        """Compare numerically when both values are numbers, else lexically."""
        try:
            num_x = float(x)
            num_y = float(y)
        except (TypeError, ValueError):
            return (x > y) - (x < y)
        # Return only the sign of the comparison: truncating the difference
        # to an int would treat values less than 1 apart as equal.
        return (num_x > num_y) - (num_x < num_y)

    # Grab a list of all field states that had the comparison field states
    # compared against them. These will be plotted along the x-axis, sorted
    # as numbers if the elements are numbers, else lexically.
    field_states = sorted(comparison_groupings.keys(), key=cmp_to_key(custom_comparator))

    # If the label type is numeric, get a list of all field states in sorted
    # numeric order. These will be used to determine the spacing of the
    # field state 'points' along the x-axis.
    x_spacing = None
    if opts.label_type == "numeric":
        try:
            x_spacing = sorted(map(float, field_states))
        except (TypeError, ValueError):
            option_parser.error(
                "The 'numeric' label type is invalid because "
                "not all field states could be converted into "
                "numbers. Please specify a different label "
                "type."
            )

    # Accumulate the data for each field state 'point' along the x-axis.
    plot_data = [
        [comparison_groupings[field_state][comp_field_state] for comp_field_state in comparison_field_states]
        for field_state in field_states
    ]
    plot_x_axis_labels = list(field_states)

    # Plot the data and labels.
    plot_title = "Distance Comparisons"
    plot_x_label = field
    plot_y_label = "Distance"

    # If we are creating a bar chart or box plot, grab a list of good data
    # colors to use.
    plot_type = opts.plot_type
    plot_colors = None
    if plot_type in ("bar", "box"):
        plot_colors = [matplotlib_rgb_color(data_colors[color].toRGB()) for color in data_color_order]

    assert plot_data, "Error: there is no data to plot!"

    width = opts.width
    height = opts.height
    if width <= 0 or height <= 0:
        option_parser.error("The specified width and height of the image must be greater than zero.")

    plot_figure = grouped_distributions(
        opts.plot_type,
        plot_data,
        x_values=x_spacing,
        data_point_labels=plot_x_axis_labels,
        distribution_labels=comparison_field_states,
        distribution_markers=plot_colors,
        x_label=plot_x_label,
        y_label=plot_y_label,
        title=plot_title,
        x_tick_labels_orientation=opts.x_tick_labels_orientation,
        y_min=y_min,
        y_max=y_max,
        whisker_length=opts.whisker_length,
        error_bar_type=opts.error_bar_type,
        distribution_width=opts.distribution_width,
        figure_width=width,
        figure_height=height,
    )

    # Save the plot in the specified format.
    output_plot_fp = join(opts.output_dir, "%s_Distance_Comparisons.%s" % (field, opts.imagetype))
    plot_figure.savefig(output_plot_fp, format=opts.imagetype, transparent=opts.transparent)

    if not opts.suppress_significance_tests:
        # Rearrange the plot data into a format suitable for all_pairs_t_test.
        sig_tests_labels = []
        sig_tests_data = []
        for data_point, data_point_label in zip(plot_data, plot_x_axis_labels):
            for dist, comp_field in zip(data_point, comparison_field_states):
                sig_tests_labels.append("%s vs %s" % (data_point_label, comp_field))
                sig_tests_data.append(dist)

        sig_tests_results = all_pairs_t_test(
            sig_tests_labels, sig_tests_data, tail_type=opts.tail_type, num_permutations=opts.num_permutations
        )
        # Open the stats file only once the results exist, so a failing test
        # run does not leave an empty file or an unclosed handle behind.
        with open(join(opts.output_dir, "%s_Stats.txt" % field), "w") as sig_tests_f:
            sig_tests_f.write(sig_tests_results)

    if opts.save_raw_data:
        # Write the raw plot data into a tab-delimited file, where each line
        # has the distances between a comparison group and another field state
        # 'point' along the x-axis.
        assert len(plot_x_axis_labels) == len(plot_data), (
            "The number of " + "labels do not match the number of points along the x-axis."
        )
        raw_data_fp = join(opts.output_dir, "%s_Distance_Comparisons.txt" % field)
        with open(raw_data_fp, "w") as raw_data_f:
            raw_data_f.write("#ComparisonGroup\tFieldState\tDistances\n")
            for label, data in zip(plot_x_axis_labels, plot_data):
                assert len(comparison_field_states) == len(data), (
                    "The "
                    + "number of specified comparison groups does not match "
                    + "the number of groups found at the current point along "
                    + "the x-axis."
                )
                for comp_field_state, comp_grp_data in zip(comparison_field_states, data):
                    raw_data_f.write(
                        comp_field_state + "\t" + label + "\t" + "\t".join(map(str, comp_grp_data)) + "\n"
                    )
def main():
    """Plot distance comparisons between field states and save the outputs.

    Reads the command-line options, loads the distance matrix and the
    mapping file, collects the distances between every field state and each
    requested comparison field state, and saves a grouped-distributions
    figure in the output directory. Unless suppressed, the result of
    all_pairs_t_test is written to "<field>_Stats.txt"; if requested, the
    raw distances are written to "<field>_Distance_Comparisons.txt".

    Invalid options are reported through option_parser.error().
    """
    # Imported locally so this function does not depend on a module-level
    # import being present.
    from functools import cmp_to_key

    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create the output dir if it doesn't already exist.
    try:
        create_dir(opts.output_dir)
    except Exception:
        option_parser.error("Could not create or access output directory "
                            "specified with the -o option.")

    # Parse the distance matrix and mapping file. The files are closed as
    # soon as parsing finishes.
    # NOTE(review): this assumes both parsers consume the whole file before
    # returning -- confirm against their implementations.
    try:
        with open(opts.distance_matrix_fp, 'U') as dist_matrix_f:
            dist_matrix_header, dist_matrix = parse_distmat(dist_matrix_f)
    except Exception:
        option_parser.error(
            "This does not look like a valid distance matrix "
            "file. Please supply a valid distance matrix file using the -d "
            "option.")
    try:
        with open(opts.mapping_fp, 'U') as mapping_f:
            mapping, mapping_header, mapping_comments = parse_mapping_file(
                mapping_f)
    except QiimeParseError:
        option_parser.error(
            "This does not look like a valid metadata mapping "
            "file. Please supply a valid mapping file using the -m option.")

    # Make sure the y_min and y_max options make sense, as they can be either
    # 'auto' or a number.
    y_min = opts.y_min
    y_max = opts.y_max
    try:
        y_min = float(y_min)
    except ValueError:
        if y_min == 'auto':
            y_min = None
        else:
            option_parser.error("The --y_min option must be either a number "
                                "or 'auto'.")
    try:
        y_max = float(y_max)
    except ValueError:
        if y_max == 'auto':
            y_max = None
        else:
            option_parser.error("The --y_max option must be either a number "
                                "or 'auto'.")

    # Parse the field states that will be compared to every other field state.
    # The presence check has to happen before splitting: once the list has
    # been built it can never be None.
    if not opts.comparison_groups:
        option_parser.error("You must provide at least one field state to "
                            "compare (using the -c option).")
    comparison_field_states = [
        field_state.strip().strip('"').strip("'")
        for field_state in opts.comparison_groups.split(',')
    ]

    # Get distance comparisons between each field state and each of the
    # comparison field states.
    field = opts.field
    comparison_groupings = get_field_state_comparisons(
        dist_matrix_header, dist_matrix, mapping_header, mapping, field,
        comparison_field_states)

    def custom_comparator(x, y):
        """Compare numerically when both values are numbers, else lexically."""
        try:
            num_x = float(x)
            num_y = float(y)
        except (TypeError, ValueError):
            return (x > y) - (x < y)
        # Return only the sign of the comparison: truncating the difference
        # to an int would treat values less than 1 apart as equal.
        return (num_x > num_y) - (num_x < num_y)

    # Grab a list of all field states that had the comparison field states
    # compared against them. These will be plotted along the x-axis, sorted
    # as numbers if the elements are numbers, else lexically.
    field_states = sorted(comparison_groupings.keys(),
                          key=cmp_to_key(custom_comparator))

    # If the label type is numeric, get a list of all field states in sorted
    # numeric order. These will be used to determine the spacing of the
    # field state 'points' along the x-axis.
    x_spacing = None
    if opts.label_type == "numeric":
        try:
            x_spacing = sorted(map(float, field_states))
        except (TypeError, ValueError):
            option_parser.error("The 'numeric' label type is invalid because "
                                "not all field states could be converted into "
                                "numbers. Please specify a different label "
                                "type.")

    # Accumulate the data for each field state 'point' along the x-axis.
    plot_data = [
        [comparison_groupings[field_state][comp_field_state]
         for comp_field_state in comparison_field_states]
        for field_state in field_states
    ]
    plot_x_axis_labels = list(field_states)

    # Plot the data and labels.
    plot_title = "Distance Comparisons"
    plot_x_label = field
    plot_y_label = "Distance"

    # If we are creating a bar chart or box plot, grab a list of good data
    # colors to use.
    plot_type = opts.plot_type
    plot_colors = None
    if plot_type in ("bar", "box"):
        plot_colors = [
            matplotlib_rgb_color(data_colors[color].toRGB())
            for color in data_color_order
        ]

    assert plot_data, "Error: there is no data to plot!"

    width = opts.width
    height = opts.height
    if width <= 0 or height <= 0:
        option_parser.error("The specified width and height of the image must "
                            "be greater than zero.")

    plot_figure = grouped_distributions(
        opts.plot_type,
        plot_data,
        x_values=x_spacing,
        data_point_labels=plot_x_axis_labels,
        distribution_labels=comparison_field_states,
        distribution_markers=plot_colors,
        x_label=plot_x_label,
        y_label=plot_y_label,
        title=plot_title,
        x_tick_labels_orientation=opts.x_tick_labels_orientation,
        y_min=y_min,
        y_max=y_max,
        whisker_length=opts.whisker_length,
        error_bar_type=opts.error_bar_type,
        distribution_width=opts.distribution_width,
        figure_width=width,
        figure_height=height)

    # Save the plot in the specified format.
    output_plot_fp = join(
        opts.output_dir,
        "%s_Distance_Comparisons.%s" % (field, opts.imagetype))
    plot_figure.savefig(output_plot_fp,
                        format=opts.imagetype,
                        transparent=opts.transparent)

    if not opts.suppress_significance_tests:
        # Rearrange the plot data into a format suitable for all_pairs_t_test.
        sig_tests_labels = []
        sig_tests_data = []
        for data_point, data_point_label in zip(plot_data, plot_x_axis_labels):
            for dist, comp_field in zip(data_point, comparison_field_states):
                sig_tests_labels.append('%s vs %s' %
                                        (data_point_label, comp_field))
                sig_tests_data.append(dist)

        sig_tests_results = all_pairs_t_test(
            sig_tests_labels,
            sig_tests_data,
            tail_type=opts.tail_type,
            num_permutations=opts.num_permutations)
        # Open the stats file only once the results exist, so a failing test
        # run does not leave an empty file or an unclosed handle behind.
        sig_tests_fp = join(opts.output_dir, "%s_Stats.txt" % field)
        with open(sig_tests_fp, 'w') as sig_tests_f:
            sig_tests_f.write(sig_tests_results)

    if opts.save_raw_data:
        # Write the raw plot data into a tab-delimited file, where each line
        # has the distances between a comparison group and another field state
        # 'point' along the x-axis.
        assert (len(plot_x_axis_labels) == len(plot_data)), "The number of " +\
            "labels do not match the number of points along the x-axis."
        raw_data_fp = join(opts.output_dir,
                           "%s_Distance_Comparisons.txt" % field)
        with open(raw_data_fp, 'w') as raw_data_f:
            raw_data_f.write("#ComparisonGroup\tFieldState\tDistances\n")
            for label, data in zip(plot_x_axis_labels, plot_data):
                assert (len(comparison_field_states) == len(data)), "The " +\
                    "number of specified comparison groups does not match " +\
                    "the number of groups found at the current point along " +\
                    "the x-axis."
                for comp_field_state, comp_grp_data in zip(
                        comparison_field_states, data):
                    raw_data_f.write(
                        comp_field_state + "\t" + label + "\t" +
                        "\t".join(map(str, comp_grp_data)) + "\n")