コード例 #1
0
ファイル: util.py プロジェクト: gregcaporaso/microbiogeo
def get_color_pool():
    """Build the list of matplotlib colors used for plotting, minus yellows.

    Every name in data_color_order except 'yellow1' and 'yellow2' is looked up
    in data_colors, converted with toRGB(), and passed through
    matplotlib_rgb_color. The result keeps the order of data_color_order.
    """
    # Slice-copy first so that dropping the yellows does not modify the
    # module-level ordering.
    usable_names = data_color_order[:]
    for unwanted in ('yellow1', 'yellow2'):
        usable_names.remove(unwanted)

    pool = []
    for name in usable_names:
        rgb = data_colors[name].toRGB()
        pool.append(matplotlib_rgb_color(rgb))
    return pool
コード例 #2
0
 def test_matplotlib_rgb_color(self):
     """matplotlib_rgb_color should map 0-255 RGB tuples to decimal tuples.
     """
     # Each case pairs an integer RGB input with the decimal tuple that
     # matplotlib_rgb_color is expected to return for it.
     cases = (
         ((255, 255, 255), (1.0, 1.0, 1.0)),
         ((0, 0, 0), (0., 0., 0.)),
         ((255, 0, 100), (1., 0., 0.39215686274509803)),
         ((100, 253, 18),
          (0.39215686274509803, 0.99215686274509807, 0.070588235294117646)),
     )
     for rgb, expected in cases:
         self.assertEqual(matplotlib_rgb_color(rgb), expected)
コード例 #3
0
 def test_matplotlib_rgb_color(self):
     """matplotlib_rgb_color should turn integer RGB triples into decimals.
     """
     # Inputs and expected outputs are kept side by side so that each pair
     # can be read (and extended) as a single case.
     rgb_to_expected = [
         ((255, 255, 255), (1.0, 1.0, 1.0)),
         ((0, 0, 0), (0., 0., 0.)),
         ((255, 0, 100), (1., 0., 0.39215686274509803)),
         ((100, 253, 18),
          (0.39215686274509803, 0.99215686274509807, 0.070588235294117646)),
     ]
     for rgb_triple, expected_triple in rgb_to_expected:
         observed_triple = matplotlib_rgb_color(rgb_triple)
         self.assertEqual(observed_triple, expected_triple)
コード例 #4
0
def main():
    """Create a distance comparison plot plus optional stats/raw-data files.

    The command-line options are parsed according to script_info. The
    distance matrix (-d) and metadata mapping file (-m) are loaded, distances
    are grouped by the states of opts.field against each of the comparison
    groups (-c), and the resulting figure is saved in opts.output_dir as
    "<field>_Distance_Comparisons.<imagetype>".

    Unless opts.suppress_significance_tests is set, the output of
    all_pairs_t_test is written to "<field>_Stats.txt". When
    opts.save_raw_data is set, the plotted distances are also written to the
    tab-delimited file "<field>_Distance_Comparisons.txt".

    Invalid user input is reported through option_parser.error().
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create the output dir if it doesn't already exist.
    try:
        create_dir(opts.output_dir)
    except Exception:
        option_parser.error("Could not create or access output directory "
                            "specified with the -o option.")

    # Parse the distance matrix and mapping file. The input files are opened
    # in 'with' blocks so the handles are closed even if parsing fails.
    try:
        with open(opts.distance_matrix_fp, 'U') as dist_matrix_f:
            dist_matrix_header, dist_matrix = parse_distmat(dist_matrix_f)
    except Exception:
        option_parser.error(
            "This does not look like a valid distance matrix "
            "file. Please supply a valid distance matrix file using the -d "
            "option.")
    try:
        with open(opts.mapping_fp, 'U') as mapping_f:
            mapping, mapping_header, mapping_comments = parse_mapping_file(
                mapping_f)
    except QiimeParseError:
        option_parser.error(
            "This does not look like a valid metadata mapping "
            "file. Please supply a valid mapping file using the -m option.")

    # Make sure the y_min and y_max options make sense, as they can be either
    # 'auto' or a number.
    y_min = opts.y_min
    y_max = opts.y_max
    try:
        y_min = float(y_min)
    except ValueError:
        if y_min == 'auto':
            y_min = None
        else:
            option_parser.error("The --y_min option must be either a number "
                                "or 'auto'.")
    try:
        y_max = float(y_max)
    except ValueError:
        if y_max == 'auto':
            y_max = None
        else:
            option_parser.error("The --y_max option must be either a number "
                                "or 'auto'.")

    # Parse the field states that will be compared to every other field state.
    # The option has to be validated *before* it is split: afterwards the
    # value is always a list, so a None check could never trigger.
    if not opts.comparison_groups:
        option_parser.error("You must provide at least one field state to "
                            "compare (using the -c option).")
    comparison_field_states = map(strip, opts.comparison_groups.split(','))
    comparison_field_states = [
        field_state.strip('"').strip("'")
        for field_state in comparison_field_states
    ]

    # Get distance comparisons between each field state and each of the
    # comparison field states.
    field = opts.field
    comparison_groupings = get_field_state_comparisons(
        dist_matrix_header, dist_matrix, mapping_header, mapping, field,
        comparison_field_states)

    # Grab a list of all field states that had the comparison field states
    # compared against them. These will be plotted along the x-axis.
    field_states = comparison_groupings.keys()

    def custom_comparator(x, y):
        """Compare numerically when both values are numbers, else lexically.

        Returns -1, 0 or 1.
        """
        try:
            left, right = float(x), float(y)
        except (TypeError, ValueError):
            # At least one value is not a number: compare the raw values.
            left, right = x, y
        # Only the sign of the comparison is returned. Truncating the numeric
        # difference with int() would make values that are less than 1 apart
        # (e.g. 0.2 and 0.5) compare as equal.
        return (left > right) - (left < right)

    # Sort the field states as numbers if the elements are numbers, else sort
    # them lexically.
    field_states.sort(custom_comparator)

    # If the label type is numeric, get a list of all field states in sorted
    # numeric order. These will be used to determine the spacing of the
    # field state 'points' along the x-axis.
    x_spacing = None
    if opts.label_type == "numeric":
        try:
            x_spacing = sorted(map(float, field_states))
        except (TypeError, ValueError):
            option_parser.error("The 'numeric' label type is invalid because "
                                "not all field states could be converted into "
                                "numbers. Please specify a different label "
                                "type.")

    # Accumulate the data for each field state 'point' along the x-axis.
    plot_data = []
    plot_x_axis_labels = []
    for field_state in field_states:
        plot_data.append([comparison_groupings[field_state][comp_field_state]
                          for comp_field_state in comparison_field_states])
        plot_x_axis_labels.append(field_state)

    # Plot the data and labels.
    plot_title = "Distance Comparisons"
    plot_x_label = field
    plot_y_label = "Distance"

    # If we are creating a bar chart or box plot, grab a list of good data
    # colors to use.
    plot_type = opts.plot_type
    plot_colors = None
    if plot_type in ("bar", "box"):
        plot_colors = [matplotlib_rgb_color(data_colors[color].toRGB())
                       for color in data_color_order]

    assert plot_data, "Error: there is no data to plot!"

    width = opts.width
    height = opts.height
    if width <= 0 or height <= 0:
        option_parser.error("The specified width and height of the image must "
                            "be greater than zero.")

    plot_figure = generate_comparative_plots(
        plot_type,
        plot_data,
        x_values=x_spacing,
        data_point_labels=plot_x_axis_labels,
        distribution_labels=comparison_field_states,
        distribution_markers=plot_colors,
        x_label=plot_x_label,
        y_label=plot_y_label,
        title=plot_title,
        x_tick_labels_orientation=opts.x_tick_labels_orientation,
        y_min=y_min,
        y_max=y_max,
        whisker_length=opts.whisker_length,
        error_bar_type=opts.error_bar_type,
        distribution_width=opts.distribution_width,
        figure_width=width,
        figure_height=height)

    # Save the plot in the specified format.
    output_plot_fp = join(
        opts.output_dir,
        "%s_Distance_Comparisons.%s" % (field, opts.imagetype))
    plot_figure.savefig(output_plot_fp,
                        format=opts.imagetype,
                        transparent=opts.transparent)

    if not opts.suppress_significance_tests:
        sig_tests_fp = join(opts.output_dir, "%s_Stats.txt" % field)
        # The 'with' block closes the file even if the tests raise.
        with open(sig_tests_fp, 'w') as sig_tests_f:
            # Rearrange the plot data into a format suitable for
            # all_pairs_t_test.
            sig_tests_labels = []
            sig_tests_data = []
            for data_point, data_point_label in zip(plot_data,
                                                    plot_x_axis_labels):
                for dist, comp_field in zip(data_point,
                                            comparison_field_states):
                    sig_tests_labels.append('%s vs %s' %
                                            (data_point_label, comp_field))
                    sig_tests_data.append(dist)

            sig_tests_results = all_pairs_t_test(
                sig_tests_labels,
                sig_tests_data,
                tail_type=opts.tail_type,
                num_permutations=opts.num_permutations)
            sig_tests_f.write(sig_tests_results)

    if opts.save_raw_data:
        # Write the raw plot data into a tab-delimited file, where each line
        # has the distances between a comparison group and another field state
        # 'point' along the x-axis.
        assert (len(plot_x_axis_labels) == len(plot_data)), "The number of " +\
                "labels do not match the number of points along the x-axis."
        raw_data_fp = join(opts.output_dir,
                           "%s_Distance_Comparisons.txt" % field)
        with open(raw_data_fp, 'w') as raw_data_f:
            raw_data_f.write("#ComparisonGroup\tFieldState\tDistances\n")
            for label, data in zip(plot_x_axis_labels, plot_data):
                assert (len(comparison_field_states) == len(data)), "The " +\
                        "number of specified comparison groups does not " +\
                        "match the number of groups found at the current " +\
                        "point along the x-axis."
                for comp_field_state, comp_grp_data in zip(
                        comparison_field_states, data):
                    raw_data_f.write(comp_field_state + "\t" + label + "\t" +
                                     "\t".join(map(str, comp_grp_data)) + "\n")
コード例 #5
0
def main():
    """Create a distance comparison plot plus optional stats/raw-data files.

    The command-line options are parsed according to script_info. The
    distance matrix (-d) and metadata mapping file (-m) are loaded, distances
    are grouped by the states of opts.field against each of the comparison
    groups (-c), and the resulting figure is saved in opts.output_dir as
    "<field>_Distance_Comparisons.<imagetype>".

    Unless opts.suppress_significance_tests is set, the output of
    all_pairs_t_test is written to "<field>_Stats.txt". When
    opts.save_raw_data is set, the plotted distances are also written to the
    tab-delimited file "<field>_Distance_Comparisons.txt".

    Invalid user input is reported through option_parser.error().
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create the output dir if it doesn't already exist.
    try:
        create_dir(opts.output_dir)
    except Exception:
        option_parser.error("Could not create or access output directory "
                            "specified with the -o option.")

    # Parse the distance matrix and mapping file. The input files are opened
    # in 'with' blocks so the handles are closed even if parsing fails.
    try:
        with open(opts.distance_matrix_fp, 'U') as dist_matrix_f:
            dist_matrix_header, dist_matrix = parse_distmat(dist_matrix_f)
    except Exception:
        option_parser.error("This does not look like a valid distance matrix "
            "file. Please supply a valid distance matrix file using the -d "
            "option.")
    try:
        with open(opts.mapping_fp, 'U') as mapping_f:
            mapping, mapping_header, mapping_comments = parse_mapping_file(
                mapping_f)
    except QiimeParseError:
        option_parser.error("This does not look like a valid metadata mapping "
            "file. Please supply a valid mapping file using the -m option.")

    # Make sure the y_min and y_max options make sense, as they can be either
    # 'auto' or a number.
    y_min = opts.y_min
    y_max = opts.y_max
    try:
        y_min = float(y_min)
    except ValueError:
        if y_min == 'auto':
            y_min = None
        else:
            option_parser.error("The --y_min option must be either a number "
                                "or 'auto'.")
    try:
        y_max = float(y_max)
    except ValueError:
        if y_max == 'auto':
            y_max = None
        else:
            option_parser.error("The --y_max option must be either a number "
                                "or 'auto'.")

    # Parse the field states that will be compared to every other field state.
    # The option has to be validated *before* it is split: afterwards the
    # value is always a list, so a None check could never trigger.
    if not opts.comparison_groups:
        option_parser.error("You must provide at least one field state to "
                            "compare (using the -c option).")
    comparison_field_states = map(strip, opts.comparison_groups.split(','))
    comparison_field_states = [field_state.strip('"').strip("'")
                               for field_state in comparison_field_states]

    # Get distance comparisons between each field state and each of the
    # comparison field states.
    field = opts.field
    comparison_groupings = get_field_state_comparisons(dist_matrix_header,
            dist_matrix, mapping_header, mapping, field,
            comparison_field_states)

    # Grab a list of all field states that had the comparison field states
    # compared against them. These will be plotted along the x-axis.
    field_states = comparison_groupings.keys()

    def custom_comparator(x, y):
        """Compare numerically when both values are numbers, else lexically.

        Returns -1, 0 or 1.
        """
        try:
            left, right = float(x), float(y)
        except (TypeError, ValueError):
            # At least one value is not a number: compare the raw values.
            left, right = x, y
        # Only the sign of the comparison is returned. Truncating the numeric
        # difference with int() would make values that are less than 1 apart
        # (e.g. 0.2 and 0.5) compare as equal.
        return (left > right) - (left < right)

    # Sort the field states as numbers if the elements are numbers, else sort
    # them lexically.
    field_states.sort(custom_comparator)

    # If the label type is numeric, get a list of all field states in sorted
    # numeric order. These will be used to determine the spacing of the
    # field state 'points' along the x-axis.
    x_spacing = None
    if opts.label_type == "numeric":
        try:
            x_spacing = sorted(map(float, field_states))
        except (TypeError, ValueError):
            option_parser.error("The 'numeric' label type is invalid because "
                                "not all field states could be converted into "
                                "numbers. Please specify a different label "
                                "type.")

    # Accumulate the data for each field state 'point' along the x-axis.
    plot_data = []
    plot_x_axis_labels = []
    for field_state in field_states:
        plot_data.append([comparison_groupings[field_state][comp_field_state]
                          for comp_field_state in comparison_field_states])
        plot_x_axis_labels.append(field_state)

    # Plot the data and labels.
    plot_title = "Distance Comparisons"
    plot_x_label = field
    plot_y_label = "Distance"

    # If we are creating a bar chart or box plot, grab a list of good data
    # colors to use.
    plot_type = opts.plot_type
    plot_colors = None
    if plot_type in ("bar", "box"):
        plot_colors = [matplotlib_rgb_color(data_colors[color].toRGB())
                       for color in data_color_order]

    assert plot_data, "Error: there is no data to plot!"

    width = opts.width
    height = opts.height
    if width <= 0 or height <= 0:
        option_parser.error("The specified width and height of the image must "
                            "be greater than zero.")

    plot_figure = generate_comparative_plots(plot_type, plot_data,
            x_values=x_spacing, data_point_labels=plot_x_axis_labels,
            distribution_labels=comparison_field_states,
            distribution_markers=plot_colors, x_label=plot_x_label,
            y_label=plot_y_label, title=plot_title,
            x_tick_labels_orientation=opts.x_tick_labels_orientation,
            y_min=y_min, y_max=y_max, whisker_length=opts.whisker_length,
            error_bar_type=opts.error_bar_type,
            distribution_width=opts.distribution_width,
            group_spacing=opts.group_spacing, figure_width=width,
            figure_height=height)

    # Save the plot in the specified format.
    output_plot_fp = join(opts.output_dir, "%s_Distance_Comparisons.%s" %
                          (field, opts.imagetype))
    plot_figure.savefig(output_plot_fp, format=opts.imagetype,
            transparent=opts.transparent)

    if not opts.suppress_significance_tests:
        sig_tests_fp = join(opts.output_dir, "%s_Stats.txt" % field)
        # The 'with' block closes the file even if the tests raise.
        with open(sig_tests_fp, 'w') as sig_tests_f:
            # Rearrange the plot data into a format suitable for
            # all_pairs_t_test.
            sig_tests_labels = []
            sig_tests_data = []
            for data_point, data_point_label in zip(plot_data,
                                                    plot_x_axis_labels):
                for dist, comp_field in zip(data_point,
                                            comparison_field_states):
                    sig_tests_labels.append('%s vs %s' % (data_point_label,
                                                          comp_field))
                    sig_tests_data.append(dist)

            sig_tests_results = all_pairs_t_test(sig_tests_labels,
                    sig_tests_data, tail_type=opts.tail_type,
                    num_permutations=opts.num_permutations)
            sig_tests_f.write(sig_tests_results)

    if opts.save_raw_data:
        # Write the raw plot data into a tab-delimited file, where each line
        # has the distances between a comparison group and another field state
        # 'point' along the x-axis.
        assert (len(plot_x_axis_labels) == len(plot_data)), "The number of " +\
                "labels do not match the number of points along the x-axis."
        raw_data_fp = join(opts.output_dir,
                           "%s_Distance_Comparisons.txt" % field)
        with open(raw_data_fp, 'w') as raw_data_f:
            raw_data_f.write("#ComparisonGroup\tFieldState\tDistances\n")
            for label, data in zip(plot_x_axis_labels, plot_data):
                assert (len(comparison_field_states) == len(data)), "The " +\
                        "number of specified comparison groups does not " +\
                        "match the number of groups found at the current " +\
                        "point along the x-axis."
                for comp_field_state, comp_grp_data in zip(
                        comparison_field_states, data):
                    raw_data_f.write(comp_field_state + "\t" + label + "\t" +
                            "\t".join(map(str, comp_grp_data)) + "\n")
コード例 #6
0
def _color_field_states(map_f, samp_ids, field, field_states, color_by_field):
    """Assigns a color to each field state based on a second field.

    Each state in field_states is looked up in the mapping file to find the
    single color_by_field state that accompanies it. Every distinct
    color_by_field state encountered receives its own color, taken in order
    from data_color_order/data_colors.

    Returns a two-element tuple: a list of matplotlib-compatible colors that
    is parallel to field_states, and a dictionary mapping each encountered
    color_by_field state to its color (e.g. for building a legend).

    A ValueError is raised if:
        - field or color_by_field is not a category in the mapping file
        - field_states contains a state that field does not take on for the
          given sample IDs
        - a field state is not paired with exactly one color_by_field state,
          since the coloring would then be ambiguous
        - more distinct colors are required than are available

    Arguments:
        map_f - the mapping file (file-like object)
        samp_ids - a list of sample IDs to consider in the mapping file. Only
            these sample IDs will be used when coloring field states
        field - the field in the mapping file to color
        field_states - the field states in field to color
        color_by_field - the field in the mapping file to color field_states by
    """
    colors = []
    color_pool = []
    for color_name in data_color_order:
        color_pool.append(matplotlib_rgb_color(data_colors[color_name].toRGB()))
    metadata_map = MetadataMap.parseMetadataMap(map_f)

    # Both fields have to be columns of the mapping file.
    for category in (field, color_by_field):
        if category not in metadata_map.CategoryNames:
            raise ValueError("The field '%s' is not in the metadata mapping "
                             "file's column headers." % category)

    all_field_states = metadata_map.getCategoryValues(samp_ids, field)
    all_color_by_states = metadata_map.getCategoryValues(samp_ids,
                                                         color_by_field)

    # Every requested state must actually occur among the samples.
    unknown_states = set(field_states) - set(all_field_states)
    if unknown_states:
        raise ValueError("Encountered unrecognizable field state(s) in %r "
                         "for field '%s'." % (field_states, field))

    # Collect, per requested field state, all color-by states seen with it.
    field_mapping = defaultdict(list)
    for state, partner_state in zip(all_field_states, all_color_by_states):
        if state in field_states:
            field_mapping[state].append(partner_state)

    # Walk the requested states in order, handing out the next free color the
    # first time each color-by state shows up.
    color_mapping = {}
    for state in field_states:
        partner_states = set(field_mapping[state])

        if len(partner_states) != 1:
            raise ValueError("The field '%s' to color by does not have a "
                             "one-to-one mapping with field '%s'. Coloring "
                             "would be ambiguous." % (color_by_field, field))

        (partner_state,) = partner_states
        if partner_state not in color_mapping:
            if not color_pool:
                raise ValueError("There are not enough available QIIME colors "
                                 "to color each of the field states in field "
                                 "'%s'. Coloring would be ambiguous." %
                                 color_by_field)
            color_mapping[partner_state] = color_pool.pop(0)

        colors.append(color_mapping[partner_state])

    return colors, color_mapping
コード例 #7
0
def _color_field_states(map_f, samp_ids, field, field_states, color_by_field):
    """Colors the states of one mapping file field according to another.

    For every entry of field_states, the color_by_field state that occurs
    together with it in the mapping file is determined, and each distinct
    color_by_field state is given a color drawn in order from
    data_color_order/data_colors.

    Returns (colors, color_mapping): colors is a list of matplotlib-compatible
    colors with one entry per input field state, and color_mapping is a
    dictionary from color_by_field state to color (useful for a legend).

    Raises ValueError when either field is missing from the mapping file's
    categories, when field_states holds a state not found for the given
    sample IDs, when a field state does not correspond to exactly one
    color_by_field state (ambiguous coloring), or when the available colors
    run out.

    Arguments:
        map_f - the mapping file (file-like object)
        samp_ids - a list of sample IDs to consider in the mapping file. Only
            these sample IDs will be used when coloring field states
        field - the field in the mapping file to color
        field_states - the field states in field to color
        color_by_field - the field in the mapping file to color field_states by
    """
    colors = []
    color_pool = []
    for pool_color in data_color_order:
        color_pool.append(matplotlib_rgb_color(data_colors[pool_color].toRGB()))
    metadata_map = MetadataMap.parseMetadataMap(map_f)

    # Reject fields that are not column headers of the mapping file.
    for header in (field, color_by_field):
        if header not in metadata_map.CategoryNames:
            raise ValueError("The field '%s' is not in the metadata mapping "
                             "file's column headers." % header)

    all_field_states = metadata_map.getCategoryValues(samp_ids, field)
    all_color_by_states = metadata_map.getCategoryValues(samp_ids,
                                                         color_by_field)

    # Reject requested states that never occur for these samples.
    if set(field_states).difference(set(all_field_states)):
        raise ValueError("Encountered unrecognizable field state(s) in %r "
                         "for field '%s'." % (field_states, field))

    # Group the color-by states under the requested field state they
    # accompany.
    field_mapping = defaultdict(list)
    paired_states = zip(all_field_states, all_color_by_states)
    for current_state, current_color_by in paired_states:
        if current_state in field_states:
            field_mapping[current_state].append(current_color_by)

    # Assign colors in the order the requested states are given; a color-by
    # state keeps the color it received the first time it was seen.
    color_mapping = {}
    for current_state in field_states:
        candidates = set(field_mapping[current_state])

        if len(candidates) != 1:
            raise ValueError("The field '%s' to color by does not have a "
                             "one-to-one mapping with field '%s'. Coloring "
                             "would be ambiguous." % (color_by_field, field))

        chosen = next(iter(candidates))
        already_colored = chosen in color_mapping
        if not already_colored:
            if len(color_pool) == 0:
                raise ValueError("There are not enough available QIIME colors "
                                 "to color each of the field states in field "
                                 "'%s'. Coloring would be ambiguous." %
                                 color_by_field)
            color_mapping[chosen] = color_pool.pop(0)

        colors.append(color_mapping[chosen])

    return colors, color_mapping