def main():
    """Plot distance comparisons between field states from CLI options.

    Parses the command line (described by ``script_info``), loads the
    distance matrix and metadata mapping file, and asks
    ``get_field_state_comparisons`` for the distances between every field
    state of ``opts.field`` and each comparison group given with ``-c``.
    The result is plotted with ``grouped_distributions`` and written into
    ``opts.output_dir``:

    * ``<field>_Distance_Comparisons.<imagetype>`` - the plot (always).
    * ``<field>_Stats.txt`` - output of ``all_pairs_t_test`` unless
      ``opts.suppress_significance_tests`` is set.
    * ``<field>_Distance_Comparisons.txt`` - tab-delimited raw distances
      when ``opts.save_raw_data`` is set.

    Invalid input is reported through ``option_parser.error``.
    NOTE(review): this assumes ``option_parser.error`` terminates the
    script (as ``optparse.OptionParser.error`` does) - confirm.
    """
    # Imported locally so the block does not depend on a file-level import.
    from functools import cmp_to_key

    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create the output dir if it doesn't already exist.
    try:
        create_dir(opts.output_dir)
    except Exception:
        option_parser.error("Could not create or access output directory "
                            "specified with the -o option.")

    # Parse the distance matrix and mapping file. The files are opened in a
    # `with` block so the handles are released even when parsing fails.
    # NOTE(review): the 'U' (universal newlines) mode is kept from the
    # original; it is rejected by Python >= 3.11.
    try:
        with open(opts.distance_matrix_fp, "U") as dist_matrix_f:
            dist_matrix_header, dist_matrix = parse_distmat(dist_matrix_f)
    except Exception:
        option_parser.error(
            "This does not look like a valid distance matrix "
            "file. Please supply a valid distance matrix file using the -d "
            "option."
        )

    try:
        with open(opts.mapping_fp, "U") as mapping_f:
            mapping, mapping_header, mapping_comments = parse_mapping_file(
                mapping_f)
    except QiimeParseError:
        option_parser.error(
            "This does not look like a valid metadata mapping "
            "file. Please supply a valid mapping file using the -m option."
        )

    def parse_axis_limit(raw_value, option_name):
        # An axis limit is either a number or the literal 'auto', which is
        # passed on to the plotting code as None.
        try:
            return float(raw_value)
        except ValueError:
            if raw_value == "auto":
                return None
            option_parser.error("The --%s option must be either a number "
                                "or 'auto'." % option_name)

    # Make sure the y_min and y_max options make sense, as they can be either
    # 'auto' or a number.
    y_min = parse_axis_limit(opts.y_min, "y_min")
    y_max = parse_axis_limit(opts.y_max, "y_max")

    # Parse the field states that will be compared to every other field state.
    # The emptiness check has to happen *before* splitting: once split, the
    # value is always a list and can never be None.
    if not opts.comparison_groups:
        option_parser.error("You must provide at least one field state to "
                            "compare (using the -c option).")
    comparison_field_states = [
        field_state.strip().strip('"').strip("'")
        for field_state in opts.comparison_groups.split(",")
    ]

    # Get distance comparisons between each field state and each of the
    # comparison field states.
    field = opts.field
    comparison_groupings = get_field_state_comparisons(
        dist_matrix_header, dist_matrix, mapping_header, mapping, field,
        comparison_field_states
    )

    def custom_comparator(x, y):
        # Compare as numbers when both values are numeric, else lexically.
        # The sign is returned directly: truncating the difference with
        # int() would make values less than 1 apart compare as equal.
        try:
            left, right = float(x), float(y)
        except (TypeError, ValueError):
            left, right = x, y
        return (left > right) - (left < right)

    # Grab a list of all field states that had the comparison field states
    # compared against them, sorted as described above. These will be plotted
    # along the x-axis.
    field_states = sorted(comparison_groupings.keys(),
                          key=cmp_to_key(custom_comparator))

    # If the label type is numeric, get a list of all field states in sorted
    # numeric order. These will be used to determine the spacing of the
    # field state 'points' along the x-axis.
    x_spacing = None
    if opts.label_type == "numeric":
        try:
            x_spacing = sorted(float(state) for state in field_states)
        except (TypeError, ValueError):
            option_parser.error(
                "The 'numeric' label type is invalid because "
                "not all field states could be converted into "
                "numbers. Please specify a different label "
                "type."
            )

    # Accumulate the data for each field state 'point' along the x-axis.
    plot_data = []
    plot_x_axis_labels = []
    for field_state in field_states:
        groupings = comparison_groupings[field_state]
        plot_data.append([groupings[comp_field_state]
                          for comp_field_state in comparison_field_states])
        plot_x_axis_labels.append(field_state)

    # Plot the data and labels.
    plot_title = "Distance Comparisons"
    plot_x_label = field
    plot_y_label = "Distance"

    # If we are creating a bar chart or box plot, grab a list of good data
    # colors to use.
    plot_type = opts.plot_type
    plot_colors = None
    if plot_type in ("bar", "box"):
        plot_colors = [matplotlib_rgb_color(data_colors[color].toRGB())
                       for color in data_color_order]

    # Whether there is data depends on the user's input, so report it as a
    # usage error instead of an assert (asserts vanish under `python -O`).
    if not plot_data:
        option_parser.error("There is no data to plot!")

    width = opts.width
    height = opts.height
    if width <= 0 or height <= 0:
        option_parser.error("The specified width and height of the image must "
                            "be greater than zero.")

    plot_figure = grouped_distributions(
        plot_type,
        plot_data,
        x_values=x_spacing,
        data_point_labels=plot_x_axis_labels,
        distribution_labels=comparison_field_states,
        distribution_markers=plot_colors,
        x_label=plot_x_label,
        y_label=plot_y_label,
        title=plot_title,
        x_tick_labels_orientation=opts.x_tick_labels_orientation,
        y_min=y_min,
        y_max=y_max,
        whisker_length=opts.whisker_length,
        error_bar_type=opts.error_bar_type,
        distribution_width=opts.distribution_width,
        figure_width=width,
        figure_height=height,
    )

    # Save the plot in the specified format.
    output_plot_fp = join(opts.output_dir, "%s_Distance_Comparisons.%s" %
                          (field, opts.imagetype))
    plot_figure.savefig(output_plot_fp, format=opts.imagetype,
                        transparent=opts.transparent)

    if not opts.suppress_significance_tests:
        # Rearrange the plot data into a format suitable for all_pairs_t_test.
        sig_tests_labels = []
        sig_tests_data = []
        for data_point, data_point_label in zip(plot_data,
                                                plot_x_axis_labels):
            for dist, comp_field in zip(data_point, comparison_field_states):
                sig_tests_labels.append("%s vs %s" %
                                        (data_point_label, comp_field))
                sig_tests_data.append(dist)

        sig_tests_results = all_pairs_t_test(
            sig_tests_labels, sig_tests_data, tail_type=opts.tail_type,
            num_permutations=opts.num_permutations
        )

        # Only open the output file once the results exist, so a failing
        # test run does not leave an empty stats file or an open handle.
        sig_tests_fp = join(opts.output_dir, "%s_Stats.txt" % field)
        with open(sig_tests_fp, "w") as sig_tests_f:
            sig_tests_f.write(sig_tests_results)

    if opts.save_raw_data:
        # Write the raw plot data into a tab-delimited file, where each line
        # has the distances between a comparison group and another field state
        # 'point' along the x-axis.
        # The asserts below are internal sanity checks, not input validation.
        assert len(plot_x_axis_labels) == len(plot_data), (
            "The number of " +
            "labels do not match the number of points along the x-axis."
        )

        raw_data_fp = join(opts.output_dir,
                           "%s_Distance_Comparisons.txt" % field)
        with open(raw_data_fp, "w") as raw_data_f:
            raw_data_f.write("#ComparisonGroup\tFieldState\tDistances\n")

            for label, data in zip(plot_x_axis_labels, plot_data):
                assert len(comparison_field_states) == len(data), (
                    "The " +
                    "number of specified comparison groups does not match " +
                    "the number of groups found at the current point along " +
                    "the x-axis."
                )

                for comp_field_state, comp_grp_data in zip(
                        comparison_field_states, data):
                    raw_data_f.write(
                        comp_field_state + "\t" + label + "\t" +
                        "\t".join(map(str, comp_grp_data)) + "\n")
def _color_field_states(map_f, samp_ids, field, field_states, color_by_field):
    """Pick a color for each requested field state based on a second field.

    Every state in field_states is looked up in the mapping file to find the
    single color_by_field state it co-occurs with (among samp_ids only), and
    it receives that state's color. Colors are handed out in
    data_color_order, one per distinct color_by_field state, in the order the
    states are first needed. Several field states may share one
    color_by_field state, in which case they share its color.

    Returns a two-element tuple: a list of matplotlib-compatible colors
    parallel to field_states, and a dictionary mapping each color_by_field
    state that was used to its color (handy for building a legend).

    Raises ValueError if:
        - field or color_by_field is not a column of the mapping file
        - field_states contains a state not found in field for samp_ids
        - a field state corresponds to zero or more than one color_by_field
          state (coloring would be ambiguous)
        - the supply of colors runs out

    Arguments:
        map_f - the mapping file (file-like object)
        samp_ids - a list of sample IDs to consider in the mapping file. Only
            these sample IDs will be used when coloring field states
        field - the field in the mapping file to color
        field_states - the field states in field to color
        color_by_field - the field in the mapping file to color field_states
            by
    """
    # Colors still available for assignment, consumed from the front.
    palette = [matplotlib_rgb_color(data_colors[name].toRGB())
               for name in data_color_order]

    md_map = MetadataMap.parseMetadataMap(map_f)

    for column in (field, color_by_field):
        if column not in md_map.CategoryNames:
            raise ValueError("The field '%s' is not in the metadata mapping "
                             "file's column headers." % column)

    observed_states = md_map.getCategoryValues(samp_ids, field)
    observed_color_by = md_map.getCategoryValues(samp_ids, color_by_field)

    if set(field_states) - set(observed_states):
        raise ValueError("Encountered unrecognizable field state(s) in %r "
                         "for field '%s'." % (field_states, field))

    # Collect, for each requested field state, the distinct color-by states
    # it appears together with.
    partners = defaultdict(set)
    for state, partner in zip(observed_states, observed_color_by):
        if state in field_states:
            partners[state].add(partner)

    legend = {}
    assigned = []
    for state in field_states:
        candidates = partners[state]
        if len(candidates) != 1:
            raise ValueError(
                "The field '%s' to color by does not have a "
                "one-to-one mapping with field '%s'. Coloring "
                "would be ambiguous." % (color_by_field, field)
            )

        (partner,) = candidates
        if partner not in legend:
            # First time this color-by state is seen: it needs a new color.
            if not palette:
                raise ValueError(
                    "There are not enough available QIIME colors "
                    "to color each of the field states in field "
                    "'%s'. Coloring would be ambiguous." % color_by_field
                )
            legend[partner] = palette.pop(0)

        assigned.append(legend[partner])

    return assigned, legend
def _color_field_states(map_f, samp_ids, field, field_states, color_by_field):
    """Color the states of one mapping-file field by the states of another.

    For each entry of field_states, the samples in samp_ids carrying that
    state are inspected to determine the one color_by_field state they all
    share; the entry is then given the color belonging to that
    color_by_field state. Colors come from data_colors, taken in
    data_color_order, and a new one is consumed each time a previously
    unseen color_by_field state is encountered. Field states that share a
    color_by_field state therefore share a color.

    Returns (colors, color_mapping): colors is a list of
    matplotlib-compatible colors, one per entry of field_states, and
    color_mapping is a dictionary from color_by_field state to color (useful
    for building a legend, for example).

    A ValueError is raised when either field is missing from the mapping
    file's columns, when field_states holds a state that does not occur in
    field, when a field state is not tied to exactly one color_by_field
    state, or when there are not enough colors available.

    Arguments:
        map_f - the mapping file (file-like object)
        samp_ids - a list of sample IDs to consider in the mapping file. Only
            these sample IDs will be used when coloring field states
        field - the field in the mapping file to color
        field_states - the field states in field to color
        color_by_field - the field in the mapping file to color field_states
            by
    """
    available_colors = []
    for color_name in data_color_order:
        available_colors.append(
            matplotlib_rgb_color(data_colors[color_name].toRGB()))

    parsed_map = MetadataMap.parseMetadataMap(map_f)

    # Both columns must exist before any values are requested.
    for header in (field, color_by_field):
        if header in parsed_map.CategoryNames:
            continue
        raise ValueError("The field '%s' is not in the metadata mapping "
                         "file's column headers." % header)

    states_in_map = parsed_map.getCategoryValues(samp_ids, field)
    color_by_in_map = parsed_map.getCategoryValues(samp_ids, color_by_field)

    unknown_states = set(field_states) - set(states_in_map)
    if len(unknown_states) > 0:
        raise ValueError("Encountered unrecognizable field state(s) in %r "
                         "for field '%s'." % (field_states, field))

    # Build mapping from one field to the other, restricted to the states
    # that were actually asked for.
    state_to_color_by = {}
    for current, color_by in zip(states_in_map, color_by_in_map):
        if current in field_states:
            state_to_color_by.setdefault(current, []).append(color_by)

    result_colors = []
    color_mapping = {}
    for current in field_states:
        distinct = set(state_to_color_by.get(current, []))
        if len(distinct) != 1:
            raise ValueError("The field '%s' to color by does not have a "
                             "one-to-one mapping with field '%s'. Coloring "
                             "would be ambiguous." % (color_by_field, field))

        color_by = next(iter(distinct))
        if color_by in color_mapping:
            result_colors.append(color_mapping[color_by])
            continue

        # A new color-by state: take the next unused color, if any remain.
        if len(available_colors) == 0:
            raise ValueError("There are not enough available QIIME colors "
                             "to color each of the field states in field "
                             "'%s'. Coloring would be ambiguous."
                             % color_by_field)
        color_mapping[color_by] = available_colors.pop(0)
        result_colors.append(color_mapping[color_by])

    return result_colors, color_mapping
def main():
    """Plot distance comparisons between field states from CLI options.

    Parses the command line (described by ``script_info``), loads the
    distance matrix and metadata mapping file, and asks
    ``get_field_state_comparisons`` for the distances between every field
    state of ``opts.field`` and each comparison group given with ``-c``.
    The result is plotted with ``grouped_distributions`` and written into
    ``opts.output_dir``:

    * ``<field>_Distance_Comparisons.<imagetype>`` - the plot (always).
    * ``<field>_Stats.txt`` - output of ``all_pairs_t_test`` unless
      ``opts.suppress_significance_tests`` is set.
    * ``<field>_Distance_Comparisons.txt`` - tab-delimited raw distances
      when ``opts.save_raw_data`` is set.

    Invalid input is reported through ``option_parser.error``.
    NOTE(review): this assumes ``option_parser.error`` terminates the
    script (as ``optparse.OptionParser.error`` does) - confirm.
    """
    # Imported locally so the block does not depend on a file-level import.
    from functools import cmp_to_key

    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create the output dir if it doesn't already exist.
    try:
        create_dir(opts.output_dir)
    except Exception:
        option_parser.error("Could not create or access output directory "
                            "specified with the -o option.")

    # Parse the distance matrix and mapping file. The files are opened in a
    # `with` block so the handles are released even when parsing fails.
    # NOTE(review): the 'U' (universal newlines) mode is kept from the
    # original; it is rejected by Python >= 3.11.
    try:
        with open(opts.distance_matrix_fp, 'U') as dist_matrix_f:
            dist_matrix_header, dist_matrix = parse_distmat(dist_matrix_f)
    except Exception:
        option_parser.error(
            "This does not look like a valid distance matrix "
            "file. Please supply a valid distance matrix file using the -d "
            "option.")

    try:
        with open(opts.mapping_fp, 'U') as mapping_f:
            mapping, mapping_header, mapping_comments = parse_mapping_file(
                mapping_f)
    except QiimeParseError:
        option_parser.error(
            "This does not look like a valid metadata mapping "
            "file. Please supply a valid mapping file using the -m option.")

    def parse_axis_limit(raw_value, option_name):
        # An axis limit is either a number or the literal 'auto', which is
        # passed on to the plotting code as None.
        try:
            return float(raw_value)
        except ValueError:
            if raw_value == 'auto':
                return None
            option_parser.error("The --%s option must be either a number "
                                "or 'auto'." % option_name)

    # Make sure the y_min and y_max options make sense, as they can be either
    # 'auto' or a number.
    y_min = parse_axis_limit(opts.y_min, "y_min")
    y_max = parse_axis_limit(opts.y_max, "y_max")

    # Parse the field states that will be compared to every other field state.
    # The emptiness check has to happen *before* splitting: once split, the
    # value is always a list and can never be None.
    if not opts.comparison_groups:
        option_parser.error("You must provide at least one field state to "
                            "compare (using the -c option).")
    comparison_field_states = [
        field_state.strip().strip('"').strip("'")
        for field_state in opts.comparison_groups.split(',')
    ]

    # Get distance comparisons between each field state and each of the
    # comparison field states.
    field = opts.field
    comparison_groupings = get_field_state_comparisons(
        dist_matrix_header, dist_matrix, mapping_header, mapping, field,
        comparison_field_states)

    def custom_comparator(x, y):
        # Compare as numbers when both values are numeric, else lexically.
        # The sign is returned directly: truncating the difference with
        # int() would make values less than 1 apart compare as equal.
        try:
            left, right = float(x), float(y)
        except (TypeError, ValueError):
            left, right = x, y
        return (left > right) - (left < right)

    # Grab a list of all field states that had the comparison field states
    # compared against them, sorted as described above. These will be plotted
    # along the x-axis.
    field_states = sorted(comparison_groupings.keys(),
                          key=cmp_to_key(custom_comparator))

    # If the label type is numeric, get a list of all field states in sorted
    # numeric order. These will be used to determine the spacing of the
    # field state 'points' along the x-axis.
    x_spacing = None
    if opts.label_type == "numeric":
        try:
            x_spacing = sorted(float(state) for state in field_states)
        except (TypeError, ValueError):
            option_parser.error("The 'numeric' label type is invalid because "
                                "not all field states could be converted into "
                                "numbers. Please specify a different label "
                                "type.")

    # Accumulate the data for each field state 'point' along the x-axis.
    plot_data = []
    plot_x_axis_labels = []
    for field_state in field_states:
        groupings = comparison_groupings[field_state]
        plot_data.append([groupings[comp_field_state]
                          for comp_field_state in comparison_field_states])
        plot_x_axis_labels.append(field_state)

    # Plot the data and labels.
    plot_title = "Distance Comparisons"
    plot_x_label = field
    plot_y_label = "Distance"

    # If we are creating a bar chart or box plot, grab a list of good data
    # colors to use.
    plot_type = opts.plot_type
    plot_colors = None
    if plot_type in ("bar", "box"):
        plot_colors = [matplotlib_rgb_color(data_colors[color].toRGB())
                       for color in data_color_order]

    # Whether there is data depends on the user's input, so report it as a
    # usage error instead of an assert (asserts vanish under `python -O`).
    if not plot_data:
        option_parser.error("There is no data to plot!")

    width = opts.width
    height = opts.height
    if width <= 0 or height <= 0:
        option_parser.error("The specified width and height of the image must "
                            "be greater than zero.")

    plot_figure = grouped_distributions(
        plot_type,
        plot_data,
        x_values=x_spacing,
        data_point_labels=plot_x_axis_labels,
        distribution_labels=comparison_field_states,
        distribution_markers=plot_colors,
        x_label=plot_x_label,
        y_label=plot_y_label,
        title=plot_title,
        x_tick_labels_orientation=opts.x_tick_labels_orientation,
        y_min=y_min,
        y_max=y_max,
        whisker_length=opts.whisker_length,
        error_bar_type=opts.error_bar_type,
        distribution_width=opts.distribution_width,
        figure_width=width,
        figure_height=height)

    # Save the plot in the specified format.
    output_plot_fp = join(opts.output_dir, "%s_Distance_Comparisons.%s" %
                          (field, opts.imagetype))
    plot_figure.savefig(output_plot_fp, format=opts.imagetype,
                        transparent=opts.transparent)

    if not opts.suppress_significance_tests:
        # Rearrange the plot data into a format suitable for all_pairs_t_test.
        sig_tests_labels = []
        sig_tests_data = []
        for data_point, data_point_label in zip(plot_data,
                                                plot_x_axis_labels):
            for dist, comp_field in zip(data_point, comparison_field_states):
                sig_tests_labels.append('%s vs %s' %
                                        (data_point_label, comp_field))
                sig_tests_data.append(dist)

        sig_tests_results = all_pairs_t_test(
            sig_tests_labels, sig_tests_data, tail_type=opts.tail_type,
            num_permutations=opts.num_permutations)

        # Only open the output file once the results exist, so a failing
        # test run does not leave an empty stats file or an open handle.
        sig_tests_fp = join(opts.output_dir, "%s_Stats.txt" % field)
        with open(sig_tests_fp, 'w') as sig_tests_f:
            sig_tests_f.write(sig_tests_results)

    if opts.save_raw_data:
        # Write the raw plot data into a tab-delimited file, where each line
        # has the distances between a comparison group and another field state
        # 'point' along the x-axis.
        # The asserts below are internal sanity checks, not input validation.
        assert len(plot_x_axis_labels) == len(plot_data), (
            "The number of " +
            "labels do not match the number of points along the x-axis.")

        raw_data_fp = join(opts.output_dir,
                           "%s_Distance_Comparisons.txt" % field)
        with open(raw_data_fp, 'w') as raw_data_f:
            raw_data_f.write("#ComparisonGroup\tFieldState\tDistances\n")

            for label, data in zip(plot_x_axis_labels, plot_data):
                assert len(comparison_field_states) == len(data), (
                    "The " +
                    "number of specified comparison groups does not match " +
                    "the number of groups found at the current point along " +
                    "the x-axis.")

                for comp_field_state, comp_grp_data in zip(
                        comparison_field_states, data):
                    raw_data_f.write(
                        comp_field_state + "\t" + label + "\t" +
                        "\t".join(map(str, comp_grp_data)) + "\n")