def _get_groupings(dist_matrix_header, dist_matrix, groups, within=True,
                   suppress_symmetry_and_hollowness_check=False):
    """Returns a list of distance groupings.

    The return value is a list that contains tuples of three elements: the
    first two elements are the field values being compared, and the third
    element is a list of the distances.

    WARNING: Only symmetric, hollow distance matrices may be used as input.
    Asymmetric distance matrices, such as those obtained by the UniFrac Gain
    metric (i.e. beta_diversity.py -m unifrac_g), should not be used as input.

    Arguments:
        - dist_matrix_header: The distance matrix header.
        - dist_matrix: The distance matrix.
        - groups: A dictionary mapping field value to sample IDs, obtained by
            calling group_by_field().
        - within: If True, distances are grouped within a field value. If
            False, distances are grouped between field values.
        - suppress_symmetry_and_hollowness_check: By default, the input
            distance matrix will be checked for symmetry and hollowness. It is
            recommended to leave this check in place for safety, as the check
            is fairly fast. However, if you *know* you have a symmetric and
            hollow distance matrix, you can disable this check for small
            performance gains on extremely large distance matrices.

    If within is True, the zeros along the diagonal of the distance matrix are
    omitted. Groupings that end up with no distances are left out of the
    result.

    Raises ValueError if the symmetry/hollowness check is enabled and fails.
    """
    # Note: Much of this code is taken from Jeremy Widmann's
    # distances_by_groups() function, part of make_distance_histograms.py.
    if not suppress_symmetry_and_hollowness_check:
        if not is_symmetric_and_hollow(dist_matrix):
            raise ValueError("The distance matrix must be symmetric and "
                             "hollow.")

    result = []
    # Materialise the items so they can be indexed by position in the
    # between-groups branch (a dict view is not subscriptable on Python 3).
    group_items = list(groups.items())

    for i, (row_group, row_ids) in enumerate(group_items):
        row_indices = _get_indices(dist_matrix_header, row_ids)
        if within:
            # Rows and columns are the same samples, so only the strict
            # upper triangle is collected: this omits the diagonal and
            # avoids reporting each symmetric pair twice.
            block = dist_matrix[row_indices][:, row_indices]
            size = len(row_indices)
            distances = [block[row][col]
                         for row in range(size)
                         for col in range(row + 1, size)]
            if distances:
                result.append((row_group, row_group, distances))
        else:
            # Rows and columns are different groups: pair this group with
            # every later group and return the whole block.
            for j in range(i + 1, len(group_items)):
                col_group, col_ids = group_items[j]
                col_indices = _get_indices(dist_matrix_header, col_ids)
                vals = dist_matrix[row_indices][:, col_indices]
                # Flatten the array into a single-level list.
                vals = list(vals.flat)
                if vals:
                    result.append((row_group, col_group, vals))
    return result
def main():
    """Script entry point: validate the inputs, then build the outputs.

    Steps, in order:
      1. Parse the command line using the module-level script_info.
      2. Validate the distance matrix file, the mapping file, the
         background color and (if given) the prefs file.
      3. Work out which mapping fields to use: the -f option first, then
         the prefs 'FIELDS' entry, then get_interesting_mapping_fields().
      4. Group distances with group_distances().
      5. Unless HTML output is suppressed, draw the histograms, write the
         main HTML page and copy histograms.js next to it.
      6. If opts.monte_carlo_iters is positive, run both Monte Carlo
         routines.

    Invalid input is reported through option_parser.error().
    NOTE(review): option_parser.error() is assumed not to return (standard
    optparse behaviour); the code after each call relies on that.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Some code for error checking of input args:

    # Check if distance_matrix_file is valid. Any ordinary failure while
    # opening or parsing is reported as an invalid file; KeyboardInterrupt
    # and SystemExit are deliberately allowed to propagate.
    try:
        with open(opts.distance_matrix_file, "U") as dm_f:
            d_header, d_mat = parse_distmat(dm_f)
    except Exception:
        option_parser.error(
            "This does not look like a valid distance matrix file. "
            "Please supply a valid distance matrix file using the -d option."
        )

    if not is_symmetric_and_hollow(d_mat):
        option_parser.error("The distance matrix must be symmetric and "
                            "hollow.")

    # Check if map_fname is valid:
    try:
        with open(opts.map_fname, "U") as map_f:
            mapping, m_header, m_comments = parse_mapping_file(map_f)
    except QiimeParseError:
        option_parser.error(
            "This does not look like a valid metadata mapping file. "
            "Please supply a valid mapping file using the -m option."
        )

    # make sure background_color is valid
    if opts.background_color not in ["black", "white"]:
        option_parser.error(
            ("'%s' is not a valid background color. Please pass in either "
             "'black' or 'white' using the -k option.")
            % (opts.background_color)
        )

    # make sure prefs file is valid if it exists
    prefs = None
    if opts.prefs_path is not None:
        try:
            with open(opts.prefs_path, "U") as prefs_f:
                prefs_file = prefs_f.read()
        except IOError:
            option_parser.error(
                ("Provided prefs file, '%s', does not exist. Please pass in "
                 "a valid prefs file with the -p option.")
                % (opts.prefs_path)
            )
        prefs = parse_prefs_file(prefs_file)

    (color_prefs, color_data, background_color, label_color, ball_scale,
     arrow_colors) = sample_color_prefs_and_map_data_from_options(opts)

    # list of labelname, groups, colors, data_colors, data_color_order
    groups_and_colors = list(
        iter_color_groups(mapping=color_data["map"], prefs=color_prefs))

    # dict mapping labelname to list of: [groups, colors, data_colors,
    # data_color_order]
    field_to_colors = {}
    for color_info in groups_and_colors:
        field_to_colors[color_info[0]] = color_info[1:]

    qiime_dir = get_qiime_project_dir() + "/qiime/support_files/"

    fields = opts.fields
    if fields is not None:
        # Comma-separated list; drop surrounding whitespace, then any
        # double or single quotes wrapped around each name.
        fields = [name.strip().strip('"').strip("'")
                  for name in fields.split(",")]
    elif prefs is not None:
        fields = prefs.get("FIELDS", None)
    else:
        fields = get_interesting_mapping_fields(mapping, m_header)

    # Check that all provided fields are valid:
    if fields is not None:
        for f in fields:
            if f not in m_header:
                option_parser.error(
                    ("The field, %s, is not in the provided mapping file. "
                     "Please supply correct fields (using the -f option or "
                     "providing a 'FIELDS' list in the prefs file) "
                     "corresponding to fields in mapping file.")
                    % (f)
                )

    within_distances, between_distances, dmat = group_distances(
        mapping_file=opts.map_fname,
        dmatrix_file=opts.distance_matrix_file,
        fields=fields,
        dir_prefix=get_random_directory_name(output_dir=opts.dir_path,
                                             prefix="distances"),
    )

    if not opts.suppress_html_output:
        # histograms output path
        histograms_path = path.join(opts.dir_path, "histograms")
        try:
            mkdir(histograms_path)
        except OSError:
            # Expected when the directory already exists; anything else
            # (e.g. permissions) is a real failure and must not be hidden.
            if not path.isdir(histograms_path):
                raise

        # draw all histograms
        distances_dict, label_to_histogram_filename = draw_all_histograms(
            single_field=within_distances,
            paired_field=between_distances,
            dmat=dmat,
            histogram_dir=histograms_path,
            field_to_color_prefs=field_to_colors,
            background_color=background_color,
        )

        # Get relative path to histogram files.
        label_to_histogram_filename_relative = _make_relative_paths(
            label_to_histogram_filename, opts.dir_path)

        dm_fname = path.split(opts.distance_matrix_file)[-1]
        basename = path.splitext(dm_fname)[0]
        outfile_name = basename + "_distance_histograms.html"
        make_main_html(
            distances_dict=distances_dict,
            label_to_histogram_filename=label_to_histogram_filename_relative,
            root_outdir=opts.dir_path,
            outfile_name=outfile_name,
            title="Distance Histograms",
        )

        # Handle saving web resources locally.
        # javascript file
        javascript_path = path.join(opts.dir_path, "js")
        try:
            mkdir(javascript_path)
        except OSError:
            # Same policy as for the histograms directory above.
            if not path.isdir(javascript_path):
                raise

        # Read the source first so an unreadable source never leaves a
        # truncated copy behind; both handles are closed deterministically.
        with open(qiime_dir + "js/histograms.js") as js_in:
            js_source = js_in.read()
        with open(javascript_path + "/histograms.js", "w") as js_out:
            js_out.write(js_source)

    monte_carlo_iters = opts.monte_carlo_iters
    if monte_carlo_iters > 0:
        # Do Monte Carlo for all fields
        monte_carlo_group_distances(
            mapping_file=opts.map_fname,
            dmatrix_file=opts.distance_matrix_file,
            prefs=prefs,
            dir_prefix=opts.dir_path,
            fields=fields,
            default_iters=monte_carlo_iters,
        )

        # Do Monte Carlo for within and between fields
        monte_carlo_group_distances_within_between(
            single_field=within_distances,
            paired_field=between_distances,
            dmat=dmat,
            dir_prefix=opts.dir_path,
            num_iters=monte_carlo_iters,
        )
def main():
    """Script entry point: validate the inputs, then build the outputs.

    Steps, in order:
      1. Parse the command line using the module-level script_info.
      2. Validate the distance matrix file, the mapping file, the
         background color and (if given) the prefs file.
      3. Work out which mapping fields to use: the -f option first, then
         the prefs 'FIELDS' entry, then get_interesting_mapping_fields().
      4. Group distances with group_distances().
      5. Unless HTML output is suppressed, draw the histograms, write the
         main HTML page and copy histograms.js next to it.
      6. If opts.monte_carlo_iters is positive, run both Monte Carlo
         routines.

    Invalid input is reported through option_parser.error().
    NOTE(review): option_parser.error() is assumed not to return (standard
    optparse behaviour); the code after each call relies on that.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Some code for error checking of input args:

    # Check if distance_matrix_file is valid. Any ordinary failure while
    # opening or parsing is reported as an invalid file; KeyboardInterrupt
    # and SystemExit are deliberately allowed to propagate.
    try:
        with open(opts.distance_matrix_file, 'U') as dm_f:
            d_header, d_mat = parse_distmat(dm_f)
    except Exception:
        option_parser.error(
            "This does not look like a valid distance matrix file. "
            "Please supply a valid distance matrix file using the -d option.")

    if not is_symmetric_and_hollow(d_mat):
        option_parser.error("The distance matrix must be symmetric and "
                            "hollow.")

    # Check if map_fname is valid:
    try:
        with open(opts.map_fname, 'U') as map_f:
            mapping, m_header, m_comments = parse_mapping_file(map_f)
    except QiimeParseError:
        option_parser.error(
            "This does not look like a valid metadata mapping file. "
            "Please supply a valid mapping file using the -m option.")

    # make sure background_color is valid
    if opts.background_color not in ['black', 'white']:
        option_parser.error(
            ("'%s' is not a valid background color. Please pass in either "
             "'black' or 'white' using the -k option.")
            % (opts.background_color))

    # make sure prefs file is valid if it exists
    prefs = None
    if opts.prefs_path is not None:
        try:
            with open(opts.prefs_path, 'U') as prefs_f:
                prefs_file = prefs_f.read()
        except IOError:
            option_parser.error(
                ("Provided prefs file, '%s', does not exist. Please pass in "
                 "a valid prefs file with the -p option.")
                % (opts.prefs_path))
        prefs = parse_prefs_file(prefs_file)

    (color_prefs, color_data, background_color, label_color, ball_scale,
     arrow_colors) = sample_color_prefs_and_map_data_from_options(opts)

    # list of labelname, groups, colors, data_colors, data_color_order
    groups_and_colors = list(
        iter_color_groups(mapping=color_data['map'], prefs=color_prefs))

    # dict mapping labelname to list of: [groups, colors, data_colors,
    # data_color_order]
    field_to_colors = {}
    for color_info in groups_and_colors:
        field_to_colors[color_info[0]] = color_info[1:]

    qiime_dir = get_qiime_project_dir() + '/qiime/support_files/'

    fields = opts.fields
    if fields is not None:
        # Comma-separated list; drop surrounding whitespace, then any
        # double or single quotes wrapped around each name.
        fields = [name.strip().strip('"').strip("'")
                  for name in fields.split(',')]
    elif prefs is not None:
        fields = prefs.get('FIELDS', None)
    else:
        fields = get_interesting_mapping_fields(mapping, m_header)

    # Check that all provided fields are valid:
    if fields is not None:
        for f in fields:
            if f not in m_header:
                option_parser.error(
                    ("The field, %s, is not in the provided mapping file. "
                     "Please supply correct fields (using the -f option or "
                     "providing a 'FIELDS' list in the prefs file) "
                     "corresponding to fields in mapping file.")
                    % (f))

    within_distances, between_distances, dmat = group_distances(
        mapping_file=opts.map_fname,
        dmatrix_file=opts.distance_matrix_file,
        fields=fields,
        dir_prefix=get_random_directory_name(output_dir=opts.dir_path,
                                             prefix='distances'))

    if not opts.suppress_html_output:
        # histograms output path
        histograms_path = path.join(opts.dir_path, 'histograms')
        try:
            mkdir(histograms_path)
        except OSError:
            # Expected when the directory already exists; anything else
            # (e.g. permissions) is a real failure and must not be hidden.
            if not path.isdir(histograms_path):
                raise

        # draw all histograms
        distances_dict, label_to_histogram_filename = draw_all_histograms(
            single_field=within_distances,
            paired_field=between_distances,
            dmat=dmat,
            histogram_dir=histograms_path,
            field_to_color_prefs=field_to_colors,
            background_color=background_color)

        # Get relative path to histogram files.
        label_to_histogram_filename_relative = _make_relative_paths(
            label_to_histogram_filename, opts.dir_path)

        dm_fname = path.split(opts.distance_matrix_file)[-1]
        basename = path.splitext(dm_fname)[0]
        outfile_name = basename + '_distance_histograms.html'
        make_main_html(
            distances_dict=distances_dict,
            label_to_histogram_filename=label_to_histogram_filename_relative,
            root_outdir=opts.dir_path,
            outfile_name=outfile_name,
            title='Distance Histograms')

        # Handle saving web resources locally.
        # javascript file
        javascript_path = path.join(opts.dir_path, 'js')
        try:
            mkdir(javascript_path)
        except OSError:
            # Same policy as for the histograms directory above.
            if not path.isdir(javascript_path):
                raise

        # Read the source first so an unreadable source never leaves a
        # truncated copy behind; both handles are closed deterministically.
        with open(qiime_dir + 'js/histograms.js') as js_in:
            js_source = js_in.read()
        with open(javascript_path + '/histograms.js', 'w') as js_out:
            js_out.write(js_source)

    monte_carlo_iters = opts.monte_carlo_iters
    if monte_carlo_iters > 0:
        # Do Monte Carlo for all fields
        monte_carlo_group_distances(
            mapping_file=opts.map_fname,
            dmatrix_file=opts.distance_matrix_file,
            prefs=prefs,
            dir_prefix=opts.dir_path,
            fields=fields,
            default_iters=monte_carlo_iters)

        # Do Monte Carlo for within and between fields
        monte_carlo_group_distances_within_between(
            single_field=within_distances,
            paired_field=between_distances,
            dmat=dmat,
            dir_prefix=opts.dir_path,
            num_iters=monte_carlo_iters)