def main():
    """Validate the command-line inputs and build distance histograms.

    Steps, in order:
      1. Parse the command line and check the distance matrix, the mapping
         file, the background color and the optional prefs file. Every
         problem is reported through ``option_parser.error``.
      2. Choose the mapping fields to analyse: the ``opts.fields`` value if
         given, else the ``FIELDS`` entry of the prefs file, else the result
         of ``get_interesting_mapping_fields``. Each chosen field must be
         present in the mapping header.
      3. Compute the grouped distances and, unless HTML output is
         suppressed, draw the histograms, write the HTML page and copy
         ``histograms.js`` into the output directory.
      4. Run both Monte Carlo steps when ``opts.monte_carlo_iters`` > 0.

    NOTE(review): the code after each ``option_parser.error`` call relies on
    that call not returning (as ``optparse.OptionParser.error`` behaves) --
    confirm against ``parse_command_line_parameters``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Check if distance_matrix_file is valid. `except Exception` is used
    # instead of a bare `except` so that KeyboardInterrupt / SystemExit are
    # not reported as a malformed file; the `with` closes the handle.
    try:
        with open(opts.distance_matrix_file, "U") as dm_file:
            d_header, d_mat = parse_distmat(dm_file)
    except Exception:
        option_parser.error(
            "This does not look like a valid distance matrix file. "
            "Please supply a valid distance matrix file using the -d option."
        )

    if not is_symmetric_and_hollow(d_mat):
        option_parser.error("The distance matrix must be symmetric and " "hollow.")

    # Check if map_fname is valid.
    try:
        with open(opts.map_fname, "U") as map_file:
            mapping, m_header, m_comments = parse_mapping_file(map_file)
    except QiimeParseError:
        option_parser.error(
            "This does not look like a valid metadata mapping file. "
            "Please supply a valid mapping file using the -m option."
        )

    # Make sure background_color is valid.
    if opts.background_color not in ["black", "white"]:
        option_parser.error(
            "'%s' is not a valid background color. Please pass in either "
            "'black' or 'white' using the -k option." % (opts.background_color)
        )

    # Read and parse the prefs file only when one was supplied.
    prefs = None
    if opts.prefs_path is not None:
        try:
            with open(opts.prefs_path, "U") as prefs_handle:
                prefs_contents = prefs_handle.read()
        except IOError:
            option_parser.error(
                "Provided prefs file, '%s', does not exist. Please pass in "
                "a valid prefs file with the -p option." % (opts.prefs_path)
            )
        prefs = parse_prefs_file(prefs_contents)

    (
        color_prefs,
        color_data,
        background_color,
        label_color,
        ball_scale,
        arrow_colors,
    ) = sample_color_prefs_and_map_data_from_options(opts)

    # Each item is: labelname, groups, colors, data_colors, data_color_order
    groups_and_colors = list(
        iter_color_groups(mapping=color_data["map"], prefs=color_prefs)
    )

    # Dict mapping labelname to: [groups, colors, data_colors,
    # data_color_order]
    field_to_colors = {}
    for color_info in groups_and_colors:
        field_to_colors[color_info[0]] = color_info[1:]

    qiime_dir = get_qiime_project_dir() + "/qiime/support_files/"

    fields = opts.fields
    if fields is not None:
        # Split the comma-separated list, then drop surrounding whitespace
        # and any quote characters around each field name.
        fields = [
            field.strip().strip('"').strip("'") for field in fields.split(",")
        ]
    elif prefs is not None:
        fields = prefs.get("FIELDS", None)
    else:
        fields = get_interesting_mapping_fields(mapping, m_header)

    # Check that all provided fields are valid.
    if fields is not None:
        for f in fields:
            if f not in m_header:
                option_parser.error(
                    "The field, %s, is not in the provided mapping file. "
                    "Please supply correct fields (using the -f option or "
                    "providing a 'FIELDS' list in the prefs file) "
                    "corresponding to fields in mapping file." % (f)
                )

    within_distances, between_distances, dmat = group_distances(
        mapping_file=opts.map_fname,
        dmatrix_file=opts.distance_matrix_file,
        fields=fields,
        dir_prefix=get_random_directory_name(
            output_dir=opts.dir_path, prefix="distances"
        ),
    )

    if not opts.suppress_html_output:
        # Histograms output path.
        histograms_path = path.join(opts.dir_path, "histograms")
        try:
            mkdir(histograms_path)
        except OSError:
            # Raised if dir exists (best effort: other OSErrors are ignored
            # here as well).
            pass

        # Draw all histograms.
        distances_dict, label_to_histogram_filename = draw_all_histograms(
            single_field=within_distances,
            paired_field=between_distances,
            dmat=dmat,
            histogram_dir=histograms_path,
            field_to_color_prefs=field_to_colors,
            background_color=background_color,
        )

        # Get relative path to histogram files.
        label_to_histogram_filename_relative = _make_relative_paths(
            label_to_histogram_filename, opts.dir_path
        )

        dm_fname = path.split(opts.distance_matrix_file)[-1]
        basename = path.splitext(dm_fname)[0]
        outfile_name = basename + "_distance_histograms.html"
        make_main_html(
            distances_dict=distances_dict,
            label_to_histogram_filename=label_to_histogram_filename_relative,
            root_outdir=opts.dir_path,
            outfile_name=outfile_name,
            title="Distance Histograms",
        )

        # Handle saving web resources locally: copy the javascript file.
        javascript_path = path.join(opts.dir_path, "js")
        try:
            mkdir(javascript_path)
        except OSError:
            # Raised if dir exists.
            pass
        # Read the source first so that a missing support file does not
        # leave an empty output file behind; both handles are closed by
        # their `with` blocks.
        with open(qiime_dir + "js/histograms.js") as js_in:
            js_contents = js_in.read()
        with open(javascript_path + "/histograms.js", "w") as js_out:
            js_out.write(js_contents)

    monte_carlo_iters = opts.monte_carlo_iters
    if monte_carlo_iters > 0:
        # Do Monte Carlo for all fields.
        monte_carlo_group_distances(
            mapping_file=opts.map_fname,
            dmatrix_file=opts.distance_matrix_file,
            prefs=prefs,
            dir_prefix=opts.dir_path,
            fields=fields,
            default_iters=monte_carlo_iters,
        )

        # Do Monte Carlo for within and between fields.
        monte_carlo_group_distances_within_between(
            single_field=within_distances,
            paired_field=between_distances,
            dmat=dmat,
            dir_prefix=opts.dir_path,
            num_iters=monte_carlo_iters,
        )
def main():
    """Check the script options, then generate distance histograms.

    The function:
      * parses the command line and validates the distance matrix, the
        mapping file, the background color and (if given) the prefs file,
        reporting any problem via ``option_parser.error``;
      * determines which mapping fields to use (``opts.fields``, otherwise
        the prefs file's ``FIELDS`` entry, otherwise
        ``get_interesting_mapping_fields``) and verifies that each is a
        mapping-file header;
      * groups the distances and, unless ``opts.suppress_html_output`` is
        set, draws histograms, writes the main HTML file and copies
        ``histograms.js`` next to it;
      * runs the Monte Carlo analyses if ``opts.monte_carlo_iters`` > 0.

    NOTE(review): statements following ``option_parser.error`` assume that
    call terminates the program (the ``optparse`` behaviour) -- confirm.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Check if distance_matrix_file is valid. Catch Exception rather than
    # everything, so Ctrl-C / SystemExit are not mistaken for a bad file,
    # and let the `with` block close the file handle.
    try:
        with open(opts.distance_matrix_file, 'U') as dm_f:
            d_header, d_mat = parse_distmat(dm_f)
    except Exception:
        option_parser.error(
            "This does not look like a valid distance matrix file. "
            "Please supply a valid distance matrix file using the -d option.")

    if not is_symmetric_and_hollow(d_mat):
        option_parser.error("The distance matrix must be symmetric and "
                            "hollow.")

    # Check if map_fname is valid.
    try:
        with open(opts.map_fname, 'U') as map_f:
            mapping, m_header, m_comments = parse_mapping_file(map_f)
    except QiimeParseError:
        option_parser.error(
            "This does not look like a valid metadata mapping file. "
            "Please supply a valid mapping file using the -m option.")

    # Make sure background_color is valid.
    if opts.background_color not in ['black', 'white']:
        option_parser.error(
            "'%s' is not a valid background color. Please pass in either "
            "'black' or 'white' using the -k option."
            % (opts.background_color))

    # Make sure the prefs file is readable if one was given, and parse it.
    prefs = None
    if opts.prefs_path is not None:
        try:
            with open(opts.prefs_path, 'U') as prefs_f:
                prefs_text = prefs_f.read()
        except IOError:
            option_parser.error(
                "Provided prefs file, '%s', does not exist. Please pass in "
                "a valid prefs file with the -p option."
                % (opts.prefs_path))
        prefs = parse_prefs_file(prefs_text)

    color_prefs, color_data, background_color, label_color, ball_scale, \
        arrow_colors = sample_color_prefs_and_map_data_from_options(opts)

    # List of labelname, groups, colors, data_colors, data_color_order
    groups_and_colors = list(iter_color_groups(mapping=color_data['map'],
                                               prefs=color_prefs))

    # Dict mapping labelname to list of: [groups, colors, data_colors,
    # data_color_order]
    field_to_colors = {}
    for color_info in groups_and_colors:
        field_to_colors[color_info[0]] = color_info[1:]

    qiime_dir = get_qiime_project_dir() + '/qiime/support_files/'

    fields = opts.fields
    if fields is not None:
        # Comma-separated list: trim whitespace, then surrounding quotes.
        fields = [name.strip().strip('"').strip("'")
                  for name in fields.split(',')]
    elif prefs is not None:
        fields = prefs.get('FIELDS', None)
    else:
        fields = get_interesting_mapping_fields(mapping, m_header)

    # Check that all provided fields are valid.
    if fields is not None:
        for f in fields:
            if f not in m_header:
                option_parser.error(
                    "The field, %s, is not in the provided mapping file. "
                    "Please supply correct fields (using the -f option or "
                    "providing a 'FIELDS' list in the prefs file) "
                    "corresponding to fields in mapping file." % (f))

    within_distances, between_distances, dmat = group_distances(
        mapping_file=opts.map_fname,
        dmatrix_file=opts.distance_matrix_file,
        fields=fields,
        dir_prefix=get_random_directory_name(output_dir=opts.dir_path,
                                             prefix='distances'))

    if not opts.suppress_html_output:
        # Histograms output path.
        histograms_path = path.join(opts.dir_path, 'histograms')
        try:
            mkdir(histograms_path)
        except OSError:
            # Raised if dir exists (any other OSError is ignored too).
            pass

        # Draw all histograms.
        distances_dict, label_to_histogram_filename = draw_all_histograms(
            single_field=within_distances,
            paired_field=between_distances,
            dmat=dmat,
            histogram_dir=histograms_path,
            field_to_color_prefs=field_to_colors,
            background_color=background_color)

        # Get relative path to histogram files.
        label_to_histogram_filename_relative = _make_relative_paths(
            label_to_histogram_filename, opts.dir_path)

        dm_fname = path.split(opts.distance_matrix_file)[-1]
        basename = path.splitext(dm_fname)[0]
        outfile_name = basename + '_distance_histograms.html'
        make_main_html(
            distances_dict=distances_dict,
            label_to_histogram_filename=label_to_histogram_filename_relative,
            root_outdir=opts.dir_path,
            outfile_name=outfile_name,
            title='Distance Histograms')

        # Handle saving web resources locally: the javascript file.
        javascript_path = path.join(opts.dir_path, 'js')
        try:
            mkdir(javascript_path)
        except OSError:
            # Raised if dir exists.
            pass
        # The source is read before the destination is opened, so a missing
        # support file cannot leave a truncated copy; `with` closes both.
        with open(qiime_dir + 'js/histograms.js') as js_src:
            js_text = js_src.read()
        with open(javascript_path + '/histograms.js', 'w') as js_out:
            js_out.write(js_text)

    monte_carlo_iters = opts.monte_carlo_iters
    if monte_carlo_iters > 0:
        # Do Monte Carlo for all fields.
        monte_carlo_group_distances(
            mapping_file=opts.map_fname,
            dmatrix_file=opts.distance_matrix_file,
            prefs=prefs,
            dir_prefix=opts.dir_path,
            fields=fields,
            default_iters=monte_carlo_iters)

        # Do Monte Carlo for within and between fields.
        monte_carlo_group_distances_within_between(
            single_field=within_distances,
            paired_field=between_distances,
            dmat=dmat,
            dir_prefix=opts.dir_path,
            num_iters=monte_carlo_iters)
def run_beta_diversity_through_plots(otu_table_fp,
                                     mapping_fp,
                                     output_dir,
                                     command_handler,
                                     params,
                                     qiime_config,
                                     color_by_interesting_fields_only=True,
                                     sampling_depth=None,
                                     tree_fp=None,
                                     parallel=False,
                                     logger=None,
                                     suppress_emperor_plots=False,
                                     suppress_md5=False,
                                     status_update_callback=print_to_stdout):
    """ Compute beta diversity distance matrices, run PCoA, and generate
         emperor plots

        The steps performed by this function are:

         1) Optionally (when sampling_depth is truthy) sample the OTU table
            at even depth
         2) Compute a beta diversity distance matrix for each metric
         3) Perform a principal coordinates analysis on the result of step 2
         4) Generate an emperor plot for each result of step 3 (unless
            suppress_emperor_plots is set)

        The commands are only assembled here; they are executed by
        command_handler. Output directories are created immediately.

        params is indexed by script name; a missing section is treated as
        "no extra parameters". When logger is None a WorkflowLogger is
        created and command_handler is asked to close it on success.

        Returns a list of (metric, distance_matrix_fp) tuples, one per
        metric, in the order the metrics were processed.
    """
    # Prepare some variables for the later steps
    otu_table_filename = split(otu_table_fp)[1]
    otu_table_basename, otu_table_ext = splitext(otu_table_filename)
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()

    # Only close the logger on success if it was created here.
    close_logger_on_success = logger is None
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp, tree_fp])

    # Use a context manager so the mapping file handle is always closed.
    with open(mapping_fp, 'U') as mapping_f:
        mapping_data, mapping_header, mapping_comments = \
            parse_mapping_file(mapping_f)

    # Get the interesting mapping fields to color by -- if none are
    # interesting, take all of them. Interesting is defined as those
    # which have greater than one value and fewer values than the number
    # of samples
    # NOTE(review): mapping_fields is not used by any command built in this
    # function; it is kept so the mapping file is still parsed and checked.
    if color_by_interesting_fields_only:
        mapping_fields = \
            get_interesting_mapping_fields(mapping_data, mapping_header) or \
            mapping_header
    else:
        mapping_fields = mapping_header
    mapping_fields = ','.join(mapping_fields)

    if sampling_depth:
        # Sample the OTU table at even depth
        even_sampled_otu_table_fp = '%s/%s_even%d%s' % \
            (output_dir, otu_table_basename, sampling_depth, otu_table_ext)
        single_rarefaction_cmd = \
            '%s %s/single_rarefaction.py -i %s -o %s -d %d' % \
            (python_exe_fp, script_dir, otu_table_fp,
             even_sampled_otu_table_fp, sampling_depth)
        commands.append([
            ('Sample OTU table at %d seqs/sample' % sampling_depth,
             single_rarefaction_cmd)])
        # All later steps work on the rarefied table.
        otu_table_fp = even_sampled_otu_table_fp
        otu_table_filename = split(even_sampled_otu_table_fp)[1]
        otu_table_basename, otu_table_ext = splitext(otu_table_filename)

    try:
        beta_diversity_metrics = params['beta_diversity']['metrics'].split(',')
    except KeyError:
        beta_diversity_metrics = ['weighted_unifrac', 'unweighted_unifrac']

    dm_fps = []
    for beta_diversity_metric in beta_diversity_metrics:

        # Prep the beta-diversity command. 'metrics' is dropped from the
        # copy because the metric is passed explicitly, one per command.
        try:
            bdiv_params_copy = params['beta_diversity'].copy()
        except KeyError:
            bdiv_params_copy = {}
        bdiv_params_copy.pop('metrics', None)

        params_str = get_params_str(bdiv_params_copy)

        if tree_fp:
            params_str = '%s -t %s ' % (params_str, tree_fp)

        # Build the beta-diversity command
        if parallel:
            # Grab the parallel-specific parameters. They are joined with
            # an explicit space so they cannot fuse with the last
            # beta-diversity option when no tree was appended.
            try:
                params_str = '%s %s' % (params_str,
                                        get_params_str(params['parallel']))
            except KeyError:
                pass
            beta_div_cmd = (
                '%s %s/parallel_beta_diversity.py -i %s -o %s '
                '--metrics %s -T %s' %
                (python_exe_fp, script_dir, otu_table_fp,
                 output_dir, beta_diversity_metric, params_str))
        else:
            beta_div_cmd = (
                '%s %s/beta_diversity.py -i %s -o %s --metrics %s %s' %
                (python_exe_fp, script_dir, otu_table_fp,
                 output_dir, beta_diversity_metric, params_str))
        commands.append(
            [('Beta Diversity (%s)' % beta_diversity_metric, beta_div_cmd)])

        # Rename <metric>_<otu table basename>.txt to <metric>_dm.txt
        orig_beta_div_fp = '%s/%s_%s.txt' % \
            (output_dir, beta_diversity_metric, otu_table_basename)
        beta_div_fp = '%s/%s_dm.txt' % \
            (output_dir, beta_diversity_metric)
        commands.append(
            [('Rename distance matrix (%s)' % beta_diversity_metric,
              'mv %s %s' % (orig_beta_div_fp, beta_div_fp))])
        dm_fps.append((beta_diversity_metric, beta_div_fp))

        # Prep the principal coordinates command
        pc_fp = '%s/%s_pc.txt' % (output_dir, beta_diversity_metric)
        try:
            params_str = get_params_str(params['principal_coordinates'])
        except KeyError:
            params_str = ''
        # Build the principal coordinates command
        pc_cmd = '%s %s/principal_coordinates.py -i %s -o %s %s' % \
            (python_exe_fp, script_dir, beta_div_fp, pc_fp, params_str)
        commands.append(
            [('Principal coordinates (%s)' % beta_diversity_metric, pc_cmd)])

        # Generate emperor plots
        if not suppress_emperor_plots:
            # Prep the emperor plots command
            emperor_dir = '%s/%s_emperor_pcoa_plot/' % \
                (output_dir, beta_diversity_metric)
            create_dir(emperor_dir)
            try:
                params_str = get_params_str(params['make_emperor'])
            except KeyError:
                params_str = ''
            # Build the emperor plots command (make_emperor.py is invoked
            # by name, without the python executable / script directory)
            emperor_command = \
                'make_emperor.py -i %s -o %s -m %s %s' % \
                (pc_fp, emperor_dir, mapping_fp, params_str)

            # Label fixed: the original text had an unbalanced ')'.
            commands.append(
                [('Make emperor plots (%s)' % beta_diversity_metric,
                  emperor_command)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)

    return dm_fps
def run_beta_diversity_through_plots(otu_table_fp,
                                     mapping_fp,
                                     output_dir,
                                     command_handler,
                                     params,
                                     qiime_config,
                                     color_by_interesting_fields_only=True,
                                     sampling_depth=None,
                                     tree_fp=None,
                                     parallel=False,
                                     logger=None,
                                     suppress_emperor_plots=False,
                                     suppress_md5=False,
                                     status_update_callback=print_to_stdout):
    """ Compute beta diversity distance matrices, run PCoA, and generate
         emperor plots

        The steps performed by this function are:

         1) Optionally (when sampling_depth is truthy) sample the OTU table
            at even depth
         2) Compute a beta diversity distance matrix for each metric
         3) Perform a principal coordinates analysis on the result of step 2
         4) Generate an emperor plot for each result of step 3 (unless
            suppress_emperor_plots is set)

        This function only builds the command list; command_handler runs
        it. Output directories are created right away. All scripts are
        invoked by bare name.

        params is indexed by script name; a missing section means "no
        extra parameters". If logger is None, a WorkflowLogger is created
        and command_handler is told to close it on success.

        Returns a list of (metric, distance_matrix_fp) tuples in processing
        order.
    """
    # Prepare some variables for the later steps
    otu_table_filename = split(otu_table_fp)[1]
    otu_table_basename, otu_table_ext = splitext(otu_table_filename)
    create_dir(output_dir)
    commands = []

    # A logger created here is also closed here (via command_handler).
    close_logger_on_success = logger is None
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp, tree_fp])

    # Context manager guarantees the mapping file handle is closed.
    with open(mapping_fp, 'U') as mapping_f:
        mapping_data, mapping_header, mapping_comments = \
            parse_mapping_file(mapping_f)

    # Get the interesting mapping fields to color by -- if none are
    # interesting, take all of them. Interesting is defined as those
    # which have greater than one value and fewer values than the number
    # of samples
    # NOTE(review): no command assembled below consumes mapping_fields; the
    # computation is retained so the mapping file is still parsed up front.
    if color_by_interesting_fields_only:
        mapping_fields = \
            get_interesting_mapping_fields(mapping_data, mapping_header) or \
            mapping_header
    else:
        mapping_fields = mapping_header
    mapping_fields = ','.join(mapping_fields)

    if sampling_depth:
        # Sample the OTU table at even depth
        even_sampled_otu_table_fp = '%s/%s_even%d%s' % \
            (output_dir, otu_table_basename, sampling_depth, otu_table_ext)
        single_rarefaction_cmd = \
            'single_rarefaction.py -i %s -o %s -d %d' % \
            (otu_table_fp, even_sampled_otu_table_fp, sampling_depth)
        commands.append([
            ('Sample OTU table at %d seqs/sample' % sampling_depth,
             single_rarefaction_cmd)
        ])
        # Every later step uses the rarefied table.
        otu_table_fp = even_sampled_otu_table_fp
        otu_table_filename = split(even_sampled_otu_table_fp)[1]
        otu_table_basename, otu_table_ext = splitext(otu_table_filename)

    try:
        beta_diversity_metrics = params['beta_diversity']['metrics'].split(',')
    except KeyError:
        beta_diversity_metrics = ['weighted_unifrac', 'unweighted_unifrac']

    dm_fps = []
    for beta_diversity_metric in beta_diversity_metrics:

        # Prep the beta-diversity command; the metric is passed explicitly,
        # so 'metrics' is removed from the copied parameters.
        try:
            bdiv_params_copy = params['beta_diversity'].copy()
        except KeyError:
            bdiv_params_copy = {}
        bdiv_params_copy.pop('metrics', None)

        params_str = get_params_str(bdiv_params_copy)

        if tree_fp:
            params_str = '%s -t %s ' % (params_str, tree_fp)

        # Build the beta-diversity command
        if parallel:
            # Grab the parallel-specific parameters, separated by a space
            # so they never run into the preceding option text.
            try:
                params_str = '%s %s' % (params_str,
                                        get_params_str(params['parallel']))
            except KeyError:
                pass
            beta_div_cmd = (
                'parallel_beta_diversity.py -i %s -o %s --metrics %s -T %s' %
                (otu_table_fp, output_dir, beta_diversity_metric, params_str))
        else:
            beta_div_cmd = (
                'beta_diversity.py -i %s -o %s --metrics %s %s' %
                (otu_table_fp, output_dir, beta_diversity_metric, params_str))
        commands.append(
            [('Beta Diversity (%s)' % beta_diversity_metric, beta_div_cmd)])

        # Rename <metric>_<otu table basename>.txt to <metric>_dm.txt
        orig_beta_div_fp = '%s/%s_%s.txt' % \
            (output_dir, beta_diversity_metric, otu_table_basename)
        beta_div_fp = '%s/%s_dm.txt' % \
            (output_dir, beta_diversity_metric)
        commands.append([
            ('Rename distance matrix (%s)' % beta_diversity_metric,
             'mv %s %s' % (orig_beta_div_fp, beta_div_fp))
        ])
        dm_fps.append((beta_diversity_metric, beta_div_fp))

        # Prep the principal coordinates command
        pc_fp = '%s/%s_pc.txt' % (output_dir, beta_diversity_metric)
        try:
            params_str = get_params_str(params['principal_coordinates'])
        except KeyError:
            params_str = ''
        # Build the principal coordinates command
        pc_cmd = 'principal_coordinates.py -i %s -o %s %s' % \
            (beta_div_fp, pc_fp, params_str)
        commands.append(
            [('Principal coordinates (%s)' % beta_diversity_metric, pc_cmd)])

        # Generate emperor plots
        if not suppress_emperor_plots:
            # Prep the emperor plots command
            emperor_dir = '%s/%s_emperor_pcoa_plot/' % \
                (output_dir, beta_diversity_metric)
            create_dir(emperor_dir)
            try:
                params_str = get_params_str(params['make_emperor'])
            except KeyError:
                params_str = ''
            # Build the emperor plots command
            emperor_command = \
                'make_emperor.py -i %s -o %s -m %s %s' % \
                (pc_fp, emperor_dir, mapping_fp, params_str)

            # Label fixed: the original text had an unbalanced ')'.
            commands.append([
                ('Make emperor plots (%s)' % beta_diversity_metric,
                 emperor_command)
            ])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)

    return dm_fps
def run_beta_diversity_through_plots(otu_table_fp,
                                     mapping_fp,
                                     output_dir,
                                     command_handler,
                                     params,
                                     qiime_config,
                                     color_by_interesting_fields_only=True,
                                     sampling_depth=None,
                                     histogram_categories=None,
                                     tree_fp=None,
                                     parallel=False,
                                     logger=None,
                                     suppress_3d_plots=False,
                                     suppress_2d_plots=False,
                                     suppress_md5=False,
                                     status_update_callback=print_to_stdout):
    """ Compute beta diversity distance matrices, run PCoA, and build plots

        The steps performed by this function are:

         1) Optionally (when sampling_depth is truthy) sample the OTU table
            at even depth;
         2) Generate a prefs file for coloring of continuous variables;

        and then, for each beta diversity metric:

         3) Compute a beta diversity distance matrix;
         4) Perform a principal coordinates analysis on the result of
            Step 3;
         5) Generate 3D plots with continuous and with discrete coloring
            (unless suppress_3d_plots);
         6) Generate 2D plots with continuous and with discrete coloring
            (unless suppress_2d_plots);
         7) Generate distance histograms (only when histogram_categories
            is given).

        The commands are only assembled here and are run by
        command_handler; output directories are created immediately.

        params is indexed by script name; a missing section is treated as
        "no extra parameters". When logger is None a WorkflowLogger is
        created and command_handler is asked to close it on success.

        Returns a list of (metric, distance_matrix_fp) tuples, one per
        metric, in processing order.

        Raises ValueError if any entry of histogram_categories is not a
        mapping file column header.
    """
    def script_params_str(script_name):
        # Command-line options for one script, '' if params has no section
        # for it.
        try:
            return get_params_str(params[script_name])
        except KeyError:
            return ''

    # Prepare some variables for the later steps
    otu_table_filename = split(otu_table_fp)[1]
    otu_table_basename, otu_table_ext = splitext(otu_table_filename)
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()

    # Only close the logger on success if it was created here.
    close_logger_on_success = logger is None
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp, tree_fp])

    # Use a context manager so the mapping file handle is always closed.
    with open(mapping_fp, 'U') as mapping_f:
        mapping_data, mapping_header, mapping_comments = \
            parse_mapping_file(mapping_f)

    if histogram_categories:
        invalid_categories = set(histogram_categories) - set(mapping_header)
        if invalid_categories:
            # Call-style raise: valid in both Python 2 and Python 3.
            raise ValueError(
                "Invalid histogram categories - these must exactly match "
                "mapping file column headers: %s" %
                (' '.join(invalid_categories)))

    # Get the interesting mapping fields to color by -- if none are
    # interesting, take all of them. Interesting is defined as those
    # which have greater than one value and fewer values than the number
    # of samples
    if color_by_interesting_fields_only:
        mapping_fields = \
            get_interesting_mapping_fields(mapping_data, mapping_header) or \
            mapping_header
    else:
        mapping_fields = mapping_header
    mapping_fields = ','.join(mapping_fields)

    if sampling_depth:
        # Sample the OTU table at even depth
        even_sampled_otu_table_fp = '%s/%s_even%d%s' % \
            (output_dir, otu_table_basename, sampling_depth, otu_table_ext)
        single_rarefaction_cmd = \
            '%s %s/single_rarefaction.py -i %s -o %s -d %d' % \
            (python_exe_fp, script_dir, otu_table_fp,
             even_sampled_otu_table_fp, sampling_depth)
        commands.append([
            ('Sample OTU table at %d seqs/sample' % sampling_depth,
             single_rarefaction_cmd)])
        # All later steps work on the rarefied table.
        otu_table_fp = even_sampled_otu_table_fp
        otu_table_filename = split(even_sampled_otu_table_fp)[1]
        otu_table_basename, otu_table_ext = splitext(otu_table_filename)

    try:
        beta_diversity_metrics = params['beta_diversity']['metrics'].split(',')
    except KeyError:
        beta_diversity_metrics = ['weighted_unifrac', 'unweighted_unifrac']

    # Prep the prefs file generator command. The section is looked up once
    # so that a missing 'make_prefs_file' entry no longer raises KeyError
    # in the 'mapping_headers_to_use' check below.
    prefs_fp = '%s/prefs.txt' % output_dir
    try:
        prefs_params = params['make_prefs_file']
    except KeyError:
        prefs_params = {}
        params_str = ''
    else:
        params_str = get_params_str(prefs_params)
    if 'mapping_headers_to_use' not in prefs_params:
        params_str = '%s --mapping_headers_to_use %s' \
            % (params_str, mapping_fields)
    # Build the prefs file generator command
    prefs_cmd = \
        '%s %s/make_prefs_file.py -m %s -o %s %s' % \
        (python_exe_fp, script_dir, mapping_fp, prefs_fp, params_str)
    commands.append([('Build prefs file', prefs_cmd)])

    dm_fps = []
    for beta_diversity_metric in beta_diversity_metrics:

        # Prep the beta-diversity command. 'metrics' is dropped from the
        # copy because the metric is passed explicitly, one per command.
        try:
            bdiv_params_copy = params['beta_diversity'].copy()
        except KeyError:
            bdiv_params_copy = {}
        bdiv_params_copy.pop('metrics', None)

        params_str = get_params_str(bdiv_params_copy)

        if tree_fp:
            params_str = '%s -t %s ' % (params_str, tree_fp)

        # Build the beta-diversity command
        if parallel:
            # Grab the parallel-specific parameters. They are joined with
            # an explicit space so they cannot fuse with the last
            # beta-diversity option when no tree was appended.
            try:
                params_str = '%s %s' % (params_str,
                                        get_params_str(params['parallel']))
            except KeyError:
                pass
            beta_div_cmd = (
                '%s %s/parallel_beta_diversity.py -i %s -o %s '
                '--metrics %s -T %s' %
                (python_exe_fp, script_dir, otu_table_fp,
                 output_dir, beta_diversity_metric, params_str))
        else:
            beta_div_cmd = (
                '%s %s/beta_diversity.py -i %s -o %s --metrics %s %s' %
                (python_exe_fp, script_dir, otu_table_fp,
                 output_dir, beta_diversity_metric, params_str))
        commands.append(
            [('Beta Diversity (%s)' % beta_diversity_metric, beta_div_cmd)])

        # Rename <metric>_<otu table basename>.txt to <metric>_dm.txt
        orig_beta_div_fp = '%s/%s_%s.txt' % \
            (output_dir, beta_diversity_metric, otu_table_basename)
        beta_div_fp = '%s/%s_dm.txt' % \
            (output_dir, beta_diversity_metric)
        commands.append([
            ('Rename distance matrix (%s)' % beta_diversity_metric,
             'mv %s %s' % (orig_beta_div_fp, beta_div_fp))])
        dm_fps.append((beta_diversity_metric, beta_div_fp))

        # Prep and build the principal coordinates command
        pc_fp = '%s/%s_pc.txt' % (output_dir, beta_diversity_metric)
        params_str = script_params_str('principal_coordinates')
        pc_cmd = '%s %s/principal_coordinates.py -i %s -o %s %s' % \
            (python_exe_fp, script_dir, beta_div_fp, pc_fp, params_str)
        commands.append(
            [('Principal coordinates (%s)' % beta_diversity_metric, pc_cmd)])

        # Generate 3d plots
        if not suppress_3d_plots:
            # The same make_3d_plots options serve both colorings.
            params_str = script_params_str('make_3d_plots')

            # Continuous-coloring 3d plots command (uses the prefs file)
            continuous_3d_dir = '%s/%s_3d_continuous/' % \
                (output_dir, beta_diversity_metric)
            create_dir(continuous_3d_dir)
            continuous_3d_command = \
                '%s %s/make_3d_plots.py -p %s -i %s -o %s -m %s %s' % \
                (python_exe_fp, script_dir, prefs_fp, pc_fp,
                 continuous_3d_dir, mapping_fp, params_str)

            # Discrete-coloring 3d plots command (uses the mapping fields)
            discrete_3d_dir = '%s/%s_3d_discrete/' % \
                (output_dir, beta_diversity_metric)
            create_dir(discrete_3d_dir)
            discrete_3d_command = \
                '%s %s/make_3d_plots.py -b "%s" -i %s -o %s -m %s %s' % \
                (python_exe_fp, script_dir, mapping_fields, pc_fp,
                 discrete_3d_dir, mapping_fp, params_str)

            commands.append([
                ('Make 3D plots (continuous coloring, %s)' %
                 beta_diversity_metric, continuous_3d_command),
                ('Make 3D plots (discrete coloring, %s)' %
                 beta_diversity_metric, discrete_3d_command)])

        # Generate 2d plots
        if not suppress_2d_plots:
            # The same make_2d_plots options serve both colorings.
            params_str = script_params_str('make_2d_plots')

            # Continuous-coloring 2d plots command (uses the prefs file)
            continuous_2d_dir = '%s/%s_2d_continuous/' % \
                (output_dir, beta_diversity_metric)
            create_dir(continuous_2d_dir)
            continuous_2d_command = \
                '%s %s/make_2d_plots.py -p %s -i %s -o %s -m %s %s' % \
                (python_exe_fp, script_dir, prefs_fp, pc_fp,
                 continuous_2d_dir, mapping_fp, params_str)

            # Discrete-coloring 2d plots command (uses the mapping fields)
            discrete_2d_dir = '%s/%s_2d_discrete/' % \
                (output_dir, beta_diversity_metric)
            create_dir(discrete_2d_dir)
            discrete_2d_command = \
                '%s %s/make_2d_plots.py -b "%s" -i %s -o %s -m %s %s' % \
                (python_exe_fp, script_dir, mapping_fields, pc_fp,
                 discrete_2d_dir, mapping_fp, params_str)

            commands.append([
                ('Make 2D plots (continuous coloring, %s)' %
                 beta_diversity_metric, continuous_2d_command),
                ('Make 2D plots (discrete coloring, %s)' %
                 beta_diversity_metric, discrete_2d_command)])

        # Generate distance histograms
        if histogram_categories:
            histograms_dir = '%s/%s_histograms/' % \
                (output_dir, beta_diversity_metric)
            create_dir(histograms_dir)
            params_str = script_params_str('make_distance_histograms')
            # Build the make_distance_histograms command
            distance_histograms_command = (
                '%s %s/make_distance_histograms.py -d %s -o %s -m %s '
                '-f "%s" %s' %
                (python_exe_fp, script_dir, beta_div_fp,
                 histograms_dir, mapping_fp,
                 ','.join(histogram_categories), params_str))

            commands.append([
                ('Make Distance Histograms (%s)' %
                 beta_diversity_metric, distance_histograms_command)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)

    return dm_fps
def run_beta_diversity_through_plots(otu_table_fp,
                                     mapping_fp,
                                     output_dir,
                                     command_handler,
                                     params,
                                     qiime_config,
                                     color_by_interesting_fields_only=True,
                                     sampling_depth=None,
                                     histogram_categories=None,
                                     tree_fp=None,
                                     parallel=False,
                                     logger=None,
                                     suppress_3d_plots=False,
                                     suppress_2d_plots=False,
                                     suppress_md5=False,
                                     status_update_callback=print_to_stdout):
    """ Build and run the beta diversity -> PCoA -> plots workflow

        The steps performed by this function are:

         1) Optionally (when sampling_depth is truthy) sample the OTU table
            at even depth;
         2) Generate a prefs file for coloring of continuous variables;

        followed, for every beta diversity metric, by:

         3) Compute a beta diversity distance matrix;
         4) Perform a principal coordinates analysis on the result of
            Step 3;
         5) Generate continuous- and discrete-coloring 3D plots (skipped
            when suppress_3d_plots is set);
         6) Generate continuous- and discrete-coloring 2D plots (skipped
            when suppress_2d_plots is set);
         7) Generate distance histograms (only when histogram_categories
            is given).

        This function only builds the command list; command_handler runs
        it. Output directories are created right away.

        params is indexed by script name; a missing section means "no
        extra parameters". If logger is None, a WorkflowLogger is created
        and command_handler is told to close it on success.

        Returns a list of (metric, distance_matrix_fp) tuples in processing
        order.

        Raises ValueError when histogram_categories contains a value that
        is not a mapping file column header.
    """
    def params_str_for(script_name):
        # Options string for script_name, or '' when params lacks that
        # section.
        try:
            return get_params_str(params[script_name])
        except KeyError:
            return ''

    # Prepare some variables for the later steps
    otu_table_filename = split(otu_table_fp)[1]
    otu_table_basename, otu_table_ext = splitext(otu_table_filename)
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()

    # A logger created here is also closed here (via command_handler).
    close_logger_on_success = logger is None
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp, tree_fp])

    # Context manager guarantees the mapping file handle is closed.
    with open(mapping_fp, 'U') as mapping_f:
        mapping_data, mapping_header, mapping_comments = \
            parse_mapping_file(mapping_f)

    if histogram_categories:
        invalid_categories = set(histogram_categories) - set(mapping_header)
        if invalid_categories:
            # Call-style raise works under both Python 2 and Python 3.
            raise ValueError(
                "Invalid histogram categories - these must exactly match "
                "mapping file column headers: %s" %
                (' '.join(invalid_categories)))

    # Get the interesting mapping fields to color by -- if none are
    # interesting, take all of them. Interesting is defined as those
    # which have greater than one value and fewer values than the number
    # of samples
    if color_by_interesting_fields_only:
        mapping_fields = \
            get_interesting_mapping_fields(mapping_data, mapping_header) or \
            mapping_header
    else:
        mapping_fields = mapping_header
    mapping_fields = ','.join(mapping_fields)

    if sampling_depth:
        # Sample the OTU table at even depth
        even_sampled_otu_table_fp = '%s/%s_even%d%s' % \
            (output_dir, otu_table_basename, sampling_depth, otu_table_ext)
        single_rarefaction_cmd = \
            '%s %s/single_rarefaction.py -i %s -o %s -d %d' % \
            (python_exe_fp, script_dir, otu_table_fp,
             even_sampled_otu_table_fp, sampling_depth)
        commands.append([
            ('Sample OTU table at %d seqs/sample' % sampling_depth,
             single_rarefaction_cmd)])
        # Every later step uses the rarefied table.
        otu_table_fp = even_sampled_otu_table_fp
        otu_table_filename = split(even_sampled_otu_table_fp)[1]
        otu_table_basename, otu_table_ext = splitext(otu_table_filename)

    try:
        beta_diversity_metrics = params['beta_diversity']['metrics'].split(',')
    except KeyError:
        beta_diversity_metrics = ['weighted_unifrac', 'unweighted_unifrac']

    # Prep the prefs file generator command. Fetch the section once, so an
    # absent 'make_prefs_file' entry yields empty options instead of a
    # KeyError in the 'mapping_headers_to_use' test.
    prefs_fp = '%s/prefs.txt' % output_dir
    try:
        prefs_params = params['make_prefs_file']
    except KeyError:
        prefs_params = {}
        params_str = ''
    else:
        params_str = get_params_str(prefs_params)
    if 'mapping_headers_to_use' not in prefs_params:
        params_str = '%s --mapping_headers_to_use %s' \
            % (params_str, mapping_fields)
    # Build the prefs file generator command
    prefs_cmd = \
        '%s %s/make_prefs_file.py -m %s -o %s %s' % \
        (python_exe_fp, script_dir, mapping_fp, prefs_fp, params_str)
    commands.append([('Build prefs file', prefs_cmd)])

    dm_fps = []
    for beta_diversity_metric in beta_diversity_metrics:

        # Prep the beta-diversity command; the metric is passed explicitly,
        # so 'metrics' is removed from the copied parameters.
        try:
            bdiv_params_copy = params['beta_diversity'].copy()
        except KeyError:
            bdiv_params_copy = {}
        bdiv_params_copy.pop('metrics', None)

        params_str = get_params_str(bdiv_params_copy)

        if tree_fp:
            params_str = '%s -t %s ' % (params_str, tree_fp)

        # Build the beta-diversity command
        if parallel:
            # Grab the parallel-specific parameters, separated by a space
            # so they never run into the preceding option text.
            try:
                params_str = '%s %s' % (params_str,
                                        get_params_str(params['parallel']))
            except KeyError:
                pass
            beta_div_cmd = (
                '%s %s/parallel_beta_diversity.py -i %s -o %s '
                '--metrics %s -T %s' %
                (python_exe_fp, script_dir, otu_table_fp,
                 output_dir, beta_diversity_metric, params_str))
        else:
            beta_div_cmd = (
                '%s %s/beta_diversity.py -i %s -o %s --metrics %s %s' %
                (python_exe_fp, script_dir, otu_table_fp,
                 output_dir, beta_diversity_metric, params_str))
        commands.append(
            [('Beta Diversity (%s)' % beta_diversity_metric, beta_div_cmd)])

        # Rename <metric>_<otu table basename>.txt to <metric>_dm.txt
        orig_beta_div_fp = '%s/%s_%s.txt' % \
            (output_dir, beta_diversity_metric, otu_table_basename)
        beta_div_fp = '%s/%s_dm.txt' % \
            (output_dir, beta_diversity_metric)
        commands.append(
            [('Rename distance matrix (%s)' % beta_diversity_metric,
              'mv %s %s' % (orig_beta_div_fp, beta_div_fp))])
        dm_fps.append((beta_diversity_metric, beta_div_fp))

        # Prep and build the principal coordinates command
        pc_fp = '%s/%s_pc.txt' % (output_dir, beta_diversity_metric)
        params_str = params_str_for('principal_coordinates')
        pc_cmd = '%s %s/principal_coordinates.py -i %s -o %s %s' % \
            (python_exe_fp, script_dir, beta_div_fp, pc_fp, params_str)
        commands.append(
            [('Principal coordinates (%s)' % beta_diversity_metric, pc_cmd)])

        # Generate 3d plots
        if not suppress_3d_plots:
            # One options string is shared by both 3d commands.
            params_str = params_str_for('make_3d_plots')

            # Continuous-coloring 3d plots command (driven by prefs file)
            continuous_3d_dir = '%s/%s_3d_continuous/' % \
                (output_dir, beta_diversity_metric)
            create_dir(continuous_3d_dir)
            continuous_3d_command = \
                '%s %s/make_3d_plots.py -p %s -i %s -o %s -m %s %s' % \
                (python_exe_fp, script_dir, prefs_fp, pc_fp,
                 continuous_3d_dir, mapping_fp, params_str)

            # Discrete-coloring 3d plots command (driven by mapping fields)
            discrete_3d_dir = '%s/%s_3d_discrete/' % \
                (output_dir, beta_diversity_metric)
            create_dir(discrete_3d_dir)
            discrete_3d_command = \
                '%s %s/make_3d_plots.py -b "%s" -i %s -o %s -m %s %s' % \
                (python_exe_fp, script_dir, mapping_fields, pc_fp,
                 discrete_3d_dir, mapping_fp, params_str)

            commands.append([
                ('Make 3D plots (continuous coloring, %s)' %
                 beta_diversity_metric, continuous_3d_command),
                ('Make 3D plots (discrete coloring, %s)' %
                 beta_diversity_metric, discrete_3d_command)])

        # Generate 2d plots
        if not suppress_2d_plots:
            # One options string is shared by both 2d commands.
            params_str = params_str_for('make_2d_plots')

            # Continuous-coloring 2d plots command (driven by prefs file)
            continuous_2d_dir = '%s/%s_2d_continuous/' % \
                (output_dir, beta_diversity_metric)
            create_dir(continuous_2d_dir)
            continuous_2d_command = \
                '%s %s/make_2d_plots.py -p %s -i %s -o %s -m %s %s' % \
                (python_exe_fp, script_dir, prefs_fp, pc_fp,
                 continuous_2d_dir, mapping_fp, params_str)

            # Discrete-coloring 2d plots command (driven by mapping fields)
            discrete_2d_dir = '%s/%s_2d_discrete/' % \
                (output_dir, beta_diversity_metric)
            create_dir(discrete_2d_dir)
            discrete_2d_command = \
                '%s %s/make_2d_plots.py -b "%s" -i %s -o %s -m %s %s' % \
                (python_exe_fp, script_dir, mapping_fields, pc_fp,
                 discrete_2d_dir, mapping_fp, params_str)

            commands.append([
                ('Make 2D plots (continuous coloring, %s)' %
                 beta_diversity_metric, continuous_2d_command),
                ('Make 2D plots (discrete coloring, %s)' %
                 beta_diversity_metric, discrete_2d_command)])

        # Generate distance histograms
        if histogram_categories:
            histograms_dir = '%s/%s_histograms/' % \
                (output_dir, beta_diversity_metric)
            create_dir(histograms_dir)
            params_str = params_str_for('make_distance_histograms')
            # Build the make_distance_histograms command
            distance_histograms_command = (
                '%s %s/make_distance_histograms.py -d %s -o %s -m %s '
                '-f "%s" %s' %
                (python_exe_fp, script_dir, beta_div_fp,
                 histograms_dir, mapping_fp,
                 ','.join(histogram_categories), params_str))

            commands.append([
                ('Make Distance Histograms (%s)' %
                 beta_diversity_metric, distance_histograms_command)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)

    return dm_fps