def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Some code for error checking of input args:

    # Check if distance_matrix_file is valid:
    try:
        d_header, d_mat = parse_distmat(open(opts.distance_matrix_file, "U"))
    except:
        option_parser.error(
            "This does not look like a valid distance matrix file.  Please supply a valid distance matrix file using the -d option."
        )

    if not is_symmetric_and_hollow(d_mat):
        option_parser.error("The distance matrix must be symmetric and " "hollow.")

    # Check if map_fname is valid:
    try:
        mapping, m_header, m_comments = parse_mapping_file(open(opts.map_fname, "U"))
    except QiimeParseError:
        option_parser.error(
            "This does not look like a valid metadata mapping file.  Please supply a valid mapping file using the -m option."
        )

    # make sure background_color is valid
    if opts.background_color not in ["black", "white"]:
        option_parser.error(
            "'%s' is not a valid background color.  Please pass in either 'black' or 'white' using the -k option."
            % (opts.background_color)
        )

    # make sure prefs file is valid if it exists
    if opts.prefs_path is not None:
        try:
            prefs_file = open(opts.prefs_path, "U").read()
        except IOError:
            option_parser.error(
                "Provided prefs file, '%s', does not exist.  Please pass in a valid prefs file with the -p option."
                % (opts.prefs_path)
            )

    if opts.prefs_path is not None:
        prefs = parse_prefs_file(prefs_file)
    else:
        prefs = None

    color_prefs, color_data, background_color, label_color, ball_scale, arrow_colors = sample_color_prefs_and_map_data_from_options(
        opts
    )

    # list of labelname, groups, colors, data_colors, data_color_order
    groups_and_colors = list(iter_color_groups(mapping=color_data["map"], prefs=color_prefs))

    # dict mapping labelname to list of: [groups, colors, data_colors,
    # data_color_order]
    field_to_colors = {}
    for color_info in groups_and_colors:
        field_to_colors[color_info[0]] = color_info[1:]

    qiime_dir = get_qiime_project_dir() + "/qiime/support_files/"

    fields = opts.fields
    if fields is not None:
        fields = map(strip, fields.split(","))
        fields = [i.strip('"').strip("'") for i in fields]
    elif prefs is not None:
        fields = prefs.get("FIELDS", None)
    else:
        fields = get_interesting_mapping_fields(mapping, m_header)

    # Check that all provided fields are valid:
    if fields is not None:
        for f in fields:
            if f not in m_header:
                option_parser.error(
                    "The field, %s, is not in the provided mapping file.  Please supply correct fields (using the -f option or providing a 'FIELDS' list in the prefs file) corresponding to fields in mapping file."
                    % (f)
                )

    within_distances, between_distances, dmat = group_distances(
        mapping_file=opts.map_fname,
        dmatrix_file=opts.distance_matrix_file,
        fields=fields,
        dir_prefix=get_random_directory_name(output_dir=opts.dir_path, prefix="distances"),
    )

    if not opts.suppress_html_output:
        # histograms output path
        histograms_path = path.join(opts.dir_path, "histograms")
        try:
            mkdir(histograms_path)
        except OSError:  # raised if dir exists
            pass

        # draw all histograms
        distances_dict, label_to_histogram_filename = draw_all_histograms(
            single_field=within_distances,
            paired_field=between_distances,
            dmat=dmat,
            histogram_dir=histograms_path,
            field_to_color_prefs=field_to_colors,
            background_color=background_color,
        )

        # Get relative path to histogram files.
        label_to_histogram_filename_relative = _make_relative_paths(label_to_histogram_filename, opts.dir_path)

        dm_fname = path.split(opts.distance_matrix_file)[-1]
        basename = path.splitext(dm_fname)[0]
        outfile_name = basename + "_distance_histograms.html"
        make_main_html(
            distances_dict=distances_dict,
            label_to_histogram_filename=label_to_histogram_filename_relative,
            root_outdir=opts.dir_path,
            outfile_name=outfile_name,
            title="Distance Histograms",
        )

        # Handle saving web resources locally.
        # javascript file
        javascript_path = path.join(opts.dir_path, "js")
        try:
            mkdir(javascript_path)
        except OSError:  # raised if dir exists
            pass
        js_out = open(javascript_path + "/histograms.js", "w")
        js_out.write(open(qiime_dir + "js/histograms.js").read())
        js_out.close()

    monte_carlo_iters = opts.monte_carlo_iters
    if monte_carlo_iters > 0:
        # Do Monte Carlo for all fields
        monte_carlo_group_distances(
            mapping_file=opts.map_fname,
            dmatrix_file=opts.distance_matrix_file,
            prefs=prefs,
            dir_prefix=opts.dir_path,
            fields=fields,
            default_iters=monte_carlo_iters,
        )

        # Do Monte Carlo for within and between fields
        monte_carlo_group_distances_within_between(
            single_field=within_distances,
            paired_field=between_distances,
            dmat=dmat,
            dir_prefix=opts.dir_path,
            num_iters=monte_carlo_iters,
        )
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    #Some code for error checking of input args:

    #Check if distance_matrix_file is valid:
    try:
        d_header, d_mat = parse_distmat(open(opts.distance_matrix_file, 'U'))
    except:
        option_parser.error(
            "This does not look like a valid distance matrix file.  Please supply a valid distance matrix file using the -d option."
        )

    if not is_symmetric_and_hollow(d_mat):
        option_parser.error("The distance matrix must be symmetric and "
                            "hollow.")

    #Check if map_fname is valid:
    try:
        mapping, m_header, m_comments = \
            parse_mapping_file(open(opts.map_fname,'U'))
    except QiimeParseError:
        option_parser.error(
            "This does not look like a valid metadata mapping file.  Please supply a valid mapping file using the -m option."
        )

    #make sure background_color is valid
    if opts.background_color not in ['black', 'white']:
        option_parser.error(
            "'%s' is not a valid background color.  Please pass in either 'black' or 'white' using the -k option."
            % (opts.background_color))

    #make sure prefs file is valid if it exists
    if opts.prefs_path is not None:
        try:
            prefs_file = open(opts.prefs_path, 'U').read()
        except IOError:
            option_parser.error(
                "Provided prefs file, '%s', does not exist.  Please pass in a valid prefs file with the -p option."
                % (opts.prefs_path))

    if opts.prefs_path is not None:
        prefs = parse_prefs_file(prefs_file)
    else:
        prefs = None


    color_prefs, color_data, background_color, label_color, ball_scale,\
     arrow_colors=sample_color_prefs_and_map_data_from_options(opts)

    #list of labelname, groups, colors, data_colors, data_color_order
    groups_and_colors=list(iter_color_groups(mapping=color_data['map'],\
        prefs=color_prefs))

    #dict mapping labelname to list of: [groups, colors, data_colors,
    # data_color_order]
    field_to_colors = {}
    for color_info in groups_and_colors:
        field_to_colors[color_info[0]] = color_info[1:]

    qiime_dir = get_qiime_project_dir() + '/qiime/support_files/'

    fields = opts.fields
    if fields is not None:
        fields = map(strip, fields.split(','))
        fields = [i.strip('"').strip("'") for i in fields]
    elif prefs is not None:
        fields = prefs.get('FIELDS', None)
    else:
        fields = get_interesting_mapping_fields(mapping, m_header)

    #Check that all provided fields are valid:
    if fields is not None:
        for f in fields:
            if f not in m_header:
                option_parser.error(
                    "The field, %s, is not in the provided mapping file.  Please supply correct fields (using the -f option or providing a 'FIELDS' list in the prefs file) corresponding to fields in mapping file."
                    % (f))

    within_distances, between_distances, dmat = \
        group_distances(mapping_file=opts.map_fname,\
        dmatrix_file=opts.distance_matrix_file,\
        fields=fields,\
        dir_prefix=get_random_directory_name(output_dir=opts.dir_path,\
            prefix='distances'))

    if not opts.suppress_html_output:
        #histograms output path
        histograms_path = path.join(opts.dir_path, 'histograms')
        try:
            mkdir(histograms_path)
        except OSError:  #raised if dir exists
            pass

        #draw all histograms
        distances_dict, label_to_histogram_filename = \
            draw_all_histograms(single_field=within_distances, \
                paired_field=between_distances, \
                dmat=dmat,\
                histogram_dir=histograms_path,\
                field_to_color_prefs=field_to_colors,\
                background_color=background_color)

        #Get relative path to histogram files.
        label_to_histogram_filename_relative = \
            _make_relative_paths(label_to_histogram_filename, opts.dir_path)

        dm_fname = path.split(opts.distance_matrix_file)[-1]
        basename = path.splitext(dm_fname)[0]
        outfile_name = basename + '_distance_histograms.html'
        make_main_html(distances_dict=distances_dict,\
            label_to_histogram_filename=label_to_histogram_filename_relative,\
            root_outdir=opts.dir_path, \
            outfile_name = outfile_name, \
            title='Distance Histograms')

        #Handle saving web resources locally.
        #javascript file
        javascript_path = path.join(opts.dir_path, 'js')
        try:
            mkdir(javascript_path)
        except OSError:  #raised if dir exists
            pass
        js_out = open(javascript_path + '/histograms.js', 'w')
        js_out.write(open(qiime_dir + 'js/histograms.js').read())
        js_out.close()

    monte_carlo_iters = opts.monte_carlo_iters
    if monte_carlo_iters > 0:
        #Do Monte Carlo for all fields
        monte_carlo_group_distances(mapping_file=opts.map_fname,\
            dmatrix_file=opts.distance_matrix_file,\
            prefs=prefs, \
            dir_prefix = opts.dir_path,\
            fields=fields,\
            default_iters=monte_carlo_iters)

        #Do Monte Carlo for within and between fields
        monte_carlo_group_distances_within_between(\
            single_field=within_distances,\
            paired_field=between_distances, dmat=dmat, \
            dir_prefix = opts.dir_path,\
            num_iters=monte_carlo_iters)
Exemple #3
0
def run_beta_diversity_through_plots(otu_table_fp, 
                                     mapping_fp,
                                     output_dir,
                                     command_handler,
                                     params,
                                     qiime_config,
                                     color_by_interesting_fields_only=True,
                                     sampling_depth=None,
                                     tree_fp=None,
                                     parallel=False,
                                     logger=None,
                                     suppress_emperor_plots=False,
                                     suppress_md5=False,
                                     status_update_callback=print_to_stdout):
    """ Compute beta diversity distance matrices, run PCoA, and generate emperor plots
    
        The steps performed by this function are:
         1) Compute a beta diversity distance matrix for each metric
         2) Peform a principal coordinates analysis on the result of step 1
         3) Generate an emperor plot for each result of step 2
    
    """  
    # Prepare some variables for the later steps
    otu_table_dir, otu_table_filename = split(otu_table_fp)
    otu_table_basename, otu_table_ext = splitext(otu_table_filename)
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    if logger == None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False
    
    if not suppress_md5:
        log_input_md5s(logger,[otu_table_fp,mapping_fp,tree_fp])
    
    mapping_data, mapping_header, mapping_comments =\
     parse_mapping_file(open(mapping_fp,'U'))

    # Get the interesting mapping fields to color by -- if none are
    # interesting, take all of them. Interesting is defined as those
    # which have greater than one value and fewer values than the number 
    # of samples
    if color_by_interesting_fields_only:
        mapping_fields =\
          get_interesting_mapping_fields(mapping_data, mapping_header) or\
          mapping_header
    else:
        mapping_fields = mapping_header
    mapping_fields = ','.join(mapping_fields)
    
    if sampling_depth:
        # Sample the OTU table at even depth
        even_sampled_otu_table_fp = '%s/%s_even%d%s' %\
         (output_dir, otu_table_basename, 
          sampling_depth, otu_table_ext)
        single_rarefaction_cmd = \
         '%s %s/single_rarefaction.py -i %s -o %s -d %d' %\
         (python_exe_fp, script_dir, otu_table_fp,
          even_sampled_otu_table_fp, sampling_depth)
        commands.append([
         ('Sample OTU table at %d seqs/sample' % sampling_depth,
          single_rarefaction_cmd)])
        otu_table_fp = even_sampled_otu_table_fp
        otu_table_dir, otu_table_filename = split(even_sampled_otu_table_fp)
        otu_table_basename, otu_table_ext = splitext(otu_table_filename)
    try:
        beta_diversity_metrics = params['beta_diversity']['metrics'].split(',')
    except KeyError:
        beta_diversity_metrics = ['weighted_unifrac','unweighted_unifrac']

    dm_fps = []
    for beta_diversity_metric in beta_diversity_metrics:
        
        # Prep the beta-diversity command
        try:
            bdiv_params_copy = params['beta_diversity'].copy()
        except KeyError:
            bdiv_params_copy = {}
        try:
            del bdiv_params_copy['metrics']
        except KeyError:
            pass
        
        params_str = get_params_str(bdiv_params_copy)
            
        if tree_fp:
            params_str = '%s -t %s ' % (params_str,tree_fp)
            
        # Build the beta-diversity command
        if parallel:
            # Grab the parallel-specific parameters
            try:
                params_str += get_params_str(params['parallel'])
            except KeyError:
                pass
            beta_div_cmd = '%s %s/parallel_beta_diversity.py -i %s -o %s --metrics %s -T %s' %\
             (python_exe_fp, script_dir, otu_table_fp,
              output_dir, beta_diversity_metric, params_str)
            commands.append(\
             [('Beta Diversity (%s)' % beta_diversity_metric, beta_div_cmd)])
        else:
            beta_div_cmd = '%s %s/beta_diversity.py -i %s -o %s --metrics %s %s' %\
             (python_exe_fp, script_dir, otu_table_fp, 
              output_dir, beta_diversity_metric, params_str)
            commands.append(\
             [('Beta Diversity (%s)' % beta_diversity_metric, beta_div_cmd)])
        
        
        orig_beta_div_fp = '%s/%s_%s.txt' % \
         (output_dir, beta_diversity_metric, otu_table_basename)
        beta_div_fp = '%s/%s_dm.txt' % \
         (output_dir, beta_diversity_metric)
        commands.append([('Rename distance matrix (%s)' % beta_diversity_metric,
                         'mv %s %s' % (orig_beta_div_fp, beta_div_fp))])
        dm_fps.append((beta_diversity_metric, beta_div_fp))
        
        # Prep the principal coordinates command
        pc_fp = '%s/%s_pc.txt' % (output_dir, beta_diversity_metric)
        try:
            params_str = get_params_str(params['principal_coordinates'])
        except KeyError:
            params_str = ''
        # Build the principal coordinates command
        pc_cmd = '%s %s/principal_coordinates.py -i %s -o %s %s' %\
         (python_exe_fp, script_dir, beta_div_fp, pc_fp, params_str)
        commands.append(\
         [('Principal coordinates (%s)' % beta_diversity_metric, pc_cmd)])
        
        # Generate emperor plots
        if not suppress_emperor_plots:
            # Prep the emperor plots command
            emperor_dir = '%s/%s_emperor_pcoa_plot/' % (output_dir, beta_diversity_metric)
            create_dir(emperor_dir)
            try:
                params_str = get_params_str(params['make_emperor'])
            except KeyError:
                params_str = ''
            # Build the continuous-coloring 3d plots command
            emperor_command = \
             'make_emperor.py -i %s -o %s -m %s %s' % (pc_fp,
                                                       emperor_dir,
                                                       mapping_fp,
                                                       params_str)
       
            commands.append([('Make emperor plots, %s)' % beta_diversity_metric,
                              emperor_command)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
    
    return dm_fps
Exemple #4
0
def run_beta_diversity_through_plots(otu_table_fp,
                                     mapping_fp,
                                     output_dir,
                                     command_handler,
                                     params,
                                     qiime_config,
                                     color_by_interesting_fields_only=True,
                                     sampling_depth=None,
                                     tree_fp=None,
                                     parallel=False,
                                     logger=None,
                                     suppress_emperor_plots=False,
                                     suppress_md5=False,
                                     status_update_callback=print_to_stdout):
    """ Compute beta diversity distance matrices, run PCoA, and generate emperor plots

        The steps performed by this function are:
         1) Compute a beta diversity distance matrix for each metric
         2) Peform a principal coordinates analysis on the result of step 1
         3) Generate an emperor plot for each result of step 2

    """
    # Prepare some variables for the later steps
    otu_table_dir, otu_table_filename = split(otu_table_fp)
    otu_table_basename, otu_table_ext = splitext(otu_table_filename)
    create_dir(output_dir)
    commands = []
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp, tree_fp])

    mapping_data, mapping_header, mapping_comments =\
        parse_mapping_file(open(mapping_fp, 'U'))

    # Get the interesting mapping fields to color by -- if none are
    # interesting, take all of them. Interesting is defined as those
    # which have greater than one value and fewer values than the number
    # of samples
    if color_by_interesting_fields_only:
        mapping_fields =\
            get_interesting_mapping_fields(mapping_data, mapping_header) or\
            mapping_header
    else:
        mapping_fields = mapping_header
    mapping_fields = ','.join(mapping_fields)

    if sampling_depth:
        # Sample the OTU table at even depth
        even_sampled_otu_table_fp = '%s/%s_even%d%s' %\
            (output_dir, otu_table_basename,
             sampling_depth, otu_table_ext)
        single_rarefaction_cmd = \
            'single_rarefaction.py -i %s -o %s -d %d' %\
            (otu_table_fp, even_sampled_otu_table_fp, sampling_depth)
        commands.append([
            ('Sample OTU table at %d seqs/sample' % sampling_depth,
             single_rarefaction_cmd)
        ])
        otu_table_fp = even_sampled_otu_table_fp
        otu_table_dir, otu_table_filename = split(even_sampled_otu_table_fp)
        otu_table_basename, otu_table_ext = splitext(otu_table_filename)
    try:
        beta_diversity_metrics = params['beta_diversity']['metrics'].split(',')
    except KeyError:
        beta_diversity_metrics = ['weighted_unifrac', 'unweighted_unifrac']

    dm_fps = []
    for beta_diversity_metric in beta_diversity_metrics:

        # Prep the beta-diversity command
        try:
            bdiv_params_copy = params['beta_diversity'].copy()
        except KeyError:
            bdiv_params_copy = {}
        try:
            del bdiv_params_copy['metrics']
        except KeyError:
            pass

        params_str = get_params_str(bdiv_params_copy)

        if tree_fp:
            params_str = '%s -t %s ' % (params_str, tree_fp)

        # Build the beta-diversity command
        if parallel:
            # Grab the parallel-specific parameters
            try:
                params_str += get_params_str(params['parallel'])
            except KeyError:
                pass
            beta_div_cmd = 'parallel_beta_diversity.py -i %s -o %s --metrics %s -T %s' %\
                (otu_table_fp, output_dir, beta_diversity_metric, params_str)
            commands.append([('Beta Diversity (%s)' % beta_diversity_metric,
                              beta_div_cmd)])
        else:
            beta_div_cmd = 'beta_diversity.py -i %s -o %s --metrics %s %s' %\
                (otu_table_fp, output_dir, beta_diversity_metric, params_str)
            commands.append([('Beta Diversity (%s)' % beta_diversity_metric,
                              beta_div_cmd)])

        orig_beta_div_fp = '%s/%s_%s.txt' % \
            (output_dir, beta_diversity_metric, otu_table_basename)
        beta_div_fp = '%s/%s_dm.txt' % \
            (output_dir, beta_diversity_metric)
        commands.append([
            ('Rename distance matrix (%s)' % beta_diversity_metric,
             'mv %s %s' % (orig_beta_div_fp, beta_div_fp))
        ])
        dm_fps.append((beta_diversity_metric, beta_div_fp))

        # Prep the principal coordinates command
        pc_fp = '%s/%s_pc.txt' % (output_dir, beta_diversity_metric)
        try:
            params_str = get_params_str(params['principal_coordinates'])
        except KeyError:
            params_str = ''
        # Build the principal coordinates command
        pc_cmd = 'principal_coordinates.py -i %s -o %s %s' %\
            (beta_div_fp, pc_fp, params_str)
        commands.append([('Principal coordinates (%s)' % beta_diversity_metric,
                          pc_cmd)])

        # Generate emperor plots
        if not suppress_emperor_plots:
            # Prep the emperor plots command
            emperor_dir = '%s/%s_emperor_pcoa_plot/' % (output_dir,
                                                        beta_diversity_metric)
            create_dir(emperor_dir)
            try:
                params_str = get_params_str(params['make_emperor'])
            except KeyError:
                params_str = ''
            # Build the continuous-coloring 3d plots command
            emperor_command = \
                'make_emperor.py -i %s -o %s -m %s %s' % (pc_fp,
                                                          emperor_dir,
                                                          mapping_fp,
                                                          params_str)

            commands.append([
                ('Make emperor plots, %s)' % beta_diversity_metric,
                 emperor_command)
            ])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)

    return dm_fps
Exemple #5
0
def run_beta_diversity_through_plots(otu_table_fp,
                                     mapping_fp,
                                     output_dir,
                                     command_handler,
                                     params,
                                     qiime_config,
                                     color_by_interesting_fields_only=True,
                                     sampling_depth=None,
                                     histogram_categories=None,
                                     tree_fp=None,
                                     parallel=False,
                                     logger=None,
                                     suppress_3d_plots=False,
                                     suppress_2d_plots=False,
                                     suppress_md5=False,
                                     status_update_callback=print_to_stdout):
    """ Run the data preparation steps of Qiime 
    
        The steps performed by this function are:
         1) Compute a beta diversity distance matrix;
         2) Peform a principal coordinates analysis on the result of
          Step 1;
         3) Generate a 3D prefs file for optimized coloring of continuous
          variables;
         4) Generate a 3D plot for all mapping fields with colors
          optimized for continuous data;
         5) Generate a 3D plot for all mapping fields with colors
          optimized for discrete data.
    
    """
    # Prepare some variables for the later steps
    otu_table_dir, otu_table_filename = split(otu_table_fp)
    otu_table_basename, otu_table_ext = splitext(otu_table_filename)
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    if logger == None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp, tree_fp])

    mapping_data, mapping_header, mapping_comments =\
     parse_mapping_file(open(mapping_fp,'U'))
    if histogram_categories:
        invalid_categories = set(histogram_categories) - set(mapping_header)
        if invalid_categories:
            raise ValueError,\
             "Invalid histogram categories - these must exactly match "+\
             "mapping file column headers: %s" % (' '.join(invalid_categories))
    # Get the interesting mapping fields to color by -- if none are
    # interesting, take all of them. Interesting is defined as those
    # which have greater than one value and fewer values than the number
    # of samples
    if color_by_interesting_fields_only:
        mapping_fields =\
          get_interesting_mapping_fields(mapping_data, mapping_header) or\
          mapping_header
    else:
        mapping_fields = mapping_header
    mapping_fields = ','.join(mapping_fields)

    if sampling_depth:
        # Sample the OTU table at even depth
        even_sampled_otu_table_fp = '%s/%s_even%d%s' %\
         (output_dir, otu_table_basename,
          sampling_depth, otu_table_ext)
        single_rarefaction_cmd = \
         '%s %s/single_rarefaction.py -i %s -o %s -d %d' %\
         (python_exe_fp, script_dir, otu_table_fp,
          even_sampled_otu_table_fp, sampling_depth)
        commands.append([
            ('Sample OTU table at %d seqs/sample' % sampling_depth,
             single_rarefaction_cmd)
        ])
        otu_table_fp = even_sampled_otu_table_fp
        otu_table_dir, otu_table_filename = split(even_sampled_otu_table_fp)
        otu_table_basename, otu_table_ext = splitext(otu_table_filename)
    try:
        beta_diversity_metrics = params['beta_diversity']['metrics'].split(',')
    except KeyError:
        beta_diversity_metrics = ['weighted_unifrac', 'unweighted_unifrac']

    # Prep the 3d prefs file generator command
    prefs_fp = '%s/prefs.txt' % output_dir
    try:
        params_str = get_params_str(params['make_prefs_file'])
    except KeyError:
        params_str = ''
    if not 'mapping_headers_to_use' in params['make_prefs_file']:
        params_str = '%s --mapping_headers_to_use %s' \
         % (params_str,mapping_fields)
    # Build the 3d prefs file generator command
    prefs_cmd = \
     '%s %s/make_prefs_file.py -m %s -o %s %s' %\
     (python_exe_fp, script_dir, mapping_fp, prefs_fp, params_str)
    commands.append([('Build prefs file', prefs_cmd)])

    dm_fps = []
    for beta_diversity_metric in beta_diversity_metrics:

        # Prep the beta-diversity command
        try:
            bdiv_params_copy = params['beta_diversity'].copy()
        except KeyError:
            bdiv_params_copy = {}
        try:
            del bdiv_params_copy['metrics']
        except KeyError:
            pass

        params_str = get_params_str(bdiv_params_copy)

        if tree_fp:
            params_str = '%s -t %s ' % (params_str, tree_fp)

        # Build the beta-diversity command
        if parallel:
            # Grab the parallel-specific parameters
            try:
                params_str += get_params_str(params['parallel'])
            except KeyError:
                pass
            beta_div_cmd = '%s %s/parallel_beta_diversity.py -i %s -o %s --metrics %s -T %s' %\
             (python_exe_fp, script_dir, otu_table_fp,
              output_dir, beta_diversity_metric, params_str)
            commands.append(\
             [('Beta Diversity (%s)' % beta_diversity_metric, beta_div_cmd)])
        else:
            beta_div_cmd = '%s %s/beta_diversity.py -i %s -o %s --metrics %s %s' %\
             (python_exe_fp, script_dir, otu_table_fp,
              output_dir, beta_diversity_metric, params_str)
            commands.append(\
             [('Beta Diversity (%s)' % beta_diversity_metric, beta_div_cmd)])


        orig_beta_div_fp = '%s/%s_%s.txt' % \
         (output_dir, beta_diversity_metric, otu_table_basename)
        beta_div_fp = '%s/%s_dm.txt' % \
         (output_dir, beta_diversity_metric)
        commands.append([
            ('Rename distance matrix (%s)' % beta_diversity_metric,
             'mv %s %s' % (orig_beta_div_fp, beta_div_fp))
        ])
        dm_fps.append((beta_diversity_metric, beta_div_fp))

        # Prep the principal coordinates command
        pc_fp = '%s/%s_pc.txt' % (output_dir, beta_diversity_metric)
        try:
            params_str = get_params_str(params['principal_coordinates'])
        except KeyError:
            params_str = ''
        # Build the principal coordinates command
        pc_cmd = '%s %s/principal_coordinates.py -i %s -o %s %s' %\
         (python_exe_fp, script_dir, beta_div_fp, pc_fp, params_str)
        commands.append(\
         [('Principal coordinates (%s)' % beta_diversity_metric, pc_cmd)])

        # Generate 3d plots
        if not suppress_3d_plots:
            # Prep the continuous-coloring 3d plots command
            continuous_3d_dir = '%s/%s_3d_continuous/' %\
             (output_dir, beta_diversity_metric)
            create_dir(continuous_3d_dir)
            try:
                params_str = get_params_str(params['make_3d_plots'])
            except KeyError:
                params_str = ''
            # Build the continuous-coloring 3d plots command
            continuous_3d_command = \
             '%s %s/make_3d_plots.py -p %s -i %s -o %s -m %s %s' %\
              (python_exe_fp, script_dir, prefs_fp, pc_fp, continuous_3d_dir,
               mapping_fp, params_str)

            # Prep the discrete-coloring 3d plots command
            discrete_3d_dir = '%s/%s_3d_discrete/' %\
             (output_dir, beta_diversity_metric)
            create_dir(discrete_3d_dir)
            try:
                params_str = get_params_str(params['make_3d_plots'])
            except KeyError:
                params_str = ''
            # Build the discrete-coloring 3d plots command
            discrete_3d_command = \
             '%s %s/make_3d_plots.py -b "%s" -i %s -o %s -m %s %s' %\
              (python_exe_fp, script_dir, mapping_fields, pc_fp, discrete_3d_dir,
               mapping_fp, params_str)

            commands.append([\
              ('Make 3D plots (continuous coloring, %s)' %\
                beta_diversity_metric,continuous_3d_command),\
              ('Make 3D plots (discrete coloring, %s)' %\
                beta_diversity_metric,discrete_3d_command,)])

        # Generate 3d plots
        if not suppress_2d_plots:
            # Prep the continuous-coloring 3d plots command
            continuous_2d_dir = '%s/%s_2d_continuous/' %\
             (output_dir, beta_diversity_metric)
            create_dir(continuous_2d_dir)
            try:
                params_str = get_params_str(params['make_2d_plots'])
            except KeyError:
                params_str = ''
            # Build the continuous-coloring 3d plots command
            continuous_2d_command = \
             '%s %s/make_2d_plots.py -p %s -i %s -o %s -m %s %s' %\
              (python_exe_fp, script_dir, prefs_fp, pc_fp, continuous_2d_dir,
               mapping_fp, params_str)

            # Prep the discrete-coloring 3d plots command
            discrete_2d_dir = '%s/%s_2d_discrete/' %\
             (output_dir, beta_diversity_metric)
            create_dir(discrete_2d_dir)
            try:
                params_str = get_params_str(params['make_2d_plots'])
            except KeyError:
                params_str = ''
            # Build the discrete-coloring 2d plots command
            discrete_2d_command = \
             '%s %s/make_2d_plots.py -b "%s" -i %s -o %s -m %s %s' %\
              (python_exe_fp, script_dir, mapping_fields, pc_fp, discrete_2d_dir,
               mapping_fp, params_str)

            commands.append([\
              ('Make 2D plots (continuous coloring, %s)' %\
                beta_diversity_metric,continuous_2d_command),\
              ('Make 2D plots (discrete coloring, %s)' %\
                beta_diversity_metric,discrete_2d_command,)])

        if histogram_categories:
            # Prep the discrete-coloring 3d plots command
            histograms_dir = '%s/%s_histograms/' %\
             (output_dir, beta_diversity_metric)
            create_dir(histograms_dir)
            try:
                params_str = get_params_str(params['make_distance_histograms'])
            except KeyError:
                params_str = ''
            # Build the make_distance_histograms command
            distance_histograms_command = \
             '%s %s/make_distance_histograms.py -d %s -o %s -m %s -f "%s" %s' %\
              (python_exe_fp, script_dir, beta_div_fp,
               histograms_dir, mapping_fp,
               ','.join(histogram_categories), params_str)

            commands.append([\
              ('Make Distance Histograms (%s)' %\
                beta_diversity_metric,distance_histograms_command)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)

    return dm_fps
Exemple #6
0
def run_beta_diversity_through_plots(otu_table_fp, 
                                     mapping_fp,
                                     output_dir,
                                     command_handler,
                                     params,
                                     qiime_config,
                                     color_by_interesting_fields_only=True,
                                     sampling_depth=None,
                                     histogram_categories=None,
                                     tree_fp=None,
                                     parallel=False,
                                     logger=None,
                                     suppress_3d_plots=False,
                                     suppress_2d_plots=False,
                                     suppress_md5=False,
                                     status_update_callback=print_to_stdout):
    """ Run the data preparation steps of Qiime 
    
        The steps performed by this function are:
         1) Compute a beta diversity distance matrix;
         2) Peform a principal coordinates analysis on the result of
          Step 1;
         3) Generate a 3D prefs file for optimized coloring of continuous
          variables;
         4) Generate a 3D plot for all mapping fields with colors
          optimized for continuous data;
         5) Generate a 3D plot for all mapping fields with colors
          optimized for discrete data.
    
    """  
    # Prepare some variables for the later steps
    otu_table_dir, otu_table_filename = split(otu_table_fp)
    otu_table_basename, otu_table_ext = splitext(otu_table_filename)
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    if logger == None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False
    
    if not suppress_md5:
        log_input_md5s(logger,[otu_table_fp,mapping_fp,tree_fp])
    
    mapping_data, mapping_header, mapping_comments =\
     parse_mapping_file(open(mapping_fp,'U'))
    if histogram_categories:
        invalid_categories = set(histogram_categories) - set(mapping_header)
        if invalid_categories:
            raise ValueError,\
             "Invalid histogram categories - these must exactly match "+\
             "mapping file column headers: %s" % (' '.join(invalid_categories))
    # Get the interesting mapping fields to color by -- if none are
    # interesting, take all of them. Interesting is defined as those
    # which have greater than one value and fewer values than the number 
    # of samples
    if color_by_interesting_fields_only:
        mapping_fields =\
          get_interesting_mapping_fields(mapping_data, mapping_header) or\
          mapping_header
    else:
        mapping_fields = mapping_header
    mapping_fields = ','.join(mapping_fields)
    
    if sampling_depth:
        # Sample the OTU table at even depth
        even_sampled_otu_table_fp = '%s/%s_even%d%s' %\
         (output_dir, otu_table_basename, 
          sampling_depth, otu_table_ext)
        single_rarefaction_cmd = \
         '%s %s/single_rarefaction.py -i %s -o %s -d %d' %\
         (python_exe_fp, script_dir, otu_table_fp,
          even_sampled_otu_table_fp, sampling_depth)
        commands.append([
         ('Sample OTU table at %d seqs/sample' % sampling_depth,
          single_rarefaction_cmd)])
        otu_table_fp = even_sampled_otu_table_fp
        otu_table_dir, otu_table_filename = split(even_sampled_otu_table_fp)
        otu_table_basename, otu_table_ext = splitext(otu_table_filename)
    try:
        beta_diversity_metrics = params['beta_diversity']['metrics'].split(',')
    except KeyError:
        beta_diversity_metrics = ['weighted_unifrac','unweighted_unifrac']
    
    # Prep the 3d prefs file generator command
    prefs_fp = '%s/prefs.txt' % output_dir
    try:
        params_str = get_params_str(params['make_prefs_file'])
    except KeyError:
        params_str = ''
    if not 'mapping_headers_to_use' in params['make_prefs_file']:
        params_str = '%s --mapping_headers_to_use %s' \
         % (params_str,mapping_fields)
    # Build the 3d prefs file generator command
    prefs_cmd = \
     '%s %s/make_prefs_file.py -m %s -o %s %s' %\
     (python_exe_fp, script_dir, mapping_fp, prefs_fp, params_str)
    commands.append([('Build prefs file', prefs_cmd)])
    
    dm_fps = []
    for beta_diversity_metric in beta_diversity_metrics:
        
        # Prep the beta-diversity command
        try:
            bdiv_params_copy = params['beta_diversity'].copy()
        except KeyError:
            bdiv_params_copy = {}
        try:
            del bdiv_params_copy['metrics']
        except KeyError:
            pass
        
        params_str = get_params_str(bdiv_params_copy)
            
        if tree_fp:
            params_str = '%s -t %s ' % (params_str,tree_fp)
            
        # Build the beta-diversity command
        if parallel:
            # Grab the parallel-specific parameters
            try:
                params_str += get_params_str(params['parallel'])
            except KeyError:
                pass
            beta_div_cmd = '%s %s/parallel_beta_diversity.py -i %s -o %s --metrics %s -T %s' %\
             (python_exe_fp, script_dir, otu_table_fp,
              output_dir, beta_diversity_metric, params_str)
            commands.append(\
             [('Beta Diversity (%s)' % beta_diversity_metric, beta_div_cmd)])
        else:
            beta_div_cmd = '%s %s/beta_diversity.py -i %s -o %s --metrics %s %s' %\
             (python_exe_fp, script_dir, otu_table_fp, 
              output_dir, beta_diversity_metric, params_str)
            commands.append(\
             [('Beta Diversity (%s)' % beta_diversity_metric, beta_div_cmd)])
        
        
        orig_beta_div_fp = '%s/%s_%s.txt' % \
         (output_dir, beta_diversity_metric, otu_table_basename)
        beta_div_fp = '%s/%s_dm.txt' % \
         (output_dir, beta_diversity_metric)
        commands.append([('Rename distance matrix (%s)' % beta_diversity_metric,
                         'mv %s %s' % (orig_beta_div_fp, beta_div_fp))])
        dm_fps.append((beta_diversity_metric, beta_div_fp))
        
        # Prep the principal coordinates command
        pc_fp = '%s/%s_pc.txt' % (output_dir, beta_diversity_metric)
        try:
            params_str = get_params_str(params['principal_coordinates'])
        except KeyError:
            params_str = ''
        # Build the principal coordinates command
        pc_cmd = '%s %s/principal_coordinates.py -i %s -o %s %s' %\
         (python_exe_fp, script_dir, beta_div_fp, pc_fp, params_str)
        commands.append(\
         [('Principal coordinates (%s)' % beta_diversity_metric, pc_cmd)])
        
        # Generate 3d plots
        if not suppress_3d_plots:
            # Prep the continuous-coloring 3d plots command
            continuous_3d_dir = '%s/%s_3d_continuous/' %\
             (output_dir, beta_diversity_metric)
            create_dir(continuous_3d_dir)
            try:
                params_str = get_params_str(params['make_3d_plots'])
            except KeyError:
                params_str = ''
            # Build the continuous-coloring 3d plots command
            continuous_3d_command = \
             '%s %s/make_3d_plots.py -p %s -i %s -o %s -m %s %s' %\
              (python_exe_fp, script_dir, prefs_fp, pc_fp, continuous_3d_dir,
               mapping_fp, params_str)
    
            # Prep the discrete-coloring 3d plots command
            discrete_3d_dir = '%s/%s_3d_discrete/' %\
             (output_dir, beta_diversity_metric)
            create_dir(discrete_3d_dir)
            try:
                params_str = get_params_str(params['make_3d_plots'])
            except KeyError:
                params_str = ''
            # Build the discrete-coloring 3d plots command
            discrete_3d_command = \
             '%s %s/make_3d_plots.py -b "%s" -i %s -o %s -m %s %s' %\
              (python_exe_fp, script_dir, mapping_fields, pc_fp, discrete_3d_dir,
               mapping_fp, params_str)
       
            commands.append([\
              ('Make 3D plots (continuous coloring, %s)' %\
                beta_diversity_metric,continuous_3d_command),\
              ('Make 3D plots (discrete coloring, %s)' %\
                beta_diversity_metric,discrete_3d_command,)])
    
        # Generate 3d plots
        if not suppress_2d_plots:
            # Prep the continuous-coloring 3d plots command
            continuous_2d_dir = '%s/%s_2d_continuous/' %\
             (output_dir, beta_diversity_metric)
            create_dir(continuous_2d_dir)
            try:
                params_str = get_params_str(params['make_2d_plots'])
            except KeyError:
                params_str = ''
            # Build the continuous-coloring 3d plots command
            continuous_2d_command = \
             '%s %s/make_2d_plots.py -p %s -i %s -o %s -m %s %s' %\
              (python_exe_fp, script_dir, prefs_fp, pc_fp, continuous_2d_dir,
               mapping_fp, params_str)
               
            # Prep the discrete-coloring 3d plots command
            discrete_2d_dir = '%s/%s_2d_discrete/' %\
             (output_dir, beta_diversity_metric)
            create_dir(discrete_2d_dir)
            try:
                params_str = get_params_str(params['make_2d_plots'])
            except KeyError:
                params_str = ''
            # Build the discrete-coloring 2d plots command
            discrete_2d_command = \
             '%s %s/make_2d_plots.py -b "%s" -i %s -o %s -m %s %s' %\
              (python_exe_fp, script_dir, mapping_fields, pc_fp, discrete_2d_dir,
               mapping_fp, params_str)
       
            commands.append([\
              ('Make 2D plots (continuous coloring, %s)' %\
                beta_diversity_metric,continuous_2d_command),\
              ('Make 2D plots (discrete coloring, %s)' %\
                beta_diversity_metric,discrete_2d_command,)])
                
        if histogram_categories:
            # Prep the discrete-coloring 3d plots command
            histograms_dir = '%s/%s_histograms/' %\
             (output_dir, beta_diversity_metric)
            create_dir(histograms_dir)
            try:
                params_str = get_params_str(params['make_distance_histograms'])
            except KeyError:
                params_str = ''
            # Build the make_distance_histograms command
            distance_histograms_command = \
             '%s %s/make_distance_histograms.py -d %s -o %s -m %s -f "%s" %s' %\
              (python_exe_fp, script_dir, beta_div_fp, 
               histograms_dir, mapping_fp, 
               ','.join(histogram_categories), params_str)
       
            commands.append([\
              ('Make Distance Histograms (%s)' %\
                beta_diversity_metric,distance_histograms_command)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
    
    return dm_fps