Example #1
    def test_run_alpha_rarefaction_stderr_and_stddev(self):
        """ run_alpha_rarefaction generates expected results """

        run_alpha_rarefaction(self.test_data['biom'][0],
                              self.test_data['map'][0],
                              self.test_out,
                              call_commands_serially,
                              self.params,
                              self.qiime_config,
                              tree_fp=self.test_data['tree'][0],
                              num_steps=5,
                              parallel=False,
                              min_rare_depth=3,
                              max_rare_depth=18,
                              status_update_callback=no_status_updates,
                              plot_stderr_and_stddev=True)

        html_fp_stderr = join(self.test_out, 'alpha_rarefaction_plots_stderr',
                              'rarefaction_plots.html')
        pd_averages_fp_stderr = join(self.test_out,
                                     'alpha_rarefaction_plots_stderr',
                                     'average_tables',
                                     'PD_whole_treeSampleType.txt')
        html_fp_stddev = join(self.test_out, 'alpha_rarefaction_plots_stddev',
                              'rarefaction_plots.html')
        pd_averages_fp_stddev = join(self.test_out,
                                     'alpha_rarefaction_plots_stddev',
                                     'average_tables',
                                     'PD_whole_treeSampleType.txt')
        pd_collated_fp = join(self.test_out, 'alpha_div_collated',
                              'PD_whole_tree.txt')

        # Confirm that palm and gut alpha diversities are different,
        # and suggestive of statistical significance (we only have a
        # few sequences, so we don't get significant results)
        ttest_res, alpha_avg = compare_alpha_diversities(
            open(pd_collated_fp),
            open(self.test_data['map'][0]),
            'SampleType',
            18,
            test_type='parametric')
        feces_palm_t = ttest_res[('feces', 'L_palm')][0]
        self.assertTrue(feces_palm_t < 0,
                        "t-statistic too high: %1.3f, but should be less than 0"
                        % feces_palm_t)

        # check that final output files have non-zero size
        self.assertTrue(getsize(html_fp_stderr) > 0)
        self.assertTrue(getsize(pd_averages_fp_stderr) > 0)
        self.assertTrue(getsize(html_fp_stddev) > 0)
        self.assertTrue(getsize(pd_averages_fp_stddev) > 0)

        # Check that the log file is created and has size > 0
        log_fp = glob(join(self.test_out, 'log*.txt'))[0]
        self.assertTrue(getsize(log_fp) > 0)
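The test above exercises a version of compare_alpha_diversities that returns a pair (ttest_res, alpha_avg), where ttest_res maps group-name pairs to per-comparison results and index 0 is the t-statistic, as used in the assertion. A minimal sketch of inspecting that result outside the test; the file paths are placeholders, the group names come from the test data, and the import path is an assumption (it may differ between QIIME releases):

from qiime.compare_alpha_diversity import compare_alpha_diversities  # import path assumed

collated_fp = 'alpha_div_collated/PD_whole_tree.txt'  # placeholder inputs
mapping_fp = 'map.txt'
ttest_res, alpha_avg = compare_alpha_diversities(open(collated_fp),
                                                 open(mapping_fp),
                                                 'SampleType',
                                                 18,
                                                 test_type='parametric')
result = ttest_res[('feces', 'L_palm')]
t_stat = result[0]  # t-statistic, as indexed in the test above
print('feces vs. L_palm: t = %1.3f' % t_stat)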
Example #2
    def test_run_alpha_rarefaction_stderr_and_stddev(self):
        """ run_alpha_rarefaction generates expected results """

        run_alpha_rarefaction(
            self.test_data['biom'][0],
            self.test_data['map'][0],
            self.test_out,
            call_commands_serially,
            self.params,
            self.qiime_config,
            tree_fp=self.test_data['tree'][0],
            num_steps=5,
            parallel=False,
            min_rare_depth=3,
            max_rare_depth=18,
            status_update_callback=no_status_updates,
            plot_stderr_and_stddev=True)

        html_fp_stderr = join(self.test_out, 'alpha_rarefaction_plots_stderr',
                              'rarefaction_plots.html')
        pd_averages_fp_stderr = join(
            self.test_out, 'alpha_rarefaction_plots_stderr',
            'average_tables', 'PD_whole_treeSampleType.txt')
        html_fp_stddev = join(self.test_out, 'alpha_rarefaction_plots_stddev',
                              'rarefaction_plots.html')
        pd_averages_fp_stddev = join(
            self.test_out, 'alpha_rarefaction_plots_stddev',
            'average_tables', 'PD_whole_treeSampleType.txt')
        pd_collated_fp = join(self.test_out, 'alpha_div_collated',
                              'PD_whole_tree.txt')

        # Confirm that palm and gut alpha diversities are different,
        # and suggestive of statistical significance (we only have a
        # few sequences, so we don't get significant results)
        ttest_res, alpha_avg = compare_alpha_diversities(
            open(pd_collated_fp),
            open(self.test_data['map'][0]),
            'SampleType',
            18,
            test_type='parametric')
        feces_palm_t = ttest_res[('feces', 'L_palm')][0]
        self.assertTrue(feces_palm_t < 0,
                        "t-statistic too high: %1.3f, but should be less than 0"
                        % feces_palm_t)

        # check that final output files have non-zero size
        self.assertTrue(getsize(html_fp_stderr) > 0)
        self.assertTrue(getsize(html_fp_stddev) > 0)

        # Check that the log file is created and has size > 0
        log_fp = glob(join(self.test_out, 'log*.txt'))[0]
        self.assertTrue(getsize(log_fp) > 0)
Example #3
    def test_run_alpha_rarefaction_parallel(self):
        """ run_alpha_rarefaction generates expected results when run in parallel
        """

        run_alpha_rarefaction(self.test_data['biom'][0],
                              self.test_data['map'][0],
                              self.test_out,
                              call_commands_serially,
                              self.params,
                              self.qiime_config,
                              tree_fp=self.test_data['tree'][0],
                              num_steps=5,
                              parallel=True,
                              min_rare_depth=3,
                              max_rare_depth=18,
                              status_update_callback=no_status_updates)

        html_fp = join(self.test_out, 'alpha_rarefaction_plots',
                       'rarefaction_plots.html')
        pd_averages_fp = join(self.test_out, 'alpha_rarefaction_plots',
                              'average_tables', 'PD_whole_treeSampleType.txt')
        pd_collated_fp = join(self.test_out, 'alpha_div_collated',
                              'PD_whole_tree.txt')

        # Confirm that palm and gut alpha diversities are different,
        # and suggestive of statistical significance (we only have a
        # few sequences, so we don't get significant results)
        a = compare_alpha_diversities(open(pd_collated_fp),
                                      open(self.test_data['map'][0]),
                                      'SampleType',
                                      18,
                                      test_type='parametric')
        self.assertTrue(a['feces,L_palm'][1] < 0.15)

        # check that final output files have non-zero size
        self.assertTrue(getsize(html_fp) > 0)
        self.assertTrue(getsize(pd_averages_fp) > 0)

        # Check that the log file is created and has size > 0
        log_fp = glob(join(self.test_out, 'log*.txt'))[0]
        self.assertTrue(getsize(log_fp) > 0)
Example #4
def run_core_diversity_analyses(
    biom_fp,
    mapping_fp,
    sampling_depth,
    output_dir,
    qiime_config,
    command_handler=call_commands_serially,
    tree_fp=None,
    params=None,
    categories=None,
    arare_min_rare_depth=10,
    arare_num_steps=10,
    parallel=False,
    suppress_taxa_summary=False,
    suppress_beta_diversity=False,
    suppress_alpha_diversity=False,
    suppress_otu_category_significance=False,
    status_update_callback=print_to_stdout,
):
    """
    """
    if categories != None:
        # Validate categories provided by the users
        mapping_data, mapping_comments = parse_mapping_file_to_dict(open(mapping_fp, "U"))
        metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                raise ValueError, (
                    "Category '%s' is not a column header "
                    "in your mapping file. "
                    "Categories are case and white space sensitive. Valid "
                    "choices are: (%s)" % (c, ", ".join(metadata_map.CategoryNames))
                )
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError, (
                    "Category '%s' contains only one value. "
                    "Categories analyzed here require at least two values." % c
                )

    else:
        categories = []

    # prep some variables
    if params == None:
        params = parse_qiime_parameters([])

    create_dir(output_dir)
    index_fp = "%s/index.html" % output_dir
    index_links = []
    commands = []

    # begin logging
    log_fp = generate_log_fp(output_dir)
    index_links.append(("Master run log", log_fp, _index_headers["run_summary"]))
    logger = WorkflowLogger(log_fp, params=params, qiime_config=qiime_config)
    input_fps = [biom_fp, mapping_fp]
    if tree_fp != None:
        input_fps.append(tree_fp)
    log_input_md5s(logger, input_fps)

    # run print_biom_table_summary.py on input BIOM table
    try:
        params_str = get_params_str(params["print_biom_table_summary"])
    except KeyError:
        params_str = ""
    biom_table_stats_output_fp = "%s/biom_table_summary.txt" % output_dir
    print_biom_table_summary_cmd = "print_biom_table_summary.py -i %s -o %s --suppress_md5 %s" % (
        biom_fp,
        biom_table_stats_output_fp,
        params_str,
    )
    index_links.append(("BIOM table statistics", biom_table_stats_output_fp, _index_headers["run_summary"]))
    commands.append([("Generate BIOM table summary", print_biom_table_summary_cmd)])

    # filter samples with fewer observations than the requested sampling_depth.
    # since these get filtered for some analyses (eg beta diversity after
    # even sampling) it's useful to filter them here so they're filtered
    # from all analyses.
    filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
    filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" % (
        biom_fp,
        filtered_biom_fp,
        sampling_depth,
    )
    commands.append(
        [
            (
                "Filter low sequence count samples from table (minimum sequence count: %d)" % sampling_depth,
                filter_samples_cmd,
            )
        ]
    )
    biom_fp = filtered_biom_fp

    # run initial commands and reset the command list
    command_handler(commands, status_update_callback, logger, close_logger_on_success=False)
    commands = []

    if not suppress_beta_diversity:
        bdiv_even_output_dir = "%s/bdiv_even%d/" % (output_dir, sampling_depth)
        even_dm_fps = run_beta_diversity_through_plots(
            otu_table_fp=biom_fp,
            mapping_fp=mapping_fp,
            output_dir=bdiv_even_output_dir,
            command_handler=command_handler,
            params=params,
            qiime_config=qiime_config,
            sampling_depth=sampling_depth,
            # force suppression of distance histograms - boxplots work better
            # in this context, and are created below.
            histogram_categories=[],
            tree_fp=tree_fp,
            parallel=parallel,
            logger=logger,
            suppress_md5=True,
            status_update_callback=status_update_callback,
        )

        for bdiv_metric, dm_fp in even_dm_fps:
            for category in categories:
                boxplots_output_dir = "%s/%s_boxplots/" % (bdiv_even_output_dir, bdiv_metric)
                try:
                    params_str = get_params_str(params["make_distance_boxplots"])
                except KeyError:
                    params_str = ""
                boxplots_cmd = "make_distance_boxplots.py -d %s -f %s -o %s -m %s -n 999 %s" % (
                    dm_fp,
                    category,
                    boxplots_output_dir,
                    mapping_fp,
                    params_str,
                )
                commands.append([("Boxplots (%s)" % category, boxplots_cmd)])
                index_links.append(
                    (
                        "Distance boxplots (%s)" % bdiv_metric,
                        "%s/%s_Distances.pdf" % (boxplots_output_dir, category),
                        _index_headers["beta_diversity_even"] % sampling_depth,
                    )
                )
                index_links.append(
                    (
                        "Distance boxplots statistics (%s)" % bdiv_metric,
                        "%s/%s_Stats.txt" % (boxplots_output_dir, category),
                        _index_headers["beta_diversity_even"] % sampling_depth,
                    )
                )

            index_links.append(
                (
                    "3D plot (%s, continuous coloring)" % bdiv_metric,
                    "%s/%s_3d_continuous/%s_pc_3D_PCoA_plots.html" % (bdiv_even_output_dir, bdiv_metric, bdiv_metric),
                    _index_headers["beta_diversity_even"] % sampling_depth,
                )
            )
            index_links.append(
                (
                    "3D plot (%s, discrete coloring)" % bdiv_metric,
                    "%s/%s_3d_discrete/%s_pc_3D_PCoA_plots.html" % (bdiv_even_output_dir, bdiv_metric, bdiv_metric),
                    _index_headers["beta_diversity_even"] % sampling_depth,
                )
            )
            index_links.append(
                (
                    "2D plot (%s, continuous coloring)" % bdiv_metric,
                    "%s/%s_2d_continuous/%s_pc_2D_PCoA_plots.html" % (bdiv_even_output_dir, bdiv_metric, bdiv_metric),
                    _index_headers["beta_diversity_even"] % sampling_depth,
                )
            )
            index_links.append(
                (
                    "2D plot (%s, discrete coloring)" % bdiv_metric,
                    "%s/%s_2d_discrete/%s_pc_2D_PCoA_plots.html" % (bdiv_even_output_dir, bdiv_metric, bdiv_metric),
                    _index_headers["beta_diversity_even"] % sampling_depth,
                )
            )
            index_links.append(
                (
                    "Distance matrix (%s)" % bdiv_metric,
                    "%s/%s_dm.txt" % (bdiv_even_output_dir, bdiv_metric),
                    _index_headers["beta_diversity_even"] % sampling_depth,
                )
            )
            index_links.append(
                (
                    "Principal coordinate matrix (%s)" % bdiv_metric,
                    "%s/%s_pc.txt" % (bdiv_even_output_dir, bdiv_metric),
                    _index_headers["beta_diversity_even"] % sampling_depth,
                )
            )

    if not suppress_alpha_diversity:
        ## Alpha rarefaction workflow
        arare_full_output_dir = "%s/arare_max%d/" % (output_dir, sampling_depth)
        run_alpha_rarefaction(
            otu_table_fp=biom_fp,
            mapping_fp=mapping_fp,
            output_dir=arare_full_output_dir,
            command_handler=command_handler,
            params=params,
            qiime_config=qiime_config,
            tree_fp=tree_fp,
            num_steps=arare_num_steps,
            parallel=parallel,
            logger=logger,
            min_rare_depth=arare_min_rare_depth,
            max_rare_depth=sampling_depth,
            suppress_md5=True,
            status_update_callback=status_update_callback,
        )

        index_links.append(
            (
                "Alpha rarefaction plots",
                "%s/alpha_rarefaction_plots/rarefaction_plots.html" % arare_full_output_dir,
                _index_headers["alpha_diversity"],
            )
        )

        collated_alpha_diversity_fps = glob("%s/alpha_div_collated/*txt" % arare_full_output_dir)
        try:
            params_str = get_params_str(params["compare_alpha_diversity"])
        except KeyError:
            params_str = ""
        for category in categories:
            for collated_alpha_diversity_fp in collated_alpha_diversity_fps:
                alpha_metric = splitext(split(collated_alpha_diversity_fp)[1])[0]
                alpha_comparison_output_fp = "%s/%s_%s.txt" % (arare_full_output_dir, category, alpha_metric)
                compare_alpha_cmd = "compare_alpha_diversity.py -i %s -m %s -c %s -o %s -n 999 %s" % (
                    collated_alpha_diversity_fp,
                    mapping_fp,
                    category,
                    alpha_comparison_output_fp,
                    params_str,
                )
                commands.append([("Compare alpha diversity (%s, %s)" % (category, alpha_metric), compare_alpha_cmd)])
                index_links.append(
                    (
                        "Alpha diversity statistics (%s, %s)" % (category, alpha_metric),
                        alpha_comparison_output_fp,
                        _index_headers["alpha_diversity"],
                    )
                )

    if not suppress_taxa_summary:
        taxa_plots_output_dir = "%s/taxa_plots/" % output_dir
        run_summarize_taxa_through_plots(
            otu_table_fp=biom_fp,
            mapping_fp=mapping_fp,
            output_dir=taxa_plots_output_dir,
            mapping_cat=None,
            sort=True,
            command_handler=command_handler,
            params=params,
            qiime_config=qiime_config,
            logger=logger,
            suppress_md5=True,
            status_update_callback=status_update_callback,
        )

        index_links.append(
            (
                "Taxa summary bar plots",
                "%s/taxa_summary_plots/bar_charts.html" % taxa_plots_output_dir,
                _index_headers["taxa_summary"],
            )
        )
        index_links.append(
            (
                "Taxa summary area plots",
                "%s/taxa_summary_plots/area_charts.html" % taxa_plots_output_dir,
                _index_headers["taxa_summary"],
            )
        )
        for category in categories:
            taxa_plots_output_dir = "%s/taxa_plots_%s/" % (output_dir, category)
            run_summarize_taxa_through_plots(
                otu_table_fp=biom_fp,
                mapping_fp=mapping_fp,
                output_dir=taxa_plots_output_dir,
                mapping_cat=category,
                sort=True,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                logger=logger,
                suppress_md5=True,
                status_update_callback=status_update_callback,
            )

            index_links.append(
                (
                    "Taxa summary bar plots",
                    "%s/taxa_summary_plots/bar_charts.html" % taxa_plots_output_dir,
                    _index_headers["taxa_summary_categorical"] % category,
                )
            )
            index_links.append(
                (
                    "Taxa summary area plots",
                    "%s/taxa_summary_plots/area_charts.html" % taxa_plots_output_dir,
                    _index_headers["taxa_summary_categorical"] % category,
                )
            )

    if not suppress_otu_category_significance:
        # OTU category significance
        for category in categories:
            category_signifance_fp = "%s/category_significance_%s.txt" % (output_dir, category)
            try:
                params_str = get_params_str(params["otu_category_significance"])
            except KeyError:
                params_str = ""
            # Build the OTU category significance command
            category_significance_cmd = "otu_category_significance.py -i %s -m %s -c %s -o %s %s" % (
                biom_fp,
                mapping_fp,
                category,
                category_signifance_fp,
                params_str,
            )
            commands.append([("OTU category significance (%s)" % category, category_significance_cmd)])

            index_links.append(
                ("Category significance (%s)" % category, category_signifance_fp, _index_headers["otu_category_sig"])
            )

    commands.append([("Compress the filtered BIOM table", "gzip %s" % filtered_biom_fp)])
    index_links.append(
        (
            "Filtered BIOM table (minimum sequence count: %d)" % sampling_depth,
            "%s.gz" % filtered_biom_fp,
            _index_headers["run_summary"],
        )
    )

    command_handler(commands, status_update_callback, logger)
    generate_index_page(index_links, index_fp)
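A minimal sketch of invoking the workflow function defined above directly; the file paths and the category name are placeholders, and the import paths are assumptions (they differ between QIIME releases):

from qiime.util import load_qiime_config
from qiime.parse import parse_qiime_parameters
from qiime.workflow.core_diversity_analyses import run_core_diversity_analyses  # module path assumed

run_core_diversity_analyses(biom_fp='otu_table.biom',  # placeholder inputs
                            mapping_fp='map.txt',
                            sampling_depth=100,
                            output_dir='core_diversity_out',
                            qiime_config=load_qiime_config(),
                            tree_fp='rep_set.tre',  # optional; needed for phylogenetic metrics
                            params=parse_qiime_parameters([]),
                            categories=['SampleType'])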
Example #5
def run_core_diversity_analyses(biom_fp,
                                mapping_fp,
                                sampling_depth,
                                output_dir,
                                qiime_config,
                                command_handler=call_commands_serially,
                                tree_fp=None,
                                params=None,
                                categories=None,
                                arare_min_rare_depth=10,
                                arare_num_steps=10,
                                parallel=False,
                                suppress_taxa_summary=False,
                                suppress_beta_diversity=False,
                                suppress_alpha_diversity=False,
                                suppress_group_significance=False,
                                status_update_callback=print_to_stdout):
    """
    """
    if categories is not None:
        # Validate categories provided by the users
        mapping_data, mapping_comments = \
            parse_mapping_file_to_dict(open(mapping_fp, 'U'))
        metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                raise ValueError(
                    "Category '%s' is not a column header "
                    "in your mapping file. "
                    "Categories are case and white space sensitive. Valid "
                    "choices are: (%s)" %
                    (c, ', '.join(metadata_map.CategoryNames)))
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError(
                    "Category '%s' contains only one value. "
                    "Categories analyzed here require at least two values." %
                    c)

    else:
        categories = []
    comma_separated_categories = ','.join(categories)
    # prep some variables
    if params is None:
        params = parse_qiime_parameters([])

    create_dir(output_dir)
    index_fp = '%s/index.html' % output_dir
    index_links = []
    commands = []

    # begin logging
    old_log_fps = glob(join(output_dir, 'log_20*txt'))
    log_fp = generate_log_fp(output_dir)
    index_links.append(
        ('Master run log', log_fp, _index_headers['run_summary']))
    for old_log_fp in old_log_fps:
        index_links.append(
            ('Previous run log', old_log_fp, _index_headers['run_summary']))
    logger = WorkflowLogger(log_fp, params=params, qiime_config=qiime_config)
    input_fps = [biom_fp, mapping_fp]
    if tree_fp is not None:
        input_fps.append(tree_fp)
    log_input_md5s(logger, input_fps)

    # run 'biom summarize-table' on input BIOM table
    try:
        params_str = get_params_str(params['biom-summarize-table'])
    except KeyError:
        params_str = ''
    biom_table_stats_output_fp = '%s/biom_table_summary.txt' % output_dir
    if not exists(biom_table_stats_output_fp):
        biom_table_summary_cmd = \
            "biom summarize-table -i %s -o %s %s" % \
            (biom_fp, biom_table_stats_output_fp, params_str)
        commands.append([('Generate BIOM table summary',
                          biom_table_summary_cmd)])
    else:
        logger.write("Skipping 'biom summarize-table' as %s exists.\n\n" %
                     biom_table_stats_output_fp)
    index_links.append(('BIOM table statistics', biom_table_stats_output_fp,
                        _index_headers['run_summary']))

    # filter samples with fewer observations than the requested sampling_depth.
    # since these get filtered for some analyses (eg beta diversity after
    # even sampling) it's useful to filter them here so they're filtered
    # from all analyses.
    filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
    if not exists(filtered_biom_fp):
        filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" %\
            (biom_fp, filtered_biom_fp, sampling_depth)
        commands.append([(
            'Filter low sequence count samples from table (minimum sequence count: %d)'
            % sampling_depth, filter_samples_cmd)])
    else:
        logger.write(
            "Skipping filter_samples_from_otu_table.py as %s exists.\n\n" %
            filtered_biom_fp)
    biom_fp = filtered_biom_fp

    # rarify the BIOM table to sampling_depth
    rarefied_biom_fp = "%s/table_even%d.biom" % (output_dir, sampling_depth)
    if not exists(rarefied_biom_fp):
        single_rarefaction_cmd = "single_rarefaction.py -i %s -o %s -d %d" %\
            (biom_fp, rarefied_biom_fp, sampling_depth)
        commands.append([
            ('Rarify the OTU table to %d sequences/sample' % sampling_depth,
             single_rarefaction_cmd)
        ])
    else:
        logger.write("Skipping single_rarefaction.py as %s exists.\n\n" %
                     rarefied_biom_fp)

    # run initial commands and reset the command list
    if len(commands) > 0:
        command_handler(commands,
                        status_update_callback,
                        logger,
                        close_logger_on_success=False)
        commands = []

    if not suppress_beta_diversity:
        bdiv_even_output_dir = '%s/bdiv_even%d/' % (output_dir, sampling_depth)
        # Need to check for the existence of any distance matrices, since the user
        # can select which will be generated.
        existing_dm_fps = glob('%s/*_dm.txt' % bdiv_even_output_dir)
        if len(existing_dm_fps) == 0:
            even_dm_fps = run_beta_diversity_through_plots(
                otu_table_fp=rarefied_biom_fp,
                mapping_fp=mapping_fp,
                output_dir=bdiv_even_output_dir,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                # Note: we pass sampling depth=None here as
                # we rarify the BIOM table above and pass that
                # in here.
                sampling_depth=None,
                tree_fp=tree_fp,
                parallel=parallel,
                logger=logger,
                suppress_md5=True,
                status_update_callback=status_update_callback)
        else:
            logger.write(
                "Skipping beta_diversity_through_plots.py as %s exist(s).\n\n"
                % ', '.join(existing_dm_fps))
            even_dm_fps = [(split(fp)[1].strip('_dm.txt'), fp)
                           for fp in existing_dm_fps]

        # Get make_distance_boxplots parameters
        try:
            params_str = get_params_str(params['make_distance_boxplots'])
        except KeyError:
            params_str = ''

        for bdiv_metric, dm_fp in even_dm_fps:
            for category in categories:
                boxplots_output_dir = '%s/%s_boxplots/' % (
                    bdiv_even_output_dir, bdiv_metric)
                plot_output_fp = '%s/%s_Distances.pdf' % (boxplots_output_dir,
                                                          category)
                stats_output_fp = '%s/%s_Stats.txt' % (boxplots_output_dir,
                                                       category)
                if not exists(plot_output_fp):
                    boxplots_cmd = \
                        'make_distance_boxplots.py -d %s -f %s -o %s -m %s -n 999 %s' %\
                        (dm_fp, category, boxplots_output_dir,
                         mapping_fp, params_str)
                    commands.append([('Boxplots (%s)' % category, boxplots_cmd)
                                     ])
                else:
                    logger.write(
                        "Skipping make_distance_boxplots.py for %s as %s exists.\n\n"
                        % (category, plot_output_fp))
                index_links.append(
                    ('Distance boxplots (%s)' % bdiv_metric, plot_output_fp,
                     _index_headers['beta_diversity_even'] % sampling_depth))
                index_links.append(
                    ('Distance boxplots statistics (%s)' % bdiv_metric,
                     stats_output_fp,
                     _index_headers['beta_diversity_even'] % sampling_depth))

            index_links.append(
                ('PCoA plot (%s)' % bdiv_metric,
                 '%s/%s_emperor_pcoa_plot/index.html' %
                 (bdiv_even_output_dir, bdiv_metric),
                 _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(
                ('Distance matrix (%s)' % bdiv_metric,
                 '%s/%s_dm.txt' % (bdiv_even_output_dir, bdiv_metric),
                 _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(
                ('Principal coordinate matrix (%s)' % bdiv_metric,
                 '%s/%s_pc.txt' % (bdiv_even_output_dir, bdiv_metric),
                 _index_headers['beta_diversity_even'] % sampling_depth))

    if not suppress_alpha_diversity:
        # Alpha rarefaction workflow
        arare_full_output_dir = '%s/arare_max%d/' % (output_dir,
                                                     sampling_depth)
        rarefaction_plots_output_fp = \
            '%s/alpha_rarefaction_plots/rarefaction_plots.html' % arare_full_output_dir
        if not exists(rarefaction_plots_output_fp):
            run_alpha_rarefaction(
                otu_table_fp=biom_fp,
                mapping_fp=mapping_fp,
                output_dir=arare_full_output_dir,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                tree_fp=tree_fp,
                num_steps=arare_num_steps,
                parallel=parallel,
                logger=logger,
                min_rare_depth=arare_min_rare_depth,
                max_rare_depth=sampling_depth,
                suppress_md5=True,
                status_update_callback=status_update_callback,
                retain_intermediate_files=False)
        else:
            logger.write("Skipping alpha_rarefaction.py as %s exists.\n\n" %
                         rarefaction_plots_output_fp)

        index_links.append(
            ('Alpha rarefaction plots', rarefaction_plots_output_fp,
             _index_headers['alpha_diversity']))

        collated_alpha_diversity_fps = \
            glob('%s/alpha_div_collated/*txt' % arare_full_output_dir)
        try:
            params_str = get_params_str(params['compare_alpha_diversity'])
        except KeyError:
            params_str = ''

        if len(categories) > 0:
            for collated_alpha_diversity_fp in collated_alpha_diversity_fps:
                alpha_metric = splitext(
                    split(collated_alpha_diversity_fp)[1])[0]
                compare_alpha_output_dir = '%s/compare_%s' % \
                    (arare_full_output_dir, alpha_metric)
                if not exists(compare_alpha_output_dir):
                    compare_alpha_cmd = \
                        'compare_alpha_diversity.py -i %s -m %s -c %s -o %s -n 999 %s' %\
                        (collated_alpha_diversity_fp,
                         mapping_fp,
                         comma_separated_categories,
                         compare_alpha_output_dir,
                         params_str)
                    commands.append([
                        ('Compare alpha diversity (%s)' % alpha_metric,
                         compare_alpha_cmd)
                    ])
                    for category in categories:
                        alpha_comparison_stat_fp = '%s/%s_stats.txt' % \
                            (compare_alpha_output_dir, category)
                        alpha_comparison_boxplot_fp = '%s/%s_boxplots.pdf' % \
                            (compare_alpha_output_dir, category)
                        index_links.append(
                            ('Alpha diversity statistics (%s, %s)' %
                             (category, alpha_metric),
                             alpha_comparison_stat_fp,
                             _index_headers['alpha_diversity']))
                        index_links.append(
                            ('Alpha diversity boxplots (%s, %s)' %
                             (category, alpha_metric),
                             alpha_comparison_boxplot_fp,
                             _index_headers['alpha_diversity']))
                else:
                    logger.write("Skipping compare_alpha_diversity.py"
                                 " for %s as %s exists.\n\n" %
                                 (alpha_metric, compare_alpha_output_dir))
        else:
            logger.write("Skipping compare_alpha_diversity.py as"
                         " no categories were provided.\n\n")

    if not suppress_taxa_summary:
        taxa_plots_output_dir = '%s/taxa_plots/' % output_dir
        # need to check for existence of any html files, since the user can
        # select only certain ones to be generated
        existing_taxa_plot_html_fps = glob(
            join(taxa_plots_output_dir, 'taxa_summary_plots', '*.html'))
        if len(existing_taxa_plot_html_fps) == 0:
            run_summarize_taxa_through_plots(
                otu_table_fp=biom_fp,
                mapping_fp=mapping_fp,
                output_dir=taxa_plots_output_dir,
                mapping_cat=None,
                sort=True,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                logger=logger,
                suppress_md5=True,
                status_update_callback=status_update_callback)
        else:
            logger.write(
                "Skipping summarize_taxa_through_plots.py for as %s exist(s).\n\n"
                % ', '.join(existing_taxa_plot_html_fps))

        index_links.append(
            ('Taxa summary bar plots',
             '%s/taxa_summary_plots/bar_charts.html' % taxa_plots_output_dir,
             _index_headers['taxa_summary']))
        index_links.append(
            ('Taxa summary area plots',
             '%s/taxa_summary_plots/area_charts.html' % taxa_plots_output_dir,
             _index_headers['taxa_summary']))
        for category in categories:
            taxa_plots_output_dir = '%s/taxa_plots_%s/' % (output_dir,
                                                           category)
            # need to check for existence of any html files, since the user can
            # select only certain ones to be generated
            existing_taxa_plot_html_fps = glob('%s/taxa_summary_plots/*.html' %
                                               taxa_plots_output_dir)
            if len(existing_taxa_plot_html_fps) == 0:
                run_summarize_taxa_through_plots(
                    otu_table_fp=biom_fp,
                    mapping_fp=mapping_fp,
                    output_dir=taxa_plots_output_dir,
                    mapping_cat=category,
                    sort=True,
                    command_handler=command_handler,
                    params=params,
                    qiime_config=qiime_config,
                    logger=logger,
                    suppress_md5=True,
                    status_update_callback=status_update_callback)
            else:
                logger.write(
                    "Skipping summarize_taxa_through_plots.py for %s as %s exist(s).\n\n"
                    % (category, ', '.join(existing_taxa_plot_html_fps)))

            index_links.append(
                ('Taxa summary bar plots',
                 '%s/taxa_summary_plots/bar_charts.html' %
                 taxa_plots_output_dir,
                 _index_headers['taxa_summary_categorical'] % category))
            index_links.append(
                ('Taxa summary area plots',
                 '%s/taxa_summary_plots/area_charts.html' %
                 taxa_plots_output_dir,
                 _index_headers['taxa_summary_categorical'] % category))

    if not suppress_group_significance:
        params_str = get_params_str(params['group_significance'])
        # group significance tests, aka category significance
        for category in categories:
            group_signifance_fp = \
                '%s/group_significance_%s.txt' % (output_dir, category)
            if not exists(group_signifance_fp):
                # Build the OTU category significance command
                group_significance_cmd = \
                    'group_significance.py -i %s -m %s -c %s -o %s %s' %\
                    (rarefied_biom_fp, mapping_fp, category,
                     group_signifance_fp, params_str)
                commands.append([('Group significance (%s)' % category,
                                  group_significance_cmd)])
            else:
                logger.write(
                    "Skipping group_significance.py for %s as %s exists.\n\n" %
                    (category, group_signifance_fp))

            index_links.append(
                ('Category significance (%s)' % category, group_signifance_fp,
                 _index_headers['group_significance']))

    filtered_biom_gzip_fp = '%s.gz' % filtered_biom_fp
    if not exists(filtered_biom_gzip_fp):
        commands.append([('Compress the filtered BIOM table',
                          'gzip %s' % filtered_biom_fp)])
    else:
        logger.write(
            "Skipping compressing of filtered BIOM table as %s exists.\n\n" %
            filtered_biom_gzip_fp)
    index_links.append(
        ('Filtered BIOM table (minimum sequence count: %d)' % sampling_depth,
         filtered_biom_gzip_fp, _index_headers['run_summary']))

    rarified_biom_gzip_fp = '%s.gz' % rarefied_biom_fp
    if not exists(rarified_biom_gzip_fp):
        commands.append([('Compress the rarified BIOM table',
                          'gzip %s' % rarefied_biom_fp)])
    else:
        logger.write(
            "Skipping compressing of rarified BIOM table as %s exists.\n\n" %
            rarified_biom_gzip_fp)
    index_links.append(
        ('Rarified BIOM table (sampling depth: %d)' % sampling_depth,
         rarified_biom_gzip_fp, _index_headers['run_summary']))

    if len(commands) > 0:
        command_handler(commands, status_update_callback, logger)
    else:
        logger.close()

    generate_index_page(index_links, index_fp)
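Relative to Example #4, this version makes the workflow resumable: before queuing each command it checks whether the step's output already exists and, if so, writes a skip message to the log instead of re-running the step. A hypothetical helper distilling that idiom (the name queue_or_skip and its signature are illustrative, not part of QIIME; the workflow above inlines this logic at each step):

from os.path import exists

def queue_or_skip(commands, logger, label, cmd, output_fp):
    # Queue (label, cmd) for the command handler only if output_fp is not
    # already on disk; otherwise record a skip message in the workflow log.
    if not exists(output_fp):
        commands.append([(label, cmd)])
    else:
        logger.write("Skipping %s as %s exists.\n\n" % (label, output_fp))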
Example #6
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    verbose = opts.verbose
    
    otu_table_fp = opts.otu_table_fp
    output_dir = opts.output_dir
    mapping_fp = opts.mapping_fp
    tree_fp = opts.tree_fp
    num_steps = opts.num_steps
    print_only = opts.print_only
    parallel = opts.parallel
    min_rare_depth = opts.min_rare_depth
    max_rare_depth = opts.max_rare_depth
    retain_intermediate_files = opts.retain_intermediate_files
    
    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            raise IOError("Can't open parameters file (%s). Does it exist? "
                          "Do you have read access?" % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
        parameter_f.close()
    else:
        params = parse_qiime_parameters([]) 
        # empty list returns empty defaultdict for now
    
    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params,
                                   jobs_to_start,
                                   default_jobs_to_start,
                                   parallel,
                                   option_parser)
                                   
    try:
        makedirs(output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            option_parser.error("Output directory already exists. Please choose"
                " a different directory, or force overwrite with -f.")

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    
    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates
     
    run_alpha_rarefaction(otu_table_fp=otu_table_fp,
                          mapping_fp=mapping_fp,
                          output_dir=output_dir,
                          command_handler=command_handler,
                          params=params,
                          qiime_config=qiime_config,
                          tree_fp=tree_fp,
                          num_steps=num_steps,
                          parallel=parallel,
                          min_rare_depth=min_rare_depth,
                          max_rare_depth=max_rare_depth,
                          status_update_callback=status_update_callback,
                          retain_intermediate_files=retain_intermediate_files)
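Stripped of option parsing, the body of this script reduces to the single run_alpha_rarefaction call sketched below. The input paths are placeholders and the import paths are assumptions (they differ between QIIME releases):

from qiime.util import load_qiime_config
from qiime.parse import parse_qiime_parameters
from qiime.workflow.util import call_commands_serially, no_status_updates  # import paths assumed
from qiime.workflow.downstream import run_alpha_rarefaction

run_alpha_rarefaction(otu_table_fp='otu_table.biom',  # placeholder inputs
                      mapping_fp='map.txt',
                      output_dir='alpha_rarefaction_out',
                      command_handler=call_commands_serially,
                      params=parse_qiime_parameters([]),
                      qiime_config=load_qiime_config(),
                      tree_fp='rep_set.tre',
                      num_steps=10,
                      parallel=False,
                      min_rare_depth=10,
                      max_rare_depth=100,
                      status_update_callback=no_status_updates)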
Example #7
def run_core_diversity_analyses(
    biom_fp,
    mapping_fp,
    sampling_depth,
    output_dir,
    qiime_config,
    command_handler=call_commands_serially,
    tree_fp=None,
    params=None,
    categories=None,
    arare_min_rare_depth=10,
    arare_num_steps=10,
    parallel=False,
    suppress_taxa_summary=False,
    suppress_beta_diversity=False,
    suppress_alpha_diversity=False,
    suppress_otu_category_significance=False,
    status_update_callback=print_to_stdout):
    """
    """
    if categories != None:
        # Validate categories provided by the users
        mapping_data, mapping_comments = \
         parse_mapping_file_to_dict(open(mapping_fp,'U'))
        metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                raise ValueError, ("Category '%s' is not a column header "
                 "in your mapping file. "
                 "Categories are case and white space sensitive. Valid "
                 "choices are: (%s)" % (c,', '.join(metadata_map.CategoryNames)))
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError, ("Category '%s' contains only one value. "
                 "Categories analyzed here require at least two values." % c)
            
    else:
        categories= []
    
    # prep some variables
    if params == None:
        params = parse_qiime_parameters([])
        
    create_dir(output_dir)
    index_fp = '%s/index.html' % output_dir
    index_links = []
    commands = []
    
    # begin logging
    old_log_fps = glob(join(output_dir,'log_20*txt'))
    log_fp = generate_log_fp(output_dir)
    index_links.append(('Master run log',log_fp,_index_headers['run_summary']))
    for old_log_fp in old_log_fps:
        index_links.append(('Previous run log',old_log_fp,_index_headers['run_summary']))
    logger = WorkflowLogger(log_fp,
                            params=params,
                            qiime_config=qiime_config)
    input_fps = [biom_fp,mapping_fp]
    if tree_fp != None:
        input_fps.append(tree_fp)
    log_input_md5s(logger,input_fps)

    # run 'biom summarize-table' on input BIOM table
    try:
        params_str = get_params_str(params['biom-summarize-table'])
    except KeyError:
        params_str = ''
    biom_table_stats_output_fp = '%s/biom_table_summary.txt' % output_dir
    if not exists(biom_table_stats_output_fp):
        biom_table_summary_cmd = \
         "biom summarize-table -i %s -o %s --suppress-md5 %s" % \
         (biom_fp, biom_table_stats_output_fp,params_str)
        commands.append([('Generate BIOM table summary',
                          biom_table_summary_cmd)])
    else:
        logger.write("Skipping 'biom summarize-table' as %s exists.\n\n" \
                     % biom_table_stats_output_fp)
    index_links.append(('BIOM table statistics',
                        biom_table_stats_output_fp,
                        _index_headers['run_summary']))
    
    # filter samples with fewer observations than the requested sampling_depth. 
    # since these get filtered for some analyses (eg beta diversity after
    # even sampling) it's useful to filter them here so they're filtered 
    # from all analyses.
    filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
    if not exists(filtered_biom_fp):
        filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" %\
         (biom_fp,filtered_biom_fp,sampling_depth)
        commands.append([('Filter low sequence count samples from table (minimum sequence count: %d)' % sampling_depth,
                          filter_samples_cmd)])
    else:
        logger.write("Skipping filter_samples_from_otu_table.py as %s exists.\n\n" \
                     % filtered_biom_fp)
    biom_fp = filtered_biom_fp
    
    # run initial commands and reset the command list
    if len(commands) > 0:
        command_handler(commands, 
                        status_update_callback, 
                        logger,
                        close_logger_on_success=False)
        commands = []
    
    if not suppress_beta_diversity:
        bdiv_even_output_dir = '%s/bdiv_even%d/' % (output_dir,sampling_depth)
        # Need to check for the existence of any distance matrices, since the user 
        # can select which will be generated.
        existing_dm_fps = glob('%s/*_dm.txt' % bdiv_even_output_dir)
        if len(existing_dm_fps) == 0:
            even_dm_fps = run_beta_diversity_through_plots(
             otu_table_fp=biom_fp, 
             mapping_fp=mapping_fp,
             output_dir=bdiv_even_output_dir,
             command_handler=command_handler,
             params=params,
             qiime_config=qiime_config,
             sampling_depth=sampling_depth,
             tree_fp=tree_fp,
             parallel=parallel,
             logger=logger,
             suppress_md5=True,
             status_update_callback=status_update_callback)
        else:
            logger.write("Skipping beta_diversity_through_plots.py as %s exist(s).\n\n" \
                         % ', '.join(existing_dm_fps))
            even_dm_fps = [(split(fp)[1].strip('_dm.txt'),fp) for fp in existing_dm_fps]
        
        # Get make_distance_boxplots parameters
        try:
            params_str = get_params_str(params['make_distance_boxplots'])
        except KeyError:
            params_str = ''
        
        for bdiv_metric, dm_fp in even_dm_fps:
            for category in categories:
                boxplots_output_dir = '%s/%s_boxplots/' % (bdiv_even_output_dir,bdiv_metric)
                plot_output_fp = '%s/%s_Distances.pdf' % (boxplots_output_dir,category)
                stats_output_fp = '%s/%s_Stats.txt' % (boxplots_output_dir,category)
                if not exists(plot_output_fp):
                    boxplots_cmd = \
                     'make_distance_boxplots.py -d %s -f %s -o %s -m %s -n 999 %s' %\
                     (dm_fp, category, boxplots_output_dir, mapping_fp, params_str)
                    commands.append([('Boxplots (%s)' % category,
                                      boxplots_cmd)])
                else:
                    logger.write("Skipping make_distance_boxplots.py for %s as %s exists.\n\n" \
                                 % (category, plot_output_fp))
                index_links.append(('Distance boxplots (%s)' % bdiv_metric,
                                    plot_output_fp,
                                    _index_headers['beta_diversity_even'] % sampling_depth))
                index_links.append(('Distance boxplots statistics (%s)' % bdiv_metric,
                                    stats_output_fp,
                                    _index_headers['beta_diversity_even'] % sampling_depth))
            
            index_links.append(('PCoA plot (%s)' % bdiv_metric,
                                '%s/%s_emperor_pcoa_plot/index.html' % \
                                 (bdiv_even_output_dir,bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(('Distance matrix (%s)' % bdiv_metric,
                                '%s/%s_dm.txt' % \
                                 (bdiv_even_output_dir,bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(('Principal coordinate matrix (%s)' % bdiv_metric,
                                '%s/%s_pc.txt' % \
                                 (bdiv_even_output_dir,bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
    
    if not suppress_alpha_diversity:
        ## Alpha rarefaction workflow
        arare_full_output_dir = '%s/arare_max%d/' % (output_dir,sampling_depth)
        rarefaction_plots_output_fp = \
         '%s/alpha_rarefaction_plots/rarefaction_plots.html' % arare_full_output_dir
        if not exists(rarefaction_plots_output_fp):
            run_alpha_rarefaction(
             otu_table_fp=biom_fp,
             mapping_fp=mapping_fp,
             output_dir=arare_full_output_dir,
             command_handler=command_handler,
             params=params,
             qiime_config=qiime_config,
             tree_fp=tree_fp,
             num_steps=arare_num_steps,
             parallel=parallel,
             logger=logger,
             min_rare_depth=arare_min_rare_depth,
             max_rare_depth=sampling_depth,
             suppress_md5=True,
             status_update_callback=status_update_callback)
        else:
            logger.write("Skipping alpha_rarefaction.py as %s exists.\n\n" \
                         % rarefaction_plots_output_fp)
    
        index_links.append(('Alpha rarefaction plots',
                            rarefaction_plots_output_fp,
                            _index_headers['alpha_diversity']))
                        
        collated_alpha_diversity_fps = \
         glob('%s/alpha_div_collated/*txt' % arare_full_output_dir)
        try:
            params_str = get_params_str(params['compare_alpha_diversity'])
        except KeyError:
            params_str = ''
            
        for category in categories:
            for collated_alpha_diversity_fp in collated_alpha_diversity_fps:
                alpha_metric = splitext(split(collated_alpha_diversity_fp)[1])[0]
                alpha_comparison_output_fp = '%s/%s_%s.txt' % \
                 (arare_full_output_dir,category,alpha_metric)
                if not exists(alpha_comparison_output_fp):
                    compare_alpha_cmd = \
                     'compare_alpha_diversity.py -i %s -m %s -c %s -o %s -n 999 %s' %\
                     (collated_alpha_diversity_fp, mapping_fp, category, 
                      alpha_comparison_output_fp, params_str)
                    commands.append([('Compare alpha diversity (%s, %s)' %\
                                       (category,alpha_metric),
                                      compare_alpha_cmd)])
                else:
                    logger.write("Skipping compare_alpha_diversity.py for %s as %s exists.\n\n" \
                                 % (category, alpha_comparison_output_fp))
                index_links.append(
                 ('Alpha diversity statistics (%s, %s)' % (category,alpha_metric),
                  alpha_comparison_output_fp,
                  _index_headers['alpha_diversity']))
    
    if not suppress_taxa_summary:
        taxa_plots_output_dir = '%s/taxa_plots/' % output_dir
        # need to check for existence of any html files, since the user can 
        # select only certain ones to be generated
        existing_taxa_plot_html_fps = glob(join(output_dir,'taxa_summary_plots','*.html'))
        if len(existing_taxa_plot_html_fps) == 0:
            run_summarize_taxa_through_plots(
             otu_table_fp=biom_fp,
             mapping_fp=mapping_fp,
             output_dir=taxa_plots_output_dir,
             mapping_cat=None, 
             sort=True,
             command_handler=command_handler,
             params=params,
             qiime_config=qiime_config,
             logger=logger,
             suppress_md5=True,
             status_update_callback=status_update_callback)
        else:
            logger.write("Skipping summarize_taxa_through_plots.py for as %s exist(s).\n\n" \
                         % ', '.join(existing_taxa_plot_html_fps))

        index_links.append(('Taxa summary bar plots',
                            '%s/taxa_summary_plots/bar_charts.html'\
                              % taxa_plots_output_dir,
                            _index_headers['taxa_summary']))
        index_links.append(('Taxa summary area plots',
                            '%s/taxa_summary_plots/area_charts.html'\
                              % taxa_plots_output_dir,
                            _index_headers['taxa_summary']))
        for category in categories:
            taxa_plots_output_dir = '%s/taxa_plots_%s/' % (output_dir,category)
            # need to check for existence of any html files, since the user can 
            # select only certain ones to be generated
            existing_taxa_plot_html_fps = glob('%s/taxa_summary_plots/*.html' % taxa_plots_output_dir)
            if len(existing_taxa_plot_html_fps) == 0:
                run_summarize_taxa_through_plots(
                 otu_table_fp=biom_fp,
                 mapping_fp=mapping_fp,
                 output_dir=taxa_plots_output_dir,
                 mapping_cat=category, 
                 sort=True,
                 command_handler=command_handler,
                 params=params,
                 qiime_config=qiime_config,
                 logger=logger,
                 suppress_md5=True,
                 status_update_callback=status_update_callback)
            else:
                logger.write("Skipping summarize_taxa_through_plots.py for %s as %s exist(s).\n\n" \
                             % (category, ', '.join(existing_taxa_plot_html_fps)))

            index_links.append(('Taxa summary bar plots',
                                '%s/taxa_summary_plots/bar_charts.html'\
                                  % taxa_plots_output_dir,
                                _index_headers['taxa_summary_categorical'] % category))
            index_links.append(('Taxa summary area plots',
                                '%s/taxa_summary_plots/area_charts.html'\
                                  % taxa_plots_output_dir,
                                _index_headers['taxa_summary_categorical'] % category))
    
    if not suppress_otu_category_significance:
        try:
            params_str = get_params_str(params['otu_category_significance'])
        except KeyError:
            params_str = ''
        # OTU category significance
        for category in categories:
            category_signifance_fp = \
             '%s/category_significance_%s.txt' % (output_dir, category)
            if not exists(category_signifance_fp):
                # Build the OTU category significance command
                category_significance_cmd = \
                 'otu_category_significance.py -i %s -m %s -c %s -o %s %s' %\
                 (biom_fp, mapping_fp, category, 
                  category_signifance_fp, params_str)
                commands.append([('OTU category significance (%s)' % category, 
                                  category_significance_cmd)])
            else:
                logger.write("Skipping otu_category_significance.py for %s as %s exists.\n\n" \
                             % (category, category_signifance_fp))
            
            index_links.append(('Category significance (%s)' % category,
                        category_signifance_fp,
                        _index_headers['otu_category_sig']))
    filtered_biom_gzip_fp = '%s.gz' % filtered_biom_fp
    if not exists(filtered_biom_gzip_fp):
        commands.append([('Compress the filtered BIOM table','gzip %s' % filtered_biom_fp)])
        index_links.append(('Filtered BIOM table (minimum sequence count: %d)' % sampling_depth,
                            filtered_biom_gzip_fp,
                            _index_headers['run_summary']))
    else:
        logger.write("Skipping compressing of filtered BIOM table as %s exists.\n\n" \
                     % filtered_biom_gzip_fp)
    if len(commands) > 0:
        command_handler(commands, status_update_callback, logger)
    else:
        logger.close()
    
    generate_index_page(index_links,index_fp)
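
Every workflow in these examples accumulates index_links as (link text, file path, section header) tuples and hands them to generate_index_page once all commands have run. The stand-in below is only a sketch of that convention, assuming links are grouped by their section header; it is not QIIME's actual generate_index_page, and the paths and header strings are placeholders.

# Hypothetical stand-in for illustration only -- not QIIME's generate_index_page.
from collections import OrderedDict

def sketch_generate_index_page(index_links, index_fp):
    # Group the (link text, file path, section header) tuples by header,
    # preserving the order in which headers first appear.
    sections = OrderedDict()
    for link_text, target_fp, header in index_links:
        sections.setdefault(header, []).append((link_text, target_fp))
    with open(index_fp, 'w') as f:
        f.write('<html><body>\n')
        for header, links in sections.items():
            f.write('<h2>%s</h2>\n<ul>\n' % header)
            for link_text, target_fp in links:
                f.write('<li><a href="%s">%s</a></li>\n' % (target_fp, link_text))
            f.write('</ul>\n')
        f.write('</body></html>\n')

# Placeholder usage; the header strings stand in for the real _index_headers values.
sketch_generate_index_page(
    [('Master run log', 'log_20130101010101.txt', 'Run summary data'),
     ('BIOM table statistics', 'biom_table_summary.txt', 'Run summary data')],
    'index.html')
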
Beispiel #8
0
def run_core_diversity_analyses(
    biom_fp,
    mapping_fp,
    sampling_depth,
    output_dir,
    qiime_config,
    command_handler=call_commands_serially,
    tree_fp=None,
    params=None,
    categories=None,
    arare_min_rare_depth=10,
    arare_num_steps=10,
    parallel=False,
    suppress_taxa_summary=False,
    suppress_beta_diversity=False,
    suppress_alpha_diversity=False,
    suppress_group_significance=False,
    status_update_callback=print_to_stdout,
):
    """
    """
    if categories is not None:
        # Validate categories provided by the users
        mapping_data, mapping_comments = parse_mapping_file_to_dict(open(mapping_fp, "U"))
        metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                raise ValueError(
                    "Category '%s' is not a column header "
                    "in your mapping file. "
                    "Categories are case and white space sensitive. Valid "
                    "choices are: (%s)" % (c, ", ".join(metadata_map.CategoryNames))
                )
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError(
                    "Category '%s' contains only one value. "
                    "Categories analyzed here require at least two values." % c
                )

    else:
        categories = []
    comma_separated_categories = ",".join(categories)
    # prep some variables
    if params is None:
        params = parse_qiime_parameters([])

    create_dir(output_dir)
    index_fp = "%s/index.html" % output_dir
    index_links = []
    commands = []

    # begin logging
    old_log_fps = glob(join(output_dir, "log_20*txt"))
    log_fp = generate_log_fp(output_dir)
    index_links.append(("Master run log", log_fp, _index_headers["run_summary"]))
    for old_log_fp in old_log_fps:
        index_links.append(("Previous run log", old_log_fp, _index_headers["run_summary"]))
    logger = WorkflowLogger(log_fp, params=params, qiime_config=qiime_config)
    input_fps = [biom_fp, mapping_fp]
    if tree_fp is not None:
        input_fps.append(tree_fp)
    log_input_md5s(logger, input_fps)

    # run 'biom summarize-table' on input BIOM table
    try:
        params_str = get_params_str(params["biom-summarize-table"])
    except KeyError:
        params_str = ""
    biom_table_stats_output_fp = "%s/biom_table_summary.txt" % output_dir
    if not exists(biom_table_stats_output_fp):
        biom_table_summary_cmd = "biom summarize-table -i %s -o %s --suppress-md5 %s" % (
            biom_fp,
            biom_table_stats_output_fp,
            params_str,
        )
        commands.append([("Generate BIOM table summary", biom_table_summary_cmd)])
    else:
        logger.write("Skipping 'biom summarize-table' as %s exists.\n\n" % biom_table_stats_output_fp)
    index_links.append(("BIOM table statistics", biom_table_stats_output_fp, _index_headers["run_summary"]))

    # filter samples with fewer observations than the requested sampling_depth.
    # since these get filtered for some analyses (eg beta diversity after
    # even sampling) it's useful to filter them here so they're filtered
    # from all analyses.
    filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
    if not exists(filtered_biom_fp):
        filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" % (
            biom_fp,
            filtered_biom_fp,
            sampling_depth,
        )
        commands.append(
            [
                (
                    "Filter low sequence count samples from table (minimum sequence count: %d)" % sampling_depth,
                    filter_samples_cmd,
                )
            ]
        )
    else:
        logger.write("Skipping filter_samples_from_otu_table.py as %s exists.\n\n" % filtered_biom_fp)
    biom_fp = filtered_biom_fp

    # rarefy the BIOM table to sampling_depth
    rarefied_biom_fp = "%s/table_even%d.biom" % (output_dir, sampling_depth)
    if not exists(rarefied_biom_fp):
        single_rarefaction_cmd = "single_rarefaction.py -i %s -o %s -d %d" % (biom_fp, rarefied_biom_fp, sampling_depth)
        commands.append([("Rarify the OTU table to %d sequences/sample" % sampling_depth, single_rarefaction_cmd)])
    else:
        logger.write("Skipping single_rarefaction.py as %s exists.\n\n" % rarefied_biom_fp)

    # run initial commands and reset the command list
    if len(commands) > 0:
        command_handler(commands, status_update_callback, logger, close_logger_on_success=False)
        commands = []

    if not suppress_beta_diversity:
        bdiv_even_output_dir = "%s/bdiv_even%d/" % (output_dir, sampling_depth)
        # Need to check for the existence of any distance matrices, since the user
        # can select which will be generated.
        existing_dm_fps = glob("%s/*_dm.txt" % bdiv_even_output_dir)
        if len(existing_dm_fps) == 0:
            even_dm_fps = run_beta_diversity_through_plots(
                otu_table_fp=rarefied_biom_fp,
                mapping_fp=mapping_fp,
                output_dir=bdiv_even_output_dir,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                # Note: we pass sampling_depth=None here as
                # we rarefy the BIOM table above and pass that
                # in here.
                sampling_depth=None,
                tree_fp=tree_fp,
                parallel=parallel,
                logger=logger,
                suppress_md5=True,
                status_update_callback=status_update_callback,
            )
        else:
            logger.write("Skipping beta_diversity_through_plots.py as %s exist(s).\n\n" % ", ".join(existing_dm_fps))
            # str.strip() removes a set of characters, not a suffix, so slice
            # off the literal '_dm.txt' suffix to recover each metric name.
            even_dm_fps = [(split(fp)[1][:-len("_dm.txt")], fp) for fp in existing_dm_fps]

        # Get make_distance_boxplots parameters
        try:
            params_str = get_params_str(params["make_distance_boxplots"])
        except KeyError:
            params_str = ""

        for bdiv_metric, dm_fp in even_dm_fps:
            for category in categories:
                boxplots_output_dir = "%s/%s_boxplots/" % (bdiv_even_output_dir, bdiv_metric)
                plot_output_fp = "%s/%s_Distances.pdf" % (boxplots_output_dir, category)
                stats_output_fp = "%s/%s_Stats.txt" % (boxplots_output_dir, category)
                if not exists(plot_output_fp):
                    boxplots_cmd = "make_distance_boxplots.py -d %s -f %s -o %s -m %s -n 999 %s" % (
                        dm_fp,
                        category,
                        boxplots_output_dir,
                        mapping_fp,
                        params_str,
                    )
                    commands.append([("Boxplots (%s)" % category, boxplots_cmd)])
                else:
                    logger.write(
                        "Skipping make_distance_boxplots.py for %s as %s exists.\n\n" % (category, plot_output_fp)
                    )
                index_links.append(
                    (
                        "Distance boxplots (%s)" % bdiv_metric,
                        plot_output_fp,
                        _index_headers["beta_diversity_even"] % sampling_depth,
                    )
                )
                index_links.append(
                    (
                        "Distance boxplots statistics (%s)" % bdiv_metric,
                        stats_output_fp,
                        _index_headers["beta_diversity_even"] % sampling_depth,
                    )
                )

            index_links.append(
                (
                    "PCoA plot (%s)" % bdiv_metric,
                    "%s/%s_emperor_pcoa_plot/index.html" % (bdiv_even_output_dir, bdiv_metric),
                    _index_headers["beta_diversity_even"] % sampling_depth,
                )
            )
            index_links.append(
                (
                    "Distance matrix (%s)" % bdiv_metric,
                    "%s/%s_dm.txt" % (bdiv_even_output_dir, bdiv_metric),
                    _index_headers["beta_diversity_even"] % sampling_depth,
                )
            )
            index_links.append(
                (
                    "Principal coordinate matrix (%s)" % bdiv_metric,
                    "%s/%s_pc.txt" % (bdiv_even_output_dir, bdiv_metric),
                    _index_headers["beta_diversity_even"] % sampling_depth,
                )
            )

    if not suppress_alpha_diversity:
        # Alpha rarefaction workflow
        arare_full_output_dir = "%s/arare_max%d/" % (output_dir, sampling_depth)
        rarefaction_plots_output_fp = "%s/alpha_rarefaction_plots/rarefaction_plots.html" % arare_full_output_dir
        if not exists(rarefaction_plots_output_fp):
            run_alpha_rarefaction(
                otu_table_fp=biom_fp,
                mapping_fp=mapping_fp,
                output_dir=arare_full_output_dir,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                tree_fp=tree_fp,
                num_steps=arare_num_steps,
                parallel=parallel,
                logger=logger,
                min_rare_depth=arare_min_rare_depth,
                max_rare_depth=sampling_depth,
                suppress_md5=True,
                status_update_callback=status_update_callback,
                retain_intermediate_files=False,
            )
        else:
            logger.write("Skipping alpha_rarefaction.py as %s exists.\n\n" % rarefaction_plots_output_fp)

        index_links.append(("Alpha rarefaction plots", rarefaction_plots_output_fp, _index_headers["alpha_diversity"]))

        collated_alpha_diversity_fps = glob("%s/alpha_div_collated/*txt" % arare_full_output_dir)
        try:
            params_str = get_params_str(params["compare_alpha_diversity"])
        except KeyError:
            params_str = ""

        if len(categories) > 0:
            for collated_alpha_diversity_fp in collated_alpha_diversity_fps:
                alpha_metric = splitext(split(collated_alpha_diversity_fp)[1])[0]
                compare_alpha_output_dir = "%s/compare_%s" % (arare_full_output_dir, alpha_metric)
                if not exists(compare_alpha_output_dir):
                    compare_alpha_cmd = "compare_alpha_diversity.py -i %s -m %s -c %s -o %s -n 999 %s" % (
                        collated_alpha_diversity_fp,
                        mapping_fp,
                        comma_separated_categories,
                        compare_alpha_output_dir,
                        params_str,
                    )
                    commands.append([("Compare alpha diversity (%s)" % alpha_metric, compare_alpha_cmd)])
                    for category in categories:
                        alpha_comparison_stat_fp = "%s/%s_stats.txt" % (compare_alpha_output_dir, category)
                        alpha_comparison_boxplot_fp = "%s/%s_boxplots.pdf" % (compare_alpha_output_dir, category)
                        index_links.append(
                            (
                                "Alpha diversity statistics (%s, %s)" % (category, alpha_metric),
                                alpha_comparison_stat_fp,
                                _index_headers["alpha_diversity"],
                            )
                        )
                        index_links.append(
                            (
                                "Alpha diversity boxplots (%s, %s)" % (category, alpha_metric),
                                alpha_comparison_boxplot_fp,
                                _index_headers["alpha_diversity"],
                            )
                        )
                else:
                    logger.write(
                        "Skipping compare_alpha_diversity.py"
                        " for %s as %s exists.\n\n" % (alpha_metric, compare_alpha_output_dir)
                    )
        else:
            logger.write("Skipping compare_alpha_diversity.py as" " no categories were provided.\n\n")

    if not suppress_taxa_summary:
        taxa_plots_output_dir = "%s/taxa_plots/" % output_dir
        # need to check for existence of any html files, since the user can
        # select only certain ones to be generated
        existing_taxa_plot_html_fps = glob(join(taxa_plots_output_dir, "taxa_summary_plots", "*.html"))
        if len(existing_taxa_plot_html_fps) == 0:
            run_summarize_taxa_through_plots(
                otu_table_fp=biom_fp,
                mapping_fp=mapping_fp,
                output_dir=taxa_plots_output_dir,
                mapping_cat=None,
                sort=True,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                logger=logger,
                suppress_md5=True,
                status_update_callback=status_update_callback,
            )
        else:
            logger.write(
                "Skipping summarize_taxa_through_plots.py for as %s exist(s).\n\n"
                % ", ".join(existing_taxa_plot_html_fps)
            )

        index_links.append(
            (
                "Taxa summary bar plots",
                "%s/taxa_summary_plots/bar_charts.html" % taxa_plots_output_dir,
                _index_headers["taxa_summary"],
            )
        )
        index_links.append(
            (
                "Taxa summary area plots",
                "%s/taxa_summary_plots/area_charts.html" % taxa_plots_output_dir,
                _index_headers["taxa_summary"],
            )
        )
        for category in categories:
            taxa_plots_output_dir = "%s/taxa_plots_%s/" % (output_dir, category)
            # need to check for existence of any html files, since the user can
            # select only certain ones to be generated
            existing_taxa_plot_html_fps = glob("%s/taxa_summary_plots/*.html" % taxa_plots_output_dir)
            if len(existing_taxa_plot_html_fps) == 0:
                run_summarize_taxa_through_plots(
                    otu_table_fp=biom_fp,
                    mapping_fp=mapping_fp,
                    output_dir=taxa_plots_output_dir,
                    mapping_cat=category,
                    sort=True,
                    command_handler=command_handler,
                    params=params,
                    qiime_config=qiime_config,
                    logger=logger,
                    suppress_md5=True,
                    status_update_callback=status_update_callback,
                )
            else:
                logger.write(
                    "Skipping summarize_taxa_through_plots.py for %s as %s exist(s).\n\n"
                    % (category, ", ".join(existing_taxa_plot_html_fps))
                )

            index_links.append(
                (
                    "Taxa summary bar plots",
                    "%s/taxa_summary_plots/bar_charts.html" % taxa_plots_output_dir,
                    _index_headers["taxa_summary_categorical"] % category,
                )
            )
            index_links.append(
                (
                    "Taxa summary area plots",
                    "%s/taxa_summary_plots/area_charts.html" % taxa_plots_output_dir,
                    _index_headers["taxa_summary_categorical"] % category,
                )
            )

    if not suppress_group_significance:
        params_str = get_params_str(params["group_significance"])
        # group significance tests, aka category significance
        for category in categories:
            group_signifance_fp = "%s/group_significance_%s.txt" % (output_dir, category)
            if not exists(group_signifance_fp):
                # Build the group significance command
                group_significance_cmd = "group_significance.py -i %s -m %s -c %s -o %s %s" % (
                    rarefied_biom_fp,
                    mapping_fp,
                    category,
                    group_signifance_fp,
                    params_str,
                )
                commands.append([("Group significance (%s)" % category, group_significance_cmd)])
            else:
                logger.write(
                    "Skipping group_significance.py for %s as %s exists.\n\n" % (category, group_signifance_fp)
                )

            index_links.append(
                ("Category significance (%s)" % category, group_signifance_fp, _index_headers["group_significance"])
            )

    filtered_biom_gzip_fp = "%s.gz" % filtered_biom_fp
    if not exists(filtered_biom_gzip_fp):
        commands.append([("Compress the filtered BIOM table", "gzip %s" % filtered_biom_fp)])
    else:
        logger.write("Skipping compressing of filtered BIOM table as %s exists.\n\n" % filtered_biom_gzip_fp)
    index_links.append(
        (
            "Filtered BIOM table (minimum sequence count: %d)" % sampling_depth,
            filtered_biom_gzip_fp,
            _index_headers["run_summary"],
        )
    )

    rarified_biom_gzip_fp = "%s.gz" % rarefied_biom_fp
    if not exists(rarified_biom_gzip_fp):
        commands.append([("Compress the rarified BIOM table", "gzip %s" % rarefied_biom_fp)])
    else:
        logger.write("Skipping compressing of rarified BIOM table as %s exists.\n\n" % rarified_biom_gzip_fp)
    index_links.append(
        (
            "Rarified BIOM table (sampling depth: %d)" % sampling_depth,
            rarified_biom_gzip_fp,
            _index_headers["run_summary"],
        )
    )

    if len(commands) > 0:
        command_handler(commands, status_update_callback, logger)
    else:
        logger.close()

    generate_index_page(index_links, index_fp)
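
A minimal driver for the run_core_diversity_analyses function defined above might look like the sketch below. The file paths are placeholders, and the import locations are assumptions based on QIIME 1.9's module layout, so both may need adjusting for a real installation and dataset.

# Hypothetical invocation of the function defined above; paths are placeholders
# and the imports are assumed from QIIME 1.9's module layout.
from qiime.util import load_qiime_config
from qiime.parse import parse_qiime_parameters
from qiime.workflow.util import call_commands_serially, print_to_stdout

run_core_diversity_analyses(
    biom_fp='otu_table.biom',            # unfiltered input OTU table (placeholder)
    mapping_fp='map.txt',                # sample metadata mapping file (placeholder)
    sampling_depth=100,                  # minimum count / even-sampling depth
    output_dir='core_diversity_out',
    qiime_config=load_qiime_config(),
    command_handler=call_commands_serially,
    tree_fp='rep_set.tre',               # needed for phylogenetic metrics such as UniFrac
    params=parse_qiime_parameters([]),   # no per-script parameter overrides
    categories=['SampleType'],           # must be column headers in the mapping file
    status_update_callback=print_to_stdout)
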
Beispiel #9
0
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)
    
    otu_table_fp = opts.otu_table_fp
    output_dir = opts.output_dir
    mapping_fp = opts.mapping_fp
    tree_fp = opts.tree_fp
    verbose = opts.verbose
    print_only = opts.print_only
    seqs_per_sample = int(opts.seqs_per_sample)
    parallel = opts.parallel
    min_seqs_sample = opts.min_seqs_sample
    subject_category = opts.subject_name

    try:
        makedirs(output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            option_parser.error("Output directory already exists. Please choose"
                " a different directory, or force overwrite with -f.")


    ## ******************** make_evident_selectors ********************
    ## The code for make_evident_selectors.py is here and has to go before the params
    ## validation as we need to know the main cats before creating the params file
    map_data, headers, comments = parse_mapping_file(open(mapping_fp, 'U'))
    biom_table = parse_biom_table(open(otu_table_fp, 'U'))

    # getting valid samples from biom file
    real_map_headers, real_map_data = filter_mapping_file(map_data, headers,\
        biom_table.SampleIds, include_repeat_cols=False)

    if subject_category not in real_map_headers:
        option_parser.error("Column '%s' is not in the mapping file. Valid choices are: %s" %
            (subject_category, ', '.join(real_map_headers)))
 
    sorted_counts_per_sample = get_sorted_counts_per_sample(biom_table)

    mapping_file_tuple = (real_map_data, real_map_headers)

    # calculate the available subjects at each rarefaction level
    results, main_map_cat = make_selectors(sorted_counts_per_sample, min_seqs_sample,\
        mapping_file_tuple, subject_category, verbose=verbose)

    fout = open(join(output_dir,'selectors.txt'),'w')
    fout.write('#Sequences\tSubjects\tSamples\tMetadata\n')
    fout.write('\n'.join(results))
    fout.close()
    
    fout = open(join(output_dir,'mapping_file.txt'),'w')
    fout.write(format_mapping_file(real_map_headers, real_map_data))
    fout.close()
    ## ******************** make_evident_selectors ********************

    fout = open(join(output_dir,'study_preferences.txt'),'w')
    fout.write('%d\n' % seqs_per_sample)
    fout.write('%s\n' % subject_category)
    fout.close()

    ## ******************** filter_samples_from_otu_table ********************
    ## Filtering original biom file to only have samples above the max length to avoid
    ## ugly plots
    alpha_biom_file = join(output_dir,'filtered_otu_table_for_alpha.biom')
    fout = open(alpha_biom_file,'w')
    sample_ids_to_keep = biom_table.SampleIds
    filtered_otu_table = filter_samples_from_otu_table(biom_table,
                                                       sample_ids_to_keep,
                                                       min_count=seqs_per_sample,
                                                       max_count=inf)
    fout.write(format_biom_table(filtered_otu_table))
    fout.close()
    ## ******************** filter_samples_from_otu_table ********************

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            option_parser.error("Can't open parameters file (%s). Does it exist? " \
            "Do you have read access?" % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
        parameter_f.close()
    else:
        params = parse_qiime_parameters(
            ['beta_diversity:metrics unweighted_unifrac',\
             'make_rarefaction_plots:prefs_path %s' % join(output_dir,'prefs.txt'),
             'make_rarefaction_plots:colorby %s' % ','.join(main_map_cat), 
             'make_rarefaction_plots:output_type memory', 
             'multiple_rarefactions:min %d' % int(seqs_per_sample/4),
             'multiple_rarefactions:max %d' % (seqs_per_sample+1),
             'multiple_rarefactions:step %d' % int(seqs_per_sample/4),
             'multiple_rarefactions:num-reps 4',
            ])
        # empty list returns empty defaultdict for now
    
    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params,
                                   jobs_to_start,
                                   default_jobs_to_start,
                                   parallel,
                                   option_parser)


    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    
    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates
    
    copyfile(otu_table_fp, join(output_dir,'raw.biom'))
    
    run_beta_diversity_through_plots(otu_table_fp=otu_table_fp,
     mapping_fp=mapping_fp,
     output_dir=output_dir,
     command_handler=command_handler,
     params=params,
     qiime_config=qiime_config,
     color_by_interesting_fields_only=False,
     sampling_depth=seqs_per_sample,
     histogram_categories=None,
     tree_fp=tree_fp,
     parallel=parallel,
     suppress_3d_plots=True,
     suppress_2d_plots=True,
     status_update_callback=status_update_callback)
    
    output_dir = join(output_dir,'alpha')
    run_alpha_rarefaction(otu_table_fp=alpha_biom_file,
     mapping_fp=mapping_fp,
     output_dir=output_dir,
     command_handler=command_handler,
     params=params,
     qiime_config=qiime_config,
     tree_fp=tree_fp,
     num_steps=4,
     parallel=parallel,
     min_rare_depth=10,
     max_rare_depth=20,
     status_update_callback=status_update_callback,
     plot_stderr_and_stddev=True)
def run_core_diversity_analyses(
    biom_fp,
    mapping_fp,
    sampling_depth,
    output_dir,
    qiime_config,
    command_handler=call_commands_serially,
    tree_fp=None,
    params=None,
    categories=None,
    arare_min_rare_depth=10,
    arare_num_steps=10,
    parallel=False,
    suppress_taxa_summary=False,
    suppress_beta_diversity=False,
    suppress_alpha_diversity=False,
    suppress_otu_category_significance=False,
    status_update_callback=print_to_stdout):
    """
    """
    if categories is not None:
        # Validate categories provided by the users
        mapping_data, mapping_comments = \
         parse_mapping_file_to_dict(open(mapping_fp, 'U'))
        metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                raise ValueError("Category '%s' is not a column header "
                 "in your mapping file. "
                 "Categories are case and white space sensitive. Valid "
                 "choices are: (%s)" % (c, ', '.join(metadata_map.CategoryNames)))
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError("Category '%s' contains only one value. "
                 "Categories analyzed here require at least two values." % c)
    else:
        categories = []
    
    # prep some variables
    if params is None:
        params = parse_qiime_parameters([])
        
    create_dir(output_dir)
    index_fp = '%s/index.html' % output_dir
    index_links = []
    commands = []
    
    # begin logging
    log_fp = generate_log_fp(output_dir)
    index_links.append(('Master run log',log_fp,_index_headers['run_summary']))
    logger = WorkflowLogger(log_fp,
                            params=params,
                            qiime_config=qiime_config)
    input_fps = [biom_fp,mapping_fp]
    if tree_fp is not None:
        input_fps.append(tree_fp)
    log_input_md5s(logger,input_fps)

    # run print_biom_table_summary.py on input BIOM table
    try:
        params_str = get_params_str(params['print_biom_table_summary'])
    except KeyError:
        params_str = ''
    biom_table_stats_output_fp = '%s/biom_table_summary.txt' % output_dir
    print_biom_table_summary_cmd = \
     "print_biom_table_summary.py -i %s -o %s --suppress_md5 %s" % \
     (biom_fp, biom_table_stats_output_fp,params_str)
    index_links.append(('BIOM table statistics',
                        biom_table_stats_output_fp,
                        _index_headers['run_summary']))
    commands.append([('Generate BIOM table summary',
                      print_biom_table_summary_cmd)])
    
    # filter samples with fewer observations than the requested sampling_depth. 
    # since these get filtered for some analyses (eg beta diversity after
    # even sampling) it's useful to filter them here so they're filtered 
    # from all analyses.
    filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
    filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" %\
     (biom_fp,filtered_biom_fp,sampling_depth)
    commands.append([('Filter low sequence count samples from table (minimum sequence count: %d)' % sampling_depth,
                      filter_samples_cmd)])
    biom_fp = filtered_biom_fp
    
    # run initial commands and reset the command list
    command_handler(commands, 
                    status_update_callback, 
                    logger,
                    close_logger_on_success=False)
    commands = []
    
    if not suppress_beta_diversity:
        bdiv_even_output_dir = '%s/bdiv_even%d/' % (output_dir,sampling_depth)
        even_dm_fps = run_beta_diversity_through_plots(
         otu_table_fp=biom_fp, 
         mapping_fp=mapping_fp,
         output_dir=bdiv_even_output_dir,
         command_handler=command_handler,
         params=params,
         qiime_config=qiime_config,
         sampling_depth=sampling_depth,
         # force suppression of distance histograms - boxplots work better
         # in this context, and are created below.
         histogram_categories=[],
         tree_fp=tree_fp,
         parallel=parallel,
         logger=logger,
         suppress_md5=True,
         status_update_callback=status_update_callback)
    
        for bdiv_metric, dm_fp in even_dm_fps:
            for category in categories:
                boxplots_output_dir = '%s/%s_boxplots/' % (bdiv_even_output_dir,bdiv_metric)
                try:
                    params_str = get_params_str(params['make_distance_boxplots'])
                except KeyError:
                    params_str = ''
                boxplots_cmd = \
                 'make_distance_boxplots.py -d %s -f %s -o %s -m %s -n 999 %s' %\
                 (dm_fp, category, boxplots_output_dir, mapping_fp, params_str)
                commands.append([('Boxplots (%s)' % category,
                                  boxplots_cmd)])
                index_links.append(('Distance boxplots (%s)' % bdiv_metric,
                                    '%s/%s_Distances.pdf' % \
                                     (boxplots_output_dir,category),
                                    _index_headers['beta_diversity_even'] % sampling_depth))
                index_links.append(('Distance boxplots statistics (%s)' % bdiv_metric,
                                    '%s/%s_Stats.txt' % \
                                     (boxplots_output_dir,category),
                                    _index_headers['beta_diversity_even'] % sampling_depth))
            
            index_links.append(('3D plot (%s, continuous coloring)' % bdiv_metric,
                                '%s/%s_3d_continuous/%s_pc_3D_PCoA_plots.html' % \
                                 (bdiv_even_output_dir,bdiv_metric,bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(('3D plot (%s, discrete coloring)' % bdiv_metric,
                                '%s/%s_3d_discrete/%s_pc_3D_PCoA_plots.html' % \
                                 (bdiv_even_output_dir,bdiv_metric,bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(('2D plot (%s, continuous coloring)' % bdiv_metric,
                                '%s/%s_2d_continuous/%s_pc_2D_PCoA_plots.html' % \
                                 (bdiv_even_output_dir,bdiv_metric,bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(('2D plot (%s, discrete coloring)' % bdiv_metric,
                                '%s/%s_2d_discrete/%s_pc_2D_PCoA_plots.html' % \
                                 (bdiv_even_output_dir,bdiv_metric,bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(('Distance matrix (%s)' % bdiv_metric,
                                '%s/%s_dm.txt' % \
                                 (bdiv_even_output_dir,bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(('Principal coordinate matrix (%s)' % bdiv_metric,
                                '%s/%s_pc.txt' % \
                                 (bdiv_even_output_dir,bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
    
    if not suppress_alpha_diversity:
        ## Alpha rarefaction workflow
        arare_full_output_dir = '%s/arare_max%d/' % (output_dir,sampling_depth)
        run_alpha_rarefaction(
         otu_table_fp=biom_fp,
         mapping_fp=mapping_fp,
         output_dir=arare_full_output_dir,
         command_handler=command_handler,
         params=params,
         qiime_config=qiime_config,
         tree_fp=tree_fp,
         num_steps=arare_num_steps,
         parallel=parallel,
         logger=logger,
         min_rare_depth=arare_min_rare_depth,
         max_rare_depth=sampling_depth,
         suppress_md5=True,
         status_update_callback=status_update_callback)
    
        index_links.append(('Alpha rarefaction plots',
                            '%s/alpha_rarefaction_plots/rarefaction_plots.html'\
                              % arare_full_output_dir,
                            _index_headers['alpha_diversity']))
                        
        collated_alpha_diversity_fps = \
         glob('%s/alpha_div_collated/*txt' % arare_full_output_dir)
        try:
            params_str = get_params_str(params['compare_alpha_diversity'])
        except KeyError:
            params_str = ''
        for category in categories:
            for collated_alpha_diversity_fp in collated_alpha_diversity_fps:
                alpha_metric = splitext(split(collated_alpha_diversity_fp)[1])[0]
                alpha_comparison_output_fp = '%s/%s_%s.txt' % \
                 (arare_full_output_dir,category,alpha_metric)
                compare_alpha_cmd = \
                 'compare_alpha_diversity.py -i %s -m %s -c %s -o %s -n 999 %s' %\
                 (collated_alpha_diversity_fp, mapping_fp, category, 
                  alpha_comparison_output_fp, params_str)
                commands.append([('Compare alpha diversity (%s, %s)' %\
                                   (category,alpha_metric),
                                  compare_alpha_cmd)])
                index_links.append(
                 ('Alpha diversity statistics (%s, %s)' % (category,alpha_metric),
                  alpha_comparison_output_fp,
                  _index_headers['alpha_diversity']))
    
    if not suppress_taxa_summary:
        taxa_plots_output_dir = '%s/taxa_plots/' % output_dir
        run_summarize_taxa_through_plots(
         otu_table_fp=biom_fp,
         mapping_fp=mapping_fp,
         output_dir=taxa_plots_output_dir,
         mapping_cat=None, 
         sort=True,
         command_handler=command_handler,
         params=params,
         qiime_config=qiime_config,
         logger=logger,
         suppress_md5=True,
         status_update_callback=status_update_callback)
    

        index_links.append(('Taxa summary bar plots',
                            '%s/taxa_summary_plots/bar_charts.html'\
                              % taxa_plots_output_dir,
                            _index_headers['taxa_summary']))
        index_links.append(('Taxa summary area plots',
                            '%s/taxa_summary_plots/area_charts.html'\
                              % taxa_plots_output_dir,
                            _index_headers['taxa_summary']))
        for category in categories:
            taxa_plots_output_dir = '%s/taxa_plots_%s/' % (output_dir,category)
            run_summarize_taxa_through_plots(
             otu_table_fp=biom_fp,
             mapping_fp=mapping_fp,
             output_dir=taxa_plots_output_dir,
             mapping_cat=category, 
             sort=True,
             command_handler=command_handler,
             params=params,
             qiime_config=qiime_config,
             logger=logger,
             suppress_md5=True,
             status_update_callback=status_update_callback)

            index_links.append(('Taxa summary bar plots',
                                '%s/taxa_summary_plots/bar_charts.html'\
                                  % taxa_plots_output_dir,
                                _index_headers['taxa_summary_categorical'] % category))
            index_links.append(('Taxa summary area plots',
                                '%s/taxa_summary_plots/area_charts.html'\
                                  % taxa_plots_output_dir,
                                _index_headers['taxa_summary_categorical'] % category))
    
    if not suppress_otu_category_significance:
        # OTU category significance
        for category in categories:
            category_signifance_fp = \
             '%s/category_significance_%s.txt' % (output_dir, category)
            try:
                params_str = get_params_str(params['otu_category_significance'])
            except KeyError:
                params_str = ''
            # Build the OTU category significance command
            category_significance_cmd = \
             'otu_category_significance.py -i %s -m %s -c %s -o %s %s' %\
             (biom_fp, mapping_fp, category, 
              category_signifance_fp, params_str)
            commands.append([('OTU category significance (%s)' % category, 
                              category_significance_cmd)])
                          
            index_links.append(('Category significance (%s)' % category,
                        category_signifance_fp,
                        _index_headers['otu_category_sig']))
    
    commands.append([('Compress the filtered BIOM table','gzip %s' % filtered_biom_fp)])
    index_links.append(('Filtered BIOM table (minimum sequence count: %d)' % sampling_depth,
                        '%s.gz' % filtered_biom_fp,
                        _index_headers['run_summary']))
    
    command_handler(commands, status_update_callback, logger)
    generate_index_page(index_links,index_fp)
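
Both versions of this workflow thread their per-script options through params: parse_qiime_parameters builds it from lines of the form script_name:parameter value, and get_params_str turns each script's entries into extra command-line options that are appended to the corresponding command string. The snippet below is only a rough, self-contained illustration of that convention, not QIIME's actual implementation.

# Simplified sketch of the 'script_name:parameter value' convention; this is
# not QIIME's parse_qiime_parameters or get_params_str.
param_lines = [
    'beta_diversity:metrics unweighted_unifrac',
    'multiple_rarefactions:num-reps 4',
]

params = {}
for line in param_lines:
    script_and_param, value = line.split(' ', 1)
    script, param = script_and_param.split(':', 1)
    params.setdefault(script, {})[param] = value

def sketch_get_params_str(script_params):
    # e.g. {'metrics': 'unweighted_unifrac'} -> '--metrics unweighted_unifrac'
    return ' '.join('--%s %s' % (k, v) for k, v in sorted(script_params.items()))

print(sketch_get_params_str(params['beta_diversity']))  # --metrics unweighted_unifrac
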