コード例 #1
0
def pick_reference_otus(input_fp,
                        output_dir,
                        otu_picking_method,
                        refseqs_fp,
                        parallel,
                        params,
                        logger,
                        similarity_override=None):
    params_copy = deepcopy(params)
    if 'pick_otus' in params_copy and 'refseqs_fp' in params_copy['pick_otus']:
        raise WorkflowError(
            "Cannot pass pick_otus:refseqs_fp in parameters file. This can only be"
            " passed on the command line or through the API.")
    if similarity_override is not None:
        logger.write('Similiarity of %1.3f being used for pre-filtering.\n' %
                     similarity_override)
        if 'pick_otus' in params_copy:
            params_copy['pick_otus']['similarity'] = str(similarity_override)
        else:
            params_copy['pick_otus'] = {'similarity': str(similarity_override)}

    if parallel and (otu_picking_method == 'uclust_ref'
                     or otu_picking_method == "sortmerna"):
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params_copy['parallel'])
        except KeyError:
            params_str = ''

        # Grab the OTU picker parameters
        try:
            # Want to find a cleaner strategy for this: the parallel script
            # is method-specific, so doesn't take a --otu_picking_method
            # option. This works for now though.
            if 'otu_picking_method' in params_copy['pick_otus']:
                del params_copy['pick_otus']['otu_picking_method']
        except KeyError:
            pass

        params_str += ' %s' % get_params_str(params_copy['pick_otus'])
        otu_picking_script = 'parallel_pick_otus_%s.py' % otu_picking_method
        # Build the OTU picking command
        pick_otus_cmd = '%s -i %s -o %s -r %s -T %s' %\
            (otu_picking_script,
             input_fp,
             output_dir,
             refseqs_fp,
             params_str)
    else:
        try:
            params_str = get_params_str(params_copy['pick_otus'])
        except KeyError:
            params_str = ''
        # Since this is reference-based OTU picking we always want to
        # suppress new clusters -- force it here.
        params_str += ' --suppress_new_clusters'
        logger.write(
            "Forcing --suppress_new_clusters as this is reference-based OTU picking.\n\n"
        )
        # Build the OTU picking command
        pick_otus_cmd = 'pick_otus.py -i %s -o %s -r %s -m %s %s' %\
            (input_fp,
             output_dir,
             refseqs_fp,
             otu_picking_method,
             params_str)

    return pick_otus_cmd
コード例 #2
0
ファイル: most_wanted_otus.py プロジェクト: smirarab/emp
def generate_most_wanted_list(
        output_dir, otu_table_fps, rep_set_fp, gg_fp, nt_fp, mapping_fp,
        mapping_category, top_n, min_abundance, max_abundance, min_categories,
        num_categories_to_plot, max_gg_similarity, max_nt_similarity, e_value,
        word_size, merged_otu_table_fp, suppress_taxonomic_output,
        jobs_to_start, command_handler, status_update_callback, force):
    """Run the most-wanted-OTUs workflow and write its reports.

    Steps performed, in order:

    1. Create ``output_dir`` (an existing directory is tolerated only when
       ``force`` is true).
    2. Obtain the filtering commands from
       ``_get_most_wanted_filtering_commands`` and run them through
       ``command_handler``.
    3. Read the BLAST results, the filtered candidate sequences and the
       filtered OTU table produced by those commands.
    4. Write ``most_wanted_otus.txt`` (TSV), ``most_wanted_otus.fasta`` and
       ``most_wanted_otus.html`` into ``output_dir``; an ``img``
       subdirectory is created and passed to the table formatter.

    Nothing is returned. The workflow logger is closed after the HTML file
    has been written.

    Raises:
        WorkflowError: if creating ``output_dir`` raises ``OSError`` and
            ``force`` is false.
    """
    try:
        makedirs(output_dir)
    except OSError:
        # NOTE(review): any OSError from makedirs is reported as "already
        # exists" (or ignored under force), not only the exists case.
        if not force:
            raise WorkflowError(
                "Output directory '%s' already exists. Please "
                "choose a different directory, or force overwrite with -f." %
                output_dir)

    logger = WorkflowLogger(generate_log_fp(output_dir))
    commands, blast_results_fp, rep_set_cands_failures_fp, \
        master_otu_table_ms_fp = _get_most_wanted_filtering_commands(
            output_dir, otu_table_fps,
            rep_set_fp, gg_fp, nt_fp, mapping_fp, mapping_category,
            min_abundance, max_abundance, min_categories, max_gg_similarity,
            e_value, word_size, merged_otu_table_fp, jobs_to_start)

    # Execute the commands, but keep the logger open because
    # we're going to write additional status updates as we process the data.
    command_handler(commands,
                    status_update_callback,
                    logger,
                    close_logger_on_success=False)

    # We'll sort the BLAST results by percent identity (ascending) and pick the
    # top n.
    logger.write("Reading in BLAST results, sorting by percent identity, "
                 "and picking the top %d OTUs.\n\n" % top_n)
    # NOTE(review): the input handles below are handed to helpers that are
    # not visible here and are never explicitly closed. Confirm the helpers
    # consume their input eagerly before wrapping these opens in `with`.
    top_n_mw = _get_top_n_blast_results(open(blast_results_fp, 'U'), top_n,
                                        max_nt_similarity)

    # Read in our filtered down candidate seqs file and latest filtered and
    # collapsed OTU table. We'll need to compute some stats on these to include
    # in our report.
    logger.write("Reading in filtered candidate sequences and latest filtered "
                 "and collapsed OTU table.\n\n")
    mw_seqs = _get_rep_set_lookup(open(rep_set_cands_failures_fp, 'U'))
    master_otu_table_ms = parse_biom_table(open(master_otu_table_ms_fp, 'U'))

    # Write results out to tsv and HTML table.
    logger.write("Writing most wanted OTUs results to TSV and HTML "
                 "tables.\n\n")
    output_img_dir = join(output_dir, 'img')
    try:
        makedirs(output_img_dir)
    except OSError:
        # It already exists, which is okay since we already know we are in
        # 'force' mode from above.
        pass

    tsv_lines, html_table_lines, mw_fasta_lines, plot_fps, plot_data_fps = \
        _format_top_n_results_table(
            top_n_mw, mw_seqs, master_otu_table_ms, output_img_dir,
            mapping_category, suppress_taxonomic_output,
            num_categories_to_plot)

    # Each output file is written inside a `with` block so its handle is
    # released even if the write fails part-way through.
    mw_tsv_rel_fp = 'most_wanted_otus.txt'
    with open(join(output_dir, mw_tsv_rel_fp), 'w') as mw_tsv_f:
        mw_tsv_f.write(tsv_lines)

    mw_fasta_rel_fp = 'most_wanted_otus.fasta'
    with open(join(output_dir, mw_fasta_rel_fp), 'w') as mw_fasta_f:
        mw_fasta_f.write(mw_fasta_lines)

    # The download links use paths relative to the HTML file, which lives in
    # the same directory as the TSV and FASTA outputs.
    html_dl_links = (
        '<a href="%s" target="_blank">Download table in tab-'
        'separated value (TSV) format</a><br /><a href="%s" '
        'target="_blank">Download OTU sequence data in FASTA format</a>' %
        (mw_tsv_rel_fp, mw_fasta_rel_fp))
    html_lines = '%s<div>%s<br /><br />%s<br />%s</div>%s' % (
        html_header, html_dl_links, html_table_lines, html_dl_links,
        html_footer)

    with open(join(output_dir, 'most_wanted_otus.html'), 'w') as mw_html_f:
        mw_html_f.write(html_lines)
    logger.close()