Python WorkflowLogger.WorkflowLogger Beispiele

Programmiersprache: Python

Namespace / Paketname: qiime.workflow

Klasse / Typ: WorkflowLogger

Methode / Funktion: WorkflowLogger

Beispiele auf hotexamples.com: 4

Python WorkflowLogger.WorkflowLogger - 4 Beispiele gefunden. Dies sind die am besten bewerteten Python Beispiele für die qiime.workflow.WorkflowLogger.WorkflowLogger, die aus Open Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

WorkflowLogger(4)

write(3)

close(2)

Häufig verwendete Methoden

WorkflowLogger (4)

write (3)

close (2)

Beispiel #1

Datei anzeigen

def pick_subsampled_open_referenence_otus(
        input_fp,
        refseqs_fp,
        output_dir,
        percent_subsample,
        new_ref_set_id,
        command_handler,
        params,
        qiime_config,
        prefilter_refseqs_fp=None,
        run_tax_align_tree=True,
        prefilter_percent_id=0.60,
        min_otu_size=2,
        step1_otu_map_fp=None,
        step1_failures_fasta_fp=None,
        parallel=False,
        suppress_step4=False,
        logger=None,
        status_update_callback=print_to_stdout):
    """ Run the data preparation steps of Qiime 
    
        The steps performed by this function are:
          - Pick reference OTUs against refseqs_fp
          - Subsample the failures to n sequences.
          - Pick OTUs de novo on the n failures.
          - Pick representative sequences for the resulting OTUs.
          - Pick reference OTUs on all failures using the 
             representative set from step 4 as the reference set.
    
    """
    # for now only allowing uclust for otu picking
    denovo_otu_picking_method = 'uclust'
    reference_otu_picking_method = 'uclust_ref'

    # Prepare some variables for the later steps
    input_dir, input_filename = split(input_fp)
    input_basename, input_ext = splitext(input_filename)
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    if logger == None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    log_input_md5s(
        logger,
        [input_fp, refseqs_fp, step1_otu_map_fp, step1_failures_fasta_fp])

    # if the user has not passed a different reference collection for the pre-filter,
    # used the main refseqs_fp. this is useful if the user wants to provide a smaller
    # reference collection, or to use the input reference collection when running in
    # iterative mode (rather than an iteration's new refseqs)
    if prefilter_refseqs_fp == None:
        prefilter_refseqs_fp = refseqs_fp

    ## Step 1: Closed-reference OTU picking on the input file (if not already complete)
    if step1_otu_map_fp and step1_failures_fasta_fp:
        step1_dir = '%s/step1_otus' % output_dir
        create_dir(step1_dir)
        logger.write("Using pre-existing reference otu map and failures.\n\n")
    else:
        if prefilter_percent_id != None:
            prefilter_dir = '%s/prefilter_otus/' % output_dir
            prefilter_otu_map_fp = \
             '%s/%s_otus.txt' % (prefilter_dir,input_basename)
            prefilter_failures_list_fp = '%s/%s_failures.txt' % \
             (prefilter_dir,input_basename)
            prefilter_pick_otu_cmd = pick_reference_otus(\
             input_fp,prefilter_dir,reference_otu_picking_method,
             prefilter_refseqs_fp,parallel,params,logger,prefilter_percent_id)
            commands.append([('Pick Reference OTUs (prefilter)',
                              prefilter_pick_otu_cmd)])

            prefiltered_input_fp = '%s/prefiltered_%s%s' %\
             (prefilter_dir,input_basename,input_ext)
            filter_fasta_cmd = 'filter_fasta.py -f %s -o %s -s %s -n' %\
             (input_fp,prefiltered_input_fp,prefilter_failures_list_fp)
            commands.append([('Filter prefilter failures from input',
                              filter_fasta_cmd)])

            input_fp = prefiltered_input_fp
            input_dir, input_filename = split(input_fp)
            input_basename, input_ext = splitext(input_filename)

        ## Build the OTU picking command
        step1_dir = \
         '%s/step1_otus' % output_dir
        step1_otu_map_fp = \
         '%s/%s_otus.txt' % (step1_dir,input_basename)
        step1_pick_otu_cmd = pick_reference_otus(\
         input_fp,step1_dir,reference_otu_picking_method,
         refseqs_fp,parallel,params,logger)
        commands.append([('Pick Reference OTUs', step1_pick_otu_cmd)])

        ## Build the failures fasta file
        step1_failures_list_fp = '%s/%s_failures.txt' % \
         (step1_dir,input_basename)
        step1_failures_fasta_fp = \
         '%s/failures.fasta' % step1_dir
        step1_filter_fasta_cmd = 'filter_fasta.py -f %s -s %s -o %s' %\
         (input_fp,step1_failures_list_fp,step1_failures_fasta_fp)

        commands.append([('Generate full failures fasta file',
                          step1_filter_fasta_cmd)])

        # Call the command handler on the list of commands
        command_handler(commands,
                        status_update_callback,
                        logger=logger,
                        close_logger_on_success=False)
        commands = []

    step1_repset_fasta_fp = \
     '%s/step1_rep_set.fna' % step1_dir
    step1_pick_rep_set_cmd = 'pick_rep_set.py -i %s -o %s -f %s' %\
     (step1_otu_map_fp, step1_repset_fasta_fp, input_fp)
    commands.append([('Pick rep set', step1_pick_rep_set_cmd)])

    ## Subsample the failures fasta file to retain (roughly) the
    ## percent_subsample
    step2_input_fasta_fp = \
     '%s/subsampled_failures.fasta' % step1_dir
    subsample_fasta(step1_failures_fasta_fp, step2_input_fasta_fp,
                    percent_subsample)

    ## Prep the OTU picking command for the subsampled failures
    step2_dir = '%s/step2_otus/' % output_dir
    step2_cmd = pick_denovo_otus(step2_input_fasta_fp, step2_dir,
                                 new_ref_set_id, denovo_otu_picking_method,
                                 params, logger)
    step2_otu_map_fp = '%s/subsampled_failures_otus.txt' % step2_dir

    commands.append([('Pick de novo OTUs for new clusters', step2_cmd)])

    ## Prep the rep set picking command for the subsampled failures
    step2_repset_fasta_fp = '%s/step2_rep_set.fna' % step2_dir
    step2_rep_set_cmd = 'pick_rep_set.py -i %s -o %s -f %s' %\
     (step2_otu_map_fp,step2_repset_fasta_fp,step2_input_fasta_fp)
    commands.append([('Pick representative set for subsampled failures',
                      step2_rep_set_cmd)])

    step3_dir = '%s/step3_otus/' % output_dir
    step3_otu_map_fp = '%s/failures_otus.txt' % step3_dir
    step3_failures_list_fp = '%s/failures_failures.txt' % step3_dir
    step3_cmd = pick_reference_otus(step1_failures_fasta_fp, step3_dir,
                                    reference_otu_picking_method,
                                    step2_repset_fasta_fp, parallel, params,
                                    logger)

    commands.append([('Pick reference OTUs using de novo rep set', step3_cmd)])

    # name the final otu map
    merged_otu_map_fp = '%s/final_otu_map.txt' % output_dir

    if not suppress_step4:
        step3_failures_fasta_fp = '%s/failures_failures.fasta' % step3_dir
        step3_filter_fasta_cmd = 'filter_fasta.py -f %s -s %s -o %s' %\
         (step1_failures_fasta_fp,step3_failures_list_fp,step3_failures_fasta_fp)
        commands.append([('Create fasta file of step3 failures',
                          step3_filter_fasta_cmd)])

        step4_dir = '%s/step4_otus/' % output_dir
        step4_cmd = pick_denovo_otus(step3_failures_fasta_fp, step4_dir,
                                     '.'.join([new_ref_set_id, 'CleanUp']),
                                     denovo_otu_picking_method, params, logger)
        step4_otu_map_fp = '%s/failures_failures_otus.txt' % step4_dir
        commands.append([('Pick de novo OTUs on step3 failures', step4_cmd)])
        # Merge the otu maps
        cat_otu_tables_cmd = 'cat %s %s %s >> %s' %\
             (step1_otu_map_fp,step3_otu_map_fp,step4_otu_map_fp,merged_otu_map_fp)
        commands.append([('Merge OTU maps', cat_otu_tables_cmd)])
        step4_repset_fasta_fp = '%s/step4_rep_set.fna' % step4_dir
        step4_rep_set_cmd = 'pick_rep_set.py -i %s -o %s -f %s' %\
         (step4_otu_map_fp,step4_repset_fasta_fp,step3_failures_fasta_fp)
        commands.append([('Pick representative set for subsampled failures',
                          step4_rep_set_cmd)])

    else:
        # Merge the otu maps
        cat_otu_tables_cmd = 'cat %s %s >> %s' %\
             (step1_otu_map_fp,step3_otu_map_fp,merged_otu_map_fp)
        commands.append([('Merge OTU maps', cat_otu_tables_cmd)])
        # Move the step 3 failures file to the top-level directory
        commands.append([('Move final failures file to top-level directory',
                          'mv %s %s/final_failures.txt' %
                          (step3_failures_list_fp, output_dir))])

    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=False)
    commands = []

    otu_fp = merged_otu_map_fp
    # Filter singletons from the otu map
    otu_no_singletons_fp = '%s/final_otu_map_mc%d.txt' % (output_dir,
                                                          min_otu_size)
    otus_to_keep = filter_otus_from_otu_map(otu_fp, otu_no_singletons_fp,
                                            min_otu_size)

    ## make the final representative seqs file and a new refseqs file that
    ## could be used in subsequent otu picking runs.
    ## this is clunky. first, we need to do this without singletons to match
    ## the otu map without singletons. next, there is a difference in what
    ## we need the reference set to be and what we need the repseqs to be.
    ## the reference set needs to be a superset of the input reference set
    ## to this set. the repset needs to be only the sequences that were observed
    ## in this data set, and we want reps for the step1 reference otus to be
    ## reads from this run so we don't hit issues building a tree using
    ## sequences of very different lengths. so...
    final_repset_fp = '%s/rep_set.fna' % output_dir
    final_repset_f = open(final_repset_fp, 'w')
    new_refseqs_fp = '%s/new_refseqs.fna' % output_dir
    # write non-singleton otus representative sequences from step1 to the
    # final rep set file
    for otu_id, seq in MinimalFastaParser(open(step1_repset_fasta_fp, 'U')):
        if otu_id.split()[0] in otus_to_keep:
            final_repset_f.write('>%s\n%s\n' % (otu_id, seq))
    # copy the full input refseqs file to the new refseqs_fp
    copy(refseqs_fp, new_refseqs_fp)
    new_refseqs_f = open(new_refseqs_fp, 'a')
    new_refseqs_f.write('\n')
    # iterate over all representative sequences from step2 and step4 and write
    # those corresponding to non-singleton otus to the final representative set
    # file and the new reference sequences file.
    for otu_id, seq in MinimalFastaParser(open(step2_repset_fasta_fp, 'U')):
        if otu_id.split()[0] in otus_to_keep:
            new_refseqs_f.write('>%s\n%s\n' % (otu_id, seq))
            final_repset_f.write('>%s\n%s\n' % (otu_id, seq))
    if not suppress_step4:
        for otu_id, seq in MinimalFastaParser(open(step4_repset_fasta_fp,
                                                   'U')):
            if otu_id.split()[0] in otus_to_keep:
                new_refseqs_f.write('>%s\n%s\n' % (otu_id, seq))
                final_repset_f.write('>%s\n%s\n' % (otu_id, seq))
    new_refseqs_f.close()
    final_repset_f.close()

    # Prep the make_otu_table.py command
    otu_table_fp = '%s/otu_table_mc%d.biom' % (output_dir, min_otu_size)
    make_otu_table_cmd = 'make_otu_table.py -i %s -o %s' %\
     (otu_no_singletons_fp,otu_table_fp)
    commands.append([("Make the otu table", make_otu_table_cmd)])

    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=False)

    commands = []

    if run_tax_align_tree:
        taxonomy_fp, pynast_failures_fp = tax_align_tree(
            repset_fasta_fp=final_repset_fp,
            output_dir=output_dir,
            command_handler=command_handler,
            params=params,
            qiime_config=qiime_config,
            parallel=parallel,
            logger=logger,
            status_update_callback=status_update_callback)

        # Add taxa to otu table
        otu_table_w_tax_fp = \
         '%s/otu_table_mc%d_w_tax.biom' % (output_dir,min_otu_size)
        add_taxa_cmd = 'add_taxa.py -i %s -t %s -o %s' %\
         (otu_table_fp,taxonomy_fp,otu_table_w_tax_fp)
        commands.append([("Add taxa to OTU table", add_taxa_cmd)])

        command_handler(commands,
                        status_update_callback,
                        logger=logger,
                        close_logger_on_success=False)
        commands = []

        # Build OTU table without PyNAST failures
        otu_table_fp = \
         '%s/otu_table_mc%d_w_tax_no_pynast_failures.biom' % (output_dir,min_otu_size)
        filtered_otu_table = filter_otus_from_otu_table(
            parse_biom_table(open(otu_table_w_tax_fp, 'U')),
            get_seq_ids_from_fasta_file(open(pynast_failures_fp, 'U')),
            0,
            inf,
            0,
            inf,
            negate_ids_to_keep=True)
        otu_table_f = open(otu_table_fp, 'w')
        otu_table_f.write(format_biom_table(filtered_otu_table))
        otu_table_f.close()

        command_handler(commands,
                        status_update_callback,
                        logger=logger,
                        close_logger_on_success=False)
        commands = []

    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)

Beispiel #2

Datei anzeigen

def tax_align_tree(repset_fasta_fp,
                   output_dir,
                   command_handler,
                   params,
                   qiime_config,
                   parallel=False,
                   logger=None,
                   status_update_callback=print_to_stdout):

    input_dir, input_filename = split(repset_fasta_fp)
    input_basename, input_ext = splitext(input_filename)
    commands = []
    if logger == None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    ## Prep the taxonomy assignment command
    try:
        assignment_method = params['assign_taxonomy']['assignment_method']
    except KeyError:
        assignment_method = 'rdp'
    assign_taxonomy_dir = '%s/%s_assigned_taxonomy' %\
     (output_dir,assignment_method)
    taxonomy_fp = '%s/%s_tax_assignments.txt' % \
     (assign_taxonomy_dir,input_basename)
    if parallel and (assignment_method == 'rdp'
                     or assignment_method == 'blast'):
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params['parallel'])
        except KeyError:
            params_str = ''

        try:
            # Want to find a cleaner strategy for this: the parallel script
            # is method-specific, so doesn't take a --assignment_method
            # option. This works for now though.
            d = params['assign_taxonomy'].copy()
            del d['assignment_method']
            params_str += ' %s' % get_params_str(d)
        except KeyError:
            pass

        # Build the parallel taxonomy assignment command
        assign_taxonomy_cmd = \
         'parallel_assign_taxonomy_%s.py -i %s -o %s -T %s' %\
         (assignment_method, repset_fasta_fp,assign_taxonomy_dir, params_str)
    else:
        try:
            params_str = get_params_str(params['assign_taxonomy'])
        except KeyError:
            params_str = ''
        # Build the taxonomy assignment command
        assign_taxonomy_cmd = 'assign_taxonomy.py -o %s -i %s %s' %\
         (assign_taxonomy_dir,repset_fasta_fp, params_str)
    if exists(assign_taxonomy_dir):
        rmtree(assign_taxonomy_dir)
    commands.append([('Assign taxonomy', assign_taxonomy_cmd)])

    ## Prep the pynast alignment command
    alignment_method = 'pynast'
    pynast_dir = '%s/%s_aligned_seqs' % (output_dir, alignment_method)
    aln_fp = '%s/%s_aligned.fasta' % (pynast_dir, input_basename)
    failures_fp = '%s/%s_failures.fasta' % (pynast_dir, input_basename)
    if exists(pynast_dir):
        rmtree(pynast_dir)

    if parallel:
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params['parallel'])
        except KeyError:
            params_str = ''

        # Grab the OTU picker parameters
        try:
            # Want to find a cleaner strategy for this: the parallel script
            # is method-specific, so doesn't take a --alignment_method
            # option. This works for now though.
            d = params['align_seqs'].copy()
            if 'alignment_method' in d:
                del d['alignment_method']
            params_str += ' %s' % get_params_str(d)
        except KeyError:
            pass

        # Build the parallel pynast alignment command
        align_seqs_cmd = 'parallel_align_seqs_pynast.py -i %s -o %s -T %s' %\
         (repset_fasta_fp, pynast_dir, params_str)
    else:
        try:
            params_str = get_params_str(params['align_seqs'])
        except KeyError:
            params_str = ''
        # Build the pynast alignment command
        align_seqs_cmd = 'align_seqs.py -i %s -o %s %s' %\
         (repset_fasta_fp, pynast_dir, params_str)
    commands.append([('Align sequences', align_seqs_cmd)])

    ## Prep the alignment filtering command
    filtered_aln_fp = '%s/%s_aligned_pfiltered.fasta' %\
     (pynast_dir,input_basename)
    try:
        params_str = get_params_str(params['filter_alignment'])
    except KeyError:
        params_str = ''
    # Build the alignment filtering command
    filter_alignment_cmd = 'filter_alignment.py -o %s -i %s %s' %\
     (pynast_dir, aln_fp, params_str)
    commands.append([('Filter alignment', filter_alignment_cmd)])

    ## Prep the tree building command
    tree_fp = '%s/rep_set.tre' % output_dir
    try:
        params_str = get_params_str(params['make_phylogeny'])
    except KeyError:
        params_str = ''
    # Build the tree building command
    make_phylogeny_cmd = 'make_phylogeny.py -i %s -o %s %s' %\
     (filtered_aln_fp, tree_fp,params_str)
    commands.append([('Build phylogenetic tree', make_phylogeny_cmd)])
    if exists(tree_fp):
        remove_files([tree_fp])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
    return taxonomy_fp, failures_fp

Beispiel #3

Datei anzeigen

def iterative_pick_subsampled_open_referenence_otus(
        input_fps,
        refseqs_fp,
        output_dir,
        percent_subsample,
        new_ref_set_id,
        command_handler,
        params,
        qiime_config,
        prefilter_refseqs_fp=None,
        prefilter_percent_id=0.60,
        min_otu_size=2,
        run_tax_align_tree=True,
        step1_otu_map_fp=None,
        step1_failures_fasta_fp=None,
        parallel=False,
        suppress_step4=False,
        logger=None,
        status_update_callback=print_to_stdout):
    """ Call the pick_subsampled_open_referenence_otus workflow on multiple inputs
         and handle processing of the results.
    """
    create_dir(output_dir)
    commands = []
    if logger == None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    # if the user has not passed a different reference collection for the pre-filter,
    # used the input refseqs_fp for all iterations. we want to pre-filter all data against
    # the input data as lower percent identity searches with uclust can be slow, so we
    # want the reference collection to stay at a reasonable size.
    if prefilter_refseqs_fp == None:
        prefilter_refseqs_fp = refseqs_fp

    otu_table_fps = []
    repset_fasta_fps = []
    for i, input_fp in enumerate(input_fps):
        iteration_output_dir = '%s/%d/' % (output_dir, i)
        if iteration_output_exists(iteration_output_dir, min_otu_size):
            # if the output from an iteration already exists, skip that
            # iteration (useful for continuing failed runs)
            log_input_md5s(logger, [input_fp, refseqs_fp])
            logger.write(
                'Iteration %d (input file: %s) output data already exists. '
                'Skipping and moving to next.\n\n' % (i, input_fp))
        else:
            pick_subsampled_open_referenence_otus(
                input_fp=input_fp,
                refseqs_fp=refseqs_fp,
                output_dir=iteration_output_dir,
                percent_subsample=percent_subsample,
                new_ref_set_id='.'.join([new_ref_set_id,
                                         str(i)]),
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                run_tax_align_tree=False,
                prefilter_refseqs_fp=prefilter_refseqs_fp,
                prefilter_percent_id=prefilter_percent_id,
                min_otu_size=min_otu_size,
                step1_otu_map_fp=step1_otu_map_fp,
                step1_failures_fasta_fp=step1_failures_fasta_fp,
                parallel=parallel,
                suppress_step4=suppress_step4,
                logger=logger,
                status_update_callback=status_update_callback)
        ## perform post-iteration file shuffling whether the previous iteration's
        ## data previously existed or was just computed.
        # step1 otu map and failures can only be used for the first iteration
        # as subsequent iterations need to use updated refseqs files
        step1_otu_map_fp = step1_failures_fasta_fp = None
        new_refseqs_fp = '%s/new_refseqs.fna' % iteration_output_dir
        refseqs_fp = new_refseqs_fp
        otu_table_fps.append('%s/otu_table_mc%d.biom' %
                             (iteration_output_dir, min_otu_size))
        repset_fasta_fps.append('%s/rep_set.fna' % iteration_output_dir)

    # Merge OTU tables - check for existence first as this step has historically
    # been a frequent failure, so is sometimes run manually in failed runs.
    otu_table_fp = '%s/otu_table_mc%d.biom' % (output_dir, min_otu_size)
    if not (exists(otu_table_fp) and getsize(otu_table_fp) > 0):
        merge_cmd = 'merge_otu_tables.py -i %s -o %s' %\
         (','.join(otu_table_fps),otu_table_fp)
        commands.append([("Merge OTU tables", merge_cmd)])

    # Build master rep set
    final_repset_fp = '%s/rep_set.fna' % output_dir
    final_repset_from_iteration_repsets_fps(repset_fasta_fps, final_repset_fp)

    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=False)
    commands = []

    if run_tax_align_tree:
        otu_table_w_tax_fp = \
         '%s/otu_table_mc%d_w_tax.biom' % (output_dir,min_otu_size)
        final_otu_table_fp = \
         '%s/otu_table_mc%d_w_tax_no_pynast_failures.biom' % (output_dir,min_otu_size)
        if exists(final_otu_table_fp) and getsize(final_otu_table_fp) > 0:
            logger.write("Final output file exists (%s). Will not rebuild." %
                         otu_table_fp)
        else:
            # remove files from partially completed runs
            remove_files([otu_table_w_tax_fp, final_otu_table_fp],
                         error_on_missing=False)

            taxonomy_fp, pynast_failures_fp = tax_align_tree(
                repset_fasta_fp=final_repset_fp,
                output_dir=output_dir,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                parallel=parallel,
                logger=logger,
                status_update_callback=status_update_callback)

            # Add taxa to otu table
            add_taxa_cmd = 'add_taxa.py -i %s -t %s -o %s' %\
             (otu_table_fp,taxonomy_fp,otu_table_w_tax_fp)
            commands.append([("Add taxa to OTU table", add_taxa_cmd)])

            command_handler(commands,
                            status_update_callback,
                            logger=logger,
                            close_logger_on_success=False)
            commands = []

            # Build OTU table without PyNAST failures
            filtered_otu_table = filter_otus_from_otu_table(
                parse_biom_table(open(otu_table_w_tax_fp, 'U')),
                get_seq_ids_from_fasta_file(open(pynast_failures_fp, 'U')),
                0,
                inf,
                0,
                inf,
                negate_ids_to_keep=True)
            otu_table_f = open(final_otu_table_fp, 'w')
            otu_table_f.write(format_biom_table(filtered_otu_table))
            otu_table_f.close()

            command_handler(commands,
                            status_update_callback,
                            logger=logger,
                            close_logger_on_success=False)
            commands = []

    logger.close()

Beispiel #4

Datei anzeigen

def assign_taxonomy_multiple_times(input_dirs,
                                   output_dir,
                                   assignment_methods,
                                   reference_seqs_fp,
                                   input_fasta_filename,
                                   clean_otu_table_filename,
                                   id_to_taxonomy_fp=None,
                                   confidences=None,
                                   e_values=None,
                                   command_handler=call_commands_serially,
                                   rdp_max_memory=None,
                                   status_update_callback=print_to_stdout,
                                   force=False,
                                   read_1_seqs_fp=None,
                                   read_2_seqs_fp=None):
    """ Performs sanity checks on passed arguments and directories. Builds 
        commands for each method and sends them off to be executed. """
    ## Check if temp output directory exists
    try:
        makedirs(output_dir)
    except OSError:
        if not force:
            raise WorkflowError(
                "Output directory '%s' already exists. Please "
                "choose a different directory, or force overwrite with -f." %
                output_dir)

    ## Check for inputs that are universally required
    if assignment_methods is None:
        raise WorkflowError("You must specify at least one method:"
                            "'rdp', 'blast', 'mothur', or 'rtax'.")
    if input_fasta_filename is None:
        raise WorkflowError("You must provide an input fasta filename.")
    if clean_otu_table_filename is None:
        raise WorkflowError("You must provide a clean otu table filename.")
    if id_to_taxonomy_fp is None:
        raise WorkflowError("You must provide an ID to taxonomy map filename.")

    logger = WorkflowLogger(generate_log_fp(output_dir))
    time_results = []

    for input_dir in input_dirs:
        ## Make sure the input dataset directory exists.
        if not isdir(input_dir):
            raise WorkflowError("The input directory '%s' does not exist." %
                                input_dir)

        input_dir_name = split(normpath(input_dir))[1]
        output_dataset_dir = join(output_dir, input_dir_name)
        input_fasta_fp = join(input_dir, input_fasta_filename)
        clean_otu_table_fp = join(input_dir, clean_otu_table_filename)

        logger.write("\nCreating output subdirectory '%s' if it doesn't "
                     "already exist.\n" % output_dataset_dir)
        try:
            makedirs(output_dataset_dir)
        except OSError:
            # It already exists, which is okay since we already know we are in
            # 'force' mode from above.
            pass

        for method in assignment_methods:
            ## Method is RDP
            if method == 'rdp':
                ## Check for execution parameters required by RDP method
                if confidences is None:
                    raise WorkflowError("You must specify at least one "
                                        "confidence level.")
                ## Generate command for RDP
                commands = _generate_rdp_commands(
                    output_dataset_dir,
                    input_fasta_fp,
                    reference_seqs_fp,
                    id_to_taxonomy_fp,
                    clean_otu_table_fp,
                    confidences,
                    rdp_max_memory=rdp_max_memory)

            ## Method is BLAST
            elif method == 'blast':
                ## Check for execution parameters required by BLAST method
                if e_values is None:
                    raise WorkflowError("You must specify at least one "
                                        "E value.")
                ## Generate command for BLAST
                commands = _generate_blast_commands(
                    output_dataset_dir, input_fasta_fp, reference_seqs_fp,
                    id_to_taxonomy_fp, clean_otu_table_fp, e_values)

            ## Method is Mothur
            elif method == 'mothur':
                ## Check for execution parameters required by Mothur method
                if confidences is None:
                    raise WorkflowError("You must specify at least one "
                                        "confidence level.")
                ## Generate command for mothur
                commands = _generate_mothur_commands(
                    output_dataset_dir, input_fasta_fp, reference_seqs_fp,
                    id_to_taxonomy_fp, clean_otu_table_fp, confidences)

            ## Method is RTAX
            elif method == 'rtax':
                ## Check for execution parameters required by RTAX method
                if read_1_seqs_fp is None:
                    raise WorkflowError("You must specify a file containing "
                                        "the first read from pair-end "
                                        "sequencing.")
                ## Generate command for rtax
                commands = _generate_rtax_commands(
                    output_dataset_dir,
                    input_fasta_fp,
                    reference_seqs_fp,
                    id_to_taxonomy_fp,
                    clean_otu_table_fp,
                    read_1_seqs_fp,
                    read_2_seqs_fp=read_2_seqs_fp)

            ## Unsupported method
            else:
                raise WorkflowError("Unrecognized or unsupported taxonomy "
                                    "assignment method '%s'." % method)

            # send command for current method to command handler
            for command in commands:
                #call_commands_serially needs a list of commands so here's a length one commmand list.
                c = list()
                c.append(command)
                start = time()
                command_handler(c,
                                status_update_callback,
                                logger,
                                close_logger_on_success=False)
                end = time()
                input_file = command[0][1].split()[
                    command[0][1].split().index('-i') + 1].split('/')[-2]
                if 'Assigning' in command[0][0]:
                    time_results.append(
                        (input_file, ' '.join(command[0][0].split()[2:]),
                         end - start))

    # removes and writes out the title we initialized with earlier
    logger.write('\n\nAssignment times (seconds):\n')
    for t in time_results:
        # write out each time result as (method, params)\ttime (seconds)
        #First clean up the output
        method, param = t[1].split(', ')
        method = method.lstrip('(')
        param = param.rstrip(')')

        logger.write('%s\t%s\t%s\t%s\n' % (t[0], method, param, str(t[2])))

    logger.close()