Beispiel #1
0
def pick_subsampled_open_referenence_otus(
        input_fp,
        refseqs_fp,
        output_dir,
        percent_subsample,
        new_ref_set_id,
        command_handler,
        params,
        qiime_config,
        prefilter_refseqs_fp=None,
        run_tax_align_tree=True,
        prefilter_percent_id=0.60,
        min_otu_size=2,
        step1_otu_map_fp=None,
        step1_failures_fasta_fp=None,
        parallel=False,
        suppress_step4=False,
        logger=None,
        status_update_callback=print_to_stdout):
    """ Run the subsampled open-reference OTU picking workflow on one input

        The steps performed by this function are:
          - (Optional prefilter) Pick reference OTUs against
             prefilter_refseqs_fp at prefilter_percent_id and filter the
             prefilter failures from the input.
          - Step 1: Pick reference OTUs against refseqs_fp and build a
             fasta file of the failures.
          - Step 2: Subsample the failures, pick OTUs de novo on the
             subsample and pick representative sequences for those OTUs.
          - Step 3: Pick reference OTUs on all step 1 failures using the
             step 2 representative set as the reference set.
          - Step 4 (unless suppress_step4): Pick OTUs de novo on the
             step 3 failures and pick their representative sequences.
          - Merge the OTU maps, drop OTUs smaller than min_otu_size, write
             the final representative set and a new reference set, and
             build the OTU table.
          - (If run_tax_align_tree) Run tax_align_tree on the final
             representative set, add taxonomy to the OTU table and write
             a table without the PyNAST failures.

        input_fp: path to the input sequences
        refseqs_fp: path to the reference sequences used in step 1; also
         copied in full to <output_dir>/new_refseqs.fna
        output_dir: directory where all output is written (created here)
        percent_subsample: passed to subsample_fasta when subsampling the
         step 1 failures
        new_ref_set_id: identifier passed to pick_denovo_otus; step 4 uses
         '<new_ref_set_id>.CleanUp'
        command_handler: callable invoked as
         command_handler(commands, status_update_callback, logger=...,
         close_logger_on_success=...)
        params: workflow parameters, forwarded to the OTU picking helpers,
         the logger and tax_align_tree
        qiime_config: forwarded to the logger and tax_align_tree
        prefilter_refseqs_fp: reference collection for the prefilter;
         defaults to refseqs_fp when None
        prefilter_percent_id: prefilter identity threshold; None disables
         the prefilter
        min_otu_size: passed to filter_otus_from_otu_map and used in the
         output file names
        step1_otu_map_fp, step1_failures_fasta_fp: when both are provided,
         the prefilter and step 1 are skipped and these files are used
        parallel: forwarded to pick_reference_otus and tax_align_tree
        logger: an existing logger to write to; when None a WorkflowLogger
         is created here and closed on success

        Returns None; all results are written under output_dir.

    """
    # for now only allowing uclust for otu picking
    denovo_otu_picking_method = 'uclust'
    reference_otu_picking_method = 'uclust_ref'

    # Prepare some variables for the later steps
    input_basename, input_ext = splitext(split(input_fp)[1])
    create_dir(output_dir)
    commands = []
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    log_input_md5s(
        logger,
        [input_fp, refseqs_fp, step1_otu_map_fp, step1_failures_fasta_fp])

    # if the user has not passed a different reference collection for the
    # pre-filter, use the main refseqs_fp. this is useful if the user wants
    # to provide a smaller reference collection, or to use the input
    # reference collection when running in iterative mode (rather than an
    # iteration's new refseqs)
    if prefilter_refseqs_fp is None:
        prefilter_refseqs_fp = refseqs_fp

    ## Step 1: Closed-reference OTU picking on the input file (if not
    ## already complete)
    if step1_otu_map_fp and step1_failures_fasta_fp:
        step1_dir = '%s/step1_otus' % output_dir
        create_dir(step1_dir)
        logger.write("Using pre-existing reference otu map and failures.\n\n")
    else:
        if prefilter_percent_id is not None:
            prefilter_dir = '%s/prefilter_otus/' % output_dir
            prefilter_failures_list_fp = '%s/%s_failures.txt' % \
                (prefilter_dir, input_basename)
            prefilter_pick_otu_cmd = pick_reference_otus(
                input_fp, prefilter_dir, reference_otu_picking_method,
                prefilter_refseqs_fp, parallel, params, logger,
                prefilter_percent_id)
            commands.append([('Pick Reference OTUs (prefilter)',
                              prefilter_pick_otu_cmd)])

            prefiltered_input_fp = '%s/prefiltered_%s%s' % \
                (prefilter_dir, input_basename, input_ext)
            filter_fasta_cmd = 'filter_fasta.py -f %s -o %s -s %s -n' % \
                (input_fp, prefiltered_input_fp, prefilter_failures_list_fp)
            commands.append([('Filter prefilter failures from input',
                              filter_fasta_cmd)])

            # all later steps operate on the prefiltered sequences
            input_fp = prefiltered_input_fp
            input_basename, input_ext = splitext(split(input_fp)[1])

        ## Build the OTU picking command
        step1_dir = '%s/step1_otus' % output_dir
        step1_otu_map_fp = '%s/%s_otus.txt' % (step1_dir, input_basename)
        step1_pick_otu_cmd = pick_reference_otus(
            input_fp, step1_dir, reference_otu_picking_method,
            refseqs_fp, parallel, params, logger)
        commands.append([('Pick Reference OTUs', step1_pick_otu_cmd)])

        ## Build the failures fasta file
        step1_failures_list_fp = '%s/%s_failures.txt' % \
            (step1_dir, input_basename)
        step1_failures_fasta_fp = '%s/failures.fasta' % step1_dir
        step1_filter_fasta_cmd = 'filter_fasta.py -f %s -s %s -o %s' % \
            (input_fp, step1_failures_list_fp, step1_failures_fasta_fp)

        commands.append([('Generate full failures fasta file',
                          step1_filter_fasta_cmd)])

        # The failures fasta must exist on disk before it is subsampled
        # below, so run the queued commands now.
        command_handler(commands,
                        status_update_callback,
                        logger=logger,
                        close_logger_on_success=False)
        commands = []

    step1_repset_fasta_fp = '%s/step1_rep_set.fna' % step1_dir
    step1_pick_rep_set_cmd = 'pick_rep_set.py -i %s -o %s -f %s' % \
        (step1_otu_map_fp, step1_repset_fasta_fp, input_fp)
    commands.append([('Pick rep set', step1_pick_rep_set_cmd)])

    ## Subsample the failures fasta file to retain (roughly) the
    ## percent_subsample
    step2_input_fasta_fp = '%s/subsampled_failures.fasta' % step1_dir
    subsample_fasta(step1_failures_fasta_fp, step2_input_fasta_fp,
                    percent_subsample)

    ## Prep the OTU picking command for the subsampled failures
    step2_dir = '%s/step2_otus/' % output_dir
    step2_cmd = pick_denovo_otus(step2_input_fasta_fp, step2_dir,
                                 new_ref_set_id, denovo_otu_picking_method,
                                 params, logger)
    step2_otu_map_fp = '%s/subsampled_failures_otus.txt' % step2_dir

    commands.append([('Pick de novo OTUs for new clusters', step2_cmd)])

    ## Prep the rep set picking command for the subsampled failures
    step2_repset_fasta_fp = '%s/step2_rep_set.fna' % step2_dir
    step2_rep_set_cmd = 'pick_rep_set.py -i %s -o %s -f %s' % \
        (step2_otu_map_fp, step2_repset_fasta_fp, step2_input_fasta_fp)
    commands.append([('Pick representative set for subsampled failures',
                      step2_rep_set_cmd)])

    ## Step 3: reference OTU picking of all step 1 failures against the
    ## step 2 representative set
    step3_dir = '%s/step3_otus/' % output_dir
    step3_otu_map_fp = '%s/failures_otus.txt' % step3_dir
    step3_failures_list_fp = '%s/failures_failures.txt' % step3_dir
    step3_cmd = pick_reference_otus(step1_failures_fasta_fp, step3_dir,
                                    reference_otu_picking_method,
                                    step2_repset_fasta_fp, parallel, params,
                                    logger)

    commands.append([('Pick reference OTUs using de novo rep set', step3_cmd)])

    # name the final otu map
    merged_otu_map_fp = '%s/final_otu_map.txt' % output_dir

    if not suppress_step4:
        step3_failures_fasta_fp = '%s/failures_failures.fasta' % step3_dir
        step3_filter_fasta_cmd = 'filter_fasta.py -f %s -s %s -o %s' % \
            (step1_failures_fasta_fp, step3_failures_list_fp,
             step3_failures_fasta_fp)
        commands.append([('Create fasta file of step3 failures',
                          step3_filter_fasta_cmd)])

        step4_dir = '%s/step4_otus/' % output_dir
        step4_cmd = pick_denovo_otus(step3_failures_fasta_fp, step4_dir,
                                     '.'.join([new_ref_set_id, 'CleanUp']),
                                     denovo_otu_picking_method, params, logger)
        step4_otu_map_fp = '%s/failures_failures_otus.txt' % step4_dir
        commands.append([('Pick de novo OTUs on step3 failures', step4_cmd)])
        # Merge the otu maps. '>' (not '>>') is used deliberately so that
        # re-running into an existing output directory overwrites the merged
        # map instead of appending duplicate entries to it.
        cat_otu_tables_cmd = 'cat %s %s %s > %s' % \
            (step1_otu_map_fp, step3_otu_map_fp, step4_otu_map_fp,
             merged_otu_map_fp)
        commands.append([('Merge OTU maps', cat_otu_tables_cmd)])
        step4_repset_fasta_fp = '%s/step4_rep_set.fna' % step4_dir
        step4_rep_set_cmd = 'pick_rep_set.py -i %s -o %s -f %s' % \
            (step4_otu_map_fp, step4_repset_fasta_fp, step3_failures_fasta_fp)
        commands.append([('Pick representative set for subsampled failures',
                          step4_rep_set_cmd)])

    else:
        # Merge the otu maps ('>' rather than '>>' so that a re-run does not
        # append to a previously written map)
        cat_otu_tables_cmd = 'cat %s %s > %s' % \
            (step1_otu_map_fp, step3_otu_map_fp, merged_otu_map_fp)
        commands.append([('Merge OTU maps', cat_otu_tables_cmd)])
        # Move the step 3 failures file to the top-level directory
        commands.append([('Move final failures file to top-level directory',
                          'mv %s %s/final_failures.txt' %
                          (step3_failures_list_fp, output_dir))])

    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=False)
    commands = []

    # Filter small OTUs (fewer than min_otu_size sequences) from the otu map
    otu_no_singletons_fp = '%s/final_otu_map_mc%d.txt' % (output_dir,
                                                          min_otu_size)
    otus_to_keep = filter_otus_from_otu_map(merged_otu_map_fp,
                                            otu_no_singletons_fp,
                                            min_otu_size)

    ## make the final representative seqs file and a new refseqs file that
    ## could be used in subsequent otu picking runs.
    ## this is clunky. first, we need to do this without singletons to match
    ## the otu map without singletons. next, there is a difference in what
    ## we need the reference set to be and what we need the repseqs to be.
    ## the reference set needs to be a superset of the input reference set
    ## to this set. the repset needs to be only the sequences that were
    ## observed in this data set, and we want reps for the step1 reference
    ## otus to be reads from this run so we don't hit issues building a tree
    ## using sequences of very different lengths. so...
    final_repset_fp = '%s/rep_set.fna' % output_dir
    new_refseqs_fp = '%s/new_refseqs.fna' % output_dir

    # rep sets of the de novo steps; these go into both output files
    denovo_repset_fps = [step2_repset_fasta_fp]
    if not suppress_step4:
        denovo_repset_fps.append(step4_repset_fasta_fp)

    with open(final_repset_fp, 'w') as final_repset_f:
        # write non-singleton otus representative sequences from step1 to
        # the final rep set file only
        with open(step1_repset_fasta_fp, 'U') as step1_repset_f:
            for otu_id, seq in MinimalFastaParser(step1_repset_f):
                if otu_id.split()[0] in otus_to_keep:
                    final_repset_f.write('>%s\n%s\n' % (otu_id, seq))

        # copy the full input refseqs file to the new refseqs_fp
        copy(refseqs_fp, new_refseqs_fp)
        with open(new_refseqs_fp, 'a') as new_refseqs_f:
            # guard against the copied file not ending with a newline
            new_refseqs_f.write('\n')
            # iterate over all representative sequences from step2 and step4
            # and write those corresponding to non-singleton otus to the
            # final representative set file and the new reference sequences
            # file.
            for denovo_repset_fp in denovo_repset_fps:
                with open(denovo_repset_fp, 'U') as denovo_repset_f:
                    for otu_id, seq in MinimalFastaParser(denovo_repset_f):
                        if otu_id.split()[0] in otus_to_keep:
                            record = '>%s\n%s\n' % (otu_id, seq)
                            new_refseqs_f.write(record)
                            final_repset_f.write(record)

    # Prep the make_otu_table.py command
    otu_table_fp = '%s/otu_table_mc%d.biom' % (output_dir, min_otu_size)
    make_otu_table_cmd = 'make_otu_table.py -i %s -o %s' % \
        (otu_no_singletons_fp, otu_table_fp)
    commands.append([("Make the otu table", make_otu_table_cmd)])

    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=False)

    commands = []

    if run_tax_align_tree:
        taxonomy_fp, pynast_failures_fp = tax_align_tree(
            repset_fasta_fp=final_repset_fp,
            output_dir=output_dir,
            command_handler=command_handler,
            params=params,
            qiime_config=qiime_config,
            parallel=parallel,
            logger=logger,
            status_update_callback=status_update_callback)

        # Add taxa to otu table
        otu_table_w_tax_fp = \
            '%s/otu_table_mc%d_w_tax.biom' % (output_dir, min_otu_size)
        add_taxa_cmd = 'add_taxa.py -i %s -t %s -o %s' % \
            (otu_table_fp, taxonomy_fp, otu_table_w_tax_fp)
        commands.append([("Add taxa to OTU table", add_taxa_cmd)])

        command_handler(commands,
                        status_update_callback,
                        logger=logger,
                        close_logger_on_success=False)
        commands = []

        # Build OTU table without PyNAST failures
        otu_table_fp = \
            '%s/otu_table_mc%d_w_tax_no_pynast_failures.biom' % \
            (output_dir, min_otu_size)
        # Inputs stay open until the table has been formatted, in case the
        # parsing helpers read lazily; the output file is only created once
        # the formatted table is available.
        with open(otu_table_w_tax_fp, 'U') as otu_table_w_tax_f:
            with open(pynast_failures_fp, 'U') as pynast_failures_f:
                filtered_otu_table = filter_otus_from_otu_table(
                    parse_biom_table(otu_table_w_tax_f),
                    get_seq_ids_from_fasta_file(pynast_failures_f),
                    0,
                    inf,
                    0,
                    inf,
                    negate_ids_to_keep=True)
                formatted_otu_table = format_biom_table(filtered_otu_table)
        with open(otu_table_fp, 'w') as otu_table_f:
            otu_table_f.write(formatted_otu_table)

        # No commands are queued at this point; the call is retained so the
        # handler is invoked exactly as many times as before.
        command_handler(commands,
                        status_update_callback,
                        logger=logger,
                        close_logger_on_success=False)
        commands = []

    # Final call with an empty command list: gives the handler the chance to
    # close the logger if it was created by this function.
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
Beispiel #2
0
def tax_align_tree(repset_fasta_fp,
                   output_dir,
                   command_handler,
                   params,
                   qiime_config,
                   parallel=False,
                   logger=None,
                   status_update_callback=print_to_stdout):
    """ Assign taxonomy, align, filter the alignment and build a tree

        Builds the commands for taxonomy assignment, PyNAST alignment,
        alignment filtering and tree building for the sequences in
        repset_fasta_fp, then passes them to command_handler in one call.

        Existing taxonomy-assignment and alignment output directories, and
        an existing <output_dir>/rep_set.tre, are removed immediately (while
        the commands are being built, before any command is run).

        repset_fasta_fp: path to the representative sequences
        output_dir: directory under which all output is written
        command_handler: callable invoked as
         command_handler(commands, status_update_callback, logger=...,
         close_logger_on_success=...)
        params: workflow parameters; the 'assign_taxonomy', 'align_seqs',
         'filter_alignment', 'make_phylogeny' and 'parallel' sections are
         read when present
        qiime_config: forwarded to the logger when one is created here
        parallel: if True, use the parallel alignment script, and the
         parallel taxonomy assignment script when the assignment method is
         'rdp' or 'blast'
        logger: an existing logger to write to; when None a WorkflowLogger
         is created here and closed on success

        Returns (taxonomy_fp, failures_fp): the expected paths of the
         taxonomy assignments file and the PyNAST failures fasta file.
    """
    input_basename = splitext(split(repset_fasta_fp)[1])[0]
    commands = []
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    ## Prep the taxonomy assignment command
    try:
        assignment_method = params['assign_taxonomy']['assignment_method']
    except KeyError:
        assignment_method = 'rdp'
    assign_taxonomy_dir = '%s/%s_assigned_taxonomy' % \
        (output_dir, assignment_method)
    taxonomy_fp = '%s/%s_tax_assignments.txt' % \
        (assign_taxonomy_dir, input_basename)
    if parallel and assignment_method in ('rdp', 'blast'):
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params['parallel'])
        except KeyError:
            params_str = ''

        # Grab the taxonomy assignment parameters
        try:
            # Want to find a cleaner strategy for this: the parallel script
            # is method-specific, so doesn't take a --assignment_method
            # option. This works for now though.
            d = params['assign_taxonomy'].copy()
            # Only drop the key when it is present. An unconditional del
            # would raise KeyError when the method was left at its default,
            # and the except below would then silently discard every other
            # assign_taxonomy parameter.
            if 'assignment_method' in d:
                del d['assignment_method']
            params_str += ' %s' % get_params_str(d)
        except KeyError:
            pass

        # Build the parallel taxonomy assignment command
        assign_taxonomy_cmd = \
            'parallel_assign_taxonomy_%s.py -i %s -o %s -T %s' % \
            (assignment_method, repset_fasta_fp, assign_taxonomy_dir,
             params_str)
    else:
        try:
            params_str = get_params_str(params['assign_taxonomy'])
        except KeyError:
            params_str = ''
        # Build the taxonomy assignment command
        assign_taxonomy_cmd = 'assign_taxonomy.py -o %s -i %s %s' % \
            (assign_taxonomy_dir, repset_fasta_fp, params_str)
    # clear out results of any previous run
    if exists(assign_taxonomy_dir):
        rmtree(assign_taxonomy_dir)
    commands.append([('Assign taxonomy', assign_taxonomy_cmd)])

    ## Prep the pynast alignment command
    alignment_method = 'pynast'
    pynast_dir = '%s/%s_aligned_seqs' % (output_dir, alignment_method)
    aln_fp = '%s/%s_aligned.fasta' % (pynast_dir, input_basename)
    failures_fp = '%s/%s_failures.fasta' % (pynast_dir, input_basename)
    # clear out results of any previous run
    if exists(pynast_dir):
        rmtree(pynast_dir)

    if parallel:
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params['parallel'])
        except KeyError:
            params_str = ''

        # Grab the alignment parameters
        try:
            # Want to find a cleaner strategy for this: the parallel script
            # is method-specific, so doesn't take a --alignment_method
            # option. This works for now though.
            d = params['align_seqs'].copy()
            if 'alignment_method' in d:
                del d['alignment_method']
            params_str += ' %s' % get_params_str(d)
        except KeyError:
            pass

        # Build the parallel pynast alignment command
        align_seqs_cmd = 'parallel_align_seqs_pynast.py -i %s -o %s -T %s' % \
            (repset_fasta_fp, pynast_dir, params_str)
    else:
        try:
            params_str = get_params_str(params['align_seqs'])
        except KeyError:
            params_str = ''
        # Build the pynast alignment command
        align_seqs_cmd = 'align_seqs.py -i %s -o %s %s' % \
            (repset_fasta_fp, pynast_dir, params_str)
    commands.append([('Align sequences', align_seqs_cmd)])

    ## Prep the alignment filtering command
    filtered_aln_fp = '%s/%s_aligned_pfiltered.fasta' % \
        (pynast_dir, input_basename)
    try:
        params_str = get_params_str(params['filter_alignment'])
    except KeyError:
        params_str = ''
    # Build the alignment filtering command
    filter_alignment_cmd = 'filter_alignment.py -o %s -i %s %s' % \
        (pynast_dir, aln_fp, params_str)
    commands.append([('Filter alignment', filter_alignment_cmd)])

    ## Prep the tree building command
    tree_fp = '%s/rep_set.tre' % output_dir
    try:
        params_str = get_params_str(params['make_phylogeny'])
    except KeyError:
        params_str = ''
    # Build the tree building command
    make_phylogeny_cmd = 'make_phylogeny.py -i %s -o %s %s' % \
        (filtered_aln_fp, tree_fp, params_str)
    commands.append([('Build phylogenetic tree', make_phylogeny_cmd)])
    # remove a tree left over from a previous run
    if exists(tree_fp):
        remove_files([tree_fp])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
    return taxonomy_fp, failures_fp
Beispiel #3
0
def iterative_pick_subsampled_open_referenence_otus(
        input_fps,
        refseqs_fp,
        output_dir,
        percent_subsample,
        new_ref_set_id,
        command_handler,
        params,
        qiime_config,
        prefilter_refseqs_fp=None,
        prefilter_percent_id=0.60,
        min_otu_size=2,
        run_tax_align_tree=True,
        step1_otu_map_fp=None,
        step1_failures_fasta_fp=None,
        parallel=False,
        suppress_step4=False,
        logger=None,
        status_update_callback=print_to_stdout):
    """ Call the pick_subsampled_open_referenence_otus workflow on multiple
         inputs and handle processing of the results.

        Each input in input_fps is processed in its own numbered
        sub-directory of output_dir (<output_dir>/<i>/). The new_refseqs.fna
        written by iteration i is used as refseqs_fp for iteration i+1.
        Iterations whose output already exists (as reported by
        iteration_output_exists) are skipped. The per-iteration OTU tables
        are then merged and a combined representative set is built.

        Taxonomy assignment, alignment and tree building are disabled for
        the individual iterations; if run_tax_align_tree is True they are
        run once on the combined representative set, and OTU tables with
        taxonomy and without PyNAST failures are written.

        input_fps: list of paths to the input sequence files
        refseqs_fp: reference sequences for the first iteration
        new_ref_set_id: base identifier; iteration i uses
         '<new_ref_set_id>.<i>'
        prefilter_refseqs_fp: reference collection for the prefilter in
         every iteration; defaults to the input refseqs_fp when None
        step1_otu_map_fp, step1_failures_fasta_fp: pre-existing step 1
         results, only used for the first iteration
        logger: an existing logger to write to; when None a WorkflowLogger
         is created here

        The remaining parameters are forwarded unchanged to
        pick_subsampled_open_referenence_otus.

        Returns None. Note that the logger is always closed before
        returning, whether or not it was passed in by the caller.
    """
    create_dir(output_dir)
    commands = []
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)

    # if the user has not passed a different reference collection for the
    # pre-filter, use the input refseqs_fp for all iterations. we want to
    # pre-filter all data against the input data as lower percent identity
    # searches with uclust can be slow, so we want the reference collection
    # to stay at a reasonable size.
    if prefilter_refseqs_fp is None:
        prefilter_refseqs_fp = refseqs_fp

    otu_table_fps = []
    repset_fasta_fps = []
    for i, input_fp in enumerate(input_fps):
        iteration_output_dir = '%s/%d/' % (output_dir, i)
        if iteration_output_exists(iteration_output_dir, min_otu_size):
            # if the output from an iteration already exists, skip that
            # iteration (useful for continuing failed runs)
            log_input_md5s(logger, [input_fp, refseqs_fp])
            logger.write(
                'Iteration %d (input file: %s) output data already exists. '
                'Skipping and moving to next.\n\n' % (i, input_fp))
        else:
            pick_subsampled_open_referenence_otus(
                input_fp=input_fp,
                refseqs_fp=refseqs_fp,
                output_dir=iteration_output_dir,
                percent_subsample=percent_subsample,
                new_ref_set_id='.'.join([new_ref_set_id,
                                         str(i)]),
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                run_tax_align_tree=False,
                prefilter_refseqs_fp=prefilter_refseqs_fp,
                prefilter_percent_id=prefilter_percent_id,
                min_otu_size=min_otu_size,
                step1_otu_map_fp=step1_otu_map_fp,
                step1_failures_fasta_fp=step1_failures_fasta_fp,
                parallel=parallel,
                suppress_step4=suppress_step4,
                logger=logger,
                status_update_callback=status_update_callback)
        ## perform post-iteration file shuffling whether the previous
        ## iteration's data previously existed or was just computed.
        # step1 otu map and failures can only be used for the first iteration
        # as subsequent iterations need to use updated refseqs files
        step1_otu_map_fp = step1_failures_fasta_fp = None
        # the next iteration picks against this iteration's new refseqs
        refseqs_fp = '%s/new_refseqs.fna' % iteration_output_dir
        otu_table_fps.append('%s/otu_table_mc%d.biom' %
                             (iteration_output_dir, min_otu_size))
        repset_fasta_fps.append('%s/rep_set.fna' % iteration_output_dir)

    # Merge OTU tables - check for existence first as this step has
    # historically been a frequent failure, so is sometimes run manually in
    # failed runs.
    otu_table_fp = '%s/otu_table_mc%d.biom' % (output_dir, min_otu_size)
    if not (exists(otu_table_fp) and getsize(otu_table_fp) > 0):
        merge_cmd = 'merge_otu_tables.py -i %s -o %s' % \
            (','.join(otu_table_fps), otu_table_fp)
        commands.append([("Merge OTU tables", merge_cmd)])

    # Build master rep set
    final_repset_fp = '%s/rep_set.fna' % output_dir
    final_repset_from_iteration_repsets_fps(repset_fasta_fps, final_repset_fp)

    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=False)
    commands = []

    if run_tax_align_tree:
        otu_table_w_tax_fp = \
            '%s/otu_table_mc%d_w_tax.biom' % (output_dir, min_otu_size)
        final_otu_table_fp = \
            '%s/otu_table_mc%d_w_tax_no_pynast_failures.biom' % \
            (output_dir, min_otu_size)
        if exists(final_otu_table_fp) and getsize(final_otu_table_fp) > 0:
            # report the file that was actually checked
            logger.write("Final output file exists (%s). Will not rebuild." %
                         final_otu_table_fp)
        else:
            # remove files from partially completed runs
            remove_files([otu_table_w_tax_fp, final_otu_table_fp],
                         error_on_missing=False)

            taxonomy_fp, pynast_failures_fp = tax_align_tree(
                repset_fasta_fp=final_repset_fp,
                output_dir=output_dir,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                parallel=parallel,
                logger=logger,
                status_update_callback=status_update_callback)

            # Add taxa to otu table
            add_taxa_cmd = 'add_taxa.py -i %s -t %s -o %s' % \
                (otu_table_fp, taxonomy_fp, otu_table_w_tax_fp)
            commands.append([("Add taxa to OTU table", add_taxa_cmd)])

            command_handler(commands,
                            status_update_callback,
                            logger=logger,
                            close_logger_on_success=False)
            commands = []

            # Build OTU table without PyNAST failures. Inputs stay open
            # until the table has been formatted, in case the parsing
            # helpers read lazily; the output file is only created once the
            # formatted table is available.
            with open(otu_table_w_tax_fp, 'U') as otu_table_w_tax_f:
                with open(pynast_failures_fp, 'U') as pynast_failures_f:
                    filtered_otu_table = filter_otus_from_otu_table(
                        parse_biom_table(otu_table_w_tax_f),
                        get_seq_ids_from_fasta_file(pynast_failures_f),
                        0,
                        inf,
                        0,
                        inf,
                        negate_ids_to_keep=True)
                    formatted_otu_table = format_biom_table(
                        filtered_otu_table)
            with open(final_otu_table_fp, 'w') as otu_table_f:
                otu_table_f.write(formatted_otu_table)

            # No commands are queued at this point; the call is retained so
            # the handler is invoked exactly as many times as before.
            command_handler(commands,
                            status_update_callback,
                            logger=logger,
                            close_logger_on_success=False)
            commands = []

    # NOTE(review): the logger is closed unconditionally, including when it
    # was supplied by the caller - confirm callers do not reuse it afterwards.
    logger.close()
Beispiel #4
0
def assign_taxonomy_multiple_times(input_dirs,
                                   output_dir,
                                   assignment_methods,
                                   reference_seqs_fp,
                                   input_fasta_filename,
                                   clean_otu_table_filename,
                                   id_to_taxonomy_fp=None,
                                   confidences=None,
                                   e_values=None,
                                   command_handler=call_commands_serially,
                                   rdp_max_memory=None,
                                   status_update_callback=print_to_stdout,
                                   force=False,
                                   read_1_seqs_fp=None,
                                   read_2_seqs_fp=None):
    """ Performs sanity checks on passed arguments and directories. Builds
        commands for each method and sends them off to be executed.

        For every directory in input_dirs and every method in
        assignment_methods, the method-specific commands are generated and
        run one at a time through command_handler. The wall-clock time of
        each command whose description contains 'Assigning' is recorded and
        written to the log at the end.

        input_dirs: list of existing dataset directories; each gets an
         output sub-directory of the same name under output_dir
        output_dir: top-level output directory (created here)
        assignment_methods: list containing any of 'rdp', 'blast',
         'mothur' and 'rtax'
        reference_seqs_fp: passed to every command generator
        input_fasta_filename: name of the fasta file inside each input dir
        clean_otu_table_filename: name of the otu table inside each input
         dir
        id_to_taxonomy_fp: ID to taxonomy map (required)
        confidences: required for the 'rdp' and 'mothur' methods
        e_values: required for the 'blast' method
        command_handler: callable invoked as command_handler(commands,
         status_update_callback, logger, close_logger_on_success=False)
        rdp_max_memory: passed to the RDP command generator
        force: if True, an already existing output_dir is not an error
        read_1_seqs_fp: required for the 'rtax' method
        read_2_seqs_fp: passed to the RTAX command generator

        Raises WorkflowError if a required argument is missing, if
         output_dir cannot be created and force is False, if an input
         directory does not exist, or if a method is not recognized.
    """
    ## Check for inputs that are universally required. This is done before
    ## anything is created on disk so that an invalid call does not leave an
    ## empty output directory behind (which would make the next, corrected
    ## call fail unless run with force).
    if assignment_methods is None:
        raise WorkflowError("You must specify at least one method:"
                            "'rdp', 'blast', 'mothur', or 'rtax'.")
    if input_fasta_filename is None:
        raise WorkflowError("You must provide an input fasta filename.")
    if clean_otu_table_filename is None:
        raise WorkflowError("You must provide a clean otu table filename.")
    if id_to_taxonomy_fp is None:
        raise WorkflowError("You must provide an ID to taxonomy map filename.")

    ## Create the output directory, failing if it is already there unless
    ## the caller asked to force
    try:
        makedirs(output_dir)
    except OSError:
        if not force:
            raise WorkflowError(
                "Output directory '%s' already exists. Please "
                "choose a different directory, or force overwrite with -f." %
                output_dir)

    logger = WorkflowLogger(generate_log_fp(output_dir))
    time_results = []

    for input_dir in input_dirs:
        ## Make sure the input dataset directory exists.
        if not isdir(input_dir):
            raise WorkflowError("The input directory '%s' does not exist." %
                                input_dir)

        input_dir_name = split(normpath(input_dir))[1]
        output_dataset_dir = join(output_dir, input_dir_name)
        input_fasta_fp = join(input_dir, input_fasta_filename)
        clean_otu_table_fp = join(input_dir, clean_otu_table_filename)

        logger.write("\nCreating output subdirectory '%s' if it doesn't "
                     "already exist.\n" % output_dataset_dir)
        try:
            makedirs(output_dataset_dir)
        except OSError:
            # It already exists, which is okay since we already know we are in
            # 'force' mode from above.
            pass

        for method in assignment_methods:
            ## Method is RDP
            if method == 'rdp':
                ## Check for execution parameters required by RDP method
                if confidences is None:
                    raise WorkflowError("You must specify at least one "
                                        "confidence level.")
                ## Generate command for RDP
                commands = _generate_rdp_commands(
                    output_dataset_dir,
                    input_fasta_fp,
                    reference_seqs_fp,
                    id_to_taxonomy_fp,
                    clean_otu_table_fp,
                    confidences,
                    rdp_max_memory=rdp_max_memory)

            ## Method is BLAST
            elif method == 'blast':
                ## Check for execution parameters required by BLAST method
                if e_values is None:
                    raise WorkflowError("You must specify at least one "
                                        "E value.")
                ## Generate command for BLAST
                commands = _generate_blast_commands(
                    output_dataset_dir, input_fasta_fp, reference_seqs_fp,
                    id_to_taxonomy_fp, clean_otu_table_fp, e_values)

            ## Method is Mothur
            elif method == 'mothur':
                ## Check for execution parameters required by Mothur method
                if confidences is None:
                    raise WorkflowError("You must specify at least one "
                                        "confidence level.")
                ## Generate command for mothur
                commands = _generate_mothur_commands(
                    output_dataset_dir, input_fasta_fp, reference_seqs_fp,
                    id_to_taxonomy_fp, clean_otu_table_fp, confidences)

            ## Method is RTAX
            elif method == 'rtax':
                ## Check for execution parameters required by RTAX method
                if read_1_seqs_fp is None:
                    raise WorkflowError("You must specify a file containing "
                                        "the first read from pair-end "
                                        "sequencing.")
                ## Generate command for rtax
                commands = _generate_rtax_commands(
                    output_dataset_dir,
                    input_fasta_fp,
                    reference_seqs_fp,
                    id_to_taxonomy_fp,
                    clean_otu_table_fp,
                    read_1_seqs_fp,
                    read_2_seqs_fp=read_2_seqs_fp)

            ## Unsupported method
            else:
                raise WorkflowError("Unrecognized or unsupported taxonomy "
                                    "assignment method '%s'." % method)

            # send commands for the current method to the command handler,
            # one at a time so that each can be timed individually
            for command in commands:
                # the command handler needs a list of commands, so wrap the
                # single command in a length-one list
                start = time()
                command_handler([command],
                                status_update_callback,
                                logger,
                                close_logger_on_success=False)
                end = time()

                description = command[0][0]
                if 'Assigning' in description:
                    # The dataset name is the directory component directly
                    # above the file passed via '-i'. It is only looked up
                    # for assignment commands, so other commands are not
                    # required to carry a '-i' option.
                    tokens = command[0][1].split()
                    input_file = \
                        tokens[tokens.index('-i') + 1].split('/')[-2]
                    time_results.append(
                        (input_file, ' '.join(description.split()[2:]),
                         end - start))

    # write out the title followed by one line per timed assignment
    logger.write('\n\nAssignment times (seconds):\n')
    for t in time_results:
        # t[1] has the form '(method, params)'; split it and strip the
        # surrounding parentheses before writing
        # dataset\tmethod\tparams\ttime (seconds)
        method, param = t[1].split(', ')
        method = method.lstrip('(')
        param = param.rstrip(')')

        logger.write('%s\t%s\t%s\t%s\n' % (t[0], method, param, str(t[2])))

    logger.close()