def pick_reference_otus(input_fp, output_dir, otu_picking_method, refseqs_fp, parallel, params, logger, similarity_override=None): params_copy = deepcopy(params) if similarity_override != None: logger.write('Overridding similiary with %1.3f.\n' % similarity_override) if 'pick_otus' in params_copy: params_copy['pick_otus']['similarity'] = str(similarity_override) else: params_copy['pick_otus'] = {'similarity':str(similarity_override)} if parallel and otu_picking_method == 'uclust_ref': # Grab the parallel-specific parameters try: params_str = get_params_str(params_copy['parallel']) except KeyError: params_str = '' # Grab the OTU picker parameters try: # Want to find a cleaner strategy for this: the parallel script # is method-specific, so doesn't take a --otu_picking_method # option. This works for now though. if 'otu_picking_method' in params_copy['pick_otus']: del params_copy['pick_otus']['otu_picking_method'] except KeyError: pass params_str += ' %s' % get_params_str(params_copy['pick_otus']) otu_picking_script = 'parallel_pick_otus_%s.py' % otu_picking_method # Build the OTU picking command pick_otus_cmd = '%s -i %s -o %s -r %s -T %s' %\ (otu_picking_script, input_fp, output_dir, refseqs_fp, params_str) else: try: params_str = get_params_str(params_copy['pick_otus']) except KeyError: params_str = '' # Since this is reference-based OTU picking we always want to # suppress new clusters -- force it here. params_str+= ' --suppress_new_clusters' logger.write("Forcing --suppress_new_clusters as this is reference-based OTU picking.\n\n") # Build the OTU picking command pick_otus_cmd = 'pick_otus.py -i %s -o %s -r %s -m %s %s' %\ (input_fp, output_dir, refseqs_fp, otu_picking_method, params_str) return pick_otus_cmd
def run_make_otu_heatmap_html(otu_table_fp, mapping_fp, output_dir, params,
                              qiime_config, command_handler, tree_fp,
                              status_update_callback=print_to_stdout):
    """Build and execute the make_otu_heatmap_html.py command.

    Generates an interactive OTU heatmap for otu_table_fp in output_dir,
    using the mapping file and tree. Extra script options are taken from
    params['make_otu_heatmap_html'] when present. Always returns True.
    """
    # Resolve the interpreter and script location up front
    python_exe = qiime_config['python_exe_fp']
    scripts_dir = get_qiime_scripts_dir()
    workflow_log = WorkflowLogger(generate_log_fp(output_dir),
                                  params=params,
                                  qiime_config=qiime_config)
    # Optional user-supplied options for the heatmap script
    try:
        extra_opts = get_params_str(params['make_otu_heatmap_html'])
    except KeyError:
        extra_opts = ''
    # Assemble the single heatmap command
    heatmap_cmd = '%s %s/make_otu_heatmap_html.py -i %s -m %s -t %s -o %s %s' %\
        (python_exe, scripts_dir, otu_table_fp, mapping_fp, tree_fp,
         output_dir, extra_opts)
    # Hand the one-step command list off for execution
    command_handler([[('OTU Heatmap', heatmap_cmd)]],
                    status_update_callback, workflow_log)
    return True
def pick_denovo_otus(input_fp, output_dir, new_ref_set_id,
                     otu_picking_method, params, logger):
    """Build the pick_otus.py command for de novo OTU picking.

    Parameters
    ----------
    input_fp : filepath of the input sequences (fasta)
    output_dir : directory the OTU picker will write to
    new_ref_set_id : id used to prefix the new de novo OTU ids
        ('<id>.ReferenceOTU')
    otu_picking_method : passed to pick_otus.py via -m
    params : dict of per-script option dicts; not mutated (a copy is used)
    logger : accepted for interface symmetry with pick_reference_otus;
        not written to here

    Returns the shell command string; the caller is responsible for running it.
    """
    # Bug fix: previously, when params had no 'pick_otus' section, the
    # KeyError left `d` unbound and the prefix assignment below raised
    # NameError. Fall back to an empty option dict instead.
    try:
        d = params['pick_otus'].copy()
    except KeyError:
        d = {}
    # The method is passed via -m, so drop it from the serialized options.
    d.pop('otu_picking_method', None)
    # Label de novo clusters so they can be merged into the reference set.
    d['uclust_otu_id_prefix'] = '%s.ReferenceOTU' % new_ref_set_id
    params_str = ' %s' % get_params_str(d)
    # Build the OTU picking command
    result = 'pick_otus.py -i %s -o %s -m %s %s' %\
        (input_fp, output_dir, otu_picking_method, params_str)
    return result
def run_process_sff_through_split_lib(study_id,run_prefix,sff_input_fp,
                                      mapping_fp, output_dir,
                                      command_handler, params, qiime_config,
                                      convert_to_flx=False,
                                      write_to_all_fasta=False,
                                      status_update_callback=print_to_stdout):
    """Process SFF file(s) and de-multiplex the resulting sequences.

    NOTE: Parts of this function are directly copied from the
    run_qiime_data_preparation function in QIIME's workflow.py.

    Steps performed:
      1) Process SFFs to generate .fna, .qual and flowgram files
         (process_sff.py), optionally converting to FLX-style reads.
      2) De-multiplex sequences (split_libraries.py).
      3) Convert the de-multiplexed fasta/qual to per-sample fastq
         (convert_fastaqual_fastq.py).

    sff_input_fp may be a comma-separated list of SFF filepaths.
    Returns the list of fasta filepaths fed to split_libraries.py.
    NOTE(review): write_to_all_fasta is accepted but never used here --
    presumably kept for signature compatibility with callers; confirm.
    """
    # Prepare some variables for the later steps
    sff_filenames=sff_input_fp.split(',')
    commands = []
    create_dir(output_dir)
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    # generate a log file
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params,
                            qiime_config=qiime_config)
    # NOTE(review): make_flowgram is assigned but never read in this
    # function -- likely a leftover.
    make_flowgram=True
    split_lib_fasta_input_files=[]
    split_lib_qual_input_files=[]
    denoise_flow_input_files=[]
    # make a copy of the mapping file
    copied_mapping=split(mapping_fp)[-1]
    mapping_input_fp_copy=join(output_dir, copied_mapping)
    copy_mapping_cmd='cp %s %s' % (mapping_fp,mapping_input_fp_copy)
    commands.append([('CopyMapping', copy_mapping_cmd)])
    # iterate over SFFs and match to the mapping file
    # NOTE(review): the loop variable shadows the sff_input_fp parameter.
    for sff_input_fp in sff_filenames:
        # GENERATE THE MD5 HERE AND STORE IN THE DATABASE AFTER FILE
        # SUCCESSFULLY PROCESSED
        # Copy the SFF into the processed files directory
        # NOTE(review): sff_input_fp_copy is computed but no copy command is
        # ever issued for it -- confirm whether the copy happens elsewhere.
        copied_sff=split(sff_input_fp)[-1]
        sff_input_fp_copy=join(output_dir, copied_sff)
        #Generate filenames for split_libraries
        input_dir, input_filename = split(sff_input_fp)
        # Strip both extensions for gzipped input (e.g. foo.sff.gz -> foo)
        if is_gzip(sff_input_fp) and sff_input_fp.endswith('.gz'):
            input_basename, input_ext = splitext(splitext(input_filename)[0])
        else:
            input_basename, input_ext = splitext(input_filename)
        # Convert sff file into fasta, qual and flowgram file
        if convert_to_flx:
            # Hard-coded list of studies whose barcodes/linkerprimers are
            # all lowercase (i.e. HMP data or SRA data) and need cleaning.
            if study_id in ['496','968','969','1069','1002','1066','1194',
                            '1195','1457','1458','1460','1536','1918','1962']:
                ### this function is for handling files where the barcode and
                ### linkerprimer are all lowercase (i.e. HMP data or SRA data)
                # write process_sff command (no trimming; use the vendor
                # sfftools implementation)
                process_sff_cmd = '%s %s/process_sff.py -i %s -f -o %s -t --no_trim --use_sfftools' %\
                    (python_exe_fp, script_dir, sff_input_fp, output_dir)
                #process_sff_cmd = '%s %s/process_sff.py -i %s -f -o %s -t' % (python_exe_fp, script_dir, sff_input_fp, output_dir)
                commands.append([('ProcessSFFs', process_sff_cmd)])
                # define output fasta from process_sff
                no_trim_fasta_fp=join(output_dir,input_basename + '_FLX.fna')
                # define pprospector scripts dir
                pprospector_scripts_dir=join(ServerConfig.home,'software',
                                             'pprospector','scripts')
                # clean fasta - basically converting lowercase to uppercase
                clean_fasta_cmd = '%s %s/clean_fasta.py -f %s -o %s' %\
                    (python_exe_fp, pprospector_scripts_dir,
                     no_trim_fasta_fp,output_dir)
                commands.append([('CleanFasta', clean_fasta_cmd)])
                # move the cleaned file to be consistent with other processes
                cleaned_fasta_fp=join(output_dir,input_basename + \
                                      '_FLX_filtered.fasta')
                moved_fasta_fp=join(output_dir,input_basename + '_FLX.fna')
                mv_cmd='mv %s %s' % (cleaned_fasta_fp,moved_fasta_fp)
                commands.append([('RenameFasta',mv_cmd)])
                # update the split-lib files to use the cleaned file
                split_lib_fasta_input_files.append(moved_fasta_fp)
                split_lib_qual_input_files.append(join(output_dir,
                                            input_basename + '_FLX.qual'))
                denoise_flow_input_files.append(join(output_dir,
                                            input_basename + '_FLX.txt'))
            else:
                # write process_sff command (FLX conversion with trimming)
                process_sff_cmd = '%s %s/process_sff.py -i %s -f -o %s -t' %\
                    (python_exe_fp, script_dir, sff_input_fp, output_dir)
                commands.append([('ProcessSFFs', process_sff_cmd)])
                # get filepaths for generated files
                split_lib_fasta_input_files.append(join(output_dir,
                                            input_basename + '_FLX.fna'))
                split_lib_qual_input_files.append(join(output_dir,
                                            input_basename + '_FLX.qual'))
                denoise_flow_input_files.append(join(output_dir,
                                            input_basename + '_FLX.txt'))
        else:
            # write process_sff command (no FLX conversion)
            process_sff_cmd = '%s %s/process_sff.py -i %s -f -o %s' %\
                (python_exe_fp, script_dir, sff_input_fp, output_dir)
            commands.append([('ProcessSFFs', process_sff_cmd)])
            # get filepaths for generated files
            split_lib_fasta_input_files.append(join(output_dir,input_basename + '.fna'))
            split_lib_qual_input_files.append(join(output_dir,input_basename + '.qual'))
            denoise_flow_input_files.append(join(output_dir,input_basename + '.txt'))
    # Comma-joined lists are what split_libraries.py expects for -f/-q
    split_lib_fasta_input=','.join(split_lib_fasta_input_files)
    split_lib_qual_input=','.join(split_lib_qual_input_files)
    denoise_flow_input=','.join(denoise_flow_input_files)
    # If dataset is metagenomic disable primer check
    # NOTE(review): 12171 looks like a web-app user id passed to
    # getStudyInfo -- confirm against the data access layer.
    data_access = data_access_factory(ServerConfig.data_access_type)
    study_info=data_access.getStudyInfo(study_id,12171)
    if study_info['investigation_type'].lower() == 'metagenome':
        # NOTE(review): this mutates the caller's params dict in place.
        params['split_libraries']['disable_primers']=None
    # create split-libraries folder
    split_library_output=join(output_dir,'split_libraries')
    create_dir(split_library_output)
    # get params string
    try:
        params_str = get_params_str(params['split_libraries'])
    except KeyError:
        params_str = ''
    # Build the split libraries command
    split_libraries_cmd = '%s %s/split_libraries.py -f %s -q %s -m %s -o %s %s'%\
        (python_exe_fp, script_dir, split_lib_fasta_input,
         split_lib_qual_input, mapping_fp, split_library_output, params_str)
    commands.append([('SplitLibraries', split_libraries_cmd)])
    input_fp=join(split_library_output,'seqs.fna')
    # create per sample fastq files
    fastq_output=join(split_library_output,'per_sample_fastq')
    create_dir(fastq_output)
    try:
        params_str = get_params_str(params['convert_fastaqual_fastq'])
    except KeyError:
        params_str = ''
    input_qual_fp=join(split_library_output,'seqs_filtered.qual')
    # build the convert fasta/qual to fastq command
    create_fastq_cmd = '%s %s/convert_fastaqual_fastq.py -f %s -q %s -o %s %s'%\
        (python_exe_fp, script_dir, input_fp, input_qual_fp,
         fastq_output, params_str)
    commands.append([('Create FASTQ', create_fastq_cmd)])
    # Call the command handler on the list of commands
    command_handler(commands,status_update_callback,logger=logger)
    # Return the fasta file paths
    return split_lib_fasta_input_files
input_str = '-i {0} --sample_id {1}'.format(filenames[0], sample_and_prep) except Exception, e: error = 'Failed to obtain sample and sequence prep info for study_id {0} and run_prefix {1}\n'.format(study_id, run_prefix) error += 'SQL was: \n {0} \n'.format(sql) error += 'Original exception was: \n {0}'.format(str(e)) raise Exception(error) else: input_str=get_split_libraries_fastq_params_and_file_types(filenames, mapping_fp) # create split_libaries folder split_library_output=join(output_dir,'split_libraries') create_dir(split_library_output) # get params string try: params_str = get_params_str(params['split_libraries_fastq']) except KeyError: params_str = '' # Build the split libraries command split_libraries_cmd = '%s %s/split_libraries_fastq.py -o %s -m %s %s %s' % \ (python_exe_fp, script_dir, split_library_output, mapping_input_fp_copy, input_str,params_str) commands.append([('SplitLibraries', split_libraries_cmd)]) # define the generate files input_fp=join(split_library_output,'seqs.fna') # create per sample fastq files fastq_output=join(split_library_output,'per_sample_fastq')
def pick_reference_otus(input_fp, output_dir, otu_picking_method, refseqs_fp, parallel, params, logger, similarity_override=None): params_copy = deepcopy(params) if 'pick_otus' in params_copy and 'refseqs_fp' in params_copy['pick_otus']: raise WorkflowError, \ ("Cannot pass pick_otus:refseqs_fp in parameters file. This can only be" " passed on the command line or through the API.") if similarity_override != None: logger.write('Overridding similiary with %1.3f.\n' % similarity_override) if 'pick_otus' in params_copy: params_copy['pick_otus']['similarity'] = str(similarity_override) else: params_copy['pick_otus'] = {'similarity': str(similarity_override)} if parallel and otu_picking_method == 'uclust_ref': # Grab the parallel-specific parameters try: params_str = get_params_str(params_copy['parallel']) except KeyError: params_str = '' # Grab the OTU picker parameters try: # Want to find a cleaner strategy for this: the parallel script # is method-specific, so doesn't take a --otu_picking_method # option. This works for now though. if 'otu_picking_method' in params_copy['pick_otus']: del params_copy['pick_otus']['otu_picking_method'] except KeyError: pass params_str += ' %s' % get_params_str(params_copy['pick_otus']) otu_picking_script = 'parallel_pick_otus_%s.py' % otu_picking_method # Build the OTU picking command pick_otus_cmd = '%s -i %s -o %s -r %s -T %s' %\ (otu_picking_script, input_fp, output_dir, refseqs_fp, params_str) else: try: params_str = get_params_str(params_copy['pick_otus']) except KeyError: params_str = '' # Since this is reference-based OTU picking we always want to # suppress new clusters -- force it here. params_str += ' --suppress_new_clusters' logger.write( "Forcing --suppress_new_clusters as this is reference-based OTU picking.\n\n" ) # Build the OTU picking command pick_otus_cmd = 'pick_otus.py -i %s -o %s -r %s -m %s %s' %\ (input_fp, output_dir, refseqs_fp, otu_picking_method, params_str) return pick_otus_cmd
def tax_align_tree(repset_fasta_fp,
                   output_dir,
                   command_handler,
                   params,
                   qiime_config,
                   parallel=False,
                   logger=None,
                   status_update_callback=print_to_stdout):
    """Assign taxonomy, align, filter the alignment and build a tree for a
    representative sequence set.

    All steps are accumulated as shell commands and executed through
    command_handler. Returns (taxonomy_fp, failures_fp): the taxonomy
    assignment output filepath and the PyNAST alignment failures filepath
    (paths the commands will create once run).
    """
    input_dir, input_filename = split(repset_fasta_fp)
    input_basename, input_ext = splitext(input_filename)
    commands = []
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        # Caller owns the logger; don't close it when we finish.
        close_logger_on_success = False

    ## Prep the taxonomy assignment command
    try:
        assignment_method = params['assign_taxonomy']['assignment_method']
    except KeyError:
        assignment_method = 'rdp'
    assign_taxonomy_dir = '%s/%s_assigned_taxonomy' %\
        (output_dir, assignment_method)
    taxonomy_fp = '%s/%s_tax_assignments.txt' %\
        (assign_taxonomy_dir, input_basename)
    if parallel and (assignment_method == 'rdp' or assignment_method == 'blast'):
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params['parallel'])
        except KeyError:
            params_str = ''
        try:
            # The parallel script is method-specific, so it doesn't take an
            # --assignment_method option; strip it from the params.
            d = params['assign_taxonomy'].copy()
            # Bug fix: guard the del (as assign_tax does). Previously the
            # unconditional del raised KeyError when assignment_method was
            # not set in the params file, and the blanket `except KeyError`
            # silently dropped every other assign_taxonomy parameter from
            # the command.
            if 'assignment_method' in d:
                del d['assignment_method']
            params_str += ' %s' % get_params_str(d)
        except KeyError:
            pass
        # Build the parallel taxonomy assignment command
        assign_taxonomy_cmd = \
            'parallel_assign_taxonomy_%s.py -i %s -o %s -T %s' %\
            (assignment_method, repset_fasta_fp, assign_taxonomy_dir, params_str)
    else:
        try:
            params_str = get_params_str(params['assign_taxonomy'])
        except KeyError:
            params_str = ''
        # Build the taxonomy assignment command
        assign_taxonomy_cmd = 'assign_taxonomy.py -o %s -i %s %s' %\
            (assign_taxonomy_dir, repset_fasta_fp, params_str)
    # Start from a clean output directory
    if exists(assign_taxonomy_dir):
        rmtree(assign_taxonomy_dir)
    commands.append([('Assign taxonomy', assign_taxonomy_cmd)])

    ## Prep the pynast alignment command
    alignment_method = 'pynast'
    pynast_dir = '%s/%s_aligned_seqs' % (output_dir, alignment_method)
    aln_fp = '%s/%s_aligned.fasta' % (pynast_dir, input_basename)
    failures_fp = '%s/%s_failures.fasta' % (pynast_dir, input_basename)
    if exists(pynast_dir):
        rmtree(pynast_dir)
    if parallel:
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params['parallel'])
        except KeyError:
            params_str = ''
        try:
            # The parallel script is method-specific, so it doesn't take an
            # --alignment_method option; strip it from the params.
            d = params['align_seqs'].copy()
            if 'alignment_method' in d:
                del d['alignment_method']
            params_str += ' %s' % get_params_str(d)
        except KeyError:
            pass
        # Build the parallel pynast alignment command
        align_seqs_cmd = 'parallel_align_seqs_pynast.py -i %s -o %s -T %s' %\
            (repset_fasta_fp, pynast_dir, params_str)
    else:
        try:
            params_str = get_params_str(params['align_seqs'])
        except KeyError:
            params_str = ''
        # Build the pynast alignment command
        align_seqs_cmd = 'align_seqs.py -i %s -o %s %s' %\
            (repset_fasta_fp, pynast_dir, params_str)
    commands.append([('Align sequences', align_seqs_cmd)])

    ## Prep the alignment filtering command
    filtered_aln_fp = '%s/%s_aligned_pfiltered.fasta' %\
        (pynast_dir, input_basename)
    try:
        params_str = get_params_str(params['filter_alignment'])
    except KeyError:
        params_str = ''
    # Build the alignment filtering command
    filter_alignment_cmd = 'filter_alignment.py -o %s -i %s %s' %\
        (pynast_dir, aln_fp, params_str)
    commands.append([('Filter alignment', filter_alignment_cmd)])

    ## Prep the tree building command
    tree_fp = '%s/rep_set.tre' % output_dir
    try:
        params_str = get_params_str(params['make_phylogeny'])
    except KeyError:
        params_str = ''
    # Build the tree building command
    make_phylogeny_cmd = 'make_phylogeny.py -i %s -o %s %s' %\
        (filtered_aln_fp, tree_fp, params_str)
    commands.append([('Build phylogenetic tree', make_phylogeny_cmd)])
    # Remove any stale tree left behind by a previous run
    if exists(tree_fp):
        remove_files([tree_fp])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
    return taxonomy_fp, failures_fp
def run_core_diversity_analyses(
        biom_fp,
        mapping_fp,
        sampling_depth,
        output_dir,
        qiime_config,
        command_handler=call_commands_serially,
        tree_fp=None,
        params=None,
        categories=None,
        arare_min_rare_depth=10,
        arare_num_steps=10,
        parallel=False,
        status_update_callback=print_to_stdout):
    """Run the QIIME core diversity analyses workflow.

    Performs even-sampling beta diversity (plots plus per-category distance
    boxplots), alpha rarefaction with per-category alpha comparisons, taxa
    summaries (overall and per category), and per-category OTU significance
    tests, then writes an index.html in output_dir linking all results.

    Raises ValueError when a requested category is missing from the mapping
    file or contains only a single value.
    """
    if categories is not None:
        # Validate categories provided by the users
        mapping_data, mapping_comments = \
            parse_mapping_file_to_dict(open(mapping_fp, 'U'))
        metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                # Py3-compatible raise call (was a Python-2-only statement)
                raise ValueError(
                    "Category '%s' is not a column header "
                    "in your mapping file. "
                    "Categories are case and white space sensitive. Valid "
                    "choices are: (%s)"
                    % (c, ', '.join(metadata_map.CategoryNames)))
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError(
                    "Category '%s' contains only one value. "
                    "Categories analyzed here require at least two values."
                    % c)
    else:
        categories = []

    # prep some variables
    if params is None:
        params = parse_qiime_parameters([])
    create_dir(output_dir)
    index_fp = '%s/index.html' % output_dir
    index_links = []
    commands = []
    # (removed unused python_exe_fp/script_dir locals -- all commands below
    # invoke the scripts directly)

    # begin logging
    log_fp = generate_log_fp(output_dir)
    index_links.append(('Master run log', log_fp, 'Log files'))
    logger = WorkflowLogger(log_fp, params=params, qiime_config=qiime_config)
    input_fps = [biom_fp, mapping_fp]
    if tree_fp is not None:
        input_fps.append(tree_fp)
    log_input_md5s(logger, input_fps)

    ## Beta diversity at even sampling depth
    bdiv_even_output_dir = '%s/bdiv_even%d/' % (output_dir, sampling_depth)
    even_dm_fps = run_beta_diversity_through_plots(
        otu_table_fp=biom_fp,
        mapping_fp=mapping_fp,
        output_dir=bdiv_even_output_dir,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        sampling_depth=sampling_depth,
        # force suppression of distance histograms - boxplots work better
        # in this context, and are created below.
        histogram_categories=[],
        tree_fp=tree_fp,
        parallel=parallel,
        logger=logger,
        status_update_callback=status_update_callback)
    for bdiv_metric, dm_fp in even_dm_fps:
        for category in categories:
            boxplots_output_dir = '%s/%s_boxplots/' %\
                (bdiv_even_output_dir, bdiv_metric)
            try:
                params_str = get_params_str(params['make_distance_boxplots'])
            except KeyError:
                params_str = ''
            boxplots_cmd = \
                'make_distance_boxplots.py -d %s -f %s -o %s -m %s -n 999 %s' %\
                (dm_fp, category, boxplots_output_dir, mapping_fp, params_str)
            commands.append([('Boxplots (%s)' % category, boxplots_cmd)])
            index_links.append(('Distance boxplots (%s)' % bdiv_metric,
                                '%s/%s_Distances.pdf' %
                                (boxplots_output_dir, category),
                                'Beta diversity results (even sampling: %d)'
                                % sampling_depth))
            index_links.append(('Distance boxplots statistics (%s)' % bdiv_metric,
                                '%s/%s_Stats.txt' %
                                (boxplots_output_dir, category),
                                'Beta diversity results (even sampling: %d)'
                                % sampling_depth))
        # Per-metric links (not category-specific)
        index_links.append(('3D plot (%s, continuous coloring)' % bdiv_metric,
                            '%s/%s_3d_continuous/%s_pc_3D_PCoA_plots.html' %
                            (bdiv_even_output_dir, bdiv_metric, bdiv_metric),
                            'Beta diversity results (even sampling: %d)'
                            % sampling_depth))
        index_links.append(('3D plot (%s, discrete coloring)' % bdiv_metric,
                            '%s/%s_3d_discrete/%s_pc_3D_PCoA_plots.html' %
                            (bdiv_even_output_dir, bdiv_metric, bdiv_metric),
                            'Beta diversity results (even sampling: %d)'
                            % sampling_depth))
        index_links.append(('2D plot (%s, continuous coloring)' % bdiv_metric,
                            '%s/%s_2d_continuous/%s_pc_2D_PCoA_plots.html' %
                            (bdiv_even_output_dir, bdiv_metric, bdiv_metric),
                            'Beta diversity results (even sampling: %d)'
                            % sampling_depth))
        index_links.append(('2D plot (%s, discrete coloring)' % bdiv_metric,
                            '%s/%s_2d_discrete/%s_pc_2D_PCoA_plots.html' %
                            (bdiv_even_output_dir, bdiv_metric, bdiv_metric),
                            'Beta diversity results (even sampling: %d)'
                            % sampling_depth))
        index_links.append(('Distance matrix (%s)' % bdiv_metric,
                            '%s/%s_dm.txt' %
                            (bdiv_even_output_dir, bdiv_metric),
                            'Beta diversity results (even sampling: %d)'
                            % sampling_depth))
        index_links.append(('Principal coordinate matrix (%s)' % bdiv_metric,
                            '%s/%s_pc.txt' %
                            (bdiv_even_output_dir, bdiv_metric),
                            'Beta diversity results (even sampling: %d)'
                            % sampling_depth))

    ## Alpha rarefaction workflow
    arare_full_output_dir = '%s/arare_max%d/' % (output_dir, sampling_depth)
    run_qiime_alpha_rarefaction(
        otu_table_fp=biom_fp,
        mapping_fp=mapping_fp,
        output_dir=arare_full_output_dir,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        tree_fp=tree_fp,
        num_steps=arare_num_steps,
        parallel=parallel,
        logger=logger,
        min_rare_depth=arare_min_rare_depth,
        max_rare_depth=sampling_depth,
        status_update_callback=status_update_callback)
    index_links.append(('Alpha rarefaction plots',
                        '%s/alpha_rarefaction_plots/rarefaction_plots.html'
                        % arare_full_output_dir,
                        "Alpha rarefaction results"))

    collated_alpha_diversity_fps = \
        glob('%s/alpha_div_collated/*txt' % arare_full_output_dir)
    try:
        params_str = get_params_str(params['compare_alpha_diversity'])
    except KeyError:
        params_str = ''
    for c in categories:
        for collated_alpha_diversity_fp in collated_alpha_diversity_fps:
            alpha_metric = splitext(split(collated_alpha_diversity_fp)[1])[0]
            alpha_comparison_output_fp = '%s/%s_%s.txt' %\
                (arare_full_output_dir, c, alpha_metric)
            compare_alpha_cmd = \
                'compare_alpha_diversity.py -i %s -m %s -c %s -d %s -o %s -n 999 %s' %\
                (collated_alpha_diversity_fp, mapping_fp, c, sampling_depth,
                 alpha_comparison_output_fp, params_str)
            # Bug fix: these labels previously interpolated the stale
            # `category` variable leaked from the beta diversity loop above,
            # so every label showed the last beta-diversity category. They
            # now use the current category `c` (matching the -c option).
            commands.append([('Compare alpha diversity (%s, %s)' %
                              (c, alpha_metric),
                              compare_alpha_cmd)])
            index_links.append(
                ('Alpha diversity statistics (%s, %s)' % (c, alpha_metric),
                 alpha_comparison_output_fp,
                 "Alpha rarefaction results"))

    ## Taxa summaries: all samples, then one run per category
    taxa_plots_output_dir = '%s/taxa_plots/' % output_dir
    run_summarize_taxa_through_plots(
        otu_table_fp=biom_fp,
        mapping_fp=mapping_fp,
        output_dir=taxa_plots_output_dir,
        mapping_cat=None,
        sort=True,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        logger=logger,
        status_update_callback=status_update_callback)
    index_links.append(('Taxa summary bar plots',
                        '%s/taxa_summary_plots/bar_charts.html'
                        % taxa_plots_output_dir,
                        "Taxonomic summary results"))
    index_links.append(('Taxa summary area plots',
                        '%s/taxa_summary_plots/area_charts.html'
                        % taxa_plots_output_dir,
                        "Taxonomic summary results"))
    for c in categories:
        taxa_plots_output_dir = '%s/taxa_plots_%s/' % (output_dir, c)
        run_summarize_taxa_through_plots(
            otu_table_fp=biom_fp,
            mapping_fp=mapping_fp,
            output_dir=taxa_plots_output_dir,
            mapping_cat=c,
            sort=True,
            command_handler=command_handler,
            params=params,
            qiime_config=qiime_config,
            logger=logger,
            status_update_callback=status_update_callback)
        index_links.append(('Taxa summary bar plots',
                            '%s/taxa_summary_plots/bar_charts.html'
                            % taxa_plots_output_dir,
                            "Taxonomic summary results (by %s)" % c))
        index_links.append(('Taxa summary area plots',
                            '%s/taxa_summary_plots/area_charts.html'
                            % taxa_plots_output_dir,
                            "Taxonomic summary results (by %s)" % c))

    # OTU category significance
    for category in categories:
        category_signifance_fp = \
            '%s/category_significance_%s.txt' % (output_dir, category)
        try:
            params_str = get_params_str(params['otu_category_significance'])
        except KeyError:
            params_str = ''
        # Build the OTU category significance command
        category_significance_cmd = \
            'otu_category_significance.py -i %s -m %s -c %s -o %s %s' %\
            (biom_fp, mapping_fp, category, category_signifance_fp, params_str)
        commands.append([('OTU category significance (%s)' % category,
                          category_significance_cmd)])
        index_links.append(('Category significance (%s)' % category,
                            category_signifance_fp,
                            "Category results"))

    # Execute the accumulated commands, then write the results index page
    command_handler(commands, status_update_callback, logger)
    generate_index_page(index_links, index_fp)
def tax_align_tree(repset_fasta_fp,
                   output_dir,
                   command_handler,
                   params,
                   qiime_config,
                   parallel=False,
                   logger=None,
                   status_update_callback=print_to_stdout):
    """Assign taxonomy, align, filter the alignment and build a tree for a
    representative sequence set.

    All steps are accumulated as shell commands and executed through
    command_handler. Returns (taxonomy_fp, failures_fp): the taxonomy
    assignment output filepath and the PyNAST alignment failures filepath
    (paths the commands will create once run).
    """
    input_dir, input_filename = split(repset_fasta_fp)
    input_basename, input_ext = splitext(input_filename)
    commands = []
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        # Caller owns the logger; don't close it when we finish.
        close_logger_on_success = False

    ## Prep the taxonomy assignment command
    try:
        assignment_method = params['assign_taxonomy']['assignment_method']
    except KeyError:
        assignment_method = 'rdp'
    assign_taxonomy_dir = '%s/%s_assigned_taxonomy' %\
        (output_dir, assignment_method)
    taxonomy_fp = '%s/%s_tax_assignments.txt' %\
        (assign_taxonomy_dir, input_basename)
    if parallel and (assignment_method == 'rdp' or assignment_method == 'blast'):
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params['parallel'])
        except KeyError:
            params_str = ''
        try:
            # The parallel script is method-specific, so it doesn't take an
            # --assignment_method option; strip it from the params.
            d = params['assign_taxonomy'].copy()
            # Bug fix: guard the del (as assign_tax does). Previously the
            # unconditional del raised KeyError when assignment_method was
            # not set in the params file, and the blanket `except KeyError`
            # silently dropped every other assign_taxonomy parameter from
            # the command.
            if 'assignment_method' in d:
                del d['assignment_method']
            params_str += ' %s' % get_params_str(d)
        except KeyError:
            pass
        # Build the parallel taxonomy assignment command
        assign_taxonomy_cmd = \
            'parallel_assign_taxonomy_%s.py -i %s -o %s -T %s' %\
            (assignment_method, repset_fasta_fp, assign_taxonomy_dir, params_str)
    else:
        try:
            params_str = get_params_str(params['assign_taxonomy'])
        except KeyError:
            params_str = ''
        # Build the taxonomy assignment command
        assign_taxonomy_cmd = 'assign_taxonomy.py -o %s -i %s %s' %\
            (assign_taxonomy_dir, repset_fasta_fp, params_str)
    # Start from a clean output directory
    if exists(assign_taxonomy_dir):
        rmtree(assign_taxonomy_dir)
    commands.append([('Assign taxonomy', assign_taxonomy_cmd)])

    ## Prep the pynast alignment command
    alignment_method = 'pynast'
    pynast_dir = '%s/%s_aligned_seqs' % (output_dir, alignment_method)
    aln_fp = '%s/%s_aligned.fasta' % (pynast_dir, input_basename)
    failures_fp = '%s/%s_failures.fasta' % (pynast_dir, input_basename)
    if exists(pynast_dir):
        rmtree(pynast_dir)
    if parallel:
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params['parallel'])
        except KeyError:
            params_str = ''
        try:
            # The parallel script is method-specific, so it doesn't take an
            # --alignment_method option; strip it from the params.
            d = params['align_seqs'].copy()
            if 'alignment_method' in d:
                del d['alignment_method']
            params_str += ' %s' % get_params_str(d)
        except KeyError:
            pass
        # Build the parallel pynast alignment command
        align_seqs_cmd = 'parallel_align_seqs_pynast.py -i %s -o %s -T %s' %\
            (repset_fasta_fp, pynast_dir, params_str)
    else:
        try:
            params_str = get_params_str(params['align_seqs'])
        except KeyError:
            params_str = ''
        # Build the pynast alignment command
        align_seqs_cmd = 'align_seqs.py -i %s -o %s %s' %\
            (repset_fasta_fp, pynast_dir, params_str)
    commands.append([('Align sequences', align_seqs_cmd)])

    ## Prep the alignment filtering command
    filtered_aln_fp = '%s/%s_aligned_pfiltered.fasta' %\
        (pynast_dir, input_basename)
    try:
        params_str = get_params_str(params['filter_alignment'])
    except KeyError:
        params_str = ''
    # Build the alignment filtering command
    filter_alignment_cmd = 'filter_alignment.py -o %s -i %s %s' %\
        (pynast_dir, aln_fp, params_str)
    commands.append([('Filter alignment', filter_alignment_cmd)])

    ## Prep the tree building command
    tree_fp = '%s/rep_set.tre' % output_dir
    try:
        params_str = get_params_str(params['make_phylogeny'])
    except KeyError:
        params_str = ''
    # Build the tree building command
    make_phylogeny_cmd = 'make_phylogeny.py -i %s -o %s %s' %\
        (filtered_aln_fp, tree_fp, params_str)
    commands.append([('Build phylogenetic tree', make_phylogeny_cmd)])
    # Remove any stale tree left behind by a previous run
    if exists(tree_fp):
        remove_files([tree_fp])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
    return taxonomy_fp, failures_fp
def run_chain_pick_otus(fasta_file, output_dir, command_handler, params,
                        qiime_config, parallel=False,
                        status_update_callback=print_to_stdout):
    """Run chained OTU picking: exact-match prefilter, database lookup,
    then uclust_ref picking on the leftover sequences.

    NOTE: Parts of this function are directly copied from the
    run_qiime_data_preparation function in QIIME's workflow.py.

    Steps performed:
      1) prefix/suffix exact-match OTU picking plus rep-set picking;
      2) match the rep set against the database (find_otus_in_database.py),
         producing a leftover fasta of unmatched sequences;
      3) pick OTUs on the leftovers with (parallel_)pick_otus against the
         reference; combine/merge all OTU maps and failures;
      4) build a biom OTU table and convert it to classic format.

    Requires params['pick_otus'] with 'otu_picking_method' and 'similarity'
    (the initial lookups raise KeyError otherwise). All commands are
    executed via command_handler; returns None.
    """
    # Prepare some variables for the later steps
    #split_lib_fasta_filenames=fasta_files.split(',')
    otu_maps_to_merge=[]
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params,
                            qiime_config=qiime_config)
    ###Starting the Chain OTU picking###
    # Perform exact match pre-filtering
    exact_match_otus_dir=join(output_dir,'pick_otus_exact')
    pick_otus_cmd = '%s %s/pick_otus.py -m prefix_suffix -i %s -o %s -p 5000' %\
        (python_exe_fp, script_dir, fasta_file, exact_match_otus_dir)
    commands.append([('Pick OTUs: Exact match', pick_otus_cmd)])
    # Pick Rep set from exact match pre-filtering
    exact_match_basename=splitext(split(fasta_file)[-1])[0]
    exact_otu_fp=join(exact_match_otus_dir,exact_match_basename+'_otus.txt')
    exact_match_fna = join(exact_match_otus_dir,exact_match_basename) + \
        '_exact_rep.fna'
    otu_maps_to_merge.append(exact_otu_fp)
    pick_rep_set_exact_cmd = '%s %s/pick_rep_set.py -i %s -f %s -o %s ' %\
        (python_exe_fp, script_dir, exact_otu_fp, fasta_file, exact_match_fna)
    commands.append([('Pick Rep Set: Exact match', pick_rep_set_exact_cmd)])
    # Do exact-match database pre-filtering
    leftover_fasta = join(output_dir, 'leftover.fasta')
    db_otu_map = join(output_dir, 'otu_map.txt')
    # find_otus_in_database.py lives next to this module, not in QIIME
    web_app_scripts_dir = join(split(realpath(__file__))[0], 'scripts')
    find_db_otus_command = '%s %s/find_otus_in_database.py -i %s -f %s -m %s' %\
        (python_exe_fp, web_app_scripts_dir, exact_match_fna, leftover_fasta,\
         db_otu_map)
    commands.append([('Find Database OTU Hits', find_db_otus_command)])
    # Prep the UCLUST_REF OTU picking command
    # Both keys are required here; a missing section fails fast with KeyError.
    otu_picking_method = params['pick_otus']['otu_picking_method'].upper()
    otu_picking_similarity = int(float(params['pick_otus']['similarity'])*100)
    pick_otu_dir = '%s/picked_otus_%s_%s' % (output_dir,otu_picking_method,\
                                             otu_picking_similarity)
    uclust_otu_fp = join(pick_otu_dir,\
                         splitext(split(leftover_fasta)[-1])[0]+'_otus.txt')
    uclust_failure_fp = join(pick_otu_dir,\
                         splitext(split(leftover_fasta)[-1])[0]+'_failures.txt')
    # Grab the OTU picker parameters
    try:
        # Want to find a cleaner strategy for this: the parallel script
        # is method-specific, so doesn't take a --otu_picking_method
        # option. This works for now though.
        d = params['pick_otus'].copy()
        del d['otu_picking_method']
        params_str = ' %s' % get_params_str(d)
    except KeyError:
        # NOTE(review): if this fires, params_str may be left unbound and
        # later use would raise NameError; in practice the required-key
        # lookups above guarantee 'pick_otus'/'otu_picking_method' exist.
        pass
    if parallel:
        # Grab the parallel-specific parameters
        # Grab the OTU picker parameters
        try:
            # Want to find a cleaner strategy for this: the parallel script
            # is method-specific, so doesn't take a --otu_picking_method
            # option. This works for now though.
            d = params['pick_otus'].copy()
            del d['otu_picking_method']
            # NOTE(review): if either of the next two keys is absent, this
            # whole block is abandoned and params_str keeps the value
            # computed above (which still contains those options).
            del d['clustering_algorithm']
            del d['suppress_new_clusters']
            params_str = ' %s' % get_params_str(d)
        except KeyError:
            pass
        try:
            params_str += ' %s' % get_params_str(params['parallel'])
        except KeyError:
            params_str += ''
        # Build the OTU picking command
        pick_otus_cmd = '%s %s/parallel_pick_otus_uclust_ref.py -i %s -T -o %s %s' %\
            (python_exe_fp, script_dir, leftover_fasta, pick_otu_dir, params_str)
    else:
        try:
            # Want to find a cleaner strategy for this: the parallel script
            # is method-specific, so doesn't take a --otu_picking_method
            # option. This works for now though.
            # NOTE(review): unlike the parallel branch, otu_picking_method is
            # NOT removed here, so it is serialized into the command line --
            # confirm pick_otus.py accepts it via the params string.
            d = params['pick_otus'].copy()
            params_str = ' %s' % get_params_str(d)
        except KeyError:
            pass
        # Build the OTU picking command
        pick_otus_cmd = '%s %s/pick_otus.py -i %s -o %s %s' %\
            (python_exe_fp, script_dir, leftover_fasta, pick_otu_dir, params_str)
    commands.append([('Pick OTUs: uclust_ref', pick_otus_cmd)])
    # Must now merge the otu file produced from database matching and the file
    # produced by uclust_ref - they are of the same kind but need to be mashed
    # together
    combined_otu_file = join(output_dir, 'combined_otu_map.txt')
    otu_map_files = [db_otu_map, uclust_otu_fp]
    otu_maps_to_merge.append(combined_otu_file)
    combine_otu_maps_cmd = '%s %s/combine_otu_map_files.py -i %s -o %s' %\
        (python_exe_fp, web_app_scripts_dir, ','.join(otu_map_files),
         combined_otu_file)
    commands.append([('Combine OTU maps', combine_otu_maps_cmd)])
    # Run merge_otu_maps.py on the newly combined file and the originally
    # produced otu map
    merged_otus_fp = join(output_dir,'exact_uclust_ref_otus.txt')
    merge_otus_cmd = '%s %s/merge_otu_maps.py -i %s -o %s' %\
        (python_exe_fp, script_dir, ','.join(otu_maps_to_merge), merged_otus_fp)
    commands.append([('Merge OTUs', merge_otus_cmd)])
    # Deal with failures produced in uclust_ref
    all_failures_fp = join(output_dir,'all_failures.txt')
    merge_otus_failures_cmd = '%s %s/merge_otu_maps.py -f %s -i %s -o %s' %\
        (python_exe_fp, script_dir, uclust_failure_fp, exact_otu_fp,
         all_failures_fp)
    commands.append([('Merge OTUs - Failures', merge_otus_failures_cmd)])
    # Make OTU Table
    otu_biom_fp = join(output_dir,'exact_uclust_ref_otu_table.biom')
    make_otu_biom_cmd='%s %s/make_otu_table.py -i %s -o %s' %\
        (python_exe_fp, script_dir, merged_otus_fp, otu_biom_fp)
    commands.append([('Make Biom File', make_otu_biom_cmd)])
    # Convert to classic OTU table
    # NOTE(review): convert_biom.py is resolved under $HOME/software -- this
    # hard-codes the deployment layout; confirm it matches the server setup.
    otu_table_fp = join(output_dir,'exact_uclust_ref_otu_table.txt')
    make_otu_table_cmd='%s %s/software/biom-format/scripts/convert_biom.py -i %s -o %s -b' %\
        (python_exe_fp, environ['HOME'], otu_biom_fp, otu_table_fp)
    commands.append([('Make OTU Table', make_otu_table_cmd)])
    # Call the command handler on the list of commands
    command_handler(commands, status_update_callback, logger=logger)
def assign_tax(repset_fasta_fp,
               output_dir,
               command_handler,
               params,
               qiime_config,
               parallel=False,
               logger=None,
               status_update_callback=print_to_stdout):
    """Build and run the taxonomy-assignment step for a representative set.

    Parameters
    ----------
    repset_fasta_fp : filepath of the representative-set FASTA file
    output_dir : directory under which '<method>_assigned_taxonomy' is created
    command_handler : callable that executes the generated command list
    params : dict of per-script parameter dicts; 'assign_taxonomy' may carry
        an 'assignment_method' entry (defaults to 'rdp' when absent), and
        'parallel' supplies parallel-run options
    qiime_config : QIIME configuration dict (used when creating a logger)
    parallel : when True and the method is 'rdp' or 'blast', use the
        method-specific parallel_assign_taxonomy_<method>.py script
    logger : an existing WorkflowLogger, or None to create one here
    status_update_callback : progress callback passed to command_handler

    Returns
    -------
    Filepath of the tax_assignments.txt file the assignment script writes.
    """
    input_basename = splitext(split(repset_fasta_fp)[1])[0]
    commands = []

    # Create a logger only if the caller didn't supply one; in that case
    # this function owns it, so the command handler should close it on
    # success.
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    # Prep the taxonomy assignment command
    try:
        assignment_method = params['assign_taxonomy']['assignment_method']
    except KeyError:
        assignment_method = 'rdp'
    assign_taxonomy_dir = '%s/%s_assigned_taxonomy' %\
        (output_dir, assignment_method)
    taxonomy_fp = '%s/%s_tax_assignments.txt' %\
        (assign_taxonomy_dir, input_basename)

    if parallel and assignment_method in ('rdp', 'blast'):
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params['parallel'])
        except KeyError:
            params_str = ''
        try:
            # The parallel script is method-specific, so it doesn't take an
            # --assignment_method option; strip that key from a copy of the
            # parameter dict so the caller's params are left untouched.
            d = params['assign_taxonomy'].copy()
            if 'assignment_method' in d:
                del d['assignment_method']
            params_str += ' %s' % get_params_str(d)
        except KeyError:
            pass
        # Build the parallel taxonomy assignment command
        assign_taxonomy_cmd = \
            'parallel_assign_taxonomy_%s.py -i %s -o %s -T %s' %\
            (assignment_method, repset_fasta_fp, assign_taxonomy_dir,
             params_str)
    else:
        try:
            params_str = get_params_str(params['assign_taxonomy'])
        except KeyError:
            params_str = ''
        # Build the (serial) taxonomy assignment command
        assign_taxonomy_cmd = 'assign_taxonomy.py -o %s -i %s %s' %\
            (assign_taxonomy_dir, repset_fasta_fp, params_str)

    # Remove any pre-existing output directory so the run starts clean.
    if exists(assign_taxonomy_dir):
        rmtree(assign_taxonomy_dir)

    commands.append([('Assign taxonomy', assign_taxonomy_cmd)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
    return taxonomy_fp
def run_process_illumina_through_split_lib(study_id, run_prefix, input_fp,
    mapping_fp, output_dir, command_handler, params, qiime_config,
    write_to_all_fasta=False, status_update_callback=print_to_stdout):
    """Demultiplex Illumina reads and build per-sample FASTQ files.

    NOTE: Parts of this function are directly copied from the
    run_qiime_data_preparation function from the workflow.py library file
    in QIIME.

    Steps performed:
      1) Copy the mapping file into output_dir.
      2) De-multiplex sequences (split_libraries_fastq.py).
      3) Create per-sample FASTQ files (make_per_sample_fastq.py).

    Parameters
    ----------
    study_id, run_prefix : accepted for interface compatibility; unused here
    input_fp : comma-separated string of input FASTQ filepaths
    mapping_fp : metadata mapping filepath
    output_dir : directory for all generated output (created if missing)
    command_handler : callable that executes the generated command list
    params : dict of per-script parameter dicts ('split_libraries_fastq')
    qiime_config : QIIME configuration dict ('python_exe_fp' is used)
    write_to_all_fasta : accepted for interface compatibility; unused here
    status_update_callback : progress callback passed to command_handler

    Returns
    -------
    The sorted list of input filepaths split from input_fp (NOT the paths
    of the files the pipeline generates).
    """
    # Prepare some variables for the later steps
    filenames = input_fp.split(',')
    commands = []
    create_dir(output_dir)
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params,
                            qiime_config=qiime_config)

    # Copy the mapping file into the output directory so downstream steps
    # reference a stable local copy.
    copied_mapping = split(mapping_fp)[-1]
    mapping_input_fp_copy = join(output_dir, copied_mapping)
    copy_mapping_cmd = 'cp %s %s' % (mapping_fp, mapping_input_fp_copy)
    commands.append([('CopyMapping', copy_mapping_cmd)])

    # Sort the filenames, then determine which file is the seq-file and
    # which is the barcode-file, and associate them with the mapping file.
    filenames.sort()
    input_str = get_split_libraries_fastq_params_and_file_types(filenames,
                                                                mapping_fp)

    # Create the split_libraries output folder
    split_library_output = join(output_dir, 'split_libraries')
    create_dir(split_library_output)

    # Get the user-supplied split_libraries_fastq parameters, if any
    try:
        params_str = get_params_str(params['split_libraries_fastq'])
    except KeyError:
        params_str = ''

    # Build the split libraries command
    split_libraries_cmd = '%s %s/split_libraries_fastq.py -o %s -m %s %s %s' % \
        (python_exe_fp, script_dir, split_library_output,
         mapping_input_fp_copy, input_str, params_str)
    commands.append([('SplitLibraries', split_libraries_cmd)])

    # Paths of the files split_libraries_fastq.py will have generated by
    # the time the next command runs
    input_fp = join(split_library_output, 'seqs.fna')
    input_qual_fp = join(split_library_output, 'seqs.qual')

    # Create per-sample FASTQ files. convert_fastaqual_fastq.py cannot yet
    # handle Illumina qual files, so a local helper script is used instead.
    # TURN ON when convert_fastaqual_fastq can handle Illumina qual files:
    #   '%s %s/convert_fastaqual_fastq.py -f %s -q %s -o %s %s' %
    #   (python_exe_fp, script_dir, input_fp, input_qual_fp, fastq_output,
    #    get_params_str(params['convert_fastaqual_fastq']))
    fastq_output = join(split_library_output, 'per_sample_fastq')
    create_dir(fastq_output)
    create_fastq_cmd = ('%s %s/git/qiime_web_app/python_code/scripts/'
        'make_per_sample_fastq.py -i %s -q %s -o %s') % \
        (python_exe_fp, environ['HOME'], input_fp, input_qual_fp,
         fastq_output)
    commands.append([('Create FASTQ', create_fastq_cmd)])

    # Call the command handler on the list of commands
    command_handler(commands, status_update_callback, logger=logger)

    # Return the sorted input filepaths
    return filenames