def get_job_commands(python_exe_fp, alpha_diversity_fp, tree_fp, job_prefix,
                     metrics, input_fps, output_dir, working_dir,
                     command_prefix=None, command_suffix=None):
    """Build one alpha diversity command string per input file.

    Every command has the shape
    ``<prefix> <python> <script> -i <input> -o <working_dir>/alpha_<name>
    -t <tree> -m <metrics> <rename command> <suffix>``.

    python_exe_fp, alpha_diversity_fp: interpreter and script tokens placed
     at the start of each command.
    tree_fp, metrics: values passed through as -t and -m.
    job_prefix: accepted but not used by this function.
    input_fps: iterable of input filepaths; one command is built for each.
    output_dir, working_dir: forwarded to get_rename_command; working_dir is
     also the directory the -o path points into.
    command_prefix, command_suffix: text wrapped around each command; falsy
     values fall back to '/bin/bash; ' and '; exit'.

    Returns (commands, result_filepaths); result_filepaths accumulates the
     filepaths reported by get_rename_command for every input.
    """
    prefix = command_prefix or '/bin/bash; '
    suffix = command_suffix or '; exit'
    template = '%s %s %s -i %s -o %s -t %s -m %s %s %s'

    commands = []
    result_filepaths = []
    for input_fp in input_fps:
        # Only the filename part of the input is used to name the output.
        _, input_fn = split(input_fp)
        output_fn = 'alpha_%s' % input_fn

        # get_rename_command is defined elsewhere; presumably it yields the
        # shell snippet moving the output from working_dir to output_dir.
        rename_command, moved_fps = get_rename_command(
            [output_fn], working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        commands.append(template % (
            prefix,
            python_exe_fp,
            alpha_diversity_fp,
            input_fp,
            working_dir + '/' + output_fn,
            tree_fp,
            metrics,
            rename_command,
            suffix))

    return commands, result_filepaths
def get_job_commands(python_exe_fp, align_seqs_fp, fasta_fps, template_aln_fp,
                     pairwise_alignment_method, output_dir, blast_db,
                     min_length, min_percent_id, job_prefix, working_dir,
                     command_prefix=None, command_suffix=None):
    """Build one PyNAST alignment command string per input fasta file.

    python_exe_fp, align_seqs_fp: interpreter and script tokens placed at
     the start of each command.
    fasta_fps: iterable of fasta filepaths; the position of each file in
     this iterable is the run index used in its output filenames.
    template_aln_fp, pairwise_alignment_method: passed as -t and -a.
    blast_db: when truthy it is passed as -d; otherwise no -d is emitted.
    min_length: passed as -e (formatted with %d).
    min_percent_id: passed as -p (formatted with %1.2f).
    job_prefix: stem of the per-run output filenames.
    output_dir, working_dir: forwarded to get_rename_command; working_dir is
     also passed as -o.
    command_prefix, command_suffix: text wrapped around each command; falsy
     values fall back to '/bin/bash; ' and '; exit'.

    Returns (commands, result_filepaths).
    """
    prefix = command_prefix or '/bin/bash; '
    suffix = command_suffix or '; exit'

    # Basenames of the files each run produces; the run index replaces %d.
    out_filenames = [job_prefix + '.%d_aligned.fasta',
                     job_prefix + '.%d_failures.fasta',
                     job_prefix + '.%d_log.txt']

    # Pass the blast database only when one was supplied. Without it the
    # database is created on-the-fly with random characters in its name, so
    # concurrent runs should not overwrite one another.
    blast_option = ('-d %s' % blast_db) if blast_db else ''

    template = ('%s %s %s %s -p %1.2f -e %d -m pynast '
                '-t %s -a %s -o %s -i %s %s %s')

    commands = []
    result_filepaths = []
    for run_index, fasta_fp in enumerate(fasta_fps):
        # Each run ends by moving its outputs from the tmp dir to
        # output_dir; the command doing that is built here.
        run_filenames = [fn % run_index for fn in out_filenames]
        rename_command, moved_fps = get_rename_command(
            run_filenames, working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        commands.append(template % (
            prefix,
            python_exe_fp,
            align_seqs_fp,
            blast_option,
            min_percent_id,
            min_length,
            template_aln_fp,
            pairwise_alignment_method,
            working_dir,
            fasta_fp,
            rename_command,
            suffix))

    return commands, result_filepaths
def get_job_commands_single_otu_table(python_exe_fp, beta_diversity_fp,
                                      tree_fp, job_prefix, metrics, input_fp,
                                      output_dir, working_dir, jobs_to_start,
                                      command_prefix=None,
                                      command_suffix=None):
    """Build beta diversity commands that split one OTU table across jobs.

    The sample ids read from input_fp are grouped by merge_to_n_commands
    into comma-joined groups (one per job), and each command is restricted
    to its group via -r. -f is always passed to the beta diversity script.

    python_exe_fp, beta_diversity_fp: interpreter and script tokens placed
     at the start of each command.
    tree_fp, metrics: passed as -t and -m; metrics is also split on ',' to
     derive one output filename per metric.
    job_prefix: accepted but not used by this function.
    input_fp: OTU table filepath, passed as -i to every command.
    output_dir, working_dir: parent directories; job i uses the
     subdirectories <output_dir>/<i> and <working_dir>/<i>.
    jobs_to_start: forwarded to merge_to_n_commands.
    command_prefix, command_suffix: text wrapped around each command; falsy
     values fall back to '/bin/bash; ' and '; exit'.

    Returns (commands, result_filepaths).
    """
    command_prefix = command_prefix or '/bin/bash; '
    command_suffix = command_suffix or '; exit'
    template = '%s %s %s -i %s -o %s -t %s -m %s -f -r %s %s %s'

    # Close the OTU table as soon as the sample ids have been read instead
    # of leaking the handle. NOTE(review): 'U' mode is kept for the Python 2
    # era this code targets; it was removed in Python 3.11.
    with open(input_fp, 'U') as otu_table_f:
        sids = parse_otu_table(otu_table_f)[0]

    sample_id_groups = merge_to_n_commands(sids, jobs_to_start, ',', '', '')

    commands = []
    result_filepaths = []
    for i, sample_id_group in enumerate(sample_id_groups):
        working_dir_i = os.path.join(working_dir, str(i))
        output_dir_i = os.path.join(output_dir, str(i))

        # A fresh list per job: get_rename_command is defined elsewhere, so
        # do not assume it leaves its argument untouched.
        _, input_fn = split(input_fp)
        output_fns = ['%s_%s' % (metric, input_fn)
                      for metric in metrics.split(',')]

        rename_command, current_result_filepaths = get_rename_command(
            output_fns, working_dir_i, output_dir_i)
        result_filepaths += current_result_filepaths

        commands.append(template % (
            command_prefix,
            python_exe_fp,
            beta_diversity_fp,
            input_fp,
            working_dir_i + '/',
            tree_fp,
            metrics,
            sample_id_group,
            rename_command,
            command_suffix))

    return commands, result_filepaths
def get_job_commands_multiple_otu_tables(python_exe_fp, beta_diversity_fp,
                                         tree_fp, job_prefix, metrics,
                                         input_fps, output_dir, working_dir,
                                         command_prefix=None,
                                         command_suffix=None,
                                         full_tree=False):
    """Build one beta diversity command string per input OTU table.

    python_exe_fp, beta_diversity_fp: interpreter and script tokens placed
     at the start of each command.
    tree_fp, metrics: passed as -t and -m; metrics is also split on ',' to
     derive one output filename per metric (<metric>_<input filename>).
    job_prefix: accepted but not used by this function.
    input_fps: iterable of OTU table filepaths; one command is built each.
    output_dir, working_dir: forwarded to get_rename_command; working_dir
     plus a trailing '/' is passed as -o.
    command_prefix, command_suffix: text wrapped around each command; falsy
     values fall back to '/bin/bash; ' and '; exit'.
    full_tree: when truthy, -f is added to every command.

    Returns (commands, result_filepaths).
    """
    prefix = command_prefix or '/bin/bash; '
    suffix = command_suffix or '; exit'
    full_tree_str = '-f' if full_tree else ''
    template = '%s %s %s -i %s -o %s -t %s -m %s %s %s %s'

    commands = []
    result_filepaths = []
    for input_fp in input_fps:
        _, input_fn = split(input_fp)
        output_fns = ['%s_%s' % (metric, input_fn)
                      for metric in metrics.split(',')]

        # get_rename_command is defined elsewhere; presumably it yields the
        # shell snippet moving outputs from working_dir to output_dir.
        rename_command, moved_fps = get_rename_command(
            output_fns, working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        commands.append(template % (
            prefix,
            python_exe_fp,
            beta_diversity_fp,
            input_fp,
            working_dir + '/',
            tree_fp,
            metrics,
            full_tree_str,
            rename_command,
            suffix))

    return commands, result_filepaths
def get_job_commands(python_exe_fp, pick_otus_fp, fasta_fps, output_dir,
                     blast_db, job_prefix, working_dir, max_e_value,
                     similarity, min_aligned_percent,
                     command_prefix='/bin/bash; ', command_suffix='; exit'):
    """Build one BLAST-based pick_otus command string per input fasta file.

    python_exe_fp, pick_otus_fp: interpreter and script tokens placed at
     the start of each command.
    fasta_fps: iterable of fasta filepaths; the position of each file is
     the run index used in its output filenames.
    blast_db, max_e_value, similarity: passed as -b, -e and -s.
    min_aligned_percent: passed as --min_aligned_percent.
    job_prefix: stem of the per-run output filenames.
    output_dir, working_dir: forwarded to get_rename_command; working_dir is
     also passed as -o.
    command_prefix, command_suffix: text wrapped around each command; they
     are used exactly as given (no fallback is applied here).

    Returns (commands, result_filepaths).
    """
    # Basenames of the files each run produces; the run index replaces %d.
    out_filenames = [job_prefix + '.%d_otus.log',
                     job_prefix + '.%d_otus.txt']

    template = ('%s %s %s -i %s -b %s -m blast -o %s -e %s -s %s '
                '--min_aligned_percent %s %s %s')

    commands = []
    result_filepaths = []
    for run_index, fasta_fp in enumerate(fasta_fps):
        # Each run ends by moving its outputs from the tmp dir to
        # output_dir; the command doing that is built here.
        run_filenames = [fn % run_index for fn in out_filenames]
        rename_command, moved_fps = get_rename_command(
            run_filenames, working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        commands.append(template % (
            command_prefix,
            python_exe_fp,
            pick_otus_fp,
            fasta_fp,
            blast_db,
            working_dir,
            max_e_value,
            similarity,
            min_aligned_percent,
            rename_command,
            command_suffix))

    return commands, result_filepaths
def get_commands(python_exe_fp, assign_taxonomy_fp, confidence, job_prefix,
                 fasta_fps, rdp_jar_fp, output_dir, working_dir,
                 command_prefix=None, command_suffix=None,
                 id_to_taxonomy_fp=None, reference_seqs_fp=None):
    """Build one RDP classifier command string per input fasta file.

    python_exe_fp, assign_taxonomy_fp: interpreter and script tokens placed
     at the start of each command.
    confidence: passed as -c (formatted with %1.2f).
    job_prefix: stem of the per-run output filenames.
    fasta_fps: iterable of fasta filepaths; the position of each file is
     the run index used in its output filenames.
    rdp_jar_fp: exported as RDP_JAR_PATH by the default command prefix.
    output_dir, working_dir: forwarded to get_rename_command; working_dir is
     also passed as -o.
    command_prefix: falsy values fall back to
     '/bin/bash; export RDP_JAR_PATH=<rdp_jar_fp>; '.
    command_suffix: falsy values fall back to '; exit'.
    id_to_taxonomy_fp, reference_seqs_fp: passed as -t and -r, but only
     when both are truthy.

    Returns (commands, result_filepaths).
    """
    prefix = (command_prefix or
              '/bin/bash; export RDP_JAR_PATH=%s; ' % rdp_jar_fp)
    suffix = command_suffix or '; exit'

    # Basenames of the files each run produces; the run index replaces %d.
    out_filenames = [job_prefix + '.%d_tax_assignments.log',
                     job_prefix + '.%d_tax_assignments.txt']

    # The training files are only passed as a pair.
    if id_to_taxonomy_fp and reference_seqs_fp:
        rdp_extra_params = '-t %s -r %s' % (id_to_taxonomy_fp,
                                            reference_seqs_fp)
    else:
        rdp_extra_params = ''

    template = '%s %s %s %s -c %1.2f -m rdp -o %s -i %s %s %s'

    commands = []
    result_filepaths = []
    for run_index, fasta_fp in enumerate(fasta_fps):
        # Each run ends by moving its outputs from the tmp dir to
        # output_dir; the command doing that is built here.
        run_filenames = [fn % run_index for fn in out_filenames]
        rename_command, moved_fps = get_rename_command(
            run_filenames, working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        commands.append(template % (
            prefix,
            python_exe_fp,
            assign_taxonomy_fp,
            rdp_extra_params,
            confidence,
            working_dir,
            fasta_fp,
            rename_command,
            suffix))

    return commands, result_filepaths
def get_job_commands(python_exe_fp, assign_taxonomy_fp, id_to_taxonomy_fp,
                     e_value, blast_db, job_prefix,
                     blastmat_path, fasta_fps, output_dir, working_dir,
                     command_prefix=None, command_suffix=None):
    """Build one BLAST taxonomy assignment command per input fasta file.

    python_exe_fp, assign_taxonomy_fp: interpreter and script tokens placed
     at the start of each command.
    id_to_taxonomy_fp, e_value, blast_db: passed as -t, -e and -b.
    job_prefix: stem of the per-run output filenames.
    blastmat_path: exported as BLASTMAT by the default command prefix.
    fasta_fps: iterable of fasta filepaths; the position of each file is
     the run index used in its output filenames.
    output_dir, working_dir: forwarded to get_rename_command; working_dir is
     also passed as -o and is the directory the default prefix cd's into.
    command_prefix: falsy values fall back to
     '/bin/bash; cd <working_dir>; export BLASTMAT=<blastmat_path>;'.
    command_suffix: falsy values fall back to '; exit'.

    Returns (commands, result_filepaths).
    """
    prefix = (command_prefix or
              '/bin/bash; cd %s; export BLASTMAT=%s;'
              % (working_dir, blastmat_path))
    suffix = command_suffix or '; exit'

    # Basenames of the files each run produces; the run index replaces %d.
    out_filenames = [job_prefix + '.%d_tax_assignments.log',
                     job_prefix + '.%d_tax_assignments.txt']

    template = '%s %s %s -o %s -m blast -e %s -b %s -i %s -t %s %s %s'

    commands = []
    result_filepaths = []
    for run_index, fasta_fp in enumerate(fasta_fps):
        # Each run ends by moving its outputs from the tmp dir to
        # output_dir; the command doing that is built here.
        run_filenames = [fn % run_index for fn in out_filenames]
        rename_command, moved_fps = get_rename_command(
            run_filenames, working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        commands.append(template % (
            prefix,
            python_exe_fp,
            assign_taxonomy_fp,
            working_dir,
            e_value,
            blast_db,
            fasta_fp,
            id_to_taxonomy_fp,
            rename_command,
            suffix))

    return commands, result_filepaths
def get_job_commands(python_exe_fp, rarefaction_fp, job_prefix,
                     input_fp, output_dir, working_dir, min_seqs, max_seqs,
                     step, num_reps, lineages_included,
                     command_prefix=None, command_suffix=None):
    """Build one rarefaction command per (depth, replicate) combination.

    Depths are range(min_seqs, max_seqs + 1, step); for every depth,
    num_reps commands are built, each writing
    <working_dir>/rarefaction_<depth>_<replicate>.txt.

    python_exe_fp, rarefaction_fp: interpreter and script tokens placed at
     the start of each command.
    job_prefix: accepted but not used by this function.
    input_fp: passed as -i to every command.
    output_dir, working_dir: forwarded to get_rename_command; working_dir is
     also the directory the -o path points into.
    lineages_included: when truthy, --lineages_included is added.
    command_prefix, command_suffix: text wrapped around each command; falsy
     values fall back to '/bin/bash; ' and '; exit'.

    Returns (commands, result_filepaths).
    """
    prefix = command_prefix or '/bin/bash; '
    suffix = command_suffix or '; exit'
    lineages_flag = '--lineages_included' if lineages_included else ''
    template = '%s %s %s -i %s -o %s %s -d %s %s %s'

    # (depth, output filename) for every depth/replicate combination.
    runs = [(num_seqs, 'rarefaction_%d_%d.txt' % (num_seqs, rep_num))
            for num_seqs in range(min_seqs, max_seqs + 1, step)
            for rep_num in range(num_reps)]

    commands = []
    result_filepaths = []
    for depth, output_fn in runs:
        # Each run ends by moving its output from the tmp dir to
        # output_dir; the command doing that is built here.
        rename_command, moved_fps = get_rename_command(
            [output_fn], working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        commands.append(template % (
            prefix,
            python_exe_fp,
            rarefaction_fp,
            input_fp,
            working_dir + '/' + output_fn,
            lineages_flag,
            depth,
            rename_command,
            suffix))

    return commands, result_filepaths
def get_job_commands(python_exe_fp, rarefaction_fp, job_prefix,
                     input_fp, output_dir, working_dir, min_seqs, max_seqs,
                     step, num_reps, lineages_included,
                     command_prefix=None, command_suffix=None):
    """Build rarefaction commands, num_reps of them at every depth.

    The depths visited are range(min_seqs, max_seqs + 1, step). Each command
    writes to <working_dir>/rarefaction_<depth>_<replicate>.txt and passes
    the depth as -d.

    python_exe_fp, rarefaction_fp: interpreter and script tokens placed at
     the start of each command.
    job_prefix: accepted but not used by this function.
    input_fp: passed as -i to every command.
    output_dir, working_dir: forwarded to get_rename_command.
    lineages_included: when truthy, --lineages_included is added.
    command_prefix, command_suffix: text wrapped around each command; falsy
     values fall back to '/bin/bash; ' and '; exit'.

    Returns (commands, result_filepaths).
    """
    if lineages_included:
        lineages_included_param = '--lineages_included'
    else:
        lineages_included_param = ''

    prefix = command_prefix or '/bin/bash; '
    suffix = command_suffix or '; exit'

    commands = []
    result_filepaths = []
    for depth in range(min_seqs, max_seqs + 1, step):
        for rep_num in range(num_reps):
            output_fn = 'rarefaction_%d_%d.txt' % (depth, rep_num)

            # Each run ends by moving its output from the tmp dir to
            # output_dir; the command doing that is built here.
            rename_command, moved_fps = get_rename_command(
                [output_fn], working_dir, output_dir)
            result_filepaths.extend(moved_fps)

            output_fp = working_dir + '/' + output_fn
            command = '%s %s %s -i %s -o %s %s -d %s %s %s' % (
                prefix,
                python_exe_fp,
                rarefaction_fp,
                input_fp,
                output_fp,
                lineages_included_param,
                depth,
                rename_command,
                suffix)
            commands.append(command)

    return commands, result_filepaths
def get_job_commands(python_exe_fp, assign_taxonomy_fp, id_to_taxonomy_fp,
                     e_value, blast_db, job_prefix,
                     blastmat_path, fasta_fps, output_dir, working_dir,
                     command_prefix=None, command_suffix=None):
    """Build BLAST taxonomy assignment commands, one per fasta file.

    python_exe_fp, assign_taxonomy_fp: interpreter and script tokens placed
     at the start of each command.
    id_to_taxonomy_fp, e_value, blast_db: passed as -t, -e and -b.
    job_prefix: stem of the per-run output filenames.
    blastmat_path: exported as BLASTMAT by the default command prefix.
    fasta_fps: iterable of fasta filepaths; the position of each file is
     the run index used in its output filenames.
    output_dir, working_dir: forwarded to get_rename_command; working_dir is
     also passed as -o and is the directory the default prefix cd's into.
    command_prefix: falsy values fall back to
     '/bin/bash; cd <working_dir>; export BLASTMAT=<blastmat_path>;'.
    command_suffix: falsy values fall back to '; exit'.

    Returns (commands, result_filepaths).
    """
    if not command_prefix:
        command_prefix = '/bin/bash; cd %s; export BLASTMAT=%s;' % (
            working_dir, blastmat_path)
    if not command_suffix:
        command_suffix = '; exit'

    # Basenames of the files each run produces; the run index replaces %d.
    log_pattern = job_prefix + '.%d_tax_assignments.log'
    assignments_pattern = job_prefix + '.%d_tax_assignments.txt'

    commands = []
    result_filepaths = []
    for i, fasta_fp in enumerate(fasta_fps):
        # Each run ends by moving its outputs from the tmp dir to
        # output_dir; the command doing that is built here.
        rename_command, moved_fps = get_rename_command(
            [log_pattern % i, assignments_pattern % i],
            working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        command = '%s %s %s -o %s -m blast -e %s -b %s -i %s -t %s %s %s' % (
            command_prefix,
            python_exe_fp,
            assign_taxonomy_fp,
            working_dir,
            e_value,
            blast_db,
            fasta_fp,
            id_to_taxonomy_fp,
            rename_command,
            command_suffix)
        commands.append(command)

    return commands, result_filepaths
def get_job_commands(python_exe_fp, pick_otus_fp, fasta_fps,
                     output_dir, blast_db, job_prefix, working_dir,
                     max_e_value, similarity, min_aligned_percent,
                     command_prefix='/bin/bash; ', command_suffix='; exit'):
    """Build BLAST-based pick_otus commands, one per input fasta file.

    python_exe_fp, pick_otus_fp: interpreter and script tokens placed at
     the start of each command.
    fasta_fps: iterable of fasta filepaths; the position of each file is
     the run index used in its output filenames.
    blast_db, max_e_value, similarity: passed as -b, -e and -s.
    min_aligned_percent: passed as --min_aligned_percent.
    job_prefix: stem of the per-run output filenames.
    output_dir, working_dir: forwarded to get_rename_command; working_dir is
     also passed as -o.
    command_prefix, command_suffix: text wrapped around each command; they
     are used exactly as given (no fallback is applied here).

    Returns (commands, result_filepaths).
    """
    # Basenames of the files each run produces; the run index replaces %d.
    log_pattern = job_prefix + '.%d_otus.log'
    otus_pattern = job_prefix + '.%d_otus.txt'

    commands = []
    result_filepaths = []
    for i, fasta_fp in enumerate(fasta_fps):
        # Each run ends by moving its outputs from the tmp dir to
        # output_dir; the command doing that is built here.
        rename_command, moved_fps = get_rename_command(
            [log_pattern % i, otus_pattern % i], working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        command = (
            '%s %s %s -i %s -b %s -m blast -o %s -e %s -s %s '
            '--min_aligned_percent %s %s %s'
        ) % (
            command_prefix,
            python_exe_fp,
            pick_otus_fp,
            fasta_fp,
            blast_db,
            working_dir,
            max_e_value,
            similarity,
            min_aligned_percent,
            rename_command,
            command_suffix)
        commands.append(command)

    return commands, result_filepaths
def get_commands(python_exe_fp, assign_taxonomy_fp, confidence, job_prefix,
                 fasta_fps, rdp_jar_fp, output_dir, working_dir,
                 command_prefix=None, command_suffix=None):
    """Build RDP classifier commands, one per input fasta file.

    python_exe_fp, assign_taxonomy_fp: interpreter and script tokens placed
     at the start of each command.
    confidence: passed as -c (formatted with %1.2f).
    job_prefix: stem of the per-run output filenames.
    fasta_fps: iterable of fasta filepaths; the position of each file is
     the run index used in its output filenames.
    rdp_jar_fp: exported as RDP_JAR_PATH by the default command prefix.
    output_dir, working_dir: forwarded to get_rename_command; working_dir is
     also passed as -o.
    command_prefix: falsy values fall back to
     '/bin/bash; export RDP_JAR_PATH=<rdp_jar_fp>; '.
    command_suffix: falsy values fall back to '; exit'.

    Returns (commands, result_filepaths).
    """
    if not command_prefix:
        command_prefix = '/bin/bash; export RDP_JAR_PATH=%s; ' % rdp_jar_fp
    if not command_suffix:
        command_suffix = '; exit'

    # Basenames of the files each run produces; the run index replaces %d.
    out_filenames = [job_prefix + '.%d_tax_assignments.log',
                     job_prefix + '.%d_tax_assignments.txt']

    template = '%s %s %s -c %1.2f -m rdp -o %s -i %s %s %s'

    commands = []
    result_filepaths = []
    for run_index, fasta_fp in enumerate(fasta_fps):
        # Each run ends by moving its outputs from the tmp dir to
        # output_dir; the command doing that is built here.
        run_filenames = [fn % run_index for fn in out_filenames]
        rename_command, moved_fps = get_rename_command(
            run_filenames, working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        commands.append(template % (
            command_prefix,
            python_exe_fp,
            assign_taxonomy_fp,
            confidence,
            working_dir,
            fasta_fp,
            rename_command,
            command_suffix))

    return commands, result_filepaths
def get_job_commands(python_exe_fp, pick_otus_fp, fasta_fps,
                     output_dir, refseqs_fp, job_prefix, working_dir,
                     similarity, enable_rev_strand_match, optimal_uclust,
                     exact_uclust, max_accepts, max_rejects, stepwords,
                     word_length, stable_sort, save_uc_files,
                     command_prefix='/bin/bash; ', command_suffix='; exit'):
    """Build one uclust_ref pick_otus command string per input fasta file.

    Every command uses -m uclust_ref with --suppress_new_clusters.

    python_exe_fp, pick_otus_fp: interpreter and script tokens placed at
     the start of each command.
    fasta_fps: iterable of fasta filepaths; the position of each file is
     the run index used in its output filenames.
    refseqs_fp, similarity: passed as -r and -s.
    job_prefix: stem of the per-run output filenames.
    output_dir, working_dir: forwarded to get_rename_command; working_dir is
     also passed as -o.
    enable_rev_strand_match: when truthy, -z is added.
    optimal_uclust: when truthy, -A is added.
    exact_uclust: when truthy, -E is added.
    max_accepts, max_rejects: passed as --max_accepts and --max_rejects.
    stepwords, word_length: passed as --stepwords and --w (both %d).
    stable_sort: when falsy, --suppress_uclust_stable_sort is added.
    save_uc_files: when truthy, the clusters .uc file is added to the
     expected outputs; when falsy, -d is added to the command.
    command_prefix, command_suffix: text wrapped around each command; they
     are used exactly as given (no fallback is applied here).

    Returns (commands, result_filepaths).
    """
    # Basenames of the files each run produces; the run index is
    # substituted into each pattern.
    out_filenames = [job_prefix + '.%d_otus.log',
                     job_prefix + '.%d_otus.txt',
                     job_prefix + '.%s_failures.txt']

    # Translate the boolean switches into command-line flags.
    rev_strand_flag = '-z' if enable_rev_strand_match else ''
    optimal_flag = '-A' if optimal_uclust else ''
    exact_flag = '-E' if exact_uclust else ''
    stable_sort_flag = '' if stable_sort else '--suppress_uclust_stable_sort'

    if save_uc_files:
        uc_flag = ''
        # NOTE(review): unlike the patterns above, this one has no '.'
        # between job_prefix and the run index -- confirm it matches the
        # filename that is actually written.
        out_filenames.append(job_prefix + '%d_clusters.uc')
    else:
        uc_flag = '-d'

    template = ('%s %s %s -i %s -r %s -m uclust_ref --suppress_new_clusters '
                '-o %s -s %s %s %s %s --max_accepts %s --max_rejects %s '
                '--stepwords %d --w %d %s %s %s %s')

    commands = []
    result_filepaths = []
    for run_index, fasta_fp in enumerate(fasta_fps):
        # Each run ends by moving its outputs from the tmp dir to
        # output_dir; the command doing that is built here.
        run_filenames = [fn % run_index for fn in out_filenames]
        rename_command, moved_fps = get_rename_command(
            run_filenames, working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        commands.append(template % (
            command_prefix,
            python_exe_fp,
            pick_otus_fp,
            fasta_fp,
            refseqs_fp,
            working_dir,
            similarity,
            rev_strand_flag,
            optimal_flag,
            exact_flag,
            max_accepts,
            max_rejects,
            stepwords,
            word_length,
            stable_sort_flag,
            uc_flag,
            rename_command,
            command_suffix))

    return commands, result_filepaths
def get_job_commands(python_exe_fp, pick_otus_fp, fasta_fps, output_dir,
                     refseqs_fp, job_prefix, working_dir, similarity,
                     enable_rev_strand_match, optimal_uclust, exact_uclust,
                     max_accepts, max_rejects, stable_sort, save_uc_files,
                     command_prefix='/bin/bash; ', command_suffix='; exit'):
    """Build uclust_ref pick_otus commands, one per input fasta file.

    Every command uses -m uclust_ref with --suppress_new_clusters.

    python_exe_fp, pick_otus_fp: interpreter and script tokens placed at
     the start of each command.
    fasta_fps: iterable of fasta filepaths; the position of each file is
     the run index used in its output filenames.
    refseqs_fp, similarity: passed as -r and -s.
    job_prefix: stem of the per-run output filenames.
    output_dir, working_dir: forwarded to get_rename_command; working_dir is
     also passed as -o.
    enable_rev_strand_match: when truthy, -z is added.
    optimal_uclust: when truthy, -A is added.
    exact_uclust: when truthy, -E is added.
    max_accepts, max_rejects: passed as --max_accepts and --max_rejects.
    stable_sort: when truthy, --uclust_stable_sort is added.
    save_uc_files: when truthy, the clusters .uc file is added to the
     expected outputs; when falsy, -d is added to the command.
    command_prefix, command_suffix: text wrapped around each command; they
     are used exactly as given (no fallback is applied here).

    Returns (commands, result_filepaths).
    """
    # Basenames of the files each run produces; the run index is
    # substituted into each pattern.
    out_filenames = [job_prefix + '.%d_otus.log',
                     job_prefix + '.%d_otus.txt',
                     job_prefix + '.%s_failures.txt']

    # Translate the boolean switches into command-line flags.
    rev_strand_flag = '-z' if enable_rev_strand_match else ''
    optimal_flag = '-A' if optimal_uclust else ''
    exact_flag = '-E' if exact_uclust else ''
    stable_sort_flag = '--uclust_stable_sort' if stable_sort else ''

    if save_uc_files:
        uc_flag = ''
        # NOTE(review): unlike the patterns above, this one has no '.'
        # between job_prefix and the run index -- confirm it matches the
        # filename that is actually written.
        out_filenames.append(job_prefix + '%d_clusters.uc')
    else:
        uc_flag = '-d'

    template = ('%s %s %s -i %s -r %s -m uclust_ref --suppress_new_clusters '
                '-o %s -s %s %s %s %s --max_accepts %s --max_rejects %s '
                '%s %s %s %s')

    commands = []
    result_filepaths = []
    for run_index, fasta_fp in enumerate(fasta_fps):
        # Each run ends by moving its outputs from the tmp dir to
        # output_dir; the command doing that is built here.
        run_filenames = [fn % run_index for fn in out_filenames]
        rename_command, moved_fps = get_rename_command(
            run_filenames, working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        commands.append(template % (
            command_prefix,
            python_exe_fp,
            pick_otus_fp,
            fasta_fp,
            refseqs_fp,
            working_dir,
            similarity,
            rev_strand_flag,
            optimal_flag,
            exact_flag,
            max_accepts,
            max_rejects,
            stable_sort_flag,
            uc_flag,
            rename_command,
            command_suffix))

    return commands, result_filepaths
def get_job_commands(python_exe_fp, identify_chimeric_seqs_fp, fasta_fps,
                     output_dir, ref_seqs_fp, job_prefix, working_dir,
                     aligned_reference_seqs_fp, blast_db,
                     chimera_detection_method, min_div_ratio, num_fragments,
                     taxonomy_depth, max_e_value, id_to_taxonomy_fp,
                     command_prefix='', command_suffix=''):
    """Build one identify_chimeric_seqs command per input fasta file.

    python_exe_fp, identify_chimeric_seqs_fp: interpreter and script tokens
     placed at the start of each command.
    fasta_fps: iterable of fasta filepaths; the position of each file is
     the run index used in its output filename.
    job_prefix: stem of the per-run output filename
     (<job_prefix>.<index>_chimeric.txt, written under working_dir via -o).
    output_dir, working_dir: forwarded to get_rename_command.
    chimera_detection_method: selects the command layout:
     'blast_fragments' passes id_to_taxonomy_fp (-t), num_fragments (-n),
      taxonomy_depth (-d) and max_e_value (-e), plus -r ref_seqs_fp and
      -b blast_db when those are truthy;
     'ChimeraSlayer' passes aligned_reference_seqs_fp (-a), plus
      --min_div_ratio and -r ref_seqs_fp when those are truthy.
    command_prefix, command_suffix: text wrapped around each command; both
     default to the empty string.

    Returns (commands, result_filepaths).

    Raises NotImplementedError for any other chimera_detection_method; the
     check happens per input file, so an empty fasta_fps never raises.
    """
    # Basename of the file each run produces; the run index replaces %d.
    out_filenames = [job_prefix + '.%d_chimeric.txt']

    blast_template = ('%s %s %s -i %s -t %s -m blast_fragments -o %s '
                      '-n %s -d %s -e %s %s %s %s')
    slayer_template = '%s %s %s -i %s -a %s -m ChimeraSlayer -o %s %s %s %s'

    commands = []
    result_filepaths = []
    for run_index, fasta_fp in enumerate(fasta_fps):
        # Each run ends by moving its output from the tmp dir to
        # output_dir; the command doing that is built here.
        rename_command, moved_fps = get_rename_command(
            [fn % run_index for fn in out_filenames],
            working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        output_fp = working_dir + "/" + (out_filenames[0] % run_index)

        # Optional flags; each piece carries its own leading space.
        extras = []
        if chimera_detection_method == 'blast_fragments':
            if ref_seqs_fp:
                extras.append(" -r %s" % ref_seqs_fp)
            if blast_db:
                extras.append(" -b %s" % blast_db)
            command = blast_template % (
                command_prefix,
                python_exe_fp,
                identify_chimeric_seqs_fp,
                fasta_fp,
                id_to_taxonomy_fp,
                output_fp,
                num_fragments,
                taxonomy_depth,
                max_e_value,
                "".join(extras),
                rename_command,
                command_suffix)
        elif chimera_detection_method == 'ChimeraSlayer':
            if min_div_ratio:
                extras.append(" --min_div_ratio %s" % min_div_ratio)
            if ref_seqs_fp:
                extras.append(" -r %s" % ref_seqs_fp)
            command = slayer_template % (
                command_prefix,
                python_exe_fp,
                identify_chimeric_seqs_fp,
                fasta_fp,
                aligned_reference_seqs_fp,
                output_fp,
                "".join(extras),
                rename_command,
                command_suffix)
        else:
            raise NotImplementedError

        commands.append(command)

    return commands, result_filepaths
def get_job_commands(python_exe_fp, identify_chimeric_seqs_fp, fasta_fps,
                     output_dir, ref_seqs_fp, job_prefix, working_dir,
                     aligned_reference_seqs_fp, blast_db,
                     chimera_detection_method, min_div_ratio, num_fragments,
                     taxonomy_depth, max_e_value, id_to_taxonomy_fp,
                     command_prefix='', command_suffix=''):
    """Build identify_chimeric_seqs commands, one per input fasta file.

    python_exe_fp, identify_chimeric_seqs_fp: interpreter and script tokens
     placed at the start of each command.
    fasta_fps: iterable of fasta filepaths; the position of each file is
     the run index used in its output filename.
    job_prefix: stem of the per-run output filename
     (<job_prefix>.<index>_chimeric.txt, written under working_dir via -o).
    output_dir, working_dir: forwarded to get_rename_command.
    chimera_detection_method: selects the command layout:
     'blast_fragments' passes id_to_taxonomy_fp (-t), num_fragments (-n),
      taxonomy_depth (-d) and max_e_value (-e), plus -r ref_seqs_fp and
      -b blast_db when those are truthy;
     'ChimeraSlayer' passes aligned_reference_seqs_fp (-a), plus
      --min_div_ratio and -r ref_seqs_fp when those are truthy.
    command_prefix, command_suffix: text wrapped around each command; both
     default to the empty string.

    Returns (commands, result_filepaths).

    Raises NotImplementedError for any other chimera_detection_method; the
     check happens per input file, so an empty fasta_fps never raises.
    """
    # Pattern of the single file each run produces; the index replaces %d.
    chimeric_pattern = job_prefix + '.%d_chimeric.txt'

    commands = []
    result_filepaths = []
    for i, fasta_fp in enumerate(fasta_fps):
        # Each run ends by moving its output from the tmp dir to
        # output_dir; the command doing that is built here.
        rename_command, moved_fps = get_rename_command(
            [chimeric_pattern % i], working_dir, output_dir)
        result_filepaths.extend(moved_fps)

        output_fp = working_dir + "/" + (chimeric_pattern % i)

        if chimera_detection_method == 'blast_fragments':
            ref_opt = (" -r %s" % ref_seqs_fp) if ref_seqs_fp else ""
            db_opt = (" -b %s" % blast_db) if blast_db else ""
            command = (
                '%s %s %s -i %s -t %s -m blast_fragments -o %s '
                '-n %s -d %s -e %s %s %s %s'
            ) % (
                command_prefix,
                python_exe_fp,
                identify_chimeric_seqs_fp,
                fasta_fp,
                id_to_taxonomy_fp,
                output_fp,
                num_fragments,
                taxonomy_depth,
                max_e_value,
                ref_opt + db_opt,
                rename_command,
                command_suffix)
        elif chimera_detection_method == 'ChimeraSlayer':
            if min_div_ratio:
                ratio_opt = " --min_div_ratio %s" % min_div_ratio
            else:
                ratio_opt = ""
            ref_opt = (" -r %s" % ref_seqs_fp) if ref_seqs_fp else ""
            command = (
                '%s %s %s -i %s -a %s -m ChimeraSlayer -o %s %s %s %s'
            ) % (
                command_prefix,
                python_exe_fp,
                identify_chimeric_seqs_fp,
                fasta_fp,
                aligned_reference_seqs_fp,
                output_fp,
                ratio_opt + ref_opt,
                rename_command,
                command_suffix)
        else:
            raise NotImplementedError

        commands.append(command)

    return commands, result_filepaths