def main():
    """Build, submit, and optionally poll parallel single-rarefaction jobs.

    Splits the rarefaction work across cluster jobs, writes a jobs file,
    submits it via the cluster-jobs script, and (unless polling is
    suppressed) sets up a poller that detects job completion and cleans
    up temporary files.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # create local copies of command-line options
    input_path = opts.input_path
    output_dir = opts.output_path
    min_seqs = opts.min
    max_seqs = opts.max
    step = opts.step
    num_reps = opts.num_reps
    lineages_included = opts.lineages_included
    single_rarefaction_fp = opts.single_rarefaction_fp
    python_exe_fp = opts.python_exe_fp
    path_to_cluster_jobs = opts.cluster_jobs_fp
    poller_fp = opts.poller_fp
    retain_temp_files = opts.retain_temp_files
    suppress_polling = opts.suppress_polling
    seconds_to_sleep = opts.seconds_to_sleep
    poll_directly = opts.poll_directly
    jobs_to_start = opts.jobs_to_start

    created_temp_paths = []

    # split the input filepath into directory and filename, base filename
    # and extension
    input_dir, input_fn = split(input_path)
    input_file_basename, input_file_ext = splitext(input_fn)

    # set the job_prefix either based on what the user passed in,
    # or a random string beginning with RARIF
    job_prefix = opts.job_prefix or get_random_job_prefix('RARIF')

    # A temporary output directory is created in output_dir named
    # job_prefix. Output files are then moved from the temporary
    # directory to the output directory when they are complete, allowing
    # a poller to detect when runs complete by the presence of their
    # output files.
    working_dir = '%s/%s' % (output_dir, job_prefix)
    try:
        makedirs(working_dir)
        created_temp_paths.append(working_dir)
    except OSError:
        # working_dir already exists
        pass

    # build the filepath for the 'jobs script'
    jobs_fp = '%s/%sjobs.txt' % (output_dir, job_prefix)
    created_temp_paths.append(jobs_fp)

    # generate the list of commands to be pushed out to nodes
    commands, job_result_filepaths = \
        get_job_commands(python_exe_fp, single_rarefaction_fp, job_prefix,
                         input_path, output_dir, working_dir, min_seqs,
                         max_seqs, step, num_reps, lineages_included,
                         command_prefix=' ', command_suffix=' ')

    # Merge commands into jobs_to_start number of jobs
    commands = merge_to_n_commands(commands, jobs_to_start)

    # poller_command is only assigned when polling is set up below; the
    # direct-polling branch uses this sentinel to detect the contradictory
    # combination of --poll_directly with --suppress_polling (which used
    # to raise an opaque NameError)
    poller_command = None

    # Set up poller apparatus if the user does not suppress polling
    if not suppress_polling:
        # Write the list of files which must exist for the jobs to be
        # considered complete
        expected_files_filepath = '%s/expected_out_files.txt' % working_dir
        write_filepaths_to_file(job_result_filepaths, expected_files_filepath)
        created_temp_paths.append(expected_files_filepath)

        # Write the mapping file even though no merging is necessary
        # (get_poller_command requires this, but a future version won't)
        merge_map_filepath = '%s/merge_map.txt' % working_dir
        open(merge_map_filepath, 'w').close()
        created_temp_paths.append(merge_map_filepath)

        # Create the filepath listing the temporary files to be deleted,
        # but don't write it yet
        deletion_list_filepath = '%s/deletion_list.txt' % working_dir
        created_temp_paths.append(deletion_list_filepath)

        if not poll_directly:
            # Generate the command to run the poller, and the list of temp
            # files created by the poller
            poller_command, poller_result_filepaths = \
                get_poller_command(python_exe_fp, poller_fp,
                                   expected_files_filepath,
                                   merge_map_filepath,
                                   deletion_list_filepath,
                                   seconds_to_sleep=seconds_to_sleep)
            # append the poller command to the list of job commands
            commands.append(poller_command)
        else:
            poller_command, poller_result_filepaths = \
                get_poller_command(python_exe_fp, poller_fp,
                                   expected_files_filepath,
                                   merge_map_filepath,
                                   deletion_list_filepath,
                                   seconds_to_sleep=seconds_to_sleep,
                                   command_prefix='', command_suffix='')
        # Register the poller's temp files in both branches so they are
        # included in the deletion list (previously they were only
        # registered in the poll-directly branch)
        created_temp_paths += poller_result_filepaths

        if not retain_temp_files:
            # If the user wants temp files deleted, now write the list of
            # temp files to be deleted
            write_filepaths_to_file(created_temp_paths, deletion_list_filepath)
        else:
            # Otherwise just write an empty file
            write_filepaths_to_file([], deletion_list_filepath)

    # write the commands to the 'jobs files'
    write_jobs_file(commands, job_prefix=job_prefix, jobs_fp=jobs_fp)

    # submit the jobs file using cluster_jobs, if not suppressed by the
    # user
    if not opts.suppress_submit_jobs:
        submit_jobs(path_to_cluster_jobs, jobs_fp, job_prefix)

    if poll_directly:
        if poller_command is None:
            # Polling was suppressed, so there is no poller to run
            print('**Cannot poll directly when polling is suppressed.')
            exit(-1)
        try:
            check_call(poller_command.split())
        except CalledProcessError as e:
            print('**Error occurring when calling the poller directly. '
                  'Jobs may have been submitted, but are not being polled.')
            print(str(e))
            exit(-1)
def main():
    """Build, submit, and optionally poll parallel taxonomy-assignment jobs.

    Splits the input fasta into roughly equal chunks (one per job), writes
    per-chunk assign-taxonomy commands to a jobs file, submits it via the
    cluster-jobs script, and (unless polling is suppressed) sets up a
    poller that merges per-job outputs and cleans up temporary files.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # create local copies of command-line options
    python_exe_fp = opts.python_exe_fp
    assign_taxonomy_fp = opts.assign_taxonomy_fp
    confidence = opts.confidence
    rdp_classifier_fp = opts.rdp_classifier_fp
    id_to_taxonomy_fp = opts.id_to_taxonomy_fp
    reference_seqs_fp = opts.reference_seqs_fp
    cluster_jobs_fp = opts.cluster_jobs_fp
    input_fasta_fp = opts.input_fasta_fp
    jobs_to_start = opts.jobs_to_start
    output_dir = opts.output_dir
    poller_fp = opts.poller_fp
    retain_temp_files = opts.retain_temp_files
    suppress_polling = opts.suppress_polling
    seconds_to_sleep = opts.seconds_to_sleep
    poll_directly = opts.poll_directly

    if not isfile(input_fasta_fp):
        raise ValueError('This file does not exist: %s' % input_fasta_fp)

    # A custom training set requires both the id-to-taxonomy map and the
    # reference sequences; supplying only one of them is an error
    if id_to_taxonomy_fp or reference_seqs_fp:
        if not id_to_taxonomy_fp or not isfile(id_to_taxonomy_fp):
            raise ValueError('This file does not exist: %s'
                             % id_to_taxonomy_fp)
        if not reference_seqs_fp or not isfile(reference_seqs_fp):
            raise ValueError('This file does not exist: %s'
                             % reference_seqs_fp)

    try:
        makedirs(output_dir)
    except OSError:
        # output dir already exists
        pass

    created_temp_paths = []

    # split the input filepath into directory and filename, base filename
    # and extension
    input_dir, input_fasta_fn = split(input_fasta_fp)
    input_file_basename, input_fasta_ext = splitext(input_fasta_fn)

    # set the job_prefix either based on what the user passed in,
    # or a random string beginning with RDP
    job_prefix = opts.job_prefix or get_random_job_prefix('RDP')

    # A temporary output directory is created in output_dir named
    # job_prefix. Output files are then moved from the temporary
    # directory to the output directory when they are complete, allowing
    # a poller to detect when runs complete by the presence of their
    # output files.
    working_dir = '%s/%s' % (output_dir, job_prefix)
    try:
        mkdir(working_dir)
        created_temp_paths.append(working_dir)
    except OSError:
        # working dir already exists
        pass

    # compute the number of sequences that should be included in
    # each file after splitting the input fasta file
    num_seqs_per_file = compute_seqs_per_file(input_fasta_fp, jobs_to_start)

    # split the fasta files and get the list of resulting files
    tmp_fasta_fps = \
        split_fasta(open(input_fasta_fp), num_seqs_per_file,
                    job_prefix, output_dir)
    created_temp_paths += tmp_fasta_fps

    # build the filepath for the 'jobs script'
    jobs_fp = '%s/%sjobs.txt' % (output_dir, job_prefix)
    created_temp_paths.append(jobs_fp)

    # generate the list of commands to be pushed out to nodes
    commands, job_result_filepaths = \
        get_commands(python_exe_fp, assign_taxonomy_fp, confidence,
                     job_prefix, tmp_fasta_fps, rdp_classifier_fp,
                     output_dir, working_dir,
                     id_to_taxonomy_fp=id_to_taxonomy_fp,
                     reference_seqs_fp=reference_seqs_fp)
    created_temp_paths += job_result_filepaths

    # poller_command is only assigned when polling is set up below; the
    # direct-polling branch uses this sentinel to detect the contradictory
    # combination of --poll_directly with --suppress_polling (which used
    # to raise an opaque NameError)
    poller_command = None

    # Set up poller apparatus if the user does not suppress polling
    if not suppress_polling:
        # Write the list of files which must exist for the jobs to be
        # considered complete
        expected_files_filepath = '%s/expected_out_files.txt' % working_dir
        write_filepaths_to_file(job_result_filepaths, expected_files_filepath)
        created_temp_paths.append(expected_files_filepath)

        # Write the mapping file which describes how the output files from
        # each job should be merged into the final output files
        merge_map_filepath = '%s/merge_map.txt' % working_dir
        write_merge_map_file_assign_taxonomy(job_result_filepaths,
                                             output_dir,
                                             merge_map_filepath,
                                             input_file_basename)
        created_temp_paths.append(merge_map_filepath)

        # Create the filepath listing the temporary files to be deleted,
        # but don't write it yet
        deletion_list_filepath = '%s/deletion_list.txt' % working_dir
        created_temp_paths.append(deletion_list_filepath)

        # Generate the command to run the poller, and the list of temp
        # files created by the poller
        if not poll_directly:
            poller_command, poller_result_filepaths = \
                get_poller_command(python_exe_fp, poller_fp,
                                   expected_files_filepath,
                                   merge_map_filepath,
                                   deletion_list_filepath,
                                   seconds_to_sleep=seconds_to_sleep)
            created_temp_paths += poller_result_filepaths
            # append the poller command to the list of job commands
            commands.append(poller_command)
        else:
            poller_command, poller_result_filepaths = \
                get_poller_command(python_exe_fp, poller_fp,
                                   expected_files_filepath,
                                   merge_map_filepath,
                                   deletion_list_filepath,
                                   seconds_to_sleep=seconds_to_sleep,
                                   command_prefix='', command_suffix='')
            created_temp_paths += poller_result_filepaths

        if not retain_temp_files:
            # If the user wants temp files deleted, now write the list of
            # temp files to be deleted
            write_filepaths_to_file(created_temp_paths, deletion_list_filepath)
        else:
            # Otherwise just write an empty file
            write_filepaths_to_file([], deletion_list_filepath)

    # write the commands to the 'jobs files'
    write_jobs_file(commands, job_prefix=job_prefix, jobs_fp=jobs_fp)

    # submit the jobs file using cluster_jobs, if not suppressed by the
    # user
    if not opts.suppress_submit_jobs:
        submit_jobs(cluster_jobs_fp, jobs_fp, job_prefix)

    if poll_directly:
        if poller_command is None:
            # Polling was suppressed, so there is no poller to run
            print('**Cannot poll directly when polling is suppressed.')
            exit(-1)
        try:
            check_call(poller_command.split())
        except CalledProcessError as e:
            print('**Error occurring when calling the poller directly. '
                  'Jobs may have been submitted, but are not being polled.')
            print(str(e))
            exit(-1)