def setup_workers(num_cpus, outdir, server_socket, verbose=True, error_profile=None):
    """Spawn denoiser worker jobs and wait for each one to connect back.

    num_cpus: number of cores
    outdir: directory the workers will work in
    server_socket: an open socket to the server
    verbose: verbose flag passed to the workers
    error_profile: filepath to the error profiles, passed to workers
    """
    qiime_config = load_qiime_config()
    worker_script = join(get_qiime_scripts_dir(), "denoiser_worker.py")
    dispatch_script = join(get_qiime_scripts_dir(), "ec2Dispatch")
    # anything but the literal "False" means we launch through the EC2 dispatcher
    on_cloud = qiime_config['cloud_environment'] != "False"

    # somewhat unique id for the cluster job
    job_tag = "".join(sample(list(lowercase), 8))
    host, port = server_socket.getsockname()

    workers = []
    client_sockets = []
    # TODO: this should be set to a defined wait time using alarm()
    for worker_num in range(num_cpus):
        worker_name = outdir + ("/%sworker%d" % (job_tag, worker_num))
        workers.append(worker_name)

        if on_cloud:
            cmd = "%s %d %s %s -f %s -s %s -p %s" % (
                dispatch_script, worker_num + 1,
                qiime_config['python_exe_fp'], worker_script,
                worker_name, host, port)
        else:
            cmd = "%s %s -f %s -s %s -p %s" % (
                qiime_config['python_exe_fp'], worker_script,
                worker_name, host, port)
        if verbose:
            cmd += " -v"
        if error_profile:
            cmd += " -e %s" % error_profile

        submit_jobs([cmd], job_tag)
        # wait until the client connects
        # This might be a race condition -> make the client robust
        client_socket, client_address = server_socket.accept()
        client_sockets.append((client_socket, client_address))

    return workers, client_sockets
def setup_workers(num_cpus, outdir, server_socket, verbose=True, error_profile=None):
    """Start workers waiting for data.

    num_cpus: number of cores
    outdir: directory where the workers will work in
    server_socket: an open socket to the server
    verbose: verbose flag passed to the workers
    error_profile: filepath to the error profiles, passed to workers
    """
    qiime_config = load_qiime_config()
    DENOISE_WORKER = join(get_qiime_scripts_dir(), "denoiser_worker.py")
    CLOUD_DISPATCH = join(get_qiime_scripts_dir(), "ec2Dispatch")
    CLOUD_ENV = qiime_config['cloud_environment']
    # Any config value other than the literal string "False" routes the
    # worker launch through the EC2 dispatch wrapper.
    CLOUD = not CLOUD_ENV == "False"
    workers = []
    client_sockets = []
    # somewhat unique id for cluster job
    tmpname = "".join(sample(list(lowercase), 8))
    host, port = server_socket.getsockname()
    # TODO: this should be set to a defined wait time using alarm()
    for i in range(num_cpus):
        name = outdir + ("/%sworker%d" % (tmpname, i))
        workers.append(name)
        if CLOUD:
            cmd = "%s %d %s %s -f %s -s %s -p %s" % (CLOUD_DISPATCH, i + 1,
                                                     qiime_config['python_exe_fp'],
                                                     DENOISE_WORKER,
                                                     name, host, port)
        else:
            cmd = "%s %s -f %s -s %s -p %s" % (qiime_config['python_exe_fp'],
                                               DENOISE_WORKER,
                                               name, host, port)
        if verbose:
            cmd += " -v"
        if error_profile:
            cmd += " -e %s" % error_profile
        submit_jobs([cmd], tmpname)
        # wait until the client connects
        # This might be a race condition -> make the client robust
        client_socket, client_address = server_socket.accept()
        client_sockets.append((client_socket, client_address))
    return workers, client_sockets
def test_denoise_worker(self):
    """denoiser_worker.py is where it belongs and is callable."""
    config = load_qiime_config()
    python_bin = config['python_exe_fp']
    worker_fp = get_qiime_scripts_dir() + "/denoiser_worker.py"

    self.assertTrue(exists(worker_fp),
                    "DENOISER_WORKER is not where it's supposed to be: %s"
                    % worker_fp)

    # test if its callable and actually works
    proc = Popen("%s %s -h" % (python_bin, worker_fp),
                 shell=True, universal_newlines=True,
                 stdout=PIPE, stderr=STDOUT)
    if proc.wait() != 0:
        self.fail("Calling %s failed. Check permissions and that it is in fact an executable." % worker_fp)

    # check that the help string looks correct
    self.assertTrue(proc.stdout.read().startswith("Usage"))
def run_make_otu_heatmap_html(otu_table_fp, mapping_fp, output_dir, params,
                              qiime_config, command_handler, tree_fp,
                              status_update_callback=print_to_stdout):
    """ This function calls the make_otu_heatmap_html script """
    # resolve the interpreter and script location up front
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()

    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params,
                            qiime_config=qiime_config)

    # user-supplied parameters for this step, empty if none were given
    try:
        params_str = get_params_str(params['make_otu_heatmap_html'])
    except KeyError:
        params_str = ''

    # assemble the make_otu_heatmap_html invocation
    heatmap_cmd = '%s %s/make_otu_heatmap_html.py -i %s -m %s -t %s -o %s %s' %\
        (python_exe_fp, script_dir, otu_table_fp, mapping_fp, tree_fp,
         output_dir, params_str)

    commands = [[('OTU Heatmap', heatmap_cmd)]]

    # hand the single command off to the supplied handler
    command_handler(commands, status_update_callback, logger)

    return True
def test_main(self):
    """Denoiser should always give same result on test data"""
    # Expected centroid FASTA output, compared verbatim below.
    expected = """>FS8APND01D3TW3 | cluster size: 94 CTCCCGTAGGAGTCTGGGCCGTATCTCAGTCCCAATGTGGCCGGTCACCCTCTCAGGCCGGCTACCCGTCAAAGCCTTGGTAAGCCACTACCCCACCAACAAGCTGATAAGCCGCGAGTCCATCCCCAACCGCCGAAACTTTCCAACCCCCACCATGCAGCAGGAGCTCCTATCCGGTATTAGCCCCAGTTTCCTGAAGTTATCCCAAAGTCAAGGGCAGGTTACTCACGTGTTACTCACCCGTTCGCC """
    # Expected denoiser mapping (centroid id -> tab-separated member reads),
    # compared verbatim below.
    expected_map = """FS8APND01EWRS4: FS8APND01DXG45: FS8APND01D3TW3:\tFS8APND01CSXFN\tFS8APND01DQ8MX\tFS8APND01DY7QW\tFS8APND01B5QNI\tFS8APND01CQ6OG\tFS8APND01C7IGN\tFS8APND01DHSGH\tFS8APND01DJ17E\tFS8APND01CUXOA\tFS8APND01EUTYG\tFS8APND01EKK7T\tFS8APND01D582W\tFS8APND01B5GWU\tFS8APND01D7N2A\tFS8APND01BJGHZ\tFS8APND01D6DYZ\tFS8APND01C6ZIM\tFS8APND01D2X6Y\tFS8APND01BUYCE\tFS8APND01BNUEY\tFS8APND01DKLOE\tFS8APND01C24PP\tFS8APND01EBWQX\tFS8APND01ELDYW\tFS8APND01B0GCS\tFS8APND01D4QXI\tFS8APND01EMYD9\tFS8APND01EA2SK\tFS8APND01DZOSO\tFS8APND01DHYAZ\tFS8APND01C7UD9\tFS8APND01BTZFV\tFS8APND01CR78R\tFS8APND01B39IE\tFS8APND01ECVC0\tFS8APND01DM3PL\tFS8APND01DELWS\tFS8APND01CIEK8\tFS8APND01D7ZOZ\tFS8APND01CZSAI\tFS8APND01DYOVR\tFS8APND01BX9XY\tFS8APND01DEWJA\tFS8APND01BEKIW\tFS8APND01DCKB9\tFS8APND01EEYIS\tFS8APND01DDKEA\tFS8APND01DSZLO\tFS8APND01C6EBC\tFS8APND01EE15M\tFS8APND01ELO9B\tFS8APND01C58QY\tFS8APND01DONCG\tFS8APND01DVXX2\tFS8APND01BL5YT\tFS8APND01BIL2V\tFS8APND01EBSYQ\tFS8APND01CCX8R\tFS8APND01B2YCJ\tFS8APND01B1JG4\tFS8APND01DJ024\tFS8APND01BIJY0\tFS8APND01CIA4G\tFS8APND01DV74M\tFS8APND01ECAX5\tFS8APND01DC3TZ\tFS8APND01EJVO6\tFS8APND01D4VFG\tFS8APND01DYYYO\tFS8APND01D1EDD\tFS8APND01DQUOT\tFS8APND01A2NSJ\tFS8APND01DDC8I\tFS8APND01BP1T2\tFS8APND01DPY6U\tFS8APND01CIQGV\tFS8APND01BPUT8\tFS8APND01BDNH4\tFS8APND01DOZDN\tFS8APND01DS866\tFS8APND01DGS2J\tFS8APND01EDK32\tFS8APND01EPA0T\tFS8APND01CK3JM\tFS8APND01BKLWW\tFS8APND01DV0BO\tFS8APND01DPNXE\tFS8APND01B7LUA\tFS8APND01BTTE2\tFS8APND01CKO4X\tFS8APND01DGGBY\tFS8APND01C4NHX\tFS8APND01DYPQN FS8APND01BSTVP: FS8APND01EFK0W: FS8APND01DCIOO: 
FS8APND01CKOMZ: """
    # Run the denoiser end-to-end on the bundled test sff.txt.
    command = " ".join(["%s/denoiser.py" % get_qiime_scripts_dir(),
                        "--force", "-o", self.test_dir, "-i",
                        "%s/qiime/support_files/denoiser/TestData/denoiser_test_set.sff.txt" % PROJECT_HOME]);
    result = Popen(command, shell=True, universal_newlines=True,
                   stdout=PIPE, stderr=STDOUT).stdout.read()
    self.result_dir = self.test_dir

    # centroids must match the fixture exactly
    observed = "".join(list(open(self.result_dir + "centroids.fasta")))
    self.assertEqual(observed, expected)

    # six reads are expected to remain as singletons
    self.assertEqual(len(list(MinimalFastaParser(
        open(self.result_dir + "singletons.fasta")))), 6)

    # mapping file must match the fixture exactly
    observed = "".join(list(open(self.result_dir + "denoiser_mapping.txt")))
    self.assertEqual(observed, expected_map)
def summarize_otus(processed_dir):
    """Summarize per-library OTU counts for a processed study directory.

    processed_dir: directory containing a 'gg_97_otus' subdirectory with the
        OTU table; per_library_stats.txt is generated there on first call.

    Returns (header_lines, otu_summary_dict) as parsed by parse_log_file.
    """
    per_library_stats_file = join(processed_dir,
                                  'gg_97_otus/per_library_stats.txt')

    # Generate the per_library_stats_file if it doesn't already exist
    if not exists(per_library_stats_file):
        qiime_config = load_qiime_config()
        biom_file = join(processed_dir,
                         'gg_97_otus/exact_uclust_ref_otu_table.biom')
        python_exe_fp = qiime_config['python_exe_fp']
        script_dir = get_qiime_scripts_dir()
        per_library_stats_script = join(script_dir, 'per_library_stats.py')
        command = '{0} {1} -i {2}'.format(python_exe_fp,
                                          per_library_stats_script,
                                          biom_file)

        # Run the script and capture its output. communicate() drains the
        # pipe while the child runs; the original wait()-then-read() order
        # can deadlock once the child's output fills the OS pipe buffer.
        proc = Popen(command, shell=True, universal_newlines=True,
                     stdout=PIPE, stderr=STDOUT)
        stats_output = proc.communicate()[0]

        # Persist the captured stats; close the handle even if write fails.
        f = open(per_library_stats_file, 'w')
        try:
            f.write(stats_output)
        finally:
            f.close()

    # File exists, parse out details
    start_lines = ['Seqs/sample detail:']
    header_lines, otu_summary_dict = parse_log_file(per_library_stats_file,
                                                    start_lines)
    return header_lines, otu_summary_dict
def test_get_qiime_scripts_dir(self):
    """Test that we can find the directory containing QIIME scripts."""
    # get_qiime_scripts_dir raises on failure, so reaching the assertion
    # already means the lookup itself succeeded.
    scripts_dir = get_qiime_scripts_dir()
    failure_msg = "The QIIME scripts directory does not exist: %s" % scripts_dir
    self.assertTrue(isdir(scripts_dir), failure_msg)
def test_main(self):
    """Denoiser should always give same result on test data"""
    # Expected centroid FASTA output (compared verbatim).
    expected = """>FS8APND01D3TW3 | cluster size: 94 CTCCCGTAGGAGTCTGGGCCGTATCTCAGTCCCAATGTGGCCGGTCACCCTCTCAGGCCGGCTACCCGTCAAAGCCTTGGTAAGCCACTACCCCACCAACAAGCTGATAAGCCGCGAGTCCATCCCCAACCGCCGAAACTTTCCAACCCCCACCATGCAGCAGGAGCTCCTATCCGGTATTAGCCCCAGTTTCCTGAAGTTATCCCAAAGTCAAGGGCAGGTTACTCACGTGTTACTCACCCGTTCGCC """
    # Expected mapping of centroid ids to their tab-separated member reads
    # (compared verbatim).
    expected_map = """FS8APND01EWRS4: FS8APND01DXG45: FS8APND01D3TW3:\tFS8APND01CSXFN\tFS8APND01DQ8MX\tFS8APND01DY7QW\tFS8APND01B5QNI\tFS8APND01CQ6OG\tFS8APND01C7IGN\tFS8APND01DHSGH\tFS8APND01DJ17E\tFS8APND01CUXOA\tFS8APND01EUTYG\tFS8APND01EKK7T\tFS8APND01D582W\tFS8APND01B5GWU\tFS8APND01D7N2A\tFS8APND01BJGHZ\tFS8APND01D6DYZ\tFS8APND01C6ZIM\tFS8APND01D2X6Y\tFS8APND01BUYCE\tFS8APND01BNUEY\tFS8APND01DKLOE\tFS8APND01C24PP\tFS8APND01EBWQX\tFS8APND01ELDYW\tFS8APND01B0GCS\tFS8APND01D4QXI\tFS8APND01EMYD9\tFS8APND01EA2SK\tFS8APND01DZOSO\tFS8APND01DHYAZ\tFS8APND01C7UD9\tFS8APND01BTZFV\tFS8APND01CR78R\tFS8APND01B39IE\tFS8APND01ECVC0\tFS8APND01DM3PL\tFS8APND01DELWS\tFS8APND01CIEK8\tFS8APND01D7ZOZ\tFS8APND01CZSAI\tFS8APND01DYOVR\tFS8APND01BX9XY\tFS8APND01DEWJA\tFS8APND01BEKIW\tFS8APND01DCKB9\tFS8APND01EEYIS\tFS8APND01DDKEA\tFS8APND01DSZLO\tFS8APND01C6EBC\tFS8APND01EE15M\tFS8APND01ELO9B\tFS8APND01C58QY\tFS8APND01DONCG\tFS8APND01DVXX2\tFS8APND01BL5YT\tFS8APND01BIL2V\tFS8APND01EBSYQ\tFS8APND01CCX8R\tFS8APND01B2YCJ\tFS8APND01B1JG4\tFS8APND01DJ024\tFS8APND01BIJY0\tFS8APND01CIA4G\tFS8APND01DV74M\tFS8APND01ECAX5\tFS8APND01DC3TZ\tFS8APND01EJVO6\tFS8APND01D4VFG\tFS8APND01DYYYO\tFS8APND01D1EDD\tFS8APND01DQUOT\tFS8APND01A2NSJ\tFS8APND01DDC8I\tFS8APND01BP1T2\tFS8APND01DPY6U\tFS8APND01CIQGV\tFS8APND01BPUT8\tFS8APND01BDNH4\tFS8APND01DOZDN\tFS8APND01DS866\tFS8APND01DGS2J\tFS8APND01EDK32\tFS8APND01EPA0T\tFS8APND01CK3JM\tFS8APND01BKLWW\tFS8APND01DV0BO\tFS8APND01DPNXE\tFS8APND01B7LUA\tFS8APND01BTTE2\tFS8APND01CKO4X\tFS8APND01DGGBY\tFS8APND01C4NHX\tFS8APND01DYPQN FS8APND01BSTVP: FS8APND01EFK0W: FS8APND01DCIOO: 
FS8APND01CKOMZ: """
    # Run the denoiser script end-to-end against the bundled test data.
    command = " ".join(["%s/denoiser.py" % get_qiime_scripts_dir(),
                        "--force", "-o", self.test_dir, "-i",
                        "%s/qiime/support_files/denoiser/TestData/denoiser_test_set.sff.txt" % PROJECT_HOME]);
    result = Popen(command, shell=True, universal_newlines=True,
                   stdout=PIPE, stderr=STDOUT).stdout.read()
    self.result_dir = self.test_dir

    # centroids must match the fixture exactly
    observed = "".join(list(open(self.result_dir + "centroids.fasta")))
    self.assertEqual(observed, expected)

    # six singleton reads are expected
    self.assertEqual(len(list(MinimalFastaParser(
        open(self.result_dir + "singletons.fasta")))), 6)

    # mapping must match the fixture exactly
    observed = "".join(list(open(self.result_dir + "denoiser_mapping.txt")))
    self.assertEqual(observed, expected_map)
def test_main_with_titanium_error(self):
    """Denoiser with titanium error should always give same result on test data"""
    denoiser_fp = "%s/denoiser.py" % get_qiime_scripts_dir()
    sff_fp = "%s/qiime/support_files/denoiser/TestData/denoiser_test_set.sff.txt" % PROJECT_HOME
    fasta_fp = "%s/qiime/support_files/denoiser/TestData/test_set_seqs.fna" % PROJECT_HOME
    profile_fp = "%s/qiime/support_files/denoiser/Data/Titanium_error_profile.dat" % PROJECT_HOME

    # run the denoiser with the Titanium error profile supplied via -e
    command = " ".join([denoiser_fp, "--force", "-o", self.test_dir,
                        "-i", sff_fp, "-f", fasta_fp, "-e", profile_fp])
    result = Popen(command, shell=True, universal_newlines=True,
                   stdout=PIPE, stderr=STDOUT).stdout.read()
    self.result_dir = self.test_dir

    # both output files must match the stored fixtures exactly
    observed = "".join(list(open(self.result_dir + "centroids.fasta")))
    self.assertEqual(observed, self.expected)
    observed = "".join(list(open(self.result_dir + "denoiser_mapping.txt")))
    self.assertEqual(observed, self.expected_titanium_map_string)
def test_get_qiime_scripts_dir(self): """Test that we can find the directory containing QIIME scripts.""" # get_qiime_scripts_dir will raise an error if it can't find a scripts # directory. scripts_dir = get_qiime_scripts_dir() self.assertTrue(isdir(scripts_dir), "The QIIME scripts directory does " "not exist: %s" % scripts_dir)
def test_main_split_cluster(self):
    """Denoiser on cluster in split mode should always give same result on test data"""
    denoiser_fp = "%s/denoiser.py" % get_qiime_scripts_dir()
    sff_fp = "%s/qiime/support_files/denoiser/TestData/denoiser_test_set.sff.txt" % PROJECT_HOME
    fasta_fp = "%s/qiime/support_files/denoiser/TestData/test_set_seqs.fna" % PROJECT_HOME

    # -S: split mode, -c: cluster, -n 2: two workers
    command = " ".join([denoiser_fp, "-S", "--force", "-c", "-n 2",
                        "-i", sff_fp, "-f", fasta_fp, "-o", self.test_dir])
    result = Popen(command, shell=True, universal_newlines=True,
                   stdout=PIPE, stderr=STDOUT).stdout.read()
    self.result_dir = self.test_dir

    # each split writes into its own numbered subdirectory
    for subdir in ("0/", "1/"):
        centroids_fp = self.result_dir + subdir + "centroids.fasta"
        self.assertEqual("".join(list(open(centroids_fp))),
                         expected_centroids[subdir])
        mapping_fp = self.result_dir + subdir + "denoiser_mapping.txt"
        self.assertEqual("".join(list(open(mapping_fp))),
                         expected_map_string_on_cluster[subdir])
def run_process_illumina_through_split_lib(study_id,run_prefix,input_fp, mapping_fp, output_dir, command_handler, params, qiime_config, write_to_all_fasta=False, status_update_callback=print_to_stdout): """ NOTE: Parts of this function are a directly copied from the run_qiime_data_preparation function from the workflow.py library file in QIIME. The steps performed by this function are: 1) De-multiplex sequences. (split_libraries_fastq.py) """ # Prepare some variables for the later steps filenames=input_fp.split(',') commands = [] create_dir(output_dir) python_exe_fp = qiime_config['python_exe_fp'] script_dir = get_qiime_scripts_dir() logger = WorkflowLogger(generate_log_fp(output_dir), params=params, qiime_config=qiime_config) # copy the mapping file copied_mapping=split(mapping_fp)[-1] mapping_input_fp_copy=join(output_dir, copied_mapping) copy_mapping_cmd='cp %s %s' % (mapping_fp,mapping_input_fp_copy) commands.append([('CopyMapping', copy_mapping_cmd)]) # sort the filenames filenames.sort() # determine which file is seq-file and which is barcode-file and associate # to mapping file if len(filenames) == 1: try: # Format of sample_id needs to be seqs_<sample_name>.<sequence_prep_id>.fastq data_access = data_access_factory(ServerConfig.data_access_type) sql = """ select s.sample_name || '.' || sp.sequence_prep_id from sample s inner join sequence_prep sp on s.sample_id = sp.sample_id where s.study_id = {0} and sp.run_prefix = '{1}' """.format(study_id, run_prefix[:-1]) sample_and_prep = data_access.dynamicMetadataSelect(sql).fetchone()[0] input_str = '-i {0} --sample_id {1}'.format(filenames[0], sample_and_prep) except Exception, e: error = 'Failed to obtain sample and sequence prep info for study_id {0} and run_prefix {1}\n'.format(study_id, run_prefix) error += 'SQL was: \n {0} \n'.format(sql) error += 'Original exception was: \n {0}'.format(str(e)) raise Exception(error)
def test_main_low_mem(self):
    """Denoiser works using low_memory"""
    denoiser_fp = "%s/denoiser.py" % get_qiime_scripts_dir()
    fasta_fp = "%s/qiime/support_files/denoiser/TestData/test_set_seqs.fna" % PROJECT_HOME
    sff_fp = "%s/qiime/support_files/denoiser/TestData/denoiser_test_set.sff.txt" % PROJECT_HOME

    # run with the --low_memory flag enabled
    command = " ".join([denoiser_fp, "-f", fasta_fp, "-i", sff_fp,
                        "-o", self.test_dir, "--low_memory"])
    result = Popen(command, shell=True, universal_newlines=True,
                   stdout=PIPE, stderr=STDOUT).stdout.read()
    self.result_dir = self.test_dir

    # centroids are compared against the shared expected fixture
    observed = "".join(list(open(self.result_dir + "centroids.fasta")))
    self.assertEqual(observed, self.expected)
def test_main_low_mem(self):
    """Denoiser works using low_memory"""
    # Run the denoiser end-to-end with the --low_memory flag on the
    # bundled test data set.
    command = " ".join(["%s/denoiser.py" % get_qiime_scripts_dir(),
                        "-f", "%s/qiime/support_files/denoiser/TestData/test_set_seqs.fna" % PROJECT_HOME,
                        "-i", "%s/qiime/support_files/denoiser/TestData/denoiser_test_set.sff.txt" % PROJECT_HOME,
                        "-o", self.test_dir, "--low_memory"])
    result = Popen(command, shell=True, universal_newlines=True,
                   stdout=PIPE, stderr=STDOUT).stdout.read()
    self.result_dir = self.test_dir

    # centroids are compared against the shared expected fixture
    observed = "".join(list(open(self.result_dir + "centroids.fasta")))
    self.assertEqual(observed, self.expected)
def test_main_with_fasta(self):
    """Denoiser with fasta file should always give same result on test data"""
    denoiser_fp = "%s/denoiser.py" % get_qiime_scripts_dir()
    sff_fp = "%s/qiime/support_files/denoiser/TestData/denoiser_test_set.sff.txt" % PROJECT_HOME
    fasta_fp = "%s/qiime/support_files/denoiser/TestData/test_set_seqs.fna" % PROJECT_HOME

    # run with a split_libraries-style fasta file supplied via -f
    command = " ".join([denoiser_fp, "--force", "-o", self.test_dir,
                        "-i", sff_fp, "-f", fasta_fp])
    result = Popen(command, shell=True, universal_newlines=True,
                   stdout=PIPE, stderr=STDOUT).stdout.read()
    self.result_dir = self.test_dir

    # both output files must match the stored fixtures exactly
    observed = "".join(list(open(self.result_dir + "centroids.fasta")))
    self.assertEqual(observed, self.expected)
    observed = "".join(list(open(self.result_dir + "denoiser_mapping.txt")))
    self.assertEqual(observed, self.expected_map_string)
def __init__(self,
             python_exe_fp=qiime_config['python_exe_fp'],
             cluster_jobs_fp=qiime_config['cluster_jobs_fp'],
             jobs_to_start=int(qiime_config['jobs_to_start']),
             poller_fp=join(get_qiime_scripts_dir(), 'poller.py'),
             retain_temp_files=False,
             suppress_polling=False,
             seconds_to_sleep=int(qiime_config['seconds_to_sleep'])):
    """Capture the parallel-run settings on the instance.

    NOTE(review): the qiime_config-derived defaults are evaluated once,
    when the class body is executed, not per call.
    """
    # plain attribute capture -- no validation happens here
    self._retain_temp_files = retain_temp_files
    self._suppress_polling = suppress_polling
    self._python_exe_fp = python_exe_fp
    self._cluster_jobs_fp = cluster_jobs_fp
    self._jobs_to_start = jobs_to_start
    self._poller_fp = poller_fp
    self._seconds_to_sleep = seconds_to_sleep
def __init__(self,
             python_exe_fp=qiime_config['python_exe_fp'],
             cluster_jobs_fp=qiime_config['cluster_jobs_fp'],
             jobs_to_start=int(qiime_config['jobs_to_start']),
             poller_fp=join(get_qiime_scripts_dir(), 'poller.py'),
             retain_temp_files=False,
             suppress_polling=False,
             seconds_to_sleep=int(qiime_config['seconds_to_sleep'])):
    """Store parallel-run settings as private instance attributes.

    NOTE(review): the qiime_config-derived defaults are evaluated once at
    class-definition time, not per call.
    """
    self._python_exe_fp = python_exe_fp
    self._cluster_jobs_fp = cluster_jobs_fp
    self._jobs_to_start = jobs_to_start
    self._poller_fp = poller_fp
    self._retain_temp_files = retain_temp_files
    self._suppress_polling = suppress_polling
    self._seconds_to_sleep = seconds_to_sleep
def preprocess_on_cluster(sff_fps, log_fp, fasta_fp=None, out_fp="/tmp/",
                          squeeze=False, verbose=False,
                          primer=STANDARD_BACTERIAL_PRIMER):
    """Submit denoiser_preprocess.py as a cluster job and wait for its output.

    sff_fps: List of paths to flowgram files.
    log_fp: path to log file
    fasta_fp: Path to fasta file, formatted as from split_libraries.py.
        Used to filter the flowgrams in sff_fps; only reads in fasta_fp
        are pulled from sff_fps.
    out_fp: path to output directory
    squeeze: if set, consecutive identical nucs are collapsed to one
        before phase I
    verbose: a binary verbose flag
    primer: primer sequence of the amplification process, removed from
        all reads during preprocessing
    """
    config = load_qiime_config()
    python_exe = config['python_exe_fp']

    cmd = "%s %s/denoiser_preprocess.py -i %s -l %s -o %s" %\
        (python_exe, get_qiime_scripts_dir(), ",".join(sff_fps),
         log_fp, out_fp)

    # append the optional flags the preprocess script understands
    if fasta_fp:
        cmd += " -f %s" % fasta_fp
    if squeeze:
        cmd += " -s"
    if verbose:
        cmd += " -v"
    if primer:
        cmd += " -p %s" % primer

    submit_jobs([cmd], "pp_" + make_tmp_name(6))
    # block until the job has written its prefix mapping
    wait_for_file(out_fp + "/prefix_mapping.txt", 10)
def get_html(script_name, column_headers, help_img):
    '''generate the html table rows for a given QIIME script'''
    scripts_dir = get_qiime_scripts_dir()
    dirname, fname = script_path_components(
        os.path.join(scripts_dir, script_name))
    info = get_script_info(dirname, fname)
    fname, fname_ext = os.path.splitext(script_name)

    # render required options first, then the optional ones
    required_html = get_html_for_options(fname, info, 'required_options',
                                         column_headers, help_img)
    optional_html = get_html_for_options(fname, info, 'optional_options',
                                         column_headers, help_img)

    return required_html, optional_html
def setUp(self):
    """Build one make_option fixture per optparse type the generator renders."""
    self.headers = ['head1', 'head2', 'head3']
    # existing_path option
    self.test_option_file = make_option('-i', '--coord_fname',
        help='Input principal coordinates filepath',
        type='existing_path')
    # plain string option (help text built by concatenation)
    self.test_option_colorby = make_option('-b', '--colorby', dest='colorby',
        help='Comma-separated list categories metadata categories' +
        ' (column headers) [default=color by all]')
    self.test_option_custom_axes = make_option('-a', '--custom_axes',
        help='This is the category from the metadata mapping file' +
        ' [default: %default]')
    # choice option with an explicit choices list
    self.test_option_choice = make_option('-k', '--background_color',
        help='Background color to use in the plots.[default: %default]',
        default='black', type='choice', choices=['black', 'white'])
    # float option
    self.test_option_float = make_option('--ellipsoid_opacity',
        help='Used only when plotting ellipsoids for jackknifed' +
        ' beta diversity (i.e. using a directory of coord files' +
        ' [default=%default]',
        default=0.33, type=float)
    # int option
    self.test_option_int = make_option('--n_taxa_keep',
        help='Used only when generating BiPlots. This is the number ' +
        ' to display. Use -1 to display all. [default: %default]',
        default=10, type=int)
    # store_true / store_false flag pair (same dest, opposite defaults)
    self.test_option_true = make_option('--suppress_html_output',
        dest='suppress_html_output',
        default=False, action='store_true',
        help='Suppress HTML output. [default: %default]')
    self.test_option_false = make_option('--suppress_html_output',
        dest='suppress_html_output',
        default=True, action='store_false',
        help='Suppress HTML output. [default: %default]')
    # human-readable labels keyed by option dest
    self.option_labels = {'coord_fname': 'Principal coordinates filepath',
                          'colorby': 'Colorby category',
                          'background_color': 'Background color',
                          'ellipsoid_opacity': 'Ellipsoid opacity',
                          'n_taxa_keep': '# of taxa to keep',
                          'custom_axes': 'Custom Axis'}
    self.script_dir = get_qiime_scripts_dir()
    self.test_script_info = get_script_info(self.script_dir,
                                            'make_qiime_rst_file')
def test_main_split_cluster(self):
    """Denoiser on cluster in split mode should always give same result on test data"""
    # -S: split mode, -c: run on cluster, -n 2: two workers
    command = " ".join(["%s/denoiser.py" % get_qiime_scripts_dir(),
                        "-S", "--force", '-c', '-n 2',
                        "-i", "%s/qiime/support_files/denoiser/TestData/denoiser_test_set.sff.txt" % PROJECT_HOME,
                        "-f", "%s/qiime/support_files/denoiser/TestData/test_set_seqs.fna" % PROJECT_HOME,
                        "-o", self.test_dir])
    result = Popen(command, shell=True, universal_newlines=True,
                   stdout=PIPE, stderr=STDOUT).stdout.read()
    self.result_dir = self.test_dir

    # each split writes its results into its own numbered subdirectory
    for subdir in ["0/", "1/"]:
        observed = "".join(list(open(self.result_dir + subdir + "centroids.fasta")))
        self.assertEqual(observed, expected_centroids[subdir])
        observed = "".join(list(open(self.result_dir + subdir + "denoiser_mapping.txt")))
        self.assertEqual(observed, expected_map_string_on_cluster[subdir])
def preprocess_on_cluster(sff_fps, log_fp, fasta_fp=None, out_fp="/tmp/",
                          squeeze=False, verbose=False,
                          primer=STANDARD_BACTERIAL_PRIMER):
    """Call preprocess via cluster_jobs_script on the cluster.

    sff_fps: List of paths to flowgram files.
    log_fp: path to log file
    fasta_fp: Path to fasta file, formatted as from split_libraries.py.
        This files is used to filter the flowgrams in sff_fps. Only reads in
        fasta_fp are pulled from sff_fps.
    out_fp: path to output directory
    verbose: a binary verbose flag
    squeeze: a flag that controls if sequences are squeezed before phase I.
        Squeezing means consecutive identical nucs are collapsed to one.
    primer: The primer sequences of the amplification process. This seq
        will be removed from all reads during the preprocessing
    """
    qiime_config = load_qiime_config()
    python_bin = qiime_config['python_exe_fp']

    # base invocation of denoiser_preprocess.py
    cmd = "%s %s/denoiser_preprocess.py -i %s -l %s -o %s" %\
        (python_bin, get_qiime_scripts_dir(), ",".join(sff_fps),
         log_fp, out_fp)
    # optional flags understood by the preprocess script
    if (fasta_fp):
        cmd += " -f %s" % fasta_fp
    if(squeeze):
        cmd += " -s"
    if verbose:
        cmd += " -v"
    if primer:
        cmd += " -p %s" % primer

    submit_jobs([cmd], "pp_" + make_tmp_name(6))
    # block until the submitted job has written its prefix mapping
    wait_for_file(out_fp + "/prefix_mapping.txt", 10)
def test_denoise_worker(self):
    """denoiser_worker.py is where it belongs and is callable."""
    qiime_config = load_qiime_config()
    PYTHON_BIN = qiime_config["python_exe_fp"]
    DENOISE_WORKER = get_qiime_scripts_dir() + "/denoiser_worker.py"

    self.assertTrue(exists(DENOISE_WORKER),
                    "DENOISER_WORKER is not where it's supposed to be: %s"
                    % DENOISE_WORKER)

    # test if its callable and actually works
    command = "%s %s -h" % (PYTHON_BIN, DENOISE_WORKER)
    proc = Popen(command, shell=True, universal_newlines=True,
                 stdout=PIPE, stderr=STDOUT)
    if proc.wait() != 0:
        self.fail("Calling %s failed. Check permissions and that it is in fact an executable." % DENOISE_WORKER)

    result = proc.stdout.read()
    # check that the help string looks correct
    self.assertTrue(result.startswith("Usage"))
def add_taxa_to_biom(input_dir, study):
    """ Add the retrained taxonomy assignments to the biom-table

    input_dir: root directory containing per-study folders
    study: study id; the folder 'study_<id>' under input_dir is processed
    """
    # get the study directory
    study_input_dir = join(input_dir, 'study_%s' % (str(study)))

    # get a list of all processed folders
    processed_folders = listdir(study_input_dir)

    for processed_folder in processed_folders:
        # skip anything that is not a processed folder produced by DB
        if not processed_folder.startswith('processed'):
            continue

        # get the biom file for this particular run
        gg_biom_fp = join(study_input_dir, processed_folder,
                          'gg_97_otus', 'exact_uclust_ref_otu_table.biom')

        # make sure the path exists and if not create the biom-table
        # only applicable to studies processed prior to biom-format
        if not exists(gg_biom_fp):
            # get the classic OTU table path
            gg_otu_table_fp = join(study_input_dir, processed_folder,
                                   'gg_97_otus',
                                   'exact_uclust_ref_otu_table.txt')
            # make sure path exists and convert classic to biom-table
            if exists(gg_otu_table_fp):
                system("python %s/software/biom-format/scripts/convert_biom.py -i %s -o %s --biom_table_type='otu table'" %
                       (environ['HOME'], gg_otu_table_fp, gg_biom_fp))

        try:
            # add the taxa to the biom-table
            # if it exists already, this will fail out
            system("python %s/add_taxa.py -i %s -t %s/phpr_taxonomy.txt -o %s" %
                   (get_qiime_scripts_dir(), gg_biom_fp, environ['HOME'],
                    gg_biom_fp))
        except Exception:
            # best-effort: tables that already carry taxonomy are skipped.
            # Narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit
            # still propagate.
            pass
def run_pick_closed_reference_otus(
        input_fp,
        refseqs_fp,
        output_dir,
        taxonomy_fp,
        command_handler,
        params,
        qiime_config,
        parallel=False,
        logger=None,
        suppress_md5=False,
        status_update_callback=print_to_stdout):
    """ Run the data preparation steps of Qiime

        The steps performed by this function are:
          1) Pick OTUs;
          2) Build an OTU table with optional pre-defined taxonmy.
    """
    # confirm that a valid otu picking method was supplied before doing
    # any work
    reference_otu_picking_methods = ['blast', 'uclust_ref', 'usearch61_ref']

    try:
        otu_picking_method = params['pick_otus']['otu_picking_method']
    except KeyError:
        # closed-reference default
        otu_picking_method = 'uclust_ref'
    assert otu_picking_method in reference_otu_picking_methods,\
        "Invalid OTU picking method supplied: %s. Valid choices are: %s"\
        % (otu_picking_method, ' '.join(reference_otu_picking_methods))

    # Prepare some variables for the later steps
    input_dir, input_filename = split(input_fp)
    input_basename, input_ext = splitext(input_filename)
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    # Only close a logger we created ourselves; a caller-supplied logger
    # stays open for the caller to manage.
    if logger == None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [input_fp, refseqs_fp, taxonomy_fp])

    # Prep the OTU picking command
    pick_otu_dir = '%s/%s_picked_otus' % (output_dir, otu_picking_method)
    otu_fp = '%s/%s_otus.txt' % (pick_otu_dir, input_basename)
    if parallel and (otu_picking_method == 'blast' or
                     otu_picking_method == 'uclust_ref' or
                     otu_picking_method == 'usearch61_ref'):
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params['parallel'])
        except KeyError:
            params_str = ''

        # Grab the OTU picker parameters
        try:
            # Want to find a cleaner strategy for this: the parallel script
            # is method-specific, so doesn't take a --alignment_method
            # option. This works for now though.
            d = params['pick_otus'].copy()
            if 'otu_picking_method' in d:
                del d['otu_picking_method']
            params_str += ' %s' % get_params_str(d)
        except KeyError:
            pass
        otu_picking_script = 'parallel_pick_otus_%s.py' % otu_picking_method
        # Build the OTU picking command
        pick_otus_cmd = '%s %s/%s -i %s -o %s -r %s -T %s' %\
            (python_exe_fp,
             script_dir,
             otu_picking_script,
             input_fp,
             pick_otu_dir,
             refseqs_fp,
             params_str)
    else:
        try:
            params_str = get_params_str(params['pick_otus'])
        except KeyError:
            params_str = ''
        # Since this is reference-based OTU picking we always want to
        # suppress new clusters -- force it here.
        params_str += ' --suppress_new_clusters'
        logger.write("Forcing --suppress_new_clusters as this is closed-reference OTU picking.\n\n")
        # Build the OTU picking command
        pick_otus_cmd = '%s %s/pick_otus.py -i %s -o %s -r %s -m %s %s' %\
            (python_exe_fp,
             script_dir,
             input_fp,
             pick_otu_dir,
             refseqs_fp,
             otu_picking_method,
             params_str)

    commands.append([('Pick OTUs', pick_otus_cmd)])

    # Prep the OTU table building command
    otu_table_fp = '%s/otu_table.biom' % output_dir
    try:
        params_str = get_params_str(params['make_otu_table'])
    except KeyError:
        params_str = ''
    # taxonomy is optional; pass -t only when a taxonomy file was supplied
    if taxonomy_fp:
        taxonomy_str = '-t %s' % taxonomy_fp
    else:
        taxonomy_str = ''
    # Build the OTU table building command
    make_otu_table_cmd = '%s %s/make_otu_table.py -i %s %s -o %s %s' %\
        (python_exe_fp,
         script_dir,
         otu_fp,
         taxonomy_str,
         otu_table_fp,
         params_str)
    commands.append([('Make OTU table', make_otu_table_cmd)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
def run_pick_de_novo_otus(input_fp,
                          output_dir,
                          command_handler,
                          params,
                          qiime_config,
                          parallel=False,
                          logger=None,
                          suppress_md5=False,
                          status_update_callback=print_to_stdout):
    """ Run the data preparation steps of QIIME for de novo OTU picking.

        The steps performed by this function are:
          1) Pick OTUs;
          2) Pick a representative set;
          3) Align the representative set;
          4) Assign taxonomy;
          5) Filter the alignment prior to tree building - remove positions
             which are all gaps, and specified as 0 in the lanemask
          6) Build a phylogenetic tree;
          7) Build an OTU table.

        input_fp: filepath to the input sequences (fasta)
        output_dir: directory where all output is written (created if absent)
        command_handler: callable that executes the accumulated commands
        params: dict of per-script parameter dicts
        qiime_config: QIIME config dict; 'python_exe_fp' is read here
        parallel: use parallel scripts for OTU picking / taxonomy / alignment
         where the chosen methods support it
        logger: an open WorkflowLogger; when None one is created here
        suppress_md5: when False, the input file's md5 sum is logged
        status_update_callback: passed through to the command handler

        Returns (abspath to tree file, abspath to OTU table file).
    """
    # Prepare some variables for the later steps
    input_dir, input_filename = split(input_fp)
    input_basename, input_ext = splitext(input_filename)
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    # cluster_failures becomes True only in the parallel uclust_ref branch
    # below, when the caller did not pass suppress_new_clusters; sequences
    # that fail reference picking are then clustered de novo.
    cluster_failures = False
    if logger == None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [input_fp])

    # Prep the OTU picking command
    try:
        otu_picking_method = params['pick_otus']['otu_picking_method']
    except KeyError:
        otu_picking_method = 'uclust'
    pick_otu_dir = '%s/%s_picked_otus' % (output_dir, otu_picking_method)
    otu_fp = '%s/%s_otus.txt' % (pick_otu_dir, input_basename)
    if parallel and (otu_picking_method == 'blast' or
                     otu_picking_method == 'uclust_ref'):
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params['parallel'])
        except KeyError:
            params_str = ''

        # Grab the OTU picker parameters
        try:
            # Want to find a cleaner strategy for this: the parallel script
            # is method-specific, so doesn't take a --otu_picking_method
            # option. This works for now though.
            d = params['pick_otus'].copy()
            del d['otu_picking_method']
        except KeyError:
            pass
        # NOTE(review): if params['pick_otus'] is absent entirely, `d` is
        # never bound and the code below raises NameError rather than
        # KeyError — confirm callers always supply a pick_otus section when
        # running parallel blast/uclust_ref.

        if otu_picking_method == 'uclust_ref':
            try:
                # value itself is unused; presence of the key is what matters
                suppress_new_clusters = d['suppress_new_clusters']
                del d['suppress_new_clusters']
                cluster_failures = False
            except KeyError:
                cluster_failures = True
                failure_otu_picking_method = 'uclust'

        params_str += ' %s' % get_params_str(d)
        otu_picking_script = 'parallel_pick_otus_%s.py' % otu_picking_method
        # Build the OTU picking command
        pick_otus_cmd = '%s %s/%s -i %s -o %s -T %s' % (python_exe_fp,
                                                        script_dir,
                                                        otu_picking_script,
                                                        input_fp,
                                                        pick_otu_dir,
                                                        params_str)
    else:
        try:
            params_str = get_params_str(params['pick_otus'])
        except KeyError:
            params_str = ''
        # Build the OTU picking command
        pick_otus_cmd = '%s %s/pick_otus.py -i %s -o %s %s' %\
            (python_exe_fp, script_dir, input_fp, pick_otu_dir, params_str)

    commands.append([('Pick OTUs', pick_otus_cmd)])

    if cluster_failures:
        # Sequences that failed reference-based picking are extracted and
        # clustered de novo, then the two OTU maps are concatenated.
        reference_otu_fp = otu_fp
        clustered_failures_dir = '%s/failure_otus/' % pick_otu_dir

        try:
            d = params['pick_otus'].copy()
            del d['otu_picking_method']
        except KeyError:
            pass

        if 'uclust_otu_id_prefix' not in d:
            d['uclust_otu_id_prefix'] = 'DeNovoOTU'
        params_str = ' %s' % get_params_str(d)

        failures_list_fp = '%s/%s_failures.txt' % \
            (pick_otu_dir, input_basename)
        failures_fasta_fp = '%s/%s_failures.fasta' % \
            (pick_otu_dir, input_basename)

        filter_fasta_cmd = 'filter_fasta.py -f %s -s %s -o %s' %\
            (input_fp, failures_list_fp, failures_fasta_fp)

        commands.append([('Generate failures fasta file',
                          filter_fasta_cmd)])

        # Prep the OTU picking command for
        failure_otu_fp = '%s/%s_failures_otus.txt' % (clustered_failures_dir,
                                                      input_basename)
        # Build the OTU picking command
        pick_otus_cmd = '%s %s/pick_otus.py -i %s -o %s -m %s %s' %\
            (python_exe_fp, script_dir, failures_fasta_fp,
             clustered_failures_dir, failure_otu_picking_method, params_str)

        commands.append([('Pick de novo OTUs for new clusters',
                          pick_otus_cmd)])

        merged_otu_map_fp = '%s/merged_otu_map.txt' % clustered_failures_dir
        cat_otu_tables_cmd = 'cat %s %s >> %s' %\
            (reference_otu_fp, failure_otu_fp, merged_otu_map_fp)
        commands.append([('Merge OTU maps', cat_otu_tables_cmd)])
        # downstream steps consume the merged map
        otu_fp = merged_otu_map_fp

    # Prep the representative set picking command
    rep_set_dir = '%s/rep_set/' % output_dir
    create_dir(rep_set_dir)
    rep_set_fp = '%s/%s_rep_set.fasta' % (rep_set_dir, input_basename)
    rep_set_log_fp = '%s/%s_rep_set.log' % (rep_set_dir, input_basename)
    try:
        params_str = get_params_str(params['pick_rep_set'])
    except KeyError:
        params_str = ''
    # Build the representative set picking command
    pick_rep_set_cmd = '%s %s/pick_rep_set.py -i %s -f %s -l %s -o %s %s' %\
        (python_exe_fp, script_dir, otu_fp, input_fp, rep_set_log_fp,
         rep_set_fp, params_str)
    commands.append([('Pick representative set', pick_rep_set_cmd)])

    # Prep the taxonomy assignment command
    try:
        assignment_method = params['assign_taxonomy']['assignment_method']
    except KeyError:
        assignment_method = 'uclust'
    assign_taxonomy_dir = '%s/%s_assigned_taxonomy' %\
        (output_dir, assignment_method)
    taxonomy_fp = '%s/%s_rep_set_tax_assignments.txt' % \
        (assign_taxonomy_dir, input_basename)
    if parallel and (assignment_method == 'rdp' or
                     assignment_method == 'blast' or
                     assignment_method == 'uclust'):
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params['parallel'])
        except KeyError:
            params_str = ''

        # Grab the taxonomy assignment parameters
        try:
            # Want to find a cleaner strategy for this: the parallel script
            # is method-specific, so doesn't take a --assignment_method
            # option. This works for now though.
            d = params['assign_taxonomy'].copy()
            if 'assignment_method' in d:
                del d['assignment_method']
            params_str += ' %s' % get_params_str(d)
        except KeyError:
            pass

        # Build the parallel taxonomy assignment command
        assign_taxonomy_cmd = \
            '%s %s/parallel_assign_taxonomy_%s.py -i %s -o %s -T %s' %\
            (python_exe_fp, script_dir, assignment_method, rep_set_fp,
             assign_taxonomy_dir, params_str)
    else:
        try:
            params_str = get_params_str(params['assign_taxonomy'])
        except KeyError:
            params_str = ''
        # Build the taxonomy assignment command
        assign_taxonomy_cmd = '%s %s/assign_taxonomy.py -o %s -i %s %s' %\
            (python_exe_fp, script_dir, assign_taxonomy_dir,
             rep_set_fp, params_str)

    commands.append([('Assign taxonomy', assign_taxonomy_cmd)])

    # Prep the OTU table building command
    otu_table_fp = '%s/otu_table.biom' % output_dir
    try:
        params_str = get_params_str(params['make_otu_table'])
    except KeyError:
        params_str = ''
    # Build the OTU table building command
    make_otu_table_cmd = '%s %s/make_otu_table.py -i %s -t %s -o %s %s' %\
        (python_exe_fp, script_dir, otu_fp, taxonomy_fp, otu_table_fp,
         params_str)
    commands.append([('Make OTU table', make_otu_table_cmd)])

    if cluster_failures:
        reference_otu_table_fp = '%s/reference_only_otu_table.biom' % output_dir
        # Build the OTU table building command
        make_otu_table_cmd = '%s %s/make_otu_table.py -i %s -t %s -o %s %s' %\
            (python_exe_fp, script_dir, reference_otu_fp, taxonomy_fp,
             reference_otu_table_fp, params_str)
        commands.append([('Make reference-only OTU table',
                          make_otu_table_cmd)])

    # Prep the pynast alignment command
    try:
        alignment_method = params['align_seqs']['alignment_method']
    except KeyError:
        alignment_method = 'pynast'
    pynast_dir = '%s/%s_aligned_seqs' % (output_dir, alignment_method)
    aln_fp = '%s/%s_rep_set_aligned.fasta' % (pynast_dir, input_basename)
    if parallel and alignment_method == 'pynast':
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params['parallel'])
        except KeyError:
            params_str = ''

        # Grab the alignment parameters
        # Want to find a cleaner strategy for this: the parallel script
        # is method-specific, so doesn't take a --alignment_method
        # option. This works for now though.
        try:
            d = params['align_seqs'].copy()
        except KeyError:
            d = {}
        try:
            del d['alignment_method']
        except KeyError:
            pass
        params_str += ' %s' % get_params_str(d)

        # Build the parallel pynast alignment command
        align_seqs_cmd = '%s %s/parallel_align_seqs_pynast.py -i %s -o %s -T %s' %\
            (python_exe_fp, script_dir, rep_set_fp, pynast_dir, params_str)
    else:
        try:
            params_str = get_params_str(params['align_seqs'])
        except KeyError:
            params_str = ''
        # Build the pynast alignment command
        align_seqs_cmd = '%s %s/align_seqs.py -i %s -o %s %s' %\
            (python_exe_fp, script_dir, rep_set_fp, pynast_dir, params_str)
    commands.append([('Align sequences', align_seqs_cmd)])

    # Prep the alignment filtering command
    filtered_aln_fp = '%s/%s_rep_set_aligned_pfiltered.fasta' %\
        (pynast_dir, input_basename)
    try:
        params_str = get_params_str(params['filter_alignment'])
    except KeyError:
        params_str = ''
    # Build the alignment filtering command
    filter_alignment_cmd = '%s %s/filter_alignment.py -o %s -i %s %s' %\
        (python_exe_fp, script_dir, pynast_dir, aln_fp, params_str)
    commands.append([('Filter alignment', filter_alignment_cmd)])

    # Prep the tree building command
    tree_fp = '%s/rep_set.tre' % output_dir
    try:
        params_str = get_params_str(params['make_phylogeny'])
    except KeyError:
        params_str = ''
    # Build the tree building command
    make_phylogeny_cmd = '%s %s/make_phylogeny.py -i %s -o %s %s' %\
        (python_exe_fp, script_dir, filtered_aln_fp, tree_fp,
         params_str)
    commands.append([('Build phylogenetic tree', make_phylogeny_cmd)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)

    return abspath(tree_fp), abspath(otu_table_fp)
def run_summarize_taxa_through_plots(otu_table_fp,
                                     mapping_fp,
                                     output_dir,
                                     mapping_cat,
                                     sort,
                                     command_handler,
                                     params,
                                     qiime_config,
                                     logger=None,
                                     suppress_md5=False,
                                     status_update_callback=print_to_stdout):
    """ Run the data preparation for summarizing taxonomies and generating plots

        The steps performed by this function are:
          1) Summarize OTU by Category
          2) Summarize Taxonomy
          3) Plot Taxonomy Summary

        otu_table_fp: filepath to the input OTU table (biom)
        mapping_fp: filepath to the mapping file
        output_dir: directory where all output is written (created if absent)
        mapping_cat: mapping category to collapse the OTU table by; may be
         None/empty, in which case a value from the params file is used if
         present
        sort: when truthy, sort the OTU table before summarizing
        command_handler: callable that executes the accumulated commands
        params: dict of per-script parameter dicts
        qiime_config: QIIME config dict; 'python_exe_fp' is read here
        logger: an open WorkflowLogger; when None one is created here
        suppress_md5: when False, input files' md5 sums are logged
        status_update_callback: passed through to the command handler

        Returns None; output is produced on disk by the executed commands.
    """
    # Prepare some variables for the later steps
    otu_table_dir, otu_table_filename = split(otu_table_fp)
    otu_table_basename, otu_table_ext = splitext(otu_table_filename)
    create_dir(output_dir)

    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()

    if logger == None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp])

    # if mapping category not passed via command-line,
    # check if it is passed in params file
    if not mapping_cat:
        try:
            mapping_cat = params['summarize_otu_by_cat']['mapping_category']
        except:
            mapping_cat = None

    try:
        params_str = get_params_str(params['summarize_otu_by_cat'])
        # Need to remove the mapping category option, since it is defined
        # above.
        # Using this method since we don't want to change the params dict
        split_params = params_str.split('--')
        updated_params_str = []
        for i in split_params:
            if not i.startswith('mapping_category'):
                updated_params_str.append(i)
        params_str = '--'.join(updated_params_str)
    except:
        params_str = ''

    if mapping_cat:
        output_fp = join(output_dir,
                         '%s_otu_table.biom' % (mapping_cat.replace(' ', '-')))
        # Build the summarize otu by category command
        # NOTE(review): mapping_fp is passed to -i and otu_table_fp to -c
        # here — looks inverted relative to the option names; confirm this
        # matches summarize_otu_by_cat.py's actual interface.
        summarize_otu_by_cat_cmd = \
            "%s %s/summarize_otu_by_cat.py -i %s -c %s -o %s -m '%s' %s" %\
            (python_exe_fp, script_dir, mapping_fp, otu_table_fp, output_fp,
             mapping_cat, params_str)
        commands.append(
            [('Summarize OTU table by Category', summarize_otu_by_cat_cmd)])

        # downstream steps consume the collapsed table
        otu_table_fp = output_fp

    # Build the sort OTU table command
    if sort:
        # Prep the sort_otu_table command
        try:
            params_str = get_params_str(params['sort_otu_table'])
        except:
            params_str = ''

        # define output otu table
        sorted_fp = join(output_dir,
                         splitext(split(otu_table_fp)[-1])[0] + '_sorted.biom')

        if mapping_cat or params_str == '':
            # for this case we don't have a collapsed mapping file so must
            # handle separately
            sort_otu_table_cmd = \
                "%s %s/sort_otu_table.py -i %s -o %s" %\
                (python_exe_fp, script_dir, otu_table_fp, sorted_fp)
        else:
            sort_otu_table_cmd = \
                "%s %s/sort_otu_table.py -i %s -o %s -m %s %s" %\
                (python_exe_fp, script_dir, otu_table_fp, sorted_fp,
                 mapping_fp, params_str)

        commands.append([('Sort OTU Table', sort_otu_table_cmd)])

        # redefine otu_table_fp to use
        otu_table_fp = sorted_fp

    # Prep the summarize taxonomy command
    try:
        params_str = get_params_str(params['summarize_taxa'])
    except:
        params_str = ''

    try:
        sum_taxa_levels = params['summarize_taxa']['level']
    except:
        sum_taxa_levels = None

    # Build the summarize taxonomy command
    summarize_taxa_cmd = '%s %s/summarize_taxa.py -i %s -o %s %s' %\
        (python_exe_fp, script_dir, otu_table_fp, output_dir, params_str)

    commands.append([('Summarize Taxonomy', summarize_taxa_cmd)])

    # predict the per-level output filepaths summarize_taxa.py will write
    sum_taxa_fps = []

    if sum_taxa_levels:
        basename = join(output_dir, splitext(split(otu_table_fp)[-1])[0])
        for i in sum_taxa_levels.split(','):
            sum_taxa_fps.append(basename + '_L%s.txt' % (str(i)))
    else:
        basename = join(output_dir, splitext(split(otu_table_fp)[-1])[0])
        # this is the default levels from summarize_taxa, but cannot import
        # script to get these values
        for i in [2, 3, 4, 5, 6]:
            sum_taxa_fps.append(basename + '_L%s.txt' % (str(i)))

    # Prep the plot taxa summary plot command(s)
    taxa_summary_plots_dir = '%s/taxa_summary_plots/' % output_dir
    create_dir(taxa_summary_plots_dir)

    try:
        params_str = get_params_str(params['plot_taxa_summary'])
    except:
        params_str = ''
    # Build the plot taxa summary plot command(s)
    plot_taxa_summary_cmd =\
        '%s %s/plot_taxa_summary.py -i %s -o %s %s' %\
        (python_exe_fp, script_dir, ','.join(sum_taxa_fps),
         taxa_summary_plots_dir, params_str)

    commands.append(
        [('Plot Taxonomy Summary', plot_taxa_summary_cmd)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
def get_flowgram_ali_exe():
    """Return the path to the flowgram alignment program.

    The FlowgramAli_4frame binary is expected to live in the QIIME
    scripts directory; no check is made here that it actually exists.
    """
    # join() is used elsewhere in this file for path construction and,
    # unlike raw string concatenation, tolerates a scripts dir that
    # already ends with a separator.
    return join(get_qiime_scripts_dir(), "FlowgramAli_4frame")
def run_process_sff_through_split_lib(study_id, run_prefix, sff_input_fp,
                                      mapping_fp, output_dir,
                                      command_handler, params, qiime_config,
                                      convert_to_flx=False,
                                      write_to_all_fasta=False,
                                      status_update_callback=print_to_stdout):
    """ NOTE: Parts of this function are a directly copied from the
        run_qiime_data_preparation function from the workflow.py library file
        in QIIME.

        The steps performed by this function are:
          1) Process SFFs to generate .fna, .qual and flowgram file.
             (process_sff.py)
          2) De-multiplex sequences. (split_libraries.py)

        study_id: study identifier; compared as a string against a
         hard-coded list of special-case studies below
        run_prefix: run prefix (unused in this function body)
        sff_input_fp: comma-separated list of SFF filepaths
        mapping_fp: filepath to the mapping file
        output_dir: directory where all output is written (created if absent)
        command_handler: callable that executes the accumulated commands
        params: dict of per-script parameter dicts; note that
         params['split_libraries'] may be mutated here for metagenomes
        qiime_config: QIIME config dict; 'python_exe_fp' is read here
        convert_to_flx: when True, pass -t to process_sff.py to convert
         to FLX-style output
        write_to_all_fasta: unused in this function body
        status_update_callback: passed through to the command handler

        Returns the list of per-SFF fasta filepaths fed to split_libraries.
    """
    # Prepare some variables for the later steps
    sff_filenames = sff_input_fp.split(',')
    commands = []
    create_dir(output_dir)
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()

    # generate a log file
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params,
                            qiime_config=qiime_config)

    make_flowgram = True
    split_lib_fasta_input_files = []
    split_lib_qual_input_files = []
    denoise_flow_input_files = []

    # make a copy of the mapping file
    copied_mapping = split(mapping_fp)[-1]
    mapping_input_fp_copy = join(output_dir, copied_mapping)
    copy_mapping_cmd = 'cp %s %s' % (mapping_fp, mapping_input_fp_copy)
    commands.append([('CopyMapping', copy_mapping_cmd)])

    # iterate over SFFs and match to the mapping file
    for sff_input_fp in sff_filenames:
        # GENERATE THE MD5 HERE AND STORE IN THE DATABASE AFTER FILE
        # SUCCESSFULLY PROCESSED

        # Copy the SFF into the processed files directory
        copied_sff = split(sff_input_fp)[-1]
        sff_input_fp_copy = join(output_dir, copied_sff)

        # Generate filenames for split_libraries
        input_dir, input_filename = split(sff_input_fp)

        # strip the extra extension of gzipped inputs (e.g. foo.sff.gz)
        if is_gzip(sff_input_fp) and sff_input_fp.endswith('.gz'):
            input_basename, input_ext = splitext(splitext(input_filename)[0])
        else:
            input_basename, input_ext = splitext(input_filename)

        # Convert sff file into fasta, qual and flowgram file
        if convert_to_flx:
            if study_id in ['496', '968', '969', '1069', '1002', '1066',
                            '1194', '1195', '1457', '1458', '1460', '1536',
                            '1918', '1962']:
                ### this function is for handling files where the barcode and
                ### linkerprimer are all lowercase (i.e. HMP data or SRA data)

                # write process_sff command
                process_sff_cmd = '%s %s/process_sff.py -i %s -f -o %s -t --no_trim --use_sfftools' %\
                    (python_exe_fp, script_dir, sff_input_fp, output_dir)
                #process_sff_cmd = '%s %s/process_sff.py -i %s -f -o %s -t' % (python_exe_fp, script_dir, sff_input_fp, output_dir)
                commands.append([('ProcessSFFs', process_sff_cmd)])

                # define output fasta from process_sff
                no_trim_fasta_fp = join(output_dir,
                                        input_basename + '_FLX.fna')

                # define pprospector scripts dir
                pprospector_scripts_dir = join(ServerConfig.home, 'software',
                                               'pprospector', 'scripts')

                # clean fasta - basically converting lowercase to uppercase
                clean_fasta_cmd = '%s %s/clean_fasta.py -f %s -o %s' %\
                    (python_exe_fp, pprospector_scripts_dir,
                     no_trim_fasta_fp, output_dir)
                commands.append([('CleanFasta', clean_fasta_cmd)])

                # move the cleaned file to be consistent with other processes
                cleaned_fasta_fp = join(output_dir, input_basename +
                                        '_FLX_filtered.fasta')
                moved_fasta_fp = join(output_dir, input_basename + '_FLX.fna')
                mv_cmd = 'mv %s %s' % (cleaned_fasta_fp, moved_fasta_fp)
                commands.append([('RenameFasta', mv_cmd)])

                # update the split-lib files to use the cleaned file
                split_lib_fasta_input_files.append(moved_fasta_fp)
                split_lib_qual_input_files.append(
                    join(output_dir, input_basename + '_FLX.qual'))
                denoise_flow_input_files.append(
                    join(output_dir, input_basename + '_FLX.txt'))
            else:
                # write process_sff command
                process_sff_cmd = '%s %s/process_sff.py -i %s -f -o %s -t' %\
                    (python_exe_fp, script_dir, sff_input_fp, output_dir)
                commands.append([('ProcessSFFs', process_sff_cmd)])

                # get filepaths for generated files
                split_lib_fasta_input_files.append(
                    join(output_dir, input_basename + '_FLX.fna'))
                split_lib_qual_input_files.append(
                    join(output_dir, input_basename + '_FLX.qual'))
                denoise_flow_input_files.append(
                    join(output_dir, input_basename + '_FLX.txt'))
        else:
            # write process_sff command
            process_sff_cmd = '%s %s/process_sff.py -i %s -f -o %s' %\
                (python_exe_fp, script_dir, sff_input_fp, output_dir)
            commands.append([('ProcessSFFs', process_sff_cmd)])

            # get filepaths for generated files
            split_lib_fasta_input_files.append(
                join(output_dir, input_basename + '.fna'))
            split_lib_qual_input_files.append(
                join(output_dir, input_basename + '.qual'))
            denoise_flow_input_files.append(
                join(output_dir, input_basename + '.txt'))

    split_lib_fasta_input = ','.join(split_lib_fasta_input_files)
    split_lib_qual_input = ','.join(split_lib_qual_input_files)
    denoise_flow_input = ','.join(denoise_flow_input_files)

    # If dataset is metagenomic disable primer check
    data_access = data_access_factory(ServerConfig.data_access_type)
    study_info = data_access.getStudyInfo(study_id, 12171)
    if study_info['investigation_type'].lower() == 'metagenome':
        # NOTE(review): this mutates the caller's params dict in place —
        # confirm callers do not reuse params across studies.
        params['split_libraries']['disable_primers'] = None

    # create split-libraries folder
    split_library_output = join(output_dir, 'split_libraries')
    create_dir(split_library_output)

    # get params string
    try:
        params_str = get_params_str(params['split_libraries'])
    except KeyError:
        params_str = ''

    # Build the split libraries command
    split_libraries_cmd = '%s %s/split_libraries.py -f %s -q %s -m %s -o %s %s' %\
        (python_exe_fp, script_dir, split_lib_fasta_input,
         split_lib_qual_input, mapping_fp, split_library_output, params_str)
    commands.append([('SplitLibraries', split_libraries_cmd)])

    input_fp = join(split_library_output, 'seqs.fna')

    # create per sample fastq files
    fastq_output = join(split_library_output, 'per_sample_fastq')
    create_dir(fastq_output)

    try:
        params_str = get_params_str(params['convert_fastaqual_fastq'])
    except KeyError:
        params_str = ''

    input_qual_fp = join(split_library_output, 'seqs_filtered.qual')

    # build the convert fasta/qual to fastq command
    create_fastq_cmd = '%s %s/convert_fastaqual_fastq.py -f %s -q %s -o %s %s' %\
        (python_exe_fp, script_dir, input_fp, input_qual_fp, fastq_output,
         params_str)
    commands.append([('Create FASTQ', create_fastq_cmd)])

    # Call the command handler on the list of commands
    command_handler(commands, status_update_callback, logger=logger)

    # Return the fasta file paths
    return split_lib_fasta_input_files
stdout=PIPE,stderr=STDOUT).stdout.read() print result if not unittest_good_pattern.search(result): if application_not_found_pattern.search(result): missing_application_tests.append(unittest_name) else: bad_tests.append(unittest_name) # Run through all of QIIME's scripts, and pass -h to each one. If the # resulting stdout does not being with the Usage text, that is an # indicator of something being wrong with the script. Issues that would # cause that are bad import statements in the script, SyntaxErrors, or # other failures prior to running qiime.util.parse_command_line_parameters. try: scripts_dir = get_qiime_scripts_dir() script_directory_found = True except AssertionError: script_directory_found = False if script_directory_found: script_names = [] script_names = glob('%s/*py' % scripts_dir) script_names.sort() bad_scripts = [] for script_name in script_names: script_good_pattern = re.compile('^Usage: %s' % split(script_name)[1]) print "Testing %s." % script_name command = '%s %s -h' % (python_name, script_name) result = Popen(command,shell=True,universal_newlines=True,\
def main():
    """Run QIIME's unit tests, basic script tests, and script usage tests.

    Any of the three test types can be suppressed via command-line options;
    suppressing all three is an error. Returns 0 when everything that ran
    passed, 1 otherwise (mirroring unittest's convention).
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.suppress_unit_tests and opts.suppress_script_tests and opts.suppress_script_usage_tests:
        option_parser.error("You're suppressing all three test types. Nothing to run.")

    test_dir = abspath(dirname(__file__))

    unittest_good_pattern = re.compile("OK\s*$")
    application_not_found_pattern = re.compile("ApplicationNotFoundError")
    python_name = "python"
    bad_tests = []
    missing_application_tests = []

    # Run through all of QIIME's unit tests, and keep track of any files which
    # fail unit tests.
    if not opts.suppress_unit_tests:
        unittest_names = []
        if not opts.unittest_glob:
            # no glob supplied: discover every test_*.py under the test dir
            for root, dirs, files in walk(test_dir):
                for name in files:
                    if name.startswith("test_") and name.endswith(".py"):
                        unittest_names.append(join(root, name))
        else:
            for fp in glob(opts.unittest_glob):
                fn = split(fp)[1]
                if fn.startswith("test_") and fn.endswith(".py"):
                    unittest_names.append(abspath(fp))

        unittest_names.sort()

        for unittest_name in unittest_names:
            print "Testing %s:\n" % unittest_name
            command = "%s %s -v" % (python_name, unittest_name)
            stdout, stderr, return_value = qiime_system_call(command)
            print stderr
            # unittest -v writes its result summary to stderr; "OK" at the
            # end means the file passed
            if not unittest_good_pattern.search(stderr):
                if application_not_found_pattern.search(stderr):
                    missing_application_tests.append(unittest_name)
                else:
                    bad_tests.append(unittest_name)

    bad_scripts = []
    if not opts.suppress_script_tests:
        # Run through all of QIIME's scripts, and pass -h to each one. If the
        # resulting stdout does not being with the Usage text, that is an
        # indicator of something being wrong with the script. Issues that would
        # cause that are bad import statements in the script, SyntaxErrors, or
        # other failures prior to running qiime.util.parse_command_line_parameters.
        try:
            scripts_dir = get_qiime_scripts_dir()
            script_directory_found = True
        except AssertionError:
            script_directory_found = False

        if script_directory_found:
            script_names = []
            script_names = glob("%s/*py" % scripts_dir)
            script_names.sort()

            for script_name in script_names:
                script_good_pattern = re.compile("^Usage: %s" % split(script_name)[1])
                print "Testing %s." % script_name
                command = "%s %s -h" % (python_name, script_name)
                stdout, stderr, return_value = qiime_system_call(command)
                if not script_good_pattern.search(stdout):
                    bad_scripts.append(script_name)

    num_script_usage_example_failures = 0
    qiime_test_data_dir = qiime_config["qiime_test_data_dir"]
    if not opts.suppress_script_usage_tests and qiime_test_data_dir != None:
        # Run the script usage testing functionality
        script_usage_result_summary, num_script_usage_example_failures = run_script_usage_tests(
            qiime_test_data_dir=qiime_test_data_dir,
            qiime_scripts_dir=qiime_config["qiime_scripts_dir"],
            working_dir=qiime_config["temp_dir"],
            verbose=True,
            tests=None,  # runs all
            failure_log_fp=None,
            force_overwrite=True,
        )

    print "==============\nResult summary\n=============="

    if not opts.suppress_unit_tests:
        print "\nUnit test result summary\n------------------------\n"
        if bad_tests:
            print "\nFailed the following unit tests.\n%s" % "\n".join(bad_tests)

        if missing_application_tests:
            print "\nFailed the following unit tests, in part or whole due " + "to missing external applications.\nDepending on the QIIME features " + "you plan to use, this may not be critical.\n%s" % "\n".join(
                missing_application_tests
            )

        if not (missing_application_tests or bad_tests):
            print "\nAll unit tests passed.\n\n"

    if not opts.suppress_script_tests:
        print "\nBasic script test result summary\n--------------------------------\n"
        if not script_directory_found:
            print "Critical error: Failed to test scripts because the script directory could not be found.\n The most likely explanation for this failure is that you've installed QIIME using setup.py, and forgot to specify the qiime_scripts_dir in your qiime_config file. This value shoud be set either to the directory you provided for --install-scripts, or /usr/local/bin if no value was provided to --install-scripts."
        else:
            if bad_scripts:
                print "Failed the following basic script tests.\n%s" % "\n".join(bad_scripts)
            else:
                print "All basic script tests passed successfully.\n"

    qiime_test_data_dir_exists = True
    if not opts.suppress_script_usage_tests:
        if qiime_test_data_dir:
            print "\nScript usage test result summary\n------------------------------------\n"
            print script_usage_result_summary
        else:
            print "\nCould not run script usage tests because qiime_test_data_dir is not defined in your qiime_config."
            qiime_test_data_dir_exists = False
    print ""

    # If any of the unit tests, script tests, or script usage tests fail, or if
    # we have any missing application errors or a missing QIIME test data dir
    # if script usage tests weren't suppressed, use return code 1 (as python's
    # unittest module does to indicate one or more failures).
    return_code = 1
    if (
        len(bad_tests) == 0
        and len(missing_application_tests) == 0
        and len(bad_scripts) == 0
        and num_script_usage_example_failures == 0
        and qiime_test_data_dir_exists
    ):
        return_code = 0
    return return_code
def main():
    """Run alpha-rarefaction for a meta-analysis and register the results.

    Builds a uniquely-named arare_<prefix> output directory, runs
    alpha_rarefaction.py (serially or in parallel per the params file),
    zips the results, and records the web link in the database via
    data_access.addMetaAnalysisFiles.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    # get all the options
    cd_dir = path.join(opts.fs_fp, 'arare')
    # random suffix makes the output directory unique per run
    tmp_prefix = get_tmp_filename('', suffix='').strip()
    output_dir = path.join(opts.fs_fp, 'arare', 'arare_' + tmp_prefix)
    web_fp = path.join(opts.web_fp, 'arare', 'arare_' + tmp_prefix)
    otu_table_fp = opts.otu_table_fp
    mapping_file_fp = opts.mapping_file_fp
    file_name_prefix = opts.fname_prefix
    user_id = int(opts.user_id)
    meta_id = int(opts.meta_id)
    bdiv_rarefied_at = int(opts.bdiv_rarefied_at)
    jobs_to_start = opts.jobs_to_start
    tree_fp = opts.tree_fp
    command_handler = call_commands_serially
    status_update_callback = no_status_updates
    zip_fpath = opts.zip_fpath
    zip_fpath_db = opts.zip_fpath_db
    run_date = opts.run_date
    force = True

    # the database layer is optional at import time
    # NOTE(review): if this import fails, data_access is never bound and the
    # addMetaAnalysisFiles call at the end raises NameError — confirm the
    # import is guaranteed in production deployments.
    try:
        from data_access_connections import data_access_factory
        from enums import ServerConfig
        import cx_Oracle
        data_access = data_access_factory(ServerConfig.data_access_type)
    except ImportError:
        print "NOT IMPORTING QIIMEDATAACCESS"
        pass

    try:
        parameter_f = open(opts.params_path)
    except IOError:
        raise IOError,\
            "Can't open parameters file (%s). Does it exist? Do you have read access?"\
            % opts.params_path
    params = parse_qiime_parameters(parameter_f)

    try:
        makedirs(output_dir)
    except OSError:
        if force:
            pass
        else:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            print "Output directory already exists. Please choose " +\
                "a different directory, or force overwrite with -f."
            exit(1)

    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params,
                            qiime_config=qiime_config)

    # determine whether to run alpha-diversity in serial or parallel
    serial_or_parallel = params['serial_or_parallel']['method']
    if serial_or_parallel == 'Serial':
        arare_cmd = '%s %s/alpha_rarefaction.py -i %s -m %s -o %s -t %s -p %s -f' %\
            (python_exe_fp, script_dir, otu_table_fp, mapping_file_fp,
             output_dir, tree_fp, opts.params_path)
    else:
        # -a -O 50: run in parallel with 50 jobs
        arare_cmd = '%s %s/alpha_rarefaction.py -i %s -m %s -o %s -t %s -a -O 50 -p %s -f' %\
            (python_exe_fp, script_dir, otu_table_fp, mapping_file_fp,
             output_dir, tree_fp, opts.params_path)

    commands.append([('Alpha-Rarefaction', arare_cmd)])

    command_handler(commands, status_update_callback, logger)

    # zip the distance matrices
    cmd_call = 'cd %s; zip -r %s %s' % (cd_dir, zip_fpath,
                                        'arare_' + tmp_prefix)
    system(cmd_call)

    # convert link into web-link
    web_link = path.join(web_fp, 'alpha_rarefaction_plots',
                         'rarefaction_plots.html')

    # add the distance matrices
    valid = data_access.addMetaAnalysisFiles(True, int(meta_id), web_link,
                                             'ARARE', run_date, 'ARARE')
    if not valid:
        raise ValueError, 'There was an issue uploading the filepaths to the DB!'
def run_ampliconnoise(mapping_fp,
                      output_dir,
                      command_handler,
                      params,
                      qiime_config,
                      logger=None,
                      status_update_callback=print_to_stdout,
                      chimera_alpha=-3.8228,
                      chimera_beta=0.6200,
                      sff_txt_fp=None,
                      numnodes=2,
                      suppress_perseus=True,
                      output_filepath=None,
                      platform='flx',
                      seqnoise_resolution=None,
                      truncate_len=None):
    """ Run the ampliconnoise pipeline

        The steps performed by this function are:
        1. Split input sff.txt file into one file per sample

        2. Run scripts required for PyroNoise

        3. Run scripts required for SeqNoise

        4. Run scripts requred for Perseus (chimera removal)

        5. Merge output files into one file similar to the output of
           split_libraries.py

        output_filepath should be absolute
        seqnoise_resolution should be string
        environment variable PYRO_LOOKUP_FILE must be set correctly. Thus
        be careful passing command handlers that don't spawn child processes,
        as they may not inherit the correct environment variable setting

        chimera_alpha/chimera_beta: parameters forwarded to Class.pl when
         Perseus chimera removal is enabled (suppress_perseus=False)
        numnodes: number of MPI processes for PyroDist/PyroNoise/SeqDist/
         SeqNoise
        platform: 'flx' or 'titanium'; selects defaults for
         seqnoise_resolution and truncate_len and the flow-cleaning script
        truncate_len: string (used in filenames and as a Parse.pl argument)

        Side effects: chdirs into output_dir and writes map.csv there.
        Returns None.
    """
    map_data, headers, comments = parse_mapping_file(open(mapping_fp, 'U'))
    create_dir(output_dir)

    if seqnoise_resolution == None:
        if platform == 'flx':
            seqnoise_resolution = '30.0'
        elif platform == 'titanium':
            seqnoise_resolution = '25.0'
        else:
            raise RuntimeError('seqnoise_resolution not set, and no' +
                               ' default for platform ' + platform)

    if truncate_len == None:
        if platform == 'flx':
            truncate_len = '220'
        elif platform == 'titanium':
            truncate_len = '400'
        else:
            raise RuntimeError('truncate_len not set, and no' +
                               ' default for platform ' + platform)

    # these are filenames minus extension, and are sample IDs
    sample_names = []
    primer_seqs = []  # same order as sample_names
    bc_seqs = []  # same order as sample_names
    for i in range(len(map_data)):
        sample_names.append(map_data[i][headers.index('SampleID')])
        bc_seqs.append(map_data[i][headers.index('BarcodeSequence')])
        # don't know why don't just take off the primer now.
        # but that's done later
        # primer += (map_data[i][headers.index('LinkerPrimerSequence')])
        # for char, bases in IUPAC_DNA_ambiguities.items():
        #     primer = primer.replace(char,'['+''.join(bases)+']')

        # expand IUPAC ambiguity codes into regex character classes
        primer = (map_data[i][headers.index('LinkerPrimerSequence')])
        for char, bases in IUPAC_DNA_ambiguities.items():
            primer = primer.replace(char, '[' + ''.join(bases) + ']')
        primer_seqs.append(primer)

    if len(set(primer_seqs)) != 1:
        raise RuntimeError(
            'Error: only one primer per mapping file supported.')
    one_primer = primer_seqs[0]

    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()

    if logger == None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    log_input_md5s(logger, [mapping_fp, sff_txt_fp])

    # execute commands in output_dir
    called_dir = os.getcwd()  # remembered so sff_txt_fp can be resolved later
    os.chdir(output_dir)
    fh = open(os.path.join(output_dir, 'map.csv'), 'w')
    for i in range(len(sample_names)):
        fh.write(sample_names[i] + ',' + bc_seqs[i] + '\n')
    fh.close()

    # these are the fasta results, e.g. PC.636_Good.fa
    # later we merge them and copy to output file
    post_pyro_tail = '_' + truncate_len
    if suppress_perseus == True:
        fasta_result_names = [sample_name + post_pyro_tail +
                              '_seqnoise_cd.fa'
                              for sample_name in sample_names]
    else:
        fasta_result_names = [sample_name + '_Good.fa'
                              for sample_name in sample_names]

    cmd = 'cd ' + output_dir  # see also os.chdir above
    commands.append([('change to output dir', cmd)])

    cmd = 'echo $PYRO_LOOKUP_FILE > pyro_lookup_filepath.txt'
    commands.append([('confirm pyro lookup filepath environment variable',
                      cmd)])

    cmd = 'SplitKeys.pl ' + one_primer + ' map.csv < ' +\
        os.path.join(called_dir, sff_txt_fp) +\
        ' > splitkeys_log.txt 2> unassigned.fna'
    commands.append([('split sff.txt via barcodes (keys)', cmd)])

    for i, sample_name in enumerate(sample_names):
        # Build the summarize taxonomy command
        if platform == 'flx':
            cmd = 'Clean360.pl ' + one_primer + ' ' + sample_name + ' < ' +\
                sample_name + '.raw'
            commands.append([('clean flows ' + sample_name, cmd)])

            # these run through the whole sff file once per sample, I think
            # cmd = "FlowsFA.pl " + primer_seqs[i] + ' '+sample_name +' < '+\
            #     os.path.join(called_dir,sff_txt_fp)
            # commands.append([('extract flows '+sample_name, cmd)])
        elif platform == 'titanium':
            cmd = 'CleanMinMax.pl ' + one_primer + ' ' + sample_name + ' < ' +\
                sample_name + '.raw'
            commands.append([('clean flows ' + sample_name, cmd)])

            # cmd = "FlowsMinMax.pl " + primer_seqs[i] + ' '+sample_name +' < '+\
            #     os.path.join(called_dir,sff_txt_fp)
            # commands.append([('extract flows '+sample_name, cmd)])
        else:
            raise RuntimeError("platform " + platform + " not supported")

        cmd = "mpirun -np " + str(numnodes) + " PyroDist -in " +\
            sample_name + ".dat -out " + sample_name + " > " +\
            sample_name + ".pdout"
        commands.append([('pyrodist ' + sample_name, cmd)])

        cmd = "FCluster -in " + sample_name + ".fdist -out " + sample_name +\
            " > " + sample_name + ".fcout"
        commands.append([('fcluster pyrodist ' + sample_name, cmd)])

        # e.g.:
        # mpirun -np 2 PyroNoise -din PC.354.dat -out PC.354_pyronoise -lin
        # PC.354.list -s 60.0 -c 0.01 > PC.354_pyronoise.pnout
        cmd = "mpirun -np " + str(numnodes) + " PyroNoise -din " +\
            sample_name + ".dat -out " +\
            sample_name + "_pyronoise " + "-lin " +\
            sample_name + ".list -s 60.0 -c 0.01 > " +\
            sample_name + "_pyronoise.pnout"
        commands.append([('pyronoise ' + sample_name, cmd)])

        cmd = 'Parse.pl ' + bc_seqs[i] + one_primer + ' ' + truncate_len + ' < ' +\
            sample_name + '_pyronoise_cd.fa' + ' > ' + sample_name + '_' +\
            truncate_len + '.fa'
        commands.append([('truncate ' + sample_name, cmd)])

        # now start with post_pyro_tail
        cmd = "mpirun -np " + str(numnodes) + " SeqDist -in " +\
            sample_name + post_pyro_tail +\
            ".fa > " + sample_name + post_pyro_tail + ".seqdist"
        commands.append([('seqdist ' + sample_name, cmd)])

        cmd = "FCluster -in " + sample_name + post_pyro_tail + ".seqdist -out " +\
            sample_name + post_pyro_tail + "fcl > " +\
            sample_name + post_pyro_tail + ".fcout"
        commands.append([('fcluster seqdist ' + sample_name, cmd)])

        # e.g.:
        # mpirun -np 2 SeqNoise -in PC.354_pyronoise_cd.fa -din
        # PC.354_pyronoise_cd.seqdist -out PC.354_pyronoise_cd_seqnoise -lin
        # PC.354_pyronoise_cdfcl.list -min PC.354_pyronoise.mapping -s 30.0 -c 0.08 >
        # PC.354_pyronoise_cd.snout
        cmd = "mpirun -np " + str(numnodes) + " SeqNoise -in " +\
            sample_name + post_pyro_tail +\
            ".fa -din " + sample_name + post_pyro_tail + ".seqdist -out " +\
            sample_name + post_pyro_tail +\
            "_seqnoise -lin " + sample_name + post_pyro_tail + 'fcl.list -min ' +\
            sample_name + '_pyronoise' +\
            '.mapping -s ' + seqnoise_resolution + ' -c 0.08 > ' +\
            sample_name + post_pyro_tail + '.snout'
        commands.append([('seqnoise ' + sample_name, cmd)])

        if suppress_perseus == False:
            # Perseus-based chimera detection and removal
            cmd = 'Perseus -sin ' + sample_name + post_pyro_tail +\
                '_seqnoise_cd.fa > ' +\
                sample_name + '.per'
            commands.append([('Perseus ' + sample_name, cmd)])

            cmd = 'Class.pl ' + sample_name + '.per ' +\
                str(chimera_alpha) + ' ' + str(chimera_beta) +\
                ' > ' + sample_name + '.class'
            commands.append([('Class.pl ' + sample_name, cmd)])

            cmd = 'FilterGoodClass.pl ' + sample_name + post_pyro_tail +\
                '_seqnoise_cd.fa ' +\
                sample_name + '.class 0.5 > ' + sample_name + '_Chi.fa 2> ' +\
                sample_name + '_Good.fa'
            commands.append([('FilterGoodClass ' + sample_name, cmd)])

        cmd = '%s %s/unweight_fasta.py -i %s -o %s -l %s' %\
            (python_exe_fp, script_dir, fasta_result_names[i],
             sample_name + '_unw.fna', sample_name)
        commands.append([('unweight fasta ' + sample_name, cmd)])

    cmd = 'cat ' +\
        ' '.join([sample_name + '_unw.fna'
                  for sample_name in sample_names]) +\
        ' > ' + output_filepath  # this should be an abs filepath
    commands.append([('cat into one fasta file', cmd)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
def run_core_diversity_analyses(
        biom_fp,
        mapping_fp,
        sampling_depth,
        output_dir,
        qiime_config,
        command_handler=call_commands_serially,
        tree_fp=None,
        params=None,
        categories=None,
        arare_min_rare_depth=10,
        arare_num_steps=10,
        parallel=False,
        status_update_callback=print_to_stdout):
    """Run a full set of core diversity analyses and build an index page.

    The steps performed by this function are:
      1) Even-depth beta diversity through plots (plus per-category
         distance boxplots);
      2) Alpha rarefaction (plus per-category alpha diversity comparisons);
      3) Taxa summary plots (overall and per category);
      4) OTU category significance for each category.
    Links to all results are collected and written to
    <output_dir>/index.html.

    biom_fp: path to the input OTU table (biom format)
    mapping_fp: path to the metadata mapping file
    sampling_depth: even sampling depth (seqs/sample) for beta diversity;
     also the maximum rarefaction depth
    output_dir: directory where all results are written
    qiime_config: qiime config dict (provides python_exe_fp)
    command_handler: callable that executes the generated command lists
    tree_fp: optional phylogenetic tree for phylogenetic metrics
    params: parameters dict as returned by parse_qiime_parameters
    categories: mapping file column headers to analyze; each must exist
     and contain at least two values
    arare_min_rare_depth/arare_num_steps: alpha rarefaction settings
    parallel: pass parallel flags through to the sub-workflows
    status_update_callback: called with status strings as steps complete
    """
    if categories is not None:
        # Validate categories provided by the user before doing any work.
        mapping_data, mapping_comments = \
            parse_mapping_file_to_dict(open(mapping_fp, 'U'))
        metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                raise ValueError(
                    "Category '%s' is not a column header "
                    "in your mapping file. "
                    "Categories are case and white space sensitive. Valid "
                    "choices are: (%s)"
                    % (c, ', '.join(metadata_map.CategoryNames)))
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError(
                    "Category '%s' contains only one value. "
                    "Categories analyzed here require at least two values."
                    % c)
    else:
        categories = []

    # prep some variables
    if params is None:
        params = parse_qiime_parameters([])
    create_dir(output_dir)
    index_fp = '%s/index.html' % output_dir
    index_links = []
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()

    # begin logging
    log_fp = generate_log_fp(output_dir)
    index_links.append(('Master run log', log_fp, 'Log files'))
    logger = WorkflowLogger(log_fp, params=params, qiime_config=qiime_config)
    input_fps = [biom_fp, mapping_fp]
    if tree_fp is not None:
        input_fps.append(tree_fp)
    log_input_md5s(logger, input_fps)

    bdiv_even_output_dir = '%s/bdiv_even%d/' % (output_dir, sampling_depth)
    even_dm_fps = run_beta_diversity_through_plots(
        otu_table_fp=biom_fp,
        mapping_fp=mapping_fp,
        output_dir=bdiv_even_output_dir,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        sampling_depth=sampling_depth,
        # force suppression of distance histograms - boxplots work better
        # in this context, and are created below.
        histogram_categories=[],
        tree_fp=tree_fp,
        parallel=parallel,
        logger=logger,
        status_update_callback=status_update_callback)

    for bdiv_metric, dm_fp in even_dm_fps:
        for category in categories:
            boxplots_output_dir = '%s/%s_boxplots/' % \
                (bdiv_even_output_dir, bdiv_metric)
            try:
                params_str = get_params_str(params['make_distance_boxplots'])
            except KeyError:
                params_str = ''
            boxplots_cmd = \
                'make_distance_boxplots.py -d %s -f %s -o %s -m %s -n 999 %s' %\
                (dm_fp, category, boxplots_output_dir, mapping_fp, params_str)
            commands.append([('Boxplots (%s)' % category, boxplots_cmd)])
            index_links.append(
                ('Distance boxplots (%s)' % bdiv_metric,
                 '%s/%s_Distances.pdf' % (boxplots_output_dir, category),
                 'Beta diversity results (even sampling: %d)' % sampling_depth))
            index_links.append(
                ('Distance boxplots statistics (%s)' % bdiv_metric,
                 '%s/%s_Stats.txt' % (boxplots_output_dir, category),
                 'Beta diversity results (even sampling: %d)' % sampling_depth))
        # per-metric links (independent of category)
        index_links.append(
            ('3D plot (%s, continuous coloring)' % bdiv_metric,
             '%s/%s_3d_continuous/%s_pc_3D_PCoA_plots.html' %
             (bdiv_even_output_dir, bdiv_metric, bdiv_metric),
             'Beta diversity results (even sampling: %d)' % sampling_depth))
        index_links.append(
            ('3D plot (%s, discrete coloring)' % bdiv_metric,
             '%s/%s_3d_discrete/%s_pc_3D_PCoA_plots.html' %
             (bdiv_even_output_dir, bdiv_metric, bdiv_metric),
             'Beta diversity results (even sampling: %d)' % sampling_depth))
        index_links.append(
            ('2D plot (%s, continuous coloring)' % bdiv_metric,
             '%s/%s_2d_continuous/%s_pc_2D_PCoA_plots.html' %
             (bdiv_even_output_dir, bdiv_metric, bdiv_metric),
             'Beta diversity results (even sampling: %d)' % sampling_depth))
        index_links.append(
            ('2D plot (%s, discrete coloring)' % bdiv_metric,
             '%s/%s_2d_discrete/%s_pc_2D_PCoA_plots.html' %
             (bdiv_even_output_dir, bdiv_metric, bdiv_metric),
             'Beta diversity results (even sampling: %d)' % sampling_depth))
        index_links.append(
            ('Distance matrix (%s)' % bdiv_metric,
             '%s/%s_dm.txt' % (bdiv_even_output_dir, bdiv_metric),
             'Beta diversity results (even sampling: %d)' % sampling_depth))
        index_links.append(
            ('Principal coordinate matrix (%s)' % bdiv_metric,
             '%s/%s_pc.txt' % (bdiv_even_output_dir, bdiv_metric),
             'Beta diversity results (even sampling: %d)' % sampling_depth))

    ## Alpha rarefaction workflow
    arare_full_output_dir = '%s/arare_max%d/' % (output_dir, sampling_depth)
    run_qiime_alpha_rarefaction(
        otu_table_fp=biom_fp,
        mapping_fp=mapping_fp,
        output_dir=arare_full_output_dir,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        tree_fp=tree_fp,
        num_steps=arare_num_steps,
        parallel=parallel,
        logger=logger,
        min_rare_depth=arare_min_rare_depth,
        max_rare_depth=sampling_depth,
        status_update_callback=status_update_callback)

    index_links.append(
        ('Alpha rarefaction plots',
         '%s/alpha_rarefaction_plots/rarefaction_plots.html'
         % arare_full_output_dir,
         "Alpha rarefaction results"))

    collated_alpha_diversity_fps = \
        glob('%s/alpha_div_collated/*txt' % arare_full_output_dir)
    try:
        params_str = get_params_str(params['compare_alpha_diversity'])
    except KeyError:
        params_str = ''
    for c in categories:
        for collated_alpha_diversity_fp in collated_alpha_diversity_fps:
            alpha_metric = splitext(split(collated_alpha_diversity_fp)[1])[0]
            alpha_comparison_output_fp = '%s/%s_%s.txt' % \
                (arare_full_output_dir, c, alpha_metric)
            compare_alpha_cmd = \
                'compare_alpha_diversity.py -i %s -m %s -c %s -d %s -o %s -n 999 %s' %\
                (collated_alpha_diversity_fp, mapping_fp, c, sampling_depth,
                 alpha_comparison_output_fp, params_str)
            # NOTE: the labels below previously interpolated the stale
            # `category` variable from the boxplots loop above; `c` is the
            # category actually analyzed here.
            commands.append([('Compare alpha diversity (%s, %s)' %
                              (c, alpha_metric),
                              compare_alpha_cmd)])
            index_links.append(
                ('Alpha diversity statistics (%s, %s)' % (c, alpha_metric),
                 alpha_comparison_output_fp,
                 "Alpha rarefaction results"))

    taxa_plots_output_dir = '%s/taxa_plots/' % output_dir
    run_summarize_taxa_through_plots(
        otu_table_fp=biom_fp,
        mapping_fp=mapping_fp,
        output_dir=taxa_plots_output_dir,
        mapping_cat=None,
        sort=True,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        logger=logger,
        status_update_callback=status_update_callback)

    index_links.append(
        ('Taxa summary bar plots',
         '%s/taxa_summary_plots/bar_charts.html' % taxa_plots_output_dir,
         "Taxonomic summary results"))
    index_links.append(
        ('Taxa summary area plots',
         '%s/taxa_summary_plots/area_charts.html' % taxa_plots_output_dir,
         "Taxonomic summary results"))
    for c in categories:
        taxa_plots_output_dir = '%s/taxa_plots_%s/' % (output_dir, c)
        run_summarize_taxa_through_plots(
            otu_table_fp=biom_fp,
            mapping_fp=mapping_fp,
            output_dir=taxa_plots_output_dir,
            mapping_cat=c,
            sort=True,
            command_handler=command_handler,
            params=params,
            qiime_config=qiime_config,
            logger=logger,
            status_update_callback=status_update_callback)
        index_links.append(
            ('Taxa summary bar plots',
             '%s/taxa_summary_plots/bar_charts.html' % taxa_plots_output_dir,
             "Taxonomic summary results (by %s)" % c))
        index_links.append(
            ('Taxa summary area plots',
             '%s/taxa_summary_plots/area_charts.html' % taxa_plots_output_dir,
             "Taxonomic summary results (by %s)" % c))

    # OTU category significance
    for category in categories:
        category_signifance_fp = \
            '%s/category_significance_%s.txt' % (output_dir, category)
        try:
            params_str = get_params_str(params['otu_category_significance'])
        except KeyError:
            params_str = ''
        # Build the OTU category significance command
        category_significance_cmd = \
            'otu_category_significance.py -i %s -m %s -c %s -o %s %s' %\
            (biom_fp, mapping_fp, category, category_signifance_fp, params_str)
        commands.append([('OTU category significance (%s)' % category,
                          category_significance_cmd)])
        index_links.append(('Category significance (%s)' % category,
                            category_signifance_fp,
                            "Category results"))

    command_handler(commands, status_update_callback, logger)
    generate_index_page(index_links, index_fp)
def run_beta_diversity_through_plots(otu_table_fp,
                                     mapping_fp,
                                     output_dir,
                                     command_handler,
                                     params,
                                     qiime_config,
                                     color_by_interesting_fields_only=True,
                                     sampling_depth=None,
                                     histogram_categories=None,
                                     tree_fp=None,
                                     parallel=False,
                                     logger=None,
                                     suppress_3d_plots=False,
                                     suppress_2d_plots=False,
                                     suppress_md5=False,
                                     status_update_callback=print_to_stdout):
    """ Run the data preparation steps of Qiime

        The steps performed by this function are:
         1) Compute a beta diversity distance matrix;
         2) Peform a principal coordinates analysis on the result of Step 1;
         3) Generate a 3D prefs file for optimized coloring of continuous
            variables;
         4) Generate a 3D plot for all mapping fields with colors optimized
            for continuous data;
         5) Generate a 3D plot for all mapping fields with colors optimized
            for discrete data.

        Returns a list of (metric, distance_matrix_fp) tuples, one per
        beta diversity metric.
    """
    # Prepare some variables for the later steps
    otu_table_dir, otu_table_filename = split(otu_table_fp)
    otu_table_basename, otu_table_ext = splitext(otu_table_filename)
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        # caller owns the logger; leave it open
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp, tree_fp])

    mapping_data, mapping_header, mapping_comments = \
        parse_mapping_file(open(mapping_fp, 'U'))
    if histogram_categories:
        invalid_categories = set(histogram_categories) - set(mapping_header)
        if invalid_categories:
            raise ValueError(
                "Invalid histogram categories - these must exactly match " +
                "mapping file column headers: %s"
                % (' '.join(invalid_categories)))
    # Get the interesting mapping fields to color by -- if none are
    # interesting, take all of them. Interesting is defined as those
    # which have greater than one value and fewer values than the number
    # of samples
    if color_by_interesting_fields_only:
        mapping_fields = \
            get_interesting_mapping_fields(mapping_data, mapping_header) or \
            mapping_header
    else:
        mapping_fields = mapping_header
    mapping_fields = ','.join(mapping_fields)

    if sampling_depth:
        # Sample the OTU table at even depth
        even_sampled_otu_table_fp = '%s/%s_even%d%s' %\
            (output_dir, otu_table_basename, sampling_depth, otu_table_ext)
        single_rarefaction_cmd = \
            '%s %s/single_rarefaction.py -i %s -o %s -d %d' %\
            (python_exe_fp, script_dir, otu_table_fp,
             even_sampled_otu_table_fp, sampling_depth)
        commands.append([
            ('Sample OTU table at %d seqs/sample' % sampling_depth,
             single_rarefaction_cmd)])
        # all downstream steps operate on the rarefied table
        otu_table_fp = even_sampled_otu_table_fp
        otu_table_dir, otu_table_filename = split(even_sampled_otu_table_fp)
        otu_table_basename, otu_table_ext = splitext(otu_table_filename)
    try:
        beta_diversity_metrics = params['beta_diversity']['metrics'].split(',')
    except KeyError:
        beta_diversity_metrics = ['weighted_unifrac', 'unweighted_unifrac']

    # Prep the 3d prefs file generator command
    prefs_fp = '%s/prefs.txt' % output_dir
    try:
        params_str = get_params_str(params['make_prefs_file'])
    except KeyError:
        params_str = ''
    # NOTE: previously this indexed params['make_prefs_file'] directly,
    # which raised KeyError when that section was absent even though the
    # try/except above deliberately tolerates its absence.
    if 'mapping_headers_to_use' not in params.get('make_prefs_file', {}):
        params_str = '%s --mapping_headers_to_use %s' \
            % (params_str, mapping_fields)
    # Build the 3d prefs file generator command
    prefs_cmd = \
        '%s %s/make_prefs_file.py -m %s -o %s %s' %\
        (python_exe_fp, script_dir, mapping_fp, prefs_fp, params_str)
    commands.append([('Build prefs file', prefs_cmd)])

    dm_fps = []
    for beta_diversity_metric in beta_diversity_metrics:
        # Prep the beta-diversity command
        try:
            bdiv_params_copy = params['beta_diversity'].copy()
        except KeyError:
            bdiv_params_copy = {}
        try:
            # one metric per run; drop the aggregate metrics entry
            del bdiv_params_copy['metrics']
        except KeyError:
            pass
        params_str = get_params_str(bdiv_params_copy)
        if tree_fp:
            params_str = '%s -t %s ' % (params_str, tree_fp)
        # Build the beta-diversity command
        if parallel:
            # Grab the parallel-specific parameters
            try:
                params_str += get_params_str(params['parallel'])
            except KeyError:
                pass
            beta_div_cmd = '%s %s/parallel_beta_diversity.py -i %s -o %s --metrics %s -T %s' %\
                (python_exe_fp, script_dir, otu_table_fp,
                 output_dir, beta_diversity_metric, params_str)
            commands.append(
                [('Beta Diversity (%s)' % beta_diversity_metric,
                  beta_div_cmd)])
        else:
            beta_div_cmd = '%s %s/beta_diversity.py -i %s -o %s --metrics %s %s' %\
                (python_exe_fp, script_dir, otu_table_fp,
                 output_dir, beta_diversity_metric, params_str)
            commands.append(
                [('Beta Diversity (%s)' % beta_diversity_metric,
                  beta_div_cmd)])

        # beta_diversity.py names the output after the input table; rename
        # it to the <metric>_dm.txt convention used downstream
        orig_beta_div_fp = '%s/%s_%s.txt' % \
            (output_dir, beta_diversity_metric, otu_table_basename)
        beta_div_fp = '%s/%s_dm.txt' % \
            (output_dir, beta_diversity_metric)
        commands.append([
            ('Rename distance matrix (%s)' % beta_diversity_metric,
             'mv %s %s' % (orig_beta_div_fp, beta_div_fp))])
        dm_fps.append((beta_diversity_metric, beta_div_fp))

        # Prep the principal coordinates command
        pc_fp = '%s/%s_pc.txt' % (output_dir, beta_diversity_metric)
        try:
            params_str = get_params_str(params['principal_coordinates'])
        except KeyError:
            params_str = ''
        # Build the principal coordinates command
        pc_cmd = '%s %s/principal_coordinates.py -i %s -o %s %s' %\
            (python_exe_fp, script_dir, beta_div_fp, pc_fp, params_str)
        commands.append(
            [('Principal coordinates (%s)' % beta_diversity_metric, pc_cmd)])

        # Generate 3d plots
        if not suppress_3d_plots:
            # Prep the continuous-coloring 3d plots command
            continuous_3d_dir = '%s/%s_3d_continuous/' %\
                (output_dir, beta_diversity_metric)
            create_dir(continuous_3d_dir)
            try:
                params_str = get_params_str(params['make_3d_plots'])
            except KeyError:
                params_str = ''
            # Build the continuous-coloring 3d plots command
            continuous_3d_command = \
                '%s %s/make_3d_plots.py -p %s -i %s -o %s -m %s %s' %\
                (python_exe_fp, script_dir, prefs_fp, pc_fp,
                 continuous_3d_dir, mapping_fp, params_str)

            # Prep the discrete-coloring 3d plots command
            discrete_3d_dir = '%s/%s_3d_discrete/' %\
                (output_dir, beta_diversity_metric)
            create_dir(discrete_3d_dir)
            try:
                params_str = get_params_str(params['make_3d_plots'])
            except KeyError:
                params_str = ''
            # Build the discrete-coloring 3d plots command
            discrete_3d_command = \
                '%s %s/make_3d_plots.py -b "%s" -i %s -o %s -m %s %s' %\
                (python_exe_fp, script_dir, mapping_fields, pc_fp,
                 discrete_3d_dir, mapping_fp, params_str)

            commands.append([
                ('Make 3D plots (continuous coloring, %s)' %
                 beta_diversity_metric, continuous_3d_command),
                ('Make 3D plots (discrete coloring, %s)' %
                 beta_diversity_metric, discrete_3d_command,)])

        # Generate 2d plots
        if not suppress_2d_plots:
            # Prep the continuous-coloring 2d plots command
            continuous_2d_dir = '%s/%s_2d_continuous/' %\
                (output_dir, beta_diversity_metric)
            create_dir(continuous_2d_dir)
            try:
                params_str = get_params_str(params['make_2d_plots'])
            except KeyError:
                params_str = ''
            # Build the continuous-coloring 2d plots command
            continuous_2d_command = \
                '%s %s/make_2d_plots.py -p %s -i %s -o %s -m %s %s' %\
                (python_exe_fp, script_dir, prefs_fp, pc_fp,
                 continuous_2d_dir, mapping_fp, params_str)

            # Prep the discrete-coloring 2d plots command
            discrete_2d_dir = '%s/%s_2d_discrete/' %\
                (output_dir, beta_diversity_metric)
            create_dir(discrete_2d_dir)
            try:
                params_str = get_params_str(params['make_2d_plots'])
            except KeyError:
                params_str = ''
            # Build the discrete-coloring 2d plots command
            discrete_2d_command = \
                '%s %s/make_2d_plots.py -b "%s" -i %s -o %s -m %s %s' %\
                (python_exe_fp, script_dir, mapping_fields, pc_fp,
                 discrete_2d_dir, mapping_fp, params_str)

            commands.append([
                ('Make 2D plots (continuous coloring, %s)' %
                 beta_diversity_metric, continuous_2d_command),
                ('Make 2D plots (discrete coloring, %s)' %
                 beta_diversity_metric, discrete_2d_command,)])

        if histogram_categories:
            # Prep the distance histograms command
            histograms_dir = '%s/%s_histograms/' %\
                (output_dir, beta_diversity_metric)
            create_dir(histograms_dir)
            try:
                params_str = get_params_str(params['make_distance_histograms'])
            except KeyError:
                params_str = ''
            # Build the make_distance_histograms command
            distance_histograms_command = \
                '%s %s/make_distance_histograms.py -d %s -o %s -m %s -f "%s" %s' %\
                (python_exe_fp, script_dir, beta_div_fp, histograms_dir,
                 mapping_fp, ','.join(histogram_categories), params_str)
            commands.append([
                ('Make Distance Histograms (%s)' % beta_diversity_metric,
                 distance_histograms_command)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)

    return dm_fps
def run_alpha_rarefaction(otu_table_fp,
                          mapping_fp,
                          output_dir,
                          command_handler,
                          params,
                          qiime_config,
                          tree_fp=None,
                          num_steps=10,
                          parallel=False,
                          logger=None,
                          min_rare_depth=10,
                          max_rare_depth=None,
                          suppress_md5=False,
                          status_update_callback=print_to_stdout,
                          plot_stderr_and_stddev=False):
    """ Run the data preparation steps of Qiime

        The steps performed by this function are:
          1) Generate rarefied OTU tables;
          2) Compute alpha diversity metrics for each rarefied OTU table;
          3) Collate alpha diversity results;
          4) Generate alpha rarefaction plots.
    """
    # Prepare some variables for the later steps
    otu_table_dir, otu_table_filename = split(otu_table_fp)
    otu_table_basename, otu_table_ext = splitext(otu_table_filename)
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        # caller owns the logger; leave it open
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp, tree_fp])

    if max_rare_depth is None:
        # default the maximum depth to the median sequences/sample count
        min_count, max_count, median_count, mean_count, counts_per_sample =\
            compute_seqs_per_library_stats(
                parse_biom_table(open(otu_table_fp, 'U')))
        max_rare_depth = median_count
    # `or 1` guards against a zero step when the depth range < num_steps
    step = int((max_rare_depth - min_rare_depth) / num_steps) or 1
    max_rare_depth = int(max_rare_depth)

    rarefaction_dir = '%s/rarefaction/' % output_dir
    create_dir(rarefaction_dir)
    try:
        params_str = get_params_str(params['multiple_rarefactions'])
    except KeyError:
        params_str = ''
    if parallel:
        params_str += ' %s' % get_params_str(params['parallel'])
        # Build the rarefaction command
        rarefaction_cmd = \
            '%s %s/parallel_multiple_rarefactions.py -T -i %s -m %s -x %s -s %s -o %s %s' %\
            (python_exe_fp, script_dir, otu_table_fp, min_rare_depth,
             max_rare_depth, step, rarefaction_dir, params_str)
    else:
        # Build the rarefaction command
        rarefaction_cmd = \
            '%s %s/multiple_rarefactions.py -i %s -m %s -x %s -s %s -o %s %s' %\
            (python_exe_fp, script_dir, otu_table_fp, min_rare_depth,
             max_rare_depth, step, rarefaction_dir, params_str)
    commands.append([('Alpha rarefaction', rarefaction_cmd)])

    # Prep the alpha diversity command
    alpha_diversity_dir = '%s/alpha_div/' % output_dir
    create_dir(alpha_diversity_dir)
    try:
        params_str = get_params_str(params['alpha_diversity'])
    except KeyError:
        params_str = ''
    if tree_fp:
        params_str += ' -t %s' % tree_fp
    if parallel:
        params_str += ' %s' % get_params_str(params['parallel'])
        # Build the alpha diversity command
        alpha_diversity_cmd = \
            "%s %s/parallel_alpha_diversity.py -T -i %s -o %s %s" %\
            (python_exe_fp, script_dir, rarefaction_dir,
             alpha_diversity_dir, params_str)
    else:
        # Build the alpha diversity command
        alpha_diversity_cmd = \
            "%s %s/alpha_diversity.py -i %s -o %s %s" %\
            (python_exe_fp, script_dir, rarefaction_dir,
             alpha_diversity_dir, params_str)
    commands.append(
        [('Alpha diversity on rarefied OTU tables', alpha_diversity_cmd)])

    # Prep the alpha diversity collation command
    alpha_collated_dir = '%s/alpha_div_collated/' % output_dir
    create_dir(alpha_collated_dir)
    try:
        params_str = get_params_str(params['collate_alpha'])
    except KeyError:
        params_str = ''
    # Build the alpha diversity collation command
    alpha_collated_cmd = '%s %s/collate_alpha.py -i %s -o %s %s' %\
        (python_exe_fp, script_dir, alpha_diversity_dir,
         alpha_collated_dir, params_str)
    commands.append([('Collate alpha', alpha_collated_cmd)])

    # Prep the make rarefaction plot command(s)
    try:
        params_str = get_params_str(params['make_rarefaction_plots'])
    except KeyError:
        params_str = ''
    # NOTE: previously this indexed params['make_rarefaction_plots']
    # directly, raising KeyError when the section was absent even though
    # the try/except above deliberately tolerates its absence.
    if 'std_type' in params.get('make_rarefaction_plots', {}) \
            or not plot_stderr_and_stddev:
        rarefaction_plot_dir = '%s/alpha_rarefaction_plots/' % output_dir
        create_dir(rarefaction_plot_dir)
        # Build the make rarefaction plot command(s)
        make_rarefaction_plot_cmd =\
            '%s %s/make_rarefaction_plots.py -i %s -m %s -o %s %s' %\
            (python_exe_fp, script_dir, alpha_collated_dir, mapping_fp,
             rarefaction_plot_dir, params_str)
        commands.append(
            [('Rarefaction plot: %s' % 'All metrics',
              make_rarefaction_plot_cmd)])
    else:
        # generate separate stddev- and stderr-based plot sets
        rarefaction_plot_dir_stddev = \
            '%s/alpha_rarefaction_plots_stddev/' % output_dir
        rarefaction_plot_dir_stderr = \
            '%s/alpha_rarefaction_plots_stderr/' % output_dir
        create_dir(rarefaction_plot_dir_stddev)
        create_dir(rarefaction_plot_dir_stderr)
        # Build the make rarefaction plot command(s)
        make_rarefaction_plot_cmd =\
            '%s %s/make_rarefaction_plots.py -i %s -m %s -o %s %s --std_type stddev' %\
            (python_exe_fp, script_dir, alpha_collated_dir, mapping_fp,
             rarefaction_plot_dir_stddev, params_str)
        commands.append(
            [('Rarefaction plot: %s' % 'All metrics',
              make_rarefaction_plot_cmd)])
        make_rarefaction_plot_cmd =\
            '%s %s/make_rarefaction_plots.py -i %s -m %s -o %s %s --std_type stderr' %\
            (python_exe_fp, script_dir, alpha_collated_dir, mapping_fp,
             rarefaction_plot_dir_stderr, params_str)
        commands.append(
            [('Rarefaction plot: %s' % 'All metrics',
              make_rarefaction_plot_cmd)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
def run_jackknifed_beta_diversity(otu_table_fp,
                                  tree_fp,
                                  seqs_per_sample,
                                  output_dir,
                                  command_handler,
                                  params,
                                  qiime_config,
                                  mapping_fp,
                                  parallel=False,
                                  logger=None,
                                  suppress_md5=False,
                                  status_update_callback=print_to_stdout,
                                  master_tree=None):
    """ Run the data preparation steps of Qiime

        The steps performed by this function are:
          1) Compute beta diversity distance matrix from otu table (and
             tree, if applicable)
          2) Build rarefied OTU tables;
          3) Build UPGMA tree from full distance matrix;
          4) Compute distance matrics for rarefied OTU tables;
          5) Build UPGMA trees from rarefied OTU table distance matrices;
          5.5) Build a consensus tree from the rarefied UPGMA trees
          6) Compare rarefied OTU table distance matrix UPGMA trees
             to tree full UPGMA tree and write support file and newick tree
             with support values as node labels.

        master_tree can be 'full' or 'consensus', default full
    """
    # Prepare some variables for the later steps
    if master_tree is None:
        master_tree = 'full'
    otu_table_dir, otu_table_filename = split(otu_table_fp)
    otu_table_basename, otu_table_ext = splitext(otu_table_filename)
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        # caller owns the logger; leave it open
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp, tree_fp])

    try:
        beta_diversity_metrics = params['beta_diversity']['metrics'].split(',')
    except KeyError:
        beta_diversity_metrics = ['weighted_unifrac', 'unweighted_unifrac']

    # Prep the beta-diversity command
    try:
        params_str = get_params_str(params['beta_diversity'])
    except KeyError:
        params_str = ''
    if tree_fp:
        params_str = '%s -t %s' % (params_str, tree_fp)
    # Build the beta-diversity command
    beta_div_cmd = '%s %s/beta_diversity.py -i %s -o %s %s' %\
        (python_exe_fp, script_dir, otu_table_fp, output_dir, params_str)
    commands.append(
        [('Beta Diversity (%s)' % ', '.join(beta_diversity_metrics),
          beta_div_cmd)])

    # Prep rarefaction command
    rarefaction_dir = '%s/rarefaction/' % output_dir
    create_dir(rarefaction_dir)
    try:
        params_str = get_params_str(params['multiple_rarefactions_even_depth'])
    except KeyError:
        params_str = ''
    # Build the rarefaction command
    rarefaction_cmd = \
        '%s %s/multiple_rarefactions_even_depth.py -i %s -d %d -o %s %s' %\
        (python_exe_fp, script_dir, otu_table_fp, seqs_per_sample,
         rarefaction_dir, params_str)
    commands.append([('Rarefaction', rarefaction_cmd)])

    # Begin iterating over beta diversity distance metrics, if more than one
    # was provided
    for beta_diversity_metric in beta_diversity_metrics:
        metric_output_dir = '%s/%s/' % (output_dir, beta_diversity_metric)
        distance_matrix_fp = '%s/%s_%s.txt' % \
            (output_dir, beta_diversity_metric, otu_table_basename)

        # Prep the hierarchical clustering command (for full distance matrix)
        full_tree_fp = '%s/%s_upgma.tre' % (metric_output_dir,
                                            otu_table_basename)
        try:
            params_str = get_params_str(params['upgma_cluster'])
        except KeyError:
            params_str = ''
        # Build the hierarchical clustering command (for full distance matrix)
        hierarchical_cluster_cmd = '%s %s/upgma_cluster.py -i %s -o %s %s' %\
            (python_exe_fp, script_dir, distance_matrix_fp,
             full_tree_fp, params_str)
        commands.append(
            [('UPGMA on full distance matrix: %s' % beta_diversity_metric,
              hierarchical_cluster_cmd)])

        # Prep the beta diversity command (for rarefied OTU tables)
        dm_dir = '%s/rare_dm/' % metric_output_dir
        create_dir(dm_dir)
        # the metrics parameter needs to be ignored as we need to run
        # beta_diversity one metric at a time to keep the per-metric
        # output files in separate directories
        # NOTE: the original try/except here left `d` undefined (NameError)
        # when params had no 'beta_diversity' section; get/pop avoids that.
        d = params.get('beta_diversity', {}).copy()
        d.pop('metrics', None)
        params_str = get_params_str(d) + ' -m %s ' % beta_diversity_metric
        if tree_fp:
            params_str = '%s -t %s' % (params_str, tree_fp)
        if parallel:
            params_str += ' %s' % get_params_str(params['parallel'])
            # Build the parallel beta diversity command (for rarefied
            # OTU tables)
            beta_div_rarefied_cmd = \
                '%s %s/parallel_beta_diversity.py -T -i %s -o %s %s' %\
                (python_exe_fp, script_dir, rarefaction_dir, dm_dir,
                 params_str)
        else:
            # Build the serial beta diversity command (for rarefied
            # OTU tables)
            beta_div_rarefied_cmd = \
                '%s %s/beta_diversity.py -i %s -o %s %s' %\
                (python_exe_fp, script_dir, rarefaction_dir, dm_dir,
                 params_str)
        commands.append(
            [('Beta diversity on rarefied OTU tables (%s)' %
              beta_diversity_metric, beta_div_rarefied_cmd)])

        # Prep the hierarchical clustering command (for rarefied
        # distance matrices)
        upgma_dir = '%s/rare_upgma/' % metric_output_dir
        create_dir(upgma_dir)
        try:
            params_str = get_params_str(params['upgma_cluster'])
        except KeyError:
            params_str = ''
        # Build the hierarchical clustering command (for rarefied
        # distance matrices)
        hierarchical_cluster_cmd =\
            '%s %s/upgma_cluster.py -i %s -o %s %s' %\
            (python_exe_fp, script_dir, dm_dir, upgma_dir, params_str)
        commands.append(
            [('UPGMA on rarefied distance matrix (%s)' %
              beta_diversity_metric, hierarchical_cluster_cmd)])

        # Build the consensus tree command (reuses upgma_cluster params_str)
        consensus_tree_cmd =\
            '%s %s/consensus_tree.py -i %s -o %s %s' %\
            (python_exe_fp, script_dir, upgma_dir,
             upgma_dir + "/consensus.tre", params_str)
        commands.append(
            [('consensus on rarefied distance matrices (%s)' %
              beta_diversity_metric, consensus_tree_cmd)])

        # Prep the tree compare command
        tree_compare_dir = '%s/upgma_cmp/' % metric_output_dir
        create_dir(tree_compare_dir)
        try:
            params_str = get_params_str(params['tree_compare'])
        except KeyError:
            params_str = ''
        # Build the tree compare command
        if master_tree == "full":
            master_tree_fp = full_tree_fp
        elif master_tree == "consensus":
            master_tree_fp = upgma_dir + "/consensus.tre"
        else:
            raise RuntimeError('master tree method "%s" not found'
                               % (master_tree, ))
        tree_compare_cmd = '%s %s/tree_compare.py -s %s -m %s -o %s %s' %\
            (python_exe_fp, script_dir, upgma_dir, master_tree_fp,
             tree_compare_dir, params_str)
        commands.append(
            [('Tree compare (%s)' % beta_diversity_metric,
              tree_compare_cmd)])

        # Prep the PCoA command
        pcoa_dir = '%s/pcoa/' % metric_output_dir
        create_dir(pcoa_dir)
        try:
            params_str = get_params_str(params['principal_coordinates'])
        except KeyError:
            params_str = ''
        # Build the PCoA command
        pcoa_cmd = '%s %s/principal_coordinates.py -i %s -o %s %s' %\
            (python_exe_fp, script_dir, dm_dir, pcoa_dir, params_str)
        commands.append(
            [('Principal coordinates (%s)' % beta_diversity_metric,
              pcoa_cmd)])

        # Prep the 2D plots command
        plots_2d_dir = '%s/2d_plots/' % metric_output_dir
        create_dir(plots_2d_dir)
        try:
            params_str = get_params_str(params['make_2d_plots'])
        except KeyError:
            params_str = ''
        # Build the 2d plots command
        plots_2d_cmd = '%s %s/make_2d_plots.py -i %s -o %s -m %s %s' %\
            (python_exe_fp, script_dir, pcoa_dir, plots_2d_dir,
             mapping_fp, params_str)
        commands.append(
            [('2d plots (%s)' % beta_diversity_metric, plots_2d_cmd)])

        # Prep the 3D plots command
        plots_3d_dir = '%s/3d_plots/' % metric_output_dir
        create_dir(plots_3d_dir)
        try:
            params_str = get_params_str(params['make_3d_plots'])
        except KeyError:
            params_str = ''
        # Build the 3d plots command
        plots_3d_cmd = '%s %s/make_3d_plots.py -i %s -o %s -m %s %s' %\
            (python_exe_fp, script_dir, pcoa_dir, plots_3d_dir,
             mapping_fp, params_str)
        commands.append(
            [('3d plots (%s)' % beta_diversity_metric, plots_3d_cmd)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
def run_beta_diversity_through_plots(otu_table_fp, mapping_fp, output_dir,
                                     command_handler, params, qiime_config,
                                     color_by_interesting_fields_only=True,
                                     sampling_depth=None, tree_fp=None,
                                     parallel=False, logger=None,
                                     suppress_emperor_plots=False,
                                     suppress_md5=False,
                                     status_update_callback=print_to_stdout):
    """Compute beta diversity distance matrices, run PCoA, and generate
    emperor plots.

    The steps performed by this function are:
     1) Compute a beta diversity distance matrix for each metric
     2) Perform a principal coordinates analysis on the result of step 1
     3) Generate an emperor plot for each result of step 2

    otu_table_fp: path to the input OTU table
    mapping_fp: path to the sample metadata mapping file
    output_dir: directory where results are written (created if needed)
    command_handler: callable that executes the accumulated shell commands
    params: dict of per-script parameter dicts (keyed by script name)
    qiime_config: QIIME configuration dict (supplies python_exe_fp)
    color_by_interesting_fields_only: if True, color plots only by mapping
        fields with more than one value and fewer values than samples
    sampling_depth: if truthy, first rarefy the OTU table to this depth
    tree_fp: optional tree filepath appended to the beta diversity command
    parallel: if True, use parallel_beta_diversity.py instead of the serial
        script
    logger: existing WorkflowLogger, or None to create one here (a logger
        created here is closed when the command handler succeeds)
    suppress_emperor_plots: if True, skip emperor plot generation
    suppress_md5: if True, skip logging md5 sums of the input files
    status_update_callback: progress-reporting callable

    Returns a list of (metric, distance_matrix_fp) tuples, one per metric.
    """
    # Prepare some variables for the later steps
    otu_table_dir, otu_table_filename = split(otu_table_fp)
    otu_table_basename, otu_table_ext = splitext(otu_table_filename)
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    # A logger created locally is owned by this call and closed on success;
    # a caller-supplied logger is left open for the caller to manage.
    if logger == None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp, tree_fp])

    mapping_data, mapping_header, mapping_comments =\
        parse_mapping_file(open(mapping_fp,'U'))

    # Get the interesting mapping fields to color by -- if none are
    # interesting, take all of them. Interesting is defined as those
    # which have greater than one value and fewer values than the number
    # of samples
    if color_by_interesting_fields_only:
        mapping_fields =\
            get_interesting_mapping_fields(mapping_data, mapping_header) or\
            mapping_header
    else:
        mapping_fields = mapping_header
    # NOTE(review): mapping_fields is computed but not interpolated into any
    # command below in this block — presumably consumed by make_emperor via
    # params; confirm before removing.
    mapping_fields = ','.join(mapping_fields)

    if sampling_depth:
        # Sample the OTU table at even depth
        even_sampled_otu_table_fp = '%s/%s_even%d%s' %\
            (output_dir, otu_table_basename, sampling_depth, otu_table_ext)
        single_rarefaction_cmd = \
            '%s %s/single_rarefaction.py -i %s -o %s -d %d' %\
            (python_exe_fp, script_dir, otu_table_fp,
             even_sampled_otu_table_fp, sampling_depth)
        commands.append([
            ('Sample OTU table at %d seqs/sample' % sampling_depth,
             single_rarefaction_cmd)])
        # All downstream steps operate on the rarefied table.
        otu_table_fp = even_sampled_otu_table_fp
        otu_table_dir, otu_table_filename = split(even_sampled_otu_table_fp)
        otu_table_basename, otu_table_ext = splitext(otu_table_filename)
    try:
        beta_diversity_metrics = params['beta_diversity']['metrics'].split(',')
    except KeyError:
        # Default metrics when none are configured.
        beta_diversity_metrics = ['weighted_unifrac', 'unweighted_unifrac']

    dm_fps = []
    for beta_diversity_metric in beta_diversity_metrics:
        # Prep the beta-diversity command; 'metrics' is stripped from the
        # params copy because it is passed explicitly via --metrics.
        try:
            bdiv_params_copy = params['beta_diversity'].copy()
        except KeyError:
            bdiv_params_copy = {}
        try:
            del bdiv_params_copy['metrics']
        except KeyError:
            pass
        params_str = get_params_str(bdiv_params_copy)
        if tree_fp:
            params_str = '%s -t %s ' % (params_str, tree_fp)
        # Build the beta-diversity command
        if parallel:
            # Grab the parallel-specific parameters
            try:
                params_str += get_params_str(params['parallel'])
            except KeyError:
                pass
            beta_div_cmd = '%s %s/parallel_beta_diversity.py -i %s -o %s --metrics %s -T %s' %\
                (python_exe_fp, script_dir, otu_table_fp,
                 output_dir, beta_diversity_metric, params_str)
            commands.append(\
                [('Beta Diversity (%s)' % beta_diversity_metric, beta_div_cmd)])
        else:
            beta_div_cmd = '%s %s/beta_diversity.py -i %s -o %s --metrics %s %s' %\
                (python_exe_fp, script_dir, otu_table_fp,
                 output_dir, beta_diversity_metric, params_str)
            commands.append(\
                [('Beta Diversity (%s)' % beta_diversity_metric, beta_div_cmd)])

        # beta_diversity.py names its output after the input table; rename it
        # to a stable '<metric>_dm.txt' path for downstream steps.
        orig_beta_div_fp = '%s/%s_%s.txt' % \
            (output_dir, beta_diversity_metric, otu_table_basename)
        beta_div_fp = '%s/%s_dm.txt' % \
            (output_dir, beta_diversity_metric)
        commands.append([
            ('Rename distance matrix (%s)' % beta_diversity_metric,
             'mv %s %s' % (orig_beta_div_fp, beta_div_fp))])
        dm_fps.append((beta_diversity_metric, beta_div_fp))

        # Prep the principal coordinates command
        pc_fp = '%s/%s_pc.txt' % (output_dir, beta_diversity_metric)
        try:
            params_str = get_params_str(params['principal_coordinates'])
        except KeyError:
            params_str = ''
        # Build the principal coordinates command
        pc_cmd = '%s %s/principal_coordinates.py -i %s -o %s %s' %\
            (python_exe_fp, script_dir, beta_div_fp, pc_fp, params_str)
        commands.append(\
            [('Principal coordinates (%s)' % beta_diversity_metric, pc_cmd)])

        # Generate emperor plots
        if not suppress_emperor_plots:
            # Prep the emperor plots command
            emperor_dir = '%s/%s_emperor_pcoa_plot/' % (output_dir, beta_diversity_metric)
            create_dir(emperor_dir)
            try:
                params_str = get_params_str(params['make_emperor'])
            except KeyError:
                params_str = ''
            # Build the continuous-coloring 3d plots command
            emperor_command = \
                'make_emperor.py -i %s -o %s -m %s %s' % (pc_fp, emperor_dir,
                                                          mapping_fp, params_str)
            # NOTE(review): the status label below has an unbalanced ')'
            # ("Make emperor plots, %s)") — cosmetic only, left unchanged here.
            commands.append([
                ('Make emperor plots, %s)' % beta_diversity_metric,
                 emperor_command)])

    # Call the command handler on the list of commands
    command_handler(commands, status_update_callback, logger=logger,
                    close_logger_on_success=close_logger_on_success)

    return dm_fps
def main():
    """Run QIIME's unit tests and script usage tests, and report results.

    Exit status: returns 0 only when every selected test category passed
    (mirroring the unittest module's convention of non-zero on failure).
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    # Both categories suppressed means there is nothing to do.
    if (opts.suppress_unit_tests and opts.suppress_script_usage_tests):
        option_parser.error(
            "You're suppressing both test types. Nothing to run.")

    test_dir = abspath(dirname(__file__))

    # A successful unittest run ends its stderr with "OK"; an
    # ApplicationNotFoundError anywhere indicates a missing external tool.
    unittest_good_pattern = re.compile('OK\s*$')
    application_not_found_pattern = re.compile('ApplicationNotFoundError')
    python_name = 'python'
    bad_tests = []
    missing_application_tests = []

    # Run through all of QIIME's unit tests, and keep track of any files which
    # fail unit tests.
    if not opts.suppress_unit_tests:
        unittest_names = []
        if not opts.unittest_glob:
            # No glob given: discover every test_*.py under this directory.
            for root, dirs, files in walk(test_dir):
                for name in files:
                    if name.startswith('test_') and name.endswith('.py'):
                        unittest_names.append(join(root, name))
        else:
            # Restrict discovery to files matching the user-supplied glob.
            for fp in glob(opts.unittest_glob):
                fn = split(fp)[1]
                if fn.startswith('test_') and fn.endswith('.py'):
                    unittest_names.append(abspath(fp))

        unittest_names.sort()

        for unittest_name in unittest_names:
            print "Testing %s:\n" % unittest_name
            command = '%s %s -v' % (python_name, unittest_name)
            # unittest -v writes its result summary to stderr.
            stdout, stderr, return_value = qiime_system_call(command)
            print stderr
            if not unittest_good_pattern.search(stderr):
                if application_not_found_pattern.search(stderr):
                    missing_application_tests.append(unittest_name)
                else:
                    bad_tests.append(unittest_name)

    qiime_test_data_dir = join(get_qiime_project_dir(), 'qiime_test_data')
    qiime_test_data_dir_exists = exists(qiime_test_data_dir)
    if not opts.suppress_script_usage_tests and qiime_test_data_dir_exists:
        if opts.script_usage_tests is not None:
            script_usage_tests = opts.script_usage_tests.split(',')
        else:
            # None means run all script usage tests.
            script_usage_tests = None

        # Run the script usage testing functionality
        script_usage_result_summary, has_script_usage_example_failures = \
            run_script_usage_tests(
                test_data_dir=qiime_test_data_dir,
                scripts_dir=get_qiime_scripts_dir(),
                working_dir=qiime_config['temp_dir'],
                verbose=True,
                tests=script_usage_tests,
                force_overwrite=True,
                timeout=240)

    print "==============\nResult summary\n=============="

    if not opts.suppress_unit_tests:
        print "\nUnit test result summary\n------------------------\n"
        if bad_tests:
            print "\nFailed the following unit tests.\n%s" % '\n'.join(bad_tests)

        if missing_application_tests:
            print "\nFailed the following unit tests, in part or whole due " +\
                "to missing external applications.\nDepending on the QIIME features " +\
                "you plan to use, this may not be critical.\n%s"\
                % '\n'.join(missing_application_tests)

        if not (missing_application_tests or bad_tests):
            print "\nAll unit tests passed.\n\n"

    if not opts.suppress_script_usage_tests:
        if qiime_test_data_dir_exists:
            print "\nScript usage test result summary\n--------------------------------\n"
            print script_usage_result_summary
        else:
            print "\nCould not run script usage tests because the directory %s does not exist." % qiime_test_data_dir
        print ""

    # If script usage tests weren't suppressed, the qiime_test_data dir must
    # exist and we can't have any failures.
    # (Short-circuit on qiime_test_data_dir_exists also guards against
    # has_script_usage_example_failures being unbound when the dir is missing.)
    script_usage_tests_success = (opts.suppress_script_usage_tests or
                                  (qiime_test_data_dir_exists and
                                   not has_script_usage_example_failures))

    # If any of the unit tests or script usage tests fail, or if we have any
    # missing application errors, use return code 1 (as python's unittest
    # module does to indicate one or more failures).
    return_code = 1
    if (len(bad_tests) == 0 and len(missing_application_tests) == 0 and
            script_usage_tests_success):
        return_code = 0
    return return_code
# NOTE(review): the misspelling "referenence" is part of this function's
# public name; callers elsewhere depend on it, so it is left unchanged.
def pick_subsampled_open_referenence_otus(input_fp,
                                          refseqs_fp,
                                          output_dir,
                                          percent_subsample,
                                          new_ref_set_id,
                                          command_handler,
                                          params,
                                          qiime_config,
                                          prefilter_refseqs_fp=None,
                                          run_tax_align_tree=True,
                                          prefilter_percent_id=0.60,
                                          min_otu_size=2,
                                          step1_otu_map_fp=None,
                                          step1_failures_fasta_fp=None,
                                          parallel=False,
                                          suppress_step4=False,
                                          logger=None,
                                          status_update_callback=print_to_stdout):
    """Run subsampled open-reference OTU picking (the data preparation steps
    of QIIME).

    The steps performed by this function are:
      - Pick reference OTUs against refseqs_fp
      - Subsample the failures to n sequences.
      - Pick OTUs de novo on the n failures.
      - Pick representative sequences for the resulting OTUs.
      - Pick reference OTUs on all failures using the representative set
        from step 4 as the reference set.

    input_fp: input sequences fasta filepath
    refseqs_fp: reference sequence collection for closed-reference picking
    output_dir: directory where all step outputs are written
    percent_subsample: fraction of step1 failures to subsample for de novo
        picking
    new_ref_set_id: prefix used to label new de novo reference OTUs
    command_handler/params/qiime_config: standard QIIME workflow plumbing
    prefilter_refseqs_fp: optional separate reference set for the prefilter
        step (defaults to refseqs_fp)
    run_tax_align_tree: if True, assign taxonomy, align, and build a tree
        from the final rep set
    prefilter_percent_id: similarity threshold for the prefilter step, or
        None to skip prefiltering
    min_otu_size: minimum OTU size retained in the final OTU map/table
    step1_otu_map_fp / step1_failures_fasta_fp: pre-existing step1 outputs;
        when both are given, step 1 is skipped
    parallel: run reference OTU picking in parallel where supported
    suppress_step4: skip the de novo clean-up pass on step3 failures
    logger: existing WorkflowLogger, or None to create one here
    status_update_callback: progress-reporting callable
    """
    # for now only allowing uclust for otu picking
    denovo_otu_picking_method = 'uclust'
    reference_otu_picking_method = 'uclust_ref'

    # Prepare some variables for the later steps
    input_dir, input_filename = split(input_fp)
    input_basename, input_ext = splitext(input_filename)
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    # A locally-created logger is closed only by the final command_handler
    # call (all intermediate calls pass close_logger_on_success=False).
    if logger == None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    log_input_md5s(logger, [input_fp,
                            refseqs_fp,
                            step1_otu_map_fp,
                            step1_failures_fasta_fp])

    # if the user has not passed a different reference collection for the
    # pre-filter, use the main refseqs_fp. this is useful if the user wants to
    # provide a smaller reference collection, or to use the input reference
    # collection when running in iterative mode (rather than an iteration's
    # new refseqs)
    if prefilter_refseqs_fp == None:
        prefilter_refseqs_fp = refseqs_fp

    ## Step 1: Closed-reference OTU picking on the input file (if not already complete)
    if step1_otu_map_fp and step1_failures_fasta_fp:
        step1_dir = '%s/step1_otus' % output_dir
        create_dir(step1_dir)
        logger.write("Using pre-existing reference otu map and failures.\n\n")
    else:
        if prefilter_percent_id != None:
            # Discard reads that don't hit the reference at a low identity
            # threshold before the (more expensive) main reference pick.
            prefilter_dir = '%s/prefilter_otus/' % output_dir
            prefilter_otu_map_fp = \
                '%s/%s_otus.txt' % (prefilter_dir,input_basename)
            prefilter_failures_list_fp = '%s/%s_failures.txt' % \
                (prefilter_dir,input_basename)
            prefilter_pick_otu_cmd = pick_reference_otus(\
                input_fp,prefilter_dir,reference_otu_picking_method,
                prefilter_refseqs_fp,parallel,params,logger,prefilter_percent_id)
            commands.append([('Pick Reference OTUs (prefilter)', prefilter_pick_otu_cmd)])

            prefiltered_input_fp = '%s/prefiltered_%s%s' %\
                (prefilter_dir,input_basename,input_ext)
            filter_fasta_cmd = 'filter_fasta.py -f %s -o %s -s %s -n' %\
                (input_fp,prefiltered_input_fp,prefilter_failures_list_fp)
            commands.append([('Filter prefilter failures from input', filter_fasta_cmd)])

            # Downstream steps operate on the prefiltered sequences.
            input_fp = prefiltered_input_fp
            input_dir, input_filename = split(input_fp)
            input_basename, input_ext = splitext(input_filename)

        ## Build the OTU picking command
        step1_dir = \
            '%s/step1_otus' % output_dir
        step1_otu_map_fp = \
            '%s/%s_otus.txt' % (step1_dir,input_basename)
        step1_pick_otu_cmd = pick_reference_otus(\
            input_fp,step1_dir,reference_otu_picking_method,
            refseqs_fp,parallel,params,logger)
        commands.append([('Pick Reference OTUs', step1_pick_otu_cmd)])

        ## Build the failures fasta file
        step1_failures_list_fp = '%s/%s_failures.txt' % \
            (step1_dir,input_basename)
        step1_failures_fasta_fp = \
            '%s/failures.fasta' % step1_dir
        step1_filter_fasta_cmd = 'filter_fasta.py -f %s -s %s -o %s' %\
            (input_fp,step1_failures_list_fp,step1_failures_fasta_fp)
        commands.append([('Generate full failures fasta file',
                          step1_filter_fasta_cmd)])

    # Call the command handler on the list of commands.
    # Step1 outputs must exist on disk before subsample_fasta below can run,
    # hence the intermediate flush of the command list here.
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=False)
    commands = []

    step1_repset_fasta_fp = \
        '%s/step1_rep_set.fna' % step1_dir
    step1_pick_rep_set_cmd = 'pick_rep_set.py -i %s -o %s -f %s' %\
        (step1_otu_map_fp, step1_repset_fasta_fp, input_fp)
    commands.append([('Pick rep set', step1_pick_rep_set_cmd)])

    ## Subsample the failures fasta file to retain (roughly) the
    ## percent_subsample
    step2_input_fasta_fp = \
        '%s/subsampled_failures.fasta' % step1_dir
    subsample_fasta(step1_failures_fasta_fp,
                    step2_input_fasta_fp,
                    percent_subsample)

    ## Prep the OTU picking command for the subsampled failures
    step2_dir = '%s/step2_otus/' % output_dir
    step2_cmd = pick_denovo_otus(step2_input_fasta_fp,
                                 step2_dir,
                                 new_ref_set_id,
                                 denovo_otu_picking_method,
                                 params,
                                 logger)
    step2_otu_map_fp = '%s/subsampled_failures_otus.txt' % step2_dir

    commands.append([('Pick de novo OTUs for new clusters', step2_cmd)])

    ## Prep the rep set picking command for the subsampled failures
    step2_repset_fasta_fp = '%s/step2_rep_set.fna' % step2_dir
    step2_rep_set_cmd = 'pick_rep_set.py -i %s -o %s -f %s' %\
        (step2_otu_map_fp,step2_repset_fasta_fp,step2_input_fasta_fp)
    commands.append([('Pick representative set for subsampled failures',step2_rep_set_cmd)])

    # Step 3: closed-reference pick of ALL step1 failures against the step2
    # de novo rep set.
    step3_dir = '%s/step3_otus/' % output_dir
    step3_otu_map_fp = '%s/failures_otus.txt' % step3_dir
    step3_failures_list_fp = '%s/failures_failures.txt' % step3_dir
    step3_cmd = pick_reference_otus(
        step1_failures_fasta_fp,
        step3_dir,
        reference_otu_picking_method,
        step2_repset_fasta_fp,
        parallel,
        params,
        logger)

    commands.append([
        ('Pick reference OTUs using de novo rep set',step3_cmd)])

    # name the final otu map
    merged_otu_map_fp = '%s/final_otu_map.txt' % output_dir

    if not suppress_step4:
        # Step 4: a final de novo clean-up pass on reads that failed step 3.
        step3_failures_fasta_fp = '%s/failures_failures.fasta' % step3_dir
        step3_filter_fasta_cmd = 'filter_fasta.py -f %s -s %s -o %s' %\
            (step1_failures_fasta_fp,step3_failures_list_fp,step3_failures_fasta_fp)
        commands.append([('Create fasta file of step3 failures',
                          step3_filter_fasta_cmd)])

        step4_dir = '%s/step4_otus/' % output_dir
        step4_cmd = pick_denovo_otus(step3_failures_fasta_fp,
                                     step4_dir,
                                     '.'.join([new_ref_set_id,'CleanUp']),
                                     denovo_otu_picking_method,
                                     params,
                                     logger)
        step4_otu_map_fp = '%s/failures_failures_otus.txt' % step4_dir
        commands.append([('Pick de novo OTUs on step3 failures', step4_cmd)])
        # Merge the otu maps
        cat_otu_tables_cmd = 'cat %s %s %s >> %s' %\
            (step1_otu_map_fp,step3_otu_map_fp,step4_otu_map_fp,merged_otu_map_fp)
        commands.append([('Merge OTU maps',cat_otu_tables_cmd)])
        step4_repset_fasta_fp = '%s/step4_rep_set.fna' % step4_dir
        step4_rep_set_cmd = 'pick_rep_set.py -i %s -o %s -f %s' %\
            (step4_otu_map_fp,step4_repset_fasta_fp,step3_failures_fasta_fp)
        commands.append([('Pick representative set for subsampled failures',step4_rep_set_cmd)])

    else:
        # Merge the otu maps
        cat_otu_tables_cmd = 'cat %s %s >> %s' %\
            (step1_otu_map_fp,step3_otu_map_fp,merged_otu_map_fp)
        commands.append([('Merge OTU maps',cat_otu_tables_cmd)])
        # Move the step 3 failures file to the top-level directory
        commands.append([('Move final failures file to top-level directory',
                          'mv %s %s/final_failures.txt' % (step3_failures_list_fp,output_dir))])

    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=False)
    commands = []

    otu_fp = merged_otu_map_fp
    # Filter singletons from the otu map
    otu_no_singletons_fp = '%s/final_otu_map_mc%d.txt' % (output_dir,min_otu_size)
    otus_to_keep = filter_otus_from_otu_map(otu_fp,otu_no_singletons_fp,min_otu_size)

    ## make the final representative seqs file and a new refseqs file that
    ## could be used in subsequent otu picking runs.
    ## this is clunky. first, we need to do this without singletons to match
    ## the otu map without singletons. next, there is a difference in what
    ## we need the reference set to be and what we need the repseqs to be.
    ## the reference set needs to be a superset of the input reference set
    ## to this set. the repset needs to be only the sequences that were observed
    ## in this data set, and we want reps for the step1 reference otus to be
    ## reads from this run so we don't hit issues building a tree using
    ## sequences of very different lengths. so...
    final_repset_fp = '%s/rep_set.fna' % output_dir
    final_repset_f = open(final_repset_fp,'w')
    new_refseqs_fp = '%s/new_refseqs.fna' % output_dir
    # write non-singleton otus representative sequences from step1 to the
    # final rep set file
    for otu_id, seq in MinimalFastaParser(open(step1_repset_fasta_fp,'U')):
        if otu_id.split()[0] in otus_to_keep:
            final_repset_f.write('>%s\n%s\n' % (otu_id,seq))
    # copy the full input refseqs file to the new refseqs_fp
    copy(refseqs_fp,new_refseqs_fp)
    new_refseqs_f = open(new_refseqs_fp,'a')
    new_refseqs_f.write('\n')
    # iterate over all representative sequences from step2 and step4 and write
    # those corresponding to non-singleton otus to the final representative set
    # file and the new reference sequences file.
    for otu_id, seq in MinimalFastaParser(open(step2_repset_fasta_fp,'U')):
        if otu_id.split()[0] in otus_to_keep:
            new_refseqs_f.write('>%s\n%s\n' % (otu_id,seq))
            final_repset_f.write('>%s\n%s\n' % (otu_id,seq))
    if not suppress_step4:
        for otu_id, seq in MinimalFastaParser(open(step4_repset_fasta_fp,'U')):
            if otu_id.split()[0] in otus_to_keep:
                new_refseqs_f.write('>%s\n%s\n' % (otu_id,seq))
                final_repset_f.write('>%s\n%s\n' % (otu_id,seq))
    new_refseqs_f.close()
    final_repset_f.close()

    # Prep the make_otu_table.py command
    otu_table_fp = '%s/otu_table_mc%d.biom' % (output_dir,min_otu_size)
    make_otu_table_cmd = 'make_otu_table.py -i %s -o %s' %\
        (otu_no_singletons_fp,otu_table_fp)
    commands.append([("Make the otu table",make_otu_table_cmd)])

    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=False)

    commands = []

    if run_tax_align_tree:
        taxonomy_fp, pynast_failures_fp = tax_align_tree(
            repset_fasta_fp=final_repset_fp,
            output_dir=output_dir,
            command_handler=command_handler,
            params=params,
            qiime_config=qiime_config,
            parallel=parallel,
            logger=logger,
            status_update_callback=status_update_callback)

        # Add taxa to otu table
        otu_table_w_tax_fp = \
            '%s/otu_table_mc%d_w_tax.biom' % (output_dir,min_otu_size)
        add_taxa_cmd = 'add_taxa.py -i %s -t %s -o %s' %\
            (otu_table_fp,taxonomy_fp,otu_table_w_tax_fp)
        commands.append([("Add taxa to OTU table",add_taxa_cmd)])

        command_handler(commands,
                        status_update_callback,
                        logger=logger,
                        close_logger_on_success=False)
        commands = []

        # Build OTU table without PyNAST failures
        otu_table_fp = \
            '%s/otu_table_mc%d_w_tax_no_pynast_failures.biom' % (output_dir,min_otu_size)
        filtered_otu_table = filter_otus_from_otu_table(
            parse_biom_table(open(otu_table_w_tax_fp,'U')),
            get_seq_ids_from_fasta_file(open(pynast_failures_fp,'U')),
            0, inf, 0, inf, negate_ids_to_keep=True)
        otu_table_f = open(otu_table_fp,'w')
        otu_table_f.write(format_biom_table(filtered_otu_table))
        otu_table_f.close()

        # NOTE(review): commands is empty here — this call only flushes
        # status; kept as in the original.
        command_handler(commands,
                        status_update_callback,
                        logger=logger,
                        close_logger_on_success=False)
        commands = []

    # Final (empty) handler call: this is the one that is allowed to close a
    # locally-created logger on success.
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
def run_ampliconnoise(mapping_fp,
                      output_dir,
                      command_handler,
                      params,
                      qiime_config,
                      logger=None,
                      status_update_callback=print_to_stdout,
                      chimera_alpha=-3.8228,
                      chimera_beta=0.6200,
                      sff_txt_fp=None,
                      numnodes=2,
                      suppress_perseus=True,
                      output_filepath=None,
                      platform='flx',
                      seqnoise_resolution=None,
                      truncate_len=None):
    """Run the AmpliconNoise pipeline.

    The steps performed by this function are:
    1. Split input sff.txt file into one file per sample
    2. Run scripts required for PyroNoise
    3. Run scripts required for SeqNoise
    4. Run scripts required for Perseus (chimera removal)
    5. Merge output files into one file similar to the output of
       split_libraries.py

    mapping_fp: mapping file with SampleID, BarcodeSequence, and
        LinkerPrimerSequence columns (exactly one distinct primer allowed)
    output_dir: working directory; this process chdirs into it
    command_handler/params/qiime_config: standard QIIME workflow plumbing
    chimera_alpha, chimera_beta: Class.pl chimera-scoring parameters
    sff_txt_fp: path to the input sff.txt, relative to the calling directory
    numnodes: mpirun process count for the MPI-based tools
    suppress_perseus: if True, skip the Perseus chimera-removal steps
    output_filepath: should be absolute; final merged fasta destination
    platform: 'flx' or 'titanium' (selects clean script and defaults)
    seqnoise_resolution: should be a string (concatenated into commands);
        defaults per platform
    truncate_len: should be a string (concatenated into commands);
        defaults per platform

    The environment variable PYRO_LOOKUP_FILE must be set correctly. Thus be
    careful passing command handlers that don't spawn child processes,
    as they may not inherit the correct environment variable setting.
    """
    map_data,headers,comments = parse_mapping_file(open(mapping_fp,'U'))
    create_dir(output_dir)

    # Platform-specific defaults; values stay strings because they are
    # concatenated directly into shell commands below.
    if seqnoise_resolution == None:
        if platform=='flx': seqnoise_resolution = '30.0'
        elif platform=='titanium': seqnoise_resolution = '25.0'
        else: raise RuntimeError('seqnoise_resolution not set, and no'+\
            ' default for platform '+platform)

    if truncate_len == None:
        if platform=='flx': truncate_len = '220'
        elif platform=='titanium': truncate_len = '400'
        else: raise RuntimeError('truncate_len not set, and no'+\
            ' default for platform '+platform)

    sample_names = [] # these are filenames minus extension, and are sample IDs
    primer_seqs = [] # same order as sample_names
    bc_seqs = [] # same order as sample_names
    for i in range(len(map_data)):
        sample_names.append(map_data[i][headers.index('SampleID')])
        bc_seqs.append(map_data[i][headers.index('BarcodeSequence')])
        # don't know why don't just take off the primer now.
        # but that's done later
        # primer += (map_data[i][headers.index('LinkerPrimerSequence')])
        # for char, bases in IUPAC_DNA_ambiguities.items():
        #     primer = primer.replace(char,'['+''.join(bases)+']')
        # Expand IUPAC ambiguity codes into character classes so the primer
        # can be used as a pattern by the downstream perl tools.
        primer = (map_data[i][headers.index('LinkerPrimerSequence')])
        for char, bases in IUPAC_DNA_ambiguities.items():
            primer = primer.replace(char,'['+''.join(bases)+']')
        primer_seqs.append(primer)

    if len(set(primer_seqs)) != 1:
        raise RuntimeError(
            'Error: only one primer per mapping file supported.')
    one_primer = primer_seqs[0]

    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()

    if logger == None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False
    log_input_md5s(logger,[mapping_fp,sff_txt_fp])

    # execute commands in output_dir
    # NOTE(review): os.chdir changes this process's cwd and it is never
    # restored — callers relying on the original cwd afterwards should verify.
    called_dir = os.getcwd()
    os.chdir(output_dir)
    fh = open(os.path.join(output_dir,'map.csv'),'w')
    for i in range(len(sample_names)):
        fh.write(sample_names[i]+','+bc_seqs[i]+'\n')
    fh.close()

    # these are the fasta results, e.g. PC.636_Good.fa
    # later we merge them and copy to output file
    post_pyro_tail = '_'+truncate_len
    if suppress_perseus == True:
        # without Perseus, the SeqNoise output is the final per-sample fasta
        fasta_result_names = [sample_name + post_pyro_tail+'_seqnoise_cd.fa'
          for sample_name in sample_names]
    else:
        # with Perseus, FilterGoodClass.pl writes the final *_Good.fa
        fasta_result_names = [sample_name + '_Good.fa' \
          for sample_name in sample_names]

    cmd = 'cd '+output_dir # see also os.chdir above
    commands.append([('change to output dir', cmd)])

    cmd = 'echo $PYRO_LOOKUP_FILE > pyro_lookup_filepath.txt'
    commands.append([('confirm pyro lookup filepath environment variable',
        cmd)])

    cmd = 'SplitKeys.pl '+one_primer+' map.csv < '+\
        os.path.join(called_dir,sff_txt_fp)+\
        ' > splitkeys_log.txt 2> unassigned.fna'
    commands.append([('split sff.txt via barcodes (keys)', cmd)])

    for i, sample_name in enumerate(sample_names):
        # Build the flow-cleaning command for this sample's platform
        if platform == 'flx':
            cmd = 'Clean360.pl '+one_primer+' '+sample_name+' < '+\
                sample_name+'.raw'
            commands.append([('clean flows '+sample_name, cmd)])

            # these run through the whole sff file once per sample, I think
            # cmd = "FlowsFA.pl " + primer_seqs[i] + ' '+sample_name +' < '+\
            #     os.path.join(called_dir,sff_txt_fp)
            # commands.append([('extract flows '+sample_name, cmd)])
        elif platform == 'titanium':
            cmd = 'CleanMinMax.pl '+one_primer+' '+sample_name+' < '+\
                sample_name+'.raw'
            commands.append([('clean flows '+sample_name, cmd)])

            # cmd = "FlowsMinMax.pl " + primer_seqs[i] + ' '+sample_name +' < '+\
            #     os.path.join(called_dir,sff_txt_fp)
            # commands.append([('extract flows '+sample_name, cmd)])
        else:
            raise RuntimeError("platform " + platform + " not supported")

        cmd = "mpirun -np "+str(numnodes)+" PyroDist -in "+\
            sample_name+".dat -out "+sample_name+ " > "+sample_name+".pdout"
        commands.append([('pyrodist '+sample_name, cmd)])

        cmd = "FCluster -in "+sample_name+".fdist -out "+sample_name+\
            " > "+sample_name+".fcout"
        commands.append([('fcluster pyrodist '+sample_name, cmd)])

        # e.g.:
        # mpirun -np 2 PyroNoise -din PC.354.dat -out PC.354_pyronoise -lin
        # PC.354.list -s 60.0 -c 0.01 > PC.354_pyronoise.pnout
        cmd = "mpirun -np "+str(numnodes)+" PyroNoise -din "+\
            sample_name+".dat -out "+\
            sample_name+"_pyronoise "+"-lin "+\
            sample_name+".list -s 60.0 -c 0.01 > "+\
            sample_name+"_pyronoise.pnout"
        commands.append([('pyronoise '+sample_name, cmd)])

        # NOTE(review): barcode and primer are concatenated with no separator
        # here — presumably Parse.pl expects the joined barcode+primer string;
        # confirm against the AmpliconNoise documentation.
        cmd = 'Parse.pl '+bc_seqs[i]+one_primer+' '+truncate_len+' < '+\
            sample_name+'_pyronoise_cd.fa'+' > '+ sample_name+'_'+\
            truncate_len+'.fa'
        commands.append([('truncate '+sample_name, cmd)])

        # now start with post_pyro_tail
        cmd = "mpirun -np "+str(numnodes)+" SeqDist -in "+\
            sample_name+post_pyro_tail+\
            ".fa > "+sample_name+post_pyro_tail+".seqdist"
        commands.append([('seqdist '+sample_name, cmd)])

        cmd = "FCluster -in "+sample_name+post_pyro_tail+".seqdist -out "+\
            sample_name+post_pyro_tail+"fcl > "+\
            sample_name+post_pyro_tail+".fcout"
        commands.append([('fcluster seqdist '+sample_name, cmd)])

        # e.g.:
        # mpirun -np 2 SeqNoise -in PC.354_pyronoise_cd.fa -din
        # PC.354_pyronoise_cd.seqdist -out PC.354_pyronoise_cd_seqnoise -lin
        # PC.354_pyronoise_cdfcl.list -min PC.354_pyronoise.mapping -s 30.0 -c 0.08 >
        # PC.354_pyronoise_cd.snout
        cmd = "mpirun -np "+str(numnodes)+" SeqNoise -in "+\
            sample_name+post_pyro_tail+\
            ".fa -din "+sample_name+post_pyro_tail+".seqdist -out "+\
            sample_name+post_pyro_tail+\
            "_seqnoise -lin "+sample_name+post_pyro_tail+'fcl.list -min '+\
            sample_name+'_pyronoise'+\
            '.mapping -s '+seqnoise_resolution+' -c 0.08 > '+\
            sample_name+post_pyro_tail+'.snout'
        commands.append([('seqnoise '+sample_name, cmd)])

        if suppress_perseus == False:
            # Perseus chimera detection, classification, and filtering
            cmd = 'Perseus -sin '+sample_name+post_pyro_tail+\
                '_seqnoise_cd.fa > ' +\
                sample_name+'.per'
            commands.append([('Perseus '+sample_name, cmd)])

            cmd = 'Class.pl '+sample_name+'.per '+\
                str(chimera_alpha) + ' '+ str(chimera_beta)+\
                ' > '+sample_name+'.class'
            commands.append([('Class.pl '+sample_name, cmd)])

            cmd = 'FilterGoodClass.pl '+sample_name+post_pyro_tail+\
                '_seqnoise_cd.fa '+\
                sample_name+'.class 0.5 > '+sample_name+'_Chi.fa 2> '+\
                sample_name+'_Good.fa'
            commands.append([('FilterGoodClass '+sample_name, cmd)])

        # Expand abundance-weighted reads back to one record per read.
        cmd = '%s %s/unweight_fasta.py -i %s -o %s -l %s' %\
            (python_exe_fp, script_dir, fasta_result_names[i],
            sample_name+'_unw.fna', sample_name)
        commands.append([('unweight fasta '+sample_name, cmd)])

    cmd = 'cat ' +\
        ' '.join([sample_name+'_unw.fna' for sample_name in sample_names]) +\
        ' > ' + output_filepath # this should be an abs filepath
    commands.append([('cat into one fasta file', cmd)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
type='string',help='path to store output files '+\ '[REQUIRED]'),\ ] rdp_classifier_fp = getenv('RDP_JAR_PATH') script_info['optional_options'] = [\ make_option('--rdp_classifier_fp',action='store',\ type='string',help='full path to rdp classifier jar file '+\ '[default: %default]',\ default=rdp_classifier_fp),\ make_option('-c','--confidence',action='store',\ type='float',help='Minimum confidence to'+\ ' record an assignment [default: %default]',default=0.80),\ make_option('-N','--assign_taxonomy_fp',action='store',\ type='string',help='full path to '+\ 'scripts/assign_taxonomy.py [default: %default]',\ default=join(get_qiime_scripts_dir(),'assign_taxonomy.py')),\ make_option('-t','--id_to_taxonomy_fp',action='store',\ type='string',help='full path to '+\ 'id_to_taxonomy mapping file [REQUIRED]'),\ make_option('-r','--reference_seqs_fp',action='store',\ help='Ref seqs to rdp against. [default: %default]'),\ options_lookup['jobs_to_start'],\ options_lookup['poller_fp'],\ options_lookup['retain_temp_files'],\ options_lookup['suppress_submit_jobs'],\ options_lookup['poll_directly'],\ options_lookup['cluster_jobs_fp'],\ options_lookup['suppress_polling'],\ options_lookup['job_prefix'],\ options_lookup['python_exe_fp'],\ options_lookup['seconds_to_sleep']\
def run_process_fasta_through_split_lib(study_id,run_prefix,input_fp,
                                        mapping_fp, output_dir,
                                        command_handler, params, qiime_config,
                                        write_to_all_fasta=False,
                                        status_update_callback=print_to_stdout):
    """Re-label a demultiplexed fasta file with database sample IDs.

    NOTE: Parts of this function are directly copied from the
    run_qiime_data_preparation function from the workflow.py library file
    in QIIME.

    The steps performed by this function are:
        1) Update sequence names using DB accessions

    study_id, run_prefix, write_to_all_fasta: accepted for interface
        compatibility; not referenced in this function's body
    input_fp: comma-separated list of input fasta filepaths
    mapping_fp: mapping file; SampleIDs are expected to carry a trailing
        '.<suffix>' component that is stripped to build the rename lookup
    output_dir: working directory (created if needed)
    command_handler: callable that executes the accumulated shell commands
    params, qiime_config: standard QIIME workflow plumbing

    Returns the sorted list of input fasta filepaths.
    """
    # Prepare some variables for the later steps
    filenames = input_fp.split(',')
    commands = []
    create_dir(output_dir)
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params,
                            qiime_config=qiime_config)

    # copy the mapping file into the output directory
    copied_mapping = split(mapping_fp)[-1]
    mapping_input_fp_copy = join(output_dir, copied_mapping)
    copy_mapping_cmd = 'cp %s %s' % (mapping_fp, mapping_input_fp_copy)
    commands.append([('CopyMapping', copy_mapping_cmd)])

    # sort filenames
    filenames.sort()

    # create split_libraries directory
    split_library_output = join(output_dir, 'split_libraries')
    create_dir(split_library_output)

    # Call the command handler on the list of commands (runs the cp above so
    # the copied mapping file exists before it is parsed below)
    command_handler(commands, status_update_callback, logger=logger)

    # define output filepath
    output_fp = join(split_library_output, 'seqs.fna')

    # re-write the sequence file
    outf = open(output_fp, 'w')

    # get sample-info from mapping file
    map_data, map_header, map_comments = parse_mapping_file(
        open(mapping_input_fp_copy, 'U'))

    # create dictionary of original sample_ids to new sample_ids;
    # the key is the mapping SampleID with its final '.'-separated
    # component stripped. list(...) keeps the first-column extraction
    # working on both Python 2 and 3 (zip is an iterator on 3).
    sample_ids_from_mapping = list(zip(*map_data))[0]
    sample_id_dict = {}
    for sample in sample_ids_from_mapping:
        sample_id_dict['.'.join(sample.split('.')[:-1])] = sample

    # NOTE: an earlier fasta-validity check (run_fasta_checks) lived here but
    # was disabled because mapping sample_ids never match the input fasta; the
    # dead code has been removed.

    # parse the sequences
    # BUG FIX: 'U' (universal-newlines mode) belongs to open(); it was
    # previously passed as MinimalFastaParser's second positional argument,
    # so the file was opened in default mode instead.
    sequences = MinimalFastaParser(open(input_fp, 'U'))

    # update fasta file with new DB SampleIDs and create new split-lib seqs
    # file; sequences are renumbered sequentially across the whole file
    num = 1
    for seq_name, seq in sequences:
        seq_name_arr = seq_name.split()
        # strip the trailing '_<n>' read index from the label's sample id,
        # map it to the DB sample id, then append the new running index
        updated_seq_name = sample_id_dict['_'.join(seq_name_arr[0].split('_')[:-1])] + \
            '_' + str(num) + ' ' + ' '.join(seq_name_arr[1:])
        num += 1
        outf.write('>%s\n%s\n' % (updated_seq_name, seq))
    outf.close()

    # Return the fasta file paths
    return filenames
script_info[ 'output_description'] = """The output consists of many files (i.e. merged_table.biom, merged_table.log and all intermediate merge tables). The .biom file contains the result of merging the individual BIOM tables. The resulting .log file contains a list of parameters passed to this script along with the output location of the resulting .txt file, the dependency hierarchy and runtime information for each individual merge.""" script_info['required_options'] = [\ make_option('-i','--input_fps',type='existing_filepaths', help='the otu tables in biom format (comma-separated)'),\ make_option('-o','--output_dir',type='new_dirpath', help='the output otu table directory path')] script_info['optional_options'] = [\ make_option('-C','--cluster',action='store_true', default=False, help="Submit to a torque cluster"), make_option('-N','--merge_otus_fp',action='store',\ type='existing_filepath',help='full path to '+\ 'scripts/merge_otu_tables.py [default: %default]',\ default=join(get_qiime_scripts_dir(),'merge_otu_tables.py')),\ options_lookup['python_exe_fp'], options_lookup['seconds_to_sleep'], options_lookup['job_prefix']] script_info['version'] = __version__ RANDOM_JOB_PREFIX_CHARS = "abcdefghigklmnopqrstuvwxyz" RANDOM_JOB_PREFIX_CHARS += RANDOM_JOB_PREFIX_CHARS.upper() RANDOM_JOB_PREFIX_CHARS += "0123456790" def get_random_job_prefix(fixed_prefix='', max_job_prefix_len=10,\ leading_trailing_underscores=True): """ Return a string to use as job prefix """ length = max_job_prefix_len - len(fixed_prefix)
help="write output rarefied otu tables here makes dir if it doesn't exist [REQUIRED]"),\ make_option('-m', '--min', type=int,help='min seqs/sample [REQUIRED]'),\ make_option('-x', '--max', type=int,\ help='max seqs/sample (inclusive) [REQUIRED]'),\ make_option('-s', '--step', type=int,\ help='levels: min, min+step... for level <= max [REQUIRED]'),\ ] script_info['optional_options'] = [\ make_option('-n', '--num-reps', dest='num_reps', default=10, type=int, help='num iterations at each seqs/sample level [default: %default]'),\ make_option('--lineages_included', dest='lineages_included', default=False, action="store_true", help="""output rarefied otu tables will include taxonomic (lineage) information for each otu, if present in input otu table [default: %default]"""), make_option('-N','--single_rarefaction_fp',action='store',\ type='string',help='full path to scripts/single_rarefaction.py [default: %default]',\ default=join(get_qiime_scripts_dir(),'single_rarefaction.py')),\ options_lookup['poller_fp'],\ options_lookup['retain_temp_files'],\ options_lookup['suppress_submit_jobs'],\ options_lookup['poll_directly'],\ options_lookup['cluster_jobs_fp'],\ options_lookup['suppress_polling'],\ options_lookup['job_prefix'],\ options_lookup['python_exe_fp'],\ options_lookup['seconds_to_sleep'],\ options_lookup['jobs_to_start'] ] script_info['version'] = __version__ def main():
from qiime.workflow import print_commands, call_commands_serially,\
    print_to_stdout, no_status_updates, generate_log_fp,\
    get_params_str, WorkflowError, WorkflowLogger
from qiime.util import get_qiime_scripts_dir, create_dir, load_qiime_config,\
    get_qiime_library_version
from cogent.util.misc import get_random_directory_name
from submit_job_to_qiime import submitQiimeJob
from qiime.filter import filter_samples_from_otu_table
from biom.table import SparseOTUTable, DenseOTUTable, table_factory,\
    get_biom_format_version_string, get_biom_format_url_string
from json import dumps
from numpy import array

# get qiime config and qiime scripts directory
# (module-level state, evaluated once on import)
qiime_config = load_qiime_config()
script_dir = get_qiime_scripts_dir()


def combine_map_header_cols(combinecolorby, mapping):
    """Merge two or more mapping columns into one column

    combinecolorby: column-header names whose values are to be joined
    mapping: mapping-file rows; mapping[0] is assumed to be the header row
    """
    # create an empty array the size of the mapping file; dtype 'a1000' means
    # fixed-width 1000-byte strings, one slot per mapping row
    combinedmapdata = array([''] * len(mapping), dtype='a1000')
    title = []
    match = False
    # iterate over columns and see if the columns are supposed to be joined
    for p in range(len(combinecolorby)):
        for i in range(len(mapping[0])):
            if str(combinecolorby[p]) == str(mapping[0][i]):
                match = True
                # NOTE(review): nesting below reconstructed from flattened
                # source -- the row loop appears to run once per matched
                # header column; confirm against the original file.
                # (Definition continues beyond this excerpt.)
                for q in range(len(mapping)):
# Script interface definition for the parallel OTU-table merge script.
script_info['output_description'] = """The output consists of many files (i.e. merged_table.biom, merged_table.log and all intermediate merge tables). The .biom file contains the result of merging the individual BIOM tables. The resulting .log file contains a list of parameters passed to this script along with the output location of the resulting .txt file, the dependency hierarchy and runtime information for each individual merge."""

script_info['required_options'] = [
    make_option('-i', '--input_fps', type='existing_filepaths',
                help='the otu tables in biom format (comma-separated)'),
    make_option('-o', '--output_dir', type='new_dirpath',
                help='the output otu table directory path')]

script_info['optional_options'] = [
    make_option('-C', '--cluster', action='store_true', default=False,
                help="Submit to a torque cluster"),
    make_option('-N', '--merge_otus_fp', action='store',
                type='existing_filepath', help='full path to ' +
                'scripts/merge_otu_tables.py [default: %default]',
                default=join(get_qiime_scripts_dir(), 'merge_otu_tables.py')),
    # Shared option definitions for the parallel-job machinery.
    options_lookup['python_exe_fp'],
    options_lookup['seconds_to_sleep'],
    options_lookup['job_prefix']]

script_info['version'] = __version__

# Alphabet used to build random job prefixes: lowercase + uppercase + digits.
# NOTE: fixed two typos in the original constants -- the lowercase run was
# missing 'j' (it read "...ghig..."), and the digit run was missing '8' and
# repeated '0' (it read "0123456790").
RANDOM_JOB_PREFIX_CHARS = "abcdefghijklmnopqrstuvwxyz"
RANDOM_JOB_PREFIX_CHARS += RANDOM_JOB_PREFIX_CHARS.upper()
RANDOM_JOB_PREFIX_CHARS += "0123456789"


def get_random_job_prefix(fixed_prefix='', max_job_prefix_len=10,
                          leading_trailing_underscores=True):
    """ Return a string to use as job prefix """
help='input path, must be directory [REQUIRED]'),
    make_option('-o', '--output_path',
                help='output path, must be directory [REQUIRED]'),
]

# Options for the beta-diversity computation itself plus (presumably) the
# shared parallel-job machinery pulled in from options_lookup.
script_info['optional_options'] = [
    make_option('-m', '--metrics',
                default='unweighted_unifrac,weighted_unifrac',
                help='Beta-diversity metric(s) to use. A comma-separated list should be' +
                ' provided when multiple metrics are specified. [default: %default]'),
    make_option('-t', '--tree_path', default=None,
                help='path to newick tree file, required for phylogenetic metrics' +
                ' [default: %default]'),
    make_option('-N', '--beta_diversity_fp', action='store',
                type='string', help='full path to ' +
                'scripts/beta_diversity.py [default: %default]',
                default=join(get_qiime_scripts_dir(), 'beta_diversity.py')),
    options_lookup['poller_fp'],
    options_lookup['retain_temp_files'],
    options_lookup['suppress_submit_jobs'],
    options_lookup['poll_directly'],
    options_lookup['cluster_jobs_fp'],
    options_lookup['suppress_polling'],
    options_lookup['job_prefix'],
    options_lookup['python_exe_fp'],
    options_lookup['seconds_to_sleep'],
    options_lookup['jobs_to_start'],
    make_option('-f', '--full_tree', action="store_true",
                help='By default, each job removes calls _fast_unifrac_setup to remove unused parts of the tree. pass -f if you already have a minimal tree, and this script will run faster'),
]

script_info['version'] = __version__
'template alignment [default: %default]'),
    make_option('-b', '--blast_db', action='store',
                type='string', help='database to blast against ' +
                '[default: %default]'),
    make_option('--min_aligned_percent',
                help=('Minimum percent of query sequence that can be aligned '
                      'to consider a hit (BLAST OTU picker only) [default: %default]'),
                default=0.50, type='float'),
    # Define parallel-script-specific parameters
    make_option('-N', '--pick_otus_fp', action='store',
                type='string', help='full path to ' +
                'scripts/pick_otus.py [default: %default]',
                default=join(get_qiime_scripts_dir(), 'pick_otus.py')),
    # Shared option definitions for the parallel-job machinery.
    options_lookup['jobs_to_start'],
    options_lookup['poller_fp'],
    options_lookup['retain_temp_files'],
    options_lookup['suppress_submit_jobs'],
    options_lookup['poll_directly'],
    options_lookup['cluster_jobs_fp'],
    options_lookup['suppress_polling'],
    options_lookup['job_prefix'],
    options_lookup['python_exe_fp'],
    options_lookup['seconds_to_sleep']
]

script_info['version'] = __version__
def main(): option_parser, opts, args =\ parse_command_line_parameters(**script_info) if (opts.suppress_unit_tests and \ opts.suppress_script_tests and \ opts.suppress_script_usage_tests): option_parser.error( "You're suppressing all three test types. Nothing to run.") test_dir = abspath(dirname(__file__)) unittest_good_pattern = re.compile('OK\s*$') application_not_found_pattern = re.compile('ApplicationNotFoundError') python_name = 'python' bad_tests = [] missing_application_tests = [] # Run through all of QIIME's unit tests, and keep track of any files which # fail unit tests. if not opts.suppress_unit_tests: unittest_names = [] if not opts.unittest_glob: for root, dirs, files in walk(test_dir): for name in files: if name.startswith('test_') and name.endswith('.py'): unittest_names.append(join(root, name)) else: for fp in glob(opts.unittest_glob): fn = split(fp)[1] if fn.startswith('test_') and fn.endswith('.py'): unittest_names.append(abspath(fp)) unittest_names.sort() for unittest_name in unittest_names: print "Testing %s:\n" % unittest_name command = '%s %s -v' % (python_name, unittest_name) stdout, stderr, return_value = qiime_system_call(command) print stderr if not unittest_good_pattern.search(stderr): if application_not_found_pattern.search(stderr): missing_application_tests.append(unittest_name) else: bad_tests.append(unittest_name) bad_scripts = [] if not opts.suppress_script_tests: # Run through all of QIIME's scripts, and pass -h to each one. If the # resulting stdout does not being with the Usage text, that is an # indicator of something being wrong with the script. Issues that would # cause that are bad import statements in the script, SyntaxErrors, or # other failures prior to running qiime.util.parse_command_line_parameters. 
try: scripts_dir = get_qiime_scripts_dir() script_directory_found = True except AssertionError: script_directory_found = False if script_directory_found: script_names = [] script_names = glob('%s/*py' % scripts_dir) script_names.sort() for script_name in script_names: script_good_pattern = re.compile('^Usage: %s' % split(script_name)[1]) print "Testing %s." % script_name command = '%s %s -h' % (python_name, script_name) stdout, stderr, return_value = qiime_system_call(command) if not script_good_pattern.search(stdout): bad_scripts.append(script_name) num_script_usage_example_failures = 0 qiime_test_data_dir = qiime_config['qiime_test_data_dir'] if not opts.suppress_script_usage_tests and qiime_test_data_dir != None: # Run the script usage testing functionality script_usage_result_summary, num_script_usage_example_failures = \ run_script_usage_tests( qiime_test_data_dir=qiime_test_data_dir, qiime_scripts_dir=qiime_config['qiime_scripts_dir'], working_dir=qiime_config['temp_dir'], verbose=True, tests=None, # runs all failure_log_fp=None, force_overwrite=True) print "==============\nResult summary\n==============" if not opts.suppress_unit_tests: print "\nUnit test result summary\n------------------------\n" if bad_tests: print "\nFailed the following unit tests.\n%s" % '\n'.join( bad_tests) if missing_application_tests: print "\nFailed the following unit tests, in part or whole due "+\ "to missing external applications.\nDepending on the QIIME features "+\ "you plan to use, this may not be critical.\n%s"\ % '\n'.join(missing_application_tests) if not (missing_application_tests or bad_tests): print "\nAll unit tests passed.\n\n" if not opts.suppress_script_tests: print "\nBasic script test result summary\n--------------------------------\n" if not script_directory_found: print "Critical error: Failed to test scripts because the script directory could not be found.\n The most likely explanation for this failure is that you've installed QIIME using setup.py, and forgot 
to specify the qiime_scripts_dir in your qiime_config file. This value shoud be set either to the directory you provided for --install-scripts, or /usr/local/bin if no value was provided to --install-scripts." else: if bad_scripts: print "Failed the following basic script tests.\n%s" % '\n'.join( bad_scripts) else: print "All basic script tests passed successfully.\n" qiime_test_data_dir_exists = True if not opts.suppress_script_usage_tests: if qiime_test_data_dir: print "\nScript usage test result summary\n------------------------------------\n" print script_usage_result_summary else: print "\nCould not run script usage tests because qiime_test_data_dir is not defined in your qiime_config." qiime_test_data_dir_exists = False print "" # If any of the unit tests, script tests, or script usage tests fail, or if # we have any missing application errors or a missing QIIME test data dir # if script usage tests weren't suppressed, use return code 1 (as python's # unittest module does to indicate one or more failures). return_code = 1 if (len(bad_tests) == 0 and len(missing_application_tests) == 0 and len(bad_scripts) == 0 and num_script_usage_example_failures == 0 and qiime_test_data_dir_exists): return_code = 0 return return_code