def _assign_taxonomy(self, extracted_reads, assignment_method):
    """Assign taxonomy with GraftM, one command per SingleM package.

    Creates a 'graftm_aligns' directory under the working directory, builds
    one GraftM command line for every package that has at least one truthy
    temporary graft file, and runs those commands one at a time.

    Parameters
    ----------
    extracted_reads:
        Object whose each_package_wise() yields
        (singlem_package, sample_names, tmp_grafts) triples.
    assignment_method:
        Value passed through to GraftM's --assignment_method option.

    Returns
    -------
    SingleMPipeTaxonomicAssignmentResult built from the output directory.
    """
    output_base = os.path.join(self._working_directory, 'graftm_aligns')
    os.mkdir(output_base)

    command_template = (
        "%s "
        "--threads %i "
        "--forward %s "
        "--graftm_package %s "
        "--output_directory %s/%s "
        "--max_samples_for_krona 0 "
        "--assignment_method %s"
    )

    graftm_commands = []
    for singlem_package, _sample_names, tmp_grafts in extracted_reads.each_package_wise():
        # Falsy entries are skipped; only the file names go on the command line.
        forward_paths = [graft.name for graft in tmp_grafts if graft]
        if len(forward_paths) == 0:
            continue
        graftm_commands.append(command_template % (
            self._graftm_command_prefix(singlem_package.is_protein_package()),
            self._num_threads,
            ' '.join(forward_paths),
            singlem_package.graftm_package_path(),
            output_base,
            singlem_package.graftm_package_basename(),
            assignment_method))

    # The commands run serially; each one is given self._num_threads itself.
    extern.run_many(graftm_commands, num_threads=1)
    logging.info("Finished running taxonomic assignment with graftm")
    return SingleMPipeTaxonomicAssignmentResult(output_base)
def _align(self, search_result):
    """Run GraftM in --search_only mode for every SingleM package.

    Creates a 'graftm_separates' directory under the working directory and
    runs one single-threaded GraftM command per protein package and per
    nucleotide package, with up to self._num_threads commands in parallel.

    Parameters
    ----------
    search_result:
        Object providing protein_hit_paths(),
        direction_corrected_nucleotide_read_files() and samples_with_hits().

    Returns
    -------
    SingleMPipeAlignSearchResult built from the output directory and the
    samples that had hits.
    """
    separates_base = os.path.join(self._working_directory, 'graftm_separates')
    os.mkdir(separates_base)
    logging.info("Running separate alignments in GraftM..")

    def build_command(package, hit_files, is_protein):
        # Assemble the GraftM command line for one package.
        prefix = self._graftm_command_prefix(is_protein)
        options = (
            "--threads %i "
            "--forward %s "
            "--graftm_package %s --output_directory %s/%s "
            "--search_only"
        ) % (
            1,  # use 1 thread since most likely better to parallelise processes with extern
            ' '.join(hit_files),
            package.graftm_package_path(),
            separates_base,
            os.path.basename(package.graftm_package_path()))
        return prefix + options

    def corrected_nucleotide_paths():
        # Keep only the file part of each (_, file) pair.
        return [
            read_file
            for _, read_file in search_result.direction_corrected_nucleotide_read_files()
        ]

    database = self._singlem_package_database

    # Protein packages first, searching the protein hit files.
    graftm_commands = [
        build_command(package, search_result.protein_hit_paths().values(), True)
        for package in database.protein_packages()
    ]
    # Then nucleotide packages, searching the direction-corrected read files.
    graftm_commands.extend(
        build_command(package, corrected_nucleotide_paths(), False)
        for package in database.nucleotide_packages()
    )

    extern.run_many(graftm_commands, num_threads=self._num_threads)
    return SingleMPipeAlignSearchResult(
        separates_base, search_result.samples_with_hits())
def _assign_taxonomy(self, extracted_reads, assignment_method):
    """Assign taxonomy with GraftM, writing each readset to a temporary FASTA.

    For every package yielded by extracted_reads.each_package_wise(), each
    readset with at least one sequence is written to its own temporary
    '.fasta' file, and one GraftM command is built over all of that
    package's files. The basename of each temporary file (without the
    '.fasta' suffix) is stored on the readset as `tmpfile_basename` so the
    matching GraftM output can be found later in the pipeline.

    Parameters
    ----------
    extracted_reads:
        Object whose each_package_wise() yields (singlem_package, readsets)
        pairs; each readset exposes `sequences` and `sample_name`.
    assignment_method:
        Value passed through to GraftM's --assignment_method option.

    Returns
    -------
    SingleMPipeTaxonomicAssignmentResult built from the output directory.

    Notes
    -----
    The temporary files are closed (and so removed, as NamedTemporaryFile
    deletes on close by default) even when writing a FASTA file or running
    GraftM raises; the exception itself still propagates to the caller.
    """
    graftm_align_directory_base = os.path.join(self._working_directory, 'graftm_aligns')
    os.mkdir(graftm_align_directory_base)

    fasta_suffix = ".fasta"
    commands = []
    # Every temporary file created below, across all packages. They must stay
    # open until GraftM has finished reading them.
    open_tmp_files = []
    try:
        # Run each one at a time serially so that the number of threads is
        # respected, to save RAM as one DB needs to be loaded at once, and so
        # fewer open files are needed, so that the open file count limit is
        # eased.
        for singlem_package, readsets in extracted_reads.each_package_wise():
            tmpnames = []
            for readset in readsets:
                if len(readset.sequences) == 0:
                    continue
                tmp = tempfile.NamedTemporaryFile(
                    prefix='singlem.%s' % readset.sample_name,
                    suffix=fasta_suffix)
                # Register for cleanup straight away so the file is closed
                # even if writing to it fails.
                open_tmp_files.append(tmp)
                # Record basename (remove .fasta) so that the graftm output
                # file is recorded for later on in pipe.
                readset.tmpfile_basename = os.path.basename(
                    tmp.name[:-len(fasta_suffix)])
                SequenceIO().write_fasta(readset.sequences, tmp)
                # Flush so the external GraftM process sees the full contents.
                tmp.flush()
                tmpnames.append(tmp.name)

            if tmpnames:
                cmd = "%s "\
                      "--threads %i "\
                      "--forward %s "\
                      "--graftm_package %s "\
                      "--output_directory %s/%s "\
                      "--max_samples_for_krona 0 "\
                      "--assignment_method %s" % (
                          self._graftm_command_prefix(singlem_package.is_protein_package()),
                          self._num_threads,
                          ' '.join(tmpnames),
                          singlem_package.graftm_package_path(),
                          graftm_align_directory_base,
                          singlem_package.graftm_package_basename(),
                          assignment_method)
                commands.append(cmd)

        extern.run_many(commands, num_threads=1)
    finally:
        for tmp in open_tmp_files:
            tmp.close()

    logging.info("Finished running taxonomic assignment with GraftM")
    return SingleMPipeTaxonomicAssignmentResult(
        graftm_align_directory_base)
def test_multi_with_exception(self):
    # A failing command among several must raise ExternCalledProcessError
    # whose message reports the command, its exit status, stderr and stdout.
    expected_message = (
        'Command cat /notafile returned non-zero exit status 1.\n'
        'STDERR was: cat: /notafile: No such file or directory\n'
        'STDOUT was: '
    )
    with self.assertRaises(ExternCalledProcessError) as raised:
        extern.run_many(['seq 2', 'cat /notafile'])
    # Checked after the context manager exits, once the exception is captured.
    self.assertEqual(expected_message, str(raised.exception))
def test_multi_with_many_threads(self):
    # 100 commands run across 10 threads; the outputs are expected in the
    # same order as the commands.
    repeats = 50
    commands = ['seq 2', 'seq 3 4'] * repeats
    expected_outputs = ['1\n2\n', '3\n4\n'] * repeats
    observed_outputs = extern.run_many(commands, num_threads=10)
    self.assertEqual(expected_outputs, observed_outputs)
def test_multi_hello_world(self):
    # Smallest case: one command, one thread, one captured stdout string.
    observed_outputs = extern.run_many(['seq 2'], num_threads=1)
    self.assertEqual(['1\n2\n'], observed_outputs)