def _assign_taxonomy(self, extracted_reads, assignment_method):
    '''Run GraftM once per SingleM package over the extracted reads of
    every sample, writing results beneath a 'graftm_aligns' directory
    inside the working directory.

    As a side effect, each readset that has at least one sequence gets a
    tmpfile_basename attribute holding the basename (minus '.fasta') of
    the temporary FASTA file its sequences were written to.

    Parameters
    ----------
    extracted_reads: object
        must provide each_package_wise(), yielding
        (singlem_package, readsets) pairs
    assignment_method: str
        passed verbatim to GraftM's --assignment_method

    Returns
    -------
    SingleMPipeTaxonomicAssignmentResult constructed from the path of the
    'graftm_aligns' directory.

    Raises
    ------
    OSError if the 'graftm_aligns' directory cannot be created (e.g. it
    already exists).
    '''
    graftm_align_directory_base = os.path.join(
        self._working_directory, 'graftm_aligns')
    os.mkdir(graftm_align_directory_base)

    fasta_suffix = ".fasta"
    commands = []
    # Every temporary file created below, kept open (and so on disk) until
    # all commands have run.
    all_tmp_files = []
    try:
        for singlem_package, readsets in extracted_reads.each_package_wise():
            tmpnames = []
            for readset in readsets:
                if len(readset.sequences) == 0:
                    continue
                # Text mode, since FASTA is written as text (the default
                # for NamedTemporaryFile is binary 'w+b').
                tmp = tempfile.NamedTemporaryFile(
                    prefix='singlem.%s' % readset.sample_name,
                    suffix=fasta_suffix,
                    mode='w')
                # Register straight away so the file is closed by the
                # finally clause even if writing to it fails.
                all_tmp_files.append(tmp)
                # Record basename (remove .fasta) so that the graftm output
                # file is recorded for later on in pipe.
                readset.tmpfile_basename = os.path.basename(
                    tmp.name[:-len(fasta_suffix)])
                seqio = SequenceIO()
                seqio.write_fasta(readset.sequences, tmp)
                # Flush so the GraftM subprocess sees the complete file.
                tmp.flush()
                tmpnames.append(tmp.name)

            if len(tmpnames) > 0:
                cmd = "%s "\
                      "--threads %i "\
                      "--forward %s "\
                      "--graftm_package %s "\
                      "--output_directory %s/%s "\
                      "--max_samples_for_krona 0 "\
                      "--assignment_method %s" % (
                          self._graftm_command_prefix(
                              singlem_package.is_protein_package()),
                          self._num_threads,
                          ' '.join(tmpnames),
                          singlem_package.graftm_package_path(),
                          graftm_align_directory_base,
                          singlem_package.graftm_package_basename(),
                          assignment_method)
                commands.append(cmd)

        # Run each one at a time serially so that the number of threads is
        # respected, to save RAM as one DB needs to be loaded at once, and
        # so fewer open files are needed, so that the open file count limit
        # is eased.
        extern.run_many(commands, num_threads=1)
    finally:
        # Closing a NamedTemporaryFile also removes it from disk; do this
        # whether or not GraftM succeeded so no handles are leaked.
        for tmp in all_tmp_files:
            tmp.close()

    logging.info("Finished running taxonomic assignment with GraftM")
    return SingleMPipeTaxonomicAssignmentResult(
        graftm_align_directory_base)
def _test_package(self, package_path):
    '''Give a GraftM package a spin, and see if it works in reality with
    default parameters (i.e. pplacer). If it does not work, then raise an
    error.

    The first 10 sequences of the package's unaligned sequence database
    are written to a temporary FASTA file, and 'graftM graft' is run on
    that file with output sent to a temporary directory.

    Parameters
    ----------
    package_path: str
        path to graftm_package to be tested
    '''
    pkg = GraftMPackage.acquire(package_path)
    with tempdir.TempDir() as graftM_graft_test_dir_name:
        # Take a subset of sequences for testing. The temporary file is
        # opened in text mode because FASTA is written as text (the
        # NamedTemporaryFile default is binary 'w+b').
        with tempfile.NamedTemporaryFile(suffix=".fa", mode='w') as tf:
            seqio = SequenceIO()
            # Open the database in a with-block so the handle is closed
            # once the subset has been copied, rather than leaked.
            with open(pkg.unaligned_sequence_database_path()) as f:
                seqio.write_fasta(
                    itertools.islice(seqio.each_sequence(f), 10),
                    tf)
            # Flush so the graftM subprocess sees the complete file.
            tf.flush()
            cmd = "graftM graft --forward %s --graftm_package %s --output_directory %s --force" % (
                tf.name, package_path, graftM_graft_test_dir_name)
            extern.run(cmd)
def _test_package(self, package_path):
    '''Smoke-test a GraftM package by running 'graftM graft' with default
    parameters (i.e. pplacer) on a handful of the package's own sequences.
    Any failure of the graftM command surfaces as an error from the
    command runner.

    Parameters
    ----------
    package_path: str
        path to graftm_package to be tested
    '''
    package = GraftMPackage.acquire(package_path)
    with tempdir.TempDir() as output_directory, \
            tempfile.NamedTemporaryFile(mode='w', suffix=".fa") as subset_fasta:
        # Copy only the first 10 database sequences into the temporary
        # FASTA file so the trial run stays small.
        fasta_io = SequenceIO()
        with open(package.unaligned_sequence_database_path()) as database:
            first_sequences = itertools.islice(
                fasta_io.each_sequence(database), 10)
            fasta_io.write_fasta(first_sequences, subset_fasta)
        # Make sure everything is on disk before graftM reads the file.
        subset_fasta.flush()

        command_arguments = (
            subset_fasta.name, package_path, output_directory)
        extern.run(
            "graftM graft --forward %s --graftm_package %s --output_directory %s --force"
            % command_arguments)