Esempio n. 1
0
    def _assign_taxonomy(self, extracted_reads, assignment_method):
        '''Assign taxonomy to the extracted reads by running GraftM once per
        package.

        The non-empty readsets of each package are written to temporary FASTA
        files, one GraftM command is built per package, and the commands are
        then run one after another. Each readset that was written out gets a
        ``tmpfile_basename`` attribute (the temporary file's basename without
        the ".fasta" suffix) so that the matching GraftM output can be found
        later.

        Parameters
        ----------
        extracted_reads: object
            provides each_package_wise(), yielding (singlem_package, readsets)
            pairs; each readset has ``sequences`` and ``sample_name``
        assignment_method: str
            value passed to GraftM's --assignment_method option

        Returns
        -------
        SingleMPipeTaxonomicAssignmentResult built from the directory that
        holds the per-package GraftM output directories.
        '''
        fasta_suffix = '.fasta'
        graftm_align_directory_base = os.path.join(self._working_directory,
                                                   'graftm_aligns')
        os.mkdir(graftm_align_directory_base)
        commands = []
        all_tmp_files = []
        # The temporary files must stay open (they are deleted on close) until
        # GraftM has finished reading them, and must be closed even if writing
        # the sequences or running GraftM raises.
        try:
            # Run each one at a time serially so that the number of threads is
            # respected, to save RAM as one DB needs to be loaded at once, and
            # so fewer open files are needed, so that the open file count limit
            # is eased.
            for singlem_package, readsets in extracted_reads.each_package_wise():
                tmpnames = []
                for readset in readsets:
                    if len(readset.sequences) > 0:
                        tmp = tempfile.NamedTemporaryFile(
                            prefix='singlem.%s' % readset.sample_name,
                            suffix=fasta_suffix)
                        # Register immediately so the file is closed by the
                        # finally clause even if writing to it fails.
                        all_tmp_files.append(tmp)
                        # Record basename (remove .fasta) so that the graftm
                        # output file is recorded for later on in pipe.
                        readset.tmpfile_basename = os.path.basename(
                            tmp.name[:-len(fasta_suffix)])
                        SequenceIO().write_fasta(readset.sequences, tmp)
                        # Flush so GraftM sees the complete file on disk.
                        tmp.flush()
                        tmpnames.append(tmp.name)

                if tmpnames:
                    cmd = "%s "\
                          "--threads %i "\
                          "--forward %s "\
                          "--graftm_package %s "\
                          "--output_directory %s/%s "\
                          "--max_samples_for_krona 0 "\
                          "--assignment_method %s" % (
                              self._graftm_command_prefix(
                                  singlem_package.is_protein_package()),
                              self._num_threads,
                              ' '.join(tmpnames),
                              singlem_package.graftm_package_path(),
                              graftm_align_directory_base,
                              singlem_package.graftm_package_basename(),
                              assignment_method)
                    commands.append(cmd)

            extern.run_many(commands, num_threads=1)
        finally:
            for tmp in all_tmp_files:
                tmp.close()
        logging.info("Finished running taxonomic assignment with GraftM")
        return SingleMPipeTaxonomicAssignmentResult(
            graftm_align_directory_base)
Esempio n. 2
0
    def _test_package(self, package_path):
        '''Give a GraftM package a spin, and see if it works in reality with default
        parameters (i.e. pplacer). If it does not work, then raise an error.

        The first 10 sequences of the package's unaligned sequence database
        are written to a temporary FASTA file, and "graftM graft" is run on
        that file with the output going to a temporary directory. Any failure
        is expected to surface as an exception from extern.run.

        Parameters
        ----------
        package_path: str
            path to graftm_package to be tested
        '''
        pkg = GraftMPackage.acquire(package_path)
        with tempdir.TempDir() as graftM_graft_test_dir_name:
            # Take a subset of sequences for testing. The temporary file is
            # opened in text mode for write_fasta.
            with tempfile.NamedTemporaryFile(suffix=".fa", mode='w') as tf:
                seqio = SequenceIO()
                # Close the sequence database as soon as the subset has been
                # copied rather than leaving the handle open.
                with open(pkg.unaligned_sequence_database_path()) as db:
                    seqio.write_fasta(
                        itertools.islice(seqio.each_sequence(db), 10), tf)
                # Flush so graftM sees the complete file on disk.
                tf.flush()
                cmd = "graftM graft --forward %s --graftm_package %s --output_directory %s --force" %(
                    tf.name, package_path, graftM_graft_test_dir_name)
                extern.run(cmd)
Esempio n. 3
0
    def _test_package(self, package_path):
        '''Smoke-test a GraftM package by running "graftM graft" on a handful
        of its own sequences with default parameters (i.e. pplacer). A failure
        of the run is expected to propagate as an error.

        Parameters
        ----------
        package_path: str
            path to graftm_package to be tested
        '''
        package = GraftMPackage.acquire(package_path)
        # Both the output directory and the query FASTA are temporary and are
        # cleaned up once the trial run is over.
        with tempdir.TempDir() as output_dir, \
                tempfile.NamedTemporaryFile(suffix=".fa", mode='w') as subset_fasta:
            fasta_io = SequenceIO()
            # Only the first 10 database sequences are used for the trial run.
            with open(package.unaligned_sequence_database_path()) as db_handle:
                first_ten = itertools.islice(
                    fasta_io.each_sequence(db_handle), 10)
                fasta_io.write_fasta(first_ten, subset_fasta)
            # Make sure everything is on disk before graftM reads the file.
            subset_fasta.flush()
            graft_args = (subset_fasta.name, package_path, output_dir)
            extern.run(
                "graftM graft --forward %s --graftm_package %s --output_directory %s --force" % graft_args)