Exemple #1
0
    def root(self, options):
        """Re-root a tree on the genomes assigned to an outgroup taxon.

        Parameters
        ----------
        options : argparse.Namespace
            The CLI arguments input by the user. Reads ``input_tree``,
            ``output_tree`` and ``outgroup_taxon``; the namespace is also
            passed to ``_read_taxonomy_files``.
        """

        check_file_exists(options.input_tree)

        genome_taxa = self._read_taxonomy_files(options)

        self.logger.info(f'Identifying genomes from the specified outgroup: {options.outgroup_taxon}')
        # Collect every genome whose taxonomy entry contains the outgroup taxon.
        outgroup = {gid
                    for gid, lineage in genome_taxa.items()
                    if options.outgroup_taxon in lineage}

        RerootTree().root_with_outgroup(options.input_tree,
                                        options.output_tree,
                                        outgroup)

        self.logger.info('Done.')
Exemple #2
0
    def identify(self, options):
        """Run marker gene identification on the requested genomes.

        Parameters
        ----------
        options : argparse.Namespace
            The CLI arguments input by the user. Reads ``genome_dir``,
            ``batchfile``, ``extension``, ``out_dir``, ``prefix``, ``cpus``,
            ``force`` and ``write_single_copy_genes``.
        """

        # Validate whichever input sources were actually supplied.
        if options.genome_dir:
            check_dir_exists(options.genome_dir)
        if options.batchfile:
            check_file_exists(options.batchfile)

        make_sure_path_exists(options.out_dir)

        selected, tables = self._genomes_to_process(options.genome_dir,
                                                    options.batchfile,
                                                    options.extension)
        # Keep the selection on the instance as well as passing it on below.
        self.genomes_to_process = selected

        Markers(options.cpus).identify(selected,
                                       tables,
                                       options.out_dir,
                                       options.prefix,
                                       options.force,
                                       options.write_single_copy_genes)

        self.logger.info('Done.')
Exemple #3
0
    def infer_ranks(self, options):
        """Establish taxonomic ranks of internal nodes using RED.

        Parameters
        ----------
        options : argparse.Namespace
            The CLI arguments input by the user. Reads ``input_tree``,
            ``ingroup_taxon`` and ``output_tree``.
        """

        check_file_exists(options.input_tree)

        InferRanks().run(options.input_tree,
                         options.ingroup_taxon,
                         options.output_tree)

        self.logger.info('Done.')
Exemple #4
0
    def decorate(self, options):
        """Decorate tree with GTDB taxonomy.

        Parameters
        ----------
        options : argparse.Namespace
            The CLI arguments input by the user. Reads ``input_tree`` and
            ``output_tree``; when a ``suffix`` attribute is present,
            ``prefix`` and ``out_dir`` are read as well.

        Raises
        ------
        GenomeMarkerSetUnknown
            If ``options.suffix`` is present but is neither 'bac120' nor
            'ar122'.
        """

        check_file_exists(options.input_tree)

        genome_taxa = self._read_taxonomy_files(options)

        Decorate().run(options.input_tree, genome_taxa, options.output_tree)

        self.logger.info('Done.')

        # Without a 'suffix' attribute the command was run independently and
        # no symlinks are created.
        if not hasattr(options, 'suffix'):
            return

        if options.suffix == 'bac120':
            tree_template = PATH_BAC120_DECORATED_TREE
        elif options.suffix == 'ar122':
            tree_template = PATH_AR122_DECORATED_TREE
        else:
            raise GenomeMarkerSetUnknown(
                'There was an error determining the marker set.')

        # Link both the decorated tree and its companion '-table' file into
        # the top level of the output directory.
        decorated_tree = tree_template.format(prefix=options.prefix)
        for link_target in (decorated_tree, decorated_tree + '-table'):
            symlink_f(link_target,
                      os.path.join(options.out_dir,
                                   os.path.basename(link_target)))
Exemple #5
0
    def infer(self, options):
        """Infer a tree from a user specified MSA.

        Parameters
        ----------
        options : argparse.Namespace
            The CLI arguments input by the user. Reads ``msa_file``,
            ``out_dir``, ``prefix``, ``cpus``, ``prot_model``, ``no_support``
            and ``no_gamma``. An optional ``suffix`` attribute selects the
            marker-specific output paths, and an optional ``subparser_name``
            equal to 'infer' triggers creation of a symlink to the tree.
        """

        check_file_exists(options.msa_file)
        make_sure_path_exists(options.out_dir)

        # The multi-threaded binary is only required when more than one CPU
        # was requested.
        if options.cpus > 1:
            check_dependencies(['FastTreeMP'])
        else:
            check_dependencies(['FastTree'])

        # Resolve the output locations relative to out_dir first. The relative
        # tree path is reused below as the symlink target.
        if hasattr(options, 'suffix'):
            rel_tree = PATH_MARKER_UNROOTED_TREE.format(prefix=options.prefix,
                                                        marker=options.suffix)
            rel_tree_log = PATH_MARKER_TREE_LOG.format(prefix=options.prefix,
                                                       marker=options.suffix)
            rel_fasttree_log = PATH_MARKER_FASTTREE_LOG.format(
                prefix=options.prefix, marker=options.suffix)
        else:
            rel_tree = PATH_UNROOTED_TREE.format(prefix=options.prefix)
            rel_tree_log = PATH_TREE_LOG.format(prefix=options.prefix)
            rel_fasttree_log = PATH_FASTTREE_LOG.format(prefix=options.prefix)

        output_tree = os.path.join(options.out_dir, rel_tree)
        tree_log = os.path.join(options.out_dir, rel_tree_log)
        fasttree_log = os.path.join(options.out_dir, rel_fasttree_log)

        fasttree = FastTree()
        fasttree.run(output_tree, tree_log, fasttree_log, options.prot_model,
                     options.no_support, options.no_gamma, options.msa_file,
                     options.cpus)
        self.logger.info(f'FastTree version: {fasttree.version}')

        if getattr(options, 'subparser_name', None) == 'infer':
            # Use the relative path directly instead of slicing
            # len(out_dir) + 1 characters off the joined path: that slice
            # removed one character too many when out_dir ended with a path
            # separator (e.g. 'out/') or was empty.
            symlink_f(
                rel_tree,
                os.path.join(options.out_dir, os.path.basename(output_tree)))

        self.logger.info('Done.')
Exemple #6
0
    def _read_taxonomy_files(self, options) -> Dict[str, Tuple[str, str, str, str, str, str, str]]:
        """Read and merge taxonomy files.

        The GTDB taxonomy in ``Config.TAXONOMY_FILE`` is read first. Entries
        from the optional GTDB-Tk classification file, and then from the
        optional custom taxonomy file, are added to it and replace any
        existing entry with the same genome ID.

        Parameters
        ----------
        options : argparse.Namespace
            The CLI arguments input by the user. Reads
            ``gtdbtk_classification_file`` and ``custom_taxonomy_file``.

        Returns
        -------
        Dict[str, Tuple[str, str, str, str, str, str, str]]
            The merged taxonomy, keyed by genome ID.

        Raises
        ------
        GTDBTkExit
            If both optional files are given and they specify a taxonomy for
            the same genome.
        """

        self.logger.info('Reading GTDB taxonomy for representative genomes.')
        taxonomy = Taxonomy().read(Config.TAXONOMY_FILE)

        if options.gtdbtk_classification_file:
            # add and overwrite taxonomy for genomes specified in the
            # GTDB-Tk classification file
            check_file_exists(options.gtdbtk_classification_file)

            self.logger.info('Reading GTDB-Tk classification file.')
            gtdbtk_taxonomy = Taxonomy().read(options.gtdbtk_classification_file)
            # 'user_genome' is presumably the header row of the classification
            # file read in as if it were a genome -- TODO confirm. pop() with
            # a default tolerates a file without that row, where `del` would
            # abort with a bare KeyError.
            gtdbtk_taxonomy.pop('user_genome', None)
            # Count the overlap before merging; afterwards every key is present.
            num_reassigned = sum(1 for gid in gtdbtk_taxonomy if gid in taxonomy)
            taxonomy.update(gtdbtk_taxonomy)

            self.logger.info(f'Read GTDB-Tk classifications for {len(gtdbtk_taxonomy):,} genomes.')
            self.logger.info(f'Reassigned taxonomy for {num_reassigned:,} GTDB representative genomes.')

        if options.custom_taxonomy_file:
            # add and overwrite taxonomy for genomes specified in the
            # custom taxonomy file
            check_file_exists(options.custom_taxonomy_file)

            self.logger.info('Reading custom taxonomy file.')
            custom_taxonomy = Taxonomy().read(options.custom_taxonomy_file)
            num_reassigned = sum(1 for gid in custom_taxonomy if gid in taxonomy)
            taxonomy.update(custom_taxonomy)

            self.logger.info(f'Read custom taxonomy for {len(custom_taxonomy):,} genomes.')
            self.logger.info(f'Reassigned taxonomy for {num_reassigned:,} GTDB representative genomes.')

        if options.gtdbtk_classification_file and options.custom_taxonomy_file:
            # The two user-supplied sources must not describe the same genome.
            dup_genomes = set(gtdbtk_taxonomy).intersection(custom_taxonomy)
            if dup_genomes:
                self.logger.error('GTDB-Tk classification and custom taxonomy '
                                  'files must not specify taxonomies for the '
                                  'same genomes.')
                self.logger.error(f'These files have {len(dup_genomes):,} genomes in common.')
                self.logger.error(f'Example duplicate genome: {dup_genomes.pop()}')
                raise GTDBTkExit('Duplicated taxonomy information.')

        self.logger.info(f'Read taxonomy for {len(taxonomy):,} genomes.')

        return taxonomy
Exemple #7
0
    def convert_to_itol(self, options):
        """Convert a tree to iTOL format.

        Parameters
        ----------
        options : argparse.Namespace
            The CLI arguments input by the user. Reads ``input_tree`` and
            ``output_tree``.
        """
        check_file_exists(options.input_tree)

        Misc().convert_to_itol(options.input_tree, options.output_tree)

        self.logger.info('Done.')
Exemple #8
0
    def root(self, options):
        """Root tree using outgroup.

        Parameters
        ----------
        options : argparse.Namespace
            The CLI arguments input by the user. Reads ``input_tree``,
            ``output_tree``, ``outgroup_taxon`` and ``custom_taxonomy_file``;
            when a ``suffix`` attribute is present, ``prefix`` and ``out_dir``
            are read as well.

        Raises
        ------
        GenomeMarkerSetUnknown
            If ``options.suffix`` is present but is neither 'bac120' nor
            'ar122'.
        """
        self.logger.warning("Tree rooting is still under development!")

        check_file_exists(options.input_tree)

        # A custom taxonomy file, when given, is used instead of the GTDB one.
        if options.custom_taxonomy_file:
            check_file_exists(options.custom_taxonomy_file)
            taxonomy = Taxonomy().read(options.custom_taxonomy_file)
        else:
            taxonomy = Taxonomy().read(Config.TAXONOMY_FILE)

        self.logger.info('Identifying genomes from the specified outgroup.')
        outgroup = {genome_id
                    for genome_id, taxa in taxonomy.items()
                    if options.outgroup_taxon in taxa}

        reroot = RerootTree()
        reroot.root_with_outgroup(options.input_tree, options.output_tree,
                                  outgroup)

        # Symlink to the rooted tree file, if not run independently
        if hasattr(options, 'suffix'):
            if options.suffix == 'bac120':
                tree_template = PATH_BAC120_ROOTED_TREE
            elif options.suffix == 'ar122':
                tree_template = PATH_AR122_ROOTED_TREE
            else:
                raise GenomeMarkerSetUnknown(
                    'There was an error determining the marker set.')

            # Build the link target and the link name from the same template.
            # Previously the bac120 link was named after the ar122 tree.
            rooted_tree = tree_template.format(prefix=options.prefix)
            symlink_f(
                rooted_tree,
                os.path.join(options.out_dir, os.path.basename(rooted_tree)))

        self.logger.info('Done.')
Exemple #9
0
    def decorate(self, options):
        """Placeholder for decorating a tree with GTDB taxonomy.

        Decoration is not implemented: the method only checks that the input
        tree exists and logs a warning.

        Parameters
        ----------
        options : argparse.Namespace
            The CLI arguments input by the user. Only ``input_tree`` is read.
        """

        check_file_exists(options.input_tree)

        # NOTE(review): the original note here names Config.TAXONOMY_FILE,
        # presumably the intended taxonomy source once implemented -- confirm.
        self.logger.warning('DECORATE NOT YET IMPLEMENTED!')

        self.logger.info('Done.')
Exemple #10
0
    def remove_labels(self, options):
        """Remove labels from a tree.

        Parameters
        ----------
        options : argparse.Namespace
            The CLI arguments input by the user. Reads ``input_tree`` and
            ``output_tree``.
        """

        check_file_exists(options.input_tree)

        Misc().remove_labels(options.input_tree, options.output_tree)

        self.logger.info('Done.')