Ejemplo n.º 1
0
    def write_hgt_list(self, sample):
        """Write the predicted HGTs of one sample to a text file.

        Parameters
        ----------
        sample : str
            sample Id

        Notes
        -----
        The file is written to `<output>/hgts/<sample>.txt` and has three
        tab-delimited columns: protein Id, silhouette score, potential donor.
        The number of predicted HGTs is also printed to stdout.

        The donor is the LCA of top hits as determined by `find_match`.
        If `donor_rank` is specified, a donor below this rank is raised to
        this rank, whereas a donor above this rank is discarded (reported as
        "0"). If `donor_name` is set, the donor's taxon name is written
        instead of its taxId, with "N/A" standing in for "0".
        """
        tax, use_name, target_rank = (
            self.taxdump, self.donor_name, self.donor_rank)

        # rows of this sample that were predicted as HGTs
        table = self.df
        hits = table[table['hgt'] & (table['sample'] == sample)]
        print(f'  {sample}: {hits.shape[0]}.')

        out_path = join(self.output, 'hgts', f'{sample}.txt')
        columns = hits[['protein', 'silh', 'match']]
        with open(out_path, 'w') as fh:
            for protein, silh, donor in columns.itertuples(
                    index=False, name=None):

                # move donor to the requested rank; a falsy result from
                # `taxid_at_rank` means no donor can be reported
                if target_rank and donor != '0':
                    donor = taxid_at_rank(donor, target_rank, tax) or '0'

                # replace taxId with taxon name
                if use_name:
                    donor = 'N/A' if donor == '0' else tax[donor]['name']

                fh.write(f'{protein}\t{silh:g}\t{donor}\n')
Ejemplo n.º 2
0
    def infer_self_group(self):
        """Infer the self group automatically.

        Notes
        -----
        Sets `self_tax` to a one-element list. Without `self_rank`, the
        element is the LCA itself. Otherwise it is the LCA raised to
        `self_rank`, falling back to the LCA when `taxid_at_rank` returns
        nothing (i.e., the LCA is already above that rank).
        """
        rank = self.self_rank

        # no rank requested: the LCA is the self group
        if not rank:
            self.self_tax = [self.lca]
            return

        # attempt to lift the LCA to the requested rank; keep the LCA if
        # that is not possible
        lifted = taxid_at_rank(self.lca, rank, self.taxdump)
        self.self_tax = [lifted if lifted else self.lca]
Ejemplo n.º 3
0
    def infer_self_group(self):
        """Determine the self group automatically.

        Notes
        -----
        Assigns `self_tax` as a one-element list holding the top-level taxId
        of the self group: the LCA raised to `self_rank` when that rank is
        given and `taxid_at_rank` yields a result, otherwise the LCA itself.
        """
        if self.self_rank:
            # raise the LCA to the given rank; a falsy result (LCA already
            # above that rank) means the LCA is kept as is
            raised = taxid_at_rank(self.lca, self.self_rank, self.taxdump)
            top_taxid = raised or self.lca
        else:
            # no rank given: use the LCA directly
            top_taxid = self.lca

        self.self_tax = [top_taxid]
Ejemplo n.º 4
0
 def test_taxid_at_rank(self):
     """Check that `taxid_at_rank` lifts a taxId to the requested rank."""
     taxdump = taxdump_from_text(taxdump_archaea)
     query = '1538547'

     # expected ancestor taxId of the query at each rank
     expected = (('genus', '1655637'),
                 ('phylum', '1655434'))
     for rank, ancestor in expected:
         self.assertEqual(taxid_at_rank(query, rank, taxdump), ancestor)