Ejemplo n.º 1
0
    def __init__(self,
                 table: biom.Table,
                 features: pd.DataFrame,
                 variances: Optional[biom.Table] = None,
                 formatter: Optional['Formatter'] = None):
        """Establish the taxonomy data

        Parameters
        ----------
        table : biom.Table
            Relative abundance data per sample or collapsed into higher order
            entities (e.g., abx in the past year). The stored table and ranks
            are derived with ``inplace=False``.
        features : pd.DataFrame
            DataFrame relating an observation to a Taxon. Its index must hold
            exactly the observation IDs of ``table``, and it must provide a
            'Taxon' column.
        variances : biom.Table, optional
            Variation information about a taxon within a label. Must carry
            the same sample IDs and observation IDs as ``table``. When
            omitted, an all-zero table with the shape and IDs of ``table`` is
            used.
        formatter : Formatter, optional
            Object whose ``dict_format`` method is applied to every lineage
            in the 'Taxon' column. Defaults to ``GreengenesFormatter()``.

        Raises
        ------
        DisjointError
            If the sample or observation IDs of ``variances`` differ from
            those of ``table``, or if the index of ``features`` differs from
            the observation IDs of ``table``.
        """
        self._table = table.norm(inplace=False)
        self._feature_order = self._table.ids(axis='observation')
        self._group_id_lookup = set(self._table.ids())
        self._feature_id_lookup = set(self._feature_order)
        self._features = features
        # Ranks are taken from the table as passed in, not the normalized
        # copy stored above.
        self._ranks = table.rankdata(inplace=False)

        if variances is None:
            # No variance information supplied: fall back to zeros laid out
            # exactly like the table.
            variances = biom.Table(np.zeros(self._table.shape),
                                   self._feature_order,
                                   self._table.ids())
        self._variances = variances

        # Validate against the ID sets computed above instead of rebuilding
        # them for every comparison.
        if set(self._variances.ids()) != self._group_id_lookup:
            raise DisjointError("Table and variances are disjoint")

        if set(self._variances.ids(axis='observation')) != \
                self._feature_id_lookup:
            raise DisjointError("Table and variances are disjoint")

        if self._feature_id_lookup != set(self._features.index):
            raise DisjointError("Table and features are disjoint")

        # Put features and variances in the table's observation order.
        self._features = self._features.loc[self._feature_order]
        self._variances = self._variances.sort_order(self._feature_order,
                                                     axis='observation')

        if formatter is None:
            formatter = GreengenesFormatter()
        self._formatter = formatter

        # Format each lineage once, keyed by feature ID.
        self._formatted_taxa_names = {
            feature_id: self._formatter.dict_format(lineage)
            for feature_id, lineage in self._features['Taxon'].items()
        }