Exemple #1
0
    def test_empty_table(self):
        # A table with no samples and no observations must be rejected with
        # an 'empty table' ValueError, whichever axis is grouped.
        renames = pd.Series(['a_new', 'b_new'], index=['a', 'b'])
        mc = qiime2.MetadataCategory(renames)

        empty = biom.Table(np.array([[]]), sample_ids=[], observation_ids=[])

        for axis in ('sample', 'feature'):
            with self.assertRaisesRegex(ValueError, 'empty table'):
                group(empty, axis=axis, metadata=mc, mode='sum')
    def test_empty_table(self):
        # A table with no samples and no observations must be rejected with
        # an 'empty table' ValueError, whichever axis is grouped.
        renames = pd.Series(['a_new', 'b_new'], name='foo',
                            index=pd.Index(['a', 'b'], name='id'))
        mc = qiime2.CategoricalMetadataColumn(renames)

        empty = biom.Table(np.array([[]]), sample_ids=[], observation_ids=[])

        for axis in ('sample', 'feature'):
            with self.assertRaisesRegex(ValueError, 'empty table'):
                group(empty, axis=axis, metadata=mc, mode='sum')
Exemple #3
0
    def test_missing_feature_ids(self):
        # Feature 'b' is in the table but has no entry in the metadata, so
        # the error is expected to name it.
        grouping = pd.Series(['g0', 'g1', 'g2', 'g1', 'g2', 'extra'],
                             index=['a', 'c', 'd', 'e', 'f', 'g'])
        feature_mc = qiime2.MetadataCategory(grouping)

        counts = np.array([
            [1, 0, 0],
            [1, 10, 10],
            [0, 0, 100],
            [5, 5, 5],
            [0, 1, 100],
            [7, 8, 9],
        ])
        # 'g' is deliberately left out of the table
        table = biom.Table(counts, sample_ids=['s1', 's2', 's3'],
                           observation_ids=['a', 'b', 'c', 'd', 'e', 'f'])

        pattern = 'metadata.*missing: {\'b\'}'
        with self.assertRaisesRegex(ValueError, pattern):
            group(table, axis='feature', metadata=feature_mc, mode='sum')
Exemple #4
0
    def test_missing_sample_ids(self):
        # Table samples 's2' and 's5' have no metadata entry; both must be
        # mentioned in the resulting error.
        grouping = pd.Series(['g0', 'g2', 'g0', 'g2'],
                             index=['s1', 's3', 's4', 's6'])
        sample_mc = qiime2.MetadataCategory(grouping)
        counts = np.array([
            [0, 1, 2, 3],
            [10, 11, 12, 13],
            [100, 110, 120, 130],
        ])
        table = biom.Table(counts, sample_ids=['s1', 's2', 's4', 's5'],
                           observation_ids=['x', 'y', 'z'])

        with self.assertRaisesRegex(ValueError,
                                    'metadata.*missing:') as raised:
            group(table, axis='sample', metadata=sample_mc, mode='sum')

        message = str(raised.exception)
        for missing_id in ('s2', 's5'):
            self.assertIn(missing_id, message)
Exemple #5
0
    def test_missing_feature_ids(self):
        # Feature 'b' is in the table but absent from the metadata index;
        # the error is expected to mention it.
        feature_index = pd.Index(['a', 'c', 'd', 'e', 'f', 'g'],
                                 name='featureid')
        grouping = pd.Series(['g0', 'g1', 'g2', 'g1', 'g2', 'extra'],
                             name='foo', index=feature_index)
        feature_mc = qiime2.CategoricalMetadataColumn(grouping)

        counts = np.array([
            [1, 0, 0],
            [1, 10, 10],
            [0, 0, 100],
            [5, 5, 5],
            [0, 1, 100],
            [7, 8, 9],
        ])
        # 'g' is deliberately left out of the table
        table = biom.Table(counts, sample_ids=['s1', 's2', 's3'],
                           observation_ids=['a', 'b', 'c', 'd', 'e', 'f'])

        with self.assertRaisesRegex(ValueError, "not present.*'b'"):
            group(table, axis='feature', metadata=feature_mc, mode='sum')
Exemple #6
0
    def test_missing_sample_ids(self):
        # Table samples 's2' and 's5' have no metadata entry; both must be
        # mentioned in the resulting error.
        sample_index = pd.Index(['s1', 's3', 's4', 's6'], name='sampleid')
        grouping = pd.Series(['g0', 'g2', 'g0', 'g2'], name='foo',
                             index=sample_index)
        sample_mc = qiime2.CategoricalMetadataColumn(grouping)
        counts = np.array([
            [0, 1, 2, 3],
            [10, 11, 12, 13],
            [100, 110, 120, 130],
        ])
        table = biom.Table(counts, sample_ids=['s1', 's2', 's4', 's5'],
                           observation_ids=['x', 'y', 'z'])

        with self.assertRaisesRegex(ValueError, 'not present.*s2.*s5') as ctx:
            group(table, axis='sample', metadata=sample_mc, mode='sum')

        message = str(ctx.exception)
        for missing_id in ('s2', 's5'):
            self.assertIn(missing_id, message)
Exemple #7
0
def group_by_transect(table, tree, distance):
    """Sum the features of ``table`` within clusters from ``collapse_tree``.

    Each cluster gets a label: the '; '-joined names found while walking
    ``tree['parent']`` up to node 0 when the tree carries non-numeric names
    (judged from ``tree['names'][1]``), otherwise ``"T_<distance>:<otu>"``.

    Returns a tuple ``(collapsed_table, collapsed_counts, groups)``: the
    table grouped on the feature axis with mode 'sum', a 'Cluster Size'
    series holding ``len(tips)`` per cluster label, and the series mapping
    each tip name to its cluster label (index named 'id').
    """
    clusters = collapse_tree(tree, distance)
    node_names = tree['names']
    parents = tree['parent']

    # Names are usable only when node 1 has a non-empty, non-float name.
    probe = node_names[1]
    has_real_names = bool(probe) and not is_float(probe)

    def label_for(otu):
        if has_real_names:
            lineage = []
            node = otu
            while node != 0:
                lineage.append(node_names[node])
                node = parents[node]
            if lineage:
                return '; '.join(reversed(lineage))

        # Fallback label, also used when the walk collected no names.
        return "T_%s:%d" % (distance, otu)

    tip_to_label = {}
    for otu, tips in clusters.items():
        for tip in tips:
            tip_to_label[node_names[tip]] = label_for(otu)

    groups = pd.Series(tip_to_label, name='T_%s' % distance).sort_index()
    groups.index.name = 'id'

    cluster_sizes = {label_for(otu): len(tips)
                     for otu, tips in clusters.items()}
    collapsed_counts = pd.Series(cluster_sizes,
                                 name='Cluster Size').sort_index()

    collapsed_table = group(table, axis='feature',
                            metadata=q2.CategoricalMetadataColumn(groups),
                            mode='sum')

    return collapsed_table, collapsed_counts, groups
    def test_missing_sample_ids(self):
        # Table samples 's2' and 's5' have no metadata entry; both must be
        # mentioned in the resulting error.
        grouping = pd.Series(
            ['g0', 'g2', 'g0', 'g2'],
            name='foo',
            index=pd.Index(['s1', 's3', 's4', 's6'], name='sampleid'))
        sample_mc = qiime2.CategoricalMetadataColumn(grouping)
        counts = np.array([[0, 1, 2, 3], [10, 11, 12, 13],
                           [100, 110, 120, 130]])
        table = biom.Table(counts,
                           sample_ids=['s1', 's2', 's4', 's5'],
                           observation_ids=['x', 'y', 'z'])

        with self.assertRaisesRegex(ValueError, 'not present.*s2.*s5') as cm:
            group(table, axis='sample', metadata=sample_mc, mode='sum')

        error_text = str(cm.exception)
        for absent in ('s2', 's5'):
            self.assertIn(absent, error_text)
Exemple #9
0
    def test_feature_sum(self):
        # Summing features per group leaves two rows, 'g0' and 'g1', with
        # the sample axis unchanged.
        sample_mc, feature_mc, table = self._shared_setup()
        summed = np.array([
            [0, 1, 11, 111, 1112, 20, 22],
            [23, 14, 25, 11, 11, 104, 36],
        ])
        expected = biom.Table(summed,
                              sample_ids=sample_mc.to_series().index,
                              observation_ids=['g0', 'g1'])

        observed = group(table, axis='feature', metadata=feature_mc,
                         mode='sum')
        self.assertEqual(expected, observed)
Exemple #10
0
    def test_sample_sum(self):
        # Summing samples per group leaves three columns, with the feature
        # axis unchanged.
        sample_mc, feature_mc, table = self._shared_setup()
        summed = np.array([
            [0, 1, 2],
            [20, 20, 111],
            [15, 14, 37],
            [2, 2, 3],
            [1, 1122, 151],
        ])
        expected = biom.Table(summed,
                              sample_ids=['treatment', 'control', 'other'],
                              observation_ids=feature_mc.to_series().index)

        observed = group(table, axis='sample', metadata=sample_mc, mode='sum')
        self.assertEqual(expected, observed)
    def test_missing_feature_ids(self):
        # Feature 'b' is in the table but absent from the metadata index;
        # the error is expected to mention it.
        grouping = pd.Series(
            ['g0', 'g1', 'g2', 'g1', 'g2', 'extra'],
            name='foo',
            index=pd.Index(['a', 'c', 'd', 'e', 'f', 'g'], name='featureid'))
        feature_mc = qiime2.CategoricalMetadataColumn(grouping)
        counts = np.array([[1, 0, 0], [1, 10, 10], [0, 0, 100], [5, 5, 5],
                           [0, 1, 100], [7, 8, 9]])
        # 'g' is deliberately left out of the table
        table = biom.Table(counts,
                           sample_ids=['s1', 's2', 's3'],
                           observation_ids=['a', 'b', 'c', 'd', 'e', 'f'])

        expected_error = "not present.*'b'"
        with self.assertRaisesRegex(ValueError, expected_error):
            group(table, axis='feature', metadata=feature_mc, mode='sum')
    def test_feature_sum(self):
        # Summing features per group leaves two rows, 'g0' and 'g1', with
        # the sample axis unchanged.
        sample_mc, feature_mc, table = self._shared_setup()
        sample_ids = sample_mc.to_series().index
        expected = biom.Table(np.array([[0, 1, 11, 111, 1112, 20, 22],
                                        [23, 14, 25, 11, 11, 104, 36]]),
                              sample_ids=sample_ids,
                              observation_ids=['g0', 'g1'])

        actual = group(table, axis='feature', metadata=feature_mc, mode='sum')
        self.assertEqual(expected, actual)
    def test_feature_median_ceiling(self):
        # 'median-ceiling' over features leaves two rows, 'g0' and 'g1',
        # with the sample axis unchanged.
        sample_mc, feature_mc, table = self._shared_setup()
        medians = np.array([
            [0, 1, 6, 56, 556, 10, 11],
            [10, 3, 10, 1, 1, 3, 1],
        ])
        expected = biom.Table(medians,
                              sample_ids=sample_mc.to_series().index,
                              observation_ids=['g0', 'g1'])

        observed = group(table, axis='feature', metadata=feature_mc,
                         mode='median-ceiling')
        self.assertEqual(expected, observed)
    def test_feature_median_ceiling(self):
        # 'median-ceiling' over features leaves two rows, 'g0' and 'g1',
        # with the sample axis unchanged.
        sample_mc, feature_mc, table = self._shared_setup()
        sample_ids = sample_mc.to_series().index
        expected = biom.Table(np.array([[0, 1, 6, 56, 556, 10, 11],
                                        [10, 3, 10, 1, 1, 3, 1]]),
                              sample_ids=sample_ids,
                              observation_ids=['g0', 'g1'])

        grouping_args = dict(axis='feature', metadata=feature_mc,
                             mode='median-ceiling')
        actual = group(table, **grouping_args)
        self.assertEqual(expected, actual)
    def test_empty_metadata_values(self):
        # Only the sample axis is exercised, on the assumption that the
        # implementation does not treat feature metadata any differently.
        none_series = pd.Series(
            ['a_new', 'a_new', None], name='foo',
            index=pd.Index(['a', 'b', 'c'], name='sampleid'))
        sample_mc = qiime2.CategoricalMetadataColumn(none_series)
        sample_ids = sample_mc.to_series().index

        counts = np.array([[1, 2, 3], [30, 20, 10]])
        table = biom.Table(counts, sample_ids=sample_ids,
                           observation_ids=['x', 'y'])

        # None as the value for sample 'c'
        with self.assertRaisesRegex(ValueError, "missing.*value.*'c'"):
            group(table, axis='sample', metadata=sample_mc, mode='sum')

        # NaN as the value for sample 'b'
        nan_series = pd.Series(
            ['a_new', float('nan'), 'a_new'], name='foo',
            index=pd.Index(['a', 'b', 'c'], name='id'))
        nan_mc = qiime2.CategoricalMetadataColumn(nan_series)

        with self.assertRaisesRegex(ValueError, "missing.*value.*'b'"):
            group(table, axis='sample', metadata=nan_mc, mode='sum')
    def test_empty_metadata_values(self):
        # Only the sample axis is exercised, on the assumption that the
        # implementation does not treat feature metadata any differently.
        sample_mc = qiime2.CategoricalMetadataColumn(pd.Series(
            ['a_new', 'a_new', None],
            name='foo',
            index=pd.Index(['a', 'b', 'c'], name='sampleid')))
        ids_from_metadata = sample_mc.to_series().index

        table = biom.Table(np.array([[1, 2, 3], [30, 20, 10]]),
                           sample_ids=ids_from_metadata,
                           observation_ids=['x', 'y'])

        # The None value belongs to sample 'c'
        with self.assertRaisesRegex(ValueError, "missing.*value.*'c'"):
            group(table, axis='sample', metadata=sample_mc, mode='sum')

        nan_mc = qiime2.CategoricalMetadataColumn(pd.Series(
            ['a_new', float('nan'), 'a_new'],
            name='foo',
            index=pd.Index(['a', 'b', 'c'], name='id')))

        # The NaN value belongs to sample 'b'
        with self.assertRaisesRegex(ValueError, "missing.*value.*'b'"):
            group(table, axis='sample', metadata=nan_mc, mode='sum')
Exemple #17
0
    def test_sample_median_ceiling(self):
        # 'median-ceiling' over samples leaves three columns, with the
        # feature axis unchanged.
        sample_mc, feature_mc, table = self._shared_setup()
        medians = np.array([
            [0, 1, 0],
            [10, 10, 10],
            [8, 7, 3],
            [1, 1, 1],
            [1, 561, 20],
        ])
        expected = biom.Table(medians,
                              sample_ids=['treatment', 'control', 'other'],
                              observation_ids=feature_mc.to_series().index)

        observed = group(table, axis='sample', metadata=sample_mc,
                         mode='median-ceiling')
        self.assertEqual(expected, observed)
Exemple #18
0
    def test_numeric(self):
        # Metadata whose values all look like numbers must be rejected with
        # a 'numeric' ValueError.
        counts = np.array([[1, 2, 3], [30, 20, 10]])
        table = biom.Table(counts, sample_ids=['a', 'b', 'c'],
                           observation_ids=['x', 'y'])

        numeric_looking = (
            ['1', '2', '3'],           # ints
            ['1.1', '2.2', '3.3333'],  # floats
            ['0', '42', '4.2'],        # mixed
        )
        for values in numeric_looking:
            sample_mc = qiime2.MetadataCategory(
                pd.Series(values, index=['a', 'b', 'c']))

            with self.assertRaisesRegex(ValueError, 'numeric'):
                group(table, axis='sample', metadata=sample_mc, mode='sum')
Exemple #19
0
    def test_empty_metadata_values(self):
        # The feature axis is covered only by the empty-string case, on the
        # assumption that the implementation does not treat feature and
        # sample metadata differently.
        none_series = pd.Series(['a_new', 'a_new', None],
                                index=['a', 'b', 'c'])
        sample_mc = qiime2.MetadataCategory(none_series)
        sample_ids = sample_mc.to_series().index

        counts = np.array([[1, 2, 3], [30, 20, 10]])
        table = biom.Table(counts, sample_ids=sample_ids,
                           observation_ids=['x', 'y'])

        # None as the value for sample 'c'
        with self.assertRaisesRegex(ValueError, 'missing.*value.*{\'c\'}'):
            group(table, axis='sample', metadata=sample_mc, mode='sum')

        # NaN as the value for sample 'b'
        nan_series = pd.Series(['a_new', float('nan'), 'a_new'],
                               index=['a', 'b', 'c'])
        nan_mc = qiime2.MetadataCategory(nan_series)

        with self.assertRaisesRegex(ValueError, 'missing.*value.*{\'b\'}'):
            group(table, axis='sample', metadata=nan_mc, mode='sum')

        # Empty string as the value for feature 'x'
        blank_series = pd.Series(['', 'y_new'], index=['x', 'y'])
        empty_str = qiime2.MetadataCategory(blank_series)

        with self.assertRaisesRegex(ValueError, 'missing.*value.*{\'x\'}'):
            group(table, axis='feature', metadata=empty_str,
                  mode='median-ceiling')
    def test_sample_sum(self):
        # Summing samples per group leaves three columns, with the feature
        # axis unchanged.
        sample_mc, feature_mc, table = self._shared_setup()
        feature_ids = feature_mc.to_series().index
        expected = biom.Table(np.array([[0, 1, 2], [20, 20, 111],
                                        [15, 14, 37], [2, 2, 3],
                                        [1, 1122, 151]]),
                              sample_ids=['treatment', 'control', 'other'],
                              observation_ids=feature_ids)

        actual = group(table, axis='sample', metadata=sample_mc, mode='sum')
        self.assertEqual(expected, actual)
    def test_numeric_strings(self):
        # A categorical column whose values look numeric ('-4.2') is
        # accepted: all three samples collapse into one '-4.2' group.
        counts = np.array([[1, 2, 3], [30, 20, 10]])
        table = biom.Table(counts, sample_ids=['a', 'b', 'c'],
                           observation_ids=['x', 'y'])

        labels = pd.Series(['-4.2', '-4.2', '-4.2'], name='foo',
                           index=pd.Index(['a', 'b', 'c'], name='sampleid'))
        sample_mc = qiime2.CategoricalMetadataColumn(labels)

        row_sums = np.array([[6], [60]])
        expected = biom.Table(row_sums, sample_ids=['-4.2'],
                              observation_ids=['x', 'y'])
        observed = group(table, axis='sample', metadata=sample_mc, mode='sum')
        self.assertEqual(expected, observed)
    def test_sample_median_ceiling(self):
        # 'median-ceiling' over samples leaves three columns, with the
        # feature axis unchanged.
        sample_mc, feature_mc, table = self._shared_setup()
        feature_ids = feature_mc.to_series().index
        expected = biom.Table(np.array([[0, 1, 0], [10, 10, 10], [8, 7, 3],
                                        [1, 1, 1], [1, 561, 20]]),
                              sample_ids=['treatment', 'control', 'other'],
                              observation_ids=feature_ids)

        grouping_args = dict(axis='sample', metadata=sample_mc,
                             mode='median-ceiling')
        actual = group(table, **grouping_args)
        self.assertEqual(expected, actual)
Exemple #23
0
    def test_reorder(self):
        # Each sample maps to itself, but the metadata lists them as
        # c, b, a; the result is expected to follow that order.
        reversed_ids = pd.Series(['c', 'b', 'a'], index=['c', 'b', 'a'])
        sample_mc = qiime2.MetadataCategory(reversed_ids)

        counts = np.array([[1, 2, 3], [30, 20, 10]])
        table = biom.Table(counts, sample_ids=['a', 'b', 'c'],
                           observation_ids=['x', 'y'])

        flipped = np.array([[3, 2, 1], [10, 20, 30]])
        expected = biom.Table(flipped, sample_ids=['c', 'b', 'a'],
                              observation_ids=['x', 'y'])
        observed = group(table, axis='sample', metadata=sample_mc, mode='sum')
        self.assertEqual(expected, observed)
    def test_numeric_strings(self):
        # A categorical column whose values look numeric ('-4.2') is
        # accepted: all three samples collapse into one '-4.2' group.
        table = biom.Table(np.array([[1, 2, 3], [30, 20, 10]]),
                           sample_ids=['a', 'b', 'c'],
                           observation_ids=['x', 'y'])

        sample_index = pd.Index(['a', 'b', 'c'], name='sampleid')
        sample_mc = qiime2.CategoricalMetadataColumn(
            pd.Series(['-4.2', '-4.2', '-4.2'], name='foo',
                      index=sample_index))

        expected = biom.Table(np.array([[6], [60]]),
                              sample_ids=['-4.2'],
                              observation_ids=['x', 'y'])
        actual = group(table, axis='sample', metadata=sample_mc, mode='sum')
        self.assertEqual(expected, actual)
Exemple #25
0
    def test_superset_sample_group(self):
        # The metadata also covers 's3' and 's6', which are not in the
        # table; only groups 'g0' and 'g1' are expected in the output.
        grouping = pd.Series(['g0', 'g1', 'g2', 'g0', 'g1', 'g2'],
                             index=['s1', 's2', 's3', 's4', 's5', 's6'])
        sample_mc = qiime2.MetadataCategory(grouping)
        counts = np.array([
            [0, 1, 2, 3],
            [10, 11, 12, 13],
            [100, 110, 120, 130],
        ])
        table = biom.Table(counts, sample_ids=['s1', 's2', 's4', 's5'],
                           observation_ids=['x', 'y', 'z'])

        summed = np.array([[2, 4], [22, 24], [220, 240]])
        expected = biom.Table(summed, sample_ids=['g0', 'g1'],
                              observation_ids=['x', 'y', 'z'])

        observed = group(table, axis='sample', metadata=sample_mc, mode='sum')
        self.assertEqual(expected, observed)
    def test_empty_only_in_superset(self):
        # The only missing metadata value belongs to 'd', which is not in
        # the table, so grouping is expected to succeed. Only the sample
        # axis is exercised, on the assumption that feature metadata is not
        # handled differently.
        labels = pd.Series(
            ['a_new', 'a_new', 'b_new', None], name='foo',
            index=pd.Index(['a', 'b', 'c', 'd'], name='sampleid'))
        sample_mc = qiime2.CategoricalMetadataColumn(labels)

        counts = np.array([[1, 2, 3], [30, 20, 10]])
        table = biom.Table(counts, sample_ids=['a', 'b', 'c'],
                           observation_ids=['x', 'y'])
        means = np.array([[2, 3], [25, 10]])
        expected = biom.Table(means, sample_ids=['a_new', 'b_new'],
                              observation_ids=['x', 'y'])
        observed = group(table, axis='sample', metadata=sample_mc,
                         mode='mean-ceiling')
        self.assertEqual(expected, observed)
    def test_empty_only_in_superset(self):
        # The only missing metadata value belongs to 'd', which is not in
        # the table, so grouping is expected to succeed. Only the sample
        # axis is exercised, on the assumption that feature metadata is not
        # handled differently.
        sample_index = pd.Index(['a', 'b', 'c', 'd'], name='sampleid')
        sample_mc = qiime2.CategoricalMetadataColumn(
            pd.Series(['a_new', 'a_new', 'b_new', None], name='foo',
                      index=sample_index))

        table = biom.Table(np.array([[1, 2, 3], [30, 20, 10]]),
                           sample_ids=['a', 'b', 'c'],
                           observation_ids=['x', 'y'])
        expected = biom.Table(np.array([[2, 3], [25, 10]]),
                              sample_ids=['a_new', 'b_new'],
                              observation_ids=['x', 'y'])
        grouping_args = dict(axis='sample', metadata=sample_mc,
                             mode='mean-ceiling')
        actual = group(table, **grouping_args)
        self.assertEqual(expected, actual)
Exemple #28
0
    def test_superset_feature_group(self):
        # The metadata also covers feature 'g', which is not in the table;
        # its 'extra' group is not expected in the output.
        grouping = pd.Series(['g0', 'g0', 'g1', 'g2', 'g1', 'g2', 'extra'],
                             index=['a', 'b', 'c', 'd', 'e', 'f', 'g'])
        feature_mc = qiime2.MetadataCategory(grouping)
        counts = np.array([
            [1, 0, 0],
            [1, 10, 10],
            [0, 0, 100],
            [5, 5, 5],
            [0, 1, 100],
            [7, 8, 9],
        ])
        # 'g' is deliberately left out of the table
        table = biom.Table(counts, sample_ids=['s1', 's2', 's3'],
                           observation_ids=['a', 'b', 'c', 'd', 'e', 'f'])

        summed = np.array([[2, 10, 10], [0, 1, 200], [12, 13, 14]])
        expected = biom.Table(summed, sample_ids=['s1', 's2', 's3'],
                              observation_ids=['g0', 'g1', 'g2'])
        observed = group(table, axis='feature', metadata=feature_mc,
                         mode='sum')
        self.assertEqual(expected, observed)
    def test_one_to_one_rename(self):
        # Every ID maps to its own new name, so grouping is expected to
        # relabel one axis and leave the counts as they were, in all modes.
        sample_series = pd.Series(
            ['a_new', 'b_new', 'c_new'], name='foo',
            index=pd.Index(['a', 'b', 'c'], name='sampleid'))
        sample_mc = qiime2.CategoricalMetadataColumn(sample_series)
        original_sample_ids = sample_mc.to_series().index
        new_sample_ids = list(sample_mc.to_series())

        feature_series = pd.Series(
            ['x_new', 'y_new'], name='foo',
            index=pd.Index(['x', 'y'], name='featureid'))
        feature_mc = qiime2.CategoricalMetadataColumn(feature_series)
        original_feature_ids = feature_mc.to_series().index
        new_feature_ids = list(feature_mc.to_series())

        data = np.array([[1, 2, 3], [30, 20, 10]])
        table = biom.Table(data, sample_ids=original_sample_ids,
                           observation_ids=original_feature_ids)

        # Sum, mean and median, in that order
        modes = ('sum', 'mean-ceiling', 'median-ceiling')

        # Sample renames
        expected = biom.Table(data, sample_ids=new_sample_ids,
                              observation_ids=original_feature_ids)
        for mode in modes:
            result = group(table, axis='sample', metadata=sample_mc,
                           mode=mode)
            self.assertEqual(expected, result)

        # Feature renames
        expected = biom.Table(data, sample_ids=original_sample_ids,
                              observation_ids=new_feature_ids)
        for mode in modes:
            result = group(table, axis='feature', metadata=feature_mc,
                           mode=mode)
            self.assertEqual(expected, result)
    def test_one_to_one_rename(self):
        # Every ID maps to its own new name, so grouping is expected to
        # relabel one axis and leave the counts as they were, in all modes.
        sample_mc = qiime2.CategoricalMetadataColumn(pd.Series(
            ['a_new', 'b_new', 'c_new'],
            name='foo',
            index=pd.Index(['a', 'b', 'c'], name='sampleid')))
        old_sample_ids = sample_mc.to_series().index
        renamed_sample_ids = list(sample_mc.to_series())

        feature_mc = qiime2.CategoricalMetadataColumn(pd.Series(
            ['x_new', 'y_new'],
            name='foo',
            index=pd.Index(['x', 'y'], name='featureid')))
        old_feature_ids = feature_mc.to_series().index
        renamed_feature_ids = list(feature_mc.to_series())

        data = np.array([[1, 2, 3], [30, 20, 10]])
        table = biom.Table(data, sample_ids=old_sample_ids,
                           observation_ids=old_feature_ids)

        # One (axis, metadata, expected table) entry per renamed axis
        scenarios = [
            ('sample', sample_mc,
             biom.Table(data, sample_ids=renamed_sample_ids,
                        observation_ids=old_feature_ids)),
            ('feature', feature_mc,
             biom.Table(data, sample_ids=old_sample_ids,
                        observation_ids=renamed_feature_ids)),
        ]

        for axis, metadata, expected in scenarios:
            # Sum, mean and median, in that order
            for mode in ('sum', 'mean-ceiling', 'median-ceiling'):
                result = group(table, axis=axis, metadata=metadata,
                               mode=mode)
                self.assertEqual(expected, result)
    def test_superset_sample_group(self):
        # The metadata also covers 's3' and 's6', which are not in the
        # table; only groups 'g0' and 'g1' are expected in the output.
        sample_index = pd.Index(['s1', 's2', 's3', 's4', 's5', 's6'],
                                name='sampleid')
        grouping = pd.Series(['g0', 'g1', 'g2', 'g0', 'g1', 'g2'],
                             name='foo', index=sample_index)
        sample_mc = qiime2.CategoricalMetadataColumn(grouping)
        counts = np.array([[0, 1, 2, 3], [10, 11, 12, 13],
                           [100, 110, 120, 130]])
        table = biom.Table(counts,
                           sample_ids=['s1', 's2', 's4', 's5'],
                           observation_ids=['x', 'y', 'z'])

        summed = np.array([[2, 4], [22, 24], [220, 240]])
        expected = biom.Table(summed, sample_ids=['g0', 'g1'],
                              observation_ids=['x', 'y', 'z'])

        observed = group(table, axis='sample', metadata=sample_mc, mode='sum')
        self.assertEqual(expected, observed)
Exemple #32
0
    def test_identity_groups(self):
        # Every ID maps to itself, so grouping on either axis, in any mode,
        # is expected to return a table equal to the input.
        sample_series = pd.Series(
            ['a', 'b', 'c'], name='foo',
            index=pd.Index(['a', 'b', 'c'], name='sampleid'))
        sample_mc = qiime2.CategoricalMetadataColumn(sample_series)
        feature_series = pd.Series(
            ['x', 'y'], name='foo',
            index=pd.Index(['x', 'y'], name='featureid'))
        feature_mc = qiime2.CategoricalMetadataColumn(feature_series)
        table = biom.Table(np.array([[1, 2, 3], [30, 20, 10]]),
                           sample_ids=sample_mc.to_series().index,
                           observation_ids=feature_mc.to_series().index)

        # Sum, mean and median, in that order
        modes = ('sum', 'mean-ceiling', 'median-ceiling')

        # Sample axis first, then feature axis
        for axis, metadata in (('sample', sample_mc),
                               ('feature', feature_mc)):
            for mode in modes:
                result = group(table, axis=axis, metadata=metadata,
                               mode=mode)
                self.assertEqual(table, result)
    def test_superset_feature_group(self):
        # The metadata also covers feature 'g', which is not in the table;
        # its 'extra' group is not expected in the output.
        feature_index = pd.Index(['a', 'b', 'c', 'd', 'e', 'f', 'g'],
                                 name='featureid')
        grouping = pd.Series(['g0', 'g0', 'g1', 'g2', 'g1', 'g2', 'extra'],
                             name='foo', index=feature_index)
        feature_mc = qiime2.CategoricalMetadataColumn(grouping)
        counts = np.array([[1, 0, 0], [1, 10, 10], [0, 0, 100], [5, 5, 5],
                           [0, 1, 100], [7, 8, 9]])
        # 'g' is deliberately left out of the table
        table = biom.Table(counts,
                           sample_ids=['s1', 's2', 's3'],
                           observation_ids=['a', 'b', 'c', 'd', 'e', 'f'])

        summed = np.array([[2, 10, 10], [0, 1, 200], [12, 13, 14]])
        expected = biom.Table(summed,
                              sample_ids=['s1', 's2', 's3'],
                              observation_ids=['g0', 'g1', 'g2'])
        observed = group(table, axis='feature', metadata=feature_mc,
                         mode='sum')
        self.assertEqual(expected, observed)
    def test_identity_groups(self):
        # Every ID maps to itself, so grouping on either axis, in any mode,
        # is expected to return a table equal to the input.
        sample_mc = qiime2.CategoricalMetadataColumn(pd.Series(
            ['a', 'b', 'c'],
            name='foo',
            index=pd.Index(['a', 'b', 'c'], name='sampleid')))
        feature_mc = qiime2.CategoricalMetadataColumn(pd.Series(
            ['x', 'y'],
            name='foo',
            index=pd.Index(['x', 'y'], name='featureid')))
        counts = np.array([[1, 2, 3], [30, 20, 10]])
        table = biom.Table(counts,
                           sample_ids=sample_mc.to_series().index,
                           observation_ids=feature_mc.to_series().index)

        # Sample axis first, then feature axis; within each, sum, mean and
        # median in that order.
        metadata_by_axis = [('sample', sample_mc), ('feature', feature_mc)]
        for axis, metadata in metadata_by_axis:
            for mode in ['sum', 'mean-ceiling', 'median-ceiling']:
                regrouped = group(table, axis=axis, metadata=metadata,
                                  mode=mode)
                self.assertEqual(table, regrouped)