Esempio n. 1
0
    def test_group_taxa_data_table_alt(self):
        """group_taxa_present_alt answers 200 with a data/columns payload.

        ``TaxonomyModel.presence_data_table`` is patched to return a small
        canned table while the endpoint function is called.
        """
        field_names = ['sampleId', 'rank_1', 'rank_2']
        canned_frame = pd.DataFrame({
            'sampleId': ['sample-1', 'sample-1', 'sample-2'],
            'rank_1': ['a', 'a', 'a'],
            'rank_2': ['b', 'f', 'b'],
        })

        with patch.object(TaxonomyModel, 'presence_data_table') as mock_model:
            mock_model.return_value = DataTable.from_dataframe(canned_frame)
            request_body = {'sample_ids': ['sample-1', 'sample-2']}
            response, code = group_taxa_present_alt(
                request_body, "dataset1", "some-table")

        self.assertEqual(code, 200)

        obs = json.loads(response)
        # The payload must expose exactly these two top-level keys.
        self.assertCountEqual(['data', 'columns'], obs.keys())

        # One {'data': <name>} descriptor per column, in any order.
        expected_columns = [{'data': name} for name in field_names]
        self.assertCountEqual(obs['columns'], expected_columns)

        # Every row carries exactly the three expected fields.
        for row in obs['data']:
            self.assertCountEqual(row.keys(), field_names)
Esempio n. 2
0
    def presence_data_table(self, ids: Iterable[str]) -> DataTable:
        """Build a DataTable of per-sample taxa entries for the given ids.

        The underlying table is filtered (on a copy) to ``ids`` and emptied
        of empty rows/columns. Each stored entry of every sparse vector
        yielded by ``table.iter`` becomes one row holding the sample id,
        the stored value (as ``relativeAbundance``) and the formatted taxa
        names looked up for that feature.

        Parameters
        ----------
        ids : Iterable[str]
            Identifiers to keep; passed to the table filter as a set.

        Returns
        -------
        DataTable
            Table with columns ``sampleId``, the formatter's labels, then
            ``relativeAbundance``; missing values are ``None``.
        """
        subset = self._table.filter(set(ids), inplace=False)
        subset = subset.remove_empty()
        feature_ids = subset.ids(axis='observation')

        # One row per stored (sample, feature) entry of the sparse table.
        rows = [
            {
                'sampleId': sample_id,
                'relativeAbundance': value,
                **self._formatted_taxa_names[feature_ids[position]],
            }
            for sparse_vec, sample_id, _ in subset.iter(dense=False)
            for position, value in zip(sparse_vec.indices, sparse_vec.data)
        ]

        # Passing ``columns`` pins the column order of the frame.
        column_order = (['sampleId'] + self._formatter.labels +
                        ['relativeAbundance'])
        frame = pd.DataFrame(rows, columns=column_order)

        # Cast to object first: a column that is entirely NaN defaults to a
        # numeric dtype, which cannot hold None. None is wanted because it
        # is valid JSON, whereas NaN is not.
        frame = frame.astype('object')
        missing = pd.isna(frame)
        frame[missing] = None
        return DataTable.from_dataframe(frame)
Esempio n. 3
0
    def test_data_table_from_dataframe(self):
        """from_dataframe followed by to_dict yields the expected JSON.

        The comparison is made on the ``json.dumps`` output of both
        dictionaries, so key order ('data' then 'columns') matters.
        """
        records = [
            {'foo': 'baz', 'bar': 'qux'},
            {'foo': 'quuz', 'bar': 'corge'},
        ]
        column_names = ['foo', 'bar']
        frame = pd.DataFrame(records, columns=column_names)

        table = DataTable.from_dataframe(frame)

        # Columns are expected as {'data': <name>} descriptors.
        expected = {
            'data': records,
            'columns': [{'data': name} for name in column_names],
        }

        observed_json = json.dumps(table.to_dict())
        expected_json = json.dumps(expected)
        self.assertEqual(observed_json, expected_json)
Esempio n. 4
0
    def test_group_taxa_data_table(self):
        """group_taxa_present answers 200 with a data/columns payload.

        Both ``TaxonomyRepo.tables`` (as a ``PropertyMock``) and
        ``TaxonomyModel.presence_data_table`` are patched while the
        endpoint function is called.
        """
        tables_target = ('microsetta_public_api.repo._taxonomy_repo.'
                         'TaxonomyRepo.tables')
        field_names = ['sampleId', 'rank_1', 'rank_2']

        with patch(tables_target, new_callable=PropertyMock) as mock_tables:
            with patch.object(TaxonomyModel,
                              'presence_data_table') as mock_model:
                table_entry = {
                    'table': self.table,
                    'feature-data-taxonomy': self.taxonomy_df,
                    'variances': self.table_vars,
                    'model': self.taxonomy_model,
                }
                mock_tables.return_value = {'some-table': table_entry}

                canned_frame = pd.DataFrame({
                    'sampleId': ['sample-1', 'sample-1', 'sample-2'],
                    'rank_1': ['a', 'a', 'a'],
                    'rank_2': ['b', 'f', 'b'],
                })
                mock_model.return_value = DataTable.from_dataframe(
                    canned_frame)

                request_body = {'sample_ids': ['sample-1', 'sample-2']}
                response, code = group_taxa_present(
                    request_body, "some-table")

        self.assertEqual(code, 200)

        obs = json.loads(response)
        # The payload must expose exactly these two top-level keys.
        self.assertCountEqual(['data', 'columns'], obs.keys())

        # One {'data': <name>} descriptor per column, in any order.
        expected_columns = [{'data': name} for name in field_names]
        self.assertCountEqual(obs['columns'], expected_columns)

        # Every row carries exactly the three expected fields.
        for row in obs['data']:
            self.assertCountEqual(row.keys(), field_names)