Esempio n. 1
0
    def test_group_taxa_data_table_alt(self):
        """Check the payload returned by ``group_taxa_present_alt``.

        ``TaxonomyModel.presence_data_table`` is patched to return a fixed
        three-row table. The test then verifies the status code, the
        top-level keys of the JSON response, the column descriptors, and
        the keys of every data row.
        """
        taxa_columns = ['sampleId', 'rank_1', 'rank_2']
        mocked_frame = pd.DataFrame({
            'sampleId': ['sample-1', 'sample-1', 'sample-2'],
            'rank_1': ['a', 'a', 'a'],
            'rank_2': ['b', 'f', 'b'],
        })
        request_body = {'sample_ids': ['sample-1', 'sample-2']}

        with patch.object(TaxonomyModel, 'presence_data_table') as mock_model:
            mock_model.return_value = DataTable.from_dataframe(mocked_frame)
            response, code = group_taxa_present_alt(
                request_body, "dataset1", "some-table")

        self.assertEqual(code, 200)
        payload = json.loads(response)
        self.assertCountEqual(['data', 'columns'], payload.keys())
        # Each column is described as a {'data': <column name>} mapping.
        self.assertCountEqual(
            payload['columns'], [{'data': name} for name in taxa_columns])
        for row in payload['data']:
            self.assertCountEqual(row.keys(), taxa_columns)
Esempio n. 2
0
    def presence_data_table(self, ids: Iterable[str]) -> DataTable:
        """Build a long-format table of the features stored for ``ids``.

        The underlying table is filtered to the requested ids and emptied
        of blank rows/columns. For every entry stored in each sparse
        vector yielded by ``iter(dense=False)``, one row is produced
        holding the vector's id (``sampleId``), the stored value
        (``relativeAbundance``) and the formatted taxonomy labels of the
        corresponding feature.

        Parameters
        ----------
        ids : Iterable[str]
            Identifiers to keep when filtering the table.

        Returns
        -------
        DataTable
            Table whose columns are ``sampleId``, the formatter's labels,
            then ``relativeAbundance``; missing values are ``None``.
        """
        wanted_ids = set(ids)
        subset = self._table.filter(wanted_ids, inplace=False).remove_empty()
        feature_ids = subset.ids(axis='observation')

        rows = [
            {
                'sampleId': sample_id,
                'relativeAbundance': value,
                **self._formatted_taxa_names[feature_ids[position]],
            }
            for sparse_vec, sample_id, _ in subset.iter(dense=False)
            for position, value in zip(sparse_vec.indices, sparse_vec.data)
        ]

        # Passing ``columns`` pins the column order of the frame.
        column_order = (['sampleId'] + self._formatter.labels +
                        ['relativeAbundance'])
        frame = pd.DataFrame(rows, columns=column_order)
        # Cast to object first: a column made up entirely of NaN would
        # otherwise get a numeric dtype, which cannot hold None. None is
        # required because it is valid JSON, whereas NaN is not.
        frame = frame.astype('object')
        missing = pd.isna(frame)
        frame[missing] = None
        return DataTable.from_dataframe(frame)
Esempio n. 3
0
    def test_data_table(self):
        """Check that ``DataTable.to_dict`` matches the expected JSON.

        Two entries created through ``create_data_entry`` are wrapped in a
        ``DataTable``; its dict form is compared with a hand-written dict
        after both have been serialized with ``json.dumps``.
        """
        DataEntry = create_data_entry(['foo', 'bar'])
        entries = [
            DataEntry(foo='baz', bar='qux'),
            DataEntry(foo='quuz', bar='corge'),
        ]
        table = DataTable(data=entries, columns=['foo', 'bar'])

        expected = {
            'data': [
                {'foo': 'baz', 'bar': 'qux'},
                {'foo': 'quuz', 'bar': 'corge'},
            ],
            'columns': ['foo', 'bar'],
        }

        # The comparison is made on the serialized strings, so key order
        # is part of what is being checked.
        self.assertEqual(json.dumps(table.to_dict()), json.dumps(expected))
Esempio n. 4
0
    def test_data_table_from_dataframe(self):
        """Check ``DataTable.from_dataframe`` followed by ``to_dict``.

        A two-row frame is converted to a ``DataTable``. The resulting dict
        is expected to carry the original records under ``'data'`` and one
        ``{'data': <column name>}`` descriptor per column under
        ``'columns'``.
        """
        records = [
            {'foo': 'baz', 'bar': 'qux'},
            {'foo': 'quuz', 'bar': 'corge'},
        ]
        column_names = ['foo', 'bar']
        frame = pd.DataFrame(records, columns=column_names)

        table = DataTable.from_dataframe(frame)

        expected = {
            'data': records,
            'columns': [{'data': name} for name in column_names],
        }
        # Serialized strings are compared, so key order matters here.
        self.assertEqual(json.dumps(table.to_dict()), json.dumps(expected))
Esempio n. 5
0
    def test_group_taxa_data_table(self):
        """Check the payload returned by ``group_taxa_present``.

        ``TaxonomyRepo.tables`` is patched to expose a single
        ``'some-table'`` resource and ``TaxonomyModel.presence_data_table``
        is patched to return a fixed three-row table. The test verifies the
        status code, the top-level keys of the JSON response, the column
        descriptors, and the keys of every data row.
        """
        taxa_columns = ['sampleId', 'rank_1', 'rank_2']
        mocked_frame = pd.DataFrame({
            'sampleId': ['sample-1', 'sample-1', 'sample-2'],
            'rank_1': ['a', 'a', 'a'],
            'rank_2': ['b', 'f', 'b'],
        })
        request_body = {'sample_ids': ['sample-1', 'sample-2']}
        tables_target = ('microsetta_public_api.repo._taxonomy_repo.'
                         'TaxonomyRepo.tables')

        with patch(tables_target, new_callable=PropertyMock) as mock_tables:
            with patch.object(TaxonomyModel,
                              'presence_data_table') as mock_model:
                mock_tables.return_value = {
                    'some-table': {
                        'table': self.table,
                        'feature-data-taxonomy': self.taxonomy_df,
                        'variances': self.table_vars,
                        'model': self.taxonomy_model,
                    },
                }
                mock_model.return_value = DataTable.from_dataframe(
                    mocked_frame)
                response, code = group_taxa_present(request_body,
                                                    "some-table")

        self.assertEqual(code, 200)
        payload = json.loads(response)
        self.assertCountEqual(['data', 'columns'], payload.keys())
        # Each column is described as a {'data': <column name>} mapping.
        self.assertCountEqual(
            payload['columns'], [{'data': name} for name in taxa_columns])
        for row in payload['data']:
            self.assertCountEqual(row.keys(), taxa_columns)
    def test_presence_data_table(self):
        """Compare ``Taxonomy.presence_data_table`` with four expected rows.

        The column descriptors of the observed table must equal the
        expected column names wrapped as ``{'data': <name>}``. The rows are
        then matched pairwise: for each expected row, the first observed
        row that compares equal is removed from a copy of the observed
        frame, and that copy must end up empty.
        """
        taxonomy = Taxonomy(self.table, self.taxonomy_greengenes_df,
                            self.table_vars)
        obs = taxonomy.presence_data_table(['sample-1', 'sample-2'])

        exp_columns = [
            'sampleId', 'Kingdom', 'Phylum', 'Class', 'Order', 'Family',
            'Genus', 'Species', 'relativeAbundance'
        ]
        DataEntry = create_data_entry(exp_columns)
        # One tuple per expected row; values follow ``exp_columns`` order.
        expected_rows = [
            ('sample-1', 'a', 'b', None, 'c', 'd', 'e', None, 2. / 5),
            ('sample-1', 'a', 'f', None, 'g', 'h', None, None, 3. / 5),
            ('sample-2', 'a', 'b', None, 'c', None, None, None, 1. / 5),
            ('sample-2', 'a', 'b', None, 'c', 'd', 'e', None, 4. / 5),
        ]
        exp = DataTable(
            data=[
                DataEntry(**dict(zip(exp_columns, values)))
                for values in expected_rows
            ],
            columns=exp_columns,
        )
        expected_descriptors = [{'data': col} for col in exp.columns]
        self.assertListEqual(expected_descriptors, obs.columns)

        # Quadratic row matching: fine for a handful of rows, not something
        # to run on a large frame.
        exp_df = pd.DataFrame(exp.data)
        obs_df = pd.DataFrame(obs.data)
        unmatched = obs_df.copy()
        for _, wanted in exp_df.iterrows():
            for position, candidate in obs_df.iterrows():
                if wanted.eq(candidate).all():
                    unmatched.drop(index=position, inplace=True)
                    break
        self.assertTrue(unmatched.empty)
Esempio n. 7
0
    def test_group_data_table(self):
        """POST to the taxonomy group endpoint and check the returned table.

        The request targets the ``table2-greengenes`` resource with two
        sample ids. The response must have status 200, its column
        descriptors must equal the expected ones, and its rows are matched
        pairwise against four expected rows: for each expected row, the
        first observed row that compares equal is removed from a copy of
        the observed frame, and that copy must end up empty.
        """
        request_body = json.dumps({'sample_ids': ['sample-1', 'sample-2']})
        response = self.client.post(
            '/api/taxonomy/present/group/'
            'table2-greengenes',
            content_type='application/json',
            data=request_body)

        self.assertEqual(response.status_code, 200)

        obs = json.loads(response.data)

        exp_columns = [
            'sampleId', 'Kingdom', 'Phylum', 'Class', 'Order', 'Family',
            'Genus', 'Species', 'relativeAbundance'
        ]
        DataEntry = create_data_entry(exp_columns)
        # One tuple per expected row; values follow ``exp_columns`` order.
        expected_rows = [
            ('sample-1', 'a', 'b', None, 'c', 'd', 'e', None, 2. / 5),
            ('sample-1', 'a', 'f', None, 'g', 'h', None, None, 3. / 5),
            ('sample-2', 'a', 'b', None, 'c', None, None, None, 1. / 5),
            ('sample-2', 'a', 'b', None, 'c', 'd', 'e', None, 4. / 5),
        ]
        expected_table = DataTable(
            data=[
                DataEntry(**dict(zip(exp_columns, values)))
                for values in expected_rows
            ],
            columns=[{'data': col} for col in exp_columns],
        )
        exp = expected_table.to_dict()

        self.assertListEqual(exp['columns'], obs['columns'])

        # Quadratic row matching: fine for a handful of rows, not something
        # to run on a large frame.
        exp_df = pd.DataFrame(exp['data'])
        obs_df = pd.DataFrame(obs['data'])
        unmatched = obs_df.copy()
        for _, wanted in exp_df.iterrows():
            for position, candidate in obs_df.iterrows():
                if wanted.eq(candidate).all():
                    unmatched.drop(index=position, inplace=True)
                    break
        self.assertTrue(unmatched.empty)