Ejemplo n.º 1
0
    def test_data_table(self):
        """Check that ``DataTable.to_dict()`` serializes as expected.

        Two entries with the fields ``foo`` and ``bar`` are wrapped in a
        ``DataTable``; the JSON dump of its ``to_dict()`` result must equal
        the JSON dump of a hand-written dictionary.
        """
        entry_cls = create_data_entry(['foo', 'bar'])
        table = DataTable(
            data=[
                entry_cls(foo='baz', bar='qux'),
                entry_cls(foo='quuz', bar='corge'),
            ],
            columns=['foo', 'bar'],
        )
        observed = table.to_dict()

        expected = {
            'data': [
                {'foo': 'baz', 'bar': 'qux'},
                {'foo': 'quuz', 'bar': 'corge'},
            ],
            'columns': ['foo', 'bar'],
        }

        # The serialized strings are compared, so the result must also be
        # JSON-serializable, not merely equal as Python objects.
        self.assertEqual(json.dumps(observed), json.dumps(expected))
    def test_presence_data_table(self):
        """Check ``Taxonomy.presence_data_table`` for two samples.

        A ``Taxonomy`` is built from ``self.table``,
        ``self.taxonomy_greengenes_df`` and ``self.table_vars``. The table
        returned for ``sample-1`` and ``sample-2`` must expose the expected
        columns (each wrapped as ``{'data': name}``) and contain exactly the
        expected rows, in any order.
        """
        taxonomy = Taxonomy(self.table, self.taxonomy_greengenes_df,
                            self.table_vars)
        obs = taxonomy.presence_data_table(['sample-1', 'sample-2'])

        exp_columns = [
            'sampleId', 'Kingdom', 'Phylum', 'Class', 'Order', 'Family',
            'Genus', 'Species', 'relativeAbundance'
        ]
        # One tuple per expected row; values follow the order of exp_columns.
        exp_rows = [
            ('sample-1', 'a', 'b', None, 'c', 'd', 'e', None, 2. / 5),
            ('sample-1', 'a', 'f', None, 'g', 'h', None, None, 3. / 5),
            ('sample-2', 'a', 'b', None, 'c', None, None, None, 1. / 5),
            ('sample-2', 'a', 'b', None, 'c', 'd', 'e', None, 4. / 5),
        ]
        DataEntry = create_data_entry(exp_columns)
        exp = DataTable(
            data=[DataEntry(**dict(zip(exp_columns, row)))
                  for row in exp_rows],
            columns=exp_columns,
        )
        self.assertListEqual([{
            'data': col
        } for col in exp.columns], obs.columns)

        # Order-insensitive row comparison (quadratic, so only suitable for
        # small frames): every expected row must consume one distinct
        # observed row, and no observed row may be left over afterwards.
        exp_df = pd.DataFrame(exp.data)
        obs_df = pd.DataFrame(obs.data)
        unmatched_obs = obs_df.copy()
        for _, row_exp in exp_df.iterrows():
            # Search only the rows that are still unmatched so that a single
            # observed row can never satisfy two expected rows.
            for o_idx, row_obs in unmatched_obs.iterrows():
                if row_exp.eq(row_obs).all():
                    unmatched_obs.drop(index=o_idx, inplace=True)
                    break
            else:
                # Without this branch a missing row (or an entirely empty
                # observed table) would go unnoticed.
                self.fail('no observed row matches expected row {}'.format(
                    row_exp.to_dict()))
        self.assertTrue(
            unmatched_obs.empty,
            'observed rows without an expected counterpart: {}'.format(
                unmatched_obs.to_dict('records')))
Ejemplo n.º 3
0
    def test_group_data_table(self):
        """Check the taxonomy group endpoint for two samples.

        Posts ``sample-1`` and ``sample-2`` as JSON to
        ``/api/taxonomy/present/group/table2-greengenes`` and expects a 200
        response whose decoded body has the expected ``columns`` and exactly
        the expected ``data`` rows, in any order.
        """
        response = self.client.post(
            '/api/taxonomy/present/group/'
            'table2-greengenes',
            content_type='application/json',
            data=json.dumps({'sample_ids': ['sample-1', 'sample-2']}))

        self.assertEqual(response.status_code, 200)

        obs = json.loads(response.data)

        exp_columns = [
            'sampleId', 'Kingdom', 'Phylum', 'Class', 'Order', 'Family',
            'Genus', 'Species', 'relativeAbundance'
        ]
        # One tuple per expected row; values follow the order of exp_columns.
        exp_rows = [
            ('sample-1', 'a', 'b', None, 'c', 'd', 'e', None, 2. / 5),
            ('sample-1', 'a', 'f', None, 'g', 'h', None, None, 3. / 5),
            ('sample-2', 'a', 'b', None, 'c', None, None, None, 1. / 5),
            ('sample-2', 'a', 'b', None, 'c', 'd', 'e', None, 4. / 5),
        ]
        DataEntry = create_data_entry(exp_columns)
        exp = DataTable(
            data=[DataEntry(**dict(zip(exp_columns, row)))
                  for row in exp_rows],
            columns=[{
                'data': col
            } for col in exp_columns],
        ).to_dict()

        self.assertListEqual(exp['columns'], obs['columns'])

        # Order-insensitive row comparison (quadratic, so only suitable for
        # small frames): every expected row must consume one distinct
        # observed row, and no observed row may be left over afterwards.
        exp_df = pd.DataFrame(exp['data'])
        obs_df = pd.DataFrame(obs['data'])
        unmatched_obs = obs_df.copy()
        for _, row_exp in exp_df.iterrows():
            # Search only the rows that are still unmatched so that a single
            # observed row can never satisfy two expected rows.
            for o_idx, row_obs in unmatched_obs.iterrows():
                if row_exp.eq(row_obs).all():
                    unmatched_obs.drop(index=o_idx, inplace=True)
                    break
            else:
                # Without this branch a missing row (or an entirely empty
                # response payload) would go unnoticed.
                self.fail('no observed row matches expected row {}'.format(
                    row_exp.to_dict()))
        self.assertTrue(
            unmatched_obs.empty,
            'observed rows without an expected counterpart: {}'.format(
                unmatched_obs.to_dict('records')))