Example #1
0
 def test_sample_metadata_samples_not_represented_in_context(self):
     """Requesting metadata through a context that holds no data raises.

     The 'test' context is created and sample metadata are loaded, but no
     sample data are loaded into the context, so the lookup is expected to
     raise a ValueError whose message matches "None of the samples".
     """
     redbiom.admin.create_context('test', 'a nice test')
     redbiom.admin.load_sample_metadata(metadata)
     # assertRaisesRegex is the supported name; the assertRaisesRegexp alias
     # was deprecated in Python 3.2 and removed in Python 3.12.
     with self.assertRaisesRegex(ValueError, "None of the samples"):
         # sample data have not been loaded into the context
         sample_metadata(['10317.000047188', '10317.000046868'],
                         context='test')
Example #2
0
    def test_sample_metadata_context(self):
        """Fetch metadata through a context whose sample data carry a tag.

        Sample data are loaded into the 'test' context with tag 'foo', so
        the expected sample IDs are the original IDs suffixed with '.foo'.
        """
        redbiom.admin.create_context('test', 'a nice test')
        redbiom.admin.load_sample_metadata(metadata)
        redbiom.admin.load_sample_data(table, 'test', tag='foo')

        exp = metadata.copy()
        exp.set_index('#SampleID', inplace=True)
        exp.index = ["%s.foo" % i for i in exp.index]

        obs, ambig = sample_metadata(table.ids(), common=False, context='test')
        obs.set_index('#SampleID', inplace=True)

        self.assertEqual(sorted(exp.index), sorted(obs.index))
        self.assertTrue(set(obs.columns).issubset(exp.columns))

        # Columns missing from obs are expected to hold only the
        # 'Unspecified' placeholder. assertEqual is needed for that check:
        # assertTrue(a, b) treats b as the failure message and never compares
        # the two sets, so it passes for any non-empty column.
        for col in set(exp.columns) - set(obs.columns):
            self.assertEqual(set(exp[col].values), {'Unspecified'})
        obs = obs.loc[exp.index]

        # we cannot do a full table == table test. Round tripping is not
        # assured as we do not store null values in redbiom, and there is a
        # litany of possible null values.
        pdt.assert_series_equal(obs['BMI'], exp['BMI'])
        # values that come back as None are mapped to 'Unknown' before
        # comparing against the expected column
        obs['AGE_YEARS'] = [
            v if v is not None else 'Unknown' for v in obs['AGE_YEARS']
        ]
        pdt.assert_series_equal(obs['AGE_YEARS'], exp['AGE_YEARS'])
        pdt.assert_series_equal(obs['SAMPLE_TYPE'], exp['SAMPLE_TYPE'])
Example #3
0
    def test_sample_metadata_with_tagged(self):
        """Tagged metadata columns are returned when ``tagged=True``.

        A second metadata frame with columns 'foo' and 'bar' is loaded under
        the tag 'testtag'; the observed frame must expose both columns and
        carry sample IDs suffixed with '.testtag'.
        """
        # one row per sample: constant 'foo' value, alternating 'bar' value
        rows = [(sample_id, 'abc', position % 2)
                for position, sample_id in enumerate(metadata['#SampleID'])]
        tag_frame = pd.DataFrame(rows,
                                 columns=['#SampleID', 'foo', 'bar'],
                                 dtype=str)

        redbiom.admin.create_context('test', 'a nice test')
        redbiom.admin.load_sample_metadata(metadata)
        redbiom.admin.load_sample_metadata(tag_frame, 'testtag')
        redbiom.admin.load_sample_data(table, 'test', tag='testtag')

        expected = metadata.copy()
        expected['#SampleID'] = [
            sample_id + '.testtag' for sample_id in expected['#SampleID']
        ]
        # assigned before re-indexing, while both frames still share the
        # same row labels
        for tagged_column in ('foo', 'bar'):
            expected[tagged_column] = tag_frame[tagged_column]
        expected.set_index('#SampleID', inplace=True)

        observed, ambig = sample_metadata(table.ids(),
                                          common=False,
                                          context='test',
                                          tagged=True)
        observed.set_index('#SampleID', inplace=True)

        self.assertEqual(sorted(expected.index), sorted(observed.index))
        self.assertTrue(set(observed.columns) <= set(expected.columns))
        for tagged_column in ('foo', 'bar'):
            self.assertIn(tagged_column, observed.columns)
Example #4
0
 def test_sample_metadata_restrict(self):
     """``restrict_to`` limits the returned metadata to the named columns.

     Both frames are indexed by '#SampleID', sorted by 'BMI' and reduced to
     the 'BMI' and 'AGE_YEARS' columns before being compared in full.
     """
     redbiom.admin.load_sample_metadata(metadata)

     expected = metadata.copy()
     expected.set_index('#SampleID', inplace=True)
     expected = expected[['BMI', 'AGE_YEARS']].sort_values('BMI')

     observed, ambig = sample_metadata(table.ids(),
                                       restrict_to=['BMI', 'AGE_YEARS'])
     observed.set_index('#SampleID', inplace=True)
     observed = observed.sort_values('BMI')[['BMI', 'AGE_YEARS']]
     # values that come back as None are mapped to 'Unknown' so the frame
     # can be compared with the expected one
     observed['AGE_YEARS'] = [
         'Unknown' if age is None else age for age in observed['AGE_YEARS']
     ]

     pdt.assert_frame_equal(observed, expected)
Example #5
0
    def test_sample_metadata_have_data(self):
        """``common=True`` yields a different column set than the input.

        The observed column set must differ from the loaded metadata, and
        'AGE_YEARS' in particular must be absent.
        """
        redbiom.admin.load_sample_metadata(metadata)

        expected = metadata.copy()
        expected.set_index('#SampleID', inplace=True)

        observed, ambig = sample_metadata(table.ids(), common=True)

        # NOTE(review): this comparison relies on `observed` already being
        # indexed by sample ID -- confirm against sample_metadata's return
        # value.
        self.assertEqual(sorted(expected.index), sorted(observed.index))
        self.assertNotEqual(sorted(expected.columns),
                            sorted(observed.columns))
        observed = observed.loc[expected.index]

        # A whole-frame equality check is not possible: null values are not
        # stored in redbiom and there are many possible null spellings, so
        # round tripping is not assured. Compare selected columns instead.
        for column in ('BMI', 'SAMPLE_TYPE'):
            pdt.assert_series_equal(observed[column], expected[column])

        # One sample has "Unknown" as its AGE_YEARS value. That value is not
        # informative, so nothing is stored for that sample, and the
        # AGE_YEARS column is therefore not considered to be represented
        # across all samples.
        self.assertNotIn('AGE_YEARS', observed.columns)
Example #6
0
 def test_sample_metadata_restrict_bad_cols(self):
     """Restricting to the columns 'BMI' and 'foo' raises ``KeyError``.

     Presumably 'foo' is not a column of the loaded metadata -- verify
     against the fixture.
     """
     redbiom.admin.load_sample_metadata(metadata)
     requested_columns = ['BMI', 'foo']
     with self.assertRaises(KeyError):
         sample_metadata(table.ids(), restrict_to=requested_columns)