def test_sample_metadata_samples_not_represented_in_context(self):
    # Requesting metadata through a context that holds none of the
    # requested samples must raise a ValueError.
    redbiom.admin.create_context('test', 'a nice test')
    redbiom.admin.load_sample_metadata(metadata)

    # assertRaisesRegex is used instead of the deprecated
    # assertRaisesRegexp alias, which was removed in Python 3.12.
    with self.assertRaisesRegex(ValueError, "None of the samples"):
        # sample data have not been loaded into the context
        sample_metadata(['10317.000047188', '10317.000046868'],
                        context='test')
def test_sample_metadata_context(self):
    # Metadata fetched through a context should match what was loaded,
    # with the sample IDs carrying the tag the data were loaded under.
    redbiom.admin.create_context('test', 'a nice test')
    redbiom.admin.load_sample_metadata(metadata)
    redbiom.admin.load_sample_data(table, 'test', tag='foo')

    # the expected IDs are the original IDs suffixed with the tag
    exp = metadata.copy()
    exp.set_index('#SampleID', inplace=True)
    exp.index = ["%s.foo" % i for i in exp.index]

    obs, _ = sample_metadata(table.ids(), common=False, context='test')
    obs.set_index('#SampleID', inplace=True)

    self.assertEqual(sorted(exp.index), sorted(obs.index))
    self.assertTrue(set(obs.columns).issubset(exp.columns))

    # Any column missing from obs is expected to hold nothing but
    # 'Unspecified'. This was previously written as
    # assertTrue(values, expected), which treats the expected set as the
    # failure message and passes for any non-empty set of values.
    for col in set(exp.columns) - set(obs.columns):
        self.assertEqual(set(exp[col].values), {'Unspecified'})

    obs = obs.loc[exp.index]

    # we cannot do a full table == table test. Round tripping is not
    # assured as we do not store null values in redbiom, and there is a
    # litany of possible null values.
    pdt.assert_series_equal(obs['BMI'], exp['BMI'])

    # values that come back as None are mapped to 'Unknown' before
    # comparing against the expected column
    obs['AGE_YEARS'] = [
        v if v is not None else 'Unknown' for v in obs['AGE_YEARS']
    ]
    pdt.assert_series_equal(obs['AGE_YEARS'], exp['AGE_YEARS'])
    pdt.assert_series_equal(obs['SAMPLE_TYPE'], exp['SAMPLE_TYPE'])
def test_sample_metadata_with_tagged(self):
    # With tagged=True, the columns loaded under a tag ('foo' and 'bar')
    # should appear alongside the regular sample metadata.
    tagged_rows = [(sample_id, 'abc', position % 2)
                   for position, sample_id
                   in enumerate(metadata['#SampleID'])]
    tagged_md = pd.DataFrame(tagged_rows,
                             columns=['#SampleID', 'foo', 'bar'],
                             dtype=str)

    redbiom.admin.create_context('test', 'a nice test')
    redbiom.admin.load_sample_metadata(metadata)
    redbiom.admin.load_sample_metadata(tagged_md, 'testtag')
    redbiom.admin.load_sample_data(table, 'test', tag='testtag')

    # expected frame: tag-suffixed IDs plus the tag-specific columns
    exp = metadata.copy()
    exp['#SampleID'] = [sample_id + '.testtag'
                        for sample_id in exp['#SampleID']]
    for tagged_col in ('foo', 'bar'):
        exp[tagged_col] = tagged_md[tagged_col]
    exp = exp.set_index('#SampleID')

    obs, _ = sample_metadata(table.ids(), common=False, context='test',
                             tagged=True)
    obs = obs.set_index('#SampleID')

    self.assertEqual(sorted(exp.index), sorted(obs.index))
    self.assertTrue(set(obs.columns) <= set(exp.columns))
    for tagged_col in ('foo', 'bar'):
        self.assertIn(tagged_col, obs.columns)
def test_sample_metadata_restrict(self):
    # restrict_to should limit the returned metadata to the named
    # columns; both frames are ordered by BMI before being compared.
    wanted = ['BMI', 'AGE_YEARS']
    redbiom.admin.load_sample_metadata(metadata)

    exp = metadata.copy().set_index('#SampleID')[wanted]
    exp = exp.sort_values('BMI')

    obs, _ = sample_metadata(table.ids(), restrict_to=list(wanted))
    obs = obs.set_index('#SampleID').sort_values('BMI')[wanted]

    # values that come back as None are mapped to 'Unknown' so they can
    # be compared against the expected frame
    obs['AGE_YEARS'] = [
        'Unknown' if age is None else age for age in obs['AGE_YEARS']
    ]
    pdt.assert_frame_equal(obs, exp)
def test_sample_metadata_have_data(self):
    # With common=True only the columns represented across all requested
    # samples should be returned.
    redbiom.admin.load_sample_metadata(metadata)
    exp = metadata.copy()
    exp.set_index('#SampleID', inplace=True)

    obs, _ = sample_metadata(table.ids(), common=True)
    # Index obs by sample ID, as the other tests of sample_metadata do,
    # so the index comparison and the .loc lookup below operate on
    # sample IDs, matching exp.
    obs.set_index('#SampleID', inplace=True)

    self.assertEqual(sorted(exp.index), sorted(obs.index))
    self.assertNotEqual(sorted(exp.columns), sorted(obs.columns))

    obs = obs.loc[exp.index]

    # we cannot do a full table == table test. Round tripping is not
    # assured as we do not store null values in redbiom, and there is a
    # litany of possible null values.
    pdt.assert_series_equal(obs['BMI'], exp['BMI'])
    pdt.assert_series_equal(obs['SAMPLE_TYPE'], exp['SAMPLE_TYPE'])

    # one sample has "Unknown" as its AGE_YEARS value. This means that
    # it is not informative for that column, so that value is not stored
    # for that sample. As a result, the AGE_YEARS column is not
    # considered to be represented across all samples
    self.assertNotIn('AGE_YEARS', obs.columns)
def test_sample_metadata_restrict_bad_cols(self):
    # Restricting to a column that is (presumably) absent from the loaded
    # metadata, here 'foo', must raise a KeyError.
    redbiom.admin.load_sample_metadata(metadata)

    with self.assertRaises(KeyError):
        requested_columns = ['BMI', 'foo']
        sample_metadata(table.ids(), restrict_to=requested_columns)