Beispiel #1
0
 def test_filter_sample_categories(self):
     """Check ``filter_sample_categories`` on the 'group' field with two cutoffs."""
     # start from test1 with the 'badsample' id removed
     base = self.test1.filter_ids(['badsample'], axis=0, negate=True)

     # with a cutoff of 9 nothing is filtered
     unchanged = base.filter_sample_categories('group', 9)
     assert_experiment_equal(unchanged, base)

     # with a cutoff of 10 the result matches keeping only group '1'
     by_category = base.filter_sample_categories('group', 10)
     only_group_1 = base.filter_samples('group', '1')
     assert_experiment_equal(by_category, only_group_1)
Beispiel #2
0
 def test_filter_features_edge_cases(self):
     """Filter features on an 'oxygen' value and on its negation."""
     # keeping only 'facultative' features is expected to leave no features
     kept = self.test2.filter_features('oxygen', ['facultative'])
     self.assertEqual(kept.shape, (9, 0))

     # the negated filter is expected to give back an equal experiment
     remaining = self.test2.filter_features(
         'oxygen', ['facultative'], negate=True)
     assert_experiment_equal(remaining, self.test2)
Beispiel #3
0
 def test_copy(self):
     """``copy`` returns an equal but distinct experiment with its own data."""
     duplicate = self.test1.copy()
     assert_experiment_equal(duplicate, self.test1)
     self.assertIsNot(duplicate, self.test1)

     # changing one cell of the copy must not show up in the original,
     # i.e. the data is not shared between the two experiments
     bumped = duplicate.data[0, 0] + 1
     duplicate.data[0, 0] = bumped
     self.assertNotEqual(duplicate.data[0, 0], self.test1.data[0, 0])
Beispiel #4
0
    def test_join_metadata_fields_complex(self):
        """Join feature metadata fields in place, with and without alignment."""
        # case 1: join 'taxonomy' with itself into the new field 'test',
        # on the feature axis, with a custom separator, in place
        joined = deepcopy(self.test1)
        returned = joined.join_metadata_fields(
            'taxonomy', 'taxonomy', 'test', axis=1, sep=';', inplace=True)
        self.assertIs(returned, joined)
        self.assertIn('test', joined.feature_metadata.columns)
        self.assertNotIn('test', joined.sample_metadata.columns)
        twelfth_value = joined.feature_metadata['test'].iloc[11]
        self.assertEqual(twelfth_value, 'bad_bacteria;bad_bacteria')
        # nothing besides the new feature metadata column should differ
        assert_experiment_equal(joined, self.test1, ignore_md_fields=['test'])

        # case 2: join 'taxonomy' and 'ph' into 'test' with a separator,
        # in place and with align='<'
        joined = deepcopy(self.test1)
        returned = joined.join_metadata_fields(
            'taxonomy', 'ph', 'test',
            axis=1, sep=';', align='<', inplace=True)
        self.assertIs(returned, joined)
        self.assertIn('test', joined.feature_metadata.columns)
        self.assertNotIn('test', joined.sample_metadata.columns)
        aligned_value = joined.feature_metadata.loc['AT', 'test']
        self.assertEqual(
            aligned_value,
            'k__Bacteria; p__Tenericutes; c__Mollicutes; o__Mycoplasmatales; f__Mycoplasmataceae; g__Mycoplasma; s__                         ;4.1'
        )
        # nothing besides the new feature metadata column should differ
        assert_experiment_equal(joined, self.test1, ignore_md_fields=['test'])
Beispiel #5
0
 def test_read_amplicon(self):
     """``read_amplicon`` with min_reads/normalize matches the manual steps."""
     # one call: load the taxonomy biom table with min_reads and normalize
     loaded = ca.read_amplicon(
         self.test1_biom, min_reads=1000, normalize=10000)

     # manual route: raw read, then filter and normalize in place
     manual = ca.read(self.test1_biom, normalize=None)
     manual.filter_by_data(
         'abundance', axis=0, cutoff=1000, inplace=True, mean_or_sum='sum')
     manual.normalize(inplace=True)

     assert_experiment_equal(loaded, manual)
     self.assertIn('taxonomy', loaded.feature_metadata.columns)
Beispiel #6
0
 def test_filter_by_data_sample_edge_cases(self):
     """Sample-axis 'abundance' filtering with extreme cutoffs."""
     # cutoff of 100000: every sample is removed
     emptied = self.test2.filter_by_data(
         'abundance', axis=0, cutoff=100000, mean_or_sum='sum')
     self.assertEqual(emptied.shape, (0, 8))

     # cutoff of 1: no sample is removed, yet a new object is returned
     untouched = self.test2.filter_by_data(
         'abundance', axis=0, cutoff=1, mean_or_sum='sum')
     assert_experiment_equal(untouched, self.test2)
     self.assertIsNot(untouched, self.test2)
Beispiel #7
0
 def test_sort_by_metadata_feature(self):
     """Sort features by 'level2' and then by 'level1' (not in place)."""
     by_level2 = self.test2.sort_by_metadata(field='level2', axis=1)
     obs = by_level2.sort_by_metadata(field='level1', axis=1)
     self.assertIsNot(obs, self.test2)

     # reordering the original by the recorded 'ori.order' must give
     # the same experiment
     original_positions = obs.feature_metadata['ori.order']
     expected = self.test2.reorder(original_positions, axis=1)
     assert_experiment_equal(obs, expected)

     new_positions = obs.feature_metadata['new.order'].tolist()
     self.assertListEqual(new_positions, list(range(8)))
Beispiel #8
0
    def test_split_train_test(self):
        """Stratified split with a fixed random_state yields the known sets."""
        dense = self.test2_dense
        train, test = dense.split_train_test(
            test_size=3, stratify='categorical', random_state=7)

        expected_test = dense.filter_ids(['S3', 'S8', 'S1'], axis='s')
        assert_experiment_equal(test, expected_test)

        expected_train = dense.filter_ids(
            ['S9', 'S6', 'S5', 'S2', 'S4', 'S7'], axis='s')
        assert_experiment_equal(train, expected_train)
Beispiel #9
0
    def test_downsample_sample(self):
        """Downsampling on 'group' keeps 4 samples and all 8 features."""
        obs = self.test2.downsample('group')
        self.assertEqual(obs.shape, (4, 8))

        # rebuild the expected experiment by picking the surviving samples
        # out of the original, in the order they appear in the result
        kept_ids = obs.sample_metadata.index.tolist()
        original_ids = self.test2.sample_metadata.index.tolist()
        positions = list(map(original_ids.index, kept_ids))
        expected = self.test2.reorder(positions)
        assert_experiment_equal(obs, expected)
Beispiel #10
0
    def test_filter_by_data_feature_edge_cases(self):
        """Feature-axis 'sum_abundance' filtering with extreme cutoffs."""
        # cutoff of 10000: every feature is removed
        emptied = self.test2.filter_by_data(
            'sum_abundance', axis=1, cutoff=10000)
        self.assertEqual(emptied.shape, (9, 0))

        # cutoff of 1: no feature is removed, yet a new object is returned
        untouched = self.test2.filter_by_data(
            'sum_abundance', axis=1, cutoff=1)
        assert_experiment_equal(untouched, self.test2)
        self.assertIsNot(untouched, self.test2)
Beispiel #11
0
 def test_save(self):
     """Round-trip an experiment through ``save`` (JSON biom) and ``ca.read``.

     The experiment is written into a fresh temporary directory, read back
     from the '.biom' and '_sample.txt' files, and compared with the
     original while ignoring the '#SampleID.1' metadata field.
     """
     exp = ca.read(self.test2_biom, self.test2_samp, normalize=None)
     d = mkdtemp()
     # remove the temporary directory even when saving, reading or the
     # comparison fails, so failed runs do not leave files behind
     try:
         f = join(d, 'test1.save')
         # test the json biom format
         exp.save(f, fmt='json')
         newexp = ca.read(f + '.biom', f + '_sample.txt', normalize=None)
         assert_experiment_equal(newexp, exp, ignore_md_fields=['#SampleID.1'])
     finally:
         shutil.rmtree(d)
Beispiel #12
0
 def test_join_metadata_fields(self):
     """Join the 'id' and 'group' fields with the default parameters."""
     joined = self.test1.join_metadata_fields('id', 'group', inplace=False)

     sample_md = joined.sample_metadata
     self.assertIn('id_group', sample_md.columns)
     self.assertEqual(sample_md.loc['S12', 'id_group'], '12.0_2')

     # apart from the new 'id_group' column the experiment is unchanged
     assert_experiment_equal(
         joined, self.test1, ignore_md_fields=['id_group'])
Beispiel #13
0
 def test_from_pandas_reorder(self):
     """``from_pandas`` with a reordered dataframe matches a sorted experiment."""
     frame = self.test1.to_pandas(sparse=False)

     # reorder the dataframe: rows by the column of feature #10,
     # then columns by the first row
     sort_column = self.test1.feature_metadata.index.values[10]
     frame = frame.sort_values(sort_column)
     sort_row = frame.index.values[0]
     frame = frame.sort_values(sort_row, axis=1)
     res = ca.Experiment.from_pandas(frame, self.test1)

     # apply the matching sorts to the original experiment
     expected = self.test1.sort_by_data(subset=[10], key='mean')
     expected = expected.sort_by_data(subset=[0], key='mean', axis=1)
     assert_experiment_equal(res, expected)
Beispiel #14
0
 def test_read_amplicon(self):
     """``read_amplicon`` with filter_reads/normalize matches the manual steps."""
     # one call: load the taxonomy biom table with filter_reads and normalize
     loaded = ca.read_amplicon(
         self.test1_biom, filter_reads=1000, normalize=10000)

     # manual route: raw read, then filter and normalize in place
     manual = ca.read(self.test1_biom, normalize=None)
     manual.filter_by_data('sum_abundance', cutoff=1000, inplace=True)
     manual.normalize(inplace=True)

     assert_experiment_equal(loaded, manual)
     self.assertIn('taxonomy', loaded.feature_metadata)
Beispiel #15
0
 def test_filter_samples_edge_cases(self):
     """Filter samples on a 'group' value that no sample has."""
     fresh = ca.read(
         self.test1_biom, self.test1_samp, self.test1_feat, normalize=None)

     # group dtype is O, so the value is given as the string '3';
     # there is no group 3, so keeping it leaves no samples
     kept = fresh.filter_samples('group', ['3'])
     self.assertEqual(kept.shape, (0, 12))

     # ...and removing it leaves the experiment unchanged
     remaining = fresh.filter_samples('group', ['3'], negate=True)
     assert_experiment_equal(remaining, fresh)
Beispiel #16
0
    def test_sort_by_data_sample(self):
        """Sort samples by the data of selected feature subsets."""
        # sorting on the first and last features keeps the original order
        same_order = self.test2.sort_by_data(subset=[0, 7])
        assert_experiment_equal(same_order, self.test2)

        # sorting on features 0 and 3: reordering the original by the
        # recorded 'ori.order' must reproduce the result
        obs = self.test2.sort_by_data(subset=[0, 3])
        original_positions = obs.sample_metadata['ori.order']
        expected = self.test2.reorder(original_positions, axis=0)
        assert_experiment_equal(obs, expected)

        new_positions = obs.sample_metadata['new.order'].tolist()
        self.assertListEqual(new_positions, list(range(9)))
Beispiel #17
0
 def test_join_experiments(self):
     """Join an experiment with a relabelled deep copy of itself."""
     other = deepcopy(self.test1)
     other.description = 't2'
     joined = self.test1.join_experiments(other, prefixes=('c1', ''))

     # same number of features, twice the number of samples
     self.assertEqual(len(joined.feature_metadata),
                      len(self.test1.feature_metadata))
     self.assertEqual(len(joined.sample_metadata),
                      len(self.test1.sample_metadata) * 2)

     # the samples tagged 't2' must match the copy, apart from the
     # 'experiments' field
     from_t2 = joined.filter_samples('experiments', ['t2'])
     assert_experiment_equal(
         from_t2, other, ignore_md_fields=['experiments'])
Beispiel #18
0
    def test_filter_by_metadata_sample_edge_cases(self):
        """Filter samples on 'group' values matching none or all samples."""
        # there is no group 3: keeping it leaves no samples,
        # removing it leaves everything
        none_kept = self.test2.filter_by_metadata('group', [3])
        self.assertEqual(none_kept.shape, (0, 8))
        all_kept = self.test2.filter_by_metadata('group', [3], negate=True)
        assert_experiment_equal(all_kept, self.test2)

        # groups 1 and 2 cover every sample: keeping them leaves
        # everything, removing them leaves no samples
        all_kept = self.test2.filter_by_metadata('group', [1, 2])
        assert_experiment_equal(all_kept, self.test2)
        none_kept = self.test2.filter_by_metadata(
            'group', [1, 2], negate=True)
        self.assertEqual(none_kept.shape, (0, 8))
Beispiel #19
0
 def test_sort_by_metadata_sample(self):
     """Sort the timeseries in place by MINUTES, then HOUR, then DAY."""
     # apply the sorts one after the other, each on the previous result
     obs = self.timeseries
     for field in ('MINUTES', 'HOUR', 'DAY'):
         obs = obs.sort_by_metadata(field=field, inplace=True)
     self.assertIs(obs, self.timeseries)

     sorted_biom = join(self.test_data_dir, 'timeseries.sorted.time.biom')
     sample_file = join(self.test_data_dir, 'timeseries.sample')
     expected = ca.read(sorted_biom, sample_file, normalize=None)
     assert_experiment_equal(obs, expected, almost_equal=True)

     sample_numbers = obs.sample_metadata['MF_SAMPLE_NUMBER'].tolist()
     self.assertListEqual(sample_numbers, list(range(1, 96)))
Beispiel #20
0
    def test_cluster_data(self):
        """Cluster test1 using a log-then-scale transform, without filtering."""
        def log_and_scale(exp):
            # both steps modify the experiment that is passed in
            exp.log_n(inplace=True)
            exp.scale(inplace=True, axis=1)
            return exp

        # no minimal filtering
        obs = self.test1.cluster_data(transform=log_and_scale)

        clustered_biom = join(self.test_data_dir,
                              'test1.clustered.features.biom')
        expected = ca.read(clustered_biom, self.test1_samp, normalize=None)
        assert_experiment_equal(obs, expected, almost_equal=True)
Beispiel #21
0
 def test_sort_samples(self):
     """Sort the timeseries samples in place by MINUTES, HOUR, then DAY."""
     # apply the sorts one after the other, each on the previous result
     obs = self.timeseries
     for field in ('MINUTES', 'HOUR', 'DAY'):
         obs = obs.sort_samples(field, inplace=True)
     self.assertIs(obs, self.timeseries)

     sorted_biom = join(self.test_data_dir, 'timeseries.sorted.time.biom')
     sample_file = join(self.test_data_dir, 'timeseries.sample')
     expected = ca.read(sorted_biom, sample_file, normalize=None)
     assert_experiment_equal(obs, expected, almost_equal=True)

     sample_numbers = obs.sample_metadata['MF_SAMPLE_NUMBER'].tolist()
     self.assertListEqual(sample_numbers, list(range(1, 96)))
Beispiel #22
0
    def test_reorder_round_trip(self):
        """Permute samples and features, undo both, and expect the original.

        A random permutation is applied on each axis, followed by its
        inverse (obtained with ``np.argsort``); the result must equal the
        experiment as it was read.
        """
        # test double permuting of a bigger data set
        exp = ca.read(self.timeseries_biom, self.timeseries_samp, normalize=None)

        # use a local, seeded generator so that a failure can be reproduced
        # and the global numpy random state is left untouched
        rng = np.random.RandomState(0)
        rand_perm_samples = rng.permutation(exp.data.shape[0])
        rand_perm_features = rng.permutation(exp.data.shape[1])
        # argsort of a permutation gives its inverse permutation
        rev_perm_samples = np.argsort(rand_perm_samples)
        rev_perm_features = np.argsort(rand_perm_features)
        new = exp.reorder(rand_perm_features, axis=1, inplace=False)
        new.reorder(rand_perm_samples, axis=0, inplace=True)
        new.reorder(rev_perm_features, axis=1, inplace=True)
        new.reorder(rev_perm_samples, axis=0, inplace=True)

        assert_experiment_equal(new, exp)
Beispiel #23
0
 def test_filter_by_data_feature(self):
     """Filter features by 'sum_abundance' for sparse/dense and inplace on/off.

     Each combination runs on a freshly read experiment, so the ``sparse``
     flag is really exercised and an in-place run cannot alter the input
     of the following iterations.
     """
     # one feature is filtered out when cutoff is set to 25
     for sparse, inplace in [(True, False), (True, True), (False, False), (False, True)]:
         # NOTE(review): relies on the test2_biom/test2_samp/test2_feat
         # fixture paths, as the sample-axis variant of this test does
         test2 = ca.read(self.test2_biom, self.test2_samp, self.test2_feat,
                         sparse=sparse, normalize=None)
         obs = test2.filter_by_data(
             'sum_abundance', axis=1, inplace=inplace, cutoff=25)
         self.assertEqual(obs.shape, (9, 7))
         exp = ca.read(*[get_data_path(i) for i in [
             'test2.biom.filter.feature',
             'test2.sample',
             'test2.feature']],
                       normalize=None)
         assert_experiment_equal(obs, exp)
         # in-place filtering returns the same object, otherwise a new one
         if inplace:
             self.assertIs(obs, test2)
         else:
             self.assertIsNot(obs, test2)
Beispiel #24
0
    def test_log_n(self):
        """``log_n`` returns a new log2-transformed experiment by default."""
        logged = self.test2.log_n()

        # overwrite the fixture data with the expected log2 values so the
        # two experiments can be compared directly
        expected_data = np.log2(
            [[10., 20., 1., 20., 5., 100., 844., 100.],
             [10., 20., 2., 19., 1., 100., 849., 200.],
             [10., 20., 3., 18., 5., 100., 844., 300.],
             [10., 20., 4., 17., 1., 100., 849., 400.],
             [10., 20., 5., 16., 4., 100., 845., 500.],
             [10., 20., 6., 15., 1., 100., 849., 600.],
             [10., 20., 7., 14., 3., 100., 846., 700.],
             [10., 20., 8., 13., 1., 100., 849., 800.],
             [10., 20., 9., 12., 7., 100., 842., 900.]])
        self.test2.data = expected_data
        assert_experiment_equal(logged, self.test2)
        self.assertIsNot(logged, self.test2)

        # with inplace=True the very same object is returned
        in_place = self.test2.log_n(inplace=True)
        self.assertIs(in_place, self.test2)
Beispiel #25
0
 def test_join_fields_complex(self):
     """Join 'taxonomy' with itself into a new feature field, in place."""
     # join on the feature axis with a new field name and a separator
     joined = deepcopy(self.test1)
     returned = joined.join_fields(
         'taxonomy', 'taxonomy',
         newname='test', axis=1, sep=';', inplace=True)
     self.assertIs(returned, joined)

     self.assertIn('test', joined.feature_metadata.columns)
     self.assertNotIn('test', joined.sample_metadata.columns)
     twelfth_value = joined.feature_metadata['test'].iloc[11]
     self.assertEqual(twelfth_value, 'bad_bacteria;bad_bacteria')

     # nothing besides the new feature metadata column should differ
     assert_experiment_equal(joined, self.test1, ignore_md_fields=['test'])
Beispiel #26
0
 def test_filter_by_data_sample(self):
     """Filter samples by 'sum_abundance' for sparse/dense and inplace on/off."""
     for sparse in (True, False):
         for inplace in (False, True):
             fresh = ca.read(self.test2_biom, self.test2_samp,
                             self.test2_feat,
                             sparse=sparse, normalize=None)
             # filter out samples with abundance < 1200.
             # only the last sample is filtered out.
             obs = fresh.filter_by_data(
                 'sum_abundance', axis=0, inplace=inplace, cutoff=1200)
             self.assertEqual(obs.shape, (8, 8))

             expected_files = ['test2.biom.filter.sample',
                               'test2.sample',
                               'test2.feature']
             expected_paths = [get_data_path(name)
                               for name in expected_files]
             expected = ca.read(*expected_paths, normalize=None)
             assert_experiment_equal(obs, expected)

             # in-place filtering returns the same object, otherwise a new one
             if not inplace:
                 self.assertIsNot(obs, fresh)
             else:
                 self.assertIs(obs, fresh)
Beispiel #27
0
 def test_join_experiments_featurewise(self):
     """Join two small dense experiments along the feature axis."""
     # first experiment: samples s2, s1 and two features
     samples_16s = pd.DataFrame(
         {'category': ['B', 'A'], 'ph': [7.7, 6.6]},
         index=['s2', 's1'])
     features_16s = pd.DataFrame(
         {'motile': ['y', 'n']},
         index=['16S1', '16S2'])
     otu1 = ca.Experiment(np.array([[0, 9], [7, 4]]),
                          sparse=False,
                          sample_metadata=samples_16s,
                          feature_metadata=features_16s)

     # second experiment: samples s1, s2, s3 and a single feature
     samples_its = pd.DataFrame(
         {'category': ['A', 'B', 'C'], 'ph': [6.6, 7.7, 8.8]},
         index=['s1', 's2', 's3'])
     features_its = pd.DataFrame({'motile': [None]}, index=['ITS1'])
     otu2 = ca.Experiment(np.array([[6], [8], [10]]),
                          sparse=False,
                          sample_metadata=samples_its,
                          feature_metadata=features_its)

     combined_obs = otu1.join_experiments_featurewise(
         otu2, 'origin', ('16S', 'ITS'))

     # expected result: samples s1, s2 with all three features and a
     # new 'origin' feature field
     samples_expected = pd.DataFrame(
         {'category': ['A', 'B'], 'ph': [6.6, 7.7]},
         index=['s1', 's2'])
     features_expected = pd.DataFrame(
         {'motile': ['y', 'n', None], 'origin': ['16S', '16S', 'ITS']},
         index=['16S1', '16S2', 'ITS1'])
     combined_exp = ca.Experiment(np.array([[7, 4, 6], [0, 9, 8]]),
                                  sparse=False,
                                  sample_metadata=samples_expected,
                                  feature_metadata=features_expected)

     # bring the observed samples into the expected order before comparing
     expected_order = combined_exp.sample_metadata.index
     combined_obs = combined_obs.filter_ids(expected_order, axis=0)
     assert_experiment_equal(combined_obs, combined_exp)
Beispiel #28
0
 def test_save_biom(self):
     """Round-trip an experiment through ``save_biom`` in several formats.

     The experiment is saved to a file in a temporary directory as hdf5,
     as txt, and with ``add_metadata=None``, and read back after each save
     for comparison with the original.
     """
     # NOTE: Currently not testing the save biom hdf with taxonomy
     # as there is a bug there!
     exp = ca.read_amplicon(self.test1_biom,
                            self.test1_samp,
                            normalize=None,
                            min_reads=None)
     d = mkdtemp()
     # remove the temporary directory even when a save, read or
     # comparison fails, so failed runs do not leave files behind
     try:
         f = join(d, 'test1.save.biom')
         # test the hdf5 biom format
         exp.save_biom(f, fmt='hdf5')
         newexp = ca.read_amplicon(f,
                                   self.test1_samp,
                                   normalize=None,
                                   min_reads=None)
         assert_experiment_equal(newexp, exp)
         # test the txt biom format
         exp.save_biom(f, fmt='txt')
         newexp = ca.read_amplicon(f,
                                   self.test1_samp,
                                   normalize=None,
                                   min_reads=None)
         assert_experiment_equal(newexp, exp, ignore_md_fields=['taxonomy'])
         # test saving with no feature metadata (default format,
         # presumably hdf5 - confirm against save_biom): no taxonomy
         exp.save_biom(f, add_metadata=None)
         newexp = ca.read(f, self.test1_samp, normalize=None)
         self.assertNotIn('taxonomy', newexp.feature_metadata)
         assert_experiment_equal(newexp, exp, ignore_md_fields=['taxonomy'])
     finally:
         shutil.rmtree(d)
Beispiel #29
0
 def test_from_pandas_with_experiment(self):
     """``from_pandas`` given the source experiment reproduces that experiment."""
     dense_frame = self.test1.to_pandas(sparse=False)
     rebuilt = ca.Experiment.from_pandas(dense_frame, self.test1)
     assert_experiment_equal(rebuilt, self.test1)
Beispiel #30
0
 def test_deep_copy_experiment(self):
     """``deepcopy`` yields an equal experiment that is a different object."""
     clone = deepcopy(self.test1)
     assert_experiment_equal(clone, self.test1)
     self.assertIsNot(clone, self.test1)