Example #1
 def setUp(self):
     super().setUp()
     # load the test1 experiment as sparse
     self.test1 = ca.read(self.test1_biom, self.test1_samp, normalize=None)
     # load the timeseries experiment as sparse
     self.timeseries = ca.read(self.timeseries_biom,
                               self.timeseries_samp,
                               normalize=None)
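A minimal standalone sketch of what these setUp calls produce (the file names below are placeholders, not the test fixtures): reading with normalize=None leaves the counts untouched, and the resulting Experiment appears to hold its data as a scipy sparse matrix unless sparse=False is passed, as the later read tests check.

import calour as ca
import scipy.sparse

# hypothetical file names; the tests use fixture paths instead
exp = ca.read('table.biom', 'samples.txt', normalize=None)
print(exp.shape)                          # (n_samples, n_features)
print(scipy.sparse.issparse(exp.data))    # True: loaded sparse unless sparse=False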
Example #2
 def test_save(self):
     exp = ca.read(self.test2_biom, self.test2_samp, normalize=None)
     d = mkdtemp()
     f = join(d, 'test1.save')
     # test the json biom format
     exp.save(f, fmt='json')
     newexp = ca.read(f + '.biom', f + '_sample.txt', normalize=None)
     assert_experiment_equal(newexp, exp, ignore_md_fields=['#SampleID.1'])
     shutil.rmtree(d)
Example #3
 def setUp(self):
     super().setUp()
     self.test2 = ca.read(self.test2_biom,
                          self.test2_samp,
                          self.test2_feat,
                          normalize=None)
     self.test1 = ca.read(self.test1_biom,
                          self.test1_samp,
                          self.test1_feat,
                          normalize=None)
Example #4
 def setUp(self):
     super().setUp()
     self.test2_sparse = ca.read(self.test2_biom,
                                 self.test2_samp,
                                 self.test2_feat,
                                 normalize=None)
     self.test2_dense = ca.read(self.test2_biom,
                                self.test2_samp,
                                self.test2_feat,
                                sparse=False,
                                normalize=None)
Example #5
 def setUp(self):
     super().setUp()
     # load the simple experiment as sparse
     self.test2 = ca.read(self.test2_biom,
                          self.test2_samp,
                          self.test2_feat,
                          normalize=None)
     self.test1 = ca.read(self.test1_biom, self.test1_samp, normalize=None)
     # load the complex experiment as sparse
     self.timeseries = ca.read(self.timeseries_biom,
                               self.timeseries_samp,
                               normalize=None)
Example #6
 def setUp(self):
     super().setUp()
     # load the simple experiment as sparse
     self.test1 = ca.read(self.test1_biom, self.test1_samp, normalize=None)
     # load the paired testing experiment
     self.test_paired = ca.read(self.test_paired_biom,
                                self.test_paired_samp,
                                normalize=None)
     # load the complex experiment as sparse, normalizing and removing low-read samples
     self.complex = ca.read_amplicon(self.timeseries_biom,
                                     self.timeseries_samp,
                                     min_reads=1000,
                                     normalize=10000)
Example #7
 def test_read_no_metadata(self):
     logging.disable(logging.NOTSET)
     with self.assertLogs(level='INFO') as cm:
         # test loading without a mapping file
         exp = ca.read(self.test1_biom, normalize=None)
         self.assertRegex(cm.output[0], 'loaded 21 samples, 12 features')
         self._validate_read(exp, validate_sample_metadata=False)
Example #8
 def test_save_biom(self):
     # NOTE: Currently not testing the save biom hdf with taxonomy
     # as there is a bug there!
     exp = ca.read_amplicon(self.test1_biom,
                            self.test1_samp,
                            normalize=None,
                            min_reads=None)
     d = mkdtemp()
     f = join(d, 'test1.save.biom')
     # test the hdf5 biom format
     exp.save_biom(f, fmt='hdf5')
     newexp = ca.read_amplicon(f,
                               self.test1_samp,
                               normalize=None,
                               min_reads=None)
     assert_experiment_equal(newexp, exp)
     # test the txt biom format
     exp.save_biom(f, fmt='txt')
     newexp = ca.read_amplicon(f,
                               self.test1_samp,
                               normalize=None,
                               min_reads=None)
     assert_experiment_equal(newexp, exp, ignore_md_fields=['taxonomy'])
     # test the hdf5 biom format with no taxonomy
     exp.save_biom(f, add_metadata=None)
     newexp = ca.read(f, self.test1_samp, normalize=None)
     self.assertTrue('taxonomy' not in newexp.feature_metadata)
     assert_experiment_equal(newexp, exp, ignore_md_fields=['taxonomy'])
     shutil.rmtree(d)
Example #9
    def test_read_sample_kwargs(self):
        # re-enable logging because it is disabled in setUp
        logging.disable(logging.NOTSET)
        with self.assertLogs(level='INFO') as cm:
            # load the simple dataset as sparse
            exp = ca.read(
                self.test1_biom,
                self.test1_samp,
                self.test1_feat,
                normalize=None,
                sample_metadata_kwargs={'parse_dates': ['collection_date']})
            # test the log messages are correct
            self.assertRegex(cm.output[0], 'loaded 21 samples, 12 features')
            self.assertRegex(cm.output[1],
                             "dropped \\(1\\): {'SAMPLE_NOT_EXIST'}")
            self.assertRegex(
                cm.output[2],
                "These have data but do not have metadata: {'badsample'}")
            self.assertRegex(cm.output[3],
                             "dropped \\(1\\): {'FEATURE_NOT_EXIST'}")
            self.assertRegex(
                cm.output[4],
                "These have data but do not have metadata: {'badfeature'}")

            self.assertTrue(scipy.sparse.issparse(exp.data))
            self._validate_read(exp)

            obs_dates = exp.sample_metadata['collection_date'].tolist()
            # the last sample in OTU table does not have metadata, so NaT
            exp_dates = [pd.Timestamp('2017-8-1')] * 20 + [pd.NaT]
            self.assertListEqual(obs_dates, exp_dates)
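The sample_metadata_kwargs dict appears to be forwarded to the pandas reader that loads the mapping file, so the parse_dates behaviour can be reproduced standalone roughly as below (file name, separator and index column are assumptions for illustration).

import pandas as pd

# rough equivalent of passing sample_metadata_kwargs={'parse_dates': ['collection_date']}
samp = pd.read_csv('test1_sample.txt', sep='\t', index_col=0,   # hypothetical file name
                   parse_dates=['collection_date'])
print(samp['collection_date'].dtype)  # datetime64[ns] instead of plain strings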
Example #10
    def test_read_feature_kwargs(self):
        # re-enable logging because it is disabled in setUp
        logging.disable(logging.NOTSET)
        with self.assertLogs(level='INFO') as cm:
            # load the simple dataset as sparse
            exp = ca.read(self.test1_biom,
                          self.test1_samp,
                          self.test1_feat,
                          normalize=None,
                          feature_metadata_kwargs={'dtype': {'ph': str}})
            # test the log messages are correct
            self.assertRegex(cm.output[0], 'loaded 21 samples, 12 features')
            self.assertRegex(cm.output[1],
                             "dropped \\(1\\): {'SAMPLE_NOT_EXIST'}")
            self.assertRegex(
                cm.output[2],
                "These have data but do not have metadata: {'badsample'}")
            self.assertRegex(cm.output[3],
                             "dropped \\(1\\): {'FEATURE_NOT_EXIST'}")
            self.assertRegex(
                cm.output[4],
                "These have data but do not have metadata: {'badfeature'}")

            self.assertTrue(scipy.sparse.issparse(exp.data))
            self._validate_read(exp)
            # read as str not float
            self.assertEqual(exp.feature_metadata.loc['AA', 'ph'], '4.0')
Example #11
 def test_filter_samples_na(self):
     test1 = ca.read(self.test1_biom, self.test1_samp, self.test1_feat, normalize=None)
     # drop samples with an na value in the group column
     obs = test1.filter_samples('group', None)
     self.assertEqual(obs.shape, (20, 12))
     self.assertEqual(test1.sample_metadata.dropna(axis=0).index.tolist(),
                      obs.sample_metadata.index.tolist())
Example #12
 def test_read_not_sparse(self):
     logging.disable(logging.NOTSET)
     with self.assertLogs(level='INFO') as cm:
         # load the simple dataset as dense
         exp = ca.read(self.test1_biom, self.test1_samp, sparse=False, normalize=None)
         self.assertFalse(scipy.sparse.issparse(exp.data))
         self._validate_read(exp, cm.output)
Example #13
 def test_create_biom_table_from_exp(self):
     exp = ca.read(self.test1_biom, self.test1_samp, normalize=None)
     table = _create_biom_table_from_exp(exp)
     self.assertCountEqual(table.ids(axis='observation'), exp.feature_metadata.index.values)
     self.assertCountEqual(table.ids(axis='sample'), exp.sample_metadata.index.values)
     assert_array_almost_equal(table.matrix_data.toarray(), exp.get_data(sparse=False).transpose())
     metadata = table.metadata(id=exp.feature_metadata.index[1], axis='observation')
     self.assertEqual(metadata['taxonomy'], exp.feature_metadata['taxonomy'].iloc[1])
Example #14
 def test_filter_by_metadata_na(self):
     test = self.test2 = ca.read(self.test2_biom, self.test2_samp, self.test2_feat,
                                 normalize=None, feature_metadata_kwargs={'na_values': 'B'})
     test_drop = test.filter_by_metadata('level1', select=None, axis='f')
     self.assertEqual(self.test2.sample_metadata.index.tolist(),
                      test_drop.sample_metadata.index.tolist())
     self.assertEqual(['AT', 'AG', 'AC', 'TA', 'TT', 'TC'],
                      test_drop.feature_metadata.index.tolist())
Example #15
 def test_read_amplicon(self):
     # test loading a taxonomy biom table and filtering/normalizing
     exp1 = ca.read_amplicon(self.test1_biom, min_reads=1000, normalize=10000)
     exp2 = ca.read(self.test1_biom, normalize=None)
     exp2.filter_by_data('abundance', axis=0, cutoff=1000, inplace=True, mean_or_sum='sum')
     exp2.normalize(inplace=True)
     assert_experiment_equal(exp1, exp2)
     self.assertIn('taxonomy', exp1.feature_metadata.columns)
Example #16
 def test_read(self):
     # load the simple dataset as sparse
     exp = ca.read(self.test1_biom,
                   self.test1_samp,
                   self.test1_feat,
                   normalize=None)
     self.assertTrue(scipy.sparse.issparse(exp.data))
     self._validate_read(exp)
Example #17
 def test_read_not_sparse(self):
     # load the simple dataset as dense
     exp = ca.read(self.test1_biom,
                   self.test1_samp,
                   sparse=False,
                   normalize=None)
     self.assertFalse(scipy.sparse.issparse(exp.data))
     self._validate_read(exp)
Example #18
 def test_filter_samples_edge_cases(self):
     # no sample has group '3', so selecting it keeps nothing
     test1 = ca.read(self.test1_biom, self.test1_samp, self.test1_feat, normalize=None)
     # the group column dtype is object, so values are compared as strings
     obs = test1.filter_samples('group', ['3'])
     self.assertEqual(obs.shape, (0, 12))
     obs = test1.filter_samples('group', ['3'], negate=True)
     self.assert_experiment_equal(obs, test1)
Example #19
 def test_plot_core_features(self):
     np.random.seed(12345)
     self.test1 = ca.read(self.test1_biom, self.test1_samp, self.test1_feat, normalize=100)
     self.test1.sparse = False
     ax = self.test1.filter_samples(
         'group', ['1', '2']).plot_core_features(
             field='group', steps=(2, 12), iterations=2)
     lines = ax.get_lines()
     self.assertEqual(len(lines), 6)
Example #20
 def test_read_openms_bucket_table_samples_are_rows(self):
     # load the openms bucket table with no metadata
     exp = ca.read(self.openms_samples_rows_csv, data_file_type='csv', sample_in_row=True, sparse=False, normalize=None)
     self.assertEqual(len(exp.sample_metadata), 9)
     self.assertEqual(len(exp.feature_metadata), 10)
     self.assertEqual(exp.shape, (9, 10))
     self.assertEqual(exp.data[0, :].sum(), 8554202)
     self.assertEqual(exp.data[:, 1].sum(), 13795540)
     self.assertEqual(exp.sparse, False)
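For orientation, the bucket table this test reads is a plain CSV with samples as rows and features as columns; a quick pandas check of such a file (path and index column are hypothetical) would look roughly like this.

import pandas as pd

df = pd.read_csv('openms_bucket_table_rows.csv', index_col=0)  # hypothetical path
print(df.shape)  # expected (9, 10): 9 sample rows by 10 feature columns, matching exp.shape above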
Example #21
 def test_filter_by_data_sample(self):
     for sparse, inplace in [(True, False), (True, True), (False, False), (False, True)]:
         test2 = ca.read(self.test2_biom, self.test2_samp, self.test2_feat,
                         sparse=sparse, normalize=None)
         # filter out samples with abundance < 1200. only the last sample is filtered out.
         obs = test2.filter_by_data(
             'sum_abundance', axis=0, inplace=inplace, cutoff=1200)
         self.assertEqual(obs.shape, (8, 8))
         exp = ca.read(*[get_data_path(i) for i in [
             'test2.biom.filter.sample',
             'test2.sample',
             'test2.feature']],
                       normalize=None)
         assert_experiment_equal(obs, exp)
         if inplace:
             self.assertIs(obs, test2)
         else:
             self.assertIsNot(obs, test2)
Example #22
 def test_filter_features(self):
     for inplace in [True, False]:
         test2 = ca.read(self.test2_biom, self.test2_samp, self.test2_feat, normalize=None)
         obs = test2.filter_features('oxygen', ['anaerobic'], inplace=inplace)
         self.assertEqual(obs.shape, (9, 2))
         self.assertListEqual(obs.feature_metadata.index.tolist(), ['TG', 'TC'])
         if inplace:
             self.assertIs(obs, test2)
         else:
             self.assertIsNot(obs, test2)
Example #23
 def test_save_fasta(self):
     exp = ca.read(self.test1_biom, self.test1_samp, normalize=None)
     d = mkdtemp()
     f = join(d, 'test1.fasta')
     exp.save_fasta(f)
     seqs = []
     for seq in skbio.read(f, format='fasta'):
         seqs.append(str(seq))
     self.assertCountEqual(seqs, exp.feature_metadata.index.values)
     shutil.rmtree(d)
Example #24
 def test_normalize_by_subset_features(self):
     # test normalizing to 10k while excluding a few features from the size-factor calculation
     exp = ca.read(self.test1_biom, self.test1_samp, normalize=None)
     bad_features = [6, 7]
     features = [exp.feature_metadata.index[cbad] for cbad in bad_features]
     newexp = exp.normalize_by_subset_features(features, 10000, negate=True, inplace=False)
     # check the sum of the features we want (without 6,7) is 10k
     good_features = list(set(range(exp.data.shape[1])).difference(set(bad_features)))
     assert_array_almost_equal(newexp.data[:, good_features].sum(axis=1), np.ones([exp.data.shape[0]])*10000)
     self.assertTrue(np.all(newexp.data[:, bad_features] > exp.data[:, bad_features]))
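An illustrative numpy sketch of the idea being tested (not the library implementation): each sample is rescaled so that the summed counts of the non-excluded features hit the target, which necessarily inflates the excluded features as well.

import numpy as np

data = np.array([[10., 5., 85.],
                 [20., 20., 60.]])
exclude = [2]                                   # features ignored when computing the factor
keep = [i for i in range(data.shape[1]) if i not in exclude]
factors = 10000 / data[:, keep].sum(axis=1)     # one scaling factor per sample
normed = data * factors[:, None]                # every feature in the sample is scaled
print(normed[:, keep].sum(axis=1))              # [10000. 10000.]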
Example #25
 def test_read_amplicon(self):
     # test loading a taxonomy biom table and filtering/normalizing
     exp = ca.read_amplicon(self.test1_biom,
                            filter_reads=1000,
                            normalize=10000)
     exp2 = ca.read(self.test1_biom, normalize=None)
     exp2.filter_by_data('sum_abundance', cutoff=1000, inplace=True)
     exp2.normalize(inplace=True)
     assert_experiment_equal(exp, exp2)
     self.assertIn('taxonomy', exp.feature_metadata)
Example #26
 def load(self):
     win = LoadWindow()
     res = win.exec_()
     if res == QtWidgets.QDialog.Accepted:
         tablefname = str(win.wTableFile.text())
         mapfname = str(win.wMapFile.text())
         if mapfname == '':
             mapfname = None
         gnpsfname = str(win.wGNPSFile.text())
         if gnpsfname == '':
             gnpsfname = None
         expname = str(win.wNewName.text())
         exptype = str(win.wType.currentText())
         if exptype == 'Amplicon':
             try:
                 expdat = ca.read_amplicon(tablefname,
                                           mapfname,
                                           normalize=10000,
                                           filter_reads=1000)
              except Exception:
                  logger.warning(
                      'Load for amplicon biom table %s map %s failed' %
                      (tablefname, mapfname))
                 return
         elif exptype == 'Metabolomics (rows are samples)':
             try:
                 expdat = ca.read_open_ms(tablefname,
                                          mapfname,
                                          gnps_file=gnpsfname,
                                          normalize=None,
                                          rows_are_samples=True)
              except Exception:
                  logger.warning('Load for openms table %s map %s failed' %
                                 (tablefname, mapfname))
                 return
         elif exptype == 'Metabolomics (rows are features)':
             try:
                 expdat = ca.read_open_ms(tablefname,
                                          mapfname,
                                          gnps_file=gnpsfname,
                                          normalize=None,
                                          rows_are_samples=False)
              except Exception:
                  logger.warning('Load for openms table %s map %s failed' %
                                 (tablefname, mapfname))
                 return
          else:
              # fall back to a generic read for any other table type
              try:
                  expdat = ca.read(tablefname, mapfname)
              except Exception:
                  logger.warning('Load for biom table %s map %s failed' %
                                 (tablefname, mapfname))
                 return
         expdat._studyname = expname
         self.addexp(expdat)
Example #27
 def test_filter_samples(self):
     for inplace in [True, False]:
         test1 = ca.read(self.test1_biom, self.test1_samp, self.test1_feat, normalize=None)
         # keep only the samples with id values 11 to 14.
         obs = test1.filter_samples('id', list(range(11, 15)), inplace=inplace)
         self.assertEqual(obs.shape, (4, 12))
         self.assertEqual(obs.sample_metadata.index.tolist(), ['S11', 'S12', 'S13', 'S14'])
         if inplace:
             self.assertIs(obs, test1)
         else:
             self.assertIsNot(obs, test1)
Example #28
 def test_filter_by_metadata_feature(self):
     for sparse, inplace in [(True, False), (True, True), (False, False), (False, True)]:
         test2 = ca.read(self.test2_biom, self.test2_samp, self.test2_feat, sparse=sparse, normalize=None)
         # keep only the features whose oxygen metadata is 'anaerobic'.
         obs = test2.filter_by_metadata('oxygen', ['anaerobic'], axis=1, inplace=inplace)
         self.assertEqual(obs.shape, (9, 2))
         self.assertListEqual(obs.feature_metadata.index.tolist(), ['TG', 'TC'])
         if inplace:
             self.assertIs(obs, test2)
         else:
             self.assertIsNot(obs, test2)
Example #29
 def test_sort_samples(self):
     obs = self.timeseries.sort_samples(
         'MINUTES',
         inplace=True).sort_samples('HOUR',
                                    inplace=True).sort_samples('DAY',
                                                               inplace=True)
     self.assertIs(obs, self.timeseries)
     exp = ca.read(join(self.test_data_dir, 'timeseries.sorted.time.biom'),
                   join(self.test_data_dir, 'timeseries.sample'),
                   normalize=None)
     assert_experiment_equal(obs, exp, almost_equal=True)
     self.assertListEqual(obs.sample_metadata['MF_SAMPLE_NUMBER'].tolist(),
                          list(range(1, 96)))
Example #30
 def test_sort_by_metadata_sample(self):
     # test sorting various fields (keeping the order)
     obs = self.timeseries.sort_by_metadata(
         field='MINUTES', inplace=True).sort_by_metadata(
             field='HOUR', inplace=True).sort_by_metadata(field='DAY',
                                                          inplace=True)
     self.assertIs(obs, self.timeseries)
     exp = ca.read(join(self.test_data_dir, 'timeseries.sorted.time.biom'),
                   join(self.test_data_dir, 'timeseries.sample'),
                   normalize=None)
     assert_experiment_equal(obs, exp, almost_equal=True)
     self.assertListEqual(obs.sample_metadata['MF_SAMPLE_NUMBER'].tolist(),
                          list(range(1, 96)))