Ejemplo n.º 1
0
 def test_plot_stacked_bar(self):
     """Check the geometry of the rectangles drawn by ``plot_stacked_bar``.

     A 2x2 experiment is plotted with ``sample_color_bars='A'`` and
     ``field='genus'``. The figure must have three axes, the first axes
     must label its x ticks with the sample ids, and its first four
     children must match the expected bar rectangles.
     """
     sample_md = pd.DataFrame({'A': ['ab', 'cd'], 'B': ['ef', 'gh']},
                              index=['s1', 's2'])
     feature_md = pd.DataFrame({'genus': ['bacillus', 'listeria']})
     exp = ca.Experiment(np.array([[0, 1], [2, 3]]), sample_md, feature_md)

     # bar width the test expects plot_stacked_bar to use
     expected_width = 0.95

     fig = exp.plot_stacked_bar(sample_color_bars='A', field='genus', xtick=None)
     self.assertEqual(len(fig.axes), 3)

     # the first axes holds the bars
     bar_ax = fig.axes[0]

     # x tick labels are the sample ids
     tick_texts = [label.get_text() for label in bar_ax.xaxis.get_ticklabels()]
     self.assertListEqual(tick_texts, ['s1', 's2'])

     # the first four children of the axes are taken to be the bars
     rectangles = bar_ax.get_children()[:4]
     expected_lefts = [center - expected_width / 2 for center in (0, 1)]
     expected_bottoms = [0, 0, 0, 2]
     expected_heights = [0, 2, 1, 3]
     expected = zip(rectangles, expected_bottoms, expected_heights)
     for idx, (rect, exp_bottom, exp_height) in enumerate(expected):
         # bounds = (x, y) of the lower-left corner, then width and height
         left, bottom, bar_width, bar_height = rect.get_bbox().bounds
         # idx % 2: the four bars alternate between the two x positions
         self.assertAlmostEqual(left, expected_lefts[idx % 2])
         self.assertAlmostEqual(bar_width, expected_width)
         self.assertAlmostEqual(bottom, exp_bottom)
         self.assertAlmostEqual(bar_height, exp_height)
Ejemplo n.º 2
0
 def test_plot_hist(self):
     """Check the counts, bin edges and bar labels from ``plot_hist``.

     The data values 0, 1, 2, 3 are histogrammed into four bins; the test
     expects one value per bin and a text label '1' for each bar.
     """
     sample_md = pd.DataFrame({'A': ['ab', 'cd'], 'B': ['ef', 'gh']})
     exp = ca.Experiment(np.array([[0, 1], [2, 3]]), sample_md)

     counts, bins, ax = exp.plot_hist(bins=4)

     assert_array_almost_equal(counts, np.array([1, 1, 1, 1]))
     # five equally spaced edges from 0 to 3: 0, 0.75, 1.5, 2.25, 3
     assert_array_almost_equal(bins, np.linspace(0., 3., 5))

     # the numbers written on top of the histogram bars
     bar_labels = [text.get_text() for text in ax.texts]
     self.assertEqual(bar_labels, ['1', '1', '1', '1'])
Ejemplo n.º 3
0
 def test_join_experiments_featurewise(self):
     """Join two experiments along the feature axis and check the result.

     The first experiment has samples s2, s1 and two features; the second
     has samples s1, s2, s3 and one feature. The expected result contains
     samples s1 and s2, all three features, and an extra 'origin' feature
     metadata column holding '16S' or 'ITS'.
     """
     # first experiment: samples listed as s2, s1
     smd_first = pd.DataFrame({'category': ['B', 'A'], 'ph': [7.7, 6.6]},
                              index=['s2', 's1'])
     fmd_first = pd.DataFrame({'motile': ['y', 'n']},
                              index=['16S1', '16S2'])
     otu1 = ca.Experiment(np.array([[0, 9], [7, 4]]),
                          sparse=False,
                          sample_metadata=smd_first,
                          feature_metadata=fmd_first)

     # second experiment: one feature, plus a sample (s3) absent from the first
     smd_second = pd.DataFrame({'category': ['A', 'B', 'C'],
                                'ph': [6.6, 7.7, 8.8]},
                               index=['s1', 's2', 's3'])
     fmd_second = pd.DataFrame({'motile': [None]}, index=['ITS1'])
     otu2 = ca.Experiment(np.array([[6], [8], [10]]),
                          sparse=False,
                          sample_metadata=smd_second,
                          feature_metadata=fmd_second)

     combined_obs = otu1.join_experiments_featurewise(
         otu2, 'origin', ('16S', 'ITS'))

     # expected result, with samples ordered s1, s2
     smd_expected = pd.DataFrame({'category': ['A', 'B'], 'ph': [6.6, 7.7]},
                                 index=['s1', 's2'])
     fmd_expected = pd.DataFrame({'motile': ['y', 'n', None],
                                  'origin': ['16S', '16S', 'ITS']},
                                 index=['16S1', '16S2', 'ITS1'])
     combined_exp = ca.Experiment(np.array([[7, 4, 6], [0, 9, 8]]),
                                  sparse=False,
                                  sample_metadata=smd_expected,
                                  feature_metadata=fmd_expected)

     # put the observed samples in the expected order before comparing
     expected_sample_ids = combined_exp.sample_metadata.index
     combined_obs = combined_obs.filter_ids(expected_sample_ids, axis=0)
     assert_experiment_equal(combined_obs, combined_exp)
Ejemplo n.º 4
0
 def test_regress(self):
     """Compare the first result yielded by ``regress`` with stored predictions.

     The first nine rows of the scikit-learn diabetes dataset are wrapped in
     an Experiment, regressed on the 'diabetes' sample column with a
     k-nearest-neighbours regressor and 3-fold cross validation, and the
     first yielded data frame is compared with ``diabetes_pred.txt`` from
     the test data directory.
     """
     diabetes = datasets.load_diabetes()
     X = diabetes.data[:9]
     y = diabetes.target[:9]
     smd = pd.DataFrame({'diabetes': y})
     exp = ca.Experiment(X, smd, sparse=False)
     # KFold does not shuffle by default, so a random_state has no effect on
     # the folds; scikit-learn >= 0.24 raises ValueError when random_state is
     # given without shuffle=True. Omitting it yields the same splits.
     run = exp.regress('diabetes', KNeighborsRegressor(), KFold(3))
     res = next(run)
     obs = pd.read_table(join(self.test_data_dir, 'diabetes_pred.txt'), index_col=0)
     # sort the columns on both sides so that column order does not matter
     pdt.assert_frame_equal(res.sort_index(axis=1), obs.sort_index(axis=1))
Ejemplo n.º 5
0
 def test_validate_sample(self):
     """Constructing an Experiment with mismatched sample counts must fail.

     The data table has two rows while the sample metadata has one, so a
     ValueError mentioning the mismatch is expected.
     """
     # assertRaises(..., msg=...) only sets the text shown when the assertion
     # fails; it never inspects the exception. assertRaisesRegex actually
     # checks the raised message (parentheses escaped for the regex search).
     expected_message = (
         r'data table must have the same number of samples '
         r'with sample_metadata table \(2 != 1\)')
     with self.assertRaisesRegex(ValueError, expected_message):
         ca.Experiment(np.array([[1, 2], [3, 4]]),
                       sample_metadata=pd.DataFrame({
                           'foo': ['a'],
                           'spam': ['A']
                       }))
Ejemplo n.º 6
0
 def test_classify(self):
     """Compare the first result yielded by ``classify`` with stored predictions.

     Thirty-six rows are drawn (with a fixed seed) from the scikit-learn
     iris dataset, the integer targets are replaced by their species names
     in the 'plant' sample column, and a k-nearest-neighbours classifier is
     run with 3-fold cross validation using ``predict_proba``. The first
     yielded data frame is compared with ``iris_pred.txt`` from the test
     data directory.
     """
     iris = datasets.load_iris()
     n = len(iris.target)
     # fixed seed so that the same 36 rows are drawn on every run
     np.random.seed(0)
     i = np.random.randint(0, n, 36)
     X = iris.data[i]
     y = iris.target[i]
     # map integer class ids to their species names
     d = dict(enumerate(iris.target_names))
     smd = pd.DataFrame({'plant': y}).replace(d)
     exp = ca.Experiment(X, smd, sparse=False)
     # KFold does not shuffle by default, so a random_state has no effect on
     # the folds; scikit-learn >= 0.24 raises ValueError when random_state is
     # given without shuffle=True. Omitting it yields the same splits.
     run = exp.classify('plant', KNeighborsClassifier(),
                        predict='predict_proba',
                        cv=KFold(3))
     res = next(run)
     obs = pd.read_table(join(self.test_data_dir, 'iris_pred.txt'), index_col=0)
     pdt.assert_frame_equal(res, obs)
Ejemplo n.º 7
0
    def test_subsample_count(self):
        """Check ``subsample_count`` at several depths, copying and in place.

        The two samples have totals 6 and 15. The test expects every kept
        sample to sum to the requested depth, samples to disappear once the
        depth exceeds their total, and ``inplace=True`` to return the same
        object.
        """
        sample_md = pd.DataFrame([['a', 'b', 'c'], ['d', 'e', 'f']])
        experiment = ca.Experiment(data=np.array([[1, 2, 3], [4, 5, 6]]),
                                   sample_metadata=sample_md,
                                   sparse=False)

        # depth 6: both samples are kept
        depth = 6
        subsampled = experiment.subsample_count(depth, random_seed=9)
        row_totals = subsampled.data.sum(axis=1)
        assert_array_equal(row_totals, np.array([depth, depth]))
        self.assertTrue(np.all(subsampled.data <= depth))

        # depth 7: the first sample (total 6) is dropped
        depth = 7
        subsampled = experiment.subsample_count(depth)
        row_totals = subsampled.data.sum(axis=1)
        assert_array_equal(row_totals, np.array([depth]))
        # without inplace a different object is returned
        self.assertIsNot(subsampled, experiment)

        # same depth in place: the original object itself is returned
        subsampled = experiment.subsample_count(depth, inplace=True)
        row_totals = subsampled.data.sum(axis=1)
        assert_array_equal(row_totals, np.array([depth]))
        self.assertTrue(np.all(subsampled.data <= depth))
        self.assertIs(subsampled, experiment)

        # depth above every sample total: nothing is left
        depth = 10000
        subsampled = experiment.subsample_count(depth)
        row_totals = subsampled.data.sum(axis=1)
        assert_array_equal(row_totals, np.array([]))