def test_plot_stacked_bar(self):
    # Draw a 2-sample x 2-feature experiment as stacked bars, then verify
    # the number of axes, the x tick labels and the geometry of each bar.
    sample_md = pd.DataFrame({'A': ['ab', 'cd'], 'B': ['ef', 'gh']},
                             index=['s1', 's2'])
    feature_md = pd.DataFrame({'genus': ['bacillus', 'listeria']})
    exp = ca.Experiment(np.array([[0, 1], [2, 3]]), sample_md, feature_md)

    # bar width used in plot_stacked_bar
    width = 0.95

    fig = exp.plot_stacked_bar(sample_color_bars='A', field='genus',
                               xtick=None)
    self.assertEqual(len(fig.axes), 3)

    # the first axes holds the bars
    bar_ax = fig.axes[0]

    # x axis tick labels
    tick_texts = [label.get_text()
                  for label in bar_ax.xaxis.get_ticklabels()]
    self.assertListEqual(tick_texts, ['s1', 's2'])

    # the first four children of the axes are the bars
    bars = bar_ax.get_children()[:4]
    left_edges = [pos - width / 2 for pos in (0, 1)]
    bottoms = [0, 0, 0, 2]
    heights = [0, 2, 1, 3]

    # There are only two columns, so the expected left edges simply repeat
    # (the equivalent of indexing them with ``i % 2``).
    expected = zip(bars, left_edges * 2, bottoms, heights)
    for bar, want_x, want_y, want_h in expected:
        # bounds = lower-left corner (x, y) followed by width and height
        x, y, w, h = bar.get_bbox().bounds
        self.assertAlmostEqual(x, want_x)
        self.assertAlmostEqual(w, width)
        self.assertAlmostEqual(y, want_y)
        self.assertAlmostEqual(h, want_h)
def test_plot_hist(self):
    # Histogram the four values 0..3 into four bins: every bin should hold
    # exactly one value and be annotated with the text '1'.
    sample_md = pd.DataFrame({'A': ['ab', 'cd'], 'B': ['ef', 'gh']})
    exp = ca.Experiment(np.array([[0, 1], [2, 3]]), sample_md)

    counts, bins, ax = exp.plot_hist(bins=4)

    expected_counts = np.array([1] * 4)
    expected_edges = np.array([0., 0.75, 1.5, 2.25, 3.])
    assert_array_almost_equal(counts, expected_counts)
    assert_array_almost_equal(bins, expected_edges)

    # the numbers drawn on top of the histogram bars
    bar_labels = [text.get_text() for text in ax.texts]
    self.assertEqual(bar_labels, ['1'] * 4)
def test_join_experiments_featurewise(self):
    # Join a 2-sample/2-feature experiment with a 3-sample/1-feature one
    # and compare against the expected table restricted to samples s1, s2.
    smd_16s = pd.DataFrame({'category': ['B', 'A'], 'ph': [7.7, 6.6]},
                           index=['s2', 's1'])
    fmd_16s = pd.DataFrame({'motile': ['y', 'n']}, index=['16S1', '16S2'])
    otu1 = ca.Experiment(np.array([[0, 9], [7, 4]]),
                         sparse=False,
                         sample_metadata=smd_16s,
                         feature_metadata=fmd_16s)

    smd_its = pd.DataFrame({'category': ['A', 'B', 'C'],
                            'ph': [6.6, 7.7, 8.8]},
                           index=['s1', 's2', 's3'])
    fmd_its = pd.DataFrame({'motile': [None]}, index=['ITS1'])
    otu2 = ca.Experiment(np.array([[6], [8], [10]]),
                         sparse=False,
                         sample_metadata=smd_its,
                         feature_metadata=fmd_its)

    combined_obs = otu1.join_experiments_featurewise(
        otu2, 'origin', ('16S', 'ITS'))

    smd_expected = pd.DataFrame({'category': ['A', 'B'], 'ph': [6.6, 7.7]},
                                index=['s1', 's2'])
    fmd_expected = pd.DataFrame({'motile': ['y', 'n', None],
                                 'origin': ['16S', '16S', 'ITS']},
                                index=['16S1', '16S2', 'ITS1'])
    combined_exp = ca.Experiment(np.array([[7, 4, 6], [0, 9, 8]]),
                                 sparse=False,
                                 sample_metadata=smd_expected,
                                 feature_metadata=fmd_expected)

    # reorder the observed samples to the expected sample order
    expected_order = combined_exp.sample_metadata.index
    combined_obs = combined_obs.filter_ids(expected_order, axis=0)
    assert_experiment_equal(combined_obs, combined_exp)
def test_regress(self):
    # Run k-nearest-neighbours regression on the first 9 diabetes samples
    # with 3-fold cross validation and compare the first yielded result
    # with the stored predictions in diabetes_pred.txt.
    diabetes = datasets.load_diabetes()
    X = diabetes.data[:9]
    y = diabetes.target[:9]
    smd = pd.DataFrame({'diabetes': y})
    exp = ca.Experiment(X, smd, sparse=False)
    # KFold is not shuffled here, so its splits are already deterministic.
    # random_state is intentionally not passed: it has no effect without
    # shuffle=True, and recent scikit-learn releases raise ValueError for
    # that combination.
    run = exp.regress('diabetes', KNeighborsRegressor(), KFold(3))
    res = next(run)
    obs = pd.read_table(join(self.test_data_dir, 'diabetes_pred.txt'),
                        index_col=0)
    # make sure the column order is the same for comparison
    pdt.assert_frame_equal(res.sort_index(axis=1), obs.sort_index(axis=1))
def test_validate_sample(self):
    # A data table with 2 rows paired with sample metadata that has only
    # 1 row must make the Experiment constructor raise ValueError.
    #
    # NOTE: ``msg`` is only the text unittest reports when no ValueError is
    # raised; it is not compared against the exception's own message.
    data = np.array([[1, 2], [3, 4]])
    one_row_md = pd.DataFrame({'foo': ['a'], 'spam': ['A']})
    failure_msg = 'data table must have the same number of samples with sample_metadata table (2 != 1)'
    with self.assertRaises(ValueError, msg=failure_msg):
        ca.Experiment(data, sample_metadata=one_row_md)
def test_classify(self):
    # Classify 36 randomly drawn iris samples (fixed numpy seed) with
    # k-nearest-neighbours and 3-fold cross validation, then compare the
    # first yielded result with the stored predictions in iris_pred.txt.
    iris = datasets.load_iris()
    n = len(iris.target)
    # seed numpy so the same 36 samples are drawn on every run
    np.random.seed(0)
    i = np.random.randint(0, n, 36)
    X = iris.data[i]
    y = iris.target[i]
    # map the integer class labels to their species names
    d = dict(enumerate(iris.target_names))
    smd = pd.DataFrame({'plant': y}).replace(d)
    exp = ca.Experiment(X, smd, sparse=False)
    # KFold is not shuffled here, so its splits are already deterministic.
    # random_state is intentionally not passed: it has no effect without
    # shuffle=True, and recent scikit-learn releases raise ValueError for
    # that combination.
    run = exp.classify('plant', KNeighborsClassifier(),
                       predict='predict_proba',
                       cv=KFold(3))
    res = next(run)
    obs = pd.read_table(join(self.test_data_dir, 'iris_pred.txt'),
                        index_col=0)
    pdt.assert_frame_equal(res, obs)
def test_subsample_count(self):
    # Subsample every sample to a fixed total count at several depths and
    # check the resulting row sums, including the in-place variant.
    table = np.array([[1, 2, 3], [4, 5, 6]])
    sample_md = pd.DataFrame([['a', 'b', 'c'], ['d', 'e', 'f']])
    exp = ca.Experiment(data=table, sample_metadata=sample_md, sparse=False)

    # depth 6: both rows are expected to remain, each summing to 6
    depth = 6
    obs = exp.subsample_count(depth, random_seed=9)
    assert_array_equal(obs.data.sum(axis=1), np.array([depth, depth]))
    self.assertTrue(np.all(obs.data <= depth))

    # depth 7: the 1st row is dropped; a new object is returned by default
    depth = 7
    obs = exp.subsample_count(depth)
    assert_array_equal(obs.data.sum(axis=1), np.array([depth]))
    self.assertIsNot(obs, exp)

    # same depth with inplace=True: the very same object must come back
    obs = exp.subsample_count(depth, inplace=True)
    assert_array_equal(obs.data.sum(axis=1), np.array([depth]))
    self.assertTrue(np.all(obs.data <= depth))
    self.assertIs(obs, exp)

    # depth 10000: no row is expected to remain
    depth = 10000
    obs = exp.subsample_count(depth)
    assert_array_equal(obs.data.sum(axis=1), np.array([]))