def test_classify_samples_ncv_accuracy(self):
    """Compare ``classify_samples_ncv`` output with hard-coded expectations.

    A 3-observation x 10-sample table and a two-level ``color`` column are
    built in place, the classifier is run with ``random_state=123``, and
    the returned predictions, importances and probabilities are compared
    against fixed expected values.
    """
    sample_ids = ['s1', 's2', 's3', 's4', 's5',
                  's6', 's7', 's8', 's9', 's10']
    # Row order of the expected prediction/probability outputs.
    expected_order = ['s4', 's6', 's1', 's10', 's5',
                      's8', 's2', 's9', 's3', 's7']

    counts = np.array([
        [4446, 9828, 3208, 776, 118, 4175, 657, 251, 7505, 617],
        [1855, 8716, 3257, 1251, 3205, 2557, 4251, 7405, 1417, 1215],
        [6616, 281, 8616, 291, 261, 253, 9075, 252, 7385, 4068],
    ])
    table = biom.Table(
        counts,
        observation_ids=['o1', 'o2', 'o3'],
        sample_ids=sample_ids,
    )

    colors = pd.Series(
        ['red'] * 5 + ['blue'] * 5,
        index=pd.Index(sample_ids, name='sample-id'),
        name='color',
    )
    metadata = qiime2.CategoricalMetadataColumn(colors)

    y_pred, importances, probabilities = classify_samples_ncv(
        table,
        metadata,
        random_state=123,
        n_estimators=2,
        n_jobs=1,
        missing_samples='ignore',
    )

    # Expected predictions, indexed in the hard-coded output order.
    pred_index = pd.Index(expected_order, dtype='object', name='SampleID')
    exp_pred = pd.Series(
        ['blue', 'red', 'red', 'blue', 'blue',
         'blue', 'blue', 'red', 'blue', 'blue'],
        index=pred_index,
        name='prediction',
    )

    # Expected feature importances, one row per observation.
    exp_importances = pd.DataFrame(
        [0.595111111111111, 0.23155555555555551, 0.17333333333333334],
        index=pd.Index(['o3', 'o1', 'o2']),
        columns=['importance'],
    )

    # Expected class probabilities; each row is [blue, red].
    even_split = [0.5, 0.5]
    only_red = [0., 1.]
    only_blue = [1., 0.]
    exp_probabilities = pd.DataFrame(
        [even_split, only_red, only_red, even_split, even_split,
         even_split, even_split, only_red, only_blue, only_blue],
        index=pd.Index(expected_order, name='SampleID'),
        columns=['blue', 'red'],
    )

    pdt.assert_series_equal(y_pred, exp_pred)
    pdt.assert_frame_equal(importances, exp_importances)
    pdt.assert_frame_equal(probabilities, exp_probabilities)
def test_classify_samples_ncv(self):
    """Run ``classify_samples_ncv`` on ``self.table_chard_fp`` and
    ``self.mdc_chard_fp``.

    No assertions are made on the values; the call only has to complete
    and return three results.
    """
    ncv_options = {
        'random_state': 123,
        'n_estimators': 2,
        'n_jobs': 1,
        'missing_samples': 'ignore',
    }
    # Unpacking into three names fails if the return arity ever changes.
    predictions, feature_importances, class_probabilities = (
        classify_samples_ncv(
            self.table_chard_fp, self.mdc_chard_fp, **ncv_options))