Example #1
0
 def test_classify_samples_ncv_accuracy(self):
     # Regression check: with a fixed random_state, classify_samples_ncv on
     # a small 3-observation x 10-sample table must reproduce the pinned
     # predictions, feature importances and class probabilities below.
     sample_ids = ['s1', 's2', 's3', 's4', 's5',
                   's6', 's7', 's8', 's9', 's10']

     # Input feature table: one row per observation, one column per sample.
     counts = np.array(
         [[4446, 9828, 3208, 776, 118, 4175, 657, 251, 7505, 617],
          [1855, 8716, 3257, 1251, 3205, 2557, 4251, 7405, 1417, 1215],
          [6616, 281, 8616, 291, 261, 253, 9075, 252, 7385, 4068]])
     dat = biom.Table(counts,
                      observation_ids=['o1', 'o2', 'o3'],
                      sample_ids=sample_ids)

     # Target column: first five samples are 'red', last five are 'blue'.
     colors = pd.Series(['red'] * 5 + ['blue'] * 5,
                        index=pd.Index(sample_ids, name='sample-id'),
                        name='color')
     md = qiime2.CategoricalMetadataColumn(colors)

     y_pred, importances, probabilities = classify_samples_ncv(
         dat, md, random_state=123, n_estimators=2, n_jobs=1,
         missing_samples='ignore')

     # Row order in which the per-sample results are expected to come back.
     result_order = ['s4', 's6', 's1', 's10', 's5',
                     's8', 's2', 's9', 's3', 's7']

     exp_pred = pd.Series(
         ['blue', 'red', 'red', 'blue', 'blue',
          'blue', 'blue', 'red', 'blue', 'blue'],
         index=pd.Index(result_order, dtype='object', name='SampleID'),
         name='prediction')

     exp_importances = pd.DataFrame(
         {'importance': [0.595111111111111,
                         0.23155555555555551,
                         0.17333333333333334]},
         index=pd.Index(['o3', 'o1', 'o2']))

     exp_probabilities = pd.DataFrame(
         [[0.5, 0.5], [0., 1.], [0., 1.], [0.5, 0.5], [0.5, 0.5],
          [0.5, 0.5], [0.5, 0.5], [0., 1.], [1., 0.], [1., 0.]],
         index=pd.Index(result_order, name='SampleID'),
         columns=['blue', 'red'])

     pdt.assert_series_equal(y_pred, exp_pred)
     pdt.assert_frame_equal(importances, exp_importances)
     pdt.assert_frame_equal(probabilities, exp_probabilities)
 def test_classify_samples_ncv(self):
     # Smoke test: run classify_samples_ncv on the table/metadata fixtures
     # held on the test instance, using a fixed seed and a tiny estimator
     # count; within this block the results are only bound, not asserted on.
     table, metadata = self.table_chard_fp, self.mdc_chard_fp
     y_pred, importances, probabilities = classify_samples_ncv(
         table, metadata, random_state=123, n_estimators=2, n_jobs=1,
         missing_samples='ignore')