def test__prediction_to_row(self):
    """Check the layout of every row produced by ``_prediction_to_row``.

    A pipeline (mean imputation, variance threshold of 0.05, Gaussian
    naive Bayes) is fitted on the training indices of repeat 0 / fold 0
    of OpenML task 20. For each test instance a row is built and checked:

    * the row is a ``list`` holding four leading entries, one entry per
      class label, and two trailing entries;
    * entries 0-3 equal the repeat, fold, sample and row index passed in;
    * the per-class entries are floats in ``[0, 1]`` that sum to 1;
    * the last two entries are members of ``task.class_labels``.
    """
    # NOTE(review): this file contains a second method with the same name
    # after this one. If both live in the same class, the later definition
    # replaces this one and this test never runs - confirm and rename or
    # remove one of them.
    repeat_nr = 0
    fold_nr = 0
    sample_nr = 0  # default for this task

    clf = sklearn.pipeline.Pipeline(
        steps=[
            ('Imputer', Imputer(strategy='mean')),
            ('VarianceThreshold', VarianceThreshold(threshold=0.05)),
            ('Estimator', GaussianNB()),
        ]
    )
    task = openml.tasks.get_task(20)
    train, test = task.get_train_test_split_indices(repeat_nr, fold_nr)
    X, y = task.get_X_and_y()
    clf.fit(X[train], y[train])

    test_X = X[test]
    test_y = y[test]
    probaY = clf.predict_proba(test_X)
    predY = clf.predict(test_X)

    # Row layout: 4 leading index entries, one probability per class label,
    # then 2 trailing label entries.
    proba_start = 4
    proba_stop = proba_start + len(task.class_labels)
    expected_length = proba_stop + 2

    for idx, (true_y, pred_y, proba) in enumerate(
            zip(test_y, predY, probaY)):
        arff_line = _prediction_to_row(
            repeat_nr, fold_nr, sample_nr, idx,
            task.class_labels[true_y], pred_y, proba,
            task.class_labels, clf.classes_)

        self.assertIsInstance(arff_line, list)
        self.assertEqual(len(arff_line), expected_length)
        self.assertEqual(arff_line[0], repeat_nr)
        self.assertEqual(arff_line[1], fold_nr)
        self.assertEqual(arff_line[2], sample_nr)
        self.assertEqual(arff_line[3], idx)

        probabilities = arff_line[proba_start:proba_stop]
        for probability in probabilities:
            self.assertIsInstance(probability, float)
            self.assertGreaterEqual(probability, 0.0)
            self.assertLessEqual(probability, 1.0)
        self.assertAlmostEqual(sum(probabilities), 1.0)

        self.assertIn(arff_line[-1], task.class_labels)
        self.assertIn(arff_line[-2], task.class_labels)
def test__prediction_to_row(self):
    """Check the layout of every row produced by ``_prediction_to_row``.

    A pipeline (mean imputation, variance threshold of 0.05, Gaussian
    naive Bayes) is fitted on the training indices of repeat 0 / fold 0
    of OpenML task 20. For each test instance a row is built and checked:

    * the row is a ``list`` holding three leading entries, one entry per
      class label, and two trailing entries;
    * entries 0-2 equal the repeat, fold and row index passed in;
    * the per-class entries are floats in ``[0, 1]`` that sum to 1;
    * the last two entries are members of ``task.class_labels``.
    """
    # NOTE(review): this file contains an earlier method with the same
    # name. If both live in the same class, this definition replaces the
    # earlier one and only this test runs - confirm and rename or remove
    # one of them.
    repeat_nr = 0
    fold_nr = 0

    clf = sklearn.pipeline.Pipeline(
        steps=[
            ('Imputer', Imputer(strategy='mean')),
            ('VarianceThreshold', VarianceThreshold(threshold=0.05)),
            ('Estimator', GaussianNB()),
        ]
    )
    task = openml.tasks.get_task(20)
    train, test = task.get_train_test_split_indices(repeat_nr, fold_nr)
    X, y = task.get_X_and_y()
    clf.fit(X[train], y[train])

    test_X = X[test]
    test_y = y[test]
    probaY = clf.predict_proba(test_X)
    predY = clf.predict(test_X)

    # Row layout: 3 leading index entries, one probability per class label,
    # then 2 trailing label entries.
    proba_start = 3
    proba_stop = proba_start + len(task.class_labels)
    expected_length = proba_stop + 2

    for idx, (true_y, pred_y, proba) in enumerate(
            zip(test_y, predY, probaY)):
        arff_line = _prediction_to_row(
            repeat_nr, fold_nr, idx,
            task.class_labels[true_y], pred_y, proba,
            task.class_labels, clf.classes_)

        self.assertIsInstance(arff_line, list)
        self.assertEqual(len(arff_line), expected_length)
        self.assertEqual(arff_line[0], repeat_nr)
        self.assertEqual(arff_line[1], fold_nr)
        self.assertEqual(arff_line[2], idx)

        probabilities = arff_line[proba_start:proba_stop]
        for probability in probabilities:
            self.assertIsInstance(probability, float)
            self.assertGreaterEqual(probability, 0.0)
            self.assertLessEqual(probability, 1.0)
        self.assertAlmostEqual(sum(probabilities), 1.0)

        self.assertIn(arff_line[-1], task.class_labels)
        self.assertIn(arff_line[-2], task.class_labels)