def test_evaluate_multiclass_classification_all_metrics(self):
    """Check every scoring function on sampled LDA + PCA configurations.

    For each of ``N_TEST_RUNS`` sampled configurations that can be fitted,
    ``evaluator.predict()`` must return a dict with exactly five entries,
    each of which is finite and non-negative.
    """
    X_train, Y_train, X_test, _ = get_dataset("iris")
    # The first 25 test rows are passed as validation data, the rest as test.
    X_valid, X_test = X_test[:25, ], X_test[25:, ]

    D = Dummy()
    D.info = {
        "metric": BAC_METRIC,
        "task": MULTICLASS_CLASSIFICATION,
        "is_sparse": False,
        "label_num": 3,
    }
    D.data = {
        "X_train": X_train,
        "Y_train": Y_train,
        "X_valid": X_valid,
        "X_test": X_test,
    }
    D.feat_type = ["numerical", "Numerical", "numerical", "numerical"]

    configuration_space = get_configuration_space(
        D.info, include_estimators=["lda"], include_preprocessors=["pca"])

    # Test all scoring functions
    err = []
    for i in range(N_TEST_RUNS):
        print("Evaluate configuration: %d; result:" % i)
        configuration = configuration_space.sample_configuration()
        D_ = copy.deepcopy(D)
        evaluator = HoldoutEvaluator(D_, configuration,
                                     all_scoring_functions=True)
        if not self._fit(evaluator):
            continue

        result = evaluator.predict()
        err.append(result)
        print(result)

        self.assertIsInstance(result, dict)
        # Checked once per run instead of once per key, so that an empty
        # result dict fails the test instead of passing vacuously.
        self.assertEqual(len(result), 5)
        for value in result.values():
            self.assertTrue(np.isfinite(value))
            self.assertGreaterEqual(value, 0.0)

    # `err` is a list of dicts, so `err > 1` raises TypeError on Python 3.
    # Count the individual metric values that exceed 1 instead.
    worse_than_random = sum(
        1 for result in err for value in result.values() if value > 1)
    print("Number of times it was worse than random guessing:"
          + str(worse_than_random))
def test_5000_classes(self):
    """Fit LDA on a synthetic 5000-class problem.

    The class weights are set so that 250 classes have exactly one sample
    (asserted below). The test passes if ``evaluator.fit()`` does not raise.
    """
    class_weights = [0.0002] * 4750 + [0.0001] * 250
    generator_options = dict(
        n_samples=10000,
        n_features=20,
        n_classes=5000,
        n_clusters_per_class=1,
        n_informative=15,
        n_redundant=5,
        n_repeated=0,
        weights=class_weights,
        flip_y=0,
        class_sep=1.0,
        hypercube=True,
        shift=None,
        scale=1.0,
        shuffle=True,
        random_state=1,
    )
    X, Y = sklearn.datasets.make_classification(**generator_options)

    samples_per_class = np.bincount(Y)
    self.assertEqual(250, np.sum(samples_per_class == 1))

    D = Dummy()
    D.info = dict(metric=ACC_METRIC, task=MULTICLASS_CLASSIFICATION,
                  is_sparse=False, label_num=1)
    # The same matrix is used as train, validation and test data.
    D.data = dict(X_train=X, Y_train=Y, X_valid=X, X_test=X)
    # NOTE(review): 5000 feature types are declared although the data has
    # 20 features -- confirm whether this is intentional.
    D.feat_type = ["numerical"] * 5000

    space = get_configuration_space(
        D.info,
        include_estimators=["lda"],
        include_preprocessors=["no_preprocessing"],
    )
    sampled = space.sample_configuration()
    evaluator = HoldoutEvaluator(copy.deepcopy(D), sampled)
    evaluator.fit()
def test_predict_proba_binary_classification(self):
    """Check the optimization predictions produced from a stub model.

    A stub model returning ``[0.1, 0.9]`` for 23 rows is installed on the
    evaluator; column 1 of the first 23 optimization predictions must
    equal 0.9.
    """
    self.output_dir = os.path.join(
        os.getcwd(), '.test_predict_proba_binary_classification')
    D = get_binary_classification_datamanager()

    class Dummy2(object):
        """Stub model with constant class probabilities."""

        def predict_proba(self, y, batch_size=200):
            return np.array([[0.1, 0.9] for _ in range(23)])

        def fit(self, X, y):
            return self

    space = get_configuration_space(
        D.info,
        include_estimators=['extra_trees'],
        include_preprocessors=['select_rates'])
    sampled = space.sample_configuration()

    evaluator = HoldoutEvaluator(D, self.output_dir, sampled)
    evaluator.model = Dummy2()
    loss, Y_optimization_pred, Y_valid_pred, Y_test_pred = (
        evaluator.fit_predict_and_loss())

    for row in range(23):
        self.assertEqual(0.9, Y_optimization_pred[row][1])
def test_predict_proba_binary_classification(self):
    """Check ``predict_proba`` output for a binary task with a stub model.

    The stub model returns two-column probabilities; the evaluator is
    expected to return ``[[0.9], [0.3]]`` for them.
    """
    X_train, Y_train, X_test, Y_test = get_dataset('iris')

    # Remove every sample labelled 2 from both splits.
    keep_train = Y_train != 2
    X_train, Y_train = X_train[keep_train], Y_train[keep_train]
    keep_test = Y_test != 2
    X_test, Y_test = X_test[keep_test], Y_test[keep_test]

    # The first 25 remaining test rows are passed as validation data.
    X_valid, X_test = X_test[:25, ], X_test[25:, ]
    Y_valid, Y_test = Y_test[:25, ], Y_test[25:, ]

    class Dummy2(object):
        """Stub model with fixed class probabilities."""

        def predict_proba(self, y, batch_size=200):
            return np.array([[0.1, 0.9], [0.7, 0.3]])

    model = Dummy2()
    task_type = BINARY_CLASSIFICATION

    D = Dummy()
    D.info = dict(metric=BAC_METRIC, task=task_type,
                  is_sparse=False, label_num=3)
    D.data = dict(X_train=X_train, Y_train=Y_train,
                  X_valid=X_valid, X_test=X_test)
    D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']

    space = get_configuration_space(
        D.info,
        include_estimators=['lda'],
        include_preprocessors=['select_rates'])
    evaluator = HoldoutEvaluator(D, space.sample_configuration())
    pred = evaluator.predict_proba(None, model, task_type)

    expected = [[0.9], [0.3]]
    for row, wanted in enumerate(expected):
        self.assertEqual(wanted, pred[row])
def test_evaluate_binary_classification(self):
    """Evaluate sampled LDA + PCA configurations on two-class iris.

    Samples labelled 2 are removed. Every configuration that can be
    fitted must yield a finite, non-negative prediction result.
    """
    X_train, Y_train, X_test, Y_test = get_dataset('iris')

    # Remove every sample labelled 2 from both splits.
    keep_train = Y_train != 2
    X_train, Y_train = X_train[keep_train], Y_train[keep_train]
    keep_test = Y_test != 2
    X_test, Y_test = X_test[keep_test], Y_test[keep_test]

    # The first 25 remaining test rows are passed as validation data.
    X_valid, X_test = X_test[:25, ], X_test[25:, ]
    Y_valid, Y_test = Y_test[:25, ], Y_test[25:, ]

    D = Dummy()
    D.info = dict(metric=AUC_METRIC, task=BINARY_CLASSIFICATION,
                  is_sparse=False, label_num=2)
    D.data = dict(X_train=X_train, Y_train=Y_train,
                  X_valid=X_valid, X_test=X_test)
    D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']

    space = get_configuration_space(
        D.info,
        include_estimators=['lda'],
        include_preprocessors=['pca'])

    err = np.zeros([N_TEST_RUNS])
    for run in range(N_TEST_RUNS):
        print('Evaluate configuration: %d; result:' % run)
        sampled = space.sample_configuration()
        evaluator = HoldoutEvaluator(copy.deepcopy(D), sampled)
        if self._fit(evaluator):
            err[run] = evaluator.predict()
            self.assertTrue(np.isfinite(err[run]))
            print(err[run])
            self.assertGreaterEqual(err[run], 0.0)
def test_predict_proba_binary_classification(self):
    """Check ``predict_proba`` output for a binary task with a stub model.

    The stub model returns two-column probabilities; the evaluator is
    expected to return ``[[0.9], [0.3]]`` for them.
    """
    X_train, Y_train, X_test, Y_test = get_dataset('iris')

    # Remove every sample labelled 2 from both splits.
    keep_train = Y_train != 2
    X_train, Y_train = X_train[keep_train], Y_train[keep_train]
    keep_test = Y_test != 2
    X_test, Y_test = X_test[keep_test], Y_test[keep_test]

    # The first 25 remaining test rows are passed as validation data.
    X_valid, X_test = X_test[:25, ], X_test[25:, ]
    Y_valid, Y_test = Y_test[:25, ], Y_test[25:, ]

    class Dummy2(object):
        """Stub model with fixed class probabilities."""

        def predict_proba(self, y, batch_size=200):
            return np.array([[0.1, 0.9], [0.7, 0.3]])

    model = Dummy2()
    task_type = BINARY_CLASSIFICATION

    D = Dummy()
    D.info = dict(metric=BAC_METRIC, task=task_type,
                  is_sparse=False, label_num=3)
    D.data = dict(X_train=X_train, Y_train=Y_train,
                  X_valid=X_valid, X_test=X_test)
    D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']

    space = get_configuration_space(
        D.info,
        include_estimators=['lda'],
        include_preprocessors=['select_rates'])
    evaluator = HoldoutEvaluator(D, space.sample_configuration())
    pred = evaluator.predict_proba(None, model, task_type)

    expected = [[0.9], [0.3]]
    for row, wanted in enumerate(expected):
        self.assertEqual(wanted, pred[row])
def test_file_output(self):
    """Check that ``file_output`` writes the ensemble target file.

    Configurations are sampled until one can be fitted. After predicting
    and calling ``file_output``, ``true_targets_ensemble.npy`` must exist
    below ``<output_dir>/.auto-sklearn``.
    """
    output_dir = os.path.join(os.getcwd(), '.test')
    # Best-effort removal of output left over from an earlier run.
    try:
        shutil.rmtree(output_dir)
    except Exception:
        pass

    X_train, Y_train, X_test, Y_test = get_dataset('boston')
    X_valid, X_test = X_test[:25, ], X_test[25:, ]
    Y_valid, Y_test = Y_test[:25, ], Y_test[25:, ]

    D = Dummy()
    D.info = dict(metric=R2_METRIC, task=REGRESSION,
                  is_sparse=False, label_num=3)
    D.data = dict(X_train=X_train, Y_train=Y_train,
                  X_valid=X_valid, X_test=X_test)
    D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']
    D.name = 'test'

    space = get_configuration_space(D.info)

    # NOTE(review): there is no upper bound on the number of retries.
    while True:
        sampled = space.sample_configuration()
        evaluator = HoldoutEvaluator(D, sampled,
                                     with_predictions=True,
                                     all_scoring_functions=True,
                                     output_dir=output_dir,
                                     output_y_test=True)
        if self._fit(evaluator):
            break

    evaluator.predict()
    evaluator.file_output()

    targets_file = os.path.join(
        output_dir, '.auto-sklearn', 'true_targets_ensemble.npy')
    self.assertTrue(os.path.exists(targets_file))
def test_evaluate_binary_classification(self):
    """Evaluate sampled LDA + PCA configurations on two-class iris.

    Samples labelled 2 are removed. Every configuration that can be
    fitted must yield a finite, non-negative prediction result.
    """
    X_train, Y_train, X_test, Y_test = get_dataset('iris')

    # Remove every sample labelled 2 from both splits.
    keep_train = Y_train != 2
    X_train, Y_train = X_train[keep_train], Y_train[keep_train]
    keep_test = Y_test != 2
    X_test, Y_test = X_test[keep_test], Y_test[keep_test]

    # The first 25 remaining test rows are passed as validation data.
    X_valid, X_test = X_test[:25, ], X_test[25:, ]
    Y_valid, Y_test = Y_test[:25, ], Y_test[25:, ]

    D = Dummy()
    D.info = dict(metric=AUC_METRIC, task=BINARY_CLASSIFICATION,
                  is_sparse=False, label_num=2)
    D.data = dict(X_train=X_train, Y_train=Y_train,
                  X_valid=X_valid, X_test=X_test)
    D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']

    space = get_configuration_space(
        D.info,
        include_estimators=['lda'],
        include_preprocessors=['pca'])

    err = np.zeros([N_TEST_RUNS])
    for run in range(N_TEST_RUNS):
        print('Evaluate configuration: %d; result:' % run)
        sampled = space.sample_configuration()
        evaluator = HoldoutEvaluator(copy.deepcopy(D), sampled)
        if self._fit(evaluator):
            err[run] = evaluator.predict()
            self.assertTrue(np.isfinite(err[run]))
            print(err[run])
            self.assertGreaterEqual(err[run], 0.0)
def test_evaluate_multiclass_classification_all_metrics(self):
    """Check every scoring function on sampled LDA + PCA configurations.

    For each of ``N_TEST_RUNS`` sampled configurations that can be fitted,
    ``evaluator.predict()`` must return a dict with exactly five entries,
    each of which is finite and non-negative.
    """
    X_train, Y_train, X_test, _ = get_dataset('iris')
    # The first 25 test rows are passed as validation data, the rest as test.
    X_valid, X_test = X_test[:25, ], X_test[25:, ]

    D = Dummy()
    D.info = {
        'metric': BAC_METRIC,
        'task': MULTICLASS_CLASSIFICATION,
        'is_sparse': False,
        'label_num': 3,
    }
    D.data = {
        'X_train': X_train,
        'Y_train': Y_train,
        'X_valid': X_valid,
        'X_test': X_test,
    }
    D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']

    configuration_space = get_configuration_space(
        D.info,
        include_estimators=['lda'],
        include_preprocessors=['pca'])

    # Test all scoring functions
    err = []
    for i in range(N_TEST_RUNS):
        print('Evaluate configuration: %d; result:' % i)
        configuration = configuration_space.sample_configuration()
        D_ = copy.deepcopy(D)
        evaluator = HoldoutEvaluator(D_, configuration,
                                     all_scoring_functions=True)
        if not self._fit(evaluator):
            continue

        result = evaluator.predict()
        err.append(result)
        print(result)

        self.assertIsInstance(result, dict)
        # Checked once per run instead of once per key, so that an empty
        # result dict fails the test instead of passing vacuously.
        self.assertEqual(len(result), 5)
        for value in result.values():
            self.assertTrue(np.isfinite(value))
            self.assertGreaterEqual(value, 0.0)
def test_evaluate_multilabel_classification(self):
    """Evaluate sampled extra-trees configurations on a multilabel task.

    The iris targets are converted with ``convert_to_bin`` and the last
    label column is set to 1 for every sample. Each configuration that
    can be fitted must yield a finite, non-negative prediction result.
    """
    X_train, Y_train, X_test, Y_test = get_dataset('iris')

    Y_train = np.array(convert_to_bin(Y_train, 3))
    Y_train[:, -1] = 1
    Y_test = np.array(convert_to_bin(Y_test, 3))
    Y_test[:, -1] = 1

    # The first 25 test rows are passed as validation data.
    X_valid, X_test = X_test[:25, ], X_test[25:, ]
    Y_valid, Y_test = Y_test[:25, ], Y_test[25:, ]

    D = Dummy()
    D.info = dict(metric=F1_METRIC, task=MULTILABEL_CLASSIFICATION,
                  is_sparse=False, label_num=3)
    D.data = dict(X_train=X_train, Y_train=Y_train,
                  X_valid=X_valid, X_test=X_test)
    D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']

    space = get_configuration_space(
        D.info,
        include_estimators=['extra_trees'],
        include_preprocessors=['no_preprocessing'])

    err = np.zeros([N_TEST_RUNS])
    for run in range(N_TEST_RUNS):
        print('Evaluate configuration: %d; result:' % run)
        sampled = space.sample_configuration()
        evaluator = HoldoutEvaluator(copy.deepcopy(D), sampled)
        if self._fit(evaluator):
            err[run] = evaluator.predict()
            print(err[run])
            self.assertTrue(np.isfinite(err[run]))
            self.assertGreaterEqual(err[run], 0.0)
def test_evaluate_multiclass_classification_all_metrics(self):
    """Check every scoring function on sampled LDA + PCA configurations.

    For each of ``N_TEST_RUNS`` sampled configurations that can be fitted,
    ``evaluator.predict()`` must return a dict with exactly five entries,
    each of which is finite and non-negative.
    """
    X_train, Y_train, X_test, _ = get_dataset('iris')
    # The first 25 test rows are passed as validation data, the rest as test.
    X_valid, X_test = X_test[:25, ], X_test[25:, ]

    D = Dummy()
    D.info = {
        'metric': BAC_METRIC,
        'task': MULTICLASS_CLASSIFICATION,
        'is_sparse': False,
        'label_num': 3,
    }
    D.data = {
        'X_train': X_train,
        'Y_train': Y_train,
        'X_valid': X_valid,
        'X_test': X_test,
    }
    D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']

    configuration_space = get_configuration_space(
        D.info,
        include_estimators=['lda'],
        include_preprocessors=['pca'])

    # Test all scoring functions
    err = []
    for i in range(N_TEST_RUNS):
        print('Evaluate configuration: %d; result:' % i)
        configuration = configuration_space.sample_configuration()
        D_ = copy.deepcopy(D)
        evaluator = HoldoutEvaluator(D_, configuration,
                                     all_scoring_functions=True)
        if not self._fit(evaluator):
            continue

        result = evaluator.predict()
        err.append(result)
        print(result)

        self.assertIsInstance(result, dict)
        # Checked once per run instead of once per key, so that an empty
        # result dict fails the test instead of passing vacuously.
        self.assertEqual(len(result), 5)
        for value in result.values():
            self.assertTrue(np.isfinite(value))
            self.assertGreaterEqual(value, 0.0)
def test_evaluate_regression(self):
    """Evaluate sampled extra-trees configurations on a regression task.

    Every configuration that can be fitted on the 'boston' data must
    yield a finite, non-negative prediction result.
    """
    X_train, Y_train, X_test, Y_test = get_dataset('boston')

    # The first 200 test rows are passed as validation data.
    X_valid, X_test = X_test[:200, ], X_test[200:, ]
    Y_valid, Y_test = Y_test[:200, ], Y_test[200:, ]

    D = Dummy()
    D.info = dict(metric=R2_METRIC, task=REGRESSION,
                  is_sparse=False, label_num=1)
    D.data = dict(X_train=X_train, Y_train=Y_train,
                  X_valid=X_valid, X_test=X_test)
    # Eleven entries in total; the second one is capitalised.
    D.feat_type = ['numerical', 'Numerical'] + ['numerical'] * 9

    space = get_configuration_space(
        D.info,
        include_estimators=['extra_trees'],
        include_preprocessors=['no_preprocessing'])

    err = np.zeros([N_TEST_RUNS])
    for run in range(N_TEST_RUNS):
        print('Evaluate configuration: %d; result:' % run)
        sampled = space.sample_configuration()
        evaluator = HoldoutEvaluator(copy.deepcopy(D), sampled)
        if self._fit(evaluator):
            err[run] = evaluator.predict()
            self.assertTrue(np.isfinite(err[run]))
            print(err[run])
            self.assertGreaterEqual(err[run], 0.0)
def test_evaluate_multilabel_classification(self):
    """Evaluate sampled extra-trees configurations on a multilabel task.

    The iris targets are converted with ``convert_to_bin`` and the last
    label column is set to 1 for every sample. Each configuration that
    can be fitted must yield a finite, non-negative prediction result.
    """
    X_train, Y_train, X_test, Y_test = get_dataset('iris')

    Y_train = np.array(convert_to_bin(Y_train, 3))
    Y_train[:, -1] = 1
    Y_test = np.array(convert_to_bin(Y_test, 3))
    Y_test[:, -1] = 1

    # The first 25 test rows are passed as validation data.
    X_valid, X_test = X_test[:25, ], X_test[25:, ]
    Y_valid, Y_test = Y_test[:25, ], Y_test[25:, ]

    D = Dummy()
    D.info = dict(metric=F1_METRIC, task=MULTILABEL_CLASSIFICATION,
                  is_sparse=False, label_num=3)
    D.data = dict(X_train=X_train, Y_train=Y_train,
                  X_valid=X_valid, X_test=X_test)
    D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']

    space = get_configuration_space(
        D.info,
        include_estimators=['extra_trees'],
        include_preprocessors=['no_preprocessing'])

    err = np.zeros([N_TEST_RUNS])
    for run in range(N_TEST_RUNS):
        print('Evaluate configuration: %d; result:' % run)
        sampled = space.sample_configuration()
        evaluator = HoldoutEvaluator(copy.deepcopy(D), sampled)
        if self._fit(evaluator):
            err[run] = evaluator.predict()
            print(err[run])
            self.assertTrue(np.isfinite(err[run]))
            self.assertGreaterEqual(err[run], 0.0)
def test_datasets(self):
    """Run the holdout evaluator on every dataset getter.

    Each getter runs in its own sub-test. The first element returned by
    ``fit_predict_and_loss`` must be finite in all ``N_TEST_RUNS`` runs.
    """
    # Module file name without its '.pyc' / '.py' suffix.
    module_name = os.path.basename(__file__)
    module_name = module_name.replace('.pyc', '').replace('.py', '')

    for getter in get_dataset_getters():
        testname = '%s_%s' % (module_name, getter.__name__)
        with self.subTest(testname):
            D = getter()
            self.output_directory = os.path.join(
                os.getcwd(), '.%s' % testname)

            err = np.zeros([N_TEST_RUNS])
            for run in range(N_TEST_RUNS):
                evaluator = HoldoutEvaluator(
                    copy.deepcopy(D), self.output_directory, None)
                err[run] = evaluator.fit_predict_and_loss()[0]
                self.assertTrue(np.isfinite(err[run]))
def test_datasets(self):
    """Run the holdout evaluator on every dataset getter.

    Each getter runs in its own sub-test. The first element returned by
    ``fit_predict_and_loss`` must be finite in all ``N_TEST_RUNS`` runs.
    """
    # Module file name without its '.pyc' / '.py' suffix.
    module_name = os.path.basename(__file__)
    module_name = module_name.replace('.pyc', '').replace('.py', '')

    for getter in get_dataset_getters():
        testname = '%s_%s' % (module_name, getter.__name__)
        with self.subTest(testname):
            D = getter()
            self.output_directory = os.path.join(
                os.getcwd(), '.%s' % testname)

            err = np.zeros([N_TEST_RUNS])
            for run in range(N_TEST_RUNS):
                evaluator = HoldoutEvaluator(
                    copy.deepcopy(D), self.output_directory, None)
                err[run] = evaluator.fit_predict_and_loss()[0]
                self.assertTrue(np.isfinite(err[run]))
def test_evaluate_regression(self):
    """Evaluate sampled extra-trees configurations on a regression task.

    Every configuration that can be fitted on the "boston" data must
    yield a finite, non-negative prediction result. Finally the number of
    results greater than 1 is printed.
    """
    X_train, Y_train, X_test, Y_test = get_dataset("boston")

    # The first 200 test rows are passed as validation data.
    X_valid, X_test = X_test[:200, ], X_test[200:, ]
    Y_valid, Y_test = Y_test[:200, ], Y_test[200:, ]

    D = Dummy()
    D.info = dict(metric=R2_METRIC, task=REGRESSION,
                  is_sparse=False, label_num=1)
    D.data = dict(X_train=X_train, Y_train=Y_train,
                  X_valid=X_valid, X_test=X_test)
    # Eleven entries in total; the second one is capitalised.
    D.feat_type = ["numerical", "Numerical"] + ["numerical"] * 9

    space = get_configuration_space(
        D.info,
        include_estimators=["extra_trees"],
        include_preprocessors=["no_preprocessing"],
    )

    err = np.zeros([N_TEST_RUNS])
    for run in range(N_TEST_RUNS):
        print("Evaluate configuration: %d; result:" % run)
        sampled = space.sample_configuration()
        evaluator = HoldoutEvaluator(copy.deepcopy(D), sampled)
        if self._fit(evaluator):
            err[run] = evaluator.predict()
            self.assertTrue(np.isfinite(err[run]))
            print(err[run])
            self.assertGreaterEqual(err[run], 0.0)

    worse_count = np.sum(err > 1)
    print("Number of times it was worse than random guessing:"
          + str(worse_count))
def test_evaluate_regression(self):
    """Evaluate sampled extra-trees configurations on a regression task.

    Every configuration that can be fitted on the 'boston' data must
    yield a finite, non-negative prediction result.
    """
    X_train, Y_train, X_test, Y_test = get_dataset('boston')

    # The first 200 test rows are passed as validation data.
    X_valid, X_test = X_test[:200, ], X_test[200:, ]
    Y_valid, Y_test = Y_test[:200, ], Y_test[200:, ]

    D = Dummy()
    D.info = dict(metric=R2_METRIC, task=REGRESSION,
                  is_sparse=False, label_num=1)
    D.data = dict(X_train=X_train, Y_train=Y_train,
                  X_valid=X_valid, X_test=X_test)
    # Eleven entries in total; the second one is capitalised.
    D.feat_type = ['numerical', 'Numerical'] + ['numerical'] * 9

    space = get_configuration_space(
        D.info,
        include_estimators=['extra_trees'],
        include_preprocessors=['no_preprocessing'])

    err = np.zeros([N_TEST_RUNS])
    for run in range(N_TEST_RUNS):
        print('Evaluate configuration: %d; result:' % run)
        sampled = space.sample_configuration()
        evaluator = HoldoutEvaluator(copy.deepcopy(D), sampled)
        if self._fit(evaluator):
            err[run] = evaluator.predict()
            self.assertTrue(np.isfinite(err[run]))
            print(err[run])
            self.assertGreaterEqual(err[run], 0.0)
def test_file_output(self):
    """Check that ``file_output`` writes the ensemble target file.

    One sampled configuration is fitted on the regression data manager and
    its results are handed to ``file_output``. Afterwards
    ``true_targets_ensemble.npy`` must exist below
    ``<output_dir>/.auto-sklearn``.
    """
    self.output_dir = os.path.join(os.getcwd(), '.test')

    D = get_regression_datamanager()
    D.name = 'test'

    sampled = get_configuration_space(D.info).sample_configuration()
    evaluator = HoldoutEvaluator(
        D,
        self.output_dir,
        sampled,
        with_predictions=True,
        all_scoring_functions=True,
        output_y_test=True)

    loss, opt_pred, valid_pred, test_pred = (
        evaluator.fit_predict_and_loss())
    evaluator.file_output(loss, opt_pred, valid_pred, test_pred)

    targets_file = os.path.join(
        self.output_dir, '.auto-sklearn', 'true_targets_ensemble.npy')
    self.assertTrue(os.path.exists(targets_file))
def test_file_output(self):
    """Check that ``file_output`` writes the ensemble target file.

    Configurations are sampled until one can be fitted. After predicting
    and calling ``file_output``, ``true_targets_ensemble.npy`` must exist
    below ``<output_dir>/.auto-sklearn``.
    """
    output_dir = os.path.join(os.getcwd(), ".test")
    # Best-effort removal of output left over from an earlier run.
    try:
        shutil.rmtree(output_dir)
    except Exception:
        pass

    X_train, Y_train, X_test, Y_test = get_dataset("boston")
    X_valid, X_test = X_test[:25, ], X_test[25:, ]
    Y_valid, Y_test = Y_test[:25, ], Y_test[25:, ]

    D = Dummy()
    D.info = dict(metric=R2_METRIC, task=REGRESSION,
                  is_sparse=False, label_num=3)
    D.data = dict(X_train=X_train, Y_train=Y_train,
                  X_valid=X_valid, X_test=X_test)
    D.feat_type = ["numerical", "Numerical", "numerical", "numerical"]
    D.name = "test"

    space = get_configuration_space(D.info)

    # NOTE(review): there is no upper bound on the number of retries.
    while True:
        sampled = space.sample_configuration()
        evaluator = HoldoutEvaluator(
            D,
            sampled,
            with_predictions=True,
            all_scoring_functions=True,
            output_dir=output_dir,
            output_y_test=True,
        )
        if self._fit(evaluator):
            break

    evaluator.predict()
    evaluator.file_output()

    targets_file = os.path.join(
        output_dir, ".auto-sklearn", "true_targets_ensemble.npy")
    self.assertTrue(os.path.exists(targets_file))
def test_evaluate_binary_classification(self):
    """Evaluate sampled LDA + PCA configurations on two-class iris.

    Samples labelled 2 are removed. Every configuration that can be
    fitted must yield a finite, non-negative prediction result. Finally
    the number of results greater than 1 is printed.
    """
    X_train, Y_train, X_test, Y_test = get_dataset("iris")

    # Remove every sample labelled 2 from both splits.
    keep_train = Y_train != 2
    X_train, Y_train = X_train[keep_train], Y_train[keep_train]
    keep_test = Y_test != 2
    X_test, Y_test = X_test[keep_test], Y_test[keep_test]

    # The first 25 remaining test rows are passed as validation data.
    X_valid, X_test = X_test[:25, ], X_test[25:, ]
    Y_valid, Y_test = Y_test[:25, ], Y_test[25:, ]

    D = Dummy()
    D.info = dict(metric=AUC_METRIC, task=BINARY_CLASSIFICATION,
                  is_sparse=False, label_num=2)
    D.data = dict(X_train=X_train, Y_train=Y_train,
                  X_valid=X_valid, X_test=X_test)
    D.feat_type = ["numerical", "Numerical", "numerical", "numerical"]

    space = get_configuration_space(
        D.info, include_estimators=["lda"], include_preprocessors=["pca"])

    err = np.zeros([N_TEST_RUNS])
    for run in range(N_TEST_RUNS):
        print("Evaluate configuration: %d; result:" % run)
        sampled = space.sample_configuration()
        evaluator = HoldoutEvaluator(copy.deepcopy(D), sampled)
        if self._fit(evaluator):
            err[run] = evaluator.predict()
            self.assertTrue(np.isfinite(err[run]))
            print(err[run])
            self.assertGreaterEqual(err[run], 0.0)

    worse_count = np.sum(err > 1)
    print("Number of times it was worse than random guessing:"
          + str(worse_count))
def test_with_abalone(self):
    """Evaluate sampled extra-trees configurations on the abalone data.

    Each fitted run must report a finite error below 0.99. In total ten
    errors must be collected and the best one must be below 0.77.
    """
    dataset_path = os.path.join(
        os.path.dirname(__file__), ".datasets", "abalone")
    D = CompetitionDataManager(dataset_path)

    space = get_configuration_space(
        D.info,
        include_estimators=["extra_trees"],
        include_preprocessors=["no_preprocessing"],
    )

    errors = []
    for _ in range(N_TEST_RUNS):
        sampled = space.sample_configuration()
        evaluator = HoldoutEvaluator(copy.deepcopy(D), sampled)
        if self._fit(evaluator):
            err = evaluator.predict()
            self.assertLess(err, 0.99)
            self.assertTrue(np.isfinite(err))
            errors.append(err)

    # This is a reasonable bound
    self.assertEqual(10, len(errors))
    self.assertLess(min(errors), 0.77)
def test_predict_proba_binary_classification(self):
    """Check ``predict_proba`` output for a binary task with a stub model.

    The stub model returns two-column probabilities; the evaluator is
    expected to return ``[[0.9], [0.3]]`` for them.
    """
    X_train, Y_train, X_test, Y_test = get_dataset("iris")

    # Remove every sample labelled 2 from both splits.
    keep_train = Y_train != 2
    X_train, Y_train = X_train[keep_train], Y_train[keep_train]
    keep_test = Y_test != 2
    X_test, Y_test = X_test[keep_test], Y_test[keep_test]

    # The first 25 remaining test rows are passed as validation data.
    X_valid, X_test = X_test[:25, ], X_test[25:, ]
    Y_valid, Y_test = Y_test[:25, ], Y_test[25:, ]

    class Dummy2(object):
        """Stub model with fixed class probabilities."""

        def predict_proba(self, y, batch_size=200):
            return np.array([[0.1, 0.9], [0.7, 0.3]])

    model = Dummy2()
    task_type = BINARY_CLASSIFICATION

    D = Dummy()
    D.info = dict(metric=BAC_METRIC, task=task_type,
                  is_sparse=False, label_num=3)
    D.data = dict(X_train=X_train, Y_train=Y_train,
                  X_valid=X_valid, X_test=X_test)
    D.feat_type = ["numerical", "Numerical", "numerical", "numerical"]

    space = get_configuration_space(
        D.info,
        include_estimators=["lda"],
        include_preprocessors=["select_rates"],
    )
    evaluator = HoldoutEvaluator(D, space.sample_configuration())
    pred = evaluator.predict_proba(None, model, task_type)

    expected = [[0.9], [0.3]]
    for row, wanted in enumerate(expected):
        self.assertEqual(wanted, pred[row])
def test_5000_classes(self):
    """Fit LDA on a synthetic 5000-class problem.

    The class weights are set so that 250 classes have exactly one sample
    (asserted below). The test passes if ``evaluator.fit()`` does not raise.
    """
    class_weights = [0.0002] * 4750 + [0.0001] * 250
    generator_options = dict(
        n_samples=10000,
        n_features=20,
        n_classes=5000,
        n_clusters_per_class=1,
        n_informative=15,
        n_redundant=5,
        n_repeated=0,
        weights=class_weights,
        flip_y=0,
        class_sep=1.0,
        hypercube=True,
        shift=None,
        scale=1.0,
        shuffle=True,
        random_state=1)
    X, Y = sklearn.datasets.make_classification(**generator_options)

    samples_per_class = np.bincount(Y)
    self.assertEqual(250, np.sum(samples_per_class == 1))

    D = Dummy()
    D.info = dict(metric=ACC_METRIC, task=MULTICLASS_CLASSIFICATION,
                  is_sparse=False, label_num=1)
    # The same matrix is used as train, validation and test data.
    D.data = dict(X_train=X, Y_train=Y, X_valid=X, X_test=X)
    # NOTE(review): 5000 feature types are declared although the data has
    # 20 features -- confirm whether this is intentional.
    D.feat_type = ['numerical'] * 5000

    space = get_configuration_space(
        D.info,
        include_estimators=['lda'],
        include_preprocessors=['no_preprocessing'])
    sampled = space.sample_configuration()
    evaluator = HoldoutEvaluator(copy.deepcopy(D), sampled)
    evaluator.fit()
def test_with_abalone(self):
    """Evaluate sampled extra-trees configurations on the abalone data.

    Each fitted run must report a finite error below 0.99. In total ten
    errors must be collected and the best one must be below 0.77.
    """
    dataset_path = os.path.join(
        os.path.dirname(__file__), '.datasets', 'abalone')
    D = CompetitionDataManager(dataset_path)

    space = get_configuration_space(
        D.info,
        include_estimators=['extra_trees'],
        include_preprocessors=['no_preprocessing'])

    errors = []
    for _ in range(N_TEST_RUNS):
        sampled = space.sample_configuration()
        evaluator = HoldoutEvaluator(copy.deepcopy(D), sampled)
        if self._fit(evaluator):
            err = evaluator.predict()
            self.assertLess(err, 0.99)
            self.assertTrue(np.isfinite(err))
            errors.append(err)

    # This is a reasonable bound
    self.assertEqual(10, len(errors))
    self.assertLess(min(errors), 0.77)
def test_evaluate_multilabel_classification(self):
    """Evaluate sampled extra-trees configurations on a multilabel task.

    The iris targets are converted with ``convert_to_bin`` and the last
    label column is set to 1 for every sample. Each configuration that
    can be fitted must yield a finite, non-negative prediction result.
    Finally the number of results greater than 1 is printed.
    """
    X_train, Y_train, X_test, Y_test = get_dataset("iris")

    Y_train = np.array(convert_to_bin(Y_train, 3))
    Y_train[:, -1] = 1
    Y_test = np.array(convert_to_bin(Y_test, 3))
    Y_test[:, -1] = 1

    # The first 25 test rows are passed as validation data.
    X_valid, X_test = X_test[:25, ], X_test[25:, ]
    Y_valid, Y_test = Y_test[:25, ], Y_test[25:, ]

    D = Dummy()
    D.info = dict(metric=F1_METRIC, task=MULTILABEL_CLASSIFICATION,
                  is_sparse=False, label_num=3)
    D.data = dict(X_train=X_train, Y_train=Y_train,
                  X_valid=X_valid, X_test=X_test)
    D.feat_type = ["numerical", "Numerical", "numerical", "numerical"]

    space = get_configuration_space(
        D.info,
        include_estimators=["extra_trees"],
        include_preprocessors=["no_preprocessing"],
    )

    err = np.zeros([N_TEST_RUNS])
    for run in range(N_TEST_RUNS):
        print("Evaluate configuration: %d; result:" % run)
        sampled = space.sample_configuration()
        evaluator = HoldoutEvaluator(copy.deepcopy(D), sampled)
        if self._fit(evaluator):
            err[run] = evaluator.predict()
            print(err[run])
            self.assertTrue(np.isfinite(err[run]))
            self.assertGreaterEqual(err[run], 0.0)

    worse_count = np.sum(err > 1)
    print("Number of times it was worse than random guessing:"
          + str(worse_count))
def test_file_output(self):
    """Check that ``file_output`` writes the ensemble target file.

    Configurations are sampled until one can be fitted. After predicting
    and calling ``file_output``, ``true_targets_ensemble.npy`` must exist
    below ``<output_dir>/.auto-sklearn``.
    """
    output_dir = os.path.join(os.getcwd(), '.test')
    # Best-effort removal of output left over from an earlier run.
    try:
        shutil.rmtree(output_dir)
    except Exception:
        pass

    X_train, Y_train, X_test, Y_test = get_dataset('boston')
    X_valid, X_test = X_test[:25, ], X_test[25:, ]
    Y_valid, Y_test = Y_test[:25, ], Y_test[25:, ]

    D = Dummy()
    D.info = dict(metric=R2_METRIC, task=REGRESSION,
                  is_sparse=False, label_num=3)
    D.data = dict(X_train=X_train, Y_train=Y_train,
                  X_valid=X_valid, X_test=X_test)
    D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']
    D.name = 'test'

    space = get_configuration_space(D.info)

    # NOTE(review): there is no upper bound on the number of retries.
    while True:
        sampled = space.sample_configuration()
        evaluator = HoldoutEvaluator(D, sampled,
                                     with_predictions=True,
                                     all_scoring_functions=True,
                                     output_dir=output_dir,
                                     output_y_test=True)
        if self._fit(evaluator):
            break

    evaluator.predict()
    evaluator.file_output()

    targets_file = os.path.join(
        output_dir, '.auto-sklearn', 'true_targets_ensemble.npy')
    self.assertTrue(os.path.exists(targets_file))