def test_with_abalone(self):
    dataset = "abalone"
    dataset_dir = os.path.join(os.path.dirname(__file__), ".datasets")
    D = CompetitionDataManager(dataset, dataset_dir)
    configuration_space = get_configuration_space(
        D.info,
        include_estimators=['extra_trees'],
        include_preprocessors=['no_preprocessing'])

    errors = []
    for i in range(N_TEST_RUNS):
        configuration = configuration_space.sample_configuration()
        D_ = copy.deepcopy(D)
        evaluator = NestedCVEvaluator(D_, configuration,
                                      inner_cv_folds=2, outer_cv_folds=2)
        if not self._fit(evaluator):
            print
            continue
        err = evaluator.predict()
        self.assertLess(err, 0.99)
        self.assertTrue(np.isfinite(err))
        errors.append(err)

    # All ten sampled configurations must have been evaluated successfully
    self.assertEqual(10, len(errors))
    # This is a reasonable bound
    self.assertLess(min(errors), 0.77)
def test_evaluate_multiclass_classification(self):
    X_train, Y_train, X_test, Y_test = get_dataset('iris')
    X_valid = X_test[:25, ]
    Y_valid = Y_test[:25, ]
    X_test = X_test[25:, ]
    Y_test = Y_test[25:, ]

    D = Dummy()
    D.info = {'metric': 'acc_metric',
              'task': MULTICLASS_CLASSIFICATION,
              'is_sparse': False,
              'target_num': 3}
    D.data = {'X_train': X_train,
              'Y_train': Y_train,
              'X_valid': X_valid,
              'X_test': X_test}
    D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']

    configuration_space = get_configuration_space(
        D.info,
        include_estimators=['ridge'],
        include_preprocessors=['select_rates'])

    err = np.zeros([N_TEST_RUNS])
    num_models_better_than_random = 0
    for i in range(N_TEST_RUNS):
        print "Evaluate configuration: %d; result:" % i,
        configuration = configuration_space.sample_configuration()
        D_ = copy.deepcopy(D)
        evaluator = NestedCVEvaluator(D_, configuration,
                                      with_predictions=True,
                                      all_scoring_functions=True)
        if not self._fit(evaluator):
            print
            continue
        e_, Y_optimization_pred, Y_valid_pred, Y_test_pred = \
            evaluator.predict()
        err[i] = e_['acc_metric']
        print err[i], configuration['classifier']
        print e_['outer:bac_metric'], e_['bac_metric']

        # Test the outer CV
        num_targets = len(np.unique(Y_train))
        self.assertTrue(np.isfinite(err[i]))
        self.assertGreaterEqual(err[i], 0.0)
        # Test that all five outer-fold models were trained
        self.assertEqual(len(evaluator.outer_models), 5)
        self.assertTrue(all([model is not None
                             for model in evaluator.outer_models]))
        self.assertEqual(Y_optimization_pred.shape[0], Y_train.shape[0])
        self.assertEqual(Y_optimization_pred.shape[1], num_targets)
        self.assertEqual(Y_valid_pred.shape[0], Y_valid.shape[0])
        self.assertEqual(Y_valid_pred.shape[1], num_targets)
        self.assertEqual(Y_test_pred.shape[0], Y_test.shape[0])
        self.assertEqual(Y_test_pred.shape[1], num_targets)

        # Test some basic statistics of the predictions
        if err[i] < 0.5:
            self.assertTrue(0.3 < Y_valid_pred.mean() < 0.36666)
            self.assertGreaterEqual(Y_valid_pred.std(), 0.1)
            self.assertTrue(0.3 < Y_test_pred.mean() < 0.36666)
            self.assertGreaterEqual(Y_test_pred.std(), 0.1)
            num_models_better_than_random += 1

        # Test the inner CV (five inner models per outer fold)
        self.assertEqual(len(evaluator.inner_models), 5)
        for fold in range(5):
            self.assertEqual(len(evaluator.inner_models[fold]), 5)
            self.assertTrue(all([model is not None for model in
                                 evaluator.inner_models[fold]]))
            self.assertGreaterEqual(len(evaluator.outer_indices[fold][0]),
                                    75)
            for inner_fold in range(5):
                self.assertGreaterEqual(
                    len(evaluator.inner_indices[fold][inner_fold][0]), 60)

    self.assertGreater(num_models_better_than_random, 9)
def main(dataset_info, mode, seed, params, mode_args=None):
    """This command line interface has several operation modes:

    * CV: useful for the Tweakathon
    * 1/3 test split: useful to evaluate a configuration
    * CV on the 2/3 train split: useful to optimize hyperparameters in a
      training mode before testing a configuration on the 1/3 test split.

    It must not be used for the Auto part of the competition!
    """
    if mode != "test":
        num_run = get_new_run_num()

    # Cast hyperparameter values to int or float where possible
    for key in params:
        try:
            params[key] = int(params[key])
        except Exception:
            try:
                params[key] = float(params[key])
            except Exception:
                pass

    if seed is not None:
        seed = int(float(seed))
    else:
        seed = 1

    output_dir = os.getcwd()

    D = store_and_or_load_data(dataset_info=dataset_info,
                               outputdir=output_dir)

    cs = get_configuration_space(D.info)
    configuration = configuration_space.Configuration(cs, params)
    metric = D.info['metric']

    global evaluator

    # Train/test split
    if mode == 'holdout':
        evaluator = HoldoutEvaluator(D, configuration,
                                     with_predictions=True,
                                     all_scoring_functions=True,
                                     output_y_test=True,
                                     seed=seed,
                                     num_run=num_run)
        evaluator.fit()
        # Install the empty SIGTERM handler so writing results is not interrupted
        signal.signal(signal.SIGTERM, empty_signal_handler)
        evaluator.finish_up()

        model_directory = os.path.join(os.getcwd(), "models_%d" % seed)
        if os.path.exists(model_directory):
            model_filename = os.path.join(model_directory,
                                          "%s.model" % num_run)
            with open(model_filename, "wb") as fh:
                pickle.dump(evaluator.model, fh, -1)

    # Evaluation on the 1/3 test split
    elif mode == 'test':
        evaluator = TestEvaluator(D, configuration,
                                  all_scoring_functions=True,
                                  seed=seed)
        evaluator.fit()
        scores = evaluator.predict()
        duration = time.time() - evaluator.starttime

        score = scores[metric]
        additional_run_info = ";".join(["%s: %s" % (m_, value)
                                        for m_, value in scores.items()])
        additional_run_info += ";" + "duration: " + str(duration)

        # Report the result in the ParamILS output format
        print "Result for ParamILS: %s, %f, 1, %f, %d, %s" % (
            "SAT", abs(duration), score, evaluator.seed,
            additional_run_info)

    # CV on the whole training set
    elif mode == 'cv':
        evaluator = CVEvaluator(D, configuration,
                                with_predictions=True,
                                all_scoring_functions=True,
                                output_y_test=True,
                                cv_folds=mode_args['folds'],
                                seed=seed,
                                num_run=num_run)
        evaluator.fit()
        signal.signal(signal.SIGTERM, empty_signal_handler)
        evaluator.finish_up()

    # A single fold of a CV run
    elif mode == 'partial_cv':
        evaluator = CVEvaluator(D, configuration,
                                all_scoring_functions=True,
                                cv_folds=mode_args['folds'],
                                seed=seed,
                                num_run=num_run)
        evaluator.partial_fit(mode_args['fold'])
        scores = evaluator.predict()
        duration = time.time() - evaluator.starttime

        score = scores[metric]
        additional_run_info = ";".join(["%s: %s" % (m_, value)
                                        for m_, value in scores.items()])
        additional_run_info += ";" + "duration: " + str(duration)

        print "Result for ParamILS: %s, %f, 1, %f, %d, %s" % (
            "SAT", abs(duration), score, evaluator.seed,
            additional_run_info)

    elif mode == 'nested-cv':
        evaluator = NestedCVEvaluator(D, configuration,
                                      with_predictions=True,
                                      inner_cv_folds=mode_args['inner_folds'],
                                      outer_cv_folds=mode_args['outer_folds'],
                                      all_scoring_functions=True,
                                      output_y_test=True,
                                      seed=seed,
                                      num_run=num_run)
        evaluator.fit()
        signal.signal(signal.SIGTERM, empty_signal_handler)
        evaluator.finish_up()

    else:
        raise ValueError("Must choose a legal mode.")
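# Usage sketch (not part of the original module): how main() above could be
# invoked directly for each mode.  The dataset path, the seed and the contents
# of `example_params` are placeholder assumptions -- in practice the
# surrounding CLI parses them from the command line, and the params dictionary
# must hold a complete, flat hyperparameter assignment for the dataset's
# configuration space.
def _example_usage():
    example_params = {}  # placeholder: fill with a full hyperparameter setting

    # Train/test (holdout) split
    main("/path/to/dataset", "holdout", seed=1, params=example_params)

    # Evaluation of a configuration on the test split
    main("/path/to/dataset", "test", seed=1, params=example_params)

    # Cross-validation on the whole training set with 5 folds
    main("/path/to/dataset", "cv", seed=1, params=example_params,
         mode_args={'folds': 5})

    # A single fold (here fold 0) of a partial cross-validation run
    main("/path/to/dataset", "partial_cv", seed=1, params=example_params,
         mode_args={'folds': 5, 'fold': 0})

    # Nested cross-validation with 3 inner and 3 outer folds
    main("/path/to/dataset", "nested-cv", seed=1, params=example_params,
         mode_args={'inner_folds': 3, 'outer_folds': 3})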