def test_fit_pSMAC(self):
    """Fit two shared-mode classifiers on iris and build a joint ensemble.

    A first classifier (seed 1) is fitted, then a hand-made, almost perfect
    "dummy" model is written into the same folder. A second classifier
    (seed 2) is fitted in that folder and its ensemble builder is run.

    The test passes when the ``ensembles`` directory holds exactly one
    entry, the test score is at least 0.90 and the task is reported as
    multiclass classification.
    """
    output = os.path.join(self.test_dir, '..', '.tmp_estimator_fit_pSMAC')
    self._setUp(output)

    # Bound up front so the ``finally`` clause can always ``del`` it.
    automl = None
    try:
        X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')

        automl = AutoSklearnClassifier(
            time_left_for_this_task=15,
            per_run_time_limit=15,
            output_folder=output,
            tmp_folder=output,
            shared_mode=True,
            seed=1,
            initial_configurations_via_metalearning=0,
            ensemble_size=0,
        )
        automl.fit(X_train, Y_train)

        # Create a 'dummy model' for the first run, which has an accuracy
        # of more than 99%; it should be in the final ensemble if the
        # ensemble building of the second AutoSklearn classifier works
        # correctly.
        true_targets_ensemble_path = os.path.join(
            output, '.auto-sklearn', 'true_targets_ensemble.npy')
        true_targets_ensemble = np.load(true_targets_ensemble_path)
        # Flip the last label so the dummy predictions are not perfect.
        true_targets_ensemble[-1] = (
            1 if true_targets_ensemble[-1] != 1 else 0)

        # One-hot encode the (modified) targets as class probabilities.
        probas = np.zeros((len(true_targets_ensemble), 3), dtype=float)
        for i, value in enumerate(true_targets_ensemble):
            probas[i, value] = 1.0
        dummy_predictions_path = os.path.join(
            output, '.auto-sklearn', 'predictions_ensemble',
            'predictions_ensemble_1_00030.npy')
        with open(dummy_predictions_path, 'wb') as fh:
            np.save(fh, probas)

        # The dummy predictor returns the one-hot encoded test labels.
        probas_test = np.zeros((len(Y_test), 3), dtype=float)
        for i, value in enumerate(Y_test):
            probas_test[i, value] = 1.0

        dummy = ArrayReturningDummyPredictor(probas_test)
        backend = Backend(output, output)
        backend.save_model(dummy, 30, 1)

        automl = AutoSklearnClassifier(
            time_left_for_this_task=15,
            per_run_time_limit=15,
            output_folder=output,
            tmp_folder=output,
            shared_mode=True,
            seed=2,
            initial_configurations_via_metalearning=0,
            ensemble_size=0,
        )
        automl.fit(X_train, Y_train)
        automl.run_ensemble_builder(0, 1, 50).wait()

        score = automl.score(X_test, Y_test)

        self.assertEqual(
            len(os.listdir(
                os.path.join(output, '.auto-sklearn', 'ensembles'))),
            1)
        self.assertGreaterEqual(score, 0.90)
        self.assertEqual(automl._task, MULTICLASS_CLASSIFICATION)
    finally:
        # Drop the estimator first (as the success path always did), then
        # remove the folder even when a step above raised, so a failing
        # run does not leave the temporary directory behind.
        del automl
        self._tearDown(output)
def test_fit_pSMAC(self):
    """Fit two shared-mode classifiers on iris and build a joint ensemble.

    A first classifier (seed 1) is fitted, then a hand-made, almost perfect
    "dummy" model is written into the same folder. A second classifier
    (seed 2) is fitted in that folder and its ensemble builder is run.

    The test passes when the ``ensemble_indices`` directory holds exactly
    one entry, the test score is at least 0.90 and the task is reported as
    multiclass classification.
    """
    output = os.path.join(self.test_dir, '..', '.tmp_estimator_fit_pSMAC')
    self._setUp(output)

    # Bound up front so the ``finally`` clause can always ``del`` it.
    automl = None
    try:
        X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')

        shared_kwargs = dict(
            time_left_for_this_task=15,
            per_run_time_limit=15,
            output_folder=output,
            tmp_folder=output,
            shared_mode=True,
            initial_configurations_via_metalearning=0,
            ensemble_size=0,
        )

        automl = AutoSklearnClassifier(seed=1, **shared_kwargs)
        automl.fit(X_train, Y_train)

        # Create a 'dummy model' for the first run, which has an accuracy
        # of more than 99%; it should be in the final ensemble if the
        # ensemble building of the second AutoSklearn classifier works
        # correctly.
        automl_dir = os.path.join(output, '.auto-sklearn')
        true_targets_ensemble = np.load(
            os.path.join(automl_dir, 'true_targets_ensemble.npy'))
        # Flip the last label so the dummy predictions are not perfect.
        true_targets_ensemble[-1] = (
            1 if true_targets_ensemble[-1] != 1 else 0)

        # One-hot encode the (modified) targets as class probabilities.
        probas = np.zeros((len(true_targets_ensemble), 3), dtype=float)
        for i, value in enumerate(true_targets_ensemble):
            probas[i, value] = 1.0
        dummy_predictions_path = os.path.join(
            automl_dir, 'predictions_ensemble',
            'predictions_ensemble_1_00030.npy')
        with open(dummy_predictions_path, 'wb') as fh:
            np.save(fh, probas)

        # The dummy predictor returns the one-hot encoded test labels.
        probas_test = np.zeros((len(Y_test), 3), dtype=float)
        for i, value in enumerate(Y_test):
            probas_test[i, value] = 1.0

        dummy = ArrayReturningDummyPredictor(probas_test)
        backend = Backend(output, output)
        backend.save_model(dummy, 30, 1)

        automl = AutoSklearnClassifier(seed=2, **shared_kwargs)
        automl.fit(X_train, Y_train)
        automl.run_ensemble_builder(0, 1, 50).wait()

        score = automl.score(X_test, Y_test)

        ensemble_indices_dir = os.path.join(automl_dir, 'ensemble_indices')
        self.assertEqual(len(os.listdir(ensemble_indices_dir)), 1)
        self.assertGreaterEqual(score, 0.90)
        self.assertEqual(automl._task, MULTICLASS_CLASSIFICATION)
    finally:
        # Drop the estimator first (as the success path always did), then
        # remove the folder even when a step above raised, so a failing
        # run does not leave the temporary directory behind.
        del automl
        self._tearDown(output)
def test_fit_pSMAC(self):
    """Check that a dummy model from a first run enters a later ensemble.

    A first classifier (seed 1) is fitted on the digits data with labels
    shifted by one. A hand-made, almost perfect "dummy" model is then
    written into the shared tmp folder. A second classifier (seed 2) is
    not fitted; it only calls ``fit_ensemble`` and then predicts.

    The test passes when the ``ensembles`` directory holds exactly one
    entry, test accuracy is at least 0.90, the task is reported as
    multiclass classification and the loaded models include an
    ``ArrayReturningDummyPredictor``.
    """
    tmp = os.path.join(self.test_dir, '..', '.tmp_estimator_fit_pSMAC')
    output = os.path.join(self.test_dir, '..', '.out_estimator_fit_pSMAC')
    self._setUp(tmp)
    self._setUp(output)

    # Bound up front so the ``finally`` clause can always ``del`` it.
    automl = None
    try:
        X_train, Y_train, X_test, Y_test = putil.get_dataset('digits')

        # test parallel Classifier to predict classes, not only indices
        Y_train += 1
        Y_test += 1

        automl = AutoSklearnClassifier(
            time_left_for_this_task=20,
            per_run_time_limit=5,
            output_folder=output,
            tmp_folder=tmp,
            shared_mode=True,
            seed=1,
            initial_configurations_via_metalearning=0,
            ensemble_size=0,
        )
        automl.fit(X_train, Y_train)

        # Create a 'dummy model' for the first run, which has an accuracy
        # of more than 99%; it should be in the final ensemble if the
        # ensemble building of the second AutoSklearn classifier works
        # correctly.
        true_targets_ensemble_path = os.path.join(
            tmp, '.auto-sklearn', 'true_targets_ensemble.npy')
        with open(true_targets_ensemble_path, 'rb') as fh:
            true_targets_ensemble = np.load(fh)
        # Flip the last label so the dummy predictions are not perfect.
        true_targets_ensemble[-1] = (
            1 if true_targets_ensemble[-1] != 1 else 0)
        # Cast to int so the values can be used as column indices below.
        true_targets_ensemble = true_targets_ensemble.astype(int)

        # One-hot encode the (modified) targets as class probabilities.
        probas = np.zeros((len(true_targets_ensemble), 10), dtype=float)
        for i, value in enumerate(true_targets_ensemble):
            probas[i, value] = 1.0
        dummy_predictions_path = os.path.join(
            tmp,
            '.auto-sklearn',
            'predictions_ensemble',
            'predictions_ensemble_1_00030.npy',
        )
        with open(dummy_predictions_path, 'wb') as fh:
            np.save(fh, probas)

        # Y_test was shifted by one above, so shift back to get a
        # zero-based column index.
        probas_test = np.zeros((len(Y_test), 10), dtype=float)
        for i, value in enumerate(Y_test):
            probas_test[i, value - 1] = 1.0

        dummy = ArrayReturningDummyPredictor(probas_test)
        context = BackendContext(tmp, output, False, False, True)
        backend = Backend(context)
        backend.save_model(dummy, 30, 1)

        automl = AutoSklearnClassifier(
            time_left_for_this_task=20,
            per_run_time_limit=5,
            output_folder=output,
            tmp_folder=tmp,
            shared_mode=True,
            seed=2,
            initial_configurations_via_metalearning=0,
            ensemble_size=0,
        )
        automl.fit_ensemble(
            Y_train,
            task=MULTICLASS_CLASSIFICATION,
            metric=accuracy,
            precision='32',
            # Name the run after the dataset that was actually loaded.
            dataset_name='digits',
            ensemble_size=20,
            ensemble_nbest=50,
        )

        predictions = automl.predict(X_test)
        score = sklearn.metrics.accuracy_score(Y_test, predictions)

        self.assertEqual(
            len(os.listdir(
                os.path.join(tmp, '.auto-sklearn', 'ensembles'))),
            1)
        self.assertGreaterEqual(score, 0.90)
        self.assertEqual(automl._automl._task, MULTICLASS_CLASSIFICATION)

        models = automl._automl.models_
        classifier_types = [type(c) for c in models.values()]
        self.assertIn(ArrayReturningDummyPredictor, classifier_types)
    finally:
        # Drop the estimator first (as the success path always did), then
        # remove both folders even when a step above raised, so a failing
        # run does not leave the temporary directories behind.
        del automl
        self._tearDown(tmp)
        self._tearDown(output)
def test_fit_pSMAC(self):
    """Check result sharing and ensemble building across shared-mode runs.

    Two classifiers (seeds 1 and 2) are fitted one after the other on the
    breast_cancer data (labels shifted by one) using the same tmp and
    output folders. The second run must report more entries in
    ``cv_results_['mean_test_score']`` than the first, and every score of
    the first run must appear among the scores of the second.

    Afterwards a hand-made, almost perfect "dummy" model is stored for
    seed 0, index 999, budget 0.0. A third classifier (seed 3) only calls
    ``fit_ensemble`` and predicts. The test then expects exactly one entry
    in the ``ensembles`` directory, a test accuracy of at least 0.90, a
    binary classification task and an ``ArrayReturningDummyPredictor``
    among the loaded models.
    """
    tmp = os.path.join(self.test_dir, '..', '.tmp_estimator_fit_pSMAC')
    output = os.path.join(self.test_dir, '..', '.out_estimator_fit_pSMAC')
    self._setUp(tmp)
    self._setUp(output)

    # Bound up front so the ``finally`` clause can always ``del`` it.
    automl = None
    try:
        X_train, Y_train, X_test, Y_test = putil.get_dataset(
            'breast_cancer')

        # test parallel Classifier to predict classes, not only indices
        Y_train += 1
        Y_test += 1

        automl = AutoSklearnClassifier(
            time_left_for_this_task=30,
            per_run_time_limit=5,
            output_folder=output,
            tmp_folder=tmp,
            shared_mode=True,
            seed=1,
            initial_configurations_via_metalearning=0,
            ensemble_size=0,
        )
        automl.fit(X_train, Y_train)
        cv_results = automl.cv_results_['mean_test_score']
        n_models_fit = len(cv_results)

        automl = AutoSklearnClassifier(
            time_left_for_this_task=30,
            per_run_time_limit=5,
            output_folder=output,
            tmp_folder=tmp,
            shared_mode=True,
            seed=2,
            initial_configurations_via_metalearning=0,
            ensemble_size=0,
        )
        automl.fit(X_train, Y_train)
        # Look the scores up once instead of on every loop iteration.
        cv_results_2 = automl.cv_results_['mean_test_score']
        n_models_fit_2 = len(cv_results_2)

        # Check that the results from the first run were actually read by
        # the second run
        self.assertGreater(n_models_fit_2, n_models_fit)
        failure_msg = str((cv_results_2, cv_results))
        for score in cv_results:
            self.assertIn(score, cv_results_2, msg=failure_msg)

        # Create a 'dummy model' for the first run, which has an accuracy
        # of more than 99%; it should be in the final ensemble if the
        # ensemble building of the second AutoSklearn classifier works
        # correctly.
        true_targets_ensemble_path = os.path.join(
            tmp, '.auto-sklearn', 'true_targets_ensemble.npy')
        with open(true_targets_ensemble_path, 'rb') as fh:
            true_targets_ensemble = np.load(fh, allow_pickle=True)
        # Flip the last label so the dummy predictions are not perfect.
        true_targets_ensemble[-1] = (
            1 if true_targets_ensemble[-1] != 1 else 0)
        # Cast to int so the values can be used as column indices below.
        true_targets_ensemble = true_targets_ensemble.astype(int)

        # One-hot encode the (modified) targets as class probabilities.
        probas = np.zeros((len(true_targets_ensemble), 2), dtype=float)
        for i, value in enumerate(true_targets_ensemble):
            probas[i, value] = 1.0
        dummy_predictions_path = os.path.join(
            tmp,
            '.auto-sklearn',
            'predictions_ensemble',
            'predictions_ensemble_0_999_0.0.npy',
        )
        with open(dummy_predictions_path, 'wb') as fh:
            np.save(fh, probas)

        # Y_test was shifted by one above, so shift back to get a
        # zero-based column index.
        probas_test = np.zeros((len(Y_test), 2), dtype=float)
        for i, value in enumerate(Y_test):
            probas_test[i, value - 1] = 1.0

        dummy = ArrayReturningDummyPredictor(probas_test)
        context = BackendContext(tmp, output, False, False, True)
        backend = Backend(context)
        model_path = backend.get_model_path(seed=0, idx=999, budget=0.0)
        backend.save_model(model=dummy, filepath=model_path)

        automl = AutoSklearnClassifier(
            time_left_for_this_task=30,
            per_run_time_limit=5,
            output_folder=output,
            tmp_folder=tmp,
            shared_mode=True,
            seed=3,
            initial_configurations_via_metalearning=0,
            ensemble_size=0,
            metric=accuracy,
        )
        automl.fit_ensemble(
            Y_train,
            task=BINARY_CLASSIFICATION,
            precision='32',
            dataset_name='breast_cancer',
            ensemble_size=20,
            ensemble_nbest=50,
        )

        predictions = automl.predict(X_test)
        score = sklearn.metrics.accuracy_score(Y_test, predictions)

        self.assertEqual(
            len(os.listdir(
                os.path.join(tmp, '.auto-sklearn', 'ensembles'))),
            1)
        self.assertGreaterEqual(score, 0.90)
        self.assertEqual(automl._automl[0]._task, BINARY_CLASSIFICATION)

        models = automl._automl[0].models_
        classifier_types = [type(c) for c in models.values()]
        self.assertIn(ArrayReturningDummyPredictor, classifier_types)
    finally:
        # Drop the estimator first (as the success path always did), then
        # remove both folders even when a step above raised, so a failing
        # run does not leave the temporary directories behind.
        del automl
        self._tearDown(tmp)
        self._tearDown(output)
def test_fit_pSMAC(self):
    """Check that a dummy model from a first run enters a later ensemble.

    A first classifier (seed 1) is fitted on the digits data with labels
    shifted by one. A hand-made, almost perfect "dummy" model is then
    written into the shared tmp folder. A second classifier (seed 2) is
    not fitted; it only calls ``fit_ensemble`` and then predicts.

    The test passes when the ``ensembles`` directory holds exactly one
    entry, test accuracy is at least 0.90, the task is reported as
    multiclass classification and the loaded models include an
    ``ArrayReturningDummyPredictor``.
    """
    tmp = os.path.join(self.test_dir, '..', '.tmp_estimator_fit_pSMAC')
    output = os.path.join(self.test_dir, '..', '.out_estimator_fit_pSMAC')
    self._setUp(tmp)
    self._setUp(output)

    # Bound up front so the ``finally`` clause can always ``del`` it.
    automl = None
    try:
        X_train, Y_train, X_test, Y_test = putil.get_dataset('digits')

        # test parallel Classifier to predict classes, not only indices
        Y_train += 1
        Y_test += 1

        shared_kwargs = dict(
            time_left_for_this_task=20,
            per_run_time_limit=5,
            output_folder=output,
            tmp_folder=tmp,
            shared_mode=True,
            initial_configurations_via_metalearning=0,
            ensemble_size=0,
        )

        automl = AutoSklearnClassifier(seed=1, **shared_kwargs)
        automl.fit(X_train, Y_train)

        # Create a 'dummy model' for the first run, which has an accuracy
        # of more than 99%; it should be in the final ensemble if the
        # ensemble building of the second AutoSklearn classifier works
        # correctly.
        automl_dir = os.path.join(tmp, '.auto-sklearn')
        true_targets_ensemble_path = os.path.join(
            automl_dir, 'true_targets_ensemble.npy')
        with open(true_targets_ensemble_path, 'rb') as fh:
            true_targets_ensemble = np.load(fh)
        # Flip the last label so the dummy predictions are not perfect.
        true_targets_ensemble[-1] = (
            1 if true_targets_ensemble[-1] != 1 else 0)
        # Cast to int so the values can be used as column indices below.
        true_targets_ensemble = true_targets_ensemble.astype(int)

        # One-hot encode the (modified) targets as class probabilities.
        probas = np.zeros((len(true_targets_ensemble), 10), dtype=float)
        for i, value in enumerate(true_targets_ensemble):
            probas[i, value] = 1.0
        dummy_predictions_path = os.path.join(
            automl_dir,
            'predictions_ensemble',
            'predictions_ensemble_1_00030.npy',
        )
        with open(dummy_predictions_path, 'wb') as fh:
            np.save(fh, probas)

        # Y_test was shifted by one above, so shift back to get a
        # zero-based column index.
        probas_test = np.zeros((len(Y_test), 10), dtype=float)
        for i, value in enumerate(Y_test):
            probas_test[i, value - 1] = 1.0

        dummy = ArrayReturningDummyPredictor(probas_test)
        context = BackendContext(tmp, output, False, False, True)
        backend = Backend(context)
        backend.save_model(dummy, 30, 1)

        automl = AutoSklearnClassifier(seed=2, **shared_kwargs)
        automl.fit_ensemble(
            Y_train,
            task=MULTICLASS_CLASSIFICATION,
            metric=accuracy,
            precision='32',
            # Name the run after the dataset that was actually loaded.
            dataset_name='digits',
            ensemble_size=20,
            ensemble_nbest=50,
        )

        predictions = automl.predict(X_test)
        score = sklearn.metrics.accuracy_score(Y_test, predictions)

        ensembles_dir = os.path.join(automl_dir, 'ensembles')
        self.assertEqual(len(os.listdir(ensembles_dir)), 1)
        self.assertGreaterEqual(score, 0.90)
        self.assertEqual(automl._automl._task, MULTICLASS_CLASSIFICATION)

        models = automl._automl.models_
        classifier_types = [type(c) for c in models.values()]
        self.assertIn(ArrayReturningDummyPredictor, classifier_types)
    finally:
        # Drop the estimator first (as the success path always did), then
        # remove both folders even when a step above raised, so a failing
        # run does not leave the temporary directories behind.
        del automl
        self._tearDown(tmp)
        self._tearDown(output)