def test_fit_pSMAC(self):
        """Fit two shared-mode classifiers on iris around an injected dummy.

        A first classifier (seed 1) is fitted.  A hand-made prediction file
        and a matching ``ArrayReturningDummyPredictor`` are then written into
        the shared folder, after which a second classifier (seed 2) is fitted
        in the same folder and its ensemble builder is run.

        The test asserts that exactly one entry exists in the ``ensembles``
        directory, that the test score is at least 0.90 and that the detected
        task is ``MULTICLASS_CLASSIFICATION``.
        """
        output = os.path.join(self.test_dir, '..', '.tmp_estimator_fit_pSMAC')
        self._setUp(output)

        X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')

        # Both runs use exactly the same settings and differ only in the seed.
        shared_kwargs = dict(
            time_left_for_this_task=15,
            per_run_time_limit=15,
            output_folder=output,
            tmp_folder=output,
            shared_mode=True,
            initial_configurations_via_metalearning=0,
            ensemble_size=0,
        )

        automl = AutoSklearnClassifier(seed=1, **shared_kwargs)
        automl.fit(X_train, Y_train)

        # Create a 'dummy model' for the first run, which has an accuracy of
        # more than 99%; it should be in the final ensemble if the ensemble
        # building of the second AutoSklearn classifier works correctly.
        internal_dir = os.path.join(output, '.auto-sklearn')
        true_targets_ensemble = np.load(
            os.path.join(internal_dir, 'true_targets_ensemble.npy'))
        # Change the last label (presumably so the dummy is not perfect).
        true_targets_ensemble[-1] = 1 if true_targets_ensemble[-1] != 1 else 0
        # The labels are used as column indices below; NumPy only accepts
        # integer indices, so cast in case the stored targets are floats.
        true_targets_ensemble = true_targets_ensemble.astype(int)
        probas = np.zeros((len(true_targets_ensemble), 3), dtype=float)
        for i, value in enumerate(true_targets_ensemble):
            probas[i, value] = 1.0
        dummy_predictions_path = os.path.join(
            internal_dir,
            'predictions_ensemble',
            'predictions_ensemble_1_00030.npy',
        )
        with open(dummy_predictions_path, 'wb') as fh:
            np.save(fh, probas)

        # One-hot encoding of the true test labels: this is what the dummy
        # predictor is constructed with.
        probas_test = np.zeros((len(Y_test), 3), dtype=float)
        for i, value in enumerate(Y_test):
            probas_test[i, value] = 1.0

        dummy = ArrayReturningDummyPredictor(probas_test)
        backend = Backend(output, output)
        # The arguments 30 and 1 mirror the ``1_00030`` suffix of the
        # prediction file written above.
        backend.save_model(dummy, 30, 1)

        automl = AutoSklearnClassifier(seed=2, **shared_kwargs)
        automl.fit(X_train, Y_train)
        automl.run_ensemble_builder(0, 1, 50).wait()

        score = automl.score(X_test, Y_test)

        ensembles_dir = os.path.join(internal_dir, 'ensembles')
        self.assertEqual(len(os.listdir(ensembles_dir)), 1)
        self.assertGreaterEqual(score, 0.90)
        self.assertEqual(automl._task, MULTICLASS_CLASSIFICATION)

        # Drop the estimator before the working directory is removed.
        del automl
        self._tearDown(output)
Example #2
0
    def test_fit_pSMAC(self):
        """Check that a second shared-mode run builds exactly one ensemble.

        Steps performed:

        1. Fit a classifier with seed 1 on iris in shared mode.
        2. Write one-hot "predictions" derived from the stored ensemble
           targets (with the last label changed) and save an
           ``ArrayReturningDummyPredictor`` through the backend.
        3. Fit a second classifier with seed 2 in the same folder and run
           its ensemble builder.

        Assertions: the ``ensemble_indices`` directory holds exactly one
        entry, the test score is at least 0.90, and the task is
        ``MULTICLASS_CLASSIFICATION``.
        """
        output = os.path.join(self.test_dir, '..', '.tmp_estimator_fit_pSMAC')
        self._setUp(output)

        X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')

        def make_classifier(seed):
            # Every setting except the seed is shared between the two runs.
            return AutoSklearnClassifier(
                time_left_for_this_task=15,
                per_run_time_limit=15,
                output_folder=output,
                tmp_folder=output,
                shared_mode=True,
                seed=seed,
                initial_configurations_via_metalearning=0,
                ensemble_size=0,
            )

        automl = make_classifier(1)
        automl.fit(X_train, Y_train)

        # Create a 'dummy model' for the first run, which has an accuracy of
        # more than 99%; it should be in the final ensemble if the ensemble
        # building of the second AutoSklearn classifier works correctly.
        true_targets_ensemble_path = os.path.join(
            output, '.auto-sklearn', 'true_targets_ensemble.npy')
        true_targets_ensemble = np.load(true_targets_ensemble_path)
        true_targets_ensemble[-1] = 1 if true_targets_ensemble[-1] != 1 else 0
        # Integer cast: the labels serve as column indices in the loop below
        # and NumPy raises for non-integer indices.
        true_targets_ensemble = true_targets_ensemble.astype(int)

        probas = np.zeros((len(true_targets_ensemble), 3), dtype=float)
        for i, value in enumerate(true_targets_ensemble):
            probas[i, value] = 1.0

        dummy_predictions_path = os.path.join(
            output,
            '.auto-sklearn',
            'predictions_ensemble',
            'predictions_ensemble_1_00030.npy',
        )
        with open(dummy_predictions_path, 'wb') as fh:
            np.save(fh, probas)

        # The dummy predictor is handed the one-hot encoded true test labels.
        probas_test = np.zeros((len(Y_test), 3), dtype=float)
        for i, value in enumerate(Y_test):
            probas_test[i, value] = 1.0

        dummy = ArrayReturningDummyPredictor(probas_test)
        backend = Backend(output, output)
        backend.save_model(dummy, 30, 1)

        automl = make_classifier(2)
        automl.fit(X_train, Y_train)
        automl.run_ensemble_builder(0, 1, 50).wait()

        score = automl.score(X_test, Y_test)

        ensemble_indices_dir = os.path.join(
            output, '.auto-sklearn', 'ensemble_indices')
        self.assertEqual(len(os.listdir(ensemble_indices_dir)), 1)
        self.assertGreaterEqual(score, 0.90)
        self.assertEqual(automl._task, MULTICLASS_CLASSIFICATION)

        del automl
        self._tearDown(output)
Example #3
0
    def test_fit_pSMAC(self):
        """Verify that ``fit_ensemble`` picks up an injected dummy model.

        A classifier with seed 1 is fitted on the digits data (labels shifted
        by one) in shared mode.  A prediction file and a matching
        ``ArrayReturningDummyPredictor`` are then stored in the shared tmp
        folder.  A second classifier (seed 2) only calls ``fit_ensemble`` and
        predicts on the test set.

        Assertions: one entry in the ``ensembles`` directory, accuracy of at
        least 0.90, task equal to ``MULTICLASS_CLASSIFICATION``, and the dummy
        predictor's type present among the loaded models.
        """
        tmp = os.path.join(self.test_dir, '..', '.tmp_estimator_fit_pSMAC')
        output = os.path.join(self.test_dir, '..', '.out_estimator_fit_pSMAC')
        self._setUp(tmp)
        self._setUp(output)

        X_train, Y_train, X_test, Y_test = putil.get_dataset('digits')

        # test parallel Classifier to predict classes, not only indexes:
        # after the shift a class label no longer equals its column index.
        Y_train += 1
        Y_test += 1

        # Settings shared by both classifiers; only the seed differs.
        shared_kwargs = dict(
            time_left_for_this_task=20,
            per_run_time_limit=5,
            output_folder=output,
            tmp_folder=tmp,
            shared_mode=True,
            initial_configurations_via_metalearning=0,
            ensemble_size=0,
        )

        automl = AutoSklearnClassifier(seed=1, **shared_kwargs)
        automl.fit(X_train, Y_train)

        # Create a 'dummy model' for the first run, which has an accuracy of
        # more than 99%; it should be in the final ensemble if the ensemble
        # building of the second AutoSklearn classifier works correctly.
        internal_dir = os.path.join(tmp, '.auto-sklearn')
        true_targets_ensemble_path = os.path.join(
            internal_dir, 'true_targets_ensemble.npy')
        with open(true_targets_ensemble_path, 'rb') as fh:
            true_targets_ensemble = np.load(fh)
        true_targets_ensemble[-1] = 1 if true_targets_ensemble[-1] != 1 else 0
        # Integer dtype is required to use the labels as column indices.
        true_targets_ensemble = true_targets_ensemble.astype(int)

        probas = np.zeros((len(true_targets_ensemble), 10), dtype=float)
        for i, value in enumerate(true_targets_ensemble):
            probas[i, value] = 1.0

        dummy_predictions_path = os.path.join(
            internal_dir,
            'predictions_ensemble',
            'predictions_ensemble_1_00030.npy',
        )
        with open(dummy_predictions_path, 'wb') as fh:
            np.save(fh, probas)

        # Y_test was shifted by one above, so undo the shift to get a column.
        probas_test = np.zeros((len(Y_test), 10), dtype=float)
        for i, value in enumerate(Y_test):
            probas_test[i, value - 1] = 1.0

        dummy = ArrayReturningDummyPredictor(probas_test)
        context = BackendContext(tmp, output, False, False, True)
        backend = Backend(context)
        backend.save_model(dummy, 30, 1)

        automl = AutoSklearnClassifier(seed=2, **shared_kwargs)
        automl.fit_ensemble(
            Y_train,
            task=MULTICLASS_CLASSIFICATION,
            metric=accuracy,
            precision='32',
            # Named after the dataset actually loaded above (was 'iris').
            # NOTE(review): assumes the name is only a label - confirm.
            dataset_name='digits',
            ensemble_size=20,
            ensemble_nbest=50,
        )

        predictions = automl.predict(X_test)
        score = sklearn.metrics.accuracy_score(Y_test, predictions)

        ensembles_dir = os.path.join(internal_dir, 'ensembles')
        self.assertEqual(len(os.listdir(ensembles_dir)), 1)
        self.assertGreaterEqual(score, 0.90)
        self.assertEqual(automl._automl._task, MULTICLASS_CLASSIFICATION)

        # The injected dummy must be among the models that were loaded.
        models = automl._automl.models_
        classifier_types = [type(c) for c in models.values()]
        self.assertIn(ArrayReturningDummyPredictor, classifier_types)

        del automl
        self._tearDown(tmp)
        self._tearDown(output)
Example #4
0
    def test_fit_pSMAC(self):
        """Exercise shared-mode fitting and ensemble building in three runs.

        * Run 1 (seed 1) and run 2 (seed 2) are fitted on the breast_cancer
          data in the same folders.  The second run must report more entries
          in ``cv_results_['mean_test_score']`` than the first, and every
          score of the first run must appear among those of the second.
        * A prediction file and an ``ArrayReturningDummyPredictor`` are then
          stored under seed 0, index 999, budget 0.0.
        * Run 3 (seed 3) only calls ``fit_ensemble`` and predicts.

        Final assertions: one entry in the ``ensembles`` directory, accuracy
        of at least 0.90, task equal to ``BINARY_CLASSIFICATION`` and the
        dummy predictor's type present among the loaded models.
        """
        tmp = os.path.join(self.test_dir, '..', '.tmp_estimator_fit_pSMAC')
        output = os.path.join(self.test_dir, '..', '.out_estimator_fit_pSMAC')
        self._setUp(tmp)
        self._setUp(output)

        X_train, Y_train, X_test, Y_test = putil.get_dataset('breast_cancer')

        # test parallel Classifier to predict classes, not only indices
        Y_train += 1
        Y_test += 1

        # Settings common to all three classifiers created below.
        shared_kwargs = dict(
            time_left_for_this_task=30,
            per_run_time_limit=5,
            output_folder=output,
            tmp_folder=tmp,
            shared_mode=True,
            initial_configurations_via_metalearning=0,
            ensemble_size=0,
        )

        automl = AutoSklearnClassifier(seed=1, **shared_kwargs)
        automl.fit(X_train, Y_train)
        # Read the scores once and derive the count from the same object.
        # NOTE(review): assumes cv_results_ is stable after fit - confirm.
        cv_results = automl.cv_results_['mean_test_score']
        n_models_fit = len(cv_results)

        automl = AutoSklearnClassifier(seed=2, **shared_kwargs)
        automl.fit(X_train, Y_train)
        cv_results_2 = automl.cv_results_['mean_test_score']
        n_models_fit_2 = len(cv_results_2)

        # Check that the results from the first run were actually read by the
        # second run
        self.assertGreater(n_models_fit_2, n_models_fit)
        # The failure message is the same for every score; build it once.
        failure_msg = str((cv_results_2, cv_results))
        for score in cv_results:
            self.assertIn(score, cv_results_2, msg=failure_msg)

        # Create a 'dummy model' for the first run, which has an accuracy of
        # more than 99%; it should be in the final ensemble if the ensemble
        # building of the second AutoSklearn classifier works correctly.
        internal_dir = os.path.join(tmp, '.auto-sklearn')
        true_targets_ensemble_path = os.path.join(
            internal_dir, 'true_targets_ensemble.npy')
        with open(true_targets_ensemble_path, 'rb') as fh:
            true_targets_ensemble = np.load(fh, allow_pickle=True)
        true_targets_ensemble[-1] = 1 if true_targets_ensemble[-1] != 1 else 0
        # Integer dtype is required to use the labels as column indices.
        true_targets_ensemble = true_targets_ensemble.astype(int)

        probas = np.zeros((len(true_targets_ensemble), 2), dtype=float)
        for i, value in enumerate(true_targets_ensemble):
            probas[i, value] = 1.0

        dummy_predictions_path = os.path.join(
            internal_dir,
            'predictions_ensemble',
            'predictions_ensemble_0_999_0.0.npy',
        )
        with open(dummy_predictions_path, 'wb') as fh:
            np.save(fh, probas)

        # Y_test was shifted by one above, so undo the shift to get a column.
        probas_test = np.zeros((len(Y_test), 2), dtype=float)
        for i, value in enumerate(Y_test):
            probas_test[i, value - 1] = 1.0

        dummy = ArrayReturningDummyPredictor(probas_test)
        context = BackendContext(tmp, output, False, False, True)
        backend = Backend(context)
        # Same seed/idx/budget triple as in the prediction file name above.
        model_path = backend.get_model_path(seed=0, idx=999, budget=0.0)
        backend.save_model(model=dummy, filepath=model_path)

        automl = AutoSklearnClassifier(
            seed=3,
            metric=accuracy,
            **shared_kwargs
        )
        automl.fit_ensemble(
            Y_train,
            task=BINARY_CLASSIFICATION,
            precision='32',
            dataset_name='breast_cancer',
            ensemble_size=20,
            ensemble_nbest=50,
        )

        predictions = automl.predict(X_test)
        score = sklearn.metrics.accuracy_score(Y_test, predictions)

        ensembles_dir = os.path.join(internal_dir, 'ensembles')
        self.assertEqual(len(os.listdir(ensembles_dir)), 1)
        self.assertGreaterEqual(score, 0.90)
        self.assertEqual(automl._automl[0]._task, BINARY_CLASSIFICATION)

        # The injected dummy must be among the models that were loaded.
        models = automl._automl[0].models_
        classifier_types = [type(c) for c in models.values()]
        self.assertIn(ArrayReturningDummyPredictor, classifier_types)

        del automl
        self._tearDown(tmp)
        self._tearDown(output)
Example #5
0
    def test_fit_pSMAC(self):
        """Test ensemble building across two shared-mode classifiers.

        The first classifier (seed 1) runs a full ``fit`` on the digits data
        with labels shifted by one.  Afterwards a one-hot prediction file and
        an ``ArrayReturningDummyPredictor`` are placed in the shared tmp
        folder.  The second classifier (seed 2) skips ``fit`` and calls
        ``fit_ensemble`` directly, then predicts on the test set.

        The test passes when the ``ensembles`` directory contains one entry,
        accuracy is at least 0.90, the task is ``MULTICLASS_CLASSIFICATION``
        and one of the loaded models is an ``ArrayReturningDummyPredictor``.
        """
        tmp = os.path.join(self.test_dir, '..', '.tmp_estimator_fit_pSMAC')
        output = os.path.join(self.test_dir, '..', '.out_estimator_fit_pSMAC')
        self._setUp(tmp)
        self._setUp(output)

        X_train, Y_train, X_test, Y_test = putil.get_dataset('digits')

        # test parallel Classifier to predict classes, not only indexes
        Y_train += 1
        Y_test += 1

        def make_classifier(seed):
            # Both classifiers share all settings apart from the seed.
            return AutoSklearnClassifier(
                time_left_for_this_task=20,
                per_run_time_limit=5,
                output_folder=output,
                tmp_folder=tmp,
                shared_mode=True,
                seed=seed,
                initial_configurations_via_metalearning=0,
                ensemble_size=0,
            )

        automl = make_classifier(1)
        automl.fit(X_train, Y_train)

        # Create a 'dummy model' for the first run, which has an accuracy of
        # more than 99%; it should be in the final ensemble if the ensemble
        # building of the second AutoSklearn classifier works correctly.
        true_targets_ensemble_path = os.path.join(
            tmp, '.auto-sklearn', 'true_targets_ensemble.npy')
        with open(true_targets_ensemble_path, 'rb') as fh:
            true_targets_ensemble = np.load(fh)
        true_targets_ensemble[-1] = 1 if true_targets_ensemble[-1] != 1 else 0
        # Cast so that each label can index a column of ``probas``.
        true_targets_ensemble = true_targets_ensemble.astype(int)

        probas = np.zeros((len(true_targets_ensemble), 10), dtype=float)
        for i, value in enumerate(true_targets_ensemble):
            probas[i, value] = 1.0

        dummy_predictions_path = os.path.join(
            tmp,
            '.auto-sklearn',
            'predictions_ensemble',
            'predictions_ensemble_1_00030.npy',
        )
        with open(dummy_predictions_path, 'wb') as fh:
            np.save(fh, probas)

        # ``value - 1`` reverses the label shift applied to Y_test above.
        probas_test = np.zeros((len(Y_test), 10), dtype=float)
        for i, value in enumerate(Y_test):
            probas_test[i, value - 1] = 1.0

        dummy = ArrayReturningDummyPredictor(probas_test)
        context = BackendContext(tmp, output, False, False, True)
        backend = Backend(context)
        backend.save_model(dummy, 30, 1)

        automl = make_classifier(2)
        automl.fit_ensemble(
            Y_train,
            task=MULTICLASS_CLASSIFICATION,
            metric=accuracy,
            precision='32',
            # Was 'iris'; renamed to match the dataset loaded in this test.
            # NOTE(review): assumes the name is only a label - confirm.
            dataset_name='digits',
            ensemble_size=20,
            ensemble_nbest=50,
        )

        predictions = automl.predict(X_test)
        score = sklearn.metrics.accuracy_score(Y_test, predictions)

        ensembles_dir = os.path.join(tmp, '.auto-sklearn', 'ensembles')
        self.assertEqual(len(os.listdir(ensembles_dir)), 1)
        self.assertGreaterEqual(score, 0.90)
        self.assertEqual(automl._automl._task, MULTICLASS_CLASSIFICATION)

        models = automl._automl.models_
        classifier_types = [type(c) for c in models.values()]
        self.assertIn(ArrayReturningDummyPredictor, classifier_types)

        del automl
        self._tearDown(tmp)
        self._tearDown(output)