def test_binary(tmp_dir, dask_client):
    X_train, Y_train, X_test, Y_test = putil.get_dataset('iris', make_binary=True)
    automl = AutoSklearnClassifier(
        time_left_for_this_task=40,
        delete_tmp_folder_after_terminate=False,
        per_run_time_limit=10,
        tmp_folder=tmp_dir,
        dask_client=dask_client,
    )
    automl.fit(X_train, Y_train, X_test=X_test, y_test=Y_test,
               dataset_name='binary_test_dataset')

    predictions = automl.predict(X_test)
    assert predictions.shape == (50, ), print_debug_information(automl)

    score = accuracy(Y_test, predictions)
    assert score > 0.9, print_debug_information(automl)
    assert count_succeses(automl.cv_results_) > 0, print_debug_information(automl)
    assert includes_all_scores(automl.performance_over_time_.columns) is True
    assert performance_over_time_is_plausible(automl.performance_over_time_) is True

def test_multilabel(tmp_dir, dask_client):
    X_train, Y_train, X_test, Y_test = putil.get_dataset('iris', make_multilabel=True)
    automl = AutoSklearnClassifier(
        time_left_for_this_task=30,
        per_run_time_limit=5,
        tmp_folder=tmp_dir,
        dask_client=dask_client,
    )
    automl.fit(X_train, Y_train)

    predictions = automl.predict(X_test)
    assert predictions.shape == (50, 3), print_debug_information(automl)
    assert count_succeses(automl.cv_results_) > 0, print_debug_information(automl)
    assert includes_train_scores(automl.performance_over_time_.columns) is True
    assert performance_over_time_is_plausible(automl.performance_over_time_) is True

    score = f1_macro(Y_test, predictions)
    assert score >= 0.9, print_debug_information(automl)

    probs = automl.predict_proba(X_train)
    assert np.mean(probs) == pytest.approx(0.33, rel=1e-1)

def test_cv_results(tmp_dir):
    # TODO: restructure and actually use real SMAC output from a long run
    # for this unit test!
    X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')

    cls = AutoSklearnClassifier(
        time_left_for_this_task=30,
        per_run_time_limit=5,
        tmp_folder=tmp_dir,
        seed=1,
        initial_configurations_via_metalearning=0,
        ensemble_size=0,
        scoring_functions=[autosklearn.metrics.precision,
                           autosklearn.metrics.roc_auc],
    )

    params = cls.get_params()
    original_params = copy.deepcopy(params)

    cls.fit(X_train, Y_train)

    cv_results = cls.cv_results_
    assert isinstance(cv_results, dict), type(cv_results)
    assert isinstance(cv_results['mean_test_score'], np.ndarray), \
        type(cv_results['mean_test_score'])
    assert isinstance(cv_results['mean_fit_time'], np.ndarray), \
        type(cv_results['mean_fit_time'])
    assert isinstance(cv_results['params'], list), type(cv_results['params'])
    assert isinstance(cv_results['rank_test_scores'], np.ndarray), \
        type(cv_results['rank_test_scores'])
    assert isinstance(cv_results['metric_precision'], npma.MaskedArray), \
        type(cv_results['metric_precision'])
    assert isinstance(cv_results['metric_roc_auc'], npma.MaskedArray), \
        type(cv_results['metric_roc_auc'])

    cv_result_items = [
        isinstance(val, npma.MaskedArray)
        for key, val in cv_results.items()
        if key.startswith('param_')
    ]
    assert all(cv_result_items), cv_results.items()

    # Compare the state of the model parameters with the original parameters
    new_params = clone(cls).get_params()
    for param_name, original_value in original_params.items():
        new_value = new_params[param_name]

        # Taken from the scikit-learn code:
        # We should never change or mutate the internal state of input
        # parameters by default. To check this we use the joblib.hash function
        # that introspects recursively any subobjects to compute a checksum.
        # The only exception to this rule of immutable constructor parameters
        # is a possible RandomState instance, but in this check we explicitly
        # fixed the random_state params recursively to be integer seeds.
        assert joblib.hash(new_value) == joblib.hash(original_value), (
            "Estimator %s should not change or mutate "
            "the parameter %s from %s to %s during fit."
            % (cls, param_name, original_value, new_value)
        )

    # Comply with https://scikit-learn.org/dev/glossary.html#term-classes
    is_classifier(cls)
    assert hasattr(cls, 'classes_')

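# A quick standalone sketch (not part of the original test) of the joblib.hash
# check used above: joblib.hash recursively introspects subobjects (including
# numpy arrays) to compute a checksum, so two structurally equal parameter
# values hash identically and any in-place mutation is detected.
import joblib
import numpy as np

a = {"C": 1.0, "weights": np.arange(3)}
b = {"C": 1.0, "weights": np.arange(3)}
assert joblib.hash(a) == joblib.hash(b)   # equal content -> equal checksum
b["weights"][0] = 99
assert joblib.hash(a) != joblib.hash(b)   # mutation changes the checksum
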
def test_performance_over_time_no_ensemble(tmp_dir):
    X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')

    cls = AutoSklearnClassifier(
        time_left_for_this_task=30,
        per_run_time_limit=5,
        tmp_folder=tmp_dir,
        seed=1,
        initial_configurations_via_metalearning=0,
        ensemble_size=0,
    )
    cls.fit(X_train, Y_train, X_test, Y_test)

    performance_over_time = cls.performance_over_time_
    assert include_single_scores(performance_over_time.columns) is True
    assert performance_over_time_is_plausible(performance_over_time) is True

def test_can_pickle_classifier(tmp_dir, dask_client):
    X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
    automl = AutoSklearnClassifier(
        time_left_for_this_task=30,
        delete_tmp_folder_after_terminate=False,
        per_run_time_limit=5,
        tmp_folder=tmp_dir,
        dask_client=dask_client,
    )
    automl.fit(X_train, Y_train)

    initial_predictions = automl.predict(X_test)
    initial_accuracy = sklearn.metrics.accuracy_score(Y_test, initial_predictions)
    assert initial_accuracy >= 0.75
    assert count_succeses(automl.cv_results_) > 0
    assert includes_train_scores(automl.performance_over_time_.columns) is True
    assert performance_over_time_is_plausible(automl.performance_over_time_) is True

    # Test pickle
    dump_file = os.path.join(tmp_dir, 'automl.dump.pkl')
    with open(dump_file, 'wb') as f:
        pickle.dump(automl, f)
    with open(dump_file, 'rb') as f:
        restored_automl = pickle.load(f)

    restored_predictions = restored_automl.predict(X_test)
    restored_accuracy = sklearn.metrics.accuracy_score(Y_test, restored_predictions)
    assert restored_accuracy >= 0.75
    assert initial_accuracy == restored_accuracy

    # Test joblib
    dump_file = os.path.join(tmp_dir, 'automl.dump.joblib')
    joblib.dump(automl, dump_file)
    restored_automl = joblib.load(dump_file)

    restored_predictions = restored_automl.predict(X_test)
    restored_accuracy = sklearn.metrics.accuracy_score(Y_test, restored_predictions)
    assert restored_accuracy >= 0.75
    assert initial_accuracy == restored_accuracy

class AutoSklearnWrapper(Wrapper):
    def __init__(self, preprocessor=None, refit=True, verbose=False,
                 retry_on_error=True, **params):
        self.estimator = AutoSklearnClassifier(**dict(params))

        # Call to super
        super(AutoSklearnWrapper, self).__init__(
            estimator=self.estimator, preprocessor=preprocessor, refit=refit,
            verbose=verbose, retry_on_error=retry_on_error)

    def predict_proba(self, X):
        say("WARNING: predict_proba() is not working well in auto-sklearn. "
            "Raising AttributeError.")
        raise AttributeError()

    # Implementation of internal _fit
    def _fit(self, X, y, **fit_params):
        self.estimator.fit(X, y, **fit_params, metric=accuracy)

    # Implementation of internal _refit
    def _refit(self, X, y):
        self.estimator.fit(X, y)

    def _get_cv_results(self, estimator):
        # Get the results and convert them to lists, so that they are JSON
        # serializable
        results = estimator.cv_results_
        lists = dict([(i, j if isinstance(j, list) else j.tolist())
                      for i, j in results.items()])

        # Store results
        cv_results_ = lists
        best_index_ = np.argmax(cv_results_['mean_test_score'])  # type: np.int64
        best_params_ = cv_results_['params'][best_index_]
        best_score_ = cv_results_['mean_test_score'][best_index_]

        return cv_results_, best_index_, best_params_, best_score_

def test_classification_pandas_support(tmp_dir, dask_client):
    X, y = sklearn.datasets.fetch_openml(
        data_id=2,  # dataset with both categorical and numerical columns
        return_X_y=True,
        as_frame=True,
    )

    # Drop columns containing NaNs
    X = X.dropna(axis='columns')

    # This test only makes sense if the input is a dataframe
    assert isinstance(X, pd.DataFrame)
    assert isinstance(y, pd.Series)

    automl = AutoSklearnClassifier(
        time_left_for_this_task=30,
        per_run_time_limit=5,
        exclude={'classifier': ['libsvm_svc']},
        dask_client=dask_client,
        seed=5,
        tmp_folder=tmp_dir,
    )

    automl.fit(X, y)

    # Make sure the score is at least better than random.
    # We use the same data for train and test to check code quality,
    # not generalization.
    assert automl.score(X, y) > 0.555, print_debug_information(automl)

    automl.refit(X, y)

    # Make sure the refitted model is also better than random.
    # The threshold is 0.555 because the dataset is unbalanced.
    prediction = automl.predict(X)
    assert accuracy(y, prediction) > 0.555
    assert count_succeses(automl.cv_results_) > 0
    assert includes_train_scores(automl.performance_over_time_.columns) is True
    assert performance_over_time_is_plausible(automl.performance_over_time_) is True

def test_autosklearn_anneal(as_frame):
    """
    This test makes sure that the anneal dataset can be fitted and scored.
    The dataset is quite complex, with NaNs and both categorical and numerical
    columns, so it is a good test case for unit testing.
    """
    X, y = sklearn.datasets.fetch_openml(data_id=2, return_X_y=True,
                                         as_frame=as_frame)
    automl = AutoSklearnClassifier(
        time_left_for_this_task=60,
        ensemble_size=0,
        delete_tmp_folder_after_terminate=False,
        initial_configurations_via_metalearning=0,
        smac_scenario_args={'runcount_limit': 6},
        resampling_strategy='holdout-iterative-fit',
    )

    if as_frame:
        # Let auto-sklearn infer the feature types from the dataframe dtypes
        automl_fitted = automl.fit(X, y)
    else:
        # Derive the feature types from the pandas version of the dataset
        X_, y_ = sklearn.datasets.fetch_openml(data_id=2, return_X_y=True,
                                               as_frame=True)
        feat_type = [
            'categorical' if X_[col].dtype.name == 'category' else 'numerical'
            for col in X_.columns
        ]
        automl_fitted = automl.fit(X, y, feat_type=feat_type)
    assert automl is automl_fitted

    automl_ensemble_fitted = automl.fit_ensemble(y, ensemble_size=5)
    assert automl is automl_ensemble_fitted

    # We want to make sure we can learn from this data.
    # This is a test to make sure the data format (numpy/pandas)
    # can be used in a meaningful way -- not meant for generalization,
    # hence we use the train dataset.
    assert automl_fitted.score(X, y) > 0.75

def test_binary(tmp_dir, dask_client):
    X_train, Y_train, X_test, Y_test = putil.get_dataset('iris', make_binary=True)
    automl = AutoSklearnClassifier(
        time_left_for_this_task=40,
        per_run_time_limit=10,
        tmp_folder=tmp_dir,
        dask_client=dask_client,
    )
    automl.fit(X_train, Y_train, X_test=X_test, y_test=Y_test,
               dataset_name='binary_test_dataset')

    predictions = automl.predict(X_test)
    assert predictions.shape == (50, ), print_debug_information(automl)

    score = accuracy(Y_test, predictions)
    assert score > 0.9, print_debug_information(automl)
    assert count_succeses(automl.cv_results_) > 0, print_debug_information(automl)

def test_autosklearn_classification_methods_returns_self(dask_client):
    """
    Currently this test only checks that the fit(), fit_ensemble() and
    refit() methods of AutoSklearnClassifier return self.
    """
    X_train, y_train, X_test, y_test = putil.get_dataset('iris')
    automl = AutoSklearnClassifier(
        time_left_for_this_task=60,
        per_run_time_limit=10,
        ensemble_size=0,
        dask_client=dask_client,
        exclude_preprocessors=['fast_ica'],
    )

    automl_fitted = automl.fit(X_train, y_train)
    assert automl is automl_fitted

    automl_ensemble_fitted = automl.fit_ensemble(y_train, ensemble_size=5)
    assert automl is automl_ensemble_fitted

    automl_refitted = automl.refit(X_train.copy(), y_train.copy())
    assert automl is automl_refitted

def test_feat_type_wrong_arguments():
    # Every auto-sklearn estimator has a backend that allows a single
    # call to fit
    X = np.zeros((100, 100))
    y = np.zeros((100, ))

    cls = AutoSklearnClassifier(ensemble_size=0)
    expected_msg = (r".*feat_type does not have same number of "
                    r"variables as X has features. 1 vs 100.*")
    with pytest.raises(ValueError, match=expected_msg):
        cls.fit(X=X, y=y, feat_type=[True])

    cls = AutoSklearnClassifier(ensemble_size=0)
    expected_msg = r".*feat_type must only contain strings.*"
    with pytest.raises(ValueError, match=expected_msg):
        cls.fit(X=X, y=y, feat_type=[True] * 100)

    cls = AutoSklearnClassifier(ensemble_size=0)
    expected_msg = (r".*Only `Categorical` and `Numerical` are "
                    r"valid feature types, you passed `Car`.*")
    with pytest.raises(ValueError, match=expected_msg):
        cls.fit(X=X, y=y, feat_type=['Car'] * 100)

def classification(self, metric="accuracy"):
    """
    Perform automatic classification with auto-sklearn.

    Args:
        metric (str): The evaluation metric for classification. This will be
            mapped by AutoSklearnML.get_classification_metric to an instance
            of :class:`autosklearn.metrics.Scorer` as created by
            :meth:`autosklearn.metrics.make_scorer`.
            Default metric: "accuracy".
            Other supported metrics: "balanced_accuracy", "f1", "roc_auc",
            "average_precision", "precision", "recall".

    Returns:
        The fitted AutoSklearnClassifier.
    """
    auto_classifier = AutoSklearnClassifier(**self.auto_sklearn_kwargs)
    classification_metric = AutoSklearnML.get_classification_metric(metric)
    auto_classifier.fit(self._X_train.copy(),
                        self._y_train.copy(),
                        metric=classification_metric,
                        dataset_name=self.dataset_name)

    print(auto_classifier.show_models())

    if self.auto_sklearn_kwargs["resampling_strategy"] == "cv":
        auto_classifier.refit(self._X_train.copy(), self._y_train.copy())

    prediction_train = auto_classifier.predict(self._X_train)
    print("training set {} score: {}".format(
        metric,
        classification_metric._score_func(self._y_train, prediction_train)))

    prediction_test = auto_classifier.predict(self._X_test)
    print("test set {} score: {}".format(
        metric,
        classification_metric._score_func(self._y_test, prediction_test)))

    with open(os.path.join(self.auto_sklearn_kwargs['output_folder'],
                           'best_auto_sklearn_output.log'), 'a+') as wf:
        wf.write('The best model is:\n')
        wf.write(auto_classifier.show_models())
        wf.write("\ntraining set {} score: {}\n".format(
            metric,
            classification_metric._score_func(self._y_train, prediction_train)))
        wf.write('\n')
        wf.write("test set {} score: {}".format(
            metric,
            classification_metric._score_func(self._y_test, prediction_test)))

    dump_file = os.path.join(self.auto_sklearn_kwargs['output_folder'],
                             'automl_classification.dump.pkl')
    with open(dump_file, 'wb') as f:
        pickle.dump(auto_classifier, f)

    return auto_classifier

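# A minimal sketch (an assumption, not this project's actual implementation)
# of the AutoSklearnML.get_classification_metric lookup described in the
# docstring above: it maps the supported metric names to the built-in
# autosklearn.metrics Scorer instances.
import autosklearn.metrics

_CLASSIFICATION_METRICS = {
    "accuracy": autosklearn.metrics.accuracy,
    "balanced_accuracy": autosklearn.metrics.balanced_accuracy,
    "f1": autosklearn.metrics.f1,
    "roc_auc": autosklearn.metrics.roc_auc,
    "average_precision": autosklearn.metrics.average_precision,
    "precision": autosklearn.metrics.precision,
    "recall": autosklearn.metrics.recall,
}

def get_classification_metric(metric):
    """Return the autosklearn Scorer for a supported metric name."""
    try:
        return _CLASSIFICATION_METRICS[metric]
    except KeyError:
        raise ValueError("Unsupported classification metric: %s" % metric)
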
def test_fit_n_jobs(tmp_dir):
    X_train, Y_train, X_test, Y_test = putil.get_dataset('breast_cancer')

    # Test that the parallel classifier predicts classes, not only indices
    Y_train += 1
    Y_test += 1

    class get_smac_object_wrapper:
        def __call__(self, *args, **kwargs):
            self.n_jobs = kwargs['n_jobs']
            smac = get_smac_object(*args, **kwargs)
            self.dask_n_jobs = smac.solver.tae_runner.n_workers
            self.dask_client_n_jobs = len(
                smac.solver.tae_runner.client.scheduler_info()['workers'])
            return smac

    get_smac_object_wrapper_instance = get_smac_object_wrapper()

    automl = AutoSklearnClassifier(
        time_left_for_this_task=30,
        per_run_time_limit=5,
        tmp_folder=tmp_dir,
        seed=1,
        initial_configurations_via_metalearning=0,
        ensemble_size=5,
        n_jobs=2,
        include_estimators=['sgd'],
        include_preprocessors=['no_preprocessing'],
        get_smac_object_callback=get_smac_object_wrapper_instance,
        max_models_on_disc=None,
    )
    automl.fit(X_train, Y_train)

    # Test that the n_jobs argument is correctly passed to SMAC and dask
    assert getattr(get_smac_object_wrapper_instance, 'n_jobs') == 2
    assert getattr(get_smac_object_wrapper_instance, 'dask_n_jobs') == 2
    assert getattr(get_smac_object_wrapper_instance, 'dask_client_n_jobs') == 2

    available_num_runs = set()
    for run_key, run_value in automl.automl_.runhistory_.data.items():
        if run_value.additional_info is not None \
                and 'num_run' in run_value.additional_info:
            available_num_runs.add(run_value.additional_info['num_run'])

    available_predictions = set()
    predictions = glob.glob(
        os.path.join(automl.automl_._backend.get_runs_directory(),
                     '*', 'predictions_ensemble*.npy'))
    seeds = set()
    for prediction in predictions:
        prediction = os.path.split(prediction)[1]
        match = re.match(MODEL_FN_RE,
                         prediction.replace("predictions_ensemble", ""))
        if match:
            num_run = int(match.group(2))
            available_predictions.add(num_run)
            seed = int(match.group(1))
            seeds.add(seed)

    # Remove the dummy prediction, it is not part of the runhistory
    available_predictions.remove(1)
    assert available_num_runs.issubset(available_predictions)

    assert len(seeds) == 1

    ensemble_dir = automl.automl_._backend.get_ensemble_dir()
    ensembles = os.listdir(ensemble_dir)

    seeds = set()
    for ensemble_file in ensembles:
        seeds.add(int(ensemble_file.split('.')[0].split('_')[0]))
    assert len(seeds) == 1

    assert count_succeses(automl.cv_results_) > 0

    # For travis-ci it is important that the client no longer exists
    assert automl.automl_._dask_client is None

def test_type_of_target(mock_estimator):
    # Test that the classifier raises an error for illegal target types.
    X = np.array([
        [1, 2],
        [2, 3],
        [3, 4],
        [4, 5],
    ])
    # Possible target types
    y_binary = np.array([0, 0, 1, 1])
    y_continuous = np.array([0.1, 1.3, 2.1, 4.0])
    y_multiclass = np.array([0, 1, 2, 0])
    y_multilabel = np.array([
        [0, 1],
        [1, 1],
        [1, 0],
        [0, 0],
    ])
    y_multiclass_multioutput = np.array([
        [0, 1],
        [1, 3],
        [2, 2],
        [5, 3],
    ])
    y_continuous_multioutput = np.array([
        [0.1, 1.5],
        [1.2, 3.5],
        [2.7, 2.7],
        [5.5, 3.9],
    ])

    cls = AutoSklearnClassifier(ensemble_size=0)
    cls.automl_ = unittest.mock.Mock()
    cls.automl_.InputValidator = unittest.mock.Mock()
    cls.automl_.InputValidator.target_validator = unittest.mock.Mock()

    # Illegal target types for classification: continuous,
    # multiclass-multioutput, continuous-multioutput.
    expected_msg = (r".*Classification with data of type"
                    r" multiclass-multioutput is not supported.*")
    with pytest.raises(ValueError, match=expected_msg):
        cls.fit(X=X, y=y_multiclass_multioutput)

    expected_msg = (r".*Classification with data of type"
                    r" continuous is not supported.*")
    with pytest.raises(ValueError, match=expected_msg):
        cls.fit(X=X, y=y_continuous)

    expected_msg = (r".*Classification with data of type"
                    r" continuous-multioutput is not supported.*")
    with pytest.raises(ValueError, match=expected_msg):
        cls.fit(X=X, y=y_continuous_multioutput)

    # Legal target types for classification: binary, multiclass,
    # multilabel-indicator.
    try:
        cls.fit(X, y_binary)
    except ValueError:
        pytest.fail("cls.fit() raised ValueError while fitting "
                    "binary targets")

    try:
        cls.fit(X, y_multiclass)
    except ValueError:
        pytest.fail("cls.fit() raised ValueError while fitting "
                    "multiclass targets")

    try:
        cls.fit(X, y_multilabel)
    except ValueError:
        pytest.fail("cls.fit() raised ValueError while fitting "
                    "multilabel-indicator targets")

    # Test that the regressor raises an error for illegal target types.
    reg = AutoSklearnRegressor(ensemble_size=0)

    # Illegal target types for regression: multilabel-indicator,
    # multiclass-multioutput.
    expected_msg = (r".*Regression with data of type"
                    r" multilabel-indicator is not supported.*")
    with pytest.raises(ValueError, match=expected_msg):
        reg.fit(X=X, y=y_multilabel)

    expected_msg = (r".*Regression with data of type"
                    r" multiclass-multioutput is not supported.*")
    with pytest.raises(ValueError, match=expected_msg):
        reg.fit(X=X, y=y_multiclass_multioutput)

    # Legal target types for regression: continuous, multiclass,
    # continuous-multioutput, binary.
    try:
        reg.fit(X, y_continuous)
    except ValueError:
        pytest.fail("reg.fit() raised ValueError while fitting "
                    "continuous targets")

    try:
        reg.fit(X, y_multiclass)
    except ValueError:
        pytest.fail("reg.fit() raised ValueError while fitting "
                    "multiclass targets")

    try:
        reg.fit(X, y_continuous_multioutput)
    except ValueError:
        pytest.fail("reg.fit() raised ValueError while fitting "
                    "continuous-multioutput targets")

    try:
        reg.fit(X, y_binary)
    except ValueError:
        pytest.fail("reg.fit() raised ValueError while fitting "
                    "binary targets")

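# For reference, a small sketch showing how the targets exercised above are
# categorized by scikit-learn's type_of_target helper, which is the vocabulary
# the error messages in the test use.
from sklearn.utils.multiclass import type_of_target
import numpy as np

assert type_of_target(np.array([0, 0, 1, 1])) == 'binary'
assert type_of_target(np.array([0, 1, 2, 0])) == 'multiclass'
assert type_of_target(np.array([0.1, 1.3, 2.1, 4.0])) == 'continuous'
assert type_of_target(
    np.array([[0, 1], [1, 1], [1, 0], [0, 0]])) == 'multilabel-indicator'
assert type_of_target(
    np.array([[0, 1], [1, 3], [2, 2], [5, 3]])) == 'multiclass-multioutput'
assert type_of_target(
    np.array([[0.1, 1.5], [1.2, 3.5]])) == 'continuous-multioutput'
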
# training_features = pd.read_csv('../../prepare-data/one-label/simple/training.csv')
training_features = pd.read_csv(
    '../../prepare-data/one-label/simple/downgrade/postpaid/training.csv')
training_labels = training_features.pop('UpdatedIn90Days').values

# test_features = pd.read_csv('../../prepare-data/one-label/test.csv')
# test_features = pd.read_csv('../../prepare-data/one-label/simple/test.csv')
test_features = pd.read_csv(
    '../../prepare-data/one-label/simple/downgrade/postpaid/test.csv')
test_labels = test_features.pop('UpdatedIn90Days').values

# -----------------------------------------------------------------------------
# 2) Fit auto-classifier
clf = AutoSklearnClassifier()
clf.fit(training_features, training_labels)

# -----------------------------------------------------------------------------
# 3) Perform predictions on test set
actual = test_labels
predictions = clf.predict(test_features)

# -----------------------------------------------------------------------------
# 4) Show result scores; confusion matrix (most useful) and precision/recall
print('\nconfusion matrix')
# print(confusion_matrix(actual, predictions, labels=[0, 1, 2, 3, 4]))
# print(confusion_matrix(actual, predictions, labels=[0, 1, 2]))
print(confusion_matrix(actual, predictions, labels=[0, 1]))

print('\nprecision')