def test_multilabel(tmp_dir, output_dir, dask_client):

    X_train, Y_train, X_test, Y_test = putil.get_dataset(
        'iris', make_multilabel=True)
    automl = AutoSklearnClassifier(time_left_for_this_task=30,
                                   per_run_time_limit=5,
                                   tmp_folder=tmp_dir,
                                   dask_client=dask_client,
                                   output_folder=output_dir)

    automl.fit(X_train, Y_train)

    predictions = automl.predict(X_test)
    assert predictions.shape == (50, 3), print_debug_information(automl)
    assert count_succeses(automl.cv_results_) > 0, print_debug_information(automl)
    assert includes_train_scores(automl.performance_over_time_.columns) is True
    assert performance_over_time_is_plausible(automl.performance_over_time_) is True

    score = f1_macro(Y_test, predictions)
    assert score >= 0.9, print_debug_information(automl)

    probs = automl.predict_proba(X_train)
    assert np.mean(probs) == pytest.approx(0.33, rel=1e-1)
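
# `count_succeses` is used throughout these tests to verify that at least
# one configuration evaluation finished successfully. A minimal sketch of
# it, assuming cv_results_ carries a 'status' entry with one string per
# run (the real helper is defined in the shared test utilities):
def _count_succeses_sketch(cv_results):
    return sum(status == 'Success' for status in cv_results['status'])
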
def test_regression_pandas_support(tmp_dir, output_dir, dask_client):

    X, y = sklearn.datasets.fetch_openml(
        data_id=41514,  # diabetes
        return_X_y=True,
        as_frame=True,
    )
    # This test only makes sense if the input is a dataframe
    assert isinstance(X, pd.DataFrame)
    assert isinstance(y, pd.Series)
    automl = AutoSklearnRegressor(
        time_left_for_this_task=40,
        per_run_time_limit=5,
        dask_client=dask_client,
        tmp_folder=tmp_dir,
        output_folder=output_dir,
    )

    # Fit directly on the dataframe input; y is a continuous target,
    # so no label encoding is required
    automl.fit(X, y)

    # Make sure that it is at least better than random.
    # We use the same X_train == X_test to test code quality
    assert automl.score(X, y) >= 0.5, print_debug_information(automl)

    automl.refit(X, y)
    # Make sure that it is at least better than random.
    assert r2(y, automl.predict(X)) > 0.5, print_debug_information(automl)

    assert count_succeses(automl.cv_results_) > 0, print_debug_information(automl)
def test_binary(tmp_dir, output_dir, dask_client):

    X_train, Y_train, X_test, Y_test = putil.get_dataset(
        'iris', make_binary=True)
    automl = AutoSklearnClassifier(time_left_for_this_task=40,
                                   per_run_time_limit=10,
                                   tmp_folder=tmp_dir,
                                   dask_client=dask_client,
                                   output_folder=output_dir)

    automl.fit(X_train, Y_train, X_test=X_test, y_test=Y_test,
               dataset_name='binary_test_dataset')

    predictions = automl.predict(X_test)
    assert predictions.shape == (50, ), print_debug_information(automl)

    score = accuracy(Y_test, predictions)
    assert score > 0.9, print_debug_information(automl)
    assert count_succeses(automl.cv_results_) > 0, print_debug_information(automl)

    output_files = glob.glob(
        os.path.join(output_dir, 'binary_test_dataset_test_*.predict'))
    assert len(output_files) > 0, (output_files, print_debug_information(automl))
def test_cv_regression(tmp_dir, dask_client):
    """
    Makes sure that when using a cv strategy, we are able to fit
    a regressor
    """

    X_train, Y_train, X_test, Y_test = putil.get_dataset(
        'boston', train_size_maximum=300)
    automl = AutoSklearnRegressor(
        time_left_for_this_task=60,
        per_run_time_limit=10,
        resampling_strategy='cv',
        tmp_folder=tmp_dir,
        dask_client=dask_client,
    )

    automl.fit(X_train, Y_train)

    predictions = automl.predict(X_test)
    assert predictions.shape == (206, )
    score = r2(Y_test, predictions)
    assert score >= 0.1, print_debug_information(automl)
    assert count_succeses(automl.cv_results_) > 0, print_debug_information(automl)
    assert includes_train_scores(automl.performance_over_time_.columns) is True
    assert performance_over_time_is_plausible(automl.performance_over_time_) is True
def test_binary(tmp_dir, dask_client):

    X_train, Y_train, X_test, Y_test = putil.get_dataset(
        'iris', make_binary=True)
    automl = AutoSklearnClassifier(
        time_left_for_this_task=40,
        delete_tmp_folder_after_terminate=False,
        per_run_time_limit=10,
        tmp_folder=tmp_dir,
        dask_client=dask_client,
    )

    automl.fit(X_train, Y_train, X_test=X_test, y_test=Y_test,
               dataset_name='binary_test_dataset')

    predictions = automl.predict(X_test)
    assert predictions.shape == (50, ), print_debug_information(automl)

    score = accuracy(Y_test, predictions)
    assert score > 0.9, print_debug_information(automl)
    assert count_succeses(automl.cv_results_) > 0, print_debug_information(automl)
    assert includes_all_scores(automl.performance_over_time_.columns) is True
    assert performance_over_time_is_plausible(automl.performance_over_time_) is True
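
# `includes_train_scores`, `includes_all_scores` and
# `performance_over_time_is_plausible` inspect the `performance_over_time_`
# dataframe produced during fitting. Hypothetical sketches of such checks;
# the column names below are assumptions, not confirmed by this file:
def _includes_train_scores_sketch(columns):
    return 'single_best_train_score' in columns and 'Timestamp' in columns


def _performance_over_time_is_plausible_sketch(poT):
    # Non-empty trajectory whose timestamps never move backwards
    return len(poT) >= 1 and poT['Timestamp'].is_monotonic_increasing
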
def test_autosklearn2_classification_methods_returns_self_sparse(dask_client):
    X_train, y_train, X_test, y_test = putil.get_dataset(
        'breast_cancer', make_sparse=True)
    automl = AutoSklearn2Classifier(time_left_for_this_task=60,
                                    ensemble_size=0,
                                    delete_tmp_folder_after_terminate=False,
                                    dask_client=dask_client)

    automl_fitted = automl.fit(X_train, y_train)
    assert automl is automl_fitted

    automl_ensemble_fitted = automl.fit_ensemble(y_train, ensemble_size=5)
    assert automl is automl_ensemble_fitted

    automl_refitted = automl.refit(X_train.copy(), y_train.copy())
    assert automl is automl_refitted

    predictions = automl_fitted.predict(X_test)
    assert sklearn.metrics.accuracy_score(
        y_test, predictions) >= 2 / 3, print_debug_information(automl)

    assert 'boosting' not in str(
        automl.get_configuration_space(X=X_train, y=y_train))

    pickle.dumps(automl_fitted)
def test_autosklearn2_classification_methods_returns_self(dask_client):
    X_train, y_train, X_test, y_test = putil.get_dataset('iris')
    automl = AutoSklearn2Classifier(time_left_for_this_task=60,
                                    ensemble_size=0,
                                    dask_client=dask_client)

    automl_fitted = automl.fit(X_train, y_train)
    assert automl is automl_fitted

    automl_ensemble_fitted = automl.fit_ensemble(y_train, ensemble_size=5)
    assert automl is automl_ensemble_fitted

    automl_refitted = automl.refit(X_train.copy(), y_train.copy())
    assert automl is automl_refitted

    predictions = automl_fitted.predict(X_test)
    assert sklearn.metrics.accuracy_score(
        y_test, predictions) >= 2 / 3, print_debug_information(automl)

    pickle.dumps(automl_fitted)
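
# The final `pickle.dumps` call in the two tests above only checks that the
# fitted estimator is serializable. A slightly stronger, hypothetical
# variant (a sketch, not part of the original tests) would round-trip the
# estimator and confirm that predictions survive deserialization:
def _pickle_roundtrip_sketch(automl_fitted, X_test, predictions):
    restored = pickle.loads(pickle.dumps(automl_fitted))
    assert np.array_equal(restored.predict(X_test), predictions)
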
def test_regression(tmp_dir, output_dir, dask_client):

    X_train, Y_train, X_test, Y_test = putil.get_dataset('boston')
    automl = AutoSklearnRegressor(time_left_for_this_task=30,
                                  per_run_time_limit=5,
                                  tmp_folder=tmp_dir,
                                  dask_client=dask_client,
                                  output_folder=output_dir)

    automl.fit(X_train, Y_train)

    predictions = automl.predict(X_test)
    assert predictions.shape == (356, )
    score = mean_squared_error(Y_test, predictions)

    # An MSE of 30 means being on average np.sqrt(30) ~= 5.5 away from
    # the target. Results with select rates drop the average score to a
    # range of -32.40 to -37 under the 30 second constraint; with more
    # time_left_for_this_task this is no longer an issue.
    assert score >= -37, print_debug_information(automl)
    assert count_succeses(automl.cv_results_) > 0
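
# Note on the `>= -37` threshold above: `mean_squared_error` here is the
# autosklearn.metrics Scorer, which is constructed with
# greater_is_better=False and therefore returns the *negated* MSE when
# called. `score >= -37` thus bounds the raw MSE by 37. A sketch of the
# equivalent check written with plain scikit-learn (illustration only):
def _mse_threshold_sketch(Y_test, predictions):
    mse = sklearn.metrics.mean_squared_error(Y_test, predictions)
    assert -mse >= -37  # same condition as the Scorer-based assert
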
def test_classification_pandas_support(tmp_dir, output_dir, dask_client):

    X, y = sklearn.datasets.fetch_openml(
        data_id=2,  # cat/num dataset
        return_X_y=True,
        as_frame=True,
    )

    # Drop columns containing NaN values
    X = X.dropna(axis='columns')

    # This test only makes sense if the input is a dataframe
    assert isinstance(X, pd.DataFrame)
    assert isinstance(y, pd.Series)
    automl = AutoSklearnClassifier(
        time_left_for_this_task=30,
        per_run_time_limit=5,
        exclude_estimators=['libsvm_svc'],
        dask_client=dask_client,
        seed=5,
        tmp_folder=tmp_dir,
        output_folder=output_dir,
    )

    automl.fit(X, y)

    # Make sure that it is at least better than random.
    # We use the same X_train == X_test to test code quality
    assert automl.score(X, y) > 0.555, print_debug_information(automl)

    automl.refit(X, y)

    # Make sure that it is at least better than random.
    # accuracy in sklearn needs valid data; the threshold is 0.555
    # because the dataset is unbalanced.
    prediction = automl.predict(X)
    assert accuracy(y, prediction) > 0.555
    assert count_succeses(automl.cv_results_) > 0
    assert includes_train_scores(automl.performance_over_time_.columns) is True
    assert performance_over_time_is_plausible(automl.performance_over_time_) is True
def test_exceptions_inside_log_in_smbo(smbo_run_mock, backend, dask_client):

    # The import and shutdown below are a workaround to make sure we
    # reset the port used to collect log messages. When running this
    # test alongside multiple other tests it randomly fails; this
    # resets the singletons of the logging class.
    import logging
    logging.shutdown()

    automl = autosklearn.automl.AutoML(
        backend,
        20,
        5,
        metric=accuracy,
        dask_client=dask_client,
    )

    dataset_name = 'test_exceptions_inside_log'

    # Create a custom exception to prevent other errors from slipping in
    class MyException(Exception):
        pass

    X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
    # The first call is on dummy predictor failure
    message = str(np.random.randint(100)) + '_run_smbo'
    smbo_run_mock.side_effect = MyException(message)

    with pytest.raises(MyException):
        automl.fit(
            X_train,
            Y_train,
            task=MULTICLASS_CLASSIFICATION,
            dataset_name=dataset_name,
        )

    # Make sure that the logfile was created
    logger_name = 'AutoML(%d):%s' % (1, dataset_name)
    logger = logging.getLogger(logger_name)
    logfile = os.path.join(backend.temporary_directory, logger_name + '.log')
    assert os.path.exists(logfile), (
        print_debug_information(automl) + str(automl._clean_logger()))

    # Give the error message some time to be printed to the log file
    found_message = False
    for incr_tolerance in range(5):
        with open(logfile) as f:
            lines = f.readlines()
        if any(message in line for line in lines):
            found_message = True
            break
        else:
            time.sleep(incr_tolerance)

    # Speed up the closing after the forced crash
    automl._clean_logger()

    if not found_message:
        pytest.fail("Did not find {} in the log file {} for logger {}/{}/{}".format(
            message,
            print_debug_information(automl),
            vars(automl._logger.logger),
            vars(logger),
            vars(logging.getLogger())
        ))
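
# Many asserts in these tests pass `print_debug_information(automl)` as the
# assertion message so a failure dumps the run's logs. A rough sketch of
# such a helper, assuming the log files live in the backend's temporary
# directory (the real helper is defined in the shared test utilities):
def _print_debug_information_sketch(automl):
    content = []
    for logfile in glob.glob(os.path.join(
            automl._backend.temporary_directory, '*.log')):
        with open(logfile) as fh:
            content.append(fh.read())
    return '\n'.join(content)
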
def test_automl_outputs(backend, dask_client):

    X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
    name = 'iris'
    data_manager_file = os.path.join(
        backend.temporary_directory,
        '.auto-sklearn',
        'datamanager.pkl'
    )

    auto = autosklearn.automl.AutoML(
        backend, 30, 5,
        initial_configurations_via_metalearning=0,
        seed=100,
        metric=accuracy,
        dask_client=dask_client,
    )
    auto.fit(
        X=X_train,
        y=Y_train,
        X_test=X_test,
        y_test=Y_test,
        dataset_name=name,
        task=MULTICLASS_CLASSIFICATION,
    )

    # Pickled data manager (without one-hot encoding!)
    with open(data_manager_file, 'rb') as fh:
        D = pickle.load(fh)
        assert np.allclose(D.data['X_train'], X_train)

    # Check that all directories are there
    fixture = [
        'true_targets_ensemble.npy',
        'start_time_100',
        'datamanager.pkl',
        'ensemble_read_preds.pkl',
        'ensemble_read_losses.pkl',
        'runs',
        'ensembles',
        'ensemble_history.json',
    ]
    assert (
        sorted(os.listdir(os.path.join(backend.temporary_directory, '.auto-sklearn')))
        == sorted(fixture)
    )

    # At least one ensemble, one validation, one test prediction and one
    # model and one ensemble
    fixture = glob.glob(os.path.join(
        backend.temporary_directory,
        '.auto-sklearn',
        'runs',
        '*',
        'predictions_ensemble*npy',
    ))
    assert len(fixture) > 0

    fixture = glob.glob(os.path.join(backend.temporary_directory, '.auto-sklearn',
                                     'runs', '*', '100.*.model'))
    assert len(fixture) > 0

    fixture = os.listdir(os.path.join(backend.temporary_directory,
                                      '.auto-sklearn', 'ensembles'))
    assert '100.0000000000.ensemble' in fixture

    # Start time
    start_time_file_path = os.path.join(backend.temporary_directory,
                                        '.auto-sklearn', 'start_time_100')
    with open(start_time_file_path, 'r') as fh:
        start_time = float(fh.read())
    assert time.time() - start_time >= 10, print_debug_information(auto)

    # Then check that the logger matches the run expectation
    logfile = glob.glob(os.path.join(
        auto._backend.temporary_directory, 'AutoML*.log'))[0]
    parser = AutoMLLogParser(logfile)

    # The number of ensemble trajectories is properly recorded in the log file
    success_ensemble_iters_auto = len(auto.ensemble_performance_history)
    success_ensemble_iters_log = parser.count_ensembler_success_pynisher_calls()
    assert success_ensemble_iters_auto == success_ensemble_iters_log, "{} != {}".format(
        auto.ensemble_performance_history,
        print_debug_information(auto),
    )

    # We also care that no iteration got lost. This is important because
    # it counts for pynisher calls and whether a pynisher call actually
    # called the ensemble
    total_ensemble_iterations = parser.count_ensembler_iterations()
    assert len(total_ensemble_iterations) > 1  # At least 1 iteration
    assert sorted(total_ensemble_iterations) == list(
        range(1, max(total_ensemble_iterations) + 1)), total_ensemble_iterations

    # The dummy is a call to pynisher before budget exhaustion, but it is
    # not in the run history, hence the -1
    total_calls_to_pynisher_log = parser.count_tae_pynisher_calls() - 1
    total_returns_from_pynisher_log = parser.count_tae_pynisher_returns() - 1
    total_elements_rh = len([run_value for run_value
                             in auto.runhistory_.data.values()
                             if run_value.status == StatusType.RUNNING])

    # Make sure we register all calls to pynisher.
    # The less-than-or-equal here is added as a workaround, as
    # https://github.com/automl/SMAC3/pull/712 is not yet integrated
    assert total_elements_rh <= total_calls_to_pynisher_log, \
        print_debug_information(auto)

    # Make sure we register all returns from pynisher
    assert total_elements_rh <= total_returns_from_pynisher_log, \
        print_debug_information(auto)

    # Lastly check that the settings are printed to the logfile
    ensemble_size = parser.get_automl_setting_from_log(auto._dataset_name,
                                                       'ensemble_size')
    assert auto._ensemble_size == int(ensemble_size)

    del auto
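
# The log-based accounting above relies on AutoMLLogParser counting marker
# lines in the AutoML log. A simplified, hypothetical stand-in; the marker
# string below is an assumption and the real parser lives in the test
# utilities:
class _AutoMLLogParserSketch:
    def __init__(self, logfile):
        with open(logfile) as fh:
            self.lines = fh.readlines()

    def _count(self, marker):
        return sum(1 for line in self.lines if marker in line)

    def count_tae_pynisher_calls(self):
        # Assumed marker written when pynisher launches the target
        # algorithm; the exact wording in real logs may differ.
        return self._count('Function called with argument')
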
def test_automl_outputs(backend, dask_client):

    X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
    name = 'iris'
    data_manager_file = os.path.join(backend.temporary_directory,
                                     '.auto-sklearn',
                                     'datamanager.pkl')

    auto = autosklearn.automl.AutoML(
        backend, 30, 5,
        initial_configurations_via_metalearning=0,
        seed=100,
        metric=accuracy,
        dask_client=dask_client,
    )
    setup_logger(backend.temporary_directory)
    auto._logger = get_logger('test_automl_outputs')
    auto.fit(
        X=X_train,
        y=Y_train,
        X_test=X_test,
        y_test=Y_test,
        dataset_name=name,
        task=MULTICLASS_CLASSIFICATION,
    )

    # Pickled data manager (without one-hot encoding!)
    with open(data_manager_file, 'rb') as fh:
        D = pickle.load(fh)
        assert np.allclose(D.data['X_train'], X_train)

    # Check that all directories are there
    fixture = [
        'true_targets_ensemble.npy',
        'start_time_100',
        'datamanager.pkl',
        'ensemble_read_preds.pkl',
        'ensemble_read_scores.pkl',
        'runs',
        'ensembles',
        'ensemble_history.json',
    ]
    assert (sorted(os.listdir(os.path.join(backend.temporary_directory,
                                           '.auto-sklearn')))
            == sorted(fixture))

    # At least one ensemble, one validation, one test prediction and one
    # model and one ensemble
    fixture = glob.glob(os.path.join(
        backend.temporary_directory,
        '.auto-sklearn',
        'runs',
        '*',
        'predictions_ensemble*npy',
    ))
    assert len(fixture) > 0

    fixture = glob.glob(os.path.join(backend.temporary_directory, '.auto-sklearn',
                                     'runs', '*', '100.*.model'))
    assert len(fixture) > 0

    fixture = os.listdir(os.path.join(backend.temporary_directory,
                                      '.auto-sklearn', 'ensembles'))
    assert '100.0000000000.ensemble' in fixture

    # Start time
    start_time_file_path = os.path.join(backend.temporary_directory,
                                        '.auto-sklearn', 'start_time_100')
    with open(start_time_file_path, 'r') as fh:
        start_time = float(fh.read())
    assert time.time() - start_time >= 10, print_debug_information(auto)

    del auto