예제 #1
0
def test_multilabel(tmp_dir, output_dir, dask_client):
    """Fit a classifier on the multilabel variant of iris and check its output.

    Verified in order: the prediction shape, that at least one run succeeded,
    the helper checks on ``performance_over_time_``, the ``f1_macro`` score on
    the test split, and the mean of the predicted probabilities on the
    training split.
    """
    X_train, Y_train, X_test, Y_test = putil.get_dataset(
        'iris', make_multilabel=True)

    classifier_settings = dict(
        time_left_for_this_task=30,
        per_run_time_limit=5,
        tmp_folder=tmp_dir,
        dask_client=dask_client,
        output_folder=output_dir,
    )
    automl = AutoSklearnClassifier(**classifier_settings)
    automl.fit(X_train, Y_train)

    predictions = automl.predict(X_test)
    expected_shape = (50, 3)
    assert predictions.shape == expected_shape, print_debug_information(automl)
    assert count_succeses(automl.cv_results_) > 0, \
        print_debug_information(automl)

    # Both helpers must return the literal ``True``, not merely a truthy value.
    history_columns = automl.performance_over_time_.columns
    assert includes_train_scores(history_columns) is True
    assert performance_over_time_is_plausible(
        automl.performance_over_time_) is True

    score = f1_macro(Y_test, predictions)
    assert score >= 0.9, print_debug_information(automl)

    # Mean predicted probability must be within 10% (relative) of 0.33.
    train_probabilities = automl.predict_proba(X_train)
    assert np.mean(train_probabilities) == pytest.approx(0.33, rel=1e-1)
예제 #2
0
def test_regression_pandas_support(tmp_dir, output_dir, dask_client):
    """Check that a regressor can be fitted, scored and refitted on pandas input.

    The data is fetched from OpenML as a DataFrame/Series pair and handed to
    the estimator without any conversion.
    """
    features, target = sklearn.datasets.fetch_openml(
        data_id=41514,  # diabetes
        return_X_y=True,
        as_frame=True,
    )

    # The test is only meaningful when the inputs really are pandas objects.
    assert isinstance(features, pd.DataFrame)
    assert isinstance(target, pd.Series)

    regressor_settings = {
        'time_left_for_this_task': 40,
        'per_run_time_limit': 5,
        'dask_client': dask_client,
        'tmp_folder': tmp_dir,
        'output_folder': output_dir,
    }
    automl = AutoSklearnRegressor(**regressor_settings)

    # Fit directly on the DataFrame/Series pair; this call is expected to
    # complete without raising.
    automl.fit(features, target)

    # Score on the training data itself: this exercises the code path rather
    # than measuring generalisation, so only a loose lower bound is required.
    assert automl.score(features, target) >= 0.5, \
        print_debug_information(automl)

    automl.refit(features, target)

    # The same loose bound must hold after refitting.
    refit_predictions = automl.predict(features)
    assert r2(target, refit_predictions) > 0.5, \
        print_debug_information(automl)
    assert count_succeses(automl.cv_results_) > 0, \
        print_debug_information(automl)
예제 #3
0
def test_binary(tmp_dir, output_dir, dask_client):
    """Fit a classifier on the binary variant of iris, passing the test split to ``fit``.

    Besides prediction shape, accuracy and run success, the test checks that
    prediction files for the named dataset were written below ``output_dir``.
    """
    X_train, Y_train, X_test, Y_test = putil.get_dataset(
        'iris', make_binary=True)

    automl = AutoSklearnClassifier(
        time_left_for_this_task=40,
        per_run_time_limit=10,
        tmp_folder=tmp_dir,
        dask_client=dask_client,
        output_folder=output_dir,
    )

    # The dataset name is reused in the glob pattern checked further down.
    fit_options = {
        'X_test': X_test,
        'y_test': Y_test,
        'dataset_name': 'binary_test_dataset',
    }
    automl.fit(X_train, Y_train, **fit_options)

    predictions = automl.predict(X_test)
    assert predictions.shape == (50, ), print_debug_information(automl)

    score = accuracy(Y_test, predictions)
    assert score > 0.9, print_debug_information(automl)
    assert count_succeses(automl.cv_results_) > 0, \
        print_debug_information(automl)

    # At least one test-prediction file must exist for the named dataset.
    prediction_pattern = os.path.join(
        output_dir, 'binary_test_dataset_test_*.predict')
    output_files = glob.glob(prediction_pattern)
    assert len(output_files) > 0, (output_files,
                                   print_debug_information(automl))
def test_cv_regression(tmp_dir, dask_client):
    """Check that a regressor can be fitted with the ``'cv'`` resampling strategy."""
    X_train, Y_train, X_test, Y_test = putil.get_dataset(
        'boston', train_size_maximum=300)

    regressor_settings = {
        'time_left_for_this_task': 60,
        'per_run_time_limit': 10,
        'resampling_strategy': 'cv',
        'tmp_folder': tmp_dir,
        'dask_client': dask_client,
    }
    automl = AutoSklearnRegressor(**regressor_settings)
    automl.fit(X_train, Y_train)

    predictions = automl.predict(X_test)
    expected_shape = (206, )
    assert predictions.shape == expected_shape

    score = r2(Y_test, predictions)
    assert score >= 0.1, print_debug_information(automl)
    assert count_succeses(automl.cv_results_) > 0, \
        print_debug_information(automl)

    # Both helpers must return the literal ``True``, not merely a truthy value.
    history_columns = automl.performance_over_time_.columns
    assert includes_train_scores(history_columns) is True
    assert performance_over_time_is_plausible(
        automl.performance_over_time_) is True
def test_binary(tmp_dir, dask_client):
    """Fit a classifier on the binary variant of iris and check its results.

    The test split is passed to ``fit`` as well, and the temporary folder is
    kept after termination. Checks cover prediction shape, accuracy, run
    success and the helper checks on ``performance_over_time_``.
    """
    X_train, Y_train, X_test, Y_test = putil.get_dataset(
        'iris', make_binary=True)

    classifier_settings = {
        'time_left_for_this_task': 40,
        'delete_tmp_folder_after_terminate': False,
        'per_run_time_limit': 10,
        'tmp_folder': tmp_dir,
        'dask_client': dask_client,
    }
    automl = AutoSklearnClassifier(**classifier_settings)

    fit_options = {
        'X_test': X_test,
        'y_test': Y_test,
        'dataset_name': 'binary_test_dataset',
    }
    automl.fit(X_train, Y_train, **fit_options)

    predictions = automl.predict(X_test)
    assert predictions.shape == (50, ), print_debug_information(automl)

    score = accuracy(Y_test, predictions)
    assert score > 0.9, print_debug_information(automl)
    assert count_succeses(automl.cv_results_) > 0, \
        print_debug_information(automl)

    # Both helpers must return the literal ``True``, not merely a truthy value.
    history_columns = automl.performance_over_time_.columns
    assert includes_all_scores(history_columns) is True
    assert performance_over_time_is_plausible(
        automl.performance_over_time_) is True
def test_autosklearn2_classification_methods_returns_self_sparse(dask_client):
    """Check that ``fit``, ``fit_ensemble`` and ``refit`` return the estimator itself.

    Runs on the sparse variant of the breast_cancer dataset and additionally
    checks the accuracy, that the string form of the configuration space does
    not contain "boosting", and that the fitted estimator can be pickled.
    """
    X_train, y_train, X_test, y_test = putil.get_dataset(
        'breast_cancer', make_sparse=True)

    automl = AutoSklearn2Classifier(
        time_left_for_this_task=60,
        ensemble_size=0,
        delete_tmp_folder_after_terminate=False,
        dask_client=dask_client,
    )

    # Each of the three methods must hand back the very same object.
    fit_result = automl.fit(X_train, y_train)
    assert automl is fit_result

    ensemble_result = automl.fit_ensemble(y_train, ensemble_size=5)
    assert automl is ensemble_result

    refit_result = automl.refit(X_train.copy(), y_train.copy())
    assert automl is refit_result

    predictions = fit_result.predict(X_test)
    test_accuracy = sklearn.metrics.accuracy_score(y_test, predictions)
    assert test_accuracy >= 2 / 3, print_debug_information(automl)

    config_space_text = str(
        automl.get_configuration_space(X=X_train, y=y_train))
    assert "boosting" not in config_space_text

    # Pickling must not raise; the serialised bytes themselves are not needed.
    pickle.dumps(fit_result)
예제 #7
0
def test_autosklearn2_classification_methods_returns_self(dask_client):
    """Check that ``fit``, ``fit_ensemble`` and ``refit`` return the estimator itself.

    Runs on the iris dataset and additionally checks the accuracy and that
    the fitted estimator can be pickled.
    """
    X_train, y_train, X_test, y_test = putil.get_dataset('iris')

    automl = AutoSklearn2Classifier(
        time_left_for_this_task=60,
        ensemble_size=0,
        dask_client=dask_client,
    )

    # Each of the three methods must hand back the very same object.
    fit_result = automl.fit(X_train, y_train)
    assert automl is fit_result

    ensemble_result = automl.fit_ensemble(y_train, ensemble_size=5)
    assert automl is ensemble_result

    refit_result = automl.refit(X_train.copy(), y_train.copy())
    assert automl is refit_result

    predictions = fit_result.predict(X_test)
    test_accuracy = sklearn.metrics.accuracy_score(y_test, predictions)
    assert test_accuracy >= 2 / 3, print_debug_information(automl)

    # Pickling must not raise; the serialised bytes themselves are not needed.
    pickle.dumps(fit_result)
예제 #8
0
def test_regression(tmp_dir, output_dir, dask_client):
    """Fit a regressor on the boston dataset and check shape, score and run success."""
    X_train, Y_train, X_test, Y_test = putil.get_dataset('boston')

    regressor_settings = {
        'time_left_for_this_task': 30,
        'per_run_time_limit': 5,
        'tmp_folder': tmp_dir,
        'dask_client': dask_client,
        'output_folder': output_dir,
    }
    automl = AutoSklearnRegressor(**regressor_settings)
    automl.fit(X_train, Y_train)

    predictions = automl.predict(X_test)
    expected_shape = (356, )
    assert predictions.shape == expected_shape

    # Threshold rationale: being np.sqrt(30) (~5.5) away from the target on
    # average. With select rates the average score drops to a range of
    # -32.40 to -37 under the 30 second constraint; with a larger
    # time_left_for_this_task this is no longer an issue.
    score = mean_squared_error(Y_test, predictions)
    assert score >= -37, print_debug_information(automl)
    assert count_succeses(automl.cv_results_) > 0
예제 #9
0
def test_classification_pandas_support(tmp_dir, output_dir, dask_client):
    """Check that a classifier can be fitted, scored and refitted on pandas input.

    The data is fetched from OpenML as a DataFrame/Series pair. Columns that
    contain missing values are dropped before fitting, and the frame is then
    handed to the estimator without further conversion.
    """

    X, y = sklearn.datasets.fetch_openml(
        data_id=2,  # cat/num dataset
        return_X_y=True,
        as_frame=True,
    )

    # Drop every column containing NaN. ``axis`` is passed by keyword because
    # newer pandas releases no longer accept it positionally.
    X = X.dropna(axis='columns')

    # This test only makes sense if the input is a DataFrame/Series pair
    assert isinstance(X, pd.DataFrame)
    assert isinstance(y, pd.Series)
    automl = AutoSklearnClassifier(
        time_left_for_this_task=30,
        per_run_time_limit=5,
        exclude_estimators=['libsvm_svc'],
        dask_client=dask_client,
        seed=5,
        tmp_folder=tmp_dir,
        output_folder=output_dir,
    )

    automl.fit(X, y)

    # Make sure that we are at least better than random.
    # X_train == X_test here: this exercises the code path, not generalisation.
    assert automl.score(X, y) > 0.555, print_debug_information(automl)

    automl.refit(X, y)

    # Make sure that we are at least better than random.
    # accuracy in sklearn needs valid data
    # The bound is 0.555 because the dataset is unbalanced.
    prediction = automl.predict(X)
    assert accuracy(y, prediction) > 0.555, print_debug_information(automl)
    assert count_succeses(automl.cv_results_) > 0, \
        print_debug_information(automl)
    assert includes_train_scores(automl.performance_over_time_.columns) is True
    assert performance_over_time_is_plausible(
        automl.performance_over_time_) is True
예제 #10
0
def test_exceptions_inside_log_in_smbo(smbo_run_mock, backend, dask_client):
    """Check that an exception raised via ``smbo_run_mock`` ends up in the log file.

    The mock is given a custom exception carrying a randomised message as its
    side effect. ``fit`` is expected to raise that exception, after which the
    AutoML log file in the backend's temporary directory is polled for the
    message.
    """

    # The import and shutdown below are a workaround to make sure we reset
    # the port used to collect messages. When this test runs together with
    # several other tests it randomly fails otherwise. This resets the
    # singletons of the logging class.
    import logging
    logging.shutdown()

    automl = autosklearn.automl.AutoML(
        backend,
        20,
        5,
        metric=accuracy,
        dask_client=dask_client,
    )

    dataset_name = 'test_exceptions_inside_log'

    # Create a custom exception to prevent other errors from slipping in
    class MyException(Exception):
        pass

    X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
    # The first call is on dummy predictor failure
    # The random prefix makes the message distinguishable when searching the log.
    message = str(np.random.randint(100)) + '_run_smbo'
    smbo_run_mock.side_effect = MyException(message)

    with pytest.raises(MyException):
        automl.fit(
            X_train,
            Y_train,
            task=MULTICLASS_CLASSIFICATION,
            dataset_name=dataset_name,
        )

    # Make sure that the logfile was created
    logger_name = 'AutoML(%d):%s' % (1, dataset_name)
    logger = logging.getLogger(logger_name)
    logfile = os.path.join(backend.temporary_directory, logger_name + '.log')
    assert os.path.exists(logfile), print_debug_information(automl) + str(automl._clean_logger())

    # Give the error message some time to be written to the log file:
    # re-read the file up to five times, sleeping 0, 1, 2, 3 and 4 seconds
    # after each unsuccessful attempt.
    found_message = False
    for incr_tolerance in range(5):
        with open(logfile) as f:
            lines = f.readlines()
        if any(message in line for line in lines):
            found_message = True
            break
        else:
            time.sleep(incr_tolerance)

    # Speed up the closing after forced crash
    automl._clean_logger()

    # Fail with as much logger state as possible to help diagnose the miss.
    if not found_message:
        pytest.fail("Did not find {} in the log file {} for logger {}/{}/{}".format(
            message,
            print_debug_information(automl),
            vars(automl._logger.logger),
            vars(logger),
            vars(logging.getLogger())
        ))
예제 #11
0
def test_automl_outputs(backend, dask_client):
    """Fit an AutoML instance on iris and verify its on-disk outputs and its log.

    Checked in order: the pickled data manager, the contents of the
    ``.auto-sklearn`` directory, the presence of prediction, model and
    ensemble files, the recorded start time, and finally that the counts
    parsed from the log file agree with what the fitted object reports.
    """

    X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
    name = 'iris'
    data_manager_file = os.path.join(
        backend.temporary_directory,
        '.auto-sklearn',
        'datamanager.pkl'
    )

    auto = autosklearn.automl.AutoML(
        backend, 30, 5,
        initial_configurations_via_metalearning=0,
        seed=100,
        metric=accuracy,
        dask_client=dask_client,
    )
    auto.fit(
        X=X_train,
        y=Y_train,
        X_test=X_test,
        y_test=Y_test,
        dataset_name=name,
        task=MULTICLASS_CLASSIFICATION,
    )

    # pickled data manager (without one hot encoding!)
    with open(data_manager_file, 'rb') as fh:
        D = pickle.load(fh)
        assert np.allclose(D.data['X_train'], X_train)

    # Check that exactly the expected entries are there (order-insensitive)
    fixture = [
        'true_targets_ensemble.npy',
        'start_time_100',
        'datamanager.pkl',
        'ensemble_read_preds.pkl',
        'ensemble_read_losses.pkl',
        'runs',
        'ensembles',
        'ensemble_history.json',
    ]
    assert (
        sorted(os.listdir(os.path.join(backend.temporary_directory, '.auto-sklearn')))
        == sorted(fixture)
    )

    # At least one ensemble, one validation, one test prediction and one
    # model and one ensemble
    fixture = glob.glob(os.path.join(
        backend.temporary_directory,
        '.auto-sklearn', 'runs', '*', 'predictions_ensemble*npy',
    ))
    assert len(fixture) > 0

    fixture = glob.glob(os.path.join(backend.temporary_directory, '.auto-sklearn',
                                     'runs', '*', '100.*.model'))
    assert len(fixture) > 0

    fixture = os.listdir(os.path.join(backend.temporary_directory,
                                      '.auto-sklearn', 'ensembles'))
    assert '100.0000000000.ensemble' in fixture

    # Start time: the recorded timestamp must lie at least 10 seconds back
    start_time_file_path = os.path.join(backend.temporary_directory,
                                        '.auto-sklearn', "start_time_100")
    with open(start_time_file_path, 'r') as fh:
        start_time = float(fh.read())
    assert time.time() - start_time >= 10, print_debug_information(auto)

    # Then check that the logger matches the run expectation
    logfile = glob.glob(os.path.join(
        auto._backend.temporary_directory, 'AutoML*.log'))[0]
    parser = AutoMLLogParser(logfile)

    # The number of ensemble trajectories properly in log file
    success_ensemble_iters_auto = len(auto.ensemble_performance_history)
    success_ensemble_iters_log = parser.count_ensembler_success_pynisher_calls()
    assert success_ensemble_iters_auto == success_ensemble_iters_log, "{} != {}".format(
        auto.ensemble_performance_history,
        print_debug_information(auto),
    )

    # We also care that no iteration got lost
    # This is important because it counts for pynisher calls
    # and whether a pynisher call actually called the ensemble
    total_ensemble_iterations = parser.count_ensembler_iterations()
    assert len(total_ensemble_iterations) > 1  # More than one iteration logged
    # Compare explicitly: a bare ``range`` object is truthy whenever it is
    # non-empty, so asserting it directly would not check anything. Every
    # iteration number from 1 up to the largest one seen must be present.
    expected_iterations = set(range(1, max(total_ensemble_iterations) + 1))
    missing_iterations = expected_iterations.difference(total_ensemble_iterations)
    assert not missing_iterations, total_ensemble_iterations

    # a point where pynisher is called before budget exhaustion
    # Dummy not in run history, hence the "- 1" on both log counts
    total_calls_to_pynisher_log = parser.count_tae_pynisher_calls() - 1
    total_returns_from_pynisher_log = parser.count_tae_pynisher_returns() - 1
    total_elements_rh = len([run_value for run_value in auto.runhistory_.data.values(
    ) if run_value.status == StatusType.RUNNING])

    # Make sure we register all calls to pynisher
    # The less than or equal here is added as a WA as
    # https://github.com/automl/SMAC3/pull/712 is not yet integrated
    assert total_elements_rh <= total_calls_to_pynisher_log, print_debug_information(auto)

    # Make sure we register all returns from pynisher
    assert total_elements_rh <= total_returns_from_pynisher_log, print_debug_information(auto)

    # Lastly check that settings are printed to the logfile
    ensemble_size = parser.get_automl_setting_from_log(auto._dataset_name, 'ensemble_size')
    assert auto._ensemble_size == int(ensemble_size)

    del auto
예제 #12
0
def test_automl_outputs(backend, dask_client):
    """Fit an AutoML instance on iris and verify the files it leaves on disk.

    Checked in order: the pickled data manager, the contents of the
    ``.auto-sklearn`` directory, the presence of prediction, model and
    ensemble files, and the recorded start time.
    """

    def internal_path(*parts):
        # Build a path below the backend's '.auto-sklearn' directory.
        return os.path.join(backend.temporary_directory, '.auto-sklearn',
                            *parts)

    X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
    name = 'iris'
    data_manager_file = internal_path('datamanager.pkl')

    auto = autosklearn.automl.AutoML(
        backend, 30, 5,
        initial_configurations_via_metalearning=0,
        seed=100,
        metric=accuracy,
        dask_client=dask_client,
    )
    # Install a logger on the instance before fitting.
    setup_logger(backend.temporary_directory)
    auto._logger = get_logger('test_automl_outputs')
    auto.fit(
        X=X_train,
        y=Y_train,
        X_test=X_test,
        y_test=Y_test,
        dataset_name=name,
        task=MULTICLASS_CLASSIFICATION,
    )

    # pickled data manager (without one hot encoding!)
    with open(data_manager_file, 'rb') as fh:
        data_manager = pickle.load(fh)
    assert np.allclose(data_manager.data['X_train'], X_train)

    # The internal directory must hold exactly these entries
    # (compared order-insensitively).
    expected_entries = [
        'true_targets_ensemble.npy',
        'start_time_100',
        'datamanager.pkl',
        'ensemble_read_preds.pkl',
        'ensemble_read_scores.pkl',
        'runs',
        'ensembles',
        'ensemble_history.json',
    ]
    actual_entries = os.listdir(internal_path())
    assert (sorted(actual_entries) == sorted(expected_entries))

    # At least one ensemble-prediction file and one model file per glob
    prediction_files = glob.glob(
        internal_path('runs', '*', 'predictions_ensemble*npy'))
    assert len(prediction_files) > 0

    model_files = glob.glob(internal_path('runs', '*', '100.*.model'))
    assert len(model_files) > 0

    ensemble_files = os.listdir(internal_path('ensembles'))
    assert '100.0000000000.ensemble' in ensemble_files

    # Start time: the recorded timestamp must lie at least 10 seconds back
    start_time_file_path = internal_path("start_time_100")
    with open(start_time_file_path, 'r') as fh:
        start_time = float(fh.read())
    assert time.time() - start_time >= 10, print_debug_information(auto)

    del auto