Beispiel #1
0
def evaluate_hmab(algorithms,
                  dataset,
                  run_id,
                  trial_num,
                  seed,
                  time_limit=1200):
    """Optimize a ``FirstLayerBandit`` on ``dataset`` and pickle the scores.

    The training split is passed through ``DataBalancer`` when
    ``is_unbalanced_dataset`` flags it. After ``bandit.optimize()`` the test
    split is scored with ``balanced_accuracy`` twice: once with
    ``_best_predict`` and once, after ``bandit.refit()``, with
    ``_es_predict``.

    Relies on the module-level names ``hmab_flag``, ``opt_algo``,
    ``per_run_time_limit`` and ``project_dir``, which are defined outside
    this function.

    Args:
        algorithms: Candidate algorithms handed to ``FirstLayerBandit``; the
            length is embedded in the output file name.
        dataset: Dataset name given to ``load_train_test_data``.
        run_id: Integer run identifier used in the log line and file name.
        trial_num: Integer forwarded to ``FirstLayerBandit``.
        seed: Integer seed forwarded to ``FirstLayerBandit``.
        time_limit: Budget forwarded to ``FirstLayerBandit``
            (presumably seconds -- confirm against the bandit).
    """
    print('%s-%s-%d: %d' % (hmab_flag, dataset, run_id, time_limit))

    started_at = time.time()
    train_data, test_data = load_train_test_data(dataset,
                                                 task_type=MULTICLASS_CLS)
    # Two distinct training labels select the binary task type.
    if len(set(train_data.data[1])) == 2:
        cls_task_type = BINARY_CLS
    else:
        cls_task_type = MULTICLASS_CLS
    balanced_acc_metric = make_scorer(balanced_accuracy)

    if is_unbalanced_dataset(train_data):
        from solnml.components.feature_engineering.transformations.preprocessor.smote_balancer import DataBalancer
        balancer = DataBalancer()
        train_data = balancer.operate(train_data)

    bandit_options = dict(output_dir='logs',
                          per_run_time_limit=per_run_time_limit,
                          dataset_name=dataset,
                          ensemble_size=50,
                          inner_opt_algorithm=opt_algo,
                          metric=balanced_acc_metric,
                          fe_algo='bo',
                          seed=seed,
                          time_limit=time_limit,
                          eval_type='holdout')
    bandit = FirstLayerBandit(cls_task_type, trial_num, algorithms,
                              train_data, **bandit_options)
    bandit.optimize()
    time_taken = time.time() - started_at
    model_desc = [
        bandit.nbest_algo_ids,
        bandit.optimal_algo_id,
        bandit.final_rewards,
        bandit.action_sequence,
    ]

    validation_accuracy = np.max(bandit.final_rewards)
    single_model_pred = bandit._best_predict(test_data)
    test_accuracy = balanced_accuracy(test_data.data[1], single_model_pred)

    # The ensemble prediction is taken only after refit().
    bandit.refit()
    ensemble_pred = bandit._es_predict(test_data)
    test_accuracy_with_ens = balanced_accuracy(test_data.data[1],
                                               ensemble_pred)

    data = [
        dataset,
        validation_accuracy,
        test_accuracy,
        test_accuracy_with_ens,
        time_taken,
        model_desc,
    ]
    print(model_desc)
    print(data)

    file_name = '%s_%s_%s_%d_%d_%d_%d_%d.pkl' % (
        hmab_flag, opt_algo, dataset, trial_num, len(algorithms), seed, run_id,
        time_limit)
    save_path = project_dir + file_name
    with open(save_path, 'wb') as out_file:
        pickle.dump(data, out_file)
Beispiel #2
0
def evaluate_hmab(algorithms, dataset, run_id, trial_num, seed, time_limit=1200):
    """Run a ``FirstLayerBandit`` on algorithms picked by ``AlgorithmAdvisor``.

    An ``AlgorithmAdvisor`` is fitted with a fixed list of excluded datasets,
    and ``dataset`` is asserted to be one of them. From the advisor's
    candidates, at most five that also appear in ``algorithms`` are kept, in
    the advisor's order, and handed to the bandit. Scores are then computed
    and pickled under ``project_dir``.

    Relies on the module-level names ``hmab_flag``, ``opt_algo``,
    ``per_run_time_limit`` and ``project_dir``, which are defined outside
    this function.

    Args:
        algorithms: Allowed algorithms; the length is embedded in the output
            file name (not the length of the recommended subset).
        dataset: Dataset name; must be in the hard-coded exclusion list.
        run_id: Integer run identifier used in the log line and file name.
        trial_num: Integer forwarded to ``FirstLayerBandit``.
        seed: Integer seed forwarded to ``FirstLayerBandit``.
        time_limit: Budget forwarded to ``FirstLayerBandit``
            (presumably seconds -- confirm against the bandit).
    """
    print('%s-%s-%d: %d' % (hmab_flag, dataset, run_id, time_limit))
    exclude_datasets = [
        'gina_prior2', 'pc2', 'abalone', 'wind', 'waveform-5000(2)',
        'page-blocks(1)', 'winequality_white', 'pollen',
    ]
    alad = AlgorithmAdvisor(task_type=MULTICLASS_CLS,
                            n_algorithm=9,
                            metric='bal_acc',
                            exclude_datasets=exclude_datasets)
    n_algo = 5
    assert dataset in exclude_datasets
    meta_infos = alad.fit_meta_learner()
    assert dataset not in meta_infos
    model_candidates = alad.fetch_algorithm_set(dataset)
    print(model_candidates)
    # Keep the first n_algo recommended algorithms that the caller allows.
    include_models = [
        candidate for candidate in model_candidates
        if candidate in algorithms
    ][:n_algo]
    print('After algorithm recommendation', include_models)

    started_at = time.time()
    train_data, test_data = load_train_test_data(dataset, task_type=MULTICLASS_CLS)
    # Two distinct training labels select the binary task type.
    if len(set(train_data.data[1])) == 2:
        cls_task_type = BINARY_CLS
    else:
        cls_task_type = MULTICLASS_CLS
    balanced_acc_metric = make_scorer(balanced_accuracy)

    if is_unbalanced_dataset(train_data):
        from solnml.components.feature_engineering.transformations.balancer.smote_balancer import DataBalancer
        balancer = DataBalancer()
        train_data = balancer.operate(train_data)

    bandit_options = dict(output_dir='logs',
                          per_run_time_limit=per_run_time_limit,
                          dataset_name=dataset,
                          ensemble_size=50,
                          inner_opt_algorithm=opt_algo,
                          metric=balanced_acc_metric,
                          fe_algo='bo',
                          seed=seed,
                          time_limit=time_limit,
                          eval_type='holdout')
    bandit = FirstLayerBandit(cls_task_type, trial_num, include_models,
                              train_data, **bandit_options)
    bandit.optimize()
    time_taken = time.time() - started_at
    model_desc = [
        bandit.nbest_algo_ids,
        bandit.optimal_algo_id,
        bandit.final_rewards,
        bandit.action_sequence,
    ]

    validation_accuracy = np.max(bandit.final_rewards)
    single_model_pred = bandit._best_predict(test_data)
    test_accuracy = balanced_accuracy(test_data.data[1], single_model_pred)

    # The ensemble prediction is taken only after refit().
    bandit.refit()
    ensemble_pred = bandit._es_predict(test_data)
    test_accuracy_with_ens = balanced_accuracy(test_data.data[1], ensemble_pred)

    data = [
        dataset,
        validation_accuracy,
        test_accuracy,
        test_accuracy_with_ens,
        time_taken,
        model_desc,
    ]
    print(model_desc)
    print(data)

    file_name = '%s_%s_%s_%d_%d_%d_%d_%d.pkl' % (
        hmab_flag, opt_algo, dataset, trial_num, len(algorithms), seed, run_id, time_limit)
    save_path = project_dir + file_name
    with open(save_path, 'wb') as out_file:
        pickle.dump(data, out_file)
Beispiel #3
0
def conduct_fe(dataset='pc4',
               classifier_id='random_forest',
               iter_num=100,
               run_id=0,
               seed=1):
    """Record validation/test scores over ``EvaluationBasedOptimizer`` steps.

    A baseline entry is recorded first using the classifier's default
    configuration on the raw data. Then, for each of ``iter_num`` calls to
    ``optimizer.iterate()``, the returned performance is appended to the
    validation list, the returned incumbent is applied to both splits, and a
    default-configuration estimator is fitted on the transformed training
    data and scored on the transformed test data with ``balanced_accuracy``.
    Both lists are printed every iteration and pickled at the end.

    Relies on the module-level name ``save_dir`` defined outside this
    function.

    Args:
        dataset: Dataset name given to ``load_train_test_data``.
        classifier_id: Key into auto-sklearn's ``_classifiers`` registry.
        iter_num: Number of optimizer iterations.
        run_id: Integer appended to the output file name.
        seed: Seed used for data loading, the evaluator and the optimizer.
    """
    from autosklearn.pipeline.components.classification import _classifiers

    cs = _classifiers[classifier_id].get_hyperparameter_search_space()
    cs.add_hyperparameter(
        UnParametrizedHyperparameter("estimator", classifier_id))
    default_config = cs.get_default_configuration()

    raw_data, test_raw_data = load_train_test_data(dataset, random_state=seed)
    evaluator = ClassificationEvaluator(default_config,
                                        name='fe',
                                        data_node=raw_data,
                                        resampling_strategy='holdout',
                                        seed=seed)

    # Baseline: default configuration on the untransformed data.
    baseline_val = evaluator(default_config)
    baseline_model = fetch_predict_estimator(default_config,
                                             raw_data.data[0],
                                             raw_data.data[1])
    baseline_pred = baseline_model.predict(test_raw_data.data[0])
    baseline_test = balanced_accuracy(test_raw_data.data[1], baseline_pred)

    optimizer = EvaluationBasedOptimizer(task_type='classification',
                                         input_data=raw_data,
                                         evaluator=evaluator,
                                         model_id=classifier_id,
                                         time_limit_per_trans=240,
                                         mem_limit_per_trans=10000,
                                         seed=seed)

    task_id = 'fe-%s-%s-%d' % (dataset, classifier_id, iter_num)
    val_acc_list = [baseline_val]
    test_acc_list = [baseline_test]

    for _step in range(iter_num):
        perf, _ignored, incumbent = optimizer.iterate()
        val_acc_list.append(perf)
        # Replay the incumbent on both splits before refitting.
        train_node = optimizer.apply(raw_data, incumbent)
        test_node = optimizer.apply(test_raw_data, incumbent)
        fitted = fetch_predict_estimator(default_config,
                                         train_node.data[0],
                                         train_node.data[1])
        step_pred = fitted.predict(test_node.data[0])
        test_acc_list.append(balanced_accuracy(test_node.data[1], step_pred))
        print(val_acc_list)
        print(test_acc_list)

    file_name = '%s-%d.pkl' % (task_id, run_id)
    save_path = save_dir + file_name
    with open(save_path, 'wb') as out_file:
        pickle.dump([val_acc_list, test_acc_list], out_file)
def evaluate_2rd_layered_bandit(run_id, mth='rb', dataset='pc4', algo='libsvm_svc',
                                cv='holdout', time_limit=120000, seed=1):
    """Play a ``SecondLayerBandit`` until the budget expires, then score it.

    ``bandit.play_once()`` is called repeatedly until ``time_limit`` has
    elapsed or ``bandit.early_stopped_flag`` is set. The bandit's FE and HPO
    incumbents are then used to compute a validation score, a single-model
    test score and two ensemble test scores (``bandit.predict`` with and
    without ``is_weighted=True``). The scores are pickled under
    ``save_folder``, a module-level name defined outside this function.

    Args:
        run_id: Integer run identifier embedded in the output file name.
        mth: Forwarded to ``SecondLayerBandit`` as ``mth``.
        dataset: Dataset name given to ``load_train_test_data``.
        algo: Algorithm identifier forwarded to ``SecondLayerBandit``.
        cv: Forwarded to ``SecondLayerBandit`` as ``eval_type``.
        time_limit: Wall-clock budget compared against ``time.time()``
            differences, i.e. seconds.
        seed: Seed forwarded to the bandit and the evaluator.
    """
    train_data, test_data = load_train_test_data(dataset)
    bandit = SecondLayerBandit(algo, train_data, dataset_id=dataset, mth=mth, seed=seed, eval_type=cv)

    _start_time = time.time()
    _iter_id = 0
    # ``res`` is reported after the loop. Bind it up front so that a loop
    # which never runs (budget already spent, or the bandit already stopped)
    # does not end in an UnboundLocalError.
    res = None

    while not (time.time() > time_limit + _start_time or bandit.early_stopped_flag):
        res = bandit.play_once()
        print('Iteration %d - %.4f' % (_iter_id, res))
        _iter_id += 1

    print(bandit.final_rewards)
    print(bandit.action_sequence)
    print(np.mean(bandit.evaluation_cost['fe']))
    print(np.mean(bandit.evaluation_cost['hpo']))

    fe_optimizer = bandit.optimizer['fe']
    final_train_data = fe_optimizer.apply(train_data, bandit.inc['fe'])
    # Sanity check: replaying the FE incumbent must reproduce it.
    # NOTE(review): this check disappears when Python runs with -O.
    assert final_train_data == bandit.inc['fe']
    final_test_data = fe_optimizer.apply(test_data, bandit.inc['fe'])
    config = bandit.inc['hpo']

    evaluator = ClassificationEvaluator(config, name='fe', seed=seed, resampling_strategy='holdout')
    val_score = evaluator(None, data_node=final_train_data)
    print('==> Best validation score', val_score, res)

    X_train, y_train = final_train_data.data
    clf = fetch_predict_estimator(config, X_train, y_train)
    X_test, y_test = final_test_data.data
    y_pred = clf.predict(X_test)
    test_score = balanced_accuracy(y_test, y_pred)
    print('==> Test score', test_score)

    # Alleviate overfitting.
    y_pred1 = bandit.predict(test_data.data[0])
    test_score1 = balanced_accuracy(y_test, y_pred1)
    print('==> Test score with average ensemble', test_score1)

    y_pred2 = bandit.predict(test_data.data[0], is_weighted=True)
    test_score2 = balanced_accuracy(y_test, y_pred2)
    print('==> Test score with weighted ensemble', test_score2)

    save_path = save_folder + '%s_%s_%d_%d_%s.pkl' % (mth, dataset, time_limit, run_id, algo)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, val_score, test_score, test_score1, test_score2], f)
Beispiel #5
0
def conduct_hpo(dataset='pc4',
                classifier_id='random_forest',
                iter_num=100,
                run_id=0,
                seed=1):
    """Record validation/test scores over ``SMACOptimizer`` iterations.

    A baseline entry is recorded first with the classifier's default
    configuration (validation value is ``1. - evaluator(default_config)``).
    Then, for each of ``iter_num`` calls to ``optimizer.iterate()``, the
    returned performance is appended to the validation list and an estimator
    built from the returned configuration is scored on the test split with
    ``balanced_accuracy``. Both lists are printed every iteration and pickled
    at the end.

    Relies on the module-level name ``save_dir`` defined outside this
    function.

    Args:
        dataset: Dataset name given to ``load_train_test_data``.
        classifier_id: Key into auto-sklearn's ``_classifiers`` registry.
        iter_num: Number of optimizer iterations.
        run_id: Integer appended to the output file name.
        seed: Seed used for data loading and the evaluator.
    """
    from autosklearn.pipeline.components.classification import _classifiers

    cs = _classifiers[classifier_id].get_hyperparameter_search_space()
    cs.add_hyperparameter(
        UnParametrizedHyperparameter("estimator", classifier_id))

    raw_data, test_raw_data = load_train_test_data(dataset, random_state=seed)
    evaluator = ClassificationEvaluator(cs.get_default_configuration(),
                                        name='hpo',
                                        data_node=raw_data,
                                        resampling_strategy='holdout',
                                        seed=seed)

    # Baseline: default configuration before any optimizer step.
    default_config = cs.get_default_configuration()
    baseline_val = 1. - evaluator(default_config)
    baseline_model = fetch_predict_estimator(default_config,
                                             raw_data.data[0],
                                             raw_data.data[1])
    baseline_pred = baseline_model.predict(test_raw_data.data[0])
    baseline_test = balanced_accuracy(test_raw_data.data[1], baseline_pred)

    optimizer = SMACOptimizer(evaluator,
                              cs,
                              trials_per_iter=2,
                              output_dir='logs',
                              per_run_time_limit=180)
    task_id = 'hpo-%s-%s-%d' % (dataset, classifier_id, iter_num)

    val_acc_list = [baseline_val]
    test_acc_list = [baseline_test]

    for _step in range(iter_num):
        perf, _ignored, config = optimizer.iterate()
        val_acc_list.append(perf)
        fitted = fetch_predict_estimator(config,
                                         raw_data.data[0],
                                         raw_data.data[1])
        step_pred = fitted.predict(test_raw_data.data[0])
        test_acc_list.append(
            balanced_accuracy(test_raw_data.data[1], step_pred))
        print(val_acc_list)
        print(test_acc_list)

    file_name = '%s-%d.pkl' % (task_id, run_id)
    save_path = save_dir + file_name
    with open(save_path, 'wb') as out_file:
        pickle.dump([val_acc_list, test_acc_list], out_file)
Beispiel #6
0
def evaluate_imbalanced(algorithms,
                        dataset,
                        run_id,
                        trial_num,
                        seed,
                        time_limit=1200):
    """Optimize a ``FirstLayerBandit`` without rebalancing and pickle scores.

    No ``DataBalancer`` step and no ``refit()`` call are performed here, and
    ``time_limit`` is only used in the log line (it is not passed to the
    bandit). Results are written under ``project_dir + 'data/'``.

    Relies on the module-level names ``hmab_flag``, ``opt_algo``,
    ``per_run_time_limit`` and ``project_dir``, which are defined outside
    this function.

    Args:
        algorithms: Candidate algorithms handed to ``FirstLayerBandit``; the
            length is embedded in the output file name.
        dataset: Dataset name given to ``load_train_test_data``.
        run_id: Integer run identifier used in the log line and file name.
        trial_num: Integer forwarded to ``FirstLayerBandit``.
        seed: Integer seed forwarded to ``FirstLayerBandit``.
        time_limit: Integer printed in the log line only.
    """
    print('%s-%s-%d: %d' % (hmab_flag, dataset, run_id, time_limit))
    started_at = time.time()
    train_data, test_data = load_train_test_data(dataset)
    # Two distinct training labels select the binary task type.
    if len(set(train_data.data[1])) == 2:
        cls_task_type = BINARY_CLS
    else:
        cls_task_type = MULTICLASS_CLS
    # The bandit is scored with balanced accuracy rather than plain accuracy.
    balanced_acc_metric = make_scorer(balanced_accuracy)
    bandit_options = dict(output_dir='logs',
                          per_run_time_limit=per_run_time_limit,
                          dataset_name=dataset,
                          ensemble_size=50,
                          opt_algo=opt_algo,
                          metric=balanced_acc_metric,
                          fe_algo='bo',
                          seed=seed)
    bandit = FirstLayerBandit(cls_task_type, trial_num, algorithms,
                              train_data, **bandit_options)
    bandit.optimize()
    model_desc = [
        bandit.nbest_algo_ids,
        bandit.optimal_algo_id,
        bandit.final_rewards,
        bandit.action_sequence,
    ]

    time_taken = time.time() - started_at
    validation_accuracy = np.max(bandit.final_rewards)
    single_model_pred = bandit._best_predict(test_data)
    test_accuracy = balanced_accuracy(test_data.data[1], single_model_pred)
    ensemble_pred = bandit._es_predict(test_data)
    test_accuracy_with_ens = balanced_accuracy(test_data.data[1],
                                               ensemble_pred)
    data = [
        dataset,
        validation_accuracy,
        test_accuracy,
        test_accuracy_with_ens,
        time_taken,
        model_desc,
    ]
    print(model_desc)
    # Only the dataset name and the three scores are echoed.
    print(data[:4])

    file_name = 'data/%s_%s_%s_%d_%d_%d_%d.pkl' % (
        hmab_flag, opt_algo, dataset, trial_num, len(algorithms), seed, run_id)
    save_path = project_dir + file_name
    with open(save_path, 'wb') as out_file:
        pickle.dump(data, out_file)
Beispiel #7
0
def evaluate(train_data, test_data, config):
    """Fit an estimator for ``config`` and score it on ``test_data``.

    Args:
        train_data: Object whose ``.data`` unpacks to ``(X_train, y_train)``.
        test_data: Object whose ``.data`` unpacks to ``(X_test, y_test)``.
        config: Configuration handed to ``fetch_predict_estimator``.

    Returns:
        The value of ``balanced_accuracy(y_test, predictions)``.
    """
    # Imported lazily, as in the original, so solnml is only needed on call.
    from solnml.components.evaluators.evaluator import fetch_predict_estimator

    features_train, labels_train = train_data.data
    features_test, labels_test = test_data.data
    shape_report = (str(features_train.shape), str(features_test.shape))
    print('X_train/test shapes: %s, %s' % shape_report)

    fitted = fetch_predict_estimator(config, features_train, labels_train)
    predictions = fitted.predict(features_test)
    return balanced_accuracy(labels_test, predictions)
Beispiel #8
0
def evaluate_autosklearn(algorithms, dataset, run_id, trial_num, seed, time_limit=1200):
    """Iterate only the random-forest HPO optimizer of a ``FirstLayerBandit``.

    Instead of ``bandit.optimize()``, the ``'hpo'`` optimizer of the
    ``'random_forest'`` sub-bandit is iterated until ``time_limit`` seconds
    have elapsed. A ``RandomForest`` is then built from the incumbent
    configuration (minus its ``'estimator'`` key), fitted on the training
    split and scored on the test split with ``balanced_accuracy``. Results
    are pickled under ``project_dir`` and every regular file directly inside
    ``./logs/`` is deleted afterwards.

    Relies on the module-level names ``hmab_flag``, ``ausk_flag``,
    ``opt_algo``, ``per_run_time_limit`` and ``project_dir``, which are
    defined outside this function.

    Args:
        algorithms: Candidate algorithms handed to ``FirstLayerBandit``; the
            length is embedded in the output file name.
        dataset: Dataset name given to ``load_train_test_data``.
        run_id: Integer run identifier used in the log line and file name.
        trial_num: Integer forwarded to ``FirstLayerBandit``.
        seed: Integer seed forwarded to ``FirstLayerBandit``.
        time_limit: Wall-clock budget in seconds for the iteration loop.
    """
    print('%s-%s-%d: %d' % (hmab_flag, dataset, run_id, time_limit))

    _start_time = time.time()
    train_data, test_data = load_train_test_data(dataset, task_type=MULTICLASS_CLS)
    # Two distinct training labels select the binary task type.
    if len(set(train_data.data[1])) == 2:
        cls_task_type = BINARY_CLS
    else:
        cls_task_type = MULTICLASS_CLS
    balanced_acc_metric = make_scorer(balanced_accuracy)

    if is_unbalanced_dataset(train_data):
        from solnml.components.feature_engineering.transformations.balancer.smote_balancer import DataBalancer
        balancer = DataBalancer()
        train_data = balancer.operate(train_data)

    bandit_options = dict(output_dir='logs',
                          per_run_time_limit=per_run_time_limit,
                          dataset_name=dataset,
                          ensemble_size=50,
                          inner_opt_algorithm=opt_algo,
                          metric=balanced_acc_metric,
                          fe_algo='bo',
                          seed=seed,
                          time_limit=time_limit,
                          eval_type='holdout')
    bandit = FirstLayerBandit(cls_task_type, trial_num, algorithms,
                              train_data, **bandit_options)
    # Drive the random-forest HPO optimizer directly until the budget is spent.
    while time.time() - _start_time < time_limit:
        bandit.sub_bandits['random_forest'].optimizer['hpo'].iterate()

    # No feature-engineering trace exists in this variant.
    fe_exp_output = dict()
    hpo_optimizer = bandit.sub_bandits['random_forest'].optimizer['hpo']
    hpo_exp_output = hpo_optimizer.exp_output
    inc_config = hpo_optimizer.incumbent_config.get_dictionary()
    # 'estimator' is dropped before the dict is used as constructor kwargs.
    inc_config.pop('estimator')
    from solnml.components.models.classification.random_forest import RandomForest
    rf = RandomForest(**inc_config)
    rf.fit(train_data.data[0], train_data.data[1])
    validation_accuracy = hpo_optimizer.incumbent_perf
    best_pred = rf.predict(test_data.data[0])
    test_accuracy = balanced_accuracy(test_data.data[1], best_pred)

    # The start timestamp (not the elapsed time) is what gets stored.
    data = [
        dataset,
        validation_accuracy,
        test_accuracy,
        fe_exp_output,
        hpo_exp_output,
        _start_time,
    ]
    file_name = '%s_%s_%s_%d_%d_%d_%d_%d.pkl' % (
        ausk_flag, opt_algo, dataset, trial_num, len(algorithms), seed, run_id, time_limit)
    save_path = project_dir + file_name
    with open(save_path, 'wb') as out_file:
        pickle.dump(data, out_file)

    # Remove the regular files left directly inside the log directory.
    del_path = './logs/'
    for entry in os.listdir(del_path):
        candidate = del_path + "/" + entry
        if not os.path.isfile(candidate):
            continue
        os.remove(candidate)
Beispiel #9
0
def evaluate_base_model(classifier_id, dataset):
    """Print test scores of ``classifier_id`` with its default configuration.

    The default configuration comes from the classifier's auto-sklearn search
    space, extended with an ``"estimator"`` hyperparameter. ``balanced_accuracy``
    is printed twice, the second time with its two arguments swapped.

    Args:
        classifier_id: Key into auto-sklearn's ``_classifiers`` registry.
        dataset: Dataset name given to ``load_train_test_data``.
    """
    _start_time = time.time()
    train_data, test_data = load_train_test_data(dataset)

    from autosklearn.pipeline.components.classification import _classifiers
    cs = _classifiers[classifier_id].get_hyperparameter_search_space()
    cs.add_hyperparameter(
        UnParametrizedHyperparameter("estimator", classifier_id))
    default_config = cs.get_default_configuration()

    features_train, labels_train = train_data.data
    features_test, labels_test = test_data.data
    shape_report = (str(features_train.shape), str(features_test.shape))
    print('X_train/test shapes: %s, %s' % shape_report)

    # Imported lazily, as in the original, so solnml is only needed on call.
    from solnml.components.evaluators.cls_evaluator import fetch_predict_estimator
    fitted = fetch_predict_estimator(default_config, features_train,
                                     labels_train)

    predictions = fitted.predict(features_test)
    print(balanced_accuracy(labels_test, predictions))
    # Same metric with the argument order reversed.
    print(balanced_accuracy(predictions, labels_test))
Beispiel #10
0
def evaluate_autosklearn(algorithms,
                         dataset,
                         run_id,
                         trial_num,
                         seed,
                         time_limit=1200):
    """Fit an ``AutoSklearnClassifier`` on ``dataset`` and pickle its scores.

    The classifier is configured with a holdout split
    (``train_size=0.67``), an ensemble of size 1 and no meta-learning
    initial configurations. After fitting, it is refitted on the full
    training split and scored on the test split.

    Relies on the module-level names ``ausk_flag``, ``per_run_time_limit``
    and ``project_dir``, which are defined outside this function.

    Args:
        algorithms: Passed as ``include_estimators``; the length is embedded
            in the output file name.
        dataset: Dataset name given to ``load_train_test_data``.
        run_id: Integer run identifier used in the log line and file name.
        trial_num: Integer embedded in the output file name.
        seed: Converted with ``int()`` and passed to the classifier.
        time_limit: Passed as ``time_left_for_this_task``.
    """
    print('AUSK-%s-%d: %d' % (dataset, run_id, time_limit))
    automl_options = dict(
        time_left_for_this_task=time_limit,
        per_run_time_limit=per_run_time_limit,
        include_preprocessors=None,
        exclude_preprocessors=None,
        n_jobs=1,
        include_estimators=algorithms,
        ensemble_memory_limit=8192,
        ml_memory_limit=8192,
        ensemble_size=1,
        ensemble_nbest=1,
        initial_configurations_via_metalearning=0,
        seed=int(seed),
        resampling_strategy='holdout',
        resampling_strategy_arguments={'train_size': 0.67},
    )
    automl = autosklearn.classification.AutoSklearnClassifier(**automl_options)
    print(automl)

    train_data, test_data = load_train_test_data(dataset)
    X, y = train_data.data
    feat_type = []
    for column_type in train_data.feature_types:
        if column_type == CATEGORICAL:
            feat_type.append('Categorical')
        else:
            feat_type.append('Numerical')

    # This local import binds ``balanced_accuracy`` for the whole function,
    # so the test score below is computed by auto-sklearn's metric object.
    from autosklearn.metrics import balanced_accuracy
    automl.fit(X.copy(),
               y.copy(),
               metric=balanced_accuracy,
               feat_type=feat_type)
    model_desc = automl.show_models()
    print(model_desc)
    val_result = np.max(automl.cv_results_['mean_test_score'])
    print('Best validation accuracy', val_result)

    X_test, y_test = test_data.data
    automl.refit(X.copy(), y.copy())
    predictions = automl.predict(X_test)
    test_result = balanced_accuracy(y_test, predictions)
    print('Test accuracy', test_result)

    file_name = 'data/%s_%s_%d_%d_%d_%d.pkl' % (
        ausk_flag, dataset, trial_num, len(algorithms), seed, run_id)
    save_path = project_dir + file_name
    with open(save_path, 'wb') as out_file:
        pickle.dump([dataset, val_result, test_result, model_desc], out_file)
def evaluate_hmab(algorithms,
                  dataset,
                  run_id,
                  trial_num,
                  seed,
                  time_limit=1200):
    """Optimize a ``FirstLayerBandit`` and pickle scores plus optimizer traces.

    The bandit is built with ``eval_type='partial'``. After
    ``bandit.optimize()`` the ``exp_output`` of the ``'fe'`` and ``'hpo'``
    optimizers of the ``'random_forest'`` sub-bandit is collected alongside
    the validation and test scores. Results are pickled under
    ``project_dir`` and every regular file directly inside ``./logs/`` is
    deleted afterwards.

    Relies on the module-level names ``hmab_flag``, ``opt_algo``,
    ``per_run_time_limit`` and ``project_dir``, which are defined outside
    this function.

    Args:
        algorithms: Candidate algorithms handed to ``FirstLayerBandit``; the
            length is embedded in the output file name.
        dataset: Dataset name given to ``load_train_test_data``.
        run_id: Integer run identifier used in the log line and file name.
        trial_num: Integer forwarded to ``FirstLayerBandit``.
        seed: Integer seed forwarded to ``FirstLayerBandit``.
        time_limit: Budget forwarded to ``FirstLayerBandit``
            (presumably seconds -- confirm against the bandit).
    """
    print('%s-%s-%d: %d' % (hmab_flag, dataset, run_id, time_limit))

    _start_time = time.time()
    train_data, test_data = load_train_test_data(dataset,
                                                 task_type=MULTICLASS_CLS)
    # Two distinct training labels select the binary task type.
    if len(set(train_data.data[1])) == 2:
        cls_task_type = BINARY_CLS
    else:
        cls_task_type = MULTICLASS_CLS
    balanced_acc_metric = make_scorer(balanced_accuracy)

    if is_unbalanced_dataset(train_data):
        from solnml.components.feature_engineering.transformations.preprocessor.smote_balancer import DataBalancer
        balancer = DataBalancer()
        train_data = balancer.operate(train_data)

    bandit_options = dict(output_dir='logs',
                          per_run_time_limit=per_run_time_limit,
                          dataset_name=dataset,
                          ensemble_size=50,
                          inner_opt_algorithm=opt_algo,
                          metric=balanced_acc_metric,
                          fe_algo='bo',
                          seed=seed,
                          time_limit=time_limit,
                          eval_type='partial')
    bandit = FirstLayerBandit(cls_task_type, trial_num, algorithms,
                              train_data, **bandit_options)
    # A previously used alternative (kept out of the code path) iterated only
    # the random_forest 'fe' optimizer until the time limit was reached.
    bandit.optimize()
    rf_optimizers = bandit.sub_bandits['random_forest'].optimizer
    fe_exp_output = rf_optimizers['fe'].exp_output
    hpo_exp_output = bandit.sub_bandits['random_forest'].optimizer[
        'hpo'].exp_output

    validation_accuracy = np.max(bandit.final_rewards)
    single_model_pred = bandit._best_predict(test_data)
    test_accuracy = balanced_accuracy(test_data.data[1], single_model_pred)

    # The ensemble prediction is taken only after refit().
    bandit.refit()
    ensemble_pred = bandit._es_predict(test_data)
    test_accuracy_with_ens = balanced_accuracy(test_data.data[1],
                                               ensemble_pred)

    # The start timestamp (not the elapsed time) is what gets stored.
    data = [
        dataset,
        validation_accuracy,
        test_accuracy,
        test_accuracy_with_ens,
        fe_exp_output,
        hpo_exp_output,
        _start_time,
    ]

    file_name = '%s_%s_%s_%d_%d_%d_%d_%d.pkl' % (
        hmab_flag, opt_algo, dataset, trial_num, len(algorithms), seed, run_id,
        time_limit)
    save_path = project_dir + file_name
    with open(save_path, 'wb') as out_file:
        pickle.dump(data, out_file)

    # Remove the regular files left directly inside the log directory.
    del_path = './logs/'
    for entry in os.listdir(del_path):
        candidate = del_path + "/" + entry
        if not os.path.isfile(candidate):
            continue
        os.remove(candidate)
Beispiel #12
0
def evaluate_autosklearn(algorithms, rep_id, trial_num=100,
                         dataset='credit', time_limit=1200, seed=1,
                         enable_ens=False, enable_meta_learning=False,
                         eval_type='holdout'):
    """Fit an ``AutoSklearnClassifier`` and pickle validation/test scores.

    The classifier uses a holdout split (``train_size=0.67``) when
    ``eval_type == 'holdout'`` and 5-fold cross-validation for any other
    value. Both strategies now share a single constructor call, so every
    other setting -- including ``seed``, which is always converted with
    ``int()`` -- is identical across them.

    Relies on the module-level names ``per_run_time_limit`` and ``save_dir``,
    which are defined outside this function.

    Args:
        algorithms: Passed as ``include_estimators``; the length is embedded
            in the task id.
        rep_id: Integer repetition id used in the banner and file name.
        trial_num: Integer embedded in the task id.
        dataset: Dataset name given to ``load_train_test_data``.
        time_limit: Converted with ``int()`` and passed as
            ``time_left_for_this_task``.
        seed: Converted with ``int()`` and passed to the classifier.
        enable_ens: When true, ensemble size and n-best are 50; otherwise 1.
        enable_meta_learning: When true, 25 initial configurations come from
            meta-learning; otherwise 0.
        eval_type: ``'holdout'`` selects the holdout strategy; anything else
            selects ``'cv'``.
    """
    print('%s\nDataset: %s, Run_id: %d, Budget: %d.\n%s' % ('=' * 50, dataset, rep_id, time_limit, '=' * 50))
    mth_id = 'ausk-ens%d' % enable_ens
    task_id = '%s-%s-%d-%d' % (dataset, mth_id, len(algorithms), trial_num)
    ensemble_size, ensemble_nbest = (50, 50) if enable_ens else (1, 1)
    init_config_via_metalearning = 25 if enable_meta_learning else 0

    # Only the resampling settings depend on eval_type.
    if eval_type == 'holdout':
        resampling_strategy = 'holdout'
        resampling_strategy_arguments = {'train_size': 0.67}
    else:
        resampling_strategy = 'cv'
        resampling_strategy_arguments = {'folds': 5}

    automl = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=int(time_limit),
        per_run_time_limit=per_run_time_limit,
        n_jobs=1,
        include_estimators=algorithms,
        ensemble_memory_limit=16384,
        ml_memory_limit=16384,
        ensemble_size=ensemble_size,
        ensemble_nbest=ensemble_nbest,
        initial_configurations_via_metalearning=init_config_via_metalearning,
        seed=int(seed),
        resampling_strategy=resampling_strategy,
        resampling_strategy_arguments=resampling_strategy_arguments
    )

    print(automl)
    raw_data, test_raw_data = load_train_test_data(dataset)
    X, y = raw_data.data
    X_test, y_test = test_raw_data.data
    feat_type = ['Categorical' if _type == CATEGORICAL else 'Numerical'
                 for _type in raw_data.feature_types]
    # Aliased so the module-level ``balanced_accuracy`` used for the test
    # score below is not shadowed.
    from autosklearn.metrics import balanced_accuracy as balanced_acc
    automl.fit(X.copy(), y.copy(), feat_type=feat_type, metric=balanced_acc)
    model_desc = automl.show_models()
    str_stats = automl.sprint_statistics()
    valid_results = automl.cv_results_['mean_test_score']
    time_records = automl.cv_results_['mean_fit_time']
    validation_accuracy = np.max(valid_results)

    # Test performance.
    automl.refit(X.copy(), y.copy())
    predictions = automl.predict(X_test)
    test_accuracy = balanced_accuracy(y_test, predictions)

    # Print the statistics string and model description returned by
    # auto-sklearn, followed by the two scores.
    print(str_stats)
    print(model_desc)
    print('Validation Accuracy:', validation_accuracy)
    print("Test Accuracy      :", test_accuracy)

    save_path = save_dir + '%s-%d.pkl' % (task_id, rep_id)
    with open(save_path, 'wb') as f:
        stats = [model_desc, str_stats, valid_results, time_records, time_limit]
        pickle.dump([validation_accuracy, test_accuracy, stats], f)
def evaluate_2rd_bandit(dataset, algo, time_limit, run_id, seed):
    """Play a ``SecondLayerBandit`` until the budget expires, then score it.

    The bandit is created with ``mth='alter_hpo'``, holdout evaluation and
    ``enable_intersection=True``. ``bandit.play_once()`` is called repeatedly
    until ``time_limit`` has elapsed or ``bandit.early_stopped_flag`` is set.
    The bandit's FE and HPO incumbents are then used to compute a validation
    score, a single-model test score and two ensemble test scores
    (``bandit.predict`` with and without ``is_weighted=True``). The scores
    are pickled under ``save_dir``, a module-level name defined outside this
    function.

    Args:
        dataset: Dataset name given to ``load_train_test_data``.
        algo: Algorithm identifier forwarded to ``SecondLayerBandit``.
        time_limit: Wall-clock budget compared against ``time.time()``
            differences, i.e. seconds.
        run_id: Integer run identifier used in the log line and file name.
        seed: Seed forwarded to the bandit and the evaluator.
    """
    print('HMAB-%s-%s: run_id=%d' % (dataset, algo, run_id))
    print('==> Start to Evaluate', dataset, 'Budget', time_limit)
    train_data, test_data = load_train_test_data(dataset)
    enable_intersect = True
    bandit = SecondLayerBandit(algo,
                               train_data,
                               per_run_time_limit=300,
                               seed=seed,
                               eval_type='holdout',
                               mth='alter_hpo',
                               enable_intersection=enable_intersect)
    mth_id = 'hmab' if enable_intersect else 'hmab0'
    _start_time = time.time()
    _iter_id = 0
    # ``res`` is reported after the loop. Bind it up front so that a loop
    # which never runs (budget already spent, or the bandit already stopped)
    # does not end in an UnboundLocalError.
    res = None

    while not (time.time() > time_limit + _start_time
               or bandit.early_stopped_flag):
        res = bandit.play_once()
        print('Iteration %d - %.4f' % (_iter_id, res))
        _iter_id += 1

    print(bandit.final_rewards)
    print(bandit.action_sequence)
    print(np.mean(bandit.evaluation_cost['fe']))
    print(np.mean(bandit.evaluation_cost['hpo']))

    fe_optimizer = bandit.optimizer['fe']
    final_train_data = fe_optimizer.apply(train_data, bandit.inc['fe'])
    # Sanity check: replaying the FE incumbent must reproduce it.
    # NOTE(review): this check disappears when Python runs with -O.
    assert final_train_data == bandit.inc['fe']
    final_test_data = fe_optimizer.apply(test_data, bandit.inc['fe'])
    config = bandit.inc['hpo']

    evaluator = ClassificationEvaluator(config,
                                        name='fe',
                                        seed=seed,
                                        resampling_strategy='holdout')
    val_score = evaluator(None, data_node=final_train_data)
    print('==> Best validation score', val_score, res)

    X_train, y_train = final_train_data.data
    clf = fetch_predict_estimator(config, X_train, y_train)
    X_test, y_test = final_test_data.data
    y_pred = clf.predict(X_test)
    test_score = balanced_accuracy(y_test, y_pred)
    print('==> Test score', test_score)

    # Alleviate overfitting.
    y_pred1 = bandit.predict(test_data.data[0])
    test_score1 = balanced_accuracy(y_test, y_pred1)
    print('==> Test score with average ensemble', test_score1)

    y_pred2 = bandit.predict(test_data.data[0], is_weighted=True)
    test_score2 = balanced_accuracy(y_test, y_pred2)
    print('==> Test score with weighted ensemble', test_score2)

    save_path = save_dir + '%s_2rd_bandit_%s_%d_%d_%s.pkl' % (
        mth_id, dataset, time_limit, run_id, algo)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, val_score, test_score, test_score1, test_score2],
                    f)