def evaluate_1stlayer_bandit(run_id,
                             B,
                             algorithms,
                             dataset='credit',
                             trial_num=200,
                             seed=1):
    """Run the first-layer bandit with the discounted-UCB strategy and
    persist the run statistics to a pickle file.

    Args:
        run_id: identifier of this run, used in the output filename.
        B: bandit hyperparameter, assigned to ``bandit.B`` before optimizing.
        algorithms: candidate algorithm identifiers (one arm per algorithm).
        dataset: dataset name passed to ``load_data``.
        trial_num: number of optimization trials.
        seed: random seed forwarded to the bandit.

    Returns:
        Wall-clock time in seconds (float) of the whole evaluation.
    """
    _start_time = time.time()
    raw_data = load_data(dataset, datanode_returned=True)
    bandit = FirstLayerBandit(trial_num,
                              algorithms,
                              raw_data,
                              output_dir='logs',
                              per_run_time_limit=per_run_time_limit,
                              dataset_name=dataset,
                              eval_type='holdout',
                              seed=seed)
    bandit.B = B
    bandit.optimize(strategy='discounted_ucb')
    print(bandit.final_rewards)
    print(bandit.action_sequence)
    time_cost = time.time() - _start_time

    save_folder = os.path.join(project_dir, 'data', '1stlayer-mab')
    # exist_ok avoids the check-then-create race when several runs start
    # concurrently (the original exists()/makedirs() pair could collide).
    os.makedirs(save_folder, exist_ok=True)
    save_path = os.path.join(
        save_folder, 'eval_ducb_%.4f_%s_%d_%d_%d.pkl' % (
            B, dataset, run_id, trial_num, len(algorithms)))
    with open(save_path, 'wb') as f:
        data = [
            bandit.final_rewards, bandit.time_records, bandit.action_sequence,
            time_cost
        ]
        pickle.dump(data, f)

    return time_cost
# --- Example #2 ---
def evaluate_hmab(algorithms,
                  run_id,
                  dataset='credit',
                  trial_num=200,
                  n_jobs=1,
                  meta_configs=0,
                  seed=1,
                  eval_type='holdout'):
    """Evaluate the hierarchical MAB on a train/test split, build an
    ensemble over the found models, and pickle the resulting scores.

    Returns:
        Wall-clock cost in whole seconds (int).
    """
    task_id = '%s-hmab-%d-%d' % (dataset, len(algorithms), trial_num)
    start_stamp = time.time()
    train_node, test_node = load_train_test_data(dataset)
    bandit = FirstLayerBandit(trial_num,
                              algorithms,
                              train_node,
                              output_dir='logs/%s/' % task_id,
                              per_run_time_limit=per_run_time_limit,
                              dataset_name='%s-%d' % (dataset, run_id),
                              n_jobs=n_jobs,
                              meta_configs=meta_configs,
                              seed=seed,
                              eval_type=eval_type)
    bandit.optimize()
    time_cost = int(time.time() - start_stamp)
    print(bandit.final_rewards)
    print(bandit.action_sequence)

    # Best per-trial reward serves as the validation score.
    validation_accuracy = np.max(bandit.final_rewards)
    builder = EnsembleBuilder(bandit, n_jobs=n_jobs)
    test_accuracy_with_ens = builder.score(test_node)

    print('Dataset                     : %s' % dataset)
    print('Validation score without ens: %f' % validation_accuracy)
    print("Test score with ensemble    : %f" % test_accuracy_with_ens)

    save_path = save_dir + '%s-%d.pkl' % (task_id, run_id)
    stats = [time_cost, 0., bandit.time_records, bandit.final_rewards]
    with open(save_path, 'wb') as f:
        pickle.dump([validation_accuracy, test_accuracy_with_ens, stats], f)
    del bandit
    return time_cost
# --- Example #3 ---
def evaluate_1stlayer_bandit(algorithms, mode, dataset='credit', trial_num=200, seed=1):
    """Optimize the first-layer bandit with feature sharing controlled by
    ``mode`` and dump the reward/time traces to a pickle under project_dir.

    Returns:
        Wall-clock time in seconds (float) of the whole evaluation.
    """
    tick = time.time()
    data_node = load_data(dataset, datanode_returned=True)
    bandit = FirstLayerBandit(trial_num, algorithms, data_node,
                              output_dir='logs',
                              per_run_time_limit=per_run_time_limit,
                              dataset_name=dataset,
                              share_feature=mode,
                              seed=seed)
    bandit.optimize()
    print(bandit.final_rewards)
    print(bandit.action_sequence)
    time_cost = time.time() - tick

    save_path = project_dir + 'data/shared_hmab_%d_%s_%d_%d_%d.pkl' % (
        mode, dataset, trial_num, len(algorithms), seed)
    payload = [bandit.final_rewards, bandit.time_records, bandit.action_sequence, time_cost]
    with open(save_path, 'wb') as f:
        pickle.dump(payload, f)

    return time_cost
# --- Example #4 ---
def evaluate_1stlayer_bandit(algorithms,
                             dataset,
                             run_id,
                             trial_num,
                             seed,
                             time_limit=1200):
    """Run the first-layer bandit on a train/test split, score the winning
    model with and without ensembling, and pickle the results.

    Args:
        algorithms: candidate algorithm identifiers (one arm per algorithm).
        dataset: dataset name passed to ``load_train_test_data``.
        run_id: identifier of this run, used in the output filename.
        trial_num: number of optimization trials.
        seed: random seed forwarded to the bandit.
        time_limit: accepted for interface compatibility but never used in
            this body — TODO confirm whether it should bound the run.

    Returns:
        Wall-clock time in seconds (float) of the whole evaluation.
    """
    _start_time = time.time()
    train_data, test_data = load_train_test_data(dataset)
    # NOTE(review): `opt_algo` and `per_run_time_limit` are module-level
    # globals defined elsewhere in the file.
    bandit = FirstLayerBandit(trial_num,
                              algorithms,
                              train_data,
                              output_dir='logs',
                              per_run_time_limit=per_run_time_limit,
                              dataset_name=dataset,
                              opt_algo=opt_algo,
                              seed=seed)
    bandit.optimize()
    model_desc = [
        bandit.nbest_algo_ids, bandit.optimal_algo_id, bandit.final_rewards,
        bandit.action_sequence
    ]

    time_taken = time.time() - _start_time
    validation_accuracy = np.max(bandit.final_rewards)
    test_accuracy = bandit.score(test_data, metric_func=balanced_accuracy)
    test_accuracy_with_ens = EnsembleBuilder(bandit).score(
        test_data, metric_func=balanced_accuracy)
    data = [
        dataset, validation_accuracy, test_accuracy, test_accuracy_with_ens,
        time_taken, model_desc
    ]
    print(model_desc)

    print(data[:4])

    save_path = project_dir + 'data/hmab_%s_%s_%d_%d_%d_%d.pkl' % (
        opt_algo, dataset, trial_num, len(algorithms), seed, run_id)
    with open(save_path, 'wb') as f:
        pickle.dump(data, f)
    # Return the elapsed time for consistency with the sibling evaluators,
    # which all return their time cost (backward compatible: callers that
    # ignored the previous implicit None are unaffected).
    return time_taken
# --- Example #5 ---
def evaluate_1stlayer_bandit(algorithms,
                             run_id,
                             dataset='credit',
                             trial_num=200,
                             n_jobs=1,
                             meta_configs=0,
                             seed=1):
    """Evaluate the first-layer bandit on a holdout split, cross-check the
    two validation-score computations, and (in debug mode) compare the two
    ensemble implementations before pickling the statistics.

    Returns:
        Wall-clock cost in whole seconds (int).
    """
    task_id = '%s-hmab-%d-%d' % (dataset, len(algorithms), trial_num)
    begin = time.time()
    train_node, test_node = load_train_test_data(dataset, random_state=seed)
    bandit = FirstLayerBandit(trial_num,
                              algorithms,
                              train_node,
                              output_dir='logs/%s/' % task_id,
                              per_run_time_limit=per_run_time_limit,
                              dataset_name='%s-%d' % (dataset, run_id),
                              n_jobs=n_jobs,
                              meta_configs=meta_configs,
                              seed=seed,
                              eval_type='holdout')
    bandit.optimize()
    time_cost = int(time.time() - begin)
    print(bandit.final_rewards)
    print(bandit.action_sequence)

    # The best per-trial reward and bandit.validate() must agree.
    val_acc0 = np.max(bandit.final_rewards)
    val_acc1 = bandit.validate()
    assert np.isclose(val_acc0, val_acc1)

    test_acc_plain = bandit.score(test_node)
    # Debug switch: compare the two ensemble implementations side by side.
    mode = True
    if mode:
        ens_acc0 = ensemble_implementation_examples(bandit, test_node)
        ens_acc1 = EnsembleBuilder(bandit).score(test_node)

        print('Dataset                     : %s' % dataset)
        print('Validation score without ens: %f - %f' % (val_acc0, val_acc1))
        print("Test score without ensemble : %f" % test_acc_plain)
        print("Test score with ensemble    : %f - %f" % (ens_acc0, ens_acc1))

        save_path = save_dir + '%s-%d.pkl' % (task_id, run_id)
        stats = [time_cost, ens_acc0, ens_acc1, test_acc_plain]
        with open(save_path, 'wb') as f:
            pickle.dump([val_acc0, ens_acc1, stats], f)
    del bandit
    return time_cost
# --- Example #6 ---
def ensemble_implementation_examples(bandit: FirstLayerBandit,
                                     test_data: DataNode):
    """Reference ensemble-selection implementation: refit the top
    configurations of each n-best algorithm, collect validation/test
    probability predictions, and score an EnsembleSelection model.

    Returns the test-set accuracy (float) of the selected ensemble.
    """
    from sklearn.model_selection import StratifiedShuffleSplit
    from sklearn.metrics import accuracy_score
    from autosklearn.metrics import accuracy
    n_best = 20  # number of best configurations refit per algorithm
    stats = bandit.fetch_ensemble_members(test_data)
    seed = stats['split_seed']
    test_size = 0.2
    train_predictions = []
    test_predictions = []
    for algo_id in bandit.nbest_algo_ids:
        X, y = stats[algo_id]['train_dataset'].data
        # NOTE(review): the split uses random_state=1, not the `seed`
        # fetched above — confirm this reproduces the split the bandit
        # used during training.
        sss = StratifiedShuffleSplit(n_splits=1,
                                     test_size=test_size,
                                     random_state=1)
        # n_splits=1, so this loop binds exactly one train/valid split.
        for train_index, test_index in sss.split(X, y):
            X_train, X_valid = X[train_index], X[test_index]
            y_train, y_valid = y[train_index], y[test_index]

        X_test, y_test = stats[algo_id]['test_dataset'].data
        configs = stats[algo_id]['configurations']
        performance = stats[algo_id]['performance']
        # Descending sort by performance; keep the top n_best configs.
        best_index = np.argsort(-np.array(performance))
        best_configs = [configs[i] for i in best_index[:n_best]]

        for config in best_configs:
            try:
                # Build the ML estimator.
                _, estimator = get_estimator(config)
                # print(X_train.shape, X_test.shape)
                estimator.fit(X_train, y_train)
                y_valid_pred = estimator.predict_proba(X_valid)
                y_test_pred = estimator.predict_proba(X_test)
                train_predictions.append(y_valid_pred)
                test_predictions.append(y_test_pred)
            except Exception as e:
                # Best-effort: skip configurations that fail to fit/predict.
                print(str(e))

    es = EnsembleSelection(ensemble_size=50,
                           task_type=1,
                           metric=accuracy,
                           random_state=np.random.RandomState(seed))
    assert len(train_predictions) == len(test_predictions)
    # NOTE(review): y_valid / y_test below are whatever the LAST loop
    # iteration bound — this assumes every algorithm shares the same
    # labels/split, and both names are unbound if nbest_algo_ids is empty.
    # Confirm upstream guarantees.
    es.fit(train_predictions, y_valid, identifiers=None)
    y_pred = es.predict(test_predictions)
    y_pred = np.argmax(y_pred, axis=-1)
    test_score = accuracy_score(y_test, y_pred)
    return test_score
# --- Example #7 ---
def evaluate_hmab(algorithms,
                  run_id,
                  dataset='credit',
                  trial_num=200,
                  seed=1,
                  eval_type='holdout'):
    """Run the hierarchical MAB once, report validation/test accuracy, and
    pickle [validation, test, stats] under save_dir.

    Returns:
        Wall-clock cost in whole seconds (int).
    """
    task_id = '%s-hmab-%d-%d' % (dataset, len(algorithms), trial_num)
    started = time.time()
    train_node, test_node = load_train_test_data(dataset, random_state=seed)
    bandit = FirstLayerBandit(trial_num,
                              algorithms,
                              train_node,
                              output_dir='logs/%s/' % task_id,
                              per_run_time_limit=per_run_time_limit,
                              dataset_name='%s-%d' % (dataset, run_id),
                              seed=seed,
                              eval_type=eval_type)
    bandit.optimize()
    time_cost = int(time.time() - started)
    print(bandit.final_rewards)
    print(bandit.action_sequence)

    # Best per-trial reward serves as the validation score.
    val_acc = np.max(bandit.final_rewards)
    test_acc = bandit.score(test_node)

    print('Dataset          : %s' % dataset)
    print('Validation/Test score : %f - %f' % (val_acc, test_acc))

    save_path = save_dir + '%s-%d.pkl' % (task_id, run_id)
    with open(save_path, 'wb') as f:
        pickle.dump([val_acc, test_acc, [time_cost]], f)
    return time_cost