Beispiel #1
0
def evaluate_1stlayer_bandit(algorithms,
                             run_id,
                             dataset='credit',
                             trial_num=200,
                             n_jobs=1,
                             meta_configs=0,
                             seed=1):
    """Optimize a FirstLayerBandit on ``dataset`` and pickle its scores.

    The train/test split is obtained from
    ``load_train_test_data(dataset, random_state=seed)``. A bandit is built
    on the training part with ``eval_type='holdout'``, optimized, validated,
    and scored on the test part both with and without ensembling.

    Relies on ``per_run_time_limit`` and ``save_dir`` being defined outside
    this function.

    Args:
        algorithms: Forwarded to ``FirstLayerBandit``; its length is also
            embedded in the task id.
        run_id: Integer appended to the bandit's dataset name and to the
            output file name (formatted with ``%d``).
        dataset: Dataset name handed to the loader and used in the task id.
        trial_num: First positional argument of ``FirstLayerBandit``; also
            embedded in the task id.
        n_jobs: Forwarded to ``FirstLayerBandit``.
        meta_configs: Forwarded to ``FirstLayerBandit``.
        seed: Used as the loader's ``random_state`` and forwarded to the
            bandit.

    Returns:
        Wall-clock seconds spent on loading, construction and
        ``optimize()``, truncated to ``int``.

    Raises:
        AssertionError: If ``np.max(bandit.final_rewards)`` and
            ``bandit.validate()`` are not close.
    """
    task_id = '%s-hmab-%d-%d' % (dataset, len(algorithms), trial_num)
    started_at = time.time()
    train_node, test_node = load_train_test_data(dataset, random_state=seed)

    log_dir = 'logs/%s/' % task_id
    run_name = '%s-%d' % (dataset, run_id)
    bandit = FirstLayerBandit(
        trial_num,
        algorithms,
        train_node,
        output_dir=log_dir,
        per_run_time_limit=per_run_time_limit,
        dataset_name=run_name,
        n_jobs=n_jobs,
        meta_configs=meta_configs,
        seed=seed,
        eval_type='holdout',
    )
    bandit.optimize()
    # The clock stops right after optimization; scoring is not timed.
    time_cost = int(time.time() - started_at)
    print(bandit.final_rewards)
    print(bandit.action_sequence)

    # Sanity check: the best recorded reward must match a fresh validation.
    best_reward = np.max(bandit.final_rewards)
    validated_score = bandit.validate()
    assert np.isclose(best_reward, validated_score)

    plain_test_score = bandit.score(test_node)

    # For debug: flip to False to skip ensemble scoring, reporting and saving.
    emit_report = True
    if emit_report:
        ens_score_example = ensemble_implementation_examples(bandit, test_node)
        ens_score_builder = EnsembleBuilder(bandit).score(test_node)

        print('Dataset                     : %s' % dataset)
        print('Validation score without ens: %f - %f'
              % (best_reward, validated_score))
        print("Test score without ensemble : %f" % plain_test_score)
        print("Test score with ensemble    : %f - %f"
              % (ens_score_example, ens_score_builder))

        save_path = save_dir + '%s-%d.pkl' % (task_id, run_id)
        with open(save_path, 'wb') as fh:
            stats = [
                time_cost,
                ens_score_example,
                ens_score_builder,
                plain_test_score,
            ]
            # Payload layout: [validation score, ensemble test score, stats].
            pickle.dump([best_reward, ens_score_builder, stats], fh)
    del bandit
    return time_cost
Beispiel #2
0
def evaluate_1stlayer_bandit(algorithms, dataset='credit', trial_num=200, seed=1):
    """Optimize a FirstLayerBandit on ``dataset`` and pickle its run records.

    Data comes from ``load_data(dataset, datanode_returned=True)``. After
    ``optimize()`` the bandit's final rewards, time records, action sequence
    and the elapsed time are pickled to a file under ``project_dir``. The
    bandit is finally scored on the same data it was constructed with, and
    that score is printed.

    Relies on ``per_run_time_limit`` and ``project_dir`` being defined
    outside this function.

    Args:
        algorithms: Forwarded to ``FirstLayerBandit``; its length is also
            embedded in the output file name.
        dataset: Dataset name for the loader, the bandit and the file name.
        trial_num: First positional argument of ``FirstLayerBandit``.
        seed: Forwarded to ``FirstLayerBandit`` and embedded in the file
            name.

    Returns:
        Elapsed wall-clock seconds (float), measured after the two result
        prints that follow ``optimize()``.
    """
    started_at = time.time()
    data_node = load_data(dataset, datanode_returned=True)
    bandit = FirstLayerBandit(
        trial_num,
        algorithms,
        data_node,
        output_dir='logs',
        per_run_time_limit=per_run_time_limit,
        dataset_name=dataset,
        seed=seed,
    )
    bandit.optimize()
    print(bandit.final_rewards)
    print(bandit.action_sequence)
    time_cost = time.time() - started_at

    file_name = 'data/hmab_%s_%d_%d_%d.pkl' % (
        dataset, trial_num, len(algorithms), seed)
    save_path = project_dir + file_name
    with open(save_path, 'wb') as fh:
        # Payload layout: [rewards, time records, actions, elapsed seconds].
        pickle.dump(
            [
                bandit.final_rewards,
                bandit.time_records,
                bandit.action_sequence,
                time_cost,
            ],
            fh,
        )

    # Scored on the very data node the bandit was built from.
    same_data_score = bandit.score(data_node)
    print(same_data_score)

    return time_cost
Beispiel #3
0
def evaluate_1stlayer_bandit(algorithms,
                             dataset,
                             run_id,
                             trial_num,
                             seed,
                             time_limit=1200):
    """Optimize a FirstLayerBandit and pickle validation/test results.

    The split comes from ``load_train_test_data(dataset)``. After
    ``optimize()`` the function records a description of the bandit's
    outcome, scores the bandit and an ``EnsembleBuilder`` wrapped around it
    on the test split using ``balanced_accuracy``, prints a summary, and
    pickles everything under ``project_dir``.

    Relies on ``per_run_time_limit``, ``opt_algo``, ``balanced_accuracy``
    and ``project_dir`` being defined outside this function.

    Args:
        algorithms: Forwarded to ``FirstLayerBandit``; its length is also
            embedded in the output file name.
        dataset: Dataset name for the loader, the bandit and the file name.
        run_id: Integer embedded in the output file name.
        trial_num: First positional argument of ``FirstLayerBandit``; also
            embedded in the file name.
        seed: Forwarded to ``FirstLayerBandit`` and embedded in the file
            name.
        time_limit: Accepted but not referenced anywhere in this function.

    Returns:
        None. Results are only printed and written to disk.
    """
    started_at = time.time()
    train_data, test_data = load_train_test_data(dataset)
    bandit = FirstLayerBandit(
        trial_num,
        algorithms,
        train_data,
        output_dir='logs',
        per_run_time_limit=per_run_time_limit,
        dataset_name=dataset,
        opt_algo=opt_algo,
        seed=seed,
    )
    bandit.optimize()

    model_desc = [
        bandit.nbest_algo_ids,
        bandit.optimal_algo_id,
        bandit.final_rewards,
        bandit.action_sequence,
    ]

    # Timing stops before any test-set scoring takes place.
    time_taken = time.time() - started_at
    validation_accuracy = np.max(bandit.final_rewards)
    test_accuracy = bandit.score(test_data, metric_func=balanced_accuracy)
    ensemble = EnsembleBuilder(bandit)
    test_accuracy_with_ens = ensemble.score(
        test_data, metric_func=balanced_accuracy)

    # The first four entries double as the printed summary.
    summary = [
        dataset,
        validation_accuracy,
        test_accuracy,
        test_accuracy_with_ens,
    ]
    data = summary + [time_taken, model_desc]
    print(model_desc)
    print(summary)

    file_name = 'data/hmab_%s_%s_%d_%d_%d_%d.pkl' % (
        opt_algo, dataset, trial_num, len(algorithms), seed, run_id)
    save_path = project_dir + file_name
    with open(save_path, 'wb') as fh:
        pickle.dump(data, fh)
Beispiel #4
0
def evaluate_hmab(algorithms,
                  run_id,
                  dataset='credit',
                  trial_num=200,
                  seed=1,
                  eval_type='holdout'):
    """Optimize a FirstLayerBandit and pickle its validation/test scores.

    The split comes from ``load_train_test_data(dataset, random_state=seed)``.
    After ``optimize()`` the best final reward is taken as the validation
    score, the bandit is scored on the test split, both are printed, and the
    results are pickled under ``save_dir``.

    Relies on ``per_run_time_limit`` and ``save_dir`` being defined outside
    this function.

    Args:
        algorithms: Forwarded to ``FirstLayerBandit``; its length is also
            embedded in the task id.
        run_id: Integer appended to the bandit's dataset name and to the
            output file name (formatted with ``%d``).
        dataset: Dataset name handed to the loader and used in the task id.
        trial_num: First positional argument of ``FirstLayerBandit``; also
            embedded in the task id.
        seed: Used as the loader's ``random_state`` and forwarded to the
            bandit.
        eval_type: Forwarded to ``FirstLayerBandit``.

    Returns:
        Wall-clock seconds spent on loading, construction and
        ``optimize()``, truncated to ``int``.
    """
    task_id = '%s-hmab-%d-%d' % (dataset, len(algorithms), trial_num)
    started_at = time.time()
    train_node, test_node = load_train_test_data(dataset, random_state=seed)

    log_dir = 'logs/%s/' % task_id
    run_name = '%s-%d' % (dataset, run_id)
    bandit = FirstLayerBandit(
        trial_num,
        algorithms,
        train_node,
        output_dir=log_dir,
        per_run_time_limit=per_run_time_limit,
        dataset_name=run_name,
        seed=seed,
        eval_type=eval_type,
    )
    bandit.optimize()
    # The clock stops right after optimization; scoring is not timed.
    time_cost = int(time.time() - started_at)
    print(bandit.final_rewards)
    print(bandit.action_sequence)

    best_reward = np.max(bandit.final_rewards)
    test_score = bandit.score(test_node)
    # NOTE: ensemble test scoring via EnsembleBuilder is currently disabled.

    print('Dataset          : %s' % dataset)
    print('Validation/Test score : %f - %f' % (best_reward, test_score))

    save_path = save_dir + '%s-%d.pkl' % (task_id, run_id)
    with open(save_path, 'wb') as fh:
        stats = [time_cost]
        # Payload layout: [validation score, test score, stats].
        pickle.dump([best_reward, test_score, stats], fh)
    return time_cost