def evaluate_hmab(algorithms, run_id, dataset='credit', trial_num=200, seed=1,
                  eval_type='holdout', enable_ens=False):
    """Run one FirstLayerBandit experiment and pickle its scores.

    The train/test data for ``dataset`` is loaded, a ``FirstLayerBandit`` is
    built and optimized, and the result is scored on the test data both
    directly and through an ``EnsembleBuilder``. The scores are printed and
    then written to ``save_dir + '<task_id>-<run_id>.pkl'``.

    Args:
        algorithms: Forwarded to ``FirstLayerBandit``; its length is also part
            of the task id.
        run_id: Integer used in the bandit's dataset name and in the result
            file name.
        dataset: Dataset name handed to ``load_train_test_data``.
        trial_num: Forwarded to ``FirstLayerBandit`` and used in the task id.
        seed: Forwarded to ``FirstLayerBandit``.
        eval_type: Forwarded to ``FirstLayerBandit``.
        enable_ens: Not read anywhere in this function.

    Returns:
        Whole seconds elapsed from just before data loading until
        ``bandit.optimize()`` returned.
    """
    # NOTE(review): ``enable_ens`` is ignored and the ensemble score is always
    # computed -- confirm whether that is intended.
    task_id = '%s-hmab-%d-%d' % (dataset, len(algorithms), trial_num)

    started_at = time.time()
    # NOTE(review): ``seed`` is not passed to the loader here -- confirm the
    # default split is the one wanted.
    raw_data, test_raw_data = load_train_test_data(dataset)

    bandit_kwargs = dict(
        output_dir='logs/%s/' % task_id,
        per_run_time_limit=per_run_time_limit,
        dataset_name='%s-%d' % (dataset, run_id),
        seed=seed,
        eval_type=eval_type,
    )
    bandit = FirstLayerBandit(trial_num, algorithms, raw_data, **bandit_kwargs)
    bandit.optimize()
    # The clock stops here, so scoring and saving below are not included.
    time_cost = int(time.time() - started_at)

    print(bandit.final_rewards)
    print(bandit.action_sequence)

    validation_accuracy = np.max(bandit.final_rewards)
    test_accuracy = bandit.score(test_raw_data, metric_func=balanced_accuracy)
    ensemble = EnsembleBuilder(bandit)
    test_accuracy_with_ens = ensemble.score(test_raw_data, metric_func=balanced_accuracy)

    print('Dataset : %s' % dataset)
    print('Validation/Test score : %f - %f' % (validation_accuracy, test_accuracy))
    print('Test score with ensem : %f' % test_accuracy_with_ens)

    # Stored layout: [validation score, test score, extra stats].
    result_file = save_dir + '%s-%d.pkl' % (task_id, run_id)
    with open(result_file, 'wb') as out:
        extras = [time_cost, test_accuracy_with_ens, bandit.time_records, bandit.final_rewards]
        pickle.dump([validation_accuracy, test_accuracy, extras], out)

    return time_cost
def evaluate_1stlayer_bandit(algorithms, run_id, dataset='credit', trial_num=200,
                             n_jobs=1, meta_configs=0, seed=1):
    """Run a holdout FirstLayerBandit experiment and compare ensemble scores.

    The train/test data for ``dataset`` is loaded with ``random_state=seed``,
    a ``FirstLayerBandit`` is built with ``eval_type='holdout'`` and optimized,
    and the best recorded reward is checked against ``bandit.validate()``.
    Test scores are then computed without an ensemble and with two ensemble
    implementations, printed, and pickled to
    ``save_dir + '<task_id>-<run_id>.pkl'``.

    Args:
        algorithms: Forwarded to ``FirstLayerBandit``; its length is also part
            of the task id.
        run_id: Integer used in the bandit's dataset name and in the result
            file name.
        dataset: Dataset name handed to ``load_train_test_data``.
        trial_num: Forwarded to ``FirstLayerBandit`` and used in the task id.
        n_jobs: Forwarded to ``FirstLayerBandit``.
        meta_configs: Forwarded to ``FirstLayerBandit``.
        seed: Used as the loader's ``random_state`` and as the bandit's seed.

    Returns:
        Whole seconds elapsed from just before data loading until
        ``bandit.optimize()`` returned.

    Raises:
        AssertionError: If the maximum of ``bandit.final_rewards`` and
            ``bandit.validate()`` are not close according to ``np.isclose``.
    """
    task_id = '%s-hmab-%d-%d' % (dataset, len(algorithms), trial_num)

    started_at = time.time()
    raw_data, test_raw_data = load_train_test_data(dataset, random_state=seed)

    bandit_kwargs = dict(
        output_dir='logs/%s/' % task_id,
        per_run_time_limit=per_run_time_limit,
        dataset_name='%s-%d' % (dataset, run_id),
        n_jobs=n_jobs,
        meta_configs=meta_configs,
        seed=seed,
        eval_type='holdout',
    )
    bandit = FirstLayerBandit(trial_num, algorithms, raw_data, **bandit_kwargs)
    bandit.optimize()
    # The clock stops here, so validation, scoring and saving are not included.
    time_cost = int(time.time() - started_at)

    print(bandit.final_rewards)
    print(bandit.action_sequence)

    # Two routes to the validation score; they are expected to agree.
    validation_accuracy_without_ens0 = np.max(bandit.final_rewards)
    validation_accuracy_without_ens1 = bandit.validate()
    assert np.isclose(validation_accuracy_without_ens0, validation_accuracy_without_ens1)

    test_accuracy_without_ens = bandit.score(test_raw_data)

    # For debug.
    debug_mode = True
    if debug_mode:
        test_accuracy_with_ens0 = ensemble_implementation_examples(bandit, test_raw_data)
        test_accuracy_with_ens1 = EnsembleBuilder(bandit).score(test_raw_data)

        print('Dataset : %s' % dataset)
        print('Validation score without ens: %f - %f' % (
            validation_accuracy_without_ens0, validation_accuracy_without_ens1))
        print("Test score without ensemble : %f" % test_accuracy_without_ens)
        print("Test score with ensemble : %f - %f" % (test_accuracy_with_ens0, test_accuracy_with_ens1))

        # Stored layout: [validation score, ensemble test score, extra stats].
        result_file = save_dir + '%s-%d.pkl' % (task_id, run_id)
        with open(result_file, 'wb') as out:
            extras = [time_cost, test_accuracy_with_ens0, test_accuracy_with_ens1, test_accuracy_without_ens]
            pickle.dump([validation_accuracy_without_ens0, test_accuracy_with_ens1, extras], out)

    del bandit
    return time_cost