Esempio n. 1
0
def evaluate_hmab(algorithms,
                  dataset,
                  run_id,
                  trial_num,
                  seed,
                  time_limit=1200):
    print('%s-%s-%d: %d' % (hmab_flag, dataset, run_id, time_limit))

    _start_time = time.time()
    train_data, test_data = load_train_test_data(dataset,
                                                 task_type=MULTICLASS_CLS)
    cls_task_type = BINARY_CLS if len(set(
        train_data.data[1])) == 2 else MULTICLASS_CLS
    balanced_acc_metric = make_scorer(balanced_accuracy)

    if is_unbalanced_dataset(train_data):
        from solnml.components.feature_engineering.transformations.preprocessor.smote_balancer import DataBalancer
        train_data = DataBalancer().operate(train_data)

    bandit = FirstLayerBandit(cls_task_type,
                              trial_num,
                              algorithms,
                              train_data,
                              output_dir='logs',
                              per_run_time_limit=per_run_time_limit,
                              dataset_name=dataset,
                              ensemble_size=50,
                              inner_opt_algorithm=opt_algo,
                              metric=balanced_acc_metric,
                              fe_algo='bo',
                              seed=seed,
                              time_limit=time_limit,
                              eval_type='holdout')
    bandit.optimize()
    time_taken = time.time() - _start_time
    model_desc = [
        bandit.nbest_algo_ids, bandit.optimal_algo_id, bandit.final_rewards,
        bandit.action_sequence
    ]

    validation_accuracy = np.max(bandit.final_rewards)
    best_pred = bandit._best_predict(test_data)
    test_accuracy = balanced_accuracy(test_data.data[1], best_pred)

    bandit.refit()
    es_pred = bandit._es_predict(test_data)
    test_accuracy_with_ens = balanced_accuracy(test_data.data[1], es_pred)

    data = [
        dataset, validation_accuracy, test_accuracy, test_accuracy_with_ens,
        time_taken, model_desc
    ]
    print(model_desc)
    print(data)

    save_path = project_dir + '%s_%s_%s_%d_%d_%d_%d_%d.pkl' % (
        hmab_flag, opt_algo, dataset, trial_num, len(algorithms), seed, run_id,
        time_limit)
    with open(save_path, 'wb') as f:
        pickle.dump(data, f)
Esempio n. 2
0
def evaluate_hmab(algorithms, dataset, run_id, trial_num, seed, time_limit=1200):
    print('%s-%s-%d: %d' % (hmab_flag, dataset, run_id, time_limit))
    exclude_datasets = ['gina_prior2', 'pc2', 'abalone', 'wind', 'waveform-5000(2)',
                        'page-blocks(1)', 'winequality_white', 'pollen']
    alad = AlgorithmAdvisor(task_type=MULTICLASS_CLS, n_algorithm=9,
                            metric='bal_acc', exclude_datasets=exclude_datasets)
    n_algo = 5
    assert dataset in exclude_datasets
    meta_infos = alad.fit_meta_learner()
    assert dataset not in meta_infos
    model_candidates = alad.fetch_algorithm_set(dataset)
    include_models = list()
    print(model_candidates)
    for algo in model_candidates:
        if algo in algorithms and len(include_models) < n_algo:
            include_models.append(algo)
    print('After algorithm recommendation', include_models)

    _start_time = time.time()
    train_data, test_data = load_train_test_data(dataset, task_type=MULTICLASS_CLS)
    cls_task_type = BINARY_CLS if len(set(train_data.data[1])) == 2 else MULTICLASS_CLS
    balanced_acc_metric = make_scorer(balanced_accuracy)

    if is_unbalanced_dataset(train_data):
        from solnml.components.feature_engineering.transformations.balancer.smote_balancer import DataBalancer
        train_data = DataBalancer().operate(train_data)
    bandit = FirstLayerBandit(cls_task_type, trial_num, include_models, train_data,
                              output_dir='logs',
                              per_run_time_limit=per_run_time_limit,
                              dataset_name=dataset,
                              ensemble_size=50,
                              inner_opt_algorithm=opt_algo,
                              metric=balanced_acc_metric,
                              fe_algo='bo',
                              seed=seed,
                              time_limit=time_limit,
                              eval_type='holdout')
    bandit.optimize()
    time_taken = time.time() - _start_time
    model_desc = [bandit.nbest_algo_ids, bandit.optimal_algo_id, bandit.final_rewards, bandit.action_sequence]

    validation_accuracy = np.max(bandit.final_rewards)
    best_pred = bandit._best_predict(test_data)
    test_accuracy = balanced_accuracy(test_data.data[1], best_pred)

    bandit.refit()
    es_pred = bandit._es_predict(test_data)
    test_accuracy_with_ens = balanced_accuracy(test_data.data[1], es_pred)

    data = [dataset, validation_accuracy, test_accuracy, test_accuracy_with_ens, time_taken, model_desc]
    print(model_desc)
    print(data)

    save_path = project_dir + '%s_%s_%s_%d_%d_%d_%d_%d.pkl' % (
        hmab_flag, opt_algo, dataset, trial_num, len(algorithms), seed, run_id, time_limit)
    with open(save_path, 'wb') as f:
        pickle.dump(data, f)
Esempio n. 3
0
def evaluate_imbalanced(algorithms,
                        dataset,
                        run_id,
                        trial_num,
                        seed,
                        time_limit=1200):
    print('%s-%s-%d: %d' % (hmab_flag, dataset, run_id, time_limit))
    _start_time = time.time()
    train_data, test_data = load_train_test_data(dataset)
    cls_task_type = BINARY_CLS if len(set(
        train_data.data[1])) == 2 else MULTICLASS_CLS
    # ACC or Balanced_ACC
    balanced_acc_metric = make_scorer(balanced_accuracy)
    bandit = FirstLayerBandit(cls_task_type,
                              trial_num,
                              algorithms,
                              train_data,
                              output_dir='logs',
                              per_run_time_limit=per_run_time_limit,
                              dataset_name=dataset,
                              ensemble_size=50,
                              opt_algo=opt_algo,
                              metric=balanced_acc_metric,
                              fe_algo='bo',
                              seed=seed)
    bandit.optimize()
    model_desc = [
        bandit.nbest_algo_ids, bandit.optimal_algo_id, bandit.final_rewards,
        bandit.action_sequence
    ]

    time_taken = time.time() - _start_time
    validation_accuracy = np.max(bandit.final_rewards)
    best_pred = bandit._best_predict(test_data)
    test_accuracy = balanced_accuracy(test_data.data[1], best_pred)
    es_pred = bandit._es_predict(test_data)
    test_accuracy_with_ens = balanced_accuracy(test_data.data[1], es_pred)
    data = [
        dataset, validation_accuracy, test_accuracy, test_accuracy_with_ens,
        time_taken, model_desc
    ]
    print(model_desc)
    print(data[:4])

    save_path = project_dir + 'data/%s_%s_%s_%d_%d_%d_%d.pkl' % (
        hmab_flag, opt_algo, dataset, trial_num, len(algorithms), seed, run_id)
    with open(save_path, 'wb') as f:
        pickle.dump(data, f)
def evaluate_hmab(algorithms,
                  dataset,
                  run_id,
                  trial_num,
                  seed,
                  time_limit=1200):
    print('%s-%s-%d: %d' % (hmab_flag, dataset, run_id, time_limit))

    _start_time = time.time()
    train_data, test_data = load_train_test_data(dataset,
                                                 task_type=MULTICLASS_CLS)
    cls_task_type = BINARY_CLS if len(set(
        train_data.data[1])) == 2 else MULTICLASS_CLS
    balanced_acc_metric = make_scorer(balanced_accuracy)

    if is_unbalanced_dataset(train_data):
        from solnml.components.feature_engineering.transformations.preprocessor.smote_balancer import DataBalancer
        train_data = DataBalancer().operate(train_data)

    bandit = FirstLayerBandit(cls_task_type,
                              trial_num,
                              algorithms,
                              train_data,
                              output_dir='logs',
                              per_run_time_limit=per_run_time_limit,
                              dataset_name=dataset,
                              ensemble_size=50,
                              inner_opt_algorithm=opt_algo,
                              metric=balanced_acc_metric,
                              fe_algo='bo',
                              seed=seed,
                              time_limit=time_limit,
                              eval_type='partial')
    # while time.time()-_start_time<time_limit:
    #     bandit.sub_bandits['random_forest'].optimizer['fe'].iterate()
    #     # print(bandit.sub_bandits['random_forest'].optimizer['hpo'].exp_output)
    bandit.optimize()
    fe_exp_output = bandit.sub_bandits['random_forest'].optimizer[
        'fe'].exp_output
    hpo_exp_output = bandit.sub_bandits['random_forest'].optimizer[
        'hpo'].exp_output

    validation_accuracy = np.max(bandit.final_rewards)
    best_pred = bandit._best_predict(test_data)
    test_accuracy = balanced_accuracy(test_data.data[1], best_pred)

    bandit.refit()
    es_pred = bandit._es_predict(test_data)
    test_accuracy_with_ens = balanced_accuracy(test_data.data[1], es_pred)

    data = [
        dataset, validation_accuracy, test_accuracy, test_accuracy_with_ens,
        fe_exp_output, hpo_exp_output, _start_time
    ]

    save_path = project_dir + '%s_%s_%s_%d_%d_%d_%d_%d.pkl' % (
        hmab_flag, opt_algo, dataset, trial_num, len(algorithms), seed, run_id,
        time_limit)
    with open(save_path, 'wb') as f:
        pickle.dump(data, f)

    del_path = './logs/'
    for i in os.listdir(del_path):
        file_data = del_path + "/" + i
        if os.path.isfile(file_data):
            os.remove(file_data)