Beispiel #1
0
def evaluate_2rd_hmab(run_id, mth, dataset, algo,
                      eval_type='holdout', time_limit=1200, seed=1):
    """Fit a classifier restricted to one algorithm and pickle its scores.

    Loads the train/test split of ``dataset``, fits a ``Classifier`` limited
    to ``algo`` using the optimisation strategy ``mth``, scores the test
    predictions with ``balanced_accuracy_score`` and writes the result to a
    pickle file inside ``save_folder`` (a name taken from the enclosing
    module scope; it is not defined in this function).

    Args:
        run_id: Numeric run index; printed and embedded in the file name.
        mth: Passed to ``Classifier.fit`` as ``opt_strategy``; also the first
            component of the output file name.
        dataset: Dataset name handed to ``load_train_test_data``.
        algo: The single entry of ``include_algorithms``.
        eval_type: Forwarded as the classifier's ``evaluation`` argument.
        time_limit: Forwarded as the classifier's ``time_limit`` and embedded
            in the file name (formatted with ``%d``).
        seed: Accepted for interface compatibility; not used in this function.

    Returns:
        None. The list ``[dataset, validation_score, test_score]`` is pickled
        to ``<save_folder>/<mth>_<dataset>_<time_limit>_<run_id>_<algo>.pkl``.
    """
    import os

    task_type = MULTICLASS_CLS
    train_data, test_data = load_train_test_data(dataset, task_type=task_type)

    from solnml.estimators import Classifier
    clf = Classifier(time_limit=time_limit,
                     per_run_time_limit=300,
                     output_dir=save_folder,
                     ensemble_method=None,
                     evaluation=eval_type,
                     enable_meta_algorithm_selection=False,
                     metric='bal_acc',
                     include_algorithms=[algo],
                     n_jobs=1)

    clf.fit(train_data, opt_strategy=mth)
    pred = clf.predict(test_data)
    # test_data.data[1] is what the predictions are scored against.
    test_score = balanced_accuracy_score(test_data.data[1], pred)

    # The reported validation score is the maximum over all recorded values.
    timestamps, perfs = clf.get_val_stats()
    validation_score = np.max(perfs)
    print('Evaluation Num : %d' % len(perfs))
    print('Run ID         : %d' % run_id)
    print('Dataset        : %s' % dataset)
    print('Val/Test score : %f - %f' % (validation_score, test_score))

    # os.path.join yields the same path as plain concatenation when
    # save_folder ends with a separator, and still lands inside the folder
    # when it does not.
    file_name = '%s_%s_%d_%d_%s.pkl' % (mth, dataset, time_limit, run_id, algo)
    save_path = os.path.join(save_folder, file_name)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, validation_score, test_score], f)
Beispiel #2
0
def evaluate_hmab(algorithms, run_id,
                  time_limit=600,
                  dataset='credit',
                  eval_type='holdout',
                  enable_ens=True, seed=1):
    """Fit a classifier on ``dataset``, refit it, and pickle its scores.

    Loads the train/test split of ``dataset``, fits and refits a
    ``Classifier``, scores the test predictions with
    ``balanced_accuracy_score`` and writes the scores together with the
    validation statistics to a pickle file inside ``save_dir`` (a name taken
    from the enclosing module scope; it is not defined in this function).

    Args:
        algorithms: Only ``len(algorithms)`` is used, as part of the output
            file name. NOTE(review): the collection itself is not passed to
            the classifier here - confirm that is intended.
        run_id: Numeric run index embedded in the file name.
        time_limit: Forwarded as the classifier's ``time_limit`` and embedded
            in the file name (formatted with ``%d``).
        dataset: Dataset name handed to ``load_train_test_data``.
        eval_type: Forwarded as the classifier's ``evaluation`` argument.
        enable_ens: When exactly ``True`` the classifier is built with
            ``ensemble_method='ensemble_selection'``; any other value
            results in ``ensemble_method=None``.
        seed: Accepted for interface compatibility; not used in this function.

    Returns:
        None. The list ``[validation_score, test_score, [timestamps, perfs]]``
        is pickled to
        ``<save_dir>/[hmab][<dataset>-<len(algorithms)>-<time_limit>]-<run_id>.pkl``.
    """
    import os

    task_id = '[hmab][%s-%d-%d]' % (dataset, len(algorithms), time_limit)
    train_data, test_data = load_train_test_data(dataset, task_type=MULTICLASS_CLS)

    # Identity check kept on purpose: only the literal True enables ensembling.
    ensemble_method = 'ensemble_selection' if enable_ens is True else None

    clf = Classifier(time_limit=time_limit,
                     amount_of_resource=None,
                     output_dir=save_dir,
                     ensemble_method=ensemble_method,
                     evaluation=eval_type,
                     metric='bal_acc',
                     n_jobs=1)
    clf.fit(train_data)
    clf.refit()
    pred = clf.predict(test_data)
    # test_data.data[1] is what the predictions are scored against.
    test_score = balanced_accuracy_score(test_data.data[1], pred)

    # The reported validation score is the maximum over all recorded values.
    timestamps, perfs = clf.get_val_stats()
    validation_score = np.max(perfs)
    print('Dataset          : %s' % dataset)
    print('Validation/Test score : %f - %f' % (validation_score, test_score))

    # os.path.join yields the same path as plain concatenation when save_dir
    # ends with a separator, and still lands inside the folder when it does
    # not.
    save_path = os.path.join(save_dir, '%s-%d.pkl' % (task_id, run_id))
    with open(save_path, 'wb') as f:
        stats = [timestamps, perfs]
        pickle.dump([validation_score, test_score, stats], f)