Example #1
0
def evaluate():
    """Smoke-test the AutoML classifier pipeline on the iris dataset.

    Splits iris into train/test, fits a ``Classifier`` configured by the
    module-level ``save_dir``, ``ensemble_method`` and ``eval_type``
    settings, refits, predicts, and prints the final score.

    Returns:
        bool: ``True`` when the whole pipeline runs without raising,
        ``False`` otherwise. Any exception is printed (not re-raised) so
        the failure cause is not silently lost.
    """
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=1)
    try:
        dm = DataManager(X_train, y_train)
        train_data = dm.get_data_node(X_train, y_train)
        test_data = dm.get_data_node(X_test, y_test)

        clf = Classifier(dataset_name='iris',
                         time_limit=150,
                         output_dir=save_dir,
                         ensemble_method=ensemble_method,
                         evaluation=eval_type,
                         metric='acc')
        clf.fit(train_data)
        clf.refit()
        # Prediction is part of the smoke test even though the result
        # itself is not inspected here.
        clf.predict(test_data)
        print('final score', clf.score(test_data))
    except Exception:
        # Best-effort evaluation: report the failure instead of
        # swallowing it silently, but still signal it via the return value.
        import traceback
        traceback.print_exc()
        return False
    return True
Example #2
0
def test_cls_without_ensemble():
    """Run a single-algorithm CV evaluation on iris without ensembling.

    Fits a random-forest-only ``Classifier`` under a 60 second budget,
    prints its summary and held-out accuracy, then removes the temporary
    output directory.
    """
    save_dir = './data/eval_exps/soln-ml'
    os.makedirs(save_dir, exist_ok=True)

    time_limit = 60
    print(f'==> Start to evaluate with Budget {time_limit}')
    ensemble_method = None
    eval_type = 'cv'

    dataset = load_iris()
    features, labels = dataset.data, dataset.target
    tr_x, te_x, tr_y, te_y = train_test_split(
        features, labels, test_size=0.33, random_state=1, stratify=labels)
    manager = DataManager(tr_x, tr_y)
    train_node = manager.get_data_node(tr_x, tr_y)
    test_node = manager.get_data_node(te_x, te_y)

    clf = Classifier(time_limit=time_limit,
                     output_dir=save_dir,
                     ensemble_method=ensemble_method,
                     enable_meta_algorithm_selection=False,
                     include_algorithms=['random_forest'],
                     evaluation=eval_type,
                     metric='acc')
    clf.fit(train_node)
    print(clf.summary())
    clf.refit()

    predictions = clf.predict(test_node)
    print(accuracy_score(test_node.data[1], predictions))

    # Clean up everything the run wrote under the output directory.
    shutil.rmtree(save_dir)
Example #3
0
def evaluate_hmab(algorithms,
                  run_id,
                  time_limit=600,
                  dataset='credit',
                  eval_type='holdout',
                  enable_ens=True,
                  seed=1):
    """Benchmark the HMAB-based ``Classifier`` on one dataset/run.

    Args:
        algorithms: list of algorithm names to include in the search.
        run_id: integer identifier of this repetition.
        time_limit: overall optimization budget in seconds.
        dataset: dataset name passed to ``load_train_test_data``.
        eval_type: evaluation strategy for the ``Classifier``
            (e.g. ``'holdout'``).
        enable_ens: when truthy, use ensemble selection; otherwise
            ensembling is disabled.
        seed: random seed — NOTE(review): not used inside this function;
            kept for interface compatibility with callers.

    Side effects:
        Prints progress and scores, and pickles
        ``[validation_score, test_score, [timestamps, perfs]]`` into
        ``save_dir``.
    """
    print('%s\nDataset: %s, Run_id: %d, Budget: %d.\n%s' %
          ('=' * 50, dataset, run_id, time_limit, '=' * 50))
    task_id = '[%s][%s-%d-%d]' % (hmab_id, dataset, len(algorithms),
                                  time_limit)
    train_data, test_data = load_train_test_data(dataset,
                                                 task_type=MULTICLASS_CLS)
    # Ensembling is opt-in via the flag; None disables it entirely.
    ensemble_method = 'ensemble_selection' if enable_ens else None

    clf = Classifier(time_limit=time_limit,
                     per_run_time_limit=per_run_time_limit,
                     include_algorithms=algorithms,
                     amount_of_resource=None,
                     output_dir=save_dir,
                     ensemble_method=ensemble_method,
                     evaluation=eval_type,
                     metric='bal_acc',
                     n_jobs=1)
    clf.fit(train_data)
    clf.refit()
    pred = clf.predict(test_data)
    test_score = balanced_accuracy_score(test_data.data[1], pred)
    timestamps, perfs = clf.get_val_stats()
    validation_score = np.max(perfs)
    print('Dataset          : %s' % dataset)
    print('Validation/Test score : %f - %f' % (validation_score, test_score))

    # os.path.join works whether or not save_dir carries a trailing
    # separator, unlike raw string concatenation which silently produced
    # a malformed path when the separator was missing.
    save_path = os.path.join(save_dir, '%s-%d.pkl' % (task_id, run_id))
    with open(save_path, 'wb') as f:
        stats = [timestamps, perfs]
        pickle.dump([validation_score, test_score, stats], f)