def evaluate():
    """Run an end-to-end fit/refit/predict smoke check on the iris dataset.

    Returns:
        bool: True when the whole pipeline completes without raising,
        False otherwise.

    NOTE(review): relies on module-level ``save_dir``, ``ensemble_method``
    and ``eval_type`` being defined before this is called — confirm against
    the rest of the file.
    """
    import traceback

    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=1)
    try:
        dm = DataManager(X_train, y_train)
        train_data = dm.get_data_node(X_train, y_train)
        test_data = dm.get_data_node(X_test, y_test)
        clf = Classifier(dataset_name='iris',
                         time_limit=150,
                         output_dir=save_dir,
                         ensemble_method=ensemble_method,
                         evaluation=eval_type,
                         metric='acc')
        clf.fit(train_data)
        clf.refit()
        pred = clf.predict(test_data)
        print('final score', clf.score(test_data))
    except Exception:
        # Keep the boolean contract (False on any failure), but surface the
        # error instead of swallowing it silently — the original discarded
        # the exception, making failures impossible to diagnose.
        traceback.print_exc()
        return False
    return True
def test_cls_without_ensemble():
    """Smoke-test the Classifier on iris: single algorithm, CV evaluation, no ensemble."""
    output_dir = './data/eval_exps/soln-ml'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    budget = 60
    print('==> Start to evaluate with Budget %d' % budget)

    dataset = load_iris()
    features, labels = dataset.data, dataset.target
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.33, random_state=1, stratify=labels)

    manager = DataManager(X_train, y_train)
    train_node = manager.get_data_node(X_train, y_train)
    test_node = manager.get_data_node(X_test, y_test)

    classifier = Classifier(time_limit=budget,
                            output_dir=output_dir,
                            ensemble_method=None,
                            enable_meta_algorithm_selection=False,
                            include_algorithms=['random_forest'],
                            evaluation='cv',
                            metric='acc')
    classifier.fit(train_node)
    print(classifier.summary())
    classifier.refit()

    predictions = classifier.predict(test_node)
    print(accuracy_score(test_node.data[1], predictions))

    # Clean up the scratch directory created above.
    shutil.rmtree(output_dir)
def evaluate_hmab(algorithms, run_id, time_limit=600, dataset='credit',
                  eval_type='holdout', enable_ens=True, seed=1):
    """Run one HMAB evaluation on *dataset* and pickle the scores.

    Fits a Classifier restricted to *algorithms* under *time_limit*,
    measures balanced accuracy on the held-out test split, and dumps
    ``[validation_score, test_score, [timestamps, perfs]]`` to
    ``save_dir + '<task_id>-<run_id>.pkl'``.

    NOTE(review): relies on module-level ``hmab_id``, ``per_run_time_limit``
    and ``save_dir``; ``save_dir`` is concatenated directly with the file
    name, so it presumably ends with a path separator — confirm.
    NOTE(review): ``seed`` is accepted but never used here — kept for
    call-site compatibility.
    """
    print('%s\nDataset: %s, Run_id: %d, Budget: %d.\n%s' % (
        '=' * 50, dataset, run_id, time_limit, '=' * 50))
    task_id = '[%s][%s-%d-%d]' % (hmab_id, dataset, len(algorithms), time_limit)
    train_data, test_data = load_train_test_data(dataset, task_type=MULTICLASS_CLS)

    # Fix: the original tested `enable_ens is True`, which silently disabled
    # ensembling for truthy non-bool values (e.g. enable_ens=1). Plain
    # truthiness is the intended semantics.
    ensemble_method = 'ensemble_selection' if enable_ens else None

    clf = Classifier(time_limit=time_limit,
                     per_run_time_limit=per_run_time_limit,
                     include_algorithms=algorithms,
                     amount_of_resource=None,
                     output_dir=save_dir,
                     ensemble_method=ensemble_method,
                     evaluation=eval_type,
                     metric='bal_acc',
                     n_jobs=1)
    clf.fit(train_data)
    clf.refit()

    pred = clf.predict(test_data)
    test_score = balanced_accuracy_score(test_data.data[1], pred)

    # Validation score is the best performance seen during the search.
    timestamps, perfs = clf.get_val_stats()
    validation_score = np.max(perfs)

    print('Dataset : %s' % dataset)
    print('Validation/Test score : %f - %f' % (validation_score, test_score))

    save_path = save_dir + '%s-%d.pkl' % (task_id, run_id)
    with open(save_path, 'wb') as f:
        stats = [timestamps, perfs]
        pickle.dump([validation_score, test_score, stats], f)