Exemple #1
0
def test_rgs():
    """Fit a Regressor on a train/test split and print the test-set MSE.

    The data comes from ``load_boston()``; 33% of the samples are held out
    for testing with a fixed ``random_state`` of 1. The regressor is built
    with the 'mse' metric, 'blending' ensembling, 'holdout' evaluation and
    a time limit of 120.
    """
    budget = 120
    print('==> Start to evaluate with Budget %d' % budget)

    dataset = load_boston()
    X_train, X_test, y_train, y_test = train_test_split(
        dataset.data, dataset.target, test_size=0.33, random_state=1)

    # Both nodes are produced by the same manager, which is constructed
    # from the training portion only.
    manager = DataManager(X_train, y_train)
    train_node = manager.get_data_node(X_train, y_train)
    test_node = manager.get_data_node(X_test, y_test)

    # The output directory handed to the regressor must exist beforehand.
    output_dir = './data/eval_exps/soln-ml'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    regressor = Regressor(
        metric='mse',
        ensemble_method='blending',
        evaluation='holdout',
        time_limit=budget,
        output_dir=output_dir,
    )
    regressor.fit(train_node)
    predictions = regressor.predict(test_node)

    # data[1] of the node is used as the ground-truth target vector.
    print(mean_squared_error(test_node.data[1], predictions))
def evaluate_sys(run_id, task_type, mth, dataset, ens_method, enable_meta,
                 eval_type='holdout', time_limit=1200, seed=1):
    """Train one estimator on ``dataset``, score it and pickle the results.

    Args:
        run_id: Integer run identifier; printed and embedded in the file name.
        task_type: 'cls' selects a Classifier scored with balanced accuracy;
            any other value selects a Regressor scored with MSE.
        mth: Passed to ``fit`` as ``opt_strategy`` and used in the file name.
        dataset: Dataset name; used for loading, as ``dataset_id`` and in the
            file name.
        ens_method: Ensemble method forwarded to the estimator (may be None).
        enable_meta: The string 'true' enables meta algorithm selection;
            anything else disables it.
        eval_type: Evaluation mode forwarded to the estimator.
        time_limit: Overall time limit forwarded to the estimator.
        seed: Not used inside this function.

    The result file is written under the module-level ``save_folder``, and
    the estimator's output directory is removed afterwards.
    """
    is_cls = task_type == 'cls'
    _task_type = MULTICLASS_CLS if is_cls else REGRESSION
    train_data, test_data = load_train_test_data(dataset, task_type=_task_type)

    # Settings that are the same for the classifier and the regressor.
    shared_kwargs = dict(
        time_limit=time_limit,
        per_run_time_limit=300,
        output_dir=save_folder,
        ensemble_method=ens_method,
        enable_meta_algorithm_selection=(enable_meta == 'true'),
        evaluation=eval_type,
        include_algorithms=['random_forest'],
        n_jobs=1,
    )

    if is_cls:
        from solnml.estimators import Classifier
        estimator = Classifier(
            metric='bal_acc',
            include_preprocessors=['extra_trees_based_selector',
                                   'generic_univariate_selector',
                                   'liblinear_based_selector',
                                   'percentile_selector'],
            **shared_kwargs)
    else:
        from solnml.estimators import Regressor
        estimator = Regressor(
            metric='mse',
            include_preprocessors=['extra_trees_based_selector_regression',
                                   'generic_univariate_selector',
                                   'liblinear_based_selector',
                                   'percentile_selector_regression'],
            **shared_kwargs)

    # Timestamp taken right before fitting; it is stored in the pickle.
    start_time = time.time()
    estimator.fit(train_data, opt_strategy=mth, dataset_id=dataset)
    pred = estimator.predict(test_data)

    score_fn = balanced_accuracy_score if is_cls else mean_squared_error
    test_score = score_fn(test_data.data[1], pred)

    validation_score = estimator._ml_engine.solver.incumbent_perf
    eval_dict = estimator._ml_engine.solver.get_eval_dict()

    print('Run ID         : %d' % run_id)
    print('Dataset        : %s' % dataset)
    print('Val/Test score : %f - %f' % (validation_score, test_score))

    # The sixth field is 1 when no ensemble method was given, otherwise 0.
    file_name = 'extremely_small_%s_%s_%s_%s_%d_%d_%d.pkl' % (
        task_type, mth, dataset, enable_meta, time_limit, (ens_method is None), run_id)
    save_path = save_folder + file_name
    with open(save_path, 'wb') as out:
        pickle.dump([dataset, validation_score, test_score, start_time, eval_dict], out)

    # Delete output dir
    output_dir = os.path.join(estimator.get_output_dir())
    shutil.rmtree(output_dir)
Exemple #3
0
def model_fit(_id,obj,paramsj,X_trainj,y_trainj):
    """Train a model from JSON-encoded inputs and write a report file.

    The report is written to ``./models_information/<_id>_information``.
    On success it contains a JSON object describing the fitted model; on
    failure it contains the text 'Model training failed!'.

    Args:
        _id: String identifier used to build the report file name.
        obj: 'clf' builds a Classifier, 'reg' builds a Regressor. Any other
            value is reported as a training failure.
        paramsj: JSON string with the keys 'time_limit', 'ensemble_method',
            'evaluation' and 'metric'.
        X_trainj: JSON string that is loaded into a DataFrame of features.
        y_trainj: JSON string that is loaded into a DataFrame; its first
            column is used as the target.

    Returns:
        0 when training succeeded, -1 when it failed.
    """
    import traceback

    info_path = './models_information/'+_id+'_information'
    # The context manager closes the report file on every exit path,
    # including errors raised while the result dictionary is assembled.
    with open(info_path, 'w') as info_file:
        print('Model training begins!')
        try:
            # read data
            X_train = np.array(pd.DataFrame(json.loads(X_trainj)))
            y_train = np.array(pd.DataFrame(json.loads(y_trainj)))[:, 0]
            params = json.loads(paramsj)

            dm = DataManager(X_train, y_train)
            train_data = dm.get_data_node(X_train, y_train)
            save_dir = '../data/eval_exps/soln-ml'
            if not os.path.exists(save_dir):
                os.makedirs(save_dir)

            # train model
            if obj == 'clf':
                mdl = Classifier(time_limit=params['time_limit'],
                                 output_dir=save_dir,
                                 ensemble_method=params['ensemble_method'],
                                 evaluation=params['evaluation'],
                                 metric=params['metric'],
                                 n_jobs=4)
            elif obj == 'reg':
                # NOTE(review): n_jobs is not defined in this function; it is
                # presumably a module-level setting - confirm it exists.
                mdl = Regressor(metric=params['metric'],
                                ensemble_method=params['ensemble_method'],
                                evaluation=params['evaluation'],
                                time_limit=params['time_limit'],
                                output_dir=save_dir,
                                random_state=1,
                                n_jobs=n_jobs)
            else:
                # Fail explicitly instead of relying on a NameError for mdl.
                raise ValueError('Unsupported model type: %r' % (obj,))

            mdl.fit(train_data)
        except Exception:
            # Only ordinary errors are reported as a failed training run;
            # KeyboardInterrupt and SystemExit propagate to the caller.
            traceback.print_exc()
            print('Model training failed!')
            info_file.write('Model training failed!')
            return -1

        result = {
            'best_algo_id': str(mdl.best_algo_id),
            'best_hpo_config': str(mdl.best_hpo_config),
            'nbest_algo_id': str(mdl.nbest_algo_id),
            'best_perf': str(mdl.best_perf),
            'best_fe_config': str(mdl.best_fe_config),
            # get_ens_model_info is not realized in this version yet
            'get_ens_model_info': str(mdl.get_ens_model_info),
        }
        info_file.write(json.dumps(result))
    print('Model training finished!')
    return 0
Exemple #4
0
# Translate the string 'none' into None before it is passed to Regressor.
ensemble_method = None if args.ens_method == 'none' else args.ens_method

print('==> Start to evaluate with Budget %d' % time_limit)

# Load the data and hold out 33% of the samples for testing.
boston = load_boston()
X, y = boston.data, boston.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=1)

# Both data nodes come from a manager built on the training portion.
dm = DataManager(X_train, y_train)
train_data = dm.get_data_node(X_train, y_train)
test_data = dm.get_data_node(X_test, y_test)

# Create the output directory if it does not exist yet.
save_dir = './data/eval_exps/soln-ml'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

rgs = Regressor(
    metric='mse',
    dataset_name='boston',
    ensemble_method=ensemble_method,
    evaluation=eval_type,
    time_limit=time_limit,
    output_dir=save_dir,
    random_state=1,
    n_jobs=n_jobs,
)

rgs.fit(train_data)
pred = rgs.predict(test_data)

# Report the mean squared error against the held-out targets.
print(mean_squared_error(test_data.data[1], pred))