def evaluate_autosklearn(algorithms,
                         rep_id,
                         trial_num=100,
                         dataset='credit',
                         time_limit=1200,
                         seed=1,
                         ensemble_enable=True):
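    """Run auto-sklearn on ``dataset`` restricted to ``algorithms``.

    Uses an 80/20 holdout, disables meta-learning, and optionally builds an
    ensemble of the best configurations. The best validation score from
    ``cv_results_`` and the test accuracy are pickled to ``save_dir``.
    ``per_run_time_limit`` and ``save_dir`` are assumed to be module-level
    globals.
    """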
    print('%s\nDataset: %s, Run_id: %d, Budget: %d.\n%s' %
          ('=' * 50, dataset, rep_id, time_limit, '=' * 50))
    mth_id = 'ausk-ens' if ensemble_enable else 'ausk'
    task_id = '%s-%s-%d-%d' % (dataset, mth_id, len(algorithms), trial_num)
    include_models = algorithms
    if ensemble_enable:
        ensemble_size = 50
        ensem_nbest = len(algorithms) * 20
    else:
        ensemble_size = 1
        ensem_nbest = 1

    automl = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=int(time_limit),
        per_run_time_limit=per_run_time_limit,
        n_jobs=1,
        include_estimators=include_models,
        ensemble_memory_limit=12288,
        ml_memory_limit=12288,
        ensemble_size=ensemble_size,
        ensemble_nbest=ensem_nbest,
        initial_configurations_via_metalearning=0,
        seed=seed,
        resampling_strategy='holdout',
        resampling_strategy_arguments={'train_size': 0.8})
    print(automl)
    raw_data, test_raw_data = load_train_test_data(dataset)
    X, y = raw_data.data
    X_test, y_test = test_raw_data.data
    feat_type = [
        'Categorical' if _type == CATEGORICAL else 'Numerical'
        for _type in raw_data.feature_types
    ]
    automl.fit(X.copy(), y.copy(), feat_type=feat_type)
    model_desc = automl.show_models()
    str_stats = automl.sprint_statistics()
    test_results = automl.cv_results_['mean_test_score']
    time_records = automl.cv_results_['mean_fit_time']
    validation_accuracy = np.max(test_results)
    predictions = automl.predict(X_test)
    test_accuracy = accuracy_score(y_test, predictions)
    # Print statistics about the auto-sklearn run, such as the number of
    # iterations and the number of models that failed with a timeout.
    print(str_stats)
    print(model_desc)
    print('Validation Accuracy:', validation_accuracy)
    print('Test Accuracy      :', test_accuracy)

    save_path = save_dir + '%s-%d.pkl' % (task_id, rep_id)
    with open(save_path, 'wb') as f:
        stats = [model_desc, str_stats, test_results, time_records, time_limit]
        pickle.dump([validation_accuracy, test_accuracy, stats], f)
def check_datasets(datasets):
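    """Check that every dataset name in ``datasets`` can be loaded; raise
    ValueError for any that cannot."""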
    for _dataset in datasets:
        try:
            _, _ = load_train_test_data(_dataset,
                                        random_state=1,
                                        task_type=MULTICLASS_CLS)
        except Exception as e:
            raise ValueError('Dataset - %s does not exist!' % _dataset) from e
def evaluate_sys(run_id, task_type, mth, dataset, ens_method, enable_meta,
                 eval_type='holdout', time_limit=1200, seed=1, tree_id=0):
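    """Evaluate the mindware Classifier/Regressor on a single dataset.

    Builds the estimator according to ``task_type``, fits it with optimization
    strategy ``mth``, scores the test split (balanced accuracy for
    classification, MSE for regression), pickles the result, and removes the
    estimator's output directory. ``save_folder`` is assumed to be a
    module-level global.
    """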
    _task_type = MULTICLASS_CLS if task_type == 'cls' else REGRESSION
    train_data, test_data = load_train_test_data(dataset, task_type=_task_type)
    _enable_meta = (enable_meta == 'true')
    if task_type == 'cls':
        from mindware.estimators import Classifier
        estimator = Classifier(time_limit=time_limit,
                               per_run_time_limit=30,
                               output_dir=save_folder,
                               ensemble_method=ens_method,
                               enable_meta_algorithm_selection=_enable_meta,
                               evaluation=eval_type,
                               metric='bal_acc',
                               include_algorithms=['extra_trees', 'random_forest',
                                                   'adaboost', 'gradient_boosting',
                                                   'k_nearest_neighbors', 'liblinear_svc',
                                                   'libsvm_svc', 'lightgbm',
                                                   'logistic_regression'],
                               n_jobs=1)
    else:
        from mindware.estimators import Regressor
        estimator = Regressor(time_limit=time_limit,
                              per_run_time_limit=90,
                              output_dir=save_folder,
                              ensemble_method=ens_method,
                              enable_meta_algorithm_selection=_enable_meta,
                              evaluation=eval_type,
                              metric='mse',
                              # include_preprocessors=['percentile_selector_regression'],
                              # include_algorithms=['random_forest'],
                              n_jobs=1)

    start_time = time.time()
    estimator.fit(train_data, opt_strategy=mth, dataset_id=dataset, tree_id=tree_id)
    pred = estimator.predict(test_data)
    if task_type == 'cls':
        test_score = balanced_accuracy_score(test_data.data[1], pred)
    else:
        test_score = mean_squared_error(test_data.data[1], pred)
    validation_score = estimator._ml_engine.solver.incumbent_perf
    # eval_dict = estimator._ml_engine.solver.get_eval_dict()
    print('Run ID         : %d' % run_id)
    print('Dataset        : %s' % dataset)
    print('Val/Test score : %f - %f' % (validation_score, test_score))

    save_path = save_folder + '%s_%s_%s_%s_%d_%d_%d_%d.pkl' % (
        task_type, mth, dataset, enable_meta, time_limit, (ens_method is None), tree_id, run_id)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, validation_score, test_score, start_time], f)

    # Delete output dir
    shutil.rmtree(os.path.join(estimator.get_output_dir()))
def evaluate_hmab(algorithms,
                  run_id,
                  time_limit=600,
                  dataset='credit',
                  eval_type='holdout',
                  enable_ens=True,
                  seed=1):
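    """Run the mindware Classifier on ``dataset`` with the given algorithms.

    Optionally applies ensemble selection, refits the best configuration,
    records the validation trajectory and balanced test accuracy, and pickles
    them to ``save_dir``. ``hmab_id``, ``per_run_time_limit`` and ``save_dir``
    are assumed to be module-level globals.
    """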
    print('%s\nDataset: %s, Run_id: %d, Budget: %d.\n%s' %
          ('=' * 50, dataset, run_id, time_limit, '=' * 50))
    task_id = '[%s][%s-%d-%d]' % (hmab_id, dataset, len(algorithms),
                                  time_limit)
    _start_time = time.time()
    train_data, test_data = load_train_test_data(dataset,
                                                 task_type=MULTICLASS_CLS)
    if enable_ens:
        ensemble_method = 'ensemble_selection'
    else:
        ensemble_method = None

    clf = Classifier(time_limit=time_limit,
                     per_run_time_limit=per_run_time_limit,
                     include_algorithms=algorithms,
                     amount_of_resource=None,
                     output_dir=save_dir,
                     ensemble_method=ensemble_method,
                     evaluation=eval_type,
                     metric='bal_acc',
                     n_jobs=1)
    # clf.fit(train_data, meta_datasets=holdout_datasets)
    # clf.fit(train_data, opt_strategy='combined')
    clf.fit(train_data)
    clf.refit()
    pred = clf.predict(test_data)
    test_score = balanced_accuracy_score(test_data.data[1], pred)
    timestamps, perfs = clf.get_val_stats()
    validation_score = np.max(perfs)
    print('Dataset          : %s' % dataset)
    print('Validation/Test score : %f - %f' % (validation_score, test_score))

    save_path = save_dir + '%s-%d.pkl' % (task_id, run_id)
    with open(save_path, 'wb') as f:
        stats = [timestamps, perfs]
        pickle.dump([validation_score, test_score, stats], f)
def test_balancer():
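    """Check that DataBalancer.operate is deterministic on identical inputs and
    still works when the labels of a copied test node are set to None."""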
    dataset = 'winequality_red'
    sys.path.append(os.getcwd())
    from mindware.datasets.utils import load_train_test_data
    raw_data, test_raw_data = load_train_test_data(dataset)
    # data = (
    # np.random.random((10, 4)), np.array([0, 0, 0, 0, 0, 2, 2, 2, 2, 2]))
    # feature_type = [NUMERICAL, NUMERICAL, DISCRETE, DISCRETE]
    # datanode = DataNode(data, feature_type)
    print(raw_data, test_raw_data)
    from mindware.components.feature_engineering.transformations.balancer.data_balancer import DataBalancer
    balancer = DataBalancer()
    a = balancer.operate(raw_data)
    b = balancer.operate(raw_data)
    c = balancer.operate(raw_data)
    assert a == b and b == c
    print(balancer.operate(raw_data))
    test_data = test_raw_data.copy_()
    test_data.data[1] = None
    print(balancer.operate(test_data))
def evaluate_autosklearn(algorithms,
                         rep_id,
                         dataset='credit',
                         time_limit=1200,
                         seed=1,
                         enable_ens=True,
                         enable_meta_learning=True,
                         eval_type='holdout'):
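    """Run auto-sklearn with balanced accuracy as the optimization metric.

    Supports a 67/33 holdout or 5-fold CV, optional ensemble selection and
    meta-learning warm starts; refits on the full training set and pickles the
    validation/test scores together with the evaluation timestamps.
    ``per_run_time_limit``, ``save_dir`` and ``convert_ausk_to_plot`` are
    assumed to be module-level definitions.
    """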
    print('%s\nDataset: %s, Run_id: %d, Budget: %d.\n%s' %
          ('=' * 50, dataset, rep_id, time_limit, '=' * 50))
    task_id = '[ausk%d][%s-%d-%d]' % (enable_ens, dataset, len(algorithms),
                                      time_limit)
    if enable_ens:
        ensemble_size, ensemble_nbest = 50, 50
    else:
        ensemble_size, ensemble_nbest = 1, 1
    if enable_meta_learning:
        init_config_via_metalearning = 25
    else:
        init_config_via_metalearning = 0

    include_models = None

    if eval_type == 'holdout':
        automl = autosklearn.classification.AutoSklearnClassifier(
            time_left_for_this_task=int(time_limit),
            per_run_time_limit=per_run_time_limit,
            n_jobs=1,
            include_estimators=include_models,
            ensemble_memory_limit=16384,
            ml_memory_limit=16384,
            ensemble_size=ensemble_size,
            ensemble_nbest=ensemble_nbest,
            initial_configurations_via_metalearning=init_config_via_metalearning,
            seed=int(seed),
            resampling_strategy='holdout',
            resampling_strategy_arguments={'train_size': 0.67})
    else:
        automl = autosklearn.classification.AutoSklearnClassifier(
            time_left_for_this_task=int(time_limit),
            per_run_time_limit=per_run_time_limit,
            n_jobs=1,
            include_estimators=include_models,
            ensemble_memory_limit=16384,
            ml_memory_limit=16384,
            ensemble_size=ensemble_size,
            ensemble_nbest=ensemble_nbest,
            initial_configurations_via_metalearning=init_config_via_metalearning,
            seed=seed,
            resampling_strategy='cv',
            resampling_strategy_arguments={'folds': 5})

    print(automl)
    raw_data, test_raw_data = load_train_test_data(dataset,
                                                   task_type=MULTICLASS_CLS)
    X, y = raw_data.data
    X_test, y_test = test_raw_data.data
    feat_type = [
        'Categorical' if _type == CATEGORICAL else 'Numerical'
        for _type in raw_data.feature_types
    ]
    from autosklearn.metrics import balanced_accuracy as balanced_acc
    automl.fit(X.copy(), y.copy(), feat_type=feat_type, metric=balanced_acc)
    model_desc = automl.show_models()
    str_stats = automl.sprint_statistics()
    valid_results = automl.cv_results_['mean_test_score']
    time_records = automl.cv_results_['mean_fit_time']
    plot_x = convert_ausk_to_plot(time_records, time_limit)

    validation_score = np.max(valid_results)

    # Test performance.
    automl.refit(X.copy(), y.copy())
    predictions = automl.predict(X_test)
    test_score = balanced_accuracy_score(y_test, predictions)

    # Print statistics about the auto-sklearn run, such as the number of
    # iterations and the number of models that failed with a timeout.
    print(str_stats)
    print(model_desc)
    print('Validation Accuracy:', validation_score)
    print("Test Accuracy      :", test_score)

    save_path = save_dir + '%s-%d.pkl' % (task_id, rep_id)
    with open(save_path, 'wb') as f:
        stats = [plot_x, valid_results]
        pickle.dump([validation_score, test_score, stats], f)
def evaluate_tpot(dataset, task_type, run_id, time_limit, seed=1, use_fe=True):
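    """Run TPOT on ``dataset`` for roughly ``time_limit`` seconds.

    Selects a classification or regression search space according to the
    module-level ``space_type``, reads the best internal score from TPOT's
    Pareto front, computes the test score, and pickles the results. ``args``,
    ``space_type``, ``max_eval_time`` and ``save_dir`` are assumed to be
    module-level globals.
    """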
    n_job = args.n_job
    # Construct the ML model.
    if not use_fe:
        from mindware.utils.tpot_config import classifier_config_dict
        config = classifier_config_dict

    _task_type = MULTICLASS_CLS if task_type == 'cls' else REGRESSION
    if task_type == 'cls':
        if space_type == 'large':
            from tpot.config.classifier import classifier_config_dict
        elif space_type == 'small':
            from tpot.config.classifier_small import classifier_config_dict
        else:
            from tpot.config.classifier_extremely_small import classifier_config_dict
        config_dict = classifier_config_dict
    else:
        if space_type == 'large':
            from tpot.config.regressor import regressor_config_dict
        elif space_type == 'small':
            from tpot.config.regressor_small import regressor_config_dict
        else:
            from tpot.config.regressor_extremely_small import regressor_config_dict
        config_dict = regressor_config_dict

    if task_type == 'cls':
        automl = TPOTClassifier(config_dict=config_dict,
                                generations=10000,
                                population_size=20,
                                verbosity=2,
                                n_jobs=n_job,
                                cv=0.2,
                                scoring='balanced_accuracy',
                                max_eval_time_mins=max_eval_time,
                                max_time_mins=int(time_limit / 60),
                                random_state=seed)
        raw_data, test_raw_data = load_train_test_data(dataset,
                                                       task_type=_task_type)
        X_train, y_train = raw_data.data
        X_test, y_test = test_raw_data.data
        X_train, y_train = X_train.astype('float64'), y_train.astype('int')
        X_test, y_test = X_test.astype('float64'), y_test.astype('int')
    else:
        automl = TPOTRegressor(config_dict=config_dict,
                               generations=10000,
                               population_size=20,
                               verbosity=2,
                               n_jobs=n_job,
                               cv=0.2,
                               scoring='neg_mean_squared_error',
                               max_eval_time_mins=max_eval_time,
                               max_time_mins=int(time_limit / 60),
                               random_state=seed)
        raw_data, test_raw_data = load_train_test_data(dataset,
                                                       task_type=_task_type)
        X_train, y_train = raw_data.data
        X_test, y_test = test_raw_data.data
        X_train, y_train = X_train.astype('float64'), y_train.astype('float64')
        X_test, y_test = X_test.astype('float64'), y_test.astype('float64')

    start_time = time.time()
    automl.fit(X_train, y_train)
    y_hat = automl.predict(X_test)
    pareto_front = automl._pareto_front

    if task_type == 'cls':
        score_func = balanced_accuracy_score
    else:
        score_func = mean_squared_error

    valid_score = max([
        pareto_front.keys[x].wvalues[1] for x in range(len(pareto_front.keys))
    ])
    test_score = score_func(y_test, y_hat)
    print('Run ID         : %d' % run_id)
    print('Dataset        : %s' % dataset)
    print('Val/Test score : %f - %f' % (valid_score, test_score))
    scores = automl.scores
    times = automl.times
    _space_type = '%s_' % space_type if space_type != 'large' else ''
    save_path = save_dir + '%s%s_tpot_%s_false_%d_1_%d.pkl' % (
        _space_type, task_type, dataset, time_limit, run_id)
    with open(save_path, 'wb') as f:
        pickle.dump(
            [dataset, valid_score, test_score, times, scores, start_time], f)
def evaluate_ausk(run_id,
                  task_type,
                  mth,
                  dataset,
                  ens_method,
                  enable_meta,
                  eval_type='holdout',
                  time_limit=1200,
                  seed=1):
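    """Run auto-sklearn restricted to random forests and a few preprocessors.

    Uses a 67/33 holdout with a balanced-accuracy (classification) or MSE
    (regression) scorer, pickles the per-run scores and fit times, and removes
    the temporary folders afterwards. ``save_folder`` is assumed to be a
    module-level global.
    """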
    tmp_dir = 'data/exp_sys/ausk_tmp_%s_%s_%s_%d_%d' % (
        task_type, mth, dataset, time_limit, run_id)
    output_dir = 'data/exp_sys/ausk_output_%s_%s_%s_%d_%d' % (
        task_type, mth, dataset, time_limit, run_id)
    initial_configs = 25 if enable_meta == 'true' else 0
    if os.path.exists(tmp_dir):
        try:
            shutil.rmtree(tmp_dir)
            shutil.rmtree(output_dir)
        except Exception:
            pass

    if task_type == 'cls':
        automl = autosklearn.classification.AutoSklearnClassifier(
            time_left_for_this_task=int(time_limit),
            per_run_time_limit=300,
            n_jobs=1,
            include_estimators=['random_forest'],
            include_preprocessors=[
                'extra_trees_preproc_for_classification',
                'liblinear_svc_preprocessor',
                'select_percentile_classification', 'select_rates'
            ],
            ensemble_memory_limit=16384,
            ml_memory_limit=16384,
            ensemble_size=1 if ens_method is None else 50,
            initial_configurations_via_metalearning=initial_configs,
            tmp_folder=tmp_dir,
            output_folder=output_dir,
            delete_tmp_folder_after_terminate=False,
            delete_output_folder_after_terminate=False,
            seed=int(seed),
            resampling_strategy='holdout',
            resampling_strategy_arguments={'train_size': 0.67})
    else:
        automl = autosklearn.regression.AutoSklearnRegressor(
            time_left_for_this_task=int(time_limit),
            per_run_time_limit=300,
            n_jobs=1,
            include_estimators=['random_forest'],
            include_preprocessors=[
                'extra_trees_preproc_for_regression',
                'select_percentile_regression', 'select_rates'
            ],
            ensemble_memory_limit=16384,
            ml_memory_limit=16384,
            ensemble_size=1 if ens_method is None else 50,
            initial_configurations_via_metalearning=initial_configs,
            tmp_folder=tmp_dir,
            output_folder=output_dir,
            delete_tmp_folder_after_terminate=False,
            delete_output_folder_after_terminate=False,
            seed=int(seed),
            resampling_strategy='holdout',
            resampling_strategy_arguments={'train_size': 0.67})

    print(automl)
    _task_type = MULTICLASS_CLS if task_type == 'cls' else REGRESSION
    train_data, test_data = load_train_test_data(dataset, task_type=_task_type)
    X, y = train_data.data
    X_test, y_test = test_data.data
    feat_type = [
        'Categorical' if _type == CATEGORICAL else 'Numerical'
        for _type in train_data.feature_types
    ]
    from autosklearn.metrics import make_scorer
    if task_type == 'cls':
        scorer = make_scorer(name='balanced_accuracy',
                             score_func=balanced_accuracy_score)
        score_func = balanced_accuracy_score
    else:
        scorer = make_scorer(name='mean_squared_error',
                             score_func=mean_squared_error,
                             greater_is_better=False)
        score_func = mean_squared_error
    start_time = time.time()
    automl.fit(X.copy(), y.copy(), feat_type=feat_type, metric=scorer)
    valid_results = automl.cv_results_['mean_test_score']
    if task_type == 'cls':
        validation_score = np.max(valid_results)
    else:
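        # For regression, offset each recorded score by the last entry before
        # taking the minimum (lower MSE is better).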
        valid_results = [ele - valid_results[-1] for ele in valid_results[:-1]]
        validation_score = np.min(valid_results)
    # automl.refit(X.copy(), y.copy())
    predictions = automl.predict(X_test)
    test_score = score_func(y_test, predictions)
    model_desc = automl.show_models()
    str_stats = automl.sprint_statistics()
    result_score = automl.cv_results_['mean_test_score']
    result_time = automl.cv_results_['mean_fit_time']

    print('=' * 10)
    # print(model_desc)
    print(str_stats)
    print('=' * 10)

    print('Validation score', validation_score)
    print('Test score', test_score)
    # print(automl.show_models())
    save_path = save_folder + 'extremely_small_%s_%s_%s_%s_%d_%d_%d.pkl' % (
        task_type, mth, dataset, enable_meta, time_limit,
        (ens_method is None), run_id)
    with open(save_path, 'wb') as f:
        pickle.dump([
            dataset, validation_score, test_score, start_time, result_score,
            result_time
        ], f)

    shutil.rmtree(output_dir)
    shutil.rmtree(os.path.join(tmp_dir, '.auto-sklearn'))
def evaluate_package():
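    """Smoke test: load the 'pc4' dataset from the current directory and fit a
    default Classifier on it."""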
    train_data, test_data = load_train_test_data('pc4', data_dir='./')
    Classifier().fit(train_data)
def evaluate(mth, dataset, run_id):
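    """Benchmark one hyperparameter optimizer (``mth``) on ``dataset``.

    The objective trains the configured estimator and returns the negated
    balanced accuracy on the held-out split; supported optimizers are GP/RF BO,
    lite-BO, SMAC, TPE and random search. ``max_runs``, ``get_metric``,
    ``get_estimator`` and ``get_configspace`` are assumed to be module-level
    definitions.
    """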
    print(mth, dataset, run_id)
    train_data, test_data = load_train_test_data(dataset,
                                                 test_size=0.3,
                                                 task_type=MULTICLASS_CLS)

    def objective_function(config):
        metric = get_metric('bal_acc')
        _, estimator = get_estimator(config.get_dictionary())
        X_train, y_train = train_data.data
        X_test, y_test = test_data.data
        estimator.fit(X_train, y_train)
        return -metric(estimator, X_test, y_test)

    def tpe_objective_function(config):
        metric = get_metric('bal_acc')
        _, estimator = get_estimator(config)
        X_train, y_train = train_data.data
        X_test, y_test = test_data.data
        estimator.fit(X_train, y_train)
        return -metric(estimator, X_test, y_test)

    config_space = get_configspace()

    if mth == 'gp_bo':
        bo = BO(objective_function, config_space, max_runs=max_runs)
        bo.run()
        print('new BO result')
        print(bo.get_incumbent())
        perf_bo = bo.history_container.incumbent_value
    elif mth == 'rf_bo':
        bo = BO(objective_function,
                config_space,
                surrogate_model='prob_rf',
                max_runs=max_runs)
        bo.run()
        print('new BO result')
        print(bo.get_incumbent())
        perf_bo = bo.history_container.incumbent_value
    elif mth == 'lite_bo':
        from litebo.facade.bo_facade import BayesianOptimization
        bo = BayesianOptimization(objective_function,
                                  config_space,
                                  max_runs=max_runs)
        bo.run()
        print('lite BO result')
        print(bo.get_incumbent())
        perf_bo = bo.history_container.incumbent_value
    elif mth == 'smac':
        from smac.scenario.scenario import Scenario
        from smac.facade.smac_facade import SMAC
        # Scenario object
        scenario = Scenario({
            "run_obj": "quality",
            "runcount-limit": max_runs,
            "cs": config_space,
            "deterministic": "true"
        })
        smac = SMAC(scenario=scenario,
                    rng=np.random.RandomState(42),
                    tae_runner=objective_function)
        incumbent = smac.optimize()
        perf_bo = objective_function(incumbent)
        print('SMAC BO result')
        print(perf_bo)
    elif mth == 'tpe':
        config_space = get_configspace('tpe')
        from hyperopt import tpe, fmin, Trials
        trials = Trials()
        fmin(tpe_objective_function,
             config_space,
             tpe.suggest,
             max_runs,
             trials=trials)
        perfs = [trial['result']['loss'] for trial in trials.trials]
        perf_bo = min(perfs)
    elif mth == 'tpe_bo':
        from mindware.components.transfer_learning.tlbo.tpe_optimizer import TPE_BO
        bo = TPE_BO(objective_function, config_space, max_runs=max_runs)
        bo.run()
        print('lite BO result')
        print(bo.get_incumbent())
        perf_bo = bo.history_container.incumbent_value
    elif mth == 'random_search':
        from mindware.components.transfer_learning.tlbo.tpe_optimizer import TPE_BO
        bo = TPE_BO(objective_function,
                    config_space,
                    surrogate_model=mth,
                    max_runs=max_runs)
        bo.run()
        print('lite BO result')
        print(bo.get_incumbent())
        perf_bo = bo.history_container.incumbent_value
    else:
        raise ValueError('Invalid method.')
    return perf_bo
def evaluate_transformation(trans_id):
    # NOTE: the original signature and the head of this list were truncated in
    # the source; the function name and the first three entries (the ones used
    # in the branches below) are reconstructed and may not match the original.
    trans_types = ['fast_ica',
                   'quantile',
                   'variance_selector',
                   'liblinear_based_selector',
                   'rfe_selector',
                   'normalizer',
                   'scaler1',
                   'scaler2',
                   'scaler3',
                   'random_tree_embedding',
                   'polynomial',
                   'pca',
                   'nystronem',
                   'lda',
                   'kitchen_sink',
                   'kernel_pca',
                   'cross']
    trans_name = trans_types[trans_id]
    raw_data, _ = load_train_test_data('yeast')
    train_data, valid_data = train_valid_split(raw_data)

    X, y = raw_data.data
    if trans_name == 'fast_ica':
        from sklearn.decomposition import FastICA

        qt = FastICA(n_components=7, random_state=1)
    elif trans_name == 'quantile':
        from mindware.components.feature_engineering.transformations.utils import QuantileTransformer

        qt = QuantileTransformer()
    elif trans_name == 'variance_selector':
        from sklearn.feature_selection import VarianceThreshold

        qt = VarianceThreshold()
# NOTE: the argparse setup above this point was truncated in the source; this
# is a minimal reconstruction covering only the flags that are read below,
# with assumed defaults.
parser = argparse.ArgumentParser()
parser.add_argument('--dataset', type=str, default='credit')
parser.add_argument('--time_limit', type=int, default=1200)
parser.add_argument('--eval_type', type=str, default='holdout')
parser.add_argument('--ens_method', type=str, default='ensemble_selection',
                    choices=['none', 'bagging', 'blending', 'stacking', 'ensemble_selection'])
parser.add_argument('--n_jobs', type=int, default=1)

args = parser.parse_args()

dataset = args.dataset
time_limit = args.time_limit
eval_type = args.eval_type
n_jobs = args.n_jobs
ensemble_method = args.ens_method
if ensemble_method == 'none':
    ensemble_method = None

save_dir = './data/eval_exps/soln-ml'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

print('==> Start to evaluate with Budget %d' % time_limit)

train_data, test_data = load_train_test_data(dataset)

clf = Classifier(time_limit=time_limit,
                 output_dir=save_dir,
                 ensemble_method=ensemble_method,
                 evaluation=eval_type,
                 metric='acc',
                 n_jobs=n_jobs)
clf.fit(train_data)
pred = clf.predict(test_data)
print(accuracy_score(test_data.data[1], pred))
def evaluate_ml_algorithm(dataset,
                          algo,
                          run_id,
                          obj_metric,
                          time_limit=600,
                          seed=1,
                          task_type=None):
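    """Evaluate a single algorithm with auto-sklearn's optimizer (no ensemble).

    Restricts the search space to ``algo``, optimizes balanced accuracy on a
    67/33 holdout, refits on the full training data, and pickles the validation
    and test scores. ``save_dir``, ``add_classifier``, ``LightGBM`` and
    ``Logistic_Regression`` are assumed to be module-level definitions.
    """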
    if algo == 'lightgbm':
        _algo = ['LightGBM']
        add_classifier(LightGBM)
    elif algo == 'logistic_regression':
        _algo = ['Logistic_Regression']
        add_classifier(Logistic_Regression)
    else:
        _algo = [algo]
    print('EVALUATE-%s-%s-%s: run_id=%d' % (dataset, algo, obj_metric, run_id))
    train_data, test_data = load_train_test_data(dataset, task_type=task_type)
    if task_type in CLS_TASKS:
        task_type = BINARY_CLS if len(set(
            train_data.data[1])) == 2 else MULTICLASS_CLS
    print(set(train_data.data[1]))

    raw_data, test_raw_data = load_train_test_data(dataset,
                                                   task_type=MULTICLASS_CLS)
    X, y = raw_data.data
    X_test, y_test = test_raw_data.data
    feat_type = [
        'Categorical' if _type == CATEGORICAL else 'Numerical'
        for _type in raw_data.feature_types
    ]
    from autosklearn.metrics import balanced_accuracy as balanced_acc
    automl = AutoSklearnClassifier(
        time_left_for_this_task=int(time_limit),
        per_run_time_limit=180,
        n_jobs=1,
        include_estimators=_algo,
        initial_configurations_via_metalearning=0,
        ensemble_memory_limit=16384,
        ml_memory_limit=16384,
        # tmp_folder='/var/folders/0t/mjph32q55hd10x3qr_kdd2vw0000gn/T/autosklearn_tmp',
        ensemble_size=1,
        seed=int(seed),
        resampling_strategy='holdout',
        resampling_strategy_arguments={'train_size': 0.67})
    automl.fit(X.copy(), y.copy(), feat_type=feat_type, metric=balanced_acc)
    model_desc = automl.show_models()
    str_stats = automl.sprint_statistics()
    valid_results = automl.cv_results_['mean_test_score']
    print('Eval num: %d' % (len(valid_results)))

    validation_score = np.max(valid_results)

    # Test performance.
    automl.refit(X.copy(), y.copy())
    predictions = automl.predict(X_test)
    test_score = balanced_accuracy_score(y_test, predictions)

    # Print statistics about the auto-sklearn run, such as the number of
    # iterations and the number of models that failed with a timeout.
    print(str_stats)
    print(model_desc)
    print('Validation Accuracy:', validation_score)
    print("Test Accuracy      :", test_score)

    save_path = save_dir + '%s-%s-%s-%d-%d.pkl' % (dataset, algo, obj_metric,
                                                   run_id, time_limit)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, algo, validation_score, test_score, task_type],
                    f)