Code Example #1
File: evaluate_pipeline.py  Project: kiminh/soln-ml
import time
import pickle

import numpy as np

# NOTE: hmab_flag, opt_algo, per_run_time_limit and project_dir, together with
# the solnml helpers used below (AlgorithmAdvisor, FirstLayerBandit,
# load_train_test_data, make_scorer, balanced_accuracy, is_unbalanced_dataset,
# MULTICLASS_CLS, BINARY_CLS), are module-level names defined elsewhere in
# evaluate_pipeline.py.
def evaluate_hmab(algorithms, dataset, run_id, trial_num, seed, time_limit=1200):
    print('%s-%s-%d: %d' % (hmab_flag, dataset, run_id, time_limit))
    exclude_datasets = ['gina_prior2', 'pc2', 'abalone', 'wind', 'waveform-5000(2)',
                        'page-blocks(1)', 'winequality_white', 'pollen']
    alad = AlgorithmAdvisor(task_type=MULTICLASS_CLS, n_algorithm=9,
                            metric='bal_acc', exclude_datasets=exclude_datasets)
    n_algo = 5
    assert dataset in exclude_datasets
    meta_infos = alad.fit_meta_learner()
    assert dataset not in meta_infos
    model_candidates = alad.fetch_algorithm_set(dataset)
    include_models = list()
    print(model_candidates)
    for algo in model_candidates:
        if algo in algorithms and len(include_models) < n_algo:
            include_models.append(algo)
    print('After algorithm recommendation', include_models)

    _start_time = time.time()
    train_data, test_data = load_train_test_data(dataset, task_type=MULTICLASS_CLS)
    cls_task_type = BINARY_CLS if len(set(train_data.data[1])) == 2 else MULTICLASS_CLS
    balanced_acc_metric = make_scorer(balanced_accuracy)

    if is_unbalanced_dataset(train_data):
        from solnml.components.feature_engineering.transformations.balancer.smote_balancer import DataBalancer
        train_data = DataBalancer().operate(train_data)
    bandit = FirstLayerBandit(cls_task_type, trial_num, include_models, train_data,
                              output_dir='logs',
                              per_run_time_limit=per_run_time_limit,
                              dataset_name=dataset,
                              ensemble_size=50,
                              inner_opt_algorithm=opt_algo,
                              metric=balanced_acc_metric,
                              fe_algo='bo',
                              seed=seed,
                              time_limit=time_limit,
                              eval_type='holdout')
    bandit.optimize()
    time_taken = time.time() - _start_time
    model_desc = [bandit.nbest_algo_ids, bandit.optimal_algo_id, bandit.final_rewards, bandit.action_sequence]

    validation_accuracy = np.max(bandit.final_rewards)
    best_pred = bandit._best_predict(test_data)
    test_accuracy = balanced_accuracy(test_data.data[1], best_pred)

    bandit.refit()
    es_pred = bandit._es_predict(test_data)
    test_accuracy_with_ens = balanced_accuracy(test_data.data[1], es_pred)

    data = [dataset, validation_accuracy, test_accuracy, test_accuracy_with_ens, time_taken, model_desc]
    print(model_desc)
    print(data)

    save_path = project_dir + '%s_%s_%s_%d_%d_%d_%d_%d.pkl' % (
        hmab_flag, opt_algo, dataset, trial_num, len(algorithms), seed, run_id, time_limit)
    with open(save_path, 'wb') as f:
        pickle.dump(data, f)
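
For orientation, a minimal driver for this benchmark could look like the sketch below. The algorithm names, seeds and trial budget are illustrative assumptions, not values taken from the original script.

# Hypothetical driver loop for evaluate_hmab (all values are assumptions).
algorithms = ['random_forest', 'libsvm_svc', 'k_nearest_neighbors',
              'extra_trees', 'lightgbm', 'adaboost', 'lda', 'qda',
              'logistic_regression']
for dataset in ['pc2', 'abalone', 'pollen']:   # members of exclude_datasets
    for run_id, seed in enumerate([1, 42, 4567]):
        evaluate_hmab(algorithms, dataset, run_id=run_id,
                      trial_num=100, seed=seed, time_limit=1200)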
Code Example #2
    def fit(self, train_data: DataNode, dataset_id=None):
        """
        this function includes this following two procedures.
            1. tune each algorithm's hyperparameters.
            2. engineer each algorithm's features automatically.
        :param train_data:
        :return:
        """
        if self.enable_meta_algorithm_selection:
            try:
                alad = AlgorithmAdvisor(task_type=self.task_type,
                                        n_algorithm=9,
                                        metric=self.metric_id)
                n_algo = 5
                model_candidates = alad.fetch_algorithm_set(
                    train_data, dataset_id=dataset_id)
                include_models = list()
                for algo in model_candidates:
                    if algo in self.include_algorithms and len(
                            include_models) < n_algo:
                        include_models.append(algo)
                self.include_algorithms = include_models
                self.logger.info(
                    'Executing meta-learning based algorithm recommendation!')
                self.logger.info('Algorithms recommended: %s' %
                                 ','.join(self.include_algorithms))
            except Exception as e:
                self.logger.error('Meta-learning failed: %s' % str(e))

        # Check whether this dataset is balanced or not.
        if self.task_type in CLS_TASKS and is_unbalanced_dataset(train_data):
            # self.include_algorithms = imb_classication_algorithms
            self.logger.info('Input dataset is imbalanced!')
            train_data = DataBalancer().operate(train_data)
        if self.amount_of_resource is None:
            trial_num = len(self.include_algorithms) * 30
        else:
            trial_num = self.amount_of_resource

        self.solver = FirstLayerBandit(
            self.task_type,
            trial_num,
            self.include_algorithms,
            train_data,
            per_run_time_limit=self.per_run_time_limit,
            dataset_name=self.dataset_name,
            ensemble_method=self.ensemble_method,
            ensemble_size=self.ensemble_size,
            inner_opt_algorithm='fixed',
            metric=self.metric,
            fe_algo='bo',
            seed=self.seed,
            time_limit=self.time_limit,
            eval_type=self.evaluation_type,
            output_dir=self.output_dir)
        self.solver.optimize()
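
The candidate-filtering loop shared by these examples (keep the recommended algorithms that the caller allows, in recommendation order, capped at n_algo) is a plain ordered intersection. A self-contained sketch of the pattern, using a hypothetical helper name:

def filter_candidates(recommended, allowed, cap=5):
    # Keep recommended algorithms that are also allowed, preserving the
    # recommendation order, up to `cap` entries.
    picked = []
    for algo in recommended:
        if algo in allowed and len(picked) < cap:
            picked.append(algo)
    return picked

# filter_candidates(['lightgbm', 'libsvm_svc', 'adaboost'],
#                   {'adaboost', 'lightgbm'}, cap=2) -> ['lightgbm', 'adaboost']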
Code Example #3
# NOTE: per_run_time_limit and project_dir, together with autosklearn, numpy
# (np), pickle, load_train_test_data, AlgorithmAdvisor, CATEGORICAL and
# MULTICLASS_CLS, are module-level names defined elsewhere in the benchmark
# script.
def evaluate_autosklearn(algorithms, dataset, run_id, trial_num, seed, time_limit=1200):
    print('AUSK-%s-%d: %d' % (dataset, run_id, time_limit))
    ausk_flag = 'ausk_full'
    if ausk_flag == 'ausk_alad':
        alad = AlgorithmAdvisor(task_type=MULTICLASS_CLS, n_algorithm=9, metric='acc')
        meta_infos = alad.fit_meta_learner()
        assert dataset not in meta_infos
        model_candidates = alad.fetch_algorithm_set(dataset)
        include_models = list()
        print(model_candidates)
        for algo in model_candidates:
            if algo in algorithms and len(include_models) < 3:
                include_models.append(algo)
        print('After algorithm recommendation', include_models)
        n_config_meta_learning = 0
        ensemble_size = 1
    elif ausk_flag == 'ausk_no_meta':
        include_models = algorithms
        n_config_meta_learning = 25
        ensemble_size = 1
    elif ausk_flag == 'ausk_full':
        include_models = algorithms
        n_config_meta_learning = 25
        ensemble_size = 50
    else:
        include_models = algorithms
        n_config_meta_learning = 0
        ensemble_size = 1

    automl = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=time_limit,
        per_run_time_limit=per_run_time_limit,
        include_preprocessors=None,
        exclude_preprocessors=None,
        n_jobs=1,
        include_estimators=include_models,
        ensemble_memory_limit=8192,
        ml_memory_limit=8192,
        ensemble_size=ensemble_size,
        ensemble_nbest=ensemble_size,
        initial_configurations_via_metalearning=n_config_meta_learning,
        seed=int(seed),
        resampling_strategy='holdout',
        resampling_strategy_arguments={'train_size': 0.67}
    )
    print(automl)

    train_data, test_data = load_train_test_data(dataset, task_type=MULTICLASS_CLS)
    X, y = train_data.data
    feat_type = ['Categorical' if _type == CATEGORICAL else 'Numerical'
                 for _type in train_data.feature_types]
    from autosklearn.metrics import balanced_accuracy
    automl.fit(X.copy(), y.copy(), metric=balanced_accuracy, feat_type=feat_type)
    model_desc = automl.show_models()
    print(model_desc)
    val_result = np.max(automl.cv_results_['mean_test_score'])
    print('Trial number', len(automl.cv_results_['mean_test_score']))
    print('Best validation accuracy', val_result)

    X_test, y_test = test_data.data
    automl.refit(X.copy(), y.copy())
    y_pred = automl.predict(X_test)
    metric = balanced_accuracy
    test_result = metric(y_test, y_pred)
    print('Test accuracy', test_result)
    save_path = project_dir + '%s_%s_%d_%d_%d_%d_%d.pkl' % (
        ausk_flag, dataset, trial_num, len(algorithms), seed, run_id, time_limit)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, val_result, test_result, model_desc], f)
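
Balanced accuracy, the metric used throughout these examples, is the macro-average of per-class recall. A quick sanity check with scikit-learn (assuming sklearn is available; it is not imported by the snippets above):

from sklearn.metrics import balanced_accuracy_score

y_true = [0, 0, 0, 1]
y_pred = [0, 0, 1, 1]
# recall(class 0) = 2/3, recall(class 1) = 1/1
# balanced accuracy = (2/3 + 1) / 2 = 0.8333...
print(balanced_accuracy_score(y_true, y_pred))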
Code Example #4
import os
import sys
import numpy as np

sys.path.append(os.getcwd())
from solnml.components.meta_learning.algorithm_recomendation.algorithm_advisor import AlgorithmAdvisor
from solnml.components.meta_learning.algorithm_recomendation.meta_generator import get_feature_vector
from solnml.components.utils.constants import MULTICLASS_CLS

test_datasets = ['gina_prior2', 'pc2', 'abalone', 'wind', 'waveform-5000(2)', 'page-blocks(1)', 'winequality_white', 'pollen']
alad = AlgorithmAdvisor(task_type=MULTICLASS_CLS, exclude_datasets=test_datasets, n_algorithm=5, metric='bal_acc')
meta_infos = alad.fit_meta_learner()
datasets = [item[0] for item in meta_infos]
print(datasets)


def topk(l1, l2):
    """Count how many of the top-5 entries of l1 also appear in the top-5 of l2."""
    score = 0
    for item in l1[:5]:
        if item in l2[:5]:
            score += 1
    return score
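
For instance, two rankings that share three of their top-5 entries score 3, as the toy check below illustrates (illustrative lists, not benchmark output):

assert topk(['a', 'b', 'c', 'd', 'e'],
            ['c', 'a', 'x', 'e', 'y']) == 3  # shares 'a', 'c', 'e'
assert topk(['a', 'b'], ['x', 'y']) == 0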


scores = list()
for test_dataset in test_datasets:
    # Sanity check: a held-out dataset must not appear in the meta-training set.
    print(test_dataset in datasets)
    meta_feature = get_feature_vector(test_dataset, dataset_id=test_dataset, task_type=MULTICLASS_CLS)
    algorithms, preds = alad.predict_meta_learner(meta_feature)
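    # The snippet ends here and `scores` is never populated above. A purely
    # hypothetical continuation (not from the original script) would rank the
    # algorithms by the predicted scores and compare that ranking to a
    # ground-truth ranking via topk:
    predicted_ranking = [algo for _, algo in
                         sorted(zip(preds, algorithms), reverse=True)]
    # true_ranking would come from the held-out meta-data (not shown here):
    # scores.append(topk(predicted_ranking, true_ranking))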