def conduct_hpo(optimizer='smac',
                dataset='pc4',
                classifier_id='random_forest',
                runcount_limit=100):
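    # Tune the chosen auto-sklearn classifier on `dataset` by searching its
    # hyperparameter space with SMAC (sequential) or PSMAC (parallel).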
    from autosklearn.pipeline.components.classification import _classifiers

    clf_class = _classifiers[classifier_id]
    cs = clf_class.get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", classifier_id)
    cs.add_hyperparameter(model)

    raw_data = load_data(dataset, datanode_returned=True)
    print(set(raw_data.data[1]))
    evaluator = ClassificationEvaluator(cs.get_default_configuration(),
                                        name='hpo',
                                        data_node=raw_data)

    if optimizer == 'smac':
        optimizer = SMACOptimizer(evaluator,
                                  cs,
                                  evaluation_limit=runcount_limit,
                                  output_dir='logs')
    elif optimizer == 'psmac':
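        # NOTE: `args` is not defined in this snippet; it is assumed to be a
        # module-level argparse namespace supplying args.n and args.trial.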
        optimizer = PSMACOptimizer(evaluator,
                                   cs,
                                   args.n,
                                   evaluation_limit=runcount_limit,
                                   output_dir='logs',
                                   trials_per_iter=args.trial)
    perf, cost, config = optimizer.iterate()
    print(perf, cost, config)
    perf, cost, config = optimizer.iterate()
    print(perf, cost, config)
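
A minimal usage sketch, assuming the module-level imports used above (load_data, ClassificationEvaluator, SMACOptimizer, PSMACOptimizer, UnParametrizedHyperparameter) and the argparse `args` object are in place:

conduct_hpo(optimizer='smac', dataset='pc4', classifier_id='random_forest', runcount_limit=50)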
Example #2
def conduct_hpo(dataset='pc4',
                classifier_id='random_forest',
                iter_num=100,
                run_id=0,
                seed=1):
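    # Score the default configuration first, then run `iter_num` SMAC iterations,
    # recording validation accuracy and test balanced accuracy after each step.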
    from autosklearn.pipeline.components.classification import _classifiers

    clf_class = _classifiers[classifier_id]
    cs = clf_class.get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", classifier_id)
    cs.add_hyperparameter(model)

    raw_data, test_raw_data = load_train_test_data(dataset, random_state=seed)
    evaluator = ClassificationEvaluator(cs.get_default_configuration(),
                                        name='hpo',
                                        data_node=raw_data,
                                        resampling_strategy='holdout',
                                        seed=seed)

    default_config = cs.get_default_configuration()
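    # The evaluator returns a validation error here, hence accuracy = 1 - error.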
    val_acc = 1. - evaluator(default_config)
    estimator = fetch_predict_estimator(default_config, raw_data.data[0],
                                        raw_data.data[1])
    pred = estimator.predict(test_raw_data.data[0])
    test_acc = balanced_accuracy(test_raw_data.data[1], pred)

    optimizer = SMACOptimizer(evaluator,
                              cs,
                              trials_per_iter=2,
                              output_dir='logs',
                              per_run_time_limit=180)
    task_id = 'hpo-%s-%s-%d' % (dataset, classifier_id, iter_num)

    val_acc_list, test_acc_list = [], []
    val_acc_list.append(val_acc)
    test_acc_list.append(test_acc)

    for _iter in range(iter_num):
        perf, _, config = optimizer.iterate()
        val_acc_list.append(perf)
        estimator = fetch_predict_estimator(config, raw_data.data[0],
                                            raw_data.data[1])
        pred = estimator.predict(test_raw_data.data[0])
        test_perf = balanced_accuracy(test_raw_data.data[1], pred)
        test_acc_list.append(test_perf)
        print(val_acc_list)
        print(test_acc_list)

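    # `save_dir` is assumed to be defined at module level (output directory
    # for the per-run result pickle).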
    save_path = save_dir + '%s-%d.pkl' % (task_id, run_id)
    with open(save_path, 'wb') as f:
        pickle.dump([val_acc_list, test_acc_list], f)
Example #3
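    # Rebuild the optimizer for the pulled arm only when the other arm improved in
    # the previous round (update_flag); otherwise reuse the old optimizer instance.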
    def prepare_optimizer(self, _arm):
        if _arm == 'fe':
            if self.update_flag[_arm] is True:
                # Build the Feature Engineering component.
                fe_evaluator = Evaluator(self.inc['hpo'], name='fe', resampling_strategy=self.evaluation_type,
                                         seed=self.seed)
                self.optimizer[_arm] = EvaluationBasedOptimizer(
                    self.inc['fe'], fe_evaluator,
                    self.classifier_id, self.per_run_time_limit, self.per_run_mem_limit, self.seed,
                    shared_mode=self.share_fe
                )
            else:
                self.logger.info('No improvement on HPO, so use the old FE optimizer!')
        else:
            if self.update_flag[_arm] is True:
                trials_per_iter = self.optimizer['fe'].evaluation_num_last_iteration
                hpo_evaluator = Evaluator(self.config_space.get_default_configuration(),
                                          data_node=self.inc['fe'],
                                          name='hpo',
                                          resampling_strategy=self.evaluation_type,
                                          seed=self.seed)
                self.optimizer[_arm] = SMACOptimizer(
                    hpo_evaluator, self.config_space, output_dir=self.output_dir,
                    per_run_time_limit=self.per_run_time_limit,
                    trials_per_iter=trials_per_iter // 2, seed=self.seed
                )
            else:
                self.logger.info('No improvement on FE, so use the old HPO optimizer!')
Example #4
def conduct_hpo(dataset='pc4', classifier_id='random_forest', iter_num=100, iter_mode=True):
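    # With iter_mode=True, run `iter_num` single-trial SMAC iterations and record
    # each result; otherwise hand the full budget (600 evaluations) to optimize().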
    from autosklearn.pipeline.components.classification import _classifiers

    clf_class = _classifiers[classifier_id]
    cs = clf_class.get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", classifier_id)
    cs.add_hyperparameter(model)

    raw_data = load_data(dataset, datanode_returned=True)
    print(set(raw_data.data[1]))
    evaluator = ClassificationEvaluator(cs.get_default_configuration(), name='hpo', data_node=raw_data)

    if not iter_mode:
        optimizer = SMACOptimizer(evaluator, cs, evaluation_limit=600, output_dir='logs')
        inc, val = optimizer.optimize()
        print(inc, val)
    else:
        import time
        _start_time = time.time()
        optimizer = SMACOptimizer(
            evaluator, cs, trials_per_iter=1,
            output_dir='logs', per_run_time_limit=180
        )
        results = list()
        for _iter in range(iter_num):
            perf, _, _ = optimizer.iterate()
            print(_iter, perf)
            results.append(perf)
        print(results)
        print(time.time() - _start_time)
Example #5
def evaluate_issue_source(classifier_id, dataset, opt_type='hpo'):
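    # Compare two improvement sources: hyperparameter optimization (SMAC over the
    # classifier's config space) vs. feature engineering (EvaluationBasedOptimizer),
    # measuring test performance after each of 20 iterations.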
    _start_time = time.time()
    train_data, test_data = load_train_test_data(dataset)

    from autosklearn.pipeline.components.classification import _classifiers
    clf_class = _classifiers[classifier_id]
    cs = clf_class.get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", classifier_id)
    cs.add_hyperparameter(model)
    default_config = cs.get_default_configuration()

    seed = 2343
    if opt_type == 'hpo':
        evaluator = Evaluator(default_config,
                              data_node=train_data,
                              name='hpo',
                              resampling_strategy='holdout',
                              seed=seed)
        optimizer = SMACOptimizer(evaluator,
                                  cs,
                                  output_dir='logs/',
                                  per_run_time_limit=300,
                                  trials_per_iter=5,
                                  seed=seed)
    else:
        evaluator = Evaluator(default_config,
                              name='fe',
                              resampling_strategy='holdout',
                              seed=seed)
        optimizer = EvaluationBasedOptimizer('classification', train_data,
                                             evaluator, classifier_id, 300,
                                             1024, seed)

    perf_result = list()
    for iter_id in range(20):
        optimizer.iterate()
        print('=' * 30)
        print('ITERATION: %d' % iter_id)
        if opt_type == 'hpo':
            config = optimizer.incumbent_config
            perf = evaluate(train_data, test_data, config)
        else:
            fe_train_data = optimizer.incumbent
            fe_test_data = optimizer.apply(test_data, fe_train_data)
            perf = evaluate(fe_train_data, fe_test_data, default_config)
        print(perf)
        print('=' * 30)
        perf_result.append(perf)

    print(perf_result)
Example #6
    def __init__(self, classifier_id: str, data: DataNode,
                 share_fe=False, output_dir='logs',
                 per_run_time_limit=120,
                 per_run_mem_limit=5120,
                 eval_type='cv', dataset_id='default',
                 mth='rb', sw_size=3, strategy='avg',
                 n_jobs=1, seed=1):
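        # Bandit-style coordinator over two arms: feature engineering ('fe') and
        # hyperparameter optimization ('hpo'); each arm gets its own optimizer below.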
        self.per_run_time_limit = per_run_time_limit
        self.per_run_mem_limit = per_run_mem_limit
        self.classifier_id = classifier_id
        self.evaluation_type = eval_type
        self.original_data = data.copy_()
        self.share_fe = share_fe
        self.output_dir = output_dir
        self.mth = mth
        self.strategy = strategy
        self.seed = seed
        self.sliding_window_size = sw_size
        self.logger = get_logger('%s:%s-%d=>%s' % (__class__.__name__, dataset_id, seed, classifier_id))
        np.random.seed(self.seed)

        # Bandit settings.
        self.arms = ['fe', 'hpo']
        self.rewards = dict()
        self.optimizer = dict()
        self.evaluation_cost = dict()
        self.inc = dict()
        self.local_inc = dict()
        for arm in self.arms:
            self.rewards[arm] = list()
            self.evaluation_cost[arm] = list()
        self.pull_cnt = 0
        self.action_sequence = list()
        self.final_rewards = list()
        self.incumbent_perf = -1.
        self.incumbent_source = None
        self.update_flag = dict()
        self.imp_rewards = dict()
        for arm in self.arms:
            self.update_flag[arm] = True
            self.imp_rewards[arm] = list()

        from autosklearn.pipeline.components.classification import _classifiers
        clf_class = _classifiers[classifier_id]
        cs = clf_class.get_hyperparameter_search_space()
        model = UnParametrizedHyperparameter("estimator", classifier_id)
        cs.add_hyperparameter(model)
        self.config_space = cs
        self.default_config = cs.get_default_configuration()
        self.config_space.seed(self.seed)

        # Build the Feature Engineering component.
        fe_evaluator = Evaluator(self.default_config,
                                 name='fe', resampling_strategy=self.evaluation_type,
                                 seed=self.seed)
        self.optimizer['fe'] = EvaluationBasedOptimizer(
                self.original_data, fe_evaluator,
                classifier_id, per_run_time_limit, per_run_mem_limit, self.seed,
                shared_mode=self.share_fe, n_jobs=n_jobs)
        self.inc['fe'], self.local_inc['fe'] = self.original_data, self.original_data

        # Build the HPO component.
        trials_per_iter = len(self.optimizer['fe'].trans_types)
        hpo_evaluator = Evaluator(self.default_config,
                                  data_node=self.original_data, name='hpo',
                                  resampling_strategy=self.evaluation_type,
                                  seed=self.seed)
        if n_jobs == 1:
            self.optimizer['hpo'] = SMACOptimizer(
                hpo_evaluator, cs, output_dir=output_dir, per_run_time_limit=per_run_time_limit,
                trials_per_iter=trials_per_iter // 2, seed=self.seed)
        else:
            self.optimizer['hpo'] = PSMACOptimizer(
                hpo_evaluator, cs, output_dir=output_dir, per_run_time_limit=per_run_time_limit,
                trials_per_iter=trials_per_iter // 2, seed=self.seed,
                n_jobs=n_jobs
            )
        self.inc['hpo'], self.local_inc['hpo'] = self.default_config, self.default_config