Example #1
def tpe_objective_function(config):
    # The optimizer minimizes, so return the negated validation score.
    metric = get_metric('bal_acc')
    _, estimator = get_estimator(config)
    X_train, y_train = train_data.data
    X_test, y_test = test_data.data
    estimator.fit(X_train, y_train)
    return -metric(estimator, X_test, y_test)
def evaluate_ml_algorithm(dataset, algo, obj_metric, seed=1, task_type=None):
    print('EVALUATE-%s-%s-%s' % (dataset, algo, obj_metric))
    train_data = load_data(dataset,
                           task_type=task_type,
                           datanode_returned=True)
    print(set(train_data.data[1]))
    metric = get_metric(obj_metric)

    cs = _classifiers[algo].get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", algo)
    cs.add_hyperparameter(model)
    default_hpo_config = cs.get_default_configuration()
    hpo_evaluator = ClassificationEvaluator(default_hpo_config,
                                            scorer=metric,
                                            data_node=train_data,
                                            name='hpo',
                                            resampling_strategy='holdout',
                                            seed=seed)
    hpo_optimizer = SMACOptimizer(evaluator=hpo_evaluator,
                                  config_space=cs,
                                  per_run_time_limit=600,
                                  per_run_mem_limit=5120,
                                  output_dir='./logs',
                                  trials_per_iter=args.iter)
    hpo_optimizer.iterate()
    hpo_eval_dict = dict()
    for key, value in hpo_optimizer.eval_dict.items():
        hpo_eval_dict[key[1]] = value

    save_path = save_dir + '%s-%s-%s-hpo.pkl' % (dataset, algo, obj_metric)
    with open(save_path, 'wb') as f:
        pickle.dump(hpo_eval_dict, f)
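A minimal follow-up sketch (it assumes the same save_dir, dataset, algo and obj_metric as above, and that the pickled values are scalar validation losses, which the original script does not show): reload the saved results and report the best configuration.

import pickle

save_path = save_dir + '%s-%s-%s-hpo.pkl' % (dataset, algo, obj_metric)
with open(save_path, 'rb') as f:
    hpo_eval_dict = pickle.load(f)

# Assuming the stored values are losses to be minimized, the smallest one wins.
best_config = min(hpo_eval_dict, key=hpo_eval_dict.get)
print('best config:', best_config, 'loss:', hpo_eval_dict[best_config])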
Example #3
File: automl.py Project: Dee-Why/hp-tuner
    def __init__(self,
                 time_limit=300,
                 dataset_name='default_name',
                 amount_of_resource=None,
                 task_type=None,
                 metric='bal_acc',
                 include_algorithms=None,
                 ensemble_method='ensemble_selection',
                 enable_meta_algorithm_selection=True,
                 enable_fe=True,
                 per_run_time_limit=150,
                 ensemble_size=50,
                 evaluation='holdout',
                 output_dir="logs",
                 logging_config=None,
                 random_state=1,
                 n_jobs=1):
        self.metric_id = metric
        self.metric = get_metric(self.metric_id)

        self.dataset_name = dataset_name
        self.time_limit = time_limit
        self.seed = random_state
        self.per_run_time_limit = per_run_time_limit
        self.output_dir = output_dir
        self.logging_config = logging_config
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)
        self.logger = self._get_logger(self.dataset_name)

        self.evaluation_type = evaluation
        self.amount_of_resource = amount_of_resource
        self.ensemble_method = ensemble_method
        self.ensemble_size = ensemble_size
        self.enable_meta_algorithm_selection = enable_meta_algorithm_selection
        self.enable_fe = enable_fe
        self.task_type = task_type
        self.n_jobs = n_jobs
        self.solver = None

        if include_algorithms is not None:
            self.include_algorithms = include_algorithms
        else:
            if task_type in CLS_TASKS:
                if task_type in [IMG_CLS, TEXT_CLS]:
                    raise ValueError(
                        'Please use AutoDL module, instead of AutoML.')
                else:
                    self.include_algorithms = list(classification_algorithms)
            elif task_type in REG_TASKS:
                self.include_algorithms = list(regression_algorithms)
            else:
                raise ValueError("Unknown task type %s" % task_type)
        if ensemble_method is not None and ensemble_method not in ensemble_list:
            raise ValueError("%s is not supported for ensemble!" %
                             ensemble_method)
Example #4
    def __init__(self, estimator, metric, task_type,
                 evaluation_strategy, **evaluation_params):
        self.estimator = estimator
        if task_type not in TASK_TYPES:
            raise ValueError('Unsupported task type: %s' % task_type)
        self.metric = get_metric(metric)
        self.evaluation_strategy = evaluation_strategy
        self.evaluation_params = evaluation_params

        if self.evaluation_strategy == 'holdout':
            if 'train_size' not in self.evaluation_params:
                self.evaluation_params['train_size'] = 0.8  # assumed default holdout ratio (value not shown in the original excerpt)
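As a hedged illustration only (scikit-learn's train_test_split is an assumption here, not part of the original evaluator), a 'holdout' strategy could then consume that train_size parameter roughly like this:

from sklearn.model_selection import train_test_split

def holdout_split(X, y, train_size=0.8, seed=1):
    # Stratified holdout split controlled by the evaluator's train_size parameter.
    return train_test_split(X, y, train_size=train_size,
                            random_state=seed, stratify=y)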
Example #5
def evaluate_ml_algorithm(dataset,
                          algo,
                          run_id,
                          obj_metric,
                          total_resource=20,
                          seed=1,
                          task_type=None):
    print('EVALUATE-%s-%s-%s: run_id=%d' % (dataset, algo, obj_metric, run_id))
    train_data, test_data = load_train_test_data(dataset, task_type=task_type)
    if task_type in CLS_TASKS:
        n_classes = len(set(train_data.data[1]))
        task_type = BINARY_CLS if n_classes == 2 else MULTICLASS_CLS
    print(set(train_data.data[1]))
    metric = get_metric(obj_metric)
    bandit = SecondLayerBandit(task_type,
                               algo,
                               train_data,
                               metric,
                               per_run_time_limit=300,
                               seed=seed,
                               eval_type='holdout',
                               fe_algo='bo',
                               total_resource=total_resource)
    bandit.optimize_fixed_pipeline()

    val_score = bandit.incumbent_perf
    best_config = bandit.inc['hpo']

    fe_optimizer = bandit.optimizer['fe']
    fe_optimizer.fetch_nodes(10)
    best_data_node = fe_optimizer.incumbent
    test_data_node = fe_optimizer.apply(test_data, best_data_node)

    estimator = fetch_predict_estimator(
        task_type,
        best_config,
        best_data_node.data[0],
        best_data_node.data[1],
        weight_balance=best_data_node.enable_balance,
        data_balance=best_data_node.data_balance)
    score = metric(estimator, test_data_node.data[0],
                   test_data_node.data[1]) * metric._sign
    print('Test score', score)

    save_path = save_dir + '%s-%s-%s-%d-%d.pkl' % (dataset, algo, obj_metric,
                                                   run_id, total_resource)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, algo, score, val_score, task_type], f)
Example #6
    def __init__(self,
                 task_type=CLASSIFICATION,
                 optimizer_type='eval_base',
                 metric='acc',
                 trans_set=None,
                 time_budget=None,
                 maximum_evaluation_num=None,
                 time_limit_per_trans=600,
                 mem_limit_per_trans=1024,
                 fe_enabled=True,
                 evaluator=None,
                 debug=False,
                 seed=1,
                 tmp_directory='logs',
                 logging_config=None,
                 model_id=None,
                 task_id='Default'):
        self.fe_enabled = fe_enabled
        self.trans_set = trans_set
        self.maximum_evaluation_num = maximum_evaluation_num
        self.time_budget = time_budget
        self.time_limit_per_trans = time_limit_per_trans
        self.mem_limit_per_trans = mem_limit_per_trans
        self.optimizer_type = optimizer_type
        self.evaluator = evaluator
        self.optimizer = None

        self.metric = get_metric(metric)
        self.task_type = task_type
        self.task_id = task_id
        self.model_id = model_id
        self._seed = seed
        self.tmp_directory = tmp_directory
        self.logging_config = logging_config
        self._logger = self._get_logger(task_id)

        # Set up backend.
        if not os.path.exists(self.tmp_directory):
            os.makedirs(self.tmp_directory)

        # For data preprocessing.
        self.uninformative_columns, self.uninformative_idx = list(), list()
        self.variance_selector = None
        self.onehot_encoder = None
        self.label_encoder = None
Example #7
def evaluate(dataset):
    train_data, test_data = load_train_test_data(dataset, test_size=0.3, task_type=MULTICLASS_CLS)

    cs = _classifiers[algo_name].get_hyperparameter_search_space()
    default_hpo_config = cs.get_default_configuration()
    metric = get_metric('bal_acc')

    fe_cs = get_task_hyperparameter_space(0, algo_name)
    default_fe_config = fe_cs.get_default_configuration()

    evaluator = ClassificationEvaluator(default_hpo_config, default_fe_config, algo_name,
                                        data_node=train_data,
                                        scorer=metric,
                                        name='hpo',
                                        resampling_strategy='holdout',
                                        output_dir='./data/exp_sys',
                                        seed=1)

    from solnml.components.optimizers.tlbo_optimizer import TlboOptimizer

    optimizer = TlboOptimizer(evaluator, cs, time_limit=300, name='hpo')
    optimizer.run()
Example #8
File: eval_tlbo.py Project: kiminh/soln-ml
def evaluate(mode, dataset, run_id, metric):
    print(mode, dataset, run_id, metric)

    metric = get_metric(metric)
    train_data, test_data = load_train_test_data(dataset,
                                                 task_type=MULTICLASS_CLS)

    cs = _classifiers[algo_name].get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", algo_name)
    cs.add_hyperparameter(model)
    default_hpo_config = cs.get_default_configuration()

    fe_evaluator = ClassificationEvaluator(default_hpo_config,
                                           scorer=metric,
                                           name='fe',
                                           resampling_strategy='holdout',
                                           seed=1)

    hpo_evaluator = ClassificationEvaluator(default_hpo_config,
                                            scorer=metric,
                                            data_node=train_data,
                                            name='hpo',
                                            resampling_strategy='holdout',
                                            seed=1)

    fe_optimizer = BayesianOptimizationOptimizer(task_type=CLASSIFICATION,
                                                 input_data=train_data,
                                                 evaluator=fe_evaluator,
                                                 model_id=algo_name,
                                                 time_limit_per_trans=600,
                                                 mem_limit_per_trans=5120,
                                                 number_of_unit_resource=10,
                                                 seed=1)

    def objective_function(config):
        if benchmark == 'fe':
            return fe_optimizer.evaluate_function(config)
        else:
            return hpo_evaluator(config)

    if mode == 'bo':
        bo = BO(objective_function,
                config_space,
                max_runs=max_runs,
                surrogate_model='prob_rf')
        bo.run()
        print('BO result')
        print(bo.get_incumbent())
        perf = bo.history_container.incumbent_value
        runs = [bo.configurations, bo.perfs]
    elif mode == 'lite_bo':
        from litebo.facade.bo_facade import BayesianOptimization
        bo = BayesianOptimization(objective_function,
                                  config_space,
                                  max_runs=max_runs)
        bo.run()
        print('BO result')
        print(bo.get_incumbent())
        perf = bo.history_container.incumbent_value
        runs = [bo.configurations, bo.perfs]
    elif mode.startswith('tlbo'):
        _, gp_fusion = mode.split('_')
        meta_feature_vec = metafeature_dict[dataset]
        past_datasets = test_datasets.copy()
        if dataset in past_datasets:
            past_datasets.remove(dataset)
        past_history = load_runhistory(past_datasets)

        gp_models = [
            gp_models_dict[dataset_name] for dataset_name in past_datasets
        ]
        tlbo = TLBO(objective_function,
                    config_space,
                    past_history,
                    gp_models=gp_models,
                    dataset_metafeature=meta_feature_vec,
                    max_runs=max_runs,
                    gp_fusion=gp_fusion)
        tlbo.run()
        print('TLBO result')
        print(tlbo.get_incumbent())
        runs = [tlbo.configurations, tlbo.perfs]
        perf = tlbo.history_container.incumbent_value
    else:
        raise ValueError('Invalid mode.')
    file_saved = '%s_%s_%s_result_%d_%d_%s.pkl' % (mode, algo_name, dataset,
                                                   max_runs, run_id, benchmark)
    with open(data_dir + file_saved, 'wb') as f:
        pk.dump([perf, runs], f)
Example #9
File: baseline.py Project: kiminh/soln-ml
    cs.add_conditions(aug_space.get_conditions())

    for estimator_id in algorithm_candidates:
        sub_cs = get_model_config_space(estimator_id,
                                        include_estimator=False,
                                        include_aug=False)
        parent_hyperparameter = {
            'parent': estimator_choice,
            'value': estimator_id
        }
        cs.add_configuration_space(estimator_id,
                                   sub_cs,
                                   parent_hyperparameter=parent_hyperparameter)

    return cs


cs = get_pipeline_config_space(['resnet34', 'mobilenet'])
dataset = 'cifar10'
data_dir = 'data/img_datasets/%s/' % dataset
image_data = ImageDataset(data_path=data_dir, train_val_split=True)

hpo_evaluator = DLEvaluator(cs.get_default_configuration(),
                            IMG_CLS,
                            scorer=get_metric('acc'),
                            dataset=image_data,
                            device='cuda',
                            image_size=32,
                            seed=1)
hpo_evaluator(cs.get_default_configuration())
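As a follow-up to the single default-configuration call above, a hedged sketch of a small random search over the same space (ConfigurationSpace.sample_configuration is standard ConfigSpace API; the sketch assumes, as the last line suggests, that calling the evaluator returns a scalar loss):

# Evaluate a handful of random configurations from the same search space.
scores = []
for config in cs.sample_configuration(5):
    scores.append(hpo_evaluator(config))
print('best observed loss:', min(scores))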
Example #10
    def __init__(self,
                 task_type,
                 trial_num,
                 classifier_ids: List[str],
                 data: DataNode,
                 metric='acc',
                 ensemble_method='ensemble_selection',
                 ensemble_size=10,
                 per_run_time_limit=300,
                 output_dir=None,
                 dataset_name='default_dataset',
                 eval_type='holdout',
                 share_feature=False,
                 inner_opt_algorithm='rb',
                 fe_algo='bo',
                 time_limit=None,
                 n_jobs=1,
                 seed=1):
        """
        :param classifier_ids: subset of {'adaboost', 'bernoulli_nb', 'decision_tree', 'extra_trees',
            'gaussian_nb', 'gradient_boosting', 'k_nearest_neighbors', 'lda', 'liblinear_svc',
            'libsvm_svc', 'multinomial_nb', 'passive_aggressive', 'qda', 'random_forest', 'sgd'}
        """
        self.timestamp = time.time()
        self.task_type = task_type
        self.metric = get_metric(metric)
        self.original_data = data.copy_()
        self.ensemble_method = ensemble_method
        self.ensemble_size = ensemble_size
        self.trial_num = trial_num
        self.n_jobs = n_jobs
        self.alpha = 4
        self.B = 0.01
        self.seed = seed
        self.shared_mode = share_feature
        self.output_dir = output_dir
        np.random.seed(self.seed)

        # Best configuration.
        self.optimal_algo_id = None
        self.nbest_algo_ids = None
        self.best_lower_bounds = None
        self.es = None

        # Set up backend.
        self.dataset_name = dataset_name
        self.time_limit = time_limit
        self.start_time = time.time()
        self.logger = get_logger('Soln-ml: %s' % dataset_name)

        # Bandit settings.
        self.incumbent_perf = -1.
        self.arms = classifier_ids
        self.include_algorithms = classifier_ids
        self.rewards = dict()
        self.sub_bandits = dict()
        self.evaluation_cost = dict()
        self.fe_datanodes = dict()
        self.eval_type = eval_type
        self.fe_algo = fe_algo
        self.inner_opt_algorithm = inner_opt_algorithm
        for arm in self.arms:
            self.rewards[arm] = list()
            self.evaluation_cost[arm] = list()
            self.fe_datanodes[arm] = list()
            self.sub_bandits[arm] = SecondLayerBandit(
                self.task_type,
                arm,
                self.original_data,
                metric=self.metric,
                output_dir=output_dir,
                per_run_time_limit=per_run_time_limit,
                share_fe=self.shared_mode,
                seed=self.seed,
                eval_type=eval_type,
                dataset_id=dataset_name,
                n_jobs=self.n_jobs,
                fe_algo=fe_algo,
                mth=inner_opt_algorithm,
            )

        self.action_sequence = list()
        self.final_rewards = list()
        self.start_time = time.time()
        self.time_records = list()
Example #11
    def __init__(self,
                 task_type,
                 trial_num,
                 classifier_ids: List[str],
                 data: DataNode,
                 include_preprocessors=None,
                 time_limit=None,
                 metric='acc',
                 ensemble_method='ensemble_selection',
                 ensemble_size=50,
                 per_run_time_limit=300,
                 output_dir="logs",
                 dataset_name='default_dataset',
                 eval_type='holdout',
                 inner_opt_algorithm='fixed',
                 enable_fe=True,
                 fe_algo='bo',
                 n_jobs=1,
                 seed=1):
        """
        :param classifier_ids: subset of {'adaboost', 'bernoulli_nb', 'decision_tree', 'extra_trees',
            'gaussian_nb', 'gradient_boosting', 'k_nearest_neighbors', 'lda', 'liblinear_svc',
            'libsvm_svc', 'multinomial_nb', 'passive_aggressive', 'qda', 'random_forest', 'sgd'}
        """
        self.timestamp = time.time()
        self.task_type = task_type
        self.include_preprocessors = include_preprocessors
        self.metric = get_metric(metric)
        self.original_data = data.copy_()
        self.ensemble_method = ensemble_method
        self.ensemble_size = ensemble_size
        self.trial_num = trial_num
        self.n_jobs = n_jobs
        self.alpha = 4
        self.seed = seed
        self.output_dir = output_dir
        self.early_stop_flag = False
        # np.random.seed(self.seed)

        # Best configuration.
        self.optimal_algo_id = None
        self.nbest_algo_ids = None
        self.best_lower_bounds = None
        self.es = None

        # Set up backend.
        self.dataset_name = dataset_name
        self.time_limit = time_limit
        self.start_time = time.time()
        self.logger = get_logger('Soln-ml: %s' % dataset_name)

        # Bandit settings.
        self.incumbent_perf = -float("INF")
        self.arms = classifier_ids
        self.include_algorithms = classifier_ids
        self.rewards = dict()
        self.sub_bandits = dict()
        self.evaluation_cost = dict()
        self.eval_type = eval_type
        self.enable_fe = enable_fe
        self.fe_algo = fe_algo
        self.inner_opt_algorithm = inner_opt_algorithm

        if not (self.time_limit is None) ^ (self.trial_num is None):
            raise ValueError('Please set exactly one of time_limit or trial_num.')

        # Record the execution cost for each arm.
        self.arm_cost_stats = dict()
        for _arm in self.arms:
            self.arm_cost_stats[_arm] = list()

        for arm in self.arms:
            self.rewards[arm] = list()
            self.evaluation_cost[arm] = list()
            self.sub_bandits[arm] = SecondLayerBandit(
                self.task_type,
                arm,
                self.original_data,
                include_preprocessors=self.include_preprocessors,
                metric=self.metric,
                output_dir=output_dir,
                per_run_time_limit=per_run_time_limit,
                seed=self.seed,
                eval_type=eval_type,
                dataset_id=dataset_name,
                n_jobs=self.n_jobs,
                fe_algo=fe_algo,
                mth=self.inner_opt_algorithm,
                timestamp=self.timestamp)

        self.action_sequence = list()
        self.final_rewards = list()
        self.start_time = time.time()
        self.time_records = list()
Example #12
def evaluate(mode, dataset, run_id, metric):
    print(mode, dataset, run_id, metric)

    metric = get_metric(metric)
    train_data, test_data = load_train_test_data(dataset,
                                                 task_type=MULTICLASS_CLS)

    cs = _classifiers[algo_name].get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", algo_name)
    cs.add_hyperparameter(model)
    default_hpo_config = cs.get_default_configuration()

    fe_evaluator = ClassificationEvaluator(default_hpo_config,
                                           scorer=metric,
                                           name='fe',
                                           resampling_strategy='holdout',
                                           seed=1)

    hpo_evaluator = ClassificationEvaluator(default_hpo_config,
                                            scorer=metric,
                                            data_node=train_data,
                                            name='hpo',
                                            resampling_strategy='holdout',
                                            seed=1)

    fe_optimizer = BayesianOptimizationOptimizer(task_type=CLASSIFICATION,
                                                 input_data=train_data,
                                                 evaluator=fe_evaluator,
                                                 model_id=algo_name,
                                                 time_limit_per_trans=600,
                                                 mem_limit_per_trans=5120,
                                                 number_of_unit_resource=10,
                                                 seed=1)

    def objective_function(config):
        if benchmark == 'fe':
            return fe_optimizer.evaluate_function(config)
        else:
            return hpo_evaluator(config)

    meta_feature_vec = metafeature_dict[dataset]
    past_datasets = test_datasets.copy()
    if dataset in past_datasets:
        past_datasets.remove(dataset)
    past_history = load_runhistory(past_datasets)

    tlbo = TLBO_AF(objective_function,
                   config_space,
                   past_history,
                   dataset_metafeature=meta_feature_vec,
                   max_runs=max_runs,
                   acq_method='taff2')

    tlbo.run()
    print('TLBO result')
    print(tlbo.get_incumbent())
    runs = [tlbo.configurations, tlbo.perfs]
    perf = tlbo.history_container.incumbent_value

    file_saved = '%s_%s_result_%d_%d_%s.pkl' % (mode, dataset, max_runs,
                                                run_id, benchmark)
    with open(data_dir + file_saved, 'wb') as f:
        pk.dump([perf, runs], f)
Example #13
    def __init__(self,
                 time_limit=300,
                 trial_num=None,
                 dataset_name='default_name',
                 task_type=IMG_CLS,
                 metric='acc',
                 include_algorithms=None,
                 ensemble_method='ensemble_selection',
                 ensemble_size=50,
                 max_epoch=150,
                 config_file_path=None,
                 evaluation='holdout',
                 logging_config=None,
                 output_dir="logs/",
                 random_state=1,
                 n_jobs=1):
        from solnml.components.models.img_classification import _classifiers as _img_estimators, _addons as _img_addons
        from solnml.components.models.text_classification import _classifiers as _text_estimators, \
            _addons as _text_addons
        from solnml.components.models.object_detection import _classifiers as _od_estimators, _addons as _od_addons

        self.metric_id = metric
        self.metric = get_metric(self.metric_id)

        self.dataset_name = dataset_name
        self.time_limit = time_limit
        self.termination_time = time.time() + self.time_limit
        self.trial_num = trial_num
        self.seed = random_state
        self.output_dir = output_dir
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)
        self.logging_config = logging_config
        self.logger = self._get_logger(self.dataset_name)

        self.evaluation_type = evaluation
        self.ensemble_method = ensemble_method
        self.ensemble_size = ensemble_size
        self.task_type = task_type
        self.n_jobs = n_jobs

        self.config_file_path = config_file_path
        self.update_cs = dict()

        if include_algorithms is not None:
            self.include_algorithms = include_algorithms
        else:
            if task_type == IMG_CLS:
                self.include_algorithms = list(_img_estimators.keys())
            elif task_type == TEXT_CLS:
                self.include_algorithms = list(_text_estimators.keys())
            elif task_type == OBJECT_DET:
                self.include_algorithms = list(_od_estimators.keys())
            else:
                raise ValueError("Unknown task type %s" % task_type)

        if task_type == IMG_CLS:
            self._estimators = _img_estimators
            self._addons = _img_addons
        elif task_type == TEXT_CLS:
            self._estimators = _text_estimators
            self._addons = _text_addons
        elif task_type == OBJECT_DET:
            self._estimators = _od_estimators
            self._addons = _od_addons
        else:
            raise ValueError("Unknown task type %s" % task_type)

        if ensemble_method is not None and ensemble_method not in ensemble_list:
            raise ValueError("%s is not supported for ensemble!" %
                             ensemble_method)
        self.es = None
        self.solvers = dict()
        self.evaluators = dict()
        # Single model.
        self.best_algo_id = None
        self.best_algo_config = None
        # Ensemble models.
        self.candidate_algo_ids = None
        self.device = 'cuda'

        # Neural architecture selection.
        self.nas_evaluator = None
        self.eval_hist_configs = dict()
        self.eval_hist_perfs = dict()

        self.max_epoch = max_epoch
        self.image_size = None
Example #14
    def __init__(self,
                 node_list,
                 node_index,
                 task_type,
                 timestamp,
                 fe_config_space: ConfigurationSpace,
                 cash_config_space: ConfigurationSpace,
                 data: DataNode,
                 fixed_config=None,
                 trial_num=0,
                 time_limit=None,
                 metric='acc',
                 ensemble_method='ensemble_selection',
                 ensemble_size=50,
                 per_run_time_limit=300,
                 output_dir="logs",
                 dataset_name='default_dataset',
                 eval_type='holdout',
                 resampling_params=None,
                 n_jobs=1,
                 seed=1):
        # Tree setting
        self.node_list = node_list
        self.node_index = node_index

        # Set up backend.
        self.dataset_name = dataset_name
        self.trial_num = trial_num
        self.time_limit = time_limit
        self.per_run_time_limit = per_run_time_limit
        self.start_time = time.time()
        self.logger = get_logger('Soln-ml: %s' % dataset_name)

        # Basic settings.
        self.eval_type = eval_type
        self.resampling_params = resampling_params
        self.task_type = task_type
        self.timestamp = timestamp
        self.fe_config_space = fe_config_space
        self.cash_config_space = cash_config_space
        self.fixed_config = fixed_config
        self.original_data = data.copy_()
        self.metric = get_metric(metric)
        self.ensemble_method = ensemble_method
        self.ensemble_size = ensemble_size
        self.n_jobs = n_jobs
        self.seed = seed
        self.output_dir = output_dir

        self.early_stop_flag = False
        self.timeout_flag = False
        self.incumbent_perf = -float("INF")
        self.incumbent = None
        self.eval_dict = dict()

        if self.task_type in CLS_TASKS:
            self.if_imbal = is_imbalanced_dataset(self.original_data)
        else:
            self.if_imbal = False

        self.es = None
Example #15
from solnml.components.utils.constants import CLASSIFICATION, REGRESSION
from solnml.datasets.utils import load_train_test_data
from solnml.components.metrics.metric import get_metric
from solnml.components.evaluators.base_evaluator import fetch_predict_estimator
from solnml.components.evaluators.cls_evaluator import ClassificationEvaluator
from solnml.components.evaluators.rgs_evaluator import RegressionEvaluator

parser = argparse.ArgumentParser()
parser.add_argument('--datasets', type=str, default='diabetes')
parser.add_argument('--metrics', type=str, default='acc')
parser.add_argument('--task', type=str, choices=['reg', 'cls'], default='cls')
parser.add_argument('--output_dir', type=str, default='./data/fe_hpo_results')
args = parser.parse_args()

dataset_list = args.datasets.split(',')
metric = get_metric(args.metrics)
algorithms = ['lightgbm', 'random_forest',
              'libsvm_svc', 'extra_trees',
              'liblinear_svc', 'k_nearest_neighbors',
              'logistic_regression',
              'gradient_boosting', 'adaboost']
task = args.task
if task == 'cls':
    from solnml.components.models.classification import _classifiers

    _estimators = _classifiers
else:
    from solnml.components.models.regression import _regressors

    _estimators = _regressors
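For reference, the parser above can also be driven programmatically; a small sketch (the dataset names 'diabetes,pc4' are only an illustration):

args = parser.parse_args(['--datasets', 'diabetes,pc4',
                          '--metrics', 'bal_acc',
                          '--task', 'cls'])
print(args.datasets.split(','))  # ['diabetes', 'pc4']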
Example #16
def evaluate(mth, dataset, run_id):
    print(mth, dataset, run_id)
    train_data, test_data = load_train_test_data(dataset,
                                                 test_size=0.3,
                                                 task_type=MULTICLASS_CLS)

    cs = _classifiers[algo_name].get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", algo_name)
    cs.add_hyperparameter(model)
    default_hpo_config = cs.get_default_configuration()
    metric = get_metric('bal_acc')

    fe_evaluator = ClassificationEvaluator(default_hpo_config,
                                           scorer=metric,
                                           name='fe',
                                           resampling_strategy='holdout',
                                           seed=1)
    fe_optimizer = BayesianOptimizationOptimizer(task_type=MULTICLASS_CLS,
                                                 input_data=train_data,
                                                 evaluator=fe_evaluator,
                                                 model_id=algo_name,
                                                 time_limit_per_trans=600,
                                                 mem_limit_per_trans=5120,
                                                 number_of_unit_resource=10,
                                                 seed=1)
    config_space = fe_optimizer.hyperparameter_space

    def objective_function(config):
        return fe_optimizer.evaluate_function(config)

    if mth == 'gp_bo':
        bo = BO(objective_function, config_space, max_runs=max_runs)
        bo.run()
        print('new BO result')
        print(bo.get_incumbent())
        perf_bo = bo.history_container.incumbent_value
    elif mth == 'lite_bo':
        from litebo.facade.bo_facade import BayesianOptimization
        bo = BayesianOptimization(objective_function,
                                  config_space,
                                  max_runs=max_runs)
        bo.run()
        print('lite BO result')
        print(bo.get_incumbent())
        perf_bo = bo.history_container.incumbent_value
    elif mth == 'smac':
        from smac.scenario.scenario import Scenario
        from smac.facade.smac_facade import SMAC
        # Scenario object
        scenario = Scenario({
            "run_obj": "quality",
            "runcount-limit": max_runs,
            "cs": config_space,
            "deterministic": "true"
        })
        smac = SMAC(scenario=scenario,
                    rng=np.random.RandomState(42),
                    tae_runner=objective_function)
        incumbent = smac.optimize()
        perf_bo = objective_function(incumbent)
        print('SMAC BO result')
        print(perf_bo)
    else:
        raise ValueError('Invalid method.')
    return perf_bo
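A hypothetical driver loop for the function above (it assumes the module-level globals evaluate() relies on, such as algo_name and max_runs, are already defined; 'pc4' is only an illustrative dataset name):

# Repeat the comparison across optimizers and seeds.
for run_id in range(5):
    for mth in ['gp_bo', 'lite_bo', 'smac']:
        perf = evaluate(mth, 'pc4', run_id)
        print(mth, run_id, perf)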