def prepare_optimizer(self, _arm):
    if _arm == 'fe':
        if self.update_flag[_arm] is True:
            # Build the Feature Engineering component.
            fe_evaluator = Evaluator(self.inc['hpo'], name='fe',
                                     resampling_strategy=self.evaluation_type,
                                     seed=self.seed)
            self.optimizer[_arm] = EvaluationBasedOptimizer(
                self.inc['fe'], fe_evaluator, self.classifier_id,
                self.per_run_time_limit, self.per_run_mem_limit, self.seed,
                shared_mode=self.share_fe
            )
        else:
            self.logger.info('No improvement on HPO, so use the old FE optimizer!')
    else:
        if self.update_flag[_arm] is True:
            trials_per_iter = self.optimizer['fe'].evaluation_num_last_iteration
            hpo_evaluator = Evaluator(self.config_space.get_default_configuration(),
                                      data_node=self.inc['fe'], name='hpo',
                                      resampling_strategy=self.evaluation_type,
                                      seed=self.seed)
            self.optimizer[_arm] = SMACOptimizer(
                hpo_evaluator, self.config_space, output_dir=self.output_dir,
                per_run_time_limit=self.per_run_time_limit,
                trials_per_iter=trials_per_iter // 2, seed=self.seed
            )
        else:
            self.logger.info('No improvement on FE, so use the old HPO optimizer!')
def evaluate_evaluation_based_fe(dataset, time_limit, seed=1):
    # Prepare the configuration for random forest.
    from ConfigSpace.hyperparameters import UnParametrizedHyperparameter
    from autosklearn.pipeline.components.classification.random_forest import RandomForest
    cs = RandomForest.get_hyperparameter_search_space()
    clf_hp = UnParametrizedHyperparameter("estimator", 'random_forest')
    cs.add_hyperparameter(clf_hp)
    evaluator = Evaluator(cs.get_default_configuration(), name='fe', seed=seed)

    raw_data = load_data(dataset, datanode_returned=True)
    pipeline = FEPipeline(fe_enabled=True, optimizer_type='eval_base',
                          time_budget=time_limit, evaluator=evaluator,
                          seed=seed, model_id='random_forest',
                          time_limit_per_trans=300)
    train_data = pipeline.fit_transform(raw_data)

    score = evaluator(None, data_node=train_data)
    print('==> Base validation score', score)

    save_path = proj_dir + 'data/fe_%s_%d.pkl' % (dataset, time_limit)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, score], f)
    return score
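# Usage sketch for evaluate_evaluation_based_fe: the dataset name ('pc4') and
# the 600-second budget below are illustrative assumptions; only the function
# signature above is taken as given.
def run_fe_evaluation_example():
    fe_score = evaluate_evaluation_based_fe('pc4', 600, seed=1)
    print('==> Returned validation score', fe_score)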
def evaluate_fe_bugs(dataset, run_id, time_limit, seed):
    algorithms = ['lda', 'k_nearest_neighbors', 'libsvm_svc', 'sgd',
                  'adaboost', 'random_forest', 'extra_trees', 'decision_tree']
    algo_id = np.random.choice(algorithms, 1)[0]
    task_id = '%s-fe-%s-%d' % (dataset, algo_id, run_id)
    print(task_id)

    # Prepare the configuration for the randomly chosen classifier.
    clf_class = _classifiers[algo_id]
    cs = clf_class.get_hyperparameter_search_space()
    clf_hp = UnParametrizedHyperparameter("estimator", algo_id)
    cs.add_hyperparameter(clf_hp)
    evaluator = Evaluator(cs.get_default_configuration(), name='fe', seed=seed,
                          resampling_strategy='holdout')

    pipeline = FEPipeline(fe_enabled=True, optimizer_type='eval_base',
                          time_budget=time_limit, evaluator=evaluator,
                          seed=seed, model_id=algo_id,
                          time_limit_per_trans=per_run_time_limit,
                          task_id=task_id)

    raw_data, test_raw_data = load_train_test_data(dataset)
    train_data = pipeline.fit_transform(raw_data.copy_())
    test_data = pipeline.transform(test_raw_data.copy_())
    train_data_new = pipeline.transform(raw_data.copy_())

    # Transforming the training data again must reproduce the fit_transform result.
    assert (train_data.data[0] == train_data_new.data[0]).all()
    assert (train_data.data[1] == train_data_new.data[1]).all()
    assert train_data_new == train_data

    score = evaluator(None, data_node=test_data)
    print('==> Test score', score)
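# Usage sketch for evaluate_fe_bugs: the module-level `per_run_time_limit`
# referenced above is assumed to exist and is given an illustrative value here,
# as are the dataset name and time budget below.
per_run_time_limit = 300

def run_fe_bug_checks(n_runs=3):
    for run_id in range(n_runs):
        evaluate_fe_bugs('pc4', run_id, time_limit=600, seed=1)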
def conduct_hpo(dataset='pc4', classifier_id='random_forest', iter_num=100, iter_mode=True):
    from autosklearn.pipeline.components.classification import _classifiers

    clf_class = _classifiers[classifier_id]
    cs = clf_class.get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", classifier_id)
    cs.add_hyperparameter(model)

    raw_data = load_data(dataset, datanode_returned=True)
    print(set(raw_data.data[1]))
    evaluator = Evaluator(cs.get_default_configuration(), name='hpo', data_node=raw_data)

    if not iter_mode:
        optimizer = SMACOptimizer(evaluator, cs, evaluation_limit=600, output_dir='logs')
        inc, val = optimizer.optimize()
        print(inc, val)
    else:
        import time
        _start_time = time.time()
        optimizer = SMACOptimizer(evaluator, cs, trials_per_iter=1,
                                  output_dir='logs', per_run_time_limit=180)
        results = list()
        for _iter in range(iter_num):
            perf, _, _ = optimizer.iterate()
            print(_iter, perf)
            results.append(perf)
        print(results)
        print(time.time() - _start_time)
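# Usage sketch for conduct_hpo: all argument values below are illustrative;
# only the keyword names come from the signature above.
def run_hpo_examples():
    # Iterative mode: ask the optimizer for one batch of trials per call.
    conduct_hpo(dataset='pc4', classifier_id='libsvm_svc', iter_num=20, iter_mode=True)
    # One-shot mode: spend the whole evaluation budget in a single optimize() call.
    conduct_hpo(dataset='pc4', classifier_id='libsvm_svc', iter_mode=False)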
def evaluate_metalearning_configs(first_bandit):
    score_list = []
    for config in first_bandit.meta_configs:
        try:
            config = config.get_dictionary()
            # print(config)
            arm = None
            cs = ConfigurationSpace()
            for key in config:
                key_str = key.split(":")
                if key_str[0] == 'classifier':
                    if key_str[1] == '__choice__':
                        arm = config[key]
                        cs.add_hyperparameter(
                            UnParametrizedHyperparameter("estimator", config[key]))
                    else:
                        cs.add_hyperparameter(
                            UnParametrizedHyperparameter(key_str[2], config[key]))
            if arm in first_bandit.arms:
                transformed_node = apply_metalearning_fe(
                    first_bandit.sub_bandits[arm].optimizer['fe'], config)
                default_config = cs.sample_configuration(1)
                hpo_evaluator = Evaluator(None, data_node=transformed_node, name='hpo',
                                          resampling_strategy=first_bandit.eval_type,
                                          seed=first_bandit.seed)

                start_time = time.time()
                score = 1 - hpo_evaluator(default_config)
                time_cost = time.time() - start_time
                score_list.append((arm, score, default_config, transformed_node, time_cost))
                transformed_node.score = score

                # Evaluate the default configuration of the sub-bandit.
                start_time = time.time()
                score = 1 - hpo_evaluator(first_bandit.sub_bandits[arm].default_config)
                time_cost = time.time() - start_time
                score_list.append((arm, score, first_bandit.sub_bandits[arm].default_config,
                                   transformed_node, time_cost))
                transformed_node.score = score
        except Exception as e:
            print(e)

    # Sort the meta-configs by score in descending order.
    score_list.sort(key=lambda x: x[1], reverse=True)
    meta_arms = list()
    for arm_score_config in score_list:
        if arm_score_config[0] in meta_arms:
            continue
        first_bandit.sub_bandits[arm_score_config[0]].default_config = arm_score_config[2]
        first_bandit.sub_bandits[arm_score_config[0]].collect_iter_stats(
            'fe', (arm_score_config[1], arm_score_config[4], arm_score_config[3]))
        # first_bandit.sub_bandits[arm_score_config[0]].collect_iter_stats(
        #     'hpo', (arm_score_config[1], arm_score_config[4], arm_score_config[2]))
        first_bandit.sub_bandits[arm_score_config[0]].optimizer['fe'].hp_config = arm_score_config[2]
        meta_arms.append(arm_score_config[0])
    for arm in first_bandit.arms:
        if arm not in meta_arms:
            meta_arms.append(arm)

    first_bandit.final_rewards.append(score_list[0][1])
    first_bandit.action_sequence.append(score_list[0][0])
    # Record the time cost (index 4) of the best meta-config.
    first_bandit.time_records.append(score_list[0][4])
    first_bandit.arms = meta_arms
    first_bandit.logger.info("Arms after evaluating meta-configs: " + str(first_bandit.arms))
def evaluate():
    perf = Evaluator(self.local_inc['hpo'], data_node=self.local_inc['fe'],
                     name='fe', resampling_strategy=self.evaluation_type,
                     seed=self.seed)(self.local_inc['hpo'])
    return perf
def __init__(self, classifier_id: str, data: DataNode,
             share_fe=False, output_dir='logs',
             per_run_time_limit=120, per_run_mem_limit=5120,
             eval_type='cv', dataset_id='default',
             mth='rb', sw_size=3, strategy='avg',
             n_jobs=1, seed=1):
    self.per_run_time_limit = per_run_time_limit
    self.per_run_mem_limit = per_run_mem_limit
    self.classifier_id = classifier_id
    self.evaluation_type = eval_type
    self.original_data = data.copy_()
    self.share_fe = share_fe
    self.output_dir = output_dir
    self.mth = mth
    self.strategy = strategy
    self.seed = seed
    self.sliding_window_size = sw_size
    self.logger = get_logger('%s:%s-%d=>%s' % (
        __class__.__name__, dataset_id, seed, classifier_id))
    np.random.seed(self.seed)

    # Bandit settings.
    self.arms = ['fe', 'hpo']
    self.rewards = dict()
    self.optimizer = dict()
    self.evaluation_cost = dict()
    self.inc = dict()
    self.local_inc = dict()
    for arm in self.arms:
        self.rewards[arm] = list()
        self.evaluation_cost[arm] = list()
    self.pull_cnt = 0
    self.action_sequence = list()
    self.final_rewards = list()
    self.incumbent_perf = -1.
    self.incumbent_source = None

    self.update_flag = dict()
    self.imp_rewards = dict()
    for arm in self.arms:
        self.update_flag[arm] = True
        self.imp_rewards[arm] = list()

    from autosklearn.pipeline.components.classification import _classifiers
    clf_class = _classifiers[classifier_id]
    cs = clf_class.get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", classifier_id)
    cs.add_hyperparameter(model)
    self.config_space = cs
    self.default_config = cs.get_default_configuration()
    self.config_space.seed(self.seed)

    # Build the Feature Engineering component.
    fe_evaluator = Evaluator(self.default_config, name='fe',
                             resampling_strategy=self.evaluation_type,
                             seed=self.seed)
    self.optimizer['fe'] = EvaluationBasedOptimizer(
        self.original_data, fe_evaluator, classifier_id,
        per_run_time_limit, per_run_mem_limit, self.seed,
        shared_mode=self.share_fe, n_jobs=n_jobs)
    self.inc['fe'], self.local_inc['fe'] = self.original_data, self.original_data

    # Build the HPO component.
    trials_per_iter = len(self.optimizer['fe'].trans_types)
    hpo_evaluator = Evaluator(self.default_config, data_node=self.original_data,
                              name='hpo',
                              resampling_strategy=self.evaluation_type,
                              seed=self.seed)
    if n_jobs == 1:
        self.optimizer['hpo'] = SMACOptimizer(
            hpo_evaluator, cs, output_dir=output_dir,
            per_run_time_limit=per_run_time_limit,
            trials_per_iter=trials_per_iter // 2, seed=self.seed)
    else:
        self.optimizer['hpo'] = PSMACOptimizer(
            hpo_evaluator, cs, output_dir=output_dir,
            per_run_time_limit=per_run_time_limit,
            trials_per_iter=trials_per_iter // 2, seed=self.seed,
            n_jobs=n_jobs)
    self.inc['hpo'], self.local_inc['hpo'] = self.default_config, self.default_config
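# Instantiation sketch for the constructor above. The class name
# `SecondLayerBandit` is an assumption (the class definition is not shown here),
# and the dataset/classifier choices are illustrative.
def build_bandit_example():
    raw_data = load_data('pc4', datanode_returned=True)
    bandit = SecondLayerBandit('random_forest', raw_data,
                               output_dir='logs', dataset_id='pc4', seed=1)
    return bandit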