def test_get_conditions(self):
    """get_conditions() is empty for a fresh space and reflects add_condition."""
    space = ConfigurationSpace()
    parent_hp = CategoricalHyperparameter("parent", [0, 1])
    child_hp = UniformIntegerHyperparameter("child", 0, 10)
    space.add_hyperparameter(parent_hp)
    space.add_hyperparameter(child_hp)
    # Hyperparameters alone introduce no conditions.
    self.assertEqual([], space.get_conditions())
    condition = EqualsCondition(child_hp, parent_hp, 0)
    space.add_condition(condition)
    # The registered condition must now be reported back.
    self.assertEqual([condition], space.get_conditions())
def remove_hyperparameter(
        config_space: ConfigSpace.ConfigurationSpace,
        hyperparameter_name: str) -> ConfigSpace.ConfigurationSpace:
    """Return a copy of *config_space* without the named hyperparameter.

    Conditions in which the removed hyperparameter is the child are dropped
    silently; if it is the parent of any condition, removal is refused
    because its children would become unreachable.

    :param config_space: space to copy (its ``meta`` is preserved).
    :param hyperparameter_name: name of the hyperparameter to remove.
    :return: a new ``ConfigurationSpace`` without the hyperparameter.
    :raises ValueError: if the name is not present in the space, or if the
        hyperparameter is the parent of at least one condition.
    """
    present = [hp.name for hp in config_space.get_hyperparameters()]
    # Fail loudly instead of silently returning an unchanged copy.
    if hyperparameter_name not in present:
        raise ValueError('Hyperparameter %s not in configuration space'
                         % hyperparameter_name)
    config_space_prime = ConfigSpace.ConfigurationSpace(meta=config_space.meta)
    for hyperparameter in config_space.get_hyperparameters():
        if hyperparameter.name != hyperparameter_name:
            config_space_prime.add_hyperparameter(hyperparameter)
    # NOTE(review): assumes simple conditions exposing .parent/.child; a
    # conjunction condition would raise AttributeError here -- confirm the
    # callers never build conjunctions before relying on this.
    for condition in config_space.get_conditions():
        if condition.parent.name != hyperparameter_name \
                and condition.child.name != hyperparameter_name:
            config_space_prime.add_condition(condition)
        elif condition.parent.name == hyperparameter_name:
            raise ValueError('Hyperparameter %s can not be removed '
                             'as it is a parent condition (child: %s)'
                             % (hyperparameter_name, condition.child.name))
        # else: condition whose child is being removed is dropped on purpose.
    # NOTE(review): forbidden clauses of the original space are not copied
    # over -- verify whether callers depend on them being preserved.
    return config_space_prime
def __init__(self, node_list, node_index, task_type, timestamp,
             fe_config_space: ConfigurationSpace,
             cash_config_space: ConfigurationSpace,
             data: DataNode,
             fixed_config=None,
             time_limit=None,
             trial_num=0,
             metric='acc',
             optimizer='smac',
             ensemble_method='ensemble_selection',
             ensemble_size=50,
             per_run_time_limit=300,
             output_dir="logs",
             dataset_name='default_dataset',
             eval_type='holdout',
             resampling_params=None,
             n_jobs=1,
             seed=1):
    """Block that optimizes feature engineering and CASH jointly.

    Merges *fe_config_space* and *cash_config_space* into one combined
    configuration space, builds a task-appropriate evaluator, and wraps
    both in a single HPO optimizer.
    """
    super(JointBlock, self).__init__(node_list, node_index, task_type, timestamp,
                                     fe_config_space, cash_config_space, data,
                                     fixed_config=fixed_config,
                                     time_limit=time_limit,
                                     trial_num=trial_num,
                                     metric=metric,
                                     optimizer=optimizer,
                                     ensemble_method=ensemble_method,
                                     ensemble_size=ensemble_size,
                                     per_run_time_limit=per_run_time_limit,
                                     output_dir=output_dir,
                                     dataset_name=dataset_name,
                                     eval_type=eval_type,
                                     resampling_params=resampling_params,
                                     n_jobs=n_jobs,
                                     seed=seed)
    self.fixed_config = fixed_config

    # Combine configuration space: hyperparameters, conditions and
    # forbidden clauses from both sub-spaces are merged into a single
    # joint space (either sub-space may be absent).
    cs = ConfigurationSpace()
    if fe_config_space is not None:
        cs.add_hyperparameters(fe_config_space.get_hyperparameters())
        cs.add_conditions(fe_config_space.get_conditions())
        cs.add_forbidden_clauses(fe_config_space.get_forbiddens())
    if cash_config_space is not None:
        cs.add_hyperparameters(cash_config_space.get_hyperparameters())
        cs.add_conditions(cash_config_space.get_conditions())
        cs.add_forbidden_clauses(cash_config_space.get_forbiddens())
    self.joint_cs = cs

    # Define evaluator and optimizer.
    # Classification additionally passes the class-imbalance flag; the
    # regression evaluator has no such parameter.
    if self.task_type in CLS_TASKS:
        from mindware.components.evaluators.cls_evaluator import ClassificationEvaluator
        self.evaluator = ClassificationEvaluator(
            fixed_config=fixed_config,
            scorer=self.metric,
            data_node=self.original_data,
            if_imbal=self.if_imbal,
            timestamp=self.timestamp,
            output_dir=self.output_dir,
            seed=self.seed,
            resampling_strategy=self.eval_type,
            resampling_params=self.resampling_params)
    else:
        from mindware.components.evaluators.rgs_evaluator import RegressionEvaluator
        self.evaluator = RegressionEvaluator(
            fixed_config=fixed_config,
            scorer=self.metric,
            data_node=self.original_data,
            timestamp=self.timestamp,
            output_dir=self.output_dir,
            seed=self.seed,
            resampling_strategy=self.eval_type,
            resampling_params=self.resampling_params)
    # NOTE: self.optimizer is read here as the optimizer *name* (set by
    # the super constructor) and then rebound to the built optimizer object.
    self.optimizer = build_hpo_optimizer(self.eval_type, self.evaluator, self.joint_cs,
                                         optimizer=self.optimizer,
                                         output_dir=self.output_dir,
                                         per_run_time_limit=self.per_run_time_limit,
                                         inner_iter_num_per_iter=1,
                                         timestamp=self.timestamp,
                                         seed=self.seed, n_jobs=self.n_jobs)
def __init__(self, node_list, node_index, task_type, timestamp,
             fe_config_space: ConfigurationSpace,
             cash_config_space: ConfigurationSpace,
             data: DataNode,
             fixed_config=None,
             time_limit=None,
             trial_num=0,
             metric='acc',
             ensemble_method='ensemble_selection',
             ensemble_size=50,
             per_run_time_limit=300,
             output_dir="logs",
             dataset_name='default_dataset',
             eval_type='holdout',
             resampling_params=None,
             n_jobs=1,
             seed=1):
    """Bandit block that conditions on the 'algorithm' choice.

    Builds one sub-bandit (child block) per algorithm arm, each with a
    restricted configuration space where 'algorithm' is fixed to a constant.

    :param classifier_ids: subset of {'adaboost','bernoulli_nb','decision_tree',
        'extra_trees','gaussian_nb','gradient_boosting','k_nearest_neighbors',
        'lda','liblinear_svc','libsvm_svc','multinomial_nb','passive_aggressive',
        'qda','random_forest','sgd'}
    """
    super(ConditioningBlock, self).__init__(node_list, node_index, task_type, timestamp,
                                            fe_config_space, cash_config_space, data,
                                            fixed_config=fixed_config,
                                            time_limit=time_limit,
                                            trial_num=trial_num,
                                            metric=metric,
                                            ensemble_method=ensemble_method,
                                            ensemble_size=ensemble_size,
                                            per_run_time_limit=per_run_time_limit,
                                            output_dir=output_dir,
                                            dataset_name=dataset_name,
                                            eval_type=eval_type,
                                            resampling_params=resampling_params,
                                            n_jobs=n_jobs,
                                            seed=seed)
    # Best configuration.
    self.optimal_arm = None
    self.best_lower_bounds = None

    # Bandit settings.
    self.alpha = 4
    self.arms = list(cash_config_space.get_hyperparameter('algorithm').choices)
    self.rewards = dict()
    self.sub_bandits = dict()
    self.evaluation_cost = dict()
    self.arm_cost_stats = dict()
    for _arm in self.arms:
        self.arm_cost_stats[_arm] = list()

    for arm in self.arms:
        self.rewards[arm] = list()
        self.evaluation_cost[arm] = list()

        # Build the per-arm configuration space: fix 'algorithm' to this
        # arm and keep only the hyperparameters namespaced under it
        # (names are '<algorithm>:<param>').
        hps = cash_config_space.get_hyperparameters()
        cs = ConfigurationSpace()
        cs.add_hyperparameter(Constant('algorithm', arm))
        for hp in hps:
            if hp.name.split(':')[0] == arm:
                cs.add_hyperparameter(hp)

        # Add active conditions. Conditions referring to hyperparameters
        # outside this arm's subspace fail to add; that is expected, so
        # they are skipped best-effort -- but never with a bare except,
        # which would also swallow KeyboardInterrupt/SystemExit.
        conds = cash_config_space.get_conditions()
        for cond in conds:
            try:
                cs.add_condition(cond)
            except Exception:
                pass

        # Add active forbidden clauses (same best-effort filtering).
        forbids = cash_config_space.get_forbiddens()
        for forbid in forbids:
            try:
                cs.add_forbidden_clause(forbid)
            except Exception:
                pass

        # NOTE(review): this imports from 'solnml' while sibling blocks
        # import from 'mindware' -- confirm which package is canonical.
        from solnml.blocks.block_utils import get_node_type
        child_type = get_node_type(node_list, node_index + 1)
        # Each arm gets its own deep-copied spaces and data so sub-bandits
        # cannot mutate each other's state.
        self.sub_bandits[arm] = child_type(
            node_list, node_index + 1, task_type, timestamp,
            deepcopy(fe_config_space), deepcopy(cs), data.copy_(),
            fixed_config=fixed_config,
            time_limit=time_limit,
            metric=metric,
            ensemble_method=ensemble_method,
            ensemble_size=ensemble_size,
            per_run_time_limit=per_run_time_limit,
            output_dir=output_dir,
            dataset_name=dataset_name,
            eval_type=eval_type,
            resampling_params=resampling_params,
            n_jobs=n_jobs,
            seed=seed)

    self.action_sequence = list()
    self.final_rewards = list()
    self.start_time = time.time()
    self.time_records = list()

    # Initialize the parameters.
    self.pull_cnt = 0
    self.pick_id = 0
    self.update_cnt = 0
    arm_num = len(self.arms)
    self.optimal_algo_id = None
    self.arm_candidate = self.arms.copy()
    self.best_lower_bounds = np.zeros(arm_num)
    _iter_id = 0
    if self.time_limit is None:
        # Trial-budgeted mode: need at least alpha pulls per arm.
        if arm_num * self.alpha > self.trial_num:
            raise ValueError('Trial number should be larger than %d.' % (arm_num * self.alpha))
    else:
        # Time-budgeted mode: trials are effectively unbounded.
        self.trial_num = MAX_INT