예제 #1
0
 def test_get_conditions(self):
     """Check that ``get_conditions`` reports exactly the added conditions.

     A space holding two hyperparameters and no condition must return an
     empty list; once a single ``EqualsCondition`` has been registered,
     that same condition must be the only entry returned.
     """
     space = ConfigurationSpace()

     parent = CategoricalHyperparameter("parent", [0, 1])
     space.add_hyperparameter(parent)
     child = UniformIntegerHyperparameter("child", 0, 10)
     space.add_hyperparameter(child)

     # Nothing has been registered yet, so the list is empty.
     self.assertEqual([], space.get_conditions())

     # Register one condition linking "child" to the parent value 0.
     condition = EqualsCondition(child, parent, 0)
     space.add_condition(condition)
     self.assertEqual([condition], space.get_conditions())
 def test_get_conditions(self):
     """Verify ``get_conditions`` before and after adding one condition."""
     config_space = ConfigurationSpace()

     # Two hyperparameters, added one at a time in parent-then-child order.
     parent_hp = CategoricalHyperparameter("parent", [0, 1])
     config_space.add_hyperparameter(parent_hp)
     child_hp = UniformIntegerHyperparameter("child", 0, 10)
     config_space.add_hyperparameter(child_hp)

     # Before any condition exists the result is an empty list.
     expected = []
     self.assertEqual(expected, config_space.get_conditions())

     # After adding the condition it is the single element returned.
     equals_cond = EqualsCondition(child_hp, parent_hp, 0)
     config_space.add_condition(equals_cond)
     expected = [equals_cond]
     self.assertEqual(expected, config_space.get_conditions())
예제 #3
0
def remove_hyperparameter(
        config_space: ConfigSpace.ConfigurationSpace,
        hyperparameter_name: str) -> ConfigSpace.ConfigurationSpace:
    """Return a new configuration space without the named hyperparameter.

    The input space is not modified. A fresh space is created with the same
    ``meta`` and receives every hyperparameter except the one called
    ``hyperparameter_name``. Conditions are carried over only when neither
    their parent nor their child is the removed hyperparameter; conditions
    whose child is the removed hyperparameter are dropped.

    NOTE(review): only ``get_hyperparameters`` and ``get_conditions`` are
    copied here; forbidden clauses of the input space are not transferred.
    Conditions are accessed through ``.parent`` / ``.child``, so condition
    objects lacking these attributes are not handled -- confirm with callers.

    :param config_space: the space to copy from.
    :param hyperparameter_name: name of the hyperparameter to leave out.
    :return: the newly built configuration space.
    :raises ValueError: if the hyperparameter to remove is the parent of
        some condition.
    """
    pruned_space = ConfigSpace.ConfigurationSpace(meta=config_space.meta)

    # Copy every hyperparameter except the one being removed.
    for candidate in config_space.get_hyperparameters():
        if candidate.name == hyperparameter_name:
            continue
        pruned_space.add_hyperparameter(candidate)

    for cond in config_space.get_conditions():
        parent_name = cond.parent.name
        child_name = cond.child.name
        if parent_name == hyperparameter_name:
            # Removing a parent would leave its child with a dangling
            # condition, so this is refused.
            raise ValueError('Hyperparameter %s can not be removed '
                             'as it is a parent condition (child: %s)' %
                             (hyperparameter_name, child_name))
        if child_name == hyperparameter_name:
            # The condition only concerned the removed hyperparameter.
            continue
        pruned_space.add_condition(cond)

    return pruned_space
예제 #4
0
    def __init__(self,
                 node_list,
                 node_index,
                 task_type,
                 timestamp,
                 fe_config_space: ConfigurationSpace,
                 cash_config_space: ConfigurationSpace,
                 data: DataNode,
                 fixed_config=None,
                 time_limit=None,
                 trial_num=0,
                 metric='acc',
                 optimizer='smac',
                 ensemble_method='ensemble_selection',
                 ensemble_size=50,
                 per_run_time_limit=300,
                 output_dir="logs",
                 dataset_name='default_dataset',
                 eval_type='holdout',
                 resampling_params=None,
                 n_jobs=1,
                 seed=1):
        """Build a block that optimizes one joint configuration space.

        All arguments are forwarded unchanged to the parent constructor.
        Afterwards the hyperparameters, conditions and forbidden clauses of
        ``fe_config_space`` and ``cash_config_space`` (each skipped when
        ``None``) are merged into ``self.joint_cs``, an evaluator matching
        the task type is created, and ``self.optimizer`` is replaced by the
        object returned from ``build_hpo_optimizer``.
        """
        super(JointBlock, self).__init__(
            node_list, node_index, task_type, timestamp,
            fe_config_space, cash_config_space, data,
            fixed_config=fixed_config,
            time_limit=time_limit,
            trial_num=trial_num,
            metric=metric,
            optimizer=optimizer,
            ensemble_method=ensemble_method,
            ensemble_size=ensemble_size,
            per_run_time_limit=per_run_time_limit,
            output_dir=output_dir,
            dataset_name=dataset_name,
            eval_type=eval_type,
            resampling_params=resampling_params,
            n_jobs=n_jobs,
            seed=seed)

        self.fixed_config = fixed_config

        # Merge both spaces into one; the feature-engineering space goes
        # first, followed by the CASH space.
        joint_space = ConfigurationSpace()
        for source_space in (fe_config_space, cash_config_space):
            if source_space is None:
                continue
            joint_space.add_hyperparameters(source_space.get_hyperparameters())
            joint_space.add_conditions(source_space.get_conditions())
            joint_space.add_forbidden_clauses(source_space.get_forbiddens())
        self.joint_cs = joint_space

        # Pick the evaluator class for the task; the import stays inside the
        # branch so that only the needed module is loaded.
        is_classification = self.task_type in CLS_TASKS
        if is_classification:
            from mindware.components.evaluators.cls_evaluator import ClassificationEvaluator
            evaluator_cls = ClassificationEvaluator
        else:
            from mindware.components.evaluators.rgs_evaluator import RegressionEvaluator
            evaluator_cls = RegressionEvaluator

        evaluator_kwargs = {
            'fixed_config': fixed_config,
            'scorer': self.metric,
            'data_node': self.original_data,
            'timestamp': self.timestamp,
            'output_dir': self.output_dir,
            'seed': self.seed,
            'resampling_strategy': self.eval_type,
            'resampling_params': self.resampling_params,
        }
        if is_classification:
            # Only the classification evaluator receives the imbalance flag.
            evaluator_kwargs['if_imbal'] = self.if_imbal
        self.evaluator = evaluator_cls(**evaluator_kwargs)

        # ``self.optimizer`` is passed in as the current value and then
        # overwritten with the optimizer built over the joint space.
        hpo_optimizer = build_hpo_optimizer(
            self.eval_type,
            self.evaluator,
            self.joint_cs,
            optimizer=self.optimizer,
            output_dir=self.output_dir,
            per_run_time_limit=self.per_run_time_limit,
            inner_iter_num_per_iter=1,
            timestamp=self.timestamp,
            seed=self.seed,
            n_jobs=self.n_jobs)
        self.optimizer = hpo_optimizer
예제 #5
0
    def __init__(self,
                 node_list,
                 node_index,
                 task_type,
                 timestamp,
                 fe_config_space: ConfigurationSpace,
                 cash_config_space: ConfigurationSpace,
                 data: DataNode,
                 fixed_config=None,
                 time_limit=None,
                 trial_num=0,
                 metric='acc',
                 ensemble_method='ensemble_selection',
                 ensemble_size=50,
                 per_run_time_limit=300,
                 output_dir="logs",
                 dataset_name='default_dataset',
                 eval_type='holdout',
                 resampling_params=None,
                 n_jobs=1,
                 seed=1):
        """Create one child block per algorithm and the bandit bookkeeping.

        All arguments are forwarded to the parent constructor. The arms are
        the choices of the ``algorithm`` hyperparameter in
        ``cash_config_space``. For each arm a reduced configuration space is
        built from a constant ``algorithm`` plus every hyperparameter whose
        name prefix (text before the first ``:``) equals the arm, and a child
        block of the type returned by
        ``get_node_type(node_list, node_index + 1)`` is created over it.

        :param cash_config_space: must contain a hyperparameter named
            ``algorithm`` exposing ``choices``.
        :raises ValueError: if no time limit is set and ``trial_num`` is
            smaller than ``alpha * number of arms``.
        """
        super(ConditioningBlock,
              self).__init__(node_list,
                             node_index,
                             task_type,
                             timestamp,
                             fe_config_space,
                             cash_config_space,
                             data,
                             fixed_config=fixed_config,
                             time_limit=time_limit,
                             trial_num=trial_num,
                             metric=metric,
                             ensemble_method=ensemble_method,
                             ensemble_size=ensemble_size,
                             per_run_time_limit=per_run_time_limit,
                             output_dir=output_dir,
                             dataset_name=dataset_name,
                             eval_type=eval_type,
                             resampling_params=resampling_params,
                             n_jobs=n_jobs,
                             seed=seed)

        # Best configuration.
        self.optimal_arm = None
        self.best_lower_bounds = None

        # Bandit settings.
        self.alpha = 4
        self.arms = list(
            cash_config_space.get_hyperparameter('algorithm').choices)
        self.rewards = dict()
        self.sub_bandits = dict()
        self.evaluation_cost = dict()
        self.arm_cost_stats = dict()

        # Function-scope import kept from the original.
        # NOTE(review): presumably avoids a circular import -- confirm.
        from solnml.blocks.block_utils import get_node_type

        # These do not depend on the arm, so fetch them once instead of once
        # per loop iteration.
        hps = cash_config_space.get_hyperparameters()
        conds = cash_config_space.get_conditions()
        forbids = cash_config_space.get_forbiddens()

        for arm in self.arms:
            self.arm_cost_stats[arm] = list()
            self.rewards[arm] = list()
            self.evaluation_cost[arm] = list()

            # Reduced space: the algorithm is pinned to this arm and only the
            # hyperparameters prefixed with the arm name are kept.
            cs = ConfigurationSpace()
            cs.add_hyperparameter(Constant('algorithm', arm))
            for hp in hps:
                if hp.name.split(':')[0] == arm:
                    cs.add_hyperparameter(hp)

            # Add active conditions. Best effort: a condition that the
            # reduced space rejects is skipped (presumably because it refers
            # to hyperparameters of another arm -- verify). ``Exception`` is
            # caught instead of a bare ``except`` so that KeyboardInterrupt
            # and SystemExit still propagate.
            for cond in conds:
                try:
                    cs.add_condition(cond)
                except Exception:
                    pass

            # Add active forbidden clauses, with the same best-effort policy.
            for forbid in forbids:
                try:
                    cs.add_forbidden_clause(forbid)
                except Exception:
                    pass

            child_type = get_node_type(node_list, node_index + 1)
            # Each child gets its own copies of the spaces and of the data.
            self.sub_bandits[arm] = child_type(
                node_list,
                node_index + 1,
                task_type,
                timestamp,
                deepcopy(fe_config_space),
                deepcopy(cs),
                data.copy_(),
                fixed_config=fixed_config,
                time_limit=time_limit,
                metric=metric,
                ensemble_method=ensemble_method,
                ensemble_size=ensemble_size,
                per_run_time_limit=per_run_time_limit,
                output_dir=output_dir,
                dataset_name=dataset_name,
                eval_type=eval_type,
                resampling_params=resampling_params,
                n_jobs=n_jobs,
                seed=seed)

        self.action_sequence = list()
        self.final_rewards = list()
        self.start_time = time.time()
        self.time_records = list()

        # Initialize the parameters.
        self.pull_cnt = 0
        self.pick_id = 0
        self.update_cnt = 0
        arm_num = len(self.arms)
        self.optimal_algo_id = None
        self.arm_candidate = self.arms.copy()
        self.best_lower_bounds = np.zeros(arm_num)
        # NOTE(review): not read anywhere in the visible part of this method.
        _iter_id = 0
        if self.time_limit is None:
            # Without a time limit the trial budget must cover ``alpha``
            # trials for every arm.
            if arm_num * self.alpha > self.trial_num:
                raise ValueError('Trial number should be larger than %d.' %
                                 (arm_num * self.alpha))
        else:
            # With a time limit the trial count is set to MAX_INT.
            self.trial_num = MAX_INT