def test_search_space_transform_encoding() -> None:
    trans = _SearchSpaceTransform({"x0": IntUniformDistribution(0, 3)})
    assert len(trans.column_to_encoded_columns) == 1
    numpy.testing.assert_equal(trans.column_to_encoded_columns[0], numpy.array([0]))
    numpy.testing.assert_equal(trans.encoded_column_to_column, numpy.array([0]))

    trans = _SearchSpaceTransform({"x0": CategoricalDistribution(["foo", "bar", "baz"])})
    assert len(trans.column_to_encoded_columns) == 1
    numpy.testing.assert_equal(trans.column_to_encoded_columns[0], numpy.array([0, 1, 2]))
    numpy.testing.assert_equal(trans.encoded_column_to_column, numpy.array([0, 0, 0]))

    trans = _SearchSpaceTransform(
        {
            "x0": UniformDistribution(0, 3),
            "x1": CategoricalDistribution(["foo", "bar", "baz"]),
            "x3": DiscreteUniformDistribution(0, 1, q=0.2),
        }
    )
    assert len(trans.column_to_encoded_columns) == 3
    numpy.testing.assert_equal(trans.column_to_encoded_columns[0], numpy.array([0]))
    numpy.testing.assert_equal(trans.column_to_encoded_columns[1], numpy.array([1, 2, 3]))
    numpy.testing.assert_equal(trans.column_to_encoded_columns[2], numpy.array([4]))
    numpy.testing.assert_equal(trans.encoded_column_to_column, numpy.array([0, 1, 1, 1, 2]))
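# Illustrative sketch (not part of the original tests): how a mixed search space is
# encoded. It assumes `_SearchSpaceTransform` is imported from the internal module
# `optuna._transform`, as the tests above use it; the parameter names are made up.
from optuna._transform import _SearchSpaceTransform
from optuna.distributions import CategoricalDistribution, FloatDistribution

_trans = _SearchSpaceTransform(
    {
        "lr": FloatDistribution(1e-5, 1e-1, log=True),
        "optimizer": CategoricalDistribution(["sgd", "adam", "rmsprop"]),
    }
)
# "lr" maps to one encoded column and "optimizer" is one-hot encoded into three columns,
# so `bounds` has shape (4, 2) and `encoded_column_to_column` is [0, 1, 1, 1].
print(_trans.bounds.shape, _trans.encoded_column_to_column)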
def test_search_space_transform_untransform_params() -> None:
    search_space = {
        "x0": DiscreteUniformDistribution(0, 1, q=0.2),
        "x1": CategoricalDistribution(["foo", "bar", "baz", "qux"]),
        "x2": IntLogUniformDistribution(1, 10),
        "x3": CategoricalDistribution(["quux", "quuz"]),
        "x4": UniformDistribution(2, 3),
        "x5": LogUniformDistribution(1, 10),
        "x6": IntUniformDistribution(2, 4),
        "x7": CategoricalDistribution(["corge"]),
    }

    params = {
        "x0": 0.2,
        "x1": "qux",
        "x2": 1,
        "x3": "quux",
        "x4": 2.0,
        "x5": 1.0,
        "x6": 2,
        "x7": "corge",
    }

    trans = _SearchSpaceTransform(search_space)
    trans_params = trans.transform(params)
    untrans_params = trans.untransform(trans_params)

    for name in params.keys():
        assert untrans_params[name] == params[name]
def test_search_space_transform_untransform_params() -> None:
    search_space = {
        "x0": CategoricalDistribution(["corge"]),
        "x1": CategoricalDistribution(["foo", "bar", "baz", "qux"]),
        "x2": CategoricalDistribution(["quux", "quuz"]),
        "x3": FloatDistribution(2, 3),
        "x4": FloatDistribution(-2, 2),
        "x5": FloatDistribution(1, 10, log=True),
        "x6": FloatDistribution(1, 1, log=True),
        "x7": FloatDistribution(0, 1, step=0.2),
        "x8": IntDistribution(2, 4),
        "x9": IntDistribution(1, 10, log=True),
        "x10": IntDistribution(1, 9, step=2),
    }

    params = {
        "x0": "corge",
        "x1": "qux",
        "x2": "quux",
        "x3": 2.0,
        "x4": -2,
        "x5": 1.0,
        "x6": 1.0,
        "x7": 0.2,
        "x8": 2,
        "x9": 1,
        "x10": 3,
    }

    trans = _SearchSpaceTransform(search_space)
    trans_params = trans.transform(params)
    untrans_params = trans.untransform(trans_params)

    for name in params.keys():
        assert untrans_params[name] == params[name]
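# Illustrative sketch (not part of the original tests): a minimal transform/untransform
# round trip as exercised above. Numerical values map to encoded columns and back, and
# categoricals decode from their one-hot block. The search space below is made up.
from optuna._transform import _SearchSpaceTransform
from optuna.distributions import CategoricalDistribution, FloatDistribution, IntDistribution

_search_space = {
    "x": FloatDistribution(0.0, 1.0),
    "n": IntDistribution(1, 8),
    "c": CategoricalDistribution(["a", "b"]),
}
_params = {"x": 0.25, "n": 4, "c": "b"}
_trans = _SearchSpaceTransform(_search_space)
# The round trip recovers the original external parameter values.
assert _trans.untransform(_trans.transform(_params)) == _params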
def test_crossover_deterministic(
    crossover: BaseCrossover, rand_value: float, expected_params: np.ndarray
) -> None:
    study = optuna.study.create_study()
    search_space: Dict[str, BaseDistribution] = {
        "x": FloatDistribution(1, 10),
        "y": FloatDistribution(1, 10),
    }
    numerical_transform = _SearchSpaceTransform(search_space)

    parent_params = np.array([[1.0, 2.0], [3.0, 4.0]])
    if crossover.n_parents == 3:
        parent_params = np.append(parent_params, [[5.0, 6.0]], axis=0)

    def _rand(*args: Any, **kwargs: Any) -> Any:
        if len(args) == 0:
            return rand_value
        return np.full(args[0], rand_value)

    def _normal(*args: Any, **kwargs: Any) -> Any:
        if kwargs.get("size") is None:
            return rand_value
        return np.full(kwargs.get("size"), rand_value)  # type: ignore

    rng = Mock()
    rng.rand = Mock(side_effect=_rand)
    rng.normal = Mock(side_effect=_normal)

    child_params = crossover.crossover(parent_params, rng, study, numerical_transform.bounds)
    np.testing.assert_almost_equal(child_params, expected_params)
def evaluate(
    self,
    study: Study,
    params: Optional[List[str]] = None,
    *,
    target: Optional[Callable[[FrozenTrial], float]] = None,
) -> Dict[str, float]:
    if target is None and study._is_multi_objective():
        raise ValueError(
            "If the `study` is being used for multi-objective optimization, "
            "please specify the `target`. For example, use "
            "`target=lambda t: t.values[0]` for the first objective value."
        )

    distributions = _get_distributions(study, params)
    if len(distributions) == 0:
        return OrderedDict()

    trials = []
    for trial in _filter_nonfinite(
        study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,)), target=target
    ):
        if any(name not in trial.params for name in distributions.keys()):
            continue
        trials.append(trial)

    trans = _SearchSpaceTransform(distributions, transform_log=False, transform_step=False)

    n_trials = len(trials)
    self._trans_params = numpy.empty((n_trials, trans.bounds.shape[0]), dtype=numpy.float64)
    self._trans_values = numpy.empty(n_trials, dtype=numpy.float64)

    for trial_idx, trial in enumerate(trials):
        self._trans_params[trial_idx] = trans.transform(trial.params)
        self._trans_values[trial_idx] = trial.value if target is None else target(trial)

    encoded_column_to_column = trans.encoded_column_to_column

    if self._trans_params.size == 0:  # `params` were given but as an empty list.
        return OrderedDict()

    forest = self._forest
    forest.fit(self._trans_params, self._trans_values)
    feature_importances = forest.feature_importances_
    feature_importances_reduced = numpy.zeros(len(distributions))
    numpy.add.at(feature_importances_reduced, encoded_column_to_column, feature_importances)

    param_importances = OrderedDict()
    self._param_names = list(distributions.keys())
    for i in feature_importances_reduced.argsort()[::-1]:
        param_importances[self._param_names[i]] = feature_importances_reduced[i].item()

    return param_importances
def test_search_space_transform_numerical(
    transform_log: bool,
    transform_step: bool,
    param: Any,
    distribution: BaseDistribution,
) -> None:
    trans = _SearchSpaceTransform({"x0": distribution}, transform_log, transform_step)

    expected_low = distribution.low  # type: ignore
    expected_high = distribution.high  # type: ignore
    if isinstance(distribution, LogUniformDistribution):
        if transform_log:
            expected_low = math.log(expected_low)
            expected_high = math.log(expected_high)
    elif isinstance(distribution, DiscreteUniformDistribution):
        if transform_step:
            half_step = 0.5 * distribution.q
            expected_low -= half_step
            expected_high += half_step
    elif isinstance(distribution, FloatDistribution):
        if transform_log and distribution.log:
            expected_low = math.log(expected_low)
            expected_high = math.log(expected_high)
        if transform_step and distribution.step is not None:
            half_step = 0.5 * distribution.step
            expected_low -= half_step
            expected_high += half_step
    elif isinstance(distribution, IntUniformDistribution):
        if transform_step:
            half_step = 0.5 * distribution.step
            expected_low -= half_step
            expected_high += half_step
    elif isinstance(distribution, IntLogUniformDistribution):
        if transform_step:
            half_step = 0.5
            expected_low -= half_step
            expected_high += half_step
        if transform_log:
            expected_low = math.log(expected_low)
            expected_high = math.log(expected_high)
    elif isinstance(distribution, IntDistribution):
        if transform_step:
            half_step = 0.5 * distribution.step
            expected_low -= half_step
            expected_high += half_step
        if distribution.log and transform_log:
            expected_low = math.log(expected_low)
            expected_high = math.log(expected_high)

    for bound in trans.bounds:
        assert bound[0] == expected_low
        assert bound[1] == expected_high

    trans_params = trans.transform({"x0": param})
    assert trans_params.size == 1
    assert expected_low <= trans_params <= expected_high
def test_is_compatible_search_space() -> None:
    transform = _SearchSpaceTransform(
        {
            "x0": optuna.distributions.FloatDistribution(2, 3),
            "x1": optuna.distributions.CategoricalDistribution(["foo", "bar", "baz", "qux"]),
        }
    )

    assert optuna.samplers._cmaes._is_compatible_search_space(
        transform,
        {
            "x1": optuna.distributions.CategoricalDistribution(["foo", "bar", "baz", "qux"]),
            "x0": optuna.distributions.FloatDistribution(2, 3),
        },
    )

    # Same search space size, but different param names.
    assert not optuna.samplers._cmaes._is_compatible_search_space(
        transform,
        {
            "x0": optuna.distributions.FloatDistribution(2, 3),
            "foo": optuna.distributions.CategoricalDistribution(["foo", "bar", "baz", "qux"]),
        },
    )

    # x2 is added.
    assert not optuna.samplers._cmaes._is_compatible_search_space(
        transform,
        {
            "x0": optuna.distributions.FloatDistribution(2, 3),
            "x1": optuna.distributions.CategoricalDistribution(["foo", "bar", "baz", "qux"]),
            "x2": optuna.distributions.FloatDistribution(2, 3, step=0.1),
        },
    )

    # x0 is not found.
    assert not optuna.samplers._cmaes._is_compatible_search_space(
        transform,
        {
            "x1": optuna.distributions.CategoricalDistribution(["foo", "bar", "baz", "qux"]),
        },
    )
def test_search_space_transform_values_categorical(
    param: Any, distribution: CategoricalDistribution
) -> None:
    trans = _SearchSpaceTransform({"x0": distribution})

    for bound in trans.bounds:
        assert bound[0] == 0.0
        assert bound[1] == 1.0

    trans_params = trans.transform({"x0": param})

    for trans_param in trans_params:
        assert trans_param in (0.0, 1.0)
def sample_relative(
    self, study: Study, trial: FrozenTrial, search_space: Dict[str, BaseDistribution]
) -> Dict[str, Any]:
    if search_space == {}:
        return {}

    sample = self._sample_qmc(study, search_space)
    trans = _SearchSpaceTransform(search_space)
    sample = trans.bounds[:, 0] + sample * (trans.bounds[:, 1] - trans.bounds[:, 0])
    return trans.untransform(sample[0, :])
def sample_independent(
    self,
    study: Study,
    trial: FrozenTrial,
    param_name: str,
    param_distribution: distributions.BaseDistribution,
) -> Any:
    search_space = {param_name: param_distribution}
    trans = _SearchSpaceTransform(search_space)
    trans_params = self._rng.uniform(trans.bounds[:, 0], trans.bounds[:, 1])

    return trans.untransform(trans_params)[param_name]
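# Illustrative sketch (not part of the original sampler): the uniform-in-bounds pattern
# used by `sample_independent` above, written as a standalone helper. The helper name
# and the explicit RNG argument are assumptions for the example.
import numpy as np
from optuna._transform import _SearchSpaceTransform
from optuna.distributions import BaseDistribution


def _sample_uniform(
    param_name: str, distribution: BaseDistribution, rng: np.random.RandomState
) -> object:
    trans = _SearchSpaceTransform({param_name: distribution})
    # Draw one value per encoded column, uniformly between the transformed bounds.
    trans_params = rng.uniform(trans.bounds[:, 0], trans.bounds[:, 1])
    # Map the encoded vector back to an external parameter value.
    return trans.untransform(trans_params)[param_name]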
def test_search_space_transform_shapes_dtypes(param: Any, distribution: BaseDistribution) -> None:
    trans = _SearchSpaceTransform({"x0": distribution})
    trans_params = trans.transform({"x0": param})

    if isinstance(distribution, CategoricalDistribution):
        expected_bounds_shape = (len(distribution.choices), 2)
        expected_params_shape = (len(distribution.choices),)
    else:
        expected_bounds_shape = (1, 2)
        expected_params_shape = (1,)
    assert trans.bounds.shape == expected_bounds_shape
    assert trans.bounds.dtype == numpy.float64
    assert trans_params.shape == expected_params_shape
    assert trans_params.dtype == numpy.float64
def test_crossover_numerical_distribution(crossover: BaseCrossover) -> None:
    study = optuna.study.create_study()
    rng = np.random.RandomState()
    search_space = {"x": FloatDistribution(1, 10), "y": IntDistribution(1, 10)}
    numerical_transform = _SearchSpaceTransform(search_space)

    parent_params = np.array([[1.0, 2], [3.0, 4]])
    if crossover.n_parents == 3:
        parent_params = np.append(parent_params, [[5.0, 6]], axis=0)

    child_params = crossover.crossover(parent_params, rng, study, numerical_transform.bounds)
    assert child_params.ndim == 1
    assert len(child_params) == len(search_space)
    assert np.nan not in child_params
    assert np.inf not in child_params
def test_crossover_duplicated_param_values(crossover: BaseCrossover) -> None:
    param_values = [1.0, 2.0]

    study = optuna.study.create_study()
    rng = np.random.RandomState()
    search_space = {"x": FloatDistribution(1, 10), "y": IntDistribution(1, 10)}
    numerical_transform = _SearchSpaceTransform(search_space)

    parent_params = np.array([param_values, param_values])
    if crossover.n_parents == 3:
        parent_params = np.append(parent_params, [param_values], axis=0)

    child_params = crossover.crossover(parent_params, rng, study, numerical_transform.bounds)
    assert child_params.ndim == 1
    np.testing.assert_almost_equal(child_params, param_values)
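# Illustrative sketch (not part of the original tests): the bounds array that the
# crossover operators above receive for the {"x": FloatDistribution(1, 10),
# "y": IntDistribution(1, 10)} search space. Integer bounds are widened by half a step
# when `transform_step=True`, matching the behavior checked in
# `test_search_space_transform_numerical`.
import numpy as np
from optuna._transform import _SearchSpaceTransform
from optuna.distributions import FloatDistribution, IntDistribution

_trans = _SearchSpaceTransform(
    {"x": FloatDistribution(1, 10), "y": IntDistribution(1, 10)}, transform_step=True
)
np.testing.assert_allclose(_trans.bounds, np.array([[1.0, 10.0], [0.5, 10.5]]))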
def evaluate(
    self,
    study: Study,
    params: Optional[List[str]] = None,
    *,
    target: Optional[Callable[[FrozenTrial], float]] = None,
) -> Dict[str, float]:
    if target is None and study._is_multi_objective():
        raise ValueError(
            "If the `study` is being used for multi-objective optimization, "
            "please specify the `target`. For example, use "
            "`target=lambda t: t.values[0]` for the first objective value."
        )

    distributions = _get_distributions(study, params=params)
    if params is None:
        params = list(distributions.keys())
    assert params is not None

    if len(params) == 0:
        return OrderedDict()

    trials: List[FrozenTrial] = _get_filtered_trials(study, params=params, target=target)
    trans = _SearchSpaceTransform(distributions, transform_log=False, transform_step=False)
    trans_params: np.ndarray = _get_trans_params(trials, trans)
    target_values: np.ndarray = _get_target_values(trials, target)

    forest = self._forest
    forest.fit(X=trans_params, y=target_values)

    # Create Tree Explainer object that can calculate shap values.
    explainer = TreeExplainer(forest)

    # Generate SHAP values for the parameters during the trials.
    feature_shap_values: np.ndarray = explainer.shap_values(trans_params)
    param_shap_values = np.zeros((len(trials), len(params)))
    np.add.at(param_shap_values.T, trans.encoded_column_to_column, feature_shap_values.T)

    # Calculate the mean absolute SHAP value for each parameter.
    # List of tuples ("feature_name": mean_abs_shap_value).
    mean_abs_shap_values = np.abs(param_shap_values).mean(axis=0)

    return _sort_dict_by_importance(_param_importances_to_dict(params, mean_abs_shap_values))
def crossover(
    crossover_name: str,
    study: Study,
    parent_population: Sequence[FrozenTrial],
    search_space: Dict[str, BaseDistribution],
    rng: np.random.RandomState,
    swapping_prob: float,
    dominates: Callable[[FrozenTrial, FrozenTrial, Sequence[StudyDirection]], bool],
) -> Dict[str, Any]:
    numerical_search_space: Dict[str, BaseDistribution] = {}
    numerical_distributions: List[BaseDistribution] = []
    for key, value in search_space.items():
        if isinstance(value, _NUMERICAL_DISTRIBUTIONS):
            numerical_search_space[key] = value
            numerical_distributions.append(value)

    numerical_transform: Optional[_SearchSpaceTransform] = None
    if len(numerical_distributions) != 0:
        numerical_transform = _SearchSpaceTransform(numerical_search_space)

    while True:  # Repeat while parameters lie outside search space boundaries.
        child_params = try_crossover(
            crossover_name,
            study,
            parent_population,
            search_space,
            rng,
            swapping_prob,
            dominates,
            numerical_search_space,
            numerical_distributions,
            numerical_transform,
        )

        if _is_contained(child_params, search_space):
            break

    return child_params
def evaluate(
    self,
    study: Study,
    params: Optional[List[str]] = None,
    *,
    target: Optional[Callable[[FrozenTrial], float]] = None,
) -> Dict[str, float]:
    if target is None and study._is_multi_objective():
        raise ValueError(
            "If the `study` is being used for multi-objective optimization, "
            "please specify the `target`. For example, use "
            "`target=lambda t: t.values[0]` for the first objective value."
        )

    distributions = _get_distributions(study, params=params)
    if params is None:
        params = list(distributions.keys())
    assert params is not None

    if len(params) == 0:
        return OrderedDict()

    trials: List[FrozenTrial] = _get_filtered_trials(study, params=params, target=target)
    trans = _SearchSpaceTransform(distributions, transform_log=False, transform_step=False)
    trans_params: numpy.ndarray = _get_trans_params(trials, trans)
    target_values: numpy.ndarray = _get_target_values(trials, target)

    forest = self._forest
    forest.fit(X=trans_params, y=target_values)
    feature_importances = forest.feature_importances_

    # Untransform feature importances to param importances
    # by adding up relevant feature importances.
    param_importances = numpy.zeros(len(params))
    numpy.add.at(param_importances, trans.encoded_column_to_column, feature_importances)

    return _sort_dict_by_importance(_param_importances_to_dict(params, param_importances))
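# Illustrative sketch (not part of the original evaluator): how per-encoded-column
# feature importances are folded back into per-parameter importances via
# `encoded_column_to_column`. The importance values below are made up for the example.
import numpy
from optuna._transform import _SearchSpaceTransform
from optuna.distributions import CategoricalDistribution, FloatDistribution

_trans = _SearchSpaceTransform(
    {"x": FloatDistribution(0, 1), "c": CategoricalDistribution(["a", "b", "c"])},
    transform_log=False,
    transform_step=False,
)
_feature_importances = numpy.array([0.4, 0.1, 0.3, 0.2])  # one entry per encoded column
_param_importances = numpy.zeros(2)
# Encoded column [0] belongs to "x" and columns [1, 2, 3] to "c";
# `numpy.add.at` sums the contributions per parameter.
numpy.add.at(_param_importances, _trans.encoded_column_to_column, _feature_importances)
numpy.testing.assert_allclose(_param_importances, [0.4, 0.6])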
def test_transform_untransform_params_at_bounds(
    transform_log: bool, transform_step: bool, distribution: BaseDistribution
) -> None:
    EPS = 1e-12

    # Skip the following two conditions that do not clip in `_untransform_numerical_param`:
    # 1. `IntDistribution(log=True)` without `transform_log`
    if not transform_log and (isinstance(distribution, IntDistribution) and distribution.log):
        return

    trans = _SearchSpaceTransform({"x0": distribution}, transform_log, transform_step)

    # Manually create round-off errors.
    lower_bound = trans.bounds[0][0] - EPS
    upper_bound = trans.bounds[0][1] + EPS
    trans_lower_param = _untransform_numerical_param(lower_bound, distribution, transform_log)
    trans_upper_param = _untransform_numerical_param(upper_bound, distribution, transform_log)

    assert trans_lower_param == distribution.low  # type: ignore
    assert trans_upper_param == distribution.high  # type: ignore
def sample_relative(
    self,
    study: "optuna.Study",
    trial: "optuna.trial.FrozenTrial",
    search_space: Dict[str, BaseDistribution],
) -> Dict[str, Any]:
    self._raise_error_if_multi_objective(study)

    if len(search_space) == 0:
        return {}

    completed_trials = self._get_trials(study)
    if len(completed_trials) < self._n_startup_trials:
        return {}

    if len(search_space) == 1:
        _logger.info(
            "`CmaEsSampler` only supports two or more dimensional continuous "
            "search space. `{}` is used instead of `CmaEsSampler`.".format(
                self._independent_sampler.__class__.__name__
            )
        )
        self._warn_independent_sampling = False
        return {}

    trans = _SearchSpaceTransform(search_space)

    optimizer, n_restarts = self._restore_optimizer(completed_trials)
    if optimizer is None:
        n_restarts = 0
        optimizer = self._init_optimizer(trans)

    if self._restart_strategy is None:
        generation_attr_key = "cma:generation"  # for backward compatibility
    else:
        generation_attr_key = "cma:restart_{}:generation".format(n_restarts)

    if optimizer.dim != len(trans.bounds):
        _logger.info(
            "`CmaEsSampler` does not support dynamic search space. "
            "`{}` is used instead of `CmaEsSampler`.".format(
                self._independent_sampler.__class__.__name__
            )
        )
        self._warn_independent_sampling = False
        return {}

    # TODO(c-bata): Reduce the number of wasted trials during parallel optimization.
    # See https://github.com/optuna/optuna/pull/920#discussion_r385114002 for details.
    solution_trials = [
        t
        for t in completed_trials
        if optimizer.generation == t.system_attrs.get(generation_attr_key, -1)
    ]
    if len(solution_trials) >= optimizer.population_size:
        solutions: List[Tuple[np.ndarray, float]] = []
        for t in solution_trials[: optimizer.population_size]:
            assert t.value is not None, "completed trials must have a value"
            x = trans.transform(t.params)
            y = t.value if study.direction == StudyDirection.MINIMIZE else -t.value
            solutions.append((x, y))

        optimizer.tell(solutions)

        if self._restart_strategy == "ipop" and optimizer.should_stop():
            n_restarts += 1
            generation_attr_key = "cma:restart_{}:generation".format(n_restarts)
            popsize = optimizer.population_size * self._inc_popsize
            optimizer = self._init_optimizer(
                trans, population_size=popsize, randomize_start_point=True
            )

        # Store optimizer
        optimizer_str = pickle.dumps(optimizer).hex()
        optimizer_attrs = _split_optimizer_str(optimizer_str)
        for key in optimizer_attrs:
            study._storage.set_trial_system_attr(trial._trial_id, key, optimizer_attrs[key])

    # Caution: optimizer should update its seed value
    seed = self._cma_rng.randint(1, 2**16) + trial.number
    optimizer._rng = np.random.RandomState(seed)
    params = optimizer.ask()

    study._storage.set_trial_system_attr(
        trial._trial_id, generation_attr_key, optimizer.generation
    )
    study._storage.set_trial_system_attr(trial._trial_id, "cma:n_restarts", n_restarts)

    external_values = trans.untransform(params)

    return external_values
def evaluate(
    self,
    study: Study,
    params: Optional[List[str]] = None,
    *,
    target: Optional[Callable[[FrozenTrial], float]] = None,
) -> Dict[str, float]:
    if target is None and study._is_multi_objective():
        raise ValueError(
            "If the `study` is being used for multi-objective optimization, "
            "please specify the `target`."
        )

    distributions = _get_distributions(study, params)
    if len(distributions) == 0:
        return OrderedDict()

    trials = []
    for trial in study.trials:
        if trial.state != TrialState.COMPLETE:
            continue
        if any(name not in trial.params for name in distributions.keys()):
            continue
        trials.append(trial)

    trans = _SearchSpaceTransform(distributions, transform_log=False, transform_step=False)

    n_trials = len(trials)
    trans_params = numpy.empty((n_trials, trans.bounds.shape[0]), dtype=numpy.float64)
    trans_values = numpy.empty(n_trials, dtype=numpy.float64)

    for trial_idx, trial in enumerate(trials):
        trans_params[trial_idx] = trans.transform(trial.params)
        trans_values[trial_idx] = trial.value if target is None else target(trial)

    trans_bounds = trans.bounds
    column_to_encoded_columns = trans.column_to_encoded_columns

    if trans_params.size == 0:  # `params` were given but as an empty list.
        return OrderedDict()

    # Many (deep) copies of the search spaces are required during the tree traversal and using
    # Optuna distributions will create a bottleneck.
    # Therefore, search spaces (parameter distributions) are represented by a single
    # `numpy.ndarray`, coupled with a list of flags that indicate whether they are categorical
    # or not.
    evaluator = self._evaluator
    evaluator.fit(
        X=trans_params,
        y=trans_values,
        search_spaces=trans_bounds,
        column_to_encoded_columns=column_to_encoded_columns,
    )

    importances = {}
    for i, name in enumerate(distributions.keys()):
        importance, _ = evaluator.get_importance((i,))
        importances[name] = importance

    total_importance = sum(importances.values())
    for name in importances.keys():
        importances[name] /= total_importance

    sorted_importances = OrderedDict(
        reversed(
            sorted(importances.items(), key=lambda name_and_importance: name_and_importance[1])
        )
    )
    return sorted_importances
def evaluate(
    self,
    study: Study,
    params: Optional[List[str]] = None,
    *,
    target: Optional[Callable[[FrozenTrial], float]] = None,
) -> Dict[str, float]:
    if target is None and study._is_multi_objective():
        raise ValueError(
            "If the `study` is being used for multi-objective optimization, "
            "please specify the `target`. For example, use "
            "`target=lambda t: t.values[0]` for the first objective value."
        )

    distributions = _get_distributions(study, params=params)
    if params is None:
        params = list(distributions.keys())
    assert params is not None

    # fANOVA does not support parameter distributions with a single value.
    # However, there is no reason to calculate parameter importance in such case anyway,
    # since it will always be 0 as the parameter is constant in the objective function.
    non_single_distributions = {
        name: dist for name, dist in distributions.items() if not dist.single()
    }
    single_distributions = {
        name: dist for name, dist in distributions.items() if dist.single()
    }

    if len(non_single_distributions) == 0:
        return OrderedDict()

    trials: List[FrozenTrial] = _get_filtered_trials(study, params=params, target=target)
    trans = _SearchSpaceTransform(
        non_single_distributions, transform_log=False, transform_step=False
    )
    trans_params: numpy.ndarray = _get_trans_params(trials, trans)
    target_values: numpy.ndarray = _get_target_values(trials, target)

    evaluator = self._evaluator
    evaluator.fit(
        X=trans_params,
        y=target_values,
        search_spaces=trans.bounds,
        column_to_encoded_columns=trans.column_to_encoded_columns,
    )
    param_importances = numpy.array(
        [evaluator.get_importance(i)[0] for i in range(len(non_single_distributions))]
    )
    param_importances /= numpy.sum(param_importances)

    return _sort_dict_by_importance(
        {
            **_param_importances_to_dict(non_single_distributions.keys(), param_importances),
            **_param_importances_to_dict(single_distributions.keys(), 0.0),
        }
    )
def sample_relative(
    self,
    study: Study,
    trial: FrozenTrial,
    search_space: Dict[str, BaseDistribution],
) -> Dict[str, Any]:
    assert isinstance(search_space, OrderedDict)

    trials = [t for t in study.get_trials(deepcopy=False) if t.state == TrialState.COMPLETE]

    if self._constraints_func is not None:
        self._update_trial_constraints(trials)

    if len(search_space) == 0:
        return {}

    n_trials = len(trials)
    if n_trials < self._n_startup_trials:
        return {}

    trans = _SearchSpaceTransform(search_space)
    n_objectives = len(study.directions)
    values = numpy.empty((n_trials, n_objectives), dtype=numpy.float64)
    params = numpy.empty((n_trials, trans.bounds.shape[0]), dtype=numpy.float64)
    if self._constraints_func is not None:
        n_constraints = len(next(iter(self._trial_constraints.values())))
        con = numpy.empty((n_trials, n_constraints), dtype=numpy.float64)
    else:
        con = None
    bounds = trans.bounds

    for trial_idx, trial in enumerate(trials):
        params[trial_idx] = trans.transform(trial.params)
        assert len(study.directions) == len(trial.values)

        for obj_idx, (direction, value) in enumerate(zip(study.directions, trial.values)):
            assert value is not None
            if direction == StudyDirection.MINIMIZE:  # BoTorch always assumes maximization.
                value *= -1
            values[trial_idx, obj_idx] = value

        if con is not None:
            con[trial_idx] = self._trial_constraints[trial_idx]

    values = torch.from_numpy(values)
    params = torch.from_numpy(params)
    if con is not None:
        con = torch.from_numpy(con)
    bounds = torch.from_numpy(bounds)

    if con is not None:
        if con.dim() == 1:
            con.unsqueeze_(-1)
    bounds.transpose_(0, 1)

    if self._candidates_func is None:
        self._candidates_func = _get_default_candidates_func(n_objectives=n_objectives)
    candidates = self._candidates_func(params, values, con, bounds)

    if not isinstance(candidates, torch.Tensor):
        raise TypeError("Candidates must be a torch.Tensor.")
    if candidates.dim() == 2:
        if candidates.size(0) != 1:
            raise ValueError(
                "Candidates batch optimization is not supported and the first dimension must "
                "have size 1 if candidates is a two-dimensional tensor. Actual: "
                f"{candidates.size()}."
            )
        # Batch size is one. Get rid of the batch dimension.
        candidates = candidates.squeeze(0)
    if candidates.dim() != 1:
        raise ValueError("Candidates must be one or two-dimensional.")
    if candidates.size(0) != bounds.size(1):
        raise ValueError(
            "Candidates size must match with the given bounds. Actual candidates: "
            f"{candidates.size(0)}, bounds: {bounds.size(1)}."
        )
    candidates = candidates.numpy()

    params = trans.untransform(candidates)

    # Exclude upper bounds for parameters that should have their upper bounds excluded.
    # TODO(hvy): Remove this exclusion logic when it is handled by the data transformer.
    for name, param in params.items():
        distribution = search_space[name]
        if isinstance(distribution, UniformDistribution):
            params[name] = min(params[name], search_space[name].high - 1e-8)
        elif isinstance(distribution, LogUniformDistribution):
            params[name] = min(
                params[name], math.exp(math.log(search_space[name].high) - 1e-8)
            )

    return params
def sample_relative(
    self,
    study: Study,
    trial: FrozenTrial,
    search_space: Dict[str, BaseDistribution],
) -> Dict[str, Any]:
    assert isinstance(search_space, OrderedDict)

    if len(search_space) == 0:
        return {}

    trials = [t for t in study.get_trials(deepcopy=False) if t.state == TrialState.COMPLETE]

    n_trials = len(trials)
    if n_trials < self._n_startup_trials:
        return {}

    trans = _SearchSpaceTransform(search_space)
    n_objectives = len(study.directions)
    values = numpy.empty((n_trials, n_objectives), dtype=numpy.float64)
    params = numpy.empty((n_trials, trans.bounds.shape[0]), dtype=numpy.float64)
    con = None
    bounds = trans.bounds

    for trial_idx, trial in enumerate(trials):
        params[trial_idx] = trans.transform(trial.params)
        assert len(study.directions) == len(trial.values)

        for obj_idx, (direction, value) in enumerate(zip(study.directions, trial.values)):
            assert value is not None
            if direction == StudyDirection.MINIMIZE:  # BoTorch always assumes maximization.
                value *= -1
            values[trial_idx, obj_idx] = value

        if self._constraints_func is not None:
            constraints = study._storage.get_trial_system_attrs(trial._trial_id).get(
                "botorch:constraints"
            )
            if constraints is not None:
                n_constraints = len(constraints)

                if con is None:
                    con = numpy.full((n_trials, n_constraints), numpy.nan, dtype=numpy.float64)
                elif n_constraints != con.shape[1]:
                    raise RuntimeError(
                        f"Expected {con.shape[1]} constraints but received {n_constraints}."
                    )
                con[trial_idx] = constraints

    if self._constraints_func is not None:
        if con is None:
            warnings.warn(
                "`constraints_func` was given but no call to it correctly computed "
                "constraints. Constraints passed to `candidates_func` will be `None`."
            )
        elif numpy.isnan(con).any():
            warnings.warn(
                "`constraints_func` was given but some calls to it did not correctly compute "
                "constraints. Constraints passed to `candidates_func` will contain NaN."
            )

    values = torch.from_numpy(values)
    params = torch.from_numpy(params)
    if con is not None:
        con = torch.from_numpy(con)
    bounds = torch.from_numpy(bounds)

    if con is not None:
        if con.dim() == 1:
            con.unsqueeze_(-1)
    bounds.transpose_(0, 1)

    if self._candidates_func is None:
        self._candidates_func = _get_default_candidates_func(n_objectives=n_objectives)
    candidates = self._candidates_func(params, values, con, bounds)

    if not isinstance(candidates, torch.Tensor):
        raise TypeError("Candidates must be a torch.Tensor.")
    if candidates.dim() == 2:
        if candidates.size(0) != 1:
            raise ValueError(
                "Candidates batch optimization is not supported and the first dimension must "
                "have size 1 if candidates is a two-dimensional tensor. Actual: "
                f"{candidates.size()}."
            )
        # Batch size is one. Get rid of the batch dimension.
        candidates = candidates.squeeze(0)
    if candidates.dim() != 1:
        raise ValueError("Candidates must be one or two-dimensional.")
    if candidates.size(0) != bounds.size(1):
        raise ValueError(
            "Candidates size must match with the given bounds. Actual candidates: "
            f"{candidates.size(0)}, bounds: {bounds.size(1)}."
        )
    candidates = candidates.numpy()

    params = trans.untransform(candidates)

    return params
def evaluate(
    self,
    study: Study,
    params: Optional[List[str]] = None,
    *,
    target: Optional[Callable[[FrozenTrial], float]] = None,
) -> Dict[str, float]:
    if target is None and study._is_multi_objective():
        raise ValueError(
            "If the `study` is being used for multi-objective optimization, "
            "please specify the `target`. For example, use "
            "`target=lambda t: t.values[0]` for the first objective value."
        )

    distributions = _get_distributions(study, params)
    if len(distributions) == 0:
        return OrderedDict()

    # fANOVA does not support parameter distributions with a single value.
    # However, there is no reason to calculate parameter importance in such case anyway,
    # since it will always be 0 as the parameter is constant in the objective function.
    zero_importances = {name: 0.0 for name, dist in distributions.items() if dist.single()}
    distributions = {name: dist for name, dist in distributions.items() if not dist.single()}

    trials = []
    for trial in _filter_nonfinite(
        study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,)), target=target
    ):
        if any(name not in trial.params for name in distributions.keys()):
            continue
        trials.append(trial)

    trans = _SearchSpaceTransform(distributions, transform_log=False, transform_step=False)

    n_trials = len(trials)
    trans_params = numpy.empty((n_trials, trans.bounds.shape[0]), dtype=numpy.float64)
    trans_values = numpy.empty(n_trials, dtype=numpy.float64)

    for trial_idx, trial in enumerate(trials):
        trans_params[trial_idx] = trans.transform(trial.params)
        trans_values[trial_idx] = trial.value if target is None else target(trial)

    trans_bounds = trans.bounds
    column_to_encoded_columns = trans.column_to_encoded_columns

    if trans_params.size == 0:  # `params` were given but as an empty list.
        return OrderedDict()

    # Many (deep) copies of the search spaces are required during the tree traversal and using
    # Optuna distributions will create a bottleneck.
    # Therefore, search spaces (parameter distributions) are represented by a single
    # `numpy.ndarray`, coupled with a list of flags that indicate whether they are categorical
    # or not.
    evaluator = self._evaluator
    evaluator.fit(
        X=trans_params,
        y=trans_values,
        search_spaces=trans_bounds,
        column_to_encoded_columns=column_to_encoded_columns,
    )

    importances = {}
    for i, name in enumerate(distributions.keys()):
        importance, _ = evaluator.get_importance(i)
        importances[name] = importance

    importances = {**importances, **zero_importances}
    total_importance = sum(importances.values())
    for name in importances:
        importances[name] /= total_importance

    sorted_importances = OrderedDict(
        reversed(
            sorted(importances.items(), key=lambda name_and_importance: name_and_importance[1])
        )
    )
    return sorted_importances