def sample_independent(
    self,
    study: Study,
    trial: FrozenTrial,
    param_name: str,
    param_distribution: BaseDistribution,
) -> Any:
    """Return the fixed value for ``param_name`` or delegate to the base sampler.

    Args:
        study: Target study object.
        trial: Target trial object.
        param_name: Name of the sampled parameter.
        param_distribution: Distribution object that specifies a prior and/or scale.

    Returns:
        The fixed value registered for ``param_name`` if present; otherwise the
        value sampled by the wrapped base sampler.
    """
    # NOTE: Membership is checked instead of `.get(param_name) is None` so that a
    # parameter explicitly fixed to ``None`` is honored instead of being re-sampled.
    if param_name not in self._fixed_params:
        # Unfixed params are sampled here.
        return self._base_sampler.sample_independent(
            study, trial, param_name, param_distribution)

    # Fixed params are returned as-is; warn if the value falls outside the
    # range of the given distribution, but still use it.
    param_value = self._fixed_params[param_name]
    param_value_in_internal_repr = param_distribution.to_internal_repr(param_value)
    if not param_distribution._contains(param_value_in_internal_repr):
        warnings.warn(
            f"Fixed parameter '{param_name}' with value {param_value} is out of range "
            f"for distribution {param_distribution}.")
    return param_value
def sample_independent(
    self,
    study: Study,
    trial: FrozenTrial,
    param_name: str,
    param_distribution: BaseDistribution,
) -> Any:
    """Return the grid value pre-assigned to this trial for ``param_name``.

    Raises:
        ValueError: If the trial has no ``grid_id`` (e.g. enqueued without all
            parameters) or ``param_name`` is absent from the grid.
    """
    system_attrs = trial.system_attrs
    if "grid_id" not in system_attrs:
        message = "All parameters must be specified when using GridSampler with enqueue_trial."
        raise ValueError(message)

    if param_name not in self._search_space:
        message = "The parameter name, {}, is not found in the given grid.".format(param_name)
        raise ValueError(message)

    # TODO(c-bata): Reduce the number of duplicated evaluations on multiple workers.
    # Current selection logic may evaluate the same parameters multiple times.
    # See https://gist.github.com/c-bata/f759f64becb24eea2040f4b2e3afce8f for details.
    grid_id = system_attrs["grid_id"]
    param_index = self._param_names.index(param_name)
    param_value = self._all_grids[grid_id][param_index]

    # Out-of-range grid values are used anyway, but the user is warned.
    internal_value = param_distribution.to_internal_repr(param_value)
    if not param_distribution._contains(internal_value):
        warnings.warn(
            f"The value `{param_value}` is out of range of the parameter `{param_name}`. "
            f"The value will be used but the actual distribution is: `{param_distribution}`."
        )

    return param_value
def sample_independent(
    self,
    study: Study,
    trial: FrozenTrial,
    param_name: str,
    param_distribution: BaseDistribution,
) -> Any:
    """Look up the grid value assigned to this trial for ``param_name``.

    Raises:
        ValueError: If ``param_name`` is not in the grid, or the grid value is
            outside the range of ``param_distribution``.
    """
    if param_name not in self._search_space:
        message = "The parameter name, {}, is not found in the given grid.".format(param_name)
        raise ValueError(message)

    # TODO(c-bata): Reduce the number of duplicated evaluations on multiple workers.
    # Current selection logic may evaluate the same parameters multiple times.
    # See https://gist.github.com/c-bata/f759f64becb24eea2040f4b2e3afce8f for details.
    grid_id = trial.system_attrs["grid_id"]
    value = self._all_grids[grid_id][self._param_names.index(param_name)]

    # Unlike the lenient variant, a grid value outside the distribution is an error.
    if not param_distribution._contains(param_distribution.to_internal_repr(value)):
        raise ValueError(
            "The value `{}` is out of range of the parameter `{}`. Please make "
            "sure the search space of the `GridSampler` only contains values "
            "consistent with the distribution specified in the objective "
            "function. The distribution is: `{}`.".format(
                value, param_name, param_distribution
            )
        )

    return value
def _suggest(self, name: str, distribution: BaseDistribution) -> Any: storage = self.storage trial_id = self._trial_id trial = storage.get_trial(trial_id) if name in trial.distributions: # No need to sample if already suggested. distributions.check_distribution_compatibility( trial.distributions[name], distribution) param_value = distribution.to_external_repr( storage.get_trial_param(trial_id, name)) else: if self._is_fixed_param(name, distribution): param_value = storage.get_trial_system_attrs( trial_id)["fixed_params"][name] elif distribution.single(): param_value = distributions._get_single_value(distribution) elif self._is_relative_param(name, distribution): param_value = self.relative_params[name] else: study = pruners._filter_study(self.study, trial) param_value = self.study.sampler.sample_independent( study, trial, name, distribution) param_value_in_internal_repr = distribution.to_internal_repr( param_value) storage.set_trial_param(trial_id, name, param_value_in_internal_repr, distribution) return param_value
def sample_independent(
    self,
    study: "optuna.study.Study",
    trial: "optuna.trial.FrozenTrial",
    param_name: str,
    param_distribution: BaseDistribution,
) -> Any:
    """Return the pre-specified value for ``param_name``.

    The value is expected to already lie within ``param_distribution``; this
    invariant is checked with an assertion.
    """
    value = self.params[param_name]
    internal_value = param_distribution.to_internal_repr(value)
    assert param_distribution._contains(internal_value)
    return value
def set_trial_param(
    self,
    trial_id: int,
    param_name: str,
    param_value_internal: float,
    distribution: distributions.BaseDistribution,
) -> None:
    """Persist one parameter of a trial into Redis with a single bulk MSET."""
    self.check_trial_is_updatable(trial_id, self.get_trial(trial_id).state)

    # Check param distribution compatibility with previous trial(s).
    study_id = self.get_study_id_from_trial_id(trial_id)
    known_dists = self._get_study_param_distribution(study_id)
    if param_name in known_dists:
        distributions.check_distribution_compatibility(
            known_dists[param_name], distribution)

    trial = self.get_trial(trial_id)
    pending: Mapping[Union[str, bytes], Union[bytes, float, int, str]] = {}

    # Record the (possibly new) distribution for the study.
    known_dists[param_name] = distribution
    pending[self._key_study_param_distribution(study_id)] = pickle.dumps(known_dists)

    # Record the external value and distribution on the trial itself.
    trial.params[param_name] = distribution.to_external_repr(param_value_internal)
    trial.distributions[param_name] = distribution
    pending[self._key_trial(trial_id)] = pickle.dumps(trial)

    # Both keys are written in one round-trip.
    self._redis.mset(pending)
def sample_independent(
    self,
    study: Study,
    trial: FrozenTrial,
    param_name: str,
    param_distribution: BaseDistribution,
) -> Any:
    """Sample ``param_name`` with TPE, falling back to random sampling during startup."""
    self._raise_error_if_multi_objective(study)

    values, scores = _get_observation_pairs(study, [param_name],
                                            self._multivariate,
                                            self._constant_liar)
    n_observations = len(scores)
    if n_observations < self._n_startup_trials:
        # Not enough completed trials yet; defer to the random sampler.
        return self._random_sampler.sample_independent(
            study, trial, param_name, param_distribution)

    # Split observations into good ("below") and bad ("above") groups and fit
    # a Parzen estimator to each.
    below, above = _split_observation_pairs(values, scores,
                                            self._gamma(n_observations))
    mpe_below = _ParzenEstimator(below, {param_name: param_distribution},
                                 self._parzen_estimator_parameters)
    mpe_above = _ParzenEstimator(above, {param_name: param_distribution},
                                 self._parzen_estimator_parameters)

    # Draw candidates from the "good" model and keep the one maximizing the
    # likelihood ratio l(x)/g(x).
    candidates = mpe_below.sample(self._rng, self._n_ei_candidates)
    ll_below = mpe_below.log_pdf(candidates)
    ll_above = mpe_above.log_pdf(candidates)
    best = TPESampler._compare(candidates, ll_below, ll_above)
    return param_distribution.to_external_repr(best[param_name])
def set_trial_param(
    self,
    trial_id: int,
    param_name: str,
    param_value_internal: float,
    distribution: distributions.BaseDistribution,
) -> None:
    """Persist one parameter of a trial into Redis inside a MULTI/EXEC pipeline."""
    self._check_trial_id(trial_id)
    self.check_trial_is_updatable(trial_id, self.get_trial(trial_id).state)

    # Check param distribution compatibility with previous trial(s).
    study_id = self.get_study_id_from_trial_id(trial_id)
    known_dists = self._get_study_param_distribution(study_id)
    if param_name in known_dists:
        distributions.check_distribution_compatibility(
            known_dists[param_name], distribution)

    trial = self.get_trial(trial_id)
    with self._redis.pipeline() as pipeline:
        pipeline.multi()
        # Record the (possibly new) distribution for the study.
        known_dists[param_name] = distribution
        pipeline.set(self._key_study_param_distribution(study_id),
                     pickle.dumps(known_dists))
        # Record the external value and distribution on the trial itself.
        trial.params[param_name] = distribution.to_external_repr(param_value_internal)
        trial.distributions[param_name] = distribution
        pipeline.set(self._key_trial(trial_id), pickle.dumps(trial))
        pipeline.execute()
def set_trial_param(
    self,
    trial_id: int,
    param_name: str,
    param_value_internal: float,
    distribution: distributions.BaseDistribution,
) -> None:
    """Record a sampled parameter on an in-memory trial using copy-on-write."""
    with self._lock:
        trial = self._get_trial(trial_id)
        self.check_trial_is_updatable(trial_id, trial.state)
        study_id = self._trial_id_to_study_id_and_number[trial_id][0]

        # Check param distribution compatibility with previous trial(s).
        known_dists = self._studies[study_id].param_distribution
        if param_name in known_dists:
            distributions.check_distribution_compatibility(
                known_dists[param_name], distribution)

        # Set param distribution.
        known_dists[param_name] = distribution

        # Shallow-copy the trial and its param containers so readers holding a
        # reference to the old trial object never observe a partial update.
        updated = copy.copy(trial)
        updated.params = copy.copy(trial.params)
        updated.params[param_name] = distribution.to_external_repr(param_value_internal)
        updated.distributions = copy.copy(trial.distributions)
        updated.distributions[param_name] = distribution
        self._set_trial(trial_id, updated)
def _is_relative_param(self, name: str, distribution: BaseDistribution) -> bool: if name not in self.relative_params: return False if name not in self.relative_search_space: raise ValueError( "The parameter '{}' was sampled by `sample_relative` method " "but it is not contained in the relative search space.".format(name) ) relative_distribution = self.relative_search_space[name] distributions.check_distribution_compatibility(relative_distribution, distribution) param_value = self.relative_params[name] param_value_in_internal_repr = distribution.to_internal_repr(param_value) return distribution._contains(param_value_in_internal_repr)
def sample_independent(
    self,
    study: Study,
    trial: FrozenTrial,
    param_name: str,
    param_distribution: BaseDistribution,
) -> Any:
    """Sample a single parameter with TPE (multi-objective aware variant).

    Falls back to the random sampler until ``self._n_startup_trials``
    observations are available; otherwise fits Parzen estimators to the
    "below" (good) and "above" (bad) observation groups and returns the
    candidate maximizing the likelihood ratio.
    """
    values, scores, violations = _get_observation_pairs(
        study,
        [param_name],
        self._multivariate,
        self._constant_liar,
        self._constraints_func is not None,
    )
    n = len(scores)
    self._log_independent_sampling(n, trial, param_name)
    if n < self._n_startup_trials:
        # Too few observations for TPE; defer to random sampling.
        return self._random_sampler.sample_independent(
            study, trial, param_name, param_distribution)
    indices_below, indices_above = _split_observation_pairs(
        scores, self._gamma(n), violations)
    # `None` items are intentionally converted to `nan` and then filtered out.
    # For `nan` conversion, the dtype must be float.
    config_values = {
        k: np.asarray(v, dtype=float)
        for k, v in values.items()
    }
    below = _build_observation_dict(config_values, indices_below)
    above = _build_observation_dict(config_values, indices_above)
    if study._is_multi_objective():
        # Multi-objective studies weight the "below" observations explicitly
        # (e.g. by hypervolume contribution — see the helper for details).
        weights_below = _calculate_weights_below_for_multi_objective(
            config_values, scores, indices_below, violations)
        mpe_below = _ParzenEstimator(
            below,
            {param_name: param_distribution},
            self._parzen_estimator_parameters,
            weights_below,
        )
    else:
        mpe_below = _ParzenEstimator(below,
                                     {param_name: param_distribution},
                                     self._parzen_estimator_parameters)
    mpe_above = _ParzenEstimator(above,
                                 {param_name: param_distribution},
                                 self._parzen_estimator_parameters)
    # Draw candidates from the "good" model and score them under both models.
    samples_below = mpe_below.sample(self._rng, self._n_ei_candidates)
    log_likelihoods_below = mpe_below.log_pdf(samples_below)
    log_likelihoods_above = mpe_above.log_pdf(samples_below)
    ret = TPESampler._compare(samples_below, log_likelihoods_below,
                              log_likelihoods_above)
    return param_distribution.to_external_repr(ret[param_name])
def _is_fixed_param(self, name: str, distribution: BaseDistribution) -> bool: system_attrs = self.storage.get_trial_system_attrs(self._trial_id) if "fixed_params" not in system_attrs: return False if name not in system_attrs["fixed_params"]: return False param_value = system_attrs["fixed_params"][name] param_value_in_internal_repr = distribution.to_internal_repr(param_value) contained = distribution._contains(param_value_in_internal_repr) if not contained: warnings.warn( "Fixed parameter '{}' with value {} is out of range " "for distribution {}.".format(name, param_value, distribution) ) return True
def _suggest(self, name: str, distribution: BaseDistribution) -> Any: if name not in self._params: raise ValueError( "The value of the parameter '{}' is not found. Please set it at " "the construction of the FrozenTrial object.".format(name)) value = self._params[name] param_value_in_internal_repr = distribution.to_internal_repr(value) if not distribution._contains(param_value_in_internal_repr): raise ValueError("The value {} of the parameter '{}' is out of " "the range of the distribution {}.".format( value, name, distribution)) if name in self._distributions: distributions.check_distribution_compatibility( self._distributions[name], distribution) self._distributions[name] = distribution return value
def test_sample_independent(sampler_class: Callable[[], BaseMultiObjectiveSampler],
                            distribution: BaseDistribution) -> None:
    """Check that independently sampled values respect the given distribution."""
    study = optuna.multi_objective.study.create_study(["minimize", "maximize"],
                                                      sampler=sampler_class())
    for _ in range(100):
        value = study.sampler.sample_independent(study, _create_new_trial(study),
                                                 "x", distribution)
        assert distribution._contains(distribution.to_internal_repr(value))

        if not isinstance(distribution, CategoricalDistribution):
            # Please see https://github.com/optuna/optuna/pull/393 why this assertion is needed.
            assert not isinstance(value, np.floating)

        if isinstance(distribution, FloatDistribution) and distribution.step is not None:
            # The sampled value must land on the discretization grid defined
            # by `low` and `step`.
            offset = (value - distribution.low) / distribution.step
            np.testing.assert_almost_equal(np.round(offset), offset)
def set_trial_param(
    self,
    trial_id: int,
    param_name: str,
    param_value_internal: float,
    distribution: distributions.BaseDistribution,
) -> None:
    """Set a trial parameter through the write-through cache.

    When the trial is cached, the cached trial and its buffered updates are
    modified under the lock and flushed; otherwise the call goes straight to
    the backend storage.
    """
    with self._lock:
        cached_trial = self._get_cached_trial(trial_id)
        if cached_trial is not None:
            self._check_trial_is_updatable(cached_trial)

            study_id, _ = self._trial_id_to_study_id_and_number[trial_id]
            cached_dist = self._studies[study_id].param_distribution.get(
                param_name, None)
            if cached_dist:
                distributions.check_distribution_compatibility(
                    cached_dist, distribution)
            else:
                # On cache miss, check compatibility against previous trials in the database
                # and INSERT immediately to prevent other processes from creating incompatible
                # ones. By INSERT, it is assumed that no previous entry has been persisted
                # already.
                self._backend._check_and_set_param_distribution(
                    study_id, trial_id, param_name, param_value_internal,
                    distribution)
                self._studies[study_id].param_distribution[
                    param_name] = distribution

            # Replace the param containers wholesale so concurrent readers of
            # the cached trial never see a half-applied update.
            params = copy.copy(cached_trial.params)
            params[param_name] = distribution.to_external_repr(
                param_value_internal)
            cached_trial.params = params

            dists = copy.copy(cached_trial.distributions)
            dists[param_name] = distribution
            cached_trial.distributions = dists

            if cached_dist:
                # Already persisted in case of cache miss so no need to update.
                updates = self._get_updates(trial_id)
                updates.params[param_name] = param_value_internal
                updates.distributions[param_name] = distribution
            self._flush_trial(trial_id)
            return

    # Trial not cached: delegate directly to the backend storage.
    self._backend.set_trial_param(trial_id, param_name, param_value_internal,
                                  distribution)
def set_trial_param(
    self,
    trial_id: int,
    param_name: str,
    param_value_internal: float,
    distribution: distributions.BaseDistribution,
) -> None:
    """Set a trial parameter, updating the local cache when the trial is cached.

    On a distribution-cache miss the backend both validates and persists the
    distribution immediately; the buffered updates are therefore only touched
    when the distribution was already cached.
    """
    with self._lock:
        cached_trial = self._get_cached_trial(trial_id)
        if cached_trial is not None:
            self._check_trial_is_updatable(cached_trial)

            study_id, _ = self._trial_id_to_study_id_and_number[trial_id]
            cached_dist = self._studies[study_id].param_distribution.get(
                param_name, None)
            if cached_dist:
                distributions.check_distribution_compatibility(
                    cached_dist, distribution)
            else:
                # Validate against the database and persist in one backend call,
                # then remember the distribution locally.
                self._backend._check_or_set_param_distribution(
                    trial_id, param_name, param_value_internal, distribution)
                self._studies[study_id].param_distribution[
                    param_name] = distribution

            # Replace the param containers wholesale so concurrent readers of
            # the cached trial never see a half-applied update.
            params = copy.copy(cached_trial.params)
            params[param_name] = distribution.to_external_repr(
                param_value_internal)
            cached_trial.params = params

            dists = copy.copy(cached_trial.distributions)
            dists[param_name] = distribution
            cached_trial.distributions = dists

            if cached_dist:
                updates = self._get_updates(trial_id)
                updates.params[param_name] = param_value_internal
                updates.distributions[param_name] = distribution
            return

    # Trial not cached: delegate directly to the backend storage.
    self._backend.set_trial_param(trial_id, param_name, param_value_internal,
                                  distribution)
def _distribution_to_dict(dist: BaseDistribution) -> Dict[str, Any]: return {"name": dist.__class__.__name__, "attributes": dist._asdict()}