Beispiel #1
0
    def sample_independent(
        self,
        study: Study,
        trial: FrozenTrial,
        param_name: str,
        param_distribution: BaseDistribution,
    ) -> Any:
        """Return the fixed value of ``param_name``, or delegate to the base sampler.

        Args:
            study: Forwarded unchanged to the base sampler for unfixed parameters.
            trial: Forwarded unchanged to the base sampler for unfixed parameters.
            param_name: Name of the parameter being sampled.
            param_distribution: Distribution the fixed value is checked against.

        Returns:
            The entry of ``self._fixed_params`` for ``param_name`` when one exists
            (including an explicit ``None``), otherwise whatever
            ``self._base_sampler.sample_independent`` returns.
        """
        # Decide by key membership rather than ``.get(...) is None``: a parameter that is
        # deliberately fixed to ``None`` must not be mistaken for an unfixed parameter.
        if param_name not in self._fixed_params:
            # Unfixed params are sampled here.
            return self._base_sampler.sample_independent(
                study, trial, param_name, param_distribution)

        # Fixed params are sampled here.
        param_value = self._fixed_params[param_name]

        # Check if the fixed value is contained in the range of this distribution.
        # An out-of-range value only triggers a warning; it is still returned.
        param_value_in_internal_repr = param_distribution.to_internal_repr(
            param_value)
        if not param_distribution._contains(param_value_in_internal_repr):
            warnings.warn(
                f"Fixed parameter '{param_name}' with value {param_value} is out of range "
                f"for distribution {param_distribution}.")
        return param_value
Beispiel #2
0
    def sample_independent(
        self,
        study: Study,
        trial: FrozenTrial,
        param_name: str,
        param_distribution: BaseDistribution,
    ) -> Any:
        """Look up the value of ``param_name`` in the grid assigned to ``trial``.

        Args:
            study: Unused by this method.
            trial: Trial whose ``system_attrs["grid_id"]`` selects the grid row.
            param_name: Name of the parameter to read from the grid.
            param_distribution: Distribution the grid value is checked against.

        Returns:
            The grid value. A value outside ``param_distribution`` is still returned,
            after a warning.

        Raises:
            ValueError: If the trial has no ``grid_id`` or ``param_name`` is not part of
                the search space.
        """
        attrs = trial.system_attrs
        if "grid_id" not in attrs:
            raise ValueError(
                "All parameters must be specified when using GridSampler with enqueue_trial."
            )

        if param_name not in self._search_space:
            raise ValueError(
                "The parameter name, {}, is not found in the given grid.".format(param_name)
            )

        # TODO(c-bata): Reduce the number of duplicated evaluations on multiple workers.
        # Current selection logic may evaluate the same parameters multiple times.
        # See https://gist.github.com/c-bata/f759f64becb24eea2040f4b2e3afce8f for details.
        grid_row = self._all_grids[attrs["grid_id"]]
        param_value = grid_row[self._param_names.index(param_name)]

        internal_value = param_distribution.to_internal_repr(param_value)
        if not param_distribution._contains(internal_value):
            warnings.warn(
                f"The value `{param_value}` is out of range of the parameter `{param_name}`. "
                f"The value will be used but the actual distribution is: `{param_distribution}`."
            )

        return param_value
Beispiel #3
0
    def sample_independent(
        self,
        study: Study,
        trial: FrozenTrial,
        param_name: str,
        param_distribution: BaseDistribution,
    ) -> Any:
        """Return the grid value of ``param_name`` for the grid assigned to ``trial``.

        Args:
            study: Unused by this method.
            trial: Trial whose ``system_attrs["grid_id"]`` selects the grid row.
            param_name: Name of the parameter to read from the grid.
            param_distribution: Distribution the grid value must be contained in.

        Returns:
            The grid value for ``param_name``.

        Raises:
            ValueError: If ``param_name`` is not part of the search space, or the grid
                value is not contained in ``param_distribution``.
        """
        if param_name not in self._search_space:
            raise ValueError(
                "The parameter name, {}, is not found in the given grid.".format(param_name)
            )

        # TODO(c-bata): Reduce the number of duplicated evaluations on multiple workers.
        # Current selection logic may evaluate the same parameters multiple times.
        # See https://gist.github.com/c-bata/f759f64becb24eea2040f4b2e3afce8f for details.
        grid_row = self._all_grids[trial.system_attrs["grid_id"]]
        param_value = grid_row[self._param_names.index(param_name)]

        internal_value = param_distribution.to_internal_repr(param_value)
        if param_distribution._contains(internal_value):
            return param_value

        raise ValueError(
            "The value `{}` is out of range of the parameter `{}`. Please make "
            "sure the search space of the `GridSampler` only contains values "
            "consistent with the distribution specified in the objective "
            "function. The distribution is: `{}`.".format(
                param_value, param_name, param_distribution
            )
        )
Beispiel #4
0
    def _suggest(self, name: str, distribution: BaseDistribution) -> Any:

        storage = self.storage
        trial_id = self._trial_id

        trial = storage.get_trial(trial_id)

        if name in trial.distributions:
            # No need to sample if already suggested.
            distributions.check_distribution_compatibility(
                trial.distributions[name], distribution)
            param_value = distribution.to_external_repr(
                storage.get_trial_param(trial_id, name))
        else:
            if self._is_fixed_param(name, distribution):
                param_value = storage.get_trial_system_attrs(
                    trial_id)["fixed_params"][name]
            elif distribution.single():
                param_value = distributions._get_single_value(distribution)
            elif self._is_relative_param(name, distribution):
                param_value = self.relative_params[name]
            else:
                study = pruners._filter_study(self.study, trial)
                param_value = self.study.sampler.sample_independent(
                    study, trial, name, distribution)

            param_value_in_internal_repr = distribution.to_internal_repr(
                param_value)
            storage.set_trial_param(trial_id, name,
                                    param_value_in_internal_repr, distribution)

        return param_value
Beispiel #5
0
 def sample_independent(
     self,
     study: "optuna.study.Study",
     trial: "optuna.trial.FrozenTrial",
     param_name: str,
     param_distribution: BaseDistribution,
 ) -> Any:
     """Return the preset value stored in ``self.params`` for ``param_name``.

     ``study`` and ``trial`` are not used. The value is asserted to lie inside
     ``param_distribution``; a missing name raises ``KeyError`` from the lookup.
     """
     preset = self.params[param_name]
     preset_internal = param_distribution.to_internal_repr(preset)
     assert param_distribution._contains(preset_internal)
     return preset
Beispiel #6
0
    def set_trial_param(
        self,
        trial_id: int,
        param_name: str,
        param_value_internal: float,
        distribution: distributions.BaseDistribution,
    ) -> None:
        """Record a parameter value and its distribution for a trial.

        The study-level distribution map and the updated trial are both pickled and
        written with a single ``mset`` call.

        Args:
            trial_id: ID of the trial to update.
            param_name: Name of the parameter.
            param_value_internal: Parameter value in internal representation.
            distribution: Distribution of the parameter.
        """
        # Fetch the trial once and reuse it for both the updatability check and the
        # update itself, instead of loading it from storage a second time.
        trial = self.get_trial(trial_id)
        self.check_trial_is_updatable(trial_id, trial.state)

        # Check param distribution compatibility with previous trial(s).
        study_id = self.get_study_id_from_trial_id(trial_id)
        param_distribution = self._get_study_param_distribution(study_id)
        if param_name in param_distribution:
            distributions.check_distribution_compatibility(
                param_distribution[param_name], distribution)

        # Set study param distribution.
        param_distribution[param_name] = distribution

        # Set params.
        trial.params[param_name] = distribution.to_external_repr(
            param_value_internal)
        trial.distributions[param_name] = distribution

        queries: Mapping[Union[str, bytes], Union[bytes, float, int, str]] = {
            self._key_study_param_distribution(study_id): pickle.dumps(param_distribution),
            self._key_trial(trial_id): pickle.dumps(trial),
        }
        self._redis.mset(queries)
Beispiel #7
0
    def sample_independent(
        self,
        study: Study,
        trial: FrozenTrial,
        param_name: str,
        param_distribution: BaseDistribution,
    ) -> Any:
        """Sample one value for ``param_name`` from the study's past observations.

        While fewer than ``self._n_startup_trials`` observations are available, sampling
        is delegated to ``self._random_sampler``. Otherwise the observations are split
        into a "below" and an "above" group, a Parzen estimator is built for each,
        candidates are drawn from the "below" estimator, and ``TPESampler._compare``
        chooses among them using both estimators' log-likelihoods.

        Returns:
            The chosen value converted to external representation.
        """
        self._raise_error_if_multi_objective(study)

        observed_values, observed_scores = _get_observation_pairs(
            study, [param_name], self._multivariate, self._constant_liar
        )
        n_observations = len(observed_scores)

        if n_observations < self._n_startup_trials:
            return self._random_sampler.sample_independent(
                study, trial, param_name, param_distribution
            )

        below_obs, above_obs = _split_observation_pairs(
            observed_values, observed_scores, self._gamma(n_observations)
        )
        estimator_below = _ParzenEstimator(
            below_obs, {param_name: param_distribution}, self._parzen_estimator_parameters
        )
        estimator_above = _ParzenEstimator(
            above_obs, {param_name: param_distribution}, self._parzen_estimator_parameters
        )

        # Candidates come from the "below" estimator and are scored under both.
        candidates = estimator_below.sample(self._rng, self._n_ei_candidates)
        chosen = TPESampler._compare(
            candidates,
            estimator_below.log_pdf(candidates),
            estimator_above.log_pdf(candidates),
        )

        return param_distribution.to_external_repr(chosen[param_name])
Beispiel #8
0
    def set_trial_param(
        self,
        trial_id: int,
        param_name: str,
        param_value_internal: float,
        distribution: distributions.BaseDistribution,
    ) -> None:
        """Record a parameter value and its distribution for a trial.

        The study-level distribution map and the updated trial are pickled and written
        through one Redis pipeline (``multi`` ... ``execute``).

        Args:
            trial_id: ID of the trial to update.
            param_name: Name of the parameter.
            param_value_internal: Parameter value in internal representation.
            distribution: Distribution of the parameter.
        """
        self._check_trial_id(trial_id)

        # Fetch the trial once and reuse it for both the updatability check and the
        # update itself, instead of loading it from storage a second time.
        trial = self.get_trial(trial_id)
        self.check_trial_is_updatable(trial_id, trial.state)

        # Check param distribution compatibility with previous trial(s).
        study_id = self.get_study_id_from_trial_id(trial_id)
        param_distribution = self._get_study_param_distribution(study_id)
        if param_name in param_distribution:
            distributions.check_distribution_compatibility(
                param_distribution[param_name], distribution)

        with self._redis.pipeline() as pipe:
            pipe.multi()
            # Set study param distribution.
            param_distribution[param_name] = distribution
            pipe.set(self._key_study_param_distribution(study_id),
                     pickle.dumps(param_distribution))

            # Set params.
            trial.params[param_name] = distribution.to_external_repr(
                param_value_internal)
            trial.distributions[param_name] = distribution
            pipe.set(self._key_trial(trial_id), pickle.dumps(trial))
            pipe.execute()
Beispiel #9
0
    def set_trial_param(
        self,
        trial_id: int,
        param_name: str,
        param_value_internal: float,
        distribution: distributions.BaseDistribution,
    ) -> None:
        """Record a parameter value and its distribution for a trial.

        Runs entirely under ``self._lock``. The stored trial object is not modified:
        a shallow copy with fresh ``params`` and ``distributions`` mappings is stored
        in its place via ``self._set_trial``.

        Args:
            trial_id: ID of the trial to update.
            param_name: Name of the parameter.
            param_value_internal: Parameter value in internal representation.
            distribution: Distribution of the parameter.
        """
        with self._lock:
            current = self._get_trial(trial_id)
            self.check_trial_is_updatable(trial_id, current.state)

            study_id = self._trial_id_to_study_id_and_number[trial_id][0]
            study_distributions = self._studies[study_id].param_distribution

            # Check param distribution compatibility with previous trial(s).
            if param_name in study_distributions:
                distributions.check_distribution_compatibility(
                    study_distributions[param_name], distribution
                )

            # Set param distribution.
            study_distributions[param_name] = distribution

            # Set param on a copy so the previously stored trial stays untouched.
            updated = copy.copy(current)
            new_params = copy.copy(updated.params)
            new_params[param_name] = distribution.to_external_repr(param_value_internal)
            updated.params = new_params

            new_distributions = copy.copy(updated.distributions)
            new_distributions[param_name] = distribution
            updated.distributions = new_distributions

            self._set_trial(trial_id, updated)
Beispiel #10
0
    def _is_relative_param(self, name: str, distribution: BaseDistribution) -> bool:

        if name not in self.relative_params:
            return False

        if name not in self.relative_search_space:
            raise ValueError(
                "The parameter '{}' was sampled by `sample_relative` method "
                "but it is not contained in the relative search space.".format(name)
            )

        relative_distribution = self.relative_search_space[name]
        distributions.check_distribution_compatibility(relative_distribution, distribution)

        param_value = self.relative_params[name]
        param_value_in_internal_repr = distribution.to_internal_repr(param_value)
        return distribution._contains(param_value_in_internal_repr)
Beispiel #11
0
    def sample_independent(
        self,
        study: Study,
        trial: FrozenTrial,
        param_name: str,
        param_distribution: BaseDistribution,
    ) -> Any:
        """Sample one value for ``param_name`` from the study's past observations.

        While fewer than ``self._n_startup_trials`` observations are available, sampling
        is delegated to ``self._random_sampler``. Otherwise the observations are split
        into "below" and "above" index sets, a Parzen estimator is built for each
        (the "below" one receives explicit weights for multi-objective studies),
        candidates are drawn from the "below" estimator, and ``TPESampler._compare``
        chooses among them using both estimators' log-likelihoods.

        Returns:
            The chosen value converted to external representation.
        """
        use_constraints = self._constraints_func is not None
        observed, scores, violations = _get_observation_pairs(
            study, [param_name], self._multivariate, self._constant_liar, use_constraints
        )

        n_observations = len(scores)
        self._log_independent_sampling(n_observations, trial, param_name)

        if n_observations < self._n_startup_trials:
            return self._random_sampler.sample_independent(
                study, trial, param_name, param_distribution
            )

        indices_below, indices_above = _split_observation_pairs(
            scores, self._gamma(n_observations), violations
        )

        # `None` items are intentionally converted to `nan` and then filtered out.
        # For `nan` conversion, the dtype must be float.
        config_values = {
            name: np.asarray(column, dtype=float) for name, column in observed.items()
        }
        below = _build_observation_dict(config_values, indices_below)
        above = _build_observation_dict(config_values, indices_above)

        # Only multi-objective studies pass a fourth (weights) argument.
        below_args = [
            below,
            {param_name: param_distribution},
            self._parzen_estimator_parameters,
        ]
        if study._is_multi_objective():
            below_args.append(
                _calculate_weights_below_for_multi_objective(
                    config_values, scores, indices_below, violations
                )
            )
        estimator_below = _ParzenEstimator(*below_args)
        estimator_above = _ParzenEstimator(
            above, {param_name: param_distribution}, self._parzen_estimator_parameters
        )

        candidates = estimator_below.sample(self._rng, self._n_ei_candidates)
        chosen = TPESampler._compare(
            candidates,
            estimator_below.log_pdf(candidates),
            estimator_above.log_pdf(candidates),
        )

        return param_distribution.to_external_repr(chosen[param_name])
Beispiel #12
0
    def _is_fixed_param(self, name: str, distribution: BaseDistribution) -> bool:

        system_attrs = self.storage.get_trial_system_attrs(self._trial_id)
        if "fixed_params" not in system_attrs:
            return False

        if name not in system_attrs["fixed_params"]:
            return False

        param_value = system_attrs["fixed_params"][name]
        param_value_in_internal_repr = distribution.to_internal_repr(param_value)

        contained = distribution._contains(param_value_in_internal_repr)
        if not contained:
            warnings.warn(
                "Fixed parameter '{}' with value {} is out of range "
                "for distribution {}.".format(name, param_value, distribution)
            )
        return True
Beispiel #13
0
    def _suggest(self, name: str, distribution: BaseDistribution) -> Any:

        if name not in self._params:
            raise ValueError(
                "The value of the parameter '{}' is not found. Please set it at "
                "the construction of the FrozenTrial object.".format(name))

        value = self._params[name]
        param_value_in_internal_repr = distribution.to_internal_repr(value)
        if not distribution._contains(param_value_in_internal_repr):
            raise ValueError("The value {} of the parameter '{}' is out of "
                             "the range of the distribution {}.".format(
                                 value, name, distribution))

        if name in self._distributions:
            distributions.check_distribution_compatibility(
                self._distributions[name], distribution)

        self._distributions[name] = distribution

        return value
Beispiel #14
0
def test_sample_independent(
    sampler_class: Callable[[], BaseMultiObjectiveSampler],
    distribution: BaseDistribution,
) -> None:
    """Draw 100 independent samples of ``"x"`` and validate each against ``distribution``.

    Every sample must be contained in the distribution, must not be a NumPy floating
    scalar unless the distribution is categorical, and, for a stepped float
    distribution, must sit a whole number of steps above ``distribution.low``.
    """
    study = optuna.multi_objective.study.create_study(
        ["minimize", "maximize"], sampler=sampler_class()
    )

    for _ in range(100):
        value = study.sampler.sample_independent(
            study, _create_new_trial(study), "x", distribution
        )
        assert distribution._contains(distribution.to_internal_repr(value))

        if not isinstance(distribution, CategoricalDistribution):
            # Please see https://github.com/optuna/optuna/pull/393 why this assertion is needed.
            assert not isinstance(value, np.floating)

        if isinstance(distribution, FloatDistribution) and distribution.step is not None:
            # Check the value is a multiple of `distribution.step`, the quantization
            # interval of the distribution, counted from `distribution.low`.
            n_steps = (value - distribution.low) / distribution.step
            np.testing.assert_almost_equal(np.round(n_steps), n_steps)
Beispiel #15
0
    def set_trial_param(
        self,
        trial_id: int,
        param_name: str,
        param_value_internal: float,
        distribution: distributions.BaseDistribution,
    ) -> None:
        """Record a parameter for a trial, using the in-process cache when possible.

        If the trial is cached, the cached trial is updated under ``self._lock`` and the
        method returns without calling ``self._backend.set_trial_param``. If the trial
        is not cached, the call is forwarded to the backend after the lock is released.

        Args:
            trial_id: ID of the trial to update.
            param_name: Name of the parameter.
            param_value_internal: Parameter value in internal representation.
            distribution: Distribution of the parameter.
        """

        with self._lock:
            cached_trial = self._get_cached_trial(trial_id)
            if cached_trial is not None:
                self._check_trial_is_updatable(cached_trial)

                study_id, _ = self._trial_id_to_study_id_and_number[trial_id]
                # Distribution previously cached for this parameter at study level, if any.
                cached_dist = self._studies[study_id].param_distribution.get(
                    param_name, None)
                # NOTE(review): this is a truthiness test, not `is not None` — it assumes
                # distribution objects are always truthy; confirm.
                if cached_dist:
                    distributions.check_distribution_compatibility(
                        cached_dist, distribution)
                else:
                    # On cache miss, check compatibility against previous trials in the database
                    # and INSERT immediately to prevent other processes from creating incompatible
                    # ones. By INSERT, it is assumed that no previous entry has been persisted
                    # already.
                    self._backend._check_and_set_param_distribution(
                        study_id, trial_id, param_name, param_value_internal,
                        distribution)
                    self._studies[study_id].param_distribution[
                        param_name] = distribution

                # Replace the cached trial's params with an updated copy instead of
                # mutating the existing mapping in place.
                params = copy.copy(cached_trial.params)
                params[param_name] = distribution.to_external_repr(
                    param_value_internal)
                cached_trial.params = params

                # Same copy-then-replace approach for the distributions mapping.
                dists = copy.copy(cached_trial.distributions)
                dists[param_name] = distribution
                cached_trial.distributions = dists

                if cached_dist:  # Already persisted in case of cache miss so no need to update.
                    # Record the change in the trial's pending updates and flush it
                    # right away.
                    updates = self._get_updates(trial_id)
                    updates.params[param_name] = param_value_internal
                    updates.distributions[param_name] = distribution
                    self._flush_trial(trial_id)
                return

        # Trial is not cached: delegate to the backend (outside the lock).
        self._backend.set_trial_param(trial_id, param_name,
                                      param_value_internal, distribution)
Beispiel #16
0
    def set_trial_param(
        self,
        trial_id: int,
        param_name: str,
        param_value_internal: float,
        distribution: distributions.BaseDistribution,
    ) -> None:
        """Record a parameter for a trial, using the in-process cache when possible.

        If the trial is cached, the cached trial is updated under ``self._lock`` and the
        method returns without calling ``self._backend.set_trial_param``. If the trial
        is not cached, the call is forwarded to the backend after the lock is released.

        Args:
            trial_id: ID of the trial to update.
            param_name: Name of the parameter.
            param_value_internal: Parameter value in internal representation.
            distribution: Distribution of the parameter.
        """

        with self._lock:
            cached_trial = self._get_cached_trial(trial_id)
            if cached_trial is not None:
                self._check_trial_is_updatable(cached_trial)

                study_id, _ = self._trial_id_to_study_id_and_number[trial_id]
                # Distribution previously cached for this parameter at study level, if any.
                cached_dist = self._studies[study_id].param_distribution.get(
                    param_name, None)
                # NOTE(review): this is a truthiness test, not `is not None` — it assumes
                # distribution objects are always truthy; confirm.
                if cached_dist:
                    distributions.check_distribution_compatibility(
                        cached_dist, distribution)
                else:
                    # No cached distribution for this parameter: hand the check (and the
                    # write) to the backend, then remember the distribution in the cache.
                    self._backend._check_or_set_param_distribution(
                        trial_id, param_name, param_value_internal,
                        distribution)
                    self._studies[study_id].param_distribution[
                        param_name] = distribution

                # Replace the cached trial's params with an updated copy instead of
                # mutating the existing mapping in place.
                params = copy.copy(cached_trial.params)
                params[param_name] = distribution.to_external_repr(
                    param_value_internal)
                cached_trial.params = params

                # Same copy-then-replace approach for the distributions mapping.
                dists = copy.copy(cached_trial.distributions)
                dists[param_name] = distribution
                cached_trial.distributions = dists

                if cached_dist:
                    # Only record the change in the trial's pending updates here; no
                    # flush is triggered from this method.
                    # NOTE(review): presumably skipped on a cache miss because the backend
                    # call above already stored the value — confirm.
                    updates = self._get_updates(trial_id)
                    updates.params[param_name] = param_value_internal
                    updates.distributions[param_name] = distribution
                return

        # Trial is not cached: delegate to the backend (outside the lock).
        self._backend.set_trial_param(trial_id, param_name,
                                      param_value_internal, distribution)
Beispiel #17
0
def _distribution_to_dict(dist: BaseDistribution) -> Dict[str, Any]:
    return {"name": dist.__class__.__name__, "attributes": dist._asdict()}