Example #1
    def sample_independent(
        self,
        study: Study,
        trial: FrozenTrial,
        param_name: str,
        param_distribution: BaseDistribution,
    ) -> Any:

        self._raise_error_if_multi_objective(study)

        values, scores = _get_observation_pairs(study, [param_name],
                                                self._multivariate,
                                                self._constant_liar)

        n = len(scores)

        if n < self._n_startup_trials:
            return self._random_sampler.sample_independent(
                study, trial, param_name, param_distribution)

        below, above = _split_observation_pairs(values, scores, self._gamma(n))
        mpe_below = _ParzenEstimator(below, {param_name: param_distribution},
                                     self._parzen_estimator_parameters)
        mpe_above = _ParzenEstimator(above, {param_name: param_distribution},
                                     self._parzen_estimator_parameters)
        samples_below = mpe_below.sample(self._rng, self._n_ei_candidates)
        log_likelihoods_below = mpe_below.log_pdf(samples_below)
        log_likelihoods_above = mpe_above.log_pdf(samples_below)
        ret = TPESampler._compare(samples_below, log_likelihoods_below,
                                  log_likelihoods_above)

        return param_distribution.to_external_repr(ret[param_name])
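In context, `sample_independent` is not called by users directly; it runs behind the scenes whenever a trial suggests a parameter outside the relative search space. A minimal sketch of driving it through the public API, assuming a recent Optuna release (the objective and ranges are illustrative, not from the source):

import optuna

def objective(trial: optuna.Trial) -> float:
    x = trial.suggest_float("x", -10.0, 10.0)
    return (x - 2.0) ** 2

# The first n_startup_trials suggestions fall back to random sampling,
# after which the Parzen-estimator path shown above takes over.
sampler = optuna.samplers.TPESampler(n_startup_trials=10, n_ei_candidates=24, seed=0)
study = optuna.create_study(sampler=sampler)
study.optimize(objective, n_trials=30)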
Example #2
    def _sample_relative(
            self, study: Study, trial: FrozenTrial,
            search_space: Dict[str, BaseDistribution]) -> Dict[str, Any]:

        if search_space == {}:
            return {}

        param_names = list(search_space.keys())
        values, scores = _get_observation_pairs(study, param_names,
                                                self._multivariate,
                                                self._constant_liar)

        # If the number of samples is insufficient, we fall back to random sampling.
        n = len(scores)
        if n < self._n_startup_trials:
            return {}

        # We divide data into below and above.
        below, above = _split_observation_pairs(values, scores, self._gamma(n))
        # We then sample by maximizing the log-likelihood ratio.
        mpe_below = _ParzenEstimator(below, search_space,
                                     self._parzen_estimator_parameters)
        mpe_above = _ParzenEstimator(above, search_space,
                                     self._parzen_estimator_parameters)
        samples_below = mpe_below.sample(self._rng, self._n_ei_candidates)
        log_likelihoods_below = mpe_below.log_pdf(samples_below)
        log_likelihoods_above = mpe_above.log_pdf(samples_below)
        ret = TPESampler._compare(samples_below, log_likelihoods_below,
                                  log_likelihoods_above)

        for param_name, dist in search_space.items():
            ret[param_name] = dist.to_external_repr(ret[param_name])

        return ret
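`_sample_relative` is only exercised when the sampler is constructed with `multivariate=True` (marked experimental in the releases this code comes from), in which case the joint search space is modeled instead of one parameter at a time. A minimal sketch with two jointly modeled parameters (the objective is illustrative):

import optuna

def objective(trial: optuna.Trial) -> float:
    x = trial.suggest_float("x", -5.0, 5.0)
    y = trial.suggest_float("y", -5.0, 5.0)
    return (x + y - 1.0) ** 2 + (x - y) ** 2

sampler = optuna.samplers.TPESampler(multivariate=True, seed=0)
study = optuna.create_study(sampler=sampler)
study.optimize(objective, n_trials=50)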
Example #3
    def _sample_numerical(
            self,
            low,  # type: float
            high,  # type: float
            below,  # type: np.ndarray
            above,  # type: np.ndarray
            q=None,  # type: Optional[float]
            is_log=False,  # type: bool
    ):
        # type: (...) -> float

        if is_log:
            low = np.log(low)
            high = np.log(high)
            below = np.log(below)
            above = np.log(above)

        size = (self._n_ei_candidates, )

        parzen_estimator_below = _ParzenEstimator(
            mus=below,
            low=low,
            high=high,
            parameters=self._parzen_estimator_parameters)
        samples_below = self._sample_from_gmm(
            parzen_estimator=parzen_estimator_below,
            low=low,
            high=high,
            q=q,
            size=size,
        )
        log_likelihoods_below = self._gmm_log_pdf(
            samples=samples_below,
            parzen_estimator=parzen_estimator_below,
            low=low,
            high=high,
            q=q,
        )

        parzen_estimator_above = _ParzenEstimator(
            mus=above,
            low=low,
            high=high,
            parameters=self._parzen_estimator_parameters)

        log_likelihoods_above = self._gmm_log_pdf(
            samples=samples_below,
            parzen_estimator=parzen_estimator_above,
            low=low,
            high=high,
            q=q,
        )

        ret = float(
            TPESampler._compare(samples=samples_below,
                                log_l=log_likelihoods_below,
                                log_g=log_likelihoods_above)[0])
        return math.exp(ret) if is_log else ret
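The computation above is the core TPE recipe: fit one density l(x) to the "below" (good) observations and one density g(x) to the "above" (bad) observations, sample candidates from l(x), and keep the candidate maximizing log l(x) - log g(x). A self-contained sketch of that comparison, using scipy's gaussian_kde as a stand-in for optuna's internal _ParzenEstimator (the data and the 25/75 split are made up for illustration; the seed argument of resample needs scipy >= 1.5):

import numpy as np
from scipy.stats import gaussian_kde

rng = np.random.default_rng(0)
below = rng.normal(0.0, 0.5, size=25)   # observations with good objective values
above = rng.normal(2.0, 1.0, size=75)   # observations with bad objective values

kde_below = gaussian_kde(below)  # l(x)
kde_above = gaussian_kde(above)  # g(x)

# Sample candidates from l(x) and keep the one maximizing log l(x) - log g(x),
# mirroring the samples_below / _compare steps above.
candidates = kde_below.resample(24, seed=1)[0]
log_ratio = kde_below.logpdf(candidates) - kde_above.logpdf(candidates)
best = float(candidates[np.argmax(log_ratio)])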
Example #4
    def sample_independent(
        self,
        study: Study,
        trial: FrozenTrial,
        param_name: str,
        param_distribution: BaseDistribution,
    ) -> Any:

        values, scores, violations = _get_observation_pairs(
            study,
            [param_name],
            self._multivariate,
            self._constant_liar,
            self._constraints_func is not None,
        )

        n = len(scores)

        self._log_independent_sampling(n, trial, param_name)

        if n < self._n_startup_trials:
            return self._random_sampler.sample_independent(
                study, trial, param_name, param_distribution)

        indices_below, indices_above = _split_observation_pairs(
            scores, self._gamma(n), violations)
        # `None` items are intentionally converted to `nan` and then filtered out.
        # For `nan` conversion, the dtype must be float.
        config_values = {
            k: np.asarray(v, dtype=float)
            for k, v in values.items()
        }
        below = _build_observation_dict(config_values, indices_below)
        above = _build_observation_dict(config_values, indices_above)

        if study._is_multi_objective():
            weights_below = _calculate_weights_below_for_multi_objective(
                config_values, scores, indices_below, violations)
            mpe_below = _ParzenEstimator(
                below,
                {param_name: param_distribution},
                self._parzen_estimator_parameters,
                weights_below,
            )
        else:
            mpe_below = _ParzenEstimator(below,
                                         {param_name: param_distribution},
                                         self._parzen_estimator_parameters)
        mpe_above = _ParzenEstimator(above, {param_name: param_distribution},
                                     self._parzen_estimator_parameters)
        samples_below = mpe_below.sample(self._rng, self._n_ei_candidates)
        log_likelihoods_below = mpe_below.log_pdf(samples_below)
        log_likelihoods_above = mpe_above.log_pdf(samples_below)
        ret = TPESampler._compare(samples_below, log_likelihoods_below,
                                  log_likelihoods_above)

        return param_distribution.to_external_repr(ret[param_name])
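This variant threads constraint violations through the below/above split. The violations come from a user-supplied `constraints_func`; a minimal wiring sketch for releases where TPESampler exposes that argument, following Optuna's convention that a trial is feasible when every returned value is <= 0 (the objective and threshold are illustrative):

import optuna

def objective(trial: optuna.Trial) -> float:
    x = trial.suggest_float("x", -10.0, 10.0)
    trial.set_user_attr("constraint", x - 5.0)  # feasible only when x <= 5
    return x ** 2

def constraints(trial: optuna.trial.FrozenTrial):
    # Values <= 0 mean feasible; read back from attributes stored during the trial.
    return (trial.user_attrs["constraint"],)

sampler = optuna.samplers.TPESampler(constraints_func=constraints, seed=0)
study = optuna.create_study(sampler=sampler)
study.optimize(objective, n_trials=30)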
Example #5
    def _sample_relative(
            self, study: Study, trial: FrozenTrial,
            search_space: Dict[str, BaseDistribution]) -> Dict[str, Any]:

        if search_space == {}:
            return {}

        param_names = list(search_space.keys())
        values, scores, violations = _get_observation_pairs(
            study,
            param_names,
            self._multivariate,
            self._constant_liar,
            self._constraints_func is not None,
        )

        # If the number of samples is insufficient, we fall back to random sampling.
        n = len(scores)
        if n < self._n_startup_trials:
            return {}

        # We divide data into below and above.
        indices_below, indices_above = _split_observation_pairs(
            scores, self._gamma(n), violations)
        # `None` items are intentionally converted to `nan` and then filtered out.
        # For `nan` conversion, the dtype must be float.
        config_values = {
            k: np.asarray(v, dtype=float)
            for k, v in values.items()
        }
        below = _build_observation_dict(config_values, indices_below)
        above = _build_observation_dict(config_values, indices_above)

        # We then sample by maximizing the log-likelihood ratio.
        if study._is_multi_objective():
            weights_below = _calculate_weights_below_for_multi_objective(
                config_values, scores, indices_below, violations)
            mpe_below = _ParzenEstimator(below, search_space,
                                         self._parzen_estimator_parameters,
                                         weights_below)
        else:
            mpe_below = _ParzenEstimator(below, search_space,
                                         self._parzen_estimator_parameters)
        mpe_above = _ParzenEstimator(above, search_space,
                                     self._parzen_estimator_parameters)
        samples_below = mpe_below.sample(self._rng, self._n_ei_candidates)
        log_likelihoods_below = mpe_below.log_pdf(samples_below)
        log_likelihoods_above = mpe_above.log_pdf(samples_below)
        ret = TPESampler._compare(samples_below, log_likelihoods_below,
                                  log_likelihoods_above)

        for param_name, dist in search_space.items():
            ret[param_name] = dist.to_external_repr(ret[param_name])

        return ret
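The `study._is_multi_objective()` branch above implies this version of TPESampler also handles multi-objective studies, weighting the "below" observations separately. A hedged sketch of driving that path on a toy bi-objective problem (illustrative only):

import optuna

def objective(trial: optuna.Trial):
    x = trial.suggest_float("x", 0.0, 5.0)
    y = trial.suggest_float("y", 0.0, 3.0)
    return x ** 2 + y, (x - 2.0) ** 2 + (y - 1.0) ** 2

sampler = optuna.samplers.TPESampler(multivariate=True, seed=0)
study = optuna.create_study(directions=["minimize", "minimize"], sampler=sampler)
study.optimize(objective, n_trials=50)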
Example #6
def test_invalid_weights(weights: Callable[[int], np.ndarray]) -> None:
    parameters = _ParzenEstimatorParameters(
        prior_weight=1.0,
        consider_prior=False,
        consider_magic_clip=False,
        consider_endpoints=False,
        weights=weights,
        multivariate=False,
    )
    with pytest.raises(ValueError):
        _ParzenEstimator({"a": np.asarray([0.0])},
                         {"a": distributions.FloatDistribution(-1.0, 1.0)},
                         parameters)
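For context, the `weights` field of `_ParzenEstimatorParameters` is the same callable users can pass to `TPESampler(weights=...)`; the test above presumably exercises callables returning negative, all-zero, or wrongly sized arrays. A sketch of a valid weights function, assuming the linear ramp used by other examples in this listing is acceptable:

import numpy as np
import optuna

# A simple linear ramp: later observations (higher index) get larger weight.
sampler = optuna.samplers.TPESampler(weights=lambda n: np.arange(n) + 1.0, seed=0)
study = optuna.create_study(sampler=sampler)
study.optimize(lambda t: t.suggest_float("x", -1.0, 1.0) ** 2, n_trials=20)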
Example #7
def test_calculate_shape_check(mus: np.ndarray, prior: bool, magic_clip: bool,
                               endpoints: bool) -> None:

    parameters = _ParzenEstimatorParameters(
        prior_weight=1.0,
        consider_prior=prior,
        consider_magic_clip=magic_clip,
        consider_endpoints=endpoints,
        weights=default_weights,
        multivariate=False,
    )
    mpe = _ParzenEstimator({"a": mus},
                           {"a": distributions.UniformDistribution(-1.0, 1.0)},
                           parameters)
    s_weights, s_mus, s_sigmas = mpe._weights, mpe._mus["a"], mpe._sigmas["a"]

    # The result contains an additional value for the prior distribution if prior is
    # True or len(mus) == 0 (in the latter case, the prior is always used).
    assert s_mus is not None
    assert s_sigmas is not None
    expected_len = len(mus) + int(prior) if len(mus) > 0 else len(mus) + 1
    assert len(s_weights) == expected_len
    assert len(s_mus) == expected_len
    assert len(s_sigmas) == expected_len
Example #8
def test_suggest_with_step_parzen_estimator(multivariate: bool) -> None:
    parameters = _ParzenEstimatorParameters(
        consider_prior=False,
        prior_weight=0.0,
        consider_magic_clip=False,
        consider_endpoints=False,
        weights=lambda x: np.arange(x) + 1.0,
        multivariate=multivariate,
    )

    # Define a search space of distributions with a step argument, plus their true value ranges.
    search_space = {
        "c": distributions.DiscreteUniformDistribution(low=1.0,
                                                       high=7.0,
                                                       q=3.0),
        "d": distributions.IntUniformDistribution(low=1, high=5, step=2),
    }
    multivariate_samples = {"c": np.array([4]), "d": np.array([3])}
    valid_ranges = {
        "c": set(np.arange(1.0, 10.0, 3.0)),
        "d": set(np.arange(1, 7, 2))
    }

    sigmas0 = 1 if multivariate else None
    with patch(_PRECOMPUTE_SIGMAS0, return_value=sigmas0):
        mpe = _ParzenEstimator(multivariate_samples, search_space, parameters)

    # Draw 10 samples, and check if all valid values are sampled.
    output_samples = mpe.sample(np.random.RandomState(0), 10)
    for param_name in output_samples:
        assert set(output_samples[param_name]) == valid_ranges[param_name]
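The discretized distributions in this test correspond to the `step` arguments of the suggest API in recent Optuna releases; a small usage sketch that produces exactly the value sets checked above ({1.0, 4.0, 7.0} and {1, 3, 5}):

import optuna

def objective(trial: optuna.Trial) -> float:
    c = trial.suggest_float("c", 1.0, 7.0, step=3.0)  # only 1.0, 4.0, or 7.0
    d = trial.suggest_int("d", 1, 5, step=2)          # only 1, 3, or 5
    return c + d

study = optuna.create_study(sampler=optuna.samplers.TPESampler(seed=0))
study.optimize(objective, n_trials=20)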
Example #9
def test_sample_parzen_estimator(multivariate: bool) -> None:

    parameters = _ParzenEstimatorParameters(
        consider_prior=False,
        prior_weight=0.0,
        consider_magic_clip=False,
        consider_endpoints=False,
        weights=lambda x: np.arange(x) + 1.0,
        multivariate=multivariate,
    )
    sigmas0 = 1e-8 if multivariate else None
    with patch(_PRECOMPUTE_SIGMAS0, return_value=sigmas0):
        mpe = _ParzenEstimator(MULTIVARIATE_SAMPLES, SEARCH_SPACE, parameters)

    # Test the shape of the samples.
    output_samples = mpe.sample(np.random.RandomState(0), 3)
    for param_name in output_samples:
        assert output_samples[param_name].shape == (3, )

    # Test the values of the output for multivariate case.
    # As we set ``consider_prior`` to False and the pre-computed sigma to 1e-8,
    # the samples are almost equal to the input ``MULTIVARIATE_SAMPLES``.
    output_samples = mpe.sample(np.random.RandomState(0), 1)
    if multivariate:
        for param_name, samples in output_samples.items():
            np.testing.assert_almost_equal(
                samples,
                MULTIVARIATE_SAMPLES[param_name],
                decimal=2,
                err_msg="parameter {}".format(param_name),
            )

    # Test the output when the seeds are fixed.
    assert output_samples == mpe.sample(np.random.RandomState(0), 1)
Example #10
def test_invalid_prior_weight(prior_weight: float, mus: np.ndarray) -> None:

    parameters = _ParzenEstimatorParameters(
        prior_weight=prior_weight,
        consider_prior=True,
        consider_magic_clip=False,
        consider_endpoints=False,
        weights=default_weights,
        multivariate=False,
    )
    mpe = _ParzenEstimator({"a": mus},
                           {"a": distributions.FloatDistribution(-1.0, 1.0)},
                           parameters)
    weights = mpe._weights
    assert len(weights) == len(mus) + 1

    # TODO(HideakiImamura): After modifying the body to raise an error, modify the test as well.
    if prior_weight is None:
        assert all([np.isnan(w) for w in weights])
Example #11
def test_log_pdf_parzen_estimator(multivariate: bool) -> None:
    parameters = _ParzenEstimatorParameters(
        consider_prior=False,
        prior_weight=1.0,
        consider_magic_clip=True,
        consider_endpoints=True,
        weights=lambda x: np.arange(x) + 1.0,
        multivariate=multivariate,
    )
    # The Parzen estimator almost becomes a mixture of Dirac measures.
    sigmas0 = 1e-8 if multivariate else None
    with patch(_PRECOMPUTE_SIGMAS0, return_value=sigmas0):
        mpe = _ParzenEstimator(MULTIVARIATE_SAMPLES, SEARCH_SPACE, parameters)

    log_pdf = mpe.log_pdf(MULTIVARIATE_SAMPLES)
    output_samples = mpe.sample(np.random.RandomState(0), 100)
    output_log_pdf = mpe.log_pdf(output_samples)
    # The density concentrates at the observed points, so their log-likelihood is at
    # least as large as that of any point sampled from the estimator.
    assert np.all(log_pdf >= output_log_pdf)
Example #12
def test_calculate(mus: np.ndarray, flags: Dict[str, bool],
                   expected: Dict[str, List[float]]) -> None:

    parameters = _ParzenEstimatorParameters(
        prior_weight=1.0,
        consider_prior=flags["prior"],
        consider_magic_clip=flags["magic_clip"],
        consider_endpoints=flags["endpoints"],
        weights=default_weights,
        multivariate=False,
    )
    mpe = _ParzenEstimator({"a": mus},
                           {"a": distributions.UniformDistribution(-1.0, 1.0)},
                           parameters)
    s_weights, s_mus, s_sigmas = mpe._weights, mpe._mus["a"], mpe._sigmas["a"]

    # Result contains an additional value for a prior distribution if consider_prior is True.
    np.testing.assert_almost_equal(s_weights, expected["weights"])
    np.testing.assert_almost_equal(s_mus, expected["mus"])
    np.testing.assert_almost_equal(s_sigmas, expected["sigmas"])
Example #13
    def _sample_mo_numerical(
        self,
        study: optuna.study.Study,
        trial: optuna.trial.FrozenTrial,
        low: float,
        high: float,
        below: np.ndarray,
        above: np.ndarray,
        q: Optional[float] = None,
        is_log: bool = False,
    ) -> float:
        if is_log:
            low = np.log(low)
            high = np.log(high)
            below = np.log(below)
            above = np.log(above)

        size = (self._n_ehvi_candidates,)

        def weights_below(_: int) -> np.ndarray:
            # The weights for the "below" observations are pre-computed by the
            # sampler and stashed in the trial's system attributes.
            return np.asarray(
                study._storage.get_trial(trial._trial_id).system_attrs[_WEIGHTS_BELOW_KEY],
                dtype=float,
            )

        parzen_estimator_parameters_below = _ParzenEstimatorParameters(
            self._parzen_estimator_parameters.consider_prior,
            self._parzen_estimator_parameters.prior_weight,
            self._parzen_estimator_parameters.consider_magic_clip,
            self._parzen_estimator_parameters.consider_endpoints,
            weights_below,
        )
        parzen_estimator_below = _ParzenEstimator(
            mus=below, low=low, high=high, parameters=parzen_estimator_parameters_below
        )
        samples_below = self._sample_from_gmm(
            parzen_estimator=parzen_estimator_below,
            low=low,
            high=high,
            q=q,
            size=size,
        )
        log_likelihoods_below = self._gmm_log_pdf(
            samples=samples_below,
            parzen_estimator=parzen_estimator_below,
            low=low,
            high=high,
            q=q,
        )

        weights_above = self._weights
        parzen_estimator_parameters_above = _ParzenEstimatorParameters(
            self._parzen_estimator_parameters.consider_prior,
            self._parzen_estimator_parameters.prior_weight,
            self._parzen_estimator_parameters.consider_magic_clip,
            self._parzen_estimator_parameters.consider_endpoints,
            weights_above,
        )
        parzen_estimator_above = _ParzenEstimator(
            mus=above, low=low, high=high, parameters=parzen_estimator_parameters_above
        )
        log_likelihoods_above = self._gmm_log_pdf(
            samples=samples_below,
            parzen_estimator=parzen_estimator_above,
            low=low,
            high=high,
            q=q,
        )

        ret = float(
            TPESampler._compare(
                samples=samples_below, log_l=log_likelihoods_below, log_g=log_likelihoods_above
            )[0]
        )
        return math.exp(ret) if is_log else ret
Example #14
def test_init_parzen_estimator(consider_prior: bool,
                               multivariate: bool) -> None:

    parameters = _ParzenEstimatorParameters(
        consider_prior=consider_prior,
        prior_weight=1.0,
        consider_magic_clip=False,
        consider_endpoints=False,
        weights=lambda x: np.arange(x) + 1.0,
        multivariate=multivariate,
    )

    sigmas0 = 1 if multivariate else None
    with patch(_PRECOMPUTE_SIGMAS0, return_value=sigmas0):
        mpe = _ParzenEstimator(MULTIVARIATE_SAMPLES, SEARCH_SPACE, parameters)

    weights = np.array([1] + consider_prior * [1], dtype=float)
    weights /= weights.sum()
    q = {"a": None, "b": None, "c": 3.0, "d": 1.0, "e": None, "f": None}
    low = {"a": 1.0, "b": np.log(1.0), "c": -0.5, "d": 0.5, "e": np.log(0.5), "f": None}
    high = {"a": 100.0, "b": np.log(100.0), "c": 101.5, "d": 100.5, "e": np.log(100.5), "f": None}

    assert np.all(mpe._weights == weights)
    assert mpe._q == q
    assert mpe._low == low
    assert mpe._high == high

    expected_sigmas_univariate = {
        "a": [49.5 if consider_prior else 99.0] + consider_prior * [99.0],
        "b": [np.log(100) / 2 if consider_prior else np.log(100.0)]
        + consider_prior * [np.log(100)],
        "c": [49.5 if consider_prior else 100.5] + consider_prior * [102.0],
        "d": [49.5 if consider_prior else 99.5] + consider_prior * [100.0],
        "e": [(np.log(100.5) + np.log(0.5)) / 2 if consider_prior else np.log(100.5)]
        + consider_prior * [np.log(100.5) - np.log(0.5)],
        "f": None,
    }
    expected_sigmas_multivariate = {
        "a": [99.0] + consider_prior * [99.0],
        "b": [np.log(100.0)] + consider_prior * [np.log(100)],
        "c": [102.0] + consider_prior * [102.0],
        "d": [100.0] + consider_prior * [100.0],
        "e": [np.log(100.5) - np.log(0.5)]
        + consider_prior * [np.log(100.5) - np.log(0.5)],
        "f": None,
    }
    expected_mus = {
        "a": [1.0] + consider_prior * [50.5],
        "b": [np.log(1.0)] + consider_prior * [np.log(100) / 2.0],
        "c": [1.0] + consider_prior * [50.5],
        "d": [1.0] + consider_prior * [50.5],
        "e": [np.log(1.0)] + consider_prior * [(np.log(100.5) + np.log(0.5)) / 2.0],
        "f": None,
    }
    expected_categorical_weights = {
        "a": None,
        "b": None,
        "c": None,
        "d": None,
        "e": None,
        "f": np.array([[0.2, 0.6, 0.2], [1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0]])
        if consider_prior
        else np.array([[0.25, 0.5, 0.25]]),
    }

    for param_name in mpe._sigmas:
        np.testing.assert_equal(
            mpe._sigmas[param_name],
            expected_sigmas_multivariate[param_name]
            if multivariate else expected_sigmas_univariate[param_name],
            err_msg='parameter "{}"'.format(param_name),
        )
        np.testing.assert_equal(
            mpe._mus[param_name],
            expected_mus[param_name],
            err_msg="parameter: {}".format(param_name),
        )
        np.testing.assert_equal(
            mpe._categorical_weights[param_name],
            expected_categorical_weights[param_name],
            err_msg="parameter: {}".format(param_name),
        )
Example #15
    def _sample_mo_numerical(
        self,
        study: "multi_objective.study.MultiObjectiveStudy",
        trial: "multi_objective.trial.FrozenMultiObjectiveTrial",
        low: float,
        high: float,
        below: np.ndarray,
        above: np.ndarray,
        q: Optional[float] = None,
        is_log: bool = False,
    ) -> float:
        if is_log:
            low = np.log(low)
            high = np.log(high)
            below = np.log(below)
            above = np.log(above)

        size = (self._n_ehvi_candidates, )
        if self._weights is _default_weights_above:
            weights_below = study._storage.get_trial(
                trial._trial_id).system_attrs[_WEIGHTS_BELOW_KEY]
        else:
            weights_below = self._weights
        parzen_estimator_parameters_below = _ParzenEstimatorParameters(
            self._parzen_estimator_parameters.consider_prior,
            self._parzen_estimator_parameters.prior_weight,
            self._parzen_estimator_parameters.consider_magic_clip,
            self._parzen_estimator_parameters.consider_endpoints,
            weights_below,
        )
        parzen_estimator_below = _ParzenEstimator(
            mus=below,
            low=low,
            high=high,
            parameters=parzen_estimator_parameters_below)
        samples_below = self._sample_from_gmm(
            parzen_estimator=parzen_estimator_below,
            low=low,
            high=high,
            q=q,
            size=size,
        )
        log_likelihoods_below = self._gmm_log_pdf(
            samples=samples_below,
            parzen_estimator=parzen_estimator_below,
            low=low,
            high=high,
            q=q,
        )

        weights_above = self._weights
        parzen_estimator_parameters_above = _ParzenEstimatorParameters(
            self._parzen_estimator_parameters.consider_prior,
            self._parzen_estimator_parameters.prior_weight,
            self._parzen_estimator_parameters.consider_magic_clip,
            self._parzen_estimator_parameters.consider_endpoints,
            weights_above,
        )
        parzen_estimator_above = _ParzenEstimator(
            mus=above,
            low=low,
            high=high,
            parameters=parzen_estimator_parameters_above)
        log_likelihoods_above = self._gmm_log_pdf(
            samples=samples_below,
            parzen_estimator=parzen_estimator_above,
            low=low,
            high=high,
            q=q,
        )

        ret = float(
            TPESampler._compare(samples=samples_below,
                                log_l=log_likelihoods_below,
                                log_g=log_likelihoods_above)[0])
        return math.exp(ret) if is_log else ret
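Examples #13 and #15 come from the standalone MOTPE implementation, which in Optuna 2.x was exposed as `optuna.samplers.MOTPESampler` before being folded into `TPESampler`. A hedged usage sketch on a toy bi-objective problem (objective and trial counts are illustrative):

import optuna

def objective(trial: optuna.Trial):
    x = trial.suggest_float("x", 0.0, 5.0)
    y = trial.suggest_float("y", 0.0, 3.0)
    return 4.0 * x ** 2 + 4.0 * y ** 2, (x - 5.0) ** 2 + (y - 5.0) ** 2

sampler = optuna.samplers.MOTPESampler(n_startup_trials=11, n_ehvi_candidates=24, seed=0)
study = optuna.create_study(directions=["minimize", "minimize"], sampler=sampler)
study.optimize(objective, n_trials=60)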