Example no. 1
def objective(trial: Trial):
    cat_train_dataset = Pool(_train_df[features],
                             _train_df["likes_log"],
                             cat_features=cat_features)
    cat_valid_dataset = Pool(_valid_df[features],
                             _valid_df["likes_log"],
                             cat_features=cat_features)
    params = {
        "depth": trial.suggest_int("depth", 4, 30),
        "num_leaves": trial.suggest_int("num_leaves", 16, 300),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 4, 50),
        "learning_rate": trial.suggest_loguniform("learning_rate", 0.001, 0.3),
        "reg_lambda": trial.suggest_loguniform("reg_lambda", 0.01, 0.5),
    }
    cat_model = CatBoostRegressor(**params,
                                  iterations=3500,
                                  grow_policy="Lossguide")
    cat_model.fit(
        cat_train_dataset,
        verbose_eval=100,
        eval_set=[cat_valid_dataset],
        early_stopping_rounds=200,
    )
    y_pred_cat = np.expm1(cat_model.predict(_valid_df[features]))
    y_pred_cat[y_pred_cat < 0] = 0
    y_true = _valid_df["likes"].values
    rmsle = np.sqrt(mean_squared_log_error(y_true, y_pred_cat))
    return rmsle
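For context, a minimal sketch of how an objective like this is usually driven; the study direction and trial count below are illustrative assumptions, not part of the original snippet.

import optuna

# The objective returns RMSLE, so the study minimizes it.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)
print(study.best_params, study.best_value)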
Example no. 2
    def _get_params(self, trial: trial_module.Trial) -> Dict[str, Any]:
        params = self.params.copy()  # type: Dict[str, Any]

        if self.param_distributions is None:
            params["feature_fraction"] = trial.suggest_discrete_uniform(
                "feature_fraction", 0.1, 1.0, 0.05)
            params["max_depth"] = trial.suggest_int("max_depth", 1, 7)
            params["num_leaves"] = trial.suggest_int("num_leaves", 2,
                                                     2**params["max_depth"])
            # See https://github.com/Microsoft/LightGBM/issues/907
            params["min_data_in_leaf"] = trial.suggest_int(
                "min_data_in_leaf",
                1,
                max(1, int(self.n_samples / params["num_leaves"])),
            )
            params["lambda_l1"] = trial.suggest_loguniform(
                "lambda_l1", 1e-09, 10.0)
            params["lambda_l2"] = trial.suggest_loguniform(
                "lambda_l2", 1e-09, 10.0)

            if params["boosting_type"] != "goss":
                params["bagging_fraction"] = trial.suggest_discrete_uniform(
                    "bagging_fraction", 0.5, 0.95, 0.05)
                params["bagging_freq"] = trial.suggest_int(
                    "bagging_freq", 1, 10)

            return params

        for name, distribution in self.param_distributions.items():
            params[name] = trial._suggest(name, distribution)

        return params
Example no. 3
def test_suggest_low_equals_high(storage_init_func):
    # type: (typing.Callable[[], storages.BaseStorage]) -> None

    study = create_study(storage_init_func(),
                         sampler=samplers.TPESampler(n_startup_trials=0))
    trial = Trial(study, study._storage.create_new_trial(study.study_id))

    # Parameter values are determined without suggestion when low == high.
    with patch.object(trial, '_suggest', wraps=trial._suggest) as mock_object:
        assert trial.suggest_uniform('a', 1., 1.) == 1.  # Suggesting a param.
        assert trial.suggest_uniform('a', 1.,
                                     1.) == 1.  # Suggesting the same param.
        assert mock_object.call_count == 0
        assert trial.suggest_loguniform('b', 1.,
                                        1.) == 1.  # Suggesting a param.
        assert trial.suggest_loguniform('b', 1.,
                                        1.) == 1.  # Suggesting the same param.
        assert mock_object.call_count == 0
        assert trial.suggest_discrete_uniform('c', 1., 1.,
                                              1.) == 1.  # Suggesting a param.
        assert trial.suggest_discrete_uniform(
            'c', 1., 1., 1.) == 1.  # Suggesting the same param.
        assert mock_object.call_count == 0
        assert trial.suggest_int('d', 1, 1) == 1  # Suggesting a param.
        assert trial.suggest_int('d', 1, 1) == 1  # Suggesting the same param.
        assert mock_object.call_count == 0
Example no. 4
 def obj(t: Trial) -> float:
     t.suggest_uniform("a", 1.0, 100.0)
     t.suggest_loguniform("b", 1.0, 100.0)
     t.suggest_discrete_uniform("c", 1.0, 100.0, 3.0)
     t.suggest_int("d", 1, 100)
     t.suggest_int("e", 0, 100, step=2)
     t.suggest_int("f", 1, 100, log=True)
     t.suggest_categorical("g", ["x", "y", "z"])
     return 0.0
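A side note on API vintage: suggest_uniform, suggest_loguniform, and suggest_discrete_uniform, used throughout these examples, were later deprecated in favor of suggest_float. The mapping is mechanical; a sketch, assuming an Optuna version that provides suggest_float:

t.suggest_float("a", 1.0, 100.0)            # replaces t.suggest_uniform("a", 1.0, 100.0)
t.suggest_float("b", 1.0, 100.0, log=True)  # replaces t.suggest_loguniform("b", 1.0, 100.0)
t.suggest_float("c", 1.0, 100.0, step=3.0)  # replaces t.suggest_discrete_uniform("c", 1.0, 100.0, 3.0)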
Example no. 5
def test_suggest_low_equals_high(storage_mode: str) -> None:

    with patch.object(
        distributions, "_get_single_value", wraps=distributions._get_single_value
    ) as mock_object, StorageSupplier(storage_mode) as storage:

        study = create_study(storage=storage, sampler=samplers.TPESampler(n_startup_trials=0))

        trial = Trial(study, study._storage.create_new_trial(study._study_id))

        assert trial.suggest_uniform("a", 1.0, 1.0) == 1.0  # Suggesting a param.
        assert mock_object.call_count == 1
        assert trial.suggest_uniform("a", 1.0, 1.0) == 1.0  # Suggesting the same param.
        assert mock_object.call_count == 1

        assert trial.suggest_loguniform("b", 1.0, 1.0) == 1.0  # Suggesting a param.
        assert mock_object.call_count == 2
        assert trial.suggest_loguniform("b", 1.0, 1.0) == 1.0  # Suggesting the same param.
        assert mock_object.call_count == 2

        assert trial.suggest_discrete_uniform("c", 1.0, 1.0, 1.0) == 1.0  # Suggesting a param.
        assert mock_object.call_count == 3
        assert (
            trial.suggest_discrete_uniform("c", 1.0, 1.0, 1.0) == 1.0
        )  # Suggesting the same param.
        assert mock_object.call_count == 3

        assert trial.suggest_int("d", 1, 1) == 1  # Suggesting a param.
        assert mock_object.call_count == 4
        assert trial.suggest_int("d", 1, 1) == 1  # Suggesting the same param.
        assert mock_object.call_count == 4

        assert trial.suggest_float("e", 1.0, 1.0) == 1.0  # Suggesting a param.
        assert mock_object.call_count == 5
        assert trial.suggest_float("e", 1.0, 1.0) == 1.0  # Suggesting the same param.
        assert mock_object.call_count == 5

        assert trial.suggest_float("f", 0.5, 0.5, log=True) == 0.5  # Suggesting a param.
        assert mock_object.call_count == 6
        assert trial.suggest_float("f", 0.5, 0.5, log=True) == 0.5  # Suggesting the same param.
        assert mock_object.call_count == 6

        assert trial.suggest_float("g", 0.5, 0.5, log=False) == 0.5  # Suggesting a param.
        assert mock_object.call_count == 7
        assert trial.suggest_float("g", 0.5, 0.5, log=False) == 0.5  # Suggesting the same param.
        assert mock_object.call_count == 7

        assert trial.suggest_float("h", 0.5, 0.5, step=1.0) == 0.5  # Suggesting a param.
        assert mock_object.call_count == 8
        assert trial.suggest_float("h", 0.5, 0.5, step=1.0) == 0.5  # Suggesting the same param.
        assert mock_object.call_count == 8

        assert trial.suggest_int("i", 1, 1, log=True) == 1  # Suggesting a param.
        assert mock_object.call_count == 9
        assert trial.suggest_int("i", 1, 1, log=True) == 1  # Suggesting the same param.
        assert mock_object.call_count == 9
Example no. 6
    def sample_params_values(self, trial: Trial, suggested_params: Dict,
                             estimated_n_trials: int) -> Dict:
        """Sample hyperparameters from suggested.

        Args:
            trial: optuna trial object.
            suggested_params: dict with parameters.
            estimated_n_trials: maximum number of hyperparameter estimations.

        Returns:
            dict with sampled hyperparameters.

        """
        logger.debug('Suggested parameters:')
        logger.debug(suggested_params)

        trial_values = copy(suggested_params)

        trial_values['feature_fraction'] = trial.suggest_uniform(
            name='feature_fraction',
            low=0.5,
            high=1.0,
        )

        trial_values['num_leaves'] = trial.suggest_int(
            name='num_leaves',
            low=16,
            high=255,
        )

        if estimated_n_trials > 30:
            trial_values['bagging_fraction'] = trial.suggest_uniform(
                name='bagging_fraction',
                low=0.5,
                high=1.0,
            )

            trial_values['min_sum_hessian_in_leaf'] = trial.suggest_loguniform(
                name='min_sum_hessian_in_leaf',
                low=1e-3,
                high=10.0,
            )

        if estimated_n_trials > 100:
            trial_values['reg_alpha'] = trial.suggest_loguniform(
                name='reg_alpha',
                low=1e-8,
                high=10.0,
            )
            trial_values['reg_lambda'] = trial.suggest_loguniform(
                name='reg_lambda',
                low=1e-8,
                high=10.0,
            )

        return trial_values
Example no. 7
    def objective(trial: Trial) -> float:

        trial.suggest_uniform("a", 0, 10)
        trial.suggest_loguniform("b", 0.1, 10)
        trial.suggest_discrete_uniform("c", 0, 10, 1)
        trial.suggest_int("d", 0, 10)
        trial.suggest_categorical("e", ["foo", "bar", "baz"])
        trial.suggest_int("f", 1, 10, log=True)

        return 1.0
Example no. 8
def test_check_distribution_suggest_loguniform(storage_init_func):
    # type: (typing.Callable[[], storages.BaseStorage]) -> None

    sampler = samplers.RandomSampler()
    study = create_study(storage_init_func(), sampler=sampler)
    trial = Trial(study, study._storage.create_new_trial(study._study_id))

    with pytest.warns(None) as record:
        trial.suggest_loguniform("x", 10, 20)
        trial.suggest_loguniform("x", 10, 20)
        trial.suggest_loguniform("x", 10, 30)

    # we expect exactly one warning
    assert len(record) == 1
Example no. 9
def test_suggest_low_equals_high(storage_init_func):
    # type: (Callable[[], storages.BaseStorage]) -> None

    study = create_study(storage_init_func(), sampler=samplers.TPESampler(n_startup_trials=0))
    trial = Trial(study, study._storage.create_new_trial(study._study_id))

    # Parameter values are determined without suggestion when low == high.
    with patch.object(trial, "_suggest", wraps=trial._suggest) as mock_object:
        assert trial.suggest_uniform("a", 1.0, 1.0) == 1.0  # Suggesting a param.
        assert trial.suggest_uniform("a", 1.0, 1.0) == 1.0  # Suggesting the same param.
        assert mock_object.call_count == 0
        assert trial.suggest_loguniform("b", 1.0, 1.0) == 1.0  # Suggesting a param.
        assert trial.suggest_loguniform("b", 1.0, 1.0) == 1.0  # Suggesting the same param.
        assert mock_object.call_count == 0
        assert trial.suggest_discrete_uniform("c", 1.0, 1.0, 1.0) == 1.0  # Suggesting a param.
        assert (
            trial.suggest_discrete_uniform("c", 1.0, 1.0, 1.0) == 1.0
        )  # Suggesting the same param.
        assert mock_object.call_count == 0
        assert trial.suggest_int("d", 1, 1) == 1  # Suggesting a param.
        assert trial.suggest_int("d", 1, 1) == 1  # Suggesting the same param.
        assert mock_object.call_count == 0
        assert trial.suggest_float("e", 1.0, 1.0) == 1.0  # Suggesting a param.
        assert trial.suggest_float("e", 1.0, 1.0) == 1.0  # Suggesting the same param.
        assert mock_object.call_count == 0
        assert trial.suggest_float("f", 0.5, 0.5, log=True) == 0.5  # Suggesting a param.
        assert trial.suggest_float("f", 0.5, 0.5, log=True) == 0.5  # Suggesting the same param.
        assert mock_object.call_count == 0
        assert trial.suggest_float("g", 0.5, 0.5, log=False) == 0.5  # Suggesting a param.
        assert trial.suggest_float("g", 0.5, 0.5, log=False) == 0.5  # Suggesting the same param.
        assert mock_object.call_count == 0
Example no. 10
def test_check_distribution_suggest_float(storage_mode: str) -> None:

    sampler = samplers.RandomSampler()
    with StorageSupplier(storage_mode) as storage:
        study = create_study(storage=storage, sampler=sampler)
        trial = Trial(study, study._storage.create_new_trial(study._study_id))

        x1 = trial.suggest_float("x1", 10, 20)
        x2 = trial.suggest_uniform("x1", 10, 20)

        assert x1 == x2

        x3 = trial.suggest_float("x2", 1e-5, 1e-3, log=True)
        x4 = trial.suggest_loguniform("x2", 1e-5, 1e-3)

        assert x3 == x4

        x5 = trial.suggest_float("x3", 10, 20, step=1.0)
        x6 = trial.suggest_discrete_uniform("x3", 10, 20, 1.0)

        assert x5 == x6
        with pytest.raises(ValueError):
            trial.suggest_float("x4", 1e-5, 1e-2, step=1e-5, log=True)

        with pytest.raises(ValueError):
            trial.suggest_int("x1", 10, 20)

        trial = Trial(study, study._storage.create_new_trial(study._study_id))
        with pytest.raises(ValueError):
            trial.suggest_int("x1", 10, 20)
Example no. 11
def test_check_distribution_suggest_float(storage_init_func):
    # type: (Callable[[], storages.BaseStorage]) -> None

    sampler = samplers.RandomSampler()
    study = create_study(storage_init_func(), sampler=sampler)
    trial = Trial(study, study._storage.create_new_trial(study._study_id))

    x1 = trial.suggest_float("x1", 10, 20)
    x2 = trial.suggest_uniform("x1", 10, 20)

    assert x1 == x2

    x3 = trial.suggest_float("x2", 1e-5, 1e-3, log=True)
    x4 = trial.suggest_loguniform("x2", 1e-5, 1e-3)

    assert x3 == x4

    x5 = trial.suggest_float("x3", 10, 20, step=1.0)
    x6 = trial.suggest_discrete_uniform("x3", 10, 20, 1.0)

    assert x5 == x6
    with pytest.raises(ValueError):
        trial.suggest_float("x4", 1e-5, 1e-2, step=1e-5, log=True)

    with pytest.raises(ValueError):
        trial.suggest_int("x1", 10, 20)

    trial = Trial(study, study._storage.create_new_trial(study._study_id))
    with pytest.raises(ValueError):
        trial.suggest_int("x1", 10, 20)
Example no. 12
    def objective(trial: Trial) -> float:
        x1 = trial.suggest_uniform("x1", 0.1, 3)
        x2 = trial.suggest_loguniform("x2", 0.1, 3)
        x3 = trial.suggest_discrete_uniform("x3", 0, 3, 1)
        if trial.number % 2 == 0:
            x4 = trial.suggest_uniform("x4", 0.1, 3)

        value = x1**4 + x2 + x3
        if trial.number % 2 == 0:
            value += x4
        return value
Example no. 13
def test_suggest_loguniform(storage_mode: str) -> None:

    with pytest.raises(ValueError):
        FloatDistribution(low=1.0, high=0.9, log=True)

    with pytest.raises(ValueError):
        FloatDistribution(low=0.0, high=0.9, log=True)

    sampler = DeterministicSampler({"x": 1.0, "y": 2.0})

    with StorageSupplier(storage_mode) as storage:
        study = create_study(storage=storage, sampler=sampler)
        trial = Trial(study, study._storage.create_new_trial(study._study_id))

        assert trial.suggest_loguniform("x", 0.1,
                                        4.0) == 1.0  # Test suggesting a param.
        assert trial.suggest_loguniform(
            "x", 0.1, 4.0) == 1.0  # Test suggesting the same param.
        assert trial.suggest_loguniform(
            "y", 0.1, 4.0) == 2.0  # Test suggesting a different param.
        assert trial.params == {"x": 1.0, "y": 2.0}
Example no. 14
    def objective(trial: Trial) -> float:

        a = trial.suggest_int("a", 0, 100)
        b = trial.suggest_uniform("b", -0.1, 0.1)
        c = trial.suggest_categorical("c", ("x", "y"))
        d = trial.suggest_discrete_uniform("d", -5, 5, 1)
        e = trial.suggest_loguniform("e", 0.0001, 1)

        if c == "x":
            return a * d
        else:
            return b * e
Example no. 15
def test_check_distribution_suggest_loguniform(storage_mode: str) -> None:

    sampler = samplers.RandomSampler()
    with StorageSupplier(storage_mode) as storage:
        study = create_study(storage=storage, sampler=sampler)
        trial = Trial(study, study._storage.create_new_trial(study._study_id))

        with pytest.warns(None) as record:
            trial.suggest_loguniform("x", 10, 20)
            trial.suggest_loguniform("x", 10, 20)
            trial.suggest_loguniform("x", 10, 30)

        # we expect exactly one warning (not counting ones caused by deprecation)
        assert len([r for r in record if r.category != FutureWarning]) == 1

        with pytest.raises(ValueError):
            trial.suggest_int("x", 10, 20)

        trial = Trial(study, study._storage.create_new_trial(study._study_id))
        with pytest.raises(ValueError):
            trial.suggest_int("x", 10, 20)
Example no. 16
def test_check_distribution_suggest_loguniform(storage_mode: str) -> None:

    sampler = samplers.RandomSampler()
    with StorageSupplier(storage_mode) as storage:
        study = create_study(storage=storage, sampler=sampler)
        trial = Trial(study, study._storage.create_new_trial(study._study_id))

        with pytest.warns(None) as record:
            trial.suggest_loguniform("x", 10, 20)
            trial.suggest_loguniform("x", 10, 20)
            trial.suggest_loguniform("x", 10, 30)

        # we expect exactly one warning
        assert len(record) == 1

        with pytest.raises(ValueError):
            trial.suggest_int("x", 10, 20)

        trial = Trial(study, study._storage.create_new_trial(study._study_id))
        with pytest.raises(ValueError):
            trial.suggest_int("x", 10, 20)
Example no. 17
 def get_optuna_parameter(self, trial: Trial):
     if self.suggestion_type == OptunaSuggestion.DiscreteUniform:
         return trial.suggest_discrete_uniform(self.name, self.low,
                                               self.high, self.kwargs["q"])
     elif self.suggestion_type == OptunaSuggestion.Int:
         return trial.suggest_int(self.name, self.low, self.high)
     elif self.suggestion_type == OptunaSuggestion.LogUniform:
         return trial.suggest_loguniform(self.name, self.low, self.high)
     elif self.suggestion_type == OptunaSuggestion.Uniform:
         return trial.suggest_uniform(self.name, self.low, self.high)
     else:
         raise NotImplementedError
Example no. 18
    def objective(trial: Trial) -> float:

        # Predefined parameters are sampled by the `sample_relative()` method.
        assert trial.suggest_uniform("a", 0, 5) == 3.2
        assert trial.suggest_categorical("b", ["foo", "bar", "baz"]) == "baz"

        # Other parameters are sampled by the `sample_independent()` method.
        assert trial.suggest_int("c", 20, 50) == unknown_param_value
        assert trial.suggest_loguniform("d", 1, 100) == unknown_param_value
        assert trial.suggest_uniform("e", 20, 40) == unknown_param_value

        return 0.0
Example no. 19
    def sample_params_values(self, trial: Trial, suggested_params: Dict,
                             estimated_n_trials: int) -> Dict:
        """Sample hyperparameters from suggested.

        Args:
            trial: Optuna trial object.
            suggested_params: Dict with parameters.
            estimated_n_trials: Maximum number of hyperparameter estimations.

        Returns:
            Dict with sampled hyperparameters.

        """

        trial_values = copy(suggested_params)

        try:
            nan_rate = getattr(self, '_nan_rate')
        except AttributeError:
            nan_rate = 0

        trial_values['max_depth'] = trial.suggest_int(name='max_depth',
                                                      low=3,
                                                      high=7)

        if nan_rate > 0:
            trial_values['nan_mode'] = trial.suggest_categorical(
                name='nan_mode', choices=['Max', 'Min'])

        if estimated_n_trials > 20:
            trial_values['l2_leaf_reg'] = trial.suggest_loguniform(
                name='l2_leaf_reg',
                low=1e-8,
                high=10.0,
            )

            # trial_values['bagging_temperature'] = trial.suggest_loguniform(
            #     name='bagging_temperature',
            #     low=0.01,
            #     high=10.0,
            # )

        if estimated_n_trials > 50:
            trial_values['min_data_in_leaf'] = trial.suggest_int(
                name='min_data_in_leaf', low=1, high=20)

            # one_hot_max_size is only relevant when there is at least one categorical column.
            if len(self._le_cat_features) > 0:
                trial_values['one_hot_max_size'] = trial.suggest_int(
                    name='one_hot_max_size', low=3, high=10)

        return trial_values
Example no. 20
    def __init__(self, cfg: EEGLearnerConfig, trial: Trial):
        watch_hidden_size = trial.suggest_int('watch_hidden_size', 1, 15)
        reg_hidden_size = trial.suggest_int('reg_hidden_size', 1, 15)
        embedding_size = trial.suggest_int('embedding_size', 0, 15)
        lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)

        super().__init__(cfg.net.watch_len,
                         cfg.net.reg_len,
                         watch_hidden_size,
                         reg_hidden_size,
                         embedding_size,
                         lr,
                         n_subjects=cfg.net.n_subjects)
Example no. 21
 def objective(trial: Trial) -> Tuple[float, float]:
     p0 = trial.suggest_float("p0", -10, 10)
     p1 = trial.suggest_uniform("p1", 3, 5)
     p2 = trial.suggest_loguniform("p2", 0.00001, 0.1)
     p3 = trial.suggest_discrete_uniform("p3", 100, 200, q=5)
     p4 = trial.suggest_int("p4", -20, -15)
     p5 = cast(int, trial.suggest_categorical("p5", [7, 1, 100]))
     p6 = trial.suggest_float("p6", -10, 10, step=1.0)
     p7 = trial.suggest_int("p7", 1, 7, log=True)
     return (
         p0 + p1 + p2,
         p3 + p4 + p5 + p6 + p7,
     )
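An objective returning a tuple like the one above is multi-objective, so the study needs one optimization direction per returned value. A minimal sketch of the matching setup; the directions and trial count are assumptions:

import optuna

study = optuna.create_study(directions=["minimize", "minimize"])
study.optimize(objective, n_trials=20)
for t in study.best_trials:  # Pareto-optimal trials
    print(t.values, t.params)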
Example no. 22
    def __call__(self, trial: Trial) -> float:
        """Calculate an objective value."""
        train, val, test, num_users, num_items =\
            preprocess_datasets(data=self.data, seed=12345)

        # sample a set of hyperparameters.
        config = yaml.safe_load(open('../config.yaml', 'r'))
        eta = config['eta']
        max_iters = config['max_iters']
        batch_size = config['batch_size']
        pre_iters = config['pre_iters']
        post_iters = config['post_iters']
        post_steps = config['post_steps']
        dim = trial.suggest_discrete_uniform('dim', 5, 50, 5)
        lam = trial.suggest_loguniform('lam', 1e-6, 1)
        if '-at' in self.model_name:
            epsilon = trial.suggest_loguniform('epsilon', 1e-3, 1)

        ops.reset_default_graph()
        tf.set_random_seed(12345)
        sess = tf.Session()
        if '-at' not in self.model_name:
            model = MFIPS(num_users=num_users, num_items=num_items, dim=dim, lam=lam, eta=eta)
            score, _, _, _, _, _ = train_mfips(
                sess, model=model, data=self.data, train=train, val=val, test=test,
                max_iters=max_iters, batch_size=batch_size, model_name=self.model_name)
        else:
            model = MFIPS(num_users=num_users, num_items=num_items, dim=dim, lam=lam, eta=eta, num=0)
            model1 = MFIPS(num_users=num_users, num_items=num_items, dim=dim, lam=lam, eta=eta, num=1)
            model2 = MFIPS(num_users=num_users, num_items=num_items, dim=dim, lam=lam, eta=eta, num=2)
            score, _, _, _, _, _ = train_mfips_with_at(
                sess, model=model, mfips1=model1, mfips2=model2, data=self.data,
                train=train, val=val, test=test, epsilon=epsilon,
                pre_iters=pre_iters, post_iters=post_iters, post_steps=post_steps,
                batch_size=batch_size, model_name=self.model_name)

        return score
Example no. 23
    def __init__(self, trial: Trial):
        watch_hidden_size = trial.suggest_int('watch_hidden_size', 1, 15)
        reg_hidden_size = trial.suggest_int('reg_hidden_size', 1, 15)
        embedding_size = trial.suggest_int('embedding_size', 0, 15)
        lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)

        super().__init__(
            self.watch_len,
            self.reg_len,
            watch_hidden_size,
            reg_hidden_size,
            embedding_size,
            lr,
            n_subjects=self.n_subjects
        )
Example no. 24
    def objective(trial: Trial) -> float:
        x1 = trial.suggest_uniform("x1", 0.1, 3)
        x2 = trial.suggest_loguniform("x2", 0.1, 3)
        x3 = trial.suggest_discrete_uniform("x3", 0, 3, 1)
        x4 = trial.suggest_int("x4", -3, 3)
        x5 = trial.suggest_categorical("x5", [1.0, 1.1, 1.2])
        if trial.number % 2 == 0:
            # Conditional parameters are ignored unless `params` is specified and is not `None`.
            x6 = trial.suggest_uniform("x6", 0.1, 3)

        assert isinstance(x5, float)
        value = x1**4 + x2 + x3 - x4**2 - x5
        if trial.number % 2 == 0:
            value += x6
        return value
Example no. 25
def test_check_distribution_suggest_float(storage_init_func):
    # type: (typing.Callable[[], storages.BaseStorage]) -> None

    sampler = samplers.RandomSampler()
    study = create_study(storage_init_func(), sampler=sampler)
    trial = Trial(study, study._storage.create_new_trial(study._study_id))

    x1 = trial.suggest_float("x1", 10, 20)
    x2 = trial.suggest_uniform("x1", 10, 20)

    assert x1 == x2

    x3 = trial.suggest_float("x2", 1e-5, 1e-3, log=True)
    x4 = trial.suggest_loguniform("x2", 1e-5, 1e-3)

    assert x3 == x4
Example no. 26
def define_hyperparameters(trial: Trial) -> Pipeline:
    ngram_range = trial.suggest_categorical("vectorizer__ngram_range",
                                            ["11", "12"])
    vectorizer = TfidfVectorizer(
        stop_words="english",
        min_df=2,
        ngram_range=string_to_tuple(ngram_range),
    )

    clf = trial.suggest_categorical(
        "clf", ["SVC", "RandomForestClassifier", "MultinomialNB"])
    if clf == "SVC":
        C = trial.suggest_uniform("clf__C", 0.1, 0.2)
        classifier = SVC(C=C)
    elif clf == "RandomForestClassifier":
        max_depth = trial.suggest_int("clf__max_depth", 2, 4)
        classifier = RandomForestClassifier(max_depth=max_depth)
    else:
        alpha = trial.suggest_loguniform("clf__alpha", 1e-2, 1e-1)
        classifier = MultinomialNB(alpha=alpha)

    return Pipeline([("vectorizer", vectorizer), ("clf", classifier)])
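A plausible driver for this pipeline factory, scoring each sampled pipeline by cross-validation. Everything here other than define_hyperparameters itself is an assumption (in particular, X_train and y_train are presumed to exist in scope):

from sklearn.model_selection import cross_val_score

def objective(trial):
    # Build the pipeline for this trial, then report mean 3-fold CV score.
    pipeline = define_hyperparameters(trial)
    return cross_val_score(pipeline, X_train, y_train, cv=3).mean()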
Example no. 27
    def _get_params(self, trial: trial_module.Trial) -> Dict[str, Any]:
        params = self.params.copy()  # type: Dict[str, Any]

        if self.param_distributions is None:
            params["colsample_bylevel"] = trial.suggest_discrete_uniform(
                "colsample_bylevel", 0.1, 1.0, 0.05
            )
            params["max_depth"] = trial.suggest_int("max_depth", 1, 7)
            # https://catboost.ai/docs/concepts/parameter-tuning.html#tree-growing-policy
            # params["num_leaves"] = trial.suggest_int(
            #     "num_leaves", 2, 2 ** params["max_depth"]
            # )
            # See https://github.com/Microsoft/LightGBM/issues/907
            params["num_leaves"] = 31
            params["min_data_in_leaf"] = trial.suggest_int(
                "min_data_in_leaf",
                1,
                max(1, int(self.n_samples / params["num_leaves"])),
            )
            params["l2_leaf_reg"] = trial.suggest_loguniform("lambda_l2", 1e-09, 10.0)

            if params["bootstrap_type"] == "Bayesian":
                params["bagging_temperature"] = trial.suggest_discrete_uniform(
                    "bagging_temperature", 0.5, 0.95, 0.05
                )
            elif (
                params["bootstrap_type"] == "Bernoulli"
                or params["bootstrap_type"] == "Poisson"
            ):
                params["subsample"] = trial.suggest_uniform("subsample", 0.1, 1)

            return params

        for name, distribution in self.param_distributions.items():
            params[name] = trial._suggest(name, distribution)

        return params
Example no. 28
    def __call__(self, trial: Trial) -> float:
        """Calculate an objective value."""

        logger = logging.getLogger(__name__)  # Create a custom logger

        # Create logging handlers
        c_handler = logging.StreamHandler()
        f_handler = logging.FileHandler(
            Path(f'../logs/{self.data}/{self.model_name}/simulations.log'),
            mode='w')

        # Create logging formatters and add them to handlers
        c_format = logging.Formatter('%(message)s')
        f_format = logging.Formatter('%(message)s')
        c_handler.setFormatter(c_format)
        f_handler.setFormatter(f_format)

        logger.addHandler(c_handler)
        logger.addHandler(f_handler)

        train, val, test, num_users, num_items =\
            preprocess_datasets(data=self.data, seed=rand_seed_val)

        # sample a set of hyperparameters.
        config = yaml.safe_load(open('../config.yaml', 'r'))
        eta = config['eta']
        max_iters = config['max_iters']
        batch_size = config['batch_size']
        pre_iters = config['pre_iters']
        post_iters = config['post_iters']
        post_steps = config['post_steps']
        dim = trial.suggest_discrete_uniform('dim', 5, 50, 5)
        lam = trial.suggest_loguniform('lam', 1e-6, 1)
        if '-at' in self.model_name:
            epsilon = trial.suggest_loguniform('epsilon', 1e-3, 1)

        ops.reset_default_graph()
        tf.set_random_seed(rand_seed_val)
        sess = tf.Session()
        if '-without_ipw' in self.model_name:
            logger.debug('*** Without IPW ***')
            model = MFMODEL(num_users=num_users,
                            num_items=num_items,
                            dim=dim,
                            lam=lam,
                            eta=eta)
            score, _, _, _, _, _, _, _ = train_mfmodel_without_ipw(
                sess,
                model=model,
                data=self.data,
                train=train,
                val=val,
                test=test,
                max_iters=max_iters,
                batch_size=batch_size,
                model_name=self.model_name)
        elif '-at' not in self.model_name:
            logger.debug(
                '*** With IPW and without Asymmetric Tri-training ***')
            model = MFMODEL(num_users=num_users,
                            num_items=num_items,
                            dim=dim,
                            lam=lam,
                            eta=eta)
            score, _, _, _, _, _, _, _ = train_mfmodel(
                sess,
                model=model,
                data=self.data,
                train=train,
                val=val,
                test=test,
                max_iters=max_iters,
                batch_size=batch_size,
                model_name=self.model_name)
        else:
            logger.debug('*** With IPW and Asymmetric Tri-training ***')
            model = MFMODEL(num_users=num_users,
                            num_items=num_items,
                            dim=dim,
                            lam=lam,
                            eta=eta,
                            num=0)
            model1 = MFMODEL(num_users=num_users,
                             num_items=num_items,
                             dim=dim,
                             lam=lam,
                             eta=eta,
                             num=1)
            model2 = MFMODEL(num_users=num_users,
                             num_items=num_items,
                             dim=dim,
                             lam=lam,
                             eta=eta,
                             num=2)
            score, _, _, _, _, _, _, _ = train_mfmodel_with_at(
                sess,
                model=model,
                mfmodel1=model1,
                mfmodel2=model2,
                data=self.data,
                train=train,
                val=val,
                test=test,
                epsilon=epsilon,
                pre_iters=pre_iters,
                post_iters=post_iters,
                post_steps=post_steps,
                batch_size=batch_size,
                model_name=self.model_name)

        return score
Example no. 29
 def __call__(self, name: str, trial: Trial):
     return trial.suggest_loguniform(name, self.low, self.high)
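A sketch of how a tiny callable wrapper like this might be wired into a search-space description; the LogUniformParam container and the space dict below are hypothetical, written only to mirror the __call__ above:

from optuna import Trial

class LogUniformParam:
    # Hypothetical holder for the bounds consumed by __call__.
    def __init__(self, low: float, high: float):
        self.low, self.high = low, high

    def __call__(self, name: str, trial: Trial):
        return trial.suggest_loguniform(name, self.low, self.high)

def objective(trial: Trial) -> float:
    space = {"lr": LogUniformParam(1e-5, 1e-1), "lam": LogUniformParam(1e-8, 10.0)}
    params = {name: p(name, trial) for name, p in space.items()}
    return sum(params.values())  # placeholder objective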
Example no. 30
def test_suggest_low_equals_high(storage_init_func):
    # type: (Callable[[], storages.BaseStorage]) -> None

    study = create_study(storage_init_func(),
                         sampler=samplers.TPESampler(n_startup_trials=0))
    trial = Trial(study, study._storage.create_new_trial(study._study_id))

    with patch.object(
            optuna.distributions,
            "_get_single_value",
            wraps=optuna.distributions._get_single_value) as mock_object:
        assert trial.suggest_uniform("a", 1.0,
                                     1.0) == 1.0  # Suggesting a param.
        assert mock_object.call_count == 1
        assert trial.suggest_uniform("a", 1.0,
                                     1.0) == 1.0  # Suggesting the same param.
        assert mock_object.call_count == 1

        assert trial.suggest_loguniform("b", 1.0,
                                        1.0) == 1.0  # Suggesting a param.
        assert mock_object.call_count == 2
        assert trial.suggest_loguniform(
            "b", 1.0, 1.0) == 1.0  # Suggesting the same param.
        assert mock_object.call_count == 2

        assert trial.suggest_discrete_uniform(
            "c", 1.0, 1.0, 1.0) == 1.0  # Suggesting a param.
        assert mock_object.call_count == 3
        assert (trial.suggest_discrete_uniform("c", 1.0, 1.0, 1.0) == 1.0
                )  # Suggesting the same param.
        assert mock_object.call_count == 3

        assert trial.suggest_int("d", 1, 1) == 1  # Suggesting a param.
        assert mock_object.call_count == 4
        assert trial.suggest_int("d", 1, 1) == 1  # Suggesting the same param.
        assert mock_object.call_count == 4

        assert trial.suggest_float("e", 1.0, 1.0) == 1.0  # Suggesting a param.
        assert mock_object.call_count == 5
        assert trial.suggest_float("e", 1.0,
                                   1.0) == 1.0  # Suggesting the same param.
        assert mock_object.call_count == 5

        assert trial.suggest_float("f", 0.5, 0.5,
                                   log=True) == 0.5  # Suggesting a param.
        assert mock_object.call_count == 6
        assert trial.suggest_float(
            "f", 0.5, 0.5, log=True) == 0.5  # Suggesting the same param.
        assert mock_object.call_count == 6

        assert trial.suggest_float("g", 0.5, 0.5,
                                   log=False) == 0.5  # Suggesting a param.
        assert mock_object.call_count == 7
        assert trial.suggest_float(
            "g", 0.5, 0.5, log=False) == 0.5  # Suggesting the same param.
        assert mock_object.call_count == 7

        assert trial.suggest_float("h", 0.5, 0.5,
                                   step=1.0) == 0.5  # Suggesting a param.
        assert mock_object.call_count == 8
        assert trial.suggest_float(
            "h", 0.5, 0.5, step=1.0) == 0.5  # Suggesting the same param.
        assert mock_object.call_count == 8

        assert trial.suggest_int("i", 1, 1,
                                 log=True) == 1  # Suggesting a param.
        assert mock_object.call_count == 9
        assert trial.suggest_int("i", 1, 1,
                                 log=True) == 1  # Suggesting the same param.
        assert mock_object.call_count == 9