Ejemplo n.º 1
0
def teardown_module():
    # type: () -> None
    """Tear down the module by calling ``StorageSupplier.teardown_common_tempfile``."""
    StorageSupplier.teardown_common_tempfile()
Ejemplo n.º 2
0
def setup_module():
    # type: () -> None
    """Set up the module by calling ``StorageSupplier.setup_common_tempfile``."""
    StorageSupplier.setup_common_tempfile()
Ejemplo n.º 3
0
def test_run_trial(storage_mode):
    # type: (str) -> None
    """Check how ``Study._run_trial`` records successful and failing trials.

    Five trials are run in sequence against the same study: a normal
    objective, an objective raising an exception listed in ``catch``, one
    raising an exception not listed in ``catch``, and objectives returning
    ``None`` and NaN. The expected failure messages embed the trial number,
    so the order of the calls matters.
    """

    def raise_value_error(_):
        # type: (optuna.trial.Trial) -> float

        raise ValueError

    def return_none(_):
        # type: (optuna.trial.Trial) -> float

        return None  # type: ignore

    def return_nan(_):
        # type: (optuna.trial.Trial) -> float

        return float("nan")

    with StorageSupplier(storage_mode) as storage:
        study = optuna.create_study(storage=storage)

        def assert_failed_with(trial, reason):
            # type: (optuna.trial.Trial, str) -> None

            # Read the trial back from storage so the persisted state is checked.
            stored = study._storage.get_trial(trial._trial_id)
            assert stored.state == optuna.trial.TrialState.FAIL
            assert stored.system_attrs["fail_reason"] == reason

        # Trial #0: objective finishes without raising.
        study._run_trial(func, catch=(Exception, ), gc_after_trial=True)
        check_study(study)

        # Trial #1: the raised exception type is listed in ``catch``.
        caught = study._run_trial(
            raise_value_error, catch=(ValueError, ), gc_after_trial=True)
        assert_failed_with(
            caught,
            "Setting status of trial#1 as TrialState.FAIL because of the "
            "following error: ValueError()")

        # Trial #2: the raised exception type is not listed in ``catch``,
        # so it propagates to the caller.
        with pytest.raises(ValueError):
            study._run_trial(
                raise_value_error, catch=(ArithmeticError, ), gc_after_trial=True)

        # Trial #3: objective returns None, which is not a valid value.
        none_trial = study._run_trial(
            return_none, catch=(Exception, ), gc_after_trial=True)
        assert_failed_with(
            none_trial,
            "Setting status of trial#3 as TrialState.FAIL because the returned "
            "value from the objective function cannot be casted to float. "
            "Returned value is: None")

        # Trial #4: objective returns NaN.
        nan_trial = study._run_trial(
            return_nan, catch=(Exception, ), gc_after_trial=True)
        assert_failed_with(
            nan_trial,
            "Setting status of trial#4 as TrialState.FAIL because the objective "
            "function returned nan.")
Ejemplo n.º 4
0
def test_create_new_trial_with_template_trial(storage_mode: str) -> None:
    """Check that ``create_new_trial`` copies a template trial into storage.

    Trials are created from the same ``FrozenTrial`` template in two studies.
    The test verifies that the stored trials carry the template's fields,
    that trial numbers are consecutive within each study, and that trial ids
    are unique across both studies.
    """

    started_at = datetime.now()
    finished_at = datetime.now()
    template_trial = FrozenTrial(
        state=TrialState.COMPLETE,
        value=10000,
        datetime_start=started_at,
        datetime_complete=finished_at,
        params={"x": 0.5},
        distributions={"x": UniformDistribution(0, 1)},
        user_attrs={"foo": "bar"},
        system_attrs={"baz": 123},
        intermediate_values={1: 10, 2: 100, 3: 1000},
        number=55,  # This entry is ignored.
        trial_id=-1,  # dummy value (unused).
    )

    # Fields every stored trial is expected to share with the template.
    copied_fields = (
        "state",
        "params",
        "distributions",
        "intermediate_values",
        "user_attrs",
        "system_attrs",
        "datetime_start",
        "datetime_complete",
        "value",
    )

    def _check_trials(trials: List[FrozenTrial], idx: int,
                      trial_id: int) -> None:
        expected_count = idx + 1
        seen_ids = {t._trial_id for t in trials}

        assert len(trials) == expected_count
        assert len(seen_ids) == expected_count
        assert trial_id in seen_ids
        assert {t.number for t in trials} == set(range(expected_count))

        for field in copied_fields:
            expected = getattr(template_trial, field)
            for t in trials:
                assert getattr(t, field) == expected

    with StorageSupplier(storage_mode) as storage:

        n_trial_in_study = 3

        first_study_id = storage.create_new_study()
        for i in range(n_trial_in_study):
            new_trial_id = storage.create_new_trial(
                first_study_id, template_trial=template_trial)
            _check_trials(storage.get_all_trials(first_study_id), i, new_trial_id)

        # Create trial in non-existent study.
        with pytest.raises(KeyError):
            storage.create_new_trial(first_study_id + 1)

        second_study_id = storage.create_new_study()
        for i in range(n_trial_in_study):
            storage.create_new_trial(second_study_id, template_trial=template_trial)
            numbers = {t.number for t in storage.get_all_trials(second_study_id)}
            assert numbers == set(range(i + 1))

        # Check trial_ids are unique across studies.
        every_trial = (storage.get_all_trials(first_study_id)
                       + storage.get_all_trials(second_study_id))
        assert len({t._trial_id for t in every_trial}) == 2 * n_trial_in_study
Ejemplo n.º 5
0
def test_set_trial_param(storage_mode: str) -> None:
    """Check ``set_trial_param`` across several trials and studies.

    Covers setting and overwriting parameters, rejection of incompatible
    distributions, rejection of writes to a finished trial, and lookups on a
    trial id that does not exist.
    """

    with StorageSupplier(storage_mode) as storage:

        def assert_external_params(trial_id: int, expected: dict) -> None:
            # Both read paths must report the same parameter mapping.
            assert storage.get_trial(trial_id).params == expected
            assert storage.get_trial_params(trial_id) == expected

        # Setup test across multiple studies and trials.
        study_id = storage.create_new_study()
        trial_id_1 = storage.create_new_trial(study_id)
        trial_id_2 = storage.create_new_trial(study_id)
        trial_id_3 = storage.create_new_trial(storage.create_new_study())

        # Setup distributions.
        distribution_x = UniformDistribution(low=1.0, high=2.0)
        distribution_y_1 = CategoricalDistribution(
            choices=("Shibuya", "Ebisu", "Meguro"))
        distribution_y_2 = CategoricalDistribution(choices=("Shibuya", "Shinsen"))
        distribution_z = LogUniformDistribution(low=1.0, high=100.0)

        # Set new params.
        storage.set_trial_param(trial_id_1, "x", 0.5, distribution_x)
        storage.set_trial_param(trial_id_1, "y", 2, distribution_y_1)
        assert storage.get_trial_param(trial_id_1, "x") == 0.5
        assert storage.get_trial_param(trial_id_1, "y") == 2
        # Check set_param breaks neither get_trial nor get_trial_params.
        assert_external_params(trial_id_1, {"x": 0.5, "y": "Meguro"})

        # Duplicated registration should overwrite.
        storage.set_trial_param(trial_id_1, "x", 0.6, distribution_x)
        assert storage.get_trial_param(trial_id_1, "x") == 0.6
        assert_external_params(trial_id_1, {"x": 0.6, "y": "Meguro"})

        # Set params to another trial.
        storage.set_trial_param(trial_id_2, "x", 0.3, distribution_x)
        storage.set_trial_param(trial_id_2, "z", 0.1, distribution_z)
        assert storage.get_trial_param(trial_id_2, "x") == 0.3
        assert storage.get_trial_param(trial_id_2, "z") == 0.1
        assert_external_params(trial_id_2, {"x": 0.3, "z": 0.1})

        # Set params with distributions that do not match previous ones.
        for mismatched_name in ("x", "y"):
            with pytest.raises(ValueError):
                storage.set_trial_param(
                    trial_id_2, mismatched_name, 0.5, distribution_z)
        # Choices in CategoricalDistribution should match including its order.
        reordered_choices = CategoricalDistribution(
            choices=("Meguro", "Shibuya", "Ebisu"))
        with pytest.raises(ValueError):
            storage.set_trial_param(trial_id_2, "y", 2, reordered_choices)

        storage.set_trial_state(trial_id_2, TrialState.COMPLETE)
        # Cannot assign params to finished trial.
        with pytest.raises(RuntimeError):
            storage.set_trial_param(trial_id_2, "y", 2, distribution_y_1)
        # Check the previous call does not change the params.
        with pytest.raises(KeyError):
            storage.get_trial_param(trial_id_2, "y")
        # State should be checked prior to distribution compatibility.
        with pytest.raises(RuntimeError):
            storage.set_trial_param(trial_id_2, "y", 0.4, distribution_z)

        # Set params of trials in a different study.
        storage.set_trial_param(trial_id_3, "y", 1, distribution_y_2)
        assert storage.get_trial_param(trial_id_3, "y") == 1
        assert_external_params(trial_id_3, {"y": "Shinsen"})

        # Set params of non-existent trial.
        non_existent_trial_id = max(trial_id_1, trial_id_2, trial_id_3) + 1
        with pytest.raises(KeyError):
            storage.set_trial_param(
                non_existent_trial_id, "x", 0.1, distribution_x)
Ejemplo n.º 6
0
def test_best_trials_command_flatten(output_format: Optional[str]) -> None:
    """Run ``optuna best-trials --flatten`` and compare it with the dataframe.

    A two-objective study is optimized in a SQLite-backed storage, the CLI is
    invoked as a subprocess, and every parsed row is checked column by column
    against ``study.trials_dataframe``.
    """

    # With no explicit format the output is parsed as "table".
    is_table = output_format in (None, "table")
    attrs = (
        "number",
        "values",
        "datetime_start",
        "datetime_complete",
        "duration",
        "params",
        "user_attrs",
        "state",
    )

    with StorageSupplier("sqlite") as storage:
        assert isinstance(storage, RDBStorage)
        study_name = "test_study"

        study = optuna.create_study(
            storage, study_name=study_name, directions=("minimize", "minimize")
        )
        study.optimize(objective_func_multi_objective, n_trials=10)

        # Run command.
        command = [
            "optuna",
            "best-trials",
            "--storage",
            str(storage.engine.url),
            "--study-name",
            study_name,
            "--flatten",
        ]
        if output_format is not None:
            command.extend(["--format", output_format])

        output = subprocess.check_output(command).decode().strip()
        rows = _parse_output(output, output_format or "table")

        assert len(rows) == len(study.best_trials)

        df = study.trials_dataframe(attrs)
        known_columns = set(df.columns)

        for row in rows:
            assert set(row.keys()) <= known_columns
            # In the table case the trial number is converted from text.
            number = int(row["number"]) if is_table else row["number"]
            expected_row = df.loc[number]

            for key in df.columns:
                expected_value = expected_row[key]

                # The param may be NaN when the objective function has branched search space.
                param_is_absent = (
                    key.startswith("params_")
                    and isinstance(expected_value, float)
                    and np.isnan(expected_value)
                )
                if param_is_absent:
                    if is_table:
                        assert row[key] == ""
                    else:
                        assert key not in row
                    continue

                value = row[key]
                if isinstance(value, (int, float)):
                    assert np.isnan(value) if np.isnan(expected_value) else value == expected_value
                    continue

                # Non-numeric cells are compared in their textual form.
                if isinstance(expected_value, Timestamp):
                    expected_text = expected_value.strftime("%Y-%m-%d %H:%M:%S")
                elif isinstance(expected_value, Timedelta):
                    expected_text = str(expected_value.to_pytimedelta())
                else:
                    expected_text = str(expected_value)
                assert value == expected_text
Ejemplo n.º 7
0
def test_trials_dataframe(storage_mode, attrs, multi_index):
    # type: (str, Tuple[str, ...], bool) -> None
    """Check the shape and contents of ``Study.trials_dataframe``.

    Three trials with fixed parameter values are run, then the dataframe is
    requested with the given ``attrs`` and ``multi_index`` setting and each
    row is verified.
    """

    def objective(trial):
        # type: (optuna.trial.Trial) -> float

        x = trial.suggest_int("x", 1, 1)
        y = trial.suggest_categorical("y", (2.5,))
        assert isinstance(y, float)
        trial.set_user_attr("train_loss", 3)
        trial.set_system_attr("foo", "bar")
        total = x + y  # 3.5

        # Test reported intermediate values, although it in practice is not "intermediate".
        trial.report(total, step=0)

        return total

    n_trials = 3

    # Columns that are only present when the matching attr was requested.
    if multi_index:
        optional_columns = {
            "distributions": [("distributions", "x"), ("distributions", "y")],
            "_trial_id": [("trial_id", "")],  # trial_id depends on other tests.
            "duration": [("duration", "")],
        }
    else:
        optional_columns = {
            "distributions": ["distributions_x", "distributions_y"],
            "_trial_id": ["trial_id"],  # trial_id depends on other tests.
            "duration": ["duration"],
        }

    with StorageSupplier(storage_mode) as storage:
        study = optuna.create_study(storage=storage)
        study.optimize(objective, n_trials=n_trials)
        df = study.trials_dataframe(attrs=attrs, multi_index=multi_index)

        # Change index to access rows via trial number.
        number_column = ("number", "") if multi_index else "number"
        df.set_index(number_column, inplace=True, drop=False)
        assert len(df) == n_trials

        # Number columns are as follows (total of 13):
        #   non-nested: 6 (number, value, state, datetime_start, datetime_complete, duration)
        #   params: 2
        #   distributions: 2
        #   user_attrs: 1
        #   system_attrs: 1
        #   intermediate_values: 1
        # "params" and "distributions" each expand to two columns (x and y),
        # i.e. one column more than their single entry in ``attrs``.
        two_column_attrs = [name for name in ("params", "distributions") if name in attrs]
        assert len(df.columns) == len(attrs) + len(two_column_attrs)

        for i in range(n_trials):
            assert df.number[i] == i
            assert df.state[i] == "COMPLETE"
            assert df.value[i] == 3.5
            assert isinstance(df.datetime_start[i], pd.Timestamp)
            assert isinstance(df.datetime_complete[i], pd.Timestamp)

            for attr, columns in optional_columns.items():
                if attr not in attrs:
                    continue
                for column in columns:
                    assert column in df.columns

            if multi_index:
                assert df.params.x[i] == 1
                assert df.params.y[i] == 2.5
                assert df.user_attrs.train_loss[i] == 3
                assert df.system_attrs.foo[i] == "bar"
            else:
                assert df.params_x[i] == 1
                assert df.params_y[i] == 2.5
                assert df.user_attrs_train_loss[i] == 3
                assert df.system_attrs_foo[i] == "bar"
Ejemplo n.º 8
0
def test_best_trial_command(
    objective: Callable[[Trial], float], output_format: Optional[str]
) -> None:
    """Run ``optuna best-trial`` and compare it with the study's dataframe.

    The study is optimized in a SQLite-backed storage, the CLI is invoked as
    a subprocess, and the parsed output is checked column by column against
    the multi-index dataframe row of ``study.best_trial``.
    """

    # With no explicit format the output is parsed as "table".
    is_table = output_format in (None, "table")
    attrs = (
        "number",
        "value",
        "datetime_start",
        "datetime_complete",
        "duration",
        "params",
        "user_attrs",
        "state",
    )

    with StorageSupplier("sqlite") as storage:
        assert isinstance(storage, RDBStorage)
        study_name = "test_study"

        study = optuna.create_study(storage, study_name=study_name)
        study.optimize(objective, n_trials=10)

        # Run command.
        command = [
            "optuna",
            "best-trial",
            "--storage",
            str(storage.engine.url),
            "--study-name",
            study_name,
        ]
        if output_format is not None:
            command.extend(["--format", output_format])

        output = subprocess.check_output(command).decode().strip()
        best_trial = _parse_output(output, output_format or "table")

        if is_table:
            # The table case yields a sequence that must hold exactly one row.
            assert len(best_trial) == 1
            best_trial = best_trial[0]

        def nested_group(name: str) -> dict:
            # In the table case a nested group is stored as text and is
            # evaluated back into a Python object.
            # NOTE(review): eval() runs on text printed by the CLI subprocess
            # started above; ast.literal_eval may be a safer parser — confirm
            # the cells only ever contain plain literals before switching.
            group = best_trial[name]
            return eval(group) if is_table else group

        df = study.trials_dataframe(attrs, multi_index=True)
        expected_row = df.loc[study.best_trial.number]

        for key in df.columns:
            group_name, item_name = key[0], key[1]
            expected_value = expected_row[key]

            # The param may be NaN when the objective function has branched search space.
            param_is_absent = (
                group_name == "params"
                and isinstance(expected_value, float)
                and np.isnan(expected_value)
            )
            if param_is_absent:
                assert item_name not in nested_group("params")
                continue

            if item_name == "":
                value = best_trial[group_name]
            else:
                value = nested_group(group_name)[item_name]

            if isinstance(value, (int, float)):
                assert np.isnan(value) if np.isnan(expected_value) else value == expected_value
                continue

            # Non-numeric cells are compared in their textual form.
            if isinstance(expected_value, Timestamp):
                expected_text = expected_value.strftime("%Y-%m-%d %H:%M:%S")
            elif isinstance(expected_value, Timedelta):
                expected_text = str(expected_value.to_pytimedelta())
            else:
                expected_text = str(expected_value)
            assert value == expected_text
Ejemplo n.º 9
0
def test_studies_command_flatten(output_format: Optional[str]) -> None:
    """Run ``optuna studies --flatten`` and verify the two listed studies.

    One single-objective study without trials and one two-objective study
    with ten trials are created in a SQLite-backed storage; the CLI output
    is parsed and checked for keys, names, trial counts and directions.
    """

    # With no explicit format the output is parsed as "table".
    is_table = output_format in (None, "table")

    with StorageSupplier("sqlite") as storage:
        assert isinstance(storage, RDBStorage)

        # First study.
        study_1 = optuna.create_study(storage)

        # Second study.
        study_2 = optuna.create_study(
            storage, study_name="study_2", directions=["minimize", "maximize"]
        )
        study_2.optimize(objective_func_multi_objective, n_trials=10)

        # Run command.
        command = ["optuna", "studies", "--storage", str(storage.engine.url), "--flatten"]
        if output_format is not None:
            command.extend(["--format", output_format])

        output = subprocess.check_output(command).decode().strip()
        studies = _parse_output(output, output_format or "table")

        two_direction_keys = [
            "name",
            "direction_0",
            "direction_1",
            "n_trials",
            "datetime_start",
        ]
        one_direction_keys = ["name", "direction_0", "n_trials", "datetime_start"]

        assert len(studies) == 2
        first, second = studies[0], studies[1]

        if is_table:
            # In the table case both rows expose the same keys, in this order.
            assert list(first.keys()) == two_direction_keys
            assert list(second.keys()) == two_direction_keys
        else:
            # Otherwise only the key sets are compared, and the first study
            # has no "direction_1" key.
            assert set(first.keys()) == set(one_direction_keys)
            assert set(second.keys()) == set(two_direction_keys)

        # Check study_name, direction, and n_trials for the first study.
        assert first["name"] == study_1.study_name
        assert first["n_trials"] == ("0" if is_table else 0)
        assert first["direction_0"] == "MINIMIZE"

        # Check study_name, direction, and n_trials for the second study.
        assert second["name"] == study_2.study_name
        assert second["n_trials"] == ("10" if is_table else 10)
        assert second["direction_0"] == "MINIMIZE"
        assert second["direction_1"] == "MAXIMIZE"
Ejemplo n.º 10
0
def test_suggest_low_equals_high(storage_mode: str) -> None:
    """Check suggest calls whose lower and upper bounds coincide.

    ``distributions._get_single_value`` is wrapped by a mock so its calls can
    be counted. For every suggest API the first call must return the single
    possible value and add one call to the mock; repeating the same call
    must return the same value without adding another one.
    """

    spy = patch.object(
        distributions, "_get_single_value", wraps=distributions._get_single_value
    )
    with spy as mock_object, StorageSupplier(storage_mode) as storage:

        study = create_study(
            storage=storage, sampler=samplers.TPESampler(n_startup_trials=0)
        )
        trial = Trial(study, study._storage.create_new_trial(study._study_id))

        # (suggest call, expected value) — one entry per parameter name.
        cases = [
            (lambda: trial.suggest_uniform("a", 1.0, 1.0), 1.0),
            (lambda: trial.suggest_loguniform("b", 1.0, 1.0), 1.0),
            (lambda: trial.suggest_discrete_uniform("c", 1.0, 1.0, 1.0), 1.0),
            (lambda: trial.suggest_int("d", 1, 1), 1),
            (lambda: trial.suggest_float("e", 1.0, 1.0), 1.0),
            (lambda: trial.suggest_float("f", 0.5, 0.5, log=True), 0.5),
            (lambda: trial.suggest_float("g", 0.5, 0.5, log=False), 0.5),
            (lambda: trial.suggest_float("h", 0.5, 0.5, step=1.0), 0.5),
            (lambda: trial.suggest_int("i", 1, 1, log=True), 1),
        ]

        for expected_calls, (suggest, expected_value) in enumerate(cases, start=1):
            # Suggesting a param.
            assert suggest() == expected_value
            assert mock_object.call_count == expected_calls
            # Suggesting the same param.
            assert suggest() == expected_value
            assert mock_object.call_count == expected_calls
Ejemplo n.º 11
0
 def optimize(self, storage_mode: str, sampler_mode: str,
              n_trials: int) -> None:
     """Optimize ``self.objective`` for ``n_trials`` trials.

     The study uses a storage supplied by ``StorageSupplier(storage_mode)``
     and a sampler built by ``create_sampler(sampler_mode)``.
     """
     with StorageSupplier(storage_mode) as storage:
         # The sampler is still created after the storage context is entered.
         study = optuna.create_study(
             storage=storage, sampler=create_sampler(sampler_mode))
         study.optimize(self.objective, n_trials=n_trials)
Ejemplo n.º 12
0
def test_trials_dataframe(storage_mode, include_internal_fields, multi_index):
    # type: (str, bool, bool) -> None
    """Check the shape and contents of ``Study.trials_dataframe``.

    Three trials with fixed parameter values are run, then the dataframe is
    requested with the given ``include_internal_fields`` and ``multi_index``
    settings and each row is verified.
    """

    def objective(trial):
        # type: (optuna.trial.Trial) -> float

        x = trial.suggest_int('x', 1, 1)
        y = trial.suggest_categorical('y', (2.5, ))
        trial.set_user_attr('train_loss', 3)
        total = x + y  # 3.5

        # Test reported intermediate values, although it in practice is not "intermediate".
        trial.report(total, step=0)

        return total

    n_trials = 3

    # Columns that only appear when internal fields are requested.
    if multi_index:
        internal_columns = [
            ('distributions', 'x'),
            ('distributions', 'y'),
            ('trial_id', ''),  # trial_id depends on other tests.
        ]
    else:
        internal_columns = [
            'distributions_x',
            'distributions_y',
            'trial_id',  # trial_id depends on other tests.
        ]

    with StorageSupplier(storage_mode) as storage:
        study = optuna.create_study(storage=storage)
        study.optimize(objective, n_trials=n_trials)
        df = study.trials_dataframe(
            include_internal_fields=include_internal_fields,
            multi_index=multi_index)

        # Change index to access rows via trial number.
        number_column = ('number', '') if multi_index else 'number'
        df.set_index(number_column, inplace=True, drop=False)
        assert len(df) == n_trials

        # TODO(Yanase): Remove number from system_attrs after adding TrialModel.number.
        # Number expected columns are as follows (total of 10):
        #   non-nested: 5
        #   params: 2
        #   user_attrs: 1
        #   system_attrs: 1
        #   intermediate_values: 1
        # Internal fields add 3 more: distributions (2) and trial_id (1).
        expected_n_columns = 13 if include_internal_fields else 10
        assert len(df.columns) == expected_n_columns

        for i in range(n_trials):
            assert df.number[i] == i
            assert df.state[i] == 'COMPLETE'
            assert df.value[i] == 3.5
            assert isinstance(df.datetime_start[i], pd.Timestamp)
            assert isinstance(df.datetime_complete[i], pd.Timestamp)

            if include_internal_fields:
                for column in internal_columns:
                    assert column in df.columns

            if multi_index:
                assert df.params.x[i] == 1
                assert df.params.y[i] == 2.5
                assert df.user_attrs.train_loss[i] == 3
                assert df.system_attrs._number[i] == i
            else:
                assert df.params_x[i] == 1
                assert df.params_y[i] == 2.5
                assert df.user_attrs_train_loss[i] == 3
                assert df.system_attrs__number[i] == i