def teardown_module():
    # type: () -> None
    """Release the shared temp file by delegating to ``StorageSupplier``."""
    StorageSupplier.teardown_common_tempfile()
def setup_module():
    # type: () -> None
    """Prepare the shared temp file by delegating to ``StorageSupplier``."""
    StorageSupplier.setup_common_tempfile()
def test_run_trial(storage_mode):
    # type: (str) -> None
    """Check ``study._run_trial`` for a normal run, caught and uncaught
    exceptions, and objective values that are ``None`` or ``nan``.
    """

    def raise_value_error(_):
        # type: (optuna.trial.Trial) -> float
        raise ValueError

    def return_none(_):
        # type: (optuna.trial.Trial) -> float
        return None  # type: ignore

    def return_nan(_):
        # type: (optuna.trial.Trial) -> float
        return float("nan")

    with StorageSupplier(storage_mode) as storage:
        study = optuna.create_study(storage=storage)

        def assert_failed(trial, reason):
            # type: (optuna.trial.Trial, str) -> None
            # The stored trial must be marked FAIL and carry the given reason.
            stored = study._storage.get_trial(trial._trial_id)
            assert stored.state == optuna.trial.TrialState.FAIL
            assert stored.system_attrs["fail_reason"] == reason

        # Test trial without exception.
        study._run_trial(func, catch=(Exception,), gc_after_trial=True)
        check_study(study)

        # Test trial with acceptable exception.
        caught = study._run_trial(raise_value_error, catch=(ValueError,), gc_after_trial=True)
        assert_failed(
            caught,
            "Setting status of trial#1 as TrialState.FAIL because of the "
            "following error: ValueError()",
        )

        # Test trial with unacceptable exception.
        with pytest.raises(ValueError):
            study._run_trial(raise_value_error, catch=(ArithmeticError,), gc_after_trial=True)

        # Test trial with invalid objective value: None
        none_trial = study._run_trial(return_none, catch=(Exception,), gc_after_trial=True)
        assert_failed(
            none_trial,
            "Setting status of trial#3 as TrialState.FAIL because the returned "
            "value from the objective function cannot be casted to float. "
            "Returned value is: None",
        )

        # Test trial with invalid objective value: nan
        nan_trial = study._run_trial(return_nan, catch=(Exception,), gc_after_trial=True)
        assert_failed(
            nan_trial,
            "Setting status of trial#4 as TrialState.FAIL because the objective "
            "function returned nan.",
        )
def test_create_new_trial_with_template_trial(storage_mode: str) -> None:
    """Check that trials created from a template copy its fields, get
    sequential per-study numbers, and get trial ids unique across studies.
    """
    start_time = datetime.now()
    complete_time = datetime.now()

    template_trial = FrozenTrial(
        state=TrialState.COMPLETE,
        value=10000,
        datetime_start=start_time,
        datetime_complete=complete_time,
        params={"x": 0.5},
        distributions={"x": UniformDistribution(0, 1)},
        user_attrs={"foo": "bar"},
        system_attrs={"baz": 123},
        intermediate_values={1: 10, 2: 100, 3: 1000},
        number=55,  # This entry is ignored.
        trial_id=-1,  # dummy value (unused).
    )

    # Fields that every stored trial must share with the template.
    copied_fields = (
        "state",
        "params",
        "distributions",
        "intermediate_values",
        "user_attrs",
        "system_attrs",
        "datetime_start",
        "datetime_complete",
        "value",
    )

    def _check_trials(trials: List[FrozenTrial], idx: int, trial_id: int) -> None:
        expected_count = idx + 1
        assert len(trials) == expected_count

        stored_ids = {t._trial_id for t in trials}
        assert len(stored_ids) == expected_count
        assert trial_id in stored_ids
        assert {t.number for t in trials} == set(range(expected_count))

        for field in copied_fields:
            assert all(getattr(t, field) == getattr(template_trial, field) for t in trials)

    with StorageSupplier(storage_mode) as storage:
        study_id = storage.create_new_study()
        n_trial_in_study = 3

        for i in range(n_trial_in_study):
            trial_id = storage.create_new_trial(study_id, template_trial=template_trial)
            _check_trials(storage.get_all_trials(study_id), i, trial_id)

        # Create trial in non-existent study.
        with pytest.raises(KeyError):
            storage.create_new_trial(study_id + 1)

        study_id2 = storage.create_new_study()
        for i in range(n_trial_in_study):
            storage.create_new_trial(study_id2, template_trial=template_trial)
            numbers = {t.number for t in storage.get_all_trials(study_id2)}
            assert numbers == set(range(i + 1))

        # Check trial_ids are unique across studies.
        combined = storage.get_all_trials(study_id) + storage.get_all_trials(study_id2)
        assert len({t._trial_id for t in combined}) == 2 * n_trial_in_study
def test_set_trial_param(storage_mode: str) -> None:
    """Check ``set_trial_param``: storing and overwriting values,
    distribution compatibility, finished-trial protection, and unknown ids.
    """
    with StorageSupplier(storage_mode) as storage:

        def assert_params(trial_id: int, expected: dict) -> None:
            # Both read paths must report the same external representation.
            assert storage.get_trial(trial_id).params == expected
            assert storage.get_trial_params(trial_id) == expected

        # Setup test across multiple studies and trials.
        study_id = storage.create_new_study()
        first = storage.create_new_trial(study_id)
        second = storage.create_new_trial(study_id)
        other_study_trial = storage.create_new_trial(storage.create_new_study())

        # Setup distributions.
        dist_x = UniformDistribution(low=1.0, high=2.0)
        dist_y_1 = CategoricalDistribution(choices=("Shibuya", "Ebisu", "Meguro"))
        dist_y_2 = CategoricalDistribution(choices=("Shibuya", "Shinsen"))
        dist_z = LogUniformDistribution(low=1.0, high=100.0)

        # Set new params.
        storage.set_trial_param(first, "x", 0.5, dist_x)
        storage.set_trial_param(first, "y", 2, dist_y_1)
        assert storage.get_trial_param(first, "x") == 0.5
        assert storage.get_trial_param(first, "y") == 2
        # Check set_param breaks neither get_trial nor get_trial_params.
        assert_params(first, {"x": 0.5, "y": "Meguro"})

        # Duplicated registration should overwrite.
        storage.set_trial_param(first, "x", 0.6, dist_x)
        assert storage.get_trial_param(first, "x") == 0.6
        assert_params(first, {"x": 0.6, "y": "Meguro"})

        # Set params to another trial.
        storage.set_trial_param(second, "x", 0.3, dist_x)
        storage.set_trial_param(second, "z", 0.1, dist_z)
        assert storage.get_trial_param(second, "x") == 0.3
        assert storage.get_trial_param(second, "z") == 0.1
        assert_params(second, {"x": 0.3, "z": 0.1})

        # Set params with distributions that do not match previous ones.
        with pytest.raises(ValueError):
            storage.set_trial_param(second, "x", 0.5, dist_z)
        with pytest.raises(ValueError):
            storage.set_trial_param(second, "y", 0.5, dist_z)
        # Choices in CategoricalDistribution should match including its order.
        reordered = ("Meguro", "Shibuya", "Ebisu")
        with pytest.raises(ValueError):
            storage.set_trial_param(second, "y", 2, CategoricalDistribution(choices=reordered))

        storage.set_trial_state(second, TrialState.COMPLETE)
        # Cannot assign params to finished trial.
        with pytest.raises(RuntimeError):
            storage.set_trial_param(second, "y", 2, dist_y_1)
        # Check the previous call does not change the params.
        with pytest.raises(KeyError):
            storage.get_trial_param(second, "y")
        # State should be checked prior to distribution compatibility.
        with pytest.raises(RuntimeError):
            storage.set_trial_param(second, "y", 0.4, dist_z)

        # Set params of trials in a different study.
        storage.set_trial_param(other_study_trial, "y", 1, dist_y_2)
        assert storage.get_trial_param(other_study_trial, "y") == 1
        assert_params(other_study_trial, {"y": "Shinsen"})

        # Set params of non-existent trial.
        non_existent_trial_id = max(first, second, other_study_trial) + 1
        with pytest.raises(KeyError):
            storage.set_trial_param(non_existent_trial_id, "x", 0.1, dist_x)
def test_best_trials_command_flatten(output_format: Optional[str]) -> None:
    """Run ``optuna best-trials --flatten`` in a subprocess and compare each
    reported trial with the matching row of ``study.trials_dataframe``.
    """
    # With no explicit format, the output is parsed as "table".
    is_table = output_format is None or output_format == "table"

    with StorageSupplier("sqlite") as storage:
        assert isinstance(storage, RDBStorage)
        storage_url = str(storage.engine.url)
        study_name = "test_study"
        n_trials = 10

        study = optuna.create_study(
            storage, study_name=study_name, directions=("minimize", "minimize")
        )
        study.optimize(objective_func_multi_objective, n_trials=n_trials)
        attrs = (
            "number",
            "values",
            "datetime_start",
            "datetime_complete",
            "duration",
            "params",
            "user_attrs",
            "state",
        )

        # Run command.
        cli_args = [
            "optuna",
            "best-trials",
            "--storage",
            storage_url,
            "--study-name",
            study_name,
            "--flatten",
        ]
        if output_format is not None:
            cli_args.extend(["--format", output_format])

        raw_output = subprocess.check_output(cli_args)
        output = str(raw_output.decode().strip())
        reported = _parse_output(output, output_format or "table")
        best_numbers = [best.number for best in study.best_trials]

        assert len(reported) == len(best_numbers)

        df = study.trials_dataframe(attrs)

        for row in reported:
            assert set(row.keys()) <= set(df.columns)

            # Table cells are compared as strings, so the number is cast back to int.
            number = int(row["number"]) if is_table else row["number"]

            for column in df.columns:
                expected = df.loc[number][column]

                # The param may be NaN when the objective function has branched search space.
                param_is_absent = (
                    column.startswith("params_")
                    and isinstance(expected, float)
                    and np.isnan(expected)
                )
                if param_is_absent:
                    if is_table:
                        assert row[column] == ""
                    else:
                        assert column not in row
                    continue

                actual = row[column]

                if isinstance(actual, (int, float)):
                    if np.isnan(expected):
                        assert np.isnan(actual)
                    else:
                        assert actual == expected
                elif isinstance(expected, Timestamp):
                    assert actual == expected.strftime("%Y-%m-%d %H:%M:%S")
                elif isinstance(expected, Timedelta):
                    assert actual == str(expected.to_pytimedelta())
                else:
                    assert actual == str(expected)
def test_trials_dataframe(storage_mode, attrs, multi_index):
    # type: (str, Tuple[str, ...], bool) -> None
    """Check the shape and contents of ``study.trials_dataframe`` for the
    requested ``attrs``, with flat or multi-index columns.
    """

    def objective(trial):
        # type: (optuna.trial.Trial) -> float
        x = trial.suggest_int("x", 1, 1)
        y = trial.suggest_categorical("y", (2.5,))
        assert isinstance(y, float)
        trial.set_user_attr("train_loss", 3)
        trial.set_system_attr("foo", "bar")
        value = x + y  # 3.5

        # Test reported intermediate values, although it in practice is not "intermediate".
        trial.report(value, step=0)

        return value

    with StorageSupplier(storage_mode) as storage:
        study = optuna.create_study(storage=storage)
        study.optimize(objective, n_trials=3)
        df = study.trials_dataframe(attrs=attrs, multi_index=multi_index)

        # Change index to access rows via trial number.
        index_key = ("number", "") if multi_index else "number"
        df.set_index(index_key, inplace=True, drop=False)
        assert len(df) == 3

        # Number columns are as follows (total of 13):
        #   non-nested: 6 (number, value, state, datetime_start, datetime_complete, duration)
        #   params: 2
        #   distributions: 2
        #   user_attrs: 1
        #   system_attrs: 1
        #   intermediate_values: 1
        # "params" and "distributions" each expand to two columns (x and y),
        # i.e. one more than their single entry in ``attrs``.
        two_column_groups = ("params", "distributions")
        extra_columns = sum(1 for group in two_column_groups if group in attrs)
        assert len(df.columns) == len(attrs) + extra_columns

        wants_distributions = "distributions" in attrs
        wants_trial_id = "_trial_id" in attrs
        wants_duration = "duration" in attrs

        for i in range(3):
            assert df.number[i] == i
            assert df.state[i] == "COMPLETE"
            assert df.value[i] == 3.5
            assert isinstance(df.datetime_start[i], pd.Timestamp)
            assert isinstance(df.datetime_complete[i], pd.Timestamp)

            if multi_index:
                if wants_distributions:
                    assert ("distributions", "x") in df.columns
                    assert ("distributions", "y") in df.columns
                if wants_trial_id:
                    assert ("trial_id", "") in df.columns  # trial_id depends on other tests.
                if wants_duration:
                    assert ("duration", "") in df.columns

                assert df.params.x[i] == 1
                assert df.params.y[i] == 2.5
                assert df.user_attrs.train_loss[i] == 3
                assert df.system_attrs.foo[i] == "bar"
            else:
                if wants_distributions:
                    assert "distributions_x" in df.columns
                    assert "distributions_y" in df.columns
                if wants_trial_id:
                    assert "trial_id" in df.columns  # trial_id depends on other tests.
                if wants_duration:
                    assert "duration" in df.columns

                assert df.params_x[i] == 1
                assert df.params_y[i] == 2.5
                assert df.user_attrs_train_loss[i] == 3
                assert df.system_attrs_foo[i] == "bar"
def test_best_trial_command(
    objective: Callable[[Trial], float], output_format: Optional[str]
) -> None:
    """Run ``optuna best-trial`` in a subprocess and compare the reported
    trial with the best trial's row of ``study.trials_dataframe``.
    """
    # With no explicit format, the output is parsed as "table".
    is_table = output_format is None or output_format == "table"

    with StorageSupplier("sqlite") as storage:
        assert isinstance(storage, RDBStorage)
        storage_url = str(storage.engine.url)
        study_name = "test_study"
        n_trials = 10

        study = optuna.create_study(storage, study_name=study_name)
        study.optimize(objective, n_trials=n_trials)
        attrs = (
            "number",
            "value",
            "datetime_start",
            "datetime_complete",
            "duration",
            "params",
            "user_attrs",
            "state",
        )

        # Run command.
        cli_args = [
            "optuna",
            "best-trial",
            "--storage",
            storage_url,
            "--study-name",
            study_name,
        ]
        if output_format is not None:
            cli_args.extend(["--format", output_format])

        raw_output = subprocess.check_output(cli_args)
        output = str(raw_output.decode().strip())
        reported = _parse_output(output, output_format or "table")

        if is_table:
            # Table output is a list of rows; exactly one row is expected.
            assert len(reported) == 1
            reported = reported[0]

        def nested(group):  # type: ignore
            # Table cells hold nested mappings as text, so they are evaluated first.
            # NOTE(review): ``eval`` only sees CLI output generated by this test itself.
            if is_table:
                return eval(reported[group])
            return reported[group]

        df = study.trials_dataframe(attrs, multi_index=True)

        for key in df.columns:
            group, name = key[0], key[1]
            expected = df.loc[study.best_trial.number][key]

            # The param may be NaN when the objective function has branched search space.
            param_is_absent = (
                group == "params" and isinstance(expected, float) and np.isnan(expected)
            )
            if param_is_absent:
                assert name not in nested("params")
                continue

            # An empty second level marks a non-nested column.
            actual = reported[group] if name == "" else nested(group)[name]

            if isinstance(actual, (int, float)):
                if np.isnan(expected):
                    assert np.isnan(actual)
                else:
                    assert actual == expected
            elif isinstance(expected, Timestamp):
                assert actual == expected.strftime("%Y-%m-%d %H:%M:%S")
            elif isinstance(expected, Timedelta):
                assert actual == str(expected.to_pytimedelta())
            else:
                assert actual == str(expected)
def test_studies_command_flatten(output_format: Optional[str]) -> None:
    """Run ``optuna studies --flatten`` in a subprocess and check the keys
    and values reported for a single- and a two-objective study.
    """
    # With no explicit format, the output is parsed as "table".
    is_table = output_format is None or output_format == "table"

    with StorageSupplier("sqlite") as storage:
        assert isinstance(storage, RDBStorage)
        storage_url = str(storage.engine.url)

        # First study.
        study_1 = optuna.create_study(storage)

        # Second study.
        study_2 = optuna.create_study(
            storage, study_name="study_2", directions=["minimize", "maximize"]
        )
        study_2.optimize(objective_func_multi_objective, n_trials=10)

        # Run command.
        cli_args = ["optuna", "studies", "--storage", storage_url, "--flatten"]
        if output_format is not None:
            cli_args.extend(["--format", output_format])

        raw_output = subprocess.check_output(cli_args)
        output = str(raw_output.decode().strip())
        studies = _parse_output(output, output_format or "table")

        two_objective_keys = [
            "name",
            "direction_0",
            "direction_1",
            "n_trials",
            "datetime_start",
        ]
        if is_table:
            # In table output both studies are expected to expose the same columns.
            expected_keys_1 = expected_keys_2 = two_objective_keys
        else:
            expected_keys_1 = ["name", "direction_0", "n_trials", "datetime_start"]
            expected_keys_2 = two_objective_keys

        assert len(studies) == 2
        first, second = studies[0], studies[1]

        if is_table:
            # Column order is part of the expectation for table output.
            assert list(first.keys()) == expected_keys_1
            assert list(second.keys()) == expected_keys_2
        else:
            assert set(first.keys()) == set(expected_keys_1)
            assert set(second.keys()) == set(expected_keys_2)

        # Check study_name, direction, and n_trials for the first study.
        assert first["name"] == study_1.study_name
        assert first["n_trials"] == ("0" if is_table else 0)
        assert first["direction_0"] == "MINIMIZE"

        # Check study_name, direction, and n_trials for the second study.
        assert second["name"] == study_2.study_name
        assert second["n_trials"] == ("10" if is_table else 10)
        assert second["direction_0"] == "MINIMIZE"
        assert second["direction_1"] == "MAXIMIZE"
def test_suggest_low_equals_high(storage_mode: str) -> None:
    """Check that each degenerate-range suggestion (low == high) calls
    ``distributions._get_single_value`` once, and that re-suggesting the
    same parameter does not call it again.
    """
    with patch.object(
        distributions, "_get_single_value", wraps=distributions._get_single_value
    ) as mock_object, StorageSupplier(storage_mode) as storage:
        study = create_study(storage=storage, sampler=samplers.TPESampler(n_startup_trials=0))
        trial = Trial(study, study._storage.create_new_trial(study._study_id))

        # (suggest call, expected value); the order matters because the
        # expected call count grows by one per new parameter.
        cases = [
            (lambda: trial.suggest_uniform("a", 1.0, 1.0), 1.0),
            (lambda: trial.suggest_loguniform("b", 1.0, 1.0), 1.0),
            (lambda: trial.suggest_discrete_uniform("c", 1.0, 1.0, 1.0), 1.0),
            (lambda: trial.suggest_int("d", 1, 1), 1),
            (lambda: trial.suggest_float("e", 1.0, 1.0), 1.0),
            (lambda: trial.suggest_float("f", 0.5, 0.5, log=True), 0.5),
            (lambda: trial.suggest_float("g", 0.5, 0.5, log=False), 0.5),
            (lambda: trial.suggest_float("h", 0.5, 0.5, step=1.0), 0.5),
            (lambda: trial.suggest_int("i", 1, 1, log=True), 1),
        ]

        for expected_calls, (suggest, expected_value) in enumerate(cases, start=1):
            assert suggest() == expected_value  # Suggesting a param.
            assert mock_object.call_count == expected_calls
            assert suggest() == expected_value  # Suggesting the same param.
            assert mock_object.call_count == expected_calls
def optimize(self, storage_mode: str, sampler_mode: str, n_trials: int) -> None:
    """Optimize ``self.objective`` for ``n_trials`` trials in a new study.

    The study uses a storage from ``StorageSupplier(storage_mode)`` and a
    sampler from ``create_sampler(sampler_mode)``.
    """
    with StorageSupplier(storage_mode) as storage:
        study = optuna.create_study(storage=storage, sampler=create_sampler(sampler_mode))
        study.optimize(self.objective, n_trials=n_trials)
def test_trials_dataframe(storage_mode, include_internal_fields, multi_index):
    # type: (str, bool, bool) -> None
    """Check the shape and contents of ``study.trials_dataframe`` with and
    without internal fields, with flat or multi-index columns.
    """

    def objective(trial):
        # type: (optuna.trial.Trial) -> float
        x = trial.suggest_int('x', 1, 1)
        y = trial.suggest_categorical('y', (2.5, ))
        trial.set_user_attr('train_loss', 3)
        value = x + y  # 3.5

        # Test reported intermediate values, although it in practice is not "intermediate".
        trial.report(value, step=0)

        return value

    with StorageSupplier(storage_mode) as storage:
        study = optuna.create_study(storage=storage)
        study.optimize(objective, n_trials=3)
        df = study.trials_dataframe(
            include_internal_fields=include_internal_fields, multi_index=multi_index)

        # Change index to access rows via trial number.
        index_key = ('number', '') if multi_index else 'number'
        df.set_index(index_key, inplace=True, drop=False)
        assert len(df) == 3

        # TODO(Yanase): Remove number from system_attrs after adding TrialModel.number.
        # Number expected columns are as follows (total of 10):
        #   non-nested: 5
        #   params: 2
        #   user_attrs: 1
        #   system_attrs: 1
        #   intermediate_values: 1
        # Internal fields add three more columns:
        #   distributions: 2
        #   trial_id: 1
        expected_n_columns = 13 if include_internal_fields else 10
        assert len(df.columns) == expected_n_columns

        for i in range(3):
            assert df.number[i] == i
            assert df.state[i] == 'COMPLETE'
            assert df.value[i] == 3.5
            assert isinstance(df.datetime_start[i], pd.Timestamp)
            assert isinstance(df.datetime_complete[i], pd.Timestamp)

            if multi_index:
                if include_internal_fields:
                    assert ('distributions', 'x') in df.columns
                    assert ('distributions', 'y') in df.columns
                    assert ('trial_id', '') in df.columns  # trial_id depends on other tests.

                assert df.params.x[i] == 1
                assert df.params.y[i] == 2.5
                assert df.user_attrs.train_loss[i] == 3
                assert df.system_attrs._number[i] == i
            else:
                if include_internal_fields:
                    assert 'distributions_x' in df.columns
                    assert 'distributions_y' in df.columns
                    assert 'trial_id' in df.columns  # trial_id depends on other tests.

                assert df.params_x[i] == 1
                assert df.params_y[i] == 2.5
                assert df.user_attrs_train_loss[i] == 3
                assert df.system_attrs__number[i] == i