def test_suggest_discrete_uniform_range(storage_mode: str, range_config: Dict[str, float]) -> None:
    """Check ``suggest_discrete_uniform`` at both endpoints of the sampled distribution.

    ``sample_independent`` is stubbed to return the distribution's ``high`` and then
    its ``low``. Each suggestion must emit a ``UserWarning`` and call the sampler
    exactly once.
    """
    sampler = samplers.RandomSampler()

    # Each case pairs the stubbed sampler behaviour with the ``range_config`` key that
    # holds the value the trial is expected to return.
    endpoint_cases = [
        # Check upper endpoints.
        (lambda study, trial, param_name, distribution: distribution.high, "mod_high"),
        # Check lower endpoints.
        (lambda study, trial, param_name, distribution: distribution.low, "low"),
    ]

    for pick_endpoint, expected_key in endpoint_cases:
        stub = Mock(side_effect=pick_endpoint)
        with patch.object(sampler, "sample_independent", stub) as mock_object:
            with StorageSupplier(storage_mode) as storage:
                study = create_study(storage=storage, sampler=sampler)
                trial = Trial(study, study._storage.create_new_trial(study._study_id))

                with pytest.warns(UserWarning):
                    x = trial.suggest_discrete_uniform(
                        "x", range_config["low"], range_config["high"], range_config["q"]
                    )
                assert x == range_config[expected_key]
                assert mock_object.call_count == 1
def test_check_distribution_suggest_float(storage_init_func):
    # type: (Callable[[], storages.BaseStorage]) -> None
    """Check that ``suggest_float`` agrees with the older suggest APIs on the same names.

    Also checks that ``step`` combined with ``log=True`` is rejected, and that asking
    for an int under a name already used for a float raises ``ValueError``, both on the
    same trial and on a later trial of the same study.
    """
    sampler = samplers.RandomSampler()
    study = create_study(storage_init_func(), sampler=sampler)
    first_trial = Trial(study, study._storage.create_new_trial(study._study_id))

    # Plain float vs. ``suggest_uniform``.
    linear_new = first_trial.suggest_float("x1", 10, 20)
    linear_old = first_trial.suggest_uniform("x1", 10, 20)
    assert linear_new == linear_old

    # Log-scaled float vs. ``suggest_loguniform``.
    log_new = first_trial.suggest_float("x2", 1e-5, 1e-3, log=True)
    log_old = first_trial.suggest_loguniform("x2", 1e-5, 1e-3)
    assert log_new == log_old

    # Stepped float vs. ``suggest_discrete_uniform``.
    stepped_new = first_trial.suggest_float("x3", 10, 20, step=1.0)
    stepped_old = first_trial.suggest_discrete_uniform("x3", 10, 20, 1.0)
    assert stepped_new == stepped_old

    # ``step`` and ``log=True`` cannot be combined.
    with pytest.raises(ValueError):
        first_trial.suggest_float("x4", 1e-5, 1e-2, step=1e-5, log=True)

    # "x1" was suggested as a float, so an int suggestion must fail.
    with pytest.raises(ValueError):
        first_trial.suggest_int("x1", 10, 20)

    second_trial = Trial(study, study._storage.create_new_trial(study._study_id))
    with pytest.raises(ValueError):
        second_trial.suggest_int("x1", 10, 20)
def test_get_param_importances(storage_init_func: Callable[[], storages.BaseStorage]) -> None:
    """Check the result shape of ``get_param_importances`` with the fANOVA evaluator.

    The objective suggests five unconditional parameters plus ``x6`` on even-numbered
    trials. The result must be an ``OrderedDict`` with exactly the five unconditional
    names, float importances, and a total close to 1.
    """

    def objective(trial: Trial) -> float:
        x1 = trial.suggest_uniform("x1", 0.1, 3)
        x2 = trial.suggest_loguniform("x2", 0.1, 3)
        x3 = trial.suggest_discrete_uniform("x3", 0, 3, 1)
        x4 = trial.suggest_int("x4", -3, 3)
        x5 = trial.suggest_categorical("x5", [1.0, 1.1, 1.2])

        is_even_trial = trial.number % 2 == 0
        if is_even_trial:
            # Conditional parameters are ignored unless `params` is specified and is not `None`.
            x6 = trial.suggest_uniform("x6", 0.1, 3)

        assert isinstance(x5, float)
        value = x1 ** 4 + x2 + x3 - x4 ** 2 - x5
        return (value + x6) if is_even_trial else value

    study = create_study(storage_init_func(), sampler=samplers.RandomSampler())
    study.optimize(objective, n_trials=3)

    param_importance = get_param_importances(study, evaluator=FanovaImportanceEvaluator())

    assert isinstance(param_importance, OrderedDict)
    assert len(param_importance) == 5
    for expected_name in ["x1", "x2", "x3", "x4", "x5"]:
        assert expected_name in param_importance

    for param_name, importance in param_importance.items():
        assert isinstance(param_name, str)
        assert isinstance(importance, float)

    total_importance = sum(param_importance.values())
    assert math.isclose(1.0, total_importance)
def __init__(
    self,
    independent_sampler: Optional[BaseSampler] = None,
    warn_independent_sampling: bool = True,
    skopt_kwargs: Optional[Dict[str, Any]] = None,
    n_startup_trials: int = 1,
    *,
    consider_pruned_trials: bool = False,
) -> None:
    """Set up the sampler state.

    Args:
        independent_sampler:
            Sampler stored as the independent sampler. A new
            ``samplers.RandomSampler`` is created when this is falsy.
        warn_independent_sampling:
            Flag stored as-is; presumably controls warnings about independent
            sampling (confirm against its use elsewhere in the class).
        skopt_kwargs:
            Extra keyword arguments kept for later use. A ``"dimensions"`` entry is
            discarded. The caller's dictionary is never modified.
        n_startup_trials:
            Stored as-is.
        consider_pruned_trials:
            Stored as-is. When true, an ``ExperimentalWarning`` is emitted.
    """
    _imports.check()

    # Copy before dropping "dimensions": deleting the key from the caller's own
    # dictionary would leak a side effect out of the constructor.
    self._skopt_kwargs = dict(skopt_kwargs or {})
    self._skopt_kwargs.pop("dimensions", None)

    self._independent_sampler = independent_sampler or samplers.RandomSampler()
    self._warn_independent_sampling = warn_independent_sampling
    self._n_startup_trials = n_startup_trials
    self._search_space = samplers.IntersectionSearchSpace()
    self._consider_pruned_trials = consider_pruned_trials

    if self._consider_pruned_trials:
        warnings.warn(
            "`consider_pruned_trials` option is an experimental feature."
            " The interface can change in the future.",
            ExperimentalWarning,
        )
def test_suggest_loguniform(storage_init_func):
    # type: (Callable[[], storages.BaseStorage]) -> None
    """Check ``LogUniformDistribution`` validation and ``Trial._suggest`` with a stub.

    The stubbed sampler yields 1.0, 2.0, 3.0 in turn. This variant expects the sampler
    to be consulted on every ``_suggest`` call, three times in total.
    """
    # Invalid bounds must be rejected at construction time.
    for bad_low, bad_high in [(1.0, 0.9), (0.0, 0.9)]:
        with pytest.raises(ValueError):
            LogUniformDistribution(low=bad_low, high=bad_high)

    stub = Mock(side_effect=[1.0, 2.0, 3.0])
    sampler = samplers.RandomSampler()

    with patch.object(sampler, "sample_independent", stub) as mock_object:
        study = create_study(storage_init_func(), sampler=sampler)
        trial = Trial(study, study._storage.create_new_trial(study._study_id))
        distribution = LogUniformDistribution(low=0.1, high=4.0)

        # Test suggesting a param.
        assert trial._suggest("x", distribution) == 1.0
        # Test suggesting the same param.
        assert trial._suggest("x", distribution) == 1.0
        # Test suggesting a different param.
        assert trial._suggest("y", distribution) == 3.0

        assert trial.params == {"x": 1.0, "y": 3.0}
        assert mock_object.call_count == 3
def test_suggest_loguniform(storage_mode: str) -> None:
    """Check ``LogUniformDistribution`` validation and ``Trial._suggest`` with a stub.

    The stubbed sampler yields 1.0 then 2.0. Suggesting "x" twice must return the same
    value while the sampler is called only twice in total.
    """
    # Invalid bounds must be rejected at construction time.
    for bad_low, bad_high in [(1.0, 0.9), (0.0, 0.9)]:
        with pytest.raises(ValueError):
            LogUniformDistribution(low=bad_low, high=bad_high)

    stub = Mock(side_effect=[1.0, 2.0])
    sampler = samplers.RandomSampler()

    with patch.object(sampler, "sample_independent", stub) as mock_object:
        with StorageSupplier(storage_mode) as storage:
            study = create_study(storage=storage, sampler=sampler)
            trial = Trial(study, study._storage.create_new_trial(study._study_id))
            distribution = LogUniformDistribution(low=0.1, high=4.0)

            # Test suggesting a param.
            assert trial._suggest("x", distribution) == 1.0
            # Test suggesting the same param.
            assert trial._suggest("x", distribution) == 1.0
            # Test suggesting a different param.
            assert trial._suggest("y", distribution) == 2.0

            assert trial.params == {"x": 1.0, "y": 2.0}
            assert mock_object.call_count == 2
def test_suggest_int_range(
    storage_init_func: Callable[[], storages.BaseStorage], range_config: Dict[str, int]
) -> None:
    """Check ``suggest_int`` with a ``step`` at both endpoints of the distribution.

    ``sample_independent`` is stubbed to return the distribution's ``high`` and then
    its ``low``. Each suggestion must emit a ``UserWarning`` and call the sampler
    exactly once.
    """
    sampler = samplers.RandomSampler()

    # Each case pairs the stubbed sampler behaviour with the ``range_config`` key that
    # holds the value the trial is expected to return.
    endpoint_cases = [
        # Check upper endpoints.
        (lambda study, trial, param_name, distribution: distribution.high, "mod_high"),
        # Check lower endpoints.
        (lambda study, trial, param_name, distribution: distribution.low, "low"),
    ]

    for pick_endpoint, expected_key in endpoint_cases:
        stub = Mock(side_effect=pick_endpoint)
        with patch.object(sampler, "sample_independent", stub) as mock_object:
            study = create_study(storage_init_func(), sampler=sampler)
            trial = Trial(study, study._storage.create_new_trial(study._study_id))

            with pytest.warns(UserWarning):
                x = trial.suggest_int(
                    "x", range_config["low"], range_config["high"], step=range_config["step"]
                )
            assert x == range_config[expected_key]
            assert mock_object.call_count == 1
def test_suggest_int_log(storage_init_func: Callable[[], storages.BaseStorage]) -> None:
    """Check log-scaled int suggestion with a stubbed sampler, plus invalid arguments.

    The stub yields 1 then 2. Suggesting "x" twice must return the same value with
    only two sampler calls. A non-integer lower bound with ``log=True`` and a
    ``step`` combined with ``log=True`` must both raise ``ValueError``.
    """
    stub = Mock(side_effect=[1, 2])
    sampler = samplers.RandomSampler()

    def new_trial() -> Trial:
        # Every scenario runs against a trial in its own freshly created study.
        study = create_study(storage_init_func(), sampler=sampler)
        return Trial(study, study._storage.create_new_trial(study._study_id))

    trial = new_trial()
    distribution = IntLogUniformDistribution(low=1, high=3)
    with patch.object(sampler, "sample_independent", stub) as mock_object:
        # Test suggesting a param.
        assert trial._suggest("x", distribution) == 1
        # Test suggesting the same param.
        assert trial._suggest("x", distribution) == 1
        # Test suggesting a different param.
        assert trial._suggest("y", distribution) == 2

        assert trial.params == {"x": 1, "y": 2}
        assert mock_object.call_count == 2

    trial = new_trial()
    with warnings.catch_warnings():
        # UserWarning will be raised since [0.5, 10] is not divisible by 1.
        warnings.simplefilter("ignore", category=UserWarning)
        with pytest.raises(ValueError):
            trial.suggest_int("z", 0.5, 10, log=True)  # type: ignore

    trial = new_trial()
    with pytest.raises(ValueError):
        trial.suggest_int("w", 1, 3, step=2, log=True)
def test_suggest_discrete_uniform_range(storage_init_func, range_config):
    # type: (typing.Callable[[], storages.BaseStorage], typing.Dict[str, float]) -> None
    """Check ``suggest_discrete_uniform`` at both endpoints of the sampled distribution.

    ``sample_independent`` is stubbed to return the distribution's ``high`` and then
    its ``low``. Each suggestion must call the sampler exactly once.
    """
    sampler = samplers.RandomSampler()

    # Each case pairs the stubbed sampler behaviour with the ``range_config`` key that
    # holds the value the trial is expected to return.
    endpoint_cases = [
        # Check upper endpoints.
        (lambda study, trial, param_name, distribution: distribution.high, "mod_high"),
        # Check lower endpoints.
        (lambda study, trial, param_name, distribution: distribution.low, "low"),
    ]

    for pick_endpoint, expected_key in endpoint_cases:
        stub = Mock(side_effect=pick_endpoint)
        with patch.object(sampler, "sample_independent", stub) as mock_object:
            study = create_study(storage_init_func(), sampler=sampler)
            trial = Trial(study, study.storage.create_new_trial_id(study.study_id))

            x = trial.suggest_discrete_uniform(
                "x", range_config["low"], range_config["high"], range_config["q"]
            )
            assert x == range_config[expected_key]
            assert mock_object.call_count == 1
def test_check_distribution_suggest_float(storage_mode: str) -> None:
    """Check that ``suggest_float`` agrees with the older suggest APIs on the same names.

    Also checks that ``step`` combined with ``log=True`` is rejected, and that asking
    for an int under a name already used for a float raises ``ValueError``, both on the
    same trial and on a later trial of the same study.
    """
    sampler = samplers.RandomSampler()

    with StorageSupplier(storage_mode) as storage:
        study = create_study(storage=storage, sampler=sampler)
        first_trial = Trial(study, study._storage.create_new_trial(study._study_id))

        # Plain float vs. ``suggest_uniform``.
        linear_new = first_trial.suggest_float("x1", 10, 20)
        linear_old = first_trial.suggest_uniform("x1", 10, 20)
        assert linear_new == linear_old

        # Log-scaled float vs. ``suggest_loguniform``.
        log_new = first_trial.suggest_float("x2", 1e-5, 1e-3, log=True)
        log_old = first_trial.suggest_loguniform("x2", 1e-5, 1e-3)
        assert log_new == log_old

        # Stepped float vs. ``suggest_discrete_uniform``.
        stepped_new = first_trial.suggest_float("x3", 10, 20, step=1.0)
        stepped_old = first_trial.suggest_discrete_uniform("x3", 10, 20, 1.0)
        assert stepped_new == stepped_old

        # ``step`` and ``log=True`` cannot be combined.
        with pytest.raises(ValueError):
            first_trial.suggest_float("x4", 1e-5, 1e-2, step=1e-5, log=True)

        # "x1" was suggested as a float, so an int suggestion must fail.
        with pytest.raises(ValueError):
            first_trial.suggest_int("x1", 10, 20)

        second_trial = Trial(study, study._storage.create_new_trial(study._study_id))
        with pytest.raises(ValueError):
            second_trial.suggest_int("x1", 10, 20)
def __init__(self, independent_sampler=None, warn_independent_sampling=True, skopt_kwargs=None):
    # type: (Optional[BaseSampler], bool, Optional[Dict[str, Any]]) -> None
    """Set up the sampler state.

    Args:
        independent_sampler:
            Sampler stored as the independent sampler. A new
            ``samplers.RandomSampler`` is created when this is falsy.
        warn_independent_sampling:
            Flag stored as-is; presumably controls warnings about independent
            sampling (confirm against its use elsewhere in the class).
        skopt_kwargs:
            Extra keyword arguments kept for later use. A ``'dimensions'`` entry is
            discarded. The caller's dictionary is never modified.
    """
    _check_skopt_availability()

    # Copy before dropping 'dimensions': deleting the key from the caller's own
    # dictionary would leak a side effect out of the constructor.
    self._skopt_kwargs = dict(skopt_kwargs or {})
    self._skopt_kwargs.pop('dimensions', None)

    self._independent_sampler = independent_sampler or samplers.RandomSampler()
    self._warn_independent_sampling = warn_independent_sampling
def test_check_distribution_suggest_discrete_uniform(storage_init_func):
    # type: (typing.Callable[[], storages.BaseStorage]) -> None
    """Check that re-suggesting "x" with a different range yields exactly one warning.

    The same name is suggested three times, and the last call uses a different upper
    bound. Exactly one warning must be recorded across the three calls (presumably
    from the mismatching third call).
    """
    # Local import keeps this block self-contained. ``pytest.warns(None)`` is
    # deprecated since pytest 7 and removed in pytest 8, so warnings are recorded
    # with the standard library instead.
    import warnings

    sampler = samplers.RandomSampler()
    study = create_study(storage_init_func(), sampler=sampler)
    trial = Trial(study, study._storage.create_new_trial(study._study_id))

    with warnings.catch_warnings(record=True) as record:
        # Record every warning, including repeats, as ``pytest.warns`` did.
        warnings.simplefilter("always")
        trial.suggest_discrete_uniform("x", 10, 20, 2)
        trial.suggest_discrete_uniform("x", 10, 20, 2)
        trial.suggest_discrete_uniform("x", 10, 22, 2)

    # we expect exactly one warning
    assert len(record) == 1
def test_check_distribution_suggest_int(
    storage_init_func: Callable[[], storages.BaseStorage], enable_log: bool
) -> None:
    """Check that re-suggesting int "x" with a different range yields exactly one warning.

    The same name is suggested three times, and the last call uses a different upper
    bound. Exactly one warning must be recorded across the three calls (presumably
    from the mismatching third call).
    """
    # Local import keeps this block self-contained. ``pytest.warns(None)`` is
    # deprecated since pytest 7 and removed in pytest 8, so warnings are recorded
    # with the standard library instead.
    import warnings

    sampler = samplers.RandomSampler()
    study = create_study(storage_init_func(), sampler=sampler)
    trial = Trial(study, study._storage.create_new_trial(study._study_id))

    with warnings.catch_warnings(record=True) as record:
        # Record every warning, including repeats, as ``pytest.warns`` did.
        warnings.simplefilter("always")
        trial.suggest_int("x", 10, 20, log=enable_log)
        trial.suggest_int("x", 10, 20, log=enable_log)
        trial.suggest_int("x", 10, 22, log=enable_log)

    # We expect exactly one warning.
    assert len(record) == 1
def test_check_distribution_suggest_float(storage_init_func):
    # type: (typing.Callable[[], storages.BaseStorage]) -> None
    """Check that ``suggest_float`` agrees with the older APIs on the same names.

    Covers the plain case against ``suggest_uniform`` and the ``log=True`` case
    against ``suggest_loguniform``.
    """
    sampler = samplers.RandomSampler()
    study = create_study(storage_init_func(), sampler=sampler)
    trial = Trial(study, study._storage.create_new_trial(study._study_id))

    # Plain float vs. ``suggest_uniform``.
    linear_new = trial.suggest_float("x1", 10, 20)
    linear_old = trial.suggest_uniform("x1", 10, 20)
    assert linear_new == linear_old

    # Log-scaled float vs. ``suggest_loguniform``.
    log_new = trial.suggest_float("x2", 1e-5, 1e-3, log=True)
    log_old = trial.suggest_loguniform("x2", 1e-5, 1e-3)
    assert log_new == log_old
def test_suggest_int(storage_init_func: Callable[[], storages.BaseStorage]) -> None:
    """Check ``Trial._suggest`` on an ``IntUniformDistribution`` with a stubbed sampler.

    The stub yields 1 then 2. Suggesting "x" twice must return the same value while
    the sampler is called only twice in total.
    """
    stub = Mock(side_effect=[1, 2])
    sampler = samplers.RandomSampler()

    with patch.object(sampler, "sample_independent", stub) as mock_object:
        study = create_study(storage_init_func(), sampler=sampler)
        trial = Trial(study, study._storage.create_new_trial(study._study_id))
        distribution = IntUniformDistribution(low=0, high=3)

        # Test suggesting a param.
        assert trial._suggest("x", distribution) == 1
        # Test suggesting the same param.
        assert trial._suggest("x", distribution) == 1
        # Test suggesting a different param.
        assert trial._suggest("y", distribution) == 2

        assert trial.params == {"x": 1, "y": 2}
        assert mock_object.call_count == 2
def test_suggest_int_log_invalid_range(storage_mode: str) -> None:
    """Check that invalid log-scaled ``suggest_int`` arguments raise ``ValueError``.

    Two cases are covered, each in its own storage: a non-integer lower bound with
    ``log=True``, and a ``step`` combined with ``log=True``.
    """
    sampler = samplers.RandomSampler()

    def new_trial(storage) -> Trial:
        # Build a first trial in a new study backed by ``storage``.
        study = create_study(storage=storage, sampler=sampler)
        return Trial(study, study._storage.create_new_trial(study._study_id))

    with StorageSupplier(storage_mode) as storage:
        trial = new_trial(storage)
        with warnings.catch_warnings():
            # UserWarning will be raised since [0.5, 10] is not divisible by 1.
            warnings.simplefilter("ignore", category=UserWarning)
            with pytest.raises(ValueError):
                trial.suggest_int("z", 0.5, 10, log=True)  # type: ignore

    with StorageSupplier(storage_mode) as storage:
        trial = new_trial(storage)
        with pytest.raises(ValueError):
            trial.suggest_int("w", 1, 3, step=2, log=True)
def test_suggest_discrete_uniform(storage_init_func):
    # type: (typing.Callable[[], storages.BaseStorage]) -> None
    """Check ``Trial._suggest`` on a ``DiscreteUniformDistribution`` with a stub.

    The stubbed sampler yields 1.0, 2.0, 3.0 in turn. This variant expects the sampler
    to be consulted on every ``_suggest`` call, three times in total.
    """
    stub = Mock(side_effect=[1.0, 2.0, 3.0])
    sampler = samplers.RandomSampler()

    with patch.object(sampler, "sample_independent", stub) as mock_object:
        study = create_study(storage_init_func(), sampler=sampler)
        trial = Trial(study, study.storage.create_new_trial_id(study.study_id))
        distribution = distributions.DiscreteUniformDistribution(low=0.0, high=3.0, q=1.0)

        # Test suggesting a param.
        assert trial._suggest("x", distribution) == 1.0
        # Test suggesting the same param.
        assert trial._suggest("x", distribution) == 1.0
        # Test suggesting a different param.
        assert trial._suggest("y", distribution) == 3.0

        assert trial.params == {"x": 1.0, "y": 3.0}
        assert mock_object.call_count == 3
def test_check_distribution_suggest_categorical(
    storage_init_func: Callable[[], storages.BaseStorage]
) -> None:
    """Check that a categorical parameter cannot be re-suggested incompatibly.

    After "x" is suggested from three choices, suggesting it from a different choice
    list or as an int must raise ``ValueError``. The int suggestion must also fail on
    a later trial of the same study.
    """
    sampler = samplers.RandomSampler()
    study = create_study(storage_init_func(), sampler=sampler)

    first_trial = Trial(study, study._storage.create_new_trial(study._study_id))
    first_trial.suggest_categorical("x", [10, 20, 30])

    # A different set of choices under the same name.
    with pytest.raises(ValueError):
        first_trial.suggest_categorical("x", [10, 20])

    # A different kind of distribution under the same name.
    with pytest.raises(ValueError):
        first_trial.suggest_int("x", 10, 20)

    second_trial = Trial(study, study._storage.create_new_trial(study._study_id))
    with pytest.raises(ValueError):
        second_trial.suggest_int("x", 10, 20)
def test_check_distribution_suggest_categorical(storage_mode: str) -> None:
    """Check that a categorical parameter cannot be re-suggested incompatibly.

    After "x" is suggested from three choices, suggesting it from a different choice
    list or as an int must raise ``ValueError``. The int suggestion must also fail on
    a later trial of the same study.
    """
    sampler = samplers.RandomSampler()

    with StorageSupplier(storage_mode) as storage:
        study = create_study(storage=storage, sampler=sampler)

        first_trial = Trial(study, study._storage.create_new_trial(study._study_id))
        first_trial.suggest_categorical("x", [10, 20, 30])

        # A different set of choices under the same name.
        with pytest.raises(ValueError):
            first_trial.suggest_categorical("x", [10, 20])

        # A different kind of distribution under the same name.
        with pytest.raises(ValueError):
            first_trial.suggest_int("x", 10, 20)

        second_trial = Trial(study, study._storage.create_new_trial(study._study_id))
        with pytest.raises(ValueError):
            second_trial.suggest_int("x", 10, 20)
def __init__(
    self,
    independent_sampler=None,
    warn_independent_sampling=True,
    skopt_kwargs=None,
    n_startup_trials=1,
):
    # type: (Optional[BaseSampler], bool, Optional[Dict[str, Any]], int) -> None
    """Set up the sampler state.

    Args:
        independent_sampler:
            Sampler stored as the independent sampler. A new
            ``samplers.RandomSampler`` is created when this is falsy.
        warn_independent_sampling:
            Flag stored as-is; presumably controls warnings about independent
            sampling (confirm against its use elsewhere in the class).
        skopt_kwargs:
            Extra keyword arguments kept for later use. A ``"dimensions"`` entry is
            discarded. The caller's dictionary is never modified.
        n_startup_trials:
            Stored as-is.
    """
    _check_skopt_availability()

    # Copy before dropping "dimensions": deleting the key from the caller's own
    # dictionary would leak a side effect out of the constructor.
    self._skopt_kwargs = dict(skopt_kwargs or {})
    self._skopt_kwargs.pop("dimensions", None)

    self._independent_sampler = independent_sampler or samplers.RandomSampler()
    self._warn_independent_sampling = warn_independent_sampling
    self._n_startup_trials = n_startup_trials
def test_get_param_importances(
    storage_mode: str,
    evaluator_init_func: Callable[[], BaseImportanceEvaluator],
    normalize: bool,
) -> None:
    """Check the result shape and ordering of ``get_param_importances``.

    The objective suggests six unconditional parameters plus ``x7`` on even-numbered
    trials. The result must be an ``OrderedDict`` with exactly the six unconditional
    names, float importances in non-increasing order, all finite and non-negative,
    and summing to about 1 when ``normalize`` is set.
    """

    def objective(trial: Trial) -> float:
        x1 = trial.suggest_float("x1", 0.1, 3)
        x2 = trial.suggest_float("x2", 0.1, 3, log=True)
        x3 = trial.suggest_float("x3", 0, 3, step=1)
        x4 = trial.suggest_int("x4", -3, 3)
        x5 = trial.suggest_int("x5", 1, 5, log=True)
        x6 = trial.suggest_categorical("x6", [1.0, 1.1, 1.2])

        is_even_trial = trial.number % 2 == 0
        if is_even_trial:
            # Conditional parameters are ignored unless `params` is specified and is not `None`.
            x7 = trial.suggest_float("x7", 0.1, 3)

        assert isinstance(x6, float)
        value = x1**4 + x2 + x3 - x4**2 - x5 + x6
        return (value + x7) if is_even_trial else value

    with StorageSupplier(storage_mode) as storage:
        study = create_study(storage=storage, sampler=samplers.RandomSampler())
        study.optimize(objective, n_trials=3)

        param_importance = get_param_importances(
            study, evaluator=evaluator_init_func(), normalize=normalize
        )

        assert isinstance(param_importance, OrderedDict)
        assert len(param_importance) == 6
        for expected_name in ["x1", "x2", "x3", "x4", "x5", "x6"]:
            assert expected_name in param_importance

        # Importances must come out in non-increasing order.
        upper_bound = float("inf")
        for param_name, importance in param_importance.items():
            assert isinstance(param_name, str)
            assert isinstance(importance, float)
            assert importance <= upper_bound
            upper_bound = importance

        # Sanity check for param importances
        for importance in param_importance.values():
            assert 0 <= importance < float("inf")
        if normalize:
            assert np.isclose(sum(param_importance.values()), 1.0)
def __init__(
    self,
    independent_sampler=None,
    warn_independent_sampling=True,
    skopt_kwargs=None,
    n_startup_trials=1,
):
    # type: (Optional[BaseSampler], bool, Optional[Dict[str, Any]], int) -> None
    """Set up the sampler state.

    Args:
        independent_sampler:
            Sampler stored as the independent sampler. A new
            ``samplers.RandomSampler`` is created when this is falsy.
        warn_independent_sampling:
            Flag stored as-is; presumably controls warnings about independent
            sampling (confirm against its use elsewhere in the class).
        skopt_kwargs:
            Extra keyword arguments kept for later use. A ``"dimensions"`` entry is
            discarded. The caller's dictionary is never modified.
        n_startup_trials:
            Stored as-is.
    """
    _imports.check()

    # Copy before dropping "dimensions": deleting the key from the caller's own
    # dictionary would leak a side effect out of the constructor.
    self._skopt_kwargs = dict(skopt_kwargs or {})
    self._skopt_kwargs.pop("dimensions", None)

    self._independent_sampler = independent_sampler or samplers.RandomSampler()
    self._warn_independent_sampling = warn_independent_sampling
    self._n_startup_trials = n_startup_trials
    self._search_space = samplers.IntersectionSearchSpace()
def test_suggest_int(storage_mode: str) -> None:
    """Check ``Trial._suggest`` on an ``IntDistribution`` with a stubbed sampler.

    The stub yields 1 then 2. Suggesting "x" twice must return the same value while
    the sampler is called only twice in total.
    """
    stub = Mock(side_effect=[1, 2])
    sampler = samplers.RandomSampler()

    with patch.object(sampler, "sample_independent", stub) as mock_object:
        with StorageSupplier(storage_mode) as storage:
            study = create_study(storage=storage, sampler=sampler)
            trial = Trial(study, study._storage.create_new_trial(study._study_id))
            distribution = IntDistribution(low=0, high=3)

            # Test suggesting a param.
            assert trial._suggest("x", distribution) == 1
            # Test suggesting the same param.
            assert trial._suggest("x", distribution) == 1
            # Test suggesting a different param.
            assert trial._suggest("y", distribution) == 2

            assert trial.params == {"x": 1, "y": 2}
            assert mock_object.call_count == 2
def test_check_distribution_suggest_int(storage_mode: str, enable_log: bool) -> None:
    """Check distribution-compatibility handling for an int parameter "x".

    Suggesting "x" three times, the last with a different upper bound, must record
    exactly one warning (presumably from the mismatching third call). Suggesting "x"
    as a float must raise ``ValueError`` on the same trial and on a later trial.
    """
    # Local import keeps this block self-contained. ``pytest.warns(None)`` is
    # deprecated since pytest 7 and removed in pytest 8, so warnings are recorded
    # with the standard library instead.
    import warnings

    sampler = samplers.RandomSampler()

    with StorageSupplier(storage_mode) as storage:
        study = create_study(storage=storage, sampler=sampler)
        trial = Trial(study, study._storage.create_new_trial(study._study_id))

        with warnings.catch_warnings(record=True) as record:
            # Record every warning, including repeats, as ``pytest.warns`` did.
            warnings.simplefilter("always")
            trial.suggest_int("x", 10, 20, log=enable_log)
            trial.suggest_int("x", 10, 20, log=enable_log)
            trial.suggest_int("x", 10, 22, log=enable_log)

        # We expect exactly one warning.
        assert len(record) == 1

        with pytest.raises(ValueError):
            trial.suggest_float("x", 10, 20, log=enable_log)

        trial = Trial(study, study._storage.create_new_trial(study._study_id))
        with pytest.raises(ValueError):
            trial.suggest_float("x", 10, 20, log=enable_log)
def test_check_distribution_suggest_loguniform(storage_mode: str) -> None:
    """Check distribution-compatibility handling for a log-uniform parameter "x".

    Suggesting "x" three times, the last with a different upper bound, must record
    exactly one warning once ``FutureWarning`` entries are ignored. Suggesting "x" as
    an int must raise ``ValueError`` on the same trial and on a later trial.
    """
    # Local import keeps this block self-contained. ``pytest.warns(None)`` is
    # deprecated since pytest 7 and removed in pytest 8, so warnings are recorded
    # with the standard library instead.
    import warnings

    sampler = samplers.RandomSampler()

    with StorageSupplier(storage_mode) as storage:
        study = create_study(storage=storage, sampler=sampler)
        trial = Trial(study, study._storage.create_new_trial(study._study_id))

        with warnings.catch_warnings(record=True) as record:
            # Record every warning, including repeats, as ``pytest.warns`` did.
            warnings.simplefilter("always")
            trial.suggest_loguniform("x", 10, 20)
            trial.suggest_loguniform("x", 10, 20)
            trial.suggest_loguniform("x", 10, 30)

        # we expect exactly one warning (not counting ones caused by deprecation)
        assert len([r for r in record if r.category != FutureWarning]) == 1

        with pytest.raises(ValueError):
            trial.suggest_int("x", 10, 20)

        trial = Trial(study, study._storage.create_new_trial(study._study_id))
        with pytest.raises(ValueError):
            trial.suggest_int("x", 10, 20)
def test_check_distribution_suggest_uniform(
    storage_init_func: Callable[[], storages.BaseStorage]
) -> None:
    """Check distribution-compatibility handling for a uniform parameter "x".

    Suggesting "x" three times, the last with a different upper bound, must record
    exactly one warning (presumably from the mismatching third call). Suggesting "x"
    as an int must raise ``ValueError`` on the same trial and on a later trial.
    """
    # Local import keeps this block self-contained. ``pytest.warns(None)`` is
    # deprecated since pytest 7 and removed in pytest 8, so warnings are recorded
    # with the standard library instead.
    import warnings

    sampler = samplers.RandomSampler()
    study = create_study(storage_init_func(), sampler=sampler)
    trial = Trial(study, study._storage.create_new_trial(study._study_id))

    with warnings.catch_warnings(record=True) as record:
        # Record every warning, including repeats, as ``pytest.warns`` did.
        warnings.simplefilter("always")
        trial.suggest_uniform("x", 10, 20)
        trial.suggest_uniform("x", 10, 20)
        trial.suggest_uniform("x", 10, 30)

    # we expect exactly one warning
    assert len(record) == 1

    with pytest.raises(ValueError):
        trial.suggest_int("x", 10, 20)

    trial = Trial(study, study._storage.create_new_trial(study._study_id))
    with pytest.raises(ValueError):
        trial.suggest_int("x", 10, 20)
def test_check_distribution_suggest_loguniform(storage_mode: str) -> None:
    """Check distribution-compatibility handling for a log-uniform parameter "x".

    Suggesting "x" three times, the last with a different upper bound, must record
    exactly one warning (presumably from the mismatching third call). Suggesting "x"
    as an int must raise ``ValueError`` on the same trial and on a later trial.
    """
    # Local import keeps this block self-contained. ``pytest.warns(None)`` is
    # deprecated since pytest 7 and removed in pytest 8, so warnings are recorded
    # with the standard library instead.
    import warnings

    sampler = samplers.RandomSampler()

    with StorageSupplier(storage_mode) as storage:
        study = create_study(storage=storage, sampler=sampler)
        trial = Trial(study, study._storage.create_new_trial(study._study_id))

        with warnings.catch_warnings(record=True) as record:
            # Record every warning, including repeats, as ``pytest.warns`` did.
            warnings.simplefilter("always")
            trial.suggest_loguniform("x", 10, 20)
            trial.suggest_loguniform("x", 10, 20)
            trial.suggest_loguniform("x", 10, 30)

        # we expect exactly one warning
        assert len(record) == 1

        with pytest.raises(ValueError):
            trial.suggest_int("x", 10, 20)

        trial = Trial(study, study._storage.create_new_trial(study._study_id))
        with pytest.raises(ValueError):
            trial.suggest_int("x", 10, 20)
def __init__(
    self,
    independent_sampler: Optional[BaseSampler] = None,
    warn_independent_sampling: bool = True,
    skopt_kwargs: Optional[Dict[str, Any]] = None,
    n_startup_trials: int = 1,
    *,
    consider_pruned_trials: bool = False
) -> None:
    """Set up the sampler state.

    Args:
        independent_sampler:
            Sampler stored as the independent sampler. A new
            ``samplers.RandomSampler`` is created when this is falsy.
        warn_independent_sampling:
            Flag stored as-is; presumably controls warnings about independent
            sampling (confirm against its use elsewhere in the class).
        skopt_kwargs:
            Extra keyword arguments kept for later use. A ``"dimensions"`` entry is
            discarded. The caller's dictionary is never modified.
        n_startup_trials:
            Stored as-is.
        consider_pruned_trials:
            Stored as-is. When true,
            ``_raise_experimental_warning_for_consider_pruned_trials`` is called.
    """
    _imports.check()

    # Copy before dropping "dimensions": deleting the key from the caller's own
    # dictionary would leak a side effect out of the constructor.
    self._skopt_kwargs = dict(skopt_kwargs or {})
    self._skopt_kwargs.pop("dimensions", None)

    self._independent_sampler = independent_sampler or samplers.RandomSampler()
    self._warn_independent_sampling = warn_independent_sampling
    self._n_startup_trials = n_startup_trials
    self._search_space = samplers.IntersectionSearchSpace()
    self._consider_pruned_trials = consider_pruned_trials

    if self._consider_pruned_trials:
        self._raise_experimental_warning_for_consider_pruned_trials()
def fit(
    self,
    X: MultipleDataType,
    y: Optional[TargetDataType] = None,
    cat_features: Optional[FeatureType] = None,
    text_features: Optional[FeatureType] = None,
    pairs: Optional[PairsType] = None,
    sample_weight: SampleWeightType = None,
    group_id: Optional[FeatureType] = None,
    group_weight: Optional[FeatureType] = None,
    subgroup_id: Optional[FeatureType] = None,
    pairs_weight: Optional[FeatureType] = None,
    baseline: Optional[FeatureType] = None,
    use_best_model: Optional[bool] = None,
    eval_set: Optional[cb.Pool] = None,
    verbose: Optional[Union[bool, int]] = None,
    logging_level: Optional[str] = None,
    plot: bool = False,
    column_description: Optional[str] = None,
    verbose_eval: Optional[Union[bool, int]] = None,
    metric_period: Optional[int] = None,
    silent: Optional[bool] = None,
    early_stopping_rounds: Optional[int] = None,
    save_snapshot: Optional[bool] = None,
    snapshot_file: Optional[str] = None,
    snapshot_interval: Optional[int] = None,
    init_model: Optional[str] = None,
):
    """Run the hyper-parameter search on ``X``/``y`` and return ``self``.

    All arguments are forwarded, unchanged and in order, to
    ``self._prepare_train_params``. ``X`` and ``y`` are also used to build the
    ``cb.Pool`` for the objective, and ``y`` to build the CV splitter.
    ``early_stopping_rounds`` is also passed to the objective.

    When ``self.study`` is ``None``, a study with a ``RandomSampler`` is created. Its
    direction is "maximize" or "minimize" according to ``_is_higher_better`` applied
    to the configured ``loss_function``.

    Returns:
        This estimator instance.
    """
    logger = logging.getLogger(__name__)

    # catboost\core.py
    # CatBoost._prepare_train_params
    train_params = self._prepare_train_params(
        X,
        y,
        cat_features,
        text_features,
        pairs,
        sample_weight,
        group_id,
        group_weight,
        subgroup_id,
        pairs_weight,
        baseline,
        use_best_model,
        eval_set,
        verbose,
        logging_level,
        plot,
        column_description,
        verbose_eval,
        metric_period,
        silent,
        early_stopping_rounds,
        save_snapshot,
        snapshot_file,
        snapshot_interval,
        init_model,
    )
    n_samples = len(X)

    # get_params
    params = train_params["params"]
    eval_name = params.get("loss_function")
    n_estimators = params.get("iterations")
    is_classifier = self._estimator_type == "classifier"
    is_higher_better = _is_higher_better(eval_name)
    cv = check_cv(cv=self.cv, y=y, classifier=is_classifier)

    if self.study is None:
        sampler = samplers.RandomSampler()
        direction = "maximize" if is_higher_better else "minimize"
        self.study = study_module.create_study(direction=direction, sampler=sampler)

    # hyper_parameter tuning
    dataset = cb.Pool(X, label=y)
    objective = _Objective(
        params,
        dataset=dataset,
        cv=cv,
        eval_name=eval_name,
        n_samples=n_samples,
        is_higher_better=is_higher_better,
        early_stopping_rounds=early_stopping_rounds,
        n_estimators=n_estimators,
        param_distributions=self.param_distributions,
    )

    logger.info("Searching the best hyper_parameters")
    self.study.optimize(objective, n_trials=self.n_trials, timeout=self.timeout)
    logger.info("Done")

    if self.refit:
        # Only announce a refit when one was requested.
        # TODO: the refit step itself is not implemented yet; nothing is trained here.
        logger.info("Starting refit")

    return self
def create_study(
    storage: Optional[Union[str, storages.BaseStorage]] = None,
    sampler: Optional["samplers.BaseSampler"] = None,
    pruner: Optional[pruners.BasePruner] = None,
    study_name: Optional[str] = None,
    direction: Optional[str] = None,
    load_if_exists: bool = False,
    *,
    directions: Optional[Sequence[str]] = None,
) -> Study:
    """Create a new :class:`~optuna.study.Study`.

    Example:

        .. testcode::

            import optuna


            def objective(trial):
                x = trial.suggest_uniform("x", 0, 10)
                return x ** 2


            study = optuna.create_study()
            study.optimize(objective, n_trials=3)

    Args:
        storage:
            Database URL. If this argument is set to None, in-memory storage is used, and the
            :class:`~optuna.study.Study` will not be persistent.

            .. note::
                When a database URL is passed, Optuna internally uses `SQLAlchemy`_ to handle
                the database. Please refer to `SQLAlchemy's document`_ for further details.
                If you want to specify non-default options to `SQLAlchemy Engine`_, you can
                instantiate :class:`~optuna.storages.RDBStorage` with your desired options and
                pass it to the ``storage`` argument instead of a URL.

             .. _SQLAlchemy: https://www.sqlalchemy.org/
             .. _SQLAlchemy's document:
                 https://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls
             .. _SQLAlchemy Engine: https://docs.sqlalchemy.org/en/latest/core/engines.html

        sampler:
            A sampler object that implements background algorithm for value suggestion.
            If :obj:`None` is specified, :class:`~optuna.samplers.TPESampler` is used
            as the default. See also :class:`~optuna.samplers`.
        pruner:
            A pruner object that decides early stopping of unpromising trials. If :obj:`None`
            is specified, :class:`~optuna.pruners.MedianPruner` is used as the default. See
            also :class:`~optuna.pruners`.
        study_name:
            Study's name. If this argument is set to None, a unique name is generated
            automatically.
        direction:
            Direction of optimization. Set ``minimize`` for minimization and ``maximize`` for
            maximization.

            .. note::
                If none of `direction` and `directions` are specified, the direction of the
                study is set to "minimize".
        directions:
            A sequence of directions during multi-objective optimization.
        load_if_exists:
            Flag to control the behavior to handle a conflict of study names.
            In the case where a study named ``study_name`` already exists in the ``storage``,
            a :class:`~optuna.exceptions.DuplicatedStudyError` is raised if ``load_if_exists``
            is set to :obj:`False`.
            Otherwise, the creation of the study is skipped, and the existing one is returned.

    Returns:
        A :class:`~optuna.study.Study` object.

    Raises:
        :exc:`ValueError`:
            If the length of ``directions`` is zero.
            Or, if ``direction`` is neither 'minimize' nor 'maximize' when it is a string.
            Or, if the element of ``directions`` is neither `minimize` nor `maximize`.
            Or, if both ``direction`` and ``directions`` are specified.

    See also:
        :func:`optuna.create_study` is an alias of :func:`optuna.study.create_study`.

    """
    # Normalise the two mutually exclusive arguments into one list of direction names.
    if direction is not None and directions is not None:
        raise ValueError("Specify only one of `direction` and `directions`.")
    if direction is not None:
        directions = [direction]
    elif directions is not None:
        directions = list(directions)
    else:
        directions = ["minimize"]

    if not directions:
        raise ValueError("The number of objectives must be greater than 0.")
    if any(d not in ("minimize", "maximize") for d in directions):
        raise ValueError("Please set either 'minimize' or 'maximize' to direction.")

    direction_objects = []
    for name in directions:
        if name == "minimize":
            direction_objects.append(StudyDirection.MINIMIZE)
        else:
            direction_objects.append(StudyDirection.MAXIMIZE)

    storage = storages.get_storage(storage)
    try:
        study_id = storage.create_new_study(study_name)
    except exceptions.DuplicatedStudyError:
        if not load_if_exists:
            raise

        # A duplicate can only be reported for an explicitly named study.
        assert study_name is not None
        _logger.info(
            "Using an existing study with name '{}' instead of "
            "creating a new one.".format(study_name)
        )
        study_id = storage.get_study_id_from_name(study_name)

    # TODO(imamura): Change the default sampler for MO as NSGAII after MO sampler refactoring.
    is_multi_objective = len(direction_objects) > 1
    if sampler is None and is_multi_objective:
        _logger.info(
            "Multi-objective optimization is set, but no sampler is specified. "
            "The sampler is set to `optuna.samplers.RandomSampler`."
        )
        sampler = samplers.RandomSampler()

    resolved_name = storage.get_study_name_from_id(study_id)
    study = Study(study_name=resolved_name, storage=storage, sampler=sampler, pruner=pruner)
    study._storage.set_study_directions(study_id, direction_objects)

    return study