def test_use_existing_experiment_by_id(tmpdir: py.path.local) -> None:
    tracking_uri = f"file:{tmpdir}"
    mlflow.set_tracking_uri(tracking_uri)
    experiment_id = mlflow.create_experiment("foo")

    mlflow_kwargs = {"experiment_id": experiment_id}
    mlflc = MLflowCallback(
        tracking_uri=tracking_uri, create_experiment=False, mlflow_kwargs=mlflow_kwargs
    )
    study = optuna.create_study()
    for _ in range(10):
        study.optimize(_objective_func, n_trials=1, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiment_list = mlfl_client.list_experiments()
    assert len(experiment_list) == 1

    experiment = experiment_list[0]
    assert experiment.experiment_id == experiment_id
    assert experiment.name == "foo"

    runs = mlfl_client.list_run_infos(experiment_id)
    assert len(runs) == 10
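# NOTE: most of these tests call a module-level `_objective_func` helper that is not part
# of this excerpt. A minimal sketch consistent with the assertions later in the suite
# (parameters `x`, `y`, `z`, the `my_user_attr` user attribute, and the distributions
# checked in `test_study_name`) could look like the following; the exact body in the
# original module may differ.
def _objective_func(trial: optuna.trial.Trial) -> float:
    x = trial.suggest_float("x", -1.0, 1.0)
    y = trial.suggest_float("y", 20, 30, log=True)
    z = trial.suggest_categorical("z", (-1.0, 1.0))
    assert isinstance(z, float)
    trial.set_user_attr("my_user_attr", "my_user_attr_value")
    return (x - 2) ** 2 + (y - 25) ** 2 + z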
def test_tag_truncation(tmpdir: py.path.local) -> None:
    tracking_uri = f"file:{tmpdir}"
    study_name = "my_study"
    n_trials = 3

    mlflc = MLflowCallback(tracking_uri=tracking_uri)
    study = optuna.create_study(study_name=study_name)
    study.optimize(_objective_func_long_user_attr, n_trials=n_trials, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiments = mlfl_client.list_experiments()
    assert len(experiments) == 1

    experiment = experiments[0]
    assert experiment.name == study_name

    experiment_id = experiment.experiment_id
    run_infos = mlfl_client.list_run_infos(experiment_id)
    assert len(run_infos) == n_trials

    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    my_user_attr = first_run_dict["data"]["tags"]["my_user_attr"]
    assert len(my_user_attr) <= 5000
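# `test_tag_truncation` exercises MLflow's tag-length limit through a
# `_objective_func_long_user_attr` helper that is also outside this excerpt. A plausible
# sketch (an assumption, not the original definition) is the same objective with a user
# attribute longer than the 5000-character tag limit asserted above:
def _objective_func_long_user_attr(trial: optuna.trial.Trial) -> float:
    x = trial.suggest_float("x", -1.0, 1.0)
    y = trial.suggest_float("y", 20, 30, log=True)
    z = trial.suggest_categorical("z", (-1.0, 1.0))
    assert isinstance(z, float)
    trial.set_user_attr("my_user_attr", "a" * 5100)  # exceeds MLflow's 5000-char tag limit
    return (x - 2) ** 2 + (y - 25) ** 2 + z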
def test_log_params(tmpdir: py.path.local) -> None:
    tracking_uri = f"file:{tmpdir}"
    metric_name = "metric"
    study_name = "my_study"

    mlflc = MLflowCallback(tracking_uri=tracking_uri, metric_name=metric_name)
    study = optuna.create_study(study_name=study_name)
    study.enqueue_trial({"x": 1.0, "y": 1.0, "z": 1.0})
    study.optimize(_objective_func, n_trials=1, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiments = mlfl_client.list_experiments()
    experiment = experiments[0]
    experiment_id = experiment.experiment_id

    run_infos = mlfl_client.list_run_infos(experiment_id)
    assert len(run_infos) == 1

    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    for param_name, param_value in study.best_params.items():
        assert param_name in first_run_dict["data"]["params"]
        assert first_run_dict["data"]["params"][param_name] == str(param_value)
        assert first_run_dict["data"]["tags"][f"{param_name}_distribution"] == str(
            study.best_trial.distributions[param_name]
        )
def test_chunk_info(tmpdir: py.path.local) -> None:
    num_objective = mlflow.utils.validation.MAX_METRICS_PER_BATCH + 1
    num_params = mlflow.utils.validation.MAX_PARAMS_TAGS_PER_BATCH + 1

    def objective(trial: optuna.trial.Trial) -> Tuple[float, ...]:
        for i in range(num_params):
            trial.suggest_float(f"x_{i}", 0, 1)
        return tuple([1.0] * num_objective)

    tracking_uri = f"file:{tmpdir}"
    study_name = "my_study"
    n_trials = 1

    mlflc = MLflowCallback(tracking_uri=tracking_uri)
    study = optuna.create_study(study_name=study_name, directions=["maximize"] * num_objective)
    study.optimize(objective, n_trials=n_trials, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiment = mlfl_client.list_experiments()[0]
    run_infos = mlfl_client.list_run_infos(experiment.experiment_id)
    assert len(run_infos) == n_trials

    run = mlfl_client.get_run(run_infos[0].run_id)
    run_dict = run.to_dictionary()

    # `tags` also contains the parameter distributions and other information,
    # such as the trial number.
    assert len(run_dict["data"]["tags"]) > num_params
    assert len(run_dict["data"]["params"]) == num_params
    assert len(run_dict["data"]["metrics"]) == num_objective
def test_nest_trials(tmpdir: py.path.local) -> None:
    tracking_uri = f"file:{tmpdir}"
    study_name = "my_study"

    mlflow.set_tracking_uri(tracking_uri)
    mlflow.set_experiment(study_name)

    mlflc = MLflowCallback(tracking_uri=tracking_uri, mlflow_kwargs={"nested": True})
    study = optuna.create_study(study_name=study_name)
    n_trials = 3

    with mlflow.start_run() as parent_run:
        study.optimize(_objective_func, n_trials=n_trials, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiments = mlfl_client.list_experiments()
    experiment_id = experiments[0].experiment_id

    all_runs = mlfl_client.search_runs([experiment_id])
    child_runs = [r for r in all_runs if MLFLOW_PARENT_RUN_ID in r.data.tags]

    assert len(all_runs) == n_trials + 1
    assert len(child_runs) == n_trials
    assert all(r.data.tags[MLFLOW_PARENT_RUN_ID] == parent_run.info.run_id for r in child_runs)
    assert all(set(r.data.params.keys()) == {"x", "y", "z"} for r in child_runs)
    assert all(set(r.data.metrics.keys()) == {"value"} for r in child_runs)
def test_use_existing_or_default_experiment(
    tmpdir: py.path.local, name: Optional[str], expected: str
) -> None:
    if name is not None:
        tracking_uri = f"file:{tmpdir}"
        mlflow.set_tracking_uri(tracking_uri)
        mlflow.set_experiment(name)
    else:
        # The target directory must not exist when the first run is initialized
        # with the default experiment at a non-default URI.
        tracking_uri = f"file:{tmpdir}/foo"
        mlflow.set_tracking_uri(tracking_uri)

    mlflc = MLflowCallback(tracking_uri=tracking_uri, create_experiment=False)
    study = optuna.create_study()

    for _ in range(10):
        # Simulate multiple optimization runs under the same experiment.
        study.optimize(_objective_func, n_trials=1, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiment = mlfl_client.list_experiments()[0]
    runs = mlfl_client.list_run_infos(experiment.experiment_id)

    assert experiment.name == expected
    assert len(runs) == 10
def test_tag_study_user_attrs(tmpdir: py.path.local, tag_study_user_attrs: bool) -> None:
    tracking_uri = f"file:{tmpdir}"
    study_name = "my_study"
    n_trials = 3

    mlflc = MLflowCallback(tracking_uri=tracking_uri, tag_study_user_attrs=tag_study_user_attrs)
    study = optuna.create_study(study_name=study_name)
    study.set_user_attr("my_study_attr", "a")
    study.optimize(_objective_func_long_user_attr, n_trials=n_trials, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiments = mlfl_client.list_experiments()
    assert len(experiments) == 1

    experiment = experiments[0]
    assert experiment.name == study_name

    experiment_id = experiment.experiment_id
    runs = mlfl_client.search_runs([experiment_id])
    assert len(runs) == n_trials

    if tag_study_user_attrs:
        assert all((r.data.tags["my_study_attr"] == "a") for r in runs)
    else:
        assert all(("my_study_attr" not in r.data.tags) for r in runs)
def test_log_metric(
    tmpdir: py.path.local, func: Callable, names: List[str], values: List[float]
) -> None:
    tracking_uri = f"file:{tmpdir}"
    study_name = "my_study"

    mlflc = MLflowCallback(tracking_uri=tracking_uri, metric_name=names)
    study = optuna.create_study(
        study_name=study_name, directions=["minimize" for _ in range(len(values))]
    )
    study.enqueue_trial({"x": 1.0, "y": 1.0, "z": 1.0})
    study.optimize(func, n_trials=1, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiments = mlfl_client.list_experiments()
    experiment = experiments[0]
    experiment_id = experiment.experiment_id

    run_infos = mlfl_client.list_run_infos(experiment_id)
    assert len(run_infos) == 1

    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    assert all(name in first_run_dict["data"]["metrics"] for name in names)
    assert all(
        first_run_dict["data"]["metrics"][name] == val for name, val in zip(names, values)
    )
def test_track_in_mlflow_decorator(tmpdir: py.path.local) -> None:
    tracking_uri = f"file:{tmpdir}"
    study_name = "my_study"
    n_trials = 3

    metric_name = "additional_metric"
    metric = 3.14

    mlflc = MLflowCallback(tracking_uri=tracking_uri)

    def _objective_func(trial: optuna.trial.Trial) -> float:
        """Objective function"""

        x = trial.suggest_float("x", -1.0, 1.0)
        y = trial.suggest_float("y", 20, 30, log=True)
        z = trial.suggest_categorical("z", (-1.0, 1.0))
        assert isinstance(z, float)
        trial.set_user_attr("my_user_attr", "my_user_attr_value")
        mlflow.log_metric(metric_name, metric)
        return (x - 2) ** 2 + (y - 25) ** 2 + z

    tracked_objective = mlflc.track_in_mlflow()(_objective_func)

    study = optuna.create_study(study_name=study_name)
    study.optimize(tracked_objective, n_trials=n_trials, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiments = mlfl_client.list_experiments()
    assert len(experiments) == 1

    experiment = experiments[0]
    assert experiment.name == study_name

    experiment_id = experiment.experiment_id
    run_infos = mlfl_client.list_run_infos(experiment_id)
    assert len(run_infos) == n_trials

    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    assert metric_name in first_run_dict["data"]["metrics"]
    assert first_run_dict["data"]["metrics"][metric_name] == metric

    assert tracked_objective.__name__ == _objective_func.__name__
    assert tracked_objective.__doc__ == _objective_func.__doc__
def test_initialize_experiment(tmpdir: py.path.local) -> None:
    tracking_file_name = "file:{}".format(tmpdir)
    metric_name = "my_metric_name"
    study_name = "my_study"

    mlflc = MLflowCallback(tracking_uri=tracking_file_name, metric_name=metric_name)
    study = optuna.create_study(study_name=study_name)

    mlflc._initialize_experiment(study)

    mlfl_client = MlflowClient(tracking_file_name)
    experiments = mlfl_client.list_experiments()
    assert len(experiments) == 1

    experiment = experiments[0]
    assert experiment.name == study_name
def test_multiobjective_raises_on_name_mismatch(tmpdir: py.path.local, metrics: List[str]) -> None:
    tracking_uri = f"file:{tmpdir}"

    mlflc = MLflowCallback(tracking_uri=tracking_uri, metric_name=metrics)
    study = optuna.create_study(study_name="my_study", directions=["minimize", "maximize"])

    with pytest.raises(ValueError):
        study.optimize(_multiobjective_func, n_trials=1, callbacks=[mlflc])
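# `_multiobjective_func` is another helper outside this excerpt. Since the studies that
# use it are created with `directions=["minimize", "maximize"]`, a minimal sketch (an
# assumption, not the original definition) only needs to return two floats:
def _multiobjective_func(trial: optuna.trial.Trial) -> Tuple[float, float]:
    x = trial.suggest_float("x", -1.0, 1.0)
    y = trial.suggest_float("y", 20, 30, log=True)
    return (x - 2) ** 2 + (y - 25) ** 2, (x - 2) ** 2 - (y - 25) ** 2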
def test_log_metric(tmpdir: py.path.local, names: List[str], values: List[float]) -> None:
    tracking_file_name = "file:{}".format(tmpdir)
    study_name = "my_study"

    mlflc = MLflowCallback(tracking_uri=tracking_file_name, metric_name=names)
    study = optuna.create_study(study_name=study_name)
    mlflc._initialize_experiment(study)

    with mlflow.start_run():
        mlflc._log_metrics(values)

    mlfl_client = MlflowClient(tracking_file_name)
    experiments = mlfl_client.list_experiments()
    experiment = experiments[0]
    experiment_id = experiment.experiment_id

    run_infos = mlfl_client.list_run_infos(experiment_id)
    assert len(run_infos) == 1

    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    assert all(name in first_run_dict["data"]["metrics"] for name in names)
    assert all(
        first_run_dict["data"]["metrics"][name] == val for name, val in zip(names, values)
    )
def test_log_metric_none(tmpdir: py.path.local) -> None:
    tracking_file_name = "file:{}".format(tmpdir)
    metric_name = "my_metric_name"
    study_name = "my_study"
    metric_value = None

    mlflc = MLflowCallback(tracking_uri=tracking_file_name, metric_name=metric_name)
    study = optuna.create_study(study_name=study_name)
    mlflc._initialize_experiment(study)

    with mlflow.start_run():
        mlflc._log_metrics(metric_value)

    mlfl_client = MlflowClient(tracking_file_name)
    experiments = mlfl_client.list_experiments()
    experiment = experiments[0]
    experiment_id = experiment.experiment_id

    run_infos = mlfl_client.list_run_infos(experiment_id)
    assert len(run_infos) == 1

    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    # When `values` is `None`, do not save values with metric names.
    assert metric_name not in first_run_dict["data"]["metrics"]
def test_mlflow_callback_fails_when_nest_trials_is_false_and_active_run_exists(
    tmpdir: py.path.local,
) -> None:
    tracking_uri = f"file:{tmpdir}"
    study_name = "my_study"
    mlflow.set_tracking_uri(tracking_uri)
    mlflow.set_experiment(study_name)

    mlflc = MLflowCallback(tracking_uri=tracking_uri)
    study = optuna.create_study(study_name=study_name)

    with mlflow.start_run():
        with pytest.raises(Exception, match=r"Run with UUID \w+ is already active."):
            study.optimize(_objective_func, n_trials=1, callbacks=[mlflc])
def test_tag_always_logged(tmpdir: py.path.local) -> None:
    tracking_uri = f"file:{tmpdir}"
    study_name = "my_study"
    n_trials = 3

    mlflc = MLflowCallback(tracking_uri=tracking_uri)
    study = optuna.create_study(study_name=study_name)
    study.optimize(_objective_func, n_trials=n_trials, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiment = mlfl_client.list_experiments()[0]
    runs = mlfl_client.search_runs([experiment.experiment_id])

    assert all((r.data.tags["direction"] == "MINIMIZE") for r in runs)
    assert all((r.data.tags["state"] == "COMPLETE") for r in runs)
def test_run_name(tmpdir: py.path.local, run_name: Optional[str], expected: str) -> None:
    tracking_uri = f"file:{tmpdir}"
    mlflow_kwargs = {"run_name": run_name}

    mlflc = MLflowCallback(tracking_uri=tracking_uri, mlflow_kwargs=mlflow_kwargs)
    study = optuna.create_study()
    study.optimize(_objective_func, n_trials=1, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiment = mlfl_client.list_experiments()[0]
    run_info = mlfl_client.list_run_infos(experiment.experiment_id)[0]
    run = mlfl_client.get_run(run_info.run_id)
    tags = run.data.tags
    assert tags["mlflow.runName"] == expected
def test_study_name(tmpdir: py.path.local) -> None:
    tracking_uri = f"file:{tmpdir}"
    study_name = "my_study"
    n_trials = 3

    mlflc = MLflowCallback(tracking_uri=tracking_uri)
    study = optuna.create_study(study_name=study_name)
    study.optimize(_objective_func, n_trials=n_trials, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    assert len(mlfl_client.list_experiments()) == 1

    experiment = mlfl_client.list_experiments()[0]
    runs = mlfl_client.list_run_infos(experiment.experiment_id)

    assert experiment.name == study_name
    assert len(runs) == n_trials
def optimize(
    params_fp: Path = Path(config.CONFIG_DIR, "params.json"),
    study_name: Optional[str] = "optimization",
    num_trials: int = 100,
) -> None:
    """Optimize a subset of hyperparameters towards an objective.

    This saves the best trial's parameters into `config/params.json`.

    Args:
        params_fp (Path, optional): Location of parameters (just using num_samples,
            num_epochs, etc.) to use for training. Defaults to `config/params.json`.
        study_name (str, optional): Name of the study to save trial runs under.
            Defaults to `optimization`.
        num_trials (int, optional): Number of trials to run. Defaults to 100.
    """
    # Starting parameters (not actually used but needed for setup)
    params = Namespace(**utils.load_dict(filepath=params_fp))

    # Optimize
    pruner = optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=5)
    study = optuna.create_study(study_name=study_name, direction="maximize", pruner=pruner)
    mlflow_callback = MLflowCallback(tracking_uri=mlflow.get_tracking_uri(), metric_name="f1")
    study.optimize(
        lambda trial: main.objective(params, trial),
        n_trials=num_trials,
        callbacks=[mlflow_callback],
    )

    # All trials
    trials_df = study.trials_dataframe()
    trials_df = trials_df.sort_values(["value"], ascending=False)

    # Best trial
    logger.info(f"Best value (f1): {study.best_trial.value}")
    params = {**params.__dict__, **study.best_trial.params}
    params["threshold"] = study.best_trial.user_attrs["threshold"]
    with open(params_fp, "w") as fp:
        json.dump(params, fp=fp, indent=2, cls=NumpyEncoder)
    logger.info(json.dumps(params, indent=2, cls=NumpyEncoder))
def optimize(num_trials: int = 100) -> None:
    """Optimize a subset of hyperparameters towards an objective.

    This saves the best trial's arguments into `config/args.json`.

    Args:
        num_trials (int, optional): Number of trials to run. Defaults to 100.
    """
    # Starting arguments (not actually used but needed for setup)
    args_fp = Path(config.CONFIG_DIR, "args.json")
    args = Namespace(**utils.load_dict(filepath=args_fp))

    # Optimize
    pruner = optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=5)
    study = optuna.create_study(study_name="optimization", direction="maximize", pruner=pruner)
    mlflow_callback = MLflowCallback(tracking_uri=mlflow.get_tracking_uri(), metric_name="f1")
    study.optimize(
        lambda trial: train.objective(args, trial),
        n_trials=num_trials,
        callbacks=[mlflow_callback],
    )

    # All trials
    trials_df = study.trials_dataframe()
    trials_df = trials_df.sort_values(["value"], ascending=False)  # sort by metric
    trials_df.to_csv(Path(config.EXPERIMENTS_DIR, "trials.csv"), index=False)  # save

    # Best trial
    logger.info(f"Best value (f1): {study.best_trial.value}")
    params = {**args.__dict__, **study.best_trial.params}
    params["threshold"] = study.best_trial.user_attrs["threshold"]
    with open(Path(config.CONFIG_DIR, "args.json"), "w") as fp:
        json.dump(params, fp=fp, indent=2, cls=NumpyEncoder)
    logger.info(json.dumps(params, indent=2, cls=NumpyEncoder))
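# Both `optimize` variants above serialize the tuned parameters with a custom
# `NumpyEncoder`, which is defined elsewhere in the project. A typical implementation
# (an assumption, not the project's actual code) converts NumPy scalars and arrays to
# plain Python types before JSON encoding:
import json

import numpy as np


class NumpyEncoder(json.JSONEncoder):
    def default(self, obj: object) -> object:
        # Convert NumPy types that the standard encoder cannot handle.
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)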
def test_log_mlflow_tags(tmpdir: py.path.local) -> None:
    tracking_uri = f"file:{tmpdir}"
    expected_tags = {"foo": 0, "bar": 1}
    mlflow_kwargs = {"tags": expected_tags}

    mlflc = MLflowCallback(tracking_uri=tracking_uri, mlflow_kwargs=mlflow_kwargs)
    study = optuna.create_study()
    study.optimize(_objective_func, n_trials=1, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiment = mlfl_client.list_experiments()[0]
    run_info = mlfl_client.list_run_infos(experiment.experiment_id)[0]
    run = mlfl_client.get_run(run_info.run_id)
    tags = run.data.tags

    assert all(k in tags.keys() for k in expected_tags.keys())
    assert all(tags[key] == str(value) for key, value in expected_tags.items())
def test_tag_trial_user_attrs(tmpdir: py.path.local, tag_trial_user_attrs: bool) -> None:
    tracking_uri = "file:{}".format(tmpdir)
    study_name = "my_study"
    n_trials = 3

    mlflc = MLflowCallback(tracking_uri=tracking_uri, tag_trial_user_attrs=tag_trial_user_attrs)
    study = optuna.create_study(study_name=study_name)
    study.optimize(_objective_func, n_trials=n_trials, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiment = mlfl_client.list_experiments()[0]
    runs = mlfl_client.search_runs([experiment.experiment_id])

    if tag_trial_user_attrs:
        assert all((r.data.tags["my_user_attr"] == "my_user_attr_value") for r in runs)
    else:
        assert all(("my_user_attr" not in r.data.tags) for r in runs)
def test_log_params(tmpdir: py.path.local) -> None:
    tracking_file_name = "file:{}".format(tmpdir)
    metric_name = "my_metric_name"
    study_name = "my_study"

    param1_name = "my_param1"
    param1_value = "a"
    param2_name = "my_param2"
    param2_value = 5
    params = {param1_name: param1_value, param2_name: param2_value}

    mlflc = MLflowCallback(tracking_uri=tracking_file_name, metric_name=metric_name)
    study = optuna.create_study(study_name=study_name)
    mlflc._initialize_experiment(study)

    with mlflow.start_run():
        trial = optuna.trial.create_trial(
            params=params,
            distributions={
                param1_name: optuna.distributions.CategoricalDistribution(["a", "b"]),
                param2_name: optuna.distributions.UniformDistribution(0, 10),
            },
            value=5.0,
        )
        mlflc._log_params(trial.params)

    mlfl_client = MlflowClient(tracking_file_name)
    experiments = mlfl_client.list_experiments()
    experiment = experiments[0]
    experiment_id = experiment.experiment_id

    run_infos = mlfl_client.list_run_infos(experiment_id)
    assert len(run_infos) == 1

    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    assert param1_name in first_run_dict["data"]["params"]
    assert first_run_dict["data"]["params"][param1_name] == param1_value

    assert param2_name in first_run_dict["data"]["params"]
    assert first_run_dict["data"]["params"][param2_name] == str(param2_value)
def test_metric_name(tmpdir: py.path.local) -> None:
    tracking_uri = f"file:{tmpdir}"
    metric_name = "my_metric_name"

    mlflc = MLflowCallback(tracking_uri=tracking_uri, metric_name=metric_name)
    study = optuna.create_study(study_name="my_study")
    study.optimize(_objective_func, n_trials=3, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiments = mlfl_client.list_experiments()
    experiment = experiments[0]
    experiment_id = experiment.experiment_id

    run_infos = mlfl_client.list_run_infos(experiment_id)
    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    assert metric_name in first_run_dict["data"]["metrics"]
def test_study_name(tmpdir):
    # type: (py.path.local) -> None

    tracking_file_name = "file:{}".format(tmpdir)
    study_name = "my_study"
    n_trials = 3

    mlflc = MLflowCallback(tracking_uri=tracking_file_name)
    study = optuna.create_study(study_name=study_name)
    study.optimize(_objective_func, n_trials=n_trials, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_file_name)
    experiments = mlfl_client.list_experiments()
    assert len(experiments) == 1

    experiment = experiments[0]
    assert experiment.name == study_name

    experiment_id = experiment.experiment_id
    run_infos = mlfl_client.list_run_infos(experiment_id)
    assert len(run_infos) == n_trials

    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    assert "value" in first_run_dict["data"]["metrics"]
    assert "x" in first_run_dict["data"]["params"]
    assert "y" in first_run_dict["data"]["params"]
    assert "z" in first_run_dict["data"]["params"]
    assert first_run_dict["data"]["tags"]["direction"] == "MINIMIZE"
    assert first_run_dict["data"]["tags"]["state"] == "COMPLETE"
    assert (
        first_run_dict["data"]["tags"]["x_distribution"]
        == "UniformDistribution(high=1.0, low=-1.0)"
    )
    assert (
        first_run_dict["data"]["tags"]["y_distribution"]
        == "LogUniformDistribution(high=30, low=20)"
    )
    assert (
        first_run_dict["data"]["tags"]["z_distribution"]
        == "CategoricalDistribution(choices=(-1.0, 1.0))"
    )
    assert first_run_dict["data"]["tags"]["my_user_attr"] == "my_user_attr_value"
def test_metric_name_multiobjective(
    tmpdir: py.path.local, names: Union[str, List[str]], expected: List[str]
) -> None:
    tracking_uri = f"file:{tmpdir}"

    mlflc = MLflowCallback(tracking_uri=tracking_uri, metric_name=names)
    study = optuna.create_study(study_name="my_study", directions=["minimize", "maximize"])
    study.optimize(_multiobjective_func, n_trials=3, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiments = mlfl_client.list_experiments()
    experiment = experiments[0]
    experiment_id = experiment.experiment_id

    run_infos = mlfl_client.list_run_infos(experiment_id)
    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    assert all(e in first_run_dict["data"]["metrics"] for e in expected)
def test_log_metric_none(tmpdir: py.path.local) -> None:
    tracking_uri = f"file:{tmpdir}"
    metric_name = "metric"
    study_name = "my_study"

    mlflc = MLflowCallback(tracking_uri=tracking_uri, metric_name=metric_name)
    study = optuna.create_study(study_name=study_name)
    study.optimize(lambda _: np.nan, n_trials=1, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiments = mlfl_client.list_experiments()
    experiment = experiments[0]
    experiment_id = experiment.experiment_id

    run_infos = mlfl_client.list_run_infos(experiment_id)
    assert len(run_infos) == 1

    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    # When `values` is `None`, do not save values with metric names.
    assert metric_name not in first_run_dict["data"]["metrics"]
        trial.suggest_int('classifier__max_depth', 0, 6),
    'classifier__min_child_weight':
        trial.suggest_float('classifier__min_child_weight', 0, 1),
    'classifier__eta':
        trial.suggest_float('classifier__eta', 0, 1),
    'classifier__subsample':
        trial.suggest_float('classifier__subsample', 0, 1),
    'classifier__colsample_bytree':
        trial.suggest_float('classifier__colsample_bytree', 0, 1)
    }
    clf.set_params(**params)
    return average_score_on_cross_val_classification(clf, X, Y.values.ravel(), cv=8)
    # return -np.mean(cross_val_score(clf, X, Y.values.ravel(), cv=8))


mlflc = MLflowCallback(
    tracking_uri="/home/jupyter/mlruns/",
    metric_name='accuracy',
)
# sampler = SkoptSampler()
study = optuna.create_study(study_name='xgboost_metrics', direction='maximize')
study.optimize(objective, n_trials=30, callbacks=[mlflc])
# clf.set_params(**study.best_params)
# clf.fit(X, Y)
if __name__ == "__main__":
    study = optuna.load_study(
        study_name="k8s_mlflow",
        storage="postgresql://{}:{}@postgres:5432/{}".format(
            os.environ["POSTGRES_USER"],
            os.environ["POSTGRES_PASSWORD"],
            os.environ["POSTGRES_DB"],
        ),
    )
    study.optimize(
        objective,
        n_trials=100,
        timeout=600,
        callbacks=[
            MLflowCallback(tracking_uri="http://mlflow:5000/", metric_name="val_accuracy")
        ],
    )

    print("Number of finished trials: {}".format(len(study.trials)))
    print("Best trial:")
    trial = study.best_trial

    print("  Value: {}".format(trial.value))
    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))
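# `optuna.load_study` assumes the `k8s_mlflow` study already exists in the Postgres
# storage. A hypothetical bootstrap step (not part of the original script) could create
# it ahead of time with the same connection settings:
def create_study_if_missing() -> None:
    storage = "postgresql://{}:{}@postgres:5432/{}".format(
        os.environ["POSTGRES_USER"],
        os.environ["POSTGRES_PASSWORD"],
        os.environ["POSTGRES_DB"],
    )
    optuna.create_study(
        study_name="k8s_mlflow",
        storage=storage,
        direction="maximize",  # assumption: the direction depends on the objective
        load_if_exists=True,  # no-op if the study already exists
    )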
# pylint: disable=invalid-name
import logging
from typing import Any, Dict

import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

import optuna
from optuna.integration.mlflow import MLflowCallback

mlflc = MLflowCallback(
    tracking_uri="./mlruns",
    metric_name='mean squared error',
)


def score(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """Compute the mean squared error between target and predicted values.

    Args:
        y_true (np.ndarray): Target values.
        y_pred (np.ndarray): Predicted values.

    Returns:
        float: The mean squared error.
    """
    return mean_squared_error(y_true, y_pred)
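# The snippet above only sets up the callback and a scoring helper; the objective and the
# `study.optimize` call are not part of this excerpt. A minimal sketch of how they might
# fit together (assuming NumPy arrays `X` and `y` are already loaded, and using LightGBM's
# scikit-learn interface; the hyperparameter ranges are illustrative) is:
def objective(trial: optuna.trial.Trial) -> float:
    params: Dict[str, Any] = {
        "num_leaves": trial.suggest_int("num_leaves", 8, 256),
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.3, log=True),
        "n_estimators": trial.suggest_int("n_estimators", 50, 500),
    }
    fold_scores = []
    for train_idx, valid_idx in KFold(n_splits=5, shuffle=True, random_state=0).split(X):
        model = lgb.LGBMRegressor(**params)
        model.fit(X[train_idx], y[train_idx])
        fold_scores.append(score(y[valid_idx], model.predict(X[valid_idx])))
    return float(np.mean(fold_scores))


study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50, callbacks=[mlflc])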
def test_multiobjective_raises_on_type_mismatch(tmpdir: py.path.local, metrics: Any) -> None:
    tracking_uri = f"file:{tmpdir}"

    with pytest.raises(TypeError):
        MLflowCallback(tracking_uri=tracking_uri, metric_name=metrics)