Example #1
def test_use_existing_experiment_by_id(tmpdir: py.path.local) -> None:

    tracking_uri = f"file:{tmpdir}"
    mlflow.set_tracking_uri(tracking_uri)
    experiment_id = mlflow.create_experiment("foo")

    mlflow_kwargs = {"experiment_id": experiment_id}
    mlflc = MLflowCallback(tracking_uri=tracking_uri,
                           create_experiment=False,
                           mlflow_kwargs=mlflow_kwargs)
    study = optuna.create_study()

    for _ in range(10):
        study.optimize(_objective_func, n_trials=1, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiment_list = mlfl_client.list_experiments()
    assert len(experiment_list) == 1

    experiment = experiment_list[0]
    assert experiment.experiment_id == experiment_id
    assert experiment.name == "foo"

    runs = mlfl_client.list_run_infos(experiment_id)
    assert len(runs) == 10
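
Most of these tests call a module-level `_objective_func` helper that is not reproduced on this page. A minimal sketch, consistent with the inline definition in Example #9 and with the parameter and distribution assertions made in the later examples, would be:

def _objective_func(trial: optuna.trial.Trial) -> float:
    # The three parameters ("x", "y", "z") that the tests assert on.
    x = trial.suggest_float("x", -1.0, 1.0)
    y = trial.suggest_float("y", 20, 30, log=True)
    z = trial.suggest_categorical("z", (-1.0, 1.0))
    # The user attribute checked by the tag-related tests.
    trial.set_user_attr("my_user_attr", "my_user_attr_value")
    return (x - 2) ** 2 + (y - 25) ** 2 + z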
Example #2
def test_tag_truncation(tmpdir: py.path.local) -> None:

    tracking_uri = f"file:{tmpdir}"
    study_name = "my_study"
    n_trials = 3

    mlflc = MLflowCallback(tracking_uri=tracking_uri)
    study = optuna.create_study(study_name=study_name)
    study.optimize(_objective_func_long_user_attr,
                   n_trials=n_trials,
                   callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiments = mlfl_client.list_experiments()
    assert len(experiments) == 1

    experiment = experiments[0]
    assert experiment.name == study_name
    experiment_id = experiment.experiment_id

    run_infos = mlfl_client.list_run_infos(experiment_id)
    assert len(run_infos) == n_trials

    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    my_user_attr = first_run_dict["data"]["tags"]["my_user_attr"]
    assert len(my_user_attr) <= 5000
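
The `_objective_func_long_user_attr` helper is likewise defined outside this excerpt. All the truncation test needs is an objective whose `my_user_attr` value exceeds MLflow's 5000-character tag limit; a plausible sketch:

def _objective_func_long_user_attr(trial: optuna.trial.Trial) -> float:
    x = trial.suggest_float("x", -1.0, 1.0)
    y = trial.suggest_float("y", 20, 30, log=True)
    z = trial.suggest_categorical("z", (-1.0, 1.0))
    # Longer than the 5000-character tag limit, so the callback must truncate it.
    trial.set_user_attr("my_user_attr", "a" * 5500)
    return (x - 2) ** 2 + (y - 25) ** 2 + z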
Example #3
def test_log_params(tmpdir: py.path.local) -> None:

    tracking_uri = f"file:{tmpdir}"
    metric_name = "metric"
    study_name = "my_study"

    mlflc = MLflowCallback(tracking_uri=tracking_uri, metric_name=metric_name)
    study = optuna.create_study(study_name=study_name)
    study.enqueue_trial({"x": 1.0, "y": 1.0, "z": 1.0})
    study.optimize(_objective_func, n_trials=1, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiments = mlfl_client.list_experiments()
    experiment = experiments[0]
    experiment_id = experiment.experiment_id

    run_infos = mlfl_client.list_run_infos(experiment_id)
    assert len(run_infos) == 1

    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    for param_name, param_value in study.best_params.items():
        assert param_name in first_run_dict["data"]["params"]
        assert first_run_dict["data"]["params"][param_name] == str(param_value)
        assert first_run_dict["data"]["tags"][
            f"{param_name}_distribution"] == str(
                study.best_trial.distributions[param_name])
Example #4
def test_chunk_info(tmpdir: py.path.local) -> None:

    num_objective = mlflow.utils.validation.MAX_METRICS_PER_BATCH + 1
    num_params = mlflow.utils.validation.MAX_PARAMS_TAGS_PER_BATCH + 1

    def objective(trial: optuna.trial.Trial) -> Tuple[float, ...]:
        for i in range(num_params):
            trial.suggest_float(f"x_{i}", 0, 1)

        return tuple([1.0] * num_objective)

    tracking_uri = f"file:{tmpdir}"
    study_name = "my_study"
    n_trials = 1

    mlflc = MLflowCallback(tracking_uri=tracking_uri)
    study = optuna.create_study(study_name=study_name,
                                directions=["maximize"] * num_objective)
    study.optimize(objective, n_trials=n_trials, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiment = mlfl_client.list_experiments()[0]
    run_infos = mlfl_client.list_run_infos(experiment.experiment_id)
    assert len(run_infos) == n_trials

    run = mlfl_client.get_run(run_infos[0].run_id)
    run_dict = run.to_dictionary()

    # `tags` also contains the parameter distributions and other information, such as the trial number.
    assert len(run_dict["data"]["tags"]) > num_params
    assert len(run_dict["data"]["params"]) == num_params
    assert len(run_dict["data"]["metrics"]) == num_objective
Example #5
def test_nest_trials(tmpdir: py.path.local) -> None:

    tracking_uri = f"file:{tmpdir}"
    study_name = "my_study"
    mlflow.set_tracking_uri(tracking_uri)
    mlflow.set_experiment(study_name)

    mlflc = MLflowCallback(tracking_uri=tracking_uri,
                           mlflow_kwargs={"nested": True})
    study = optuna.create_study(study_name=study_name)

    n_trials = 3
    with mlflow.start_run() as parent_run:
        study.optimize(_objective_func, n_trials=n_trials, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiments = mlfl_client.list_experiments()
    experiment_id = experiments[0].experiment_id

    all_runs = mlfl_client.search_runs([experiment_id])
    child_runs = [r for r in all_runs if MLFLOW_PARENT_RUN_ID in r.data.tags]

    assert len(all_runs) == n_trials + 1
    assert len(child_runs) == n_trials
    assert all(r.data.tags[MLFLOW_PARENT_RUN_ID] == parent_run.info.run_id
               for r in child_runs)
    assert all(
        set(r.data.params.keys()) == {"x", "y", "z"} for r in child_runs)
    assert all(set(r.data.metrics.keys()) == {"value"} for r in child_runs)
Example #6
def test_use_existing_or_default_experiment(tmpdir: py.path.local,
                                            name: Optional[str],
                                            expected: str) -> None:

    if name is not None:
        tracking_uri = f"file:{tmpdir}"
        mlflow.set_tracking_uri(tracking_uri)
        mlflow.set_experiment(name)

    else:
        # Target directory can't exist when initializing first
        # run with default experiment at non-default uri.
        tracking_uri = f"file:{tmpdir}/foo"
        mlflow.set_tracking_uri(tracking_uri)

    mlflc = MLflowCallback(tracking_uri=tracking_uri, create_experiment=False)
    study = optuna.create_study()

    for _ in range(10):
        # Simulate multiple optimization runs under same experiment.
        study.optimize(_objective_func, n_trials=1, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiment = mlfl_client.list_experiments()[0]
    runs = mlfl_client.list_run_infos(experiment.experiment_id)

    assert experiment.name == expected
    assert len(runs) == 10
Example #7
def test_tag_study_user_attrs(tmpdir: py.path.local,
                              tag_study_user_attrs: bool) -> None:

    tracking_uri = f"file:{tmpdir}"
    study_name = "my_study"
    n_trials = 3

    mlflc = MLflowCallback(tracking_uri=tracking_uri,
                           tag_study_user_attrs=tag_study_user_attrs)
    study = optuna.create_study(study_name=study_name)
    study.set_user_attr("my_study_attr", "a")
    study.optimize(_objective_func_long_user_attr,
                   n_trials=n_trials,
                   callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiments = mlfl_client.list_experiments()
    assert len(experiments) == 1

    experiment = experiments[0]
    assert experiment.name == study_name
    experiment_id = experiment.experiment_id

    runs = mlfl_client.search_runs([experiment_id])
    assert len(runs) == n_trials

    if tag_study_user_attrs:
        assert all((r.data.tags["my_study_attr"] == "a") for r in runs)
    else:
        assert all(("my_study_attr" not in r.data.tags) for r in runs)
Example #8
def test_log_metric(tmpdir: py.path.local, func: Callable, names: List[str],
                    values: List[float]) -> None:

    tracking_uri = f"file:{tmpdir}"
    study_name = "my_study"

    mlflc = MLflowCallback(tracking_uri=tracking_uri, metric_name=names)
    study = optuna.create_study(
        study_name=study_name,
        directions=["minimize" for _ in range(len(values))])
    study.enqueue_trial({"x": 1.0, "y": 1.0, "z": 1.0})
    study.optimize(func, n_trials=1, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiments = mlfl_client.list_experiments()
    experiment = experiments[0]
    experiment_id = experiment.experiment_id

    run_infos = mlfl_client.list_run_infos(experiment_id)
    assert len(run_infos) == 1

    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    assert all(name in first_run_dict["data"]["metrics"] for name in names)
    assert all([
        first_run_dict["data"]["metrics"][name] == val
        for name, val in zip(names, values)
    ])
Example #9
def test_track_in_mlflow_decorator(tmpdir: py.path.local) -> None:

    tracking_uri = f"file:{tmpdir}"
    study_name = "my_study"
    n_trials = 3

    metric_name = "additional_metric"
    metric = 3.14

    mlflc = MLflowCallback(tracking_uri=tracking_uri)

    def _objective_func(trial: optuna.trial.Trial) -> float:
        """Objective function"""

        x = trial.suggest_float("x", -1.0, 1.0)
        y = trial.suggest_float("y", 20, 30, log=True)
        z = trial.suggest_categorical("z", (-1.0, 1.0))
        assert isinstance(z, float)
        trial.set_user_attr("my_user_attr", "my_user_attr_value")
        mlflow.log_metric(metric_name, metric)
        return (x - 2)**2 + (y - 25)**2 + z

    tracked_objective = mlflc.track_in_mlflow()(_objective_func)

    study = optuna.create_study(study_name=study_name)
    study.optimize(tracked_objective, n_trials=n_trials, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiments = mlfl_client.list_experiments()
    assert len(experiments) == 1

    experiment = experiments[0]
    assert experiment.name == study_name
    experiment_id = experiment.experiment_id

    run_infos = mlfl_client.list_run_infos(experiment_id)
    assert len(run_infos) == n_trials

    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    assert metric_name in first_run_dict["data"]["metrics"]
    assert first_run_dict["data"]["metrics"][metric_name] == metric

    assert tracked_objective.__name__ == _objective_func.__name__
    assert tracked_objective.__doc__ == _objective_func.__doc__
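
Because `track_in_mlflow()` returns a decorator, the wrapping above can also be written with decorator syntax. A brief sketch (the objective body and names here are illustrative):

@mlflc.track_in_mlflow()
def decorated_objective(trial: optuna.trial.Trial) -> float:
    # mlflow.log_metric / mlflow.log_param calls made inside the objective are
    # recorded in the MLflow run the callback opens for this trial.
    mlflow.log_metric("additional_metric", 3.14)
    return trial.suggest_float("x", -1.0, 1.0)

study.optimize(decorated_objective, n_trials=1, callbacks=[mlflc])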
Example #10
def test_initialize_experiment(tmpdir: py.path.local) -> None:
    tracking_file_name = "file:{}".format(tmpdir)
    metric_name = "my_metric_name"
    study_name = "my_study"

    mlflc = MLflowCallback(tracking_uri=tracking_file_name,
                           metric_name=metric_name)
    study = optuna.create_study(study_name=study_name)

    mlflc._initialize_experiment(study)

    mlfl_client = MlflowClient(tracking_file_name)
    experiments = mlfl_client.list_experiments()
    assert len(experiments) == 1

    experiment = experiments[0]
    assert experiment.name == study_name
Example #11
def test_multiobjective_raises_on_name_mismatch(tmpdir: py.path.local,
                                                metrics: List[str]) -> None:

    tracking_uri = f"file:{tmpdir}"
    mlflc = MLflowCallback(tracking_uri=tracking_uri, metric_name=metrics)
    study = optuna.create_study(study_name="my_study",
                                directions=["minimize", "maximize"])

    with pytest.raises(ValueError):
        study.optimize(_multiobjective_func, n_trials=1, callbacks=[mlflc])
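
`_multiobjective_func` is also not shown on this page. Any objective returning two values fits the two-direction studies used in these tests; a sketch (assuming `typing.Tuple` is imported, as in Example #4):

def _multiobjective_func(trial: optuna.trial.Trial) -> Tuple[float, float]:
    x = trial.suggest_float("x", -1.0, 1.0)
    y = trial.suggest_float("y", 20, 30, log=True)
    # Two objective values: the first is minimized, the second maximized.
    return (x - 2) ** 2 + (y - 25) ** 2, x - 1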
Example #12
def test_log_metric(tmpdir: py.path.local, names: List[str],
                    values: List[float]) -> None:

    tracking_file_name = "file:{}".format(tmpdir)
    study_name = "my_study"

    mlflc = MLflowCallback(tracking_uri=tracking_file_name, metric_name=names)
    study = optuna.create_study(study_name=study_name)
    mlflc._initialize_experiment(study)

    with mlflow.start_run():
        mlflc._log_metrics(values)

    mlfl_client = MlflowClient(tracking_file_name)
    experiments = mlfl_client.list_experiments()
    experiment = experiments[0]
    experiment_id = experiment.experiment_id

    run_infos = mlfl_client.list_run_infos(experiment_id)
    assert len(run_infos) == 1

    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    assert all(name in first_run_dict["data"]["metrics"] for name in names)
    assert all([
        first_run_dict["data"]["metrics"][name] == val
        for name, val in zip(names, values)
    ])
Example #13
def test_log_metric_none(tmpdir: py.path.local) -> None:
    tracking_file_name = "file:{}".format(tmpdir)
    metric_name = "my_metric_name"
    study_name = "my_study"
    metric_value = None

    mlflc = MLflowCallback(tracking_uri=tracking_file_name,
                           metric_name=metric_name)
    study = optuna.create_study(study_name=study_name)
    mlflc._initialize_experiment(study)

    with mlflow.start_run():
        mlflc._log_metrics(metric_value)

    mlfl_client = MlflowClient(tracking_file_name)
    experiments = mlfl_client.list_experiments()
    experiment = experiments[0]
    experiment_id = experiment.experiment_id

    run_infos = mlfl_client.list_run_infos(experiment_id)
    assert len(run_infos) == 1

    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    # When `values` is `None`, do not save values with metric names.
    assert metric_name not in first_run_dict["data"]["metrics"]
Example #14
def test_mlflow_callback_fails_when_nest_trials_is_false_and_active_run_exists(
    tmpdir: py.path.local, ) -> None:

    tracking_uri = f"file:{tmpdir}"
    study_name = "my_study"
    mlflow.set_tracking_uri(tracking_uri)
    mlflow.set_experiment(study_name)

    mlflc = MLflowCallback(tracking_uri=tracking_uri)
    study = optuna.create_study(study_name=study_name)

    with mlflow.start_run():
        with pytest.raises(Exception,
                           match=r"Run with UUID \w+ is already active."):
            study.optimize(_objective_func, n_trials=1, callbacks=[mlflc])
Example #15
def test_tag_always_logged(tmpdir: py.path.local) -> None:

    tracking_uri = f"file:{tmpdir}"
    study_name = "my_study"
    n_trials = 3

    mlflc = MLflowCallback(tracking_uri=tracking_uri)
    study = optuna.create_study(study_name=study_name)
    study.optimize(_objective_func, n_trials=n_trials, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiment = mlfl_client.list_experiments()[0]
    runs = mlfl_client.search_runs([experiment.experiment_id])

    assert all((r.data.tags["direction"] == "MINIMIZE") for r in runs)
    assert all((r.data.tags["state"] == "COMPLETE") for r in runs)
Example #16
def test_run_name(tmpdir: py.path.local, run_name: Optional[str],
                  expected: str) -> None:

    tracking_uri = f"file:{tmpdir}"

    mlflow_kwargs = {"run_name": run_name}
    mlflc = MLflowCallback(tracking_uri=tracking_uri,
                           mlflow_kwargs=mlflow_kwargs)
    study = optuna.create_study()
    study.optimize(_objective_func, n_trials=1, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiment = mlfl_client.list_experiments()[0]
    run_info = mlfl_client.list_run_infos(experiment.experiment_id)[0]
    run = mlfl_client.get_run(run_info.run_id)
    tags = run.data.tags
    assert tags["mlflow.runName"] == expected
Example #17
def test_study_name(tmpdir: py.path.local) -> None:

    tracking_uri = f"file:{tmpdir}"
    study_name = "my_study"
    n_trials = 3

    mlflc = MLflowCallback(tracking_uri=tracking_uri)
    study = optuna.create_study(study_name=study_name)
    study.optimize(_objective_func, n_trials=n_trials, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    assert len(mlfl_client.list_experiments()) == 1

    experiment = mlfl_client.list_experiments()[0]
    runs = mlfl_client.list_run_infos(experiment.experiment_id)

    assert experiment.name == study_name
    assert len(runs) == n_trials
Example #18
def optimize(
    params_fp: Path = Path(config.CONFIG_DIR, "params.json"),
    study_name: Optional[str] = "optimization",
    num_trials: int = 100,
) -> None:
    """Optimize a subset of hyperparameters towards an objective.

    This saves the best trial's parameters into `config/params.json`.

    Args:
        params_fp (Path, optional): Location of the parameters file (only fields such as
                                  num_samples, num_epochs, etc. are used) for training.
                                  Defaults to `config/params.json`.
        study_name (str, optional): Name of the study to save trial runs under. Defaults to `optimization`.
        num_trials (int, optional): Number of trials to run. Defaults to 100.
    """
    # Starting parameters (not actually used, but needed for setup)
    params = Namespace(**utils.load_dict(filepath=params_fp))

    # Optimize
    pruner = optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=5)
    study = optuna.create_study(study_name=study_name,
                                direction="maximize",
                                pruner=pruner)
    mlflow_callback = MLflowCallback(tracking_uri=mlflow.get_tracking_uri(),
                                     metric_name="f1")
    study.optimize(
        lambda trial: main.objective(params, trial),
        n_trials=num_trials,
        callbacks=[mlflow_callback],
    )

    # All trials
    trials_df = study.trials_dataframe()
    trials_df = trials_df.sort_values(["value"], ascending=False)

    # Best trial
    logger.info(f"Best value (f1): {study.best_trial.value}")
    params = {**params.__dict__, **study.best_trial.params}
    params["threshold"] = study.best_trial.user_attrs["threshold"]
    with open(params_fp, "w") as fp:
        json.dump(params, fp=fp, indent=2, cls=NumpyEncoder)
    logger.info(json.dumps(params, indent=2, cls=NumpyEncoder))
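
A direct call using the defaults from the signature above (the trial budget here is arbitrary) could look like:

optimize(
    params_fp=Path(config.CONFIG_DIR, "params.json"),
    study_name="optimization",
    num_trials=20,
)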
Example #19
def optimize(num_trials: int = 100) -> None:
    """Optimize a subset of hyperparameters towards an objective.

    This saves the best trial's arguments into `config/args.json`.

    Args:
        num_trials (int, optional): Number of trials to run. Defaults to 100.
    """
    # Starting arguments (not actually used, but needed for setup)
    args_fp = Path(config.CONFIG_DIR, "args.json")
    args = Namespace(**utils.load_dict(filepath=args_fp))

    # Optimize
    pruner = optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=5)
    study = optuna.create_study(
        study_name="optimization", direction="maximize", pruner=pruner
    )
    mlflow_callback = MLflowCallback(
        tracking_uri=mlflow.get_tracking_uri(), metric_name="f1"
    )
    study.optimize(
        lambda trial: train.objective(args, trial),
        n_trials=num_trials,
        callbacks=[mlflow_callback],
    )

    # All trials
    trials_df = study.trials_dataframe()
    trials_df = trials_df.sort_values(
        ["value"], ascending=False
    )  # sort by metric
    trials_df.to_csv(
        Path(config.EXPERIMENTS_DIR, "trials.csv"), index=False
    )  # save

    # Best trial
    logger.info(f"Best value (f1): {study.best_trial.value}")
    params = {**args.__dict__, **study.best_trial.params}
    params["threshold"] = study.best_trial.user_attrs["threshold"]
    with open(Path(config.CONFIG_DIR, "args.json"), "w") as fp:
        json.dump(params, fp=fp, indent=2, cls=NumpyEncoder)
    logger.info(json.dumps(params, indent=2, cls=NumpyEncoder))
Example #20
def test_log_mlflow_tags(tmpdir: py.path.local) -> None:

    tracking_uri = f"file:{tmpdir}"
    expected_tags = {"foo": 0, "bar": 1}
    mlflow_kwargs = {"tags": expected_tags}

    mlflc = MLflowCallback(tracking_uri=tracking_uri,
                           mlflow_kwargs=mlflow_kwargs)
    study = optuna.create_study()
    study.optimize(_objective_func, n_trials=1, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiment = mlfl_client.list_experiments()[0]
    run_info = mlfl_client.list_run_infos(experiment.experiment_id)[0]
    run = mlfl_client.get_run(run_info.run_id)
    tags = run.data.tags

    assert all([k in tags.keys() for k in expected_tags.keys()])
    assert all(
        [tags[key] == str(value) for key, value in expected_tags.items()])
Example #21
def test_tag_trial_user_attrs(tmpdir: py.path.local,
                              tag_trial_user_attrs: bool) -> None:
    tracking_uri = "file:{}".format(tmpdir)
    study_name = "my_study"
    n_trials = 3

    mlflc = MLflowCallback(tracking_uri=tracking_uri,
                           tag_trial_user_attrs=tag_trial_user_attrs)
    study = optuna.create_study(study_name=study_name)
    study.optimize(_objective_func, n_trials=n_trials, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiment = mlfl_client.list_experiments()[0]
    runs = mlfl_client.search_runs([experiment.experiment_id])

    if tag_trial_user_attrs:
        assert all((r.data.tags["my_user_attr"] == "my_user_attr_value")
                   for r in runs)
    else:
        assert all(("my_user_attr" not in r.data.tags) for r in runs)
Example #22
def test_log_params(tmpdir: py.path.local) -> None:
    tracking_file_name = "file:{}".format(tmpdir)
    metric_name = "my_metric_name"
    study_name = "my_study"

    param1_name = "my_param1"
    param1_value = "a"
    param2_name = "my_param2"
    param2_value = 5

    params = {param1_name: param1_value, param2_name: param2_value}

    mlflc = MLflowCallback(tracking_uri=tracking_file_name,
                           metric_name=metric_name)
    study = optuna.create_study(study_name=study_name)
    mlflc._initialize_experiment(study)

    with mlflow.start_run():

        trial = optuna.trial.create_trial(
            params=params,
            distributions={
                param1_name:
                optuna.distributions.CategoricalDistribution(["a", "b"]),
                param2_name:
                optuna.distributions.UniformDistribution(0, 10),
            },
            value=5.0,
        )
        mlflc._log_params(trial.params)

    mlfl_client = MlflowClient(tracking_file_name)
    experiments = mlfl_client.list_experiments()
    experiment = experiments[0]
    experiment_id = experiment.experiment_id

    run_infos = mlfl_client.list_run_infos(experiment_id)
    assert len(run_infos) == 1

    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    assert param1_name in first_run_dict["data"]["params"]
    assert first_run_dict["data"]["params"][param1_name] == param1_value

    assert param2_name in first_run_dict["data"]["params"]
    assert first_run_dict["data"]["params"][param2_name] == str(param2_value)
Example #23
def test_metric_name(tmpdir: py.path.local) -> None:

    tracking_uri = f"file:{tmpdir}"
    metric_name = "my_metric_name"

    mlflc = MLflowCallback(tracking_uri=tracking_uri, metric_name=metric_name)
    study = optuna.create_study(study_name="my_study")
    study.optimize(_objective_func, n_trials=3, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiments = mlfl_client.list_experiments()

    experiment = experiments[0]
    experiment_id = experiment.experiment_id

    run_infos = mlfl_client.list_run_infos(experiment_id)

    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    assert metric_name in first_run_dict["data"]["metrics"]
Example #24
def test_study_name(tmpdir: py.path.local) -> None:

    tracking_file_name = "file:{}".format(tmpdir)
    study_name = "my_study"
    n_trials = 3

    mlflc = MLflowCallback(tracking_uri=tracking_file_name)
    study = optuna.create_study(study_name=study_name)
    study.optimize(_objective_func, n_trials=n_trials, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_file_name)
    experiments = mlfl_client.list_experiments()
    assert len(experiments) == 1

    experiment = experiments[0]
    assert experiment.name == study_name
    experiment_id = experiment.experiment_id

    run_infos = mlfl_client.list_run_infos(experiment_id)
    assert len(run_infos) == n_trials

    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()
    assert "value" in first_run_dict["data"]["metrics"]
    assert "x" in first_run_dict["data"]["params"]
    assert "y" in first_run_dict["data"]["params"]
    assert "z" in first_run_dict["data"]["params"]
    assert first_run_dict["data"]["tags"]["direction"] == "MINIMIZE"
    assert first_run_dict["data"]["tags"]["state"] == "COMPLETE"
    assert (first_run_dict["data"]["tags"]["x_distribution"] ==
            "UniformDistribution(high=1.0, low=-1.0)")
    assert (first_run_dict["data"]["tags"]["y_distribution"] ==
            "LogUniformDistribution(high=30, low=20)")
    assert (first_run_dict["data"]["tags"]["z_distribution"] ==
            "CategoricalDistribution(choices=(-1.0, 1.0))")
    assert first_run_dict["data"]["tags"][
        "my_user_attr"] == "my_user_attr_value"
Example #25
def test_metric_name_multiobjective(tmpdir: py.path.local,
                                    names: Union[str, List[str]],
                                    expected: List[str]) -> None:

    tracking_uri = f"file:{tmpdir}"

    mlflc = MLflowCallback(tracking_uri=tracking_uri, metric_name=names)
    study = optuna.create_study(study_name="my_study",
                                directions=["minimize", "maximize"])
    study.optimize(_multiobjective_func, n_trials=3, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiments = mlfl_client.list_experiments()

    experiment = experiments[0]
    experiment_id = experiment.experiment_id

    run_infos = mlfl_client.list_run_infos(experiment_id)

    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    assert all([e in first_run_dict["data"]["metrics"] for e in expected])
Example #26
def test_log_metric_none(tmpdir: py.path.local) -> None:

    tracking_uri = f"file:{tmpdir}"
    metric_name = "metric"
    study_name = "my_study"

    mlflc = MLflowCallback(tracking_uri=tracking_uri, metric_name=metric_name)
    study = optuna.create_study(study_name=study_name)
    study.optimize(lambda _: np.nan, n_trials=1, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiments = mlfl_client.list_experiments()
    experiment = experiments[0]
    experiment_id = experiment.experiment_id

    run_infos = mlfl_client.list_run_infos(experiment_id)
    assert len(run_infos) == 1

    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    # When `values` is `None`, do not save values with metric names.
    assert metric_name not in first_run_dict["data"]["metrics"]
Example #27
def objective(trial: optuna.trial.Trial) -> float:
    # Excerpt: `clf`, `X`, `Y` and `average_score_on_cross_val_classification`
    # are defined elsewhere in the original project; the function header and the
    # first dict key below are inferred from the calls that follow.
    params = {
        'classifier__max_depth':
        trial.suggest_int('classifier__max_depth', 0, 6),
        'classifier__min_child_weight':
        trial.suggest_float('classifier__min_child_weight', 0, 1),
        'classifier__eta':
        trial.suggest_float('classifier__eta', 0, 1),
        'classifier__subsample':
        trial.suggest_float('classifier__subsample', 0, 1),
        'classifier__colsample_bytree':
        trial.suggest_float('classifier__colsample_bytree', 0, 1)
    }

    clf.set_params(**params)

    return average_score_on_cross_val_classification(clf,
                                                     X,
                                                     Y.values.ravel(),
                                                     cv=8)
    #return -np.mean(cross_val_score(clf, X, Y.values.ravel(), cv=8))


mlflc = MLflowCallback(
    tracking_uri="/home/jupyter/mlruns/",
    metric_name='accuracy',
)

#sampler = SkoptSampler()
study = optuna.create_study(study_name='xgboost_metrics', direction='maximize')
study.optimize(objective, n_trials=30, callbacks=[mlflc])
# clf.set_params(**study.best_params)
# clf.fit(X, Y)
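
`average_score_on_cross_val_classification` is a project-specific helper that is not part of this snippet. Judging by the commented-out `cross_val_score` line, a plausible sketch is:

import numpy as np
from sklearn.model_selection import cross_val_score


def average_score_on_cross_val_classification(clf, X, y, cv=8):
    # Hypothetical helper: mean cross-validated accuracy, mirroring the
    # commented-out cross_val_score alternative above.
    return float(np.mean(cross_val_score(clf, X, y, scoring="accuracy", cv=cv)))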
Example #28
if __name__ == "__main__":
    study = optuna.load_study(
        study_name="k8s_mlflow",
        storage="postgresql://{}:{}@postgres:5432/{}".format(
            os.environ["POSTGRES_USER"],
            os.environ["POSTGRES_PASSWORD"],
            os.environ["POSTGRES_DB"],
        ),
    )
    study.optimize(
        objective,
        n_trials=100,
        timeout=600,
        callbacks=[
            MLflowCallback(tracking_uri="http://mlflow:5000/",
                           metric_name="val_accuracy")
        ],
    )

    print("Number of finished trials: {}".format(len(study.trials)))

    print("Best trial:")
    trial = study.best_trial

    print("  Value: {}".format(trial.value))

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))
Example #29
# pylint: disable=invalid-name

import logging
from typing import Any, Dict

import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

import optuna
from optuna.integration.mlflow import MLflowCallback

mlflc = MLflowCallback(
    tracking_uri="./mlruns",
    metric_name='mean squared error',
)


def score(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """[summary]

    Args:
        y_true (np.ndarray): target value
        y_pred (np.ndarray): Predictive value

    Returns:
        float: score
    """
    return mean_squared_error(y_true, y_pred)
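
The objective function that would use `score` together with these imports is not part of the excerpt. A hypothetical continuation, assuming a pandas feature frame `X` and target series `y` exist in the surrounding module, might look like:

@mlflc.track_in_mlflow()
def objective(trial: optuna.trial.Trial) -> float:
    # Hypothetical search space; the real one is not shown in the excerpt.
    params: Dict[str, Any] = {
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.3, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 15, 255),
    }
    kf = KFold(n_splits=5, shuffle=True, random_state=0)
    fold_scores = []
    for train_idx, valid_idx in kf.split(X):
        model = lgb.LGBMRegressor(**params)
        model.fit(X.iloc[train_idx], y.iloc[train_idx])
        fold_scores.append(score(y.iloc[valid_idx].to_numpy(), model.predict(X.iloc[valid_idx])))
    # Mean squared error across folds, logged by the callback as 'mean squared error'.
    return float(np.mean(fold_scores))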
Example #30
def test_multiobjective_raises_on_type_mismatch(tmpdir: py.path.local,
                                                metrics: Any) -> None:

    tracking_uri = f"file:{tmpdir}"
    with pytest.raises(TypeError):
        MLflowCallback(tracking_uri=tracking_uri, metric_name=metrics)