コード例 #1
0
def test_pipeline_run_hook_getting_configs(
    kedro_project,
    dummy_run_params,
    dummy_pipeline,
    dummy_catalog,
):
    """Check that the hook exposes the ``dict_params`` settings of mlflow.yml.

    A ``mlflow.yml`` with explicit ``flatten`` / ``recursive`` / ``sep``
    values is written to ``conf/local``. After ``after_context_created`` and
    ``before_pipeline_run`` have been called, the hook attributes of the same
    names must carry those values.
    """
    dict_params_conf = dict(flatten=True, recursive=False, sep="-")
    _write_yaml(
        kedro_project / "conf" / "local" / "mlflow.yml",
        dict(tracking=dict(params=dict(dict_params=dict_params_conf))),
    )

    bootstrap_project(kedro_project)
    with KedroSession.create(project_path=kedro_project) as session:
        context = session.load_context()

        hook = MlflowHook()
        hook.after_context_created(context)
        hook.before_pipeline_run(
            run_params=dummy_run_params,
            pipeline=dummy_pipeline,
            catalog=dummy_catalog,
        )

        observed = (hook.flatten, hook.recursive, hook.sep)
        assert observed == (True, False, "-")
コード例 #2
0
def test_modelify_logs_in_mlflow(monkeypatch, example_repo, artifacts_list):
    """Check that the ``modelify`` command logs exactly one new mlflow run.

    The command is invoked from inside ``example_repo``; its output must
    mention every artifact of ``artifacts_list`` and report a successful
    model logging.
    """
    monkeypatch.chdir(example_repo)
    project_path = Path().cwd()

    bootstrap_project(project_path)
    with KedroSession.create(project_path=project_path) as session:
        context = session.load_context()
        catalog = context.catalog
        catalog.save("trained_model", 2)

    def _list_runs():
        # runs currently recorded in the experiment configured for the project
        return context.mlflow.server._mlflow_client.list_run_infos(
            context.mlflow.tracking.experiment._experiment.experiment_id)

    runs_before = _list_runs()

    result = CliRunner().invoke(
        cli_modelify,
        ["--pipeline", "inference", "--input-name", "my_input_data"],
        catch_exceptions=True,
    )

    runs_after = _list_runs()

    assert result.exit_code == 0
    for artifact in artifacts_list:
        expected_line = (
            f"The data_set '{artifact}' is added to the Pipeline catalog")
        assert expected_line in result.output
    assert "Model successfully logged" in result.output
    assert len(runs_after) - len(runs_before) == 1
コード例 #3
0
ファイル: cli.py プロジェクト: Galileo-Galilei/kedro-mlflow
def ui(env: str, port: str, host: str):
    """Opens the mlflow user interface with the
    project-specific settings of mlflow.yml. This interface
    enables to browse and compares runs.
    """
    project_path = Path().cwd()
    bootstrap_project(project_path)
    with KedroSession.create(project_path=project_path, env=env) as session:
        mlflow_conf = session.load_context().mlflow

        # explicit arguments win; otherwise fall back on the project config
        host = host or mlflow_conf.ui.host
        port = port or mlflow_conf.ui.port
        tracking_uri = mlflow_conf.server.mlflow_tracking_uri

        if tracking_uri.startswith("http"):
            # the tracking URI is an http(s) address: open it in the browser
            webbrowser.open(tracking_uri)
            return

        # call mlflow ui with specific options
        # TODO : add more options for ui
        mlflow_ui_cmd = [
            "mlflow",
            "ui",
            "--backend-store-uri",
            tracking_uri,
            "--host",
            host,
            "--port",
            port,
        ]
        subprocess.call(mlflow_ui_cmd)
コード例 #4
0
def test_mlflow_config_default(kedro_project):
    """Check that a fully specified mlflow.yml is loaded back unchanged.

    The only expected difference is ``server.mlflow_tracking_uri``: the
    relative "mlruns" value must come back as the URI of
    ``kedro_project / "mlruns"``.
    """
    # kedro_project is a pytest.fixture in conftest
    server_conf = dict(mlflow_tracking_uri="mlruns", credentials=None)
    tracking_conf = dict(
        disable_tracking=dict(pipelines=["my_disabled_pipeline"]),
        experiment=dict(name="fake_package", restore_if_deleted=True),
        run=dict(id="123456789", name="my_run", nested=True),
        params=dict(
            dict_params=dict(flatten=True, recursive=False, sep="-"),
            long_params_strategy="truncate",
        ),
    )
    ui_conf = dict(port="5151", host="localhost")
    dict_config = dict(server=server_conf, tracking=tracking_conf, ui=ui_conf)

    _write_yaml(kedro_project / "conf" / "local" / "mlflow.yml", dict_config)

    # Shallow copy: the nested "server" dict is shared with dict_config, so
    # the assignment below also changes dict_config. That is harmless here
    # because the yaml file has already been written.
    expected = dict(dict_config)
    expected["server"]["mlflow_tracking_uri"] = (
        kedro_project / "mlruns").as_uri()

    bootstrap_project(kedro_project)
    with KedroSession.create(project_path=kedro_project) as session:
        context = session.load_context()
        loaded_conf = context.mlflow.dict(exclude={"project_path"})
        assert loaded_conf == expected
コード例 #5
0
def test_mlflow_hook_automatically_prefix_metrics_dataset(
    kedro_project_with_mlflow_conf, dummy_catalog
):
    """Check the prefix / key of metric datasets after ``after_catalog_created``.

    After the hook has processed ``dummy_catalog``, "my_metrics" and
    "my_metric" must be prefixed / keyed with their own catalog name, while
    "another_metrics" and "another_metric" must carry "foo".
    """
    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session:
        context = session.load_context()  # triggers conf setup

        hook = MlflowHook()
        hook.after_context_created(context)  # setup mlflow config

        # `after_catalog_created` is not using any of the arguments other
        # than `catalog`, so they are set to empty values.
        unused_kwargs = dict(
            conf_catalog={},
            conf_creds={},
            feed_dict={},
            save_version="",
            load_versions="",
        )
        hook.after_catalog_created(catalog=dummy_catalog, **unused_kwargs)

        data_sets = dummy_catalog._data_sets
        # datasets holding several metrics expose a `_prefix`
        assert data_sets["my_metrics"]._prefix == "my_metrics"
        assert data_sets["another_metrics"]._prefix == "foo"
        # datasets holding a single metric expose a `key`
        assert data_sets["my_metric"].key == "my_metric"
        assert data_sets["another_metric"].key == "foo"
コード例 #6
0
def test_kedro_mlflow_config_setup_tracking_priority(kedro_project_with_mlflow_conf):
    """Test if the mlflow_tracking uri set is the one of mlflow.yml
    if it also exist in credentials.

    A credentials entry declaring ``mlflow_tracking_uri="mlruns2"`` is written
    to ``conf/base/credentials.yml`` while the config itself declares
    "mlruns1"; after ``config.setup`` the active mlflow tracking URI must be
    the "mlruns1" folder of the project.
    """
    credentials_path = kedro_project_with_mlflow_conf / "conf/base/credentials.yml"
    credentials_path.write_text(
        yaml.dump(dict(my_mlflow_creds=dict(mlflow_tracking_uri="mlruns2")))
    )

    try:
        config = KedroMlflowConfig(
            server=dict(
                mlflow_tracking_uri="mlruns1",
                credentials="my_mlflow_creds",
            ),
        )

        bootstrap_project(kedro_project_with_mlflow_conf)
        with KedroSession.create(
            project_path=kedro_project_with_mlflow_conf
        ) as session:
            context = session.load_context()
            config.setup(context)

        assert (
            mlflow.get_tracking_uri()
            == (kedro_project_with_mlflow_conf / "mlruns1").as_uri()
        )
    finally:
        # reset the credentials file to avoid interference with other tests;
        # done in `finally` so that it also happens when the test fails
        credentials_path.write_text("")
コード例 #7
0
def test_kedro_mlflow_config_setup_set_experiment_globally(
    kedro_project_with_mlflow_conf,
):
    """Check that a bare ``mlflow.start_run()`` lands in the configured experiment.

    After ``config.setup``, a run started without naming any experiment must
    appear as exactly one additional run (listed first) in the experiment
    held by the config.
    """
    tracking_uri = (kedro_project_with_mlflow_conf / "mlruns").as_uri()

    config = KedroMlflowConfig(
        server=dict(mlflow_tracking_uri="mlruns"),
        tracking=dict(experiment=dict(name="incredible_exp")),
    )

    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session:
        config.setup(session.load_context())  # setup config

    client = MlflowClient(tracking_uri)

    def _experiment_runs():
        # runs currently recorded in the experiment attached to the config
        return client.list_run_infos(
            config.tracking.experiment._experiment.experiment_id
        )

    runs_before = _experiment_runs()

    with mlflow.start_run():
        mlflow.log_param("a", 1)
        my_run_id = mlflow.active_run().info.run_id

    runs_after = _experiment_runs()

    assert len(runs_after) - len(runs_before) == 1
    assert runs_after[0].run_id == my_run_id
コード例 #8
0
def test_mlflow_config_in_uninitialized_project(kedro_project):
    """Check that loading the context without mlflow.yml raises a config error."""
    # no mlflow.yml is written by this test, so bootstrapping the project and
    # loading its context is expected to raise KedroMlflowConfigError
    expected_message = "No 'mlflow.yml' config file found in environment"
    with pytest.raises(KedroMlflowConfigError, match=expected_message):
        bootstrap_project(kedro_project)
        with KedroSession.create(project_path=kedro_project) as session:
            session.load_context()
コード例 #9
0
def test_modelify_with_infer_signature_arg(monkeypatch,
                                           kp_for_modelify_persistent_input):
    """Check that ``modelify --infer-signature`` stores an input schema.

    The model logged by the command must expose an input schema with a
    "long" column ``col_int`` and a "string" column ``col_str``, matching the
    dataframe saved as "my_input_data".
    """
    monkeypatch.chdir(kp_for_modelify_persistent_input)

    cli_runner = CliRunner()

    bootstrap_project(Path().cwd())
    my_input_data = pd.DataFrame(
        {"col_int": [1, 2, 3], "col_str": ["a", "b", "c"]})
    with KedroSession.create() as session:
        context = session.load_context()
        catalog = context.catalog
        catalog.save("trained_model", 2)
        catalog.save("my_input_data", my_input_data)

    def _run_ids():
        # ids of all runs currently recorded in the project experiment
        run_infos = context.mlflow.server._mlflow_client.list_run_infos(
            context.mlflow.tracking.experiment._experiment.experiment_id)
        return {run_info.run_id for run_info in run_infos}

    run_ids_before = _run_ids()

    cmd = [
        "--pipeline",
        "inference",
        "--input-name",
        "my_input_data",
        "--infer-signature",
    ]
    result = cli_runner.invoke(cli_modelify, cmd, catch_exceptions=True)

    assert result.exit_code == 0

    run_ids_after = _run_ids()

    # the run created by the command is the one that was not there before
    new_run_id = list(run_ids_after - run_ids_before)[0]

    loaded_model = mlflow.pyfunc.load_model(f"runs:/{new_run_id}/model")

    expected_schema = [
        {"name": "col_int", "type": "long"},
        {"name": "col_str", "type": "string"},
    ]
    assert loaded_model.metadata.get_input_schema().to_dict() == expected_schema
コード例 #10
0
def test_modelify_with_infer_input_example(monkeypatch,
                                           kp_for_modelify_persistent_input,
                                           flag_infer_signature):
    """Check that ``modelify --infer-input-example`` stores an input example.

    The command is run with or without ``--infer-signature`` depending on
    ``flag_infer_signature``; in both cases the logged model metadata must
    describe a dataframe input example saved as "input_example.json".
    """
    monkeypatch.chdir(kp_for_modelify_persistent_input)

    cli_runner = CliRunner()

    bootstrap_project(Path().cwd())
    my_input_data = pd.DataFrame(
        {"col_int": [1, 2, 3], "col_str": ["a", "b", "c"]})
    with KedroSession.create() as session:
        context = session.load_context()
        catalog = context.catalog
        catalog.save("trained_model", 2)
        catalog.save("my_input_data", my_input_data)

    def _run_ids():
        # ids of all runs currently recorded in the project experiment
        run_infos = context.mlflow.server._mlflow_client.list_run_infos(
            context.mlflow.tracking.experiment._experiment.experiment_id)
        return {run_info.run_id for run_info in run_infos}

    run_ids_before = _run_ids()

    base_cmd = [
        "--pipeline",
        "inference",
        "--input-name",
        "my_input_data",
        "--infer-input-example",
    ]
    cmd = base_cmd + (["--infer-signature"] if flag_infer_signature else [])

    result = cli_runner.invoke(cli_modelify, cmd, catch_exceptions=True)

    assert result.exit_code == 0

    run_ids_after = _run_ids()

    # the run created by the command is the one that was not there before
    new_run_id = list(run_ids_after - run_ids_before)[0]

    loaded_model = mlflow.pyfunc.load_model(f"runs:/{new_run_id}/model")

    expected_example_info = {
        "artifact_path": "input_example.json",
        "pandas_orient": "split",
        "type": "dataframe",
    }
    assert loaded_model.metadata.saved_input_example_info == expected_example_info
コード例 #11
0
def test_mlflow_hook_save_pipeline_ml(
    kedro_project_with_mlflow_conf,
    pipeline_to_run,
    dummy_catalog,
    dummy_run_params,
):
    """Run a pipeline through the hook and check what ends up in mlflow.

    Every truthy run parameter must be recorded as a tag of the run. A model
    artifact must be logged only when ``pipeline_to_run`` is a ``PipelineML``;
    in that case its signature is checked as well.
    """
    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(
            project_path=kedro_project_with_mlflow_conf) as session:
        context = session.load_context()  # triggers conf setup

        hook = MlflowHook()
        hook.after_context_created(context)  # setup mlflow config
        runner = SequentialRunner()

        # `after_catalog_created` is not using any of the arguments other
        # than `catalog`, so they are set to empty values.
        hook.after_catalog_created(
            catalog=dummy_catalog,
            conf_catalog={},
            conf_creds={},
            feed_dict={},
            save_version="",
            load_versions="",
        )

        hook_kwargs = dict(
            run_params=dummy_run_params,
            pipeline=pipeline_to_run,
            catalog=dummy_catalog,
        )
        hook.before_pipeline_run(**hook_kwargs)
        runner.run(pipeline_to_run, dummy_catalog, session._hook_manager)
        # the run id must be read before `after_pipeline_run` is called
        run_id = mlflow.active_run().info.run_id
        hook.after_pipeline_run(**hook_kwargs)

        client = MlflowClient(context.mlflow.server.mlflow_tracking_uri)
        run_data = client.get_run(run_id).data

        # all (truthy) run_params are recorded as tags
        for key, value in dummy_run_params.items():
            if value:
                assert run_data.tags[key] == str(value)

        # the model should not be logged when it is not a PipelineML
        nb_artifacts = len(client.list_artifacts(run_id))
        if isinstance(pipeline_to_run, PipelineML):
            assert nb_artifacts == 1
            trained_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
            expected_signature = {
                "inputs": '[{"name": "a", "type": "long"}]',
                "outputs": None,
            }
            assert trained_model.metadata.signature.to_dict() == expected_signature
        else:
            assert nb_artifacts == 0
コード例 #12
0
def test_mlflow_hook_save_pipeline_ml_with_artifact_path(
    kedro_project_with_mlflow_conf,
    env_from_dict,
    dummy_pipeline,
    dummy_catalog,
    dummy_run_params,
    artifact_path,
    expected_artifact_path,
):
    """Check where the model is logged depending on ``artifact_path``.

    ``artifact_path`` is passed through ``log_model_kwargs`` only when it is
    not ``None``; the model must then be loadable from
    ``runs:/<run_id>/<expected_artifact_path>``.
    """
    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(
            project_path=kedro_project_with_mlflow_conf) as session:
        hook = MlflowHook()
        runner = SequentialRunner()

        log_model_kwargs = {"conda_env": env_from_dict}
        if artifact_path is not None:
            # we need to test what happens if the key is NOT present
            log_model_kwargs["artifact_path"] = artifact_path

        training_pipeline = dummy_pipeline.only_nodes_with_tags("training")
        inference_pipeline = dummy_pipeline.only_nodes_with_tags("inference")
        pipeline_to_run = pipeline_ml_factory(
            training=training_pipeline,
            inference=inference_pipeline,
            input_name="raw_data",
            log_model_kwargs=log_model_kwargs,
        )

        context = session.load_context()
        hook.after_context_created(context)
        # `after_catalog_created` is not using any of the arguments other
        # than `catalog`, so they are set to empty values.
        hook.after_catalog_created(
            catalog=dummy_catalog,
            conf_catalog={},
            conf_creds={},
            feed_dict={},
            save_version="",
            load_versions="",
        )

        hook_kwargs = dict(
            run_params=dummy_run_params,
            pipeline=pipeline_to_run,
            catalog=dummy_catalog,
        )
        hook.before_pipeline_run(**hook_kwargs)
        runner.run(pipeline_to_run, dummy_catalog, session._hook_manager)
        # the run id must be read before `after_pipeline_run` is called
        run_id = mlflow.active_run().info.run_id
        hook.after_pipeline_run(**hook_kwargs)

        model_uri = f"runs:/{run_id}/{expected_artifact_path}"
        trained_model = mlflow.pyfunc.load_model(model_uri)
        # the real test is that the model is loaded without error
        assert trained_model is not None
コード例 #13
0
def test_kedro_mlflow_config_with_use_env_tracking_uri(
    kedro_project_with_mlflow_conf,
):
    """Check that ``MLFLOW_TRACKING_URI`` is used when loading the context.

    With the environment variable set to "my_mlruns", the tracking URI of the
    loaded config must end with "my_mlruns" and the matching folder must exist
    inside the project.

    The previous value of the environment variable is restored afterwards so
    that the setting does not leak into other tests.
    """
    previous_uri = os.environ.get("MLFLOW_TRACKING_URI")
    os.environ["MLFLOW_TRACKING_URI"] = "my_mlruns"

    try:
        # default key server.mlflow_tracking_uri=None, so the environment
        # variable will be used
        bootstrap_project(kedro_project_with_mlflow_conf)
        with KedroSession.create(
            project_path=kedro_project_with_mlflow_conf
        ) as session:
            context = session.load_context()  # setup config

        assert context.mlflow.server.mlflow_tracking_uri.endswith("my_mlruns")
        assert (kedro_project_with_mlflow_conf / "my_mlruns").is_dir()
    finally:
        # put the environment back in the state it was in before the test
        if previous_uri is None:
            os.environ.pop("MLFLOW_TRACKING_URI", None)
        else:
            os.environ["MLFLOW_TRACKING_URI"] = previous_uri
コード例 #14
0
def test_mlflow_hook_save_pipeline_ml_with_signature(
    kedro_project_with_mlflow_conf,
    env_from_dict,
    dummy_pipeline,
    dummy_catalog,
    dummy_run_params,
    model_signature,
    expected_signature,
):
    """Check the signature stored with the model of a ``pipeline_ml_factory`` pipeline.

    ``model_signature`` is passed through ``log_model_kwargs``; the signature
    found in the metadata of the logged model must equal
    ``expected_signature``.
    """
    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(
            project_path=kedro_project_with_mlflow_conf) as session:
        hook = MlflowHook()
        runner = SequentialRunner()

        log_model_kwargs = {
            "conda_env": env_from_dict,
            "signature": model_signature,
        }
        training_pipeline = dummy_pipeline.only_nodes_with_tags("training")
        inference_pipeline = dummy_pipeline.only_nodes_with_tags("inference")
        pipeline_to_run = pipeline_ml_factory(
            training=training_pipeline,
            inference=inference_pipeline,
            input_name="raw_data",
            log_model_kwargs=log_model_kwargs,
        )

        context = session.load_context()
        hook.after_context_created(context)
        # `after_catalog_created` is not using any of the arguments other
        # than `catalog`, so they are set to empty values.
        hook.after_catalog_created(
            catalog=dummy_catalog,
            conf_catalog={},
            conf_creds={},
            feed_dict={},
            save_version="",
            load_versions="",
        )

        hook_kwargs = dict(
            run_params=dummy_run_params,
            pipeline=pipeline_to_run,
            catalog=dummy_catalog,
        )
        hook.before_pipeline_run(**hook_kwargs)
        runner.run(pipeline_to_run, dummy_catalog, session._hook_manager)
        # the run id must be read before `after_pipeline_run` is called
        run_id = mlflow.active_run().info.run_id
        hook.after_pipeline_run(**hook_kwargs)

        # the logged model must carry the expected signature
        trained_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
        assert trained_model.metadata.signature == expected_signature
コード例 #15
0
def test_kedro_mlflow_config_setup_set_tracking_uri(kedro_project_with_mlflow_conf):
    """Check that ``config.setup`` sets the global mlflow tracking URI.

    The relative "awesome_tracking" value of the config must become the URI
    of the folder of that name inside the project.
    """
    expected_uri = (kedro_project_with_mlflow_conf / "awesome_tracking").as_uri()

    config = KedroMlflowConfig(
        server=dict(mlflow_tracking_uri="awesome_tracking"),
        tracking=dict(experiment=dict(name="exp1")),
    )

    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session:
        config.setup(session.load_context())  # setup config

    assert mlflow.get_tracking_uri() == expected_uri
コード例 #16
0
    def test_bootstrap_project(self, mocker, monkeypatch, tmp_path):
        """Check the metadata and path side effects of ``bootstrap_project``.

        A minimal ``pyproject.toml`` and an empty ``src`` folder are created
        in ``tmp_path``. The call must return the matching ``ProjectMetadata``,
        make ``src`` appear in ``sys.path[0]`` and export it as ``PYTHONPATH``.
        """
        monkeypatch.delenv("PYTHONPATH", raising=False)
        # assume settings.py is okay
        mocker.patch("kedro.framework.project._validate_module")

        kedro_section = {
            "package_name": "fake_package_name",
            "project_name": "fake_project_name",
            "project_version": kedro_version,
        }
        pyproject_toml = tmp_path / "pyproject.toml"
        pyproject_toml.write_text(toml.dumps({"tool": {"kedro": kedro_section}}))
        src_dir = tmp_path / "src"
        src_dir.mkdir(exist_ok=True)

        result = bootstrap_project(tmp_path)

        # the metadata echoes the [tool.kedro] section plus the resolved paths
        expected = ProjectMetadata(
            config_file=pyproject_toml,
            project_path=tmp_path,
            source_dir=src_dir,
            **kedro_section,
        )
        assert result == expected
        assert str(src_dir) in sys.path[0]
        assert os.environ["PYTHONPATH"] == str(src_dir)
コード例 #17
0
def test_node_hook_logging_above_limit_tag_strategy(kedro_project,
                                                    dummy_run_params,
                                                    param_length):
    """Check the "tag" strategy for parameters of ``param_length`` characters.

    With ``long_params_strategy="tag"``, the parameter passed to
    ``before_node_run`` must not be recorded as an mlflow param; it must be
    found, untouched, in the tags of the run.
    """
    _write_yaml(
        kedro_project / "conf" / "local" / "mlflow.yml",
        dict(tracking=dict(params=dict(long_params_strategy="tag"))),
    )

    tracking_uri = (kedro_project / "mlruns").as_uri()
    mlflow.set_tracking_uri(tracking_uri)

    hook = MlflowHook()

    param_value = param_length * "a"
    node_inputs = {"params:my_param": param_value}

    bootstrap_project(kedro_project)
    with KedroSession.create(project_path=kedro_project) as session:
        hook.after_context_created(session.load_context())
        with mlflow.start_run():
            hook.before_pipeline_run(
                run_params=dummy_run_params,
                pipeline=Pipeline([]),
                catalog=DataCatalog(),
            )

            # IMPORTANT: exceeding the parameter length limit should raise
            # an error for every mlflow backend, but it does not on the
            # FileStore backend:
            # https://github.com/mlflow/mlflow/issues/2814#issuecomment-628284425
            # The tests use the FileStore backend for simplicity, so logging
            # would work there; the failure is therefore enforced (which is
            # slightly different from the mlflow behaviour).
            identity_node = node(
                func=lambda x: x, inputs=dict(x="a"), outputs=None)
            hook.before_node_run(
                node=identity_node,
                catalog=DataCatalog(),  # can be empty
                inputs=node_inputs,
                is_async=False,
            )
            run_id = mlflow.active_run().info.run_id

        run_data = MlflowClient(tracking_uri).get_run(run_id).data
        assert run_data.params == {}
        assert run_data.tags["my_param"] == param_value
コード例 #18
0
def test_kedro_mlflow_config_setup_export_credentials(kedro_project_with_mlflow_conf):
    """Check that ``config.setup`` exports credentials as environment variables.

    The ``my_mlflow_creds`` entry written to ``conf/base/credentials.yml`` is
    referenced by the config; after setup its ``fake_mlflow_cred`` key must be
    found in ``os.environ`` with the declared value.
    """
    credentials = dict(my_mlflow_creds=dict(fake_mlflow_cred="my_fake_cred"))
    credentials_path = kedro_project_with_mlflow_conf / "conf/base/credentials.yml"
    credentials_path.write_text(yaml.dump(credentials))

    # the config only points at the credentials entry by name
    config = KedroMlflowConfig(server=dict(credentials="my_mlflow_creds"))

    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session:
        config.setup(session.load_context())  # setup config

    assert os.environ["fake_mlflow_cred"] == "my_fake_cred"
コード例 #19
0
def test_kedro_mlflow_config_new_experiment_does_not_exists(
    kedro_project_with_mlflow_conf,
):
    """Check that "exp1" is listed by the mlflow client after ``config.setup``."""
    config = KedroMlflowConfig(
        server=dict(mlflow_tracking_uri="mlruns"),
        tracking=dict(experiment=dict(name="exp1")),
    )

    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session:
        config.setup(session.load_context())  # setup config

    experiment_names = [
        exp.name for exp in config.server._mlflow_client.list_experiments()
    ]
    assert "exp1" in experiment_names
コード例 #20
0
def test_node_hook_logging(
    kedro_project,
    dummy_run_params,
    dummy_catalog,
    dummy_pipeline,
    dummy_node,
    flatten,
    expected,
):
    """Check the params logged by ``before_node_run`` for a given ``flatten``.

    ``mlflow.yml`` is written with the requested ``flatten`` value; the params
    of the run recorded while the hook processes ``dummy_node`` must equal
    ``expected``.
    """
    dict_params_conf = dict(flatten=flatten, recursive=False, sep="-")
    _write_yaml(
        kedro_project / "conf" / "base" / "mlflow.yml",
        dict(tracking=dict(params=dict(dict_params=dict_params_conf))),
    )

    # map each dataset name consumed by the node to its catalog entry
    node_inputs = {
        name: dummy_catalog._data_sets.get(name)
        for name in dummy_node._inputs.values()
    }

    tracking_uri = (kedro_project / "mlruns").as_uri()

    bootstrap_project(kedro_project)
    with KedroSession.create(project_path=kedro_project) as session:
        context = session.load_context()
        hook = MlflowHook()
        hook.after_context_created(context)  # setup mlflow_config
        mlflow.set_tracking_uri(tracking_uri)
        with mlflow.start_run():
            hook.before_pipeline_run(
                run_params=dummy_run_params,
                pipeline=dummy_pipeline,
                catalog=dummy_catalog,
            )
            hook.before_node_run(
                node=dummy_node,
                catalog=dummy_catalog,
                inputs=node_inputs,
                is_async=False,
            )
            run_id = mlflow.active_run().info.run_id

        logged_params = MlflowClient(tracking_uri).get_run(run_id).data.params
        assert logged_params == expected
コード例 #21
0
def test_modelify_with_artifact_path_arg(monkeypatch, kp_for_modelify):
    """Check that ``modelify --artifact-path`` logs the model under that path.

    The run created by the command must list an artifact whose path is
    "my_new_model".
    """
    monkeypatch.chdir(kp_for_modelify)

    cli_runner = CliRunner()

    bootstrap_project(Path().cwd())
    with KedroSession.create() as session:
        context = session.load_context()
        catalog = context.catalog
        catalog.save("trained_model", 2)

    def _run_ids():
        # ids of all runs currently recorded in the project experiment
        run_infos = context.mlflow.server._mlflow_client.list_run_infos(
            context.mlflow.tracking.experiment._experiment.experiment_id)
        return {run_info.run_id for run_info in run_infos}

    run_ids_before = _run_ids()

    cmd = [
        "--pipeline",
        "inference",
        "--input-name",
        "my_input_data",
        "--artifact-path",
        "my_new_model",
    ]
    result = cli_runner.invoke(cli_modelify, cmd, catch_exceptions=True)

    run_ids_after = _run_ids()

    # the run created by the command is the one that was not there before
    new_run_ids = run_ids_after - run_ids_before

    assert result.exit_code == 0
    logged_artifacts = context.mlflow.server._mlflow_client.list_artifacts(
        list(new_run_ids)[0])
    assert "my_new_model" in [artifact.path for artifact in logged_artifacts]
コード例 #22
0
    def __init__(self, project_path: Path):
        """Set up the metadata, the CLI hook manager and the command groups.

        Args:
            project_path: Path checked with ``_is_project``; when the check
                passes, the project is bootstrapped and its metadata stored.
        """
        if _is_project(project_path):
            self._metadata = bootstrap_project(project_path)
        else:
            self._metadata = None  # running in package mode
        self._cli_hook_manager = CLIHooksManager()

        # (title, command groups) pairs handed over to the parent class
        global_section = ("Global commands", self.global_groups)
        project_section = ("Project specific commands", self.project_groups)
        super().__init__(global_section, project_section)
コード例 #23
0
def test_kedro_mlflow_config_experiment_exists(kedro_project_with_mlflow_conf):
    """Check that ``config.setup`` works when the experiment already exists.

    "exp1" is created beforehand in the very tracking store the config will
    use, so that setup meets an existing experiment instead of creating one.
    """
    # Create an experiment with the same name. The config below declares the
    # relative "mlruns" tracking URI, and the sibling tests of this file
    # expect such a value to resolve to the folder of that name at the
    # project root, so the experiment is created there.
    mlflow_tracking_uri = (kedro_project_with_mlflow_conf / "mlruns").as_uri()
    MlflowClient(mlflow_tracking_uri).create_experiment("exp1")
    config = KedroMlflowConfig(
        server=dict(mlflow_tracking_uri="mlruns"),
        tracking=dict(experiment=dict(name="exp1")),
    )

    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session:
        context = session.load_context()  # setup config
        config.setup(context)

    assert "exp1" in [
        exp.name for exp in config.server._mlflow_client.list_experiments()
    ]
コード例 #24
0
def test_on_pipeline_error(kedro_project_with_mlflow_conf):
    """Check the state of mlflow after a pipeline run raising ``ValueError``.

    No mlflow run may remain active, and the first run listed for the
    configured experiment must be marked as failed.
    """
    tracking_uri = (kedro_project_with_mlflow_conf / "mlruns").as_uri()

    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(
            project_path=kedro_project_with_mlflow_conf) as session:
        context = session.load_context()
        with pytest.raises(ValueError):
            session.run()

        # the run we want is the last one in the configuration experiment
        client = MlflowClient(tracking_uri)
        experiment_name = context.mlflow.tracking.experiment.name
        experiment = client.get_experiment_by_name(experiment_name)
        run_infos = client.list_run_infos(experiment.experiment_id)
        failing_run_info = run_infos[0]

        # the run must have been closed ...
        assert mlflow.active_run() is None
        # ... and it must be marked as failed
        failed_status = RunStatus.to_string(RunStatus.FAILED)
        assert failing_run_info.status == failed_status
コード例 #25
0
def test_cli_init_existing_config_force_option(monkeypatch, kedro_project,
                                               mock_settings_fake_project):
    """Check that ``init --force`` replaces an existing mlflow.yml.

    A ``mlflow.yml`` declaring the "toto" tracking URI is written first; after
    the forced command the output must report a successful update and the
    loaded tracking URI must end with "mlruns".
    """
    # "kedro_project" is a pytest.fixture declared in conftest
    monkeypatch.chdir(kedro_project)
    cli_runner = CliRunner()

    bootstrap_project(kedro_project)
    with KedroSession.create(project_path=kedro_project) as session:

        # emulate first call by writing a mlflow.yml file
        yaml_str = yaml.dump(dict(server=dict(mlflow_tracking_uri="toto")))
        mlflow_yml = (kedro_project / mock_settings_fake_project.CONF_SOURCE /
                      "local" / "mlflow.yml")
        mlflow_yml.write_text(yaml_str)

        result = cli_runner.invoke(cli_init, args="--force")

        # the command output must report a successful update
        assert "successfully updated" in result.output

        # the "toto" URI written above must be gone from the loaded config
        context = session.load_context()
        assert context.mlflow.server.mlflow_tracking_uri.endswith("mlruns")
コード例 #26
0
def test_node_hook_logging_above_limit_truncate_strategy(
        kedro_project, dummy_run_params, param_length):
    """Check the "truncate" strategy for parameters of ``param_length`` characters.

    With ``long_params_strategy="truncate"``, the parameter passed to
    ``before_node_run`` must be logged cut down to its first
    ``MAX_PARAM_VAL_LENGTH`` characters.
    """
    _write_yaml(
        kedro_project / "conf" / "local" / "mlflow.yml",
        dict(tracking=dict(params=dict(long_params_strategy="truncate"))),
    )

    tracking_uri = (kedro_project / "mlruns").as_uri()
    mlflow.set_tracking_uri(tracking_uri)

    param_value = param_length * "a"
    node_inputs = {"params:my_param": param_value}

    bootstrap_project(kedro_project)
    with KedroSession.create(project_path=kedro_project) as session:
        context = session.load_context()
        hook = MlflowHook()
        hook.after_context_created(context)
        with mlflow.start_run():
            hook.before_pipeline_run(
                run_params=dummy_run_params,
                pipeline=Pipeline([]),
                catalog=DataCatalog(),
            )
            identity_node = node(
                func=lambda x: x, inputs=dict(x="a"), outputs=None)
            hook.before_node_run(
                node=identity_node,
                catalog=DataCatalog(),  # can be empty
                inputs=node_inputs,
                is_async=False,
            )
            run_id = mlflow.active_run().info.run_id

        logged_params = MlflowClient(tracking_uri).get_run(run_id).data.params
        truncated_value = param_value[:MAX_PARAM_VAL_LENGTH]
        assert logged_params == {"my_param": truncated_value}
コード例 #27
0
def test_cli_init_existing_config(monkeypatch, kedro_project_with_mlflow_conf,
                                  mock_settings_fake_project):
    """Check that ``init`` without ``--force`` keeps an existing ``mlflow.yml``.

    A recognisable ``mlflow.yml`` is written first; after invoking the command
    the output must report that the file already exists and the tracking uri
    loaded from the context must still end with the sentinel value.
    """
    # "kedro_project_with_mlflow_conf" is a pytest fixture
    # (presumably declared in conftest).
    project_dir = kedro_project_with_mlflow_conf
    runner = CliRunner()
    monkeypatch.chdir(project_dir)
    bootstrap_project(project_dir)

    with KedroSession.create("fake_project",
                             project_path=project_dir) as session:
        # Emulate a previous call by writing a mlflow.yml with a sentinel uri.
        sentinel_conf = yaml.dump({"server": {"mlflow_tracking_uri": "toto"}})
        conf_dir = project_dir / mock_settings_fake_project.CONF_SOURCE
        (conf_dir / "local" / "mlflow.yml").write_text(sentinel_conf)

        result = runner.invoke(cli_init)

        # The command must report that the file is already there...
        assert "A 'mlflow.yml' already exists" in result.output

        # ...and the file on disk must remain unmodified.
        tracking_uri = session.load_context().mlflow.server.mlflow_tracking_uri
        assert tracking_uri.endswith("toto")
コード例 #28
0
def test_kedro_mlflow_config_experiment_was_deleted(kedro_project_with_mlflow_conf):
    """Check that ``setup`` makes a previously deleted experiment listable again.

    An experiment named ``exp1`` is created and deleted, then a config that
    targets the same name is set up; ``exp1`` must appear in the experiments
    listed by the config's client afterwards.
    """
    project_dir = kedro_project_with_mlflow_conf

    # Create an experiment with the target name, then delete it.
    client = MlflowClient((project_dir / "mlruns").as_uri())
    client.create_experiment("exp1")
    deleted_id = client.get_experiment_by_name("exp1").experiment_id
    client.delete_experiment(deleted_id)

    # The config must properly restore the experiment.
    config = KedroMlflowConfig(
        server={"mlflow_tracking_uri": "mlruns"},
        tracking={"experiment": {"name": "exp1"}},
    )

    bootstrap_project(project_dir)
    with KedroSession.create(project_path=project_dir) as session:
        config.setup(session.load_context())

    experiment_names = [
        experiment.name
        for experiment in config.server._mlflow_client.list_experiments()
    ]
    assert "exp1" in experiment_names
コード例 #29
0
def reload_kedro(path, env: str = None, extra_params: Dict[str, Any] = None):
    """Reload the Kedro project and refresh the IPython variables it exposes.

    Args:
        path: Project location; when falsy, the module-level ``project_path``
            is used instead.
        env: Forwarded to ``KedroSession.create`` as ``env``.
        extra_params: Forwarded to ``KedroSession.create`` as ``extra_params``.

    After the call, ``context``, ``catalog``, ``session`` and ``pipelines``
    are pushed into the IPython namespace and every ``line_magic`` entry
    point is registered.
    """

    import kedro.config.default_logger  # noqa: F401 # pylint: disable=unused-import
    from kedro.framework.cli import load_entry_points
    from kedro.framework.project import pipelines
    from kedro.framework.session import KedroSession
    from kedro.framework.session.session import _activate_session
    from kedro.framework.startup import bootstrap_project

    _clear_hook_manager()

    project_root = path or project_path
    project_metadata = bootstrap_project(project_root)
    package_name = project_metadata.package_name

    _remove_cached_modules(package_name)

    new_session = KedroSession.create(
        package_name,
        project_root,
        env=env,
        extra_params=extra_params,
    )
    _activate_session(new_session, force=True)

    logging.debug("Loading the context from %s", str(project_root))
    new_context = new_session.load_context()

    exposed_variables = {
        "context": new_context,
        "catalog": new_context.catalog,
        "session": new_session,
        "pipelines": pipelines,
    }
    get_ipython().push(variables=exposed_variables)

    logging.info("** Kedro project %s", str(project_metadata.project_name))
    logging.info(
        "Defined global variable `context`, `session`, `catalog` and `pipelines`"
    )

    for magic in load_entry_points("line_magic"):
        register_line_magic(needs_local_scope(magic))
        logging.info("Registered line magic `%s`",
                     magic.__name__)  # type: ignore
コード例 #30
0
ファイル: cli.py プロジェクト: Galileo-Galilei/kedro-mlflow
def init(env: str, force: bool, silent: bool):
    """Updates the template of a kedro project.
    Running this command is mandatory to use kedro-mlflow.
    This adds "conf/base/mlflow.yml": This is a configuration file
    used for run parametrization when calling "kedro run" command.
    """
    # NOTE(review): the docstring above is left untouched on purpose; it is
    # presumably surfaced as the command's help text -- confirm before editing.

    config_name = "mlflow.yml"
    root_dir = Path().cwd()
    # Bootstrap first: ``settings.CONF_SOURCE`` is read only after this call.
    metadata = bootstrap_project(root_dir)
    target_path = root_dir / settings.CONF_SOURCE / env / config_name

    # Guard clause: never overwrite an existing file unless ``force`` is set.
    if target_path.is_file() and not force:
        already_exists_msg = (
            f"A 'mlflow.yml' already exists at '{target_path}' You can use the ``--force`` option to override it."
        )
        click.secho(click.style(already_exists_msg, fg="red"))
        return

    # The file is rendered from a template; the project's package name is
    # handed to the template renderer as ``python_package``.
    try:
        write_jinja_template(
            src=TEMPLATE_FOLDER_PATH / config_name,
            is_cookiecutter=False,
            dst=target_path,
            python_package=metadata.package_name,
        )
        if not silent:
            success_msg = (
                f"'{settings.CONF_SOURCE}/{env}/{config_name}' successfully updated."
            )
            click.secho(click.style(success_msg, fg="green"))
    except FileNotFoundError:
        # Reported to the user as a missing environment folder.
        missing_env_msg = (
            f"No env '{env}' found. Please check this folder exists inside '{settings.CONF_SOURCE}' folder."
        )
        click.secho(click.style(missing_env_msg, fg="red"))