def test_pipeline_run_hook_getting_configs(
    kedro_project,
    dummy_run_params,
    dummy_pipeline,
    dummy_catalog,
):
    """Check that the hook exposes the ``dict_params`` options of mlflow.yml.

    A ``conf/local/mlflow.yml`` is written with explicit ``flatten``,
    ``recursive`` and ``sep`` values; after ``after_context_created`` and
    ``before_pipeline_run`` the hook attributes must carry those values.
    """
    dict_params_conf = dict(flatten=True, recursive=False, sep="-")
    _write_yaml(
        kedro_project / "conf" / "local" / "mlflow.yml",
        dict(tracking=dict(params=dict(dict_params=dict_params_conf))),
    )

    bootstrap_project(kedro_project)
    with KedroSession.create(project_path=kedro_project) as session:
        context = session.load_context()

        hook = MlflowHook()
        hook.after_context_created(context)
        hook.before_pipeline_run(
            run_params=dummy_run_params,
            pipeline=dummy_pipeline,
            catalog=dummy_catalog,
        )

        observed = (hook.flatten, hook.recursive, hook.sep)
        assert observed == (True, False, "-")
def test_modelify_logs_in_mlflow(monkeypatch, example_repo, artifacts_list):
    """Check that ``modelify`` succeeds and adds exactly one run to mlflow.

    The command output must mention every artifact of ``artifacts_list`` and
    the success message, and the experiment must contain one more run after
    the command than before.
    """
    monkeypatch.chdir(example_repo)
    project_dir = Path.cwd()

    bootstrap_project(project_dir)
    with KedroSession.create(project_path=project_dir) as session:
        context = session.load_context()
        context.catalog.save("trained_model", 2)

        def _list_runs():
            # Runs currently stored in the experiment configured for the project.
            return context.mlflow.server._mlflow_client.list_run_infos(
                context.mlflow.tracking.experiment._experiment.experiment_id
            )

        runs_before = _list_runs()
        result = CliRunner().invoke(
            cli_modelify,
            ["--pipeline", "inference", "--input-name", "my_input_data"],
            catch_exceptions=True,
        )
        runs_after = _list_runs()

        assert result.exit_code == 0
        for artifact in artifacts_list:
            expected_message = (
                f"The data_set '{artifact}' is added to the Pipeline catalog"
            )
            assert expected_message in result.output
        assert "Model successfully logged" in result.output
        assert len(runs_after) - len(runs_before) == 1
def ui(env: str, port: str, host: str):
    """Opens the mlflow user interface with the
    project-specific settings of mlflow.yml. This interface
    enables to browse and compares runs.
    """
    # NOTE: the docstring above is kept verbatim on purpose -- the function
    # appears to be exposed as a CLI command (decorators are not visible in
    # this excerpt), in which case the docstring is the user-facing help text.
    project_path = Path.cwd()
    bootstrap_project(project_path)

    with KedroSession.create(project_path=project_path, env=env) as session:
        context = session.load_context()

        # Explicit arguments win over the values stored in the mlflow config.
        host = host or context.mlflow.ui.host
        port = port or context.mlflow.ui.port

        tracking_uri = context.mlflow.server.mlflow_tracking_uri
        if tracking_uri.startswith("http"):
            # The tracking URI is itself an http(s) address: open it in the
            # browser instead of launching a local UI.
            webbrowser.open(tracking_uri)
        else:
            # call mlflow ui with specific options
            # TODO : add more options for ui
            command = [
                "mlflow",
                "ui",
                "--backend-store-uri",
                tracking_uri,
                "--host",
                host,
                "--port",
                port,
            ]
            subprocess.call(command)
def test_mlflow_config_default(kedro_project):
    """Check that the mlflow.yml content is loaded unchanged into the context.

    The only expected difference is ``server.mlflow_tracking_uri``, which must
    be the URI of ``<project>/mlruns`` instead of the relative ``"mlruns"``.
    """
    # kedro_project is a pytest.fixture in conftest
    dict_config = {
        "server": {
            "mlflow_tracking_uri": "mlruns",
            "credentials": None,
        },
        "tracking": {
            "disable_tracking": {"pipelines": ["my_disabled_pipeline"]},
            "experiment": {"name": "fake_package", "restore_if_deleted": True},
            "run": {"id": "123456789", "name": "my_run", "nested": True},
            "params": {
                "dict_params": {
                    "flatten": True,
                    "recursive": False,
                    "sep": "-",
                },
                "long_params_strategy": "truncate",
            },
        },
        "ui": {"port": "5151", "host": "localhost"},
    }

    _write_yaml(kedro_project / "conf" / "local" / "mlflow.yml", dict_config)

    # The copy is shallow, so the nested "server" dict is shared with
    # ``dict_config``; the file has already been written at this point.
    expected = dict_config.copy()
    expected["server"]["mlflow_tracking_uri"] = (kedro_project / "mlruns").as_uri()

    bootstrap_project(kedro_project)
    with KedroSession.create(project_path=kedro_project) as session:
        context = session.load_context()
        loaded_config = context.mlflow.dict(exclude={"project_path"})
        assert loaded_config == expected
def test_mlflow_hook_automatically_prefix_metrics_dataset(
    kedro_project_with_mlflow_conf, dummy_catalog
):
    """Check the prefix / key of metric datasets after ``after_catalog_created``.

    Each dataset of ``dummy_catalog`` checked below must end up with either
    its own catalog name or ``"foo"`` as ``_prefix`` (metrics) or ``key``
    (metric).
    """
    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session:
        # Loading the context triggers the configuration setup.
        context = session.load_context()

        hook = MlflowHook()
        hook.after_context_created(context)  # setup mlflow config

        # Apart from ``catalog``, the arguments are passed as empty values.
        hook.after_catalog_created(
            catalog=dummy_catalog,
            conf_catalog={},
            conf_creds={},
            feed_dict={},
            save_version="",
            load_versions="",
        )

        datasets = dummy_catalog._data_sets

        # "metrics" datasets expose the prefix through ``_prefix`` ...
        assert datasets["my_metrics"]._prefix == "my_metrics"
        assert datasets["another_metrics"]._prefix == "foo"
        # ... while "metric" datasets expose it through ``key``.
        assert datasets["my_metric"].key == "my_metric"
        assert datasets["another_metric"].key == "foo"
def test_kedro_mlflow_config_setup_tracking_priority(kedro_project_with_mlflow_conf):
    """Test if the mlflow_tracking uri set is the one of mlflow.yml
    if it also exist in credentials.

    The credentials file written for this test is emptied again in a
    ``finally`` clause, so the reset also happens when the setup or the
    assertion fails and cannot interfere with other tests.
    """
    credentials_path = kedro_project_with_mlflow_conf / "conf/base/credentials.yml"

    # Credentials declare a *different* tracking uri ("mlruns2") than the
    # config below ("mlruns1"); the config value is expected to win.
    credentials_path.write_text(
        yaml.dump(dict(my_mlflow_creds=dict(mlflow_tracking_uri="mlruns2")))
    )

    try:
        config = KedroMlflowConfig(
            server=dict(
                mlflow_tracking_uri="mlruns1",
                credentials="my_mlflow_creds",
            ),
        )

        bootstrap_project(kedro_project_with_mlflow_conf)
        with KedroSession.create(
            project_path=kedro_project_with_mlflow_conf
        ) as session:
            context = session.load_context()
            config.setup(context)

            assert (
                mlflow.get_tracking_uri()
                == (kedro_project_with_mlflow_conf / "mlruns1").as_uri()
            )
    finally:
        # reset folder to avoid interference with other tests
        credentials_path.write_text("")
def test_kedro_mlflow_config_setup_set_experiment_globally(
    kedro_project_with_mlflow_conf,
):
    """Check that ``config.setup`` makes plain ``mlflow.start_run`` use the experiment.

    A run started without any explicit experiment after the setup must be
    recorded in the experiment declared in the configuration.
    """
    tracking_uri = (kedro_project_with_mlflow_conf / "mlruns").as_uri()

    config = KedroMlflowConfig(
        server=dict(mlflow_tracking_uri="mlruns"),
        tracking=dict(experiment=dict(name="incredible_exp")),
    )

    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session:
        context = session.load_context()
        config.setup(context)

        client = MlflowClient(tracking_uri)
        runs_before = client.list_run_infos(
            config.tracking.experiment._experiment.experiment_id
        )

        # "Interactive" run: no experiment is specified explicitly.
        with mlflow.start_run():
            mlflow.log_param("a", 1)
            my_run_id = mlflow.active_run().info.run_id

        runs_after = client.list_run_infos(
            config.tracking.experiment._experiment.experiment_id
        )

        assert len(runs_after) - len(runs_before) == 1
        assert runs_after[0].run_id == my_run_id
def test_mlflow_config_in_uninitialized_project(kedro_project):
    """Check that loading the context without any mlflow.yml raises.

    ``kedro_project`` is a pytest fixture from conftest; the expected error is
    a ``KedroMlflowConfigError`` mentioning the missing configuration file.
    """
    expected_message = "No 'mlflow.yml' config file found in environment"
    with pytest.raises(KedroMlflowConfigError, match=expected_message):
        bootstrap_project(kedro_project)
        with KedroSession.create(project_path=kedro_project) as session:
            session.load_context()
def test_modelify_with_infer_signature_arg(monkeypatch, kp_for_modelify_persistent_input):
    """Check that ``--infer-signature`` stores the input schema with the model.

    The model logged by the command is reloaded from the newly created run
    and its input schema must match the columns of the saved input data.
    """
    monkeypatch.chdir(kp_for_modelify_persistent_input)

    cli_runner = CliRunner()

    bootstrap_project(Path().cwd())
    my_input_data = pd.DataFrame(
        {"col_int": [1, 2, 3], "col_str": ["a", "b", "c"]}
    )

    with KedroSession.create() as session:
        context = session.load_context()
        catalog = context.catalog
        catalog.save("trained_model", 2)
        catalog.save("my_input_data", my_input_data)

        def _run_ids():
            # Identifiers of the runs currently stored in the experiment.
            run_infos = context.mlflow.server._mlflow_client.list_run_infos(
                context.mlflow.tracking.experiment._experiment.experiment_id
            )
            return {run_info.run_id for run_info in run_infos}

        run_ids_before = _run_ids()

        cli_args = [
            "--pipeline",
            "inference",
            "--input-name",
            "my_input_data",
            "--infer-signature",
        ]
        result = cli_runner.invoke(cli_modelify, cli_args, catch_exceptions=True)

        assert result.exit_code == 0

        run_ids_after = _run_ids()

        # The run created by the command is the one missing from the first set.
        new_run_id = list(run_ids_after - run_ids_before)[0]
        loaded_model = mlflow.pyfunc.load_model(f"runs:/{new_run_id}/model")

        expected_schema = [
            {"name": "col_int", "type": "long"},
            {"name": "col_str", "type": "string"},
        ]
        assert loaded_model.metadata.get_input_schema().to_dict() == expected_schema
def test_modelify_with_infer_input_example(
    monkeypatch, kp_for_modelify_persistent_input, flag_infer_signature
):
    """Check that ``--infer-input-example`` stores an input example with the model.

    ``--infer-signature`` is appended to the command when
    ``flag_infer_signature`` is truthy; the saved input example metadata is
    expected to be the same in both cases.
    """
    monkeypatch.chdir(kp_for_modelify_persistent_input)

    cli_runner = CliRunner()

    bootstrap_project(Path().cwd())
    my_input_data = pd.DataFrame(
        {"col_int": [1, 2, 3], "col_str": ["a", "b", "c"]}
    )

    with KedroSession.create() as session:
        context = session.load_context()
        catalog = context.catalog
        catalog.save("trained_model", 2)
        catalog.save("my_input_data", my_input_data)

        def _run_ids():
            # Identifiers of the runs currently stored in the experiment.
            run_infos = context.mlflow.server._mlflow_client.list_run_infos(
                context.mlflow.tracking.experiment._experiment.experiment_id
            )
            return {run_info.run_id for run_info in run_infos}

        run_ids_before = _run_ids()

        cmd = [
            "--pipeline",
            "inference",
            "--input-name",
            "my_input_data",
            "--infer-input-example",
        ]
        if flag_infer_signature:
            cmd.append("--infer-signature")

        result = cli_runner.invoke(cli_modelify, cmd, catch_exceptions=True)

        assert result.exit_code == 0

        run_ids_after = _run_ids()

        # The run created by the command is the one missing from the first set.
        new_run_id = list(run_ids_after - run_ids_before)[0]
        loaded_model = mlflow.pyfunc.load_model(f"runs:/{new_run_id}/model")

        expected_example_info = {
            "artifact_path": "input_example.json",
            "pandas_orient": "split",
            "type": "dataframe",
        }
        assert loaded_model.metadata.saved_input_example_info == expected_example_info
def test_mlflow_hook_save_pipeline_ml(
    kedro_project_with_mlflow_conf,
    pipeline_to_run,
    dummy_catalog,
    dummy_run_params,
):
    """Run a pipeline through the hook and check what ends up in mlflow.

    Truthy run parameters must be recorded as tags. A model artifact (with
    its signature) must be logged only when ``pipeline_to_run`` is a
    ``PipelineML``; otherwise the run must have no artifact.
    """
    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session:
        # Loading the context triggers the configuration setup.
        context = session.load_context()

        hook = MlflowHook()
        hook.after_context_created(context)  # setup mlflow config

        runner = SequentialRunner()

        # Apart from ``catalog``, the arguments are passed as empty values.
        hook.after_catalog_created(
            catalog=dummy_catalog,
            conf_catalog={},
            conf_creds={},
            feed_dict={},
            save_version="",
            load_versions="",
        )
        hook.before_pipeline_run(
            run_params=dummy_run_params,
            pipeline=pipeline_to_run,
            catalog=dummy_catalog,
        )
        runner.run(pipeline_to_run, dummy_catalog, session._hook_manager)

        # The run id is read before ``after_pipeline_run`` is called.
        run_id = mlflow.active_run().info.run_id
        hook.after_pipeline_run(
            run_params=dummy_run_params,
            pipeline=pipeline_to_run,
            catalog=dummy_catalog,
        )

        client = MlflowClient(context.mlflow.server.mlflow_tracking_uri)
        run_data = client.get_run(run_id).data

        # all (truthy) run_params are recorded as tags
        for key, value in dummy_run_params.items():
            if value:
                assert run_data.tags[key] == str(value)

        # the model must be logged only for a PipelineML
        nb_artifacts = len(client.list_artifacts(run_id))
        if isinstance(pipeline_to_run, PipelineML):
            assert nb_artifacts == 1
            trained_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
            assert trained_model.metadata.signature.to_dict() == {
                "inputs": '[{"name": "a", "type": "long"}]',
                "outputs": None,
            }
        else:
            assert nb_artifacts == 0
def test_mlflow_hook_save_pipeline_ml_with_artifact_path(
    kedro_project_with_mlflow_conf,
    env_from_dict,
    dummy_pipeline,
    dummy_catalog,
    dummy_run_params,
    artifact_path,
    expected_artifact_path,
):
    """Check that the model is logged under the expected artifact path.

    When ``artifact_path`` is ``None`` the key is left out of
    ``log_model_kwargs`` altogether, so the "key absent" case is covered too.
    """
    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session:
        hook = MlflowHook()
        runner = SequentialRunner()

        log_model_kwargs = {"conda_env": env_from_dict}
        if artifact_path is not None:
            # we need to test what happens if the key is NOT present
            log_model_kwargs["artifact_path"] = artifact_path

        pipeline_to_run = pipeline_ml_factory(
            training=dummy_pipeline.only_nodes_with_tags("training"),
            inference=dummy_pipeline.only_nodes_with_tags("inference"),
            input_name="raw_data",
            log_model_kwargs=log_model_kwargs,
        )

        context = session.load_context()
        hook.after_context_created(context)

        # Apart from ``catalog``, the arguments are passed as empty values.
        hook.after_catalog_created(
            catalog=dummy_catalog,
            conf_catalog={},
            conf_creds={},
            feed_dict={},
            save_version="",
            load_versions="",
        )
        hook.before_pipeline_run(
            run_params=dummy_run_params,
            pipeline=pipeline_to_run,
            catalog=dummy_catalog,
        )
        runner.run(pipeline_to_run, dummy_catalog, session._hook_manager)

        # The run id is read before ``after_pipeline_run`` is called.
        run_id = mlflow.active_run().info.run_id
        hook.after_pipeline_run(
            run_params=dummy_run_params,
            pipeline=pipeline_to_run,
            catalog=dummy_catalog,
        )

        # the real test is that the model is loaded without error
        model_uri = f"runs:/{run_id}/{expected_artifact_path}"
        trained_model = mlflow.pyfunc.load_model(model_uri)
        assert trained_model is not None
def test_kedro_mlflow_config_with_use_env_tracking_uri(
    kedro_project_with_mlflow_conf,
):
    """Check that ``MLFLOW_TRACKING_URI`` is used when set in the environment.

    The environment variable is set for the duration of the test only: its
    previous state is restored in a ``finally`` clause so that the value does
    not leak into tests executed afterwards.
    """
    env_key = "MLFLOW_TRACKING_URI"
    previous_value = os.environ.get(env_key)
    os.environ[env_key] = "my_mlruns"

    try:
        # default key server.mlflow_tracking_uri=None, so the environment
        # variable will be used
        bootstrap_project(kedro_project_with_mlflow_conf)
        with KedroSession.create(
            project_path=kedro_project_with_mlflow_conf
        ) as session:
            context = session.load_context()  # setup config

            assert context.mlflow.server.mlflow_tracking_uri.endswith("my_mlruns")
            assert (kedro_project_with_mlflow_conf / "my_mlruns").is_dir()
    finally:
        # Put the environment back exactly as it was before the test.
        if previous_value is None:
            os.environ.pop(env_key, None)
        else:
            os.environ[env_key] = previous_value
def test_mlflow_hook_save_pipeline_ml_with_signature(
    kedro_project_with_mlflow_conf,
    env_from_dict,
    dummy_pipeline,
    dummy_catalog,
    dummy_run_params,
    model_signature,
    expected_signature,
):
    """Check that the signature given in ``log_model_kwargs`` is stored.

    The model logged by the hook is reloaded from the run and its metadata
    signature is compared with ``expected_signature``.
    """
    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session:
        hook = MlflowHook()
        runner = SequentialRunner()

        log_model_kwargs = {
            "conda_env": env_from_dict,
            "signature": model_signature,
        }
        pipeline_to_run = pipeline_ml_factory(
            training=dummy_pipeline.only_nodes_with_tags("training"),
            inference=dummy_pipeline.only_nodes_with_tags("inference"),
            input_name="raw_data",
            log_model_kwargs=log_model_kwargs,
        )

        context = session.load_context()
        hook.after_context_created(context)

        # Apart from ``catalog``, the arguments are passed as empty values.
        hook.after_catalog_created(
            catalog=dummy_catalog,
            conf_catalog={},
            conf_creds={},
            feed_dict={},
            save_version="",
            load_versions="",
        )
        hook.before_pipeline_run(
            run_params=dummy_run_params,
            pipeline=pipeline_to_run,
            catalog=dummy_catalog,
        )
        runner.run(pipeline_to_run, dummy_catalog, session._hook_manager)

        # The run id is read before ``after_pipeline_run`` is called.
        run_id = mlflow.active_run().info.run_id
        hook.after_pipeline_run(
            run_params=dummy_run_params,
            pipeline=pipeline_to_run,
            catalog=dummy_catalog,
        )

        trained_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
        assert trained_model.metadata.signature == expected_signature
def test_kedro_mlflow_config_setup_set_tracking_uri(kedro_project_with_mlflow_conf):
    """Check that ``config.setup`` sets the global mlflow tracking uri.

    The relative ``"awesome_tracking"`` value of the config must become the
    URI of ``<project>/awesome_tracking``.
    """
    expected_uri = (kedro_project_with_mlflow_conf / "awesome_tracking").as_uri()

    config = KedroMlflowConfig(
        server=dict(mlflow_tracking_uri="awesome_tracking"),
        tracking=dict(experiment=dict(name="exp1")),
    )

    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session:
        context = session.load_context()
        config.setup(context)

        assert mlflow.get_tracking_uri() == expected_uri
def test_bootstrap_project(self, mocker, monkeypatch, tmp_path):
    """Check the metadata returned by ``bootstrap_project`` and its side effects.

    A minimal ``pyproject.toml`` and a ``src`` folder are created in
    ``tmp_path``. Besides the returned ``ProjectMetadata``, the test checks
    that the source dir shows up in ``sys.path[0]`` and in ``PYTHONPATH``.
    """
    monkeypatch.delenv("PYTHONPATH", raising=False)
    # assume settings.py is okay
    mocker.patch("kedro.framework.project._validate_module")

    kedro_section = {
        "package_name": "fake_package_name",
        "project_name": "fake_project_name",
        "project_version": kedro_version,
    }
    pyproject_toml = tmp_path / "pyproject.toml"
    pyproject_toml.write_text(toml.dumps({"tool": {"kedro": kedro_section}}))

    src_dir = tmp_path / "src"
    src_dir.mkdir(exist_ok=True)

    result = bootstrap_project(tmp_path)

    expected = ProjectMetadata(
        config_file=pyproject_toml,
        package_name="fake_package_name",
        project_name="fake_project_name",
        project_path=tmp_path,
        project_version=kedro_version,
        source_dir=src_dir,
    )
    assert result == expected
    assert str(src_dir) in sys.path[0]
    assert os.environ["PYTHONPATH"] == str(src_dir)
def test_node_hook_logging_above_limit_tag_strategy(
    kedro_project, dummy_run_params, param_length
):
    """Check the ``"tag"`` strategy for a parameter of ``param_length`` characters.

    With ``long_params_strategy="tag"`` the value must not be stored as a
    parameter of the run but as a tag named after the parameter.
    """
    _write_yaml(
        kedro_project / "conf" / "local" / "mlflow.yml",
        dict(tracking=dict(params=dict(long_params_strategy="tag"))),
    )

    tracking_uri = (kedro_project / "mlruns").as_uri()
    mlflow.set_tracking_uri(tracking_uri)

    hook = MlflowHook()

    param_value = param_length * "a"
    node_inputs = {"params:my_param": param_value}

    bootstrap_project(kedro_project)
    with KedroSession.create(project_path=kedro_project) as session:
        context = session.load_context()
        hook.after_context_created(context)

        with mlflow.start_run():
            hook.before_pipeline_run(
                run_params=dummy_run_params,
                pipeline=Pipeline([]),
                catalog=DataCatalog(),
            )

            # IMPORTANT: exceeding the parameter length limit should raise an
            # error for every mlflow backend, but it does not on the FileStore
            # backend:
            # https://github.com/mlflow/mlflow/issues/2814#issuecomment-628284425
            # The tests use the FileStore backend for simplicity, so plain
            # logging would work here.
            hook.before_node_run(
                node=node(func=lambda x: x, inputs=dict(x="a"), outputs=None),
                catalog=DataCatalog(),  # can be empty
                inputs=node_inputs,
                is_async=False,
            )
            run_id = mlflow.active_run().info.run_id

        current_run = MlflowClient(tracking_uri).get_run(run_id)
        assert current_run.data.params == {}
        assert current_run.data.tags["my_param"] == param_value
def test_kedro_mlflow_config_setup_export_credentials(kedro_project_with_mlflow_conf):
    """Check that ``config.setup`` exports the credentials as environment variables.

    The credentials entry referenced by ``server.credentials`` holds a single
    key, which must be found in ``os.environ`` with its value after the setup.
    """
    credentials_path = kedro_project_with_mlflow_conf / "conf/base/credentials.yml"
    credentials_path.write_text(
        yaml.dump(dict(my_mlflow_creds=dict(fake_mlflow_cred="my_fake_cred")))
    )

    # The config only points to the credentials entry by name.
    config = KedroMlflowConfig(server=dict(credentials="my_mlflow_creds"))

    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session:
        context = session.load_context()
        config.setup(context)

        assert os.environ["fake_mlflow_cred"] == "my_fake_cred"
def test_kedro_mlflow_config_new_experiment_does_not_exists(
    kedro_project_with_mlflow_conf,
):
    """Check that ``config.setup`` makes the configured experiment available.

    No experiment is created beforehand by this test; after the setup,
    ``"exp1"`` must be listed by the mlflow client of the config.
    """
    config = KedroMlflowConfig(
        server=dict(mlflow_tracking_uri="mlruns"),
        tracking=dict(experiment=dict(name="exp1")),
    )

    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session:
        context = session.load_context()
        config.setup(context)

        experiment_names = [
            exp.name for exp in config.server._mlflow_client.list_experiments()
        ]
        assert "exp1" in experiment_names
def test_node_hook_logging(
    kedro_project,
    dummy_run_params,
    dummy_catalog,
    dummy_pipeline,
    dummy_node,
    flatten,
    expected,
):
    """Check the parameters logged by ``before_node_run`` for a ``flatten`` value.

    ``conf/base/mlflow.yml`` is written with the given ``flatten`` option and
    the parameters recorded in the run must equal ``expected``.
    """
    dict_params_conf = dict(flatten=flatten, recursive=False, sep="-")
    _write_yaml(
        kedro_project / "conf" / "base" / "mlflow.yml",
        dict(tracking=dict(params=dict(dict_params=dict_params_conf))),
    )

    # Map every input name of the node to the matching catalog entry, if any.
    node_inputs = {
        dataset_name: dummy_catalog._data_sets.get(dataset_name)
        for dataset_name in dummy_node._inputs.values()
    }

    tracking_uri = (kedro_project / "mlruns").as_uri()

    bootstrap_project(kedro_project)
    with KedroSession.create(project_path=kedro_project) as session:
        context = session.load_context()

        hook = MlflowHook()
        hook.after_context_created(context)  # setup mlflow_config

        mlflow.set_tracking_uri(tracking_uri)
        with mlflow.start_run():
            hook.before_pipeline_run(
                run_params=dummy_run_params,
                pipeline=dummy_pipeline,
                catalog=dummy_catalog,
            )
            hook.before_node_run(
                node=dummy_node,
                catalog=dummy_catalog,
                inputs=node_inputs,
                is_async=False,
            )
            run_id = mlflow.active_run().info.run_id

        current_run = MlflowClient(tracking_uri).get_run(run_id)
        assert current_run.data.params == expected
def test_modelify_with_artifact_path_arg(monkeypatch, kp_for_modelify):
    """Check that ``--artifact-path`` controls where the model is logged.

    The run created by the command must list ``"my_new_model"`` among its
    artifacts.
    """
    monkeypatch.chdir(kp_for_modelify)

    cli_runner = CliRunner()

    bootstrap_project(Path().cwd())
    with KedroSession.create() as session:
        context = session.load_context()
        context.catalog.save("trained_model", 2)

        client = context.mlflow.server._mlflow_client

        def _run_ids():
            # Identifiers of the runs currently stored in the experiment.
            run_infos = context.mlflow.server._mlflow_client.list_run_infos(
                context.mlflow.tracking.experiment._experiment.experiment_id
            )
            return {run_info.run_id for run_info in run_infos}

        run_ids_before = _run_ids()

        cli_args = [
            "--pipeline",
            "inference",
            "--input-name",
            "my_input_data",
            "--artifact-path",
            "my_new_model",
        ]
        result = cli_runner.invoke(cli_modelify, cli_args, catch_exceptions=True)

        run_ids_after = _run_ids()

        # Set of run ids created by the command.
        new_run_id = run_ids_after - run_ids_before

        assert result.exit_code == 0

        artifact_paths = [
            file.path for file in client.list_artifacts(list(new_run_id)[0])
        ]
        assert "my_new_model" in artifact_paths
def __init__(self, project_path: Path):
    """Prepare the command collection for ``project_path``.

    Args:
        project_path: Directory checked with ``_is_project``; when the check
            passes, the project is bootstrapped and its metadata is stored.
    """
    # ``None`` means "running in package mode" (no project at this path).
    self._metadata = (
        bootstrap_project(project_path) if _is_project(project_path) else None
    )
    self._cli_hook_manager = CLIHooksManager()

    command_sources = (
        ("Global commands", self.global_groups),
        ("Project specific commands", self.project_groups),
    )
    super().__init__(*command_sources)
def test_kedro_mlflow_config_experiment_exists(kedro_project_with_mlflow_conf):
    """Check that ``config.setup`` works when the experiment already exists.

    The experiment is created beforehand in the *same* tracking store as the
    one the config resolves to: the relative ``"mlruns"`` uri is resolved
    against the project root (see the sibling tests asserting
    ``(project / <uri>).as_uri()``). Creating it elsewhere, e.g. under
    ``conf/local``, would leave the "already exists" scenario untested.
    """
    # create an experiment with the same name, in the store used by the config
    mlflow_tracking_uri = (kedro_project_with_mlflow_conf / "mlruns").as_uri()
    MlflowClient(mlflow_tracking_uri).create_experiment("exp1")

    config = KedroMlflowConfig(
        server=dict(mlflow_tracking_uri="mlruns"),
        tracking=dict(experiment=dict(name="exp1")),
    )

    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session:
        context = session.load_context()
        config.setup(context)

        experiment_names = [
            exp.name for exp in config.server._mlflow_client.list_experiments()
        ]
        assert "exp1" in experiment_names
def test_on_pipeline_error(kedro_project_with_mlflow_conf):
    """Check the mlflow state after a pipeline run that raises ``ValueError``.

    No run must remain active afterwards and the first run listed for the
    configured experiment must have the ``FAILED`` status.
    """
    tracking_uri = (kedro_project_with_mlflow_conf / "mlruns").as_uri()

    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session:
        context = session.load_context()

        with pytest.raises(ValueError):
            session.run()

        # the run we want is the last one in the configuration experiment
        client = MlflowClient(tracking_uri)
        experiment = client.get_experiment_by_name(
            context.mlflow.tracking.experiment.name
        )
        run_infos = MlflowClient(tracking_uri).list_run_infos(
            experiment.experiment_id
        )
        failing_run_info = run_infos[0]

        # the run must have been closed
        assert mlflow.active_run() is None
        # it must be marked as failed
        expected_status = RunStatus.to_string(RunStatus.FAILED)
        assert failing_run_info.status == expected_status
def test_cli_init_existing_config_force_option(
    monkeypatch, kedro_project, mock_settings_fake_project
):
    """Check that ``init --force`` overwrites an existing mlflow.yml.

    A file with ``mlflow_tracking_uri="toto"`` is written first; after the
    forced init the output must report a successful update and the loaded
    tracking uri must end with ``"mlruns"``.
    """
    # "kedro_project" is a pytest.fixture declared in conftest
    monkeypatch.chdir(kedro_project)
    cli_runner = CliRunner()

    bootstrap_project(kedro_project)
    with KedroSession.create(project_path=kedro_project) as session:
        # emulate first call by writing a mlflow.yml file
        existing_config_path = (
            kedro_project
            / mock_settings_fake_project.CONF_SOURCE
            / "local"
            / "mlflow.yml"
        )
        existing_config_path.write_text(
            yaml.dump(dict(server=dict(mlflow_tracking_uri="toto")))
        )

        result = cli_runner.invoke(cli_init, args="--force")

        # the command must report that the file was updated
        assert "successfully updated" in result.output

        # the loaded configuration no longer carries the "toto" uri
        context = session.load_context()
        assert context.mlflow.server.mlflow_tracking_uri.endswith("mlruns")
def test_node_hook_logging_above_limit_truncate_strategy(
    kedro_project, dummy_run_params, param_length
):
    """Check the ``"truncate"`` strategy for a parameter of ``param_length`` characters.

    With ``long_params_strategy="truncate"`` the logged value must be the
    first ``MAX_PARAM_VAL_LENGTH`` characters of the original value.
    """
    _write_yaml(
        kedro_project / "conf" / "local" / "mlflow.yml",
        dict(tracking=dict(params=dict(long_params_strategy="truncate"))),
    )

    tracking_uri = (kedro_project / "mlruns").as_uri()
    mlflow.set_tracking_uri(tracking_uri)

    param_value = param_length * "a"
    node_inputs = {"params:my_param": param_value}

    bootstrap_project(kedro_project)
    with KedroSession.create(project_path=kedro_project) as session:
        context = session.load_context()

        hook = MlflowHook()
        hook.after_context_created(context)

        with mlflow.start_run():
            hook.before_pipeline_run(
                run_params=dummy_run_params,
                pipeline=Pipeline([]),
                catalog=DataCatalog(),
            )
            hook.before_node_run(
                node=node(func=lambda x: x, inputs=dict(x="a"), outputs=None),
                catalog=DataCatalog(),  # can be empty
                inputs=node_inputs,
                is_async=False,
            )
            run_id = mlflow.active_run().info.run_id

        current_run = MlflowClient(tracking_uri).get_run(run_id)
        expected_params = {"my_param": param_value[:MAX_PARAM_VAL_LENGTH]}
        assert current_run.data.params == expected_params
def test_cli_init_existing_config(
    monkeypatch, kedro_project_with_mlflow_conf, mock_settings_fake_project
):
    """Check that ``init`` without ``--force`` leaves an existing mlflow.yml alone.

    A file with ``mlflow_tracking_uri="toto"`` is written first; the command
    must report that the file already exists and the loaded tracking uri
    must still end with ``"toto"``.
    """
    cli_runner = CliRunner()
    monkeypatch.chdir(kedro_project_with_mlflow_conf)

    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(
        "fake_project", project_path=kedro_project_with_mlflow_conf
    ) as session:
        # emulate first call by writing a mlflow.yml file
        existing_config_path = (
            kedro_project_with_mlflow_conf
            / mock_settings_fake_project.CONF_SOURCE
            / "local"
            / "mlflow.yml"
        )
        existing_config_path.write_text(
            yaml.dump(dict(server=dict(mlflow_tracking_uri="toto")))
        )

        result = cli_runner.invoke(cli_init)

        # check an error message is raised
        assert "A 'mlflow.yml' already exists" in result.output

        # check the file remains unmodified
        context = session.load_context()
        assert context.mlflow.server.mlflow_tracking_uri.endswith("toto")
def test_kedro_mlflow_config_experiment_was_deleted(kedro_project_with_mlflow_conf):
    """Check that ``config.setup`` copes with a previously deleted experiment.

    ``"exp1"`` is created and then deleted in the project tracking store;
    after the setup it must be listed again by the mlflow client of the config.
    """
    # create an experiment with the same name and then delete it
    tracking_uri = (kedro_project_with_mlflow_conf / "mlruns").as_uri()
    client = MlflowClient(tracking_uri)
    client.create_experiment("exp1")
    deleted_experiment_id = client.get_experiment_by_name("exp1").experiment_id
    client.delete_experiment(deleted_experiment_id)

    # the config must restore properly the experiment
    config = KedroMlflowConfig(
        server=dict(mlflow_tracking_uri="mlruns"),
        tracking=dict(experiment=dict(name="exp1")),
    )

    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session:
        context = session.load_context()
        config.setup(context)

        experiment_names = [
            exp.name for exp in config.server._mlflow_client.list_experiments()
        ]
        assert "exp1" in experiment_names
def reload_kedro(path, env: str = None, extra_params: Dict[str, Any] = None):
    """Line magic which reloads all Kedro default variables."""
    # Imports are done at call time, as in the original implementation.
    import kedro.config.default_logger  # noqa: F401 # pylint: disable=unused-import
    from kedro.framework.cli import load_entry_points
    from kedro.framework.project import pipelines
    from kedro.framework.session import KedroSession
    from kedro.framework.session.session import _activate_session
    from kedro.framework.startup import bootstrap_project

    _clear_hook_manager()

    # Fall back to the ``project_path`` of the enclosing scope when no path
    # is given.
    path = path or project_path
    metadata = bootstrap_project(path)

    _remove_cached_modules(metadata.package_name)

    session = KedroSession.create(
        metadata.package_name,
        path,
        env=env,
        extra_params=extra_params,
    )
    _activate_session(session, force=True)

    logging.debug("Loading the context from %s", str(path))
    context = session.load_context()
    catalog = context.catalog

    # Expose the freshly created objects in the IPython user namespace.
    exposed_variables = {
        "context": context,
        "catalog": catalog,
        "session": session,
        "pipelines": pipelines,
    }
    get_ipython().push(variables=exposed_variables)

    logging.info("** Kedro project %s", str(metadata.project_name))
    logging.info(
        "Defined global variable `context`, `session`, `catalog` and `pipelines`"
    )

    # Register every line magic declared through the "line_magic" entry points.
    for line_magic in load_entry_points("line_magic"):
        register_line_magic(needs_local_scope(line_magic))
        logging.info("Registered line magic `%s`", line_magic.__name__)  # type: ignore
def init(env: str, force: bool, silent: bool):
    """Updates the template of a kedro project.
    Running this command is mandatory to use kedro-mlflow.
    This adds "conf/base/mlflow.yml": This is a configuration file
    used for run parametrization when calling "kedro run" command.
    """
    # NOTE: the docstring above is kept verbatim on purpose -- the function
    # appears to be exposed as a CLI command (decorators are not visible in
    # this excerpt), in which case the docstring is the user-facing help text.

    # get constants
    mlflow_yml = "mlflow.yml"
    project_path = Path.cwd()
    project_metadata = bootstrap_project(project_path)
    mlflow_yml_path = project_path / settings.CONF_SOURCE / env / mlflow_yml

    # Guard clause: never overwrite an existing file unless asked to.
    if mlflow_yml_path.is_file() and not force:
        already_exists_message = click.style(
            f"A 'mlflow.yml' already exists at '{mlflow_yml_path}' You can use the ``--force`` option to override it.",
            fg="red",
        )
        click.secho(already_exists_message)
        return

    # mlflow.yml is just a static file,
    # but the name of the experiment is set to be the same as the project
    try:
        write_jinja_template(
            src=TEMPLATE_FOLDER_PATH / mlflow_yml,
            is_cookiecutter=False,
            dst=mlflow_yml_path,
            python_package=project_metadata.package_name,
        )
        if not silent:
            success_message = click.style(
                f"'{settings.CONF_SOURCE}/{env}/{mlflow_yml}' successfully updated.",
                fg="green",
            )
            click.secho(success_message)
    except FileNotFoundError:
        # Reported as a missing environment folder to the user.
        missing_env_message = click.style(
            f"No env '{env}' found. Please check this folder exists inside '{settings.CONF_SOURCE}' folder.",
            fg="red",
        )
        click.secho(missing_env_message)