def test_kedro_mlflow_config_setup_tracking_priority(kedro_project_with_mlflow_conf):
    """Test that the mlflow tracking uri which is set is the one from mlflow.yml
    when a different uri also exists in credentials.
    """
    # write a credentials file containing a competing mlflow_tracking_uri
    (kedro_project_with_mlflow_conf / "conf/base/credentials.yml").write_text(
        yaml.dump(dict(my_mlflow_creds=dict(mlflow_tracking_uri="mlruns2")))
    )

    config = KedroMlflowConfig(
        project_path=kedro_project_with_mlflow_conf,
        mlflow_tracking_uri="mlruns1",
        credentials="my_mlflow_creds",
    )
    project_metadata = _get_project_metadata(kedro_project_with_mlflow_conf)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_mlflow_conf)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        "fake_project", project_path=kedro_project_with_mlflow_conf
    ):
        config.setup()

    assert (
        mlflow.get_tracking_uri()
        == (kedro_project_with_mlflow_conf / "mlruns1").as_uri()
    )

def main():  # pragma: no cover
    """Main entry point. Look for a ``cli.py`` and, if found, add its
    commands to ``kedro``'s before invoking the CLI.
    """
    _init_plugins()

    global_groups = [cli]
    global_groups.extend(load_entry_points("global"))
    project_groups = []
    cli_context = dict()

    path = Path.cwd()
    if _is_project(path):
        # load project commands from cli.py
        metadata = _get_project_metadata(path)
        package_name = metadata.package_name
        cli_context = dict(obj=metadata)
        _add_src_to_path(metadata.source_dir, path)
        configure_project(package_name)

        project_groups.extend(load_entry_points("project"))

        try:
            project_cli = importlib.import_module(f"{package_name}.cli")
            project_groups.append(project_cli.cli)
        except Exception as exc:
            raise KedroCliError(
                f"Cannot load commands from {package_name}.cli"
            ) from exc

    cli_collection = CommandCollection(
        ("Global commands", global_groups),
        ("Project specific commands", project_groups),
    )
    cli_collection(**cli_context)

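# Hedged sketch (not taken from the source): a minimal, hypothetical
# `<package_name>/cli.py` of the kind `main()` above looks for. `main()` imports
# `<package_name>.cli` and appends its `cli` attribute to the "Project specific
# commands" group, so a project-level cli.py only needs to expose a click group
# named `cli`.
import click


@click.group(name="fake_project")
def cli():
    """Project-specific commands."""


@cli.command()
def hello():
    """Toy command that would be picked up by `kedro` through `main()`."""
    click.echo("Hello from the project cli!")
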
def test_mlflow_pipeline_hook_with_copy_mode(
    kedro_project_with_mlflow_conf,
    dummy_pipeline_ml,
    dummy_catalog,
    dummy_run_params,
    copy_mode,
    expected,
):
    # config_with_base_mlflow_conf is a conftest fixture
    project_metadata = _get_project_metadata(kedro_project_with_mlflow_conf)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_mlflow_conf)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        package_name=project_metadata.package_name,
        project_path=kedro_project_with_mlflow_conf,
    ):
        pipeline_hook = MlflowPipelineHook()
        runner = SequentialRunner()

        pipeline_hook.after_catalog_created(
            catalog=dummy_catalog,
            # `after_catalog_created` does not use any of the arguments below,
            # so we set them to empty values.
            conf_catalog={},
            conf_creds={},
            feed_dict={},
            save_version="",
            load_versions="",
            run_id=dummy_run_params["run_id"],
        )

        pipeline_to_run = pipeline_ml_factory(
            training=dummy_pipeline_ml.training,
            inference=dummy_pipeline_ml.inference,
            input_name=dummy_pipeline_ml.input_name,
            conda_env={},
            model_name=dummy_pipeline_ml.model_name,
            copy_mode=copy_mode,
        )
        pipeline_hook.before_pipeline_run(
            run_params=dummy_run_params, pipeline=pipeline_to_run, catalog=dummy_catalog
        )
        runner.run(pipeline_to_run, dummy_catalog)
        run_id = mlflow.active_run().info.run_id
        pipeline_hook.after_pipeline_run(
            run_params=dummy_run_params, pipeline=pipeline_to_run, catalog=dummy_catalog
        )

        mlflow_tracking_uri = (kedro_project_with_mlflow_conf / "mlruns").as_uri()
        mlflow.set_tracking_uri(mlflow_tracking_uri)

        loaded_model = mlflow.pyfunc.load_model(model_uri=f"runs:/{run_id}/model")
        actual_copy_mode = {
            name: ds._copy_mode
            for name, ds in loaded_model._model_impl.python_model.loaded_catalog._data_sets.items()
        }

        assert actual_copy_mode == expected

def test_get_current_session(self, fake_project, mock_package_name):
    assert get_current_session(silent=True) is None  # no sessions yet

    pattern = "There is no active Kedro session"
    with pytest.raises(RuntimeError, match=pattern):
        get_current_session()

    configure_project(mock_package_name)
    session1 = KedroSession.create(mock_package_name, fake_project)
    session2 = KedroSession.create(mock_package_name, fake_project)

    with session1:
        assert get_current_session() is session1
        pattern = (
            "Cannot activate the session as another active session already exists"
        )
        with pytest.raises(RuntimeError, match=pattern), session2:
            pass  # pragma: no cover

    # session has been closed, so no current sessions should be available
    assert get_current_session(silent=True) is None

    with session2:
        assert get_current_session() is session2

def test_mlflow_config_with_templated_config_loader(kedro_project_with_tcl):
    _write_yaml(
        kedro_project_with_tcl / "conf" / "local" / "mlflow.yml",
        dict(
            mlflow_tracking_uri="${mlflow_tracking_uri}",
            credentials=None,
            disable_tracking=dict(pipelines=["my_disabled_pipeline"]),
            experiment=dict(name="fake_package", create=True),
            run=dict(id="123456789", name="my_run", nested=True),
            ui=dict(port="5151", host="localhost"),
            hooks=dict(
                node=dict(
                    flatten_dict_params=True,
                    recursive=False,
                    sep="-",
                    long_parameters_strategy="truncate",
                )
            ),
        ),
    )

    _write_yaml(
        kedro_project_with_tcl / "conf" / "local" / "globals.yml",
        dict(mlflow_tracking_uri="dynamic_mlruns"),
    )

    expected = {
        "mlflow_tracking_uri": (kedro_project_with_tcl / "dynamic_mlruns").as_uri(),
        "credentials": None,
        "disable_tracking": {"pipelines": ["my_disabled_pipeline"]},
        "experiments": {"name": "fake_package", "create": True},
        "run": {"id": "123456789", "name": "my_run", "nested": True},
        "ui": {"port": "5151", "host": "localhost"},
        "hooks": {
            "node": {
                "flatten_dict_params": True,
                "recursive": False,
                "sep": "-",
                "long_parameters_strategy": "truncate",
            }
        },
    }

    project_metadata = _get_project_metadata(kedro_project_with_tcl)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_tcl)
    configure_project(project_metadata.package_name)
    with KedroSession.create(project_metadata.package_name, kedro_project_with_tcl):
        assert get_mlflow_config().to_dict() == expected

def run_package():
    # Entry point for running a Kedro project packaged with `kedro package`
    # using the `python -m <project_package>.run` command.
    package_name = Path(__file__).resolve().parent.name
    configure_project(package_name)
    with KedroSession.create(package_name) as session:
        session.run()

def test_get_current_session(self, fake_project, mocker):
    assert get_current_session(silent=True) is None  # no sessions yet

    pattern = "There is no active Kedro session"
    with pytest.raises(RuntimeError, match=pattern):
        get_current_session()

    mocker.patch("kedro.framework.project._validate_module")
    configure_project(_FAKE_PACKAGE_NAME)
    session1 = KedroSession.create(_FAKE_PACKAGE_NAME, fake_project)
    session2 = KedroSession.create(_FAKE_PACKAGE_NAME, fake_project)

    with session1:
        assert get_current_session() is session1
        pattern = (
            "Cannot activate the session as another active session already exists"
        )
        with pytest.raises(RuntimeError, match=pattern), session2:
            pass  # pragma: no cover

    # session has been closed, so no current sessions should be available
    assert get_current_session(silent=True) is None

    with session2:
        assert get_current_session() is session2

def test_pipeline_run_hook_getting_configs(
    kedro_project,
    dummy_run_params,
    dummy_pipeline,
    dummy_catalog,
):
    _write_yaml(
        kedro_project / "conf" / "local" / "mlflow.yml",
        dict(
            hooks=dict(node=dict(flatten_dict_params=True, recursive=False, sep="-")),
        ),
    )

    project_metadata = _get_project_metadata(kedro_project)
    _add_src_to_path(project_metadata.source_dir, kedro_project)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        package_name=project_metadata.package_name,
        project_path=kedro_project,
    ):
        mlflow_node_hook = MlflowNodeHook()
        mlflow_node_hook.before_pipeline_run(
            run_params=dummy_run_params, pipeline=dummy_pipeline, catalog=dummy_catalog
        )

        assert (
            mlflow_node_hook.flatten,
            mlflow_node_hook.recursive,
            mlflow_node_hook.sep,
        ) == (True, False, "-")

def test_register_pipelines_is_called(
    self, mock_session_with_hooks, dummy_dataframe, caplog
):
    # not sure why this test needs this call here too,
    # and not just in the session fixture
    configure_project(MOCK_PACKAGE_NAME)
    context = mock_session_with_hooks.load_context()
    catalog = context.catalog
    catalog.save("cars", dummy_dataframe)
    catalog.save("boats", dummy_dataframe)

    mock_session_with_hooks.run()

    register_pipelines_calls = [
        record
        for record in caplog.records
        if record.funcName == "register_pipelines"
    ]
    assert len(register_pipelines_calls) == 1
    call_record = register_pipelines_calls[0]
    _assert_hook_call_record_has_expected_parameters(call_record, [])

    expected_pipelines = {
        "__default__": CONTEXT_PIPELINE,
        "de": CONTEXT_PIPELINE,
        "pipe": CONTEXT_PIPELINE,
    }
    assert context.pipelines == expected_pipelines

def ui(env, port, host):
    """Opens the mlflow user interface with the project-specific settings of mlflow.yml.
    This interface lets you browse and compare runs.
    """
    project_path = Path().cwd()
    project_metadata = _get_project_metadata(project_path)
    _add_src_to_path(project_metadata.source_dir, project_path)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        package_name=project_metadata.package_name,
        project_path=project_path,
        env=env,
    ):
        mlflow_conf = get_mlflow_config()
        host = host or mlflow_conf.ui_opts.get("host")
        port = port or mlflow_conf.ui_opts.get("port")

        # call mlflow ui with specific options
        # TODO: add more options for ui
        subprocess.call(
            [
                "mlflow",
                "ui",
                "--backend-store-uri",
                mlflow_conf.mlflow_tracking_uri,
                "--host",
                host,
                "--port",
                port,
            ]
        )

def fake_project_cli(fake_repo_path: Path, dummy_config: Path):
    old_settings = settings.as_dict()
    starter_path = Path(__file__).parents[3].resolve()
    starter_path = starter_path / "features" / "steps" / "test_starter"
    # This is needed just for the tests: those CLI groups are merged in our
    # code when invoking `kedro`, but when imported they still need to be merged.
    kedro_cli = click.CommandCollection(name="Kedro", sources=[cli, create_cli])
    CliRunner().invoke(
        kedro_cli,
        ["new", "-c", str(dummy_config), "--starter", str(starter_path)],
    )

    # NOTE: Here we load a couple of modules, as they would be imported in
    # the code and tests.
    # It's safe to remove the new entries from path due to the python
    # module caching mechanism. Any `reload` on it will not work though.
    old_path = sys.path.copy()
    sys.path = [str(fake_repo_path / "src")] + sys.path

    import_module(PACKAGE_NAME)
    configure_project(PACKAGE_NAME)
    yield import_module(f"{PACKAGE_NAME}.cli")

    # reset side-effects of configure_project
    pipelines.clear()
    for key, value in old_settings.items():
        settings.set(key, value)
    sys.path = old_path
    del sys.modules[PACKAGE_NAME]

def fake_project_cli(
    fake_repo_path: Path, dummy_config: Path, fake_kedro_cli: click.CommandCollection
):
    old_settings = settings.as_dict()
    starter_path = Path(__file__).parents[3].resolve()
    starter_path = starter_path / "features" / "steps" / "test_starter"
    CliRunner().invoke(
        fake_kedro_cli, ["new", "-c", str(dummy_config), "--starter", str(starter_path)]
    )

    # NOTE: Here we load a couple of modules, as they would be imported in
    # the code and tests.
    # It's safe to remove the new entries from path due to the python
    # module caching mechanism. Any `reload` on it will not work though.
    old_path = sys.path.copy()
    sys.path = [str(fake_repo_path / "src")] + sys.path

    import_module(PACKAGE_NAME)
    configure_project(PACKAGE_NAME)
    yield fake_kedro_cli

    # reset side-effects of configure_project
    pipelines._clear(PACKAGE_NAME)  # this resets pipelines loading state
    for key, value in old_settings.items():
        settings.set(key, value)
    sys.path = old_path
    del sys.modules[PACKAGE_NAME]

def test_kedro_mlflow_config_experiment_was_deleted(kedro_project_with_mlflow_conf):
    # create an experiment with the same name and then delete it
    mlflow_tracking_uri = (kedro_project_with_mlflow_conf / "mlruns").as_uri()
    mlflow_client = MlflowClient(mlflow_tracking_uri)
    mlflow_client.create_experiment("exp1")
    mlflow_client.delete_experiment(
        mlflow_client.get_experiment_by_name("exp1").experiment_id
    )

    # the config must restore the experiment properly
    config = KedroMlflowConfig(
        project_path=kedro_project_with_mlflow_conf,
        mlflow_tracking_uri="mlruns",
        experiment_opts=dict(name="exp1"),
    )

    project_metadata = _get_project_metadata(kedro_project_with_mlflow_conf)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_mlflow_conf)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        "fake_project", project_path=kedro_project_with_mlflow_conf
    ):
        config.setup()

    assert "exp1" in [exp.name for exp in config.mlflow_client.list_experiments()]

def bootstrap_project(project_path: Path) -> ProjectMetadata:
    """Run the setup required at the beginning of the workflow when
    running in project mode, and return project metadata.
    """
    metadata = _get_project_metadata(project_path)
    _add_src_to_path(metadata.source_dir, project_path)
    configure_project(metadata.package_name)
    return metadata

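# Hedged usage sketch (assumed, not from the source): `bootstrap_project` bundles the
# `_get_project_metadata` / `_add_src_to_path` / `configure_project` sequence repeated
# in the snippets above, so a programmatic run could look like this.
from pathlib import Path

from kedro.framework.session import KedroSession

project_path = Path.cwd()
metadata = bootstrap_project(project_path)
with KedroSession.create(metadata.package_name, project_path) as session:
    session.run()
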
def reload_kedro(path, line=None):
    """Line magic which reloads all Kedro default variables."""
    global startup_error
    global context
    global catalog
    global session

    try:
        import kedro.config.default_logger
        from kedro.framework.hooks import get_hook_manager
        from kedro.framework.project import configure_project
        from kedro.framework.session import KedroSession
        from kedro.framework.session.session import _activate_session
        from kedro.framework.cli.jupyter import collect_line_magic
    except ImportError:
        logging.error(
            "Kedro appears not to be installed in your current environment "
            "or your current IPython session was not started in a valid Kedro project."
        )
        raise

    try:
        path = path or project_path

        # clear hook manager
        hook_manager = get_hook_manager()
        name_plugin_pairs = hook_manager.list_name_plugin()
        for name, plugin in name_plugin_pairs:
            hook_manager.unregister(name=name, plugin=plugin)

        # remove cached user modules
        metadata = _get_project_metadata(path)
        to_remove = [
            mod for mod in sys.modules if mod.startswith(metadata.package_name)
        ]
        # `del` is used instead of `reload()` because: if the new version of a module does not
        # define a name that was defined by the old version, the old definition remains.
        for module in to_remove:
            del sys.modules[module]

        configure_project(metadata.package_name)
        session = KedroSession.create(metadata.package_name, path)
        _activate_session(session, force=True)
        logging.debug("Loading the context from %s", str(path))
        context = session.load_context()
        catalog = context.catalog

        logging.info("** Kedro project %s", str(metadata.project_name))
        logging.info("Defined global variable `context` and `catalog`")

        for line_magic in collect_line_magic():
            register_line_magic(needs_local_scope(line_magic))
            logging.info("Registered line magic `%s`", line_magic.__name__)
    except Exception as err:
        startup_error = err
        logging.exception("Kedro's ipython session startup script failed:\n%s", str(err))
        raise err

def _bootstrap_subprocess(package_name: str, conf_logging: Dict[str, Any]):
    # pylint: disable=import-outside-toplevel,protected-access,cyclic-import
    from kedro.framework.project import configure_project
    from kedro.framework.session.session import _register_all_project_hooks

    hook_manager = get_hook_manager()
    configure_project(package_name)
    _register_all_project_hooks(hook_manager)
    logging.config.dictConfig(conf_logging)

def mock_session_with_hooks(
    tmp_path, mock_settings_with_logging_hooks, logging_hooks, mocker
):  # pylint: disable=unused-argument
    mocker.patch("kedro.framework.project._validate_module")
    logging_hooks.queue_listener.start()
    configure_project(MOCK_PACKAGE_NAME)
    yield KedroSession.create(
        MOCK_PACKAGE_NAME, tmp_path, extra_params={"params:key": "value"}
    )
    logging_hooks.queue_listener.stop()

def test_node_hook_logging_above_limit_tag_strategy(
    kedro_project, dummy_run_params, param_length
):
    _write_yaml(
        kedro_project / "conf" / "local" / "mlflow.yml",
        dict(
            hooks=dict(node=dict(long_parameters_strategy="tag")),
        ),
    )

    mlflow_tracking_uri = (kedro_project / "mlruns").as_uri()
    mlflow.set_tracking_uri(mlflow_tracking_uri)

    mlflow_node_hook = MlflowNodeHook()

    param_value = param_length * "a"
    node_inputs = {"params:my_param": param_value}

    project_metadata = _get_project_metadata(kedro_project)
    _add_src_to_path(project_metadata.source_dir, kedro_project)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        package_name=project_metadata.package_name,
        project_path=kedro_project,
    ):
        with mlflow.start_run():
            mlflow_node_hook.before_pipeline_run(
                run_params=dummy_run_params,
                pipeline=Pipeline([]),
                catalog=DataCatalog(),
            )
            # IMPORTANT: exceeding the parameter length limit should raise an error
            # for all mlflow backends, but it does not on the FileStore backend:
            # https://github.com/mlflow/mlflow/issues/2814#issuecomment-628284425
            # Since the tests use the FileStore backend for simplicity, logging would
            # succeed here, but we enforce a failure (which is slightly different from
            # mlflow's behaviour).
            mlflow_node_hook.before_node_run(
                node=node(func=lambda x: x, inputs=dict(x="a"), outputs=None),
                catalog=DataCatalog(),  # can be empty
                inputs=node_inputs,
                is_async=False,
                run_id="132",
            )
            run_id = mlflow.active_run().info.run_id

        mlflow_client = MlflowClient(mlflow_tracking_uri)
        current_run = mlflow_client.get_run(run_id)
        assert current_run.data.params == {}
        assert {
            k: v
            for k, v in current_run.data.tags.items()
            if not k.startswith("mlflow")
        } == {"my_param": param_value}

def dummy_context(
    tmp_path, prepare_project_dir, env, extra_params, mocker
):  # pylint: disable=unused-argument
    mocker.patch("kedro.framework.project._validate_module")
    configure_project(MOCK_PACKAGE_NAME)
    context = KedroContext(
        MOCK_PACKAGE_NAME, str(tmp_path), env=env, extra_params=extra_params
    )
    return context

def _load_project(project_path):  # pragma: no cover
    # TODO: This one can potentially become project bootstrap and will be
    # tested there
    if not _is_project(project_path):
        return None
    metadata = _get_project_metadata(project_path)
    _add_src_to_path(metadata.source_dir, project_path)
    configure_project(metadata.package_name)
    return metadata

def test_get_mlflow_config_in_uninitialized_project(kedro_project):
    # config_with_base_mlflow_conf is a pytest fixture in conftest
    with pytest.raises(
        KedroMlflowConfigError,
        match="No 'mlflow.yml' config file found in environment",
    ):
        project_metadata = _get_project_metadata(kedro_project)
        _add_src_to_path(project_metadata.source_dir, kedro_project)
        configure_project(project_metadata.package_name)
        with KedroSession.create(project_metadata.package_name, kedro_project):
            get_mlflow_config()

def dummy_context(
    tmp_path, prepare_project_dir, env, extra_params, mocker
):  # pylint: disable=unused-argument
    configure_project(MOCK_PACKAGE_NAME)
    context = KedroContext(
        MOCK_PACKAGE_NAME, str(tmp_path), env=env, extra_params=extra_params
    )
    yield context
    pipelines._clear(MOCK_PACKAGE_NAME)

def test_configure_project_should_not_raise_for_unimportable_pipelines(
    mock_package_name_with_unimportable_pipelines_file,
):
    # configure_project should not raise an error for unimportable pipelines
    # since pipeline loading is lazy
    configure_project(mock_package_name_with_unimportable_pipelines_file)

    # accessing the data should raise for unimportable pipelines
    with pytest.raises(
        ModuleNotFoundError, match="No module named 'this_is_not_a_real_thing'"
    ):
        _ = pipelines["new_pipeline"]

def get_session():
    """Get a kedro session."""
    cur_path = os.getcwd()
    os.chdir(Path(__file__).parents[2])
    configure_project("kedro_171_package")
    session = KedroSession.create(Path(__file__).resolve().parent.name)
    _activate_session(session, force=True)
    os.chdir(cur_path)
    return session

def test_mlflow_pipeline_hook_with_pipeline_ml_signature(
    kedro_project_with_mlflow_conf,
    env_from_dict,
    dummy_pipeline,
    dummy_catalog,
    dummy_run_params,
    model_signature,
    expected_signature,
):
    # config_with_base_mlflow_conf is a conftest fixture
    project_metadata = _get_project_metadata(kedro_project_with_mlflow_conf)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_mlflow_conf)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        package_name=project_metadata.package_name,
        project_path=kedro_project_with_mlflow_conf,
    ):
        pipeline_hook = MlflowPipelineHook()
        runner = SequentialRunner()

        pipeline_to_run = pipeline_ml_factory(
            training=dummy_pipeline.only_nodes_with_tags("training"),
            inference=dummy_pipeline.only_nodes_with_tags("inference"),
            input_name="raw_data",
            conda_env=env_from_dict,
            model_name="model",
            model_signature=model_signature,
        )

        pipeline_hook.after_catalog_created(
            catalog=dummy_catalog,
            # `after_catalog_created` does not use any of the arguments below,
            # so we set them to empty values.
            conf_catalog={},
            conf_creds={},
            feed_dict={},
            save_version="",
            load_versions="",
            run_id=dummy_run_params["run_id"],
        )
        pipeline_hook.before_pipeline_run(
            run_params=dummy_run_params, pipeline=pipeline_to_run, catalog=dummy_catalog
        )
        runner.run(pipeline_to_run, dummy_catalog)
        run_id = mlflow.active_run().info.run_id
        pipeline_hook.after_pipeline_run(
            run_params=dummy_run_params, pipeline=pipeline_to_run, catalog=dummy_catalog
        )

        # test: the logged model signature should match the expected one
        trained_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
        assert trained_model.metadata.signature == expected_signature

def init(env, force, silent):
    """Updates the template of a kedro project.
    Running this command is mandatory to use kedro-mlflow.
    It adds a "conf/base/mlflow.yml" configuration file used to parametrize
    runs when calling the "kedro run" command.
    See INSERT_DOC_URL for further details.
    """
    # get constants
    mlflow_yml = "mlflow.yml"
    project_path = Path().cwd()
    project_metadata = _get_project_metadata(project_path)
    _add_src_to_path(project_metadata.source_dir, project_path)
    configure_project(project_metadata.package_name)
    session = KedroSession.create(
        project_metadata.package_name, project_path=project_path
    )
    context = session.load_context()
    mlflow_yml_path = project_path / context.CONF_ROOT / env / mlflow_yml

    # mlflow.yml is just a static file,
    # but the name of the experiment is set to be the same as the project
    if mlflow_yml_path.is_file() and not force:
        click.secho(
            click.style(
                f"A 'mlflow.yml' already exists at '{mlflow_yml_path}'. "
                "You can use the ``--force`` option to override it.",
                fg="red",
            )
        )
    else:
        try:
            write_jinja_template(
                src=TEMPLATE_FOLDER_PATH / mlflow_yml,
                is_cookiecutter=False,
                dst=mlflow_yml_path,
                python_package=project_metadata.package_name,
            )
        except FileNotFoundError:
            click.secho(
                click.style(
                    f"No env '{env}' found. Please check this folder exists "
                    f"inside '{context.CONF_ROOT}' folder.",
                    fg="red",
                )
            )
        if not silent:
            click.secho(
                click.style(
                    f"'{context.CONF_ROOT}/{env}/{mlflow_yml}' successfully updated.",
                    fg="green",
                )
            )

def mock_session(
    mocker, mock_settings_with_mlflow_hooks, kedro_project_path
):  # pylint: disable=unused-argument
    # we need to patch "kedro.framework.session.session.validate_settings" instead of
    # "kedro.framework.project.validate_settings" because it is imported there
    mocker.patch("kedro.framework.session.session.validate_settings")
    # likewise, we need to patch "kedro.framework.session.session._register_hooks_setuptools"
    # instead of "kedro.framework.hooks.manager._register_hooks_setuptools" because it is imported
    mocker.patch(
        "kedro.framework.session.session._register_hooks_setuptools"
    )  # prevent registering the hooks of plugins which are already installed
    configure_project(MOCK_PACKAGE_NAME)
    return KedroSession.create(MOCK_PACKAGE_NAME, kedro_project_path)

def test_register_pipelines_with_duplicate_entries(
    self, tmp_path, mock_pipelines, mocker
):
    mocker.patch("kedro.framework.project._validate_module")
    pattern = (
        "Found duplicate pipeline entries. The following "
        "will be overwritten: __default__"
    )
    with pytest.warns(UserWarning, match=re.escape(pattern)):
        configure_project(MOCK_PACKAGE_NAME)
        session = KedroSession.create(MOCK_PACKAGE_NAME, tmp_path)
        context = session.load_context()

    # check that all pipeline dictionaries merged together correctly
    expected_pipelines = {
        key: CONTEXT_PIPELINE for key in ("__default__", "de", "pipe")
    }
    assert mock_pipelines == expected_pipelines
    assert context.pipelines == expected_pipelines

def test_on_pipeline_error(kedro_project_with_mlflow_conf):
    tracking_uri = (kedro_project_with_mlflow_conf / "mlruns").as_uri()

    project_metadata = _get_project_metadata(kedro_project_with_mlflow_conf)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_mlflow_conf)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        package_name=project_metadata.package_name,
        project_path=kedro_project_with_mlflow_conf,
    ):

        def failing_node():
            mlflow.start_run(nested=True)
            raise ValueError("Let's make this pipeline fail")

        class DummyContextWithHook(KedroContext):
            project_name = "fake project"
            package_name = "fake_project"
            project_version = "0.16.5"
            hooks = (MlflowPipelineHook(),)

            def _get_pipeline(self, name: str = None) -> Pipeline:
                return Pipeline(
                    [
                        node(
                            func=failing_node,
                            inputs=None,
                            outputs="fake_output",
                        )
                    ]
                )

        with pytest.raises(ValueError):
            failing_context = DummyContextWithHook(
                "fake_package", kedro_project_with_mlflow_conf.as_posix()
            )
            failing_context.run()

        # the run we want is the last one in the Default experiment
        failing_run_info = MlflowClient(tracking_uri).list_run_infos("0")[0]
        assert mlflow.active_run() is None  # the run must have been closed
        assert failing_run_info.status == RunStatus.to_string(
            RunStatus.FAILED
        )  # it must be marked as failed

def test_node_hook_logging_above_limit_truncate_strategy(
    kedro_project, dummy_run_params, param_length
):
    _write_yaml(
        kedro_project / "conf" / "local" / "mlflow.yml",
        dict(
            hooks=dict(node=dict(long_parameters_strategy="truncate")),
        ),
    )

    mlflow_tracking_uri = (kedro_project / "mlruns").as_uri()
    mlflow.set_tracking_uri(mlflow_tracking_uri)

    mlflow_node_hook = MlflowNodeHook()

    param_value = param_length * "a"
    node_inputs = {"params:my_param": param_value}

    project_metadata = _get_project_metadata(kedro_project)
    _add_src_to_path(project_metadata.source_dir, kedro_project)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        package_name=project_metadata.package_name,
        project_path=kedro_project,
    ):
        with mlflow.start_run():
            mlflow_node_hook.before_pipeline_run(
                run_params=dummy_run_params,
                pipeline=Pipeline([]),
                catalog=DataCatalog(),
            )
            mlflow_node_hook.before_node_run(
                node=node(func=lambda x: x, inputs=dict(x="a"), outputs=None),
                catalog=DataCatalog(),  # can be empty
                inputs=node_inputs,
                is_async=False,
                run_id="132",
            )
            run_id = mlflow.active_run().info.run_id

        mlflow_client = MlflowClient(mlflow_tracking_uri)
        current_run = mlflow_client.get_run(run_id)
        assert current_run.data.params == {
            "my_param": param_value[0:MAX_PARAM_VAL_LENGTH]
        }