Example #1
    def test_toml_invalid_format(self, tmp_path):
        """Test that an invalid ``pyproject.toml`` file raises a ``RuntimeError``."""
        toml_path = tmp_path / "pyproject.toml"
        toml_path.write_text("!!")  # Invalid TOML
        pattern = "Failed to parse 'pyproject.toml' file"
        with pytest.raises(RuntimeError, match=re.escape(pattern)):
            _get_project_metadata(str(tmp_path))
Example #2
    def test_no_config_files(self, mocker):
        mocker.patch.object(Path, "is_file", return_value=False)

        pattern = (
            f"Could not find the project configuration file 'pyproject.toml' "
            f"in {self.project_path}")
        with pytest.raises(RuntimeError, match=re.escape(pattern)):
            _get_project_metadata(self.project_path)
Example #3
    def test_toml_file_without_kedro_section(self, mocker):
        mocker.patch.object(Path, "is_file", return_value=True)
        mocker.patch("anyconfig.load", return_value={})

        pattern = "There's no '[tool.kedro]' section in the 'pyproject.toml'."

        with pytest.raises(RuntimeError, match=re.escape(pattern)):
            _get_project_metadata(self.project_path)
Example #4
    def test_toml_file_has_missing_mandatory_keys(self, mocker):
        mocker.patch.object(Path, "is_file", return_value=True)
        pyproject_toml_payload = {
            "tool": {
                "kedro": {"project_version": kedro_version, "unexpected_key": "hello"}
            }
        }
        mocker.patch("anyconfig.load", return_value=pyproject_toml_payload)
        pattern = (
            "Missing required keys ['package_name', 'project_name'] "
            "from 'pyproject.toml'."
        )

        with pytest.raises(RuntimeError, match=re.escape(pattern)):
            _get_project_metadata(self.project_path)
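The four tests above all validate the `[tool.kedro]` section of `pyproject.toml`. For reference, a minimal sketch of writing a file that passes these checks, assuming Kedro >= 0.17 (where `_get_project_metadata` lives in `kedro.framework.startup`, as Example #15 shows); the project values are placeholders, not taken from the examples:

from pathlib import Path

from kedro import __version__ as kedro_version
from kedro.framework.startup import _get_project_metadata


def write_valid_pyproject_toml(project_path: Path) -> None:
    # 'package_name', 'project_name' and 'project_version' are the mandatory
    # keys; 'source_dir' is optional and defaults to "src", and
    # 'project_version' must match the installed Kedro version.
    (project_path / "pyproject.toml").write_text(
        "[tool.kedro]\n"
        'package_name = "fake_package_name"\n'
        'project_name = "fake_project_name"\n'
        f'project_version = "{kedro_version}"\n'
    )


# usage sketch:
# write_valid_pyproject_toml(Path.cwd())
# metadata = _get_project_metadata(str(Path.cwd()))  # parses without error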
Example #5
def test_kedro_mlflow_config_experiment_was_deleted(kedro_project_with_mlflow_conf):

    # create an experiment with the same name and then delete it
    mlflow_tracking_uri = (kedro_project_with_mlflow_conf / "mlruns").as_uri()
    mlflow_client = MlflowClient(mlflow_tracking_uri)
    mlflow_client.create_experiment("exp1")
    mlflow_client.delete_experiment(
        mlflow_client.get_experiment_by_name("exp1").experiment_id
    )

    # the config must properly restore the experiment
    config = KedroMlflowConfig(
        project_path=kedro_project_with_mlflow_conf,
        mlflow_tracking_uri="mlruns",
        experiment_opts=dict(name="exp1"),
    )

    project_metadata = _get_project_metadata(kedro_project_with_mlflow_conf)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_mlflow_conf)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        "fake_project", project_path=kedro_project_with_mlflow_conf
    ):
        config.setup()

    assert "exp1" in [exp.name for exp in config.mlflow_client.list_experiments()]
Example #6
def test_mlflow_pipeline_hook_with_copy_mode(
    kedro_project_with_mlflow_conf,
    dummy_pipeline_ml,
    dummy_catalog,
    dummy_run_params,
    copy_mode,
    expected,
):
    # kedro_project_with_mlflow_conf is a conftest fixture
    project_metadata = _get_project_metadata(kedro_project_with_mlflow_conf)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_mlflow_conf)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        package_name=project_metadata.package_name,
        project_path=kedro_project_with_mlflow_conf,
    ):

        pipeline_hook = MlflowPipelineHook()
        runner = SequentialRunner()

        pipeline_hook.after_catalog_created(
            catalog=dummy_catalog,
            # `after_catalog_created` does not use any of the arguments below,
            # so we set them to empty values.
            conf_catalog={},
            conf_creds={},
            feed_dict={},
            save_version="",
            load_versions="",
            run_id=dummy_run_params["run_id"],
        )

        pipeline_to_run = pipeline_ml_factory(
            training=dummy_pipeline_ml.training,
            inference=dummy_pipeline_ml.inference,
            input_name=dummy_pipeline_ml.input_name,
            conda_env={},
            model_name=dummy_pipeline_ml.model_name,
            copy_mode=copy_mode,
        )
        pipeline_hook.before_pipeline_run(
            run_params=dummy_run_params, pipeline=pipeline_to_run, catalog=dummy_catalog
        )
        runner.run(pipeline_to_run, dummy_catalog)
        run_id = mlflow.active_run().info.run_id
        pipeline_hook.after_pipeline_run(
            run_params=dummy_run_params, pipeline=pipeline_to_run, catalog=dummy_catalog
        )

        mlflow_tracking_uri = (kedro_project_with_mlflow_conf / "mlruns").as_uri()
        mlflow.set_tracking_uri(mlflow_tracking_uri)

        loaded_model = mlflow.pyfunc.load_model(model_uri=f"runs:/{run_id}/model")

        actual_copy_mode = {
            name: ds._copy_mode
            for name, ds in loaded_model._model_impl.python_model.loaded_catalog._data_sets.items()
        }

        assert actual_copy_mode == expected
Example #7
def test_pipeline_run_hook_getting_configs(
    kedro_project,
    dummy_run_params,
    dummy_pipeline,
    dummy_catalog,
):

    _write_yaml(
        kedro_project / "conf" / "local" / "mlflow.yml",
        dict(hooks=dict(node=dict(flatten_dict_params=True, recursive=False, sep="-"))),
    )

    project_metadata = _get_project_metadata(kedro_project)
    _add_src_to_path(project_metadata.source_dir, kedro_project)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
            package_name=project_metadata.package_name,
            project_path=kedro_project,
    ):
        mlflow_node_hook = MlflowNodeHook()
        mlflow_node_hook.before_pipeline_run(run_params=dummy_run_params,
                                             pipeline=dummy_pipeline,
                                             catalog=dummy_catalog)

        assert (
            mlflow_node_hook.flatten,
            mlflow_node_hook.recursive,
            mlflow_node_hook.sep,
        ) == (True, False, "-")
Example #8
def ui(env, port, host):
    """Opens the mlflow user interface with the
    project-specific settings of mlflow.yml. This interface
    enables to browse and compares runs.
    """

    project_path = Path().cwd()
    project_metadata = _get_project_metadata(project_path)
    _add_src_to_path(project_metadata.source_dir, project_path)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        package_name=project_metadata.package_name,
        project_path=project_path,
        env=env,
    ):

        mlflow_conf = get_mlflow_config()
        host = host or mlflow_conf.ui_opts.get("host")
        port = port or mlflow_conf.ui_opts.get("port")

        # call mlflow ui with specific options
        # TODO : add more options for ui
        subprocess.call(
            [
                "mlflow",
                "ui",
                "--backend-store-uri",
                mlflow_conf.mlflow_tracking_uri,
                "--host",
                host,
                "--port",
                port,
            ]
        )
Example #9
def test_kedro_mlflow_config_setup_tracking_priority(kedro_project_with_mlflow_conf):
    """Test if the mlflow_tracking uri set is the one of mlflow.yml
    if it also eist in credentials.

    Args:
        mocker ([type]): [description]
        tmp_path ([type]): [description]
    """
    # create a ".kedro.yml" file to identify "tmp_path" as the root of a kedro project

    (kedro_project_with_mlflow_conf / "conf/base/credentials.yml").write_text(
        yaml.dump(dict(my_mlflow_creds=dict(mlflow_tracking_uri="mlruns2")))
    )

    config = KedroMlflowConfig(
        project_path=kedro_project_with_mlflow_conf,
        mlflow_tracking_uri="mlruns1",
        credentials="my_mlflow_creds",
    )

    project_metadata = _get_project_metadata(kedro_project_with_mlflow_conf)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_mlflow_conf)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        "fake_project", project_path=kedro_project_with_mlflow_conf
    ):
        config.setup()

    assert (
        mlflow.get_tracking_uri()
        == (kedro_project_with_mlflow_conf / "mlruns1").as_uri()
    )
Example #10
def main():  # pragma: no cover
    """Main entry point. Look for a ``cli.py``, and, if found, add its
    commands to `kedro`'s before invoking the CLI.
    """
    _init_plugins()

    global_groups = [cli]
    global_groups.extend(load_entry_points("global"))
    project_groups = []
    cli_context = dict()

    path = Path.cwd()
    if _is_project(path):
        # load project commands from cli.py
        metadata = _get_project_metadata(path)
        cli_context = dict(obj=metadata)
        _add_src_to_path(metadata.source_dir, path)

        project_groups.extend(load_entry_points("project"))
        package_name = metadata.package_name

        try:
            project_cli = importlib.import_module(f"{package_name}.cli")
            project_groups.append(project_cli.cli)
        except Exception as exc:
            raise KedroCliError(
                f"Cannot load commands from {package_name}.cli"
            ) from exc

    cli_collection = CommandCollection(
        ("Global commands", global_groups),
        ("Project specific commands", project_groups),
    )
    cli_collection(**cli_context)
Example #11
def test_mlflow_config_with_templated_config_loader(kedro_project_with_tcl):

    _write_yaml(
        kedro_project_with_tcl / "conf" / "local" / "mlflow.yml",
        dict(
            mlflow_tracking_uri="${mlflow_tracking_uri}",
            credentials=None,
            disable_tracking=dict(pipelines=["my_disabled_pipeline"]),
            experiment=dict(name="fake_package", create=True),
            run=dict(id="123456789", name="my_run", nested=True),
            ui=dict(port="5151", host="localhost"),
            hooks=dict(node=dict(
                flatten_dict_params=True,
                recursive=False,
                sep="-",
                long_parameters_strategy="truncate",
            )),
        ),
    )

    _write_yaml(
        kedro_project_with_tcl / "conf" / "local" / "globals.yml",
        dict(mlflow_tracking_uri="dynamic_mlruns"),
    )

    expected = {
        "mlflow_tracking_uri":
        (kedro_project_with_tcl / "dynamic_mlruns").as_uri(),
        "credentials": None,
        "disable_tracking": {
            "pipelines": ["my_disabled_pipeline"]
        },
        "experiments": {
            "name": "fake_package",
            "create": True
        },
        "run": {
            "id": "123456789",
            "name": "my_run",
            "nested": True
        },
        "ui": {
            "port": "5151",
            "host": "localhost"
        },
        "hooks": {
            "node": {
                "flatten_dict_params": True,
                "recursive": False,
                "sep": "-",
                "long_parameters_strategy": "truncate",
            }
        },
    }
    project_metadata = _get_project_metadata(kedro_project_with_tcl)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_tcl)
    configure_project(project_metadata.package_name)
    with KedroSession.create(project_metadata.package_name,
                             kedro_project_with_tcl):
        assert get_mlflow_config().to_dict() == expected
Example #12
def reload_kedro(path, line=None):
    """Line magic which reloads all Kedro default variables."""
    global startup_error
    global context
    global catalog
    global session

    try:
        import kedro.config.default_logger
        from kedro.framework.hooks import get_hook_manager
        from kedro.framework.project import configure_project
        from kedro.framework.session import KedroSession
        from kedro.framework.session.session import _activate_session
        from kedro.framework.cli.jupyter import collect_line_magic
    except ImportError:
        logging.error(
            "Kedro appears not to be installed in your current environment "
            "or your current IPython session was not started in a valid Kedro project."
        )
        raise

    try:
        path = path or project_path

        # clear hook manager
        hook_manager = get_hook_manager()
        name_plugin_pairs = hook_manager.list_name_plugin()
        for name, plugin in name_plugin_pairs:
            hook_manager.unregister(name=name, plugin=plugin)

        # remove cached user modules
        metadata = _get_project_metadata(path)
        to_remove = [
            mod for mod in sys.modules if mod.startswith(metadata.package_name)
        ]
        # `del` is used instead of `reload()` because: If the new version of a module does not
        # define a name that was defined by the old version, the old definition remains.
        for module in to_remove:
            del sys.modules[module]

        configure_project(metadata.package_name)
        session = KedroSession.create(metadata.package_name, path)
        _activate_session(session, force=True)
        logging.debug("Loading the context from %s", str(path))
        context = session.load_context()
        catalog = context.catalog

        logging.info("** Kedro project %s", str(metadata.project_name))
        logging.info("Defined global variable `context` and `catalog`")

        for line_magic in collect_line_magic():
            register_line_magic(needs_local_scope(line_magic))
            logging.info("Registered line magic `%s`", line_magic.__name__)
    except Exception as err:
        startup_error = err
        logging.exception("Kedro's ipython session startup script failed:\n%s",
                          str(err))
        raise err
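The comment in `reload_kedro` above notes why cached modules are deleted rather than reloaded: per the Python documentation, `importlib.reload` keeps old names that the new version of a module no longer defines, whereas a fresh import starts from a clean namespace. A minimal, self-contained sketch of that difference (`demo_mod` is a hypothetical module written on the fly):

import importlib
import sys
import tempfile
from pathlib import Path

sys.dont_write_bytecode = True  # ensure the source file is re-read on import
module_dir = Path(tempfile.mkdtemp())
sys.path.insert(0, str(module_dir))

(module_dir / "demo_mod.py").write_text("OLD_NAME = 1\n")
import demo_mod

(module_dir / "demo_mod.py").write_text("NEW_NAME = 2\n")
importlib.reload(demo_mod)
print(hasattr(demo_mod, "OLD_NAME"))  # True: the stale definition survives

del sys.modules["demo_mod"]
import demo_mod  # fresh import, clean namespace

print(hasattr(demo_mod, "OLD_NAME"))  # False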
Example #13
def test_node_hook_logging_above_limit_tag_strategy(kedro_project,
                                                    dummy_run_params,
                                                    param_length):

    _write_yaml(
        kedro_project / "conf" / "local" / "mlflow.yml",
        dict(hooks=dict(node=dict(long_parameters_strategy="tag")), ),
    )

    mlflow_tracking_uri = (kedro_project / "mlruns").as_uri()
    mlflow.set_tracking_uri(mlflow_tracking_uri)

    mlflow_node_hook = MlflowNodeHook()

    param_value = param_length * "a"
    node_inputs = {"params:my_param": param_value}

    project_metadata = _get_project_metadata(kedro_project)
    _add_src_to_path(project_metadata.source_dir, kedro_project)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
            package_name=project_metadata.package_name,
            project_path=kedro_project,
    ):
        with mlflow.start_run():
            mlflow_node_hook.before_pipeline_run(
                run_params=dummy_run_params,
                pipeline=Pipeline([]),
                catalog=DataCatalog(),
            )

            # IMPORTANT: exceeding the parameter length limit
            # should raise an error on every mlflow backend,
            # but it does not on the FileStore backend:
            # https://github.com/mlflow/mlflow/issues/2814#issuecomment-628284425
            # For simplicity the tests use the FileStore backend, where logging
            # works, so failure is enforced here (which differs slightly from
            # mlflow's behaviour)
            mlflow_node_hook.before_node_run(
                node=node(func=lambda x: x, inputs=dict(x="a"), outputs=None),
                catalog=DataCatalog(),  # can be empty
                inputs=node_inputs,
                is_async=False,
                run_id="132",
            )
            run_id = mlflow.active_run().info.run_id

        mlflow_client = MlflowClient(mlflow_tracking_uri)
        current_run = mlflow_client.get_run(run_id)
        assert current_run.data.params == {}
        assert {
            k: v
            for k, v in current_run.data.tags.items()
            if not k.startswith("mlflow")
        } == {
            "my_param": param_value
        }
Example #14
def _load_project(project_path):  # pragma: no cover
    # TODO: This one can potentially become project bootstrap and will be
    #  tested there
    if not _is_project(project_path):
        return None
    metadata = _get_project_metadata(project_path)
    _add_src_to_path(metadata.source_dir, project_path)
    configure_project(metadata.package_name)
    return metadata
Example #15
def _call_viz(
    host=None,
    port=None,
    browser=None,
    load_file=None,
    save_file=None,
    pipeline_name=None,
    env=None,
    project_path=None,
):
    global _DATA  # pylint: disable=global-statement,invalid-name
    global _CATALOG  # pylint: disable=global-statement

    if load_file:
        # Remove all handlers for root logger
        root_logger = logging.getLogger()
        root_logger.handlers = []

        _DATA = _load_from_file(load_file)
    else:
        try:
            project_path = project_path or Path.cwd()

            if KEDRO_VERSION.match(">=0.17.0"):  # pragma: no cover
                from kedro.framework.session import KedroSession
                from kedro.framework.startup import (  # pylint: disable=no-name-in-module,import-error
                    _get_project_metadata,
                )

                package_name = _get_project_metadata(project_path).package_name
                session_kwargs = dict(
                    package_name=package_name,
                    project_path=project_path,
                    env=env,
                    save_on_close=False,
                )
                session = KedroSession.create(  # pylint: disable=unexpected-keyword-arg
                    **session_kwargs
                )
                context = session.load_context()  # pylint: disable=no-member
                pipelines = _get_pipelines_from_context(context, pipeline_name)
            else:  # pragma: no cover
                context = load_context(project_path=project_path, env=env)
                pipelines = _get_pipelines_from_context(context, pipeline_name)
        except KedroContextError:
            raise KedroCliError(ERROR_PROJECT_ROOT)  # pragma: no cover

        _CATALOG = context.catalog
        _DATA = format_pipelines_data(pipelines)

    if save_file:
        Path(save_file).write_text(json.dumps(_DATA, indent=4, sort_keys=True))
    else:
        is_localhost = host in ("127.0.0.1", "localhost", "0.0.0.0")
        if browser and is_localhost:
            webbrowser.open_new("http://{}:{:d}/".format(host, port))
        app.run(host=host, port=port)
Example #16
    def test_invalid_version(self, invalid_version, mocker):
        mocker.patch.object(Path, "is_file", return_value=True)
        pyproject_toml_payload = {
            "tool": {
                "kedro": {
                    "source_dir": "source_dir",
                    "package_name": "fake_package_name",
                    "project_name": "fake_project_name",
                    "project_version": invalid_version,
                }
            }
        }
        mocker.patch("anyconfig.load", return_value=pyproject_toml_payload)

        pattern = (
            f"Your Kedro project version {invalid_version} does not match "
            f"Kedro package version {kedro_version} you are running.")
        with pytest.raises(ValueError, match=re.escape(pattern)):
            _get_project_metadata(self.project_path)
Example #17
    def test_toml_file_with_extra_keys(self, mocker):
        mocker.patch.object(Path, "is_file", return_value=True)
        pyproject_toml_payload = {
            "tool": {
                "kedro": {
                    "package_name": "fake_package_name",
                    "project_name": "fake_project_name",
                    "project_version": kedro_version,
                    "unexpected_key": "hello",
                }
            }
        }
        mocker.patch("anyconfig.load", return_value=pyproject_toml_payload)
        pattern = ("Found unexpected keys in 'pyproject.toml'. Make sure it "
                   "only contains the following keys: ['package_name', "
                   "'project_name', 'project_version', 'source_dir'].")

        with pytest.raises(RuntimeError, match=re.escape(pattern)):
            _get_project_metadata(self.project_path)
Example #18
def test_get_mlflow_config_in_uninitialized_project(kedro_project):
    # kedro_project is a pytest fixture defined in conftest
    with pytest.raises(
            KedroMlflowConfigError,
            match="No 'mlflow.yml' config file found in environment"):
        project_metadata = _get_project_metadata(kedro_project)
        _add_src_to_path(project_metadata.source_dir, kedro_project)
        configure_project(project_metadata.package_name)
        with KedroSession.create(project_metadata.package_name, kedro_project):
            get_mlflow_config()
Example #19
def test_mlflow_pipeline_hook_with_pipeline_ml_signature(
    kedro_project_with_mlflow_conf,
    env_from_dict,
    dummy_pipeline,
    dummy_catalog,
    dummy_run_params,
    model_signature,
    expected_signature,
):
    # kedro_project_with_mlflow_conf is a conftest fixture
    project_metadata = _get_project_metadata(kedro_project_with_mlflow_conf)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_mlflow_conf)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        package_name=project_metadata.package_name,
        project_path=kedro_project_with_mlflow_conf,
    ):
        pipeline_hook = MlflowPipelineHook()
        runner = SequentialRunner()

        pipeline_to_run = pipeline_ml_factory(
            training=dummy_pipeline.only_nodes_with_tags("training"),
            inference=dummy_pipeline.only_nodes_with_tags("inference"),
            input_name="raw_data",
            conda_env=env_from_dict,
            model_name="model",
            model_signature=model_signature,
        )

        pipeline_hook.after_catalog_created(
            catalog=dummy_catalog,
            # `after_catalog_created` does not use any of the arguments below,
            # so we set them to empty values.
            conf_catalog={},
            conf_creds={},
            feed_dict={},
            save_version="",
            load_versions="",
            run_id=dummy_run_params["run_id"],
        )
        pipeline_hook.before_pipeline_run(
            run_params=dummy_run_params, pipeline=pipeline_to_run, catalog=dummy_catalog
        )
        runner.run(pipeline_to_run, dummy_catalog)
        run_id = mlflow.active_run().info.run_id
        pipeline_hook.after_pipeline_run(
            run_params=dummy_run_params, pipeline=pipeline_to_run, catalog=dummy_catalog
        )

        # test: the model signature should have been logged with the model
        trained_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
        assert trained_model.metadata.signature == expected_signature
Example #20
def init(env, force, silent):
    """Updates the template of a kedro project.
    Running this command is mandatory to use kedro-mlflow.
    This adds "conf/base/mlflow.yml": This is a configuration file
    used for run parametrization when calling "kedro run" command.
    See INSERT_DOC_URL for further details.
    """

    # get constants
    mlflow_yml = "mlflow.yml"
    project_path = Path().cwd()
    project_metadata = _get_project_metadata(project_path)
    _add_src_to_path(project_metadata.source_dir, project_path)
    configure_project(project_metadata.package_name)
    session = KedroSession.create(
        project_metadata.package_name, project_path=project_path
    )
    context = session.load_context()
    mlflow_yml_path = project_path / context.CONF_ROOT / env / mlflow_yml

    # mlflow.yml is just a static file,
    # but the name of the experiment is set to be the same as the project
    if mlflow_yml_path.is_file() and not force:
        click.secho(
            click.style(
                f"A 'mlflow.yml' already exists at '{mlflow_yml_path}' You can use the ``--force`` option to override it.",
                fg="red",
            )
        )
    else:
        try:
            write_jinja_template(
                src=TEMPLATE_FOLDER_PATH / mlflow_yml,
                is_cookiecutter=False,
                dst=mlflow_yml_path,
                python_package=project_metadata.package_name,
            )
        except FileNotFoundError:
            click.secho(
                click.style(
                    f"No env '{env}' found. Please check that this folder exists inside the '{context.CONF_ROOT}' folder.",
                    fg="red",
                )
            )
        else:
            if not silent:
                click.secho(
                    click.style(
                        f"'{context.CONF_ROOT}/{env}/{mlflow_yml}' successfully updated.",
                        fg="green",
                    )
                )
Example #21
def load_context(project_path: Union[str, Path], **kwargs) -> KedroContext:
    """Loads the KedroContext object of a Kedro Project.
    This is the default way to load the KedroContext object for normal workflows such as
    CLI, Jupyter Notebook, Plugins, etc. It assumes the following project structure
    under the given project_path::

       <project_path>
           |__ <src_dir>
           |__ pyproject.toml

    The name of the <src_dir> is `src` by default. The `pyproject.toml` file is used
    for project metadata. Kedro configuration should be under the `[tool.kedro]` section.

    Args:
        project_path: Path to the Kedro project.
        kwargs: Optional kwargs for ``KedroContext`` class.

    Returns:
        Instance of ``KedroContext`` class defined in Kedro project.

    Raises:
        KedroContextError: `pyproject.toml` was not found or the `[tool.kedro]` section
            is missing, or loaded context has package conflict.

    """
    warn(
        "`kedro.framework.context.load_context` is now deprecated in favour of "
        "`KedroSession.load_context` and will be removed in Kedro 0.18.0.",
        DeprecationWarning,
    )
    project_path = Path(project_path).expanduser().resolve()
    metadata = _get_project_metadata(project_path)

    context_class = _get_project_settings(
        metadata.package_name, "CONTEXT_CLASS", KedroContext
    )

    # update kwargs with env from the environment variable
    # (defaults to None if not set)
    # need to do this because some CLI command (e.g `kedro run`) defaults to
    # passing in `env=None`
    kwargs["env"] = kwargs.get("env") or os.getenv("KEDRO_ENV")
    context = context_class(
        package_name=metadata.package_name, project_path=project_path, **kwargs
    )
    return context
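As the docstring above describes, `load_context` is pointed at the root of a Kedro project; a minimal usage sketch (the path and env values are placeholders):

from pathlib import Path

# `env` is forwarded to the context; when omitted it falls back to the
# KEDRO_ENV environment variable (see the kwargs handling above).
context = load_context(Path.cwd(), env="local")
catalog = context.catalog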
Example #22
def test_on_pipeline_error(kedro_project_with_mlflow_conf):

    tracking_uri = (kedro_project_with_mlflow_conf / "mlruns").as_uri()

    project_metadata = _get_project_metadata(kedro_project_with_mlflow_conf)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_mlflow_conf)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        package_name=project_metadata.package_name,
        project_path=kedro_project_with_mlflow_conf,
    ):

        def failing_node():
            mlflow.start_run(nested=True)
            raise ValueError("Let's make this pipeline fail")

        class DummyContextWithHook(KedroContext):
            project_name = "fake project"
            package_name = "fake_project"
            project_version = "0.16.5"

            hooks = (MlflowPipelineHook(),)

            def _get_pipeline(self, name: str = None) -> Pipeline:
                return Pipeline(
                    [
                        node(
                            func=failing_node,
                            inputs=None,
                            outputs="fake_output",
                        )
                    ]
                )

        with pytest.raises(ValueError):
            failing_context = DummyContextWithHook(
                "fake_package", kedro_project_with_mlflow_conf.as_posix()
            )
            failing_context.run()

        # the run we want is the last one in Default experiment
        failing_run_info = MlflowClient(tracking_uri).list_run_infos("0")[0]
        assert mlflow.active_run() is None  # the run must have been closed
        assert failing_run_info.status == RunStatus.to_string(
            RunStatus.FAILED
        )  # it must be marked as failed
Example #23
    def test_source_dir_specified_in_toml(self, mocker):
        mocker.patch.object(Path, "is_file", return_value=True)
        source_dir = "test_dir"
        pyproject_toml_payload = {
            "tool": {
                "kedro": {
                    "source_dir": source_dir,
                    "package_name": "fake_package_name",
                    "project_name": "fake_project_name",
                    "project_version": kedro_version,
                }
            }
        }
        mocker.patch("anyconfig.load", return_value=pyproject_toml_payload)

        project_metadata = _get_project_metadata(self.project_path)

        assert project_metadata.source_dir == self.project_path / source_dir
Example #24
def test_kedro_mlflow_config_new_experiment_does_not_exists(
    kedro_project_with_mlflow_conf,
):

    config = KedroMlflowConfig(
        project_path=kedro_project_with_mlflow_conf,
        mlflow_tracking_uri="mlruns",
        experiment_opts=dict(name="exp1"),
    )

    project_metadata = _get_project_metadata(kedro_project_with_mlflow_conf)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_mlflow_conf)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        "fake_project", project_path=kedro_project_with_mlflow_conf
    ):
        config.setup()

    assert "exp1" in [exp.name for exp in config.mlflow_client.list_experiments()]
Example #25
def test_node_hook_logging_above_limit_truncate_strategy(
        kedro_project, dummy_run_params, param_length):

    _write_yaml(
        kedro_project / "conf" / "local" / "mlflow.yml",
        dict(hooks=dict(node=dict(long_parameters_strategy="truncate")), ),
    )

    mlflow_tracking_uri = (kedro_project / "mlruns").as_uri()
    mlflow.set_tracking_uri(mlflow_tracking_uri)

    mlflow_node_hook = MlflowNodeHook()

    param_value = param_length * "a"
    node_inputs = {"params:my_param": param_value}

    project_metadata = _get_project_metadata(kedro_project)
    _add_src_to_path(project_metadata.source_dir, kedro_project)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
            package_name=project_metadata.package_name,
            project_path=kedro_project,
    ):
        with mlflow.start_run():
            mlflow_node_hook.before_pipeline_run(
                run_params=dummy_run_params,
                pipeline=Pipeline([]),
                catalog=DataCatalog(),
            )
            mlflow_node_hook.before_node_run(
                node=node(func=lambda x: x, inputs=dict(x="a"), outputs=None),
                catalog=DataCatalog(),  # can be empty
                inputs=node_inputs,
                is_async=False,
                run_id="132",
            )
            run_id = mlflow.active_run().info.run_id

        mlflow_client = MlflowClient(mlflow_tracking_uri)
        current_run = mlflow_client.get_run(run_id)
        assert current_run.data.params == {
            "my_param": param_value[0:MAX_PARAM_VAL_LENGTH]
        }
Example #26
def load_kedro_objects(path, line=None):  # pylint: disable=unused-argument
    """Line magic which reloads all Kedro default variables."""

    import kedro.config.default_logger  # noqa: F401 # pylint: disable=unused-import
    from kedro.framework.cli import load_entry_points
    from kedro.framework.cli.utils import _add_src_to_path
    from kedro.framework.project import configure_project
    from kedro.framework.session import KedroSession
    from kedro.framework.session.session import _activate_session
    from kedro.framework.startup import _get_project_metadata

    global context
    global catalog
    global session

    path = path or project_path
    metadata = _get_project_metadata(path)
    _add_src_to_path(metadata.source_dir, path)
    configure_project(metadata.package_name)

    _clear_hook_manager()

    _remove_cached_modules(metadata.package_name)

    session = KedroSession.create(metadata.package_name, path)
    _activate_session(session)
    logging.debug("Loading the context from %s", str(path))
    context = session.load_context()
    catalog = context.catalog

    get_ipython().push(variables={
        "context": context,
        "catalog": catalog,
        "session": session
    })

    logging.info("** Kedro project %s", str(metadata.project_name))
    logging.info("Defined global variable `context`, `session` and `catalog`")

    for line_magic in load_entry_points("line_magic"):
        register_line_magic(needs_local_scope(line_magic))
        logging.info("Registered line magic `%s`", line_magic.__name__)
Example #27
def test_kedro_mlflow_config_setup_export_credentials(kedro_project_with_mlflow_conf):

    (kedro_project_with_mlflow_conf / "conf/base/credentials.yml").write_text(
        yaml.dump(dict(my_mlflow_creds=dict(fake_mlflow_cred="my_fake_cred")))
    )

    # the config must export the credentials to environment variables
    config = KedroMlflowConfig(
        project_path=kedro_project_with_mlflow_conf, credentials="my_mlflow_creds"
    )

    project_metadata = _get_project_metadata(kedro_project_with_mlflow_conf)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_mlflow_conf)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        "fake_project", project_path=kedro_project_with_mlflow_conf
    ):
        config.setup()

    assert os.environ["fake_mlflow_cred"] == "my_fake_cred"
Example #28
def test_kedro_mlflow_config_experiment_exists(mocker, kedro_project_with_mlflow_conf):

    # create an experiment with the same name
    mlflow_tracking_uri = (
        kedro_project_with_mlflow_conf / "conf" / "local" / "mlruns"
    ).as_uri()
    MlflowClient(mlflow_tracking_uri).create_experiment("exp1")
    config = KedroMlflowConfig(
        project_path=kedro_project_with_mlflow_conf,
        mlflow_tracking_uri="mlruns",
        experiment_opts=dict(name="exp1"),
    )

    project_metadata = _get_project_metadata(kedro_project_with_mlflow_conf)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_mlflow_conf)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        "fake_project", project_path=kedro_project_with_mlflow_conf
    ):
        config.setup()
    assert "exp1" in [exp.name for exp in config.mlflow_client.list_experiments()]
Example #29
def test_kedro_mlflow_config_setup_set_tracking_uri(kedro_project_with_mlflow_conf):

    # build the tracking uri that the config is expected to set
    mlflow_tracking_uri = (kedro_project_with_mlflow_conf / "awesome_tracking").as_uri()

    # the config must set the tracking uri properly
    config = KedroMlflowConfig(
        project_path=kedro_project_with_mlflow_conf,
        mlflow_tracking_uri="awesome_tracking",
        experiment_opts=dict(name="exp1"),
    )

    project_metadata = _get_project_metadata(kedro_project_with_mlflow_conf)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_mlflow_conf)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        "fake_project", project_path=kedro_project_with_mlflow_conf
    ):
        config.setup()

    assert mlflow.get_tracking_uri() == mlflow_tracking_uri
Example #30
    def test_valid_toml_file(self, mocker):
        mocker.patch.object(Path, "is_file", return_value=True)
        pyproject_toml_payload = {
            "tool": {
                "kedro": {
                    "package_name": "fake_package_name",
                    "project_name": "fake_project_name",
                    "project_version": kedro_version,
                }
            }
        }
        mocker.patch("anyconfig.load", return_value=pyproject_toml_payload)

        actual = _get_project_metadata(self.project_path)

        expected = ProjectMetadata(
            source_dir=self.project_path / "src",  # default
            config_file=self.project_path / "pyproject.toml",
            package_name="fake_package_name",
            project_name="fake_project_name",
            project_version=kedro_version,
            project_path=self.project_path,
        )
        assert actual == expected