def pipeline_ml_with_inputs_artifacts():
    full_pipeline = Pipeline([
        node(
            func=remove_stopwords,
            inputs=dict(data="data", stopwords="stopwords_from_nltk"),
            outputs="cleaned_data",
            tags=["training", "inference"],
        ),
        node(
            func=train_fun,
            inputs="cleaned_data",
            outputs="model",
            tags=["training"],
        ),
        node(
            func=predict_fun,
            inputs=["model", "cleaned_data"],
            outputs="predictions",
            tags=["inference"],
        ),
    ])
    pipeline_ml_with_inputs_artifacts = pipeline_ml_factory(
        training=full_pipeline.only_nodes_with_tags("training"),
        inference=full_pipeline.only_nodes_with_tags("inference"),
        input_name="data",
    )
    return pipeline_ml_with_inputs_artifacts
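# NOTE: the fixture above references `remove_stopwords`, `train_fun` and
# `predict_fun`, defined elsewhere in the test module. A minimal sketch of
# plausible stand-ins (hypothetical bodies, just enough to wire the pipeline):
def remove_stopwords(data, stopwords):
    # pass-through stand-in: a real implementation would filter tokens
    return data


def train_fun(data):
    # stand-in "model": any picklable object works for wiring tests
    return 2


def predict_fun(model, data):
    return data * model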
def test_kedro_pipeline_ml_loading_deepcopiable_catalog(tmp_path, tmp_folder):

    # create pipeline and catalog; the training will not be triggered
    def fit_fun(data):
        pass

    def predict_fun(model, data):
        return model.predict(data)

    training_pipeline = Pipeline(
        [node(func=fit_fun, inputs="data", outputs="model")])

    inference_pipeline = Pipeline([
        node(func=predict_fun, inputs=["model", "data"],
             outputs="predictions"),
    ])

    ml_pipeline = pipeline_ml_factory(
        training=training_pipeline,
        inference=inference_pipeline,
        input_name="data",
    )

    # emulate training by creating the model manually
    model_dataset = MlflowModelSaverDataSet(
        filepath=(tmp_path / "model.pkl").resolve().as_posix(),
        flavor="mlflow.sklearn")

    data = pd.DataFrame(
        data=[
            [1, 2],
            [3, 4],
        ],
        columns=["a", "b"],
    )
    labels = [4, 6]
    linreg = LinearRegression()
    linreg.fit(data, labels)
    model_dataset.save(linreg)

    # check that mlflow loading is ok
    catalog = DataCatalog({"data": MemoryDataSet(), "model": model_dataset})

    kedro_model = KedroPipelineModel(pipeline=ml_pipeline,
                                     catalog=catalog,
                                     input_name=ml_pipeline.input_name)
    artifacts = kedro_model.extract_pipeline_artifacts(tmp_folder)

    mlflow_tracking_uri = (tmp_path / "mlruns").as_uri()
    mlflow.set_tracking_uri(mlflow_tracking_uri)

    with mlflow.start_run():
        mlflow.pyfunc.log_model(artifact_path="model",
                                python_model=kedro_model,
                                artifacts=artifacts)
        run_id = mlflow.active_run().info.run_id

    loaded_model = mlflow.pyfunc.load_model(model_uri=f"runs:/{run_id}/model")
    predictions = loaded_model.predict(data)
    assert all(predictions == [4.0, 6.0])
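# For reference, a plausible import block for the test above (module paths
# assume an older kedro-mlflow layout and may differ between versions):
from pathlib import Path

import mlflow
import pandas as pd
from kedro.io import DataCatalog, MemoryDataSet
from kedro.pipeline import Pipeline, node
from sklearn.linear_model import LinearRegression

from kedro_mlflow.io.models import MlflowModelSaverDataSet
from kedro_mlflow.mlflow import KedroPipelineModel
from kedro_mlflow.pipeline import pipeline_ml_factory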
def test_mlflow_pipeline_hook_with_copy_mode(
    kedro_project_with_mlflow_conf,
    dummy_pipeline_ml,
    dummy_catalog,
    dummy_run_params,
    copy_mode,
    expected,
):
    # config_with_base_mlflow_conf is a conftest fixture
    project_metadata = _get_project_metadata(kedro_project_with_mlflow_conf)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_mlflow_conf)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        package_name=project_metadata.package_name,
        project_path=kedro_project_with_mlflow_conf,
    ):

        pipeline_hook = MlflowPipelineHook()
        runner = SequentialRunner()

        pipeline_hook.after_catalog_created(
            catalog=dummy_catalog,
            # `after_catalog_created` does not use any of the arguments below,
            # so we set them to empty values.
            conf_catalog={},
            conf_creds={},
            feed_dict={},
            save_version="",
            load_versions="",
            run_id=dummy_run_params["run_id"],
        )

        pipeline_to_run = pipeline_ml_factory(
            training=dummy_pipeline_ml.training,
            inference=dummy_pipeline_ml.inference,
            input_name=dummy_pipeline_ml.input_name,
            conda_env={},
            model_name=dummy_pipeline_ml.model_name,
            copy_mode=copy_mode,
        )
        pipeline_hook.before_pipeline_run(
            run_params=dummy_run_params, pipeline=pipeline_to_run, catalog=dummy_catalog
        )
        runner.run(pipeline_to_run, dummy_catalog)
        run_id = mlflow.active_run().info.run_id
        pipeline_hook.after_pipeline_run(
            run_params=dummy_run_params, pipeline=pipeline_to_run, catalog=dummy_catalog
        )

        mlflow_tracking_uri = (kedro_project_with_mlflow_conf / "mlruns").as_uri()
        mlflow.set_tracking_uri(mlflow_tracking_uri)

        loaded_model = mlflow.pyfunc.load_model(model_uri=f"runs:/{run_id}/model")

        actual_copy_mode = {
            name: ds._copy_mode
            for name, ds in loaded_model._model_impl.python_model.loaded_catalog._data_sets.items()
        }

        assert actual_copy_mode == expected
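# `copy_mode` and `expected` come from a parametrize decorator on this test.
# A plausible shape (dataset names are hypothetical and must match the
# entries of `dummy_catalog`):
import pytest

COPY_MODE_CASES = [
    # a single string applies to every dataset of the inference catalog
    ("assign", {"data": "assign", "model": "assign"}),
    # a dict pins the copy mode per dataset
    ({"data": "deepcopy", "model": "assign"},
     {"data": "deepcopy", "model": "assign"}),
    # None lets KedroPipelineModel pick a default per dataset
    (None, {"data": None, "model": None}),
]
# used as: @pytest.mark.parametrize("copy_mode,expected", COPY_MODE_CASES)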
def pipeline_ml_obj():
    def preprocess_fun(data):
        return data

    def fit_fun(data):
        return 2

    def predict_fun(model, data):
        return data * model

    full_pipeline = Pipeline([
        node(
            func=preprocess_fun,
            inputs="raw_data",
            outputs="data",
            tags=["inference", "training"],
        ),
        node(func=fit_fun, inputs="data", outputs="model", tags=["training"]),
        node(
            func=predict_fun,
            inputs=["data", "model"],
            outputs="predictions",
            tags=["inference"],
        ),
    ])

    pipeline_ml_obj = pipeline_ml_factory(
        training=full_pipeline.only_nodes_with_tags("training"),
        inference=full_pipeline.only_nodes_with_tags("inference"),
        input_name="raw_data",
    )

    return pipeline_ml_obj
def test_mlflow_pipeline_hook_with_copy_mode(
    mocker,
    monkeypatch,
    tmp_path,
    config_dir,
    dummy_pipeline_ml,
    dummy_catalog,
    dummy_run_params,
    dummy_mlflow_conf,
    copy_mode,
    expected,
):
    # config_with_base_mlflow_conf is a conftest fixture
    mocker.patch("kedro_mlflow.utils._is_kedro_project", return_value=True)
    monkeypatch.chdir(tmp_path)
    pipeline_hook = MlflowPipelineHook()
    runner = SequentialRunner()

    pipeline_hook.after_catalog_created(
        catalog=dummy_catalog,
        # `after_catalog_created` does not use any of the arguments below,
        # so we set them to empty values.
        conf_catalog={},
        conf_creds={},
        feed_dict={},
        save_version="",
        load_versions="",
        run_id=dummy_run_params["run_id"],
    )

    pipeline_to_run = pipeline_ml_factory(
        training=dummy_pipeline_ml.training,
        inference=dummy_pipeline_ml.inference,
        input_name=dummy_pipeline_ml.input_name,
        conda_env={},
        model_name=dummy_pipeline_ml.model_name,
        copy_mode=copy_mode,
    )
    pipeline_hook.before_pipeline_run(run_params=dummy_run_params,
                                      pipeline=pipeline_to_run,
                                      catalog=dummy_catalog)
    runner.run(pipeline_to_run, dummy_catalog)
    run_id = mlflow.active_run().info.run_id
    pipeline_hook.after_pipeline_run(run_params=dummy_run_params,
                                     pipeline=pipeline_to_run,
                                     catalog=dummy_catalog)

    mlflow_tracking_uri = (tmp_path / "mlruns").as_uri()
    mlflow.set_tracking_uri(mlflow_tracking_uri)

    loaded_model = mlflow.pyfunc.load_model(model_uri=f"runs:/{run_id}/model")

    actual_copy_mode = {
        name: ds._copy_mode
        for name, ds in loaded_model._model_impl.python_model.loaded_catalog._data_sets.items()
    }

    assert actual_copy_mode == expected
def test_wrong_pipeline_ml_signature_type(pipeline_with_tag):
    with pytest.raises(
            ValueError,
            match="model_signature must be one of 'None', 'auto', or a 'ModelSignature'",
    ):
        pipeline_ml_factory(
            training=pipeline_with_tag,
            inference=Pipeline([
                node(
                    func=predict_fun,
                    inputs=["model", "data"],
                    outputs="predictions",
                )
            ]),
            input_name="data",
            model_signature="wrong_type",
        )
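# For contrast, valid values for `model_signature` are None, the string
# "auto", or an mlflow ModelSignature, e.g. one inferred from sample data:
import pandas as pd
from mlflow.models import infer_signature

sample_input = pd.DataFrame({"a": [1, 3], "b": [2, 4]})
sample_output = pd.DataFrame({"predictions": [4.0, 6.0]})

# any of these would be accepted by pipeline_ml_factory:
valid_signatures = [None, "auto", infer_signature(sample_input, sample_output)]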
def test_mlflow_hook_save_pipeline_ml_with_artifact_path(
    kedro_project_with_mlflow_conf,
    env_from_dict,
    dummy_pipeline,
    dummy_catalog,
    dummy_run_params,
    artifact_path,
    expected_artifact_path,
):
    # config_with_base_mlflow_conf is a conftest fixture
    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(
            project_path=kedro_project_with_mlflow_conf) as session:
        mlflow_hook = MlflowHook()
        runner = SequentialRunner()

        log_model_kwargs = {
            "conda_env": env_from_dict,
        }
        if artifact_path is not None:
            # we need to test what happens if the key is NOT present
            log_model_kwargs["artifact_path"] = artifact_path

        pipeline_to_run = pipeline_ml_factory(
            training=dummy_pipeline.only_nodes_with_tags("training"),
            inference=dummy_pipeline.only_nodes_with_tags("inference"),
            input_name="raw_data",
            log_model_kwargs=log_model_kwargs,
        )

        context = session.load_context()
        mlflow_hook.after_context_created(context)
        mlflow_hook.after_catalog_created(
            catalog=dummy_catalog,
            # `after_catalog_created` does not use any of the arguments below,
            # so we set them to empty values.
            conf_catalog={},
            conf_creds={},
            feed_dict={},
            save_version="",
            load_versions="",
        )
        mlflow_hook.before_pipeline_run(run_params=dummy_run_params,
                                        pipeline=pipeline_to_run,
                                        catalog=dummy_catalog)
        runner.run(pipeline_to_run, dummy_catalog, session._hook_manager)
        run_id = mlflow.active_run().info.run_id
        mlflow_hook.after_pipeline_run(run_params=dummy_run_params,
                                       pipeline=pipeline_to_run,
                                       catalog=dummy_catalog)

        # test : parameters should have been logged
        trained_model = mlflow.pyfunc.load_model(
            f"runs:/{run_id}/{expected_artifact_path}")
        # the real test is that the model is loaded without error
        assert trained_model is not None
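# `artifact_path` / `expected_artifact_path` come from a parametrize
# decorator; a plausible pairing, assuming the fallback to mlflow's
# conventional "model" artifact path when the key is absent:
import pytest

ARTIFACT_PATH_CASES = [
    (None, "model"),           # key omitted -> default artifact path
    ("my_model", "my_model"),  # explicit path is used verbatim
]
# used as:
# @pytest.mark.parametrize("artifact_path,expected_artifact_path", ARTIFACT_PATH_CASES)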
def dummy_pipeline_ml(dummy_pipeline, env_from_dict):

    dummy_pipeline_ml = pipeline_ml_factory(
        training=dummy_pipeline.only_nodes_with_tags("training"),
        inference=dummy_pipeline.only_nodes_with_tags("inference"),
        input_name="raw_data",
        conda_env=env_from_dict,
        model_name="model",
    )
    return dummy_pipeline_ml
def pipeline_ml_with_parameters():
    def remove_stopwords(data, stopwords):
        return data

    def train_fun_hyperparam(data, hyperparam):
        return 2

    def predict_fun(model, data):
        return data * model

    def convert_probs_to_pred(data, threshold):
        return (data > threshold) * 1

    full_pipeline = Pipeline([
        # almost the same as previously, but the stopwords are now parameters;
        # this parameter is shared between the training and inference pipelines
        node(
            func=remove_stopwords,
            inputs=dict(data="data", stopwords="params:stopwords"),
            outputs="cleaned_data",
            tags=["training", "inference"],
        ),
        # parameters in training pipeline, should not be persisted
        node(
            func=train_fun_hyperparam,
            inputs=["cleaned_data", "params:penalty"],
            outputs="model",
            tags=["training"],
        ),
        node(
            func=predict_fun,
            inputs=["model", "cleaned_data"],
            outputs="predicted_probs",
            tags=["inference"],
        ),
        # this time, there is a parameter only for the inference pipeline
        node(
            func=convert_probs_to_pred,
            inputs=["predicted_probs", "params:threshold"],
            outputs="predictions",
            tags=["inference"],
        ),
    ])
    pipeline_ml_with_parameters = pipeline_ml_factory(
        training=full_pipeline.only_nodes_with_tags("training"),
        inference=full_pipeline.only_nodes_with_tags("inference"),
        input_name="data",
        log_model_kwargs={
            "conda_env": {
                "python": "3.7.0",
                "dependencies": ["kedro==0.16.5"]
            },
        },
    )
    return pipeline_ml_with_parameters
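# Running this fixture's pipelines requires the `params:` entries to exist
# in the catalog. A minimal sketch with hypothetical values:
from kedro.io import DataCatalog, MemoryDataSet

catalog = DataCatalog({
    "data": MemoryDataSet(["some", "tokenized", "text"]),
    "params:stopwords": MemoryDataSet(["the", "a"]),
    "params:penalty": MemoryDataSet(0.1),
    "params:threshold": MemoryDataSet(0.5),
})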
def test_too_many_free_inputs():
    with pytest.raises(KedroMlflowPipelineMLInputsError,
                       match="No free input is allowed"):
        pipeline_ml_factory(
            training=Pipeline([
                node(
                    func=preprocess_fun,
                    inputs="raw_data",
                    outputs="neither_data_nor_model",
                )
            ]),
            inference=Pipeline([
                node(
                    func=predict_fun,
                    inputs=["model", "data"],
                    outputs="predictions",
                )
            ]),
            input_name="data",
        )
def pipeline_ml_with_tag(pipeline_with_tag):
    pipeline_ml_with_tag = pipeline_ml_factory(
        training=pipeline_with_tag,
        inference=Pipeline([
            node(func=predict_fun,
                 inputs=["model", "data"],
                 outputs="predictions")
        ]),
        input_name="data",
    )
    return pipeline_ml_with_tag
def dummy_pipeline_ml(dummy_pipeline, env_from_dict):

    dummy_pipeline_ml = pipeline_ml_factory(
        training=dummy_pipeline.only_nodes_with_tags("training"),
        inference=dummy_pipeline.only_nodes_with_tags("inference"),
        input_name="raw_data",
        log_model_kwargs={
            "conda_env": env_from_dict,
            "artifact_path": "model"
        },
    )
    return dummy_pipeline_ml
def test_too_many_inference_outputs():
    with pytest.raises(
            KedroMlflowPipelineMLOutputsError,
            match="The inference pipeline must have one and only one output",
    ):
        pipeline_ml_factory(
            training=Pipeline(
                [node(
                    func=train_fun,
                    inputs="data",
                    outputs="model",
                )]),
            inference=Pipeline([
                node(
                    func=predict_fun_with_metric,
                    inputs=["model", "data"],
                    outputs=["predictions", "metric"],
                )
            ]),
            input_name="data",
        )
def test_not_enough_inference_outputs():
    with pytest.raises(
            KedroMlflowPipelineMLOutputsError,
            match="The inference pipeline must have one and only one output",
    ):
        pipeline_ml_factory(
            training=Pipeline(
                [node(
                    func=train_fun,
                    inputs="data",
                    outputs="model",
                )]),
            inference=Pipeline([
                node(
                    func=predict_fun_return_nothing,
                    inputs=["model", "data"],
                    outputs=None,
                )
            ]),
            input_name="data",
        )
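# These error-path tests rely on a few more module-level helpers beyond the
# ones sketched earlier; plausible stand-ins with hypothetical bodies:
def preprocess_fun(data):
    return data


def predict_fun_with_metric(model, data):
    # two outputs on purpose, to trip the "one and only one output" check
    return data * model, "metric"


def predict_fun_return_nothing(model, data):
    # no output on purpose, to trip the same check from the other side
    pass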
def test_mlflow_pipeline_hook_with_pipeline_ml_signature(
    kedro_project_with_mlflow_conf,
    env_from_dict,
    dummy_pipeline,
    dummy_catalog,
    dummy_run_params,
    model_signature,
    expected_signature,
):
    # config_with_base_mlflow_conf is a conftest fixture
    project_metadata = _get_project_metadata(kedro_project_with_mlflow_conf)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_mlflow_conf)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        package_name=project_metadata.package_name,
        project_path=kedro_project_with_mlflow_conf,
    ):
        pipeline_hook = MlflowPipelineHook()
        runner = SequentialRunner()

        pipeline_to_run = pipeline_ml_factory(
            training=dummy_pipeline.only_nodes_with_tags("training"),
            inference=dummy_pipeline.only_nodes_with_tags("inference"),
            input_name="raw_data",
            conda_env=env_from_dict,
            model_name="model",
            model_signature=model_signature,
        )

        pipeline_hook.after_catalog_created(
            catalog=dummy_catalog,
            # `after_catalog_created` does not use any of the arguments below,
            # so we set them to empty values.
            conf_catalog={},
            conf_creds={},
            feed_dict={},
            save_version="",
            load_versions="",
            run_id=dummy_run_params["run_id"],
        )
        pipeline_hook.before_pipeline_run(
            run_params=dummy_run_params, pipeline=pipeline_to_run, catalog=dummy_catalog
        )
        runner.run(pipeline_to_run, dummy_catalog)
        run_id = mlflow.active_run().info.run_id
        pipeline_hook.after_pipeline_run(
            run_params=dummy_run_params, pipeline=pipeline_to_run, catalog=dummy_catalog
        )

        # test : parameters should have been logged
        trained_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
        assert trained_model.metadata.signature == expected_signature
def test_mlflow_pipeline_hook_with_pipeline_ml_signature(
    mocker,
    monkeypatch,
    tmp_path,
    config_dir,
    env_from_dict,
    dummy_pipeline,
    dummy_catalog,
    dummy_run_params,
    dummy_mlflow_conf,
    model_signature,
    expected_signature,
):
    # config_with_base_mlflow_conf is a conftest fixture
    mocker.patch("kedro_mlflow.utils._is_kedro_project", return_value=True)
    monkeypatch.chdir(tmp_path)
    pipeline_hook = MlflowPipelineHook()
    runner = SequentialRunner()

    pipeline_to_run = pipeline_ml_factory(
        training=dummy_pipeline.only_nodes_with_tags("training"),
        inference=dummy_pipeline.only_nodes_with_tags("inference"),
        input_name="raw_data",
        conda_env=env_from_dict,
        model_name="model",
        model_signature=model_signature,
    )

    pipeline_hook.after_catalog_created(
        catalog=dummy_catalog,
        # `after_catalog_created` does not use any of the arguments below,
        # so we set them to empty values.
        conf_catalog={},
        conf_creds={},
        feed_dict={},
        save_version="",
        load_versions="",
        run_id=dummy_run_params["run_id"],
    )
    pipeline_hook.before_pipeline_run(run_params=dummy_run_params,
                                      pipeline=pipeline_to_run,
                                      catalog=dummy_catalog)
    runner.run(pipeline_to_run, dummy_catalog)
    run_id = mlflow.active_run().info.run_id
    pipeline_hook.after_pipeline_run(run_params=dummy_run_params,
                                     pipeline=pipeline_to_run,
                                     catalog=dummy_catalog)

    # test : parameters should have been logged
    trained_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
    assert trained_model.metadata.signature == expected_signature
def test_mlflow_hook_save_pipeline_ml_with_signature(
    kedro_project_with_mlflow_conf,
    env_from_dict,
    dummy_pipeline,
    dummy_catalog,
    dummy_run_params,
    model_signature,
    expected_signature,
):
    # config_with_base_mlflow_conf is a conftest fixture
    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(
            project_path=kedro_project_with_mlflow_conf) as session:
        mlflow_hook = MlflowHook()
        runner = SequentialRunner()

        pipeline_to_run = pipeline_ml_factory(
            training=dummy_pipeline.only_nodes_with_tags("training"),
            inference=dummy_pipeline.only_nodes_with_tags("inference"),
            input_name="raw_data",
            log_model_kwargs={
                "conda_env": env_from_dict,
                "signature": model_signature,
            },
        )

        context = session.load_context()
        mlflow_hook.after_context_created(context)
        mlflow_hook.after_catalog_created(
            catalog=dummy_catalog,
            # `after_catalog_created` does not use any of the arguments below,
            # so we set them to empty values.
            conf_catalog={},
            conf_creds={},
            feed_dict={},
            save_version="",
            load_versions="",
        )
        mlflow_hook.before_pipeline_run(run_params=dummy_run_params,
                                        pipeline=pipeline_to_run,
                                        catalog=dummy_catalog)
        runner.run(pipeline_to_run, dummy_catalog, session._hook_manager)
        run_id = mlflow.active_run().info.run_id
        mlflow_hook.after_pipeline_run(run_params=dummy_run_params,
                                       pipeline=pipeline_to_run,
                                       catalog=dummy_catalog)

        # test : parameters should have been logged
        trained_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
        assert trained_model.metadata.signature == expected_signature
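# In all three signature tests, `model_signature` / `expected_signature`
# arrive via parametrization. A plausible version (the explicit signature
# is hypothetical and should describe `dummy_catalog`'s raw data):
from mlflow.models import ModelSignature
from mlflow.types.schema import ColSpec, Schema

dummy_signature = ModelSignature(inputs=Schema([ColSpec(type="long", name="a")]))

SIGNATURE_CASES = [
    (None, None),                        # no signature is logged
    (dummy_signature, dummy_signature),  # passed through unchanged
    # ("auto", <signature inferred from the input dataset at save time>),
]
# used as:
# @pytest.mark.parametrize("model_signature,expected_signature", SIGNATURE_CASES)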
def pipeline_ml_with_intermediary_artifacts():
    full_pipeline = Pipeline([
        node(
            func=preprocess_fun,
            inputs="raw_data",
            outputs="data",
            tags=["training"],
        ),
        node(
            func=fit_encoder_fun,
            inputs="data",
            outputs="encoder",
            tags=["training"],
        ),
        node(
            func=apply_encoder_fun,
            inputs=["encoder", "data"],
            outputs="encoded_data",
            tags=["training", "inference"],
        ),
        node(
            func=train_fun,
            inputs="encoded_data",
            outputs="model",
            tags=["training"],
        ),
        node(
            func=predict_fun,
            inputs=["model", "encoded_data"],
            outputs="predictions",
            tags=["inference"],
        ),
    ])
    pipeline_ml_with_intermediary_artifacts = pipeline_ml_factory(
        training=full_pipeline.only_nodes_with_tags("training"),
        inference=full_pipeline.only_nodes_with_tags("inference"),
        input_name="data",
    )
    return pipeline_ml_with_intermediary_artifacts
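# `fit_encoder_fun` and `apply_encoder_fun` are assumed to exist at module
# level; hypothetical stand-ins in the same spirit as the other helpers:
def fit_encoder_fun(data):
    # stand-in "encoder": any picklable object is enough here
    return 4


def apply_encoder_fun(encoder, data):
    return data * encoder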
    def register_pipelines(self) -> Dict[str, Pipeline]:
        """Register the project's pipeline.

        Returns:
            A mapping from a pipeline name to a ``Pipeline`` object.

        """
        etl_pipeline = create_etl_pipeline()
        etl_instances_pipeline = etl_pipeline.only_nodes_with_tags("etl_instances")
        etl_labels_pipeline = etl_pipeline.only_nodes_with_tags("etl_labels")

        ml_pipeline = create_ml_pipeline()
        inference_pipeline = ml_pipeline.only_nodes_with_tags("inference")
        training_pipeline_ml = pipeline_ml_factory(
            training=ml_pipeline.only_nodes_with_tags("training"),
            inference=inference_pipeline,
            input_name="instances",
            model_name="kedro_mlflow_tutorial",
            conda_env={
                "python": 3.7,
                "pip": [f"kedro_mlflow_tutorial=={PROJECT_VERSION}"],
            },
            model_signature="auto",
        )

        user_app_pipeline = create_user_app_pipeline()

        return {
            "etl_instances": etl_instances_pipeline,
            "etl_labels": etl_labels_pipeline,
            "training": training_pipeline_ml,
            "inference": inference_pipeline,
            "user_app": user_app_pipeline,
            "__default__": etl_instances_pipeline
            + etl_labels_pipeline
            + inference_pipeline
            + user_app_pipeline,
        }
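# With this registration in place, running the `training` pipeline is what
# triggers kedro-mlflow to log the bundled inference pipeline as an mlflow
# model. A sketch of a programmatic run (the session API varies across
# kedro versions):
from kedro.framework.session import KedroSession
from kedro.framework.startup import bootstrap_project

bootstrap_project(".")  # illustrative project root
with KedroSession.create(project_path=".") as session:
    session.run(pipeline_name="training")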
def pipeline_ml_with_parameters():
    full_pipeline = Pipeline([
        # almost the same as previously, but the stopwords are now parameters;
        # this parameter is shared between the training and inference pipelines
        node(
            func=remove_stopwords,
            inputs=dict(data="data", stopwords="params:stopwords"),
            outputs="cleaned_data",
            tags=["training", "inference"],
        ),
        # parameters in training pipeline, should not be persisted
        node(
            func=train_fun_hyperparam,
            inputs=["cleaned_data", "params:penalty"],
            outputs="model",
            tags=["training"],
        ),
        node(
            func=predict_fun,
            inputs=["model", "cleaned_data"],
            outputs="predicted_probs",
            tags=["inference"],
        ),
        # this time, there is a parameter only for the inference pipeline
        node(
            func=convert_probs_to_pred,
            inputs=["predicted_probs", "params:threshold"],
            outputs="predictions",
            tags=["inference"],
        ),
    ])
    pipeline_ml_with_parameters = pipeline_ml_factory(
        training=full_pipeline.only_nodes_with_tags("training"),
        inference=full_pipeline.only_nodes_with_tags("inference"),
        input_name="data",
    )
    return pipeline_ml_with_parameters
def test_mlflow_hook_save_pipeline_ml_with_copy_mode(
    kedro_project_with_mlflow_conf,
    dummy_pipeline_ml,
    dummy_catalog,
    dummy_run_params,
    copy_mode,
    expected,
):
    # config_with_base_mlflow_conf is a conftest fixture
    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(
            project_path=kedro_project_with_mlflow_conf) as session:
        context = session.load_context()
        mlflow_hook = MlflowHook()
        runner = SequentialRunner()
        mlflow_hook.after_context_created(context)
        mlflow_hook.after_catalog_created(
            catalog=dummy_catalog,
            # `after_catalog_created` does not use any of the arguments below,
            # so we set them to empty values.
            conf_catalog={},
            conf_creds={},
            feed_dict={},
            save_version="",
            load_versions="",
        )

        pipeline_to_run = pipeline_ml_factory(
            training=dummy_pipeline_ml.training,
            inference=dummy_pipeline_ml.inference,
            input_name=dummy_pipeline_ml.input_name,
            log_model_kwargs={
                "artifact_path":
                dummy_pipeline_ml.log_model_kwargs["artifact_path"],
                "conda_env": {
                    "python": "3.7.0",
                    "dependencies": ["kedro==0.16.5"]
                },
            },
            kpm_kwargs={
                "copy_mode": copy_mode,
            },
        )
        mlflow_hook.before_pipeline_run(run_params=dummy_run_params,
                                        pipeline=pipeline_to_run,
                                        catalog=dummy_catalog)
        runner.run(pipeline_to_run, dummy_catalog, session._hook_manager)
        run_id = mlflow.active_run().info.run_id
        mlflow_hook.after_pipeline_run(run_params=dummy_run_params,
                                       pipeline=pipeline_to_run,
                                       catalog=dummy_catalog)

        mlflow_tracking_uri = (kedro_project_with_mlflow_conf /
                               "mlruns").as_uri()
        mlflow.set_tracking_uri(mlflow_tracking_uri)

        loaded_model = mlflow.pyfunc.load_model(
            model_uri=f"runs:/{run_id}/model")

        actual_copy_mode = {
            name: ds._copy_mode
            for name, ds in loaded_model._model_impl.python_model.loaded_catalog._data_sets.items()
        }

        assert actual_copy_mode == expected