# shared imports assumed by the examples below (older kedro / kedro-mlflow
# releases that still use the "DataSet" naming)
import mlflow
import pandas as pd
from pathlib import Path
from sklearn.linear_model import LinearRegression

from kedro.io import DataCatalog, MemoryDataSet
from kedro.pipeline import Pipeline, node
from kedro_mlflow.io.models import MlflowModelSaverDataSet
from kedro_mlflow.mlflow import KedroPipelineModel
from kedro_mlflow.pipeline import pipeline_ml_factory


def test_kedro_pipeline_ml_loading_deepcopiable_catalog(tmp_path, tmp_folder):

    # create the pipeline and catalog; training will not actually be triggered
    def fit_fun(data):
        pass

    def predict_fun(model, data):
        return model.predict(data)

    training_pipeline = Pipeline(
        [node(func=fit_fun, inputs="data", outputs="model")])

    inference_pipeline = Pipeline([
        node(func=predict_fun, inputs=["model", "data"],
             outputs="predictions"),
    ])

    ml_pipeline = pipeline_ml_factory(
        training=training_pipeline,
        inference=inference_pipeline,
        input_name="data",
    )

    # emulate training by creating the model manually
    model_dataset = MlflowModelSaverDataSet(
        filepath=(tmp_path / "model.pkl").resolve().as_posix(),
        flavor="mlflow.sklearn")

    data = pd.DataFrame(
        data=[
            [1, 2],
            [3, 4],
        ],
        columns=["a", "b"],
    )
    labels = [4, 6]
    linreg = LinearRegression()
    linreg.fit(data, labels)
    model_dataset.save(linreg)

    # check that mlflow loading is ok
    catalog = DataCatalog({"data": MemoryDataSet(), "model": model_dataset})

    kedro_model = KedroPipelineModel(pipeline=ml_pipeline,
                                     catalog=catalog,
                                     input_name=ml_pipeline.input_name)
    artifacts = kedro_model.extract_pipeline_artifacts(tmp_folder)

    mlflow_tracking_uri = (tmp_path / "mlruns").as_uri()
    mlflow.set_tracking_uri(mlflow_tracking_uri)

    with mlflow.start_run():
        mlflow.pyfunc.log_model(artifact_path="model",
                                python_model=kedro_model,
                                artifacts=artifacts)
        run_id = mlflow.active_run().info.run_id

    loaded_model = mlflow.pyfunc.load_model(model_uri=f"runs:/{run_id}/model")
    # assert the comparison instead of silently discarding it
    assert all(loaded_model.predict(data) == [4.0, 6.0])
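
Note: tmp_folder is not a built-in pytest fixture. A minimal sketch of what the
test presumably relies on (a throwaway directory for extract_pipeline_artifacts;
the real conftest may differ):

import pytest
from pathlib import Path
from tempfile import TemporaryDirectory

@pytest.fixture
def tmp_folder():
    # hypothetical fixture: a temporary directory distinct from tmp_path
    with TemporaryDirectory() as folder:
        yield Path(folder)
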
Example #2
def test_pyfunc_flavor_python_model_save_and_load(
    tmp_path, pipeline_ml_obj, dummy_catalog, kedro_pipeline_model
):

    artifacts = pipeline_ml_obj.extract_pipeline_artifacts(dummy_catalog)

    model_config = {
        "name": "kedro_pipeline_model",
        "config": {
            "type": "kedro_mlflow.io.models.MlflowModelSaverDataSet",
            "filepath": tmp_path / "data" / "06_models" / "my_custom_model",
            "flavor": "mlflow.pyfunc",
            "pyfunc_workflow": "python_model",
            "save_args": {"artifacts": artifacts, "conda_env": {"python": "3.7.0"}},
        },
    }

    mlflow_model_ds = MlflowModelSaverDataSet.from_config(**model_config)
    mlflow_model_ds.save(kedro_pipeline_model)

    assert mlflow.active_run() is None

    # the run is closed; create another dataset and reload
    # (emulates a new "kedro run" that loads the saved model)
    loaded_model = mlflow_model_ds.load()

    # assert on the prediction instead of discarding the comparison
    pd.testing.assert_frame_equal(
        loaded_model.predict(pd.DataFrame(data=[1], columns=["a"])),
        pd.DataFrame(data=[2], columns=["a"]),
        check_dtype=False,
    )
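
This test leans on three fixtures that are not shown: pipeline_ml_obj,
dummy_catalog and kedro_pipeline_model. A minimal sketch consistent with the
[1] -> [2] assertion above (the "model" is just a constant multiplier of 2);
these are assumptions inferred from usage, and the real conftest may differ:

import pytest
import pandas as pd
from kedro.extras.datasets.pickle import PickleDataSet
from kedro.io import DataCatalog, MemoryDataSet
from kedro.pipeline import Pipeline, node
from kedro_mlflow.mlflow import KedroPipelineModel
from kedro_mlflow.pipeline import pipeline_ml_factory


def preprocess_fun(raw_data):
    return raw_data


def train_fun(data):
    return 2  # trivial "model": a constant multiplier


def predict_fun(model, data):
    return data * model  # [1] * 2 == [2], matching the assertion above


@pytest.fixture
def pipeline_ml_obj():
    return pipeline_ml_factory(
        training=Pipeline([
            node(preprocess_fun, inputs="raw_data", outputs="data"),
            node(train_fun, inputs="data", outputs="model"),
        ]),
        inference=Pipeline([
            node(preprocess_fun, inputs="raw_data", outputs="data"),
            node(predict_fun, inputs=["model", "data"], outputs="predictions"),
        ]),
        input_name="raw_data",
    )


@pytest.fixture
def dummy_catalog(tmp_path):
    catalog = DataCatalog({
        "raw_data": MemoryDataSet(pd.DataFrame(data=[1], columns=["a"])),
        "data": MemoryDataSet(),
        "model": PickleDataSet(
            filepath=(tmp_path / "model.pkl").resolve().as_posix()
        ),
    })
    catalog.save("model", 2)  # emulate a prior training run
    return catalog


@pytest.fixture
def kedro_pipeline_model(pipeline_ml_obj, dummy_catalog):
    return KedroPipelineModel(
        pipeline=pipeline_ml_obj,
        catalog=dummy_catalog,
        input_name=pipeline_ml_obj.input_name,
    )
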
Example #3
def test_save_unversioned_under_same_path(
    linreg_path, linreg_model,
):
    model_config = {
        "name": "linreg",
        "config": {
            "type": "kedro_mlflow.io.models.MlflowModelSaverDataSet",
            "flavor": "mlflow.sklearn",
            "filepath": linreg_path,
        },
    }
    mlflow_model_ds = MlflowModelSaverDataSet.from_config(**model_config)
    mlflow_model_ds.save(linreg_model)
    # check that a second save does not fail;
    # a failure would occur if the underlying folder already exists
    mlflow_model_ds.save(linreg_model)
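
linreg_path and linreg_model are fixtures defined elsewhere; plausible minimal
versions, inferred from how they are used (assumptions, not the source's code):

import pytest
import pandas as pd
from sklearn.linear_model import LinearRegression

@pytest.fixture
def linreg_path(tmp_path):
    # hypothetical: a local folder for the mlflow.sklearn flavor to save into
    return tmp_path / "data" / "06_models" / "linreg"

@pytest.fixture
def linreg_model():
    # hypothetical: any fitted sklearn estimator would do
    linreg = LinearRegression()
    linreg.fit(pd.DataFrame(data=[[1, 2], [3, 4]], columns=["a", "b"]), [4, 6])
    return linreg
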
Example #4
def test_pyfunc_flavor_python_model_save_and_load(tmp_path, tmp_folder,
                                                  pipeline, dummy_catalog):

    kedro_pipeline_model = KedroPipelineModel(
        pipeline=pipeline,
        catalog=dummy_catalog,
        input_name="raw_data",
    )
    artifacts = kedro_pipeline_model.extract_pipeline_artifacts(tmp_folder)

    model_config = {
        "name": "kedro_pipeline_model",
        "config": {
            "type": "kedro_mlflow.io.models.MlflowModelSaverDataSet",
            "filepath": (tmp_path / "data" / "06_models" / "my_custom_model").as_posix(),
            "flavor": "mlflow.pyfunc",
            "pyfunc_workflow": "python_model",
            "save_args": {
                "artifacts": artifacts,
                "conda_env": {
                    "python": "3.7.0",
                    "dependencies": ["kedro==0.16.5"],
                },
            },
        },
    }

    mlflow_model_ds = MlflowModelSaverDataSet.from_config(**model_config)
    mlflow_model_ds.save(kedro_pipeline_model)

    assert mlflow.active_run() is None

    # the run is closed; create another dataset and reload
    # (emulates a new "kedro run" that loads the saved model)
    loaded_model = mlflow_model_ds.load()

    # assert on the prediction instead of discarding the comparison
    pd.testing.assert_frame_equal(
        loaded_model.predict(pd.DataFrame(data=[1], columns=["a"])),
        pd.DataFrame(data=[2], columns=["a"]),
        check_dtype=False,
    )
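
Unlike Example #2, the pipeline fixture here is a plain kedro Pipeline that is
wrapped manually in KedroPipelineModel. A sketch consistent with
input_name="raw_data" and the dummy_catalog sketched earlier (an assumption;
the real fixture may differ):

import pytest
from kedro.pipeline import Pipeline, node


def preprocess_fun(raw_data):
    return raw_data


def predict_fun(model, data):
    return data * model


@pytest.fixture
def pipeline():
    # hypothetical fixture: inference-only pipeline whose free inputs
    # are "raw_data" and "model"
    return Pipeline([
        node(preprocess_fun, inputs="raw_data", outputs="data"),
        node(predict_fun, inputs=["model", "data"], outputs="predictions"),
    ])
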
Example #5
def test_save_load_local(linreg_path, linreg_model, versioned):
    model_config = {
        "name": "linreg",
        "config": {
            "type": "kedro_mlflow.io.models.MlflowModelSaverDataSet",
            "filepath": linreg_path.as_posix(),
            "flavor": "mlflow.sklearn",
            "versioned": versioned,
        },
    }
    mlflow_model_ds = MlflowModelSaverDataSet.from_config(**model_config)
    mlflow_model_ds.save(linreg_model)

    if versioned:
        assert (linreg_path / mlflow_model_ds._version.save /
                linreg_path.name).exists()  # Versioned model saved locally
    else:
        assert linreg_path.exists()  # Unversioned model saved locally

    linreg_model_loaded = mlflow_model_ds.load()
    assert isinstance(linreg_model_loaded, LinearRegression)
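
The versioned argument must be injected from outside the snippet; a plausible
setup is a parametrized fixture (an assumption, not shown in the source):

import pytest

@pytest.fixture(params=[False, True])
def versioned(request):
    # hypothetical fixture: runs the test once unversioned, once versioned
    return request.param
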