Example No. 1
def test_sparkml_estimator_model_log(tmpdir, spark_model_estimator):
    old_tracking_uri = kiwi.get_tracking_uri()
    cnt = 0
    # should_start_run tests whether or not calling log_model() automatically starts a run.
    for should_start_run in [False, True]:
        for dfs_tmp_dir in [None, os.path.join(str(tmpdir), "test")]:
            print("should_start_run =", should_start_run, "dfs_tmp_dir =", dfs_tmp_dir)
            try:
                tracking_dir = os.path.abspath(str(tmpdir.join("mlruns")))
                kiwi.set_tracking_uri("file://%s" % tracking_dir)
                if should_start_run:
                    kiwi.start_run()
                artifact_path = "model%d" % cnt
                cnt += 1
                sparkm.log_model(
                    artifact_path=artifact_path,
                    spark_model=spark_model_estimator.model,
                    dfs_tmpdir=dfs_tmp_dir)
                model_uri = "runs:/{run_id}/{artifact_path}".format(
                    run_id=kiwi.active_run().info.run_id,
                    artifact_path=artifact_path)

                # test reloaded model
                reloaded_model = sparkm.load_model(model_uri=model_uri, dfs_tmpdir=dfs_tmp_dir)
                preds_df = reloaded_model.transform(spark_model_estimator.spark_df)
                preds = [x.prediction for x in preds_df.select("prediction").collect()]
                assert spark_model_estimator.predictions == preds
            finally:
                kiwi.end_run()
                kiwi.set_tracking_uri(old_tracking_uri)
                x = dfs_tmp_dir or sparkm.DFS_TMP
                shutil.rmtree(x)
                shutil.rmtree(tracking_dir)
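The pattern this gallery keeps repeating — log a model inside a run, build a "runs:/{run_id}/{artifact_path}" URI, then reload from it — is easy to miss among the fixtures. A minimal self-contained sketch of that round trip, assuming kiwi's sklearn flavor mirrors mlflow.sklearn (load_model here is that assumption):

import kiwi
import kiwi.sklearn
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier

X, y = load_iris(return_X_y=True)
knn = KNeighborsClassifier().fit(X, y)

with kiwi.start_run() as run:
    # The model is stored under <run artifact root>/model.
    kiwi.sklearn.log_model(sk_model=knn, artifact_path="model")
    model_uri = "runs:/{run_id}/model".format(run_id=run.info.run_id)

# Reload from the run-relative URI and verify the round trip.
reloaded = kiwi.sklearn.load_model(model_uri=model_uri)  # assumed to mirror mlflow.sklearn.load_model
assert (reloaded.predict(X) == knn.predict(X)).all()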
Example No. 2
def test_log_model_without_specified_conda_env_uses_default_env_with_expected_dependencies(
        sklearn_knn_model, main_scoped_model_class):
    sklearn_artifact_path = "sk_model"
    with kiwi.start_run():
        kiwi.sklearn.log_model(sk_model=sklearn_knn_model,
                               artifact_path=sklearn_artifact_path)
        sklearn_run_id = kiwi.active_run().info.run_id

    pyfunc_artifact_path = "pyfunc_model"
    with kiwi.start_run():
        kiwi.pyfunc.log_model(
            artifact_path=pyfunc_artifact_path,
            artifacts={
                "sk_model":
                utils_get_artifact_uri(artifact_path=sklearn_artifact_path,
                                       run_id=sklearn_run_id)
            },
            python_model=main_scoped_model_class(predict_fn=None))
        pyfunc_model_path = _download_artifact_from_uri(
            "runs:/{run_id}/{artifact_path}".format(
                run_id=kiwi.active_run().info.run_id,
                artifact_path=pyfunc_artifact_path))

    pyfunc_conf = _get_flavor_configuration(
        model_path=pyfunc_model_path, flavor_name=kiwi.pyfunc.FLAVOR_NAME)
    conda_env_path = os.path.join(pyfunc_model_path,
                                  pyfunc_conf[kiwi.pyfunc.ENV])
    with open(conda_env_path, "r") as f:
        conda_env = yaml.safe_load(f)

    assert conda_env == kiwi.pyfunc.model.get_default_conda_env()
Example No. 3
def test_model_log_load(sklearn_knn_model, main_scoped_model_class, iris_data):
    sklearn_artifact_path = "sk_model"
    with kiwi.start_run():
        kiwi.sklearn.log_model(sk_model=sklearn_knn_model,
                               artifact_path=sklearn_artifact_path)
        sklearn_model_uri = "runs:/{run_id}/{artifact_path}".format(
            run_id=kiwi.active_run().info.run_id,
            artifact_path=sklearn_artifact_path)

    def test_predict(sk_model, model_input):
        return sk_model.predict(model_input) * 2

    pyfunc_artifact_path = "pyfunc_model"
    with kiwi.start_run():
        kiwi.pyfunc.log_model(
            artifact_path=pyfunc_artifact_path,
            artifacts={
                "sk_model": sklearn_model_uri,
            },
            python_model=main_scoped_model_class(test_predict))
        pyfunc_model_uri = "runs:/{run_id}/{artifact_path}".format(
            run_id=kiwi.active_run().info.run_id,
            artifact_path=pyfunc_artifact_path)
        pyfunc_model_path = _download_artifact_from_uri(
            "runs:/{run_id}/{artifact_path}".format(
                run_id=kiwi.active_run().info.run_id,
                artifact_path=pyfunc_artifact_path))
        model_config = Model.load(os.path.join(pyfunc_model_path, "MLmodel"))

    loaded_pyfunc_model = kiwi.pyfunc.load_pyfunc(model_uri=pyfunc_model_uri)
    assert model_config.to_yaml() == loaded_pyfunc_model.metadata.to_yaml()
    np.testing.assert_array_equal(
        loaded_pyfunc_model.predict(iris_data[0]),
        test_predict(sk_model=sklearn_knn_model, model_input=iris_data[0]))
Example No. 4
def test_model_log(h2o_iris_model):
    h2o_model = h2o_iris_model.model
    old_uri = kiwi.get_tracking_uri()
    # should_start_run tests whether or not calling log_model() automatically starts a run.
    for should_start_run in [False, True]:
        with TempDir(chdr=True, remove_on_exit=True):
            try:
                artifact_path = "gbm_model"
                kiwi.set_tracking_uri("test")
                if should_start_run:
                    kiwi.start_run()
                kiwi.h2o.log_model(h2o_model=h2o_model,
                                   artifact_path=artifact_path)
                model_uri = "runs:/{run_id}/{artifact_path}".format(
                    run_id=kiwi.active_run().info.run_id,
                    artifact_path=artifact_path)

                # Load model
                h2o_model_loaded = kiwi.h2o.load_model(model_uri=model_uri)
                assert all(
                    h2o_model_loaded.predict(h2o_iris_model.inference_data).
                    as_data_frame() == h2o_model.predict(
                        h2o_iris_model.inference_data).as_data_frame())
            finally:
                kiwi.end_run()
                kiwi.set_tracking_uri(old_uri)
Example No. 5
def test_autologging_dedups_multiple_reads_of_same_datasource(
        spark_session, format_to_file_path):
    kiwi.spark.autolog()
    data_format = list(format_to_file_path.keys())[0]
    file_path = format_to_file_path[data_format]
    df = spark_session.read.format(data_format).option("header", "true"). \
        option("inferSchema", "true").load(file_path)
    with kiwi.start_run():
        run_id = kiwi.active_run().info.run_id
        df.collect()
        df.filter("number1 > 0").collect()
        df.limit(2).collect()
        df.collect()
        time.sleep(1)
    run = kiwi.get_run(run_id)
    _assert_spark_data_logged(run=run, path=file_path, data_format=data_format)
    # Test context provider flow
    df.filter("number1 > 0").collect()
    df.limit(2).collect()
    df.collect()
    with kiwi.start_run():
        run_id2 = kiwi.active_run().info.run_id
    time.sleep(1)
    run2 = kiwi.get_run(run_id2)
    _assert_spark_data_logged(run=run2,
                              path=file_path,
                              data_format=data_format)
Example No. 6
def test_start_deleted_run():
    run_id = None
    with kiwi.start_run() as active_run:
        run_id = active_run.info.run_id
    tracking.MlflowClient().delete_run(run_id)
    with pytest.raises(MlflowException,
                       match='because it is in the deleted state.'):
        with kiwi.start_run(run_id=run_id):
            pass
    assert kiwi.active_run() is None
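The inverse also holds: a deleted run can be restored and then resumed. A hedged sketch, assuming kiwi's MlflowClient mirrors mlflow's restore_run:

client = tracking.MlflowClient()
client.restore_run(run_id)  # assumption: restore_run behaves as in mlflow's client API
with kiwi.start_run(run_id=run_id):
    pass  # resuming the restored run now succeeds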
Example No. 7
def test_start_run_exp_id_0():
    kiwi.set_experiment("some-experiment")
    # Create a run and verify that the current active experiment is the one we just set
    with kiwi.start_run() as active_run:
        exp_id = active_run.info.experiment_id
        assert exp_id != FileStore.DEFAULT_EXPERIMENT_ID
        assert MlflowClient().get_experiment(exp_id).name == "some-experiment"
    # Set experiment ID to 0 when creating a run, verify that the specified experiment ID is honored
    with kiwi.start_run(experiment_id=0) as active_run:
        assert active_run.info.experiment_id == FileStore.DEFAULT_EXPERIMENT_ID
Example No. 8
def gluon_random_data_run():
    kiwi.gluon.autolog()

    with kiwi.start_run() as run:
        data = DataLoader(LogsDataset(), batch_size=128, last_batch="discard")
        validation = DataLoader(LogsDataset(),
                                batch_size=128,
                                last_batch="discard")

        model = HybridSequential()
        model.add(Dense(64, activation="relu"))
        model.add(Dense(64, activation="relu"))
        model.add(Dense(10))
        model.initialize()
        model.hybridize()
        trainer = Trainer(model.collect_params(),
                          "adam",
                          optimizer_params={
                              "learning_rate": .001,
                              "epsilon": 1e-07
                          })
        est = estimator.Estimator(net=model,
                                  loss=SoftmaxCrossEntropyLoss(),
                                  metrics=Accuracy(),
                                  trainer=trainer)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            est.fit(data, epochs=3, val_data=validation)
    client = kiwi.tracking.MlflowClient()
    return client.get_run(run.info.run_id)
Example No. 9
def test_log_model_calls_register_model(sklearn_knn_model,
                                        main_scoped_model_class):
    register_model_patch = mock.patch("mlflow.register_model")
    with register_model_patch:
        sklearn_artifact_path = "sk_model_no_run"
        with kiwi.start_run():
            kiwi.sklearn.log_model(sk_model=sklearn_knn_model,
                                   artifact_path=sklearn_artifact_path)
            sklearn_model_uri = "runs:/{run_id}/{artifact_path}".format(
                run_id=kiwi.active_run().info.run_id,
                artifact_path=sklearn_artifact_path)

        def test_predict(sk_model, model_input):
            return sk_model.predict(model_input) * 2

        pyfunc_artifact_path = "pyfunc_model"
        assert kiwi.active_run() is None
        kiwi.pyfunc.log_model(
            artifact_path=pyfunc_artifact_path,
            artifacts={"sk_model": sklearn_model_uri},
            python_model=main_scoped_model_class(test_predict),
            registered_model_name="AdsModel1")
        model_uri = "runs:/{run_id}/{artifact_path}".format(
            run_id=kiwi.active_run().info.run_id,
            artifact_path=pyfunc_artifact_path)
        kiwi.register_model.assert_called_once_with(model_uri, "AdsModel1")
        kiwi.end_run()
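The registered_model_name argument is shorthand for registering by URI after logging. The equivalent explicit flow, sketched with the same fixture names as the test above (so not standalone):

with kiwi.start_run() as run:
    kiwi.pyfunc.log_model(
        artifact_path="pyfunc_model",
        artifacts={"sk_model": sklearn_model_uri},
        python_model=main_scoped_model_class(test_predict))
    model_uri = "runs:/{run_id}/pyfunc_model".format(run_id=run.info.run_id)
# What registered_model_name does implicitly:
kiwi.register_model(model_uri, "AdsModel1")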
Example No. 10
def test_pyfunc_model_log_load_no_active_run(sklearn_knn_model,
                                             main_scoped_model_class,
                                             iris_data):
    sklearn_artifact_path = "sk_model_no_run"
    with kiwi.start_run():
        kiwi.sklearn.log_model(sk_model=sklearn_knn_model,
                               artifact_path=sklearn_artifact_path)
        sklearn_model_uri = "runs:/{run_id}/{artifact_path}".format(
            run_id=kiwi.active_run().info.run_id,
            artifact_path=sklearn_artifact_path)

    def test_predict(sk_model, model_input):
        return sk_model.predict(model_input) * 2

    pyfunc_artifact_path = "pyfunc_model"
    assert kiwi.active_run() is None
    kiwi.pyfunc.log_model(artifact_path=pyfunc_artifact_path,
                          artifacts={"sk_model": sklearn_model_uri},
                          python_model=main_scoped_model_class(test_predict))
    pyfunc_model_uri = "runs:/{run_id}/{artifact_path}".format(
        run_id=kiwi.active_run().info.run_id,
        artifact_path=pyfunc_artifact_path)
    loaded_pyfunc_model = kiwi.pyfunc.load_pyfunc(model_uri=pyfunc_model_uri)
    np.testing.assert_array_equal(
        loaded_pyfunc_model.predict(iris_data[0]),
        test_predict(sk_model=sklearn_knn_model, model_input=iris_data[0]))
    kiwi.end_run()
Example No. 11
File: main.py Project: iPieter/kiwi
def workflow(als_max_iter, keras_hidden_units, max_row_limit):
    # Note: The entrypoint names are defined in MLproject. The artifact directories
    # are documented by each step's .py file.
    with kiwi.start_run() as active_run:
        os.environ['SPARK_CONF_DIR'] = os.path.abspath('.')
        git_commit = active_run.data.tags.get(mlflow_tags.MLFLOW_GIT_COMMIT)
        load_raw_data_run = _get_or_run("load_raw_data", {}, git_commit)
        ratings_csv_uri = os.path.join(load_raw_data_run.info.artifact_uri,
                                       "ratings-csv-dir")
        etl_data_run = _get_or_run("etl_data", {
            "ratings_csv": ratings_csv_uri,
            "max_row_limit": max_row_limit
        }, git_commit)
        ratings_parquet_uri = os.path.join(etl_data_run.info.artifact_uri,
                                           "ratings-parquet-dir")

        # We specify a spark-defaults.conf to override the default driver memory. ALS requires
        # significant memory. The driver memory property cannot be set by the application itself.
        als_run = _get_or_run("als", {
            "ratings_data": ratings_parquet_uri,
            "max_iter": str(als_max_iter)
        }, git_commit)
        als_model_uri = os.path.join(als_run.info.artifact_uri, "als-model")

        keras_params = {
            "ratings_data": ratings_parquet_uri,
            "als_model_uri": als_model_uri,
            "hidden_units": keras_hidden_units,
        }
        _get_or_run("train_keras", keras_params, git_commit, use_cache=False)
Example No. 12
def test_autologging_of_datasources_with_different_formats(
        spark_session, format_to_file_path):
    kiwi.spark.autolog()
    for data_format, file_path in format_to_file_path.items():
        base_df = spark_session.read.format(data_format).option("header", "true").\
            option("inferSchema", "true").load(file_path)
        base_df.createOrReplaceTempView("temptable")
        table_df0 = spark_session.table("temptable")
        table_df1 = spark_session.sql(
            "SELECT number1, number2 from temptable LIMIT 5")
        dfs = [
            base_df, table_df0, table_df1,
            base_df.filter("number1 > 0"),
            base_df.select("number1"),
            base_df.limit(2),
            base_df.filter("number1 > 0").select("number1").limit(2)
        ]

        for df in dfs:
            with kiwi.start_run():
                run_id = kiwi.active_run().info.run_id
                df.collect()
                time.sleep(1)
            run = kiwi.get_run(run_id)
            _assert_spark_data_logged(run=run,
                                      path=file_path,
                                      data_format=data_format)
Example No. 13
def test_cli_build_image_with_runs_uri_calls_expected_azure_routines(
        sklearn_model):
    artifact_path = "model"
    with kiwi.start_run():
        kiwi.sklearn.log_model(sk_model=sklearn_model,
                               artifact_path=artifact_path)
        run_id = kiwi.active_run().info.run_id
    model_uri = "runs:/{run_id}/{artifact_path}".format(
        run_id=run_id, artifact_path=artifact_path)

    with AzureMLMocks() as aml_mocks:
        result = CliRunner(env={
            "LC_ALL": "en_US.UTF-8",
            "LANG": "en_US.UTF-8"
        }).invoke(kiwi.azureml.cli.commands, [
            'build-image',
            '-m',
            model_uri,
            '-w',
            'test_workspace',
            '-i',
            'image_name',
            '-n',
            'model_name',
        ])
        assert result.exit_code == 0

        assert aml_mocks["register_model"].call_count == 1
        assert aml_mocks["create_image"].call_count == 1
        assert aml_mocks["load_workspace"].call_count == 1
Example No. 14
def test_build_image_includes_default_metadata_in_azure_image_and_model_tags(
        sklearn_model):
    artifact_path = "model"
    with kiwi.start_run():
        kiwi.sklearn.log_model(sk_model=sklearn_model,
                               artifact_path=artifact_path)
        run_id = kiwi.active_run().info.run_id
    model_uri = "runs:///{run_id}/{artifact_path}".format(
        run_id=run_id, artifact_path=artifact_path)
    model_config = Model.load(
        os.path.join(_download_artifact_from_uri(artifact_uri=model_uri),
                     "MLmodel"))

    with AzureMLMocks() as aml_mocks:
        workspace = get_azure_workspace()
        kiwi.azureml.build_image(model_uri=model_uri, workspace=workspace)

        register_model_call_args = aml_mocks["register_model"].call_args_list
        assert len(register_model_call_args) == 1
        _, register_model_call_kwargs = register_model_call_args[0]
        called_tags = register_model_call_kwargs["tags"]
        assert called_tags["model_uri"] == model_uri
        assert called_tags["python_version"] ==\
            model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.PY_VERSION]

        create_image_call_args = aml_mocks["create_image"].call_args_list
        assert len(create_image_call_args) == 1
        _, create_image_call_kwargs = create_image_call_args[0]
        image_config = create_image_call_kwargs["image_config"]
        assert image_config.tags["model_uri"] == model_uri
        assert image_config.tags["python_version"] ==\
            model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.PY_VERSION]
Example No. 15
def test_default_conda_env_strips_dev_suffix_from_pyspark_version(spark_model_iris, model_path):
    mock_version_standard = mock.PropertyMock(return_value="2.4.0")
    with mock.patch("pyspark.__version__", new_callable=mock_version_standard):
        default_conda_env_standard = sparkm.get_default_conda_env()

    for dev_version in ["2.4.0.dev0", "2.4.0.dev", "2.4.0.dev1", "2.4.0dev.a", "2.4.0.devb"]:
        mock_version_dev = mock.PropertyMock(return_value=dev_version)
        with mock.patch("pyspark.__version__", new_callable=mock_version_dev):
            default_conda_env_dev = sparkm.get_default_conda_env()
            assert (default_conda_env_dev == default_conda_env_standard)

            with kiwi.start_run():
                sparkm.log_model(
                    spark_model=spark_model_iris.model, artifact_path="model", conda_env=None)
                model_uri = "runs:/{run_id}/{artifact_path}".format(
                    run_id=kiwi.active_run().info.run_id,
                    artifact_path="model")

            model_path = _download_artifact_from_uri(artifact_uri=model_uri)
            pyfunc_conf = _get_flavor_configuration(
                model_path=model_path, flavor_name=pyfunc.FLAVOR_NAME)
            conda_env_path = os.path.join(model_path, pyfunc_conf[pyfunc.ENV])
            with open(conda_env_path, "r") as f:
                persisted_conda_env_dev = yaml.safe_load(f)
            assert (persisted_conda_env_dev == default_conda_env_standard)

    for unaffected_version in ["2.0", "2.3.4", "2"]:
        mock_version = mock.PropertyMock(return_value=unaffected_version)
        with mock.patch("pyspark.__version__", new_callable=mock_version):
            assert unaffected_version in yaml.safe_dump(sparkm.get_default_conda_env())
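The behavior under test reduces to one substitution: everything from the first "dev" onward is dropped before the pyspark version is pinned in the default conda env. A minimal sketch of that normalization (an illustration, not the flavor's actual code):

import re

def _strip_dev_suffix(version):
    # "2.4.0.dev0", "2.4.0dev.a", "2.4.0.devb" all normalize to "2.4.0";
    # release versions such as "2.3.4" pass through untouched.
    return re.sub(r"(\.?)dev.*$", "", version)

assert _strip_dev_suffix("2.4.0.dev0") == "2.4.0"
assert _strip_dev_suffix("2.3.4") == "2.3.4"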
Example No. 16
def test_prepare_env_passes(sk_model):
    if no_conda:
        pytest.skip("This test requires conda.")

    with TempDir(chdr=True):
        with kiwi.start_run() as active_run:
            kiwi.sklearn.log_model(sk_model, "model")
            model_uri = "runs:/{run_id}/model".format(
                run_id=active_run.info.run_id)

        # Test with no conda
        p = subprocess.Popen(
            ["mlflow", "models", "prepare-env", "-m", model_uri, "--no-conda"],
            stderr=subprocess.PIPE)
        assert p.wait() == 0

        # With conda
        p = subprocess.Popen(
            ["mlflow", "models", "prepare-env", "-m", model_uri],
            stderr=subprocess.PIPE)
        assert p.wait() == 0

        # Should be idempotent
        p = subprocess.Popen(
            ["mlflow", "models", "prepare-env", "-m", model_uri],
            stderr=subprocess.PIPE)
        assert p.wait() == 0
Example No. 17
def test_log_and_load_model_persists_and_restores_model_successfully(
        saved_tf_iris_model):
    artifact_path = "model"
    with kiwi.start_run():
        kiwi.tensorflow.log_model(
            tf_saved_model_dir=saved_tf_iris_model.path,
            tf_meta_graph_tags=saved_tf_iris_model.meta_graph_tags,
            tf_signature_def_key=saved_tf_iris_model.signature_def_key,
            artifact_path=artifact_path)
        model_uri = "runs:/{run_id}/{artifact_path}".format(
            run_id=kiwi.active_run().info.run_id, artifact_path=artifact_path)

    tf_graph = tf.Graph()
    tf_sess = tf.Session(graph=tf_graph)
    with tf_graph.as_default():
        signature_def = kiwi.tensorflow.load_model(model_uri=model_uri,
                                                   tf_sess=tf_sess)

        for _, input_signature in signature_def.inputs.items():
            t_input = tf_graph.get_tensor_by_name(input_signature.name)
            assert t_input is not None

        for _, output_signature in signature_def.outputs.items():
            t_output = tf_graph.get_tensor_by_name(output_signature.name)
            assert t_output is not None
Example No. 18
def test_serve_gunicorn_opts(iris_data, sk_model):
    if sys.platform == "win32":
        pytest.skip(
            "This test requires gunicorn which is not available on windows.")
    with kiwi.start_run() as active_run:
        kiwi.sklearn.log_model(sk_model,
                               "model",
                               registered_model_name="imlegit")
        run_id = active_run.info.run_id

    model_uris = [
        "models:/{name}/{stage}".format(name="imlegit", stage="None"),
        "runs:/{run_id}/model".format(run_id=run_id)
    ]
    for model_uri in model_uris:
        with TempDir() as tpm:
            output_file_path = tpm.path("stoudt")
            with open(output_file_path, "w") as output_file:
                x, _ = iris_data
                scoring_response = pyfunc_serve_and_score_model(
                    model_uri,
                    pd.DataFrame(x),
                    content_type=CONTENT_TYPE_JSON_SPLIT_ORIENTED,
                    stdout=output_file,
                    extra_args=["-w", "3"])
            with open(output_file_path, "r") as output_file:
                stdout = output_file.read()
        actual = pd.read_json(scoring_response.content, orient="records")
        actual = actual[actual.columns[0]].values
        expected = sk_model.predict(x)
        assert all(expected == actual)
        expected_command_pattern = re.compile(
            "gunicorn.*-w 3.*mlflow.pyfunc.scoring_server.wsgi:app")
        assert expected_command_pattern.search(stdout) is not None
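pyfunc_serve_and_score_model forwards extra_args to the models serve CLI, so the test amounts to serving with three gunicorn workers. Roughly equivalent, as a sketch (flag names assumed to match mlflow's CLI):

import subprocess
# Launch a scoring server backed by three gunicorn workers.
server = subprocess.Popen(
    ["mlflow", "models", "serve", "-m", model_uri, "-w", "3"])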
Example No. 19
def test_autolog_persists_manually_created_run():
    kiwi.gluon.autolog()

    data = DataLoader(LogsDataset(), batch_size=128, last_batch="discard")

    with kiwi.start_run() as run:

        model = HybridSequential()
        model.add(Dense(64, activation="relu"))
        model.add(Dense(64, activation="relu"))
        model.add(Dense(10))
        model.initialize()
        model.hybridize()
        trainer = Trainer(model.collect_params(),
                          "adam",
                          optimizer_params={
                              "learning_rate": .001,
                              "epsilon": 1e-07
                          })
        est = estimator.Estimator(net=model,
                                  loss=SoftmaxCrossEntropyLoss(),
                                  metrics=Accuracy(),
                                  trainer=trainer)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            est.fit(data, epochs=3)

        assert kiwi.active_run().info.run_id == run.info.run_id
Example No. 20
def test_log_model_persists_specified_conda_env_in_mlflow_model_directory(
        saved_tf_iris_model, tf_custom_env):
    artifact_path = "model"
    with kiwi.start_run():
        kiwi.tensorflow.log_model(
            tf_saved_model_dir=saved_tf_iris_model.path,
            tf_meta_graph_tags=saved_tf_iris_model.meta_graph_tags,
            tf_signature_def_key=saved_tf_iris_model.signature_def_key,
            artifact_path=artifact_path,
            conda_env=tf_custom_env)
        model_uri = "runs:/{run_id}/{artifact_path}".format(
            run_id=kiwi.active_run().info.run_id, artifact_path=artifact_path)

    model_path = _download_artifact_from_uri(artifact_uri=model_uri)
    pyfunc_conf = _get_flavor_configuration(model_path=model_path,
                                            flavor_name=pyfunc.FLAVOR_NAME)
    saved_conda_env_path = os.path.join(model_path, pyfunc_conf[pyfunc.ENV])
    assert os.path.exists(saved_conda_env_path)
    assert saved_conda_env_path != tf_custom_env

    with open(tf_custom_env, "r") as f:
        tf_custom_env_text = f.read()
    with open(saved_conda_env_path, "r") as f:
        saved_conda_env_text = f.read()
    assert saved_conda_env_text == tf_custom_env_text
Example No. 21
def eval(parms):
    lr, momentum = parms
    with kiwi.start_run(nested=True) as child_run:
        p = kiwi.projects.run(run_id=child_run.info.run_id,
                              uri=".",
                              entry_point="train",
                              parameters={
                                  "training_data": training_data,
                                  "epochs": str(nepochs),
                                  "learning_rate": str(lr),
                                  "momentum": str(momentum),
                                  "seed": str(seed)
                              },
                              experiment_id=experiment_id,
                              synchronous=False)
        succeeded = p.wait()
    if succeeded:
        training_run = tracking_client.get_run(p.run_id)
        metrics = training_run.data.metrics
        # cap the loss at the loss of the null model
        train_loss = min(null_train_loss, metrics[train_metric])
        val_loss = min(null_val_loss, metrics[val_metric])
        test_loss = min(null_test_loss, metrics[test_metric])
    else:
        # run failed => return null loss
        tracking_client.set_terminated(p.run_id, "FAILED")
        train_loss = null_train_loss
        val_loss = null_val_loss
        test_loss = null_test_loss
    kiwi.log_metrics({
        "train_{}".format(metric): train_loss,
        "val_{}".format(metric): val_loss,
        "test_{}".format(metric): test_loss
    })
    return p.run_id, train_loss, val_loss, test_loss
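eval is shaped for a hyperparameter tuner: it logs capped losses and returns the child run id plus those losses. A hedged usage sketch with hyperopt, assuming the surrounding globals (training_data, nepochs, null_*_loss, the metric names) are defined as in the enclosing script:

from hyperopt import fmin, hp, tpe

space = [hp.uniform("lr", 1e-5, 1e-1), hp.uniform("momentum", 0.0, 1.0)]
# Minimize the validation loss, index 2 of eval's return tuple.
best = fmin(fn=lambda parms: eval(parms)[2],
            space=space, algo=tpe.suggest, max_evals=8)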
Example No. 22
def test_log_model_without_specified_conda_env_uses_default_env_with_expected_dependencies(
        sklearn_knn_model, tmpdir):
    sk_model_path = os.path.join(str(tmpdir), "knn.pkl")
    with open(sk_model_path, "wb") as f:
        pickle.dump(sklearn_knn_model, f)

    pyfunc_artifact_path = "pyfunc_model"
    with kiwi.start_run():
        kiwi.pyfunc.log_model(artifact_path=pyfunc_artifact_path,
                              data_path=sk_model_path,
                              loader_module=os.path.basename(__file__)[:-3],
                              code_path=[__file__])
        run_id = kiwi.active_run().info.run_id

    pyfunc_model_path = _download_artifact_from_uri(
        "runs:/{run_id}/{artifact_path}".format(
            run_id=run_id, artifact_path=pyfunc_artifact_path))

    pyfunc_conf = _get_flavor_configuration(
        model_path=pyfunc_model_path, flavor_name=kiwi.pyfunc.FLAVOR_NAME)
    conda_env_path = os.path.join(pyfunc_model_path,
                                  pyfunc_conf[kiwi.pyfunc.ENV])
    with open(conda_env_path, "r") as f:
        conda_env = yaml.safe_load(f)

    assert conda_env == kiwi.pyfunc.model.get_default_conda_env()
Example No. 23
def test_log_model_persists_specified_conda_env_dict_in_mlflow_model_directory(
        sklearn_knn_model, tmpdir, pyfunc_custom_env_dict):
    sk_model_path = os.path.join(str(tmpdir), "knn.pkl")
    with open(sk_model_path, "wb") as f:
        pickle.dump(sklearn_knn_model, f)

    pyfunc_artifact_path = "pyfunc_model"
    with kiwi.start_run():
        kiwi.pyfunc.log_model(artifact_path=pyfunc_artifact_path,
                              data_path=sk_model_path,
                              loader_module=os.path.basename(__file__)[:-3],
                              code_path=[__file__],
                              conda_env=pyfunc_custom_env_dict)
        run_id = kiwi.active_run().info.run_id

    pyfunc_model_path = _download_artifact_from_uri(
        "runs:/{run_id}/{artifact_path}".format(
            run_id=run_id, artifact_path=pyfunc_artifact_path))

    pyfunc_conf = _get_flavor_configuration(
        model_path=pyfunc_model_path, flavor_name=kiwi.pyfunc.FLAVOR_NAME)
    saved_conda_env_path = os.path.join(pyfunc_model_path,
                                        pyfunc_conf[kiwi.pyfunc.ENV])
    assert os.path.exists(saved_conda_env_path)

    with open(saved_conda_env_path, "r") as f:
        saved_conda_env_parsed = yaml.safe_load(f)
    assert saved_conda_env_parsed == pyfunc_custom_env_dict
Example No. 24
def test_log_model_no_registered_model_name(onnx_model, onnx_custom_env):
    import kiwi.onnx
    artifact_path = "model"
    register_model_patch = mock.patch("mlflow.register_model")
    with kiwi.start_run(), register_model_patch:
        kiwi.onnx.log_model(onnx_model=onnx_model, artifact_path=artifact_path,
                            conda_env=onnx_custom_env)
        kiwi.register_model.assert_not_called()
Example No. 25
def test_log_model_with_non_keyword_args_fails(saved_tf_iris_model):
    artifact_path = "model"
    with kiwi.start_run():
        with pytest.raises(TypeError):
            kiwi.tensorflow.log_model(saved_tf_iris_model.path,
                                      saved_tf_iris_model.meta_graph_tags,
                                      saved_tf_iris_model.signature_def_key,
                                      artifact_path)
Example No. 26
def main(use_start_run):
    if use_start_run:
        print("Running with start_run API")
        with kiwi.start_run():
            call_tracking_apis()
    else:
        print("Running without start_run API")
        call_tracking_apis()
Example No. 27
def run(training_data, epochs, batch_size, learning_rate, momentum, seed):
    warnings.filterwarnings("ignore")
    data = pd.read_csv(training_data, sep=';')
    # Split the data into training and test sets. (0.75, 0.25) split.
    train, test = train_test_split(data, random_state=seed)
    train, valid = train_test_split(train, random_state=seed)
    # The predicted column is "quality" which is a scalar from [3, 9]
    # .values replaces the long-removed DataFrame.as_matrix() API.
    train_x = train.drop(["quality"], axis=1).values.astype('float32')
    train_y = train[["quality"]].values.astype('float32')
    valid_x = valid.drop(["quality"], axis=1).values.astype('float32')
    valid_y = valid[["quality"]].values.astype('float32')
    test_x = test.drop(["quality"], axis=1).values.astype("float32")
    test_y = test[["quality"]].values.astype("float32")

    with kiwi.start_run():
        if epochs == 0:  # score null model
            eval_and_log_metrics("train",
                                 train_y,
                                 np.ones(len(train_y)) * np.mean(train_y),
                                 epoch=-1)
            eval_and_log_metrics("val",
                                 valid_y,
                                 np.ones(len(valid_y)) * np.mean(valid_y),
                                 epoch=-1)
            eval_and_log_metrics("test",
                                 test_y,
                                 np.ones(len(test_y)) * np.mean(test_y),
                                 epoch=-1)
        else:
            with MLflowCheckpoint(test_x, test_y) as mlflow_logger:
                model = Sequential()
                model.add(Lambda(get_standardize_f(train_x)))
                model.add(
                    Dense(train_x.shape[1],
                          activation='relu',
                          kernel_initializer='normal',
                          input_shape=(train_x.shape[1], )))
                model.add(
                    Dense(16, activation='relu', kernel_initializer='normal'))
                model.add(
                    Dense(16, activation='relu', kernel_initializer='normal'))
                model.add(
                    Dense(1, kernel_initializer='normal', activation='linear'))
                model.compile(loss='mean_squared_error',
                              optimizer=SGD(lr=learning_rate,
                                            momentum=momentum),
                              metrics=[])

                model.fit(train_x,
                          train_y,
                          batch_size=batch_size,
                          epochs=epochs,
                          verbose=1,
                          validation_data=(valid_x, valid_y),
                          callbacks=[mlflow_logger])
Example No. 28
def main(argv):
    with kiwi.start_run():
        args = parser.parse_args(argv[1:])

        # Builds, trains and evaluates a tf.estimator. Then, exports it for inference,
        # logs the exported model with MLflow, and loads the fitted model back as a PyFunc.
        (x_train,
         y_train), (x_test,
                    y_test) = tf.keras.datasets.boston_housing.load_data()

        # There are 13 features we are using for inference.
        feat_cols = [
            tf.feature_column.numeric_column(key="features",
                                             shape=(x_train.shape[1], ))
        ]
        feat_spec = {
            "features":
            tf.placeholder("float",
                           name="features",
                           shape=[None, x_train.shape[1]])
        }

        hidden_units = [50, 20]
        steps = args.steps

        regressor = tf.estimator.DNNRegressor(hidden_units=hidden_units,
                                              feature_columns=feat_cols)
        train_input_fn = tf.estimator.inputs.numpy_input_fn(
            {"features": x_train}, y_train, num_epochs=None, shuffle=True)
        regressor.train(train_input_fn, steps=steps)
        test_input_fn = tf.estimator.inputs.numpy_input_fn(
            {"features": x_test}, y_test, num_epochs=None, shuffle=True)
        # Compute mean squared error
        mse = regressor.evaluate(test_input_fn, steps=steps)

        # Building a receiver function for exporting
        receiver_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(
            feat_spec)
        temp = tempfile.mkdtemp()
        try:
            # The model is automatically logged when export_saved_model() is called.
            saved_estimator_path = regressor.export_savedmodel(
                temp, receiver_fn).decode("utf-8")

            # Since the model was automatically logged as an artifact (more specifically
            # a MLflow Model), we don't need to use saved_estimator_path to load back the model.
            # MLflow takes care of it!
            pyfunc_model = pyfunc.load_model(kiwi.get_artifact_uri('model'))
            df = pd.DataFrame(data=x_test,
                              columns=["features"] * x_train.shape[1])

            # Checking the PyFunc's predictions are the same as the original model's predictions.
            predict_df = pyfunc_model.predict(df)
            predict_df['original_labels'] = y_test
            print(predict_df)
        finally:
            shutil.rmtree(temp)
Example No. 29
def test_log_model_calls_register_model(onnx_model, onnx_custom_env):
    import kiwi.onnx
    artifact_path = "model"
    register_model_patch = mock.patch("mlflow.register_model")
    with kiwi.start_run(), register_model_patch:
        kiwi.onnx.log_model(onnx_model=onnx_model, artifact_path=artifact_path,
                            conda_env=onnx_custom_env, registered_model_name="AdsModel1")
        model_uri = "runs:/{run_id}/{artifact_path}".format(run_id=kiwi.active_run().info.run_id,
                                                            artifact_path=artifact_path)
        kiwi.register_model.assert_called_once_with(model_uri, "AdsModel1")
Example No. 30
def test_log_model_no_registered_model_name(saved_tf_iris_model):
    artifact_path = "model"
    register_model_patch = mock.patch("mlflow.register_model")
    with kiwi.start_run(), register_model_patch:
        kiwi.tensorflow.log_model(
            tf_saved_model_dir=saved_tf_iris_model.path,
            tf_meta_graph_tags=saved_tf_iris_model.meta_graph_tags,
            tf_signature_def_key=saved_tf_iris_model.signature_def_key,
            artifact_path=artifact_path)
        kiwi.register_model.assert_not_called()