예제 #1
0
def test_model_log(h2o_iris_model):
    """Log an H2O model, reload it via its ``runs:/`` URI and compare predictions.

    The scenario is executed twice: once without an active run, so that
    ``log_model()`` has to start one on its own, and once with a run started
    explicitly beforehand.

    :param h2o_iris_model: Fixture exposing ``model`` (the H2O model to log) and
                           ``inference_data`` (the frame both models predict on).
    """
    h2o_model = h2o_iris_model.model
    old_uri = kiwi.get_tracking_uri()
    # should_start_run tests whether or not calling log_model() automatically starts a run.
    for should_start_run in [False, True]:
        with TempDir(chdr=True, remove_on_exit=True):
            try:
                artifact_path = "gbm_model"
                kiwi.set_tracking_uri("test")
                if should_start_run:
                    kiwi.start_run()
                kiwi.h2o.log_model(h2o_model=h2o_model,
                                   artifact_path=artifact_path)
                model_uri = "runs:/{run_id}/{artifact_path}".format(
                    run_id=kiwi.active_run().info.run_id,
                    artifact_path=artifact_path)

                # Load model
                h2o_model_loaded = kiwi.h2o.load_model(model_uri=model_uri)
                loaded_predictions = h2o_model_loaded.predict(
                    h2o_iris_model.inference_data).as_data_frame()
                original_predictions = h2o_model.predict(
                    h2o_iris_model.inference_data).as_data_frame()
                # NOTE: ``all(df_a == df_b)`` iterates the column labels of the
                # comparison frame (non-empty strings, hence always truthy), so it
                # never inspected the predicted values. ``equals`` compares the
                # actual cell contents.
                # Assumes as_data_frame() returns pandas DataFrames -- TODO confirm.
                assert loaded_predictions.equals(original_predictions)
            finally:
                # Always close the run and restore the caller's tracking URI.
                kiwi.end_run()
                kiwi.set_tracking_uri(old_uri)
예제 #2
0
def test_sparkml_estimator_model_log(tmpdir, spark_model_estimator):
    """Log a Spark ML estimator model, reload it and verify its predictions.

    Every combination of "run started explicitly or not" and "default or custom
    DFS temp directory" is exercised, each one logging under a distinct artifact
    path (``model0``, ``model1``, ...).

    :param tmpdir: Temporary directory fixture hosting the file-based tracking
                   store and the custom DFS temp directory.
    :param spark_model_estimator: Fixture exposing ``model``, ``spark_df`` and the
                                  expected ``predictions``.
    """
    old_tracking_uri = kiwi.get_tracking_uri()
    # Loop-invariant; computed up front so the cleanup in ``finally`` can always
    # refer to it.
    tracking_dir = os.path.abspath(str(tmpdir.join("mlruns")))
    cnt = 0
    # should_start_run tests whether or not calling log_model() automatically starts a run.
    for should_start_run in [False, True]:
        for dfs_tmp_dir in [None, os.path.join(str(tmpdir), "test")]:
            print("should_start_run =", should_start_run, "dfs_tmp_dir =", dfs_tmp_dir)
            try:
                kiwi.set_tracking_uri("file://%s" % tracking_dir)
                if should_start_run:
                    kiwi.start_run()
                artifact_path = "model%d" % cnt
                cnt += 1
                sparkm.log_model(
                    artifact_path=artifact_path,
                    spark_model=spark_model_estimator.model,
                    dfs_tmpdir=dfs_tmp_dir)
                model_uri = "runs:/{run_id}/{artifact_path}".format(
                    run_id=kiwi.active_run().info.run_id,
                    artifact_path=artifact_path)

                # test reloaded model
                reloaded_model = sparkm.load_model(model_uri=model_uri, dfs_tmpdir=dfs_tmp_dir)
                preds_df = reloaded_model.transform(spark_model_estimator.spark_df)
                preds = [x.prediction for x in preds_df.select("prediction").collect()]
                assert spark_model_estimator.predictions == preds
            finally:
                kiwi.end_run()
                kiwi.set_tracking_uri(old_tracking_uri)
                # Best-effort cleanup: if an earlier step failed before these
                # directories were created, a raising rmtree would replace the
                # real failure with a FileNotFoundError.
                shutil.rmtree(dfs_tmp_dir or sparkm.DFS_TMP, ignore_errors=True)
                shutil.rmtree(tracking_dir, ignore_errors=True)
예제 #3
0
def test_docker_project_tracking_uri_propagation(ProfileConfigProvider, tmpdir,
                                                 tracking_uri,
                                                 expected_command_segment,
                                                 docker_example_base_image):  # pylint: disable=unused-argument
    """Run the Docker test project against the given tracking URI.

    The Databricks profile provider is replaced by a mock returning a fixed
    config, and the tracking store lookup is patched to return a ``FileStore``
    rooted in a temporary ``mlruns`` directory. When ``tracking_uri`` is ``None``
    that local directory is used as the tracking URI. The previous tracking URI
    is restored afterwards.
    """
    provider_stub = mock.MagicMock()
    provider_stub.get_config.return_value = DatabricksConfig(
        "host", "user", "pass", None, insecure=True)
    ProfileConfigProvider.return_value = provider_stub

    # Local directory that backs the patched tracking store.
    mlruns_dir = os.path.join(tmpdir.strpath, "mlruns")
    effective_uri = mlruns_dir if tracking_uri is None else tracking_uri

    previous_uri = kiwi.get_tracking_uri()
    try:
        kiwi.set_tracking_uri(effective_uri)
        with mock.patch(
                "mlflow.tracking._tracking_service.utils._get_store") as store_lookup:
            store_lookup.return_value = file_store.FileStore(mlruns_dir)
            kiwi.projects.run(
                TEST_DOCKER_PROJECT_DIR,
                experiment_id=file_store.FileStore.DEFAULT_EXPERIMENT_ID)
    finally:
        kiwi.set_tracking_uri(previous_uri)
예제 #4
0
def pyfunc_serve_and_score_model(model_uri,
                                 data,
                                 content_type,
                                 activity_polling_timeout_seconds=500,
                                 extra_args=None,
                                 stdout=sys.stdout):
    """
    Start ``mlflow models serve`` for ``model_uri`` in a child process and evaluate it on ``data``.

    :param model_uri: URI to the model to be served.
    :param data: The data to send to the pyfunc server for testing. This is either a
                 Pandas dataframe or string of the format specified by `content_type`.
    :param content_type: The type of the data to send to the pyfunc server for testing. This is
                         one of `mlflow.pyfunc.scoring_server.CONTENT_TYPES`.
    :param activity_polling_timeout_seconds: The amount of time, in seconds, to wait before
                                             declaring the scoring process to have failed.
    :param extra_args: A list of extra arguments to append to the pyfunc scoring server command.
                       For example, ``extra_args=["--no-conda"]`` passes the ``--no-conda`` flag
                       to the scoring server so that conda environment activation is skipped.
    :param stdout: Stream handed to the scoring process as both its stdout and its stderr.
    :return: The result of ``_evaluate_scoring_proc`` for the started process.
    """
    # Child environment: the current environment plus locale and MLflow settings.
    serve_env = os.environ.copy()
    serve_env.update({
        "LC_ALL": "en_US.UTF-8",
        "LANG": "en_US.UTF-8",
        "MLFLOW_TRACKING_URI": kiwi.get_tracking_uri(),
        "MLFLOW_HOME": _get_mlflow_home(),
    })

    port = get_safe_port()
    serve_cmd = ['mlflow', 'models', 'serve', '-m', model_uri]
    serve_cmd.extend(["-p", str(port), "--install-mlflow"])
    if extra_args is not None:
        serve_cmd.extend(extra_args)

    scoring_proc = _start_scoring_proc(
        cmd=serve_cmd, env=serve_env, stdout=stdout, stderr=stdout)
    return _evaluate_scoring_proc(
        scoring_proc, port, data, content_type, activity_polling_timeout_seconds)
예제 #5
0
        predicted_qualities = lr.predict(test_x)

        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

        print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
        print("  RMSE: %s" % rmse)
        print("  MAE: %s" % mae)
        print("  R2: %s" % r2)

        kiwi.log_param("alpha", alpha)
        kiwi.log_param("l1_ratio", l1_ratio)
        kiwi.log_metric("rmse", rmse)
        kiwi.log_metric("r2", r2)
        kiwi.log_metric("mae", mae)

        tracking_url_type_store = urlparse(kiwi.get_tracking_uri()).scheme

        # Model registry does not work with file store
        if tracking_url_type_store != "file":

            # Register the model
            # There are other ways to use the Model Registry, which depends on the use case,
            # please refer to the doc for more information:
            # https://mlflow.org/docs/latest/model-registry.html#api-workflow
            kiwi.sklearn.log_model(lr,
                                   "model",
                                   registered_model_name="ElasticnetWineModel")
        else:
            kiwi.sklearn.log_model(lr, "model")
예제 #6
0
def _run_predict_cli(cli_args, env):
    """Run ``mlflow models predict`` with ``cli_args`` under ``env`` and return its exit code."""
    return subprocess.Popen(["mlflow", "models", "predict"] + cli_args, env=env).wait()


def _assert_first_column_equals(json_source, expected):
    """Assert the first column of the records-oriented JSON in ``json_source`` equals ``expected``."""
    actual = pd.read_json(json_source, orient="records")
    actual = actual[actual.columns[0]].values
    assert all(expected == actual)


def test_predict(iris_data, sk_model):
    """Exercise the ``mlflow models predict`` CLI against a logged sklearn model.

    The model is logged (and registered as ``impredicting``) inside a temporary
    working directory, then scored through the CLI via a model-registry URI and
    a ``runs:/`` URI, with JSON and CSV inputs, and via stdin/stdout. Each
    variant's output is compared with ``sk_model.predict`` on the same data.

    :param iris_data: Fixture yielding a ``(features, labels)`` pair; only the
                      features are used.
    :param sk_model: Fitted sklearn model used to log and to compute the
                     expected predictions.
    """
    with TempDir(chdr=True) as tmp:
        with kiwi.start_run() as active_run:
            kiwi.sklearn.log_model(sk_model,
                                   "model",
                                   registered_model_name="impredicting")
            model_uri = "runs:/{run_id}/model".format(
                run_id=active_run.info.run_id)
        model_registry_uri = "models:/{name}/{stage}".format(
            name="impredicting", stage="None")
        input_json_path = tmp.path("input.json")
        input_csv_path = tmp.path("input.csv")
        output_json_path = tmp.path("output.json")
        x, _ = iris_data
        pd.DataFrame(x).to_json(input_json_path, orient="split")
        pd.DataFrame(x).to_csv(input_csv_path, index=False)
        # The reference predictions are the same for every CLI variant below.
        expected = sk_model.predict(x)

        env_with_tracking_uri = os.environ.copy()
        env_with_tracking_uri.update(
            MLFLOW_TRACKING_URI=kiwi.get_tracking_uri())

        # Test with no conda & model registry URI
        p = subprocess.Popen([
            "mlflow", "models", "predict", "-m", model_registry_uri, "-i",
            input_json_path, "-o", output_json_path, "--no-conda"
        ],
                             stderr=subprocess.PIPE,
                             env=env_with_tracking_uri)
        # communicate() drains the stderr pipe; a bare wait() can deadlock once
        # the child fills the OS pipe buffer.
        _, captured_stderr = p.communicate()
        assert p.returncode == 0, captured_stderr
        _assert_first_column_equals(output_json_path, expected)

        # With conda + --install-mlflow
        assert 0 == _run_predict_cli(
            ["-m", model_uri, "-i", input_json_path, "-o", output_json_path]
            + extra_options,
            env_with_tracking_uri)
        _assert_first_column_equals(output_json_path, expected)

        # explicit json format with default orient (should be split)
        assert 0 == _run_predict_cli(
            ["-m", model_uri, "-i", input_json_path, "-o", output_json_path,
             "-t", "json"] + extra_options,
            env_with_tracking_uri)
        _assert_first_column_equals(output_json_path, expected)

        # explicit json format with orient==split
        assert 0 == _run_predict_cli(
            ["-m", model_uri, "-i", input_json_path, "-o", output_json_path,
             "-t", "json", "--json-format", "split"] + extra_options,
            env_with_tracking_uri)
        _assert_first_column_equals(output_json_path, expected)

        # read from stdin, write to stdout.
        p = subprocess.Popen([
            "mlflow", "models", "predict", "-m", model_uri, "-t", "json",
            "--json-format", "split"
        ] + extra_options,
                             universal_newlines=True,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=sys.stderr,
                             env=env_with_tracking_uri)
        with open(input_json_path, "r") as f:
            stdout, _ = p.communicate(f.read())
        assert 0 == p.wait()
        _assert_first_column_equals(StringIO(stdout), expected)

        # NB: We do not test orient=records here because records may lose column ordering.
        # orient == records is tested in other test with simpler model.

        # csv
        assert 0 == _run_predict_cli(
            ["-m", model_uri, "-i", input_csv_path, "-o", output_json_path,
             "-t", "csv"] + extra_options,
            env_with_tracking_uri)
        _assert_first_column_equals(output_json_path, expected)
예제 #7
0
import os
import shutil
import sys
import random
import tempfile

import kiwi
from kiwi import log_metric, log_param, log_artifacts, get_artifact_uri, active_run,\
    get_tracking_uri, log_artifact

if __name__ == "__main__":
    print("Running {} with tracking URI {}".format(sys.argv[0],
                                                   get_tracking_uri()))
    log_param("param1", 5)
    log_metric("foo", 5)
    log_metric("foo", 6)
    log_metric("foo", 7)
    log_metric("random_int", random.randint(0, 100))
    run_id = active_run().info.run_id
    # Get run metadata & data from the tracking server
    service = kiwi.tracking.MlflowClient()
    run = service.get_run(run_id)
    print("Metadata & data for run with UUID %s: %s" % (run_id, run))
    local_dir = tempfile.mkdtemp()
    message = "test artifact written during run %s within artifact URI %s\n" \
              % (active_run().info.run_id, get_artifact_uri())
    try:
        file_path = os.path.join(local_dir, "some_output_file.txt")
        with open(file_path, "w") as handle:
            handle.write(message)
        log_artifacts(local_dir, "some_subdir")