def test_sparkml_estimator_model_log(tmpdir, spark_model_estimator):
    """Log a Spark ML estimator model under every combination of run state and DFS
    temp dir, reload it, and check the predictions round-trip.

    Exercises the 2x2 grid of (run already started?) x (custom vs default
    dfs_tmpdir), cleaning tracking state and temp dirs after each iteration.
    """
    # Print the coefficients and intercept for multinomial logistic regression
    old_tracking_uri = kiwi.get_tracking_uri()
    cnt = 0  # monotonically increasing suffix so each iteration logs to a fresh artifact path
    # should_start_run tests whether or not calling log_model() automatically starts a run.
    for should_start_run in [False, True]:
        for dfs_tmp_dir in [None, os.path.join(str(tmpdir), "test")]:
            print("should_start_run =", should_start_run, "dfs_tmp_dir =", dfs_tmp_dir)
            try:
                tracking_dir = os.path.abspath(str(tmpdir.join("mlruns")))
                kiwi.set_tracking_uri("file://%s" % tracking_dir)
                if should_start_run:
                    kiwi.start_run()
                artifact_path = "model%d" % cnt
                cnt += 1
                sparkm.log_model(
                    artifact_path=artifact_path,
                    spark_model=spark_model_estimator.model,
                    dfs_tmpdir=dfs_tmp_dir)
                model_uri = "runs:/{run_id}/{artifact_path}".format(
                    run_id=kiwi.active_run().info.run_id,
                    artifact_path=artifact_path)
                # test reloaded model
                reloaded_model = sparkm.load_model(model_uri=model_uri, dfs_tmpdir=dfs_tmp_dir)
                preds_df = reloaded_model.transform(spark_model_estimator.spark_df)
                preds = [x.prediction for x in preds_df.select("prediction").collect()]
                assert spark_model_estimator.predictions == preds
            finally:
                # Always end the (possibly auto-started) run and restore the
                # caller's tracking URI before the next iteration.
                kiwi.end_run()
                kiwi.set_tracking_uri(old_tracking_uri)
                # When dfs_tmp_dir is None, log_model fell back to sparkm.DFS_TMP;
                # remove whichever directory was actually used.
                x = dfs_tmp_dir or sparkm.DFS_TMP
                shutil.rmtree(x)
                shutil.rmtree(tracking_dir)
def test_log_model_without_specified_conda_env_uses_default_env_with_expected_dependencies(
        sklearn_knn_model, main_scoped_model_class):
    """A pyfunc model logged without an explicit conda_env must persist the
    default pyfunc conda environment."""
    sklearn_artifact_path = "sk_model"
    with kiwi.start_run():
        kiwi.sklearn.log_model(sk_model=sklearn_knn_model, artifact_path=sklearn_artifact_path)
        sklearn_run_id = kiwi.active_run().info.run_id

    pyfunc_artifact_path = "pyfunc_model"
    with kiwi.start_run():
        sk_model_uri = utils_get_artifact_uri(
            artifact_path=sklearn_artifact_path, run_id=sklearn_run_id)
        kiwi.pyfunc.log_model(
            artifact_path=pyfunc_artifact_path,
            artifacts={"sk_model": sk_model_uri},
            python_model=main_scoped_model_class(predict_fn=None))
        pyfunc_run_id = kiwi.active_run().info.run_id
        pyfunc_model_path = _download_artifact_from_uri(
            "runs:/{}/{}".format(pyfunc_run_id, pyfunc_artifact_path))

    pyfunc_conf = _get_flavor_configuration(
        model_path=pyfunc_model_path, flavor_name=kiwi.pyfunc.FLAVOR_NAME)
    conda_env_path = os.path.join(pyfunc_model_path, pyfunc_conf[kiwi.pyfunc.ENV])
    with open(conda_env_path, "r") as env_file:
        persisted_env = yaml.safe_load(env_file)
    assert persisted_env == kiwi.pyfunc.model.get_default_conda_env()
def test_model_log_load(sklearn_knn_model, main_scoped_model_class, iris_data):
    """Log a pyfunc model wrapping a logged sklearn model, reload it, and verify
    both its metadata and its predictions."""
    sklearn_artifact_path = "sk_model"
    with kiwi.start_run():
        kiwi.sklearn.log_model(sk_model=sklearn_knn_model, artifact_path=sklearn_artifact_path)
        sklearn_model_uri = "runs:/{}/{}".format(
            kiwi.active_run().info.run_id, sklearn_artifact_path)

    def test_predict(sk_model, model_input):
        return sk_model.predict(model_input) * 2

    pyfunc_artifact_path = "pyfunc_model"
    with kiwi.start_run():
        kiwi.pyfunc.log_model(
            artifact_path=pyfunc_artifact_path,
            artifacts={"sk_model": sklearn_model_uri},
            python_model=main_scoped_model_class(test_predict))
        pyfunc_run_id = kiwi.active_run().info.run_id
        pyfunc_model_uri = "runs:/{}/{}".format(pyfunc_run_id, pyfunc_artifact_path)
        pyfunc_model_path = _download_artifact_from_uri(
            "runs:/{}/{}".format(pyfunc_run_id, pyfunc_artifact_path))

    # Metadata attached to the loaded model must match the persisted MLmodel file.
    model_config = Model.load(os.path.join(pyfunc_model_path, "MLmodel"))
    loaded_pyfunc_model = kiwi.pyfunc.load_pyfunc(model_uri=pyfunc_model_uri)
    assert model_config.to_yaml() == loaded_pyfunc_model.metadata.to_yaml()
    # Predictions must match the custom predict function applied directly.
    np.testing.assert_array_equal(
        loaded_pyfunc_model.predict(iris_data[0]),
        test_predict(sk_model=sklearn_knn_model, model_input=iris_data[0]))
def test_model_log(h2o_iris_model):
    """Log an H2O model (with and without a pre-started run), reload it, and
    check predictions match the original model.

    The tracking URI is swapped to a local "test" directory per iteration and
    restored in the finally block so other tests are unaffected.
    """
    h2o_model = h2o_iris_model.model
    old_uri = kiwi.get_tracking_uri()
    # should_start_run tests whether or not calling log_model() automatically starts a run.
    for should_start_run in [False, True]:
        with TempDir(chdr=True, remove_on_exit=True):
            try:
                artifact_path = "gbm_model"
                kiwi.set_tracking_uri("test")
                if should_start_run:
                    kiwi.start_run()
                kiwi.h2o.log_model(h2o_model=h2o_model, artifact_path=artifact_path)
                model_uri = "runs:/{run_id}/{artifact_path}".format(
                    run_id=kiwi.active_run().info.run_id,
                    artifact_path=artifact_path)
                # Load model
                h2o_model_loaded = kiwi.h2o.load_model(model_uri=model_uri)
                # Reloaded model must produce identical predictions frame-wide.
                assert all(
                    h2o_model_loaded.predict(h2o_iris_model.inference_data).
                    as_data_frame() == h2o_model.predict(
                        h2o_iris_model.inference_data).as_data_frame())
            finally:
                # End the (possibly auto-started) run and restore the tracking URI.
                kiwi.end_run()
                kiwi.set_tracking_uri(old_uri)
def test_autologging_dedups_multiple_reads_of_same_datasource(
        spark_session, format_to_file_path):
    """Reading the same datasource many times within one run must be logged once."""
    kiwi.spark.autolog()
    data_format, file_path = next(iter(format_to_file_path.items()))
    df = (spark_session.read.format(data_format)
          .option("header", "true")
          .option("inferSchema", "true")
          .load(file_path))

    with kiwi.start_run():
        run_id = kiwi.active_run().info.run_id
        # Several reads of the same datasource: dedup should collapse them.
        df.collect()
        df.filter("number1 > 0").collect()
        df.limit(2).collect()
        df.collect()
        time.sleep(1)
    run = kiwi.get_run(run_id)
    _assert_spark_data_logged(run=run, path=file_path, data_format=data_format)

    # Test context provider flow
    df.filter("number1 > 0").collect()
    df.limit(2).collect()
    df.collect()
    with kiwi.start_run():
        run_id2 = kiwi.active_run().info.run_id
        time.sleep(1)
        run2 = kiwi.get_run(run_id2)
        _assert_spark_data_logged(run=run2, path=file_path, data_format=data_format)
def test_start_deleted_run():
    """Restarting a deleted run must raise MlflowException and leave no active run."""
    run_id = None
    with kiwi.start_run() as active_run:
        run_id = active_run.info.run_id
    tracking.MlflowClient().delete_run(run_id)
    # Fix: pytest.raises takes `match=` (regex checked against the exception
    # message); the original passed an invalid `matches=` keyword, so the
    # message was never verified (and newer pytest rejects the kwarg outright).
    with pytest.raises(MlflowException, match='because it is in the deleted state.'):
        with kiwi.start_run(run_id=run_id):
            pass
    assert kiwi.active_run() is None
def test_start_run_exp_id_0():
    """An active experiment is used by default, but an explicit experiment_id wins."""
    kiwi.set_experiment("some-experiment")
    # A fresh run should land in the experiment we just activated.
    with kiwi.start_run() as active_run:
        exp_id = active_run.info.experiment_id
        assert exp_id != FileStore.DEFAULT_EXPERIMENT_ID
        assert MlflowClient().get_experiment(exp_id).name == "some-experiment"
    # Set experiment ID to 0 when creating a run, verify that the specified experiment ID is honored
    with kiwi.start_run(experiment_id=0) as active_run:
        assert active_run.info.experiment_id == FileStore.DEFAULT_EXPERIMENT_ID
def gluon_random_data_run():
    """Train a tiny Gluon MLP with autologging enabled and return the finished run."""
    kiwi.gluon.autolog()
    with kiwi.start_run() as run:
        train_data = DataLoader(LogsDataset(), batch_size=128, last_batch="discard")
        val_data = DataLoader(LogsDataset(), batch_size=128, last_batch="discard")
        net = HybridSequential()
        # Two hidden ReLU layers and a 10-way output head.
        for layer in (Dense(64, activation="relu"),
                      Dense(64, activation="relu"),
                      Dense(10)):
            net.add(layer)
        net.initialize()
        net.hybridize()
        trainer = Trainer(
            net.collect_params(), "adam",
            optimizer_params={"learning_rate": .001, "epsilon": 1e-07})
        est = estimator.Estimator(
            net=net, loss=SoftmaxCrossEntropyLoss(), metrics=Accuracy(), trainer=trainer)
        # MXNet emits noisy deprecation warnings during fit; silence them here.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            est.fit(train_data, epochs=3, val_data=val_data)
    client = kiwi.tracking.MlflowClient()
    return client.get_run(run.info.run_id)
def test_log_model_calls_register_model(sklearn_knn_model, main_scoped_model_class):
    """Logging a pyfunc model with registered_model_name must trigger exactly one
    register_model call with the run's model URI.

    The whole body runs inside a mock.patch of mlflow.register_model so the
    assertion at the end inspects the mock, not the real registry.
    """
    register_model_patch = mock.patch("mlflow.register_model")
    with register_model_patch:
        sklearn_artifact_path = "sk_model_no_run"
        with kiwi.start_run():
            kiwi.sklearn.log_model(sk_model=sklearn_knn_model,
                                   artifact_path=sklearn_artifact_path)
            sklearn_model_uri = "runs:/{run_id}/{artifact_path}".format(
                run_id=kiwi.active_run().info.run_id,
                artifact_path=sklearn_artifact_path)

        def test_predict(sk_model, model_input):
            # Doubling the sklearn prediction marks the pyfunc wrapper's output.
            return sk_model.predict(model_input) * 2

        pyfunc_artifact_path = "pyfunc_model"
        # No active run here: log_model is expected to start one implicitly.
        assert kiwi.active_run() is None
        kiwi.pyfunc.log_model(artifact_path=pyfunc_artifact_path,
                              artifacts={"sk_model": sklearn_model_uri},
                              python_model=main_scoped_model_class(test_predict),
                              registered_model_name="AdsModel1")
        model_uri = "runs:/{run_id}/{artifact_path}".format(
            run_id=kiwi.active_run().info.run_id,
            artifact_path=pyfunc_artifact_path)
        # NOTE(review): asserts on kiwi.register_model while patching
        # "mlflow.register_model" — presumably the same module under an alias;
        # verify the patch target if this ever fails unexpectedly.
        kiwi.register_model.assert_called_once_with(model_uri, "AdsModel1")
        # Close the run that log_model started implicitly.
        kiwi.end_run()
def test_pyfunc_model_log_load_no_active_run(sklearn_knn_model, main_scoped_model_class,
                                             iris_data):
    """log_model with no active run should implicitly start one; the logged model
    must load back and predict correctly."""
    sklearn_artifact_path = "sk_model_no_run"
    with kiwi.start_run():
        kiwi.sklearn.log_model(sk_model=sklearn_knn_model, artifact_path=sklearn_artifact_path)
        sklearn_model_uri = "runs:/{}/{}".format(
            kiwi.active_run().info.run_id, sklearn_artifact_path)

    def test_predict(sk_model, model_input):
        return sk_model.predict(model_input) * 2

    pyfunc_artifact_path = "pyfunc_model"
    assert kiwi.active_run() is None
    kiwi.pyfunc.log_model(artifact_path=pyfunc_artifact_path,
                          artifacts={"sk_model": sklearn_model_uri},
                          python_model=main_scoped_model_class(test_predict))
    pyfunc_model_uri = "runs:/{}/{}".format(
        kiwi.active_run().info.run_id, pyfunc_artifact_path)
    loaded_pyfunc_model = kiwi.pyfunc.load_pyfunc(model_uri=pyfunc_model_uri)
    np.testing.assert_array_equal(
        loaded_pyfunc_model.predict(iris_data[0]),
        test_predict(sk_model=sklearn_knn_model, model_input=iris_data[0]))
    # Close the run that log_model started implicitly.
    kiwi.end_run()
def workflow(als_max_iter, keras_hidden_units, max_row_limit):
    """Drive the multistep pipeline: load raw data -> ETL -> ALS -> Keras training.

    Each step is launched (or fetched from cache) via _get_or_run, keyed by the
    parent run's git commit so cached step runs are only reused for the same code
    version. Artifact URIs from each step feed the parameters of the next.
    """
    # Note: The entrypoint names are defined in MLproject. The artifact directories
    # are documented by each step's .py file.
    with kiwi.start_run() as active_run:
        # Point Spark at a local spark-defaults.conf (see driver-memory note below).
        os.environ['SPARK_CONF_DIR'] = os.path.abspath('.')
        git_commit = active_run.data.tags.get(mlflow_tags.MLFLOW_GIT_COMMIT)
        load_raw_data_run = _get_or_run("load_raw_data", {}, git_commit)
        ratings_csv_uri = os.path.join(load_raw_data_run.info.artifact_uri, "ratings-csv-dir")
        etl_data_run = _get_or_run("etl_data", {
            "ratings_csv": ratings_csv_uri,
            "max_row_limit": max_row_limit
        }, git_commit)
        ratings_parquet_uri = os.path.join(etl_data_run.info.artifact_uri,
                                           "ratings-parquet-dir")
        # We specify a spark-defaults.conf to override the default driver memory. ALS requires
        # significant memory. The driver memory property cannot be set by the application itself.
        als_run = _get_or_run("als", {
            "ratings_data": ratings_parquet_uri,
            "max_iter": str(als_max_iter)
        }, git_commit)
        als_model_uri = os.path.join(als_run.info.artifact_uri, "als-model")
        keras_params = {
            "ratings_data": ratings_parquet_uri,
            "als_model_uri": als_model_uri,
            "hidden_units": keras_hidden_units,
        }
        # Training is never cached: its output depends on nondeterministic fitting.
        _get_or_run("train_keras", keras_params, git_commit, use_cache=False)
def test_autologging_of_datasources_with_different_formats(
        spark_session, format_to_file_path):
    """Every derived view of a datasource (table, SQL, filter, select, limit)
    should be autologged back to the original path and format."""
    kiwi.spark.autolog()
    for data_format, file_path in format_to_file_path.items():
        base_df = (spark_session.read.format(data_format)
                   .option("header", "true")
                   .option("inferSchema", "true")
                   .load(file_path))
        base_df.createOrReplaceTempView("temptable")
        table_df0 = spark_session.table("temptable")
        table_df1 = spark_session.sql("SELECT number1, number2 from temptable LIMIT 5")
        derived_dfs = [
            base_df,
            table_df0,
            table_df1,
            base_df.filter("number1 > 0"),
            base_df.select("number1"),
            base_df.limit(2),
            base_df.filter("number1 > 0").select("number1").limit(2),
        ]
        for df in derived_dfs:
            with kiwi.start_run():
                run_id = kiwi.active_run().info.run_id
                df.collect()
                # Autologging is asynchronous; give it a moment to record tags.
                time.sleep(1)
            run = kiwi.get_run(run_id)
            _assert_spark_data_logged(run=run, path=file_path, data_format=data_format)
def test_cli_build_image_with_runs_uri_calls_expected_azure_routines(sklearn_model):
    """`build-image` with a runs:/ URI should register, build, and load exactly once."""
    artifact_path = "model"
    with kiwi.start_run():
        kiwi.sklearn.log_model(sk_model=sklearn_model, artifact_path=artifact_path)
        run_id = kiwi.active_run().info.run_id
    model_uri = "runs:/{}/{}".format(run_id, artifact_path)

    with AzureMLMocks() as aml_mocks:
        cli_env = {"LC_ALL": "en_US.UTF-8", "LANG": "en_US.UTF-8"}
        cli_args = [
            'build-image',
            '-m', model_uri,
            '-w', 'test_workspace',
            '-i', 'image_name',
            '-n', 'model_name',
        ]
        result = CliRunner(env=cli_env).invoke(kiwi.azureml.cli.commands, cli_args)
        assert result.exit_code == 0
        # Each Azure routine must have been invoked exactly once.
        for routine in ("register_model", "create_image", "load_workspace"):
            assert aml_mocks[routine].call_count == 1
def test_build_image_includes_default_metadata_in_azure_image_and_model_tags(
        sklearn_model):
    """build_image must tag both the registered model and the image with the
    model URI and the model's Python version."""
    artifact_path = "model"
    with kiwi.start_run():
        kiwi.sklearn.log_model(sk_model=sklearn_model, artifact_path=artifact_path)
        run_id = kiwi.active_run().info.run_id
    # Fix: normalized "runs:///" to the canonical "runs:/" form used by every
    # other test in this file (urlparse yields the same path for both, so
    # behavior is unchanged — this is a consistency cleanup).
    model_uri = "runs:/{run_id}/{artifact_path}".format(
        run_id=run_id, artifact_path=artifact_path)
    model_config = Model.load(
        os.path.join(_download_artifact_from_uri(artifact_uri=model_uri), "MLmodel"))

    with AzureMLMocks() as aml_mocks:
        workspace = get_azure_workspace()
        kiwi.azureml.build_image(model_uri=model_uri, workspace=workspace)

        # The registered model must carry the default metadata tags.
        register_model_call_args = aml_mocks["register_model"].call_args_list
        assert len(register_model_call_args) == 1
        _, register_model_call_kwargs = register_model_call_args[0]
        called_tags = register_model_call_kwargs["tags"]
        assert called_tags["model_uri"] == model_uri
        assert called_tags["python_version"] ==\
            model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.PY_VERSION]

        # The image configuration must carry the same tags.
        create_image_call_args = aml_mocks["create_image"].call_args_list
        assert len(create_image_call_args) == 1
        _, create_image_call_kwargs = create_image_call_args[0]
        image_config = create_image_call_kwargs["image_config"]
        assert image_config.tags["model_uri"] == model_uri
        assert image_config.tags["python_version"] ==\
            model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.PY_VERSION]
def test_default_conda_env_strips_dev_suffix_from_pyspark_version(spark_model_iris, model_path):
    """A pyspark ".devN" version suffix must be stripped from the default conda
    env, while release-style versions pass through untouched.

    Fix: the local download path no longer shadows the ``model_path`` fixture
    parameter (the original reassigned ``model_path``, silently discarding the
    injected value); it is now a distinct local name.
    """
    mock_version_standard = mock.PropertyMock(return_value="2.4.0")
    with mock.patch("pyspark.__version__", new_callable=mock_version_standard):
        default_conda_env_standard = sparkm.get_default_conda_env()

    # Every dev-suffixed version must yield the same env as the plain release.
    for dev_version in ["2.4.0.dev0", "2.4.0.dev", "2.4.0.dev1", "2.4.0dev.a", "2.4.0.devb"]:
        mock_version_dev = mock.PropertyMock(return_value=dev_version)
        with mock.patch("pyspark.__version__", new_callable=mock_version_dev):
            default_conda_env_dev = sparkm.get_default_conda_env()
            assert (default_conda_env_dev == default_conda_env_standard)

            with kiwi.start_run():
                sparkm.log_model(
                    spark_model=spark_model_iris.model, artifact_path="model", conda_env=None)
                model_uri = "runs:/{run_id}/{artifact_path}".format(
                    run_id=kiwi.active_run().info.run_id, artifact_path="model")

            downloaded_model_path = _download_artifact_from_uri(artifact_uri=model_uri)
            pyfunc_conf = _get_flavor_configuration(
                model_path=downloaded_model_path, flavor_name=pyfunc.FLAVOR_NAME)
            conda_env_path = os.path.join(downloaded_model_path, pyfunc_conf[pyfunc.ENV])
            with open(conda_env_path, "r") as f:
                persisted_conda_env_dev = yaml.safe_load(f)
            assert (persisted_conda_env_dev == default_conda_env_standard)

    # Versions without a dev suffix must appear unchanged in the default env.
    for unaffected_version in ["2.0", "2.3.4", "2"]:
        mock_version = mock.PropertyMock(return_value=unaffected_version)
        with mock.patch("pyspark.__version__", new_callable=mock_version):
            assert unaffected_version in yaml.safe_dump(sparkm.get_default_conda_env())
def test_prepare_env_passes(sk_model):
    """`mlflow models prepare-env` should succeed without conda, with conda, and
    be idempotent on a second conda invocation."""
    if no_conda:
        pytest.skip("This test requires conda.")
    with TempDir(chdr=True):
        with kiwi.start_run() as active_run:
            kiwi.sklearn.log_model(sk_model, "model")
            model_uri = "runs:/{run_id}/model".format(
                run_id=active_run.info.run_id)
        # Test with no conda, then with conda, then again to check idempotence.
        invocations = [
            ["mlflow", "models", "prepare-env", "-m", model_uri, "--no-conda"],
            ["mlflow", "models", "prepare-env", "-m", model_uri],
            ["mlflow", "models", "prepare-env", "-m", model_uri],
        ]
        for cmd in invocations:
            proc = subprocess.Popen(cmd, stderr=subprocess.PIPE)
            assert proc.wait() == 0
def test_log_and_load_model_persists_and_restores_model_successfully(
        saved_tf_iris_model):
    """A logged TF SavedModel must load back with all signature tensors resolvable."""
    artifact_path = "model"
    with kiwi.start_run():
        kiwi.tensorflow.log_model(
            tf_saved_model_dir=saved_tf_iris_model.path,
            tf_meta_graph_tags=saved_tf_iris_model.meta_graph_tags,
            tf_signature_def_key=saved_tf_iris_model.signature_def_key,
            artifact_path=artifact_path)
        run_id = kiwi.active_run().info.run_id
    model_uri = "runs:/{}/{}".format(run_id, artifact_path)

    tf_graph = tf.Graph()
    tf_sess = tf.Session(graph=tf_graph)
    with tf_graph.as_default():
        signature_def = kiwi.tensorflow.load_model(model_uri=model_uri, tf_sess=tf_sess)
        # Every declared input and output tensor must exist in the restored graph.
        for input_signature in signature_def.inputs.values():
            assert tf_graph.get_tensor_by_name(input_signature.name) is not None
        for output_signature in signature_def.outputs.values():
            assert tf_graph.get_tensor_by_name(output_signature.name) is not None
def test_serve_gunicorn_opts(iris_data, sk_model):
    """Serve a model via pyfunc scoring server and verify that extra gunicorn
    options (-w 3) appear in the launched command line.

    Checks both a models:/ registry URI and a runs:/ URI; the server's stdout is
    captured to a temp file so the spawned gunicorn command can be inspected.
    """
    if sys.platform == "win32":
        pytest.skip(
            "This test requires gunicorn which is not available on windows.")
    with kiwi.start_run() as active_run:
        kiwi.sklearn.log_model(sk_model, "model", registered_model_name="imlegit")
        run_id = active_run.info.run_id
    model_uris = [
        "models:/{name}/{stage}".format(name="imlegit", stage="None"),
        "runs:/{run_id}/model".format(run_id=run_id)
    ]
    for model_uri in model_uris:
        with TempDir() as tpm:
            output_file_path = tpm.path("stoudt")
            # Redirect the scoring server's stdout into a file we can read back.
            with open(output_file_path, "w") as output_file:
                x, _ = iris_data
                scoring_response = pyfunc_serve_and_score_model(
                    model_uri, pd.DataFrame(x),
                    content_type=CONTENT_TYPE_JSON_SPLIT_ORIENTED,
                    stdout=output_file,
                    extra_args=["-w", "3"])
            with open(output_file_path, "r") as output_file:
                stdout = output_file.read()
        # Scored values must match direct predictions from the original model.
        actual = pd.read_json(scoring_response.content, orient="records")
        actual = actual[actual.columns[0]].values
        expected = sk_model.predict(x)
        assert all(expected == actual)
        # The gunicorn invocation must include the requested worker count.
        expected_command_pattern = re.compile(
            ("gunicorn.*-w 3.*mlflow.pyfunc.scoring_server.wsgi:app"))
        assert expected_command_pattern.search(stdout) is not None
def test_autolog_persists_manually_created_run():
    """Gluon autologging must not replace a run the user started manually."""
    kiwi.gluon.autolog()
    train_data = DataLoader(LogsDataset(), batch_size=128, last_batch="discard")
    with kiwi.start_run() as run:
        net = HybridSequential()
        for layer in (Dense(64, activation="relu"),
                      Dense(64, activation="relu"),
                      Dense(10)):
            net.add(layer)
        net.initialize()
        net.hybridize()
        trainer = Trainer(
            net.collect_params(), "adam",
            optimizer_params={"learning_rate": .001, "epsilon": 1e-07})
        est = estimator.Estimator(
            net=net, loss=SoftmaxCrossEntropyLoss(), metrics=Accuracy(), trainer=trainer)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            est.fit(train_data, epochs=3)
        # The manually created run must still be the active one after fitting.
        assert kiwi.active_run().info.run_id == run.info.run_id
def test_log_model_persists_specified_conda_env_in_mlflow_model_directory(
        saved_tf_iris_model, tf_custom_env):
    """A user-supplied conda env file must be copied verbatim into the model dir."""
    artifact_path = "model"
    with kiwi.start_run():
        kiwi.tensorflow.log_model(
            tf_saved_model_dir=saved_tf_iris_model.path,
            tf_meta_graph_tags=saved_tf_iris_model.meta_graph_tags,
            tf_signature_def_key=saved_tf_iris_model.signature_def_key,
            artifact_path=artifact_path,
            conda_env=tf_custom_env)
        run_id = kiwi.active_run().info.run_id
    model_uri = "runs:/{}/{}".format(run_id, artifact_path)

    model_path = _download_artifact_from_uri(artifact_uri=model_uri)
    pyfunc_conf = _get_flavor_configuration(model_path=model_path,
                                            flavor_name=pyfunc.FLAVOR_NAME)
    saved_conda_env_path = os.path.join(model_path, pyfunc_conf[pyfunc.ENV])
    # The env must be persisted as a copy, not a reference to the source file.
    assert os.path.exists(saved_conda_env_path)
    assert saved_conda_env_path != tf_custom_env
    with open(tf_custom_env, "r") as custom_env_file:
        expected_env_text = custom_env_file.read()
    with open(saved_conda_env_path, "r") as saved_env_file:
        persisted_env_text = saved_env_file.read()
    assert persisted_env_text == expected_env_text
def eval(parms):
    """Launch one training run with the given (lr, momentum) pair and return its
    run id plus capped train/val/test losses.

    NOTE(review): this closure reads many names from its enclosing scope
    (training_data, nepochs, seed, experiment_id, tracking_client, the
    null_*_loss baselines, *_metric keys, and metric) — confirm against the
    surrounding script, which is not visible here. It also shadows the builtin
    ``eval``; renaming would change the public name, so it is left as-is.
    """
    lr, momentum = parms
    with kiwi.start_run(nested=True) as child_run:
        # Launch the "train" entry point asynchronously as a nested child run.
        p = kiwi.projects.run(run_id=child_run.info.run_id,
                              uri=".",
                              entry_point="train",
                              parameters={
                                  "training_data": training_data,
                                  "epochs": str(nepochs),
                                  "learning_rate": str(lr),
                                  "momentum": str(momentum),
                                  "seed": str(seed)
                              },
                              experiment_id=experiment_id,
                              synchronous=False)
        succeeded = p.wait()
        if succeeded:
            training_run = tracking_client.get_run(p.run_id)
            metrics = training_run.data.metrics
            # cap the loss at the loss of the null model
            train_loss = min(null_train_loss, metrics[train_metric])
            val_loss = min(null_val_loss, metrics[val_metric])
            test_loss = min(null_test_loss, metrics[test_metric])
        else:
            # run failed => return null loss
            tracking_client.set_terminated(p.run_id, "FAILED")
            train_loss = null_train_loss
            val_loss = null_val_loss
            test_loss = null_test_loss
        kiwi.log_metrics({
            "train_{}".format(metric): train_loss,
            "val_{}".format(metric): val_loss,
            "test_{}".format(metric): test_loss
        })
    return p.run_id, train_loss, val_loss, test_loss
def test_log_model_without_specified_conda_env_uses_default_env_with_expected_dependencies(
        sklearn_knn_model, tmpdir):
    """Logging via data_path/loader_module without a conda_env must persist the
    default pyfunc environment."""
    sk_model_path = os.path.join(str(tmpdir), "knn.pkl")
    with open(sk_model_path, "wb") as pkl_file:
        pickle.dump(sklearn_knn_model, pkl_file)

    pyfunc_artifact_path = "pyfunc_model"
    with kiwi.start_run():
        # This test module doubles as the loader module (filename minus ".py").
        kiwi.pyfunc.log_model(artifact_path=pyfunc_artifact_path,
                              data_path=sk_model_path,
                              loader_module=os.path.basename(__file__)[:-3],
                              code_path=[__file__])
        run_id = kiwi.active_run().info.run_id

    pyfunc_model_path = _download_artifact_from_uri(
        "runs:/{}/{}".format(run_id, pyfunc_artifact_path))
    pyfunc_conf = _get_flavor_configuration(
        model_path=pyfunc_model_path, flavor_name=kiwi.pyfunc.FLAVOR_NAME)
    conda_env_path = os.path.join(pyfunc_model_path, pyfunc_conf[kiwi.pyfunc.ENV])
    with open(conda_env_path, "r") as env_file:
        persisted_env = yaml.safe_load(env_file)
    assert persisted_env == kiwi.pyfunc.model.get_default_conda_env()
def test_log_model_persists_specified_conda_env_dict_in_mlflow_model_directory(
        sklearn_knn_model, tmpdir, pyfunc_custom_env_dict):
    """A conda env passed as a dict must be written to, and parse back from, the
    model directory unchanged."""
    sk_model_path = os.path.join(str(tmpdir), "knn.pkl")
    with open(sk_model_path, "wb") as pkl_file:
        pickle.dump(sklearn_knn_model, pkl_file)

    pyfunc_artifact_path = "pyfunc_model"
    with kiwi.start_run():
        kiwi.pyfunc.log_model(artifact_path=pyfunc_artifact_path,
                              data_path=sk_model_path,
                              loader_module=os.path.basename(__file__)[:-3],
                              code_path=[__file__],
                              conda_env=pyfunc_custom_env_dict)
        run_id = kiwi.active_run().info.run_id

    pyfunc_model_path = _download_artifact_from_uri(
        "runs:/{}/{}".format(run_id, pyfunc_artifact_path))
    pyfunc_conf = _get_flavor_configuration(
        model_path=pyfunc_model_path, flavor_name=kiwi.pyfunc.FLAVOR_NAME)
    saved_conda_env_path = os.path.join(pyfunc_model_path, pyfunc_conf[kiwi.pyfunc.ENV])
    assert os.path.exists(saved_conda_env_path)
    with open(saved_conda_env_path, "r") as env_file:
        saved_conda_env_parsed = yaml.safe_load(env_file)
    assert saved_conda_env_parsed == pyfunc_custom_env_dict
def test_log_model_no_registered_model_name(onnx_model, onnx_custom_env):
    """Without registered_model_name, log_model must not touch the registry."""
    import kiwi.onnx
    artifact_path = "model"
    with kiwi.start_run(), mock.patch("mlflow.register_model"):
        kiwi.onnx.log_model(onnx_model=onnx_model,
                            artifact_path=artifact_path,
                            conda_env=onnx_custom_env)
        kiwi.register_model.assert_not_called()
def test_log_model_with_non_keyword_args_fails(saved_tf_iris_model):
    """tensorflow.log_model requires keyword arguments; positional use must raise."""
    saved = saved_tf_iris_model
    artifact_path = "model"
    with kiwi.start_run():
        with pytest.raises(TypeError):
            kiwi.tensorflow.log_model(saved.path,
                                      saved.meta_graph_tags,
                                      saved.signature_def_key,
                                      artifact_path)
def main(use_start_run):
    """Exercise the tracking APIs, with or without an explicit start_run context."""
    if not use_start_run:
        print("Running without start_run API")
        call_tracking_apis()
        return
    print("Running with start_run API")
    with kiwi.start_run():
        call_tracking_apis()
def run(training_data, epochs, batch_size, learning_rate, momentum, seed):
    """Train a small Keras regressor on the wine-quality CSV and log it via MLflow.

    With epochs == 0 only the null model (predict the mean) is scored; otherwise
    a 4-layer MLP is trained with SGD and checkpointed through MLflowCheckpoint.

    Fix: replaced DataFrame.as_matrix() — removed in pandas 1.0 — with the
    equivalent .values accessor, which returns the same ndarray on all pandas
    versions.
    """
    warnings.filterwarnings("ignore")
    data = pd.read_csv(training_data, sep=';')

    # Split the data into training and test sets. (0.75, 0.25) split.
    train, test = train_test_split(data, random_state=seed)
    train, valid = train_test_split(train, random_state=seed)

    # The predicted column is "quality" which is a scalar from [3, 9]
    train_x = train.drop(["quality"], axis=1).values
    train_x = (train_x).astype('float32')
    train_y = train[["quality"]].values.astype('float32')
    valid_x = (valid.drop(["quality"], axis=1).values).astype('float32')
    valid_y = valid[["quality"]].values.astype('float32')
    test_x = (test.drop(["quality"], axis=1).values).astype("float32")
    test_y = test[["quality"]].values.astype("float32")

    with kiwi.start_run():
        if epochs == 0:
            # score null model: predict the training-set mean everywhere
            eval_and_log_metrics("train", train_y,
                                 np.ones(len(train_y)) * np.mean(train_y), epoch=-1)
            eval_and_log_metrics("val", valid_y,
                                 np.ones(len(valid_y)) * np.mean(valid_y), epoch=-1)
            eval_and_log_metrics("test", test_y,
                                 np.ones(len(test_y)) * np.mean(test_y), epoch=-1)
        else:
            with MLflowCheckpoint(test_x, test_y) as mlflow_logger:
                model = Sequential()
                # Standardize inputs inside the model so serving needs no extra step.
                model.add(Lambda(get_standardize_f(train_x)))
                model.add(
                    Dense(train_x.shape[1],
                          activation='relu',
                          kernel_initializer='normal',
                          input_shape=(train_x.shape[1], )))
                model.add(
                    Dense(16, activation='relu', kernel_initializer='normal'))
                model.add(
                    Dense(16, activation='relu', kernel_initializer='normal'))
                model.add(
                    Dense(1, kernel_initializer='normal', activation='linear'))
                model.compile(loss='mean_squared_error',
                              optimizer=SGD(lr=learning_rate, momentum=momentum),
                              metrics=[])
                model.fit(train_x,
                          train_y,
                          batch_size=batch_size,
                          epochs=epochs,
                          verbose=1,
                          validation_data=(valid_x, valid_y),
                          callbacks=[mlflow_logger])
def main(argv):
    """Train a tf.estimator DNNRegressor on Boston housing data, export it as a
    SavedModel (auto-logged by MLflow), reload it as a PyFunc, and print
    predictions alongside the true labels.
    """
    with kiwi.start_run():
        args = parser.parse_args(argv[1:])

        # Builds, trains and evaluates a tf.estimator. Then, exports it for inference,
        # logs the exported model with MLflow, and loads the fitted model back as a PyFunc.
        (x_train, y_train), (x_test, y_test) = tf.keras.datasets.boston_housing.load_data()

        # There are 13 features we are using for inference.
        feat_cols = [
            tf.feature_column.numeric_column(key="features", shape=(x_train.shape[1], ))
        ]
        feat_spec = {
            "features": tf.placeholder("float", name="features",
                                       shape=[None, x_train.shape[1]])
        }
        hidden_units = [50, 20]
        steps = args.steps
        regressor = tf.estimator.DNNRegressor(hidden_units=hidden_units,
                                              feature_columns=feat_cols)
        train_input_fn = tf.estimator.inputs.numpy_input_fn(
            {"features": x_train}, y_train, num_epochs=None, shuffle=True)
        regressor.train(train_input_fn, steps=steps)
        test_input_fn = tf.estimator.inputs.numpy_input_fn(
            {"features": x_test}, y_test, num_epochs=None, shuffle=True)
        # Compute mean squared error
        mse = regressor.evaluate(test_input_fn, steps=steps)

        # Building a receiver function for exporting
        receiver_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(
            feat_spec)
        temp = tempfile.mkdtemp()
        try:
            # The model is automatically logged when export_saved_model() is called.
            saved_estimator_path = regressor.export_savedmodel(
                temp, receiver_fn).decode("utf-8")

            # Since the model was automatically logged as an artifact (more specifically
            # a MLflow Model), we don't need to use saved_estimator_path to load back the model.
            # MLflow takes care of it!
            pyfunc_model = pyfunc.load_model(kiwi.get_artifact_uri('model'))
            df = pd.DataFrame(data=x_test, columns=["features"] * x_train.shape[1])

            # Checking the PyFunc's predictions are the same as the original model's predictions.
            predict_df = pyfunc_model.predict(df)
            predict_df['original_labels'] = y_test
            print(predict_df)
        finally:
            # Always remove the temporary export directory, even on failure.
            shutil.rmtree(temp)
def test_log_model_calls_register_model(onnx_model, onnx_custom_env):
    """Providing registered_model_name must register the logged model exactly once."""
    import kiwi.onnx
    artifact_path = "model"
    with kiwi.start_run(), mock.patch("mlflow.register_model"):
        kiwi.onnx.log_model(onnx_model=onnx_model,
                            artifact_path=artifact_path,
                            conda_env=onnx_custom_env,
                            registered_model_name="AdsModel1")
        expected_uri = "runs:/{}/{}".format(
            kiwi.active_run().info.run_id, artifact_path)
        kiwi.register_model.assert_called_once_with(expected_uri, "AdsModel1")
def test_log_model_no_registered_model_name(saved_tf_iris_model):
    """tensorflow.log_model without registered_model_name must skip registration."""
    artifact_path = "model"
    with kiwi.start_run(), mock.patch("mlflow.register_model"):
        kiwi.tensorflow.log_model(
            tf_saved_model_dir=saved_tf_iris_model.path,
            tf_meta_graph_tags=saved_tf_iris_model.meta_graph_tags,
            tf_signature_def_key=saved_tf_iris_model.signature_def_key,
            artifact_path=artifact_path)
        kiwi.register_model.assert_not_called()