def test_load_model_loads_torch_model_using_pickle_module_specified_at_save_time(
        module_scoped_subclassed_model):
    custom_pickle_module = pickle

    artifact_path = "pytorch_model"
    with kiwi.start_run():
        kiwi.pytorch.log_model(
            artifact_path=artifact_path,
            pytorch_model=module_scoped_subclassed_model,
            conda_env=None,
            pickle_module=custom_pickle_module)
        model_uri = "runs:/{run_id}/{artifact_path}".format(
            run_id=kiwi.active_run().info.run_id,
            artifact_path=artifact_path)

    import_module_fn = importlib.import_module
    imported_modules = []

    # Wrap importlib.import_module so that we can record which modules are imported
    # while the model is being loaded.
    def track_module_imports(module_name):
        imported_modules.append(module_name)
        return import_module_fn(module_name)

    with mock.patch("importlib.import_module") as import_mock, \
            mock.patch("torch.load") as torch_load_mock:
        import_mock.side_effect = track_module_imports
        pyfunc.load_pyfunc(model_uri=model_uri)

    # The model should be deserialized with the same pickle module that was specified
    # at save time.
    torch_load_mock.assert_called_with(mock.ANY, pickle_module=custom_pickle_module)
    assert custom_pickle_module.__name__ in imported_modules

def test_load_model_succeeds_when_data_is_model_file_instead_of_directory(
        module_scoped_subclassed_model, model_path, data):
    """
    This test verifies that PyTorch models saved in older versions of MLflow are loaded
    successfully by ``mlflow.pytorch.load_model``. The ``data`` path associated with these
    older models is a serialized PyTorch model file, as opposed to the current format:
    a directory containing a serialized model file and pickle module information.
    """
    artifact_path = "pytorch_model"
    with kiwi.start_run():
        kiwi.pytorch.log_model(
            artifact_path=artifact_path,
            pytorch_model=module_scoped_subclassed_model,
            conda_env=None)
        model_path = _download_artifact_from_uri("runs:/{run_id}/{artifact_path}".format(
            run_id=kiwi.active_run().info.run_id, artifact_path=artifact_path))

    model_conf_path = os.path.join(model_path, "MLmodel")
    model_conf = Model.load(model_conf_path)
    pyfunc_conf = model_conf.flavors.get(pyfunc.FLAVOR_NAME)
    assert pyfunc_conf is not None
    model_data_path = os.path.join(model_path, pyfunc_conf[pyfunc.DATA])
    assert os.path.exists(model_data_path)
    assert kiwi.pytorch._SERIALIZED_TORCH_MODEL_FILE_NAME in os.listdir(model_data_path)

    # Rewrite the pyfunc ``data`` field to point directly at the serialized model file,
    # emulating the older on-disk format.
    pyfunc_conf[pyfunc.DATA] = os.path.join(
        model_data_path, kiwi.pytorch._SERIALIZED_TORCH_MODEL_FILE_NAME)
    model_conf.save(model_conf_path)

    loaded_pyfunc = pyfunc.load_pyfunc(model_path)

    np.testing.assert_array_almost_equal(
        loaded_pyfunc.predict(data[0]),
        pd.DataFrame(_predict(model=module_scoped_subclassed_model, data=data)),
        decimal=4)

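# ``_predict`` above is a helper defined elsewhere in this test module. A minimal sketch
# of what such a helper might look like, assuming ``data`` is a (features, labels) pair
# of pandas DataFrames; the signature and conversion details below are illustrative
# assumptions, not the actual helper:
def _predict(model, data):
    import torch  # assumed available wherever these PyTorch tests run

    model.eval()
    with torch.no_grad():
        # Run a single forward pass over the full feature frame
        inputs = torch.Tensor(data[0].values.astype(np.float32))
        return model(inputs).numpy().squeeze()
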
def test_load_pyfunc_succeeds_for_older_models_with_pyfunc_data_field(
        sklearn_knn_model, model_path):
    """
    This test verifies that scikit-learn models saved in older versions of MLflow are loaded
    successfully by ``mlflow.pyfunc.load_model``. These older models specify a pyfunc ``data``
    field referring directly to a serialized scikit-learn model file. In contrast, newer models
    omit the ``data`` field.
    """
    kiwi.sklearn.save_model(
        sk_model=sklearn_knn_model.model,
        path=model_path,
        serialization_format=kiwi.sklearn.SERIALIZATION_FORMAT_PICKLE)

    model_conf_path = os.path.join(model_path, "MLmodel")
    model_conf = Model.load(model_conf_path)
    pyfunc_conf = model_conf.flavors.get(pyfunc.FLAVOR_NAME)
    sklearn_conf = model_conf.flavors.get(kiwi.sklearn.FLAVOR_NAME)
    assert sklearn_conf is not None
    assert pyfunc_conf is not None

    # Rewrite the configuration to match the older format: point the pyfunc ``data``
    # field directly at the pickled scikit-learn model file, and persist the change so
    # that it is visible at load time.
    pyfunc_conf[pyfunc.DATA] = sklearn_conf["pickled_model"]
    model_conf.save(model_conf_path)

    reloaded_knn_pyfunc = pyfunc.load_pyfunc(model_uri=model_path)

    np.testing.assert_array_equal(
        sklearn_knn_model.model.predict(sklearn_knn_model.inference_data),
        reloaded_knn_pyfunc.predict(sklearn_knn_model.inference_data))

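# For reference, the older-format ``MLmodel`` configuration that this test reconstructs
# looks roughly like the following; the field values are illustrative assumptions:
#
#   flavors:
#     python_function:
#       loader_module: kiwi.sklearn
#       data: model.pkl
#     sklearn:
#       pickled_model: model.pkl
#       serialization_format: pickle
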
def test_estimator_model_export(spark_model_estimator, model_path, spark_custom_env):
    sparkm.save_model(
        spark_model_estimator.model,
        path=model_path,
        conda_env=spark_custom_env)
    # 1. score and compare the reloaded sparkml model
    reloaded_model = sparkm.load_model(model_uri=model_path)
    preds_df = reloaded_model.transform(spark_model_estimator.spark_df)
    preds = [x.prediction for x in preds_df.select("prediction").collect()]
    assert spark_model_estimator.predictions == preds
    # 2. score and compare the reloaded pyfunc
    m = pyfunc.load_pyfunc(model_path)
    preds2 = m.predict(spark_model_estimator.spark_df.toPandas())
    assert spark_model_estimator.predictions == preds2

def test_model_save_load(xgb_model, model_path):
    model = xgb_model.model

    kiwi.xgboost.save_model(xgb_model=model, path=model_path)
    reloaded_model = kiwi.xgboost.load_model(model_uri=model_path)
    reloaded_pyfunc = pyfunc.load_pyfunc(model_uri=model_path)

    np.testing.assert_array_almost_equal(
        model.predict(xgb_model.inference_dmatrix),
        reloaded_model.predict(xgb_model.inference_dmatrix))

    np.testing.assert_array_almost_equal(
        reloaded_model.predict(xgb_model.inference_dmatrix),
        reloaded_pyfunc.predict(xgb_model.inference_dataframe))

def test_model_save_load(lgb_model, model_path):
    model = lgb_model.model

    kiwi.lightgbm.save_model(lgb_model=model, path=model_path)
    reloaded_model = kiwi.lightgbm.load_model(model_uri=model_path)
    reloaded_pyfunc = pyfunc.load_pyfunc(model_uri=model_path)

    np.testing.assert_array_almost_equal(
        model.predict(lgb_model.inference_dataframe),
        reloaded_model.predict(lgb_model.inference_dataframe))

    np.testing.assert_array_almost_equal(
        reloaded_model.predict(lgb_model.inference_dataframe),
        reloaded_pyfunc.predict(lgb_model.inference_dataframe))

def test_model_save_load(sklearn_knn_model, model_path):
    knn_model = sklearn_knn_model.model

    kiwi.sklearn.save_model(sk_model=knn_model, path=model_path)
    reloaded_knn_model = kiwi.sklearn.load_model(model_uri=model_path)
    reloaded_knn_pyfunc = pyfunc.load_pyfunc(model_uri=model_path)

    np.testing.assert_array_equal(
        knn_model.predict(sklearn_knn_model.inference_data),
        reloaded_knn_model.predict(sklearn_knn_model.inference_data))

    np.testing.assert_array_equal(
        reloaded_knn_model.predict(sklearn_knn_model.inference_data),
        reloaded_knn_pyfunc.predict(sklearn_knn_model.inference_data))

def test_load_pyfunc_loads_torch_model_using_pickle_module_specified_at_save_time(
        module_scoped_subclassed_model, model_path):
    custom_pickle_module = pickle

    kiwi.pytorch.save_model(
        path=model_path,
        pytorch_model=module_scoped_subclassed_model,
        conda_env=None,
        pickle_module=custom_pickle_module)

    import_module_fn = importlib.import_module
    imported_modules = []

    # Wrap importlib.import_module so that we can record which modules are imported
    # while the model is being loaded.
    def track_module_imports(module_name):
        imported_modules.append(module_name)
        return import_module_fn(module_name)

    with mock.patch("importlib.import_module") as import_mock, \
            mock.patch("torch.load") as torch_load_mock:
        import_mock.side_effect = track_module_imports
        pyfunc.load_pyfunc(model_path)

    # The model should be deserialized with the same pickle module that was specified
    # at save time.
    torch_load_mock.assert_called_with(mock.ANY, pickle_module=custom_pickle_module)
    assert custom_pickle_module.__name__ in imported_modules

def test_model_export(spark_model_iris, model_path, spark_custom_env):
    sparkm.save_model(spark_model_iris.model, path=model_path, conda_env=spark_custom_env)
    # 1. score and compare the reloaded sparkml model
    reloaded_model = sparkm.load_model(model_uri=model_path)
    preds_df = reloaded_model.transform(spark_model_iris.spark_df)
    preds1 = [x.prediction for x in preds_df.select("prediction").collect()]
    assert spark_model_iris.predictions == preds1
    # 2. score and compare the reloaded pyfunc
    m = pyfunc.load_pyfunc(model_path)
    preds2 = m.predict(spark_model_iris.pandas_df)
    assert spark_model_iris.predictions == preds2
    # 3. score and compare the reloaded pyfunc as a Spark UDF
    preds3 = score_model_as_udf(model_uri=model_path, pandas_df=spark_model_iris.pandas_df)
    assert spark_model_iris.predictions == preds3
    assert os.path.exists(sparkm.DFS_TMP)

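# ``score_model_as_udf`` is a test helper defined elsewhere. A minimal sketch of such a
# helper built on ``kiwi.pyfunc.spark_udf``; the SparkSession handling and result type
# below are illustrative assumptions, not the actual helper:
from pyspark.sql import SparkSession


def score_model_as_udf(model_uri, pandas_df, result_type="double"):
    spark = SparkSession.builder.getOrCreate()
    spark_df = spark.createDataFrame(pandas_df)
    # Wrap the logged model as a Spark UDF and apply it across the feature columns
    pyfunc_udf = pyfunc.spark_udf(spark=spark, model_uri=model_uri, result_type=result_type)
    new_df = spark_df.withColumn("prediction", pyfunc_udf(*pandas_df.columns))
    return [x.prediction for x in new_df.select("prediction").collect()]
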
def test_sagemaker_docker_model_scoring_with_default_conda_env(xgb_model, model_path):
    kiwi.xgboost.save_model(xgb_model=xgb_model.model, path=model_path, conda_env=None)
    reloaded_pyfunc = pyfunc.load_pyfunc(model_uri=model_path)

    scoring_response = score_model_in_sagemaker_docker_container(
        model_uri=model_path,
        data=xgb_model.inference_dataframe,
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
        flavor=kiwi.pyfunc.FLAVOR_NAME)
    deployed_model_preds = pd.DataFrame(json.loads(scoring_response.content))

    pandas.testing.assert_frame_equal(
        deployed_model_preds,
        pd.DataFrame(reloaded_pyfunc.predict(xgb_model.inference_dataframe)),
        check_dtype=False,
        check_less_precise=6)

def get_or_load(archive_path):
    """Given a path returned by add_local_model(), this method will return the loaded model.
    If this Python process ever loaded the model before, we will reuse that copy.
    """
    if archive_path in SparkModelCache._models:
        SparkModelCache._cache_hits += 1
        return SparkModelCache._models[archive_path]

    # BUG: Despite the documentation of SparkContext.addFile() and SparkFiles.get() in Scala
    # and Python, it turns out that we actually need to use the basename as the input to
    # SparkFiles.get(), as opposed to the (absolute) path.
    archive_path_basename = os.path.basename(archive_path)
    local_path = SparkFiles.get(archive_path_basename)
    temp_dir = tempfile.mkdtemp()
    zip_ref = zipfile.ZipFile(local_path, 'r')
    zip_ref.extractall(temp_dir)
    zip_ref.close()

    # We must rely on a supposed cyclic import here because we want this behavior
    # on the Spark Executors (i.e., don't try to pickle the load_model function).
    from kiwi.pyfunc import load_pyfunc  # pylint: disable=cyclic-import

    SparkModelCache._models[archive_path] = load_pyfunc(temp_dir)
    return SparkModelCache._models[archive_path]

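# A sketch of how this cache is typically used (the companion
# ``SparkModelCache.add_local_model(spark, model_path)`` staticmethod is assumed to
# exist in the surrounding module, and the call pattern below is illustrative):
def _example_usage(spark, local_model_path, pandas_df):
    # Driver side: ship the zipped model directory to every node in the cluster.
    archive_path = SparkModelCache.add_local_model(spark, local_model_path)
    # Executor side: the first call extracts and loads the model; subsequent calls in
    # the same Python process return the cached copy.
    model = SparkModelCache.get_or_load(archive_path)
    return model.predict(pandas_df)
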
from kiwi.pyfunc import scoring_server
from kiwi import pyfunc

# WSGI entry point: load the model from the container's model directory and initialize
# the scoring server application around it.
app = scoring_server.init(pyfunc.load_pyfunc("/opt/ml/model/"))
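
# A WSGI server would typically serve this application inside the scoring container; for
# example, something like the following (worker count and bind address are illustrative
# assumptions):
#
#   gunicorn -w 4 -b 0.0.0.0:8080 wsgi:app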