def test_model_log(self):
    with TempDir(chdr=True, remove_on_exit=True) as tmp:
        model_path = tmp.path("linear.pkl")
        with open(model_path, "wb") as f:
            pickle.dump(self._linear_lr, f)
        tracking_dir = os.path.abspath(tmp.path("mlruns"))
        mlflow.set_tracking_uri("file://%s" % tracking_dir)
        mlflow.start_run()
        try:
            pyfunc.log_model(artifact_path="linear",
                             data_path=model_path,
                             loader_module=os.path.basename(__file__)[:-3],
                             code_path=[__file__])
            run_id = mlflow.active_run().info.run_uuid
            path = tracking.utils._get_model_log_dir("linear", run_id)
            m = Model.load(os.path.join(path, "MLmodel"))
            print(m.__dict__)
            assert pyfunc.FLAVOR_NAME in m.flavors
            assert pyfunc.PY_VERSION in m.flavors[pyfunc.FLAVOR_NAME]
            x = pyfunc.load_pyfunc("linear", run_id=run_id)
            xpred = x.predict(self._X)
            np.testing.assert_array_equal(self._linear_lr_predict, xpred)
        finally:
            mlflow.end_run()
            mlflow.set_tracking_uri(None)
            # Remove the log directory in order to avoid adding new tests to pytest...
            shutil.rmtree(tracking_dir)
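Because `loader_module` here is the test file itself (its basename without `.py`), that file must also define the hook MLflow's pyfunc flavor looks up in a loader module, `_load_pyfunc`. A minimal sketch of what that hook would look like for the pickled linear model above (the body is an assumption, not copied from the test suite):

def _load_pyfunc(path):
    # MLflow calls this with the logged data_path (here: the pickle file)
    # and expects back an object exposing predict(data).
    import pickle
    with open(path, "rb") as f:
        return pickle.load(f)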
def on_train_end(self, args, state, control, **kwargs):
    input_schema = Schema([ColSpec(name="text", type="string")])
    output_schema = Schema([TensorSpec(np.dtype(np.float64), (-1, -1))])
    signature = ModelSignature(inputs=input_schema, outputs=output_schema)
    pyfunc.log_model(
        # artifact path is _relative_ to run root in mlflow
        artifact_path="bert_classifier_model",
        # Dir with the module files for dependencies
        code_path=[
            os.path.join(os.path.dirname(os.path.abspath(__file__)), "models.py"),
            os.path.join(os.path.dirname(os.path.abspath(__file__)), "utils.py")
        ],
        python_model=MLFlowBertClassificationModel(),
        artifacts={
            "model": state.best_model_checkpoint,
        },
        conda_env={
            'name': 'classifier-env',
            'channels': ['defaults', 'pytorch', 'pypi'],
            'dependencies': [
                'python=3.8.8',
                'pip',
                'pytorch=1.8.0',
                {
                    'pip': [
                        'transformers==4.4.2',
                        'mlflow==1.15.0',
                        'numpy==1.20.1'
                    ]
                }
            ]
        },
        signature=signature,
        await_registration_for=5,
        registered_model_name=self.registered_name)
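`MLFlowBertClassificationModel` is defined elsewhere in this project. As a hedged sketch (the class body is an assumption, not the project's code), a `PythonModel` matching the "model" artifact key and the text-in / float-tensor-out signature declared above might look like:

import torch
import mlflow.pyfunc
from transformers import AutoModelForSequenceClassification, AutoTokenizer

class MLFlowBertClassificationModel(mlflow.pyfunc.PythonModel):
    def load_context(self, context):
        # context.artifacts["model"] resolves to the checkpoint directory logged
        # above via state.best_model_checkpoint (assumes the tokenizer was saved there too).
        self.tokenizer = AutoTokenizer.from_pretrained(context.artifacts["model"])
        self.model = AutoModelForSequenceClassification.from_pretrained(context.artifacts["model"])

    def predict(self, context, model_input):
        # model_input is a pandas DataFrame with a "text" column, per the input schema.
        encoded = self.tokenizer(list(model_input["text"]), padding=True,
                                 truncation=True, return_tensors="pt")
        with torch.no_grad():
            return self.model(**encoded).logits.numpy()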
def test_load_model_succeeds_with_dependencies_specified_via_code_paths(
    module_scoped_subclassed_model, model_path, data
):
    # Save a PyTorch model whose class is defined in the current test suite. Because the
    # `tests` module is not available when the model is deployed for local scoring, we include
    # the test suite file as a code dependency
    mlflow.pytorch.save_model(
        path=model_path,
        pytorch_model=module_scoped_subclassed_model,
        conda_env=None,
        code_paths=[__file__],
    )

    # Define a custom pyfunc model that loads a PyTorch model artifact using
    # `mlflow.pytorch.load_model`
    class TorchValidatorModel(pyfunc.PythonModel):
        def load_context(self, context):  # pylint: disable=attribute-defined-outside-init
            self.pytorch_model = mlflow.pytorch.load_model(context.artifacts["pytorch_model"])

        def predict(self, context, model_input):
            with torch.no_grad():
                input_tensor = torch.from_numpy(model_input.values.astype(np.float32))
                output_tensor = self.pytorch_model(input_tensor)
                return pd.DataFrame(output_tensor.numpy())

    pyfunc_artifact_path = "pyfunc_model"
    with mlflow.start_run():
        pyfunc.log_model(
            artifact_path=pyfunc_artifact_path,
            python_model=TorchValidatorModel(),
            artifacts={"pytorch_model": model_path},
        )
        pyfunc_model_path = _download_artifact_from_uri(
            "runs:/{run_id}/{artifact_path}".format(
                run_id=mlflow.active_run().info.run_id, artifact_path=pyfunc_artifact_path
            )
        )

    # Deploy the custom pyfunc model and ensure that it is able to successfully load its
    # constituent PyTorch model via `mlflow.pytorch.load_model`
    scoring_response = pyfunc_serve_and_score_model(
        model_uri=pyfunc_model_path,
        data=data[0],
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
        extra_args=["--no-conda"],
    )
    assert scoring_response.status_code == 200

    deployed_model_preds = pd.DataFrame(json.loads(scoring_response.content))
    np.testing.assert_array_almost_equal(
        deployed_model_preds.values[:, 0],
        _predict(model=module_scoped_subclassed_model, data=data),
        decimal=4,
    )
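For a quicker local check that does not go through the scoring server, the same downloaded artifact can also be loaded in-process; a small usage sketch (assumes `data[0]` is the pandas DataFrame the test feeds to the server):

loaded_model = pyfunc.load_model(pyfunc_model_path)
local_preds = loaded_model.predict(data[0])
np.testing.assert_array_almost_equal(
    local_preds.values[:, 0],
    _predict(model=module_scoped_subclassed_model, data=data),
    decimal=4,
)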
def log_model(keras_model, artifact_path):
    """
    Log model to mlflow.

    :param keras_model: Keras model to be saved.
    :param artifact_path: Run-relative artifact path this model is to be saved to.
    """
    with TempDir() as tmp:
        data_path = tmp.path(STOCK_MODEL_PATHS)
        if not path.exists(data_path):
            mkdir(data_path)
        keras_path = path.join(data_path, MODEL_ARTIFACT_NAME)
        keras.save_model(keras_model, path=keras_path)
        pyfunc.log_model(artifact_path=artifact_path,
                         loader_module=__name__,
                         code_path=[__file__],
                         data_path=data_path)
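Because the model is logged with `loader_module=__name__`, this module also needs a `_load_pyfunc` hook. A sketch of the matching loader, assuming the `keras.save_model` call above refers to `mlflow.keras.save_model` (an assumption suggested by its `path=` argument):

def _load_pyfunc(data_path):
    # MLflow passes the logged data_path; return an object with a predict method.
    import mlflow.keras
    return mlflow.keras.load_model(path.join(data_path, MODEL_ARTIFACT_NAME))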
def config_simple(request):
    import os
    import mlflow
    from mlflow.pyfunc import log_model

    log_model(
        artifact_path='ensembler',
        python_model=TestEnsembler(result_type=request.param),
        code_path=[os.path.join(os.path.dirname(__file__), '../ensembler')])
    ensembler_path = os.path.join(mlflow.get_artifact_uri(), 'ensembler')
    yield from_yaml(f"""\
uri: {ensembler_path}
result:
  column_name: test_results
  type: {request.param.name}
""", openapi.EnsemblingJobEnsemblerSpec)
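`TestEnsembler` is provided by the surrounding test package. A hedged sketch of a minimal `PythonModel`-style ensembler it could correspond to (the class body and the row-wise averaging are assumptions; only the `result_type` constructor argument comes from the call above):

import numpy as np
import mlflow.pyfunc

class TestEnsembler(mlflow.pyfunc.PythonModel):
    def __init__(self, result_type):
        # Kept so predictions can be cast to the type declared in the job spec.
        self.result_type = result_type

    def predict(self, context, model_input):
        # Toy ensembling rule: average the prediction columns row-wise.
        return np.mean(model_input.values, axis=1)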
def config_array():
    import os
    import mlflow
    from mlflow.pyfunc import log_model

    log_model(
        artifact_path='ensembler_v2',
        python_model=ArrayEnsembler(),
        code_path=[os.path.join(os.path.dirname(__file__), '../ensembler')])
    ensembler_path = os.path.join(mlflow.get_artifact_uri(), 'ensembler_v2')
    yield from_yaml(f"""\
uri: {ensembler_path}
result:
  column_name: test_results
  type: ARRAY
  item_type: INTEGER
""", openapi.EnsemblingJobEnsemblerSpec)
def test_integration(spark_session, bq):
    log_model("model",
              python_model=IrisModel(),
              artifacts={"model_path": "test-model/model.joblib"})
    model_path = os.path.join(mlflow.get_artifact_uri(), "model")
    config_path = "test-config/integration_test.yaml"
    with open(config_path, "w") as f:
        f.write(test_config.format(model_path))
    cfg = load(config_path)
    bq.delete_table(table=cfg.sink().table(), not_found_ok=True)

    main(config_path, spark_session)

    result_table = bq.get_table(cfg.sink().table())
    assert len(result_table.schema) == 5
    assert result_table.schema[4].name == cfg.sink().result_column()
    assert result_table.schema[4].field_type == "FLOAT"
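`IrisModel` comes from the project under test. A hedged sketch of the kind of `PythonModel` wrapper the "model_path" artifact key implies (the joblib-based loading is an assumption):

import joblib
import mlflow.pyfunc

class IrisModel(mlflow.pyfunc.PythonModel):
    def load_context(self, context):
        # "model_path" matches the artifact key passed to log_model above.
        self.model = joblib.load(context.artifacts["model_path"])

    def predict(self, context, model_input):
        return self.model.predict(model_input)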
def _log_model(sentiment_classifier):
    artifact_path = sentiment_classifier.save_model_artifacts()
    log_model(artifact_path='model',
              loader_module=__name__,
              data_path=artifact_path)
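Here too, `loader_module=__name__` means this module is expected to expose `_load_pyfunc`; a hedged sketch, where `rebuild_classifier` is a hypothetical counterpart to the `save_model_artifacts` call above:

def _load_pyfunc(data_path):
    # MLflow passes back the data_path logged above; return an object exposing predict.
    return rebuild_classifier(data_path)  # hypothetical helper that reloads the saved artifacts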