def test_predict_with_old_mlflow_in_conda_and_with_orient_records(iris_data):
    """Score records-oriented JSON through ``mlflow models predict``.

    The saved model's conda environment lists
    ``mlflow==<test_pyfunc.MLFLOW_VERSION>`` as a pip dependency. The test is
    skipped whenever ``no_conda`` is truthy, and the CLI output is compared
    with a direct ``PyFuncTestModel.predict`` call on the same data.
    """
    if no_conda:
        pytest.skip("This test needs conda.")

    features, _ = iris_data
    with TempDir() as tmp:
        records_in = tmp.path("input_records.json")
        pd.DataFrame(features).to_json(records_in, orient="records")
        predictions_out = tmp.path("output.json")
        model_dir = tmp.path("test_model")

        from mlflow.utils.environment import _mlflow_conda_env

        conda_yaml = tmp.path("conda.yml")
        # Create the env with an old mlflow!
        pinned_mlflow = "mlflow=={}".format(test_pyfunc.MLFLOW_VERSION)
        _mlflow_conda_env(path=conda_yaml, additional_pip_deps=[pinned_mlflow])

        loader_module = test_pyfunc.__name__.split(".")[-1]
        pyfunc.save_model(
            path=model_dir,
            loader_module=loader_module,
            code_path=[test_pyfunc.__file__],
            conda_env=conda_yaml,
        )

        # Explicit json format with orient records.
        command = [
            "mlflow", "models", "predict",
            "-m", path_to_local_file_uri(model_dir),
            "-i", records_in,
            "-o", predictions_out,
            "-t", "json",
            "--json-format", "records",
        ]
        proc = subprocess.Popen(command + no_conda)
        exit_code = proc.wait()
        assert 0 == exit_code

        scored = pd.read_json(predictions_out, orient="records")
        actual = scored[scored.columns[0]].values
        reference_model = test_pyfunc.PyFuncTestModel(check_version=False)
        expected = reference_model.predict(df=pd.DataFrame(features))
        assert all(expected == actual)
def test_model_export(self):
    """Save a pickled model as pyfunc and score it in the SageMaker docker container.

    The temp dir is created with ``remove_on_exit=False`` and removed by hand
    in the ``finally`` clause; a failed removal is only reported via ``print``.
    """
    import shutil

    path_to_remove = None
    try:
        with TempDir(chdr=True, remove_on_exit=False) as tmp:
            path_to_remove = tmp._path
            # NOTE: Changed dir to temp dir and use relative paths to get around the way temp
            # dirs are handled in python.
            pickle_file = tmp.path("model.pkl")
            with open(pickle_file, "wb") as out:
                pickle.dump(self._linear_lr, out)

            saved_model_dir = tmp.path("input_model")
            conda_env_name = "conda.env"
            env_spec = _mlflow_conda_env(tmp.path(conda_env_name))
            pyfunc.save_model(
                saved_model_dir,
                loader_module="mlflow.sklearn",
                data_path=pickle_file,
                conda_env=env_spec,
            )

            response = score_model_in_sagemaker_docker_container(
                model_path=saved_model_dir,
                data=self._iris_df,
                content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
                flavor=pyfunc.FLAVOR_NAME,
            )
            predictions = np.array(json.loads(response.content))
            np.testing.assert_array_equal(self._linear_lr_predict, predictions)
    finally:
        if path_to_remove:
            try:
                shutil.rmtree(path_to_remove)
            except OSError:
                print("Failed to remove", path_to_remove)
def _cli_predict_with_conda_env(self, extra_args):
    """Run ``mlflow pyfunc predict`` in a subprocess and compare its CSV output.

    The KNN model is pickled, saved as a pyfunc model with the conda env file
    produced by ``self._create_conda_env_file``, and scored through the CLI.

    :param extra_args: list of additional command-line arguments appended to
                       the ``mlflow pyfunc predict`` invocation.
    """
    with TempDir() as tmp:
        model_path = tmp.path("knn.pkl")
        with open(model_path, "wb") as f:
            pickle.dump(self._knn, f)

        # create a conda yaml that installs mlflow from source in-place mode
        path = tmp.path("knn")
        pyfunc.save_model(
            dst_path=path,
            data_path=model_path,
            loader_module=os.path.basename(__file__)[:-3],
            code_path=[__file__],
            conda_env=self._create_conda_env_file(tmp),
        )

        # The space in the file name checks that paths are passed through intact.
        input_csv_path = tmp.path("input with spaces.csv")
        pandas.DataFrame(self._X).to_csv(input_csv_path, header=True, index=False)
        output_csv_path = tmp.path("output.csv")

        process = Popen(
            [
                'mlflow', 'pyfunc', 'predict',
                '--model-path', path,
                '-i', input_csv_path,
                '-o', output_csv_path,
            ] + extra_args,
            stderr=STDOUT,
            preexec_fn=os.setsid,
        )
        # Fail right here when the CLI fails, instead of later with a
        # misleading error about a missing or empty output file.
        exit_code = process.wait()
        assert exit_code == 0, \
            "mlflow pyfunc predict exited with code {}".format(exit_code)

        result_df = pandas.read_csv(output_csv_path, header=None)
        np.testing.assert_array_equal(result_df.values.transpose()[0],
                                      self._knn.predict(self._X))
def test_cli_predict(self):
    """Invoke the pyfunc ``predict`` CLI command in-process and check its CSV output.

    The command runs through ``CliRunner`` with a UTF-8 locale in its
    environment; the result details are printed before the exit code is checked.
    """
    with TempDir() as tmp:
        pickle_file = tmp.path("knn.pkl")
        with open(pickle_file, "wb") as out:
            pickle.dump(self._knn, out)

        saved_model_dir = tmp.path("knn")
        pyfunc.save_model(
            dst_path=saved_model_dir,
            data_path=pickle_file,
            loader_module=os.path.basename(__file__)[:-3],
            code_path=[__file__],
        )

        csv_in = tmp.path("input with spaces.csv")
        pandas.DataFrame(self._X).to_csv(csv_in, header=True, index=False)
        csv_out = tmp.path("output.csv")

        locale_env = {"LC_ALL": "en_US.UTF-8", "LANG": "en_US.UTF-8"}
        runner = CliRunner(env=locale_env)
        cli_args = [
            'predict',
            '--model-path', saved_model_dir,
            '-i', csv_in,
            '-o', csv_out,
        ]
        result = runner.invoke(mlflow.pyfunc.cli.commands, cli_args)

        # Print the diagnostics first so they are available when the assert fails.
        print("result", result.output)
        print(result.exc_info)
        print(result.exception)
        assert result.exit_code == 0

        predicted = pandas.read_csv(csv_out, header=None)
        first_column = predicted.values.transpose()[0]
        np.testing.assert_array_equal(first_column, self._knn.predict(self._X))
def save_pyfunc(model_path, model, artifacts, code_path):
    """Save ``model`` as a pyfunc model at ``model_path``, replacing any existing directory.

    :param model_path: Destination directory; removed first if it already exists.
    :param model: Object passed to ``pyfunc.save_model`` as ``python_model``.
    :param artifacts: Passed through as ``artifacts``.
    :param code_path: Passed through as ``code_path``.
    """
    already_saved = os.path.isdir(model_path)
    if already_saved:
        # Clear the earlier save so the destination directory is free.
        shutil.rmtree(model_path)

    save_kwargs = dict(
        path=model_path,
        python_model=model,
        conda_env=CONDA_ENV,
        artifacts=artifacts,
        code_path=code_path,
    )
    pyfunc.save_model(**save_kwargs)
def test_model_serve(self):
    """Pickle the KNN model and save it as a pyfunc model in a temp dir.

    Only the save step is performed in this body.
    """
    with TempDir() as tmp:
        pickle_file = tmp.path("knn.pkl")
        with open(pickle_file, "wb") as out:
            pickle.dump(self._knn, out)

        saved_model_dir = tmp.path("knn")
        # This test file itself is the loader module, hence the stripped ".py".
        loader_module = os.path.basename(__file__)[:-3]
        pyfunc.save_model(
            dst_path=saved_model_dir,
            data_path=pickle_file,
            loader_module=loader_module,
            code_path=[__file__],
        )
def _model_serve_with_conda_env(self, extra_args):
    """Start ``mlflow pyfunc serve`` in a subprocess and score a CSV over HTTP.

    The KNN model is saved as a pyfunc model with the conda env file produced
    by ``self._create_conda_env_file``. The server is started on port 5000,
    the input CSV is posted to ``/invocations`` (up to three attempts, five
    seconds apart), and the server's process group is terminated afterwards.

    :param extra_args: list of additional command-line arguments appended to
                       the ``mlflow pyfunc serve`` invocation.
    :raises RuntimeError: if no request could be delivered to the server.
    """
    with TempDir() as tmp:
        model_path = tmp.path("knn.pkl")
        with open(model_path, "wb") as f:
            pickle.dump(self._knn, f)
        path = tmp.path("knn")
        pyfunc.save_model(
            dst_path=path,
            data_path=model_path,
            loader_module=os.path.basename(__file__)[:-3],
            code_path=[__file__],
            conda_env=self._create_conda_env_file(tmp),
        )
        input_csv_path = tmp.path("input.csv")
        pandas.DataFrame(self._X).to_csv(input_csv_path, header=True, index=False)
        port = 5000
        # os.setsid gives the server its own session/process group so that the
        # whole group (server + children) can be signalled in the cleanup below.
        process = Popen(
            ['mlflow', 'pyfunc', 'serve', '--model-path', path, '--port', str(port)]
            + extra_args,
            stderr=STDOUT,
            preexec_fn=os.setsid,
        )
        time.sleep(5)
        try:
            assert process.poll() is None, "server died prematurely"
            success = False
            failcount = 0
            while not success and failcount < 3 and process.poll() is None:
                try:
                    # Open the request body in a context manager so the file
                    # handle is closed on success and on connection errors alike.
                    with open(input_csv_path, 'rb') as csv_file:
                        response = requests.post(
                            "http://localhost:{}/invocations".format(port),
                            data=csv_file,
                            headers={'Content-type': 'text/csv'})
                    response.close()
                    success = True
                except requests.ConnectionError:
                    time.sleep(5)
                    failcount += 1
        finally:
            # Only signal the group while the server is still running: once it
            # has exited, os.getpgid() raises and would hide the real failure.
            if process.poll() is None:
                os.killpg(os.getpgid(process.pid), signal.SIGTERM)  # kill process + children
                time.sleep(0.5)
            assert process.poll() is not None, "server not dead"

        # check result
        if not success:
            raise RuntimeError("Fail to connect to the server")
        else:
            result_df = pandas.read_json(response.content)
            np.testing.assert_array_equal(result_df.values.transpose()[0],
                                          self._knn.predict(self._X))
def test_model_save_load(self):
    """Round-trip a pyfunc model through save and load.

    Checks that the ``MLmodel`` file loads back into a ``Model`` whose
    ``__dict__`` equals the one that was saved, and that the reloaded pyfunc
    reproduces ``self._knn_predict`` on ``self._X``.
    """
    with TempDir() as tmp:
        pickle_file = tmp.path("knn.pkl")
        with open(pickle_file, "wb") as out:
            pickle.dump(self._knn, out)

        saved_model_dir = tmp.path("knn")
        saved_conf = Model(run_id="test", artifact_path="testtest")
        pyfunc.save_model(
            dst_path=saved_model_dir,
            data_path=pickle_file,
            loader_module=os.path.basename(__file__)[:-3],
            code_path=[__file__],
            model=saved_conf,
        )

        mlmodel_file = os.path.join(saved_model_dir, "MLmodel")
        loaded_conf = Model.load(mlmodel_file)
        # Print both configurations so a mismatch can be read from the test output.
        print("m1", saved_conf.__dict__)
        print("m2", loaded_conf.__dict__)
        assert saved_conf.__dict__ == loaded_conf.__dict__

        reloaded = pyfunc.load_pyfunc(saved_model_dir)
        predictions = reloaded.predict(self._X)
        np.testing.assert_array_equal(self._knn_predict, predictions)
def test_model_export(self):
    """Serve a saved pyfunc model via ``mlflow sagemaker run-local`` and score it over HTTP.

    Steps:
      1. Pickle the model and save it as a pyfunc model, with a conda env file
         rendered from ``CONDA_ENV`` for the running Python version.
      2. Start ``mlflow sagemaker run-local`` and poll ``/ping`` every five
         seconds (at most 50 attempts) until it answers with status 200.
      3. Post the iris records to ``/invocations`` and compare the response
         with ``self._linear_lr_predict``.

    The scoring process is terminated and its captured output printed in all
    cases. The temp dir is then removed on a best-effort basis.
    """
    path_to_remove = None
    try:
        with TempDir(chdr=True, remove_on_exit=False) as tmp:
            path_to_remove = tmp._path
            # NOTE: Changed dir to temp dir and use relative paths to get around the way temp
            # dirs are handled in python.
            model_pkl = tmp.path("model.pkl")
            with open(model_pkl, "wb") as f:
                pickle.dump(self._linear_lr, f)
            input_path = tmp.path("input_model")
            conda_env = "conda.env"
            from sys import version_info
            python_version = "{major}.{minor}.{micro}".format(
                major=version_info.major,
                minor=version_info.minor,
                micro=version_info.micro)
            # Relative path: it resolves inside the temp dir (see NOTE above).
            with open(conda_env, "w") as f:
                f.write(CONDA_ENV.format(python_version=python_version))
            pyfunc.save_model(input_path,
                              loader_module="test_model_export",
                              code_path=[__file__],
                              data_path=model_pkl,
                              conda_env=conda_env)
            # NOTE(review): stdout is only read after the test finishes; a very
            # chatty server could fill the pipe buffer and stall - confirm.
            proc = Popen(['mlflow', 'sagemaker', 'run-local', '-m', input_path],
                         stdout=PIPE,
                         stderr=STDOUT,
                         universal_newlines=True)
            try:
                for i in range(0, 50):
                    self.assertTrue(proc.poll() is None, "scoring process died")
                    time.sleep(5)
                    # noinspection PyBroadException
                    try:
                        ping_status = requests.get(url='http://localhost:5000/ping')
                        print('connection attempt', i,
                              "server is up! \nping status", ping_status)
                        if ping_status.status_code == 200:
                            break
                    except Exception:
                        print('connection attempt', i, "failed, server is not up yet")

                self.assertTrue(proc.poll() is None, "scoring process died")
                ping_status = requests.get(url='http://localhost:5000/ping')
                print("server up, ping status", ping_status)
                if ping_status.status_code != 200:
                    raise Exception("ping failed, server is not happy")
                x = self._iris_df.to_dict(orient='records')
                y = requests.post(url='http://localhost:5000/invocations', json=x)
                import json
                xpred = json.loads(y.content)
                print('expected', self._linear_lr_predict)
                print('actual ', xpred)
                np.testing.assert_array_equal(self._linear_lr_predict, xpred)
            finally:
                if proc.poll() is None:
                    proc.terminate()
                print("captured output of the scoring process")
                print(proc.stdout.read())
    finally:
        if path_to_remove:
            try:
                import shutil
                shutil.rmtree(path_to_remove)
            except OSError:
                # Cleanup is best effort. Catching OSError (of which
                # PermissionError is a subclass) keeps any removal failure
                # from replacing the test's real outcome.
                print("Failed to remove", path_to_remove)
""" 将pandas的值加上5 """ class addN(mlflow.pyfunc.PythonModel): def __init__(self, n): self.n = n def predict(self, context, model_input): return model_input.apply(lambda column: column + self.n) #保存 model_path = "add_n_model" add5_model = addN(n=5) pyfunc.save_model(path=model_path, python_model=add5_model) #加载 loaded_model = pyfunc.load_model(model_path) #执行 import pandas as pd import numpy as np model_input = pd.DataFrame(np.random.randint(1, 10, 5)) model_output = loaded_model.predict(model_input) print(model_input) print(model_output)