Example #1
def test_predict_with_old_mlflow_in_conda_and_with_orient_records(iris_data):
    if no_conda:
        pytest.skip("This test needs conda.")
    x, _ = iris_data
    with TempDir() as tmp:
        input_records_path = tmp.path("input_records.json")
        pd.DataFrame(x).to_json(input_records_path, orient="records")
        output_json_path = tmp.path("output.json")
        test_model_path = tmp.path("test_model")
        from mlflow.utils.environment import _mlflow_conda_env
        test_model_conda_path = tmp.path("conda.yml")
        # create env with old mlflow!
        _mlflow_conda_env(path=test_model_conda_path,
                          additional_pip_deps=[
                              "mlflow=={}".format(test_pyfunc.MLFLOW_VERSION)
                          ])
        pyfunc.save_model(path=test_model_path,
                          loader_module=test_pyfunc.__name__.split(".")[-1],
                          code_path=[test_pyfunc.__file__],
                          conda_env=test_model_conda_path)
        # explicit json format with orient records
        p = subprocess.Popen([
            "mlflow", "models", "predict", "-m",
            path_to_local_file_uri(test_model_path), "-i", input_records_path,
            "-o", output_json_path, "-t", "json", "--json-format", "records"
        ] + no_conda)
        assert 0 == p.wait()
        actual = pd.read_json(output_json_path, orient="records")
        actual = actual[actual.columns[0]].values
        expected = test_pyfunc.PyFuncTestModel(check_version=False).predict(
            df=pd.DataFrame(x))
        assert all(expected == actual)
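Example #2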
 def test_model_export(self):
     path_to_remove = None
     try:
         with TempDir(chdr=True, remove_on_exit=False) as tmp:
             path_to_remove = tmp._path
             # NOTE: change to the temp dir and use relative paths to work around
             # the way temp dirs are handled in Python.
             model_pkl = tmp.path("model.pkl")
             with open(model_pkl, "wb") as f:
                 pickle.dump(self._linear_lr, f)
             input_path = tmp.path("input_model")
             conda_env = "conda.env"
             pyfunc.save_model(input_path,
                               loader_module="mlflow.sklearn",
                               data_path=model_pkl,
                               conda_env=_mlflow_conda_env(
                                   tmp.path(conda_env)))
             scoring_response = score_model_in_sagemaker_docker_container(
                 model_path=input_path,
                 data=self._iris_df,
                 content_type=pyfunc_scoring_server.
                 CONTENT_TYPE_JSON_SPLIT_ORIENTED,
                 flavor=pyfunc.FLAVOR_NAME)
             np.testing.assert_array_equal(
                 self._linear_lr_predict,
                 np.array(json.loads(scoring_response.content)))
     finally:
         if path_to_remove:
             try:
                 import shutil
                 shutil.rmtree(path_to_remove)
             except OSError:
                 print("Failed to remove", path_to_remove)
    def _cli_predict_with_conda_env(self, extra_args):
        with TempDir() as tmp:
            model_path = tmp.path("knn.pkl")
            with open(model_path, "wb") as f:
                pickle.dump(self._knn, f)

            # create a conda yaml that installs mlflow from source in in-place mode
            path = tmp.path("knn")
            pyfunc.save_model(dst_path=path,
                              data_path=model_path,
                              loader_module=os.path.basename(__file__)[:-3],
                              code_path=[__file__],
                              conda_env=self._create_conda_env_file(tmp))
            input_csv_path = tmp.path("input with spaces.csv")
            pandas.DataFrame(self._X).to_csv(input_csv_path,
                                             header=True,
                                             index=False)
            output_csv_path = tmp.path("output.csv")
            process = Popen([
                'mlflow', 'pyfunc', 'predict', '--model-path', path, '-i',
                input_csv_path, '-o', output_csv_path
            ] + extra_args,
                            stderr=STDOUT,
                            preexec_fn=os.setsid)
            process.wait()
            result_df = pandas.read_csv(output_csv_path, header=None)
            np.testing.assert_array_equal(result_df.values.transpose()[0],
                                          self._knn.predict(self._X))
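Example #4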
 def test_cli_predict(self):
     with TempDir() as tmp:
         model_path = tmp.path("knn.pkl")
         with open(model_path, "wb") as f:
             pickle.dump(self._knn, f)
         path = tmp.path("knn")
         pyfunc.save_model(
             dst_path=path,
             data_path=model_path,
             loader_module=os.path.basename(__file__)[:-3],
             code_path=[__file__],
         )
         input_csv_path = tmp.path("input with spaces.csv")
         pandas.DataFrame(self._X).to_csv(input_csv_path,
                                          header=True,
                                          index=False)
         output_csv_path = tmp.path("output.csv")
         runner = CliRunner(env={
             "LC_ALL": "en_US.UTF-8",
             "LANG": "en_US.UTF-8"
         })
         result = runner.invoke(mlflow.pyfunc.cli.commands, [
             'predict', '--model-path', path, '-i', input_csv_path, '-o',
             output_csv_path
         ])
         print("result", result.output)
         print(result.exc_info)
         print(result.exception)
         assert result.exit_code == 0
         result_df = pandas.read_csv(output_csv_path, header=None)
         np.testing.assert_array_equal(result_df.values.transpose()[0],
                                       self._knn.predict(self._X))
Example #5
def save_pyfunc(model_path, model, artifacts, code_path):
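    # clear any previous export first; pyfunc.save_model fails if the target path already exists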
    if os.path.isdir(model_path):
        shutil.rmtree(model_path)

    pyfunc.save_model(path=model_path,
                      python_model=model,
                      conda_env=CONDA_ENV,
                      artifacts=artifacts,
                      code_path=code_path)
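The helper above clears any existing directory at model_path and then delegates to pyfunc.save_model. A minimal usage sketch (assuming the snippet's CONDA_ENV and module imports are defined elsewhere; MyWrapperModel and the estimator.pkl artifact are hypothetical, not part of the original snippet):

import pickle

import mlflow.pyfunc


class MyWrapperModel(mlflow.pyfunc.PythonModel):
    """Hypothetical wrapper that restores a pickled estimator from its artifacts."""

    def load_context(self, context):
        # context.artifacts maps each artifact name to a local copy of the file
        with open(context.artifacts["estimator"], "rb") as f:
            self._estimator = pickle.load(f)

    def predict(self, context, model_input):
        return self._estimator.predict(model_input)


save_pyfunc(model_path="my_pyfunc_model",
            model=MyWrapperModel(),
            artifacts={"estimator": "estimator.pkl"},  # hypothetical pickled estimator
            code_path=None)

Because python_model is supplied, save_model serializes the wrapper object itself and copies each entry in artifacts into the saved model directory, where load_context can resolve it through context.artifacts.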
Example #6
 def test_model_serve(self):
     with TempDir() as tmp:
         model_path = tmp.path("knn.pkl")
         with open(model_path, "wb") as f:
             pickle.dump(self._knn, f)
         path = tmp.path("knn")
         pyfunc.save_model(dst_path=path,
                           data_path=model_path,
                           loader_module=os.path.basename(__file__)[:-3],
                           code_path=[__file__],
                           )
Example #7
    def _model_serve_with_conda_env(self, extra_args):
        with TempDir() as tmp:
            model_path = tmp.path("knn.pkl")
            with open(model_path, "wb") as f:
                pickle.dump(self._knn, f)
            path = tmp.path("knn")

            pyfunc.save_model(dst_path=path,
                              data_path=model_path,
                              loader_module=os.path.basename(__file__)[:-3],
                              code_path=[__file__],
                              conda_env=self._create_conda_env_file(tmp))
            input_csv_path = tmp.path("input.csv")
            pandas.DataFrame(self._X).to_csv(input_csv_path,
                                             header=True,
                                             index=False)
            port = 5000
            process = Popen([
                'mlflow', 'pyfunc', 'serve', '--model-path', path, '--port',
                str(port)
            ] + extra_args,
                            stderr=STDOUT,
                            preexec_fn=os.setsid)
            time.sleep(5)
            try:
                assert process.poll() is None, "server died prematurely"
                success = False
                failcount = 0
                while not success and failcount < 3 and process.poll() is None:
                    try:
                        response = requests.post(
                            "http://localhost:{}/invocations".format(port),
                            data=open(input_csv_path, 'rb'),
                            headers={'Content-type': 'text/csv'})
                        response.close()
                        success = True
                    except requests.ConnectionError:
                        time.sleep(5)
                        failcount += 1
            finally:
                os.killpg(os.getpgid(process.pid),
                          signal.SIGTERM)  # kill process + children
                time.sleep(0.5)
                assert process.poll() is not None, "server not dead"

            # check result
            if not success:
                raise RuntimeError("Fail to connect to the server")
            else:
                result_df = pandas.read_json(response.content)
                np.testing.assert_array_equal(result_df.values.transpose()[0],
                                              self._knn.predict(self._X))
Example #8
 def test_model_save_load(self):
     with TempDir() as tmp:
         model_path = tmp.path("knn.pkl")
         with open(model_path, "wb") as f:
             pickle.dump(self._knn, f)
         path = tmp.path("knn")
         m = Model(run_id="test", artifact_path="testtest")
         pyfunc.save_model(dst_path=path,
                           data_path=model_path,
                           loader_module=os.path.basename(__file__)[:-3],
                           code_path=[__file__],
                           model=m)
         m2 = Model.load(os.path.join(path, "MLmodel"))
         print("m1", m.__dict__)
         print("m2", m2.__dict__)
         assert m.__dict__ == m2.__dict__
         x = pyfunc.load_pyfunc(path)
         xpred = x.predict(self._X)
         np.testing.assert_array_equal(self._knn_predict, xpred)
Example #9
    def test_model_export(self):
        path_to_remove = None
        try:
            with TempDir(chdr=True, remove_on_exit=False) as tmp:
                path_to_remove = tmp._path
                # NOTE: change to the temp dir and use relative paths to work around
                # the way temp dirs are handled in Python.
                model_pkl = tmp.path("model.pkl")
                with open(model_pkl, "wb") as f:
                    pickle.dump(self._linear_lr, f)
                input_path = tmp.path("input_model")
                conda_env = "conda.env"
                from sys import version_info
                python_version = "{major}.{minor}.{micro}".format(
                    major=version_info.major,
                    minor=version_info.minor,
                    micro=version_info.micro)
                with open(conda_env, "w") as f:
                    f.write(CONDA_ENV.format(python_version=python_version))
                pyfunc.save_model(input_path,
                                  loader_module="test_model_export",
                                  code_path=[__file__],
                                  data_path=model_pkl,
                                  conda_env=conda_env)
                proc = Popen(
                    ['mlflow', 'sagemaker', 'run-local', '-m', input_path],
                    stdout=PIPE,
                    stderr=STDOUT,
                    universal_newlines=True)

                try:
                    for i in range(0, 50):
                        self.assertTrue(proc.poll() is None,
                                        "scoring process died")
                        time.sleep(5)
                        # noinspection PyBroadException
                        try:
                            ping_status = requests.get(
                                url='http://localhost:5000/ping')
                            print('connection attempt', i,
                                  "server is up! ping status", ping_status)
                            if ping_status.status_code == 200:
                                break
                        except Exception:
                            print('connection attempt', i,
                                  "failed, server is not up yet")

                    self.assertTrue(proc.poll() is None,
                                    "scoring process died")
                    ping_status = requests.get(
                        url='http://localhost:5000/ping')
                    print("server up, ping status", ping_status)
                    if ping_status.status_code != 200:
                        raise Exception("ping failed, server is not happy")
                    x = self._iris_df.to_dict(orient='records')
                    y = requests.post(url='http://localhost:5000/invocations',
                                      json=x)
                    import json
                    xpred = json.loads(y.content)
                    print('expected', self._linear_lr_predict)
                    print('actual  ', xpred)
                    np.testing.assert_array_equal(self._linear_lr_predict,
                                                  xpred)

                finally:
                    if proc.poll() is None:
                        proc.terminate()
                    print("captured output of the scoring process")
                    print(proc.stdout.read())
        finally:
            if path_to_remove:
                try:
                    import shutil
                    shutil.rmtree(path_to_remove)
                except PermissionError:
                    print("Failed to remove", path_to_remove)
Example #10
"""
将pandas的值加上5
"""


class addN(mlflow.pyfunc.PythonModel):
    def __init__(self, n):
        self.n = n

    def predict(self, context, model_input):

        return model_input.apply(lambda column: column + self.n)


# Save
model_path = "add_n_model"
add5_model = addN(n=5)
pyfunc.save_model(path=model_path, python_model=add5_model)

# Load
loaded_model = pyfunc.load_model(model_path)

# Run
import pandas as pd
import numpy as np
model_input = pd.DataFrame(np.random.randint(1, 10, 5))

model_output = loaded_model.predict(model_input)

print(model_input)
print(model_output)
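Since model_input is a single column of random integers, the printed model_output is the same column with 5 added to every value, confirming that predict applies the n offset element-wise.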