Example #1
def test_predict_with_old_mlflow_in_conda_and_with_orient_records(iris_data):
    if no_conda:
        pytest.skip("This test needs conda.")
    # TODO: Enable this test after 1.0 is out to ensure we do not break the serve / predict
    # TODO: Also add a test for serve, not just predict.
    pytest.skip("TODO: enable this after 1.0 release is out.")
    x, _ = iris_data
    with TempDir() as tmp:
        input_records_path = tmp.path("input_records.json")
        pd.DataFrame(x).to_json(input_records_path, orient="records")
        output_json_path = tmp.path("output.json")
        test_model_path = tmp.path("test_model")
        test_model_conda_path = tmp.path("conda.yml")
        # create env with old mlflow!
        _mlflow_conda_env(
            path=test_model_conda_path,
            additional_pip_deps=[
                "mlflow=={}".format(test_pyfunc.MLFLOW_VERSION)
            ],
        )
        pyfunc.save_model(
            path=test_model_path,
            loader_module=test_pyfunc.__name__.split(".")[-1],
            code_path=[test_pyfunc.__file__],
            conda_env=test_model_conda_path,
        )
        # explicit json format with orient records
        p = subprocess.Popen([
            "mlflow",
            "models",
            "predict",
            "-m",
            path_to_local_file_uri(test_model_path),
            "-i",
            input_records_path,
            "-o",
            output_json_path,
            "-t",
            "json",
            "--json-format",
            "records",
        ] + no_conda)
        assert 0 == p.wait()
        actual = pd.read_json(output_json_path, orient="records")
        actual = actual[actual.columns[0]].values
        expected = test_pyfunc.PyFuncTestModel(check_version=False).predict(
            df=pd.DataFrame(x))
        assert all(expected == actual)
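For comparison, the prediction that the subprocess check verifies can also be produced in-process with the public pyfunc API. A minimal sketch, assuming a recent MLflow where mlflow.pyfunc.load_model is available; the model path and the tiny input frame are made up for illustration (in the test they would be tmp.path("test_model") and the iris features):

import mlflow.pyfunc
import pandas as pd

# Hypothetical saved-model directory; in the test above this would be tmp.path("test_model").
model = mlflow.pyfunc.load_model("/tmp/test_model")

# Stand-in input frame; the CLI path above receives the same data serialized as JSON records.
input_df = pd.DataFrame({"sepal_length": [5.1], "sepal_width": [3.5]})
predictions = model.predict(input_df)
print(predictions)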
Example #2
def test_path_parameter():
    """
    Tests that MLflow file-download APIs get called when necessary for arguments of type `path`.
    """
    project = load_project()
    entry_point = project.get_entry_point("line_count")
    with mock.patch("mlflow.tracking.artifact_utils._download_artifact_from_uri") \
            as download_uri_mock:
        download_uri_mock.return_value = 0
        # Verify that we don't attempt to call download_uri when passing a local file to a
        # parameter of type "path"
        with TempDir() as tmp:
            dst_dir = tmp.path()
            local_path = os.path.join(TEST_PROJECT_DIR, "MLproject")
            params, _ = entry_point.compute_parameters(
                user_parameters={"path": local_path}, storage_dir=dst_dir)
            assert params["path"] == os.path.abspath(local_path)
            assert download_uri_mock.call_count == 0

            params, _ = entry_point.compute_parameters(
                user_parameters={"path": path_to_local_file_uri(local_path)},
                storage_dir=dst_dir)
            assert params["path"] == os.path.abspath(local_path)
            assert download_uri_mock.call_count == 0

        # Verify that we raise an exception when passing a non-existent local file to a
        # parameter of type "path"
        with TempDir() as tmp, pytest.raises(ExecutionException):
            dst_dir = tmp.path()
            entry_point.compute_parameters(user_parameters={
                "path":
                os.path.join(dst_dir, "some/nonexistent/file")
            },
                                           storage_dir=dst_dir)
        # Verify that we do call `download_uri` when passing a URI to a parameter of type "path"
        for i, prefix in enumerate(["dbfs:/", "s3://", "gs://"]):
            with TempDir() as tmp:
                dst_dir = tmp.path()
                file_to_download = 'images.tgz'
                download_path = "%s/%s" % (dst_dir, file_to_download)
                download_uri_mock.return_value = download_path
                params, _ = entry_point.compute_parameters(user_parameters={
                    "path":
                    os.path.join(prefix, file_to_download)
                },
                                                           storage_dir=dst_dir)
                assert params["path"] == download_path
                assert download_uri_mock.call_count == i + 1
Example #3
def __init__(self, root_directory=None, artifact_root_uri=None):
    """
    Create a new FileStore with the given root directory and a given default artifact root URI.
    """
    super(FileStore, self).__init__()
    self.root_directory = local_file_uri_to_path(root_directory or _default_root_dir())
    self.artifact_root_uri = artifact_root_uri or path_to_local_file_uri(self.root_directory)
    self.trash_folder = os.path.join(self.root_directory, FileStore.TRASH_FOLDER_NAME)
    # Create root directory if needed
    if not exists(self.root_directory):
        mkdir(self.root_directory)
        self._create_experiment_with_id(name=Experiment.DEFAULT_EXPERIMENT_NAME,
                                        experiment_id=FileStore.DEFAULT_EXPERIMENT_ID,
                                        artifact_uri=None)
    # Create trash folder if needed
    if not exists(self.trash_folder):
        mkdir(self.trash_folder)
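The constructor above relies on path_to_local_file_uri and local_file_uri_to_path being inverses for plain local paths. A minimal round-trip sketch, assuming a POSIX-style absolute path with no characters that need percent-encoding (on Windows the URI additionally carries a drive letter, e.g. file:///C:/...):

import os
from mlflow.utils.file_utils import local_file_uri_to_path, path_to_local_file_uri

# Convert a plain local path into a file:// URI and back again.
root = os.path.abspath("mlruns")            # e.g. /home/user/mlruns
uri = path_to_local_file_uri(root)          # e.g. file:///home/user/mlruns
assert local_file_uri_to_path(uri) == root  # the round trip recovers the original path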
Example #4
def predict(self, model_uri, input_path, output_path, content_type,
            json_format):
    """
    Generate predictions using a generic Python model saved with MLflow.
    Return the prediction results as JSON.
    """
    local_path = _download_artifact_from_uri(model_uri)
    # NB: Absolute windows paths do not work with mlflow apis, use file uri to ensure
    # platform compatibility.
    local_uri = path_to_local_file_uri(local_path)
    command = (
        'python -c "from mlflow.pyfunc.scoring_server import _predict; _predict('
        "model_uri={model_uri}, "
        "input_path={input_path}, "
        "output_path={output_path}, "
        "content_type={content_type}, "
        'json_format={json_format})"').format(
            model_uri=repr(local_uri),
            input_path=repr(input_path),
            output_path=repr(output_path),
            content_type=repr(content_type),
            json_format=repr(json_format),
        )
    if self._env_manager == _EnvManager.CONDA and ENV in self._config:
        conda_env_path = os.path.join(local_path, self._config[ENV])
        conda_env_name = get_or_create_conda_env(
            conda_env_path,
            env_id=self._env_id,
            capture_output=False,
            env_root_dir=self._env_root_dir,
        )
        return _execute_in_conda_env(conda_env_name,
                                     command,
                                     self._install_mlflow,
                                     env_root_dir=self._env_root_dir)
    elif self._env_manager == _EnvManager.VIRTUALENV:
        activate_cmd = _get_or_create_virtualenv(
            local_path, self._env_id, env_root_dir=self._env_root_dir)
        return _execute_in_virtualenv(activate_cmd,
                                      command,
                                      self._install_mlflow,
                                      env_root_dir=self._env_root_dir)
    else:
        scoring_server._predict(local_uri, input_path, output_path,
                                content_type, json_format)
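The repr() calls above are what turn each argument into a valid Python literal inside the python -c one-liner. A small illustration with made-up paths, showing the shape of the generated command:

# Made-up values, purely to show what the formatted command string looks like.
command = (
    'python -c "from mlflow.pyfunc.scoring_server import _predict; _predict('
    "model_uri={model_uri}, "
    'input_path={input_path})"'
).format(
    model_uri=repr("file:///tmp/model"),
    input_path=repr("/tmp/input.json"),
)
print(command)
# python -c "from mlflow.pyfunc.scoring_server import _predict; _predict(model_uri='file:///tmp/model', input_path='/tmp/input.json')"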
Example #5
def _download_artifact_from_uri(artifact_uri, output_path=None):
    """
    :param artifact_uri: The *absolute* URI of the artifact to download.
    :param output_path: The local filesystem path to which to download the artifact. If unspecified,
                        a local output path will be created.
    """
    if os.path.exists(artifact_uri):
        if os.name != "nt":
            # If we're dealing with local files, just reference the direct pathing.
            # non-nt-based file systems can directly reference path information, while nt-based
            # systems need to url-encode special characters in directory listings to be able to
            # resolve them (i.e., spaces converted to %20 within a file name or path listing)
            root_uri = os.path.dirname(artifact_uri)
            artifact_path = os.path.basename(artifact_uri)
            return get_artifact_repository(
                artifact_uri=root_uri).download_artifacts(
                    artifact_path=artifact_path, dst_path=output_path)
        else:  # if we're dealing with nt-based systems, we need to utilize pathname2url to encode.
            artifact_uri = path_to_local_file_uri(artifact_uri)

    parsed_uri = urllib.parse.urlparse(str(artifact_uri))
    prefix = ""
    if parsed_uri.scheme and not parsed_uri.path.startswith("/"):
        # relative path is a special case, urllib does not reconstruct it properly
        prefix = parsed_uri.scheme + ":"
        parsed_uri = parsed_uri._replace(scheme="")

    # For models:/ URIs, it doesn't make sense to initialize a ModelsArtifactRepository with only
    # the model name portion of the URI, then call download_artifacts with the version info.
    if ModelsArtifactRepository.is_models_uri(artifact_uri):
        root_uri = artifact_uri
        artifact_path = ""
    else:
        artifact_path = posixpath.basename(parsed_uri.path)
        parsed_uri = parsed_uri._replace(
            path=posixpath.dirname(parsed_uri.path))
        root_uri = prefix + urllib.parse.urlunparse(parsed_uri)

    return get_artifact_repository(artifact_uri=root_uri).download_artifacts(
        artifact_path=artifact_path, dst_path=output_path)
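To make the splitting branch at the bottom concrete, here is a stdlib-only walk-through with a hypothetical s3:// URI (chosen purely for illustration) showing how the repository root and the relative artifact path are derived:

import posixpath
import urllib.parse

artifact_uri = "s3://my-bucket/path/to/model"    # hypothetical remote URI
parsed = urllib.parse.urlparse(artifact_uri)

# Same split as above: the basename becomes the artifact path, the dirname stays in the root URI.
artifact_path = posixpath.basename(parsed.path)  # 'model'
root_uri = urllib.parse.urlunparse(parsed._replace(path=posixpath.dirname(parsed.path)))

print(root_uri)       # s3://my-bucket/path/to
print(artifact_path)  # model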
Example #6
def serve(self, model_uri, port, host):
    """
    Serve pyfunc model locally.
    """
    local_path = _download_artifact_from_uri(model_uri)
    # NB: Absolute windows paths do not work with mlflow apis, use file uri to ensure
    # platform compatibility.
    local_uri = path_to_local_file_uri(local_path)
    command = ("gunicorn --timeout 60 -b {host}:{port} -w {nworkers} "
               "mlflow.pyfunc.scoring_server.wsgi:app").format(
                   host=host, port=port, nworkers=self._nworkers)
    command_env = os.environ.copy()
    command_env[scoring_server._SERVER_MODEL_PATH] = local_uri
    if not self._no_conda and ENV in self._config:
        conda_env_path = os.path.join(local_path, self._config[ENV])
        return _execute_in_conda_env(conda_env_path,
                                     command,
                                     self._install_mlflow,
                                     command_env=command_env)
    else:
        _logger.info("=== Running command '%s'", command)
        subprocess.Popen(command.split(" "), env=command_env).wait()
Example #7
def predict(
    self,
    model_uri,
    input_path,
    output_path,
    content_type,
    json_format,
):
    """
    Generate predictions using a generic Python model saved with MLflow.
    Return the prediction results as JSON.
    """
    local_path = _download_artifact_from_uri(model_uri)
    # NB: Absolute windows paths do not work with mlflow apis, use file uri to ensure
    # platform compatibility.
    local_uri = path_to_local_file_uri(local_path)
    if not self._no_conda and ENV in self._config:
        conda_env_path = os.path.join(local_path, self._config[ENV])
        command = (
            'python -c "from mlflow.pyfunc.scoring_server import _predict; _predict('
            "model_uri={model_uri}, "
            "input_path={input_path}, "
            "output_path={output_path}, "
            "content_type={content_type}, "
            'json_format={json_format})"').format(
                model_uri=repr(local_uri),
                input_path=repr(input_path),
                output_path=repr(output_path),
                content_type=repr(content_type),
                json_format=repr(json_format),
            )
        return _execute_in_conda_env(conda_env_path, command,
                                     self._install_mlflow)
    else:
        scoring_server._predict(local_uri, input_path, output_path,
                                content_type, json_format)
Example #8
def get_cmd(model_uri: str,
            port: int = None,
            host: str = None,
            timeout: int = None,
            nworkers: int = None) -> Tuple[str, Dict[str, str]]:
    local_uri = path_to_local_file_uri(model_uri)
    timeout = timeout or 60
    # NB: Absolute windows paths do not work with mlflow apis, use file uri to ensure
    # platform compatibility.
    if os.name != "nt":
        args = [f"--timeout={timeout}"]
        if port and host:
            args.append(f"-b {host}:{port}")
        elif host:
            args.append(f"-b {host}")

        if nworkers:
            args.append(f"-w {nworkers}")

        command = (f"gunicorn {' '.join(args)} ${{GUNICORN_CMD_ARGS}}"
                   " -- mlflow.pyfunc.scoring_server.wsgi:app")
    else:
        args = []
        if host:
            args.append(f"--host={host}")

        if port:
            args.append(f"--port={port}")

        command = (f"waitress-serve {' '.join(args)} "
                   "--ident=mlflow mlflow.pyfunc.scoring_server.wsgi:app")

    command_env = os.environ.copy()
    command_env[_SERVER_MODEL_PATH] = local_uri

    return command, command_env
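The returned pair is meant to be handed straight to a subprocess, much like the serve method in Example #6. A minimal launching sketch, assuming get_cmd above is in scope, gunicorn (or waitress on Windows) is installed, a made-up model directory under /tmp/model, and shell=True because the gunicorn branch relies on the shell to expand ${GUNICORN_CMD_ARGS}:

import subprocess

# Hypothetical model directory and port, used only for illustration.
command, command_env = get_cmd("/tmp/model", port=5001, host="127.0.0.1", nworkers=2)

# The command is a single shell string; the env carries the model URI for the WSGI app.
proc = subprocess.Popen(command, env=command_env, shell=True)
print(f"scoring server starting (pid={proc.pid})")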
Example #9
def _get_sqlite_uri():
    path = path_to_local_file_uri(os.path.join(SUITE_ROOT_DIR, "test-database.bd"))
    path = path[len("file://") :]

    # NB: It looks like windows and posix have different requirements on number of slashes for
    # whatever reason. Windows needs uri like 'sqlite:///C:/path/to/my/file' whereas posix expects
    # sqlite://///path/to/my/file
    prefix = "sqlite://" if sys.platform == "win32" else "sqlite:////"
    return prefix + path


# Backend store URIs to test against
BACKEND_URIS = [
    _get_sqlite_uri(),  # SqlAlchemy
    path_to_local_file_uri(os.path.join(SUITE_ROOT_DIR, "file_store_root")),  # FileStore
]

# Map of backend URI to tuple (server URL, Process). We populate this map by constructing
# a server per backend URI
BACKEND_URI_TO_SERVER_URL_AND_PROC = {
    uri: _init_server(backend_uri=uri, root_artifact_uri=SUITE_ARTIFACT_ROOT_DIR)
    for uri in BACKEND_URIS
}


def pytest_generate_tests(metafunc):
    """
    Automatically parametrize each fixture/test that depends on `backend_store_uri` with the
    list of backend store URIs.
    """
Example #10
def local_artifact_repo(local_artifact_root):
    from mlflow.utils.file_utils import path_to_local_file_uri

    return LocalArtifactRepository(
        artifact_uri=path_to_local_file_uri(local_artifact_root))
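For context, a short sketch of how the repository returned by this fixture might be exercised; the temporary directories, the file name, and the import path (valid for recent MLflow releases) are assumptions made for illustration:

import os
import tempfile

from mlflow.store.artifact.local_artifact_repo import LocalArtifactRepository
from mlflow.utils.file_utils import path_to_local_file_uri

with tempfile.TemporaryDirectory() as artifact_root, tempfile.TemporaryDirectory() as src:
    repo = LocalArtifactRepository(artifact_uri=path_to_local_file_uri(artifact_root))

    # Write a small file and log it into the repository root.
    local_file = os.path.join(src, "hello.txt")
    with open(local_file, "w") as f:
        f.write("hello")
    repo.log_artifact(local_file)

    # The logged file should now show up in the artifact listing.
    print([info.path for info in repo.list_artifacts()])  # ['hello.txt']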
Example #11
def tracking_uri_mock(tmpdir):
    try:
        mlflow.set_tracking_uri(path_to_local_file_uri(os.path.join(tmpdir.strpath, 'mlruns')))
        yield tmpdir
    finally:
        mlflow.set_tracking_uri(None)
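Assuming the function above is registered with @pytest.fixture (the decorator is not shown in the excerpt), a test can depend on it to get an isolated tracking directory under tmpdir; a minimal sketch:

import mlflow


def test_logging_uses_temporary_tracking_dir(tracking_uri_mock):
    # The fixture has pointed the tracking URI at a file:// URI under tmpdir,
    # so this run is written to an isolated mlruns directory.
    with mlflow.start_run():
        mlflow.log_param("alpha", 0.5)
    assert mlflow.get_tracking_uri().startswith("file://")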