Beispiel #1
0
 def predict(
     self,
     model_uri,
     input_path,
     output_path,
     content_type,
     json_format,
 ):
     """
     Score the model at ``model_uri`` on the data found at ``input_path``.

     The model is fetched with ``_download_artifact_from_uri`` and addressed through a
     local file URI. When conda is enabled and the model config declares an environment,
     the scoring call is rendered as a ``python -c`` command, handed to
     ``_execute_in_conda_env``, and that call's result is returned. Otherwise
     ``scoring_server._predict`` is invoked in the current process and ``None`` is returned.
     """
     model_dir = _download_artifact_from_uri(model_uri)
     # NB: Absolute windows paths do not work with mlflow apis, use file uri to ensure
     # platform compatibility.
     model_file_uri = path_to_local_file_uri(model_dir)

     if self._no_conda or ENV not in self._config:
         # No conda environment to activate: score in the current process.
         scoring_server._predict(
             model_file_uri, input_path, output_path, content_type, json_format)
         return None

     env_spec_path = os.path.join(model_dir, self._config[ENV])
     # Every value is embedded through repr() so that it reads back as a Python literal
     # inside the generated ``python -c`` snippet.
     rendered_kwargs = ", ".join(
         "{}={!r}".format(keyword, value)
         for keyword, value in (
             ("model_uri", model_file_uri),
             ("input_path", input_path),
             ("output_path", output_path),
             ("content_type", content_type),
             ("json_format", json_format),
         ))
     command = (
         'python -c "from mlflow.pyfunc.scoring_server import _predict; _predict('
         + rendered_kwargs + ')"')
     return _execute_in_conda_env(env_spec_path, command, self._install_mlflow)
Beispiel #2
0
    def serve(self, model_uri, port, host):
        """
        Serve pyfunc model locally.

        The model is downloaded with ``_download_artifact_from_uri`` and its location is
        exported to the server process through the ``scoring_server._SERVER_MODEL_PATH``
        environment variable. On non-Windows platforms the server is started with gunicorn;
        on Windows (``os.name == "nt"``) it is started with waitress.

        :param model_uri: URI of the model to serve.
        :param port: Port the server binds to.
        :param host: Host / interface the server binds to.
        :return: The result of ``_execute_in_conda_env`` when the model is served inside a
                 conda environment; otherwise ``None`` once the server process has exited.
        """
        local_path = _download_artifact_from_uri(model_uri)
        # NB: Absolute windows paths do not work with mlflow apis, use file uri to ensure
        # platform compatibility.
        local_uri = path_to_local_file_uri(local_path)
        is_windows = os.name == "nt"
        if not is_windows:
            # ``${{...}}`` renders as ``${GUNICORN_CMD_ARGS}`` after ``format``; the variable
            # is left for the shell that eventually runs the command to expand.
            command = (
                "gunicorn --timeout=60 -b {host}:{port} -w {nworkers} ${{GUNICORN_CMD_ARGS}}"
                " -- mlflow.pyfunc.scoring_server.wsgi:app").format(
                    host=host, port=port, nworkers=self._nworkers)
        else:
            command = (
                "waitress-serve --host={host} --port={port} "
                "--ident=mlflow mlflow.pyfunc.scoring_server.wsgi:app").format(
                    host=host, port=port)

        # Copy the environment so the model path is only visible to the child process.
        command_env = os.environ.copy()
        command_env[scoring_server._SERVER_MODEL_PATH] = local_uri
        if not self._no_conda and ENV in self._config:
            conda_env_path = os.path.join(local_path, self._config[ENV])
            return _execute_in_conda_env(conda_env_path,
                                         command,
                                         self._install_mlflow,
                                         command_env=command_env)

        _logger.info("=== Running command '%s'", command)
        if not is_windows:
            # bash performs the ${GUNICORN_CMD_ARGS} expansion.
            subprocess.Popen(["bash", "-c", command], env=command_env).wait()
        else:
            # Popen expects a flat sequence of argument strings; wrapping the split result
            # in another list (as was done previously) makes the call fail on Windows.
            subprocess.Popen(command.split(" "), env=command_env).wait()
Beispiel #3
0
def _init_server(backend_uri, root_artifact_uri):
    """
    Launch a new REST server using the tracking store specified by backend_uri, with a fresh
    temporary artifact directory created underneath root_artifact_uri.

    The server is started as a child ``python -c`` process while the backend-store and
    artifact-root environment variables are patched into ``os.environ``; the function then
    waits on ``_await_server_up_or_die`` before returning.

    :return: A tuple (url, process) containing the string URL of the server and the process
             handle returned by ``Popen``.
    """
    kiwi.set_tracking_uri(None)
    server_port = get_safe_port()

    artifact_root_dir = tempfile.mkdtemp(dir=local_file_uri_to_path(root_artifact_uri))
    server_env = {
        BACKEND_STORE_URI_ENV_VAR: backend_uri,
        ARTIFACT_ROOT_ENV_VAR: path_to_local_file_uri(artifact_root_dir),
    }
    launch_snippet = 'from mlflow.server import app; app.run("{hostname}", {port})'.format(
        hostname=LOCALHOST, port=server_port)

    # The child is spawned while the patch is active so that it starts with these variables
    # present in os.environ; the patch is undone as soon as the process has been created.
    with mock.patch.dict(os.environ, server_env):
        process = Popen(["python", "-c", launch_snippet])

    _await_server_up_or_die(server_port)
    url = "http://{hostname}:{port}".format(hostname=LOCALHOST, port=server_port)
    print("Launching tracking server against backend URI %s. Server URL: %s" %
          (backend_uri, url))
    return url, process
Beispiel #4
0
def test_artifacts(mlflow_client):
    """
    Check that artifacts logged through the client can be listed and downloaded again.

    Creates an experiment and a run, logs one file both at the artifact root and under the
    ``dir`` sub-path, then verifies the artifact locations, the listings and the contents of
    the downloaded copies.
    """
    def _read_text(path):
        # Read through a context manager so the handle is closed deterministically instead
        # of being left open until garbage collection.
        with open(path, 'r') as handle:
            return handle.read()

    experiment_id = mlflow_client.create_experiment('Art In Fact')
    experiment_info = mlflow_client.get_experiment(experiment_id)
    assert experiment_info.artifact_location.startswith(
        path_to_local_file_uri(SUITE_ARTIFACT_ROOT_DIR))
    artifact_path = urllib.parse.urlparse(
        experiment_info.artifact_location).path
    # The last path component of the experiment's artifact location is the experiment id.
    assert posixpath.split(artifact_path)[-1] == experiment_id

    created_run = mlflow_client.create_run(experiment_id)
    assert created_run.info.artifact_uri.startswith(
        experiment_info.artifact_location)
    run_id = created_run.info.run_id
    src_dir = tempfile.mkdtemp('test_artifacts_src')
    src_file = os.path.join(src_dir, 'my.file')
    with open(src_file, 'w') as f:
        f.write('Hello, World!')
    # Log the same file twice: once at the artifact root, once under the 'dir' sub-path.
    mlflow_client.log_artifact(run_id, src_file, None)
    mlflow_client.log_artifacts(run_id, src_dir, 'dir')

    root_artifacts_list = mlflow_client.list_artifacts(run_id)
    assert {a.path for a in root_artifacts_list} == {'my.file', 'dir'}

    dir_artifacts_list = mlflow_client.list_artifacts(run_id, 'dir')
    assert {a.path for a in dir_artifacts_list} == {'dir/my.file'}

    all_artifacts = mlflow_client.download_artifacts(run_id, '.')
    assert _read_text('%s/my.file' % all_artifacts) == 'Hello, World!'
    assert _read_text('%s/dir/my.file' % all_artifacts) == 'Hello, World!'

    dir_artifacts = mlflow_client.download_artifacts(run_id, 'dir')
    assert _read_text('%s/my.file' % dir_artifacts) == 'Hello, World!'
Beispiel #5
0
def test_predict_with_old_mlflow_in_conda_and_with_orient_records(iris_data):
    """
    Run ``mlflow models predict`` on a pyfunc model whose conda environment pins mlflow to
    ``test_pyfunc.MLFLOW_VERSION``, feeding it JSON input in the ``records`` orientation.

    The test is currently always skipped by the unconditional ``pytest.skip`` below, so the
    remainder of the body does not run.
    """
    if no_conda:
        pytest.skip("This test needs conda.")
    # TODO: Enable this test after 1.0 is out to ensure we do not break the serve / predict
    # TODO: Also add a test for serve, not just predict.
    pytest.skip("TODO: enable this after 1.0 release is out.")
    features, _ = iris_data
    with TempDir() as workdir:
        records_in = workdir.path("input_records.json")
        pd.DataFrame(features).to_json(records_in, orient="records")
        predictions_out = workdir.path("output.json")
        model_dir = workdir.path("test_model")
        conda_yaml = workdir.path("conda.yml")
        # create env with old mlflow!
        pinned_mlflow = "mlflow=={}".format(test_pyfunc.MLFLOW_VERSION)
        _mlflow_conda_env(path=conda_yaml, additional_pip_deps=[pinned_mlflow])
        pyfunc.save_model(
            path=model_dir,
            loader_module=test_pyfunc.__name__.split(".")[-1],
            code_path=[test_pyfunc.__file__],
            conda_env=conda_yaml,
        )
        # explicit json format with orient records
        cli_args = [
            "mlflow", "models", "predict",
            "-m", path_to_local_file_uri(model_dir),
            "-i", records_in,
            "-o", predictions_out,
            "-t", "json",
            "--json-format", "records",
        ]
        proc = subprocess.Popen(cli_args + no_conda)
        assert 0 == proc.wait()
        actual = pd.read_json(predictions_out, orient="records")
        actual = actual[actual.columns[0]].values
        reference_model = test_pyfunc.PyFuncTestModel(check_version=False)
        expected = reference_model.predict(df=pd.DataFrame(features))
        assert all(expected == actual)
Beispiel #6
0
def _get_sqlite_uri():
    """
    Build the SQLite URI for the ``test-database.bd`` file under ``SUITE_ROOT_DIR``.

    The file path is first converted to a local file URI, the leading ``file://`` is
    dropped, and a platform-specific ``sqlite:`` prefix is put in front of what remains.
    """
    file_uri = path_to_local_file_uri(os.path.join(SUITE_ROOT_DIR, "test-database.bd"))
    db_location = file_uri[len("file://"):]

    # NB: It looks like windows and posix have different requirements on number of slashes for
    # whatever reason. Windows needs uri like 'sqlite:///C:/path/to/my/file' whereas posix expects
    # sqlite://///path/to/my/file
    if sys.platform == "win32":
        return "sqlite://" + db_location
    return "sqlite:////" + db_location
Beispiel #7
0
def _get_local_uri_or_none(uri):
    """
    Map a local tracking URI to a ``(path, uri)`` pair, or ``(None, None)`` otherwise.

    A URI counts as local when it has no network location and its scheme is empty,
    ``file`` or ``sqlite``; the literal ``"databricks"`` is never local. For a local URI the
    first element is the filesystem path decoded from the URI, and the second is a sqlite
    or file URI built from ``_MLFLOW_DOCKER_TRACKING_DIR_PATH``.
    """
    not_local = (None, None)
    if uri == "databricks":
        return not_local

    parsed_uri = urllib.parse.urlparse(uri)
    if parsed_uri.netloc or parsed_uri.scheme not in ("", "file", "sqlite"):
        return not_local

    path = urllib.request.url2pathname(parsed_uri.path)
    # The returned URI keeps the kind of store (sqlite vs. plain file) of the input URI.
    if parsed_uri.scheme == "sqlite":
        to_uri = path_to_local_sqlite_uri
    else:
        to_uri = path_to_local_file_uri
    return path, to_uri(_MLFLOW_DOCKER_TRACKING_DIR_PATH)
Beispiel #8
0
def get_tracking_uri():
    """
    Get the current tracking URI. This may not correspond to the tracking URI of
    the currently active run, since the tracking URI can be updated via ``set_tracking_uri``.

    Resolution order: the module-level ``_tracking_uri`` if it is set, then the value of the
    ``_TRACKING_URI_ENV_VAR`` environment variable, and finally a local file URI for the
    absolute form of ``DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH``.

    :return: The tracking URI.
    """
    if _tracking_uri is not None:
        return _tracking_uri

    uri_from_env = env.get_env(_TRACKING_URI_ENV_VAR)
    if uri_from_env is not None:
        return uri_from_env

    default_store_dir = os.path.abspath(DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH)
    return path_to_local_file_uri(default_store_dir)
Beispiel #9
0
def test_path_parameter():
    """
    Tests that MLflow file-download APIs get called when necessary for arguments of type `path`.

    Covers three cases: local files (given as a plain path and as a file URI) must not
    trigger a download, a non-existent local file must raise ``ExecutionException``, and
    remote-style URIs must trigger exactly one download each.
    """
    entry_point = load_project().get_entry_point("line_count")
    patch_target = "mlflow.tracking.artifact_utils._download_artifact_from_uri"
    with mock.patch(patch_target) as download_uri_mock:
        download_uri_mock.return_value = 0

        # Verify that we don't attempt to call download_uri when passing a local file to a
        # parameter of type "path"
        with TempDir() as tmp:
            dst_dir = tmp.path()
            local_path = os.path.join(TEST_PROJECT_DIR, "MLproject")

            params, _ = entry_point.compute_parameters(
                user_parameters={"path": local_path}, storage_dir=dst_dir)
            assert params["path"] == os.path.abspath(local_path)
            assert download_uri_mock.call_count == 0

            local_file_uri = path_to_local_file_uri(local_path)
            params, _ = entry_point.compute_parameters(
                user_parameters={"path": local_file_uri}, storage_dir=dst_dir)
            assert params["path"] == os.path.abspath(local_path)
            assert download_uri_mock.call_count == 0

        # Verify that we raise an exception when passing a non-existent local file to a
        # parameter of type "path"
        with TempDir() as tmp, pytest.raises(ExecutionException):
            dst_dir = tmp.path()
            missing_file = os.path.join(dst_dir, "some/nonexistent/file")
            entry_point.compute_parameters(
                user_parameters={"path": missing_file}, storage_dir=dst_dir)

        # Verify that we do call `download_uri` when passing a URI to a parameter of type "path"
        for expected_calls, prefix in enumerate(["dbfs:/", "s3://", "gs://"], start=1):
            with TempDir() as tmp:
                dst_dir = tmp.path()
                file_to_download = 'images.tgz'
                download_path = "%s/%s" % (dst_dir, file_to_download)
                download_uri_mock.return_value = download_path
                remote_uri = os.path.join(prefix, file_to_download)
                params, _ = entry_point.compute_parameters(
                    user_parameters={"path": remote_uri}, storage_dir=dst_dir)
                assert params["path"] == download_path
                # The mock's call count accumulates across iterations: one call per prefix.
                assert download_uri_mock.call_count == expected_calls
Beispiel #10
0
 def __init__(self, root_directory=None, artifact_root_uri=None):
     """
     Create a new FileStore with the given root directory and a given default artifact root URI.

     When ``root_directory`` is falsy, ``_default_root_dir()`` is used; when
     ``artifact_root_uri`` is falsy, a file URI for the root directory is used. The root
     directory and the trash folder are created if missing, and the default experiment is
     created only when the root directory had to be created.
     """
     super(FileStore, self).__init__()
     self.root_directory = local_file_uri_to_path(root_directory or _default_root_dir())
     if artifact_root_uri:
         self.artifact_root_uri = artifact_root_uri
     else:
         self.artifact_root_uri = path_to_local_file_uri(self.root_directory)
     self.trash_folder = os.path.join(self.root_directory, FileStore.TRASH_FOLDER_NAME)

     # Create root directory if needed; a brand-new store also gets its default experiment.
     root_is_missing = not exists(self.root_directory)
     if root_is_missing:
         mkdir(self.root_directory)
         self._create_experiment_with_id(
             name=Experiment.DEFAULT_EXPERIMENT_NAME,
             experiment_id=FileStore.DEFAULT_EXPERIMENT_ID,
             artifact_uri=None,
         )

     # Create trash folder if needed
     trash_is_missing = not exists(self.trash_folder)
     if trash_is_missing:
         mkdir(self.trash_folder)
Beispiel #11
0
def _get_sqlite_uri():
    """
    Return the SQLite URI of the suite's ``test-database.bd`` file in ``SUITE_ROOT_DIR``.

    The database path is turned into a local file URI, stripped of its leading ``file://``,
    and then prefixed with the ``sqlite:`` form chosen for the current platform.
    """
    database_file = os.path.join(SUITE_ROOT_DIR, "test-database.bd")
    stripped_uri = path_to_local_file_uri(database_file)[len("file://"):]

    # NB: It looks like windows and posix have different requirements on number of slashes for
    # whatever reason. Windows needs uri like 'sqlite:///C:/path/to/my/file' whereas posix expects
    # sqlite://///path/to/my/file
    on_windows = sys.platform == "win32"
    scheme_prefix = "sqlite://" if on_windows else "sqlite:////"
    return scheme_prefix + stripped_uri


# Backend store URIs to test against
BACKEND_URIS = [
    # SqlAlchemy
    _get_sqlite_uri(),
    # FileStore
    path_to_local_file_uri(os.path.join(SUITE_ROOT_DIR, "file_store_root")),
]

# Map of backend URI to tuple (server URL, Process). We populate this map by constructing
# a server per backend URI. NOTE: this runs when the module is imported, so one server is
# launched per entry of BACKEND_URIS at import time.
BACKEND_URI_TO_SERVER_URL_AND_PROC = {
    backend_uri: _init_server(
        backend_uri=backend_uri,
        root_artifact_uri=SUITE_ARTIFACT_ROOT_DIR,
    )
    for backend_uri in BACKEND_URIS
}


def pytest_generate_tests(metafunc):
    """
    Automatically parametrize each each fixture/test that depends on `backend_store_uri` with the
    list of backend store URIs.
def local_artifact_repo(local_artifact_root):
    """
    Build a ``LocalArtifactRepository`` whose artifact URI is the file URI of
    ``local_artifact_root``.
    """
    # Imported inside the function, as in the original snippet.
    from kiwi.utils.file_utils import path_to_local_file_uri

    artifact_uri = path_to_local_file_uri(local_artifact_root)
    return LocalArtifactRepository(artifact_uri=artifact_uri)