def predict(self, model_uri, input_path, output_path, content_type, json_format):
    """
    Generate predictions using a generic python model saved with MLflow.

    The model artifacts are downloaded locally first. When conda is enabled
    and the model bundles a conda environment, scoring runs in a subprocess
    inside that environment; otherwise it runs in the current process.
    Prediction results are written to ``output_path`` as JSON.
    """
    local_path = _download_artifact_from_uri(model_uri)
    # NB: Absolute windows paths do not work with mlflow apis; use a file uri
    # to ensure platform compatibility.
    local_uri = path_to_local_file_uri(local_path)

    # Fast path: score in-process when conda is disabled or the model ships
    # no environment specification.
    if self._no_conda or ENV not in self._config:
        scoring_server._predict(local_uri, input_path, output_path, content_type,
                                json_format)
        return

    conda_env_path = os.path.join(local_path, self._config[ENV])
    # Build a one-liner that re-invokes the scoring entry point inside the
    # model's conda environment; repr() quotes each argument safely.
    scoring_cmd = (
        'python -c "from mlflow.pyfunc.scoring_server import _predict; _predict('
        'model_uri={model_uri}, '
        'input_path={input_path}, '
        'output_path={output_path}, '
        'content_type={content_type}, '
        'json_format={json_format})"'
    ).format(
        model_uri=repr(local_uri),
        input_path=repr(input_path),
        output_path=repr(output_path),
        content_type=repr(content_type),
        json_format=repr(json_format),
    )
    return _execute_in_conda_env(conda_env_path, scoring_cmd, self._install_mlflow)
def serve(self, model_uri, port, host):
    """
    Serve the pyfunc model locally over HTTP.

    Downloads the model artifacts, then launches a scoring server bound to
    ``host:port`` — gunicorn on POSIX systems, waitress on Windows. When conda
    is enabled and the model bundles an environment spec, the server is
    launched inside that conda environment; otherwise it is spawned directly
    and this call blocks until the server process exits.
    """
    local_path = _download_artifact_from_uri(model_uri)
    # NB: Absolute windows paths do not work with mlflow apis, use file uri to ensure
    # platform compatibility.
    local_uri = path_to_local_file_uri(local_path)
    if os.name != "nt":
        command = (
            "gunicorn --timeout=60 -b {host}:{port} -w {nworkers} ${{GUNICORN_CMD_ARGS}}"
            " -- mlflow.pyfunc.scoring_server.wsgi:app").format(
                host=host, port=port, nworkers=self._nworkers)
    else:
        command = (
            "waitress-serve --host={host} --port={port} "
            "--ident=mlflow mlflow.pyfunc.scoring_server.wsgi:app").format(
                host=host, port=port)
    command_env = os.environ.copy()
    # The WSGI app discovers the model through this environment variable.
    command_env[scoring_server._SERVER_MODEL_PATH] = local_uri
    if not self._no_conda and ENV in self._config:
        conda_env_path = os.path.join(local_path, self._config[ENV])
        return _execute_in_conda_env(conda_env_path, command, self._install_mlflow,
                                     command_env=command_env)
    else:
        _logger.info("=== Running command '%s'", command)
        if os.name != "nt":
            subprocess.Popen(["bash", "-c", command], env=command_env).wait()
        else:
            # BUG FIX: argv must be a flat list of strings. The original passed
            # [command.split(" ")] — a list nested inside a list — which makes
            # Popen raise a TypeError on Windows.
            subprocess.Popen(command.split(" "), env=command_env).wait()
def _init_server(backend_uri, root_artifact_uri):
    """
    Launch a new REST server using the tracking store specified by backend_uri and
    root artifact directory specified by root_artifact_uri.
    :returns A tuple (url, process) containing the string URL of the server and a
             handle to the server process (a multiprocessing.Process object).
    """
    kiwi.set_tracking_uri(None)
    server_port = get_safe_port()
    # Give each server its own temp artifact root under the suite-level root.
    artifact_dir = tempfile.mkdtemp(dir=local_file_uri_to_path(root_artifact_uri))
    server_env = {
        BACKEND_STORE_URI_ENV_VAR: backend_uri,
        ARTIFACT_ROOT_ENV_VAR: path_to_local_file_uri(artifact_dir),
    }
    # The child process inherits the patched environment at spawn time.
    with mock.patch.dict(os.environ, server_env):
        launch_snippet = 'from mlflow.server import app; app.run("{hostname}", {port})'.format(
            hostname=LOCALHOST, port=server_port)
        process = Popen(["python", "-c", launch_snippet])
    _await_server_up_or_die(server_port)
    url = "http://{hostname}:{port}".format(hostname=LOCALHOST, port=server_port)
    print("Launching tracking server against backend URI %s. Server URL: %s" % (backend_uri,
                                                                                url))
    return url, process
def test_artifacts(mlflow_client):
    """
    End-to-end check of the artifact APIs: verifies the experiment/run artifact
    locations, logs a single file and a directory, then lists and downloads the
    artifacts and checks their contents round-trip intact.
    """
    experiment_id = mlflow_client.create_experiment('Art In Fact')
    experiment_info = mlflow_client.get_experiment(experiment_id)
    assert experiment_info.artifact_location.startswith(
        path_to_local_file_uri(SUITE_ARTIFACT_ROOT_DIR))
    artifact_path = urllib.parse.urlparse(experiment_info.artifact_location).path
    # The experiment's artifact directory is named after its id.
    assert posixpath.split(artifact_path)[-1] == experiment_id

    created_run = mlflow_client.create_run(experiment_id)
    assert created_run.info.artifact_uri.startswith(experiment_info.artifact_location)
    run_id = created_run.info.run_id

    src_dir = tempfile.mkdtemp('test_artifacts_src')
    src_file = os.path.join(src_dir, 'my.file')
    with open(src_file, 'w') as f:
        f.write('Hello, World!')
    mlflow_client.log_artifact(run_id, src_file, None)
    mlflow_client.log_artifacts(run_id, src_dir, 'dir')

    root_artifacts_list = mlflow_client.list_artifacts(run_id)
    assert {a.path for a in root_artifacts_list} == {'my.file', 'dir'}
    dir_artifacts_list = mlflow_client.list_artifacts(run_id, 'dir')
    assert {a.path for a in dir_artifacts_list} == {'dir/my.file'}

    def _read(path):
        # BUG FIX: close the file handle; the original leaked handles via
        # bare open(...).read() calls.
        with open(path, 'r') as handle:
            return handle.read()

    all_artifacts = mlflow_client.download_artifacts(run_id, '.')
    assert _read(os.path.join(all_artifacts, 'my.file')) == 'Hello, World!'
    assert _read(os.path.join(all_artifacts, 'dir', 'my.file')) == 'Hello, World!'
    dir_artifacts = mlflow_client.download_artifacts(run_id, 'dir')
    assert _read(os.path.join(dir_artifacts, 'my.file')) == 'Hello, World!'
def test_predict_with_old_mlflow_in_conda_and_with_orient_records(iris_data):
    """
    Score a model saved against an old mlflow release via the CLI, using an
    explicit json content type with orient=records, and compare against an
    in-process prediction. (Currently skipped — see TODOs below.)
    """
    if no_conda:
        pytest.skip("This test needs conda.")
    # TODO: Enable this test after 1.0 is out to ensure we do not break the serve / predict
    # TODO: Also add a test for serve, not just predict.
    pytest.skip("TODO: enable this after 1.0 release is out.")
    x, _ = iris_data
    with TempDir() as tmp:
        input_records_path = tmp.path("input_records.json")
        pd.DataFrame(x).to_json(input_records_path, orient="records")
        output_json_path = tmp.path("output.json")
        test_model_path = tmp.path("test_model")
        test_model_conda_path = tmp.path("conda.yml")
        # Pin an old mlflow release into the model's conda environment.
        _mlflow_conda_env(
            path=test_model_conda_path,
            additional_pip_deps=["mlflow=={}".format(test_pyfunc.MLFLOW_VERSION)])
        pyfunc.save_model(
            path=test_model_path,
            loader_module=test_pyfunc.__name__.split(".")[-1],
            code_path=[test_pyfunc.__file__],
            conda_env=test_model_conda_path)
        # Explicit json content type with orient=records via the CLI.
        cli_args = [
            "mlflow", "models", "predict",
            "-m", path_to_local_file_uri(test_model_path),
            "-i", input_records_path,
            "-o", output_json_path,
            "-t", "json",
            "--json-format", "records",
        ]
        proc = subprocess.Popen(cli_args + no_conda)
        assert 0 == proc.wait()
        scored = pd.read_json(output_json_path, orient="records")
        actual = scored[scored.columns[0]].values
        expected = test_pyfunc.PyFuncTestModel(check_version=False).predict(
            df=pd.DataFrame(x))
        assert all(expected == actual)
def _get_sqlite_uri():
    """
    Build a SQLAlchemy-style sqlite URI pointing at a database file under the
    suite root directory.
    """
    file_uri = path_to_local_file_uri(os.path.join(SUITE_ROOT_DIR, "test-database.bd"))
    db_path = file_uri[len("file://"):]
    # NB: It looks like windows and posix have different requirements on number of slashes for
    # whatever reason. Windows needs uri like 'sqlite:///C:/path/to/my/file' whereas posix expects
    # sqlite://///path/to/my/file
    if sys.platform == "win32":
        return "sqlite://" + db_path
    return "sqlite:////" + db_path
def _get_local_uri_or_none(uri): if uri == "databricks": return None, None parsed_uri = urllib.parse.urlparse(uri) if not parsed_uri.netloc and parsed_uri.scheme in ("", "file", "sqlite"): path = urllib.request.url2pathname(parsed_uri.path) if parsed_uri.scheme == "sqlite": uri = path_to_local_sqlite_uri(_MLFLOW_DOCKER_TRACKING_DIR_PATH) else: uri = path_to_local_file_uri(_MLFLOW_DOCKER_TRACKING_DIR_PATH) return path, uri else: return None, None
def get_tracking_uri():
    """
    Get the current tracking URI. This may not correspond to the tracking URI of
    the currently active run, since the tracking URI can be updated via
    ``set_tracking_uri``.

    :return: The tracking URI.
    """
    global _tracking_uri
    # Precedence: explicitly-set URI > environment variable > local default path.
    if _tracking_uri is not None:
        return _tracking_uri
    if env.get_env(_TRACKING_URI_ENV_VAR) is not None:
        return env.get_env(_TRACKING_URI_ENV_VAR)
    return path_to_local_file_uri(os.path.abspath(DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH))
def test_path_parameter():
    """
    Tests that MLflow file-download APIs get called when necessary for arguments of type `path`.
    """
    project = load_project()
    entry_point = project.get_entry_point("line_count")
    with mock.patch("mlflow.tracking.artifact_utils._download_artifact_from_uri") \
            as download_uri_mock:
        download_uri_mock.return_value = 0
        # Verify that we don't attempt to call download_uri when passing a local file to a
        # parameter of type "path"
        with TempDir() as tmp:
            dst_dir = tmp.path()
            local_path = os.path.join(TEST_PROJECT_DIR, "MLproject")
            # A plain local path should be resolved to its absolute form without download.
            params, _ = entry_point.compute_parameters(
                user_parameters={"path": local_path},
                storage_dir=dst_dir)
            assert params["path"] == os.path.abspath(local_path)
            assert download_uri_mock.call_count == 0
            # A file:// URI to the same local file should also bypass download.
            params, _ = entry_point.compute_parameters(
                user_parameters={"path": path_to_local_file_uri(local_path)},
                storage_dir=dst_dir)
            assert params["path"] == os.path.abspath(local_path)
            assert download_uri_mock.call_count == 0
        # Verify that we raise an exception when passing a non-existent local file to a
        # parameter of type "path"
        with TempDir() as tmp, pytest.raises(ExecutionException):
            dst_dir = tmp.path()
            entry_point.compute_parameters(
                user_parameters={"path": os.path.join(dst_dir, "some/nonexistent/file")},
                storage_dir=dst_dir)
        # Verify that we do call `download_uri` when passing a URI to a parameter of type "path"
        # NOTE: call_count accumulates across loop iterations, hence the i + 1 check.
        for i, prefix in enumerate(["dbfs:/", "s3://", "gs://"]):
            with TempDir() as tmp:
                dst_dir = tmp.path()
                file_to_download = 'images.tgz'
                download_path = "%s/%s" % (dst_dir, file_to_download)
                download_uri_mock.return_value = download_path
                params, _ = entry_point.compute_parameters(
                    user_parameters={"path": os.path.join(prefix, file_to_download)},
                    storage_dir=dst_dir)
                assert params["path"] == download_path
                assert download_uri_mock.call_count == i + 1
def __init__(self, root_directory=None, artifact_root_uri=None):
    """
    Create a new FileStore with the given root directory and a given default
    artifact root URI.
    """
    super(FileStore, self).__init__()
    resolved_root = local_file_uri_to_path(root_directory or _default_root_dir())
    self.root_directory = resolved_root
    self.artifact_root_uri = artifact_root_uri or path_to_local_file_uri(resolved_root)
    self.trash_folder = os.path.join(resolved_root, FileStore.TRASH_FOLDER_NAME)
    # A fresh root directory gets the default experiment created inside it.
    if not exists(resolved_root):
        mkdir(resolved_root)
        self._create_experiment_with_id(
            name=Experiment.DEFAULT_EXPERIMENT_NAME,
            experiment_id=FileStore.DEFAULT_EXPERIMENT_ID,
            artifact_uri=None)
    # Create trash folder if needed
    if not exists(self.trash_folder):
        mkdir(self.trash_folder)
def _get_sqlite_uri():
    # Build a SQLAlchemy-style sqlite URI for a database file under the suite root.
    path = path_to_local_file_uri(
        os.path.join(SUITE_ROOT_DIR, "test-database.bd"))
    path = path[len("file://"):]
    # NB: It looks like windows and posix have different requirements on number of slashes for
    # whatever reason. Windows needs uri like 'sqlite:///C:/path/to/my/file' whereas posix expects
    # sqlite://///path/to/my/file
    prefix = "sqlite://" if sys.platform == "win32" else "sqlite:////"
    return prefix + path


# Backend store URIs to test against
BACKEND_URIS = [
    _get_sqlite_uri(),  # SqlAlchemy
    path_to_local_file_uri(os.path.join(SUITE_ROOT_DIR, "file_store_root")),  # FileStore
]

# Map of backend URI to tuple (server URL, Process). We populate this map by constructing
# a server per backend URI
BACKEND_URI_TO_SERVER_URL_AND_PROC = {
    uri: _init_server(backend_uri=uri, root_artifact_uri=SUITE_ARTIFACT_ROOT_DIR)
    for uri in BACKEND_URIS
}


def pytest_generate_tests(metafunc):
    """
    Automatically parametrize each fixture/test that depends on `backend_store_uri`
    with the list of backend store URIs.
def local_artifact_repo(local_artifact_root):
    """
    Build a LocalArtifactRepository rooted at the given local directory,
    expressed as a file URI.
    """
    from kiwi.utils.file_utils import path_to_local_file_uri

    repo_uri = path_to_local_file_uri(local_artifact_root)
    return LocalArtifactRepository(artifact_uri=repo_uri)