def test_predict_with_old_mlflow_in_conda_and_with_orient_records(iris_data):
    """
    Regression test: score a model whose conda environment pins an *older* mlflow
    version, feeding input JSON with ``orient="records"`` through
    ``mlflow models predict``.

    Currently skipped unconditionally (see the second ``pytest.skip`` below)
    until the 1.0 release is out; the body after that skip is dead code kept
    for when the test is re-enabled.
    """
    # NOTE(review): `no_conda` is presumably a list of CLI flags (it is appended to
    # the command below); truthy means conda is disabled, so the test cannot run.
    if no_conda:
        pytest.skip("This test needs conda.")
    # TODO: Enable this test after 1.0 is out to ensure we do not break the serve / predict
    # TODO: Also add a test for serve, not just predict.
    pytest.skip("TODO: enable this after 1.0 release is out.")
    x, _ = iris_data
    with TempDir() as tmp:
        input_records_path = tmp.path("input_records.json")
        pd.DataFrame(x).to_json(input_records_path, orient="records")
        output_json_path = tmp.path("output.json")
        test_model_path = tmp.path("test_model")
        test_model_conda_path = tmp.path("conda.yml")
        # create env with old mlflow!
        _mlflow_conda_env(
            path=test_model_conda_path,
            additional_pip_deps=["mlflow=={}".format(test_pyfunc.MLFLOW_VERSION)],
        )
        pyfunc.save_model(
            path=test_model_path,
            loader_module=test_pyfunc.__name__.split(".")[-1],
            code_path=[test_pyfunc.__file__],
            conda_env=test_model_conda_path,
        )
        # explicit json format with orient records
        p = subprocess.Popen([
            "mlflow", "models", "predict",
            "-m", path_to_local_file_uri(test_model_path),
            "-i", input_records_path,
            "-o", output_json_path,
            "-t", "json",
            "--json-format", "records",
        ] + no_conda)
        assert 0 == p.wait()
        # The CLI writes one record per prediction; compare the first column against
        # an in-process prediction from the same model (version check disabled).
        actual = pd.read_json(output_json_path, orient="records")
        actual = actual[actual.columns[0]].values
        expected = test_pyfunc.PyFuncTestModel(check_version=False).predict(df=pd.DataFrame(x))
        assert all(expected == actual)
def test_path_parameter():
    """
    Tests that MLflow file-download APIs get called when necessary for arguments of
    type `path`.
    """
    project = load_project()
    entry_point = project.get_entry_point("line_count")
    # NB: the mock's call_count accumulates across all three sections below, so the
    # assertions are order-sensitive.
    with mock.patch("mlflow.tracking.artifact_utils._download_artifact_from_uri") \
            as download_uri_mock:
        download_uri_mock.return_value = 0
        # Verify that we don't attempt to call download_uri when passing a local file to a
        # parameter of type "path"
        with TempDir() as tmp:
            dst_dir = tmp.path()
            local_path = os.path.join(TEST_PROJECT_DIR, "MLproject")
            params, _ = entry_point.compute_parameters(
                user_parameters={"path": local_path},
                storage_dir=dst_dir)
            assert params["path"] == os.path.abspath(local_path)
            assert download_uri_mock.call_count == 0
            # A file:// URI to the same local file should also resolve without downloading.
            params, _ = entry_point.compute_parameters(
                user_parameters={"path": path_to_local_file_uri(local_path)},
                storage_dir=dst_dir)
            assert params["path"] == os.path.abspath(local_path)
            assert download_uri_mock.call_count == 0
        # Verify that we raise an exception when passing a non-existent local file to a
        # parameter of type "path"
        with TempDir() as tmp, pytest.raises(ExecutionException):
            dst_dir = tmp.path()
            entry_point.compute_parameters(user_parameters={
                "path": os.path.join(dst_dir, "some/nonexistent/file")
            }, storage_dir=dst_dir)
        # Verify that we do call `download_uri` when passing a URI to a parameter of type "path"
        for i, prefix in enumerate(["dbfs:/", "s3://", "gs://"]):
            with TempDir() as tmp:
                dst_dir = tmp.path()
                file_to_download = 'images.tgz'
                download_path = "%s/%s" % (dst_dir, file_to_download)
                download_uri_mock.return_value = download_path
                params, _ = entry_point.compute_parameters(user_parameters={
                    "path": os.path.join(prefix, file_to_download)
                }, storage_dir=dst_dir)
                assert params["path"] == download_path
                # One extra download per remote-scheme iteration.
                assert download_uri_mock.call_count == i + 1
def __init__(self, root_directory=None, artifact_root_uri=None):
    """
    Create a new FileStore with the given root directory and a given default
    artifact root URI.

    :param root_directory: Plain path or local ``file://`` URI under which the
        store keeps its data; defaults to ``_default_root_dir()``.
    :param artifact_root_uri: Default artifact root URI; defaults to the
        ``file://`` URI form of ``root_directory``.
    """
    super(FileStore, self).__init__()
    # Accept either a plain path or a file URI for the root directory.
    self.root_directory = local_file_uri_to_path(root_directory or _default_root_dir())
    self.artifact_root_uri = artifact_root_uri or path_to_local_file_uri(self.root_directory)
    self.trash_folder = os.path.join(self.root_directory, FileStore.TRASH_FOLDER_NAME)
    # Create root directory if needed. A brand-new store also gets the default
    # experiment seeded into it (only on first creation, not on reopen).
    if not exists(self.root_directory):
        mkdir(self.root_directory)
        self._create_experiment_with_id(name=Experiment.DEFAULT_EXPERIMENT_NAME,
                                        experiment_id=FileStore.DEFAULT_EXPERIMENT_ID,
                                        artifact_uri=None)
    # Create trash folder if needed
    if not exists(self.trash_folder):
        mkdir(self.trash_folder)
def predict(self, model_uri, input_path, output_path, content_type, json_format):
    """
    Generate predictions using generic python model saved with MLflow.
    Return the prediction results as a JSON.

    :param model_uri: URI of the model; it is downloaded locally before scoring.
    :param input_path: Path of the file containing the input payload.
    :param output_path: Path to which the predictions are written.
    :param content_type: Content type of the input payload, forwarded to the
        scoring server.
    :param json_format: JSON orientation of the payload, forwarded to the
        scoring server.
    """
    local_path = _download_artifact_from_uri(model_uri)
    # NB: Absolute windows paths do not work with mlflow apis, use file uri to ensure
    # platform compatibility.
    local_uri = path_to_local_file_uri(local_path)
    # Command re-entering the scoring server in a fresh python process so the model's
    # own environment is used; repr() quotes each value so the -c snippet stays a
    # valid python expression.
    command = (
        'python -c "from mlflow.pyfunc.scoring_server import _predict; _predict('
        "model_uri={model_uri}, "
        "input_path={input_path}, "
        "output_path={output_path}, "
        "content_type={content_type}, "
        'json_format={json_format})"').format(
            model_uri=repr(local_uri),
            input_path=repr(input_path),
            output_path=repr(output_path),
            content_type=repr(content_type),
            json_format=repr(json_format),
    )
    # Dispatch on the configured environment manager: conda, virtualenv, or
    # in-process (local) execution.
    if self._env_manager == _EnvManager.CONDA and ENV in self._config:
        conda_env_path = os.path.join(local_path, self._config[ENV])
        conda_env_name = get_or_create_conda_env(
            conda_env_path,
            env_id=self._env_id,
            capture_output=False,
            env_root_dir=self._env_root_dir,
        )
        return _execute_in_conda_env(conda_env_name, command, self._install_mlflow,
                                     env_root_dir=self._env_root_dir)
    elif self._env_manager == _EnvManager.VIRTUALENV:
        activate_cmd = _get_or_create_virtualenv(
            local_path, self._env_id, env_root_dir=self._env_root_dir)
        return _execute_in_virtualenv(activate_cmd, command, self._install_mlflow,
                                      env_root_dir=self._env_root_dir)
    else:
        # No isolated environment requested/available: score in the current process.
        scoring_server._predict(local_uri, input_path, output_path, content_type, json_format)
def _download_artifact_from_uri(artifact_uri, output_path=None):
    """
    Download the artifact at ``artifact_uri`` via the matching artifact repository
    and return whatever ``download_artifacts`` returns (the local download
    location, per the repository contract).

    :param artifact_uri: The *absolute* URI of the artifact to download.
    :param output_path: The local filesystem path to which to download the artifact. If
                        unspecified, a local output path will be created.
    """
    if os.path.exists(artifact_uri):
        if os.name != "nt":
            # If we're dealing with local files, just reference the direct pathing.
            # non-nt-based file systems can directly reference path information, while nt-based
            # systems need to url-encode special characters in directory listings to be able to
            # resolve them (i.e., spaces converted to %20 within a file name or path listing)
            root_uri = os.path.dirname(artifact_uri)
            artifact_path = os.path.basename(artifact_uri)
            return get_artifact_repository(artifact_uri=root_uri).download_artifacts(
                artifact_path=artifact_path, dst_path=output_path)
        else:
            # if we're dealing with nt-based systems, we need to utilize pathname2url to encode.
            artifact_uri = path_to_local_file_uri(artifact_uri)
    parsed_uri = urllib.parse.urlparse(str(artifact_uri))
    prefix = ""
    if parsed_uri.scheme and not parsed_uri.path.startswith("/"):
        # relative path is a special case, urllib does not reconstruct it properly.
        # Detach the scheme here and re-prepend it manually after unparsing below.
        prefix = parsed_uri.scheme + ":"
        parsed_uri = parsed_uri._replace(scheme="")
    # For models:/ URIs, it doesn't make sense to initialize a ModelsArtifactRepository with only
    # the model name portion of the URI, then call download_artifacts with the version info.
    if ModelsArtifactRepository.is_models_uri(artifact_uri):
        root_uri = artifact_uri
        artifact_path = ""
    else:
        # Split the URI into a parent "root" URI plus the basename to fetch under it.
        artifact_path = posixpath.basename(parsed_uri.path)
        parsed_uri = parsed_uri._replace(path=posixpath.dirname(parsed_uri.path))
        root_uri = prefix + urllib.parse.urlunparse(parsed_uri)
    return get_artifact_repository(artifact_uri=root_uri).download_artifacts(
        artifact_path=artifact_path, dst_path=output_path)
def serve(self, model_uri, port, host):
    """
    Serve pyfunc model locally.

    The model is downloaded to a local path, exposed to the scoring server via an
    environment variable, and served with gunicorn — inside the model's conda
    environment when one is configured and conda is not disabled, otherwise in the
    current environment.
    """
    local_path = _download_artifact_from_uri(model_uri)
    # NB: Absolute windows paths do not work with mlflow apis, use file uri to ensure
    # platform compatibility.
    local_uri = path_to_local_file_uri(local_path)
    command = (
        f"gunicorn --timeout 60 -b {host}:{port} -w {self._nworkers} "
        "mlflow.pyfunc.scoring_server.wsgi:app"
    )
    command_env = os.environ.copy()
    command_env[scoring_server._SERVER_MODEL_PATH] = local_uri
    use_conda = not self._no_conda and ENV in self._config
    if use_conda:
        conda_env_path = os.path.join(local_path, self._config[ENV])
        return _execute_in_conda_env(
            conda_env_path, command, self._install_mlflow, command_env=command_env)
    _logger.info("=== Running command '%s'", command)
    subprocess.Popen(command.split(" "), env=command_env).wait()
def predict(
        self,
        model_uri,
        input_path,
        output_path,
        content_type,
        json_format,
):
    """
    Generate predictions using generic python model saved with MLflow.
    Return the prediction results as a JSON.
    """
    local_path = _download_artifact_from_uri(model_uri)
    # NB: Absolute windows paths do not work with mlflow apis, use file uri to ensure
    # platform compatibility.
    local_uri = path_to_local_file_uri(local_path)
    if self._no_conda or ENV not in self._config:
        # No conda environment configured (or conda disabled): score in-process.
        scoring_server._predict(local_uri, input_path, output_path, content_type, json_format)
        return
    conda_env_path = os.path.join(local_path, self._config[ENV])
    # Re-enter the scoring server in a fresh python process inside the model's
    # conda environment; !r quotes each value so the -c snippet stays valid python.
    command = (
        'python -c "from mlflow.pyfunc.scoring_server import _predict; _predict('
        f"model_uri={local_uri!r}, "
        f"input_path={input_path!r}, "
        f"output_path={output_path!r}, "
        f"content_type={content_type!r}, "
        f'json_format={json_format!r})"'
    )
    return _execute_in_conda_env(conda_env_path, command, self._install_mlflow)
def get_cmd(model_uri: str, port: int = None, host: str = None, timeout: int = None,
            nworkers: int = None) -> Tuple[str, Dict[str, str]]:
    """
    Build the shell command and environment used to launch a scoring server for
    the model at ``model_uri``.

    :param model_uri: Local path (or local URI) of the model to serve; converted
        to a ``file://`` URI and exposed to the server via ``_SERVER_MODEL_PATH``.
    :param port: Port to bind. NB: in the gunicorn branch the port is only used
        when ``host`` is also provided, since ``-b`` requires a host:port pair.
    :param host: Host/interface to bind (a hostname or address string — the
        original ``int`` annotation was wrong; the value is interpolated into
        ``-b {host}:{port}`` / ``--host={host}``).
    :param timeout: gunicorn worker timeout in seconds, defaulting to 60;
        waitress has no equivalent option, so it is ignored on Windows.
    :param nworkers: Number of gunicorn workers; ignored on Windows.
    :return: Tuple of (command string, environment mapping for the subprocess).
    """
    # NB: Absolute windows paths do not work with mlflow apis, use file uri to ensure
    # platform compatibility.
    local_uri = path_to_local_file_uri(model_uri)
    timeout = timeout or 60

    if os.name != "nt":
        args = [f"--timeout={timeout}"]
        if port and host:
            args.append(f"-b {host}:{port}")
        elif host:
            args.append(f"-b {host}")

        if nworkers:
            args.append(f"-w {nworkers}")

        command = (f"gunicorn {' '.join(args)} ${{GUNICORN_CMD_ARGS}}"
                   " -- mlflow.pyfunc.scoring_server.wsgi:app")
    else:
        # gunicorn does not run on Windows; fall back to waitress there.
        args = []
        if host:
            args.append(f"--host={host}")

        if port:
            args.append(f"--port={port}")

        command = (f"waitress-serve {' '.join(args)} "
                   "--ident=mlflow mlflow.pyfunc.scoring_server.wsgi:app")

    command_env = os.environ.copy()
    command_env[_SERVER_MODEL_PATH] = local_uri

    return command, command_env
def _get_sqlite_uri():
    """Return a SQLAlchemy sqlite URI pointing at a scratch database file under the
    suite root, with the slash count adjusted per platform."""
    # NOTE(review): the ".bd" extension looks like a typo for ".db"; harmless since
    # this is only a scratch file name, but worth confirming.
    path = path_to_local_file_uri(os.path.join(SUITE_ROOT_DIR, "test-database.bd"))
    # Strip the "file://" scheme; the sqlite-specific prefix is prepended below.
    path = path[len("file://") :]

    # NB: It looks like windows and posix have different requirements on number of slashes for
    # whatever reason. Windows needs uri like 'sqlite:///C:/path/to/my/file' whereas posix expects
    # sqlite://///path/to/my/file
    prefix = "sqlite://" if sys.platform == "win32" else "sqlite:////"
    return prefix + path


# Backend store URIs to test against
BACKEND_URIS = [
    _get_sqlite_uri(),  # SqlAlchemy
    path_to_local_file_uri(os.path.join(SUITE_ROOT_DIR, "file_store_root")),  # FileStore
]

# Map of backend URI to tuple (server URL, Process). We populate this map by constructing
# a server per backend URI
BACKEND_URI_TO_SERVER_URL_AND_PROC = {
    uri: _init_server(backend_uri=uri, root_artifact_uri=SUITE_ARTIFACT_ROOT_DIR)
    for uri in BACKEND_URIS
}


def pytest_generate_tests(metafunc):
    """
    Automatically parametrize each fixture/test that depends on `backend_store_uri`
    with the list of backend store URIs.
    """
def local_artifact_repo(local_artifact_root):
    """Return a ``LocalArtifactRepository`` rooted at ``local_artifact_root``."""
    from mlflow.utils.file_utils import path_to_local_file_uri

    artifact_uri = path_to_local_file_uri(local_artifact_root)
    return LocalArtifactRepository(artifact_uri=artifact_uri)
def tracking_uri_mock(tmpdir):
    """Fixture: point the MLflow tracking URI at a temp ``mlruns`` directory for the
    duration of the test, restoring it to ``None`` afterwards."""
    mlruns_dir = os.path.join(tmpdir.strpath, 'mlruns')
    try:
        # set_tracking_uri stays inside the try so the finally always resets it,
        # even if setting the URI itself fails.
        mlflow.set_tracking_uri(path_to_local_file_uri(mlruns_dir))
        yield tmpdir
    finally:
        mlflow.set_tracking_uri(None)