def upload_py_modules_if_needed(
        runtime_env: Dict[str, Any],
        scratch_dir: str,
        logger: Optional[logging.Logger] = default_logger,
) -> Dict[str, Any]:
    """Uploads the entries in py_modules and replaces them with a list of URIs.

    For each entry that is already a URI, this is a no-op.

    Args:
        runtime_env: runtime_env dict that may contain a "py_modules" key
            holding local paths, imported modules, and/or URIs, and
            optionally an "excludes" key applied when packaging local paths.
        scratch_dir: Local directory used as scratch space while packaging.
        logger: Logger for progress/debug output.

    Returns:
        The same runtime_env dict, with "py_modules" replaced by a list of
        URIs (one per original entry). Unmodified if "py_modules" is absent.

    Raises:
        TypeError: If py_modules is not a list, or an entry is not a str,
            Path, or imported module.
        ValueError: If an imported module's __path__ has more than one entry.
    """
    py_modules = runtime_env.get("py_modules")
    if py_modules is None:
        return runtime_env

    if not isinstance(py_modules, list):
        raise TypeError(
            "py_modules must be a List of local paths, imported modules, or "
            f"URIs, got {type(py_modules)}.")

    # "excludes" is loop-invariant; fetch it once instead of per module.
    excludes = runtime_env.get("excludes", None)

    py_modules_uris = []
    for module in py_modules:
        if isinstance(module, str):
            # module_path is a local path or a URI.
            module_path = module
        elif isinstance(module, Path):
            module_path = str(module)
        elif isinstance(module, ModuleType):
            # NOTE(edoakes): Python allows some installed Python packages to
            # be split into multiple directories. We could probably handle
            # this, but it seems tricky & uncommon. If it's a problem for
            # users, we can add this support on demand.
            if len(module.__path__) > 1:
                raise ValueError("py_modules only supports modules whose "
                                 "__path__ has length 1.")
            [module_path] = module.__path__
        else:
            raise TypeError("py_modules must be a list of file paths, URIs, "
                            f"or imported modules, got {type(module)}.")

        if _check_is_uri(module_path):
            # Already a URI -- pass it through untouched.
            module_uri = module_path
        else:
            # module_path is a local path: derive a content-addressed URI
            # and upload the package if it isn't already stored.
            module_uri = get_uri_for_directory(module_path, excludes=excludes)
            upload_package_if_needed(
                module_uri,
                scratch_dir,
                module_path,
                excludes=excludes,
                include_parent_dir=True,
                logger=logger,
            )

        py_modules_uris.append(module_uri)

    # TODO(architkulkarni): Expose a single URI for py_modules. This plugin
    # should internally handle the "sub-URIs", the individual modules.
    runtime_env["py_modules"] = py_modules_uris
    return runtime_env
def upload_working_dir_if_needed(
        runtime_env: Dict[str, Any],
        scratch_dir: str,
        logger: Optional[logging.Logger] = default_logger,
) -> Dict[str, Any]:
    """Uploads the working_dir and replaces it with a URI.

    If the working_dir is already a URI, this is a no-op.

    Args:
        runtime_env: runtime_env dict that may contain a "working_dir" key
            (local directory path, local .zip path, Path, or URI) and
            optionally an "excludes" key applied when packaging a directory.
        scratch_dir: Local directory used as scratch space while packaging.
        logger: Logger for progress/debug output.

    Returns:
        The same runtime_env dict with "working_dir" replaced by a URI.
        Unmodified if "working_dir" is absent.

    Raises:
        TypeError: If working_dir is not a str or Path.
        ValueError: If a remote URI doesn't point to a .zip, or a local path
            is neither an existing directory nor a .zip package.
    """
    working_dir = runtime_env.get("working_dir")
    if working_dir is None:
        return runtime_env

    # Idiomatic single isinstance call with a tuple of accepted types.
    if not isinstance(working_dir, (str, Path)):
        raise TypeError(
            "working_dir must be a string or Path (either a local path "
            f"or remote URI), got {type(working_dir)}.")

    if isinstance(working_dir, Path):
        working_dir = str(working_dir)

    # working_dir is already a URI -- just pass it through.
    try:
        protocol, path = parse_uri(working_dir)
    except ValueError:
        protocol, path = None, None

    if protocol is not None:
        if protocol in Protocol.remote_protocols(
        ) and not path.endswith(".zip"):
            raise ValueError("Only .zip files supported for remote URIs.")
        return runtime_env

    excludes = runtime_env.get("excludes", None)
    try:
        working_dir_uri = get_uri_for_directory(working_dir, excludes=excludes)
    except ValueError:  # working_dir is not a directory.
        # Fall back to treating it as a local .zip package.
        package_path = Path(working_dir)
        if not package_path.exists() or package_path.suffix != ".zip":
            raise ValueError(f"directory {package_path} must be an existing "
                             "directory or a zip package")

        pkg_uri = get_uri_for_package(package_path)
        upload_package_to_gcs(pkg_uri, package_path.read_bytes())
        runtime_env["working_dir"] = pkg_uri
        return runtime_env

    upload_package_if_needed(
        working_dir_uri,
        scratch_dir,
        working_dir,
        include_parent_dir=False,
        excludes=excludes,
        logger=logger,
    )
    runtime_env["working_dir"] = working_dir_uri
    return runtime_env
def test_create_upload_once(self, tmp_path, random_dir, ray_start_regular):
    """Upload happens once, is skipped while stored, and reruns after delete."""
    uri = get_uri_for_directory(random_dir)

    # First call performs the upload and records the URI.
    assert upload_package_if_needed(uri, tmp_path, random_dir)
    assert _internal_kv_exists(uri, namespace=KV_NAMESPACE_PACKAGE)

    # Second call sees the stored URI and skips the upload.
    assert not upload_package_if_needed(uri, tmp_path, random_dir)
    assert _internal_kv_exists(uri, namespace=KV_NAMESPACE_PACKAGE)

    # Delete the URI from the internal_kv. This should trigger re-upload.
    _internal_kv_del(uri, namespace=KV_NAMESPACE_PACKAGE)
    assert not _internal_kv_exists(uri, namespace=KV_NAMESPACE_PACKAGE)
    assert upload_package_if_needed(uri, tmp_path, random_dir)
def test_create_upload_once(self, empty_dir, random_dir, ray_start_regular):
    """Re-uploading the same package is a no-op until its URI is deleted."""
    uri = get_uri_for_directory(random_dir)

    first_upload = upload_package_if_needed(uri, empty_dir, random_dir)
    assert first_upload
    assert _internal_kv_exists(uri)

    second_upload = upload_package_if_needed(uri, empty_dir, random_dir)
    assert not second_upload
    assert _internal_kv_exists(uri)

    # Delete the URI from the internal_kv. This should trigger re-upload.
    _internal_kv_del(uri)
    assert not _internal_kv_exists(uri)
    assert upload_package_if_needed(uri, empty_dir, random_dir)
def upload_working_dir_if_needed(
        runtime_env: Dict[str, Any],
        scratch_dir: str,
        logger: Optional[logging.Logger] = default_logger) -> Dict[str, Any]:
    """Uploads the working_dir and replaces it with a URI.

    If the working_dir is already a URI, this is a no-op.

    Args:
        runtime_env: runtime_env dict that may contain a "working_dir" key
            (local path, Path, or URI) and optionally an "excludes" key
            applied when packaging a local directory.
        scratch_dir: Local directory used as scratch space while packaging.
        logger: Logger for progress/debug output.

    Returns:
        The same runtime_env dict with "working_dir" replaced by a URI.
        Unmodified if "working_dir" is absent.

    Raises:
        TypeError: If working_dir is not a str or Path.
        ValueError: If a remote URI doesn't point to a .zip file.
    """
    working_dir = runtime_env.get("working_dir")
    if working_dir is None:
        return runtime_env

    # Accept pathlib.Path for convenience, consistent with the other
    # runtime_env upload helpers; coerce to str before the type check so
    # Path inputs are handled rather than rejected.
    if isinstance(working_dir, Path):
        working_dir = str(working_dir)

    if not isinstance(working_dir, str):
        raise TypeError(
            "working_dir must be a string (either a local path or remote "
            f"URI), got {type(working_dir)}.")

    # working_dir is already a URI -- just pass it through.
    try:
        protocol, path = parse_uri(working_dir)
    except ValueError:
        protocol, path = None, None

    if protocol is not None:
        if protocol in Protocol.remote_protocols(
        ) and not path.endswith(".zip"):
            raise ValueError("Only .zip files supported for remote URIs.")
        return runtime_env

    excludes = runtime_env.get("excludes", None)
    working_dir_uri = get_uri_for_directory(working_dir, excludes=excludes)
    upload_package_if_needed(
        working_dir_uri,
        scratch_dir,
        working_dir,
        include_parent_dir=False,
        excludes=excludes,
        logger=logger)
    runtime_env["working_dir"] = working_dir_uri
    return runtime_env
async def test_create_delete_size_equal(tmpdir, ray_start_regular):
    """Tests that `create` and `delete_uri` return the same size for a URI."""
    # Build a small nonempty directory to package up.
    upload_root = Path(tmpdir) / "dir_to_upload"
    upload_root.mkdir(parents=True)
    (upload_root / "file").write_text("F" * 100)

    uri = get_uri_for_directory(upload_root)
    assert get_directory_size_bytes(upload_root) > 0
    assert upload_package_if_needed(uri, tmpdir, upload_root)

    manager = WorkingDirManager(tmpdir)
    size_on_create = await manager.create(uri, {}, RuntimeEnvContext())
    size_on_delete = manager.delete_uri(uri)
    assert size_on_create == size_on_delete
def do_register_package():
    # Closure over `pkg_uri`, `_pkg_tmp`, and `base_dir` from the enclosing
    # scope (not visible in this chunk). Presumably uploads the package at
    # `base_dir` under `pkg_uri`, skipping if already stored -- confirm
    # against upload_package_if_needed's definition.
    # TODO(ekl) does this get garbage collected correctly with the
    # current job id?
    upload_package_if_needed(pkg_uri, _pkg_tmp(), base_dir)