Esempio n. 1
0
 def test_empty_directory(self):
     """Check that two distinct empty directories produce the same URI.

     The directories "d1" and "d2" are created relative to the current
     working directory and removed again when the test finishes.
     """
     created = []
     try:
         for name in ("d1", "d2"):
             os.mkdir(name)
             # Track only what this test actually created, so cleanup never
             # touches a pre-existing directory and never raises a second
             # error that would mask a failure from os.mkdir.
             created.append(name)
         assert get_uri_for_directory("d1") == get_uri_for_directory("d2")
     finally:
         for name in created:
             os.rmdir(name)
Esempio n. 2
0
    def test_excludes(self, random_dir):
        """Excluding "subdir" must change the URI and match deleting it."""
        uri_with_subdir = get_uri_for_directory(random_dir)
        uri_with_exclude = get_uri_for_directory(
            random_dir, excludes=["subdir"])

        # The exclusion has to be reflected in the URI.
        assert uri_with_subdir != uri_with_exclude

        # Physically removing the directory must give the very same URI as
        # excluding it did.
        subdir = (Path(random_dir) / "subdir").resolve()
        rmtree(subdir)
        uri_after_delete = get_uri_for_directory(random_dir)
        assert uri_after_delete == uri_with_exclude
Esempio n. 3
0
    def test_determinism(self, random_dir):
        """The URI is stable for unchanged data and changes with new files."""
        # Hashing the untouched directory ten times must always give the
        # same URI, i.e. the set collapses to a single element.
        baseline = set()
        for _ in range(10):
            baseline.add(get_uri_for_directory(random_dir))
        assert len(baseline) == 1

        # Drop one additional file into the directory; the URI must differ.
        extra_file = random_dir / f"test_{random_string()}"
        with open(extra_file, "w") as handle:
            handle.write(random_string())

        assert {get_uri_for_directory(random_dir)} != baseline
Esempio n. 4
0
    def _upload_package_if_needed(
        self,
        package_path: str,
        include_parent_dir: bool = False,
        excludes: Optional[List[str]] = None,
        is_file: bool = False,
    ) -> str:
        """Compute the URI of a local package and upload it if it is missing.

        Args:
            package_path: Local path of the package. It is wrapped in a
                ``Path`` and given to ``get_uri_for_package`` when ``is_file``
                is true, otherwise it is given to ``get_uri_for_directory``.
            include_parent_dir: Forwarded unchanged to ``_upload_package``.
            excludes: Forwarded to ``get_uri_for_directory`` (directory case
                only) and to ``_upload_package``.
            is_file: Selects the file-based URI computation and is forwarded
                to ``_upload_package``.

        Returns:
            The package URI, whether or not an upload took place.
        """
        package_uri = (
            get_uri_for_package(Path(package_path))
            if is_file
            else get_uri_for_directory(package_path, excludes=excludes)
        )

        # Nothing to do when the package is already known; only log it.
        if self._package_exists(package_uri):
            logger.info(
                f"Package {package_uri} already exists, skipping upload.")
            return package_uri

        self._upload_package(
            package_uri,
            package_path,
            include_parent_dir=include_parent_dir,
            excludes=excludes,
            is_file=is_file,
        )
        return package_uri
Esempio n. 5
0
def upload_py_modules_if_needed(
    runtime_env: Dict[str, Any],
    scratch_dir: str,
    logger: Optional[logging.Logger] = default_logger,
) -> Dict[str, Any]:
    """Upload every local ``py_modules`` entry and store URIs in its place.

    Entries that are already URIs are kept as they are. ``runtime_env`` is
    modified in place and also returned.

    Args:
        runtime_env: Runtime environment dict; its ``"py_modules"`` and
            ``"excludes"`` keys are read here.
        scratch_dir: Passed through to ``upload_package_if_needed``.
        logger: Passed through to ``upload_package_if_needed``.

    Returns:
        The same ``runtime_env`` object. When ``"py_modules"`` was present,
        it now holds a list of URIs.

    Raises:
        TypeError: If ``py_modules`` is not a list, or an entry is neither a
            ``str``, a ``Path`` nor an imported module.
        ValueError: If an imported module's ``__path__`` has more than one
            element.
    """
    requested = runtime_env.get("py_modules")
    if requested is None:
        return runtime_env

    if not isinstance(requested, list):
        raise TypeError(
            "py_modules must be a List of local paths, imported modules, or "
            f"URIs, got {type(requested)}.")

    uploaded_uris = []
    for entry in requested:
        # Normalise every supported entry type to a string that is either a
        # local path or a URI.
        if isinstance(entry, str):
            entry_path = entry
        elif isinstance(entry, Path):
            entry_path = str(entry)
        elif isinstance(entry, ModuleType):
            # NOTE(edoakes): Python allows some installed Python packages to
            # be split into multiple directories. We could probably handle
            # this, but it seems tricky & uncommon. If it's a problem for
            # users, we can add this support on demand.
            if len(entry.__path__) > 1:
                raise ValueError("py_modules only supports modules whose "
                                 "__path__ has length 1.")
            (entry_path,) = entry.__path__
        else:
            raise TypeError("py_modules must be a list of file paths, URIs, "
                            f"or imported modules, got {type(entry)}.")

        # URIs are passed through untouched.
        if _check_is_uri(entry_path):
            uploaded_uris.append(entry_path)
            continue

        # Anything else is a local path: hash it and upload when needed.
        excludes = runtime_env.get("excludes", None)
        entry_uri = get_uri_for_directory(entry_path, excludes=excludes)
        upload_package_if_needed(
            entry_uri,
            scratch_dir,
            entry_path,
            excludes=excludes,
            include_parent_dir=True,
            logger=logger,
        )
        uploaded_uris.append(entry_uri)

    # TODO(architkulkarni): Expose a single URI for py_modules.  This plugin
    # should internally handle the "sub-URIs", the individual modules.

    runtime_env["py_modules"] = uploaded_uris
    return runtime_env
Esempio n. 6
0
def upload_working_dir_if_needed(
    runtime_env: Dict[str, Any],
    scratch_dir: str,
    logger: Optional[logging.Logger] = default_logger,
) -> Dict[str, Any]:
    """Replace a local ``working_dir`` entry with the URI of its upload.

    A ``working_dir`` that already parses as a URI is left untouched.
    ``runtime_env`` is modified in place and also returned.

    Args:
        runtime_env: Runtime environment dict; its ``"working_dir"`` and
            ``"excludes"`` keys are read here.
        scratch_dir: Passed through to ``upload_package_if_needed``.
        logger: Passed through to ``upload_package_if_needed``.

    Returns:
        The same ``runtime_env`` object.

    Raises:
        TypeError: If ``working_dir`` is neither a ``str`` nor a ``Path``.
        ValueError: If a remote URI does not end in ``.zip``, or if the local
            path is neither a directory nor an existing ``.zip`` file.
    """
    target = runtime_env.get("working_dir")
    if target is None:
        return runtime_env

    if not isinstance(target, (str, Path)):
        raise TypeError(
            "working_dir must be a string or Path (either a local path "
            f"or remote URI), got {type(target)}.")

    if isinstance(target, Path):
        target = str(target)

    # A value that parses as a URI is passed through; anything that fails to
    # parse is treated as a local path below.
    try:
        protocol, path = parse_uri(target)
    except ValueError:
        protocol, path = None, None

    if protocol is not None:
        is_remote = protocol in Protocol.remote_protocols()
        if is_remote and not path.endswith(".zip"):
            raise ValueError("Only .zip files supported for remote URIs.")
        return runtime_env

    excludes = runtime_env.get("excludes", None)
    try:
        directory_uri = get_uri_for_directory(target, excludes=excludes)
    except ValueError:
        # Not a directory: the only other accepted input is an existing
        # .zip package, which is uploaded as-is.
        zip_path = Path(target)
        is_existing_zip = zip_path.exists() and zip_path.suffix == ".zip"
        if not is_existing_zip:
            raise ValueError(f"directory {zip_path} must be an existing "
                             "directory or a zip package")

        zip_uri = get_uri_for_package(zip_path)
        upload_package_to_gcs(zip_uri, zip_path.read_bytes())
        runtime_env["working_dir"] = zip_uri
    else:
        upload_package_if_needed(
            directory_uri,
            scratch_dir,
            target,
            include_parent_dir=False,
            excludes=excludes,
            logger=logger,
        )
        runtime_env["working_dir"] = directory_uri

    return runtime_env
Esempio n. 7
0
    def test_unopenable_files_skipped(self, random_dir, short_path_dir):
        """Test that unopenable files can be present in the working_dir.

        Some files such as `.sock` files are unopenable. This test ensures that
        we skip those files when generating the content hash. Previously this
        would raise an exception, see #25411.
        """
        socket_path = short_path_dir / "test_socket"

        # Create a socket file. The context manager keeps the socket open
        # for the checks below and closes it afterwards, so the test no
        # longer leaks a file descriptor.
        with socket.socket(socket.AF_UNIX) as sock:
            sock.bind(str(socket_path))

            # Check that opening the socket raises an exception.
            with pytest.raises(OSError):
                socket_path.open()

            # Check that the hash can still be generated without errors.
            get_uri_for_directory(short_path_dir)
Esempio n. 8
0
    def _upload_package_if_needed(
        self, package_path: str, excludes: Optional[List[str]] = None
    ) -> str:
        """Compute the URI for a local directory and upload it if missing.

        Args:
            package_path: Local directory passed to ``get_uri_for_directory``
                and ``_upload_package``.
            excludes: Forwarded to both ``get_uri_for_directory`` and
                ``_upload_package``.

        Returns:
            The package URI, whether or not an upload took place.
        """
        package_uri = get_uri_for_directory(package_path, excludes=excludes)

        # Already present: log and return without uploading.
        if self._package_exists(package_uri):
            logger.info(f"Package {package_uri} already exists, skipping upload.")
            return package_uri

        self._upload_package(package_uri, package_path, excludes=excludes)
        return package_uri
Esempio n. 9
0
    def test_create_upload_once(self, tmp_path, random_dir, ray_start_regular):
        """A package is uploaded once, then again only after its key is gone."""
        uri = get_uri_for_directory(random_dir)

        def upload():
            return upload_package_if_needed(uri, tmp_path, random_dir)

        def stored():
            return _internal_kv_exists(uri, namespace=KV_NAMESPACE_PACKAGE)

        # The first call performs the upload and the key shows up.
        assert upload()
        assert stored()

        # The second call reports that nothing was uploaded; the key stays.
        assert not upload()
        assert stored()

        # Delete the URI from the internal_kv. This should trigger re-upload.
        _internal_kv_del(uri, namespace=KV_NAMESPACE_PACKAGE)
        assert not stored()
        assert upload()
Esempio n. 10
0
    def test_create_upload_once(self, empty_dir, random_dir,
                                ray_start_regular):
        """A package is uploaded once, then again only after its key is gone."""
        uri = get_uri_for_directory(random_dir)

        def upload():
            return upload_package_if_needed(uri, empty_dir, random_dir)

        # The first call performs the upload and the key shows up.
        assert upload()
        assert _internal_kv_exists(uri)

        # The second call reports that nothing was uploaded; the key stays.
        assert not upload()
        assert _internal_kv_exists(uri)

        # Delete the URI from the internal_kv. This should trigger re-upload.
        _internal_kv_del(uri)
        assert not _internal_kv_exists(uri)
        assert upload()
async def test_create_delete_size_equal(tmpdir, ray_start_regular):
    """Tests that `create` and `delete_uri` return the same size for a URI."""

    # Build a small non-empty directory holding one 100-character file.
    dir_to_upload = Path(tmpdir) / "dir_to_upload"
    dir_to_upload.mkdir(parents=True)
    (dir_to_upload / "file").write_text("F" * 100)

    uri = get_uri_for_directory(dir_to_upload)
    assert get_directory_size_bytes(dir_to_upload) > 0

    # The package must actually be uploaded before the manager can use it.
    assert upload_package_if_needed(uri, tmpdir, dir_to_upload)

    manager = WorkingDirManager(tmpdir)

    # Size reported on creation must equal the size reported on deletion.
    size_on_create = await manager.create(uri, {}, RuntimeEnvContext())
    size_on_delete = manager.delete_uri(uri)
    assert size_on_create == size_on_delete
Esempio n. 12
0
def upload_working_dir_if_needed(
        runtime_env: Dict[str, Any],
        scratch_dir: str,
        logger: Optional[logging.Logger] = default_logger) -> Dict[str, Any]:
    """Replace a local ``working_dir`` entry with the URI of its upload.

    A ``working_dir`` that already parses as a URI is left untouched.
    ``runtime_env`` is modified in place and also returned.

    Args:
        runtime_env: Runtime environment dict; its ``"working_dir"`` and
            ``"excludes"`` keys are read here.
        scratch_dir: Passed through to ``upload_package_if_needed``.
        logger: Passed through to ``upload_package_if_needed``.

    Returns:
        The same ``runtime_env`` object.

    Raises:
        TypeError: If ``working_dir`` is not a ``str``.
        ValueError: If a remote URI does not end in ``.zip``.
    """
    target = runtime_env.get("working_dir")
    if target is None:
        return runtime_env

    if not isinstance(target, str):
        raise TypeError(
            "working_dir must be a string (either a local path or remote "
            f"URI), got {type(target)}.")

    # A value that parses as a URI is passed through; anything that fails to
    # parse is treated as a local path below.
    try:
        protocol, path = parse_uri(target)
    except ValueError:
        protocol, path = None, None

    if protocol is not None:
        is_remote = protocol in Protocol.remote_protocols()
        if is_remote and not path.endswith(".zip"):
            raise ValueError("Only .zip files supported for remote URIs.")
        return runtime_env

    # Local directory: derive its URI, upload if needed, then record the URI.
    excludes = runtime_env.get("excludes", None)
    directory_uri = get_uri_for_directory(target, excludes=excludes)
    upload_package_if_needed(
        directory_uri,
        scratch_dir,
        target,
        include_parent_dir=False,
        excludes=excludes,
        logger=logger,
    )
    runtime_env["working_dir"] = directory_uri
    return runtime_env
Esempio n. 13
0
def load_package(config_path: str) -> "_RuntimePackage":
    """Load the code package given its config path.

    Args:
        config_path (str): The path to the configuration YAML that defines
            the package. For documentation on the packaging format, see the
            example YAML in ``example_pkg/ray_pkg.yaml``.

    Returns:
        A ``_RuntimePackage`` built from the ``name``, ``description``,
        ``interface_file`` and ``runtime_env`` entries of the config.

    Raises:
        ValueError: If the config file does not exist, or if a ``conda.yaml``
            sits next to the config while the config's runtime env also has
            a ``conda`` entry.

    Examples:
        >>> # Load from local.
        >>> my_pkg = load_package("~/path/to/my_pkg.yaml")

        >>> # Load from GitHub.
        >>> my_pkg = ray.util.load_package(
        ...   "https://raw.githubusercontent.com/user/repo/refspec"
        ...   "/path/to/package/my_pkg.yaml")

        >>> # Inspect the package runtime env.
        >>> print(my_pkg._runtime_env)
        ... {"conda": {...},
        ...  "docker": "anyscale-ml/ray-ml:nightly-py38-cpu",
        ...  "working_dir": "https://github.com/demo/foo/blob/v3.0/project/"}

        >>> # Run remote functions from the package.
        >>> my_pkg.my_func.remote(1, 2)

        >>> # Create actors from the package.
        >>> actor = my_pkg.MyActor.remote(3, 4)

        >>> # Create new remote funcs in the same env as a package.
        >>> @ray.remote(runtime_env=my_pkg._runtime_env)
        ... def f(): ...
    """

    config_path = _download_from_github_if_needed(config_path)

    if not os.path.exists(config_path):
        raise ValueError("Config file does not exist: {}".format(config_path))

    # TODO(ekl) validate schema?
    # Use a context manager so the file handle is closed deterministically.
    with open(config_path) as config_file:
        config = yaml.safe_load(config_file.read())
    base_dir = os.path.abspath(os.path.dirname(config_path))
    runtime_env = config["runtime_env"]

    # Autofill working directory by uploading to GCS storage.
    if "working_dir" not in runtime_env:
        pkg_uri = get_uri_for_directory(base_dir, excludes=[])

        def do_register_package():
            # TODO(ekl) does this get garbage collected correctly with the
            # current job id?
            upload_package_if_needed(pkg_uri, _pkg_tmp(), base_dir)

        # Upload right away when Ray is running; otherwise defer the upload
        # to a post-init hook.
        if ray.is_initialized():
            do_register_package()
        else:
            ray.worker._post_init_hooks.append(do_register_package)
        runtime_env["working_dir"] = pkg_uri

    # Autofill conda config.
    conda_yaml = os.path.join(base_dir, "conda.yaml")
    if os.path.exists(conda_yaml):
        if "conda" in runtime_env:
            raise ValueError(
                "Both conda.yaml and conda: section found in package")
        with open(conda_yaml) as conda_file:
            runtime_env["conda"] = yaml.safe_load(conda_file.read())

    pkg = _RuntimePackage(name=config["name"],
                          desc=config["description"],
                          interface_file=os.path.join(
                              base_dir, config["interface_file"]),
                          runtime_env=runtime_env)
    return pkg
Esempio n. 14
0
    def test_invalid_directory(self):
        """Nonexistent paths, absolute or relative, must raise ValueError."""
        for missing_path in ("/does/not/exist", "does/not/exist"):
            with pytest.raises(ValueError):
                get_uri_for_directory(missing_path)
Esempio n. 15
0
 def test_uri_hash_length(self, random_dir):
     """The hash between the last "_" and ".zip" is 16 characters long."""
     uri = get_uri_for_directory(random_dir)
     # Take everything after the final underscore, then cut off as many
     # trailing characters as ".zip" has.
     tail = uri.rsplit("_", 1)[-1]
     hex_hash = tail[:-len(".zip")]
     assert len(hex_hash) == 16
Esempio n. 16
0
 def test_relative_paths(self, random_dir):
     """A relative and an absolute path to one directory share a URI."""
     directory = Path(random_dir)
     uri_from_relative = get_uri_for_directory(os.path.relpath(directory))
     uri_from_absolute = get_uri_for_directory(directory.resolve())
     assert uri_from_relative == uri_from_absolute