Ejemplo n.º 1
0
        def _create():
            """Create the conda environment for ``uri`` and return its size.

            The conda spec (with Ray inserted) is dumped to an
            ``environment.yml`` file under ``self._resources_dir``, the
            environment is created at the path derived from the URI hash if
            needed, and the spec file is removed afterwards.  The whole
            install runs while holding the installs-and-deletions file lock.

            Returns:
                The value of ``get_directory_size_bytes`` for the environment
                path.
            """
            logger.debug("Setting up conda for runtime_env: "
                         f"{runtime_env.serialize()}")
            # Only the hash part of the URI is needed here.
            _, env_hash = parse_uri(uri)
            conda_env_name = self._get_path_from_hash(env_hash)

            conda_dict = _get_conda_dict_with_ray_inserted(runtime_env,
                                                           logger=logger)

            logger.info(f"Setting up conda environment with {runtime_env}")
            # Resolve the path before entering ``try`` so the cleanup in
            # ``finally`` can never reference an unbound name.
            conda_yaml_file = os.path.join(self._resources_dir,
                                           "environment.yml")
            with FileLock(self._installs_and_deletions_file_lock):
                try:
                    with open(conda_yaml_file, "w") as file:
                        yaml.dump(conda_dict, file)
                    create_conda_env_if_needed(conda_yaml_file,
                                               prefix=conda_env_name,
                                               logger=logger)
                finally:
                    # The spec file may never have been written (for example
                    # when ``open`` itself failed).  Do not let the cleanup
                    # replace the original error with a FileNotFoundError.
                    try:
                        os.remove(conda_yaml_file)
                    except FileNotFoundError:
                        pass

                # The extension value is compared as the string "True".
                if runtime_env.get_extension("_inject_current_ray") == "True":
                    _inject_ray_to_conda_site(conda_path=conda_env_name,
                                              logger=logger)
            logger.info(
                f"Finished creating conda environment at {conda_env_name}")
            return get_directory_size_bytes(conda_env_name)
Ejemplo n.º 2
0
    def delete_uri(self,
                   uri: str,
                   logger: Optional[logging.Logger] = default_logger) -> int:
        """Delete the pip environment for ``uri`` and return the bytes freed.

        Args:
            uri: URI of the environment; its protocol must be
                ``Protocol.PIP``.
            logger: Logger used for progress and warning messages.

        Returns:
            The directory size measured before deletion, or 0 if removing
            the directory failed with an ``OSError``.

        Raises:
            ValueError: If the URI protocol is not ``Protocol.PIP``.
        """
        logger.info("Got request to delete pip URI %s", uri)
        protocol, uri_hash = parse_uri(uri)
        if protocol != Protocol.PIP:
            raise ValueError("PipManager can only delete URIs with protocol "
                             f"pip. Received protocol {protocol}, URI {uri}")

        # Cancel running create task.
        task = self._creating_task.pop(uri_hash, None)
        if task is not None:
            task.cancel()

        pip_env_path = self._get_path_from_hash(uri_hash)
        local_dir_size = get_directory_size_bytes(pip_env_path)
        # There may be no lock entry for this URI.  Dropping it must not
        # raise KeyError and prevent the directory removal below.
        self._create_locks.pop(uri, None)
        try:
            shutil.rmtree(pip_env_path)
        except OSError as e:
            logger.warning(
                f"Error when deleting pip env {pip_env_path}: {str(e)}")
            return 0

        return local_dir_size
Ejemplo n.º 3
0
    def test_hit_cache_size_limit(self, start_cluster, URI_cache_10_MB):
        """Check that exceeding the nonzero (10MB) cache size evicts data."""
        NUM_NODES = 3
        mib = 1024 * 1024
        cluster, address = start_cluster
        # The head node is already part of the cluster; add the remainder.
        for node_idx in range(NUM_NODES - 1):
            resources_dir_name = f"node_{node_idx}_runtime_resources"
            cluster.add_node(
                num_cpus=1, runtime_env_dir_name=resources_dir_name)

        with tempfile.TemporaryDirectory() as tmp_dir, chdir(tmp_dir):
            # First working_dir payload: 8 MiB of random bytes.
            with open("test_file_1", "wb") as first_file:
                first_file.write(os.urandom(8 * mib))

            ray.init(address, runtime_env={"working_dir": tmp_dir})

            @ray.remote
            def f():
                pass

            ray.get(f.remote())
            ray.shutdown()

            # Second payload: 4 MiB of random bytes replaces the first file.
            with open("test_file_2", "wb") as second_file:
                second_file.write(os.urandom(4 * mib))
            os.remove("test_file_1")

            ray.init(address, runtime_env={"working_dir": tmp_dir})
            # With no size limit the local dir would hold 12 MB.  Because the
            # limit applies, the first package has to be GC'ed and only 4 MB
            # should remain.  Sleep so the deletion has time to happen.
            time.sleep(5)
            for node in cluster.list_all_nodes():
                local_dir = os.path.join(node.get_runtime_env_dir_path(),
                                         "working_dir_files")
                size_mib = get_directory_size_bytes(local_dir) / mib
                assert 3 < size_mib < 5
Ejemplo n.º 4
0
    def create(
        self,
        uri: str,
        runtime_env: RuntimeEnv,
        context: RuntimeEnvContext,
        logger: Optional[logging.Logger] = default_logger,
    ) -> int:
        """Install the runtime env's pip packages and return the bytes used.

        Args:
            uri: URI whose hash selects the target directory.
            runtime_env: Runtime environment providing the pip package list.
            context: Runtime env context; not used by this method.
            logger: Logger used for debug output and passed to the installer.

        Returns:
            The value of ``get_directory_size_bytes`` for the target
            directory after installation.
        """
        logger.debug("Setting up pip for runtime_env: "
                     f"{runtime_env.serialize()}")
        # Only the hash part of the URI is needed here.
        _, uri_hash = parse_uri(uri)
        target_dir = self._get_path_from_hash(uri_hash)

        pip_packages: List[str] = runtime_env.pip_packages()
        with FileLock(self._installs_and_deletions_file_lock):
            _install_pip_list_to_dir(pip_packages, target_dir, logger=logger)

            # Despite Ray being removed from the input pip list during
            # validation, other packages in the pip list (for example,
            # xgboost_ray) may themselves include Ray as a dependency.  In this
            # case, we will have inadvertently installed the latest Ray version
            # in the target_dir, which may cause Ray version mismatch issues.
            # Uninstall it here, if it exists, to make the workers use the Ray
            # that is already installed in the cluster.
            #
            # In the case where the user explicitly wants to include Ray in
            # their pip list (and signals this by setting the environment
            # variable below) then we don't want this deletion logic, so we
            # skip it.
            #
            # Environment variable values are always strings (or None when
            # unset), so compare against "1".  Comparing against the int 1
            # is always unequal, which made the opt-out impossible.
            if os.environ.get(RAY_RUNTIME_ENV_ALLOW_RAY_IN_PIP) != "1":
                ray_path = Path(target_dir) / "ray"
                if ray_path.exists() and ray_path.is_dir():
                    shutil.rmtree(ray_path)
        return get_directory_size_bytes(target_dir)
Ejemplo n.º 5
0
    def test_hit_cache_size_limit(self, start_cluster, URI_cache_10_MB,
                                  disable_temporary_uri_pinning):
        """Check that exceeding the nonzero (10MB) cache size evicts data.

        A progress message is printed after each step.
        """
        NUM_NODES = 3
        mib = 1024 * 1024
        cluster, address = start_cluster
        # The head node is already part of the cluster; add the remainder.
        for node_idx in range(NUM_NODES - 1):
            dir_name = f"node_{node_idx}_runtime_resources"
            cluster.add_node(num_cpus=1, runtime_env_dir_name=dir_name)
            print(f'Added node with runtime_env_dir_name "{dir_name}".')

        print(f"Added all {NUM_NODES} nodes.")

        with tempfile.TemporaryDirectory() as tmp_dir, chdir(tmp_dir):
            print("Entered tempfile context manager.")

            # First working_dir payload: 8 MiB of random bytes.
            with open("test_file_1", "wb") as first_file:
                first_file.write(os.urandom(8 * mib))
            print('Wrote random bytes to "test_file_1" file.')

            ray.init(address, runtime_env={"working_dir": tmp_dir})
            print(f'Initialized Ray at "{address}" with working_dir.')

            @ray.remote
            def f():
                pass

            ray.get(f.remote())
            print('Created and received response from task "f".')

            ray.shutdown()
            print("Ray has been shut down.")

            # Second payload: 4 MiB of random bytes replaces the first file.
            with open("test_file_2", "wb") as second_file:
                second_file.write(os.urandom(4 * mib))
            print('Wrote random bytes to "test_file_2".')

            os.remove("test_file_1")
            print('Removed "test_file_1".')

            ray.init(address, runtime_env={"working_dir": tmp_dir})
            print(
                f'Reinitialized Ray at address "{address}" with working_dir.')

            # With no size limit the local dir would hold 12 MB.  Because the
            # limit applies, the first package has to be GC'ed and only 4 MB
            # should remain.  Sleep so the deletion has time to happen.
            time.sleep(5)
            print("Slept for 5 seconds.")

            for idx, node in enumerate(cluster.list_all_nodes()):
                local_dir = os.path.join(node.get_runtime_env_dir_path(),
                                         "working_dir_files")
                print("Created local_dir path.")

                size_mib = get_directory_size_bytes(local_dir) / mib
                assert 3 < size_mib < 5
                print(f"get_directory_size_bytes assertion {idx} passed.")
Ejemplo n.º 6
0
        def _create():
            """Fetch the module for ``uri`` and return its directory size.

            Jar URIs go through ``self._download_jars``; every other URI is
            downloaded and unpacked into ``self._resources_dir``.
            """
            if not is_jar_uri(uri):
                unpacked_dir = download_and_unpack_package(
                    uri, self._resources_dir, logger=logger
                )
                return get_directory_size_bytes(unpacked_dir)

            jars_dir = self._download_jars(uri=uri, logger=logger)
            return get_directory_size_bytes(jars_dir)
Ejemplo n.º 7
0
 async def create(
     self,
     uri: str,
     runtime_env: dict,
     context: RuntimeEnvContext,
     logger: Optional[logging.Logger] = default_logger,
 ) -> int:
     """Download and unpack ``uri`` and return the unpacked directory size.

     The package is unpacked under ``self._resources_dir``; ``runtime_env``
     and ``context`` are not used by this method.
     """
     return get_directory_size_bytes(
         download_and_unpack_package(
             uri, self._resources_dir, logger=logger))
Ejemplo n.º 8
0
    def delete_uri(self,
                   uri: str,
                   logger: Optional[logging.Logger] = default_logger) -> int:
        """Remove the package for ``uri``; return how many bytes were deleted.

        The size is measured before deletion.  When ``delete_package``
        reports that nothing was deleted, a warning is logged and 0 is
        returned.
        """
        package_dir = get_local_dir_from_uri(uri, self._resources_dir)
        size_before_delete = get_directory_size_bytes(package_dir)

        if delete_package(uri, self._resources_dir):
            return size_before_delete

        logger.warning(f"Tried to delete nonexistent URI: {uri}.")
        return 0
Ejemplo n.º 9
0
    async def create(
        self,
        uri: str,
        runtime_env: "RuntimeEnv",  # noqa: F821
        context: RuntimeEnvContext,
        logger: Optional[logging.Logger] = default_logger,
    ) -> int:
        """Fetch the module for ``uri`` and return its directory size.

        Jar URIs are awaited through ``self._download_jars``; every other
        URI is downloaded and unpacked into ``self._resources_dir`` using
        ``self._gcs_aio_client``.
        """
        if not is_jar_uri(uri):
            unpacked_dir = await download_and_unpack_package(
                uri, self._resources_dir, self._gcs_aio_client, logger=logger)
            return get_directory_size_bytes(unpacked_dir)

        jars_dir = await self._download_jars(uri=uri, logger=logger)
        return get_directory_size_bytes(jars_dir)
Ejemplo n.º 10
0
    async def create(
        self,
        uri: str,
        runtime_env: RuntimeEnv,
        context: RuntimeEnvContext,
        logger: Optional[logging.Logger] = default_logger,
    ) -> int:
        """Run the pip processor for ``runtime_env``; return the bytes used.

        Returns 0 without doing anything when the runtime env has no pip
        section.  Otherwise the processor runs while holding the
        installs-and-deletions file lock, and the size of the target
        directory is returned.
        """
        if runtime_env.has_pip():
            # Only the hash part of the URI is needed here.
            _, uri_hash = parse_uri(uri)
            install_dir = self._get_path_from_hash(uri_hash)

            with FileLock(self._installs_and_deletions_file_lock):
                PipProcessor(install_dir, runtime_env, logger).run()

            return get_directory_size_bytes(install_dir)

        return 0
Ejemplo n.º 11
0
    def delete_uri(self,
                   uri: str,
                   logger: Optional[logging.Logger] = default_logger) -> int:
        """Remove the conda env for ``uri``; return how many bytes were deleted.

        The size is measured before deletion, and the deletion itself runs
        under the installs-and-deletions file lock.  If ``delete_conda_env``
        reports failure, a warning is logged and 0 is returned.

        Raises:
            ValueError: If the URI protocol is not ``Protocol.CONDA``.
        """
        logger.info(f"Got request to delete URI {uri}")
        protocol, uri_hash = parse_uri(uri)
        if protocol != Protocol.CONDA:
            message = ("CondaManager can only delete URIs with protocol "
                       f"conda.  Received protocol {protocol}, URI {uri}")
            raise ValueError(message)

        env_prefix = self._get_path_from_hash(uri_hash)
        size_before_delete = get_directory_size_bytes(env_prefix)

        with FileLock(self._installs_and_deletions_file_lock):
            removed = delete_conda_env(prefix=env_prefix, logger=logger)

        if removed:
            return size_before_delete

        logger.warning(f"Error when deleting conda env {env_prefix}. ")
        return 0
Ejemplo n.º 12
0
    def delete_uri(self,
                   uri: str,
                   logger: Optional[logging.Logger] = default_logger) -> int:
        """Remove the pip env for ``uri``; return how many bytes were deleted.

        The size is measured before deletion, and the directory tree is
        removed under the installs-and-deletions file lock.  An ``OSError``
        during removal is logged as a warning and results in 0.

        Raises:
            ValueError: If the URI protocol is not ``Protocol.PIP``.
        """
        logger.info(f"Got request to delete pip URI {uri}")
        protocol, uri_hash = parse_uri(uri)
        if protocol != Protocol.PIP:
            message = ("PipManager can only delete URIs with protocol "
                       f"pip. Received protocol {protocol}, URI {uri}")
            raise ValueError(message)

        env_dir = self._get_path_from_hash(uri_hash)
        size_before_delete = get_directory_size_bytes(env_dir)
        try:
            with FileLock(self._installs_and_deletions_file_lock):
                shutil.rmtree(env_dir)
        except OSError as err:
            logger.warning(
                f"Error when deleting pip env {env_dir}: {err!s}")
            return 0
        else:
            return size_before_delete
Ejemplo n.º 13
0
async def test_create_delete_size_equal(tmpdir, ray_start_regular):
    """Check that `create` and `delete_uri` report the same size for a URI."""

    # Build a small nonempty directory to use as the upload source.
    dir_to_upload = Path(tmpdir) / "dir_to_upload"
    dir_to_upload.mkdir(parents=True)
    (dir_to_upload / "file").write_text("F" * 100)

    uri = get_uri_for_directory(dir_to_upload)
    assert get_directory_size_bytes(dir_to_upload) > 0

    assert upload_package_if_needed(uri, tmpdir, dir_to_upload)

    manager = WorkingDirManager(tmpdir)

    size_on_create = await manager.create(uri, {}, RuntimeEnvContext())
    size_on_delete = manager.delete_uri(uri)
    assert size_on_create == size_on_delete
Ejemplo n.º 14
0
 def _create():
     """Download and unpack ``uri``; return the unpacked directory size."""
     return get_directory_size_bytes(
         download_and_unpack_package(
             uri, self._resources_dir, logger=logger))
Ejemplo n.º 15
0
 def local_dir_size_near_4mb():
     """Return True when ``local_dir`` holds strictly between 3 and 5 MiB."""
     size_mib = get_directory_size_bytes(local_dir) / (1024 ** 2)
     return 3 < size_mib < 5