Beispiel #1
0
    def setup(self,
              runtime_env: RuntimeEnv,
              context: RuntimeEnvContext,
              logger: Optional[logging.Logger] = default_logger):
        if not runtime_env.has_conda() and not runtime_env.has_pip():
            return

        logger.debug("Setting up conda or pip for runtime_env: "
                     f"{runtime_env.serialize()}")

        if runtime_env.conda_env_name():
            conda_env_name = runtime_env.conda_env_name()
        else:
            conda_dict = get_conda_dict(runtime_env, self._resources_dir)
            protocol, hash = parse_uri(runtime_env.conda_uri())
            conda_env_name = self._get_path_from_hash(hash)
            assert conda_dict is not None

            ray_pip = current_ray_pip_specifier(logger=logger)
            if ray_pip:
                extra_pip_dependencies = [ray_pip, "ray[default]"]
            elif runtime_env.get_extension("_inject_current_ray") == "True":
                extra_pip_dependencies = (
                    _resolve_install_from_source_ray_dependencies())
            else:
                extra_pip_dependencies = []
            conda_dict = inject_dependencies(conda_dict, _current_py_version(),
                                             extra_pip_dependencies)

            # It is not safe for multiple processes to install conda envs
            # concurrently, even if the envs are different, so use a global
            # lock for all conda installs.
            # See https://github.com/ray-project/ray/issues/17086
            file_lock_name = "ray-conda-install.lock"
            with FileLock(os.path.join(self._resources_dir, file_lock_name)):
                try:
                    conda_yaml_file = os.path.join(self._resources_dir,
                                                   "environment.yml")
                    with open(conda_yaml_file, "w") as file:
                        yaml.dump(conda_dict, file)

                    if conda_env_name in self._created_envs:
                        logger.debug(f"Conda env {conda_env_name} already "
                                     "created, skipping creation.")
                    else:
                        create_conda_env(conda_yaml_file,
                                         prefix=conda_env_name,
                                         logger=logger)
                        self._created_envs.add(conda_env_name)
                finally:
                    os.remove(conda_yaml_file)

                if runtime_env.get_extension("_inject_current_ray"):
                    _inject_ray_to_conda_site(conda_path=conda_env_name,
                                              logger=logger)

        context.py_executable = "python"
        context.command_prefix += get_conda_activate_commands(conda_env_name)
        logger.info(
            f"Finished setting up runtime environment at {conda_env_name}")
Beispiel #2
0
    def setup(self,
              runtime_env: RuntimeEnv,
              context: RuntimeEnvContext,
              logger: Optional[logging.Logger] = default_logger):
        if not runtime_env.has_pip():
            return

        logger.debug(f"Setting up pip for runtime_env: {runtime_env}")
        pip_packages: List[str] = runtime_env.pip_packages()
        target_dir = self._get_path_from_hash(_get_pip_hash(pip_packages))

        _install_pip_list_to_dir(pip_packages, target_dir, logger=logger)

        # Despite Ray being removed from the input pip list during validation,
        # other packages in the pip list (for example, xgboost_ray) may
        # themselves include Ray as a dependency.  In this case, we will have
        # inadvertently installed the latest Ray version in the target_dir,
        # which may cause Ray version mismatch issues. Uninstall it here, if it
        # exists, to make the workers use the Ray that is already
        # installed in the cluster.
        #
        # In the case where the user explicitly wants to include Ray in their
        # pip list (and signals this by setting the environment variable below)
        # then we don't want this deletion logic, so we skip it.
        if os.environ.get(RAY_RUNTIME_ENV_ALLOW_RAY_IN_PIP) != 1:
            ray_path = Path(target_dir) / "ray"
            if ray_path.exists() and ray_path.is_dir():
                shutil.rmtree(ray_path)

        # Insert the target directory into the PYTHONPATH.
        python_path = target_dir
        if "PYTHONPATH" in context.env_vars:
            python_path += os.pathsep + context.env_vars["PYTHONPATH"]
        context.env_vars["PYTHONPATH"] = python_path
Beispiel #3
0
    async def create(
        self,
        uri: str,
        runtime_env: RuntimeEnv,
        context: RuntimeEnvContext,
        logger: Optional[logging.Logger] = default_logger,
    ) -> int:
        if not runtime_env.has_pip():
            return 0

        protocol, hash = parse_uri(uri)
        target_dir = self._get_path_from_hash(hash)

        with FileLock(self._installs_and_deletions_file_lock):
            pip_processor = PipProcessor(target_dir, runtime_env, logger)
            pip_processor.run()

        return get_directory_size_bytes(target_dir)
Beispiel #4
0
    def modify_context(
        self,
        uri: str,
        runtime_env: RuntimeEnv,
        context: RuntimeEnvContext,
        logger: Optional[logging.Logger] = default_logger,
    ):
        if not runtime_env.has_pip():
            return
        # Update py_executable.
        protocol, hash = parse_uri(uri)
        target_dir = self._get_path_from_hash(hash)
        virtualenv_python = _PathHelper.get_virtualenv_python(target_dir)

        if not os.path.exists(virtualenv_python):
            raise ValueError(
                f"Local directory {target_dir} for URI {uri} does "
                "not exist on the cluster. Something may have gone wrong while "
                "installing the runtime_env `pip` packages.")
        context.py_executable = virtualenv_python
Beispiel #5
0
 def modify_context(
     self,
     uri: str,
     runtime_env: RuntimeEnv,
     context: RuntimeEnvContext,
     logger: Optional[logging.Logger] = default_logger,
 ):
     if not runtime_env.has_pip():
         return
     # Insert the target directory into the PYTHONPATH.
     protocol, hash = parse_uri(uri)
     target_dir = get_local_dir_from_uri(uri, self._resources_dir)
     if not target_dir.exists():
         raise ValueError(
             f"Local directory {target_dir} for URI {uri} does "
             "not exist on the cluster. Something may have gone wrong while "
             "installing the runtime_env `pip` packages.")
     python_path = str(target_dir)
     if "PYTHONPATH" in context.env_vars:
         python_path += os.pathsep + context.env_vars["PYTHONPATH"]
     context.env_vars["PYTHONPATH"] = python_path