예제 #1
0
파일: proxier.py 프로젝트: haochihlin/ray
    def start_specific_server(self, client_id: str,
                              job_config: JobConfig) -> bool:
        """
        Launch the dedicated RayClient server process for ``client_id``.

        The runtime env carried by ``job_config`` is decoded, its working
        directory is prepared locally, and the server process is spawned
        with the serialized env and context. On non-Windows platforms the
        call then blocks until the spawned process either exits or its
        command line matches the actual client server.

        Returns:
            True if the spawned process is still running when this method
            returns, False otherwise.
        """
        specific_server = self._get_server_for_client(client_id)
        assert specific_server, f"Server has not been created for: {client_id}"

        stdout_handle, stderr_handle = self.node.get_log_file_handles(
            f"ray_client_server_{specific_server.port}", unique=True)

        serialized_runtime_env = job_config.get_serialized_runtime_env()
        runtime_env = json.loads(serialized_runtime_env)

        # Prepare the working_dir for the server.
        # TODO(edoakes): unify this with the worker setup code by going
        # through the runtime_env agent.
        context = RuntimeEnvContext(
            env_vars=runtime_env.get("env_vars"),
            resources_dir=self.node.get_runtime_env_dir_path())
        working_dir_pkg.setup_working_dir(runtime_env, context)

        proc = start_ray_client_server(
            self.redis_address,
            specific_server.port,
            stdout_file=stdout_handle,
            stderr_file=stderr_handle,
            fate_share=self.fate_share,
            server_type="specific-server",
            serialized_runtime_env=serialized_runtime_env,
            serialized_runtime_env_context=context.serialize(),
            redis_password=self._redis_password)

        # The spawned process starts as a shim and later becomes the real
        # RayClient server; poll until that transition is observed.
        pid = proc.process.pid
        # `psutil` is deliberately not used on Win32, so no waiting there.
        watched = psutil.Process(pid) if sys.platform != "win32" else None
        if watched is not None:
            while True:
                if proc.process.poll() is not None:
                    logger.error(
                        f"SpecificServer startup failed for client: {client_id}")
                    break
                if _match_running_client_server(watched.cmdline()):
                    break
                logger.debug(
                    "Waiting for Process to reach the actual client server.")
                time.sleep(0.5)

        specific_server.set_result(proc)
        logger.info(f"SpecificServer started on port: {specific_server.port} "
                    f"with PID: {pid} for client: {client_id}")
        # A still-running process (poll() is None) counts as success.
        return proc.process.poll() is None
예제 #2
0
def setup_worker(input_args):
    """Assemble the worker start command and replace this process with it.

    Parses ``input_args`` with the module-level ``parser``, derives the
    runtime env and (optionally) its context, builds a chain of shell
    commands (``cd`` into the working dir, conda activation, ``exec`` of the
    Python executable with the remaining arguments), exports the runtime
    env's ``env_vars`` into ``os.environ`` and finally execs ``bash -c`` with
    the joined command string. This function does not return on success.

    Args:
        input_args: Argument list for the setup parser; unknown arguments
            are forwarded to the worker command.
    """
    # remaining_args contains the arguments to the original worker command,
    # minus the python executable, e.g. default_worker.py --node-ip-address=...
    args, remaining_args = parser.parse_known_args(args=input_args)

    commands = []
    py_executable: str = sys.executable
    runtime_env: dict = json.loads(args.serialized_runtime_env or "{}")
    runtime_env_context: RuntimeEnvContext = None
    if args.serialized_runtime_env_context:
        runtime_env_context = RuntimeEnvContext.deserialize(
            args.serialized_runtime_env_context)

    # Ray client server setups runtime env by itself instead of agent.
    if runtime_env.get("conda") or runtime_env.get("pip"):
        if not args.serialized_runtime_env_context:
            runtime_env_context = RuntimeEnvContext(args.session_dir)
            setup_conda_or_pip(runtime_env, runtime_env_context, logger=logger)

    if runtime_env_context and runtime_env_context.working_dir is not None:
        commands += [f"cd {runtime_env_context.working_dir}"]

        # Insert the working_dir as the first entry in PYTHONPATH. This is
        # compatible with users providing their own PYTHONPATH in env_vars.
        env_vars = runtime_env.get("env_vars", None) or {}
        python_path = runtime_env_context.working_dir
        if "PYTHONPATH" in env_vars:
            # The user's PYTHONPATH lives inside env_vars, not at the top
            # level of runtime_env; reading it from runtime_env raised
            # KeyError whenever a user-provided PYTHONPATH was present.
            python_path += os.pathsep + env_vars["PYTHONPATH"]
        env_vars["PYTHONPATH"] = python_path
        runtime_env["env_vars"] = env_vars

    # Add a conda activate command prefix if using a conda env.
    if runtime_env_context and runtime_env_context.conda_env_name is not None:
        # Use the bare "python" name instead of sys.executable once a conda
        # env is going to be activated by the command prefix.
        py_executable = "python"
        conda_activate_commands = get_conda_activate_commands(
            runtime_env_context.conda_env_name)
        if conda_activate_commands:
            commands += conda_activate_commands
    elif runtime_env.get("conda"):
        logger.warning(
            "Conda env name is not found in context, "
            "but conda exists in runtime env. The runtime env %s, "
            "the context %s.", args.serialized_runtime_env,
            args.serialized_runtime_env_context)

    commands += [" ".join([f"exec {py_executable}"] + remaining_args)]
    command_str = " && ".join(commands)

    # Export the runtime env's variables before handing over to bash.
    if runtime_env.get("env_vars"):
        env_vars = runtime_env["env_vars"]
        os.environ.update(env_vars)
    os.execvp("bash", ["bash", "-c", command_str])
예제 #3
0
def setup_working_dir(runtime_env: dict,
                      context: RuntimeEnvContext,
                      logger: Optional[logging.Logger] = None):
    """Set up the working directory for ``runtime_env`` and record it in
    ``context``.

    Does nothing when ``runtime_env`` has no (or empty) ``"uris"``.
    Otherwise the module-wide ``_logger`` (if ``logger`` is given) and
    ``PKG_DIR`` are temporarily overridden, ``ensure_runtime_env_setup`` is
    called with the URIs, a ``cd <working_dir>`` entry is appended to
    ``context.command_prefix`` and the working dir is prepended to
    ``PYTHONPATH`` in ``context.env_vars``.

    The overridden globals are restored even if setup raises, so a failed
    setup cannot leave another caller's logger or package directory in
    place.

    Args:
        runtime_env: Runtime env dict; only the ``"uris"`` key is read here.
        context: Context to update; ``context.resources_dir`` must not be
            None.
        logger: Optional logger to use in place of the module logger for the
            duration of the call.

    Raises:
        AssertionError: If ``context.resources_dir`` is None.
    """
    if not runtime_env.get("uris"):
        return

    # Overwrite the module-wide logger and PKG_DIR temporarily.
    # TODO(edoakes): we should be able to remove this by refactoring the
    # working_dir setup code into a class instead of using global vars.
    global _logger, PKG_DIR

    # Validate before touching any global so a failed check leaks nothing.
    assert context.resources_dir is not None

    prev_logger = _logger
    prev_pkg_dir = PKG_DIR
    if logger:
        _logger = logger
    PKG_DIR = context.resources_dir

    try:
        working_dir = ensure_runtime_env_setup(runtime_env["uris"])
        context.command_prefix += [f"cd {working_dir}"]

        # Insert the working_dir as the first entry in PYTHONPATH. This is
        # compatible with users providing their own PYTHONPATH in env_vars.
        python_path = working_dir
        if "PYTHONPATH" in context.env_vars:
            python_path += os.pathsep + context.env_vars["PYTHONPATH"]
        context.env_vars["PYTHONPATH"] = python_path
    finally:
        # Always put the globals back, on success and on failure alike.
        PKG_DIR = prev_pkg_dir
        if logger:
            _logger = prev_logger
예제 #4
0
            def run_setup_with_logger():
                """Build the RuntimeEnvContext for the enclosing request.

                Decodes the serialized runtime env and allocated resources
                from the enclosing scope, runs the conda and working_dir
                managers with a per-job logger, and records every URI of
                the env against the serialized env.

                Returns:
                    The populated RuntimeEnvContext.
                """
                runtime_env: dict = json.loads(serialized_runtime_env or "{}")
                allocated_resource: dict = json.loads(
                    serialized_allocated_resource_instances or "{}")

                # Use a separate logger for each job.
                per_job_logger = self.get_or_create_logger(request.job_id)
                # TODO(chenk008): Add log about allocated_resource to
                # avoid lint error. That will be moved to cgroup plugin.
                per_job_logger.debug(f"Worker has resource :"
                                     f"{allocated_resource}")
                context = RuntimeEnvContext(
                    env_vars=runtime_env.get("env_vars"))
                self._conda_manager.setup(runtime_env,
                                          context,
                                          logger=per_job_logger)
                self._working_dir_manager.setup(runtime_env,
                                                context,
                                                logger=per_job_logger)

                # Add the mapping of URIs -> the serialized environment to be
                # used for cache invalidation.
                # `or []` (rather than a .get default) also covers a "uris"
                # key that is present but null, which would otherwise make
                # the loop raise TypeError.
                for uri in runtime_env.get("uris") or []:
                    self._working_dir_uri_to_envs[uri].add(
                        serialized_runtime_env)

                return context
예제 #5
0
            def run_setup_with_logger():
                """Build the RuntimeEnvContext for the enclosing request.

                Decodes the serialized runtime env from the enclosing scope,
                runs the conda and working_dir managers with a per-job
                logger, records every URI of the env against the serialized
                env, and then lets each configured plugin create its
                resources and modify the context.

                Returns:
                    The populated RuntimeEnvContext.
                """
                runtime_env: dict = json.loads(serialized_runtime_env or "{}")

                # Use a separate logger for each job.
                per_job_logger = self.get_or_create_logger(request.job_id)
                context = RuntimeEnvContext(
                    env_vars=runtime_env.get("env_vars"))
                self._conda_manager.setup(runtime_env,
                                          context,
                                          logger=per_job_logger)
                self._working_dir_manager.setup(runtime_env,
                                                context,
                                                logger=per_job_logger)

                # Add the mapping of URIs -> the serialized environment to be
                # used for cache invalidation.
                for uri in runtime_env.get("uris") or []:
                    self._working_dir_uri_to_envs[uri].add(
                        serialized_runtime_env)

                # Run setup function from all the plugins. `or {}` mirrors
                # the "uris" handling above so a null "plugins" entry is
                # treated as "no plugins" instead of raising AttributeError.
                for plugin_class_path in runtime_env.get("plugins") or {}:
                    plugin_class = import_attr(plugin_class_path)
                    # TODO(simon): implement uri support
                    plugin_class.create("uri not implemented", runtime_env,
                                        context)
                    plugin_class.modify_context("uri not implemented",
                                                runtime_env, context)

                return context
예제 #6
0
            def run_setup_with_logger():
                """Create and populate a RuntimeEnvContext for the request.

                The context is rooted at ``self._runtime_env_dir``; conda/pip
                setup runs first, then working_dir setup, both logging
                through the logger dedicated to ``request.job_id``.
                """
                env_spec: dict = json.loads(serialized_runtime_env or "{}")

                # Each job logs through its own logger.
                job_logger = self.get_or_create_logger(request.job_id)

                env_context = RuntimeEnvContext(self._runtime_env_dir)
                setup_conda_or_pip(env_spec, env_context, logger=job_logger)
                setup_working_dir(env_spec, env_context, logger=job_logger)
                return env_context
예제 #7
0
파일: proxier.py 프로젝트: holdenk/ray
    def start_specific_server(self, client_id: str,
                              job_config: JobConfig) -> bool:
        """
        Launch the dedicated RayClient server process for ``client_id``.

        An empty runtime env (serialized as ``"{}"``) gets a default
        serialized RuntimeEnvContext; any other env is handed to
        ``self._create_runtime_env`` to obtain the serialized context. On
        non-Windows platforms the call then blocks until the spawned process
        either exits or its command line matches the actual client server.

        Returns:
            True if the spawned process is still running when this method
            returns, False otherwise.
        """
        specific_server = self._get_server_for_client(client_id)
        assert specific_server, f"Server has not been created for: {client_id}"

        stdout_handle, stderr_handle = self.node.get_log_file_handles(
            f"ray_client_server_{specific_server.port}", unique=True)

        serialized_runtime_env = job_config.get_serialized_runtime_env()
        if serialized_runtime_env != "{}":
            serialized_runtime_env_context = self._create_runtime_env(
                serialized_runtime_env=serialized_runtime_env,
                specific_server=specific_server,
            )
        else:
            # Nothing to set up: pass along a default context.
            serialized_runtime_env_context = RuntimeEnvContext().serialize()

        proc = start_ray_client_server(
            self.redis_address,
            specific_server.port,
            stdout_file=stdout_handle,
            stderr_file=stderr_handle,
            fate_share=self.fate_share,
            server_type="specific-server",
            serialized_runtime_env_context=serialized_runtime_env_context,
            redis_password=self._redis_password)

        # The spawned process starts as a shim and later becomes the real
        # RayClient server; poll until that transition is observed.
        pid = proc.process.pid
        # `psutil` is deliberately not used on Win32, so no waiting there.
        watched = psutil.Process(pid) if sys.platform != "win32" else None
        if watched is not None:
            while True:
                if proc.process.poll() is not None:
                    logger.error(
                        f"SpecificServer startup failed for client: {client_id}")
                    break
                if _match_running_client_server(watched.cmdline()):
                    break
                logger.debug(
                    "Waiting for Process to reach the actual client server.")
                time.sleep(0.5)

        specific_server.set_result(proc)
        logger.info(f"SpecificServer started on port: {specific_server.port} "
                    f"with PID: {pid} for client: {client_id}")
        # A still-running process (poll() is None) counts as success.
        return proc.process.poll() is None
예제 #8
0
def setup_worker(input_args):
    """Resolve the runtime env context and exec the worker through it.

    ``input_args`` is parsed with the module-level ``parser``; arguments the
    parser does not know are the original worker command (minus the python
    executable, e.g. ``default_worker.py --node-ip-address=...``) and are
    forwarded to ``RuntimeEnvContext.exec_worker``.
    """
    args, worker_args = parser.parse_known_args(args=input_args)

    runtime_env: dict = json.loads(args.serialized_runtime_env or "{}")

    # Prefer a context that was serialized for us; otherwise start a fresh
    # one seeded with the runtime env's env_vars.
    serialized_context = args.serialized_runtime_env_context
    if serialized_context:
        runtime_env_context = RuntimeEnvContext.deserialize(
            serialized_context)
    else:
        runtime_env_context = RuntimeEnvContext(
            env_vars=runtime_env.get("env_vars"))

    # Ray client server sets up the runtime env by itself instead of relying
    # on the agent.
    if args.from_ray_client and (runtime_env.get("conda")
                                 or runtime_env.get("pip")):
        setup_conda_or_pip(runtime_env, runtime_env_context, logger=logger)

    runtime_env_context.exec_worker(worker_args)
예제 #9
0
파일: conda.py 프로젝트: haochihlin/ray
def setup_conda_or_pip(runtime_env: dict,
                       context: RuntimeEnvContext,
                       logger: Optional[logging.Logger] = None):
    """Prepare the conda environment for ``runtime_env`` and update
    ``context`` so that it is activated.

    Returns immediately when the runtime env has neither ``"conda"`` nor
    ``"pip"``. A string ``"conda"`` value is used directly as the env name.
    Otherwise an ``environment.yml`` is written under
    ``<context.resources_dir>/conda`` and an env is obtained from it via
    ``get_or_create_conda_env``. In both cases ``context.py_executable`` is
    set to ``"python"`` and the conda activate commands are appended to
    ``context.command_prefix``.

    Args:
        runtime_env: Runtime env dict.
        context: Context to update; its ``resources_dir`` is read.
        logger: Logger to use; defaults to this module's logger.
    """
    if logger is None:
        logger = logging.getLogger(__name__)

    if not (runtime_env.get("conda") or runtime_env.get("pip")):
        return

    logger.debug(f"Setting up conda or pip for runtime_env: {runtime_env}")
    conda_dict = get_conda_dict(runtime_env, context.resources_dir)
    conda_field = runtime_env.get("conda")
    if isinstance(conda_field, str):
        # A string names the conda env to activate directly.
        conda_env_name = conda_field
    else:
        assert conda_dict is not None

        # Decide which extra pip dependencies to inject into the env.
        ray_pip = current_ray_pip_specifier(logger)
        if ray_pip:
            extra_pip_dependencies = [ray_pip, "ray[default]"]
        elif runtime_env.get("_inject_current_ray"):
            extra_pip_dependencies = (
                _resolve_install_from_source_ray_dependencies())
        else:
            extra_pip_dependencies = []
        conda_dict = inject_dependencies(conda_dict, _current_py_version(),
                                         extra_pip_dependencies)
        logger.info(f"Setting up conda environment with {runtime_env}")

        # Installing conda envs from several processes at once is unsafe
        # even when the envs differ, so every install takes one global lock.
        # See https://github.com/ray-project/ray/issues/17086
        lock_path = os.path.join(context.resources_dir,
                                 "ray-conda-install.lock")
        with FileLock(lock_path):
            conda_dir = os.path.join(context.resources_dir, "conda")
            try_to_create_directory(conda_dir)
            conda_yaml_path = os.path.join(conda_dir, "environment.yml")
            with open(conda_yaml_path, "w") as yaml_file:
                # Keys are sorted because the file contents are hashed and
                # the hash must not depend on dependency ordering.
                yaml.dump(conda_dict, yaml_file, sort_keys=True)
            conda_env_name = get_or_create_conda_env(conda_yaml_path,
                                                     conda_dir,
                                                     logger=logger)

        if runtime_env.get("_inject_current_ray"):
            _inject_ray_to_conda_site(
                os.path.join(conda_dir, conda_env_name), logger)

    context.py_executable = "python"
    context.command_prefix += get_conda_activate_commands(conda_env_name)
    logger.info(f"Finished setting up runtime environment at {conda_env_name}")
예제 #10
0
파일: working_dir.py 프로젝트: rlan/ray
    def setup(self,
              runtime_env: dict,
              context: RuntimeEnvContext,
              logger: Optional[logging.Logger] = default_logger):
        """Set up the working directory for ``runtime_env`` and record it in
        ``context``.

        Does nothing when the runtime env has no (or empty) ``"uris"``.
        Otherwise a ``cd <working_dir>`` entry is appended to
        ``context.command_prefix`` and the working dir becomes the first
        entry of ``PYTHONPATH`` in ``context.env_vars``.
        """
        uris = runtime_env.get("uris")
        if not uris:
            return

        working_dir = self.ensure_runtime_env_setup(uris, logger=logger)
        context.command_prefix += [f"cd {working_dir}"]

        # The working_dir goes in front of any PYTHONPATH the user already
        # supplied through env_vars, so both remain importable.
        python_path = (
            working_dir + os.pathsep + context.env_vars["PYTHONPATH"]
            if "PYTHONPATH" in context.env_vars else working_dir)
        context.env_vars["PYTHONPATH"] = python_path
예제 #11
0
            def run_setup_with_logger():
                """Build the RuntimeEnvContext for the enclosing request.

                Runs the conda manager and then the working_dir manager with
                the logger dedicated to ``request.job_id``, and records each
                URI of the env against the serialized env.
                """
                env_spec: dict = json.loads(serialized_runtime_env or "{}")

                # Each job logs through its own logger.
                job_logger = self.get_or_create_logger(request.job_id)

                env_context = RuntimeEnvContext(
                    env_vars=env_spec.get("env_vars"))
                self._conda_manager.setup(
                    env_spec, env_context, logger=job_logger)
                self._working_dir_manager.setup(
                    env_spec, env_context, logger=job_logger)

                # Remember which serialized envs use each URI; this mapping
                # drives cache invalidation.
                uris = env_spec.get("uris") or []
                for uri in uris:
                    self._working_dir_uri_to_envs[uri].add(
                        serialized_runtime_env)

                return env_context
예제 #12
0
파일: setup_worker.py 프로젝트: rlan/ray
    ]
    container_command.append("--env")
    container_command.append("RAY_RAYLET_PID=" + str(os.getppid()))
    if container_option.get("run_options"):
        container_command.extend(container_option.get("run_options"))
    container_command.extend(
        parse_allocated_resource(args.allocated_instances_serialized_json))

    container_command.append("--entrypoint")
    container_command.append("python")
    container_command.append(container_option.get("image"))
    container_command.extend(entrypoint_args)
    logger.warning("start worker in container: {}".format(container_command))
    os.execvp(container_driver, container_command)


if __name__ == "__main__":
    # Script entry point: start the worker inside a container when the
    # runtime env names a container image, otherwise exec it directly
    # through the runtime env context.
    args, remaining_args = parser.parse_known_args()
    runtime_env: dict = json.loads(args.serialized_runtime_env or "{}")
    container_option = runtime_env.get("container")
    if not container_option or not container_option.get("image"):
        # NOTE(edoakes): args.serialized_runtime_env_context is only None
        # when we're starting the main Ray client proxy server. That case
        # should probably not even go through this codepath.
        runtime_env_context = RuntimeEnvContext.deserialize(
            args.serialized_runtime_env_context or "{}")

        runtime_env_context.exec_worker(remaining_args)
    else:
        start_worker_in_container(container_option, args, remaining_args)