Example #1
0
    def start_specific_server(self, client_id: str,
                              job_config: JobConfig) -> bool:
        """
        Start up a RayClient Server for an incoming client to
        communicate with. Returns whether creation was successful.
        """
        specific_server = self._get_server_for_client(client_id)
        assert specific_server, f"Server has not been created for: {client_id}"

        output, error = self.node.get_log_file_handles(
            f"ray_client_server_{specific_server.port}", unique=True)

        serialized_runtime_env = job_config.get_serialized_runtime_env()
        runtime_env_config = job_config.get_proto_runtime_env_config()
        if not serialized_runtime_env or serialized_runtime_env == "{}":
            # TODO(edoakes): can we just remove this case and always send it
            # to the agent?
            serialized_runtime_env_context = RuntimeEnvContext().serialize()
        else:
            serialized_runtime_env_context = self._create_runtime_env(
                serialized_runtime_env=serialized_runtime_env,
                runtime_env_config=runtime_env_config,
                specific_server=specific_server,
            )

        proc = start_ray_client_server(
            self.address,
            self.node.node_ip_address,
            specific_server.port,
            stdout_file=output,
            stderr_file=error,
            fate_share=self.fate_share,
            server_type="specific-server",
            serialized_runtime_env_context=serialized_runtime_env_context,
            redis_password=self._redis_password,
        )

        # Wait for the process being run transitions from the shim process
        # to the actual RayClient Server.
        pid = proc.process.pid
        if sys.platform != "win32":
            psutil_proc = psutil.Process(pid)
        else:
            psutil_proc = None
        # Don't use `psutil` on Win32
        while psutil_proc is not None:
            if proc.process.poll() is not None:
                logger.error(
                    f"SpecificServer startup failed for client: {client_id}")
                break
            cmd = psutil_proc.cmdline()
            if _match_running_client_server(cmd):
                break
            logger.debug(
                "Waiting for Process to reach the actual client server.")
            time.sleep(0.5)
        specific_server.set_result(proc)
        logger.info(f"SpecificServer started on port: {specific_server.port} "
                    f"with PID: {pid} for client: {client_id}")
        return proc.process.poll() is None
Example #2
0
    def start_specific_server(self, client_id: str,
                              job_config: JobConfig) -> bool:
        """
        Start up a RayClient Server for an incoming client to
        communicate with. Returns whether creation was successful.
        """
        specific_server = self._get_server_for_client(client_id)
        assert specific_server, f"Server has not been created for: {client_id}"

        output, error = self.node.get_log_file_handles(
            f"ray_client_server_{specific_server.port}", unique=True)

        serialized_runtime_env = job_config.get_serialized_runtime_env()
        runtime_env = json.loads(serialized_runtime_env)

        # Set up the working_dir for the server.
        # TODO(edoakes): this should go be unified with the worker setup code
        # by going through the runtime_env agent.
        context = RuntimeEnvContext(
            env_vars=runtime_env.get("env_vars"),
            resources_dir=self.node.get_runtime_env_dir_path())
        working_dir_pkg.setup_working_dir(runtime_env, context)

        proc = start_ray_client_server(
            self.redis_address,
            specific_server.port,
            stdout_file=output,
            stderr_file=error,
            fate_share=self.fate_share,
            server_type="specific-server",
            serialized_runtime_env=serialized_runtime_env,
            serialized_runtime_env_context=context.serialize(),
            redis_password=self._redis_password)

        # Wait for the process being run transitions from the shim process
        # to the actual RayClient Server.
        pid = proc.process.pid
        if sys.platform != "win32":
            psutil_proc = psutil.Process(pid)
        else:
            psutil_proc = None
        # Don't use `psutil` on Win32
        while psutil_proc is not None:
            if proc.process.poll() is not None:
                logger.error(
                    f"SpecificServer startup failed for client: {client_id}")
                break
            cmd = psutil_proc.cmdline()
            if _match_running_client_server(cmd):
                break
            logger.debug(
                "Waiting for Process to reach the actual client server.")
            time.sleep(0.5)
        specific_server.set_result(proc)
        logger.info(f"SpecificServer started on port: {specific_server.port} "
                    f"with PID: {pid} for client: {client_id}")
        return proc.process.poll() is None
Example #3
0
    def start_specific_server(self, client_id: str,
                              job_config: JobConfig) -> bool:
        """
        Start up a RayClient Server for an incoming client to
        communicate with. Returns whether creation was successful.
        """
        with self.server_lock:
            port = self._get_unused_port()
            handle_ready = futures.Future()
            specific_server = SpecificServer(
                port=port,
                process_handle_future=handle_ready,
                channel=grpc.insecure_channel(f"localhost:{port}",
                                              options=GRPC_OPTIONS))
            self.servers[client_id] = specific_server

        serialized_runtime_env = job_config.get_serialized_runtime_env()

        proc = start_ray_client_server(
            self.redis_address,
            port,
            fate_share=self.fate_share,
            server_type="specific-server",
            serialized_runtime_env=serialized_runtime_env,
            session_dir=self._get_session_dir())

        # Wait for the process being run transitions from the shim process
        # to the actual RayClient Server.
        pid = proc.process.pid
        if sys.platform != "win32":
            psutil_proc = psutil.Process(pid)
        else:
            psutil_proc = None
        # Don't use `psutil` on Win32
        while psutil_proc is not None:
            if proc.process.poll() is not None:
                logger.error(
                    f"SpecificServer startup failed for client: {client_id}")
                break
            cmd = psutil_proc.cmdline()
            if len(cmd) > 3 and cmd[2] == "ray.util.client.server":
                break
            logger.debug(
                "Waiting for Process to reach the actual client server.")
            time.sleep(0.5)
        handle_ready.set_result(proc)
        logger.info(f"SpecificServer started on port: {port} with PID: {pid} "
                    f"for client: {client_id}")
        return proc.process.poll() is None
Example #4
0
    def start_specific_server(self, client_id: str,
                              job_config: JobConfig) -> bool:
        """
        Start up a RayClient Server for an incoming client to
        communicate with. Returns whether creation was successful.
        """
        specific_server = self._get_server_for_client(client_id)
        assert specific_server, f"Server has not been created for: {client_id}"

        serialized_runtime_env = job_config.get_serialized_runtime_env()

        output, error = self.node.get_log_file_handles(
            f"ray_client_server_{specific_server.port}", unique=True)

        proc = start_ray_client_server(
            self.redis_address,
            specific_server.port,
            stdout_file=output,
            stderr_file=error,
            fate_share=self.fate_share,
            server_type="specific-server",
            serialized_runtime_env=serialized_runtime_env,
            session_dir=self.node.get_session_dir_path())

        # Wait for the process being run transitions from the shim process
        # to the actual RayClient Server.
        pid = proc.process.pid
        if sys.platform != "win32":
            psutil_proc = psutil.Process(pid)
        else:
            psutil_proc = None
        # Don't use `psutil` on Win32
        while psutil_proc is not None:
            if proc.process.poll() is not None:
                logger.error(
                    f"SpecificServer startup failed for client: {client_id}")
                break
            cmd = psutil_proc.cmdline()
            if _match_running_client_server(cmd):
                break
            logger.debug(
                "Waiting for Process to reach the actual client server.")
            time.sleep(0.5)
        specific_server.set_result(proc)
        logger.info(f"SpecificServer started on port: {specific_server.port} "
                    f"with PID: {pid} for client: {client_id}")
        return proc.process.poll() is None
Example #5
0
 def start_specific_server(self, client_id) -> None:
     """
     Start up a RayClient Server for an incoming client to
     communicate with.
     """
     port = self._get_unused_port()
     specific_server = SpecificServer(
         port=port,
         process_handle=start_ray_client_server(
             self.redis_address,
             port,
             fate_share=self.fate_share,
             server_type="specific-server"),
         channel=grpc.insecure_channel(f"localhost:{port}",
                                       options=GRPC_OPTIONS))
     with self.server_lock:
         self.servers[client_id] = specific_server
Example #6
0
    def start_specific_server(self, client_id: str,
                              job_config: JobConfig) -> bool:
        """
        Start up a RayClient Server for an incoming client to
        communicate with. Returns whether creation was successful.
        """
        specific_server = self._get_server_for_client(client_id)
        assert specific_server, f"Server has not been created for: {client_id}"

        output, error = self.node.get_log_file_handles(
            f"ray_client_server_{specific_server.port}", unique=True)

        # Set up the working_dir for the server.
        # TODO(edoakes): this should go be unified with the worker setup code
        # by going through the runtime_env agent.
        uris = job_config.get_runtime_env_uris() if job_config else []
        if uris:
            # Download and set up the working_dir locally.
            working_dir = working_dir_pkg.ensure_runtime_env_setup(uris)

            # Set PYTHONPATH in the environment variables so the working_dir
            # is included in the module search path.
            runtime_env = job_config.runtime_env
            env_vars = runtime_env.get("env_vars", None) or {}
            python_path = working_dir
            if "PYTHONPATH" in env_vars:
                python_path += (os.pathsep + runtime_env["PYTHONPATH"])
            env_vars["PYTHONPATH"] = python_path
            runtime_env["env_vars"] = env_vars
            job_config.set_runtime_env(runtime_env)

        serialized_runtime_env = job_config.get_serialized_runtime_env()

        proc = start_ray_client_server(
            self.redis_address,
            specific_server.port,
            stdout_file=output,
            stderr_file=error,
            fate_share=self.fate_share,
            server_type="specific-server",
            serialized_runtime_env=serialized_runtime_env,
            session_dir=self.node.get_session_dir_path(),
            redis_password=self._redis_password)

        # Wait for the process being run transitions from the shim process
        # to the actual RayClient Server.
        pid = proc.process.pid
        if sys.platform != "win32":
            psutil_proc = psutil.Process(pid)
        else:
            psutil_proc = None
        # Don't use `psutil` on Win32
        while psutil_proc is not None:
            if proc.process.poll() is not None:
                logger.error(
                    f"SpecificServer startup failed for client: {client_id}")
                break
            cmd = psutil_proc.cmdline()
            if _match_running_client_server(cmd):
                break
            logger.debug(
                "Waiting for Process to reach the actual client server.")
            time.sleep(0.5)
        specific_server.set_result(proc)
        logger.info(f"SpecificServer started on port: {specific_server.port} "
                    f"with PID: {pid} for client: {client_id}")
        return proc.process.poll() is None