def start_specific_server(self, client_id: str,
                          job_config: JobConfig) -> bool:
    """Launch the dedicated RayClient server process for one client.

    The server entry for ``client_id`` must already be registered; this
    method spawns the process behind it and hands the process object to
    that entry via ``set_result``.

    Args:
        client_id: Identifier of the client the server is started for.
        job_config: Job config providing the serialized runtime env and
            the proto runtime env config.

    Returns:
        True if the launched process has not exited by the time this
        method returns, False otherwise.
    """
    specific_server = self._get_server_for_client(client_id)
    assert specific_server, f"Server has not been created for: {client_id}"

    stdout_handle, stderr_handle = self.node.get_log_file_handles(
        f"ray_client_server_{specific_server.port}", unique=True)

    serialized_runtime_env = job_config.get_serialized_runtime_env()
    runtime_env_config = job_config.get_proto_runtime_env_config()
    if serialized_runtime_env and serialized_runtime_env != "{}":
        # A non-empty runtime env was requested, so build its context.
        env_context = self._create_runtime_env(
            serialized_runtime_env=serialized_runtime_env,
            runtime_env_config=runtime_env_config,
            specific_server=specific_server,
        )
    else:
        # TODO(edoakes): can we just remove this case and always send it
        # to the agent?
        env_context = RuntimeEnvContext().serialize()

    server_proc = start_ray_client_server(
        self.address,
        self.node.node_ip_address,
        specific_server.port,
        stdout_file=stdout_handle,
        stderr_file=stderr_handle,
        fate_share=self.fate_share,
        server_type="specific-server",
        serialized_runtime_env_context=env_context,
        redis_password=self._redis_password,
    )

    # Block until the shim process has turned into the actual RayClient
    # Server (or has died). `psutil` is deliberately not used on Win32,
    # so no waiting happens there.
    pid = server_proc.process.pid
    if sys.platform != "win32":
        watcher = psutil.Process(pid)
        while True:
            if server_proc.process.poll() is not None:
                logger.error(
                    f"SpecificServer startup failed for client: {client_id}")
                break
            if _match_running_client_server(watcher.cmdline()):
                break
            logger.debug(
                "Waiting for Process to reach the actual client server.")
            time.sleep(0.5)

    specific_server.set_result(server_proc)
    logger.info(f"SpecificServer started on port: {specific_server.port} "
                f"with PID: {pid} for client: {client_id}")
    return server_proc.process.poll() is None
def connect(self,
            conn_str: str,
            job_config: JobConfig = None,
            secure: bool = False,
            metadata: List[Tuple[str, str]] = None,
            connection_retries: int = 3,
            namespace: str = None,
            *,
            ignore_version: bool = False) -> Dict[str, Any]:
    """Establish the Ray Client connection to a server.

    Args:
        conn_str: Connection string, in the form "[host]:port"
        job_config: The job config of the server.
        secure: Whether to use a TLS secured gRPC channel
        metadata: gRPC metadata to send on connect
        connection_retries: number of connection attempts to make
        namespace: If not None, it is set as the Ray namespace on
            ``job_config`` (a fresh ``JobConfig`` is created when none
            was passed).
        ignore_version: whether to ignore Python or Ray version
            mismatches. This should only be used for debugging purposes.

    Returns:
        Dictionary of connection info, e.g., {"num_clients": 1}.
        Nothing is returned when a client worker already exists and the
        connection was made through init.

    Raises:
        Exception: If a client worker already exists and the connection
            was not made through init. Any error raised while connecting
            is re-raised after ``disconnect()`` has been called.
    """
    # Imported lazily: importing these at module load time would create
    # circular imports.
    from ray.util.client.worker import Worker
    import ray._private.client_mode_hook

    already_connected = self.client_worker is not None
    if already_connected and self._connected_with_init:
        return
    if already_connected:
        raise Exception(
            "ray.connect() called, but ray client is already connected")

    if not self._inside_client_test:
        # An explicit client connect outside of client mode has to switch
        # client mode on first.
        ray._private.client_mode_hook._explicitly_enable_client_mode()

    if namespace is not None:
        job_config = job_config or JobConfig()
        job_config.set_ray_namespace(namespace)

    if job_config is not None:
        env_spec = json.loads(job_config.get_serialized_runtime_env())
        if any(env_spec.get(field) for field in ("pip", "conda")):
            logger.warning("The 'pip' or 'conda' field was specified in "
                           "the runtime env, so it may take some time to "
                           "install the environment before ray.connect() "
                           "returns.")

    try:
        self.client_worker = Worker(
            conn_str,
            secure=secure,
            metadata=metadata,
            connection_retries=connection_retries)
        self.api.worker = self.client_worker
        self.client_worker._server_init(job_config)
        conn_info = self.client_worker.connection_info()
        self._check_versions(conn_info, ignore_version)
        self._register_serializers()
    except Exception:
        # Tear down whatever was set up before propagating the failure.
        self.disconnect()
        raise
    else:
        return conn_info
def start_specific_server(self, client_id: str,
                          job_config: JobConfig) -> bool:
    """Launch the dedicated RayClient server process for one client.

    The server entry for ``client_id`` must already be registered. The
    working_dir described by the job's runtime env is set up locally
    before the process is spawned, and the process object is handed to
    the server entry via ``set_result``.

    Args:
        client_id: Identifier of the client the server is started for.
        job_config: Job config providing the serialized runtime env.

    Returns:
        True if the launched process has not exited by the time this
        method returns, False otherwise.
    """
    specific_server = self._get_server_for_client(client_id)
    assert specific_server, f"Server has not been created for: {client_id}"

    stdout_handle, stderr_handle = self.node.get_log_file_handles(
        f"ray_client_server_{specific_server.port}", unique=True)

    serialized_runtime_env = job_config.get_serialized_runtime_env()
    runtime_env = json.loads(serialized_runtime_env)

    # Prepare the working_dir for the server.
    # TODO(edoakes): this should go be unified with the worker setup code
    # by going through the runtime_env agent.
    context = RuntimeEnvContext(
        env_vars=runtime_env.get("env_vars"),
        resources_dir=self.node.get_runtime_env_dir_path())
    working_dir_pkg.setup_working_dir(runtime_env, context)

    server_proc = start_ray_client_server(
        self.redis_address,
        specific_server.port,
        stdout_file=stdout_handle,
        stderr_file=stderr_handle,
        fate_share=self.fate_share,
        server_type="specific-server",
        serialized_runtime_env=serialized_runtime_env,
        serialized_runtime_env_context=context.serialize(),
        redis_password=self._redis_password)

    # Block until the shim process has turned into the actual RayClient
    # Server (or has died). `psutil` is deliberately not used on Win32,
    # so no waiting happens there.
    pid = server_proc.process.pid
    if sys.platform != "win32":
        watcher = psutil.Process(pid)
        while True:
            if server_proc.process.poll() is not None:
                logger.error(
                    f"SpecificServer startup failed for client: {client_id}")
                break
            if _match_running_client_server(watcher.cmdline()):
                break
            logger.debug(
                "Waiting for Process to reach the actual client server.")
            time.sleep(0.5)

    specific_server.set_result(server_proc)
    logger.info(f"SpecificServer started on port: {specific_server.port} "
                f"with PID: {pid} for client: {client_id}")
    return server_proc.process.poll() is None
def start_specific_server(self, client_id: str,
                          job_config: JobConfig) -> bool:
    """Register and launch a dedicated RayClient server for one client.

    A ``SpecificServer`` entry (port, process-handle future and gRPC
    channel to ``localhost:<port>``) is stored in ``self.servers`` under
    ``client_id``, then the server process is spawned and the future is
    resolved with the process object.

    Args:
        client_id: Identifier of the client the server is started for.
        job_config: Job config providing the serialized runtime env.

    Returns:
        True if the launched process has not exited by the time this
        method returns, False otherwise.
    """
    # NOTE(review): the original indentation was lost; the lock is taken
    # to cover only port allocation and registration -- confirm.
    with self.server_lock:
        port = self._get_unused_port()
        handle_ready = futures.Future()
        self.servers[client_id] = SpecificServer(
            port=port,
            process_handle_future=handle_ready,
            channel=grpc.insecure_channel(
                f"localhost:{port}", options=GRPC_OPTIONS))

    serialized_runtime_env = job_config.get_serialized_runtime_env()

    server_proc = start_ray_client_server(
        self.redis_address,
        port,
        fate_share=self.fate_share,
        server_type="specific-server",
        serialized_runtime_env=serialized_runtime_env,
        session_dir=self._get_session_dir())

    # Block until the shim process has turned into the actual RayClient
    # Server (or has died). `psutil` is deliberately not used on Win32,
    # so no waiting happens there.
    pid = server_proc.process.pid
    if sys.platform != "win32":
        watcher = psutil.Process(pid)
        while True:
            if server_proc.process.poll() is not None:
                logger.error(
                    f"SpecificServer startup failed for client: {client_id}")
                break
            argv = watcher.cmdline()
            if len(argv) > 3 and argv[2] == "ray.util.client.server":
                break
            logger.debug(
                "Waiting for Process to reach the actual client server.")
            time.sleep(0.5)

    handle_ready.set_result(server_proc)
    logger.info(f"SpecificServer started on port: {port} with PID: {pid} "
                f"for client: {client_id}")
    return server_proc.process.poll() is None
def start_specific_server(self, client_id: str,
                          job_config: JobConfig) -> bool:
    """Launch the dedicated RayClient server process for one client.

    The server entry for ``client_id`` must already be registered; this
    method spawns the process behind it and hands the process object to
    that entry via ``set_result``.

    Args:
        client_id: Identifier of the client the server is started for.
        job_config: Job config providing the serialized runtime env.

    Returns:
        True if the launched process has not exited by the time this
        method returns, False otherwise.
    """
    specific_server = self._get_server_for_client(client_id)
    assert specific_server, f"Server has not been created for: {client_id}"

    serialized_runtime_env = job_config.get_serialized_runtime_env()

    stdout_handle, stderr_handle = self.node.get_log_file_handles(
        f"ray_client_server_{specific_server.port}", unique=True)

    server_proc = start_ray_client_server(
        self.redis_address,
        specific_server.port,
        stdout_file=stdout_handle,
        stderr_file=stderr_handle,
        fate_share=self.fate_share,
        server_type="specific-server",
        serialized_runtime_env=serialized_runtime_env,
        session_dir=self.node.get_session_dir_path())

    # Block until the shim process has turned into the actual RayClient
    # Server (or has died). `psutil` is deliberately not used on Win32,
    # so no waiting happens there.
    pid = server_proc.process.pid
    if sys.platform != "win32":
        watcher = psutil.Process(pid)
        while True:
            if server_proc.process.poll() is not None:
                logger.error(
                    f"SpecificServer startup failed for client: {client_id}")
                break
            if _match_running_client_server(watcher.cmdline()):
                break
            logger.debug(
                "Waiting for Process to reach the actual client server.")
            time.sleep(0.5)

    specific_server.set_result(server_proc)
    logger.info(f"SpecificServer started on port: {specific_server.port} "
                f"with PID: {pid} for client: {client_id}")
    return server_proc.process.poll() is None
def start_specific_server(self, client_id: str,
                          job_config: JobConfig) -> bool:
    """Launch the dedicated RayClient server process for one client.

    The server entry for ``client_id`` must already be registered. When
    the job config carries runtime env URIs, the working_dir is set up
    locally and prepended to ``PYTHONPATH`` in the runtime env's
    ``env_vars`` before the server process is spawned. The process object
    is handed to the server entry via ``set_result``.

    Args:
        client_id: Identifier of the client the server is started for.
        job_config: Job config providing the runtime env (URIs, env_vars)
            and its serialized form. Its runtime env is updated in place
            when URIs are present.

    Returns:
        True if the launched process has not exited by the time this
        method returns, False otherwise.
    """
    specific_server = self._get_server_for_client(client_id)
    assert specific_server, f"Server has not been created for: {client_id}"

    output, error = self.node.get_log_file_handles(
        f"ray_client_server_{specific_server.port}", unique=True)

    # Set up the working_dir for the server.
    # TODO(edoakes): this should go be unified with the worker setup code
    # by going through the runtime_env agent.
    uris = job_config.get_runtime_env_uris() if job_config else []
    if uris:
        # Download and set up the working_dir locally.
        working_dir = working_dir_pkg.ensure_runtime_env_setup(uris)

        # Set PYTHONPATH in the environment variables so the working_dir
        # is included in the module search path.
        runtime_env = job_config.runtime_env
        env_vars = runtime_env.get("env_vars", None) or {}
        python_path = working_dir
        if "PYTHONPATH" in env_vars:
            # The user-supplied PYTHONPATH lives in env_vars, not at the
            # top level of runtime_env; reading it from runtime_env raised
            # KeyError whenever a PYTHONPATH was provided.
            python_path += (os.pathsep + env_vars["PYTHONPATH"])
        env_vars["PYTHONPATH"] = python_path
        runtime_env["env_vars"] = env_vars
        job_config.set_runtime_env(runtime_env)

    # NOTE(review): job_config is guarded against being falsy when the
    # URIs are fetched above but is dereferenced unconditionally here --
    # confirm whether callers can actually pass None.
    serialized_runtime_env = job_config.get_serialized_runtime_env()

    proc = start_ray_client_server(
        self.redis_address,
        specific_server.port,
        stdout_file=output,
        stderr_file=error,
        fate_share=self.fate_share,
        server_type="specific-server",
        serialized_runtime_env=serialized_runtime_env,
        session_dir=self.node.get_session_dir_path(),
        redis_password=self._redis_password)

    # Wait for the process being run transitions from the shim process
    # to the actual RayClient Server.
    pid = proc.process.pid
    if sys.platform != "win32":
        psutil_proc = psutil.Process(pid)
    else:
        psutil_proc = None
    # Don't use `psutil` on Win32
    while psutil_proc is not None:
        if proc.process.poll() is not None:
            logger.error(
                f"SpecificServer startup failed for client: {client_id}")
            break
        cmd = psutil_proc.cmdline()
        if _match_running_client_server(cmd):
            break
        logger.debug(
            "Waiting for Process to reach the actual client server.")
        time.sleep(0.5)
    specific_server.set_result(proc)
    logger.info(f"SpecificServer started on port: {specific_server.port} "
                f"with PID: {pid} for client: {client_id}")
    return proc.process.poll() is None