def test_prepare_runtime_init_req_modified_job():
    """
    Check that `prepare_runtime_init_req` properly extracts the JobConfig
    and modifies it according to `ray_client_server_env_prep`.
    """
    config = JobConfig(
        runtime_env={"env_vars": {"KEY": "VALUE"}}, ray_namespace="abc"
    )
    request = ray_client_pb2.DataRequest(
        init=ray_client_pb2.InitRequest(
            job_config=pickle.dumps(config),
            ray_init_kwargs=json.dumps({"log_to_driver": False}),
        )
    )

    def set_test_namespace(job_config: JobConfig):
        # Hook under test: rewrites the namespace on the incoming config.
        job_config.set_ray_namespace("test_value")
        return job_config

    with patch.object(proxier, "ray_client_server_env_prep", set_test_namespace):
        out_req, out_config = proxier.prepare_runtime_init_req(request)
        # The hook's namespace change must be reflected in the result...
        assert out_config.ray_namespace == "test_value"
        # ...and the pickled config inside the request must match it.
        assert (pickle.loads(out_req.init.job_config).serialize()
                == out_config.serialize())
        # ray_init_kwargs passes through untouched.
        assert json.loads(out_req.init.ray_init_kwargs) == {
            "log_to_driver": False
        }
def chunk_task(req: ray_client_pb2.DataRequest):
    """
    Lazily split a client task into chunk-sized DataRequests.

    Laziness matters for large arguments: slicing a bytes object copies
    it, so materializing every chunk up front and queueing them all would
    roughly double the client-side memory needed to handle the task.

    Yields:
        One ray_client_pb2.DataRequest per chunk, preserving req_id.
    """
    task = req.task
    data_len = len(task.data)
    assert data_len > 0, "Cannot chunk object with missing data"
    num_chunks = math.ceil(data_len / OBJECT_TRANSFER_CHUNK_SIZE)
    for idx in range(num_chunks):
        lo = idx * OBJECT_TRANSFER_CHUNK_SIZE
        hi = min(data_len, lo + OBJECT_TRANSFER_CHUNK_SIZE)
        piece = ray_client_pb2.ClientTask(
            type=task.type,
            name=task.name,
            payload_id=task.payload_id,
            client_id=task.client_id,
            options=task.options,
            baseline_options=task.baseline_options,
            namespace=task.namespace,
            data=task.data[lo:hi],
            chunk_id=idx,
            total_chunks=num_chunks,
        )
        yield ray_client_pb2.DataRequest(req_id=req.req_id, task=piece)
def ReleaseObject(
    self, request: ray_client_pb2.ReleaseRequest, context=None
) -> None:
    """Send a release request over the data channel without awaiting a reply."""
    self._async_send(ray_client_pb2.DataRequest(release=request))
def chunk_put(req: ray_client_pb2.DataRequest):
    """
    Lazily split a put request into chunk-sized DataRequests.

    Laziness matters for large objects: slicing a bytes object copies it,
    so materializing every chunk up front and queueing them all would
    roughly double the client-side memory needed to handle the put.

    Yields:
        One ray_client_pb2.DataRequest per chunk, preserving req_id.
    """
    data_len = len(req.put.data)
    assert data_len > 0, "Cannot chunk object with missing data"
    # Warn once per process when an unusually large object is pushed over
    # the network.
    if data_len >= OBJECT_TRANSFER_WARNING_SIZE and log_once(
            "client_object_put_size_warning"):
        size_gb = data_len / 2**30
        warnings.warn(
            "Ray Client is attempting to send a "
            f"{size_gb:.2f} GiB object over the network, which may "
            "be slow. Consider serializing the object and using a remote "
            "URI to transfer via S3 or Google Cloud Storage instead. "
            "Documentation for doing this can be found here: "
            "https://docs.ray.io/en/latest/handling-dependencies.html#remote-uris",
            UserWarning,
        )
    num_chunks = math.ceil(data_len / OBJECT_TRANSFER_CHUNK_SIZE)
    for idx in range(num_chunks):
        lo = idx * OBJECT_TRANSFER_CHUNK_SIZE
        hi = min(data_len, lo + OBJECT_TRANSFER_CHUNK_SIZE)
        piece = ray_client_pb2.PutRequest(
            client_ref_id=req.put.client_ref_id,
            data=req.put.data[lo:hi],
            chunk_id=idx,
            total_chunks=num_chunks,
            total_size=data_len,
        )
        yield ray_client_pb2.DataRequest(req_id=req.req_id, put=piece)
def Init(
    self, request: ray_client_pb2.InitRequest, context=None
) -> ray_client_pb2.InitResponse:
    """Send an init request over the data channel and return the response."""
    response = self._blocking_send(ray_client_pb2.DataRequest(init=request))
    return response.init
def RegisterGetCallback(self, request: ray_client_pb2.GetRequest,
                        callback: ResponseCallable) -> None:
    """
    Register an async callback for a get request.

    Raises:
        ValueError: if the request does not contain exactly one object ID.
    """
    if len(request.ids) != 1:
        raise ValueError(
            "RegisterGetCallback() must have exactly 1 Object ID. "
            f"Actual: {request}")
    self._async_send(ray_client_pb2.DataRequest(get=request), callback)
def test_prepare_runtime_init_req_fails():
    """
    Check that a connection that is initiated with a non-Init request
    raises an error.
    """
    non_init = ray_client_pb2.DataRequest(put=ray_client_pb2.PutRequest())
    # prepare_runtime_init_req asserts the first request is an Init.
    with pytest.raises(AssertionError):
        proxier.prepare_runtime_init_req(non_init)
def ListNamedActors(
    self, request: ray_client_pb2.ClientListNamedActorsRequest
) -> ray_client_pb2.ClientListNamedActorsResponse:
    """Send a list-named-actors request and return the server's response."""
    response = self._blocking_send(
        ray_client_pb2.DataRequest(list_named_actors=request))
    return response.list_named_actors
def Terminate(
    self, request: ray_client_pb2.TerminateRequest
) -> ray_client_pb2.TerminateResponse:
    """Send a terminate request and return the server's response."""
    response = self._blocking_send(
        ray_client_pb2.DataRequest(terminate=request))
    return response.terminate
def PutObject(
    self, request: ray_client_pb2.PutRequest, context=None
) -> ray_client_pb2.PutResponse:
    """Send a put request over the data channel and return the response."""
    response = self._blocking_send(ray_client_pb2.DataRequest(put=request))
    return response.put
def PrepRuntimeEnv(
    self, request: ray_client_pb2.PrepRuntimeEnvRequest, context=None
) -> ray_client_pb2.PrepRuntimeEnvResponse:
    """Send a runtime-env preparation request and return the response."""
    response = self._blocking_send(
        ray_client_pb2.DataRequest(prep_runtime_env=request))
    return response.prep_runtime_env
def ReleaseObject(self,
                  request: ray_client_pb2.ReleaseRequest,
                  context=None) -> None:
    """Send a release request over the data channel, waiting for the reply."""
    release_req = ray_client_pb2.DataRequest(release=request)
    # TODO: Make this nonblocking. There's a race here for named
    # actors
    # a = Actor.options(name="a", lifetime="detached").remote()
    # del a
    # b = ray.get_actor("a")
    self._blocking_send(release_req)
def test_prepare_runtime_init_req_no_modification():
    """
    Check that `prepare_runtime_init_req` properly extracts the JobConfig.
    """
    config = JobConfig(worker_env={"KEY": "VALUE"}, ray_namespace="abc")
    request = ray_client_pb2.DataRequest(
        init=ray_client_pb2.InitRequest(job_config=pickle.dumps(config)))

    out_req, out_config = proxier.prepare_runtime_init_req(request)

    # Without an env-prep hook the config passes through unchanged.
    assert out_config.serialize() == config.serialize()
    assert isinstance(out_req, ray_client_pb2.DataRequest)
    # The request still carries a pickled copy of the same config.
    assert (pickle.loads(out_req.init.job_config).serialize()
            == out_config.serialize())
def _acknowledge(self, req_id: int) -> None:
    """
    Puts an acknowledge request on the request queue periodically.

    Lock should be held before calling this. Used when an async or
    blocking response is received.
    """
    # ACKs only matter when the server keeps replay state for reconnects.
    if not self.client_worker._reconnect_enabled:
        return
    assert self.lock.locked()
    self._acknowledge_counter += 1
    # Batch ACKs: only send one every ACKNOWLEDGE_BATCH_SIZE responses.
    if self._acknowledge_counter % ACKNOWLEDGE_BATCH_SIZE != 0:
        return
    ack = ray_client_pb2.DataRequest(
        acknowledge=ray_client_pb2.AcknowledgeRequest(req_id=req_id))
    self.request_queue.put(ack)
def test_prepare_runtime_init_req_no_modification():
    """
    Check that `prepare_runtime_init_req` properly extracts the JobConfig.
    """
    config = JobConfig(
        runtime_env={"env_vars": {"KEY": "VALUE"}}, ray_namespace="abc")
    request = ray_client_pb2.DataRequest(
        init=ray_client_pb2.InitRequest(
            job_config=pickle.dumps(config),
            ray_init_kwargs=json.dumps({"log_to_driver": False}),
        ))

    out_req, out_config = proxier.prepare_runtime_init_req(request)

    # Without an env-prep hook the config passes through unchanged.
    assert out_config.serialize() == config.serialize()
    assert isinstance(out_req, ray_client_pb2.DataRequest)
    # The request still carries a pickled copy of the same config...
    assert (pickle.loads(out_req.init.job_config).serialize()
            == out_config.serialize())
    # ...and ray_init_kwargs is untouched.
    assert json.loads(out_req.init.ray_init_kwargs) == {"log_to_driver": False}
def test_prepare_runtime_init_req_modified_job():
    """
    Check that `prepare_runtime_init_req` properly extracts the JobConfig
    and modifies it according to `ray_client_server_env_prep`.
    """
    job_config = JobConfig(worker_env={"KEY": "VALUE"}, ray_namespace="abc")
    init_req = ray_client_pb2.DataRequest(init=ray_client_pb2.InitRequest(
        job_config=pickle.dumps(job_config)))

    def modify_namespace(job_config: JobConfig):
        # Hook under test: rewrites the namespace on the incoming config.
        job_config.set_ray_namespace("test_value")
        return job_config

    # Fix: the original assigned `proxier.ray_client_server_env_prep`
    # directly and never restored it, leaking the patched hook into every
    # subsequent test in the session (and into any test run before a
    # failing assert aborted this one). `patch.object` restores the
    # attribute on exit, matching how the sibling test patches the hook.
    with patch.object(proxier, "ray_client_server_env_prep",
                      modify_namespace):
        req, new_config = proxier.prepare_runtime_init_req(iter([init_req]))
        assert new_config.ray_namespace == "test_value"
        assert pickle.loads(
            req.init.job_config).serialize() == new_config.serialize()
def close(self) -> None:
    """
    Shut down the data channel.

    Wakes any blocked operations, enqueues a connection-cleanup request
    plus the sentinel that ends the streaming RPC, then joins the data
    thread outside the lock.
    """
    worker = None
    with self.lock:
        self._in_shutdown = True
        # Notify blocking operations to fail.
        self.cv.notify_all()
        if self.request_queue is not None:
            # Intentional shutdown, tell server it can clean up the
            # connection immediately and ignore the reconnect grace period.
            self.request_queue.put(
                ray_client_pb2.DataRequest(
                    connection_cleanup=ray_client_pb2.
                    ConnectionCleanupRequest()))
            # Sentinel terminates the streaming RPC.
            self.request_queue.put(None)
        if self.data_thread is not None:
            worker = self.data_thread
    # Join outside the lock so the streaming thread can finish its work.
    if worker is not None:
        worker.join()
def RegisterGetCallback(self,
                        request: ray_client_pb2.GetRequest,
                        callback: ResponseCallable,
                        context=None) -> None:
    """Send a get request asynchronously, invoking `callback` on the reply."""
    self._async_send(ray_client_pb2.DataRequest(get=request), callback)
def ReleaseObject(self,
                  request: ray_client_pb2.ReleaseRequest,
                  context=None) -> None:
    """Enqueue a release request directly on the outgoing request queue."""
    self.request_queue.put(ray_client_pb2.DataRequest(release=request))
def Schedule(self, request: ray_client_pb2.ClientTask,
             callback: ResponseCallable):
    """Send a task asynchronously, invoking `callback` on the reply."""
    self._async_send(ray_client_pb2.DataRequest(task=request), callback)
def ConnectionInfo(self,
                   context=None) -> ray_client_pb2.ConnectionInfoResponse:
    """Request connection info from the server and return the response."""
    info_req = ray_client_pb2.DataRequest(
        connection_info=ray_client_pb2.ConnectionInfoRequest())
    return self._blocking_send(info_req).connection_info