async def handle_request(self, request: Query) -> asyncio.Future:
    """Enqueue a query for batched execution and await its result.

    Returns:
        Tuple of (b"", result); the empty bytes object is a small sentinel
        the router uses to cheaply track request status.
    """
    request.tick_enter_replica = time.time()
    logger.debug("Replica {} received request {}".format(
        self.replica_tag, request.metadata.request_id))
    request.async_future = asyncio.get_event_loop().create_future()
    self.num_ongoing_requests += 1
    try:
        self.batch_queue.put(request)
        result = await request.async_future
        request_time_ms = (time.time() - request.tick_enter_replica) * 1000
        logger.debug("Replica {} finished request {} in {:.2f}ms".format(
            self.replica_tag, request.metadata.request_id, request_time_ms))
    finally:
        # Decrement in a finally block so a failed or cancelled future
        # cannot leak the ongoing-request count.
        self.num_ongoing_requests -= 1
    # Returns a small object for router to track request status.
    return b"", result
async def handle_request(self, request: Union[Query, bytes]):
    """Deserialize the query if needed, enqueue it, and await its result."""
    # Queries may arrive serialized from a remote router; restore them
    # before queueing.
    if isinstance(request, bytes):
        request = Query.ray_deserialize(request)
    logger.debug("Worker {} got request {}".format(self.replica_tag, request))
    loop = asyncio.get_event_loop()
    future = loop.create_future()
    request.async_future = future
    self.batch_queue.put(request)
    return await future
async def handle_request(
    self, request_metadata: RequestMetadata, *request_args, **request_kwargs
):
    """Build a Query from the raw call and forward it to the backend.

    Input is received directly (not serialized) because it might contain
    an ObjectRef.
    """
    return await self.backend.handle_request(
        Query(request_args, request_kwargs, request_metadata))
async def handle_request(
    self, pickled_request_metadata: bytes, *request_args, **request_kwargs
):
    """Unpickle request metadata, wrap the call in a Query, and forward it.

    The metadata arrives pickled for performance; the args are passed
    directly because they might contain an ObjectRef.
    """
    metadata: RequestMetadata = pickle.loads(pickled_request_metadata)
    query = Query(request_args, request_kwargs, metadata)
    return await self.replica.handle_request(query)
async def handle_request(self, request: Query) -> asyncio.Future:
    """Process a single query, tracking latency and the in-flight gauge.

    Returns:
        Tuple of (b"", result); the empty bytes object is a small sentinel
        the router uses to cheaply track request status.
    """
    request.tick_enter_replica = time.time()
    logger.debug("Replica {} received request {}".format(
        self.replica_tag, request.metadata.request_id))
    self.num_ongoing_requests += 1
    self.num_processing_items.set(self.num_ongoing_requests)
    try:
        result = await self.invoke_single(request)
    finally:
        # Restore the counter even when invoke_single raises; otherwise
        # the ongoing-request count would drift upward on errors.
        self.num_ongoing_requests -= 1
    request_time_ms = (time.time() - request.tick_enter_replica) * 1000
    logger.debug("Replica {} finished request {} in {:.2f}ms".format(
        self.replica_tag, request.metadata.request_id, request_time_ms))
    # Returns a small object for router to track request status.
    return b"", result
async def handle_request(self,
                         request: Union[Query, bytes]) -> asyncio.Future:
    """Deserialize the query if needed, enqueue it, and await its result."""
    if isinstance(request, bytes):
        request = Query.ray_deserialize(request)
    request.tick_enter_replica = time.time()
    logger.debug("Worker {} got request {}".format(self.replica_tag, request))
    request.async_future = asyncio.get_event_loop().create_future()
    self.num_ongoing_requests += 1
    try:
        self.batch_queue.put(request)
        result = await request.async_future
    finally:
        # Decrement in a finally block so a failed or cancelled future
        # cannot leak the ongoing-request count.
        self.num_ongoing_requests -= 1
    return result
async def handle_request(self, request: Query) -> asyncio.Future:
    """Process one query under the reader lock, recording latency metrics.

    Returns:
        Tuple of (b"", result); the empty bytes object is a small sentinel
        the router uses to cheaply track request status.
    """
    async with self.rwlock.reader_lock:
        start = time.time()
        request.tick_enter_replica = start
        logger.debug("Replica {} received request {}".format(
            self.replica_tag, request.metadata.request_id))
        running = self._get_handle_request_stats()["running"]
        self.num_processing_items.set(running)
        result = await self.invoke_single(request)
        latency_ms = (time.time() - start) * 1000
        logger.debug("Replica {} finished request {} in {:.2f}ms".format(
            self.replica_tag, request.metadata.request_id, latency_ms))
        # Returns a small object for router to track request status.
        return b"", result
async def handle_request(self,
                         request: Union[Query, bytes]) -> asyncio.Future:
    """Deserialize the query if needed, enqueue it, and await its result,
    logging the end-to-end replica latency."""
    if isinstance(request, bytes):
        request = Query.ray_deserialize(request)
    request.tick_enter_replica = time.time()
    logger.debug("Replica {} received request {}".format(
        self.replica_tag, request.metadata.request_id))
    request.async_future = asyncio.get_event_loop().create_future()
    self.num_ongoing_requests += 1
    try:
        self.batch_queue.put(request)
        result = await request.async_future
        request_time_ms = (time.time() - request.tick_enter_replica) * 1000
        logger.debug("Replica {} finished request {} in {:.2f}ms".format(
            self.replica_tag, request.metadata.request_id, request_time_ms))
    finally:
        # finally ensures the in-flight count is released on failure too.
        self.num_ongoing_requests -= 1
    return result
async def handle_request(self, request: Query) -> asyncio.Future:
    """Process one query, updating in-flight metrics and logging latency.

    Returns:
        Tuple of (b"", result); the empty bytes object is a small sentinel
        the router uses to cheaply track request status.
    """
    request.tick_enter_replica = time.time()
    logger.debug("Replica {} received request {}".format(
        self.replica_tag, request.metadata.request_id))
    self.num_ongoing_requests += 1
    self.num_processing_items.set(self.num_ongoing_requests)
    # Trigger a context switch so we can enqueue more requests in the
    # meantime. Without this line and if the function is synchronous,
    # other requests won't even get enqueued as await self.invoke_single
    # doesn't context switch.
    await asyncio.sleep(0)
    try:
        result = await self.invoke_single(request)
    finally:
        # Restore the counter even when invoke_single raises so the
        # ongoing-request count cannot drift upward on errors.
        self.num_ongoing_requests -= 1
    request_time_ms = (time.time() - request.tick_enter_replica) * 1000
    logger.debug("Replica {} finished request {} in {:.2f}ms".format(
        self.replica_tag, request.metadata.request_id, request_time_ms))
    # Returns a small object for router to track request status.
    return b"", result
async def handle_request(self, request):
    """Record the (possibly serialized) request and acknowledge it.

    Mock handler: keeps both the most recent request and the full history
    so tests can inspect what was received.
    """
    if isinstance(request, bytes):
        request = Query.ray_deserialize(request)
    self.queries.append(request)
    self.query = request
    return "DONE"
async def test_replica_set(ray_instance):
    """End-to-end check that ReplicaSet honors max_concurrent_queries.

    Two mock workers each accept one in-flight query (gated by a signal
    actor); a third assignment must stay pending until a worker frees up.
    """
    signal = SignalActor.remote()

    @ray.remote(num_cpus=0)
    class MockWorker:
        # Count of queries this worker has received; read back via
        # num_queries() for assertions.
        _num_queries = 0

        async def handle_request(self, request):
            self._num_queries += 1
            # Block until the test releases the signal actor.
            await signal.wait.remote()
            return "DONE"

        async def num_queries(self):
            return self._num_queries

    # We will test a scenario with two replicas in the replica set.
    rs = ReplicaSet()
    workers = [MockWorker.remote() for _ in range(2)]
    rs.set_max_concurrent_queries(1)
    rs.update_worker_replicas(workers)

    # Send two queries. They should go through the router but blocked by
    # signal actors.
    query = Query([], {}, TaskContext.Python,
                  RequestMetadata("request-id", "endpoint",
                                  TaskContext.Python))
    first_ref = await rs.assign_replica(query)
    second_ref = await rs.assign_replica(query)

    # These should be blocked by signal actor.
    with pytest.raises(ray.exceptions.GetTimeoutError):
        ray.get([first_ref, second_ref], timeout=1)

    # Each replica should have exactly one inflight query. Let's make sure
    # the queries arrived there.
    for worker in workers:
        while await worker.num_queries.remote() != 1:
            await asyncio.sleep(1)

    # Let's try to send another query.
    third_ref_pending_task = asyncio.get_event_loop().create_task(
        rs.assign_replica(query))

    # We should fail to assign a replica, so this coroutine should still be
    # pending after some time.
    await asyncio.sleep(0.2)
    assert not third_ref_pending_task.done()

    # Let's unblock the two workers
    await signal.send.remote()
    assert await first_ref == "DONE"
    assert await second_ref == "DONE"

    # The third request should be unblocked and sent to first worker.
    # This means we should be able to get the object ref.
    third_ref = await third_ref_pending_task

    # Now we got the object ref, let's get its result.
    await signal.send.remote()
    assert await third_ref == "DONE"

    # Finally, make sure that one of the replicas processed the third query.
    num_queries_set = {(await worker.num_queries.remote())
                       for worker in workers}
    assert num_queries_set == {2, 1}
async def handle_request(self, request_metadata, *args, **kwargs):
    """Record the incoming call as a Query and acknowledge it.

    Mock handler: stores the latest Query and the full history so tests
    can inspect what was received.
    """
    query = Query(args, kwargs, request_metadata)
    self.queries.append(query)
    self.query = query
    return b"", "DONE"
async def handle_request(self, request: Query):
    """Enqueue a single query and await its completion.

    Callers must send one Query at a time; lists are not accepted here.
    """
    assert not isinstance(request, list)
    logger.debug("Worker {} got request {}".format(self.name, request))
    future = asyncio.get_event_loop().create_future()
    request.async_future = future
    self.query_queue.put_nowait(request)
    return await future