def _log_main(self) -> None:
    """Consume the server's log stream until it ends or the RPC fails.

    Requests are drawn from ``self.request_queue`` (a ``None`` item ends the
    request iterator) and sent together with ``self._metadata``. Every
    received record is handed to ``self.log``; a record whose level is
    negative is handed to ``self.stdstream`` first.

    Raises:
        grpc.RpcError: re-raised when the status code is anything other
            than CANCELLED, UNAVAILABLE or RESOURCE_EXHAUSTED.
    """
    streamer = ray_client_pb2_grpc.RayletLogStreamerStub(self.channel)
    log_stream = streamer.Logstream(
        iter(self.request_queue.get, None), metadata=self._metadata)
    try:
        for entry in log_stream:
            if entry.level < 0:
                self.stdstream(level=entry.level, msg=entry.msg)
            self.log(level=entry.level, msg=entry.msg)
    except grpc.RpcError as err:
        status = err.code()

        if status == grpc.StatusCode.CANCELLED:
            # Graceful shutdown: we cancelled our own connection.
            logger.info("Cancelling logs channel")
            return

        disconnect_codes = (grpc.StatusCode.UNAVAILABLE,
                            grpc.StatusCode.RESOURCE_EXHAUSTED)
        if status in disconnect_codes:
            # TODO(barakmich): The server may have dropped. In theory we
            # can retry, as per
            # https://grpc.github.io/grpc/core/md_doc_statuscodes.html but
            # in practice the correct semantics still need thought.
            logger.info("Server disconnected from logs channel")
            return

        # Any other gRPC error is unhandled: report it and propagate.
        logger.error(
            f"Got Error from logger channel -- shutting down: {err}")
        raise err
def _log_main(self) -> None:
    """Stream log records from the server, reconnecting when permitted.

    The loop runs until ``self.client_worker._in_shutdown`` is set, the log
    stream ends normally, or ``self._process_rpc_error`` returns a falsy
    value for a ``grpc.RpcError``. Every record is handed to ``self.log``;
    a record whose level is negative is handed to ``self.stdstream`` first.
    """
    reconnecting = False
    while not self.client_worker._in_shutdown:
        if reconnecting:
            # Start from a fresh queue, replaying only the last request
            # (when there is one).
            self.request_queue = queue.Queue()
            if self.last_req:
                self.request_queue.put(self.last_req)

        streamer = ray_client_pb2_grpc.RayletLogStreamerStub(
            self.client_worker.channel)
        try:
            log_stream = streamer.Logstream(
                iter(self.request_queue.get, None),
                metadata=self._metadata)
        except ValueError:
            # Using the stub on a cancelled channel raises ValueError. That
            # should only happen while the data client is resetting the
            # connection, so pause briefly and try again.
            time.sleep(0.5)
            continue

        try:
            for entry in log_stream:
                if entry.level < 0:
                    self.stdstream(level=entry.level, msg=entry.msg)
                self.log(level=entry.level, msg=entry.msg)
        except grpc.RpcError as rpc_err:
            reconnecting = self._process_rpc_error(rpc_err)
            if reconnecting:
                continue
            return
        else:
            # The stream finished without an error: nothing left to do.
            return
def Logstream(self, request_iterator, context):
    """Proxy a Logstream RPC to the channel registered for the client.

    The client id is read from ``context``; an empty id ends the call
    immediately. The channel is looked up via
    ``self.proxy_manager.get_channel`` up to ``LOGSTREAM_RETRIES`` times,
    sleeping ``LOGSTREAM_RETRY_INTERVAL_SEC`` after each miss. If no channel
    is found, the call is marked NOT_FOUND with an explanatory detail.

    Yields:
        Each response received from the upstream ``Logstream`` call.
        Exceptions (subclasses of ``Exception``) raised while relaying are
        logged and end the stream rather than propagating.
    """
    client_id = _get_client_id_from_context(context)
    if client_id == "":
        return
    logger.debug(f"New logstream connection from client {client_id}: ")

    # The LogClient *may* connect before the DataClient has finished
    # connecting, so the lookup is attempted several times.
    for attempt in range(LOGSTREAM_RETRIES):
        channel = self.proxy_manager.get_channel(client_id)
        if channel is not None:
            break
        logger.warning(
            f"Retrying Logstream connection. {attempt + 1} attempts failed.")
        time.sleep(LOGSTREAM_RETRY_INTERVAL_SEC)
    else:
        # Reached only when the loop never hit ``break``, i.e. every
        # lookup (if any) came back empty.
        context.set_code(grpc.StatusCode.NOT_FOUND)
        context.set_details(
            "Logstream proxy failed to connect. Channel for client "
            f"{client_id} not found.")
        return None

    upstream = ray_client_pb2_grpc.RayletLogStreamerStub(channel)
    call_metadata = [("client_id", client_id)]
    responses = upstream.Logstream(request_iterator, metadata=call_metadata)
    try:
        for response in responses:
            yield response
    except Exception:
        logger.exception("Proxying Logstream failed!")
def Logstream(self, request_iterator, context):
    """Proxy a Logstream RPC to the channel registered for the client.

    The client id is read from ``context``; an empty id ends the call
    immediately. The channel is looked up via
    ``self.proxy_manager.get_channel`` up to five times, sleeping two
    seconds after each miss. If no channel is found, the call is marked
    NOT_FOUND with an explanatory detail and nothing is yielded.

    Incoming requests are handed to ``forward_streaming_requests`` on a
    daemon thread together with a queue (presumably it copies requests into
    the queue -- confirm against that helper); the upstream call reads from
    that queue until it sees ``None``.

    Yields:
        Each response received from the upstream ``Logstream`` call.
    """
    client_id = _get_client_id_from_context(context)
    if client_id == "":
        return
    # This is the log-stream handler, so the message names it as such.
    logger.debug(f"New logstream connection from client {client_id}: ")

    channel = None
    # We need to retry a few times because the LogClient *may* connect
    # before the DataClient has finished connecting.
    for i in range(5):
        channel = self.proxy_manager.get_channel(client_id)
        if channel is not None:
            break
        logger.warning(
            f"Retrying Logstream connection. {i+1} attempts failed.")
        time.sleep(2)

    if channel is None:
        context.set_code(grpc.StatusCode.NOT_FOUND)
        # Give the caller a reason alongside the bare status code.
        context.set_details(
            "Logstream proxy failed to connect. Channel for client "
            f"{client_id} not found.")
        return None

    stub = ray_client_pb2_grpc.RayletLogStreamerStub(channel)

    # Named ``request_queue`` so the local does not shadow the stdlib
    # ``queue`` module name.
    request_queue = Queue()
    forwarder = Thread(
        target=forward_streaming_requests,
        args=(request_iterator, request_queue),
        daemon=True)
    forwarder.start()
    try:
        resp_stream = stub.Logstream(
            iter(request_queue.get, None),
            metadata=[("client_id", client_id)])
        for resp in resp_stream:
            yield resp
    finally:
        # Wait at most one second for the forwarder; it is a daemon thread,
        # so it cannot keep the process alive if it is still running.
        forwarder.join(1)
def Logstream(self, request_iterator, context):
    """Proxy a Logstream RPC to the channel registered for the client.

    The client id is read from ``context``; an empty id ends the call
    immediately. The channel is looked up via
    ``self.proxy_manager.get_channel`` up to ten times, sleeping five
    seconds after each miss. If no channel is found, the call is marked
    NOT_FOUND with an explanatory detail and nothing is yielded.

    Incoming requests are handed to ``forward_streaming_requests`` on a
    daemon thread together with a queue (presumably it copies requests into
    the queue -- confirm against that helper); the upstream call reads from
    that queue until it sees ``None``.

    Yields:
        Each response received from the upstream ``Logstream`` call.
    """
    client_id = _get_client_id_from_context(context)
    if client_id == "":
        return
    # This is the log-stream handler, so the message names it as such.
    logger.debug(f"New logstream connection from client {client_id}: ")

    channel = None
    for i in range(10):
        # TODO(ilr) Ensure LogClient starts after startup has happened.
        # This will remove the need for retries here.
        channel = self.proxy_manager.get_channel(client_id)
        if channel is not None:
            break
        logger.warning(
            f"Retrying Logstream connection. {i+1} attempts failed.")
        time.sleep(5)

    if channel is None:
        context.set_code(grpc.StatusCode.NOT_FOUND)
        # Give the caller a reason alongside the bare status code.
        context.set_details(
            "Logstream proxy failed to connect. Channel for client "
            f"{client_id} not found.")
        return None

    stub = ray_client_pb2_grpc.RayletLogStreamerStub(channel)

    # Named ``request_queue`` so the local does not shadow the stdlib
    # ``queue`` module name.
    request_queue = Queue()
    forwarder = Thread(
        target=forward_streaming_requests,
        args=(request_iterator, request_queue),
        daemon=True)
    forwarder.start()

    resp_stream = stub.Logstream(
        iter(request_queue.get, None),
        metadata=[("client_id", client_id)])
    for resp in resp_stream:
        yield resp
def _log_main(self) -> None:
    """Consume the server's log stream until it ends or the RPC fails.

    Requests are drawn from ``self.request_queue`` (a ``None`` item ends the
    request iterator). Every received record is handed to ``self.log``; a
    record whose level is negative is handed to ``self.stdstream`` first.

    Raises:
        grpc.RpcError: re-raised when the status code is not CANCELLED.
    """
    streamer = ray_client_pb2_grpc.RayletLogStreamerStub(self.channel)
    outgoing = iter(self.request_queue.get, None)
    log_stream = streamer.Logstream(outgoing)
    try:
        for entry in log_stream:
            if entry.level < 0:
                self.stdstream(level=entry.level, msg=entry.msg)
            self.log(level=entry.level, msg=entry.msg)
    except grpc.RpcError as err:
        if err.code() == grpc.StatusCode.CANCELLED:
            # Just shutting down normally; nothing to report.
            return
        logger.error(
            f"Got Error from logger channel -- shutting down: {err}")
        raise err
def set_channel(self, channel: grpc.Channel) -> None:
    """Replace ``self.stub`` with a log-streamer stub built on *channel*.

    Args:
        channel: The gRPC channel the new ``RayletLogStreamerStub`` wraps.
    """
    log_streamer_stub = ray_client_pb2_grpc.RayletLogStreamerStub(channel)
    self.stub = log_streamer_stub