async def register_server(self):
    """Register this host with the PD service and start the heart-beat task.

    Two requests are issued in sequence:

    1. ``api.new_server_id()`` -- on success the returned id is stored in
       ``self._server_id``.
    2. ``api.register_server(...)`` -- on success the returned lease id is
       stored in ``self._lease_id`` and ``self._heart_beat_loop()`` is
       scheduled as a task.

    Raises:
        SystemExit: when either response carries a non-zero ``error_code``;
            the exit status is the code of the matching error constant.
    """
    # sys.exit() is the programmatic API; the bare exit() helper is installed
    # by the ``site`` module for interactive use and may be absent.
    import sys

    resp = await api.new_server_id()
    if resp.error_code != 0:
        logger.error(
            "PD Register Server Fail, ExitCode:%d" % ERROR_PD_NEW_SERVER.code
        )
        sys.exit(ERROR_PD_NEW_SERVER.code)
    self._server_id = resp.id
    logger.info("PD Register Server Success, ServerID:%d" % self._server_id)

    resp = await api.register_server(
        self._server_id,
        self._config.start_time,
        self._config.ttl,
        self._config.address,
        self._config.services,
        # Fall back to a generated description when none is configured.
        self._config.desc if self._config.desc else "host_%s" % self.server_id(),
    )
    if resp.error_code != 0:
        # Report the error that is actually used as the exit status (the
        # original logged ERROR_PD_NEW_SERVER here by mistake).
        logger.error(
            "%d, %s"
            % (ERROR_PD_REGISTER_SERVER.code, ERROR_PD_REGISTER_SERVER.message)
        )
        sys.exit(ERROR_PD_REGISTER_SERVER.code)

    self._lease_id = resp.lease_id
    logger.info("ServerID:%d, LeaseID:%d" % (self._server_id, self._lease_id))
    service_list = [
        "%s => %s" % (k, self._config.services[k]) for k in self._config.services
    ]
    logger.info("Host Services:%s" % (", ".join(service_list)))
    asyncio.create_task(self._heart_beat_loop())
async def _dispatch_actor_rpc_request(
    actor: ActorBase, session: Optional[SocketSession], req: RpcRequest
):
    """Invoke the RPC implementation named by ``req`` on ``actor`` and reply.

    The implementation is looked up by ``(service_name, method_name)``. Its
    result is awaited when it is a coroutine, then pickled and sent back on
    ``session`` (when a session is given). Any exception is logged and, when
    a session is given, reported through ``_send_error_resp``.
    """
    try:
        rpc_key = (req.service_name, req.method_name)
        impl = rpc_meta.get_rpc_impl_method(rpc_key)
        if impl is None:
            raise RpcException.method_not_found()

        assert actor.context
        actor.context.last_message_time = time.time()

        outcome = impl(actor, *req.args, **req.kwargs)
        if asyncio.iscoroutine(outcome):
            outcome = await outcome

        resp = RpcResponse()
        resp.request_id = req.request_id
        # Serialized even without a session, so pickling errors are logged.
        payload = utils.pickle_dumps(outcome)
        if session:
            await session.send_message(RpcMessage.from_msg(resp, payload))
    except Exception as exc:
        logger.error(
            "_dispatch_actor_rpc_request, Actor:%s/%s, Exception:%s, StackTrace:%s"
            % (actor.type_name, actor.uid, exc, traceback.format_exc())
        )
        if session:
            await _send_error_resp(session, req.request_id, exc)
async def send_message(self, msg: object):
    """Encode ``msg`` with ``self._codec`` and hand the bytes to the writer.

    Exceptions raised while encoding or writing are logged with their stack
    trace and not propagated.
    """
    try:
        self._writer.write(self._codec.encode(msg))
    except Exception as exc:
        stack = traceback.format_exc()
        logger.error(
            "send_message, Exception:%s, StackTrace:%s" % (exc, stack)
        )
async def _message_handler(session: SocketSession, clz: Type, msg: object):
    """Route ``msg`` to the handler registered for its type ``clz``.

    When no handler is registered in ``_user_message_handler_map`` an error
    is logged and the message is dropped.
    """
    if clz in _user_message_handler_map:
        handler: Callable[[SocketSession, object], Coroutine] = (
            _user_message_handler_map[clz]
        )
        await handler(session, msg)
        return
    logger.error("process user message, Type:%s not found a processor" % str(clz))
async def activate_async(self):
    """Await the ``on_activate_async`` hook; log instead of raising on failure."""
    try:
        await self.on_activate_async()
    except Exception as exc:
        stack = traceback.format_exc()
        logger.error(
            "Actor.OnActivateAsync, Actor:%s/%s, Exception:%s, StackTrace:%s"
            % (self.type_name, self.uid, exc, stack)
        )
def _process_connect_success(session: SocketSession):
    """Record a successful connection for ``session``.

    A truthy session gets its heart beat refreshed with
    ``_last_process_message_time`` and an info log entry. A falsy session
    (typically ``None``) only produces an error log entry.
    """
    if session:
        session.heart_beat(_last_process_message_time)
        logger.info(
            "SocketSessionManager, SessionID:%d, ConnectSuccess" % session.session_id
        )
        return

    # ``session`` is falsy here and may be None, so its id cannot be read
    # unconditionally (the original raised AttributeError on this path).
    session_id = getattr(session, "session_id", None)
    logger.error("SocketSessionManager, SessionID:%s not found" % (session_id,))
async def _heart_beat_loop(self):
    """Send heart beats forever, pausing a third of the configured TTL each round.

    Exceptions from a round are logged and the loop continues.
    """
    while True:
        try:
            interval = self._config.ttl / 3
            await asyncio.sleep(interval)
            await self._try_send_heart_beat()
        except Exception as exc:
            logger.error("Placement.heart_beat_loop, Exception:%s" % exc)
def remove_server(self, node: ServerNode):
    """Drop ``node`` from the membership manager and run the removal hook.

    Exceptions raised by ``self._on_remove_server`` are logged with their
    stack trace and not propagated.
    """
    server_uid = node.server_uid
    _membership_manager.remove_member(server_uid)
    try:
        self._on_remove_server(node)
        logger.info(
            "PD RemoveServer, ServerID:%d, Address:%s:%s"
            % (node.server_uid, node.host, node.port)
        )
    except Exception as exc:
        logger.error(
            "Placement.RemoveServer, ServerUID:%d, Exception:%s, StackTrace:%s"
            % (node.server_uid, exc, traceback.format_exc())
        )
async def _process_socket_message(session: SocketSession, clz: Type, msg: object):
    """Forward a decoded socket message to ``_message_handler``.

    Logs an error when no handler is set, and logs (without re-raising) any
    exception the handler raises.
    """
    try:
        if not _message_handler:
            logger.error("process_socket_message, user message handler is None")
            return
        await _message_handler(session, clz, msg)
    except Exception as exc:
        logger.error(
            "process_socket_message, SessionID:%d Exception:%s, StackTrace:%s"
            % (session.session_id, exc, traceback.format_exc())
        )
def tick(self):
    """Fire the timer callback once and re-register the timer.

    Nothing happens for a cancelled timer. If the callback raises, the error
    is logged and the timer is NOT re-registered.
    """
    if self.is_cancel:
        return

    try:
        self._tick_count += 1
        self._fn(self)
    except Exception as exc:
        # NOTE(review): the "ActorID" label is filled with timer_id -- confirm
        # whether the label or the argument is the intended one.
        logger.error(
            "ActorTimer, Actor:%s, ActorID:%d, Exception:%s"
            % (self._actor_id, self.timer_id, exc)
        )
        return

    # The callback may have cancelled the timer; re-arm only if still live.
    if self.is_cancel:
        return
    delay = self.next_tick_time()
    self._manager.internal_register_timer(delay, self)
async def _recv_data(self):
    """Return the next decoded message, or ``None`` when no more can be read.

    Repeatedly tries to decode a message from ``self._buffer``; when none is
    complete, shrinks the buffer and reads up to 1024 more bytes from the
    reader. An empty read is logged and ends the call with ``None``; so does
    ``self._stop`` becoming true.
    """
    while not self._stop:
        decoded = self._codec.decode(self._buffer)
        if decoded is not None:
            return decoded

        self._buffer.shrink()
        chunk = await self._reader.read(1024)
        if not chunk:
            logger.error("TcpSocketSession.recv, SessionID:%d recv 0" % self.session_id)
            return None
        self._buffer.append(chunk)
    return None
def add_server(self, node: ServerNode):
    """Add ``node`` to the membership manager and run the add-server hook.

    Exceptions raised by ``self._on_add_server`` are logged with their stack
    trace and not propagated.
    """
    _membership_manager.add_member(node)
    try:
        self._on_add_server(node)
        logger.info(
            "PD AddServer, ServerID:%d, Address:%s:%s, Desc:%s"
            % (node.server_uid, node.host, node.port, node.desc)
        )
    except Exception as exc:
        stack = traceback.format_exc()
        logger.error(
            "Placement.AddServer, ServerUID:%d, Exception:%s, StackTrace:%s"
            % (node.server_uid, exc, stack)
        )
async def _try_connect(self, node: ServerNode):
    """Open an RPC-codec TCP session to ``node`` and remember it.

    On success the session is attached to ``node`` and stored in
    ``self.session``. Connection errors are logged and not propagated.
    """
    try:
        port = int(node.port)
        session = await TcpSocketSession.connect(node.host, port, CODEC_RPC)
        if session is None:
            return
        node.set_session(session)
        logger.info(
            "try_connect ServerID:%d, Host:%s:%s success"
            % (node.server_uid, node.host, node.port)
        )
        self.session = session
    except Exception as exc:
        logger.error(
            "try_connect ServerID:%d, Host:%s:%s, Exception:%s"
            % (node.server_uid, node.host, node.port, exc)
        )
def _process_close_socket(session_id: int):
    """Run the close handler for a session, then remove it from the manager.

    An exception raised by ``_socket_close_handler`` is logged and does not
    prevent the removal. Nothing is removed when the id is unknown.
    """
    session = _session_manager.get_session(session_id)
    try:
        if session and _socket_close_handler:
            _socket_close_handler(session)
    except Exception as exc:
        logger.error(
            "SocketSessionManager, Before Remove SessionID:%d, Exception:%s"
            % (session_id, exc)
        )

    if not session:
        return
    _session_manager.remove_session(session_id)
    logger.debug("SocketSessionManager, SessionID:%d removed" % session_id)
async def recv_message(self):
    """Receive and dispatch messages until stopped or the stream ends.

    Each message from ``_recv_data`` is passed to
    ``event_handler._process_socket_message``. A ``None`` message ends the
    loop. On an exception the error is logged and the session is closed.
    ``event_handler._process_close_socket`` is always called on the way out.
    """
    try:
        while not self._stop:
            incoming = await self._recv_data()
            if incoming is None:
                break
            real_type = self.get_real_type(incoming)
            await event_handler._process_socket_message(self, real_type, incoming)
    except Exception as exc:
        logger.error(
            "TcpSocketSession.recv_message, SessionID:%d Exception:%s, StackTrace:%s"
            % (self.session_id, exc, traceback.format_exc())
        )
        self.close()
    finally:
        event_handler._process_close_socket(session_id=self.session_id)
async def connect(cls, host: str, port: int, codec_id: int) -> Optional[SocketSession]:
    """Open a client TCP session to ``host:port`` using the codec ``codec_id``.

    Returns:
        The new session, with its receive loop scheduled as a task, or
        ``None`` (after logging) when the codec id is unknown.
    """
    codec = _codec_manager.get_codec(codec_id)
    if not codec:
        logger.error(
            "connect %s:%d failed, CodecID:%d not found" % (host, port, codec_id)
        )
        return None

    streams = await asyncio.open_connection(host=host, port=port, limit=WINDOW_SIZE)
    reader, writer = streams
    new_id = session_id_gen.new_session_id()
    session = TcpSocketSession(new_id, codec, reader, writer)
    session._is_client = True
    asyncio.create_task(session.recv_message())
    return session
async def _try_send_heart_beat(self):
    """Send one heart-beat message to every server in ``self._recent_added``.

    Servers unknown to ``_membership`` are skipped. For a server without a
    live session a reconnect task is scheduled instead of sending. Errors
    for one server are logged and do not stop the remaining servers.
    """
    heart_beat = RequestHeartBeat()
    heart_beat.milli_seconds = int(time.time() * 1000)

    # Iterate a snapshot: the loop body awaits, so other tasks may add or
    # remove servers while this round is still running.
    for server_id in tuple(self._recent_added):
        try:
            host = _membership.get_member(server_id)
            if host is None:
                continue
            session = host.session
            if not session or session.is_closed:
                asyncio.create_task(self._try_connect(host))
                continue
            await session.send_message(heart_beat)
        except Exception as e:
            logger.error("try_send_heart_beat, Exception:%s" % e)
async def deactivate_async(self):
    """Tear down the actor: drop its timers, then run ``on_deactivate_async``.

    The two steps are guarded separately, so a failure while unregistering
    timers is logged and the deactivation hook still runs. Exceptions from
    the hook are logged and not propagated.
    """
    try:
        timers = self.__timer_manager
        if timers:
            timers.unregister_all()
            del self.__timer_manager
            self.__timer_manager = None
    except Exception as exc:
        logger.error(
            "Actor.OnDeactivateAsync, Actor:%s/%s, Exception:%s, StackTrace:%s"
            % (self.type_name, self.uid, exc, traceback.format_exc())
        )

    try:
        await self.on_deactivate_async()
    except Exception as exc:
        logger.error(
            "Actor.OnDeactivateAsync, Actor:%s/%s, Exception:%s, StackTrace:%s"
            % (self.type_name, self.uid, exc, traceback.format_exc())
        )
async def listen(self, port: int, codec_id: int):
    """Start a TCP server on ``port`` whose sessions use the codec ``codec_id``.

    Logs an error and returns when the codec id is unknown. Errors raised
    while starting the server are logged and not propagated.
    """
    codec = _codec_manager.get_codec(codec_id)
    if codec is None:
        logger.error(
            "listen port:%d failed, CodecID:%d not found" % (port, codec_id)
        )
        return

    async def on_connection(reader: asyncio.StreamReader, writer: asyncio.StreamWriter):
        # Every accepted connection becomes a session bound to this codec.
        assert codec
        await self._handle_new_session(codec, reader, writer)

    try:
        await asyncio.start_server(on_connection, port=port, limit=WINDOW_SIZE)
        logger.info("listen port:%d CodecID:%d success" % (port, codec_id))
    except Exception as exc:
        logger.error("listen port:%d Exception:%s" % (port, exc))
async def _try_connect(cls, node: ServerNode):
    """Open an RPC-codec TCP session to ``node`` and attach it to the node.

    Connection errors are logged together with the elapsed time in
    milliseconds and are not propagated.
    """
    started_at = time.time()
    try:
        port = int(node.port)
        session = await TcpSocketSession.connect(node.host, port, CODEC_RPC)
        if session is None:
            return
        node.set_session(session)
        logger.info(
            "try_connect ServerID:%d, Host:%s:%s success"
            % (node.server_uid, node.host, node.port)
        )
    except Exception as exc:
        elapsed_ms = int((time.time() - started_at) * 1000)
        logger.error(
            "try_connect ServerID:%d, Host:%s:%s, CostTime:%sms, Exception:%s"
            % (node.server_uid, node.host, node.port, elapsed_ms, exc)
        )
async def _pd_keep_alive(self) -> api.KeepAliveServerResponse:
    """Renew this server's lease with the PD service.

    Returns:
        The keep-alive response; ``self._last_heart_beat`` is refreshed
        before returning.

    Raises:
        SystemExit: when more than ``self._config.ttl`` seconds have passed
            since the last successful heart beat, or when the PD answers
            with a non-zero ``error_code``.
    """
    # sys.exit() is the programmatic API; the bare exit() helper is installed
    # by the ``site`` module for interactive use and may be absent.
    import sys

    if time.time() - self._last_heart_beat > self._config.ttl:
        logger.error(
            "%s, %s"
            % (
                ERROR_PD_KEEP_ALIVE_TIME_OUT.code,
                ERROR_PD_KEEP_ALIVE_TIME_OUT.message,
            )
        )
        sys.exit(ERROR_PD_KEEP_ALIVE_TIME_OUT.code)

    resp = await api.keep_alive(self._server_id, self._lease_id, self._load)
    if resp.error_code != 0:
        logger.error(
            "%d, %s" % (ERROR_PD_KEEP_ALIVE.code, ERROR_PD_KEEP_ALIVE.message)
        )
        # Route the PD's own message through the logger rather than stdout,
        # so it lands next to the entry above.
        logger.error("PD KeepAlive ErrorMsg:%s" % (resp.error_msg,))
        sys.exit(ERROR_PD_KEEP_ALIVE.code)

    self._last_heart_beat = time.time()
    return resp
async def _dispatch_actor_message_in_loop(actor: ActorBase):
    """Run the message loop of ``actor`` until its queue yields ``None``.

    The loop claims the actor's context by storing a fresh loop id in
    ``context.loop_id``; if another loop id is already set the call returns
    immediately. The actor is activated first, messages are then popped and
    dispatched one at a time, and on exit the actor is deactivated (only if
    activation succeeded) and the context is released.
    """
    loop_id = _new_loop_id()
    context = actor.context
    assert context
    # A non-zero loop_id means a loop already owns this context.
    if context.loop_id != 0:
        return
    context.loop_id = loop_id
    loaded = False
    try:
        try:
            await actor.activate_async()
            loaded = True
        except Exception as e:
            logger.error(
                "Actor:%s/%s ActivateAsync Fail, Exception:%s, StackTrace:%s"
                % (actor.type_name, actor.uid, e, traceback.format_exc())
            )
            # Activation failed: release the context and skip deactivation.
            context.loop_id = 0
            return
        while True:
            # Yield the CPU to other coroutines so none of them waits too long
            await asyncio.sleep(0)
            o = cast(
                Tuple[weakref.ReferenceType[SocketSession], object],
                await context.pop_message(),
            )
            # A popped None ends the loop.
            if o is None:
                logger.info(
                    "Actor:%s/%s exit message loop" % (actor.type_name, actor.uid)
                )
                break
            # o[0] is a weak reference to the session (or falsy); calling it
            # yields the session, or None once the session has been collected.
            session, msg = o[0]() if o[0] else None, o[1]
            if isinstance(msg, RpcRequest):
                context.reentrant_id = msg.reentrant_id
                await _dispatch_actor_rpc_request(actor, session, msg)
            else:
                await actor.dispatch_message(msg)
    except Exception as e:
        logger.error(
            "_dispatch_actor_message_loop, Exception:%s, StackTrace:%s"
            % (e, traceback.format_exc())
        )
        pass
    try:
        # Deactivate only an actor that was successfully activated.
        if loaded:
            await actor.deactivate_async()
    except Exception as e:
        logger.error(
            "Actor:%s/%s DeactivateAsync Fail, Exception:%s, StaceTrace:%s"
            % (actor.type_name, actor.uid, e, traceback.format_exc())
        )
    # Release the context only if this loop still owns it.
    if context.loop_id == loop_id:
        context.reentrant_id = -1
        context.loop_id = 0
    logger.info("Actor:%s/%s loop:%d finished" % (actor.type_name, actor.uid, loop_id))
async def dispatch_message(self, msg: object) -> None:
    """Dispatch one queued message to the matching actor hook.

    ``RpcMessage`` instances are routed by the type of their ``meta``;
    ``ActorTimer`` instances are ticked; everything else goes to
    ``dispatch_user_message``. Exceptions are logged and not propagated.
    """
    try:
        if isinstance(msg, RpcMessage):
            rpc_message = cast(RpcMessage, msg)
            meta = rpc_message.meta
            if isinstance(meta, NotifyNewActorMessage):
                await self.dispatch_user_message(msg)
            elif isinstance(meta, NotifyNewActorSession):
                await self.on_new_session(meta, rpc_message.body or b"{}")
            elif isinstance(meta, NotifyActorSessionAborted):
                await self.on_session_aborted(meta)
        elif isinstance(msg, ActorTimer):
            msg.tick()
        else:
            await self.dispatch_user_message(msg)

        # A timer must not extend the actor's lifetime
        if not isinstance(msg, ActorTimer):
            context = self.context
            if context:
                context.last_message_time = time.time()
    except Exception as exc:
        logger.error(
            "Actor:%s/%s dispatch_message Exception:%s"
            % (self.type_name, self.uid, exc)
        )
async def _run_placement():
    """Register this server with the placement service, then run its loop forever.

    Returns early (after logging) when no ``Placement`` instance exists. A
    failed registration is logged and the loop is started anyway. Whenever
    ``placement_loop`` raises, the error is logged and the loop is retried
    after a one-second pause.
    """
    impl = Placement.instance()
    if impl is None:
        logger.error("Placement module not initialized")
        return

    try:
        await impl.register_server()
    except Exception as e:
        logger.error("register server fail, Exception:%s" % e)

    while True:
        try:
            await impl.placement_loop()
        except Exception as e:
            # Log before backing off: the record is then written at failure
            # time and is not lost if the task is cancelled while sleeping.
            logger.error(
                "run placement fail, Exception:%s, StackTrace:%s"
                % (e, traceback.format_exc())
            )
            await asyncio.sleep(1.0)