def run_loop(self):
    """
    Drive the message pump.

    Takes the message at the head of the queue and dispatches it to every
    listener whose concern list contains the message name. Exceptions raised
    by a listener are logged and swallowed so one failing listener cannot
    stop the pump. When the queue is empty an ``IDLEMessage`` is appended
    first, so listeners that need to run continuously are still driven.

    :return: None
    :rtype: None
    """
    while not self._stop_tag:
        if not self._message_queue:
            self._message_queue.append(IDLEMessage())
        message = self._message_queue[0]
        for listener in self._listener_list:
            if message.name not in self.list_listener_concern(listener):
                continue
            try:
                # Only operation messages carry an operation id for the logger.
                if isinstance(message, OperationMessage):
                    log.Logger.setoid(message.operation_id)
                listener.on_message(message)
            except Exception:
                # ``Exception`` rather than a bare except: KeyboardInterrupt
                # and SystemExit must still be able to stop the loop.
                log.f("error occurred on listener:{}".format(
                    listener.__class__))
            finally:
                log.Logger.clearoid()
        self._message_queue.remove(message)
        # Idle messages change nothing, so only real messages are persisted.
        if not isinstance(message, IDLEMessage):
            self.on_persistence()
def load_context(cls):
    """
    Load the guardian context.

    If the persistence layer returns no data for the "context" path, a new
    ``GuardianContext`` is created; otherwise the stored data is unpickled.
    Every child of the "operations" path is then unpickled and stored in
    ``operations`` under its node name (the operation id). An operation that
    fails to load is logged and skipped.

    :return: the context object (also stored on ``cls._context``)
    :rtype: GuardianContext
    """
    ctx_path = config.GuardianConfig.get_persistent_path("context")
    raw_context = persistence.PersistenceDriver().get_data(ctx_path)
    log.i("load context success")
    # NOTE(review): pickle.loads can execute arbitrary code; this is only
    # safe if the persistence store is trusted -- confirm.
    if raw_context:
        guardian_context = pickle.loads(raw_context)
    else:
        guardian_context = GuardianContext()

    # Load the state machine (operation) information.
    ops_root = config.GuardianConfig.get_persistent_path("operations")
    # Child node names under the operations path are the operation ids.
    for operation_id in persistence.PersistenceDriver().get_children(ops_root):
        node_path = "/".join((ops_root, operation_id))
        try:
            raw_operation = persistence.PersistenceDriver().get_data(node_path)
            log.i("load operation[{}] success".format(operation_id))
            guardian_context.operations[operation_id] = pickle.loads(
                raw_operation)
        except Exception:
            log.f("load operation {} failed".format(operation_id))

    cls._context = guardian_context
    return guardian_context
def state_listener(self, state):
    """
    React to a change of the persistence session state.

    On ``LOST`` the instance is re-created and the children watcher is
    registered again, retrying once per second until both calls succeed.
    ``SUSPENDED``, ``CONNECTED`` and unknown states are only logged.

    :param state: the state that triggered this callback
    :return: None
    """
    states = persistence.PersistenceEvent.PersistState

    if state == states.LOST:
        log.i("guardian instance state lost")
        recreated = False
        while not recreated:
            try:
                self.create_instance()
                self._pd.get_children(
                    path=self.path, watcher=self.event_watcher)
                log.i("guardian instance state recreate finished")
                recreated = True
            except Exception:
                log.f("create instance err")
                time.sleep(1)
        return

    if state == states.SUSPENDED:
        log.i("guardian instance state suspended")
        return

    if state == states.CONNECTED:
        log.i("guardian instance state connected")
        return

    log.i("guardian instance state unrecognized, state:{}".format(state))
def func(a):
    """
    Scratch helper: log, raise ``EPNoNodeError`` and print what was caught.

    :param a: unused
    :return: None
    """
    try:
        log.f("ddd")
        raise exception.EPNoNodeError("def")
    except Exception:
        # The parenthesised form prints the same tuple under Python 2 and,
        # unlike the print statement, is also valid Python 3.
        print(sys.exc_info())
        log.f("abc")
def message_handler(self, message):
    """
    Execute a message and put the outcome on the result queue.

    Calls ``execute_message`` and wraps its return value (or, on failure,
    an ``"err:..."`` string) in a ``COMPLETE_MESSAGE`` operation message
    that is put on ``self._result_queue``.

    .. Note:: Per the original note, operations run asynchronously in child
        processes, and the results are collected through the result queue
        for the main process to handle.

    :param Message message: the message to execute
    :return: None
    :rtype: None
    """
    try:
        try:
            log.Logger.setoid(message.operation_id)
            log.i("process new message")
            log.d("process new message,param:%s" % str(message.params))
            ret = self.execute_message(message)
        except Exception as e:
            log.f("execute_message fail")
            ret = "err:{}".format(e)
        result = framework.OperationMessage(
            "COMPLETE_MESSAGE", message.operation_id, ret)
        self._result_queue.put(result)
    finally:
        # Clear the logger's operation id even if building or queueing the
        # result fails, so it is not left set for later log calls.
        log.Logger.clearoid()
def __run(self):
    """
    Run the HTTP server.

    Creates a ``SocketServer.TCPServer`` bound to ``('', self._port)`` with
    ``RequestHandler`` and serves until ``serve_forever`` returns or fails.
    Exceptions are logged, not propagated; the listening socket is always
    closed on the way out.

    :return: None
    """
    _server = None
    try:
        # Class-level switch: affects every TCPServer created afterwards.
        SocketServer.TCPServer.allow_reuse_address = True
        _server = SocketServer.TCPServer(('', self._port), RequestHandler)
        _server.serve_forever()
    except Exception:
        log.f('Generic Exception')
    finally:
        # Release the listening socket; previously it was never closed.
        if _server is not None:
            _server.server_close()
def _run_catch(cls, func):
    """
    Call ``func`` and translate kazoo errors into project exceptions.

    :param func: zero-argument callable to execute
    :return: whatever ``func`` returns
    :raises exception.EPNoNodeError: ``func`` raised kazoo ``NoNodeError``
    :raises exception.EPServerError: ``func`` raised another kazoo
        ``ZookeeperError``
    """
    # Import the submodule explicitly: a plain ``import kazoo`` only exposes
    # ``kazoo.exceptions`` if something else has already imported it.
    import kazoo.exceptions
    try:
        return func()
    except kazoo.exceptions.NoNodeError:
        raise exception.EPNoNodeError()
    except kazoo.exceptions.ZookeeperError:
        log.f("zk fail")
        raise exception.EPServerError()
    except Exception:
        # NOTE(review): presumably log.r logs and raises the given
        # EPIOError; if it only logs, this branch returns None -- confirm.
        log.r(exception.EPIOError(), "Requesst I/O Error")
def get_control_message(self, session):
    """
    Return the pending control message for a session, if any.

    Looks up ``self._control_message[session.id]``. When the lookup fails
    for any reason the failure is logged and ``(None, None)`` is returned.

    :param object session: the state machine session
    :return: control message id and control message, or ``(None, None)``
    :rtype: str, object
    """
    try:
        control = self._control_message[session.id]
        return control['control_id'], control['message']
    except Exception:
        # ``Exception`` rather than a bare except so KeyboardInterrupt and
        # SystemExit are not swallowed here.
        log.f("get control message fail")
        return None, None
def run_loop(self, idle_sleep):
    """
    Drive the message pump.

    Takes the message at the head of the queue and dispatches it to every
    listener whose concern list contains the message name. Listener
    exceptions are logged and swallowed so the pump keeps running. When the
    queue is empty an ``IDLEMessage`` is put first, so listeners that need
    to run continuously are still driven.

    :param float idle_sleep: time to sleep after an idle round if the queue
        is still empty
    :return: None
    :rtype: None
    """
    while not self._stop_tag:
        is_idle = not self._message_queue
        if is_idle:
            self.put(IDLEMessage())
        message = self._message_queue[0]

        if not is_idle:
            # Short-circuit the message: skip the DecisionMaker and get the
            # decision message to execute directly. A None result means the
            # message needs no handling and is dropped.
            message = self._short_circuit_msg(message)
            if message is None:
                self._message_queue.pop(0)
                continue

        for listener in self._listener_list:
            if message.name in self.list_listener_concern(listener):
                try:
                    if not is_idle:
                        log.Logger.setoid(message.operation_id)
                    listener.on_message_wrapper(message)
                except Exception:
                    log.f("error occurred on listener:{}".format(
                        listener.__class__))
                finally:
                    log.Logger.clearoid()

        self._message_queue.pop(0)
        if is_idle:
            if not self._message_queue:
                time.sleep(idle_sleep)
        elif not self._short_circuit_mode:
            self.on_persistence()
def event_dealer(self):
    """
    Poll for external events and hand each one to ``callback_event``.

    Loops until ``self._stop_tag`` is set. When ``get_event`` returns None
    or raises, the loop sleeps for ``self._query_interval`` before polling
    again.

    :return: None
    :rtype: None
    """
    while not self._stop_tag:
        try:
            event = self.get_event()
            got_event = event is not None
            if not got_event:
                time.sleep(self._query_interval)
        except Exception:
            log.f("get event failed")
            time.sleep(self._query_interval)
            got_event = False

        if got_event:
            log.i("get a new event from external system")
            self.callback_event(event)
def _do_request(self, uid):
    """
    Query the "ark"/"operation" ES client for the record matching ``uid``.

    Failures are logged and reported as ``None``; no exception derived from
    ``Exception`` is propagated to the caller.

    :param str uid: value used for the ``uid`` query condition
    :return: the data returned by the client, or None on failure
    :rtype: None or str
    """
    response = None
    try:
        condition = {"uid": uid}
        esc = client.ESClient("ark", "operation")
        response = esc.get_data_with_condition(condition)
    except exception.EFailedRequest as e:
        log.f(str(e.reason))
    except Exception:
        # The old ``return`` inside ``finally`` discarded every exception
        # silently (KeyboardInterrupt/SystemExit included). Keep the
        # best-effort None result, but log it and let BaseException through.
        log.f("do request failed, uid:{}".format(uid))
    return response
def _reload(self):
    """
    Periodically reload the cron list.

    Each round compares the freshly fetched list with the one from the
    previous round and only passes the removed and the added entries to the
    clock. A failing round is logged and retried after
    ``self._reload_interval``.

    :return: None
    """
    while not self._stop_tag:
        try:
            current_cron = set(self.refresh())
            delete_list = self._old_cron - current_cron
            add_list = current_cron - self._old_cron
            if delete_list:
                self._clock.delete_cron(delete_list)
                log.i("refresh cron list, delete:{num}".format(
                    num=len(delete_list)))
            if add_list:
                self._clock.add_cron(add_list)
                log.i("refresh cron list, add:{num}".format(
                    num=len(add_list)))
            # Only remember the new list once both updates went through.
            self._old_cron = current_cron
        except Exception:
            # ``Exception`` rather than a bare except so KeyboardInterrupt
            # and SystemExit can still end the loop.
            log.f("reload failed, err")
        time.sleep(self._reload_interval)
def execute_message(self, message):
    """
    Execute a message by building and running its state machine.

    Any exception raised while creating or running the state machine is
    logged and passed to ``self.exception_handler``. Afterwards the control
    message stored under the state machine's session id is removed.

    :param Message message: the message to execute
    :return: None
    :rtype: None
    """
    # Pre-bind so the cleanup below can tell "never created" from "ran".
    state_machine = None
    try:
        nodes = self._create_nodes(message)
        state_machine = self._create_state_machine(message, nodes)
        # Start the state machine.
        state_machine.start()
    except Exception as e:
        log.f("state machine run exception")
        try:
            self.exception_handler(
                "state machine run exception:{}".format(e), message.params)
        except Exception:
            log.f("exception handler")

    if state_machine is None:
        # Creation failed: there is no session to clean up. Previously this
        # path ended in an UnboundLocalError from the final log line.
        log.e("state machine was not created, skip control message cleanup")
        return

    try:
        del self._control_message[state_machine.session.id]
    except Exception:
        log.f("fail to delete control message")
    log.i("state machine run finished, operationId:{}".format(
        state_machine.session.id))
def execute(self, operation):
    """
    Execute an operation by building and running its state machine.

    An ``IOError`` is treated as fatal: it is logged, passed to
    ``self.exception_handler``, and the process then exits immediately via
    ``os._exit(9)``. Any other exception is logged and passed to
    ``self.exception_handler``. Afterwards the control message stored under
    the state machine's session id is removed.

    :param Operation operation: the operation to execute
    :return: None
    :rtype: None
    """
    # Pre-bind so the cleanup below can tell "never created" from "ran".
    state_machine = None
    try:
        nodes = self._create_nodes(operation)
        state_machine = self._create_state_machine(operation, nodes)
        # Start the state machine.
        state_machine.start()
    except IOError as e:
        log.f("state machine run IOError exception, process will exit.")
        try:
            self.exception_handler(
                "state machine run exception:{}".format(e),
                operation.operation_params)
        except Exception:
            log.f("IOError exception handler")
        log.i("state machine run IOError exception, process exit.")
        import os
        # os._exit skips cleanup handlers and buffer flushing.
        os._exit(9)
    except Exception as e:
        log.f("state machine run exception")
        try:
            self.exception_handler(
                "state machine run exception:{}".format(e),
                operation.operation_params)
        except Exception:
            log.f("exception handler")

    if state_machine is None:
        # Creation failed: there is no session to clean up. Previously this
        # path ended in an UnboundLocalError from the final log line.
        log.e("state machine was not created, skip control message cleanup")
        return

    try:
        del self._control_message[state_machine.session.id]
    except Exception:
        log.f("fail to delete control message")
    log.i("state machine run finished, operationId:{}".format(
        state_machine.session.id))
def persist(self, session, reason, finished_name, next_name):
    """
    Report a persistence point of the state machine through the result queue.

    ``CONTROL`` is reported as ``PERSIST_SESSION_MESSAGE``; ``STARTED`` and
    ``NODE_CHANGED`` as ``STATE_COMPLETE_MESSAGE``. Any other reason, or a
    missing session, is only logged as an error.

    .. Note:: Per the original note, control messages in the session should
        be cleaned up once handled, otherwise they are triggered again.

    :param object session: the state machine session
    :param str reason: why persistence was requested
    :param str finished_name: name of the node that just finished
    :param str next_name: name of the node to be processed next
    :return: None
    :rtype: None
    """
    reasons = graph.PersistedStateMachineHelper.Reason
    if reason == reasons.CONTROL:
        message_name = "PERSIST_SESSION_MESSAGE"
    elif reason == reasons.STARTED or reason == reasons.NODE_CHANGED:
        message_name = "STATE_COMPLETE_MESSAGE"
    else:
        message_name = None

    if session is None or message_name is None:
        log.e("operation persist but session is None or reason unknown")
        return

    payload = {
        "session": session,
        "finished_node": finished_name,
        "current_node": next_name,
        "timestamp": int(time.time()),
    }
    notice = framework.OperationMessage(
        message_name, str(session.id), payload)
    try:
        self._result_queue.put(notice)
    except IOError:
        # A single retry; a second failure propagates to the caller.
        log.f("result_queue.put fail, retry")
        self._result_queue.put(notice)
def http_request(self, host, port, method, url, header=None, data=None,
                 timeout=30, retry_times=2, response_code=None,
                 response_json=True):
    """
    Send an HTTP request, retrying on failure.

    An attempt fails when the request raises or when the response status is
    not acceptable. Acceptable means: contained in ``response_code`` when
    that is a non-empty list, otherwise any 2xx status. Failed attempts are
    retried after sleeping ``(attempt + 1) ** 2`` seconds.

    :param str host: server address
    :param int port: server port
    :param str method: HTTP method
    :param str url: request URL
    :param dict header: HTTP headers
    :param str data: HTTP body
    :param int timeout: request timeout
    :param int retry_times: number of retries after the first attempt;
        negative values are treated as 0
    :param list response_code: status codes regarded as success
    :param bool response_json: True to return the body parsed as JSON,
        False to return the raw body
    :return: the response body, parsed as JSON if ``response_json`` is set
    :rtype: str or the parsed JSON value
    :raises EFailedRequest: the last attempt failed
    """
    log.d("http request, host:{}, port:{}, method:{}, url:{}, header:"
          "{}, data:{}, timeout:{}, retry_times:{}, response code:{}, "
          "response_json:{}".format(host, port, method, url, header, data,
                                    timeout, retry_times, response_code,
                                    response_json))
    header = header or {}
    res_data = None
    # Always make at least one attempt; a negative retry_times used to skip
    # the loop and fail later in json.loads(None).
    attempts = max(retry_times, 0) + 1
    use_default_codes = (not response_code
                         or not isinstance(response_code, list))
    for i in range(attempts):
        stop_tag = i == attempts - 1
        sleep_time = (i + 1) * (i + 1)

        error = None
        conn = None
        try:
            conn = httplib.HTTPConnection(
                host=host, port=port, timeout=timeout)
            conn.request(method=method, url=url, body=data, headers=header)
            resp = conn.getresponse()
            res_data = resp.read()
            log.d("http request ret:{}".format(res_data))
        except Exception as e:
            error = e
            log.f("http request exe{}".format(res_data))
        finally:
            # The body has been read (or the attempt failed), so release the
            # socket now; previously each attempt leaked its connection.
            if conn is not None:
                conn.close()

        if error is not None:
            if stop_tag:
                raise exception.EFailedRequest(
                    "http request failed,error:{}".format(error))
            time.sleep(sleep_time)
            continue

        log.d("http request ok")
        if use_default_codes:
            accepted = 200 <= resp.status < 300
        else:
            accepted = resp.status in response_code
        if accepted:
            break

        if stop_tag:
            if use_default_codes:
                failure = "request failed,code:{},msg:{}".format(
                    resp.status, resp.msg)
            else:
                failure = "request failed,error,code:{},data:{}".format(
                    resp.status, data)
            log.e(failure)
            raise exception.EFailedRequest(failure)
        time.sleep(sleep_time)

    log.d("http response data:{}".format(res_data))
    if response_json:
        return json.loads(res_data)
    return res_data