def execute_message(self, message):
    """
    Execute a message by building and running a state machine for it.

    Nodes are created from the message, a state machine is created over
    those nodes and then started. If any of these steps raises, the failure
    is logged and ``self.exception_handler`` is called with a description of
    the error and ``message.params``; a failure of the handler itself is
    logged and swallowed. Afterwards the entry of ``self._control_message``
    keyed by the state machine's session id is removed on a best-effort
    basis.

    :param Message message: message object
    :return: no return value
    :rtype: None
    """
    # Stays None when node or state-machine creation fails, so the cleanup
    # below never touches an unbound local (which previously raised
    # UnboundLocalError and masked the original failure).
    state_machine = None
    try:
        nodes = self._create_nodes(message)
        state_machine = self._create_state_machine(message, nodes)
        # Start the state machine.
        state_machine.start()
    except Exception as e:
        log.f("state machine run exception")
        try:
            self.exception_handler(
                "state machine run exception:{}".format(e), message.params)
        except Exception:
            log.f("exception handler")

    if state_machine is None:
        # Without a state machine there is no session id to clean up with.
        log.i("state machine run finished, state machine was not created")
        return

    try:
        del self._control_message[state_machine.session.id]
    except Exception:
        log.f("fail to delete control message")
    log.i("state machine run finished, operationId:{}".format(
        state_machine.session.id))
def choose_master(self):
    """
    Guardian master election.

    The children of ``self.path`` are fetched (passing
    ``self.event_watcher`` as the watcher) and sorted; the part of the first
    name before ``#`` is compared with this instance's configured id. The
    scheduler is started when leadership is newly gained and stopped when it
    is newly lost; if the role did not change only a log line is written.

    :return: None
    """
    children = self._pd.get_children(
        path=self.path, watcher=self.event_watcher)
    elected = sorted(children)[0].split("#")[0]
    own_id = config.GuardianConfig.get(config.INSTANCE_ID_NAME)
    won_election = str(elected) == str(own_id)

    if won_election and not self.is_leader:
        # This instance has just gained leadership.
        self._start_scheduler_func()
        self.is_leader = True
        log.i("I am new master, scheduler")
    elif won_election:
        log.i("I am new master, and is old master, no longer reschedule")
    elif self.is_leader:
        # This instance has just lost leadership.
        self._stop_scheduler_func()
        self.is_leader = False
        log.i("I am slave, stop scheduler")
    else:
        log.i("I am slave, and is old slave, no longer stop scheduler")

    log.i("choose master finished")
def message_handler(self, message):
    """
    Handle one message: run it and enqueue the outcome.

    ``execute_message`` is called for the message. Its return value, or the
    string ``"err:<exception>"`` when it raises, is wrapped in a
    ``COMPLETE_MESSAGE`` operation message carrying the same operation id
    and put on ``self._result_queue``.

    .. Note:: According to the original documentation, operations run
        asynchronously in child processes and their results are collected
        through a multiprocess-safe result queue for the main process to
        handle.

    :param Message message: message object
    :return: no return value
    :rtype: None
    """
    try:
        try:
            log.Logger.setoid(message.operation_id)
            log.i("process new message")
            log.d("process new message,param:%s" % str(message.params))
            ret = self.execute_message(message)
        except Exception as e:
            log.f("execute_message fail")
            ret = "err:{}".format(e)
        complete_message = framework.OperationMessage(
            "COMPLETE_MESSAGE", message.operation_id, ret)
        self._result_queue.put(complete_message)
    finally:
        # Always detach the operation id from the logger, even when building
        # or enqueuing the completion message fails.
        log.Logger.clearoid()
def state_listener(self, state):
    """
    React to a change of the persistence session state.

    On ``LOST`` the instance is created again and the children of
    ``self.path`` are fetched with ``self.event_watcher`` as watcher; this is
    retried once per second until both calls succeed. All other states are
    only logged.

    :param state: the state that triggered this call
    :return: None
    """
    states = persistence.PersistenceEvent.PersistState

    if state == states.LOST:
        log.i("guardian instance state lost")
        recreated = False
        while not recreated:
            try:
                self.create_instance()
                self._pd.get_children(
                    path=self.path, watcher=self.event_watcher)
                log.i("guardian instance state recreate finished")
                recreated = True
            except Exception:
                log.f("create instance err")
                time.sleep(1)
        return

    if state == states.SUSPENDED:
        log.i("guardian instance state suspended")
        return

    if state == states.CONNECTED:
        log.i("guardian instance state connected")
        return

    log.i("guardian instance state unrecognized, state:{}".format(state))
def load_context(cls):
    """
    Load the guardian context.

    If data is stored under the persistent "context" path it is unpickled,
    otherwise a new ``GuardianContext`` is created. Every child of the
    persistent "operations" path is then loaded and stored in the context's
    ``operations`` mapping under its node name; an operation that fails to
    load is logged and skipped. The result is cached on ``cls._context``.

    :return: the context object
    :rtype: GuardianContext
    """
    # NOTE(review): pickle.loads executes arbitrary code for crafted input;
    # this is only safe while the persistence backend is trusted.
    context_path = config.GuardianConfig.get_persistent_path("context")
    raw_context = persistence.PersistenceDriver().get_data(context_path)
    log.i("load context success")
    if raw_context:
        loaded = pickle.loads(raw_context)
    else:
        loaded = GuardianContext()

    # Load the per-operation (state machine) data; each child node of the
    # operations path is named after its operation id.
    operations_root = config.GuardianConfig.get_persistent_path("operations")
    child_names = persistence.PersistenceDriver().get_children(
        operations_root)
    for operation_id in child_names:
        node_path = operations_root + "/" + operation_id
        try:
            raw_operation = persistence.PersistenceDriver().get_data(
                node_path)
            log.i("load operation[{}] success".format(operation_id))
            loaded.operations[operation_id] = pickle.loads(raw_operation)
        except Exception:
            log.f("load operation {} failed".format(operation_id))

    cls._context = loaded
    return loaded
def add_listener(self, watcher):
    """
    Accept a session-state listener.

    This implementation does not register anything; it only logs that there
    is nothing to do.

    :param watcher: state listener callable taking ``(state)``; documented
        values include "SUSPENDED", "CONNECTED" and "LOST"
    :return: no return value
    :rtype: None
    """
    notice = "nothing to do in FilePersistence.add_listener()"
    log.i(notice)
def unsubscribe(self):
    """
    Cancel the subscription and close the connection.

    :return: no return value
    :rtype: None
    """
    # NOTE(review): the method called here is ``disconnected``; if
    # ``self.conn`` is a plain stomp.py connection the intended call is
    # presumably ``disconnect`` -- confirm against ``__get_connection``.
    connection = self.conn
    connection.disconnected()
    log.i("unsubscribe success, connect closed")
def on_persistence(self):
    """
    Persist the current context by calling its ``save_context`` method.

    :return: no return value
    :rtype: None
    """
    context = self._context
    context.save_context()
    log.i("context persistent success")
def update_lock(self, is_lock):
    """
    Update the lock flag of the context and log its new value.

    :param bool is_lock: whether the lock has been acquired
    :return: no return value
    :rtype: None
    """
    self.lock = is_lock
    status_line = "update context lock, {}".format(self.lock)
    log.i(status_line)
def subscribe(self, subscribe_condition):
    """
    Subscribe on the connection using the given condition as destination.

    :param str subscribe_condition: subscription condition
    :return: no return value
    :rtype: None
    """
    connection = self.conn
    connection.subscribe(destination=subscribe_condition)
    log.i("start subscribe success, topic:{}".format(subscribe_condition))
def on_message(self, header, message):
    """
    Handle a message received from the subscription.

    The message body is logged and passed to the callback stored in
    ``self._callback_message``. The header is not used.

    :param dict header: message header
    :param dict message: message body
    :return: no return value
    :rtype: None
    """
    received_line = "receive a message:{}".format(message)
    log.i(received_line)
    self._callback_message(message)
def del_extend(self, key):
    """
    Remove a key from the extension data and persist the context.

    A missing key raises whatever ``self.extend`` raises on item deletion
    (``KeyError`` for a plain dict).

    :param str key: the field to delete
    :return: no return value
    :rtype: None
    """
    del self.extend[key]
    self.save_context()
    summary = "delete extend success, current extend:{}".format(self.extend)
    log.i(summary)
def update_extend(self, params):
    """
    Incrementally update the extension data and persist the context.

    :param dict params: the entries to merge into the extension data
    :return: no return value
    :rtype: None
    """
    self.extend.update(params)
    self.save_context()
    summary = "update extend success, current extend:{}".format(self.extend)
    log.i(summary)
def update_operation(self, operation_id, operation):
    """
    Replace the stored operation for an id and persist it.

    :param str operation_id: operation id
    :param Operation operation: operation object
    :return: no return value
    :rtype: None
    """
    self.operations[operation_id] = operation
    self.save_operation(operation)
    summary = "update operation success, operation_id:{}".format(
        operation_id)
    log.i(summary)
def __init__(self, callback_message):
    """
    Initialise the subscriber.

    The MQ configuration is read from ``GuardianConfig`` under
    ``self.ARK_MQ_CONFIG``, a connection is obtained for it, a listener
    wrapping the callback is attached, and the connection is started and
    connected.

    :param func callback_message: callback invoked for each received message
    """
    mq_config = config.GuardianConfig.get(self.ARK_MQ_CONFIG)
    log.i("activeMQ config:{}".format(mq_config))

    self.conn = self.__get_connection(mq_config)
    listener = ActiveMQListener(callback_message)
    self.conn.set_listener('', listener)
    self.conn.start()
    self.conn.connect()
def create_operation(self, operation_id, operation):
    """
    Add a new operation and persist it.

    Per the original documentation this is usually called once sensing has
    completed; the whole handling flow of one external event is called an
    operation.

    :param str operation_id: operation id, the unique key of the operation
    :param Operation operation: operation object
    :return: no return value
    :rtype: None
    """
    self.operations[operation_id] = operation
    self.save_operation(operation)
    summary = "create new operation success, operation_id:{}".format(
        operation_id)
    log.i(summary)
def deregister(self, message_name_list):
    """
    Stop paying attention to a group of messages.

    The given names are removed from ``self._concerned_message_list``; the
    list is rebuilt from a set, so duplicates are dropped.

    :param list(str) message_name_list: names of the messages to drop
    :return: no return value
    :rtype: None
    """
    remaining = set(self._concerned_message_list) - set(message_name_list)
    self._concerned_message_list = list(remaining)
    summary = "deregister message success, concerned message list:{}".format(
        self._concerned_message_list)
    log.i(summary)
def delete_operation(self, operation_id):
    """
    Delete an operation from the context and from persistent storage.

    Per the original documentation this is usually called when the handling
    of an event has finished.

    :param str operation_id: operation id
    :return: no return value
    :rtype: None
    """
    del self.operations[operation_id]

    operations_root = config.GuardianConfig.get_persistent_path("operations")
    node_path = operations_root + "/" + operation_id
    persistence.PersistenceDriver().delete_node(node_path)

    summary = "delete operation from context success, operation_id:{}".format(
        operation_id)
    log.i(summary)
def _recover_executing_message(self):
    """
    Re-queue a message for every unfinished operation that has none queued.

    For each operation in the context whose status is not ``"FINISH"`` and
    whose id is not reported by ``is_operation_id_in_message_list``, a
    ``DECIDED_MESSAGE`` carrying a deep copy of the operation parameters is
    appended to the context's message list.

    :return: no return value
    :rtype: None
    """
    context = self._context
    for operation in context.operations.itervalues():
        if operation.status == "FINISH":
            continue

        operation_id = operation.operation_id
        if context.is_operation_id_in_message_list(operation_id):
            continue

        # Deep copy so the recovered message does not share the stored
        # parameter object with the operation.
        params = copy.deepcopy(operation.operation_params)
        recovered = OperationMessage("DECIDED_MESSAGE", operation_id, params)
        log.i("recover_message operation_id:{}".format(operation_id))
        context.message_list.append(recovered)
def get_operation_id(self, event):
    """
    Return the operation id for an event.

    The operation id uniquely identifies an operation; callers are
    responsible for its global uniqueness.

    .. Note:: By default the id is the value of the event's
        ``operation_id`` field; when that field is absent a uuid is
        generated. Override this method to obtain the id differently.

    :param dict event: external event
    :return: operation id
    :rtype: str
    """
    if "operation_id" in event:
        operation_id = event["operation_id"]
    else:
        operation_id = uuid.uuid1()
    log.i("event operation_id:{}".format(operation_id))
    return str(operation_id)
def register(self, message_name_list):
    """
    Start paying attention to a group of messages.

    .. Note:: Per the original documentation this may only be called after
        the message handler has been bound to a message pump. Repeated calls
        merge the name lists.

    :param list(str) message_name_list: names of the messages to add
    :return: no return value
    :rtype: None
    """
    merged = set(self._concerned_message_list) | set(message_name_list)
    self._concerned_message_list = list(merged)
    summary = "register message success, concerned message list:{}".format(
        self._concerned_message_list)
    log.i(summary)
def on_sensor_message(self, message):
    """
    Handle a sensor tick: fetch one event and forward it downstream.

    The event returned by ``wait_event`` is wrapped in a ``SENSED_MESSAGE``
    and sent; nothing happens when ``wait_event`` returns ``None``. The
    incoming message itself is not used.

    :param Message message: message object
    :return: no return value
    :rtype: None
    """
    event = self.wait_event()
    if event is not None:
        log.i("get new event:{}".format(event))
        operation_id = self.get_operation_id(event)
        outgoing = OperationMessage("SENSED_MESSAGE", operation_id, event)
        self.send(outgoing)
def on_sensor_message(self, message):
    """
    Handle a sensor tick: take one event from the queue and forward it.

    One event is fetched from ``self._event_queue`` without blocking,
    wrapped in a ``SENSED_MESSAGE`` and sent; nothing happens when the queue
    is empty. The incoming message itself is not used.

    :param Message message: message object
    :return: no return value
    :rtype: None
    """
    try:
        event = self._event_queue.get(block=False)
    except Queue.Empty:
        # Nothing queued right now.
        return
    else:
        log.i("get new event:{}".format(event))
        operation_id = self.get_operation_id(event)
        outgoing = OperationMessage("SENSED_MESSAGE", operation_id, event)
        self.send(outgoing)
def event_watcher(self, event):
    """
    Watch node events and re-run the master election when one is relevant.

    An election is triggered when the event state is ``CONNECTED`` or the
    event type is one of ``CREATED``, ``DELETED``, ``CHANGED`` or ``CHILD``;
    any other event is only logged.

    :param PersistenceEvent event: node state event
    :return: None
    """
    states = persistence.PersistenceEvent.PersistState
    kinds = persistence.PersistenceEvent.EventType

    triggers_election = (
        event.state == states.CONNECTED
        or event.type == kinds.CREATED
        or event.type == kinds.DELETED
        or event.type == kinds.CHANGED
        or event.type == kinds.CHILD
    )
    if not triggers_election:
        log.i("event unrecognized")
        return

    log.i("event change, state:{}".format(event.state))
    self.choose_master()
def save_operation(self, operation):
    """
    Persist one operation (state machine data).

    The operation is pickled and stored under the persistent "operations"
    path in a node named after its ``operation_id``; the node is created
    first when it does not exist yet.

    :param operation: the operation object to persist
    :return: no return value
    :rtype: None
    :raises EInvalidOperation: when ``self.lock`` is falsy
    """
    if not self.lock:
        reason = "current guardian instance no privilege to save operation"
        log.e(reason)
        raise exception.EInvalidOperation(reason)

    operations_root = config.GuardianConfig.get_persistent_path("operations")
    node_path = operations_root + "/" + operation.operation_id

    if not persistence.PersistenceDriver().exists(node_path):
        persistence.PersistenceDriver().create_node(path=node_path)

    serialized = pickle.dumps(operation)
    persistence.PersistenceDriver().save_data(node_path, serialized)
    log.i("save operation_id:{} success".format(operation.operation_id))
def execute_message(self, message):
    """
    Execute a message by running a state machine over ``self._nodes``.

    A state machine is created for the message and started. Afterwards the
    entry of ``self._control_message`` keyed by the state machine's session
    id is removed on a best-effort basis; a failure to remove it is logged
    and does not propagate.

    :param Message message: message object
    :return: no return value
    :rtype: None
    """
    state_machine = self._create_state_machine(message, self._nodes)
    # Start the state machine.
    state_machine.start()
    try:
        del self._control_message[state_machine.session.id]
    except Exception:
        # Cleanup stays best-effort, but the failure is recorded instead of
        # being silently discarded.
        log.f("fail to delete control message")
    log.i("state machine run finished, operationId:{}".format(
        state_machine.session.id))
def _collect_new_events(self):
    """
    Collect the next batch of new events from the driver.

    The collection window ends ``self._persist_time + 1`` seconds before the
    current time and starts at ``self._collect_begin_time``. Events in the
    window are queried, sorted, and numbered within each timestamp. Events
    positioned before ``(self._begin_time, self._begin_isn)`` are dropped
    when ``self._begin_time`` is not earlier than the window start. The
    window end becomes the next collection start.

    :return: sorted events, or an empty list when there is no window
    :rtype: list
    """
    window_end = int(time.time()) - self._persist_time - 1
    window_begin = self._collect_begin_time

    # No collectable interval yet; the window must span at least 1s.
    if window_begin >= window_end:
        return []

    # Window larger than the maximum: move the start forward.
    if window_end - window_begin > self._max_collect_time:
        window_begin = window_end - self._max_collect_time - 1
        log.i(
            "elastic_event collection_time exceed %d, begin time reset to %d"
            % (self._max_collect_time, window_begin))

    events = []
    for raw in self._driver.query_all(window_begin, window_end):
        events.append(
            SortableEvent(
                self._driver.get_ts(raw), self._driver.get_id(raw), raw))
    events.sort()

    # The in-timestamp sequence numbers have not been set yet; assign each
    # event its position among the events sharing its timestamp, following
    # the sorted order.
    previous_ts = 0
    seq = 0
    for item in events:
        if previous_ts != item.ts:
            previous_ts = item.ts
            seq = 0
        else:
            seq += 1
        item.ts_seq = seq

    # The position recorded in the context lies inside this window: resume
    # from the first event that has not been handled yet.
    if self._begin_time >= window_begin:
        pending = []
        for item in events:
            is_later = item.ts > self._begin_time
            is_same_ts_unhandled = (
                item.ts == self._begin_time
                and item.ts_seq >= self._begin_isn)
            if is_later or is_same_ts_unhandled:
                pending.append(item)
        events = pending

    self._collect_begin_time = window_end
    return events
def event_dealer(self):
    """
    Event loop that periodically pulls external events.

    Until ``self._stop_tag`` is set, ``get_event`` is polled. When it returns
    ``None`` or raises, the loop sleeps for ``self._query_interval`` and
    tries again; otherwise the event is passed to ``callback_event``.

    :return: no return value
    :rtype: None
    """
    while not self._stop_tag:
        try:
            event = self.get_event()
            if event is None:
                # Nothing available yet; wait before polling again.
                time.sleep(self._query_interval)
                continue
        except Exception:
            log.f("get event failed")
            time.sleep(self._query_interval)
            continue
        else:
            log.i("get a new event from external system")
            self.callback_event(event)
def on_message(self, message):
    """
    Trigger execution of a message and forward its result.

    ``on_execute_message`` may be synchronous or asynchronous; when it is
    asynchronous it should return ``None``. A falsy result is ignored, a
    ``Message`` result is logged and sent.

    :param Message message: message object
    :return: no return value
    :rtype: None
    :raises ETypeMismatch: the result is truthy but not a ``Message``
    """
    result = self.on_execute_message(message)
    if not result:
        return
    if not isinstance(result, Message):
        raise exception.ETypeMismatch()

    log.i("execute message return:{}".format(result))
    self.send(result)
def on_decision_message(self, message):
    """
    Decision handling logic.

    A ``SENSED_MESSAGE`` is run through ``decision_logic`` and the result is
    sent; a ``COMPLETE_MESSAGE`` is ignored; a message whose name is in
    ``self._concerned_message_list`` goes to ``on_extend_message``.

    :param Message message: message object
    :return: no return value
    :rtype: None
    :raises EUnknownEvent: the message name is none of the above
    """
    log.i("on decision message:{}".format(message.name))

    if message.name == "SENSED_MESSAGE":
        self.send(self.decision_logic(message))
        return

    if message.name == "COMPLETE_MESSAGE":
        # Nothing to do for completion notifications.
        return

    if message.name in self._concerned_message_list:
        self.on_extend_message(message)
        return

    raise exception.EUnknownEvent(
        "message type [{}] is not concerned".format(message.name))