def choose_master(self):
    """
    Elect the guardian master among the registered instances.

    Lists the children of ``self.path`` (registering ``self.event_watcher``
    as the watch), sorts the node names and takes the part before ``#`` of
    the first name as the elected instance id. When that id equals this
    instance's configured id the scheduler is started (unless this instance
    is already the leader); otherwise the scheduler is stopped (if this
    instance was the leader).

    If no child node exists, the election is skipped and the current
    leadership flag is left untouched.

    :return: None
    """
    instance_list = self.zk.client.get_children(path=self.path,
                                                watch=self.event_watcher)
    if not instance_list:
        # Nothing to elect from; indexing the sorted list would raise
        # IndexError. The watch above is already registered, so a later
        # change of the children still triggers a new election.
        log.warning("choose master skipped, no instance found under "
                    "path:{}".format(self.path))
        return
    # NOTE(review): names are compared as whole strings, so the part before
    # '#' dominates the ordering -- confirm this matches the intended
    # "smallest sequence number wins" rule.
    instance = sorted(instance_list)[0].split("#")[0]
    own_id = str(config.GuardianConfig.get(INSTANCE_ID_NAME))
    if str(instance) == own_id:
        # This instance holds the leadership.
        if not self.is_leader:
            self._start_scheduler_func()
            self.is_leader = True
            log.info("I am new master, scheduler")
        else:
            log.info("I am new master, and is old master, "
                     "no longer rescheduler")
    else:
        # This instance does not hold the leadership.
        if self.is_leader:
            self._stop_scheduler_func()
            self.is_leader = False
            log.info("I am slave, stop scheduler")
        else:
            log.info("I am slave, and is old slave, "
                     "no longer stop scheduler")
    log.info("choose master finished")
def create_driver(self, platform, driver_type):
    """
    Create a driver object for the given platform and driver type.

    The module path is built as
    ``opal.driver.<type>_driver.<platform>_<type>_driver`` where ``<type>``
    is the underscore form of ``driver_type`` and ``<platform>`` is the
    lower-cased platform name. The class looked up in that module is the
    camel-case form of the concrete module name, and it is instantiated
    without arguments.

    :param str platform: platform name
    :param str driver_type: driver type
    :return: the driver object
    :rtype: BaseDriver
    :raises ImportError: the driver module cannot be imported
    """
    driver_module = StringUtil.camel_to_underline(driver_type) + '_driver'
    concrete_driver_module = platform.lower() + '_' + driver_module
    full_driver_path = 'opal.driver.' + driver_module + \
                       '.' + concrete_driver_module
    # Import the driver module dynamically.
    log.info('create driver full path:{}'.format(full_driver_path))
    try:
        imported_module = __import__(full_driver_path,
                                     fromlist=[concrete_driver_module])
    except ImportError:
        log.error('driver: {} of the platform:{} is not '
                  'exist'.format(driver_type, platform))
        # Bare raise keeps the original traceback of the failed import.
        raise
    driver_class_name = StringUtil.underline_to_camel(
        concrete_driver_module.title())
    driver_class = getattr(imported_module, driver_class_name)
    driver_obj = driver_class()
    log.info('create driver class:{}'.format(driver_class_name))
    return driver_obj
def state_listener(self, state):
    """
    Listen to the session state.

    On ``LOST`` the instance node is re-created and the children watch on
    ``self.path`` is registered again; this is retried once per second
    until it succeeds. The other states are only logged.

    :param state: the new connection state
    :return: None
    """
    if state == client.KazooState.LOST:
        log.info("guardian instance state lost")
        while True:
            try:
                self.create_instance()
                self.zk.client.get_children(path=self.path,
                                            watch=self.event_watcher)
                log.info("guardian instance state recreate finished")
                break
            except Exception as e:
                # print_exc() takes (limit, file), not a message: log the
                # message separately, then print the traceback.
                log.error("create instance err:{}".format(e))
                traceback.print_exc()
                time.sleep(1)
    elif state == client.KazooState.SUSPENDED:
        log.info("guardian instance state suspended")
    elif state == client.KazooState.CONNECTED:
        log.info("guardian instance state connected")
    else:
        log.info(
            "guardian instance state unrecognized, state:{}".format(state))
def say_hello(self, params):
    """
    Log the received event parameters and return an empty result.

    :param params: event parameters; only formatted into the log line
    :return: an empty dict
    :rtype: dict
    """
    greeting = "hello, event params:{}".format(params)
    log.info(greeting)
    return {}
def unsubscribe(self):
    """
    Cancel the subscription and close the connection.

    :return: nothing
    :rtype: None
    """
    connection = self.conn
    # NOTE(review): confirm the connection object really exposes
    # ``disconnected()``; messaging clients commonly name this
    # ``disconnect()``.
    connection.disconnected()
    log.info("unsubscribe success, connect closed")
def on_persistence(self):
    """
    Persist the data by calling ``save_context`` on ``self._context``.

    :return: nothing
    :rtype: None
    """
    guardian_context = self._context
    guardian_context.save_context()
    log.info("context persistent success")
def update_lock(self, is_lock):
    """
    Update the lock flag.

    :param bool is_lock: whether the lock has been acquired
    :return: nothing
    :rtype: None
    """
    self.lock = is_lock
    lock_message = "update context lock, {}".format(self.lock)
    log.info(lock_message)
def subscribe(self, subscribe_condition):
    """
    Subscribe according to the given condition.

    The condition is passed to the connection as the ``destination``.

    :param str subscribe_condition: subscription condition
    :return: nothing
    :rtype: None
    """
    topic = subscribe_condition
    self.conn.subscribe(destination=topic)
    success_message = "start subscribe success, topic:{}".format(topic)
    log.info(success_message)
def on_message(self, header, message):
    """
    React to a subscribed message by handing it to the callback.

    :param dict header: message header (not used here)
    :param dict message: message body, logged and passed to the callback
    :return: nothing
    :rtype: None
    """
    received = "receive a message:{}".format(message)
    log.info(received)
    callback = self._callback_message
    callback(message)
def deregister(self, message_name_list):
    """
    Stop paying attention to a group of messages.

    The concerned-message list is replaced by the set difference between
    the current list and ``message_name_list``.

    :param list(str) message_name_list: list of message names
    :return: nothing
    :rtype: None
    """
    currently_concerned = set(self._concerned_message_list)
    to_remove = set(message_name_list)
    self._concerned_message_list = list(currently_concerned - to_remove)
    summary = "deregister message success, concerned message list:{}".format(
        self._concerned_message_list)
    log.info(summary)
def del_extend(self, key):
    """
    Delete the given key from the extension data, then save the context.

    :param str key: the field to delete
    :return: nothing
    :rtype: None
    """
    del self.extend[key]
    self.save_context()
    result_message = "delete extend success, current extend:{}".format(
        self.extend)
    log.info(result_message)
def __init__(self, callback_message):
    """
    Initialise the subscriber.

    Reads the MQ configuration, creates the connection, attaches an
    ``ActiveMQListener`` wrapping the callback, then starts and connects
    the connection.

    :param func callback_message: callback invoked for subscribed messages
    """
    mq_config = config.GuardianConfig.get(self.ARK_MQ_CONFIG)
    log.info("activeMQ config:{}".format(mq_config))
    self.conn = self.__get_connection(mq_config)
    listener = ActiveMQListener(callback_message)
    connection = self.conn
    connection.set_listener('', listener)
    connection.start()
    connection.connect()
def update_extend(self, params):
    """
    Update the extension data incrementally, then save the context.

    :param dict params: the entries to merge into the extension data
    :return: nothing
    :rtype: None
    """
    self.extend.update(params)
    self.save_context()
    result_message = "update extend success, current extend:{}".format(
        self.extend)
    log.info(result_message)
def update_operation(self, operation_id, operation):
    """
    Update an operation, then save the context.

    :param str operation_id: operation id
    :param Operation operation: operation object stored under that id
    :return: nothing
    :rtype: None
    """
    operations = self.operations
    operations[operation_id] = operation
    self.save_context()
    result_message = "update operation success, operation_id:{}".format(
        operation_id)
    log.info(result_message)
def create_operation(self, operation_id, operation):
    """
    Add a new operation, then save the context.

    Generally called when sensing completes. The whole handling flow of one
    external event is called an operation.

    :param str operation_id: operation id, the unique key of the operation
    :param Operation operation: operation object
    :return: nothing
    :rtype: None
    """
    operations = self.operations
    operations[operation_id] = operation
    self.save_context()
    result_message = "create new operation success, operation_id:{}".format(
        operation_id)
    log.info(result_message)
def delete_operation(self, operation_id):
    """
    Delete an operation, then save the context.

    Generally called when the handling of an event has finished.

    :param str operation_id: operation id
    :return: nothing
    :rtype: None
    """
    del self.operations[operation_id]
    self.save_context()
    result_message = \
        "delete operation from context success, operation_id:{}".format(
            operation_id)
    log.info(result_message)
def _recover_executing_message(self):
    """
    Re-queue a message for every unfinished operation that has none.

    For each operation in the context whose status is not ``"FINISH"`` and
    whose id is not reported by ``is_operation_id_in_message_list``, a
    ``DECIDED_MESSAGE`` carrying a deep copy of the operation parameters is
    appended to the context's message list.

    :return: nothing
    :rtype: None
    """
    # ``values()`` works on both Python 2 and 3, unlike ``itervalues()``.
    for operation in self._context.operations.values():
        if operation.status == "FINISH":
            continue
        operation_id = operation.operation_id
        if self._context.is_operation_id_in_message_list(operation_id):
            continue
        # Deep copy so the recovered message does not share the params
        # object with the stored operation.
        message = OperationMessage(
            "DECIDED_MESSAGE", operation_id,
            copy.deepcopy(operation.operation_params))
        log.info(
            "recover_message operation_id:{}".format(operation_id))
        self._context.message_list.append(message)
def register(self, message_name_list):
    """
    Start paying attention to a group of messages.

    .. Note:: This method may only be called after the message handler has
        been bound to the message pump. Calling it several times merges
        the message name lists.

    :param list(str) message_name_list: list of message names
    :return: nothing
    :rtype: None
    """
    currently_concerned = set(self._concerned_message_list)
    to_add = set(message_name_list)
    self._concerned_message_list = list(currently_concerned | to_add)
    summary = "register message success, concerned message list:{}".format(
        self._concerned_message_list)
    log.info(summary)
def get_operation_id(self, event):
    """
    Get the operation id.

    The operation id uniquely identifies an operation; its global
    uniqueness has to be guaranteed by the caller.

    .. Note:: By default the id is the value of the ``operation_id`` field
        of the event; when that field is missing a uuid is generated.
        Override this method to obtain the id differently.

    :param dict event: external event
    :return: operation id
    :rtype: str
    """
    if "operation_id" in event:
        operation_id = event["operation_id"]
    else:
        operation_id = uuid.uuid1()
    log.info("event operation_id:{}".format(operation_id))
    return str(operation_id)
def send(self, message):
    """
    Send a message to the message pump.

    .. Note:: This method must be called in the main process. Calling it in
        a child process is strictly forbidden, as it may cause serious
        problems such as execution records being overwritten or sent
        messages never being handled.

    .. Note:: The persistence work related to this method is implemented in
        a decorator. Avoid overriding this method; if you must, be clear
        about the resulting behaviour and add the decorator explicitly.

    :param Message message: message object
    :return: nothing
    :rtype: None
    """
    pump = self._message_pump
    pump.send(message)
    sent_message = "send message to message pump success, message:{}".format(
        message.name)
    log.info(sent_message)
def save_context(self):
    """
    Persist the runtime data.

    Persisting is only allowed while the ``lock`` attribute is true (the
    current guardian is the master); otherwise it fails. The context is
    pickled and saved under the path derived from the guardian id.

    :return: nothing
    :rtype: None
    :raises EInvalidOperation: the instance does not hold the lock
    """
    if not self.lock:
        denied = "current guardian instance no privilege to save context"
        log.error(denied)
        raise exception.EInvalidOperation(denied)
    context_path = CONTEXT_PATH.format(self.__guardian_id)
    zk_client = ZkClient()
    serialized = pickle.dumps(self)
    zk_client.save_data(context_path, serialized)
    log.info("save context success")
def event_dealer(self):
    """
    Event handling loop: periodically pull external events.

    Runs until ``self._stop_tag`` is set. Each round fetches one event and
    hands it to ``callback_event``; a failure while fetching is logged and
    the round is skipped. Every round ends by sleeping for
    ``self._query_interval``.

    :return: nothing
    :rtype: None
    """
    while not self._stop_tag:
        try:
            event = self.get_event()
        except Exception as e:
            log.warning("get event failed,err:{}".format(e))
        else:
            # Only reached when fetching succeeded; errors raised by the
            # callback are deliberately not caught here.
            log.info("get a new event from external system")
            self.callback_event(event)
        time.sleep(self._query_interval)
def on_sensor_message(self, message):
    """
    Handle a sensing tick: take one event from the queue and send it on.

    Returns silently when the event queue is empty. Otherwise wraps the
    event in a ``SENSED_MESSAGE`` keyed by its operation id and sends it.

    :param Message message: message object (not used here)
    :return: nothing
    :rtype: None
    """
    try:
        # Non-blocking read: an empty queue simply ends this round.
        event = self._event_queue.get(block=False)
    except Queue.Empty:
        return
    log.info("get new event:{}".format(event))
    sensed_message = OperationMessage(
        "SENSED_MESSAGE", self.get_operation_id(event), event)
    self.send(sensed_message)
def event_watcher(self, event):
    """
    Watch callback for node events.

    Re-runs the master election when the event state is ``CONNECTED`` or
    the event type is one of ``CREATED``, ``DELETED``, ``CHANGED`` or
    ``CHILD``; any other event is only logged.

    :param event: the event, read through its ``state`` and ``type``
    :return: None
    """
    election_types = ("CREATED", "DELETED", "CHANGED", "CHILD")
    if event.state == "CONNECTED" or event.type in election_types:
        log.info("event change, state:{}".format(event.state))
        self.choose_master()
        return
    log.info("event unrecognized")
def record_action(self):
    """
    Record the state of this object.

    The object is serialised to JSON (falling back to ``__dict__`` for
    objects json cannot encode) and stored under its operation id.

    .. Note:: To keep a failing ES service from terminating the main
        process, this method catches every exception and logs it.

    :return: nothing
    :rtype: None
    """
    def _as_dict(obj):
        return obj.__dict__

    try:
        es_client = ESClient("ark", "operation")
        document = json.dumps(self, default=_as_dict)
        es_client.put_data(self.operation_id, document)
    except Exception as e:
        log.error("record operation err:{}".format(e))
        return
    log.info("record action success, operation_id:{}".format(
        self.operation_id))
def run_next(self):
    """
    Perform one state transition.

    .. Note:: In the workflow model the state to run after a state has been
        processed is not fixed (a state only suggests the next one), so
        every state has to define its own ``check`` method. After a state
        has been processed the states are checked, and the first state
        whose check passes is processed.

    .. Note:: The scan starts from the state suggested by the previously
        processed state (or, when no valid suggestion was returned, from
        the state following it in the state list) to improve the hit rate.

    States that are not reentrant and have already been processed are
    skipped. At most one state is processed per call.

    :return: nothing
    :rtype: None
    """
    node = self.get_node(self._current_node)
    index = self._nodes.index(node)
    node_count = len(self._nodes)
    # Visit every node once, starting at the current one and wrapping
    # around. Built from lists so it also works where ``range`` is lazy.
    index_list = list(range(index, node_count)) + list(range(0, index))
    for i in index_list:
        node = self._nodes[i]
        if not node.reentrance and self._nodes_process[node.name]:
            continue
        ret = node.check(self._session, self._current_node,
                         self._nodes_process)
        log.info("node {} check ret:{}".format(self._current_node, ret))
        if not ret:
            continue
        self._nodes_process[node.name] = True
        current_node = node.process(self._session, self._current_node,
                                    self._nodes_process)
        log.info("node process finished, suggest next "
                 "node:{}".format(current_node))
        if current_node == self._ARK_NODE_END:
            self._status = self.Status.FINISHED
        elif current_node not in self._nodes_process:
            # Unknown suggestion: fall back to the next node in the list.
            self._current_node = self._nodes[(i + 1) % node_count].name
        else:
            self._current_node = current_node
        return
def on_decision_message(self, message):
    """
    Decision handling logic.

    ``SENSED_MESSAGE`` is run through ``decision_logic`` and the result is
    sent on; ``COMPLETE_MESSAGE`` is ignored; any other concerned message
    goes to ``on_extend_message``.

    :param Message message: message object
    :return: nothing
    :rtype: None
    :raises EUnknownEvent: the message is not a concerned one
    """
    log.info("on decision message:{}".format(message.name))
    if message.name == "SENSED_MESSAGE":
        self.send(self.decision_logic(message))
        return
    if message.name == "COMPLETE_MESSAGE":
        return
    if message.name in self._concerned_message_list:
        self.on_extend_message(message)
        return
    raise exception.EUnknownEvent(
        "message type [{}] is not concerned".format(message.name))
def on_decision_message(self, message):
    """
    Decision handling logic.

    ``SENSED_MESSAGE`` is run through ``decision_logic`` and the result is
    sent on; ``COMPLETE_MESSAGE`` deletes the operation from the guardian
    context; any other concerned message goes to ``on_extend_message``.

    :param Message message: message object
    :return: nothing
    :rtype: None
    :raises EUnknownEvent: the message is not a concerned one
    """
    log.info("on decision message:{}".format(message.name))
    if message.name == "SENSED_MESSAGE":
        self.send(self.decision_logic(message))
        return
    if message.name == "COMPLETE_MESSAGE":
        # The task is complete, so its state machine has to be cleaned up.
        context.GuardianContext.get_context().delete_operation(
            message.operation_id)
        return
    if message.name in self._concerned_message_list:
        self.on_extend_message(message)
        return
    raise exception.EUnknownEvent(
        "message type [{}] is not concerned".format(message.name))
def decision_logic(self, message):
    """
    The concrete decision logic.

    Looks up the value of ``self._from_key`` in the message params within
    ``self._mapping`` and stores the mapped value under ``self._to_key`` in
    a deep copy of the params, which becomes the ``DECIDED_MESSAGE`` body.

    :param Message message: message object
    :return: the decided message
    :rtype: Message
    :raises ETypeMismatch: the source key is missing from the params or
        its value is not in the mapping
    """
    log.info("begin decision logic, message:{}".format(message.name))
    operation_id = message.operation_id
    params = message.params
    if self._from_key not in params \
            or params[self._from_key] not in self._mapping:
        raise exception.ETypeMismatch(
            "{} not in params or params[{}] not in "
            "mapping".format(self._from_key, self._from_key))
    # Work on a copy so the incoming message params stay untouched.
    decided_params = copy.deepcopy(params)
    source_value = decided_params[self._from_key]
    decided_params[self._to_key] = self._mapping[source_value]
    return framework.OperationMessage(
        "DECIDED_MESSAGE", operation_id, decided_params)
def on_message(self, message):
    """
    Trigger the execution and report its result.

    ``on_execute_message`` may be synchronous or asynchronous. When it is
    asynchronous the returned result is not necessarily the result of this
    call; the ``EXECUTOR_OPERATION_ID`` field of the result tells which
    operation it belongs to. A falsy result is ignored; otherwise a
    ``COMPLETE_MESSAGE`` carrying the result is sent.

    :param Message message: message object
    :return: nothing
    :rtype: None
    :raises ETypeMismatch: the result is not a dict
    :raises EMissingParam: the result lacks the operation id field
    """
    ret = self.on_execute_message(message)
    log.info("execute message return:{}".format(ret))
    if not ret:
        return
    if not isinstance(ret, dict):
        raise exception.ETypeMismatch()
    if EXECUTOR_OPERATION_ID not in ret:
        raise exception.EMissingParam()
    complete_message = OperationMessage(
        "COMPLETE_MESSAGE", ret[EXECUTOR_OPERATION_ID], ret)
    self.send(complete_message)