def __update_sync_table(self):
    """
    Method: __update_sync_table
    Description: Run every "MOC data -> sync table" statement supplied by
                 the application for the NE of the current request
                 (self.req.ne_id).
    Parameter: none
    Return: 0 when the loop over all tables completed;
            -1 when the statements or the connection could not be obtained,
            or when begin()/rollback() itself raised
    Others: A failure on one table is logged and rolled back, and the
            remaining tables are still processed (best effort).
    """
    try:
        app = self.get_worker().get_app()
        sqls = app.get_moc_data_to_sync_tbl_sqls()

        # A separate database connection is used here.  The transaction is
        # committed after each table so that the INSERT statements do not
        # accumulate into one oversized transaction.
        con_pool = app.get_conn_pool()
        with con_pool.get_connection(db_cfg_info.ORACLE_DEFAULT_CON_NAME, False) as db_con:
            for table_name, sql in sqls:
                db_con.begin()
                try:
                    with db_con.get_query() as db_query:
                        db_query.execute(sql, (self.req.ne_id, ))
                    db_con.commit()
                except:
                    # Previously this failure was swallowed without a trace;
                    # record which table failed before undoing its changes.
                    tracelog.exception("update sync table %s failed." % (table_name, ))
                    db_con.rollback()
    except:
        tracelog.exception("update sync table failed.")
        return -1
    return 0
def __compress_data(self):
    """
    Method: __compress_data
    Description: Pack the exported data file (self.db_file_path) into a
                 deflated zip archive inside self.db_file_dir.
    Parameter: none
    Return: 0 on success, err_code_mgr.ER_SYNC_EXPORT_DATA_FAILED on any error
    Others: On success self.db_file_path is switched to the archive path.
    """
    try:
        archive_path = os.path.join(
            self.db_file_dir,
            db_sync_common_const.DB_SYNC_COMPRESSED_FILE_NAME)

        # An archive left over from an earlier run is removed first.
        if os.path.exists(archive_path):
            os.remove(archive_path)

        archive = zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED)
        with archive:
            # Stored inside the archive under the fixed sync file name.
            archive.write(self.db_file_path,
                          db_sync_common_const.DB_SYNC_FILE_NAME)

        self.db_file_path = archive_path
    except:
        tracelog.exception("compress data failed.")
        return err_code_mgr.ER_SYNC_EXPORT_DATA_FAILED

    return 0
def open_oracle(self, host="localhost", port=1521, username="******",
                password="", db="orcl", sysdba=False):
    """
    Method: open_oracle
    Description: Open an Oracle connection through the DB manager of the
                 context, registered under the name "oracle".
    Parameter:
        host: database host
        port: database port
        username: login user
        password: login password (never written to the log)
        db: database name
        sysdba: forwarded unchanged to the DB manager
    Return: none
    Others: Runs while holding the instance lock.  Any exception is logged
            and then re-raised to the caller.
    """
    with self.__lock:
        try:
            self.__context.get_db_mgr().open_oracle(
                "oracle", host, port, username, password, db, sysdba)
        except:
            log_args = (host, port, username, db, sysdba)
            tracelog.exception("mit.open_oracle failed. host:%s, port:%d, "
                               "user:%s, db:%s, sysdba:%s" % log_args)
            raise
def _frame_to_msg(self, frame):
    """
    Method: _frame_to_msg
    Description: Convert an AppFrame into an AcpMessage.
    Parameter:
        frame: AppFrame whose custom bytes hold the packed message id
               followed by the url
    Return: (url, AcpMessage) on success; (None, None) when the custom
            bytes cannot be unpacked or the url part is empty
    Others:
    """
    # The leading part of the custom bytes is the struct-packed header;
    # its first field is the message id.
    try:
        header_fields = self._struct.unpack_from(frame.get_custom_bytes())
        msg_id = header_fields[0]
    except:
        tracelog.exception("SimpleCallAcpSrv._frame_to_msg() failed. "
                           "frame.get_custom_bytes():%s" % (
                           repr(frame.get_custom_bytes())))
        return None, None

    # Everything after the packed header is the url.
    url = frame.get_custom_bytes()[self._struct.size:]
    if len(url) > 0:
        acp_msg = pycallacp.AcpMessage(frame.get_cmd_code(), frame.get_data())
        acp_msg.set_msg_id(msg_id)
        return url, acp_msg

    tracelog.error("SimpleCallAcpSrv._frame_to_msg() failed. "
                   "url is null")
    return None, None
def __prepare_file_dir(self):
    """
    Method: __prepare_file_dir
    Description: Prepare the directory into which the NE uploads its data
                 file: remove an old dump file and create the target
                 directory when it is missing.
    Parameter: none
    Return: 0 on success, -1 when the old file cannot be removed or the
            directory cannot be created
    Others:
    """
    old_dump_path = os.path.join(
        self.get_worker().get_app().get_app_top_path(),
        "data/ftp",
        db_sync_update_const.NE_DB_DUMP_COMPRESSED_PATH % self.ne_id)

    try:
        if os.path.exists(old_dump_path):
            os.remove(old_dump_path)
    except:
        tracelog.exception("remove old file %s failed." % old_dump_path)
        return -1

    # The upload directory has to exist before the NE starts sending.
    upload_dir = os.path.join(
        self.get_worker().get_app().get_app_top_path(),
        "data/ftp/NE/%d/db_sync" % self.ne_id)

    try:
        if not os.path.exists(upload_dir):
            os.makedirs(upload_dir)
    except:
        tracelog.exception("make dir %s failed." % upload_dir)
        return -1

    return 0
def load(self, cfg_file_path):
    """
    Method: load
    Description: Load the device configuration from the given XML file:
                 first the device section, then (only if that succeeded)
                 the cluster section.
    Parameter:
        cfg_file_path: path of the configuration file
    Return: 0 on success; the non-zero code of the failing loader;
            -1 when parsing or loading raised an exception
    Others:
    """
    status = 0
    try:
        root = ET.parse(cfg_file_path).getroot()

        status = self.__load_device_info(root)
        if status == 0:
            status = self.__load_cluster_info(root)

        if status != 0:
            tracelog.error("load device configuration failed. cfg_file_path:%s" % cfg_file_path)
    except:
        tracelog.exception("load device configuration failed. cfg_file_path:%s" % cfg_file_path)
        return -1

    return status
def __lock_table(self, db_con):
    """
    Method: __lock_table
    Description: Lock table tbl_OraSyncEvent in share mode and empty it.
    Parameter:
        db_con: Oracle database connection
    Return: 0 on success, -1 on any error
    Others: Other processes write the notification table whenever they
            modify a MOC.  With the table locked they cannot write it, so
            they also cannot modify the MOC tables being synchronized.
    """
    statements = (
        # Block writers of the notification table.
        "LOCK TABLE tbl_OraSyncEvent IN SHARE MODE",
        # After a full sync the earlier incremental notifications are
        # no longer needed.
        "DELETE FROM tbl_OraSyncEvent",
    )

    try:
        with db_con.get_query() as db_query:
            for statement in statements:
                db_query.execute(statement)
    except:
        tracelog.exception("lock table tbl_OraSyncEvent failed.")
        return -1

    return 0
def __delete_old_ne_data_in_db(self):
    """
    Method: __delete_old_ne_data_in_db
    Description: Delete the rows of this NE (self.ne_id) from the table of
                 every synchronized MOC.
    Parameter: none
    Return: 0 on success, -1 on any error
    Others: Rows are selected by the "_SYNC_SOURCE" column.
    """
    try:
        app = self.get_worker().get_app()
        con_pool = app.get_conn_pool()
        with con_pool.get_connection(db_cfg_info.ORACLE_SYNC_CON_NAME) as db_con, \
                db_con.get_query() as db_query:
            synchronized_mocs = self.get_worker().get_app().get_synchronized_mocs()
            for moc in synchronized_mocs.itervalues():
                # The table name comes from the MOC definition; the NE id
                # is passed as a bind variable.
                delete_sql = 'delete from tbl_%s where "_SYNC_SOURCE"=:1' % moc.get_moc_name()
                db_query.execute(delete_sql, (self.ne_id,))
    except:
        tracelog.exception("delete old NE data in DB failed.")
        return -1

    return 0
def init_ip_from_os(self):
    """
    Method: init_ip_from_os
    Description: Read the external and internal IP addresses from the
                 operating system, using the configured NIC names.
    Parameter: none
    Return: 0 on success, -1 when an address lookup raised an exception
    Others: Both addresses are reset to "" first.  A NIC whose name is ""
            is skipped and its address stays "".
    """
    self.__device_external_ip = ""
    self.__device_internal_ip = ""

    external_nic = self.__device_external_NIC
    if external_nic != "":
        try:
            self.__device_external_ip = utility.get_ip_by_NIC(external_nic)
        except:
            tracelog.exception("get external ip failed. NIC:%s" % external_nic)
            return -1

    internal_nic = self.__device_internal_NIC
    if internal_nic != "":
        try:
            self.__device_internal_ip = utility.get_ip_by_NIC(internal_nic)
        except:
            tracelog.exception("get internal ip failed. NIC:%s" % internal_nic)
            return -1

    return 0
def on_msg_received(self, url_or_srv_name, msg):
    """
    Method: on_msg_received
    Description: Handle a request received by the name service and send
                 the response back as an ACK message.
    Parameter:
        url_or_srv_name: url (or service name) of the sender; the ACK is
                         sent to it
        msg: the received message
    Return: none
    Others: Supported commands are bf.CMD_QUERY_CLUSTER_MASTER_IP and
            bf.REGISTER_NAME_COMMAND.  Any other command code is logged
            and dropped without a reply.
    """
    cmd_code = msg.get_cmd_code()

    if cmd_code == bf.CMD_QUERY_CLUSTER_MASTER_IP:
        rep = bf.QueryClusterMasterIpResponse()
        rep.ip = self.__name_server.get_cluster_master_ip()

    elif cmd_code == bf.REGISTER_NAME_COMMAND:
        try:
            req = bf.AppRegisterRequest.deserialize(msg.get_data())
        except:
            tracelog.exception("AppRegisterRequest deserialize failed.")
            return

        rep = self.__name_server.register_app(req)
        if rep.return_code == 0:
            tracelog.info("app regist name service: %r, pid:%r" % (req, rep.app_info.pid))
        else:
            tracelog.error("app regist name service failed: %r" % req)

    else:
        # Without this branch `rep` would be unbound below and the handler
        # would fail with UnboundLocalError on an unexpected command.
        tracelog.error("on_msg_received: unsupported cmd code: %r" % (cmd_code, ))
        return

    # The ACK carries the id of the request so the sender can match it.
    ack_msg = pycallacp.AcpMessage(pycallacp.CMD_ACK_MSG, rep.serialize())
    ack_msg.set_msg_id(msg.get_msg_id())
    self._callacp_inst.send(url_or_srv_name, ack_msg)
def on_msg_received(self, url_or_srv_name, msg):
    """
    Method: on_msg_received
    Description: Handler for the "message received" event of the cluster
                 server.
    Parameter:
        url_or_srv_name: url of the message sender
        msg: the received message
    Return: none
    Others: Only CMD_CLUSTER_QUERY_STATE and CMD_CLUSTER_ACK_STATE are
            accepted; anything else is logged and ignored.  Exceptions
            raised while handling are logged and not propagated.
    """
    cmd_code = msg.get_cmd_code()

    accepted_codes = (cluster_cmd_code.CMD_CLUSTER_QUERY_STATE,
                      cluster_cmd_code.CMD_CLUSTER_ACK_STATE)
    if cmd_code not in accepted_codes:
        tracelog.error("ClusterServerEventHandler receved invalid msg:%d" % cmd_code)
        return

    try:
        state_msg = ClusterStateMsg.deserialize(msg.get_data())
        if state_msg is None:
            tracelog.error("ClusterStateMsg.deserialize failed. "
                           "msg:%d, %r" % (cmd_code, msg.get_data()))
            return

        if cmd_code == cluster_cmd_code.CMD_CLUSTER_QUERY_STATE:
            # A query needs the sender url so that it can be answered.
            self.__cluster_thread.on_query_state(url_or_srv_name, state_msg)
        else:
            # Only the ACK command can reach this point.
            self.__cluster_thread.on_ack_state(state_msg)
    except:
        tracelog.exception("handler msg(%d) failed" % cmd_code)
def _ready_for_work(self):
    """
    Method: _ready_for_work
    Description: Initialization performed before the thread starts working;
                 asks every worker in turn to get ready.
    Parameter: none
    Return:
        0: all workers are ready
        non-zero: the code returned by the first worker that is not ready,
                  or -1 when a worker raised an exception
    Others: Stops at the first failing worker.
    """
    self.__doing_what = WorkThread.doing_ready_for_work

    for worker in self.__workers:
        try:
            result = worker.ready_for_work()
        except:
            tracelog.exception("worker %s ready_for_work failed." % worker.get_name())
            return -1

        if result != 0:
            tracelog.error("%s is not ready." % worker.get_name())
            return result

        tracelog.info("%s is ready for work." % worker.get_name())

    return 0
def idle(self, total_ready_frames):
    """
    Method: idle
    Description: Idle handler: poll the socket, and when data is readable
                 receive one message, turn it into an AppFrame and hand it
                 to the application for dispatching.
    Parameter:
        total_ready_frames: number of commands waiting to be executed
                            (not used here)
    Return: none
    Others: The poll call is given 1000 as its timeout argument.
    """
    ready = dict(self.__poll.poll(1000))
    if ready.get(self.__socket) != eipc.POLLIN:
        return

    try:
        raw_msg = self.__socket.recv()
    except:
        tracelog.exception("receive data failed.")
        return

    frame = AppFrame.serialize_from_str(raw_msg)
    if frame is not None:
        self.get_app().dispatch_frame_to_duty_worker(frame)
def on_process_app_register(self, all_app_infos):
    """
    Method: on_process_app_register
    Description: React to a change of the registered processes by
                 rebuilding the pid -> (endpoint, socket) table.
    Parameter:
        all_app_infos: app_info of every process in the system
    Return: none
    Others: A socket is kept only when the endpoint of its pid is
            unchanged.  Sockets of changed endpoints and of pids that are
            no longer registered are closed.  Exceptions are logged and
            not propagated.
    """
    try:
        with self.__mutex:
            stale = self.__peer_sockets
            rebuilt = {}
            self.__peer_sockets = rebuilt

            for info in all_app_infos:
                known_endpoint, sock = stale.pop(info.pid, ("", None))
                endpoint_changed = info.endpoint != known_endpoint
                if endpoint_changed and sock is not None:
                    # The old socket points at the previous endpoint.
                    sock.close()
                    sock = None
                rebuilt[info.pid] = (info.endpoint, sock)

            # Whatever is left belongs to pids that are gone.
            for _unused_url, sock in stale.itervalues():
                if sock is not None:
                    sock.close()
    except:
        tracelog.exception("IpcSendWorker.on_process_app_register failed.")
def __call_oracle_cmd(cmd_line, timeout, log_file_path):
    """
    Function: __call_oracle_cmd
    Description: Run a command line as a child process, wait for it with a
                 timeout, and copy its log file into the trace log.
    Parameter:
        cmd_line: command line (argument sequence) passed to Popen
        timeout: timeout in seconds; the child is polled once per second
        log_file_path: path of the log file written by the command, or
                       None when there is nothing to copy
    Return: 0 on success; -1 when the child cannot be started or does not
            finish in time; otherwise the non-zero exit code of the child
    Others: When the child is still running after the timeout it is killed
            and reaped.  Whether it timed out is decided from the real
            process state, so a child that finishes during the last
            polling second is treated as finished.
    """
    try:
        inst = subprocess.Popen(cmd_line, stdin=None)
    except Exception:
        tracelog.exception("call oracle cmd failed. cmd_line:%s" % " ".join(cmd_line))
        return -1

    # Poll once per second, sleeping at most `timeout` times.
    remaining = timeout
    while remaining > 0 and inst.poll() is None:
        time.sleep(1)
        remaining -= 1

    if inst.poll() is None:
        tracelog.error("call oracle cmd timeout. cmd_line:%s" % " ".join(cmd_line))
        # On timeout the child has to be killed; wait() then reaps it so
        # that no zombie process is left behind.
        try:
            inst.kill()
            inst.wait()
        except Exception:
            tracelog.exception("kill oracle cmd failed. cmd_line:%s" % " ".join(cmd_line))
        return -1

    if log_file_path is not None:
        # Copying the log is best effort: a failure is reported but does
        # not change the result of the command.
        try:
            with open(log_file_path) as log_file:
                tracelog.info(log_file.read())
        except Exception as err:
            tracelog.error("read oracle cmd log failed. log_file_path:%s, err:%s" % (
                            log_file_path, err))

    if inst.returncode != 0:
        tracelog.error("call oracle cmd failed, return %d, cmd_line:%s" % (
                        inst.returncode, " ".join(cmd_line)))
        return inst.returncode

    return 0
def handle_cmd(self, frame):
    """
    Method: handle_cmd
    Description: Handle a name broadcast frame: deserialize it and pass
                 the contained app infos to the application.
    Parameter:
        frame: frame whose data is a serialized NameBroadCastMessage
    Return: none
    Others: A frame that cannot be deserialized is logged and dropped.
    """
    buf = frame.get_data()
    try:
        broadcast = NameBroadCastMessage.deserialize(buf)
    except Exception:
        tracelog.exception('deserialize NameBroadCastMessage exception! buf:%s'%buf)
    else:
        app = self.get_worker().get_app()
        app.on_process_app_register(broadcast.all_app_infos)
def run(self):
    """
    Method: run
    Description: Entry point that runs the app: initialize, create the
                 base workers and threads, shake hands with the monitor,
                 then loop until the stop event is set.
    Parameter: none
    Return: 0
    Others: Any exception during start-up or in the main loop is logged.
            The shutdown sequence in the finally clause always runs.
    """
    try:
        self.__stop_event.clear()

        if self._initialize() != 0:
            raise Exception("app _initialize failed!")

        if self._create_base_worker_and_threads() != 0:
            raise Exception("_create_base_worker_and_threads failed!")

        # During start-up keep shaking hands with the monitor until it
        # answers 1, unless a stop was requested in the meantime.
        while self._shakehand_with_monitor(BasicApp.SHAKEHAND_INITIALIZING) != 1:
            if self.__stop_event.isSet():
                raise Exception("shake hand with monitor failed while starting.")

        if self._ready_for_work() != 0:
            raise Exception("_ready_for_work failed!")

        tracelog.info("process %s starts successfully." % (self._instance_name))

        # Main loop: wake up about once per second, check the threads and
        # report to the monitor, until the stop event is set.
        self.__stop_event.clear()
        self.__stop_event.wait(1)
        while not self.__stop_event.isSet():
            self.__monitor_all_threads()
            self._shakehand_with_monitor(BasicApp.SHAKEHAND_RUNNING)
            self.__stop_event.wait(1)
    except:
        tracelog.exception("process run failed.")
    finally:
        tracelog.info("process %s is stopping......" % (self._instance_name))
        self._pre_exit()
        self.__stop_all_threads()
        self._exit_work()
        self.notify_monitor_stop_event()
        tracelog.info("process %s exits successfully." % (self._instance_name))

    return 0
def handle_cmd(self, frame):
    """
    Method: handle_cmd
    Description: Handle a "notify running pids" frame: deserialize it and
                 forward the sender pid and the running pids to the worker.
    Parameter:
        frame: frame whose data is a serialized NotifyRunningPidsMsg
    Return: none
    Others: A frame that cannot be deserialized is logged and dropped.
    """
    try:
        notify_msg = name_msg_def.NotifyRunningPidsMsg.deserialize(frame.get_data())
    except:
        tracelog.exception("NotifyRunningPidsMsg deserialize failed")
        return

    # The sender of this notification is treated as the monitor.
    sender_pid = frame.get_sender_pid()
    pids = notify_msg.running_pids
    self.get_worker().on_notify_running_pids(sender_pid, pids)
def run(self):
    """
    Method: run
    Description: The run interface of the cluster thread.  Repeatedly calls
                 the handler matching the current role/state, and every
                 `reload_times` passes reloads the node list and retries
                 binding or unbinding the virtual IP.
    Parameter: none
    Return: none
    Others: The loop ends when the stop event is set; afterwards
            __clear() is called and the stop is logged.
            NOTE(review): the nesting below was reconstructed from a
            flattened source - confirm against the original file.
    """
    self.__reset_query_counter(True)

    reload_counter = 0
    reload_times = 30

    while 1:
        # An exception inside a role/state handler is logged and must not
        # end the thread.
        try:
            if self.__role == CLUSTER_ROLE_UNKNOWN:
                if self.__state == CLUSTER_STATE_STARTING:
                    self.__when_starting()
            elif self.is_master():
                self.__when_now_master()
            elif self.is_slave():
                if self.__state == CLUSTER_STATE_NO_MASTER:
                    self.__when_now_no_master()
                else:
                    self.__when_now_slave()
        except:
            tracelog.exception("error occur")

        # Wait up to 2 seconds per pass; leave the loop once the stop
        # event is set.
        if self.__stoped_event.wait(2) is True:
            break

        reload_counter += 1
        if reload_counter == reload_times:
            # NOTE(review): this block is outside the try above, so an
            # exception raised here ends the thread - confirm intended.
            self.reload_nodes()
            if self.is_master():
                # Periodically try to bind the virtual IP.
                self.__bind_virtual_ip(False)
            elif self.is_slave():
                # Periodically try to unbind the virtual IP.
                self.__unbind_virtual_ip(False)
            reload_counter = 0

    self.__clear()
    tracelog.info("cluster node stoped.")
def get_msg(self):
    """
    Method: get_msg
    Description: Return the error message for self.err_code, formatted
                 with the keyword arguments stored in self.kw.
    Parameter: none
    Return: the error message; a generic fallback text when the lookup
            raises
    Others: A failing lookup is logged and never propagated.
    """
    try:
        message = err_code_mgr.get_error_msg(self.err_code, **self.kw)
    except:
        tracelog.exception("RuleException: get_error_msg failed, err_code:%s, kw:%s" % (repr(self.err_code), repr(self.kw)))
        return "RuleException: err_code:%s, err_msg:unknow!" % repr(self.err_code)
    else:
        return message
def __query_name_master_ip(self, callacp):
    """
    Method: __query_name_master_ip
    Description: Ask the local name service (on the monitor) for the IP of
                 the name service master.
    Parameter:
        callacp: object whose call(url, msg, timeout) performs the request
    Return: (error code, ip) - (0, ip) on success; (ret, "") when the last
            call failed; (-1, "") when the response cannot be deserialized
            or the returned ip is still empty after all retries
    Others: Up to 15 attempts are made.
    """
    request = pycallacp.AcpMessage(local_cmd_code.CMD_QUERY_CLUSTER_MASTER_IP, "")
    local_url = "tcp://%s:%d" % (self._my_name_ip, local_const_def.NAME_SERVER_PORT)

    name_master_ip = ""
    retry_times = 15    # number of attempts

    for attempt in xrange(retry_times):
        ret, response = callacp.call(local_url, request, 3)
        if ret != 0:
            continue

        try:
            rep_data = name_msg_def.QueryClusterMasterIpResponse.deserialize(response.get_data())
        except:
            tracelog.exception("QueryClusterMasterIpResponse deserialize failed.")
            return -1, ""

        name_master_ip = rep_data.ip
        if name_master_ip != "":
            break

        # No master has been elected yet: wait a moment before the next
        # attempt (there is nothing to wait for after the last one).
        if attempt + 1 < retry_times:
            time.sleep(2)

    if ret != 0:
        tracelog.error("query name master ip from local name service failed, the monitor maybe not running."
                       " url:%s, ret:%d" % (local_url, ret) )
        return ret, ""

    if name_master_ip != "":
        return 0, name_master_ip

    tracelog.error("the name master ip returned by local name service is null.")
    return -1, ""
def reload_nodes(self, log_all_nodes = False): """ Method: reload_nodes Description: 从数据库中重新记载节点信息 Parameter: log_all_nodes: 是否将所有的节点信息记录日志 Return: 错误码,当前的节点信息 Others: """ # 从DB中读取所有节点的信息 # 返回值: 错误码, 当前node cur_node = None with self.__lock: if self.__mit is None: try: db_file = os.path.join(self.__app_top_path, "data", "sqlite", "cluster.db") self.__mit = ClusterMit(db_file) except: tracelog.exception("reload cluster node failed.") return err_code_mgr.ER_CLUSTER_START_FAILED, None # 加载所有的节点信息 other_nodes_ips = set([node.get_ip() for node in self.__other_nodes]) nodes = self.__mit.get_all_nodes() for node in nodes: if node.ip == self.__cluster_cfg_info.my_inner_ip: cur_node = ClusterNodeInfo(node.ip) if node.is_enable == 0: cur_node.set_enable(False) else: other_nodes_ips.discard(node.ip) ret = self.__add_node(node.ip, node.is_enable) if ret != 0: tracelog.error("add cluster node %s failed." % node.ip) return err_code_mgr.ER_CLUSTER_START_FAILED, None if log_all_nodes is True: tracelog.info("load cluster node: %s" % node.ip) # 删除已经不存在的节点 for node_ip in other_nodes_ips: self.__rmv_node(node_ip) return 0, cur_node
def _on_round_over(self, round_id, r):
    """
    Method: _on_round_over
    Description: React to the end of a round: evaluate the NE's response
                 to the full-sync export and, when the export succeeded,
                 start the import.
    Parameter:
        round_id: id of the round
        r: the round object
    Return: none
    Others: On a failed export the NE state is changed through
            __change_ne_state_when_exp_failed.
    """
    try:
        response_frame = r.get_response_frame()
        rep = db_sync_base.SyncFullResponse.deserialize(response_frame.get_data())
    except:
        tracelog.exception("_on_round_over error, NE id:%d" % self.ne_id)
        self.__change_ne_state_when_exp_failed(True)
        return

    return_code = rep.return_code

    if return_code == err_code_mgr.ER_SYNC_NO_TABLE_NEED_SYNC:
        tracelog.info("the NE has no table to sync.")
        # In this case the sync is simply considered finished and no retry
        # is needed.
        self.__change_ne_state_when_exp_failed(False)
        return

    if return_code != 0:
        tracelog.error("NE export data failed. ne id:%d, error:%d,%s" % (
                        self.ne_id, return_code, rep.description))
        self.__change_ne_state_when_exp_failed(True)
        return

    tracelog.info("NE export data ok. ne id:%d" % (self.ne_id))

    # Switch the NE state to "importing".
    # NOTE(review): a failure here is only logged and the import is still
    # started - confirm that this is intended.
    state_ret = NEInfoMgr.change_ne_state_to_imp(self.ne_id,
                                                 self.get_worker().get_mit())
    if state_ret != 0:
        tracelog.error("change_ne_state_to_imp failed. ne_id:%d" % self.ne_id)

    # Kick off the import.
    imp_frame = bf.AppFrame()
    imp_frame.set_cmd_code(cmd_code_def.CMD_START_IMP_FULL)
    imp_frame.add_data(str(self.ne_id))
    self.get_worker().dispatch_frame_to_worker("SyncFullImpWorker", imp_frame)
def reinforc_ssh(ssh_line):
    """
    Function: reinforc_ssh
    Description: Harden ssh: write the new content of /etc/ssh/sshd_config
                 and restart the sshd service.
    Parameter:
        ssh_line: the modified statements of the ssh configuration file
    Return: 0 on success; 1 when the file cannot be written; otherwise
            the code returned by restart_service('sshd')
    Others: none
    """
    sshd_config_path = os.path.join('/etc/', 'ssh', 'sshd_config')
    return_code = write_file(sshd_config_path, ssh_line)
    if return_code != 0:
        # No exception is being handled here, so tracelog.exception would
        # log a meaningless traceback; report a plain error instead.
        tracelog.error('Reinforce SSH Strategy Failed')
        return 1
    return restart_service('sshd')
def sync_data(self, events, ne_id):
    """
    Method: sync_data
    Description: Synchronize the data tables; exception-safe wrapper
                 around _sync_data.
    Parameters:
        events: list of sync events
        ne_id: client (NE) id
    Return: the code returned by _sync_data, or
            DBSyncStatus.ERROR_FAILED when an exception was raised
    """
    try:
        result = self._sync_data(events, ne_id)
        tracelog.info('sync data completed')
        return result
    except:
        tracelog.exception('sync data failed')
        return DBSyncStatus.ERROR_FAILED
def __delete_all_sync_events(self):
    """
    Method: __delete_all_sync_events
    Description: Delete all incremental change notifications stored in the
                 database.
    Parameter: none
    Return: 0 on success, err_code_mgr.ER_SYNC_EXPORT_DATA_FAILED on error
    Others:
    """
    try:
        app = self.get_worker().get_app()
        pool = app.get_conn_pool()
        with pool.get_connection(db_cfg_info.ORACLE_SYNC_CON_NAME) as con:
            event_mgr = DBSyncEventManager(con)
            event_mgr.remove_all_events()
    except:
        tracelog.exception("delete all sync events failed.")
        return err_code_mgr.ER_SYNC_EXPORT_DATA_FAILED

    return 0
def __commit_tran(self, db_con):
    """
    Method: __commit_tran
    Description: Commit the transaction of the given connection.
    Parameter:
        db_con: database connection
    Return: 0 on success, -1 when the commit raised an exception
    Others:
    """
    try:
        db_con.commit()
    except:
        tracelog.exception("commit transaction failed.")
        return -1
    else:
        return 0
def reinforce_user_cycle(user_cycle_line):
    """
    Function: reinforce_user_cycle
    Description: Harden the validity period of user passwords by writing
                 the new content of /etc/login.defs.
    Parameter:
        user_cycle_line: the modified statements of the configuration file
    Return: 0 on success, 1 when the file cannot be written
    Others: none
    """
    login_defs_path = os.path.join('/etc/', 'login.defs')
    return_code = write_file(login_defs_path, user_cycle_line)
    if return_code != 0:
        # No exception is being handled here, so tracelog.exception would
        # log a meaningless traceback; report a plain error instead.
        tracelog.error('Reinforce User Cycle Failed')
        return 1
    return 0
def reinforce_user_strategy(user_strategy_line):
    """
    Function: reinforce_user_strategy
    Description: Harden the user password policy by writing the new
                 content of /etc/pam.d/system-auth.
    Parameter:
        user_strategy_line: the modified statements of the configuration
                            file
    Return: 0 on success, 1 when the file cannot be written
    Others: The path is resolved with os.path.realpath before writing, so
            a symbolic link is followed to the real file.
    """
    file_patch = os.path.realpath(os.path.join('/etc/', 'pam.d', 'system-auth'))
    return_code = write_file(file_patch, user_strategy_line)
    if return_code != 0:
        # No exception is being handled here, so tracelog.exception would
        # log a meaningless traceback; report a plain error instead.
        tracelog.error('Reinforce User Strategy Failed')
        return 1
    return 0
def read_file(file_patch): """ Function: read_file Description: 读取配置文件 Parameter: file_patch: 文件路径 Return: 元组,第一个元素代表是否成功,user_set是配置文件的语句 Others: 无 """ openfile = None user_set = [] try: openfile = open(file_patch,'r') user_set = openfile.readlines() except Exception, err: tracelog.exception('Can not Open %s and the err is %s'%(file_patch,err)) return 1,user_set