def saveCfgInfo(addr, data):
    """Store one host-configuration report in OP_MCH_DT_CFG.

    Args:
        addr: Host address; resolved to the OP_MCH_HD id through
            ``db.query_node_id``.
        data: Mapping providing ``gatherDate`` (a ``%Y%m%d%H%M`` string),
            ``cpuCoreQty``, ``diskSize``, ``swapSize``, ``memSize`` and
            ``systemVersion``.

    Returns:
        Whatever ``db.insert_data`` returns on success; 0 when any step
        raised (the traceback is handed to ``setLog``).
    """
    succFlag = 0
    try:
        # Named insert_sql (not ``str``) so the builtin is not shadowed.
        insert_sql = "INSERT INTO OP_MCH_DT_CFG(OP_MCH_HD_ID, GATHERDATE, CPUCOREQTY, DISKSIZE,SWAPSIZE,MEMSIZE, SYSTEMVERSION, EFFFLAG ,MEMO) " \
                     "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
        val = (
            db.query_node_id(addr),
            # Re-format the minute-precision gatherDate as %Y%m%d%H%M%S.
            time.strftime("%Y%m%d%H%M%S",
                          time.strptime(data['gatherDate'], "%Y%m%d%H%M")),
            int(data['cpuCoreQty']),
            data['diskSize'],
            data['swapSize'],
            data['memSize'],
            data['systemVersion'],
            1,
            "")
        succFlag = db.insert_data(sql=insert_sql, val=val)
    except Exception:
        setLog('保存配置信息 saveCfgInfo error:{0}'.format(traceback.format_exc()))
        succFlag = 0
    return succFlag
def insert_table_batch(hdSql, hdVal, dtSql, dtValList):
    """Insert one header row and its detail rows in a single transaction.

    The header's ``lastrowid`` is prepended to every detail tuple before the
    detail statement is executed with ``executemany``.

    Args:
        hdSql: Parameterized INSERT statement for the header row.
        hdVal: Parameter values for ``hdSql``.
        dtSql: Parameterized INSERT statement for the detail rows; its first
            placeholder receives the header key.
        dtValList: Iterable of tuples with the remaining detail values.

    Returns:
        1 when everything was committed, 0 when anything failed (the
        transaction is rolled back and the traceback passed to ``setLog``).
    """
    con = connect_op_db()
    cur = con.cursor()
    try:
        cur.execute(hdSql, hdVal)
        hdKey = cur.lastrowid
        valList = [(hdKey, ) + detail for detail in dtValList]
        cur.executemany(dtSql, valList)
        # Explicit check instead of ``assert``: assertions are removed when
        # Python runs with -O, which would silently disable this guard.
        if cur.rowcount != len(valList):
            raise RuntimeError(
                'insert_table_batch() expected {0} detail rows, got {1}'.format(
                    len(valList), cur.rowcount))
        con.commit()
        return 1
    except Exception:
        setLog('insert_table_batch() Insert operation error:{0}'.format(
            traceback.format_exc()))
        con.rollback()
        return 0
    finally:
        cur.close()
        con.close()
def testConn(sleeptime):
    """Run ``testConnect`` forever, pausing ``sleeptime`` seconds per round.

    Any exception raised by ``testConnect`` is logged through ``setLog`` and
    the loop is ended by raising ``SystemExit``.

    Args:
        sleeptime: Delay passed to ``time.sleep`` between two rounds.
    """
    # Local import: sys.exit() replaces the site-provided exit() helper,
    # which is meant for the interactive shell and may be absent.
    import sys

    while True:
        print('testConn')
        try:
            testConnect()
        except Exception as err:
            setLog('连接测试失败:{0}'.format(err))
            sys.exit()
        time.sleep(sleeptime)
def agentMonitor(sleeptime):
    """Run ``monitor`` forever, pausing ``sleeptime`` seconds per round.

    Any exception raised by ``monitor`` is logged through ``setLog`` and the
    loop is ended by raising ``SystemExit``.

    Args:
        sleeptime: Delay passed to ``time.sleep`` between two rounds.
    """
    # Local import: sys.exit() replaces the site-provided exit() helper,
    # which is meant for the interactive shell and may be absent.
    import sys

    while True:
        print('agentMonitor')
        try:
            monitor()
        except Exception as err:
            setLog('代理进程监控失败:{0}'.format(err))
            sys.exit()
        time.sleep(sleeptime)
def RecvData(sock, addr, buffsize):
    """Receive one framed message from ``sock``, route it and send a reply.

    Frame layout (all positions are characters of the decoded text):
    characters 0-9 hold the total message length, 10-22 the sender's
    millisecond timestamp, followed by ``{hostname}`` and then the payload.

    The payload is passed to ``datarouter(addr[0], payload)``. The reply is a
    JSON-like string with code 200 (routed successfully), 998 (router reported
    failure) or 999 (an exception occurred). The socket is always closed.

    Args:
        sock: Connected socket of the client.
        addr: Peer address; only ``addr[0]`` is used.
        buffsize: Maximum number of bytes read per ``recv`` call.
    """
    # Bound up front so the error reply can always be built, even when the
    # failure happens before any data was received.
    lens = '0'
    try:
        data = ''
        sock.send(b'Welcome!')
        while True:
            try:
                recv = sock.recv(buffsize).decode('utf-8')
            except Exception as e:
                # logging.info, not the integer constant logging.INFO.
                logging.info(e)
                # Treat a failed receive like "no more data".
                recv = ''
            # Short pause (0.1 s) before deciding whether to keep reading.
            time.sleep(0.1)
            if not recv:
                break
            data = data + recv
            # Stop once the announced length has arrived, then strip the
            # 23-character header and the {hostname} part.
            if len(data) >= int(data[0:10]):
                hostname = re.findall(r'({.*?})', data[23:])[0]
                data = data[23 + len(hostname):]
                break
        lens = str(len(data))
        if datarouter(addr[0], data):
            srecvmsg = '{"code":"200","lens":"' + lens + '","msg":"传输成功"}'
            sock.send(srecvmsg.encode('utf-8'))
        else:
            srecvmsg = '{"code":"998","lens":"' + lens + '","msg":"数据库数据写入失败"}'
            sock.send(srecvmsg.encode('utf-8'))
    except Exception as e:
        # Log first so the traceback survives even if the reply cannot be sent.
        setLog(traceback.format_exc())
        srecvmsg = '{"code":"999","lens":"' + lens + '","msg":"{0}"{1}'.format(
            e, '}')
        try:
            sock.send(srecvmsg.encode('utf-8'))
        except Exception:
            # The peer is most likely gone; record it instead of propagating.
            setLog(traceback.format_exc())
    finally:
        sock.close()
def receiveData():
    """Accept client connections on ``serverSocket`` and handle them in turn.

    Each accepted connection is processed synchronously by ``RecvData`` with
    the buffer size ``CON_RECV['BUFFSIZE']``. Any exception is logged through
    ``setLog`` and ends the loop by raising ``SystemExit``.
    """
    # Local import: sys.exit() replaces the site-provided exit() helper,
    # which is meant for the interactive shell and may be absent.
    import sys

    print('进入进程')
    while True:
        print('receiveData')
        try:
            clientSocket, addr = serverSocket.accept()
            print('addr =', addr)
            RecvData(clientSocket, addr, CON_RECV['BUFFSIZE'])
        except Exception as err:
            setLog('数据接收失败:{0}'.format(err))
            sys.exit()
def select_table(sql):
    """Execute a SELECT statement and return every fetched row.

    Args:
        sql: Complete SQL text to execute.

    Returns:
        The result of ``cursor.fetchall()``; 0 when execution failed (the
        traceback is passed to ``setLog`` and the connection rolled back).
    """
    connection = connect_op_db()
    cursor = connection.cursor()
    try:
        cursor.execute(sql)
        return cursor.fetchall()
    except Exception:
        message = 'select_table() select operation error:{0}'.format(
            traceback.format_exc())
        setLog(message)
        connection.rollback()
        return 0
    finally:
        # Runs on both paths before the value is handed back to the caller.
        cursor.close()
        connection.close()
def query_node_id(ipAddr):
    """Look up the OP_MCH_HD id that belongs to an IP address.

    Args:
        ipAddr: IP address to search for in the ``ipAddr`` column.

    Returns:
        The ``id`` of the first matching row, or None when no row matches
        (a message is written through ``setLog`` in that case).
    """
    # Bound parameter instead of string interpolation: the driver does the
    # quoting, so the address can never alter the statement.
    sql_str = "SELECT id FROM OP_MCH_HD WHERE ipAddr=%s"
    con = connect_op_db()
    try:
        cur = con.cursor()
        try:
            cur.execute(sql_str, (ipAddr, ))
            row = cur.fetchone()
        finally:
            cur.close()
    finally:
        # Released even when the query raises.
        con.close()
    if not row:
        setLog('Fatal error: ipAddr:{0} does not exists!'.format(ipAddr))
        return None
    return row[0]
def update_table(sql):
    """Execute a data-modifying statement and commit it.

    Args:
        sql: Complete SQL text to execute.

    Returns:
        1 after a successful commit, 0 when execution or commit failed (the
        traceback is passed to ``setLog`` and the connection rolled back).
    """
    connection = connect_op_db()
    cursor = connection.cursor()
    try:
        cursor.execute(sql)
        connection.commit()
    except Exception:
        message = 'update_table() update operation error:{0}'.format(
            traceback.format_exc())
        setLog(message)
        connection.rollback()
        return 0
    else:
        return 1
    finally:
        cursor.close()
        connection.close()
def testConnect():
    """Ping-test all hosts flagged ``normal_flag=1`` using seven threads.

    The hosts are read from OP_MCH_HD / OP_MCH_SET_INFO, split into seven
    consecutive slices and each slice is handed to ``connMch`` on its own
    thread created with ``createThread``. The call returns after all threads
    were joined.

    Raises:
        Whatever the database driver raises while querying the host list;
        failures while creating, starting or joining the threads are only
        logged through ``setLog``.
    """
    worker_count = 7
    sqlStr = (
        "SELECT H.id,H.ipaddr,break_time,run_time" +
        " FROM OP_MCH_HD H,OP_MCH_SET_INFO I WHERE I.mch_id=H.id AND I.normal_flag=1"
    )
    con = connect_op_db()
    cur = con.cursor()
    try:
        cur.execute(sqlStr)
        rows = cur.fetchall()
    finally:
        # The rows are already in memory; release the handles even when the
        # query raises instead of leaking them.
        cur.close()
        con.close()

    # Consecutive slices of equal size; trailing slices may be empty.
    size = ceil(len(rows) / worker_count)
    mchList = [rows[k * size:(k + 1) * size] for k in range(worker_count)]

    try:
        # Create all threads first, then start all, then join all.
        workers = [
            createThread(number, 'thread{0} '.format(number), connMch,
                         (chunk, ))
            for number, chunk in enumerate(mchList, start=1)
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()
    except Exception:
        setLog('多线程主机连接测试失败:{0}'.format(traceback.format_exc()))
def insert_data_batch(sql, val):
    """Insert several rows with one ``executemany`` call and commit.

    Args:
        sql: Parameterized INSERT statement.
        val: Sequence of parameter tuples, one per row.

    Returns:
        1 when all rows were inserted and committed, 0 otherwise (the
        transaction is rolled back and the traceback passed to ``setLog``).
    """
    con = connect_op_db()
    cur = con.cursor()
    try:
        cur.executemany(sql, val)
        # Explicit check instead of ``assert``: assertions are removed when
        # Python runs with -O, which would silently disable this guard.
        if cur.rowcount != len(val):
            raise RuntimeError(
                'insert_data_batch() expected {0} rows, got {1}'.format(
                    len(val), cur.rowcount))
        con.commit()
        return 1
    except Exception:
        con.rollback()
        setLog('insert_data_batch() Insert operation error:{0}'.format(
            traceback.format_exc()))
        return 0
    finally:
        cur.close()
        con.close()
def insert_data(sql, val):
    """Insert a single row and commit.

    Args:
        sql: Parameterized INSERT statement.
        val: Parameter values for ``sql``.

    Returns:
        1 after a successful commit.

    Raises:
        Exception: Any error from execute/commit is re-raised after the
            transaction was rolled back and the traceback passed to
            ``setLog``; this function never returns 0.
    """
    con = connect_op_db()
    cur = con.cursor()
    try:
        cur.execute(sql, val)
        con.commit()
        return 1
    except Exception:
        con.rollback()
        setLog('insert_data() Insert operation error:{0}'.format(
            traceback.format_exc()))
        # Callers wrap this call in their own try/except, so propagate.
        raise
    finally:
        cur.close()
        con.close()
def connMch(mchList):
    """Ping each host once and record status changes in OP_MCH_HD.

    A host that does not answer and has no ``break_time`` yet is switched to
    the ``D_HOST_CLOSE`` status; a host that answers and has no ``run_time``
    yet is switched to ``D_HOST_NORMAL``.

    Args:
        mchList: Iterable of rows ``(id, ipaddr, break_time, run_time)``.

    Raises:
        Exception: Any failure is logged through ``setLog`` and re-raised.
    """
    # Bound before the try so the cleanup below is safe even when opening
    # the connection itself fails.
    con = None
    cur = None
    try:
        con = connect_op_db()
        cur = con.cursor()
        selectDict = (
            "SELECT id,dictcode from stm_dict where dictcode in ('D_HOST_NORMAL','D_HOST_CLOSE') "
        )
        cur.execute(selectDict)
        for dictInfo in cur.fetchall():
            if dictInfo[1] == 'D_HOST_NORMAL':
                normalKey = dictInfo[0]
            elif dictInfo[1] == 'D_HOST_CLOSE':
                closeKey = dictInfo[0]
        for info in mchList:
            mchId = info[0]
            ipAddr = info[1]
            # Argument list without a shell: the address cannot inject shell
            # syntax. Output goes to DEVNULL because subprocess.call never
            # reads a PIPE and could block on a full one.
            result = subprocess.call(['ping', '-c', '1', ipAddr],
                                     stdout=subprocess.DEVNULL,
                                     stderr=subprocess.DEVNULL)
            modifyTime = time.strftime("%Y%m%d%H%M%S", time.localtime())
            # Any non-zero exit status means the host was not reached.
            if result != 0:
                if not info[2]:
                    sqlUpdateStr = "update OP_MCH_HD set break_time={0},mch_status_id={1},run_time=NULL,amdlog={0} where id={2}".format(
                        modifyTime, closeKey, mchId)
                    cur.execute(sqlUpdateStr)
            elif not info[3]:
                sqlUpdateStr = "update OP_MCH_HD set run_time={0},mch_status_id={1},break_time=NULL,amdlog={0} where id={2}".format(
                    modifyTime, normalKey, mchId)
                cur.execute(sqlUpdateStr)
            # Commit per host so an update is not kept pending while the
            # remaining hosts are still being pinged.
            con.commit()
    except Exception:
        # Log before re-raising; a log call placed after ``raise`` never runs.
        setLog('主机测试连接失败:{0}'.format(traceback.format_exc()))
        raise
    finally:
        if cur is not None:
            cur.close()
        if con is not None:
            con.close()
def SendData(addr, buffersize, buffer):
    """Send one framed message to ``addr`` and return the peer's reply.

    The frame consists of a 10-digit zero-padded total length, a 13-digit
    millisecond timestamp, the local host name wrapped in braces and finally
    ``buffer``. The peer's greeting is read and discarded before sending.

    Args:
        addr: ``(host, port)`` tuple passed to ``socket.connect``.
        buffersize: Maximum number of bytes read per ``recv`` call.
        buffer: Text payload to transmit.

    Returns:
        The decoded reply of the peer, or the string
        ``{"code":999,"msg":"发送失败<error>"}`` when anything failed (the
        traceback is passed to ``setLog``).
    """
    try:
        hostname = '{' + socket.gethostname() + '}'
        # 23 = 10 length digits + 13 timestamp digits.
        frame = str('%010d' % (len(buffer) + 23 + len(hostname))) + str(
            int(round(time.time() * 1000))) + hostname + buffer
        # The context manager closes the socket on every path, including
        # failures in connect/recv/send.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.connect(addr)
            sock.recv(buffersize).decode('utf-8')
            # sendall keeps sending until the whole frame is transmitted;
            # send may stop after a partial write.
            sock.sendall(frame.encode('utf-8'))
            srecvmsg = sock.recv(buffersize).decode('utf-8')
        # The reply is logged as received, followed by the payload sent.
        logging.info('recvtime:' + srecvmsg + ',data:' + buffer)
    except Exception as e:
        setLog('信息发送失败:{0}'.format(traceback.format_exc()))
        srecvmsg = '{0}"code":999,"msg":"发送失败{1}"{2}'.format('{', e, '}')
    return srecvmsg
def monitor():
    """Probe the agent on every host flagged ``agent_process_flag=1``.

    For each host a short message (host id plus current timestamp) is sent
    with ``SendData`` to port ``MCH_REC_PORT``. When the reply code is not
    200, a failure row is written to OP_MCH_DT_AGENT through
    ``db.insert_data_one``. Every exception is logged through ``setLog`` and
    swallowed.
    """
    # Bound before the try so the cleanup below is safe even when opening
    # the connection itself fails.
    con = None
    cur = None
    try:
        sqlStr = (
            "SELECT H.id,H.ipaddr,h.break_time,h.run_time" +
            " FROM OP_MCH_HD H,OP_MCH_SET_INFO I WHERE I.mch_id=H.id AND I.agent_process_flag=1"
        )
        con = connect_op_db()
        cur = con.cursor()
        cur.execute(sqlStr)
        for mchInfo in cur.fetchall():
            mchId = mchInfo[0]
            bufferStr = '{0}{1}'.format(
                mchId, time.strftime("%Y%m%d%H%M%S", time.localtime()))
            ipAddr = (mchInfo[1], MCH_REC_PORT)
            srecvmsg = json.loads(SendData(ipAddr, 1024, bufferStr))
            curTime = time.strftime("%Y%m%d%H%M%S", time.localtime())
            if int(srecvmsg['code']) == 200:
                continue
            sqlUpdateStr = (
                "INSERT INTO OP_MCH_DT_AGENT( OP_MCH_HD_ID, AGENT_NAME, END_TIME,MEMO, CRTUSR, CRTLOG ,AMDUSR,AMDLOG) "
                "VALUES ({0},'数据采集代理程序',{1},'数据采集代理程序失败',1,{1},1,{1})"
            ).format(mchId, curTime)
            succFlag = db.insert_data_one(sqlUpdateStr)
            if succFlag != 1:
                # NOTE(review): no exception is active here, so format_exc()
                # carries no traceback; message kept as it was.
                setLog('主机代理进程监控失败:{0}'.format(traceback.format_exc()))
    except Exception:
        setLog('主机代理进程监控失败:{0}'.format(traceback.format_exc()))
    finally:
        if cur is not None:
            cur.close()
        if con is not None:
            con.close()
def saveSeverInfo(addr, data):
    """Synchronise the tracked service processes of a host with a report.

    Runs only when the host's effective settings row has ``server_flag`` 1.
    For every tracked process row of the host:

    * reported and no begin time yet: set begin time and pid;
    * reported and a close time exists: reset begin time, clear close time;
    * not reported and no close time yet: set close time, clear pid.

    Args:
        addr: Host address; resolved through ``db.query_node_id``.
        data: Mapping with ``serverList`` (items providing ``nodeName`` and
            ``pid``) and ``gatherDate`` (``%Y%m%d%H%M`` string).

    Returns:
        1 when nothing had to be written, otherwise the flag of the last
        ``db.update_table`` call; 0 when an exception occurred (the traceback
        is passed to ``setLog``).
    """
    try:
        succFlag = 1
        # Resolved once and reused for both queries.
        mchhdid = db.query_node_id(addr)
        serverList = data['serverList']
        getMchSetSql = 'select ' \
                       ' o.server_flag ' \
                       ' from op_mch_set_info o ' \
                       'where o.effflag = 1 ' \
                       ' and o.mch_id =' + str(mchhdid)
        getMchSetResults = db.select_table(getMchSetSql)
        # Only the first settings row is considered.
        if getMchSetResults and getMchSetResults[0][0] == 1:
            getsql = 'select ds.id, d.dictname,ds.last_begin_time,ds.last_close_time,ds.pid ' \
                     ' from op_mch_dt_server s' \
                     ' ,op_mch_dtt_server ds ' \
                     ' ,stm_dict d ' \
                     ' where s.id = ds.op_mch_dt_server_id ' \
                     ' and ds.node_id = d.id ' \
                     ' and s.op_mch_hd_id =' + str(mchhdid)
            results = db.select_table(getsql)
            for row in results:
                # Per-row state: without this reset, a match on an earlier
                # row would suppress the close-time update of later rows and
                # a stale statement would be executed again.
                updatesql = None
                matched = False
                for server in serverList:
                    if server['nodeName'] != row[1]:
                        continue
                    matched = True
                    if not row[2]:
                        # No begin time yet: first sighting of the process.
                        # format() also accepts an integer pid.
                        updatesql = (
                            "update op_mch_dtt_server ds "
                            " set ds.last_begin_time =str_to_date({0},'%Y%m%d%H%i')"
                            " ,ds.pid = {1}"
                            " where ds.id ={2}").format(
                                data['gatherDate'], server['pid'], row[0])
                    elif row[3]:
                        # Was closed before and is seen again: restart it.
                        updatesql = (
                            "update op_mch_dtt_server ds "
                            " set ds.last_begin_time =str_to_date({0},'%Y%m%d%H%i')"
                            " ,ds.last_close_time =null"
                            " where ds.id ={1}").format(
                                data['gatherDate'], row[0])
                if not matched and not row[3]:
                    # Tracked but not reported and not yet closed: close it.
                    updatesql = (
                        "update op_mch_dtt_server ds "
                        " set ds.last_close_time =str_to_date({0},'%Y%m%d%H%i')"
                        " ,ds.pid = null"
                        " where ds.id ={1}").format(
                            data['gatherDate'], row[0])
                if updatesql:
                    succFlag = db.update_table(updatesql)
    except Exception:
        succFlag = 0
        setLog('保存服务进程信息 saveSeverInfo error:{0}'.format(
            traceback.format_exc()))
    return succFlag
# NOTE(review): the four statements below are the tail of a definition whose
# beginning is not visible in this view; they are reproduced unchanged.
        except Exception as e:
            setLog('代理进程监控失败:{0}'.format(e))
            exit()
        time.sleep(sleeptime)


# Script entry point: prepares logging, then runs the receiver, the
# connection test loop and the agent monitor loop on three threads and waits
# for all of them.
if __name__ == '__main__':
    try:
        # Executed once at service start-up; creates the log file.
        initialize()
        connSleepTime = sleepTime(0, 1, 0)
        monitorSleepTime = sleepTime(0, 10, 0)
        # Use separate threads for message reception and the two monitors.
        # NOTE(review): this rebinds the module-level name ``receiveData``
        # from the function to the thread object, and ``('')`` is an empty
        # string rather than a tuple — confirm against createThread's
        # signature, which is not visible here.
        receiveData = createThread(1, "receiveData ", '', receiveData, (''))
        testconn = createThread(2, "testConn ", connSleepTime, testConn,
                                (connSleepTime, ))
        agentmonitor = createThread(3, "agentMonitor ", monitorSleepTime,
                                    agentMonitor, (monitorSleepTime, ))
        receiveData.start()
        testconn.start()
        agentmonitor.start()
        receiveData.join()
        testconn.join()
        agentmonitor.join()
    except Exception as e:
        # print('traceback.format_exc()=', traceback.format_exc())
        # Log the traceback, close the listening socket, then re-raise.
        setLog(traceback.format_exc())
        serverSocket.close()
        raise
def initialize():
    """Create the log folder at ``FILE_PATH`` and write a first log entry."""
    createLogFolder(FILE_PATH)
    log.setLog("文件创建成功")
def _scale_io_threshold(amount, unit_code):
    """Convert an I/O threshold to kb according to its unit dictionary code."""
    if unit_code == 'D_IOUNIT_MB':
        return amount * 1024
    if unit_code == 'D_IOUNIT_GB':
        return amount * 1024 * 1024
    return amount


def _append_alarm(alarms, measured, warning, serious, text, warning_suffix,
                  serious_suffix):
    """Append a danger/warning alarm to ``alarms`` if a threshold is reached."""
    if measured >= serious:
        alarms.append({
            'alarmtypecode': 'D_ALARM_DANGER',
            'alarminfo': text + serious_suffix
        })
    elif measured >= warning:
        alarms.append({
            'alarmtypecode': 'D_ALARM_WARM',
            'alarminfo': text + warning_suffix
        })


def _collect_load_alarms(data, row, totalMem, totalSwap):
    """Evaluate every enabled overload check of one settings row."""
    alarms = []

    # CPU usage.
    if row[0] == 1:
        _append_alarm(alarms, Decimal.from_float(data['cpuUs']), row[1],
                      row[2], 'cpu使用率' + str(data['cpuUs'] * 100) + '%',
                      '负荷告警!', '负荷严重!')

    # Memory usage relative to the configured total.
    if row[3] == 1 and totalMem:
        _append_alarm(alarms,
                      Decimal.from_float(data['memUsed']) / totalMem, row[4],
                      row[5], '内存使用达' + str(data['memUsed']) + 'M',
                      '使用告警!', '使用严重!')

    # File-system usage, one check per reported mount point.
    if row[6] == 1 and data['dirInfo']:
        for i in data['dirInfo']:
            _append_alarm(
                alarms, Decimal.from_float(i['usedPer']), row[7], row[8],
                '文件系统 挂载点:{0} {1}%'.format(i['dirName'],
                                              i['usedPer'] * 100),
                '使用告警!', '使用严重!')

    # Disk I/O; thresholds are normalised to kb first.
    if row[9] == 1:
        warnIo = _scale_io_threshold(row[10], row[11])
        serIo = _scale_io_threshold(row[12], row[13])
        _append_alarm(alarms, Decimal.from_float(data['totalDiskRead']),
                      warnIo, serIo,
                      '磁盘读取io 为' + str(data['totalDiskRead']) + 'kb/s',
                      '负载告警!', '负载严重!')
        _append_alarm(alarms, Decimal.from_float(data['totalDiskWrite']),
                      warnIo, serIo,
                      '磁盘写入io 为' + str(data['totalDiskWrite']) + 'kb/s',
                      '负载告警!', '负载严重!')

    # Swap usage relative to the configured total.
    if row[14] == 1 and totalSwap:
        _append_alarm(alarms,
                      Decimal.from_float(data['swapUsed']) / totalSwap,
                      row[15], row[16],
                      'Swap使用达为' + str(data['swapUsed']) + 'M',
                      '使用告警!', '使用严重!')

    # Network I/O. Both thresholds are compared with the network values
    # (the warning level used to be compared with the disk values).
    if row[17] == 1:
        warnnetIo = _scale_io_threshold(row[18], row[19])
        sernetIo = _scale_io_threshold(row[20], row[21])
        _append_alarm(alarms, Decimal.from_float(data['netIncomingAvg']),
                      warnnetIo, sernetIo,
                      '网络平均下载io 为' + str(data['netIncomingAvg']) + 'kb/s',
                      '负载告警!', '负载严重!')
        _append_alarm(alarms, Decimal.from_float(data['netOutgoingAvg']),
                      warnnetIo, sernetIo,
                      '网络平均上传io 为' + str(data['netOutgoingAvg']) + 'kb/s',
                      '负载告警!', '负载严重!')

    # Load averages. Each average is compared with its own value (the
    # warning level of the 10/15 minute checks used the 5 minute value).
    if row[22] == 1:
        for label, key in (('五分钟负载均衡达', 'loadAvgFive'),
                           ('十分钟负载均衡达', 'loadAvgTen'),
                           ('十五分钟负载均衡达', 'loadAvgFifteen')):
            _append_alarm(alarms, Decimal.from_float(data[key]), row[23],
                          row[24], label + str(data[key]), '负荷告警!',
                          '负荷严重!')
    return alarms


def saveLoadInfo(addr, data):
    """Store one load report, derive alarms from it and refresh host status.

    Steps: insert the metrics into OP_MCH_DT_INFO; replace the host's rows in
    OP_MCH_DT_DIR when directory data was reported; evaluate the overload
    checks when both a configuration row and an effective settings row exist;
    write every alarm to op_mch_dt_alarm; finally set the host status to
    ``D_HOST_ERROR`` (any danger alarm), ``D_HOST_WARNING`` (alarms but no
    danger) or ``D_HOST_NORMAL`` (no alarm).

    Args:
        addr: Host address; resolved once through ``db.query_node_id``.
        data: Mapping with ``gatherDate`` (``%Y%m%d%H%M`` string), the CPU,
            memory, swap, load, disk and network metrics read below, and
            ``dirInfo`` (list of mappings providing at least ``dirName`` and
            ``usedPer``).

    Returns:
        The flag of the last executed insert (metrics, directory row or
        alarm row); 0 when an exception occurred (the traceback is passed
        to ``setLog``).
    """
    try:
        alarmList = []
        # Resolved once instead of re-querying the database for every
        # statement and loop iteration.
        node_id = db.query_node_id(addr)
        gather_time = time.strftime(
            "%Y%m%d%H%M%S", time.strptime(data['gatherDate'], "%Y%m%d%H%M"))

        infoSql = "INSERT INTO OP_MCH_DT_INFO(" \
                  "OP_MCH_HD_ID,GATHERDATE,CPUUS,CPUSY,CPUID,CPUWA,MEMUSED,MEMBUFFCACHE,SWAPUSED" \
                  ",LOADAVGFIVE,LOADAVGTEN,LOADAVGFIFTEEN,TOTALDISKREAD,TOTALDISKWRITE,NETINCOMINGAVG,NETOUTGOINGAVG,EFFFLAG,MEMO) " \
                  "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        # Sizes and rates are stored with three decimal places.
        infoVal = (
            node_id,
            gather_time,
            data['cpuUs'],
            data['cpuSy'],
            data['cpuId'],
            data['cpuWa'],
            float('%.3f' % data['memUsed']),
            float('%.3f' % data['memBuffCache']),
            float('%.3f' % data['swapUsed']),
            data['loadAvgFive'],
            data['loadAvgTen'],
            data['loadAvgFifteen'],
            float('%.3f' % data['totalDiskRead']),
            float('%.3f' % data['totalDiskWrite']),
            float('%.3f' % data['netIncomingAvg']),
            float('%.3f' % data['netOutgoingAvg']),
            1,
            "")
        dirSql = "INSERT INTO OP_MCH_DT_DIR( DIRNAME, USEDSIZE,FILESYSTEM,DIRSIZE,USEDPER,MCH_ID, EFFFLAG ,MEMO,AMDLOG,AMDUSR,CRTLOG,CRTUSR) " \
                 "VALUES (%s, %s, %s, %s, %s,%s, %s, %s, %s, %s, %s, %s)"

        succFlag = db.insert_data(infoSql, infoVal)
        if succFlag == 1:
            if data['dirInfo']:
                # Remove the previous directory snapshot of this host.
                deletesql = 'delete from op_mch_dt_dir where mch_id =' + str(
                    node_id)
                succFlag = db.delete_data(deletesql)
                if succFlag == 1:
                    for i in data['dirInfo']:
                        now = time.strftime("%Y%m%d%H%M%S", time.localtime())
                        # NOTE(review): relies on the key order of each
                        # dirInfo item matching the first five columns.
                        val = tuple(i.values()) + (node_id, 1, "", now, 1,
                                                   now, 1)
                        succFlag = db.insert_data(dirSql, val)

            # Latest configuration row: total memory and swap sizes.
            getMchCfgSql = 'select ' \
                           ' c.disksize' \
                           ', c.memsize' \
                           ', c.swapsize ' \
                           ' from op_mch_dt_cfg c' \
                           ' where c.op_mch_hd_id = ' + str(node_id) + \
                           ' order by c.gatherdate desc' \
                           ' limit 1'
            getMchCfgResults = db.select_table(getMchCfgSql)
            if getMchCfgResults:
                cfgRow = getMchCfgResults[0]
                totalMem = cfgRow[1]
                totalSwap = cfgRow[2]
                # Overload settings (flags and thresholds) of the host.
                getMchSetSql = 'select ' \
                               ' o.cpu_flag ' \
                               ', o.cpu_warning ' \
                               ', o.cpu_serious' \
                               ', o.memory_flag' \
                               ', o.memory_warning' \
                               ', o.memory_serious' \
                               ', o.file_sys_dir_flag' \
                               ', o.file_sys_dir_warning' \
                               ', o.file_sys_dir_serious' \
                               ', o.disk_io_flag' \
                               ', o.disk_io_warning' \
                               ', (select s.dictcode from stm_dict s where s.id = o.disk_io_warning_unit) disk_io_warning_code' \
                               ', o.disk_io_serious' \
                               ', (select s.dictcode from stm_dict s where s.id = o.disk_io_serious_unit) disk_io_serious_code' \
                               ', o.swap_flag' \
                               ', o.swap_warning' \
                               ', o.swap_serious' \
                               ', o.net_io_flag' \
                               ', o.net_io_warning' \
                               ', (select s.dictcode from stm_dict s where s.id = o.net_io_warning_unit) net_io_warning_unit_code' \
                               ', o.net_io_serious' \
                               ', (select s.dictcode from stm_dict s where s.id = o.net_io_serious_unit) net_io_serious_unit_code' \
                               ', o.load_flag' \
                               ', o.load_warning' \
                               ', o.load_serious' \
                               ', o.time_check_flag' \
                               ', o.time_check_num' \
                               ' from op_mch_set_info o ' \
                               'where o.effflag = 1 ' \
                               ' and o.mch_id =' + str(node_id)
                getMchSetResults = db.select_table(getMchSetSql)
                if getMchSetResults:
                    alarmList = _collect_load_alarms(
                        data, getMchSetResults[0], totalMem, totalSwap)

            # Write the alarms and derive the host status from them.
            if alarmList:
                # Any alarm means the host is at least in warning state.
                mchAlarmCode = 'D_HOST_WARNING'
                for alarm in alarmList:
                    alarm_type_id = getDictId(alarm['alarmtypecode'])
                    # NOTE(review): alarminfo can contain the reported
                    # dirName and is embedded in string-built SQL; consider a
                    # parameterized insert if the db layer offers one here.
                    alarmSql = (
                        "INSERT INTO op_mch_dt_alarm("
                        "OP_MCH_HD_ID,GATHERDATE,ALARM_TYPE_ID,ALARM_INFO,EFFFLAG) "
                        "values({0},'{1}',{2},'{3}',1)").format(
                            node_id, gather_time,
                            'null' if not alarm_type_id else str(alarm_type_id),
                            alarm['alarminfo'])
                    succFlag = db.insert_data_one(alarmSql)
                    # A single danger alarm escalates the host to error.
                    if alarm['alarmtypecode'] == 'D_ALARM_DANGER':
                        mchAlarmCode = 'D_HOST_ERROR'
            else:
                # No alarm means the host is normal.
                mchAlarmCode = 'D_HOST_NORMAL'
            updateMchSql = 'update op_mch_hd h ' \
                           ' set h.mch_status_id = ' + str(getDictId(mchAlarmCode)) + \
                           ' where h.id =' + str(node_id)
            db.update_table(updateMchSql)
    except Exception:
        setLog('保存负载信息 saveLoadInfo error:{0}'.format(traceback.format_exc()))
        succFlag = 0
    return succFlag