def loop_monitor(argv):
    """Run the server status monitors once or on a fixed polling interval.

    Options parsed from ``argv`` (program name excluded):

    * ``-h``: print usage and exit.
    * ``-l N`` / ``--loopsecends=N``: polling interval in seconds. ``0`` runs
      the selected monitors once and returns. Defaults to 60.
    * ``-e NAME`` / ``--excute=NAME``: ``fpga`` or ``basic`` selects a single
      monitor; when omitted both monitors are selected.

    In loop mode the FPGA monitor is only started while
    ``ct.time_check('08:45', '19:25')`` is true and the basic monitor while
    ``ct.time_check('08:45', '15:30')`` is true. The loop never returns.

    Exits with status 2 on malformed options, a non-integer or negative
    interval, or an unknown monitor name.
    """
    usage = 'monitor_status_task.py -l <loopsecends> -e <excute>'
    ct.setup_logging('./config/server_status_logger.yaml')

    inc = 60
    modul = ''
    try:
        opts, _ = getopt.getopt(argv, "hl:e:", ["loopsecends=", "excute="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage + '\n'
                  ' loopsecends=0 means no loop and just run once.\n'
                  ' loopsecends=N means loop interval is N second. \n'
                  ' (default:python monitor_status_task.py) means loop interval is 60 seconds. \n'
                  ' excute=fpga means excute the fpgamonitor. \n'
                  ' excute=basic means excute the basic monitor. \n'
                  ' excute is Null means excute fpga and basic monitor.')
            sys.exit()
        elif opt in ("-l", "--loopsecends"):
            try:
                inc = int(arg)
            except ValueError:
                # A non-numeric interval is a usage error, not a crash.
                print(usage)
                sys.exit(2)
        elif opt in ("-e", "--excute"):
            modul = arg

    # A negative interval would make time.sleep() raise, and an unknown
    # monitor name would silently select nothing.
    if inc < 0 or modul not in ('', 'fpga', 'basic'):
        print(usage)
        sys.exit(2)

    logger.info('interval is: %d' % inc)
    logger.info('modull is:s: %s' % modul)

    run_fpga = modul in ('', 'fpga')
    run_basic = modul in ('', 'basic')

    if inc == 0:
        # Run-once mode: no time-window check.
        if run_fpga:
            logger.info("Start to excute the fpgamonitor")
            fpgatask()
        if run_basic:
            logger.info("Start to excute the basic monitor")
            alltask()
        return

    while True:
        # The -e selection is honoured in loop mode as well; previously it
        # was only applied when running once.
        if run_fpga:
            if ct.time_check('08:45', '19:25'):
                logger.info("Start to excute the fpgamonitor")
                fpgatask()
            else:
                logger.info("It's not time to excute the FPGA monitor")
        if run_basic:
            if ct.time_check('08:45', '15:30'):
                logger.info("Start to excute the basic monitor")
                alltask()
            else:
                logger.info("It's not time to excute the basic monitor")
        time.sleep(inc)
def monitor_errorLog_run(argv):
    """Run the server error-log check once or on a fixed polling interval.

    Options parsed from ``argv`` (program name excluded):

    * ``-h``: print usage and exit.
    * ``-l N`` / ``--loopsecends=N``: polling interval in seconds. ``0`` runs
      the check once and returns. Defaults to 60.

    Before checking, the previous ``grep_result_file`` (if present) is moved
    to ``back_file``. In loop mode the check only runs while
    ``ct.time_check('05:45', '19:25')`` is true, and the loop never returns.

    Exits with status 2 on malformed options or a non-integer / negative
    interval.
    """
    usage = 'monitor_errorLog.py -l <loopsecends>'

    # Parse options first so that -h or a bad option has no side effects
    # (previously the result file was rotated even when only help was asked).
    inc = 60
    try:
        opts, _ = getopt.getopt(argv, "hl:", ["loopsecends="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage + ' \n'
                  ' loopsecends=0 means no loop and just run once.\n'
                  ' loopsecends=N means loop interval is N second. \n'
                  ' (default:python monitor_errorLog.py) means loop interval is 60 seconds')
            sys.exit()
        elif opt in ("-l", "--loopsecends"):
            try:
                inc = int(arg)
            except ValueError:
                print(usage)
                sys.exit(2)
    if inc < 0:
        # time.sleep() rejects negative values.
        print(usage)
        sys.exit(2)

    # Back up the previous grep result. os.replace overwrites an existing
    # backup on every platform; os.rename raises on Windows when the target
    # already exists.
    if os.path.exists(grep_result_file):
        os.replace(grep_result_file, back_file)

    ct.setup_logging('./config/monitor_errorLog_logger.yaml')
    print('interval is: ', inc)

    def check_once():
        """Collect the remote error logs, evaluate them and warn on failure."""
        grep_lists = get_errorLog(linuxInfo)
        file_list = get_result_file_list()
        check_flag = errorLog_check(file_list, grep_lists)
        # The spoken warning is only issued on Windows hosts.
        if (not check_flag) and platform.system() == "Windows":
            ct.readTexts("Server log warning")

    if inc == 0:
        check_once()
        return

    while True:
        if ct.time_check('05:45', '19:25'):
            check_once()
        time.sleep(inc)
def non_trade_mem_info(self, info):
    """Check one server's memory usage and raise alarms when it is overloaded.

    ``info[0]`` is used as the host identifier in every message. The method
    runs ``cat /proc/meminfo`` through ``self.sshExecCmd``, records the sample
    in ``result_file`` and stores the verdict in ``self.mem_info_verify``.

    Rules:

    * While ``ct.time_check('08:45', '15:30')`` is true the buffers/cache
      rate is not judged.
    * Otherwise, a buffers/cache rate of 50% or more triggers a remote
      ``drop_caches`` followed by a second sample; if the rate is still 50%
      or more an error is logged, written to ``error_log_file`` and sent via
      ``ct.send_sms_control``.
    * Independently, a used-memory rate of 80% or more is reported the same
      way.

    If the meminfo output cannot be parsed the check is marked as failed and
    an error is logged. Returns ``None``.
    """
    command = 'cat /proc/meminfo'
    hostip = info[0]
    titlename = "MemTotal,MemFree,MemAvailable,Buffers,Cached,SwapCached,SwapTotal,SwapFree,BuffersCachedRate,Rate_Mem"

    def take_sample(banner):
        """Read, log and record one meminfo sample.

        Returns ``(BuffersCachedRate, Rate_Mem)`` or ``None`` when the
        mandatory fields are missing or MemTotal is zero.
        """
        ssh_res = self.sshExecCmd(command)
        # Parse "Name:   value kB" pairs by name instead of relying on the
        # position of each field, which differs between kernel versions.
        # NOTE(review): assumes sshExecCmd returns an iterable of text lines.
        fields = dict(
            re.findall(r"([\w()]+):\s*(\d+) kB", ",".join(ssh_res)))
        try:
            mem_total = int(fields["MemTotal"])
            mem_free = int(fields["MemFree"])
            buffers = int(fields["Buffers"])
            cached = int(fields["Cached"])
        except KeyError:
            return None
        if mem_total <= 0:
            return None

        logger.info(banner % hostip)
        cur_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        ct.write_file(result_file,
                      cur_time + "::" + hostip + "_mem_info_result:")
        logger.info(titlename)
        ct.write_file(result_file, titlename)

        # Share of total memory held by buffers + page cache.
        bc_rate = round(100 * (buffers + cached) / float(mem_total), 2)
        logger.info("BuffersCachedRate:" + str("%.2f" % bc_rate) + "%")
        # Buffers and cache are reclaimable, so they count as free here.
        used_mem = mem_total - (mem_free + buffers + cached)
        rate_mem = round(100 * used_mem / float(mem_total), 2)
        logger.info("Rate_Mem:" + str("%.2f" % rate_mem) + "%")

        row = [
            mem_total, mem_free,
            fields.get("MemAvailable", "0"), buffers, cached,
            fields.get("SwapCached", "0"),
            fields.get("SwapTotal", "0"),
            fields.get("SwapFree", "0"), bc_rate, rate_mem
        ]
        memstr = ','.join(map(str, row))
        logger.debug(memstr)
        ct.write_file(result_file, memstr)
        return bc_rate, rate_mem

    def fail_unreadable():
        """Mark the check as failed because no usable sample was obtained."""
        self.mem_info_verify = False
        logger.error("error:" + hostip + " ::Failed to read /proc/meminfo")
        logger.info("Mem Check Result: " + str(self.mem_info_verify))

    sample = take_sample(
        '******************************Mem Monitor: [server:%s]*********************************'
    )
    if sample is None:
        fail_unreadable()
        return
    BuffersCachedRate, Rate_Mem = sample

    # BuffersCachedRate >= 50 raises an alarm; not judged in trading hours.
    if ct.time_check('08:45', '15:30'):
        self.mem_info_verify = True
        logger.info("Not to check BuffersCachedRate")
    elif BuffersCachedRate < 50:
        self.mem_info_verify = True
        msg = "ok:" + hostip + " ::The BuffersCachedRate is " + str(
            BuffersCachedRate) + " % is ok"
        logger.info(msg)
    else:
        # Drop the page cache on the remote host, then sample again.
        command_clear = 'sync;echo 3 > /proc/sys/vm/drop_caches'
        sshRes_clear = self.sshExecCmd(command_clear)
        logger.debug(sshRes_clear)

        sample = take_sample(
            '******************************Mem Monitor2: [server:%s]*********************************'
        )
        if sample is None:
            fail_unreadable()
            return
        # Use the fresh figures for both rates from here on.
        BuffersCachedRate, Rate_Mem = sample

        if BuffersCachedRate >= 50:
            # Still overloaded after clearing: raise the alarm.
            self.mem_info_verify = False
            msg = "error:" + hostip + " ::The BuffersCachedRate is " + str(
                BuffersCachedRate) + " % is overload"
            ct.write_log(error_log_file, msg)
            logger.error(msg)
            ct.send_sms_control("mem", msg)
        else:
            # Clearing the cache fixed it. The flag must be set explicitly,
            # otherwise a stale value from an earlier call would be reused.
            self.mem_info_verify = True
            msg = "ok:" + hostip + " ::The BuffersCachedRate is " + str(
                BuffersCachedRate) + " % is ok"
            logger.info(msg)

    # Rate_Mem >= 80 raises an alarm.
    if Rate_Mem < 80:
        msg = "ok:" + hostip + " ::The Rate_Mem is " + str(
            Rate_Mem) + " % is ok"
        logger.info(msg)
    else:
        self.mem_info_verify = False
        msg = "error:" + hostip + " ::The Rate_Mem is " + str(
            Rate_Mem) + " % is overload"
        ct.write_log(error_log_file, msg)
        logger.error(msg)
        ct.send_sms_control("mem", msg)

    msg = "Mem Check Result: " + str(self.mem_info_verify)
    logger.info(msg)
def main(argv):
    """Entry point of the non-trading-hours monitor: run one manual task.

    Options parsed from ``argv`` (program name excluded):

    * ``-h``: print usage and exit.
    * ``-t NAME`` / ``--task=NAME``: run the named monitor task once.

    Running without ``-t`` prints ``input error`` and exits. The automatic
    scheduling loop that used to follow that exit was unreachable (the
    original comment marks it as disabled on 2019-08-14) and is not kept.

    Any exception raised by a task is logged with its traceback; the logger's
    handlers are always detached before returning. Usage errors exit with
    status 2.
    """
    try:
        ct.setup_logging('./config/non_trade_monitor_logger.yaml')
        # The self monitor needs the path of the log file. It is taken from
        # the second handler, so the handler order configured in the yaml
        # file must not change or this lookup may fail.
        log_file = logger.handlers[1].baseFilename
        # Initialise the SMS control table.
        ct.init_sms_control_data()

        # (task name, start log message, help description, runner).
        # Runners are wrapped in lambdas so that a task function is only
        # looked up when that task is actually selected.
        tasks = [
            ('ps', "Start to excute the ps monitor",
             'porcess monitor', lambda: ps_monitor_task()),
            ('mem', "Start to excute the mem monitor",
             'memory monitor', lambda: mem_monitor_task()),
            ('ping', "Start to excute the ping server monitor",
             'ping server monitor', lambda: ping_monitor_task()),
            ('disk', "Start to excute the disk monitor",
             'disk monitor', lambda: disk_monitor_task()),
            ('core', "Start to excute the core file monitor",
             'core file monitor', lambda: core_file_monitor_task()),
            ('xwdm', "Start to excute the xwdm check",
             'init VIP_GDH file xwdm check', lambda: xwdm_monitor_task()),
            ('aft_cleanup', "Start to excute the cleanup db monitor",
             'db cleanup check',
             lambda: after_cleanup_db_monitor_task()),
            ('bef_cleanup', "Start to excute the cleanup db monitor",
             'db cleanup check',
             lambda: before_cleanup_db_monitor_task()),
            ('sjdr', "Start to excute the sjdr monitor",
             'sjdr folder Order file monitor', lambda: sjdr_monitor_task()),
            ('follow', "Start to excute the follow csv monitor",
             'follow csv file monitor', lambda: follow_monitor_task()),
            ('bkdb', "Start to excute the backup db monitor",
             'backup db monitor', lambda: backup_db_monitor_task()),
            ('clean_dblog', "Start to excute the shrink db log monitor",
             'shrink db log', lambda: shrink_dblog_monitor_task()),
            ('self_monitor', "Start to excute the self monitor",
             'self check monitor',
             lambda: self_log_monitor_task(log_file)),
            ('ssh_connect', "Start to excute the ssh login monitor",
             'ssh connect monitor', lambda: check_ssh_connect_task()),
            ('ssh_excute', "Start to excute the ssh remote command",
             'ssh remote command', lambda: ssh_remote_command_task()),
            ('exch_file', "Start to excute the exchange file monitor",
             'exchange file monitor',
             lambda: exchange_file_monitor_task()),
        ]
        task_table = dict((name, (start_msg, runner))
                          for name, start_msg, _, runner in tasks)

        manual_task = ''
        try:
            opts, _ = getopt.getopt(argv, "ht:", ["task="])
        except getopt.GetoptError:
            print('non_trade_monitor.py -t <task> or you can use -h for help')
            sys.exit(2)
        for opt, arg in opts:
            if opt == '-h':
                # Generated from the task table so that the help can never
                # drift from the set of accepted task names.
                help_lines = [
                    'non_trade_monitor.py -t <task>',
                    ' use -t can input the manul single task.',
                    ' task=[%s].' % ','.join(
                        '"%s"' % name for name, _, _, _ in tasks),
                ]
                help_lines.extend(' task="%s" means %s' % (name, desc)
                                  for name, _, desc, _ in tasks)
                print('\n'.join(help_lines))
                sys.exit()
            elif opt in ("-t", "--task"):
                manual_task = arg
                if manual_task not in task_table:
                    logger.info("[task] input is wrong, please try again!")
                    sys.exit(2)
        logger.info('manual_task is:%s' % manual_task)

        if manual_task not in task_table:
            # No task given: there is no automatic mode.
            print("input error")
            sys.exit(2)

        start_msg, runner = task_table[manual_task]
        logger.info(start_msg)
        runner()
    except Exception:
        logger.error('Faild to run non_trade_monitor!', exc_info=True)
    finally:
        # Iterate over a copy: removeHandler() mutates logger.handlers, and
        # removing while iterating the same list skips every other handler.
        for handler in list(logger.handlers):
            logger.removeHandler(handler)
def _trade_monitor_auto_loop(inc):
    """Run the scheduled trading-hours monitors until the exit window.

    Every ``inc`` seconds, while ``ct.time_check('08:50', '15:30')`` is true,
    the tasks whose schedule matches the current minute are run. The loop
    ends as soon as ``ct.time_check('15:30', '21:32')`` is true.
    """
    # Minute-of-hour schedules.
    ps_monitor_minutes = ('10', '20', '30', '40', '50', '00')
    mem_monitor_minutes = ('20', '50')
    db_monitor_minutes = ('26', '36', '46', '56', '06', '16')
    # HH:MM points at which the monitor checks that it is itself alive.
    self_check_times = ('08:59', '09:59', '10:59', '11:59', '12:59',
                        '13:59', '14:59')

    while True:
        # Take one timestamp so minute and HH:MM cannot disagree across a
        # minute boundary.
        now = dt.datetime.now()
        now_minute = now.strftime('%M')
        now_hhmm = now.strftime('%H:%M')

        in_trade_window = ct.time_check('08:50', '15:30')
        if in_trade_window:
            if now_hhmm in self_check_times:
                self_log_monitor_task()
            else:
                logger.info("Not to excute self check")

            if now_minute in ps_monitor_minutes:
                # Every 10 minutes: port/process and error-log monitors.
                port_process_task()
                errorLog_monitor_task()
            else:
                logger.info("It's not time to excute the ps monitor")

            if (now_minute in db_monitor_minutes) and ct.trade_check():
                # Every 10 minutes: intraday database monitor.
                db_trade_monitor_task()
            else:
                logger.info("It's not time to excute the db monitor")

            if now_minute in mem_monitor_minutes:
                # Every 30 minutes: server memory monitor.
                mem_monitor_task()
            else:
                logger.info("It's not time to excute the mem monitor")

        # Checked on every cycle, not only inside the trading window, so the
        # process still terminates if the 15:30 minute was missed or if it
        # was started after the close.
        if ct.time_check('15:30', '21:32'):
            logger.info("Exit trade monitor")
            break
        if not in_trade_window:
            logger.info("It's not time to excute the trade monitor")
        time.sleep(inc)


def main(argv):
    """Entry point of the trading-hours monitor.

    Options parsed from ``argv`` (program name excluded):

    * ``-h``: print usage and exit.
    * ``-t NAME`` / ``--task=NAME``: run the named task once and return.

    Without ``-t`` the database init monitor runs once and the scheduled
    loop starts (see ``_trade_monitor_auto_loop``). On every start the files
    in ``./tempdata`` are deleted, or the directory is created if missing.

    Any exception is logged with its traceback; the logger's handlers are
    always detached before returning. Usage errors exit with status 2.
    """
    try:
        ct.setup_logging('./config/trade_monitor_logger.yaml')
        # Initialise the SMS control table.
        ct.init_sms_control_data()
        # Polling interval of the automatic loop, in seconds.
        inc = 59

        # Clear the cached per-table record-count files once at start-up.
        if os.path.isdir("./tempdata"):
            for filename in os.listdir('./tempdata'):
                path = os.path.join('./tempdata', filename)
                # Only plain files are removed; os.remove() on a directory
                # would raise and abort the whole monitor.
                if os.path.isfile(path):
                    os.remove(path)
        else:
            os.mkdir("./tempdata")

        # Task name -> (start log message, runner). Runners are wrapped in
        # lambdas so a task function is only looked up when selected.
        task_table = {
            'ps_port': ("Start to excute the ps_port monitor",
                        lambda: port_process_task()),
            'mem': ("Start to excute the mem monitor",
                    lambda: mem_monitor_task()),
            'self_monitor': ("Start to excute the self monitor",
                             lambda: self_log_monitor_task()),
            'fpga': ("Start to excute the fpga monitor",
                     lambda: fpga_task()),
            'db_init': ("Start to excute the db_init monitor",
                        lambda: db_init_monitor_task()),
            'db_trade': ("Start to excute the db_trade monitor",
                         lambda: db_trade_monitor_task()),
            'errorLog': ("Start to excute the errorLog monitor",
                         lambda: errorLog_monitor_task()),
            'smss': ("查看发送短信状态", lambda: ct.sms_switch('status')),
            'sms0': ("关闭发送短信功能", lambda: ct.sms_switch(0)),
            'sms100': ("打开发送短信功能", lambda: ct.sms_switch(100)),
        }

        manual_task = ''
        try:
            opts, _ = getopt.getopt(argv, "ht:", ["task="])
        except getopt.GetoptError:
            print('trade_monitor.py -t <task> or you can use -h for help')
            sys.exit(2)
        for opt, arg in opts:
            if opt == '-h':
                print('python trade_monitor.py -t <task>\n'
                      ' parameter -t comment: \n'
                      ' use -t can input the manul single task.\n'
                      ' task=["ps_port","mem","fpga","db_init","db_trade","errorLog","self_monitor","smss","sms0","sms100"]. \n'
                      ' task="ps_port" means porcess and port monitor \n'
                      ' task="mem" means memory monitor \n'
                      ' task="fpga" means fpga file monitor \n'
                      ' task="db_init" means db init data monitor \n'
                      ' task="db_trade" means db trading data monitor \n'
                      ' task="errorLog" means file error log monitor \n'
                      ' task="self_monitor" means self check monitor \n'
                      ' task="smss" means check the sms send status \n'
                      ' task="sms0" means set sms total_count=0 \n'
                      ' task="sms100" means set sms total_count=100 \n'
                      ' No parameter comment: \n'
                      ' (default:python trade_monitor.py) means auto work by loops. \n'
                      ' ps_port_monitor_minites = ["10","20","30","40","50","00"] \n'
                      ' mem_monitor_minites = ["20","50"] \n'
                      ' db_monitor_minites = ["26","36","46","56","06","16"] \n'
                      ' slef_check_mitnits = ["08:59","09:59","10:59","11:59","12:59","13:59","14:59"] \n'
                      ' db_init_monitor just execute once on beginning ')
                sys.exit()
            elif opt in ("-t", "--task"):
                manual_task = arg
                if manual_task not in task_table:
                    logger.error("[task] input is wrong, please try again!")
                    sys.exit(2)
        logger.info('manual_task is:%s' % manual_task)

        if manual_task in task_table:
            start_msg, runner = task_table[manual_task]
            logger.info(start_msg)
            runner()
        else:
            # One-off start-up task, then the scheduled loop. The FPGA
            # monitor has been paused since 2020-07-28.
            db_init_monitor_task()
            _trade_monitor_auto_loop(inc)
    except Exception:
        logger.error('Faild to run trade_monitor!', exc_info=True)
    finally:
        # Iterate over a copy: removeHandler() mutates logger.handlers, and
        # removing while iterating the same list skips every other handler.
        for handler in list(logger.handlers):
            logger.removeHandler(handler)
def _run_db_monitor_threads(task, items, warning):
    """Run ``task(info)`` for every item in its own thread and wait for all.

    Each worker is a ``MyThread`` named ``task.__name__ + index``. After a
    worker is joined, a falsy ``get_result()`` triggers the spoken
    ``warning`` via ``ct.readTexts`` when running on Windows.
    """
    threads = [
        MyThread(task, (info, ), task.__name__ + str(i))
        for i, info in enumerate(items)
    ]
    for thread in threads:
        thread.start()
    # Resolved once here so it is always defined, whichever monitor runs
    # first and even when ``items`` is empty.
    is_windows = platform.system() == "Windows"
    for thread in threads:
        thread.join()
        if (not thread.get_result()) and is_windows:
            ct.readTexts(warning)


def _reset_db_tempdata():
    """Delete the files in ./tempdata, creating the directory if missing."""
    if os.path.isdir("./tempdata"):
        for filename in os.listdir('./tempdata'):
            path = os.path.join('./tempdata', filename)
            # Skip sub-directories; os.remove() would raise on them.
            if os.path.isfile(path):
                os.remove(path)
    else:
        os.mkdir("./tempdata")


def main(argv):
    """Entry point of the database monitor.

    The table definitions are loaded from ``./config/table_check.json`` and
    each entry is checked in its own thread.

    Options parsed from ``argv`` (program name excluded):

    * ``-h``: print usage and exit.
    * ``-l N`` / ``--loopsecends=N``: ``0`` runs once; a value greater than
      20 is the loop interval in seconds; anything else is rejected with an
      error log. Defaults to 600.
    * ``-e NAME`` / ``--excute=NAME``: in run-once mode, ``before`` or
      ``trading`` selects a single monitor; omitted means both. It has no
      effect in loop mode.

    In loop mode the before-trade monitor runs once, ``./tempdata`` is
    reset, and the trading monitor then runs whenever ``ct.trade_check()``
    is true until ``ct.time_check('15:00', '15:12')`` becomes true.

    Any exception is logged with its traceback; the logger's handlers are
    always detached before returning.
    """
    try:
        ct.setup_logging('./config/db_check_logger.yaml')
        with open('./config/table_check.json', 'r') as f:
            Jsonlist = json.load(f)
        logger.debug(Jsonlist)

        # Default loop interval in seconds.
        inc = 600
        modul = ''
        try:
            opts, _ = getopt.getopt(argv, "hl:e:",
                                    ["loopsecends=", "excute="])
        except getopt.GetoptError:
            print('db_monitor.py -l <loopsecends> -e <excute>')
            sys.exit(2)
        for opt, arg in opts:
            if opt == '-h':
                print('db_monitor.py -l <loopsecends> -e <excute>\n'
                      ' loopsecends=0 means no loop and just run once.\n'
                      ' loopsecends=N means loop interval is N second. \n'
                      ' (default:python db_monitor.py) means loop interval is 600 seconds. \n'
                      ' excute=before means excute the before trade db monitor. \n'
                      ' excute=trading means excute the trading db monitor. \n'
                      ' excute is Null means excute before and trading db monitor.')
                sys.exit()
            elif opt in ("-l", "--loopsecends"):
                inc = int(arg)
            elif opt in ("-e", "--excute"):
                modul = arg
        logger.info('interval is: %d' % inc)
        logger.info('modull is:s: %s' % modul)

        if inc == 0:
            if modul == 'before' or modul == '':
                logger.info("Start to excute the before trade monitor")
                _run_db_monitor_threads(before_trade_monitor, Jsonlist,
                                        "Database trade before Worning")
            if modul == 'trading' or modul == '':
                # Start the trading monitor from a clean tempdata folder.
                _reset_db_tempdata()
                logger.info("Start to excute the trading_monitor")
                _run_db_monitor_threads(trading_monitor, Jsonlist,
                                        "Database trading Worning")
        elif inc > 20:
            logger.info("Start to excute the before trade monitor")
            _run_db_monitor_threads(before_trade_monitor, Jsonlist,
                                    "Database trade before Worning")
            _reset_db_tempdata()
            while True:
                if ct.trade_check():
                    logger.info("Start to excute the trading_monitor")
                    _run_db_monitor_threads(trading_monitor, Jsonlist,
                                            "Database trading Worning")
                    # Together with the 20 s pause below this makes one
                    # trading cycle last ``inc`` seconds (plus run time).
                    time.sleep(inc - 20)
                else:
                    logger.info("It's not time to trading monitor")
                # Checked on every cycle so the loop also ends when the
                # exit window is reached outside a trading run.
                if ct.time_check('15:00', '15:12'):
                    logger.info("exit to monitor")
                    break
                time.sleep(20)
        else:
            logger.error(
                "Input parameter error: The interval must greater than 20!")
    except Exception:
        logger.error('Faild to run monitor db!', exc_info=True)
    finally:
        # Iterate over a copy: removeHandler() mutates logger.handlers, and
        # removing while iterating the same list skips every other handler.
        for handler in list(logger.handlers):
            logger.removeHandler(handler)