Example #1
0
    def core_file_info(self, info):
        """Look for core dump files under /home/trade on the remote host.

        The search runs through ``self.sshExecCmd``.  An empty result list
        sets ``self.single_info_verify`` to True; anything else sets it to
        False, writes one line per reported path to the error log and sends
        a single "core" SMS alert for the host.

        Args:
            info: Server record; only ``info[0]`` (the host IP) is read.
        """
        # Quote the pattern so the remote shell hands it to find verbatim
        # instead of expanding it against files in its working directory.
        command = "find /home/trade -name 'core.*'"
        hostip = info[0]
        logger.info("command: " + command)
        sshRes = self.sshExecCmd(command)
        if sshRes == []:
            self.single_info_verify = True
            msg = "OK: Server %s The count of core file is 0 " % str(hostip)
            logger.info(msg)
        else:
            self.single_info_verify = False
            core_files = ''.join(sshRes).strip().split('\n')
            logger.debug("sshResList: %s", core_files)
            for core_path in core_files:
                msg = "error: " + hostip + " Have core file:" + core_path
                ct.write_log(error_log_file, msg)
                logger.warning(msg)

            # One SMS per host, not one per file.
            sms_msg = "error: " + hostip + " 有core文件,请检查服务器文件"
            logger.error(sms_msg)
            ct.send_sms_control("core", sms_msg)

        msg = "core file Check Result: " + str(self.single_info_verify)
        logger.info(msg)
def get_db_records(info):
    """Dump the rows returned by a configured SQL query to a CSV file.

    The query text is ``info[5]`` and is executed via ``mt.fetchall_sql``.
    The output path is ``log_dir + info[4] + "_" + ndates + '.csv'``; the
    first CSV row holds the column names taken from the returned cursor
    description, followed by one row per record.

    Args:
        info: Record passed through to ``mt.fetchall_sql``; ``info[4]`` is
            the table name used in the file name, ``info[5]`` the SQL.
    """
    tablename = info[4]
    sql = info[5]
    records_file = log_dir + tablename + "_" + ndates + '.csv'

    # Truncate an existing dump before querying; if the query fails the
    # file is therefore left empty.
    if os.path.exists(records_file):
        with open(records_file, "w"):
            pass

    (res, des) = mt.fetchall_sql(info, sql)
    if res is None or res == []:
        msg = "Failed to get records"
        logger.error(msg)
        ct.write_log(log_file, msg)
    else:
        logger.debug(res)
        # First element of every description entry is the column name.
        db_columns = tuple(column[0] for column in des)
        logger.debug(db_columns)

        with codecs.open(filename=records_file, mode='w',
                         encoding='utf-8') as f:
            writer = csv.writer(f, dialect='excel')
            writer.writerow(db_columns)
            for item in res:
                logger.debug(item)
                writer.writerow(item)
Example #3
0
    def non_trade_ps_info(self, info):
        """Check that none of the configured processes is running.

        Builds a ``ps | grep`` pipeline from the user name (``info[2]``) and
        the process pattern (``info[5]``) and runs it via
        ``self.sshExecCmd``.  An empty result sets ``self.ps_info_verify``
        to True.  Otherwise the flag is set to False, every matching process
        is written to the error log and one "ps_port" SMS listing all of
        them is sent.

        Args:
            info: Server record; ``info[0]`` host IP, ``info[2]`` user name,
                ``info[5]`` grep -E pattern of process names.
        """
        hostip = info[0]
        username = info[2]
        processes = info[5]
        command = 'ps -u ' + username + ' -elf | grep -E "' + processes + '" | grep -v grep'
        logger.info("command: " + command)
        sshRes = self.sshExecCmd(command)

        if sshRes != []:
            self.ps_info_verify = False
            raw_lines = ''.join(sshRes).strip().split('\n')
            field_rows = [line.strip().split() for line in raw_lines]

            still_running = []
            for fields in field_rows:
                # Fields from index 14 on are joined back into the command
                # line; index 13 is reported as the time value.
                psstr = ' '.join(fields[14:])
                logger.info("ps:" + psstr)
                ct.write_log(error_log_file, psstr)
                msg = "error:" + hostip + " ::The process is " + psstr + ":: Time: " + str(
                    fields[13]) + " is still working!"
                ct.write_log(error_log_file, msg)
                logger.warning(msg)
                still_running.append(psstr)

            sms_msg = "error:" + hostip + " ::Processes : " + ';'.join(
                still_running) + " is still working!"
            ct.send_sms_control("ps_port", sms_msg)
        else:
            self.ps_info_verify = True
            msg = "OK: Server %s The count of the processes is 0 " % str(
                hostip)
            logger.info(msg)

        logger.info("ps Processes Check Result: " + str(self.ps_info_verify))
Example #4
0
def get_query_data(linuxInfo):
    """Run one configured command per server over SSH and record the output.

    For every entry a timestamped banner is appended to
    ``query_result_file``, the command is executed through ``ct`` and each
    stripped output item is logged and appended to the same file.  Failures
    while processing the output are logged and written to ``log_file``; the
    loop then continues with the next server.

    Args:
        linuxInfo: Iterable of records indexed as ``[0]`` host IP, ``[1]``
            port, ``[2]`` user name, ``[3]`` password, ``[5]`` command.
    """
    logger = logging.getLogger()
    yaml_path = './config/non_trade_monitor_logger.yaml'
    ct.setup_logging(yaml_path)
    for info in linuxInfo:
        hostip = info[0]
        port = info[1]
        username = info[2]
        password = info[3]
        command = info[5]

        cur_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        temstr = "**********" + cur_time + "::" + hostip + "::" + command + "::**********"
        ct.write_file(query_result_file, temstr)

        sshClient = ct.sshConnect(hostip, port, username, password)
        try:
            sshRes = ct.sshExecCmd(sshClient, command)
            logger.info(hostip + "::" + command)
            try:
                for item in sshRes:
                    temstr = item.strip()
                    logger.info(temstr)
                    ct.write_file(query_result_file, temstr)
            except Exception as e:
                msg = "write failed: [hostip:%s];[username:%s];[error:%s]" % (
                    hostip, username, str(e))
                logger.error(msg)
                ct.write_log(log_file, msg)
        finally:
            # Close the connection even when executing the command raises.
            ct.sshClose(sshClient)
    logger.info("get_query_data finished")
    # Iterate over a copy: removeHandler mutates logger.handlers, and
    # removing while iterating the live list skips every other handler.
    for handler in logger.handlers[:]:
        logger.removeHandler(handler)
Example #5
0
 def non_trade_mem_info(self, info):
     """Check memory usage of the remote host from /proc/meminfo.

     Two figures are derived from the "<n> kB" values of the file:

     * BuffersCachedRate = (Buffers + Cached) / MemTotal, in percent
     * Rate_Mem = (MemTotal - MemFree - Buffers - Cached) / MemTotal,
       in percent

     When ``ct.time_check('08:45', '15:30')`` is true the cache rate is not
     checked.  Otherwise a cache rate of 50 or more triggers a cache drop
     on the host followed by a second reading; if the rate is still 50 or
     more an alarm is raised.  Independently, Rate_Mem of 80 or more raises
     an alarm.  The outcome is stored in ``self.mem_info_verify``.

     Args:
         info: Server record; only ``info[0]`` (the host IP) is read.
     """
     command = 'cat /proc/meminfo'
     hostip = info[0]
     titlename = "MemTotal,MemFree,MemAvailable,Buffers,Cached,SwapCached,SwapTotal,SwapFree,BuffersCachedRate,Rate_Mem"

     def read_kb_values():
         # Every "<digits> kB" figure of the file, in file order.
         return re.findall(r"(\d+) kB", ",".join(self.sshExecCmd(command)))

     def cache_rate(total, buffers, cached):
         return round(100 * (int(buffers) + int(cached)) / float(total), 2)

     mem_values = read_kb_values()
     # NOTE(review): the fields are picked by position, which assumes
     # SwapTotal/SwapFree are the 15th/16th "kB" lines of the file --
     # confirm this holds for the kernels being monitored.
     MemTotal = mem_values[0]
     MemFree = mem_values[1]
     MemAvailable = mem_values[2]
     Buffers = mem_values[3]
     Cached = mem_values[4]
     SwapCached = mem_values[5]
     SwapTotal = mem_values[14]
     SwapFree = mem_values[15]
     logger.info(
         '******************************Mem Monitor: [server:%s]*********************************'
         % hostip)
     cur_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
     ct.write_file(result_file,
                   cur_time + "::" + hostip + "_mem_info_result:")
     logger.info(titlename)
     ct.write_file(result_file, titlename)
     # Compute the buffer/cache rate and the used-memory rate.
     BuffersCachedRate = cache_rate(MemTotal, Buffers, Cached)
     logger.info("BuffersCachedRate:" + str("%.2f" % BuffersCachedRate) +
                 "%")
     Free_Mem = int(MemFree) + int(Buffers) + int(Cached)
     Used_Mem = int(MemTotal) - Free_Mem
     Rate_Mem = round(100 * Used_Mem / float(MemTotal), 2)
     logger.info("Rate_Mem:" + str("%.2f" % Rate_Mem) + "%")
     memstr = ','.join(str(value) for value in (
         MemTotal, MemFree, MemAvailable, Buffers, Cached, SwapCached,
         SwapTotal, SwapFree, BuffersCachedRate, Rate_Mem))
     logger.debug(memstr)
     ct.write_file(result_file, memstr)

     # Alarm when BuffersCachedRate >= 50; skipped while time_check is true
     # (presumably trading hours -- confirm against ct.time_check).
     start_time = '08:45'
     end_time = '15:30'
     if ct.time_check(start_time, end_time):
         self.mem_info_verify = True
         logger.info("Not to check BuffersCachedRate")
     elif BuffersCachedRate < 50:
         self.mem_info_verify = True
         msg = "ok:" + hostip + " ::The BuffersCachedRate is " + str(
             BuffersCachedRate) + " % is ok"
         logger.info(msg)
     else:
         # Drop the page cache on the host, then read the figures again.
         command_clear = 'sync;echo 3 > /proc/sys/vm/drop_caches'
         logger.debug(self.sshExecCmd(command_clear))
         mem_values = read_kb_values()
         logger.info(
             '******************************Mem Monitor2: [server:%s]*********************************'
             % hostip)
         cur_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
         ct.write_file(result_file,
                       cur_time + "::" + hostip + "_mem_info_result:")
         logger.info(titlename)
         ct.write_file(result_file, titlename)
         BuffersCachedRate = cache_rate(mem_values[0], mem_values[3],
                                        mem_values[4])
         logger.info("BuffersCachedRate:" +
                     str("%.2f" % BuffersCachedRate) + "%")
         if BuffersCachedRate >= 50:
             # Still too high after the cache drop: raise the alarm.
             self.mem_info_verify = False
             msg = "error:" + hostip + " ::The BuffersCachedRate is " + str(
                 BuffersCachedRate) + " % is overload"
             ct.write_log(error_log_file, msg)
             logger.error(msg)
             ct.send_sms_control("mem", msg)
         else:
             # The flag must be set on this path too; otherwise the final
             # result would reuse whatever an earlier call left behind.
             self.mem_info_verify = True
             msg = "ok:" + hostip + " ::The BuffersCachedRate is " + str(
                 BuffersCachedRate) + " % is ok"
             logger.info(msg)

     # Alarm when Rate_Mem >= 80 (value taken from the first reading).
     if Rate_Mem < 80:
         # The flag keeps the value decided by the cache-rate check.
         msg = "ok:" + hostip + " ::The Rate_Mem is " + str(
             Rate_Mem) + " % is ok"
         logger.info(msg)
     else:
         self.mem_info_verify = False
         msg = "error:" + hostip + " ::The Rate_Mem is " + str(
             Rate_Mem) + " % is overload"
         ct.write_log(error_log_file, msg)
         logger.error(msg)
         ct.send_sms_control("mem", msg)
     msg = "Mem Check Result: " + str(self.mem_info_verify)
     logger.info(msg)
Example #6
0
    def fpga_file_info(self, info):
        """Check that today's three FPGA session files exist and are non-empty.

        Runs ``ls -l`` on the directory in ``info[5]`` through
        ``self.sshExecCmd``, writes every listed row to ``result_file`` and
        records the sizes of

        * ``FPGA0_CSESSION00_<YYYYMMDD>.sent``
        * ``FPGA0_VSESSION00_<YYYYMMDD>.received``
        * ``FPGA0_VSESSION01_<YYYYMMDD>.received``

        ``self.fpga_file_info_verify`` is True only when all three sizes are
        present and non-zero.  The outcome is also appended to
        ``self.fpga_Check_flag_list`` (1 for ok, 0 for failure); a failure
        is written to the error log and sent as an "fpga" SMS.  An empty
        command result sets the flag to False and only logs a warning.

        Args:
            info: Server record; ``info[0]`` host IP, ``info[5]`` directory.
        """
        hostip = info[0]
        filepath = info[5]
        command = 'ls -l ' + filepath
        logger.info(command)
        sshRes = self.sshExecCmd(command)
        if sshRes == []:
            self.fpga_file_info_verify = False
            msg = "error: The sshResturn is None, please check it"
            ct.write_log(error_log_file, msg)
            logger.warning(msg)
        else:
            sshResList = ''.join(sshRes).strip().split('\n')
            sshResLists = [line.strip().split() for line in sshResList]
            logger.info(
                "******************************FPGA Monitor: [server:%s]*********************************"
                % hostip)
            cur_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            ct.write_file(result_file,
                          cur_time + "::" + hostip + "_ps_info_result:")
            titlename = "TypePermission,ConnectedCount,Owner,Group,Size,ModifyMonth,ModifyDate,ModifyTime,FName"
            logger.debug(titlename)
            ct.write_file(result_file, titlename)

            fileSize_dict = {
                'sent0': None,
                'received0': None,
                'received1': None
            }
            # Expected file names are built once, so every row is compared
            # against the same date even if the run crosses midnight.
            today = dt.datetime.now().strftime("%Y%m%d")
            expected_files = {
                'FPGA0_CSESSION00_' + today + '.sent': 'sent0',
                'FPGA0_VSESSION00_' + today + '.received': 'received0',
                'FPGA0_VSESSION01_' + today + '.received': 'received1',
            }
            # The first row of the listing is skipped.
            for datalist in sshResLists[1:]:
                lsstr = ','.join(datalist)
                logger.debug(lsstr)
                ct.write_file(result_file, lsstr)
                if len(datalist) == 9:
                    key = expected_files.get(datalist[8])
                    if key is not None:
                        # Field 4 is used as the size, field 8 as the name.
                        fileSize_dict[key] = int(datalist[4])
            logger.info(fileSize_dict)
            ntime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            dictstr = ntime + "::FPGA file size: " + str(fileSize_dict)
            ct.write_file(result_file, dictstr)
            # Store a real bool: a missing (None) or zero size fails.
            self.fpga_file_info_verify = bool(
                fileSize_dict['sent0'] and fileSize_dict['received0']
                and fileSize_dict['received1'])
            if self.fpga_file_info_verify:
                msg = "ok: The server %s FPGA Monitor is ok, %s " % (hostip,
                                                                     dictstr)
                logger.info(msg)
                self.fpga_Check_flag_list.append(1)
            else:
                msg = "error: The server %s FPGA Monitor is not correct, %s " % (
                    hostip, dictstr)
                ct.write_log(error_log_file, msg)
                logger.error(msg)
                ct.send_sms_control("fpga", msg)
                self.fpga_Check_flag_list.append(0)
Example #7
0
    def ps_info(self, info):
        """Check that every configured process is running in a healthy state.

        Builds a ``ps | grep`` pipeline from the user name (``info[2]``) and
        the ``|``-separated process pattern (``info[5]``) and runs it via
        ``self.sshExecCmd``.  The check passes
        (``self.ps_info_verify = True``) only when the number of returned
        items equals the number of configured patterns and every process
        state (second ps field) is one of R, S or D.  Each failure is
        written to the error log and sent as a "ps_port" SMS.

        Args:
            info: Server record; ``info[0]`` host IP, ``info[2]`` user name,
                ``info[5]`` grep -E pattern of process names.
        """
        hostip = info[0]
        username = info[2]
        processes = info[5]
        process_count = len(str(processes).split('|'))
        command = 'ps -u ' + username + ' -elf | grep -E "' + processes + '" | grep -v grep'
        logger.debug("command: " + command)
        sshRes = self.sshExecCmd(command)
        if sshRes == []:
            self.ps_info_verify = False
            msg = "error: Server %s The count of the processes is 0, please check it" % str(
                hostip)
            ct.write_log(error_log_file, msg)
            logger.error(msg)
            ct.send_sms_control("ps_port", msg)
        else:
            sshResList = ''.join(sshRes).strip().split('\n')
            sshResLists = [line.strip().split() for line in sshResList]
            logger.info(
                "******************************Processes Monitor: [server:%s]*********************************"
                % hostip)
            cur_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            ct.write_file(result_file,
                          cur_time + "::" + hostip + "_ps_info_result:")
            # NOTE(review): `ps -elf` normally also prints an STIME column,
            # which this header does not list -- confirm whether readers of
            # result_file depend on the current header before changing it.
            titlename = "F,S,UID,PID,PPID,C,PRI,NI,ADDR,SZ,WCHAN,TTY,TIME,CMD"
            logger.debug(titlename)
            ct.write_file(result_file, titlename)
            # First check that the number of processes is as expected.
            self.ps_info_verify = (process_count == len(sshRes))
            if self.ps_info_verify:
                msg = "ok: The query count of the processes is " + str(
                    len(sshRes))
                logger.info(msg)
                # Then check the state of each process.  The flag is only
                # ever lowered here, so one bad process fails the whole
                # check regardless of the ones listed after it.
                for datalist in sshResLists:
                    psstr = ','.join(datalist)
                    logger.debug(psstr)
                    ct.write_file(result_file, psstr)
                    if datalist[1] in ('R', 'S', 'D'):
                        msg = "ok:" + hostip + ":: Time:" + str(
                            datalist[13]) + " ::The state is " + str(
                                datalist[1]) + " is ok"
                        logger.info(msg)
                    else:
                        self.ps_info_verify = False
                        msg = "error:" + hostip + ":: Time: " + str(
                            datalist[13]) + " ::The state is " + str(
                                datalist[1]) + " is not correct"
                        ct.write_log(error_log_file, msg)
                        logger.error(msg)
                        ct.send_sms_control("ps_port", msg)
            else:
                msg = "error: Server %s The query count %s of the processes is not equal: %s" % (
                    hostip, str(len(sshRes)), str(process_count))
                ct.write_log(error_log_file, msg)
                logger.error(msg)
                ct.send_sms_control("ps_port", msg)

        msg = "ps Processes Check Result: " + str(self.ps_info_verify)
        logger.info(msg)
Example #8
0
    def disk_info(self, info):
        """Check disk usage of the remote host from ``df -h``.

        Rows of the output (header skipped) are normalised to six fields; a
        one-field row immediately followed by a five-field row is treated
        as one wrapped entry and merged.  Every normalised row is written
        to ``result_file``.  A usage of 80 % or more raises an alarm,
        except for ``/dev/sr0``, ``/dev/sr1`` and filesystems whose name
        matches ``//.../`` (network paths).  The outcome is stored in
        ``self.disk_info_verify``; it is False when no usable row was found.

        Args:
            info: Server record; only ``info[0]`` (the host IP) is read.
        """
        command = 'df -h'
        hostip = info[0]
        sshRes = self.sshExecCmd(command)
        sshResList = ''.join(sshRes).strip().split('\n')
        df_info_list = [line.strip().split() for line in sshResList[1:]]

        sshResLists = []
        for i, fields in enumerate(df_info_list):
            # Look ahead only when a next row exists; a trailing one-field
            # row must not index past the end of the list.
            following = df_info_list[i + 1] if i + 1 < len(
                df_info_list) else None
            if (len(fields) == 1 and following is not None
                    and len(following) == 5):
                sshResLists.append(fields + following)
            elif len(fields) == 6:
                sshResLists.append(fields)
            elif len(fields) not in (1, 5):
                msg = "The df_info's format is not correct!"
                ct.write_log(error_log_file, msg)
                logger.error(msg)

        logger.info(
            "************************Disk Monitor: [server:%s]****************************"
            % hostip)
        cur_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        ct.write_file(result_file,
                      cur_time + "::" + hostip + "_disk_info_result:")
        titlename = "Filesystem,Size,Used,Avail,Use%,Mounted on"
        logger.debug(titlename)
        ct.write_file(result_file, titlename)

        # Always assign the flag so the result never reflects a previous
        # call; with no usable rows the check counts as failed.
        self.disk_info_verify = bool(sshResLists)
        if not sshResLists:
            logger.error("error:" + hostip +
                         " ::No usable df output was returned")

        for disklist in sshResLists:
            diskstr = ','.join(disklist)
            logger.debug(diskstr)
            ct.write_file(result_file, diskstr)
            Use_Rate = int(disklist[4].split('%')[0])
            # Detect network paths of the form //host/share.
            matchObj = re.search(r'//.*?/', disklist[0], re.M | re.I)
            # Alarm when usage is 80 % or more; optical drives and network
            # paths are ignored.
            if disklist[0] in ("/dev/sr0", "/dev/sr1") or matchObj:
                continue
            if Use_Rate < 80:
                msg = "ok:" + hostip + "::" + disklist[
                    0] + " ::The Use% is " + str(Use_Rate) + " % is ok"
                logger.info(msg)
            else:
                self.disk_info_verify = False
                msg = "error:" + hostip + "::" + disklist[
                    0] + " ::The Use% is " + str(
                        Use_Rate) + " % is overload"
                ct.write_log(error_log_file, msg)
                logger.error(msg)
                ct.send_sms_control("disk", msg)
        msg = "Disk Check Result: " + str(self.disk_info_verify)
        logger.info(msg)
Example #9
0
    def ping_server_info(self, info):
        """Ping the host from the local machine and check loss and latency.

        Uses the platform's ``ping`` command (four probes at one-second
        intervals in quiet mode on non-Windows systems) and parses the
        packet-loss percentage and the average round-trip time from fixed
        positions of its output.  Any loss above 0 % or an average RTT
        above 800 ms sets ``self.ping_info_verify`` to False, writes the
        error log and sends a "ping" SMS; otherwise the flag is True.
        Output that cannot be parsed counts as a failure.

        Args:
            info: Server record; only ``info[0]`` (the host IP) is read.
        """
        hostip = info[0]

        on_windows = platform.system() == "Windows"
        if on_windows:
            command = 'ping ' + hostip
            logger.debug(command)
        else:
            command = 'ping -i 1 -c 4 -q ' + hostip
        ping = subprocess.Popen(command,
                                shell=True,
                                stderr=subprocess.PIPE,
                                stdout=subprocess.PIPE)
        res, err = ping.communicate()
        if err:
            logger.warning("ping error: %s" % str(err))
            pres = []
        else:
            pres = res.decode('gbk', errors='replace').split('\n')
            # Lazy %-formatting: concatenating str + list would raise.
            logger.debug("pres: %s", pres)

        # Packet-loss percentage; missing output counts as total loss.
        try:
            if on_windows:
                loss = pres[8].split('(')[1].split('%')[0] + "%"
            else:
                # e.g. "4 packets transmitted, 0 received, 100% packet loss, time 611ms"
                loss = pres[3].split()[5]
        except IndexError:
            loss = "100%"

        # Average round-trip time; a sentinel above the threshold is used
        # when it cannot be found, so float(rtt) below always has a number.
        try:
            if on_windows:
                rtt = pres[10].split('=')[3].split('ms')[0]
            else:
                rtt = pres[4].split('/')[4]
        except IndexError:
            rtt = "9999"

        # Alarm when loss > 0 or rtt > 800.
        try:
            failed = float(loss.strip('%')) > 0 or float(rtt) > 800
        except ValueError:
            # Non-numeric fields mean the output was not understood.
            failed = True

        if failed:
            self.ping_info_verify = False
            msg = "error:" + hostip + " ::The ping lost is " + loss + " rtt is " + rtt + "ms"
            ct.write_log(error_log_file, msg)
            logger.error(msg)
            ct.send_sms_control("ping", msg)
        else:
            self.ping_info_verify = True
            msg = "ok:" + hostip + " ::The ping lost is " + loss + " rtt is " + rtt + "ms"
            logger.info(msg)
        msg = "Ping Check Result: " + str(self.ping_info_verify)
        logger.info(msg)