Ejemplo n.º 1
0
def get_errorLog(linuxInfo):
    """Grep log files on remote hosts over SSH and collect the matching lines.

    The result file ``grep_result_file`` is emptied first (if it exists), then
    every matching line is appended to it and also printed.

    Args:
        linuxInfo: iterable of host records indexed as
            ``[hostip, port, username, password, servername, filedir]``.

    Returns:
        A list with one entry per output line; each entry is the line split
        once on ``':'`` (so typically ``[prefix, rest]``).
    """
    # Empty the result file left over from a previous run.
    if os.path.exists(grep_result_file):
        with open(grep_result_file, "r+") as f:
            f.seek(0)
            f.truncate()

    grep_lists = []
    for info in linuxInfo:

        hostip = info[0]
        port = info[1]
        username = info[2]
        password = info[3]
        servername = info[4]
        filedir = info[5]

        logger.info(servername + "::" + hostip + " get errorLog")
        # Reset per host: if the SSH step fails we must not hit an unbound
        # name, nor re-report the lines collected from the previous host.
        sshRes = []
        ssh = None
        try:
            ssh = paramiko.SSHClient()
            ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
            ssh.connect(hostip, port, username, password)
            command = 'egrep -i "error|warning|critical" ' + filedir + ' | egrep -iv "errorid=0|error_code: 4294856287|ERROR woker id"'
            # NOTE(review): filename is hard-coded, so the "on_connected"
            # command below always replaces the error/warning grep above.
            # Kept as-is; confirm whether this is intentional.
            filename = "SZSESyslog.log"
            if filename == "SZSESyslog.log":
                command = 'egrep -i "on_connected" ' + filedir
            stdin, stdout, stderr = ssh.exec_command(command)
            stdoutstr = stdout.read().decode('utf-8')
            sshRes = stdoutstr.strip().split('\n')
        except Exception as e:
            msg = "SSH and get log failed: [hostip:%s];[username:%s];[error:%s]" % (
                hostip, username, str(e))
            logger.error(msg)
        finally:
            # Always release the connection, whether or not the grep worked.
            if ssh is not None:
                ssh.close()

        for item in sshRes:
            error_list = item.strip().split(':', 1)
            grep_lists.append(error_list)
            memstr = ','.join(error_list)
            print(memstr)
            ct.write_file(grep_result_file, memstr)
    return grep_lists
Ejemplo n.º 2
0
def get_query_data(linuxInfo):
    """Run one query command per host over SSH and append the output to a file.

    Logging is configured from ``./config/non_trade_monitor_logger.yaml`` at
    the start and all handlers are detached from the root logger at the end.

    Args:
        linuxInfo: iterable of host records indexed as
            ``[hostip, port, username, password, servername, command]``
            (the server name at index 4 is not used here).
    """
    logger = logging.getLogger()
    yaml_path = './config/non_trade_monitor_logger.yaml'
    ct.setup_logging(yaml_path)
    for info in linuxInfo:
        hostip = info[0]
        port = info[1]
        username = info[2]
        password = info[3]
        command = info[5]

        # Banner line separating this host's output in the result file.
        cur_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        temstr = "**********" + cur_time + "::" + hostip + "::" + command + "::**********"
        ct.write_file(query_result_file, temstr)

        sshClient = ct.sshConnect(hostip, port, username, password)
        try:
            sshRes = ct.sshExecCmd(sshClient, command)
            logger.info(hostip + "::" + command)
            try:
                for item in sshRes:
                    temstr = item.strip()
                    logger.info(temstr)
                    ct.write_file(query_result_file, temstr)
            except Exception as e:
                msg = "write failed: [hostip:%s];[username:%s];[error:%s]" % (
                    hostip, username, str(e))
                logger.error(msg)
                ct.write_log(log_file, msg)
        finally:
            # Close the connection even if executing the command raised.
            ct.sshClose(sshClient)
    logger.info("get_query_data finished")
    # Iterate over a copy: removeHandler mutates logger.handlers, and removing
    # while iterating the live list skips every other handler.
    for handler in list(logger.handlers):
        logger.removeHandler(handler)
Ejemplo n.º 3
0
    def socket_info(self, info):
        """Probe each TCP port of a host and record whether all are reachable.

        ``info[0]`` is the host address and ``info[6]`` a ``';'``-separated
        list of ports. Every port gets a 3-second connect attempt; the outcome
        is written to the result file, and failures are additionally logged to
        the error file and reported through ``ct.send_sms_control``.

        Sets ``self.socket_info_verify`` to True only when at least one port
        was probed and every probe succeeded.
        """
        hostip = info[0]
        port_names = info[6].split(';')
        stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        logger.info(
            "******************************Ports Monitor: [server:%s]*********************************"
            % hostip)
        ct.write_file(result_file,
                      stamp + "::" + hostip + "_socket_port_info_result:")

        # One entry per probed port: 1 for reachable, 0 for unreachable.
        outcomes = []
        for port in port_names:
            conn = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            conn.settimeout(3)
            try:
                conn.connect((hostip, int(port)))
                report = "ok:" + str(hostip) + ":" + str(port) + " is ok"
                ct.write_file(result_file, report)
                logger.info(report)
                outcomes.append(1)
            except Exception:
                report = "error: " + str(hostip) + ":" + str(port) + " is closed"
                logger.error(report)
                ct.send_sms_control("ps_port", report)
                ct.write_file(error_log_file, report)
                ct.write_file(result_file, report)
                outcomes.append(0)
            conn.close()

        # An empty port list counts as a failed check.
        self.socket_info_verify = bool(outcomes) and all(outcomes)
        summary = "socket Ports Check Result: " + hostip + "::" + str(
            self.socket_info_verify)
        logger.info(summary)
Ejemplo n.º 4
0
 def non_trade_mem_info(self, info):
     """Check memory usage of a host from ``/proc/meminfo``.

     Two ratios are computed from the kB values of the command output:
     ``BuffersCachedRate`` = (Buffers + Cached) / MemTotal and
     ``Rate_Mem`` = (MemTotal - MemFree - Buffers - Cached) / MemTotal,
     both in percent. Outside the 08:45-15:30 window a BuffersCachedRate
     of 50 or more triggers a cache drop and one re-check; Rate_Mem of 80
     or more always raises an alarm.

     ``info[0]`` is the host address (used only for reporting). The result
     is stored in ``self.mem_info_verify``.

     NOTE(review): values are taken by position in the output (indices
     0-5, 14, 15), which assumes a fixed /proc/meminfo line order - confirm
     for the kernels being monitored.
     """
     command = 'cat /proc/meminfo'
     hostip = info[0]
     sshRes = self.sshExecCmd(command)
     # Raw string: the previous non-raw pattern relied on invalid escapes.
     mem_values = re.findall(r"(\d+) kB", ",".join(sshRes))
     MemTotal = mem_values[0]
     MemFree = mem_values[1]
     MemAvailable = mem_values[2]
     Buffers = mem_values[3]
     Cached = mem_values[4]
     SwapCached = mem_values[5]
     SwapTotal = mem_values[14]
     SwapFree = mem_values[15]
     logger.info(
         '******************************Mem Monitor: [server:%s]*********************************'
         % hostip)
     cur_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
     ct.write_file(result_file,
                   cur_time + "::" + hostip + "_mem_info_result:")
     titlename = "MemTotal,MemFree,MemAvailable,Buffers,Cached,SwapCached,SwapTotal,SwapFree,BuffersCachedRate,Rate_Mem"
     logger.info(titlename)
     ct.write_file(result_file, titlename)
     # Compute the buffers/cached ratio and the used-memory ratio.
     BuffersCachedRate = round(
         100 * (int(Buffers) + int(Cached)) / float(MemTotal), 2)
     logger.info("BuffersCachedRate:" + str("%.2f" % BuffersCachedRate) +
                 "%")
     Free_Mem = int(MemFree) + int(Buffers) + int(Cached)
     Used_Mem = int(MemTotal) - Free_Mem
     Rate_Mem = round(100 * Used_Mem / float(MemTotal), 2)
     logger.info("Rate_Mem:" + str("%.2f" % Rate_Mem) + "%")
     tem_list = [
         MemTotal, MemFree, MemAvailable, Buffers, Cached, SwapCached,
         SwapTotal, SwapFree, BuffersCachedRate, Rate_Mem
     ]
     memstr = ','.join(map(str, tem_list))
     logger.debug(memstr)
     ct.write_file(result_file, memstr)
     # Alarm on BuffersCachedRate >= 50, but skip the check in trading hours.
     start_time = '08:45'
     end_time = '15:30'
     if (ct.time_check(start_time, end_time)):
         self.mem_info_verify = True
         logger.info("Not to check BuffersCachedRate")
     else:
         if BuffersCachedRate < 50:
             self.mem_info_verify = True
             msg = "ok:" + hostip + " ::The BuffersCachedRate is " + str(
                 BuffersCachedRate) + " % is ok"
             logger.info(msg)
         else:
             # Drop the page cache, then measure once more.
             command_clear = 'sync;echo 3 > /proc/sys/vm/drop_caches'
             sshRes_clear = self.sshExecCmd(command_clear)
             logger.debug(sshRes_clear)
             sshRes = self.sshExecCmd(command)
             mem_values = re.findall(r"(\d+) kB", ",".join(sshRes))
             MemTotal = mem_values[0]
             Buffers = mem_values[3]
             Cached = mem_values[4]
             logger.info(
                 '******************************Mem Monitor2: [server:%s]*********************************'
                 % hostip)
             cur_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
             ct.write_file(result_file,
                           cur_time + "::" + hostip + "_mem_info_result:")
             titlename = "MemTotal,MemFree,MemAvailable,Buffers,Cached,SwapCached,SwapTotal,SwapFree,BuffersCachedRate,Rate_Mem"
             logger.info(titlename)
             ct.write_file(result_file, titlename)
             BuffersCachedRate = round(
                 100 * (int(Buffers) + int(Cached)) / float(MemTotal), 2)
             logger.info("BuffersCachedRate:" +
                         str("%.2f" % BuffersCachedRate) + "%")
             # Still at or above 50 after the cache drop: raise the alarm.
             if BuffersCachedRate >= 50:
                 self.mem_info_verify = False
                 msg = "error:" + hostip + " ::The BuffersCachedRate is " + str(
                     BuffersCachedRate) + " % is overload"
                 ct.write_log(error_log_file, msg)
                 logger.error(msg)
                 ct.send_sms_control("mem", msg)
             else:
                 # The flag must be set on this path too; otherwise the
                 # Rate_Mem check below reads a stale or missing attribute.
                 self.mem_info_verify = True
                 msg = "ok:" + hostip + " ::The BuffersCachedRate is " + str(
                     BuffersCachedRate) + " % is ok"
                 logger.info(msg)
     # Alarm on Rate_Mem >= 80 (value from the first measurement).
     if Rate_Mem < 80:
         self.mem_info_verify = self.mem_info_verify and True
         msg = "ok:" + hostip + " ::The Rate_Mem is " + str(
             Rate_Mem) + " % is ok"
         logger.info(msg)
     else:
         self.mem_info_verify = False
         msg = "error:" + hostip + " ::The Rate_Mem is " + str(
             Rate_Mem) + " % is overload"
         ct.write_log(error_log_file, msg)
         logger.error(msg)
         ct.send_sms_control("mem", msg)
     msg = "Mem Check Result: " + str(self.mem_info_verify)
     logger.info(msg)
Ejemplo n.º 5
0
    def fpga_file_info(self, info):
        """Check that today's FPGA session files exist with a non-zero size.

        Runs ``ls -l`` on ``info[5]`` for host ``info[0]``, writes every
        listing row to the result file and records the size of the three
        files named ``FPGA0_CSESSION00_<today>.sent``,
        ``FPGA0_VSESSION00_<today>.received`` and
        ``FPGA0_VSESSION01_<today>.received``.

        ``self.fpga_file_info_verify`` ends up truthy only when all three
        sizes are present and non-zero; 1 or 0 is appended to
        ``self.fpga_Check_flag_list`` accordingly. An empty command result
        sets the flag to False and only logs a warning.
        """
        hostip = info[0]
        filepath = info[5]
        command = 'ls -l ' + filepath
        logger.info(command)
        sshRes = self.sshExecCmd(command)

        if sshRes == []:
            self.fpga_file_info_verify = False
            msg = "error: The sshResturn is None, please check it"
            ct.write_log(error_log_file, msg)
            logger.warning(msg)
            return

        # Split the raw output into rows of whitespace-separated columns.
        listing_rows = [
            row.strip().split()
            for row in ''.join(sshRes).strip().split('\n')
        ]
        logger.info(
            "******************************FPGA Monitor: [server:%s]*********************************"
            % hostip)
        cur_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        ct.write_file(result_file,
                      cur_time + "::" + hostip + "_ps_info_result:")
        titlename = "TypePermission,ConnectedCount,Owner,Group,Size,ModifyMonth,ModifyDate,ModifyTime,FName"
        logger.debug(titlename)
        ct.write_file(result_file, titlename)

        fileSize_dict = {
            'sent0': None,
            'received0': None,
            'received1': None
        }
        # The first row of the listing is skipped.
        for columns in listing_rows[1:]:
            row_text = ','.join(columns)
            logger.debug(row_text)
            ct.write_file(result_file, row_text)
            if len(columns) != 9:
                continue
            size = int(columns[4])
            file_name = columns[8]
            today = dt.datetime.now().strftime("%Y%m%d")
            # Expected file name -> slot in the size dictionary.
            expected = {
                'FPGA0_CSESSION00_' + today + '.sent': 'sent0',
                'FPGA0_VSESSION00_' + today + '.received': 'received0',
                'FPGA0_VSESSION01_' + today + '.received': 'received1',
            }
            slot = expected.get(file_name)
            if slot is not None:
                fileSize_dict[slot] = size

        logger.info(fileSize_dict)
        ntime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        dictstr = ntime + "::FPGA file size: " + str(fileSize_dict)
        ct.write_file(result_file, dictstr)
        self.fpga_file_info_verify = (fileSize_dict['sent0']
                                      and fileSize_dict['received0']
                                      and fileSize_dict['received1'])
        if not self.fpga_file_info_verify:
            msg = "error: The server %s FPGA Monitor is not correct, %s " % (
                hostip, dictstr)
            ct.write_log(error_log_file, msg)
            logger.error(msg)
            ct.send_sms_control("fpga", msg)
            self.fpga_Check_flag_list.append(0)
        else:
            msg = "ok: The server %s FPGA Monitor is ok, %s " % (hostip,
                                                                 dictstr)
            logger.info(msg)
            self.fpga_Check_flag_list.append(1)
Ejemplo n.º 6
0
    def ps_info(self, info):
        """Check that the expected processes are running in a healthy state.

        ``info[0]`` is the host, ``info[2]`` the user whose processes are
        listed and ``info[5]`` a ``'|'``-separated pattern list passed to
        ``grep -E``. The check passes when the number of output lines equals
        the number of patterns and every listed process is in state
        ``R``, ``S`` or ``D`` (second column of ``ps -elf``).

        The result is stored in ``self.ps_info_verify``; failures are written
        to the error log and reported through ``ct.send_sms_control``.
        """
        hostip = info[0]
        username = info[2]
        processes = info[5]
        process_count = len(str(processes).split('|'))
        command = 'ps -u ' + username + ' -elf | grep -E "' + processes + '" | grep -v grep'
        logger.debug("command: " + command)
        sshRes = self.sshExecCmd(command)
        if sshRes == []:
            self.ps_info_verify = False
            msg = "error: Server %s The count of the processes is 0, please check it" % str(
                hostip)
            ct.write_log(error_log_file, msg)
            logger.error(msg)
            ct.send_sms_control("ps_port", msg)
        else:
            sshResStr = ''.join(sshRes)
            sshResList = sshResStr.strip().split('\n')
            sshResLists = [sshCom.strip().split() for sshCom in sshResList]
            logger.info(
                "******************************Processes Monitor: [server:%s]*********************************"
                % hostip)
            cur_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            ct.write_file(result_file,
                          cur_time + "::" + hostip + "_ps_info_result:")
            titlename = "F,S,UID,PID,PPID,C,PRI,NI,ADDR,SZ,WCHAN,TTY,TIME,CMD"
            logger.debug(titlename)
            ct.write_file(result_file, titlename)
            # First check that the number of processes is as expected.
            self.ps_info_verify = (process_count == len(sshRes))
            if (self.ps_info_verify):
                msg = "ok: The query count of the processes is " + str(
                    len(sshRes))
                logger.info(msg)
                # Then check the state of every process. The flag is already
                # True here and must only ever be lowered inside the loop:
                # resetting it per row would let a later healthy process
                # hide an earlier bad one.
                for datalist in sshResLists:
                    psstr = ','.join(datalist)
                    logger.debug(psstr)
                    ct.write_file(result_file, psstr)
                    if datalist[1] in ['R', 'S', 'D']:
                        msg = "ok:" + hostip + ":: Time:" + str(
                            datalist[13]) + " ::The state is " + str(
                                datalist[1]) + " is ok"
                        logger.info(msg)
                    else:
                        self.ps_info_verify = False
                        msg = "error:" + hostip + ":: Time: " + str(
                            datalist[13]) + " ::The state is " + str(
                                datalist[1]) + " is not correct"
                        ct.write_log(error_log_file, msg)
                        logger.error(msg)
                        ct.send_sms_control("ps_port", msg)

            else:
                msg = "error: Server %s The query count %s of the processes is not equal: %s" % (
                    hostip, str(len(sshRes)), str(process_count))
                ct.write_log(error_log_file, msg)
                logger.error(msg)
                ct.send_sms_control("ps_port", msg)

        msg = "ps Processes Check Result: " + str(self.ps_info_verify)
        logger.info(msg)
Ejemplo n.º 7
0
    def disk_info(self, info):
        """Check disk usage of a host from the output of ``df -h``.

        Every filesystem row is written to the result file. A usage of 80%
        or more raises an alarm, except for ``/dev/sr0``, ``/dev/sr1`` and
        network paths of the form ``//host/...``, which are only recorded.

        ``info[0]`` is the host address (used only for reporting). The result
        is stored in ``self.disk_info_verify``; it is False when no row could
        be parsed.
        """
        command = 'df -h'
        hostip = info[0]
        sshRes = self.sshExecCmd(command)
        sshResStr = ''.join(sshRes)
        sshResList = sshResStr.strip().split('\n')
        # Skip the header row and split each remaining row into columns.
        df_info_list = [disk.strip().split() for disk in sshResList[1:]]

        sshResLists = []
        row_count = len(df_info_list)
        for i, fields in enumerate(df_info_list):
            if len(fields) == 1:
                # A long device name wraps onto its own line; merge it with
                # the 5 remaining columns on the next line. The bounds check
                # prevents an IndexError when the lone name is the last row.
                if i + 1 < row_count and len(df_info_list[i + 1]) == 5:
                    sshResLists.append(fields + df_info_list[i + 1])
            elif len(fields) == 6:
                sshResLists.append(fields)
            elif len(fields) != 5:
                msg = "The df_info's format is not correct!"
                ct.write_file(error_log_file, msg)
                logger.error(msg)

        logger.info(
            "************************Disk Monitor: [server:%s]****************************"
            % hostip)
        cur_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        ct.write_file(result_file,
                      cur_time + "::" + hostip + "_disk_info_result:")
        titlename = "Filesystem,Size,Used,Avail,Use%,Mounted on"
        logger.debug(titlename)
        ct.write_file(result_file, titlename)
        # Always assign the flag: with no parsed rows the check has failed,
        # and the summary below must not read a stale or missing attribute.
        self.disk_info_verify = bool(sshResLists)
        for disklist in sshResLists:
            diskstr = ','.join(disklist)
            logger.debug(diskstr)
            ct.write_file(result_file, diskstr)
            Use_Rate = int(disklist[4].split('%')[0])
            # Detect network paths such as //ip/path.
            matchObj = re.search(r'//.*?/', disklist[0], re.M | re.I)
            # Alarm at 80% used or more, ignoring optical drives and network
            # paths.
            if (disklist[0] not in ["/dev/sr0", "/dev/sr1"
                                    ]) and not (matchObj):
                if Use_Rate < 80:
                    msg = "ok:" + hostip + "::" + disklist[
                        0] + " ::The Use% is " + str(Use_Rate) + " % is ok"
                    logger.info(msg)
                else:
                    self.disk_info_verify = False
                    msg = "error:" + hostip + "::" + disklist[
                        0] + " ::The Use% is " + str(
                            Use_Rate) + " % is overload"
                    ct.write_log(error_log_file, msg)
                    logger.error(msg)
                    ct.send_sms_control("disk", msg)
        msg = "Disk Check Result: " + str(self.disk_info_verify)
        logger.info(msg)