Example #1
0
 def getDisk(self):
     """Record disk capacity figures and raise/clear the root-partition alarm.

     Sums the total size of every partition reported by psutil and hands the
     converted total, the free space of '/' and the usage percentage of '/'
     to writeToFile() under the keys "totalDisk", "rootSize" and "rootRate".

     The "rootAlarm" value stored via self.sDb for self.telIP acts as a
     one-shot switch so that each state change is mailed only once:
       * usage above 85% and switch "ON"    -> send alarm, set switch "OFF"
       * usage at/below 85% and switch "OFF" -> send recovery, set switch "ON"
     """
     # Read the mount point from the partition tuple's own field instead of
     # parsing its repr(), which breaks on paths containing ',' or '='.
     diskTotal = 0
     for part in psutil.disk_partitions():
         diskTotal += psutil.disk_usage(part.mountpoint).total

     # Take a single snapshot of '/' so that free space and percentage are
     # consistent with each other.
     rootUsage = psutil.disk_usage('/')
     rootSize, rootRate = rootUsage.free, rootUsage.percent
     rootFree = changeUnit(int(rootSize))

     writeToFile("totalDisk", changeUnit(int(diskTotal)))
     writeToFile("rootSize", rootFree)
     writeToFile("rootRate", rootRate)

     if float(rootRate) > 85:
         # Switch "ON" means no alarm has been sent for this incident yet;
         # it is flipped to "OFF" after alarming to avoid repeated mails.
         if self.sDb.read(self.telIP, "rootAlarm") == "ON":
             sub = "DBTX ROOT partition Alarm:%s" % self.ps1
             msg = "DateTime: %s\n根分区已使用%s%%, 超过85%%, 请警惕!!!" % (getTimeNow(), rootRate)
             saveLog.warning(msg)
             sendMail(sub, msg)
             self.sDb.update(self.telIP, "rootAlarm", "OFF")
     else:
         # Usage is back to normal and an alarm was sent earlier: report the
         # recovery once and re-arm the switch.
         if self.sDb.read(self.telIP, "rootAlarm") == "OFF":
             sub = "DBTX ROOT partition is OK:%s" % self.ps1
             msg = "DateTime: %s\n根分区可用空间充足%s." % (getTimeNow(), rootFree)
             saveLog.info(msg)
             sendMail(sub, msg)
             self.sDb.update(self.telIP, "rootAlarm", "ON")
Example #2
0
        redisData["connects_rate"] = (int(redisData["total_connects"]) - int(last_connects)) / float(60)
        redisData["command_rate"] = (int(redisData["total_commands"]) - int(last_commands)) / float(60)

        fZbx = str(redisData).strip("{}").replace(",","\n").replace("'","")   ## 转换成zabbix需要的格式
        fObj = open(rZbx,'w')
        fObj.write(fZbx)
        fObj.close()
    except Exception,err:
        saveLog.error("getRedisBaseInfo(): %s" % str(err))

    ## 分析数据,必要时报警: 1, 使用内存大于1G; 2, 命中率低于20%
    if int(redisData["used_memory"]) > 1073741824:
        sDb = opRedis()    ## 实例化 Redis库操作类
        if sDb.read(telIP, "rRateAlarm") == "ON":
            sub = "DBTX Redis Memory Alarm: %s" % ps1
            msg = "DateTime: %s\nRedis使用内存为: %s, 超过1G,请警惕 !!!" % (getTimeNow(), changeUnit(int(redisData["used_memory"])))
            saveLog.warning(msg)
            sendMail(sub, msg)
            sDb.update(telIP, "rRateAlarm", "OFF")    ## 报警后,将开关置为"OFF"
    else:
        sDb = opRedis()    ## 实例化 Redis库操作类
        if sDb.read(telIP, "rRateAlarm") == "OFF":
            sub = "DBTX Redis Memory is OK: %s" % ps1
            msg = "DateTime: %s\nRedis使用内存情况正常: %s." % (getTimeNow(), changeUnit(int(redisData["used_memory"])))
            saveLog.info(msg)
            sendMail(sub, msg)
            sDb.update(telIP, "rRateAlarm", "ON")

##    if totalHits != 0 and redisData["hit_rate"] < 20:
##        sDb = opShelveDb()    ## 实例化 shelve库操作类
##        if sDb.readDb(pubDb, telIP, "rRateAlarm") == "ON":
Example #3
0
def appCpuAnalyze(srvPidStatus, srvFlag, sDb):
    """Check CPU usage (and DbCacheApp shared memory) of each game process.

    Parameters:
        srvPidStatus: mapping keyed by app name; each value is itself a
            mapping holding ``<app>_status`` (0 means the app is not healthy)
            and ``<app>_pid`` (whitespace-separated process IDs).
        srvFlag: identifier of the server/zone; used as a key in sDb and to
            recognise the ``DbCacheApp_<srvFlag>`` entry.
        sDb: store object providing exists(), intAppCpuCt(), read() and
            update() (the original comments describe it as a Redis wrapper).

    For every PID of every healthy, non-Snapshot app a per-app counter in sDb
    is incremented while the CPU reading from topNow() exceeds 95.5 and is
    reset to 0 otherwise. When the counter reaches 8 and the zone is not
    under maintenance ("isFixing" == "OFF") and "appCpuAlarm" is "ON", a
    warning mail is sent and the counter is reset.

    For the DbCacheApp of this zone the shared-memory figure is additionally
    compared against 1.8 GiB, with "appShrAlarm" acting as a one-shot switch
    for the alarm / recovery mails.
    """
    from dbtx.Merge.etc import ps1
    corMuser = "******"
    tNowCpu = topNow()
    # Initialise the per-zone CPU counters when the zone is not yet known.
    if not sDb.exists(srvFlag, "ct"):
        sDb.intAppCpuCt(srvFlag)

    for app in srvPidStatus:
        appName = app.split("_")[0]
        # Skip apps in an abnormal state and all Snapshot entries.
        if srvPidStatus[app][app+"_status"] == 0 or "Snapshot" in app:
            continue
        appPid = srvPidStatus[app][app+"_pid"].split()
        for pid in appPid:   # some apps (CellApp, GateApp, CopyApp) run several processes
            nCpu, appCount = tNowCpu[pid], int(sDb.read(appName, srvFlag))
            if float(nCpu) > 95.5:
                appCount += 1
            else:
                # Reset the local counter together with the stored one;
                # otherwise a stale stored count >= 8 would still trigger
                # the alarm below on a sample that is under the threshold.
                appCount = 0
            sDb.update(appName, srvFlag, appCount)

            # NOTE(review): the mail text says "five consecutive times" and
            # "95%" while the code uses 8 and 95.5 - confirm which is intended.
            if (appCount >= 8
                    and sDb.read(srvFlag, "isFixing") == "OFF"
                    and sDb.read(srvFlag, "appCpuAlarm") == "ON"):
                sub = "DBTX App-CPU Alarm: %s" % ps1
                msg = "DateTime: %s\n%s CPU使用率连续五次达到95%%, 进程ID为: %s !!!" % (getTimeNow(), app, pid)
                saveLog.warning(msg)
                sendMail(sub, msg, corMuser)
                # The "appCpuAlarm" switch is left untouched here; only the
                # counter is reset, so the next mail needs 8 more high samples.
                sDb.update(appName, srvFlag, 0)

            if "DbCacheApp_"+srvFlag != app:
                continue

            # DbCacheApp only: watch its shared-memory usage.
            proc = psutil.Process(int(pid))
            shr = proc.get_ext_memory_info()[2]
            if float(shr) > 1932735283.2:    # 1.8 * 1024**3 bytes
                if (sDb.read(srvFlag, "isFixing") == "OFF"
                        and sDb.read(srvFlag, "appShrAlarm") == "ON"):
                    sub = "DBTX DbCacheApp-SHR Alarm: %s" % ps1
                    msg = "DateTime: %s\n%s DbCacheApp 共享内存达到红色警戒,当前占用共享内存: %s !!!" % (getTimeNow(), srvFlag, changeUnit(shr))
                    saveLog.warning(msg)
                    sendMail(sub, msg, corMuser)
                    # Disarm the switch so the alarm is mailed only once.
                    sDb.update(srvFlag, "appShrAlarm", "OFF")
            else:
                if (sDb.read(srvFlag, "isFixing") == "OFF"
                        and sDb.read(srvFlag, "appShrAlarm") == "OFF"):
                    # Back below the limit after an alarm: report recovery
                    # once and re-arm the switch.
                    sub = "DBTX DbCacheApp-SHR Alarm: %s" % ps1
                    msg = "DateTime: %s\n%s DbCacheApp 占用共享内存恢复到正常水平: %s ." % (getTimeNow(), srvFlag, changeUnit(shr))
                    saveLog.warning(msg)
                    sendMail(sub, msg, corMuser)
                    sDb.update(srvFlag, "appShrAlarm", "ON")
Example #4
0
 def getMem(self):
     # Pass the machine's total physical memory to writeToFile() under "totalMem".
     totMem = changeUnit(int(psutil.TOTAL_PHYMEM))       # total physical memory; changeUnit() performs the unit conversion
     writeToFile("totalMem", totMem)