def checkSysTable(self):
    """
    Run the system-table check on every CN and primary-DN instance
    hosted on this node.

    Each instance is first probed with a trivial query to confirm it
    accepts connections, then checkSingleSysTable is fanned out over a
    thread pool. Returns the sorted list of non-empty per-instance
    results; raises CheckNAException when the node hosts neither a CN
    nor a primary DN.
    """
    node_info = self.cluster.getDbNodeByName(self.host)
    cn_instances = node_info.coordinators
    master_ids = SharedFuncs.getMasterDnNum(self.user, self.mpprcFile)
    # Keep only the datanode instances that are currently primary.
    primary_dns = [dn for dn in node_info.datanodes
                   if dn.instanceId in master_ids]
    if len(cn_instances) < 1 and len(primary_dns) < 1:
        raise CheckNAException(
            "There is no primary database node instance in the "
            "current node.")
    # test database Connection
    for inst in cn_instances + primary_dns:
        if not inst:
            continue
        sqlcmd = "select pg_sleep(1);"
        SharedFuncs.runSqlCmd(sqlcmd, self.user, "", inst.port,
                              self.tmpPath, self.database, self.mpprcFile)
    # Check all instances in parallel, one worker per CPU in the set.
    pool = ThreadPool(DefaultValue.getCpuSet())
    results = pool.map(self.checkSingleSysTable,
                       cn_instances + primary_dns)
    pool.close()
    pool.join()
    output_list = [item for item in results if item]
    output_list.sort()
    return output_list
def wrapper():
    """
    Validate the checker's cluster context, resolve the local db node,
    record the port (and CN type) of the instance to be checked, then
    invoke the wrapped check function.

    Raises:
        Exception: a required attribute is missing/empty or the node
            is not found in the cluster topology.
        CheckNAException: the node hosts no checkable instance.
    """
    if not hasattr(self, 'cluster'):
        raise Exception(ErrorCode.GAUSS_530["GAUSS_53030"]
                        % "cluster attribute")
    if not hasattr(self, 'host'):
        raise Exception(ErrorCode.GAUSS_530["GAUSS_53030"]
                        % "host attribute")
    if not self.cluster:
        raise Exception(ErrorCode.GAUSS_530["GAUSS_53031"])
    dbNode = self.cluster.getDbNodeByName(self.host)
    # The specified node does not exist or is empty
    if dbNode is None or dbNode == "":
        raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] % "The dbNode")
    if self.cluster.isSingleInstCluster():
        masterDn = SharedFuncs.getMasterDnNum(self.user, self.mpprcFile)
        # The node must host the primary (master) DN instance.
        if len(dbNode.datanodes) < 1 or \
                dbNode.datanodes[0].instanceId not in masterDn:
            # BUGFIX: message typo "contains materDn" -> "contain masterDn"
            raise CheckNAException(
                "The node does not contain masterDn instance")
        self.port = dbNode.datanodes[0].port
    else:
        # The specified CN node does not exist
        if len(dbNode.coordinators) == 0:
            # BUGFIX: grammar "does not contains" -> "does not contain"
            raise CheckNAException(
                "The node does not contain cn instance")
        # get cn port
        self.port = dbNode.coordinators[0].port
        self.cntype = dbNode.coordinators[0].instanceType
    return func()
def doCheck(self):
    """
    Verify that every primary DN instance on this host accepts
    connections via gsql.

    Sets result.rst to NG (with per-instance details in result.val)
    when the cluster is down or any instance refuses connections,
    otherwise OK. Raw commands and errors are appended to result.raw.
    """
    cmd = "gs_om -t status"
    output = SharedFuncs.runShellCmd(cmd, self.user, self.mpprcFile)
    # The cluster must be at least Degraded before probing instances.
    if output.find("Normal") < 0 and output.find("Degraded") < 0:
        self.result.rst = ResultStatus.NG
        self.result.val = "The database can not be connected."
        return
    instanceList = []
    AbnormalInst = []
    primaryDnList = SharedFuncs.getMasterDnNum(self.user, self.mpprcFile)
    nodeInfo = self.cluster.getDbNodeByName(self.host)
    localDnList = nodeInfo.datanodes
    for dn in localDnList:
        if dn.instanceId in primaryDnList:
            instanceList.append(dn)
    sqlcmd = "select pg_sleep(1);"
    for instance in instanceList:
        cmd = "gsql -m -d postgres -p %s -c '%s'" % (instance.port, sqlcmd)
        if self.mpprcFile:
            cmd = "source '%s' && %s" % (self.mpprcFile, cmd)
        # Running as root: switch to the cluster user first.
        if os.getuid() == 0:
            cmd = "su - %s -c \"%s\" " % (self.user, cmd)
        self.result.raw += "\n%s" % cmd
        (status, output) = subprocess.getstatusoutput(cmd)
        # BUGFIX: was `find(...) > 0`, which misses a failure message
        # at offset 0 (str.find returns 0 there); use >= 0.
        if status != 0 or output.find("connect to server failed") >= 0:
            AbnormalInst.append(instance.instanceId)
            # BUGFIX: message typo "install" -> "instance".
            self.result.val += "The instance %s can not be connected.\n" \
                               % instance.instanceId
            self.result.raw += "\nError: %s" % output
    if AbnormalInst:
        self.result.rst = ResultStatus.NG
    else:
        self.result.rst = ResultStatus.OK
        self.result.val = "The database connection is normal."
def doCheck(self):
    """
    Compare the 'base' directory sizes of all primary DN instances on
    this host and flag NG when the largest exceeds the smallest by more
    than 5%; NA when the host has no primary DN.
    """
    node_info = self.cluster.getDbNodeByName(self.host)
    biggest = None
    smallest = None
    usage_by_dn = {}
    detail = ""
    master_ids = SharedFuncs.getMasterDnNum(self.user, self.mpprcFile)
    for dn in node_info.datanodes:
        if dn.instanceId not in master_ids:
            continue
        base_dir = os.path.join(dn.datadir, "base")
        # getDirSize yields e.g. "123M ..."; strip the trailing unit.
        size_text = g_file.getDirSize(base_dir, "m").split()[0][:-1]
        if not size_text.isdigit():
            raise Exception(ErrorCode.GAUSS_504["GAUSS_50412"]
                            % (dn.instanceId))
        size_mb = int(size_text)
        if not biggest or int(biggest) < size_mb:
            biggest = size_mb
        if not smallest or int(smallest) > size_mb:
            smallest = size_mb
        usage_by_dn[dn.instanceId] = size_text
        detail += "\ndn %s: vol %sm" % (dn.instanceId, size_text)
    if not usage_by_dn:
        self.result.rst = ResultStatus.NA
        self.result.val = "No master database node in this host"
        return
    # Allow up to 5% spread between the biggest and smallest DN.
    if biggest > smallest * 1.05:
        self.result.rst = ResultStatus.NG
        self.result.val = "The result is not ok:\n%s" % detail
        self.result.raw = json.dumps(usage_by_dn)
    else:
        self.result.rst = ResultStatus.OK
        self.result.val = "Data distributed well in local host"
        self.result.raw = json.dumps(usage_by_dn)
def doCheck(self):
    """
    Collect GUC parameter values for every datanode instance on this
    host (and on its logic cluster, if one exists) and publish them,
    serialized from the module-global g_gucDist, into the check result.
    """
    global g_gucDist
    # get ignore list
    dirName = os.path.dirname(os.path.realpath(__file__))
    configFile = "%s/../../config/check_list_%s.conf" % (dirName,
                                                         self.version)
    self.getIgnoreParameters(configFile, 'guc_ignore', 'guc_logic')
    DNidList = []
    result = []
    logicCluster = False
    nodeInfo = self.cluster.getDbNodeByName(self.host)
    masterDnList = SharedFuncs.getMasterDnNum(self.user, self.mpprcFile)
    # Collect every non-dummy-standby datanode instance on this host.
    for DnInstance in nodeInfo.datanodes:
        if (DnInstance.instanceType != DUMMY_STANDBY_INSTANCE):
            DNidList.append(DnInstance)
    if len(DNidList) < 1:
        raise Exception(ErrorCode.GAUSS_512["GAUSS_51249"])
    # get information of logicCluster on current node
    (lcName, dbnode) = self.checkLogicCluster()
    if (dbnode):
        logicCluster = True
        for DnInstance in dbnode.datanodes:
            if (DnInstance.instanceType != DUMMY_STANDBY_INSTANCE):
                # needm: use maintenance (-m) connection for instances
                # that are not currently primary.
                if (DnInstance.instanceId in masterDnList):
                    needm = False
                else:
                    needm = True
                result.append(
                    self.checkInstanceGucValue(DnInstance, needm,
                                               lcName, logicCluster))
        g_gucDist[lcName] = result
    # test database Connection
    for Instance in DNidList:
        if not Instance:
            continue
        sqlcmd = "select pg_sleep(1);"
        if Instance.instanceId in masterDnList:
            needm = False
        else:
            needm = True
        # NOTE(review): the query output is unused; the call only
        # verifies that the instance accepts connections.
        output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "",
                                       Instance.port, self.tmpPath,
                                       'postgres', self.mpprcFile, needm)
        # Return value deliberately discarded — presumably
        # checkInstanceGucValue records into g_gucDist; verify.
        self.checkInstanceGucValue(Instance, needm, "", logicCluster)
    self.result.val = json.dumps(g_gucDist)
    self.result.raw = str(g_gucDist)
    self.result.rst = ResultStatus.OK
def doCheck(self):
    """
    Check that host memory usage is under the configured threshold and
    that no gaussdb process's resident memory exceeds
    max_process_memory * self.percentm.

    Sets result.rst to NG with an explanatory result.val on any
    violation, OK otherwise.
    """
    cmd = "free -g | grep Mem | awk '{print $2}' 2>/dev/null"
    totalMem = SharedFuncs.runShellCmd(cmd)
    cmd = "free -g | grep Mem | awk '{print $3}' 2>/dev/null"
    usedMem = SharedFuncs.runShellCmd(cmd)
    if int(usedMem) > int(totalMem) * self.percentt:
        self.result.rst = ResultStatus.NG
        self.result.val = "Memory usage exceeded threshold"
        return
    cmd = "show max_process_memory;"
    cnPort = None
    masterDnPort = None
    slaveDnPort = None
    self.node = self.cluster.getDbNodeByName(self.host)
    if self.node.coordinators:
        cnPort = self.node.coordinators[0].port
    masterDnlist = SharedFuncs.getMasterDnNum(self.user, self.mpprcFile)
    # BUGFIX: the original loop broke on the FIRST master-or-standby DN,
    # so a standby earlier in the list hid a primary DN later in the
    # list (forcing a maintenance-mode connection). Prefer a primary;
    # keep the first standby only as a fallback.
    for datanode in self.node.datanodes:
        if datanode.instanceId in masterDnlist:
            masterDnPort = datanode.port
            break
        elif (slaveDnPort is None and
              (datanode.instanceType == MASTER_INSTANCE or
               datanode.instanceType == STANDBY_INSTANCE)):
            slaveDnPort = datanode.port
    if cnPort:
        output = SharedFuncs.runSqlCmd(cmd, self.user, "", cnPort,
                                       self.tmpPath, "postgres",
                                       self.mpprcFile)
    elif masterDnPort:
        output = SharedFuncs.runSqlCmd(cmd, self.user, "", masterDnPort,
                                       self.tmpPath, "postgres",
                                       self.mpprcFile)
    elif slaveDnPort:
        # Standby instances need a maintenance (-m) connection.
        output = SharedFuncs.runSqlCmd(cmd, self.user, "", slaveDnPort,
                                       self.tmpPath, "postgres",
                                       self.mpprcFile, True)
    else:
        self.result.val = "There's no master database node " \
                          "or slave database node in this node"
        self.result.rst = ResultStatus.OK
        return
    # Normalize max_process_memory to KB so it is comparable with the
    # RSS column (KB) reported by `ps ux`.
    if output.upper().endswith("GB"):
        maxProcessM = int(output[:-2]) * 1024 * 1024 * self.percentm
    elif output.upper().endswith("MB"):
        maxProcessM = int(output[:-2]) * 1024 * self.percentm
    elif output.upper().endswith("KB"):
        maxProcessM = int(output[:-2]) * self.percentm
    else:
        # BUGFIX: message typo "memroy" -> "memory".
        self.result.val = \
            "Can not get the correct value of max_process_memory"
        self.result.rst = ResultStatus.NG
        return
    cmd = "ps ux | grep gaussdb | awk '{print $6}'"
    output = SharedFuncs.runShellCmd(cmd)
    for line in output.splitlines():
        procM = int(line)
        if procM > maxProcessM:
            # BUGFIX: message typo "Memroy" -> "Memory".
            self.result.val = "Memory usage of some gaussdb " \
                              "process exceeded threshold"
            self.result.rst = ResultStatus.NG
            return
    self.result.rst = ResultStatus.OK
    self.result.val = "Memory is sufficient"
def doCheck(self):
    """
    Estimate the system-table footprint (system catalogs plus
    pg_clog/pg_xlog) of every instance on this host, grouped by the
    filesystem that holds each instance's data directory, and compare
    each group against that filesystem's available space.

    result.rst is NG when any filesystem's estimated size meets or
    exceeds its available space; per-filesystem details go in
    result.val.
    """
    instance = []
    allDisk = []
    # Get all disk and the avail size
    cmd_df = "df -B M"
    diskinfo = SharedFuncs.runShellCmd(cmd_df, self.user, self.mpprcFile)
    # split with \n and remove the title
    diskList_space = diskinfo.split("\n")
    diskList_space.remove(diskList_space[0])
    # loop the list, remove space and remove the size unit "MB",
    # only keep disk path and avail size
    for disk_space in diskList_space:
        disk = disk_space.split()
        disk_new = []
        disk_new.append(disk[0])
        # df columns: [device, total, used, avail, ...]; keep avail MB.
        disk_new.append(int(disk[3].replace("M", "")))
        allDisk.append(disk_new)
    # Get the port and datadir list of instance
    nodeInfo = self.cluster.getDbNodeByName(self.host)
    CN = nodeInfo.coordinators
    # check if CN exists
    if (len(CN) > 0):
        instance.append(CN[0])
    else:
        # no CN in instance, do nothing
        pass
    for DnInstance in nodeInfo.datanodes:
        if (DnInstance.instanceType != DUMMY_STANDBY_INSTANCE):
            instance.append(DnInstance)
    # check if no instances in this node
    if (len(instance) == 0):
        return
    else:
        pass
    # Attach each instance to the df entry of the filesystem holding
    # its datadir (appended after the [device, avail] pair, so
    # disk[2:] later iterates the instances on that filesystem).
    for inst in instance:
        cmd_dir = g_Platform.getDiskFreeCmd(inst.datadir)
        result = SharedFuncs.runShellCmd(cmd_dir, self.user,
                                         self.mpprcFile)
        diskInfo_withspace = result.split("\n")
        diskInfo = diskInfo_withspace[1].split()
        for disk in allDisk:
            if (diskInfo[0] == disk[0]):
                disk.append(inst)
    masterDnList = SharedFuncs.getMasterDnNum(self.user, self.mpprcFile)
    # Get the database in the node, remove template0
    sqldb = "select datname from pg_database;"
    # needm: maintenance (-m) connection needed unless the instance is
    # a CN or a primary DN.
    needm = False
    if (instance[0].instanceRole == INSTANCE_ROLE_COODINATOR):
        needm = False
    elif (instance[0].instanceId in masterDnList):
        needm = False
    else:
        needm = True
    output = SharedFuncs.runSqlCmd(sqldb, self.user, "",
                                   instance[0].port, self.tmpPath,
                                   "postgres", self.mpprcFile, needm)
    dbList = output.split("\n")
    dbList.remove("template0")
    # loop all database with port list
    value = ""
    Flag = []
    for disk in allDisk:
        sumSize = 0
        for inst in disk[2:]:
            for db in dbList:
                # Calculate the size with sql cmd
                # (oid < 16384 restricts to system catalogs; result MB)
                cmd = "select sum(pg_total_relation_size(oid)/1024)/1024" \
                      " from pg_class where oid<16384 and relkind='r';"
                needm = False
                if (inst.instanceRole == INSTANCE_ROLE_COODINATOR):
                    needm = False
                elif (inst.instanceId in masterDnList):
                    needm = False
                else:
                    needm = True
                output = SharedFuncs.runSqlCmd(cmd, self.user, "",
                                               inst.port, self.tmpPath,
                                               db, self.mpprcFile, needm)
                sumSize = sumSize + float(output)
            # Calculate the size of datadir
            strdir = inst.datadir
            # NOTE(review): clog[0]/xlog[0] assumes getDirSize returns
            # a list whose first element is like "123M" — verify
            # against g_file.getDirSize's actual return type.
            clog = g_file.getDirSize(os.path.join(strdir, 'pg_clog'),
                                     "M")
            size_clog = int(clog[0].replace("M", ""))
            xlog = g_file.getDirSize(os.path.join(strdir, 'pg_xlog'),
                                     "M")
            size_xlog = int(xlog[0].replace("M", ""))
            sumSize = sumSize + size_clog + size_xlog
        # Skip filesystems that host no instance data.
        if (sumSize == 0):
            continue
        # Compare system table size with avail disk size
        if (sumSize < disk[1]):
            Flag.append(True)
            FileSystem = "FileSystem: %s" % disk[0]
            SystemTableSize = "SystemTableSize: %sM" % sumSize
            DiskAvailSize = "DiskAvailSize: %sM" % disk[1]
            value += FileSystem.ljust(35) + SystemTableSize.ljust(35) \
                     + DiskAvailSize.ljust(35) + "Status: OK; \n"
        elif (sumSize >= disk[1]):
            Flag.append(False)
            FileSystem = "FileSystem: %s" % disk[0]
            SystemTableSize = "SystemTableSize: %sM" % sumSize
            DiskAvailSize = "DiskAvailSize: %sM" % disk[1]
            value += FileSystem.ljust(35) + SystemTableSize.ljust(35) \
                     + DiskAvailSize.ljust(35) + "Status: NG; \n"
    self.result.val = value
    if (False not in Flag):
        self.result.rst = ResultStatus.OK
    else:
        self.result.rst = ResultStatus.NG