Example #1
    def checkSysTable(self):
        primaryDNidList = []
        nodeInfo = self.cluster.getDbNodeByName(self.host)
        CN = nodeInfo.coordinators
        masterDnList = SharedFuncs.getMasterDnNum(self.user, self.mpprcFile)
        for DnInstance in nodeInfo.datanodes:
            if (DnInstance.instanceId in masterDnList):
                primaryDNidList.append(DnInstance)
        if (len(CN) < 1 and len(primaryDNidList) < 1):
            raise CheckNAException(
                "There is no primary database node instance in the "
                "current node.")

        # test the database connection
        for Instance in (CN + primaryDNidList):
            if not Instance:
                continue
            sqlcmd = "select pg_sleep(1);"
            SharedFuncs.runSqlCmd(sqlcmd, self.user, "", Instance.port,
                                  self.tmpPath, self.database, self.mpprcFile)
        outputList = []
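        # run checkSingleSysTable for every CN / primary DN instance in parallel threads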
        pool = ThreadPool(DefaultValue.getCpuSet())
        results = pool.map(self.checkSingleSysTable, CN + primaryDNidList)
        pool.close()
        pool.join()
        for result in results:
            if (result):
                outputList.append(result)
        outputList.sort()
        return outputList
 def wrapper(self):
     # presumably the inner closure of a decorator; `func` is the wrapped check method
     if (not hasattr(self, 'cluster')):
         raise Exception(ErrorCode.GAUSS_530["GAUSS_53030"]
                         % "cluster attribute")
     if (not hasattr(self, 'host')):
         raise Exception(ErrorCode.GAUSS_530["GAUSS_53030"]
                         % "host attribute")
     if (not self.cluster):
         raise Exception(ErrorCode.GAUSS_530["GAUSS_53031"])
     dbNode = self.cluster.getDbNodeByName(self.host)
     # The specified node does not exist or is empty
     if (dbNode is None or dbNode == ""):
         raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"]
                         % "The dbNode")
     if self.cluster.isSingleInstCluster():
         masterDn = SharedFuncs.getMasterDnNum(self.user,
                                               self.mpprcFile)
         if (len(dbNode.datanodes) < 1
                 or dbNode.datanodes[0].instanceId not in masterDn):
             raise CheckNAException(
                 "The node does not contain a master DN instance")
         self.port = dbNode.datanodes[0].port
     else:
         # The specified CN node does not exist
         if (len(dbNode.coordinators) == 0):
             raise CheckNAException(
                 "The node does not contains cn instance")
         # get cn port
         self.port = dbNode.coordinators[0].port
         self.cntype = dbNode.coordinators[0].instanceType
     return func(self)
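
Note: wrapper() above reads like the inner closure of a precondition decorator that validates the cluster and host attributes before a check method runs. A minimal sketch of that pattern, assuming hypothetical names (checkPrecondition, SimpleCheck) that are not taken from the source:

import functools

def checkPrecondition(func):
    """Decorator: verify the required attributes exist before running the check."""
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        if not getattr(self, 'cluster', None):
            raise Exception("cluster attribute is missing or empty")
        if not hasattr(self, 'host'):
            raise Exception("host attribute is missing")
        return func(self, *args, **kwargs)
    return wrapper

class SimpleCheck:
    def __init__(self, cluster, host):
        self.cluster = cluster
        self.host = host

    @checkPrecondition
    def doCheck(self):
        return "checked %s" % self.host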
Example #3
 def doCheck(self):
     cmd = "gs_om -t status"
     output = SharedFuncs.runShellCmd(cmd, self.user, self.mpprcFile)
     if (output.find("Normal") < 0 and output.find("Degraded") < 0):
         self.result.rst = ResultStatus.NG
         self.result.val = "The database can not be connected."
         return
     instanceList = []
     AbnormalInst = []
     primaryDnList = SharedFuncs.getMasterDnNum(self.user, self.mpprcFile)
     nodeInfo = self.cluster.getDbNodeByName(self.host)
     localDnList = nodeInfo.datanodes
     for dn in localDnList:
         if (dn.instanceId in primaryDnList):
             instanceList.append(dn)
     sqlcmd = "select pg_sleep(1);"
     for instance in instanceList:
         cmd = "gsql -m -d postgres -p %s -c '%s'" % (instance.port, sqlcmd)
         if (self.mpprcFile):
             cmd = "source '%s' && %s" % (self.mpprcFile, cmd)
         if (os.getuid() == 0):
             cmd = "su - %s -c \"%s\" " % (self.user, cmd)
         self.result.raw += "\n%s" % cmd
         (status, output) = subprocess.getstatusoutput(cmd)
         if (status != 0 or output.find("connect to server failed") >= 0):
             AbnormalInst.append(instance.instanceId)
             self.result.val += "The instance %s cannot be connected.\n" \
                                % instance.instanceId
             self.result.raw += "\nError: %s" % output
     if AbnormalInst:
         self.result.rst = ResultStatus.NG
     else:
         self.result.rst = ResultStatus.OK
         self.result.val = "The database connection is normal."
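
The command construction in the loop above (source the separated environment file when one is given, then switch to the cluster user via su when running as root) is a reusable pattern; a minimal sketch with a hypothetical helper name buildUserCmd, not taken from the source:

import os

def buildUserCmd(cmd, user, mpprcFile=None):
    """Wrap a shell command so it runs in the cluster user's environment."""
    if mpprcFile:
        # load the separated environment variables first
        cmd = "source '%s' && %s" % (mpprcFile, cmd)
    if os.getuid() == 0:
        # when running as root, execute the command as the cluster user
        cmd = "su - %s -c \"%s\"" % (user, cmd)
    return cmd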
 def doCheck(self):
     nodeInfo = self.cluster.getDbNodeByName(self.host)
     maxusage = None
     minusage = None
     usagedic = {}
     val = ""
     masterDnList = SharedFuncs.getMasterDnNum(self.user, self.mpprcFile)
     for DnInstance in nodeInfo.datanodes:
         if (DnInstance.instanceId in masterDnList):
             datadir = os.path.join(DnInstance.datadir, "base")
             output = g_file.getDirSize(datadir, "m")
             output = output.split()[0][:-1]
             if (not output.isdigit()):
                 raise Exception(ErrorCode.GAUSS_504["GAUSS_50412"]
                                 % (DnInstance.instanceId))
             if (not maxusage or int(maxusage) < int(output)):
                 maxusage = int(output)
             if (not minusage or int(minusage) > int(output)):
                 minusage = int(output)
             usagedic[DnInstance.instanceId] = output
             val += "\ndn %s: vol %sm" % (DnInstance.instanceId, output)
     if (not usagedic):
         self.result.rst = ResultStatus.NA
         self.result.val = "No master database node in this host"
     else:
         if (maxusage > minusage * 1.05):
             self.result.rst = ResultStatus.NG
             self.result.val = "The result is not ok:\n%s" % val
             self.result.raw = json.dumps(usagedic)
         else:
             self.result.rst = ResultStatus.OK
             self.result.val = "Data distributed well in local host"
             self.result.raw = json.dumps(usagedic)
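
The balance criterion above marks the host as NG when the largest primary DN data volume exceeds the smallest by more than 5%; the same test as a standalone sketch (hypothetical name isBalanced):

def isBalanced(sizes_mb, tolerance=0.05):
    """Return True when the largest size is within (1 + tolerance) of the smallest."""
    if not sizes_mb:
        return True
    return max(sizes_mb) <= min(sizes_mb) * (1 + tolerance)

# isBalanced([1000, 1030, 1049]) -> True; isBalanced([1000, 1100]) -> False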
    def doCheck(self):
        """
        
        """
        global g_gucDist
        # get ignore list
        dirName = os.path.dirname(os.path.realpath(__file__))
        configFile = "%s/../../config/check_list_%s.conf" % (dirName,
                                                             self.version)
        self.getIgnoreParameters(configFile, 'guc_ignore', 'guc_logic')
        DNidList = []
        result = []
        logicCluster = False
        nodeInfo = self.cluster.getDbNodeByName(self.host)
        masterDnList = SharedFuncs.getMasterDnNum(self.user, self.mpprcFile)
        for DnInstance in nodeInfo.datanodes:
            if (DnInstance.instanceType != DUMMY_STANDBY_INSTANCE):
                DNidList.append(DnInstance)
        if len(DNidList) < 1:
            raise Exception(ErrorCode.GAUSS_512["GAUSS_51249"])
        # get information of logicCluster on current node
        (lcName, dbnode) = self.checkLogicCluster()
        if (dbnode):
            logicCluster = True
            for DnInstance in dbnode.datanodes:
                if (DnInstance.instanceType != DUMMY_STANDBY_INSTANCE):
                    if (DnInstance.instanceId in masterDnList):
                        needm = False
                    else:
                        needm = True
                    result.append(
                        self.checkInstanceGucValue(DnInstance, needm, lcName,
                                                   logicCluster))
            g_gucDist[lcName] = result
        # test the database connection
        for Instance in DNidList:
            if not Instance:
                continue
            sqlcmd = "select pg_sleep(1);"
            if Instance.instanceId in masterDnList:
                needm = False
            else:
                needm = True
            output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "",
                                           Instance.port, self.tmpPath,
                                           'postgres', self.mpprcFile, needm)
            self.checkInstanceGucValue(Instance, needm, "", logicCluster)

        self.result.val = json.dumps(g_gucDist)
        self.result.raw = str(g_gucDist)
        self.result.rst = ResultStatus.OK
    def doCheck(self):
        cmd = "free -g | grep Mem | awk '{print $2}' 2>/dev/null"
        totalMem = SharedFuncs.runShellCmd(cmd)
        cmd = "free -g | grep Mem | awk '{print $3}' 2>/dev/null"
        usedMem = SharedFuncs.runShellCmd(cmd)
        if (int(usedMem) > int(totalMem) * self.percentt):
            self.result.rst = ResultStatus.NG
            self.result.val = "Memory usage exceeded threshold"
            return
        cmd = "show max_process_memory;"
        cnPort = None
        masterDnPort = None
        slaveDnPort = None

        self.node = self.cluster.getDbNodeByName(self.host)
        if self.node.coordinators:
            cnPort = self.node.coordinators[0].port

        masterDnlist = SharedFuncs.getMasterDnNum(self.user, self.mpprcFile)
        for datanode in self.node.datanodes:
            if (datanode.instanceId in masterDnlist):
                masterDnPort = datanode.port
                break
            elif (datanode.instanceType == MASTER_INSTANCE
                  or datanode.instanceType == STANDBY_INSTANCE):
                slaveDnPort = datanode.port
                break
        if (cnPort):
            output = SharedFuncs.runSqlCmd(cmd, self.user, "", cnPort,
                                           self.tmpPath, "postgres",
                                           self.mpprcFile)
        elif (masterDnPort):
            output = SharedFuncs.runSqlCmd(cmd, self.user, "", masterDnPort,
                                           self.tmpPath, "postgres",
                                           self.mpprcFile)
        elif (slaveDnPort):
            output = SharedFuncs.runSqlCmd(cmd, self.user, "", slaveDnPort,
                                           self.tmpPath, "postgres",
                                           self.mpprcFile, True)
        else:
            self.result.val = "There's no master database node " \
                              "or slave database node in this node"
            self.result.rst = ResultStatus.OK
            return
        if (output.upper().endswith("GB")):
            maxProcessM = int(output[:-2]) * 1024 * 1024 * self.percentm
        elif (output.upper().endswith("MB")):
            maxProcessM = int(output[:-2]) * 1024 * self.percentm
        elif (output.upper().endswith("KB")):
            maxProcessM = int(output[:-2]) * self.percentm
        else:
            self.result.val = \
                "Can not get the correct value of max_process_memroy"
            self.result.rst = ResultStatus.NG
            return
        cmd = "ps ux | grep gaussdb | awk '{print $6}'"
        output = SharedFuncs.runShellCmd(cmd)
        for line in output.splitlines():
            procM = int(line)
            if (procM > maxProcessM):
                self.result.val = \
                    "Memroy usage of some gaussdb process exceeded threshold"
                self.result.rst = ResultStatus.NG
                return

        self.result.rst = ResultStatus.OK
        self.result.val = "Memory is sufficient"
Example #7
    def doCheck(self):
        instance = []
        allDisk = []

        # Get all disk and the avail size
        cmd_df = "df -B M"
        diskinfo = SharedFuncs.runShellCmd(cmd_df, self.user, self.mpprcFile)
        # split with \n and remove the title
        diskList_space = diskinfo.split("\n")[1:]
        # loop the list, remove space and remove the size unit "MB",
        # only keep disk path and avail size
        for disk_space in diskList_space:
            disk = disk_space.split()
            disk_new = []
            disk_new.append(disk[0])
            disk_new.append(int(disk[3].replace("M", "")))
            allDisk.append(disk_new)

        # Get the port and datadir list of instance
        nodeInfo = self.cluster.getDbNodeByName(self.host)
        CN = nodeInfo.coordinators
        # append the CN instance if one exists on this node
        if (len(CN) > 0):
            instance.append(CN[0])
        for DnInstance in nodeInfo.datanodes:
            if (DnInstance.instanceType != DUMMY_STANDBY_INSTANCE):
                instance.append(DnInstance)

        # return early if there are no instances on this node
        if (len(instance) == 0):
            return

        for inst in instance:
            cmd_dir = g_Platform.getDiskFreeCmd(inst.datadir)
            result = SharedFuncs.runShellCmd(cmd_dir, self.user,
                                             self.mpprcFile)
            diskInfo_withspace = result.split("\n")
            diskInfo = diskInfo_withspace[1].split()
            for disk in allDisk:
                if (diskInfo[0] == disk[0]):
                    disk.append(inst)
        masterDnList = SharedFuncs.getMasterDnNum(self.user, self.mpprcFile)
        # Get the database in the node, remove template0
        sqldb = "select datname from pg_database;"
        needm = False
        if (instance[0].instanceRole == INSTANCE_ROLE_COODINATOR):
            needm = False
        elif (instance[0].instanceId in masterDnList):
            needm = False
        else:
            needm = True
        output = SharedFuncs.runSqlCmd(sqldb, self.user, "", instance[0].port,
                                       self.tmpPath, "postgres",
                                       self.mpprcFile, needm)
        dbList = output.split("\n")
        dbList.remove("template0")

        # loop all database with port list
        value = ""
        Flag = []
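        # one boolean per file system: True when the system tables fit in the available space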
        for disk in allDisk:
            sumSize = 0
            for inst in disk[2:]:
                for db in dbList:
                    # Calculate the size with sql cmd
                    cmd = "select sum(pg_total_relation_size(oid)/1024)/1024" \
                          " from pg_class where oid<16384 and relkind='r';"
                    needm = False
                    if (inst.instanceRole == INSTANCE_ROLE_COODINATOR):
                        needm = False
                    elif (inst.instanceId in masterDnList):
                        needm = False
                    else:
                        needm = True
                    output = SharedFuncs.runSqlCmd(cmd, self.user, "",
                                                   inst.port, self.tmpPath, db,
                                                   self.mpprcFile, needm)
                    sumSize = sumSize + float(output)
                # Calculate the size of datadir
                strdir = inst.datadir
                clog = g_file.getDirSize(os.path.join(strdir, 'pg_clog'), "M")
                size_clog = int(clog[0].replace("M", ""))
                xlog = g_file.getDirSize(os.path.join(strdir, 'pg_xlog'), "M")
                size_xlog = int(xlog[0].replace("M", ""))
                sumSize = sumSize + size_clog + size_xlog
            if (sumSize == 0):
                continue
            # Compare system table size with avail disk size
            if (sumSize < disk[1]):
                Flag.append(True)
                FileSystem = "FileSystem: %s" % disk[0]
                SystemTableSize = "SystemTableSize: %sM" % sumSize
                DiskAvailSize = "DiskAvailSize: %sM" % disk[1]
                value += FileSystem.ljust(35) + SystemTableSize.ljust(35) \
                         + DiskAvailSize.ljust(35) + "Status: OK; \n"
            elif (sumSize >= disk[1]):
                Flag.append(False)
                FileSystem = "FileSystem: %s" % disk[0]
                SystemTableSize = "SystemTableSize: %sM" % sumSize
                DiskAvailSize = "DiskAvailSize: %sM" % disk[1]
                value += FileSystem.ljust(35) + SystemTableSize.ljust(35) \
                         + DiskAvailSize.ljust(35) + "Status: NG; \n"
        self.result.val = value
        if (False not in Flag):
            self.result.rst = ResultStatus.OK
        else:
            self.result.rst = ResultStatus.NG
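
For reference, the "df -B M" parsing at the top of this example keeps only the device name and the available size in MB for each file system; the same parsing as a standalone sketch (hypothetical name parseDfOutput), assuming the usual df header line followed by one line per file system:

def parseDfOutput(df_text):
    """Parse 'df -B M' output into [device, avail_mb] pairs, skipping the header."""
    disks = []
    for line in df_text.splitlines()[1:]:
        fields = line.split()
        if len(fields) < 4:
            continue
        disks.append([fields[0], int(fields[3].rstrip("M"))])
    return disks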