コード例 #1
0
    def generateAndSendXmlFile(self):
        """
        Generate a single-node deployment XML file for each new standby
        host and copy it to that host under the same temporary path.
        """
        self.logger.debug("Start to generateAndSend XML file.\n")

        tempXmlFile = "%s/clusterconfig.xml" % self.tempFileDir
        # Recreate an empty file so no stale content is ever sent.
        cmd = "mkdir -p %s; touch %s; cat /dev/null > %s" % \
            (self.tempFileDir, tempXmlFile, tempXmlFile)
        (status, output) = subprocess.getstatusoutput(cmd)
        if status != 0:
            self.logger.debug("Failed to prepare file %s. Error: %s" %
                              (tempXmlFile, output))

        cmd = "chown -R %s:%s %s" % (self.user, self.group, self.tempFileDir)
        (status, output) = subprocess.getstatusoutput(cmd)

        newHosts = self.context.newHostList
        for host in newHosts:
            # create single deploy xml file for each standby node
            xmlContent = self.__generateXml(host)
            with os.fdopen(
                    os.open(tempXmlFile, os.O_WRONLY | os.O_CREAT,
                            stat.S_IWUSR | stat.S_IRUSR), 'w') as fo:
                # The context manager closes the file; the old explicit
                # fo.close() inside the with-block was redundant.
                fo.write(xmlContent)
            # send single deploy xml file to each standby node
            sshTool = SshTool(host)
            retmap, output = sshTool.getSshStatusOutput(
                "mkdir -p %s" % self.tempFileDir, [host], self.envFile)
            retmap, output = sshTool.getSshStatusOutput(
                "chown %s:%s %s" % (self.user, self.group, self.tempFileDir),
                [host], self.envFile)
            sshTool.scpFiles(tempXmlFile, tempXmlFile, [host],
                             self.envFile)
            self.cleanSshToolFile(sshTool)

        self.logger.debug("End to generateAndSend XML file.\n")
コード例 #2
0
    def checkUserAndGroupExists(self):
        """
        Check that the system user and group exist and are the same
        on the primary and all new standby nodes.

        Exits via GaussLog.exitWithError when the user/group is missing
        on any node or the user's primary group does not match.
        """
        inputUser = self.user
        inputGroup = self.group

        user_group_id = ""
        isUserExits = False
        localHost = socket.gethostname()
        for user in pwd.getpwall():
            if user.pw_name == self.user:
                user_group_id = user.pw_gid
                isUserExits = True
                break
        if not isUserExits:
            GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35704"] \
                % ("User", self.user, localHost))

        isGroupExits = False
        group_id = ""
        for group in grp.getgrall():
            if group.gr_name == self.group:
                group_id = group.gr_gid
                isGroupExits = True
                # Fix: stop scanning once found, like the user loop above.
                break
        if not isGroupExits:
            GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35704"] \
                % ("Group", self.group, localHost))
        if user_group_id != group_id:
            GaussLog.exitWithError("User [%s] is not in the group [%s]."\
                 % (self.user, self.group))

        hostNames = self.context.newHostList
        envfile = self.envFile
        sshTool = SshTool(hostNames)

        # get username in the other standby nodes (grep directly, no
        # useless cat).
        getUserNameCmd = "grep -w %s /etc/passwd" % inputUser
        resultMap, outputCollect = sshTool.getSshStatusOutput(
            getUserNameCmd, [], envfile)

        for hostKey in resultMap:
            if resultMap[hostKey] == STATUS_FAIL:
                self.cleanSshToolFile(sshTool)
                GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35704"] \
                       % ("User", self.user, hostKey))

        # get groupname in the other standby nodes
        getGroupNameCmd = "grep -w %s /etc/group" % inputGroup
        resultMap, outputCollect = sshTool.getSshStatusOutput(
            getGroupNameCmd, [], envfile)
        for hostKey in resultMap:
            if resultMap[hostKey] == STATUS_FAIL:
                self.cleanSshToolFile(sshTool)
                GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35704"] \
                       % ("Group", self.group, hostKey))
        self.cleanSshToolFile(sshTool)
コード例 #3
0
 def checkTmpDir(self, hostName):
     """
     Ensure the tmp dir exists on the given host; create it when missing.
     """
     tmpDir = os.path.realpath(DefaultValue.getTmpDirFromEnv())
     checkCmd = 'if [ ! -d "%s" ]; then exit 1;fi;' % (tmpDir)
     sshTool = SshTool([hostName])
     resultMap, outputCollect = sshTool.getSshStatusOutput(
         checkCmd, [hostName], self.envFile)
     ret = resultMap[hostName]
     if ret == STATUS_FAIL:
         # Fix: the host name argument was missing from the log format.
         self.logger.debug("Node [%s] does not have tmp dir. need to fix."
                           % hostName)
         fixCmd = "mkdir -p %s" % (tmpDir)
         sshTool.getSshStatusOutput(fixCmd, [hostName], self.envFile)
     self.cleanSshToolFile(sshTool)
コード例 #4
0
    def validNodeInStandbyList(self):
        """
        Filter self.context.newHostList down to hosts that are not yet
        in the cluster (according to "gs_om -t status --detail").
        Exits when no host is left to expand.
        """
        self.logger.debug("Start to check if the nodes in standby list\n")

        curHostName = socket.gethostname()
        command = "su - %s -c 'source %s;gs_om -t status --detail'" % \
            (self.user, self.envFile)
        sshTool = SshTool([curHostName])
        resultMap, outputCollect = sshTool.getSshStatusOutput(
            command, [curHostName], self.envFile)
        self.logger.debug(outputCollect)

        newHosts = self.context.newHostList
        standbyHosts = []
        existHosts = []
        while len(newHosts) > 0:
            hostIp = newHosts.pop()
            nodeName = self.context.backIpNameMap[hostIp]
            # A host already in the cluster appears in the status output
            # as "<nodeName> <hostIp>".  (The unused nodeInfo/dataNode
            # lookups were removed.)
            exist_reg = r"(.*)%s[\s]*%s(.*)" % (nodeName, hostIp)
            if not re.search(exist_reg, outputCollect):
                standbyHosts.append(hostIp)
            else:
                existHosts.append(hostIp)
        self.context.newHostList = standbyHosts
        if len(existHosts) > 0:
            self.logger.log("The nodes [%s] are already in the cluster. Skip expand these nodes." \
                % ",".join(existHosts))
        self.cleanSshToolFile(sshTool)
        if len(standbyHosts) == 0:
            self.logger.log("There is no node can be expanded.")
            sys.exit(0)
コード例 #5
0
 def addTrustOnExistNodes(self):
     """
     Write a pg_hba.conf host-trust entry for every new node into each
     node that already belongs to the cluster.
     """
     self.logger.debug("Start to set host trust on existing node.")
     freshNames = []
     hbaEntries = []
     # One trust entry per new back ip.
     for backIp in self.context.newHostList:
         freshNames.append(self.context.backIpNameMap[backIp])
         hbaEntries.append('host    all    all    %s/32    trust' % backIp)
     # Existing nodes = all nodes minus the new ones.
     remainNodes = list(
         set(self.context.nodeNameList).difference(set(freshNames)))
     for nodeName in remainNodes:
         dnDir = self.context.clusterInfoDict[nodeName]["dataNode"]
         gucCmd = "".join(
             "source %s; gs_guc set -D %s -h '%s';" %
             (self.envFile, dnDir, entry) for entry in hbaEntries)
         tool = SshTool([nodeName])
         resultMap, outputCollect = tool.getSshStatusOutput(
             gucCmd, [nodeName], self.envFile)
         self.cleanSshToolFile(tool)
     self.logger.debug("End to set host trust on existing node.")
コード例 #6
0
    def preInstallOnHosts(self):
        """
        Run gs_preinstall on every new host; exit on any failure.
        """
        self.logger.debug("Start to preinstall database step.\n")
        newBackIps = self.context.newHostList
        newHostNames = [self.context.backIpNameMap[host]
                        for host in newBackIps]
        envfile = self.envFile
        tempXmlFile = "%s/clusterconfig.xml" % self.tempFileDir

        # Build the command with adjacent literals: the old backslash
        # continuations inside one string literal embedded long runs of
        # indentation whitespace into the remote command line.
        preinstallCmd = ("{softpath}/script/gs_preinstall -U {user} "
                         "-G {group} -X {xmlfile} "
                         "--sep-env-file={envfile} "
                         "--non-interactive 2>&1").format(
            softpath=self.context.packagepath, user=self.user,
            group=self.group, xmlfile=tempXmlFile, envfile=envfile)

        sshTool = SshTool(newHostNames)

        # getSshStatusOutput returns a per-host status map, not a single
        # status code.
        statusMap, output = sshTool.getSshStatusOutput(
            preinstallCmd, [], envfile)
        if STATUS_FAIL in statusMap.values():
            GaussLog.exitWithError(output)

        self.logger.debug("End to preinstall database step.\n")
コード例 #7
0
    def queryInstanceStatus(self, host, datanode, env):
        """
        Query a datanode with gs_ctl and return (local_role, db_state),
        both stripped and lower-cased; empty strings when a field cannot
        be parsed from the output.
        """
        command = "source %s ; gs_ctl query -D %s" % (env, datanode)
        # Fix: the ssh tool must target the host name; the old code
        # passed the data directory path to SshTool.
        sshTool = SshTool([host])
        resultMap, outputCollect = sshTool.getSshStatusOutput(
            command, [host], env)
        self.logger.debug(outputCollect)
        localRole = re.findall(r"local_role.*: (.*?)\n", outputCollect)
        db_state = re.findall(r"db_state.*: (.*?)\n", outputCollect)

        insType = localRole[0] if localRole else ""
        dbStatus = db_state[0] if db_state else ""
        self.cleanSshToolTmpFile(sshTool)
        return insType.strip().lower(), dbStatus.strip().lower()
コード例 #8
0
 def buildInstance(self, host, datanode, mode, env):
     """
     Run "gs_ctl build" for the given datanode on the remote host.
     """
     command = "source %s ; gs_ctl build -D %s -M %s" % (env, datanode, mode)
     self.logger.debug(command)
     sshTool = SshTool([host])
     resultMap, outputCollect = sshTool.getSshStatusOutput(command,
     [host], env)
     self.logger.debug(host)
     self.logger.debug(outputCollect)
     # Consistency fix: release the ssh tool temp files as the sibling
     # gs_ctl helpers (queryInstanceStatus, stopInstance) do.
     self.cleanSshToolTmpFile(sshTool)
コード例 #9
0
    def generateClusterStaticFile(self):
        """
        Generate static_config_files on the primary node and distribute
        one to every host; for a single-node cluster, restart the
        instances afterwards.
        """
        self.logger.debug("Start to generate and send cluster static file.\n")

        primaryHost = self.getPrimaryHostName()
        command = "gs_om -t generateconf -X %s --distribute" % self.context.xmlFile
        sshTool = SshTool([primaryHost])
        resultMap, outputCollect = sshTool.getSshStatusOutput(
            command, [primaryHost], self.envFile)
        self.logger.debug(outputCollect)
        self.cleanSshToolFile(sshTool)

        # toolPath/appPath do not depend on the node: hoisted out of the
        # per-host loop.
        toolPath = self.context.clusterInfoDict["toolPath"]
        appPath = self.context.clusterInfoDict["appPath"]
        targetFile = "%s/bin/cluster_static_config" % appPath

        for hostName in self.context.nodeNameList:
            hostSsh = SshTool([hostName])
            srcFile = "%s/script/static_config_files/cluster_static_config_%s" \
                % (toolPath, hostName)
            if not os.path.exists(srcFile):
                GaussLog.exitWithError("Generate static file [%s] not found." \
                    % srcFile)
            hostSsh.scpFiles(srcFile, targetFile, [hostName], self.envFile)
            self.cleanSshToolFile(hostSsh)

        self.logger.debug("End to generate and send cluster static file.\n")
        time.sleep(10)

        # Single-node database need start cluster after expansion
        if self.isSingleNodeInstance:
            self.logger.debug("Single-Node instance need restart.\n")
            self.commonGsCtl.queryOmCluster(primaryHost, self.envFile)

            # if primary database not normal, restart it
            dataNode = self.context.clusterInfoDict[primaryHost]["dataNode"]
            insType, dbStat = self.commonGsCtl.queryInstanceStatus(
                primaryHost, dataNode, self.envFile)
            if insType != MODE_PRIMARY:
                self.commonGsCtl.startInstanceWithMode(primaryHost, dataNode,
                                                       MODE_PRIMARY,
                                                       self.envFile)
            # if state is not normal, rebuild standby database
            standbyHosts = self.context.newHostList
            for host in standbyHosts:
                hostName = self.context.backIpNameMap[host]
                dataNode = self.context.clusterInfoDict[hostName]["dataNode"]
                insType, dbStat = self.commonGsCtl.queryInstanceStatus(
                    hostName, dataNode, self.envFile)
                if dbStat != STAT_NORMAL:
                    self.commonGsCtl.startInstanceWithMode(
                        hostName, dataNode, MODE_STANDBY, self.envFile)

            self.commonGsCtl.startOmCluster(primaryHost, self.envFile)
コード例 #10
0
    def setGUCOnClusterHosts(self, hostNames=None):
        """
        Apply the guc configuration on the given hosts; defaults to all
        cluster nodes when hostNames is omitted or empty.
        """
        gucDict = self.getGUCConfig()

        tempShFile = "%s/guc.sh" % self.tempFileDir

        # Fix: the old signature used a mutable default argument ([]);
        # passing no hosts (or an empty list) still means "all nodes".
        if not hostNames:
            hostNames = self.context.nodeNameList

        for host in hostNames:
            command = "source %s ; " % self.envFile + gucDict[host]
            self.logger.debug(command)

            sshTool = SshTool([host])

            # create temporary dir to save guc command bashfile.
            mkdirCmd = "mkdir -m a+x -p %s; chown %s:%s %s" % \
                (self.tempFileDir, self.user, self.group, self.tempFileDir)
            retmap, output = sshTool.getSshStatusOutput(
                mkdirCmd, [host], self.envFile)

            subprocess.getstatusoutput(
                "mkdir -m a+x -p %s; touch %s; cat /dev/null > %s" %
                (self.tempFileDir, tempShFile, tempShFile))
            with os.fdopen(
                    os.open(tempShFile, os.O_WRONLY | os.O_CREAT,
                            stat.S_IWUSR | stat.S_IRUSR), 'w') as fo:
                # The with-block closes the file; no explicit close needed.
                fo.write("#bash\n")
                fo.write(command)

            # send guc command bashfile to each host and execute it.
            sshTool.scpFiles(tempShFile, tempShFile, [host],
                             self.envFile)

            resultMap, outputCollect = sshTool.getSshStatusOutput("sh %s" % \
                tempShFile, [host], self.envFile)

            self.logger.debug(outputCollect)
            self.cleanSshToolFile(sshTool)
コード例 #11
0
 def stopInstance(self, host, datanode, env):
     """
     Stop the given datanode instance on the remote host via gs_ctl.
     """
     stopCmd = "source %s ; gs_ctl stop -D %s" % (env, datanode)
     tool = SshTool([host])
     statusMap, collected = tool.getSshStatusOutput(stopCmd, [host], env)
     self.logger.debug(host)
     self.logger.debug(collected)
     self.cleanSshToolTmpFile(tool)
コード例 #12
0
 def reloadPrimaryConf(self):
     """
     Reload the configuration of the primary datanode with gs_ctl.
     """
     primaryHost = self.getPrimaryHostName()
     dataNode = self.context.clusterInfoDict[primaryHost]["dataNode"]
     command = "gs_ctl reload -D %s " % dataNode
     sshTool = SshTool([primaryHost])
     self.logger.debug(command)
     resultMap, outputCollect = sshTool.getSshStatusOutput(command,
     [primaryHost], self.envFile)
     self.logger.debug(outputCollect)
     # Consistency fix: release the ssh tool temp files like the other
     # remote helpers in this class; they were previously leaked here.
     self.cleanSshToolFile(sshTool)
コード例 #13
0
    def checkLocalModeOnStandbyHosts(self):
        """
        Check the already-installed standby nodes before expansion:
        1. each standby database instance is in a valid role
           (primary/standby/normal);
        2. the gaussdb binary version is identical on the local node and
           every new standby node.

        Exits via GaussLog.exitWithError on any violation.
        """
        standbyHosts = self.context.newHostList
        envfile = self.envFile

        self.logger.log("Checking the database with locale mode.")
        for host in standbyHosts:
            hostName = self.context.backIpNameMap[host]
            dataNode = self.context.clusterInfoDict[hostName]["dataNode"]
            insType, dbStat = self.commonGsCtl.queryInstanceStatus(
                hostName, dataNode, self.envFile)
            if insType not in (MODE_PRIMARY, MODE_STANDBY, MODE_NORMAL):
                GaussLog.exitWithError(
                    ErrorCode.GAUSS_357["GAUSS_35703"] %
                    (hostName, self.user, dataNode, dataNode))

        # Collect the local ip plus all new standby ips and query the
        # gaussdb version on each of them in one ssh round trip.
        allHostIp = []
        allHostIp.append(self.context.localIp)
        versionDic = {}

        for hostip in standbyHosts:
            allHostIp.append(hostip)
        sshTool = SshTool(allHostIp)
        #get version in the nodes
        getversioncmd = "gaussdb --version"
        resultMap, outputCollect = sshTool.getSshStatusOutput(
            getversioncmd, [], envfile)
        self.cleanSshToolFile(sshTool)
        versionLines = outputCollect.splitlines()
        # The collected output alternates "[SUCCESS] <ip>:" header lines
        # with version lines; remember the ip from the header and attach
        # the version token parsed from the following line.
        # NOTE(review): ipKey is unbound if the first line is not a
        # [SUCCESS] header — presumably the ssh tool guarantees that
        # ordering; confirm.
        for verline in versionLines:
            if verline[0:9] == '[SUCCESS]':
                ipKey = verline[10:-1]
                continue
            else:
                # assumes the version string is the 5th space-separated
                # token with a 2-char suffix to strip — TODO confirm
                # against actual "gaussdb --version" output.
                versionStr = "".join(verline)
                preVersion = versionStr.split(' ')
                versionInfo = preVersion[4]
                versionDic[ipKey] = versionInfo[:-2]
        # Compare every remote version against the local one.
        # NOTE(review): this relies on the local ip being visited before
        # the remote ips during dict iteration; otherwise versionCompare
        # is referenced before assignment — verify.
        for hostip in versionDic:
            if hostip == self.context.localIp:
                versionCompare = ""
                versionCompare = versionDic[hostip]
            else:
                if versionDic[hostip] == versionCompare:
                    continue
                else:
                    GaussLog.exitWithError(ErrorCode.GAUSS_357["GAUSS_35705"] \
                       %(hostip, versionDic[hostip]))

        self.logger.log("Successfully checked the database with locale mode.")
コード例 #14
0
 def clearTmpFile(self):
     """
     Remove the temporary directory on all cluster nodes after a
     successful expansion.
     """
     self.logger.debug("start to delete temporary file")
     hostNames = self.context.nodeNameList
     sshTool = SshTool(hostNames)
     clearCmd = "source %s ; rm -rf %s" % (self.envFile, self.tempFileDir)
     result, output = sshTool.getSshStatusOutput(clearCmd,
     hostNames, self.envFile)
     self.logger.debug(output)
     # Fix: release the ssh tool's own temp files; they were leaked.
     self.cleanSshToolFile(sshTool)
コード例 #15
0
 def startOmCluster(self, host, env):
     """
     Start the cluster on the given host through the om tool.
     """
     startCmd = "source %s ; gs_om -t start" % env
     self.logger.debug(startCmd)
     tool = SshTool([host])
     statusMap, collected = tool.getSshStatusOutput(startCmd, [host], env)
     self.logger.debug(host)
     self.logger.debug(collected)
コード例 #16
0
 def queryOmCluster(self, host, env):
     """
     Return the detailed cluster status reported by
     "gs_om -t status --detail" on the given host.
     """
     queryCmd = "source %s ; gs_om -t status --detail" % env
     tool = SshTool([host])
     statusMap, collected = tool.getSshStatusOutput(queryCmd, [host], env)
     self.logger.debug(host)
     self.logger.debug(collected)
     return collected
コード例 #17
0
 def queryOmCluster(self, host, env):
     """
     Return the detailed cluster status from "gs_om -t status --detail"
     on the given host; exit with an error when the query fails.
     """
     queryCmd = "source %s ; gs_om -t status --detail" % env
     tool = SshTool([host])
     statusMap, collected = tool.getSshStatusOutput(queryCmd, [host], env)
     self.logger.debug(host)
     self.logger.debug(collected)
     if statusMap[host] == STATUS_FAIL:
         GaussLog.exitWithError("Query cluster failed. Please check " \
             "the cluster status or " \
             "source the environmental variables of user [%s]." % self.user)
     self.cleanSshToolTmpFile(tool)
     return collected
コード例 #18
0
    def addStandbyIpInPrimaryConf(self):
        """
        Append a pg_hba.conf trust entry for every new standby host on
        the primary node via gs_guc.
        """
        standbyHosts = self.context.newHostList
        primaryHost = self.getPrimaryHostName()
        command = ''
        for host in standbyHosts:
            hostName = self.context.backIpNameMap[host]
            dataNode = self.context.clusterInfoDict[hostName]["dataNode"]
            # Fix: the old line continuation embedded a long run of
            # indentation whitespace inside the pg_hba entry string.
            command += ("gs_guc set -D %s -h "
                        "'host    all    all    %s/32    trust';"
                        % (dataNode, host))
        self.logger.debug(command)
        sshTool = SshTool([primaryHost])
        resultMap, outputCollect = sshTool.getSshStatusOutput(
            command, [primaryHost], self.envFile)
        self.logger.debug(outputCollect)
        # Release the ssh tool temp files like the sibling helpers.
        self.cleanSshToolFile(sshTool)
コード例 #19
0
 def clearTmpFile(self):
     """
     Remove the temporary directory on every cluster node after a
     successful expansion.
     """
     self.logger.debug("start to delete temporary file %s" %
                       self.tempFileDir)
     clearCmd = "if [ -d '%s' ];then rm -rf %s;fi" % \
         (self.tempFileDir, self.tempFileDir)
     hostNames = self.context.nodeNameList
     for host in hostNames:
         try:
             # Fix: the old code built SshTool(hostNames) and ran the
             # command on ALL hosts inside this per-host loop, executing
             # it len(hostNames) times on every node.
             sshTool = SshTool([host])
             result, output = sshTool.getSshStatusOutput(
                 clearCmd, [host], self.envFile)
             self.logger.debug(output)
             self.cleanSshToolFile(sshTool)
         except Exception as e:
             # Best-effort cleanup: log and continue with the next host.
             self.logger.debug(str(e))
             self.cleanSshToolFile(sshTool)
コード例 #20
0
    def checkAllStandbyState(self):
        """
        check all standby state whether switchover is happening

        For every host kept in the cluster, verify that no switchover or
        failover is in progress; for every host being dropped (and still
        reachable), additionally stop its instances and remove its GRPC
        ssl certificates.
        """
        for hostNameLoop in self.context.hostMapForExist.keys():
            sshtool_host = SshTool([hostNameLoop])
            for i in self.context.hostMapForExist[hostNameLoop]['datadir']:
                # check whether switchover/failover is happening
                self.commonOper.checkStandbyState(hostNameLoop, i,
                                                  sshtool_host,
                                                  self.userProfile)
            self.cleanSshToolFile(sshtool_host)

        for hostNameLoop in self.context.hostMapForDel.keys():
            # Unreachable hosts are skipped; the operator must clean up
            # their certificates manually.
            if hostNameLoop not in self.context.failureHosts:
                sshtool_host = SshTool([hostNameLoop])
                for i in self.context.hostMapForDel[hostNameLoop]['datadir']:
                    # check whether switchover/failover is happening
                    self.commonOper.checkStandbyState(hostNameLoop, i,
                                                      sshtool_host,
                                                      self.userProfile, True)
                    self.commonOper.stopInstance(hostNameLoop, sshtool_host, i,
                                                 self.userProfile)
                # Delete every GRPC sslcert file except openssl.cnf.
                cmdDelCert = "ls %s/share/sslcert/grpc/* | " \
                    "grep -v openssl.cnf | xargs rm -rf" % self.appPath
                result, output = sshtool_host.getSshStatusOutput(
                    cmdDelCert, [hostNameLoop], self.userProfile)
                # Certificate-deletion failure is logged but deliberately
                # not fatal.
                if result[hostNameLoop] != 'Success':
                    self.logger.debug(output)
                    self.logger.log("[gs_dropnode]Failed to delete the GRPC "
                                    "sslcert of %s." % hostNameLoop)
                    self.logger.log("[gs_dropnode]Please check and delete the "
                                    "GRPC sslcert of %s manually." %
                                    hostNameLoop)
                self.cleanSshToolFile(sshtool_host)
            else:
                self.logger.log("[gs_dropnode]Cannot connect %s. Please check "
                                "and delete the GRPC sslcert of %s manually." %
                                (hostNameLoop, hostNameLoop))
コード例 #21
0
class OmImplOLAP(OmImpl):
    """
    class: OmImplOLAP

    OLAP implementation of the om operations: start/stop/query/view/
    refresh of the cluster, driven by gs_om local scripts.
    """
    def __init__(self, OperationManager=None):
        """
        function:class init
        input:OperationManager
        output:NA
        """
        OmImpl.__init__(self, OperationManager)

    def checkNode(self):
        """
        function: check if the current node is to be uninstalled
        input : NA
        output: NA
        """
        # The delete operation must not run on a node that is itself
        # scheduled for deletion.
        if (len(self.context.g_opts.nodeInfo) != 0
                and self.context.g_opts.hostname
                == DefaultValue.GetHostIpOrName()):
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51631"] % "coordinate" +
                            "\nPlease perform this operation on other nodes "
                            "because this node will be deleted.")

    # AP
    def stopCluster(self):
        """
        function:Stop cluster
        input:NA
        output:NA
        """
        self.logger.log("Stopping the cluster.")
        # Stop cluster in 300 seconds
        cmd = "source %s; %s -t %d" % (self.context.g_opts.mpprcFile,
                                       OMCommand.getLocalScript("Gs_Stop"),
                                       DefaultValue.TIMEOUT_CLUSTER_STOP)
        (status, output) = subprocess.getstatusoutput(cmd)
        if (status != 0):
            # Fall back to an immediate-mode stop when the graceful stop
            # times out or fails.
            self.logger.log(
                "Warning: Failed to stop cluster within 300 seconds,"
                "stopping cluster again at immediate mode.")
            cmd = "source %s; %s -m immediate -t %d" % (
                self.context.g_opts.mpprcFile,
                OMCommand.getLocalScript("Gs_Stop"),
                DefaultValue.TIMEOUT_CLUSTER_STOP)
            (status, output) = subprocess.getstatusoutput(cmd)
            if (status != 0):
                self.logger.log("The cmd is %s " % cmd)
                raise Exception(ErrorCode.GAUSS_516["GAUSS_51610"] %
                                "the cluster at immediate mode" +
                                " Error: \n%s" % output)

        self.logger.log("Successfully stopped the cluster.")

    # AP
    def startCluster(self):
        """
        function:Start cluster
        input:NA
        output:NA
        """
        self.logger.log("Starting the cluster.", "addStep")
        # Delete cluster dynamic config if it is exist on all nodes
        clusterDynamicConf = "%s/bin/cluster_dynamic_config" \
                             % self.oldClusterInfo.appPath
        cmd = g_file.SHELL_CMD_DICT["deleteFile"] % (clusterDynamicConf,
                                                     clusterDynamicConf)
        self.logger.debug(
            "Command for removing the cluster dynamic configuration: %s." %
            cmd)
        self.sshTool.executeCommand(cmd, "remove dynamic configuration")
        # Start cluster in 300 seconds
        cmd = "source %s; %s -t %s" % (self.context.g_opts.mpprcFile,
                                       OMCommand.getLocalScript("Gs_Start"),
                                       DefaultValue.TIMEOUT_CLUSTER_START)
        (status, output) = subprocess.getstatusoutput(cmd)
        if (status != 0):
            self.logger.debug("The cmd is %s " % cmd)
            raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] %
                            "the cluster" + " Error: \n%s" % output)

        self.logger.log("Successfully started the cluster.", "constant")

    ##########################################################################
    # Start Flow
    ##########################################################################
    def getNodeId(self):
        """
        function: get node Id
        input: NA
        output: NA
        """
        # Returns (nodeId, clusterType); nodeId stays 0 unless a specific
        # node name was given.
        clusterType = "cluster"
        nodeId = 0
        if (self.context.g_opts.nodeName != ""):
            clusterType = "node"
            dbNode = self.context.clusterInfo.getDbNodeByName(
                self.context.g_opts.nodeName)
            if not dbNode:
                raise Exception(ErrorCode.GAUSS_516["GAUSS_51619"] %
                                self.context.g_opts.nodeName)
            nodeId = dbNode.id
        elif (self.context.g_opts.azName != ""):
            clusterType = self.context.g_opts.azName
            # check whether the given azName is in the cluster
            if (self.context.g_opts.azName
                    not in self.context.clusterInfo.getazNames()):
                raise Exception(ErrorCode.GAUSS_500["GAUSS_50004"] % '-az' +
                                " The az name [%s] is not in the cluster." %
                                self.context.g_opts.azName)
        return nodeId, clusterType

    def doStartCluster(self):
        """
        function: do start cluster
        input: NA
        output: NA
        """
        self.logger.debug("Operating: Starting.")
        # Specifies the stop node
        # Gets the specified node id
        startType = "node" if self.context.g_opts.nodeName != "" else "cluster"
        # Perform a start operation
        self.logger.log("Starting %s." % startType)
        self.logger.log("=========================================")
        hostName = DefaultValue.GetHostIpOrName()
        # get the newest dynamic config and send to other node
        self.clusterInfo.checkClusterDynamicConfig(self.context.user, hostName)
        if self.context.g_opts.nodeName == "":
            hostList = self.clusterInfo.getClusterNodeNames()
        else:
            hostList = []
            hostList.append(self.context.g_opts.nodeName)
        self.sshTool = SshTool(self.clusterInfo.getClusterNodeNames(), None,
                               DefaultValue.TIMEOUT_CLUSTER_START)
        if self.time_out is None:
            time_out = DefaultValue.TIMEOUT_CLUSTER_START
        else:
            time_out = self.time_out
        cmd = "source %s; %s -U %s -R %s -t %s --security-mode=%s" % (
            self.context.g_opts.mpprcFile,
            OMCommand.getLocalScript("Local_StartInstance"), self.context.user,
            self.context.clusterInfo.appPath, time_out,
            self.context.g_opts.security_mode)
        if self.dataDir != "":
            cmd += " -D %s" % self.dataDir
        (statusMap, output) = self.sshTool.getSshStatusOutput(cmd, hostList)
        for nodeName in hostList:
            if statusMap[nodeName] != 'Success':
                raise Exception(ErrorCode.GAUSS_536["GAUSS_53600"] %
                                (cmd, output))
        if re.search("another server might be running", output):
            self.logger.log(output)
        if startType == "cluster":
            # Poll cluster_state for up to 30 seconds until it reports
            # "Normal"; raise if it never does.
            starttime = time.time()
            cluster_state = ""
            cmd = "source %s; gs_om -t status|grep cluster_state" \
                  % self.context.g_opts.mpprcFile
            while time.time() <= 30 + starttime:
                status, output = subprocess.getstatusoutput(cmd)
                if status != 0:
                    raise Exception(
                        ErrorCode.GAUSS_516["GAUSS_51607"] % "cluster" +
                        " After startup, check cluster_state failed")
                else:
                    cluster_state = output.split()[-1]
                    if cluster_state != "Normal":
                        self.logger.log("Waiting for check cluster state...")
                        time.sleep(5)
                    else:
                        break
            if cluster_state != "Normal":
                raise Exception(ErrorCode.GAUSS_516["GAUSS_51607"] %
                                "cluster" +
                                " After startup, the last check results were"
                                " %s. Please check manually." % cluster_state)
        self.logger.log("=========================================")
        self.logger.log("Successfully started.")
        self.logger.debug("Operation succeeded: Start.")

    def doStopCluster(self):
        """
        function: do stop cluster
        input: NA
        output: NA
        """
        self.logger.debug("Operating: Stopping.")
        # Specifies the stop node
        # Gets the specified node id
        stopType = "node" if self.context.g_opts.nodeName != "" else "cluster"
        # Perform a stop operation
        self.logger.log("Stopping %s." % stopType)
        self.logger.log("=========================================")
        if self.context.g_opts.nodeName == "":
            hostList = self.clusterInfo.getClusterNodeNames()
        else:
            hostList = []
            hostList.append(self.context.g_opts.nodeName)
        self.sshTool = SshTool(self.clusterInfo.getClusterNodeNames(), None,
                               DefaultValue.TIMEOUT_CLUSTER_START)
        if self.time_out is None:
            time_out = DefaultValue.TIMEOUT_CLUSTER_STOP
        else:
            time_out = self.time_out
        cmd = "source %s; %s -U %s -R %s -t %s" % (
            self.context.g_opts.mpprcFile,
            OMCommand.getLocalScript("Local_StopInstance"), self.context.user,
            self.context.clusterInfo.appPath, time_out)
        if self.dataDir != "":
            cmd += " -D %s" % self.dataDir
        if self.mode != "":
            cmd += " -m %s" % self.mode
        (statusMap, output) = self.sshTool.getSshStatusOutput(cmd, hostList)
        for nodeName in hostList:
            if statusMap[nodeName] != 'Success':
                raise Exception(ErrorCode.GAUSS_536["GAUSS_53606"] %
                                (cmd, output))
        self.logger.log("Successfully stopped %s." % stopType)

        self.logger.log("=========================================")
        self.logger.log("End stop %s." % stopType)
        self.logger.debug("Operation succeeded: Stop.")

    def doView(self):
        """
        function:get cluster node info
        input:NA
        output:NA
        """
        # view static_config_file
        self.context.clusterInfo.printStaticConfig(self.context.user,
                                                   self.context.g_opts.outFile)

    def doQuery(self):
        """
        function: do query
        input  : NA
        output : NA
        """
        hostName = DefaultValue.GetHostIpOrName()
        sshtool = SshTool(self.context.clusterInfo.getClusterNodeNames())
        cmd = queryCmd()
        if (self.context.g_opts.outFile != ""):
            cmd.outputFile = self.context.g_opts.outFile
        self.context.clusterInfo.queryClsInfo(hostName, sshtool,
                                              self.context.mpprcFile, cmd)

    def doRefreshConf(self):
        """
        function: do refresh conf
        input  : NA
        output : NA
        """
        # A one-node cluster has no dynamic configuration to distribute.
        if self.context.clusterInfo.isSingleNode():
            self.logger.log(
                "No need to generate dynamic configuration file for one node.")
            return
        self.logger.log("Generating dynamic configuration file for all nodes.")
        hostName = DefaultValue.GetHostIpOrName()
        sshtool = SshTool(self.context.clusterInfo.getClusterNodeNames())
        self.context.clusterInfo.createDynamicConfig(self.context.user,
                                                     hostName, sshtool)
        self.logger.log("Successfully generated dynamic configuration file.")
コード例 #22
0
class ParallelBaseOM(object):
    """
    Base class of parallel command
    """
    ACTION_INSTALL = "install"
    ACTION_CONFIG = "config"
    ACTION_START = "start"
    ACTION_REDISTRIBUTE = "redistribute"
    ACTION_HEALTHCHECK = "healthcheck"

    HEALTH_CHECK_BEFORE = "before"
    HEALTH_CHECK_AFTER = "after"
    """
    Base class for parallel command
    """
    def __init__(self):
        '''
        Constructor: initialize the shared state used by every parallel
        OM command (logger, cluster topology, ssh tool, step-file
        bookkeeping and per-instance component lists).
        '''
        self.logger = None          # GaussLog, created by initLogger
        self.clusterInfo = None     # dbClusterInfo, created by initClusterInfo
        self.oldClusterInfo = None
        self.sshTool = None         # SshTool, created by initSshTool
        self.action = ""

        # Cluster config file.
        self.xmlFile = ""
        self.oldXmlFile = ""

        self.logType = DefaultValue.LOCAL_LOG_FILE
        self.logFile = ""
        self.localLog = ""
        self.user = ""
        self.group = ""
        self.mpprcFile = ""
        # Temporary catalog for install
        self.operateStepDir = TempfileManagement.getTempDir(
            "%s_step" % self.__class__.__name__.lower())
        # Temporary files for install step
        self.operateStepFile = "%s/%s_step.dat" % (
            self.operateStepDir, self.__class__.__name__.lower())
        self.initStep = ""
        self.dws_mode = False
        self.rollbackCommands = []
        # Per-instance-type component lists; dnCons is filled by
        # initKernelComponent, the others by subclasses.
        self.etcdCons = []
        self.cmCons = []
        self.gtmCons = []
        self.cnCons = []
        self.dnCons = []
        # localMode is same as isSingle in all OM scripts, except for
        # gs_preinstall.
        # in gs_preinstall, localMode means local mode for master-standby
        # cluster.
        # in gs_preinstall, localMode also means local mode for single
        # cluster(will not create os user).
        # in gs_preinstall, isSingle means single cluster, it will create
        # os user.
        # not isSingle and not localMode : master-standby cluster global
        # mode(will create os user).
        # not isSingle and localMode : master-standby cluster local
        # mode(will not create os user).
        # isSingle and not localMode : single cluster(will create os user).
        # isSingle and localMode : single cluster(will not create os user).
        self.localMode = False
        self.isSingle = False
        # Indicates whether there is a logical cluster.
        # If elastic_group exists, the current cluster is a logical cluster.
        # Otherwise, it is a large physical cluster.
        self.isElasticGroup = False
        self.isAddElasticGroup = False
        self.lcGroup_name = ""
        # Lock the cluster mode, there are two modes: exclusive lock and
        # wait lock mode,
        # the default exclusive lock
        self.lockMode = "exclusiveLock"

        # SinglePrimaryMultiStandby support binary upgrade, inplace upgrade
        self.isSinglePrimaryMultiStandby = False

        # Adapt to 200 and 300
        self.productVersion = None

    def initComponent(self):
        """
        function: Init component
        input : NA
        output: NA
        """
        for nodeInfo in self.clusterInfo.dbNodes:
            self.initKernelComponent(nodeInfo)

    def initComponentAttributes(self, component):
        """
        function: Init  component attributes on current node
        input : Object component
        output: NA
        """
        component.logger = self.logger
        component.binPath = "%s/bin" % self.clusterInfo.appPath
        component.dwsMode = self.dws_mode

    def initKernelComponent(self, nodeInfo):
        """
        function: Init kernel component
        input : Object nodeInfo
        output: NA
        """
        for inst in nodeInfo.datanodes:
            component = DN_OLAP()
            # init component cluster type
            component.clusterType = self.clusterInfo.clusterType
            component.instInfo = inst
            self.initComponentAttributes(component)
            self.dnCons.append(component)

    def initLogger(self, module=""):
        """
        function: Init logger
        input : module
        output: NA
        """
        # log level
        LOG_DEBUG = 1
        self.logger = GaussLog(self.logFile, module, LOG_DEBUG)

        dirName = os.path.dirname(self.logFile)
        self.localLog = os.path.join(dirName, DefaultValue.LOCAL_LOG_FILE)

    def initClusterInfo(self, refreshCN=True):
        """
        function: load cluster topology from the XML configuration file
        input : refreshCN - when True, also read the static config file
                so CN information is refreshed
        output: NA
        """
        try:
            self.clusterInfo = dbClusterInfo()
            if refreshCN:
                staticConfigFile = "%s/bin/cluster_static_config" % \
                                   DefaultValue.getInstallDir(self.user)
                self.clusterInfo.initFromXml(self.xmlFile, staticConfigFile)
            else:
                self.clusterInfo.initFromXml(self.xmlFile)
        except Exception as e:
            raise Exception(str(e))
        self.logger.debug("Instance information of cluster:\n%s." %
                          str(self.clusterInfo))

    def initClusterInfoFromStaticFile(self, user, flag=True):
        """
        function: load cluster topology from the static config of `user`
        input : user - cluster user
                flag - when True, log the loaded topology at debug level
        output: NA
        """
        try:
            self.clusterInfo = dbClusterInfo()
            self.clusterInfo.initFromStaticConfig(user)
        except Exception as e:
            raise Exception(str(e))
        if flag:
            self.logger.debug("Instance information of cluster:\n%s." %
                              str(self.clusterInfo))

    def initSshTool(self, nodeNames, timeout=0):
        """
        function: create the SshTool used for remote command execution
        input : nodeNames - hosts the tool will manage
                timeout - ssh timeout in seconds (0 means no timeout)
        output: NA
        """
        self.sshTool = SshTool(nodeNames, self.logger.logFile, timeout)

    def check_cluster_version_consistency(self, clusterNodes, newNodes=None):
        """
        function: check that every node runs the same package version
        input : clusterNodes - all node names/IPs of the cluster
                newNodes - nodes being added; their version is read from
                           $GPHOME/version.cfg instead of
                           $GAUSSHOME/bin/upgrade_version
        output: NA; raises Exception when versions are inconsistent
        """
        self.logger.log("Check cluster version consistency.")
        if newNodes is None:
            newNodes = []
        dic_version_info = {}
        # check version.cfg on every node.
        gp_home = DefaultValue.getEnv("GPHOME")
        gauss_home = DefaultValue.getEnv("GAUSSHOME")
        if not (os.path.exists(gp_home) and os.path.exists(gauss_home)):
            # Bug fix: the template was formatted twice
            # (template % ("%s", "or %s") % paths), which raises TypeError
            # for the single-placeholder GAUSS_50201 template and masked
            # the real error; format once with both paths instead.
            GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50201"] %
                                   ("%s or %s" % (gp_home, gauss_home)))
        for ip in clusterNodes:
            # reuse the environment values resolved above
            if ip in newNodes:
                cmd = "pssh -s -H %s 'cat %s/version.cfg'" % (ip, gp_home)
            else:
                cmd = "pssh -s -H %s 'cat %s/bin/upgrade_version'" % \
                      (ip, gauss_home)
            status, output = subprocess.getstatusoutput(cmd)
            if status != 0:
                raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd +
                                " Error:\n%s" % str(output))
            # Expected: a header line token plus at least two version
            # fields; the version info starts at the second token.
            if len(output.strip().split()) < 3:
                raise Exception(ErrorCode.GAUSS_516["GAUSS_51623"])
            dic_version_info[ip] = ",".join(output.strip().split()[1:])

        self.logger.debug("The cluster version on every node.")
        for check_ip, version_info in dic_version_info.items():
            self.logger.debug("%s : %s" % (check_ip, version_info))
        if len(set(dic_version_info.values())) != 1:
            inconsistent = list(set(dic_version_info.values()))
            self.logger.debug("The package version on some nodes are "
                              "inconsistent\n%s" % str(inconsistent))
            raise Exception("The package version on some nodes are "
                            "inconsistent,%s" % str(inconsistent))
        self.logger.log("Successfully checked cluster version.")

    def checkBaseFile(self, checkXml=True):
        """
        function: validate the XML configuration file and the log file path
        input : checkXml - whether an XML configuration file is required
        output: NA
        """
        if checkXml:
            # The XML file must be given, exist, and be an absolute path.
            if self.xmlFile == "":
                raise Exception(ErrorCode.GAUSS_500["GAUSS_50001"] % 'X' + ".")
            if not os.path.exists(self.xmlFile):
                raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] %
                                ("configuration file [%s]" % self.xmlFile))
            if not os.path.isabs(self.xmlFile):
                raise Exception(ErrorCode.GAUSS_502["GAUSS_50213"] %
                                ("configuration file [%s]" % self.xmlFile))
        else:
            self.xmlFile = ""

        # Fall back to the default OM log path when none was given.
        if self.logFile == "":
            self.logFile = DefaultValue.getOMLogPath(self.logType, self.user,
                                                     "", self.xmlFile)
        if not os.path.isabs(self.logFile):
            raise Exception(ErrorCode.GAUSS_502["GAUSS_50213"] % "log")

    def initSignalHandler(self):
        """
        function: ignore the signals that must not interrupt an OM run
        input : NA
        output: NA
        """
        ignoredSignals = (signal.SIGINT, signal.SIGQUIT, signal.SIGTERM,
                          signal.SIGALRM, signal.SIGHUP, signal.SIGUSR1,
                          signal.SIGUSR2)
        for sig in ignoredSignals:
            signal.signal(sig, signal.SIG_IGN)

    def print_signal_stack(self, frame):
        """
        function: Function to print signal stack
        input : frame
        output: NA
        """
        if (self.logger is None):
            return
        try:
            import inspect
            stacks = inspect.getouterframes(frame)
            for curr in range(len(stacks)):
                stack = stacks[curr]
                self.logger.debug("Stack level: %d. File: %s. Function: "
                                  "%s. LineNo: %d." %
                                  (curr, stack[1], stack[3], stack[2]))
                self.logger.debug("Code: %s." %
                                  (stack[4][0].strip().strip("\n")))
        except Exception as e:
            self.logger.debug("Failed to print signal stack. Error: \n%s" %
                              str(e))

    def raise_handler(self, signal_num, frame):
        """
        function: Function to raise handler
        input : signal_num, frame
        output: NA
        """
        if (self.logger is not None):
            self.logger.debug("Received signal[%d]." % (signal_num))
            self.print_signal_stack(frame)
        raise Exception(ErrorCode.GAUSS_516["GAUSS_51614"] % (signal_num))

    def setupTimeoutHandler(self):
        """
        function: Function to set up time out handler
        input : NA
        output: NA
        """
        signal.signal(signal.SIGALRM, self.timeout_handler)

    def setTimer(self, timeout):
        """
        function: Function to set timer
        input : timeout
        output: NA
        """
        self.logger.debug("Set timer. The timeout: %d." % timeout)
        signal.signal(signal.SIGALRM, self.timeout_handler)
        signal.alarm(timeout)

    def resetTimer(self):
        """
        function: Reset timer
        input : NA
        output: NA
        """
        signal.signal(signal.SIGALRM, signal.SIG_IGN)
        self.logger.debug("Reset timer. Left time: %d." % signal.alarm(0))

    def timeout_handler(self, signal_num, frame):
        """
        function: Received the timeout signal
        input : signal_num, frame
        output: NA
        """
        if (self.logger is not None):
            self.logger.debug("Received the timeout signal: [%d]." %
                              (signal_num))
            self.print_signal_stack(frame)
        raise Timeout("Time out.")

    def waitProcessStop(self, processKeywords, hostname):
        """
        function: poll a remote host until the given process disappears
        input : processKeywords - keyword matched against process names
                hostname - host to poll
        output: NA
        """
        checkCount = 0
        # \$11 keeps awk's field reference from local shell expansion.
        psCmd = "ps ux|grep -v grep |awk '{print \$11}'|grep '%s' " % \
                processKeywords.strip()
        while True:
            (status,
             output) = self.sshTool.getSshStatusOutput(psCmd, [hostname])
            # Determine whether the process can be found: grep returns
            # non-success once nothing matches.
            if status[hostname] != DefaultValue.SUCCESS:
                self.logger.debug("The %s process stopped." % processKeywords)
                break
            checkCount += 1
            # Log only every 20th poll (~once per minute) to limit noise.
            if checkCount % 20 == 0:
                self.logger.debug("The %s process exists." % processKeywords)
            time.sleep(3)

    def managerOperateStepDir(self, action='create', nodes=None):
        """
        function: create or remove the temporary step directory on nodes
        input : action - 'create' to make the directory, anything else
                         removes it
                nodes - target node list (all nodes when empty)
        output: NA
        """
        if nodes is None:
            nodes = []
        try:
            createCmd = "(if [ ! -d '%s' ];then mkdir -p '%s' -m %s;fi)" % (
                self.operateStepDir, self.operateStepDir,
                DefaultValue.KEY_DIRECTORY_MODE)
            removeCmd = "(if [ -d '%s' ];then rm -rf '%s';fi)" % (
                self.operateStepDir, self.operateStepDir)
            cmd = createCmd if action == "create" else removeCmd
            DefaultValue.execCommandWithMode(cmd,
                                             "%s temporary directory" % action,
                                             self.sshTool, self.localMode
                                             or self.isSingle, "", nodes)
        except Exception as e:
            raise Exception(str(e))

    def readOperateStep(self):
        """
        function: read operate step signal 
        input : NA
        output: currentStep
        """
        currentStep = self.initStep

        if not os.path.exists(self.operateStepFile):
            self.logger.debug("The %s does not exits." % self.operateStepFile)
            return currentStep

        if not os.path.isfile(self.operateStepFile):
            self.logger.debug("The %s must be a file." % self.operateStepFile)
            return currentStep

        with open(self.operateStepFile, "r") as fp:
            line = fp.readline().strip()
            if line is not None and line != "":
                currentStep = line

        return currentStep

    def writeOperateStep(self, stepName, nodes=None):
        """
        function: persist the current step locally and distribute the
                  step file to the other nodes
        input : stepName - step tag to record
                nodes - nodes to receive the file (all when empty)
        output: NA
        """
        if nodes is None:
            nodes = []
        try:
            # Record the step locally.
            with open(self.operateStepFile, "w") as stepFp:
                stepFp.write(stepName)
                stepFp.write(os.linesep)
                stepFp.flush()
            # Restrict the step file's permissions.
            g_file.changeMode(DefaultValue.KEY_FILE_MODE, self.operateStepFile)

            # Make sure the step directory exists on every target node.
            cmd = "mkdir -p -m %s '%s'" % (DefaultValue.KEY_DIRECTORY_MODE,
                                           self.operateStepDir)
            DefaultValue.execCommandWithMode(
                cmd, "create backup directory "
                "on all nodes", self.sshTool, self.localMode or self.isSingle,
                "", nodes)
            # Ship the step file; local/single mode needs no scp.
            if not self.localMode and not self.isSingle:
                self.sshTool.scpFiles(self.operateStepFile,
                                      self.operateStepDir, nodes)
        except Exception as e:
            raise Exception(str(e))

    def distributeFiles(self):
        """
        function: distribute the XML configuration file to every other host
        input : NA
        output: NA
        """
        self.logger.debug("Distributing files.")
        try:
            hosts = self.clusterInfo.getClusterNodeNames()
            localName = DefaultValue.GetHostIpOrName()
            # The local host must be a member of the cluster.
            if localName not in hosts:
                raise Exception(ErrorCode.GAUSS_516["GAUSS_51619"] % localName)
            # Send xml file to every other host (skip ourselves).
            hosts.remove(localName)
            DefaultValue.distributeXmlConfFile(self.sshTool, self.xmlFile,
                                               hosts, self.mpprcFile)
            self.logger.debug("Successfully distributed files.")
        except Exception as e:
            raise Exception(str(e))

    def checkPreInstall(self, user, flag, nodes=None):
        """
        function: verify gs_preinstall has been executed on the given nodes
        input : user - cluster user
                flag - check flag forwarded to the local script
                nodes - target nodes (all when empty)
        output: NA
        """
        if nodes is None:
            nodes = []
        try:
            localScript = OMCommand.getLocalScript("Local_Check_PreInstall")
            cmd = "%s -U %s -t %s" % (localScript, user, flag)
            DefaultValue.execCommandWithMode(cmd, "check preinstall",
                                             self.sshTool, self.localMode
                                             or self.isSingle, "", nodes)
        except Exception as e:
            raise Exception(str(e))

    def checkNodeInstall(self,
                         nodes=None,
                         checkParams=None,
                         strictUserCheck=True):
        """
        function: check the installation environment on the given nodes
        input : nodes - target nodes (all when empty)
                checkParams - "key=value" GUC parameters to verify
                strictUserCheck - when False, pass -O to relax user check
        output: NA
        """
        if nodes is None:
            nodes = []
        if checkParams is None:
            checkParams = []
        # Only these GUCs may be forwarded to the local check script.
        allowedGucs = ["shared_buffers", "max_connections"]
        cooGucParam = ""
        for rawParam in checkParams:
            pieces = rawParam.split("=")
            if len(pieces) != 2:
                raise Exception(ErrorCode.GAUSS_500["GAUSS_50009"])
            if pieces[0].strip() in allowedGucs:
                cooGucParam += " -C \\\"%s\\\"" % rawParam
        self.logger.log("Checking installation environment on all nodes.")
        cmd = "%s -U %s:%s -R %s %s -l %s -X '%s'" % (
            OMCommand.getLocalScript("Local_Check_Install"), self.user,
            self.group, self.clusterInfo.appPath, cooGucParam, self.localLog,
            self.xmlFile)
        if not strictUserCheck:
            cmd += " -O"
        self.logger.debug("Checking the install command: %s." % cmd)
        DefaultValue.execCommandWithMode(cmd, "check installation environment",
                                         self.sshTool, self.localMode
                                         or self.isSingle, "", nodes)

    def cleanNodeConfig(self, nodes=None, datadirs=None):
        """
        function: delete instances (data directories) from the given nodes
        input : nodes - target nodes (all when empty)
                datadirs - data directories to clean
        output: NA
        """
        self.logger.log("Deleting instances from all nodes.")
        if nodes is None:
            nodes = []
        if datadirs is None:
            datadirs = []
        dirParams = "".join(" -D %s " % datadir for datadir in datadirs)
        cmd = "%s -U %s %s -l %s" % (OMCommand.getLocalScript(
            "Local_Clean_Instance"), self.user, dirParams, self.localLog)
        DefaultValue.execCommandWithMode(cmd, "clean instance", self.sshTool,
                                         self.localMode or self.isSingle, "",
                                         nodes)
        self.logger.log("Successfully deleted instances from all nodes.")

    @staticmethod
    def getPrepareKeysCmd(key_file,
                          user,
                          confFile,
                          destPath,
                          logfile,
                          userProfile="",
                          localMode=False):
        """
        function: build the command that prepares etcd communication keys
        input : key_file, user, confFile, destPath, logfile
                userProfile - env file (defaults to the mpprc file)
                localMode - when True, do not scp keys to cluster nodes
        output: the shell command string
        """
        if not os.path.exists(key_file):
            raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % key_file)
        profile = userProfile or DefaultValue.getMpprcFile()
        cmd = "source %s; %s -U %s -X %s --src-file=%s --dest-path=%s -l %s" \
              % (profile, OMCommand.getLocalScript("Local_PrepareKeys"),
               user, confFile, key_file, destPath, logfile)
        # if local mode, only prepare keys, do not scp keys to cluster nodes
        if localMode:
            cmd += " -L"
        return cmd

    def getClusterRings(self, clusterInfo):
        """
        function: get clusterRings from cluster info
        input: DbclusterInfo() instance
        output: list of hostname lists; each inner list is the union of
                hostnames connected through DB master/standby relations
                (one "ring")
        """
        hostPerNodeList = self.getDNHostnamesPerNode(clusterInfo)
        # Loop the hostname list on each node where the master and slave
        # of the DB instance.
        # Merge every later per-node list that shares a hostname with the
        # i-th list into it, then drop the merged list.
        # NOTE(review): the inner loop iterates a slice (a copy), so
        # remove() below is safe for that iteration, but the list shrinks
        # while the outer range was computed once -- the exact order of
        # operations matters, so this logic is kept verbatim.
        for i in range(len(hostPerNodeList)):
            # Loop the list after the i-th list
            for perNodelist in hostPerNodeList[i + 1:len(hostPerNodeList)]:
                # Define a tag
                flag = 0
                # Loop the elements of each perNodelist
                for hostNameElement in perNodelist:
                    # If elements on the i-th node, each element of the
                    # list are joined in hostPerNodeList[i
                    if hostNameElement in hostPerNodeList[i]:
                        flag = 1
                        for element in perNodelist:
                            if element not in hostPerNodeList[i]:
                                hostPerNodeList[i].append(element)
                # the whole merged list is removed from the candidates
                if (flag == 1):
                    hostPerNodeList.remove(perNodelist)

        return hostPerNodeList

    def getDNHostnamesPerNode(self, clusterInfo):
        """
        function: get DB hostnames per node
        input: DbclusterInfo() instance
        output: list
        """
        hostPerNodeList = []
        for dbNode in clusterInfo.dbNodes:
            nodeDnlist = []
            # loop per node
            for dnInst in dbNode.datanodes:
                if (dnInst.instanceType == DefaultValue.MASTER_INSTANCE):
                    if dnInst.hostname not in nodeDnlist:
                        nodeDnlist.append(dnInst.hostname)
                    # get other standby and dummy hostname
                    instances = clusterInfo.getPeerInstance(dnInst)
                    for inst in instances:
                        if inst.hostname not in nodeDnlist:
                            nodeDnlist.append(inst.hostname)
            if nodeDnlist != []:
                hostPerNodeList.append(nodeDnlist)
        return hostPerNodeList

    # for olap function
    def checkIsElasticGroupExist(self, dbNodes):
        """
        function: set self.isElasticGroup according to whether the
                  'elastic_group' node group exists in the cluster
        input : dbNodes - nodes searched for a usable CN
        output: NA
        """
        self.logger.debug("Checking if elastic group exists.")

        self.isElasticGroup = False
        # pick the first CN found among the given nodes
        coorNode = []
        for dbNode in dbNodes:
            if len(dbNode.coordinators) >= 1:
                coorNode.append(dbNode.coordinators[0])
                break

        # query the catalog for the elastic group
        CHECK_GROUP_SQL = "SELECT count(*) FROM pg_catalog.pgxc_group " \
                          "WHERE group_name='elastic_group' " \
                          "and group_kind='e'; "
        (checkstatus, checkoutput) = ClusterCommand.remoteSQLCommand(
            CHECK_GROUP_SQL, self.user, coorNode[0].hostname, coorNode[0].port)
        if checkstatus != 0 or not checkoutput.isdigit():
            raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % "node group" +
                            " Error:\n%s" % str(checkoutput))
        groupCount = checkoutput.strip()
        if groupCount == '1':
            self.isElasticGroup = True
        elif groupCount == '0':
            self.isElasticGroup = False
        else:
            raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] %
                            "the number of node group")

        self.logger.debug("Successfully checked if elastic group exists.")

    def checkHostnameIsLoop(self, nodenameList):
        """
        function: check if hostname is looped
        input : NA
        output: NA
        """
        isRing = True
        # 1.get ring information in the cluster
        clusterRings = self.getClusterRings(self.clusterInfo)
        nodeRing = ""
        nodenameRings = []
        # 2.Check if the node is in the ring
        for num in iter(clusterRings):
            ringNodeList = []
            for nodename in nodenameList:
                if (nodename in num):
                    ringNodeList.append(nodename)
            if (len(ringNodeList) != 0 and len(ringNodeList) == len(num)):
                nodenameRings.append(ringNodeList)
            if (len(ringNodeList) != 0 and len(ringNodeList) != len(num)):
                isRing = False
                break
            else:
                continue
        if not isRing:
            raise Exception(ErrorCode.GAUSS_500["GAUSS_50004"] % "h" +
                            " The hostname (%s) specified by the -h parameter "
                            "must be looped." % nodeRing)
        return (clusterRings, nodenameRings)

    def getDNinstanceByNodeName(self, hostname, isMaster=True):
        """
        function: Get the DB instance of the node based on the node name.
        input : hostname
                isMaster: get master DB instance
        output: NA
        """
        masterdnInsts = []
        standbydnInsts = []
        # notice
        for dbNode in self.clusterInfo.dbNodes:
            if (dbNode.name == hostname):
                for dbInst in dbNode.datanodes:
                    # get master DB instance
                    if (dbInst.instanceType == DefaultValue.MASTER_INSTANCE):
                        masterdnInsts.append(dbInst)
                    # get standby or dummy DB instance
                    else:
                        standbydnInsts.append(dbInst)

        if (isMaster):
            return masterdnInsts
        else:
            return standbydnInsts

    def getSQLResultList(self, sql, user, hostname, port, database="postgres"):
        """
        """
        (status,
         output) = ClusterCommand.remoteSQLCommand(sql, user, hostname, port,
                                                   False, database)
        if status != 0 or ClusterCommand.findErrorInSql(output):
            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % sql +
                            " Error:\n%s" % str(output))
        # split the output string with '\n'
        resultList = output.split("\n")
        return resultList

    def getCooInst(self):
        """
        function: get CN instance
        input : NA
        output: CN instance
        """
        coorInst = []
        # get CN on nodes
        for dbNode in self.clusterInfo.dbNodes:
            if (len(dbNode.coordinators) >= 1):
                coorInst.append(dbNode.coordinators[0])
        # check if contain CN on nodes
        if (len(coorInst) == 0):
            raise Exception(ErrorCode.GAUSS_526["GAUSS_52602"])
        else:
            return coorInst

    def getGroupName(self, fieldName, fieldVaule):
        """
        function: Get nodegroup name by field name and field value.
        input : field name and field value
        output: node group name
        """
        # 1.get CN instance info from cluster
        cooInst = self.getCooInst()

        # 2.obtain the node group
        # NOTE(review): the SQL is built by string interpolation; only
        # trusted, internally generated field names/values may be passed.
        OBTAIN_SQL = "select group_name from pgxc_group where %s = %s; " % \
                     (fieldName, fieldVaule)
        (status, output) = ClusterCommand.remoteSQLCommand(OBTAIN_SQL,
                                                           self.user,
                                                           cooInst[0].hostname,
                                                           cooInst[0].port,
                                                           ignoreError=False)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % OBTAIN_SQL +
                            " Error:\n%s" % str(output))
        return output.strip()

    def killKernalSnapshotThread(self, dnInst):
        """
        function: kill the snapshot thread in the kernel to avoid a
                  deadlock with redistribution
        input : dnInst - datanode instance to connect to
        output: NA
        """
        self.logger.debug("Stopping snapshot thread in database node Kernel.")
        killSnapshotSQL = "select * from kill_snapshot();"
        (status, output) = ClusterCommand.remoteSQLCommand(
            killSnapshotSQL, self.user, dnInst.hostname, dnInst.port, False,
            DefaultValue.DEFAULT_DB_NAME)
        if status != 0:
            raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] %
                            killSnapshotSQL + " Error:\n%s" % str(output))
        self.logger.debug("Successfully stopped snapshot "
                          "thread in database node Kernel.")

    def createServerCa(self, hostList=None):
        """
        function: create server CA files and distribute them to all nodes
        input : hostList - target hosts (all cluster nodes when empty)
        output: NA
        """
        self.logger.debug("Generating CA files.")
        if hostList is None:
            hostList = []
        appPath = DefaultValue.getInstallDir(self.user)
        caPath = os.path.join(appPath, "share/sslcert/om")
        self.logger.debug("The ca file dir is: %s." % caPath)
        if len(hostList) == 0:
            for dbNode in self.clusterInfo.dbNodes:
                hostList.append(dbNode.name)
        # Create CA dir and prepare files for using.
        self.logger.debug("Create CA file directory.")
        try:
            DefaultValue.createCADir(self.sshTool, caPath, hostList)
            self.logger.debug("Add hostname to config file.")
            DefaultValue.createServerCA(DefaultValue.SERVER_CA, caPath,
                                        self.logger)
            # Clean useless files, and change permission of ca file to 600.
            DefaultValue.cleanServerCaDir(caPath)
            self.logger.debug("Scp CA files to all nodes.")
        except Exception as e:
            # roll back: drop the partially generated certificate
            certFile = caPath + "/demoCA/cacert.pem"
            if os.path.exists(certFile):
                g_file.removeFile(certFile)
            DefaultValue.cleanServerCaDir(caPath)
            raise Exception(str(e))
        if not self.isSingle:
            # localhost no need scp files
            for certFile in DefaultValue.SERVER_CERT_LIST:
                self.sshTool.scpFiles(os.path.join(caPath, "%s" % certFile),
                                      caPath, hostList)
        self.logger.debug("Successfully generated server CA files.")

    def createGrpcCa(self, hostList=None):
        """
        function: create grpc CA files and distribute them to all nodes
        input : hostList - target hosts (all cluster nodes when empty)
        output: NA
        """
        self.logger.debug("Generating grpc CA files.")
        if hostList is None:
            hostList = []
        appPath = DefaultValue.getInstallDir(self.user)
        caPath = os.path.join(appPath, "share/sslcert/grpc")
        self.logger.debug("The ca file dir is: %s." % caPath)
        if len(hostList) == 0:
            for dbNode in self.clusterInfo.dbNodes:
                hostList.append(dbNode.name)
        # Create CA dir and prepare files for using.
        self.logger.debug("Create CA file directory.")
        try:
            DefaultValue.createCADir(self.sshTool, caPath, hostList)
            self.logger.debug("Add hostname to config file.")
            configPath = os.path.join(appPath,
                                      "share/sslcert/grpc/openssl.cnf")
            self.logger.debug("The ca file dir is: %s." % caPath)
            # Add hostname to openssl.cnf file.
            DefaultValue.changeOpenSslConf(configPath, hostList)
            self.logger.debug("Generate CA files.")
            DefaultValue.createCA(DefaultValue.GRPC_CA, caPath)
            # Clean useless files, and change permission of ca file to 600.
            DefaultValue.cleanCaDir(caPath)
            self.logger.debug("Scp CA files to all nodes.")
        except Exception as e:
            # roll back: drop the partially generated certificate
            certFile = caPath + "/demoCA/cacertnew.pem"
            if os.path.exists(certFile):
                g_file.removeFile(certFile)
            DefaultValue.cleanCaDir(caPath)
            raise Exception(str(e))
        for certFile in DefaultValue.GRPC_CERT_LIST:
            self.sshTool.scpFiles(os.path.join(caPath, "%s" % certFile),
                                  caPath, hostList)
        self.logger.debug("Successfully generated grpc CA files.")

    def genCipherAndRandFile(self, hostList=None, initPwd=None):
        """
        function: encrypt the database password into server.key.cipher and
                  server.key.rand under <appPath>/bin via 'gs_guc encrypt',
                  tighten the key files' permissions, and distribute them to
                  the other cluster nodes
        input : hostList - target node names; when omitted/empty, defaults
                           to every node in the cluster
                initPwd  - database password; when not given, the user is
                           prompted interactively (entered twice to catch
                           typos)
        output: NA
        raises: Exception after 3 failed attempts (password mismatch or
                gs_guc encrypt failure)
        """
        self.logger.debug("Encrypting cipher and rand files.")
        # Avoid the mutable-default-argument pitfall.
        if hostList is None:
            hostList = []
        appPath = DefaultValue.getInstallDir(self.user)
        binPath = os.path.join(appPath, "bin")
        retry = 0
        while True:
            if not initPwd:
                # Interactive mode: ask twice so a typo is caught below.
                sshpwd = getpass.getpass("Please enter password for database:")
                sshpwd_check = getpass.getpass("Please repeat for database:")
            else:
                sshpwd = sshpwd_check = initPwd
            if sshpwd_check != sshpwd:
                # Mismatch: scrub both copies and fall through to the retry
                # accounting at the bottom of the loop.
                sshpwd = ""
                sshpwd_check = ""
                self.logger.error(ErrorCode.GAUSS_503["GAUSS_50306"] %
                                  "database" +
                                  "The two passwords are different, "
                                  "please enter password again.")
            else:
                # NOTE(review): the password is passed on the gs_guc command
                # line (-K), so it is briefly visible in local `ps` output —
                # confirm this is acceptable for the deployment environment.
                cmd = "%s/gs_guc encrypt -M server -K %s -D %s " % (
                    binPath, sshpwd, binPath)
                (status, output) = subprocess.getstatusoutput(cmd)
                # Scrub all password copies as soon as the command has run;
                # clearing initPwd forces interactive re-entry on retry.
                sshpwd = ""
                sshpwd_check = ""
                initPwd = ""
                if status != 0:
                    self.logger.error(ErrorCode.GAUSS_503["GAUSS_50322"] %
                                      "database" + "Error:\n %s" % output)
                else:
                    break
            # Give up after 3 attempts (retry counts 0, 1, 2).
            if retry >= 2:
                raise Exception(ErrorCode.GAUSS_503["GAUSS_50322"] %
                                "database")
            retry += 1
        # Restrict the generated key files to KEY_FILE_MODE.
        # NOTE(review): the literal single quotes around %s appear intended
        # for a shell command built inside g_file.changeMode — verify.
        g_file.changeMode(DefaultValue.KEY_FILE_MODE,
                          "'%s'/server.key.cipher" % binPath)
        g_file.changeMode(DefaultValue.KEY_FILE_MODE,
                          "'%s'/server.key.rand" % binPath)
        if len(hostList) == 0:
            # No explicit targets: fan out to the whole cluster.
            for dbNode in self.clusterInfo.dbNodes:
                hostList.append(dbNode.name)
        if not self.isSingle:
            # localhost no need scp files
            for certFile in DefaultValue.BIN_CERT_LIST:
                scpFile = os.path.join(binPath, "%s" % certFile)
                self.sshTool.scpFiles(scpFile, binPath, hostList)
        self.logger.debug("Successfully encrypted cipher and rand files.")