Beispiel #1
0
 def localStop(self):
     """
     Stop the local node

     Resolves the id of the local node from its name and the cluster
     configuration, builds the stop command for that node and runs it in
     a shell. On a non-zero exit status the command output is logged and
     logExit is called; afterwards the success message is logged and the
     log is closed.
     """
     # NOTE(review): the meaning of the "i" argument is defined by
     # ClusterCommand.getStopCmd and is not visible from this method.
     localNodeId = dbClusterInfo.getNodeIdByName(self.dbNodeInfo.name, self.clusterConfig)
     stopCmd = ClusterCommand.getStopCmd(self.user, localNodeId, "i")
     exitCode, cmdOutput = commands.getstatusoutput(stopCmd)
     if exitCode != 0:
         # Keep the raw command output in the log before reporting the failure.
         self.logger.log(cmdOutput)
         # presumably logExit terminates the process -- confirm against the logger
         self.logger.logExit("Stop node failed!")
     self.logger.log("Stop node success.")
     self.logger.closeLog()
Beispiel #2
0
 def __switchToPrimary(self, datadir):
     """
     Switch the local instance to primary

     datadir is forwarded to ClusterCommand.getSwitchOverCmd together with
     the user and the local node id. The resulting command is written to
     the debug log and executed; a non-zero exit status is reported through
     logExit along with the data directory and the command output.
     """
     switchCmd = ClusterCommand.getSwitchOverCmd(self.user, self.dbNodeInfo.id, datadir)
     self.logger.debug("Switch to primary: %s" % switchCmd)
     exitCode, cmdOutput = commands.getstatusoutput(switchCmd)
     if exitCode == 0:
         # Command succeeded, nothing to report.
         return
     failureMsg = "Switch instance to be primary failed!Datadir %s.\nOutput: %s" % (datadir, cmdOutput)
     self.logger.logExit(failureMsg)
Beispiel #3
0
    def doRestore(self):
        """
        Restore the status of instances

        Steps:
          1. Read the configuration and user information.
          2. Dump the current status of the local node to the current-status
             file.
          3. Load the backup-status file and the current-status file and look
             up the local node in both.
          4. For every datanode and gtm instance recorded in the backup whose
             status differs from the current one, switch it back to primary
             or standby. Backup statuses other than primary/standby are left
             untouched.

        Every failure is reported through self.logger.logExit (presumably
        terminating the process -- confirm against the logger).
        """
        self.logger.log("Begin to restore instance status...")

        try:
            self.readConfigInfo()
            self.getUserInfo()

            # dump status to file
            cmd = ClusterCommand.getQueryStatusCmd(self.user, self.dbNodeInfo.id, self.__curStatusFile)
            (status, output) = commands.getstatusoutput(cmd)
            if status != 0:
                self.logger.logExit("Query local instance status failed!Error: %s" % output)

            # Status of the local node as recorded in the backup file
            bakDbStatus = DbClusterStatus()
            bakDbStatus.initFromFile(self.__bakStatusFile)
            bakNodeStatus = bakDbStatus.getDbNodeStatusById(self.dbNodeInfo.id)
            if bakNodeStatus is None:
                self.logger.logExit("Get backup status of local node failed!")

            # Status of the local node as dumped just above
            curDbStatus = DbClusterStatus()
            curDbStatus.initFromFile(self.__curStatusFile)
            curNodeStatus = curDbStatus.getDbNodeStatusById(self.dbNodeInfo.id)
            if curNodeStatus is None:
                self.logger.logExit("Get current status of local node failed!")
            if not curNodeStatus.isNodeHealthy():
                self.logger.logExit("Current status of node is not healthy!")

            # Compare the status and restore it
            bakInstances = bakNodeStatus.datanodes + bakNodeStatus.gtms
            for bakInst in bakInstances:
                # Instances are matched between backup and current status by data directory
                curInst = curNodeStatus.getInstanceByDir(bakInst.datadir)
                if curInst is None:
                    self.logger.logExit("Get current status of instance failed!DataDir:%s" % bakInst.datadir)

                # Nothing to do when the instance already has its backed-up status
                if bakInst.status == curInst.status:
                    continue

                if bakInst.status == DbClusterStatus.INSTANCE_STATUS_PRIMARY:
                    self.__switchToPrimary(bakInst.datadir)
                elif bakInst.status == DbClusterStatus.INSTANCE_STATUS_STANDBY:
                    self.__switchToStandby(bakInst.datadir)

        except Exception as e:
            # "except ... as" is accepted by Python 2.6+ and Python 3, whereas
            # the older "except Exception, e" form is a SyntaxError on Python 3.
            self.logger.logExit(str(e))
Beispiel #4
0
    def doBackup(self):
        """
        Backup the status of instances

        Reads the configuration and user information, then runs the query
        status command for the local node with the backup-status file as its
        target. A non-zero exit status of the command, or any exception
        raised along the way, is reported through self.logger.logExit
        (presumably terminating the process -- confirm against the logger).
        """
        self.logger.log("Begin to backup instance status...")

        try:
            self.readConfigInfo()
            self.getUserInfo()

            # dump status to file
            cmd = ClusterCommand.getQueryStatusCmd(self.user, self.dbNodeInfo.id, self.__bakStatusFile)
            (status, output) = commands.getstatusoutput(cmd)
            if status != 0:
                self.logger.logExit("Query local instance status failed!Error: %s" % output)
        except Exception as e:
            # "except ... as" is accepted by Python 2.6+ and Python 3, whereas
            # the older "except Exception, e" form is a SyntaxError on Python 3.
            self.logger.logExit(str(e))
Beispiel #5
0
            
    if (user == ""):
        GaussLog.exitWithError("Parameter input error, need '-U' parameter.")
        
    if (logFile == ""):
        logFile = DefaultValue.getOMLogPath(DefaultValue.DEFAULT_LOG_FILE, user, "")
    
    writePid(user)
    global g_logger
    g_logger = GaussLog(logFile)
    
    if (time_out <= 0):
        time_out = 1800
    endTime = datetime.now() + timedelta(seconds=time_out)

    connList = ClusterCommand.readCooConnections(user)
    if (len(connList) == 0):
        raise Exception("There is no coordinator to connect!")
    ip = connList[0][0]
    port = connList[0][1]
    
    sql = "select case (select pgxc_lock_for_backup()) when true then (select pg_sleep(%d)) end;" % time_out
    cmd = "gsql -h %s -p %d postgres -X -c \"%s\"" % (ip, port, sql)
    cmd = "su - %s -c '%s'" % (user, cmd)
    sqlThread = CommandThread(cmd)
    sqlThread.start()
    time.sleep(5)
    while True:
        td = endTime - datetime.now()
        leftSeconds = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6
        if (leftSeconds <= 0):