def __ensureStopped(self, gpEnv, directives):
    """
    Stop any failed segments named by the given directives, dispatching one
    remote GpSegStopCmd per host.

    @param directives a list of the GpStopSegmentDirectoryDirective values
                      indicating which segments to stop
    """
    if not directives:
        return

    logger.info("Ensuring %d failed segment(s) are stopped" % (len(directives)))
    segmentsToStop = [d.getSegment() for d in directives]

    cmds = []
    for hostName, hostSegments in GpArray.getSegmentsByHostName(segmentsToStop).iteritems():
        cmds.append(gp.GpSegStopCmd("remote segment stop on host '%s'" % hostName,
                                    gpEnv.getGpHome(), gpEnv.getGpVersion(),
                                    mode='fast', dbs=hostSegments,
                                    verbose=logging_is_verbose(),
                                    ctxt=base.REMOTE, remoteHost=hostName))

    # Error checking is suppressed on purpose: gpsegstop frequently reports an
    # error even though the stop actually succeeded (for example, a segment
    # that is running but slow to shut down errors after being killed).
    #
    # A possible improvement: check whether the segment is running and attempt
    # the stop only in that case, so errors could then be propagated.
    self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "stopping segments",
                                                      suppressErrorCheck=True)
def __sendPrimaryMirrorTransition(self, targetMode, segments, convertUsingFullResync, gpArray, resultOut):
    """
    Run a parallel primary/mirror mode conversion of the given segments,
    dispatching one GpSegChangeMirrorModeCmd per host.

    @param segments the segments to convert
    @param convertUsingFullResync in parallel with segments, may be None; gives
           true/false for whether the fullResync flag should be passed to the
           transition
    """
    if not segments:
        logger.debug("%s conversion of zero segments...skipping" % targetMode)
        return

    logger.info(
        "Commencing parallel %s conversion of %s segments, please wait..." %
        (targetMode, len(segments)))

    ###############################################
    # for each host, create + transfer the transition arguments file
    dispatchCount = 0
    dbIdToPeerMap = gpArray.getDbIdToPeerMap()
    for hostName, hostSegments in GpArray.getSegmentsByHostName(segments).iteritems():
        assert len(hostSegments) > 0
        logger.debug("Dispatching command to convert segments on host: %s " % (hostName))

        pickledParams = self.__createPickledTransitionParameters(
            hostSegments,
            [targetMode] * len(hostSegments),
            convertUsingFullResync,
            dbIdToPeerMap)

        address = hostSegments[0].getSegmentAddress()
        cmd = gp.GpSegChangeMirrorModeCmd(
            "remote segment mirror mode conversion on host '%s' using address '%s'" % (hostName, address),
            self.__gpHome, self.__localeData, self.__gpVersion,
            hostSegments, targetMode, pickledParams,
            verbose=logging_is_verbose(),
            ctxt=base.REMOTE,
            remoteHost=address)
        self.__workerPool.addCommand(cmd)
        dispatchCount += 1

    self.__workerPool.wait_and_printdots(dispatchCount, self.__quiet)

    # process results
    self.__processStartOrConvertCommands(resultOut)
    self.__workerPool.empty_completed_items()
def __cleanUpSegmentDirectories(self, directives):
    """
    Remove the data files for each segment named by the given directives,
    dispatching one GpCleanSegmentDirectories command per host.
    """
    if not directives:
        return

    logger.info("Cleaning files from %d segment(s)" % (len(directives)))
    segmentsToClean = [d.getSegment() for d in directives]

    cmds = [gp.GpCleanSegmentDirectories("clean segment directories on %s" % hostName,
                                         hostSegments, gp.REMOTE, hostName)
            for hostName, hostSegments in GpArray.getSegmentsByHostName(segmentsToClean).iteritems()]

    self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "cleaning existing directories")
def checkForPortAndDirectoryConflicts(self, gpArray):
    """
    Check gpArray for internal consistency -- no duplicate ports or directories
    on the same host, for example

    A detected problem causes an Exception to be raised
    """
    for hostName, segmentArr in GpArray.getSegmentsByHostName(gpArray.getDbList()).iteritems():
        usedPorts = {}            # port number -> dbid of the segment that claimed it
        usedDataDirectories = {}  # directory path -> dbid of the segment that claimed it
        for segment in segmentArr:

            # check for port conflict
            replicationPort = segment.getSegmentReplicationPort()
            port = segment.getSegmentPort()
            dbid = segment.getSegmentDbId()
            if port in usedPorts:
                raise Exception("On host %s, a port for segment with dbid %s conflicts with a port for segment dbid %s" \
                                % (hostName, dbid, usedPorts.get(port)))

            if segment.isSegmentQE():
                if replicationPort is None:
                    raise Exception("On host %s, the replication port is not set for segment with dbid %s" \
                                    % (hostName, dbid))

                if replicationPort in usedPorts:
                    raise Exception("On host %s, a port for segment with dbid %s conflicts with a port for segment dbid %s" \
                                    % (hostName, dbid, usedPorts.get(replicationPort)))

                if port == replicationPort:
                    raise Exception("On host %s, segment with dbid %s has equal port and replication port" \
                                    % (hostName, dbid))

            usedPorts[port] = dbid
            # BUGFIX: only record the replication port when one exists.
            # Previously a None key was inserted into usedPorts for segments
            # with no replication port (e.g. non-QE segments); harmless for the
            # conflict checks (a QE with a None replication port raises above
            # before any lookup) but it polluted the map.
            if replicationPort is not None:
                usedPorts[replicationPort] = dbid

            # check for directory conflict; could improve this by reporting nicer the conflicts
            paths = [path for oid, path in segment.getSegmentFilespaces().items() if oid != gparray.SYSTEM_FILESPACE]
            paths.append(segment.getSegmentDataDirectory())

            for path in paths:
                if path in usedDataDirectories:
                    raise Exception("On host %s, directory (base or filespace) for segment with dbid %s conflicts with a " \
                                    "directory (base or filespace) for segment dbid %s; directory: %s" % \
                                    (hostName, dbid, usedDataDirectories.get(path), path))
                usedDataDirectories[path] = dbid
def __updateGpIdFile(self, gpEnv, gpArray, segments):
    """
    Rewrite the gpid file for the given segments by dispatching one
    ConfigureNewSegment command per host in writeGpIdFileOnly mode.
    """
    hostToSegments = GpArray.getSegmentsByHostName(segments)
    newSegmentInfo = gp.ConfigureNewSegment.buildSegmentInfoForNewSegment(segments)

    cmds = []
    for hostName in hostToSegments.keys():
        segmentInfo = newSegmentInfo[hostName]
        checkNotNone("segmentInfo for %s" % hostName, segmentInfo)
        cmds.append(gp.ConfigureNewSegment("update gpid file",
                                           segmentInfo,
                                           newSegments=False,
                                           verbose=gplog.logging_is_verbose(),
                                           batchSize=self.__parallelDegree,
                                           ctxt=gp.REMOTE,
                                           remoteHost=hostName,
                                           validationOnly=False,
                                           writeGpIdFileOnly=True))

    self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "writing updated gpid files")
def __runStartCommand(self, segments, startMethod, numContentsInCluster, resultOut, gpArray, era):
    """
    Start the given segment instances in parallel, one gpsegstart dispatch
    per host, and put results into the resultOut object.
    """
    if not segments:
        return

    if startMethod == START_AS_PRIMARY_OR_MIRROR:
        logger.info("Commencing parallel primary and mirror segment instance startup, please wait...")
    else:
        logger.info("Commencing parallel segment instance startup, please wait...")

    dispatchCount = 0
    dbIdToPeerMap = gpArray.getDbIdToPeerMap()

    if startMethod == START_AS_MIRRORLESS:
        mirroringModePreTransition = MIRROR_MODE_MIRRORLESS
    else:
        mirroringModePreTransition = MIRROR_MODE_QUIESCENT

    # launch the start
    for hostName, hostSegments in GpArray.getSegmentsByHostName(segments).iteritems():
        logger.debug("Dispatching command to start segments on host: %s, " \
                     "with %s contents in cluster" % (hostName, numContentsInCluster))

        pickledTransitionData = None
        if startMethod == START_AS_PRIMARY_OR_MIRROR:
            mirroringModePerSegment = [
                MIRROR_MODE_PRIMARY if seg.isSegmentPrimary(True) else MIRROR_MODE_MIRROR
                for seg in hostSegments]
            pickledTransitionData = self.__createPickledTransitionParameters(
                hostSegments, mirroringModePerSegment, None, dbIdToPeerMap)

        #
        # This will call sbin/gpsegstart.py
        #
        cmd = gp.GpSegStartCmd("remote segment starts on host '%s'" % hostName,
                               self.__gpHome,
                               hostSegments,
                               self.__localeData,
                               self.__gpVersion,
                               mirroringModePreTransition,
                               numContentsInCluster,
                               era,
                               self.__timeout,
                               verbose=logging_is_verbose(),
                               ctxt=base.REMOTE,
                               remoteHost=hostSegments[0].getSegmentAddress(),
                               pickledTransitionData=pickledTransitionData,
                               specialMode=self.__specialMode,
                               wrapper=self.__wrapper,
                               wrapper_args=self.__wrapper_args)
        self.__workerPool.addCommand(cmd)
        dispatchCount += 1

    self.__workerPool.wait_and_printdots(dispatchCount, self.__quiet)

    # process results
    self.__processStartOrConvertCommands(resultOut)
    self.__workerPool.empty_completed_items()
def rebalance(self):
    """
    Rebalance the cluster: stop the unbalanced primary segments so the fault
    prober fails over to their mirrors, then run gprecoverseg to resynchronize.
    """
    # Get the unbalanced primary segments grouped by hostname
    # These segments are what we will shutdown.
    logger.info("Getting unbalanced segments")
    unbalanced_primary_segs = GpArray.getSegmentsByHostName(
        self.gpArray.get_unbalanced_primary_segdbs())
    pool = WorkerPool()

    count = 0

    try:
        # Disable ctrl-c so a partial stop cannot leave the cluster in a
        # confusing intermediate state.
        # NOTE(review): the previous SIGINT handler is never restored --
        # confirm whether callers rely on that.
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        logger.info("Stopping unbalanced primary segments...")
        for hostname in unbalanced_primary_segs.keys():
            cmd = GpSegStopCmd("stop unbalanced primary segs",
                               self.gpEnv.getGpHome(),
                               self.gpEnv.getGpVersion(),
                               'fast',
                               unbalanced_primary_segs[hostname],
                               ctxt=REMOTE,
                               remoteHost=hostname,
                               timeout=600)
            pool.addCommand(cmd)
            count += 1

        pool.wait_and_printdots(count, False)

        failed_count = 0
        completed = pool.getCompletedItems()
        for res in completed:
            if not res.get_results().wasSuccessful():
                failed_count += 1

        if failed_count > 0:
            # BUGFIX: the "%d" placeholder previously had no argument, so this
            # logging call raised a formatting error instead of reporting the
            # failure count.
            logger.warn("%d segments failed to stop. A full rebalance of the" % failed_count)
            logger.warn("system is not possible at this time. Please check the")
            logger.warn("log files, correct the problem, and run gprecoverseg -r")
            logger.warn("again.")
            logger.info("gprecoverseg will continue with a partial rebalance.")

        pool.empty_completed_items()

        # issue a distributed query to make sure we pick up the fault
        # that we just caused by shutting down segments
        conn = None
        try:
            logger.info("Triggering segment reconfiguration")
            dburl = dbconn.DbURL()
            conn = dbconn.connect(dburl)
            cmd = ReconfigDetectionSQLQueryCommand(conn)
            pool.addCommand(cmd)
            pool.wait_and_printdots(1, False)
        except Exception:
            # This exception is expected: the query fails when the cluster
            # reconfigures underneath the connection.
            pass
        finally:
            if conn:
                conn.close()

        # Final step is to issue a recoverseg operation to resync segments
        logger.info("Starting segment synchronization")
        cmd = GpRecoverseg("rebalance recoverseg")
        pool.addCommand(cmd)
        pool.wait_and_printdots(1, False)
    except Exception:
        # BUGFIX: bare "raise" instead of "raise ex" -- in Python 2,
        # "raise ex" discards the original traceback.
        raise
def __copySegmentDirectories(self, gpEnv, gpArray, directives):
    """
    Build a template data directory from a source segment and install it on
    every destination segment host: validate the target directories, copy the
    template tar out, unpack and configure the new segments, then clean up
    the temporary files on both the remote hosts and locally.

    directives should be composed of GpCopySegmentDirectoryDirective values
    """
    if not directives:
        return

    srcSegments = [d.getSrcSegment() for d in directives]
    destSegments = [d.getDestSegment() for d in directives]
    isTargetReusedLocation = [d.isTargetReusedLocation() for d in directives]

    destSegmentByHost = GpArray.getSegmentsByHostName(destSegments)
    newSegmentInfo = gp.ConfigureNewSegment.buildSegmentInfoForNewSegment(destSegments, isTargetReusedLocation)

    logger.info('Building template directory')
    (tempDir, blankTarFile, tarFileName) = \
        self.__buildTarFileForTransfer(gpEnv, gpArray.master, srcSegments[0], destSegments)

    def makeConfigureCommand(hostName, cmdLabel, validationOnly):
        # one ConfigureNewSegment dispatch per host, driven by that host's
        # per-segment info block
        segmentInfo = newSegmentInfo[hostName]
        checkNotNone("segmentInfo for %s" % hostName, segmentInfo)
        return gp.ConfigureNewSegment(cmdLabel,
                                      segmentInfo,
                                      tarFile=tarFileName,
                                      newSegments=True,
                                      verbose=gplog.logging_is_verbose(),
                                      batchSize=self.__parallelDegree,
                                      ctxt=gp.REMOTE,
                                      remoteHost=hostName,
                                      validationOnly=validationOnly)

    #
    # validate directories for target segments
    #
    logger.info('Validating remote directories')
    validateCmds = [makeConfigureCommand(hostName, 'validate blank segments', True)
                    for hostName in destSegmentByHost.keys()]
    for cmd in validateCmds:
        self.__pool.addCommand(cmd)
    self.__pool.wait_and_printdots(len(validateCmds), self.__quiet)

    validationErrors = []
    for item in self.__pool.getCompletedItems():
        results = item.get_results()
        if not results.wasSuccessful():
            if results.rc == 1:
                # rc 1 means validation ran and reported its problems on
                # stderr, one per line
                for line in results.stderr.split("\n"):
                    if line.strip():
                        validationErrors.append("Validation failure on host %s %s" % (item.remoteHost, line))
            else:
                validationErrors.append(str(item))
    self.__pool.empty_completed_items()
    if validationErrors:
        raise ExceptionNoStackTraceNeeded("\n" + ("\n".join(validationErrors)))

    #
    # copy tar from master to target hosts
    #
    logger.info('Copying template directory file')
    copyCmds = [gp.RemoteCopy("copy segment tar", blankTarFile, hostName, tarFileName)
                for hostName in destSegmentByHost.keys()]
    self.__runWaitAndCheckWorkerPoolForErrorsAndClear(copyCmds,
                                                      "building and transferring basic segment directory")

    #
    # unpack and configure new segments
    #
    logger.info('Configuring new segments')
    configureCmds = [makeConfigureCommand(hostName, 'configure blank segments', False)
                     for hostName in destSegmentByHost.keys()]
    self.__runWaitAndCheckWorkerPoolForErrorsAndClear(configureCmds,
                                                      "unpacking basic segment directory")

    #
    # Clean up copied tar from each remote host
    #
    logger.info('Cleaning files')
    removeCmds = [unix.RemoveFiles('remove tar file', tarFileName, ctxt=gp.REMOTE, remoteHost=hostName)
                  for hostName in destSegmentByHost.keys()]
    self.__runWaitAndCheckWorkerPoolForErrorsAndClear(removeCmds,
                                                      "cleaning up tar file on segment hosts")

    #
    # clean up the local temp directory
    #
    unix.RemoveFiles.local('remove temp directory', tempDir)