def __init__(self, targetSegment, sourceHostname, sourcePort, timeStamp):
    """Hold the information needed to run pg_rewind for one target segment.

    :param targetSegment: segment object to be recovered (provides the dbid
        used in the progress-file name)
    :param sourceHostname: hostname of the live source segment
    :param sourcePort: port of the live source segment
    :param timeStamp: timestamp string for this run, used to make the
        progress file unique per invocation
    """
    self.targetSegment = targetSegment
    self.sourceHostname = sourceHostname
    self.sourcePort = sourcePort
    # Progress output goes under the logger directory; one file per dbid
    # per run so concurrent/subsequent runs do not collide.
    self.progressFile = '%s/pg_rewind.%s.dbid%s.out' % (gplog.get_logger_dir(),
                                                        timeStamp,
                                                        targetSegment.getSegmentDbId())
def build_recovery_info(mirrors_to_build):
    """
    Format recovery information to send to each segment host.

    @param mirrors_to_build: list of mirrors that need recovery
    @return A dictionary with the following format:
            Key   = <host name>
            Value = list of RecoveryInfos - one RecoveryInfo per segment on
                    that host
    """
    run_timestamp = datetime.datetime.today().strftime('%Y%m%d_%H%M%S')
    host_to_recovery_info = defaultdict(list)

    for mirror in mirrors_to_build:
        live_seg = mirror.getLiveSegment()
        # Prefer the failover segment when one exists; otherwise recover in
        # place on the failed segment.
        target_seg = mirror.getFailoverSegment() or mirror.getFailedSegment()
        is_full = mirror.isFullSynchronization()

        # FIXME: move the progress file naming to gpsegrecovery
        if is_full:
            process_name = 'pg_basebackup'
        else:
            process_name = 'pg_rewind'
        progress_file = '{}/{}.{}.dbid{}.out'.format(gplog.get_logger_dir(),
                                                     process_name,
                                                     run_timestamp,
                                                     target_seg.getSegmentDbId())

        host_to_recovery_info[target_seg.getSegmentHostName()].append(
            RecoveryInfo(target_seg.getSegmentDataDirectory(),
                         target_seg.getSegmentPort(),
                         target_seg.getSegmentDbId(),
                         live_seg.getSegmentHostName(),
                         live_seg.getSegmentPort(),
                         is_full,
                         progress_file))

    return host_to_recovery_info
def createConfigureNewSegmentCommand(hostName, cmdLabel, validationOnly):
    # Build a remote ConfigureNewSegment command for one host from the
    # per-host segment info captured in the enclosing scope; the info must
    # exist for every host we are about to configure.
    infoForHost = newSegmentInfo[hostName]
    checkNotNone("segmentInfo for %s" % hostName, infoForHost)
    return gp.ConfigureNewSegment(cmdLabel,
                                  infoForHost,
                                  gplog.get_logger_dir(),
                                  newSegments=True,
                                  verbose=gplog.logging_is_verbose(),
                                  batchSize=self.__parallelDegree,
                                  ctxt=gp.REMOTE,
                                  remoteHost=hostName,
                                  validationOnly=validationOnly,
                                  forceoverwrite=self.__forceoverwrite)
def createConfigureNewSegmentCommand(hostName, cmdLabel, validationOnly):
    # Build a remote ConfigureNewSegment command for one host.
    # NOTE(review): duplicate of the sibling helper above — presumably from
    # two revisions of this file; confirm and consolidate.
    segmentInfo = newSegmentInfo[hostName]
    checkNotNone("segmentInfo for %s" % hostName, segmentInfo)
    return gp.ConfigureNewSegment(cmdLabel, segmentInfo, gplog.get_logger_dir(),
                                  newSegments=True,
                                  verbose=gplog.logging_is_verbose(),
                                  batchSize=self.__parallelDegree,
                                  ctxt=gp.REMOTE,
                                  remoteHost=hostName,
                                  validationOnly=validationOnly,
                                  forceoverwrite=self.__forceoverwrite)
def __updateGpIdFile(self, gpEnv, gpArray, segments):
    """Rewrite the gpid file on each of the given segments.

    Issues one remote ConfigureNewSegment command per host with
    writeGpIdFileOnly=True, then runs the whole batch through the worker
    pool.
    """
    segmentsByHost = GpArray.getSegmentsByHostName(segments)
    segmentInfoByHost = gp.ConfigureNewSegment.buildSegmentInfoForNewSegment(segments)

    updateCmds = []
    for host in segmentsByHost:
        hostInfo = segmentInfoByHost[host]
        checkNotNone("segmentInfo for %s" % host, hostInfo)
        updateCmds.append(gp.ConfigureNewSegment("update gpid file",
                                                 hostInfo,
                                                 gplog.get_logger_dir(),
                                                 newSegments=False,
                                                 verbose=gplog.logging_is_verbose(),
                                                 batchSize=self.__parallelDegree,
                                                 ctxt=gp.REMOTE,
                                                 remoteHost=host,
                                                 validationOnly=False,
                                                 writeGpIdFileOnly=True))

    self.__runWaitAndCheckWorkerPoolForErrorsAndClear(updateCmds, "writing updated gpid files")
def __updateGpIdFile(self, gpEnv, gpArray, segments):
    """Rewrite the gpid file on each of the given segments via one remote
    ConfigureNewSegment command per host (writeGpIdFileOnly=True).

    NOTE(review): duplicate of the sibling __updateGpIdFile above —
    presumably from two revisions of this file; confirm and consolidate.
    """
    segmentByHost = GpArray.getSegmentsByHostName(segments)
    newSegmentInfo = gp.ConfigureNewSegment.buildSegmentInfoForNewSegment(segments)
    cmds = []
    for hostName in segmentByHost.keys():
        segmentInfo = newSegmentInfo[hostName]
        checkNotNone("segmentInfo for %s" % hostName, segmentInfo)
        cmd = gp.ConfigureNewSegment("update gpid file",
                                     segmentInfo,
                                     gplog.get_logger_dir(),
                                     newSegments=False,
                                     verbose=gplog.logging_is_verbose(),
                                     batchSize=self.__parallelDegree,
                                     ctxt=gp.REMOTE,
                                     remoteHost=hostName,
                                     validationOnly=False,
                                     writeGpIdFileOnly=True)
        cmds.append(cmd)
    # Run the whole batch and surface any per-host failures.
    self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "writing updated gpid files")
def _do_setup_for_recovery(self, recovery_info_by_host):
    """Validate and prepare data directories on every recovery target host.

    Dispatches one GpSegSetupRecovery command per host through the worker
    pool and returns the completed command objects for the caller to check.
    """
    self.__logger.info('Setting up the required segments for recovery')

    setup_cmds = [
        gp.GpSegSetupRecovery('Run validation checks and setup data directories for recovery',
                              recoveryinfo.serialize_list(info_list),
                              gplog.get_logger_dir(),
                              verbose=gplog.logging_is_verbose(),
                              batchSize=self.__parallelPerHost,
                              remoteHost=host,
                              forceoverwrite=self.__forceoverwrite)
        for host, info_list in recovery_info_by_host.items()]

    for setup_cmd in setup_cmds:
        self.__pool.addCommand(setup_cmd)

    # Show a progress indicator while waiting unless quiet output was requested.
    if not self.__quiet:
        base.join_and_indicate_progress(self.__pool)
    else:
        self.__pool.join()

    completed = self.__pool.getCompletedItems()
    self.__pool.empty_completed_items()
    return completed
def _do_recovery(self, recovery_info_by_host, gpEnv):
    """
    Recover and start segments using gpsegrecovery, which will internally
    call either pg_basebackup or pg_rewind. gprecoverseg generates a log
    filename which is passed to gpsegrecovery using the confinfo parameter.
    gprecoverseg tails this file to show recovery progress to the user, and
    removes the file when done. A new file is generated for each run of
    gprecoverseg based on a timestamp.
    :param gpEnv:
    :param recovery_info_by_host:
    :return: completed recovery command results
    """
    self.__logger.info(
        'Initiating segment recovery. Upon completion, will start the successfully recovered segments'
    )

    recovery_cmds = []
    progress_cmds = []
    era = read_era(gpEnv.getCoordinatorDataDir(), logger=self.__logger)
    for host_name, info_list in recovery_info_by_host.items():
        # One progress-tailing command per segment being recovered;
        # _get_progress_cmd may return nothing (e.g. progress disabled).
        for info in info_list:
            progress_cmd = self._get_progress_cmd(info.progress_file,
                                                  info.target_segment_dbid,
                                                  host_name)
            if progress_cmd:
                progress_cmds.append(progress_cmd)

        recovery_cmds.append(
            gp.GpSegRecovery('Recover segments',
                             recoveryinfo.serialize_list(info_list),
                             gplog.get_logger_dir(),
                             verbose=gplog.logging_is_verbose(),
                             batchSize=self.__parallelPerHost,
                             remoteHost=host_name,
                             era=era,
                             forceoverwrite=self.__forceoverwrite))

    return self.__runWaitAndCheckWorkerPoolForErrorsAndClear(recovery_cmds,
                                                             suppressErrorCheck=True,
                                                             progressCmds=progress_cmds)
def __copySegmentDirectories(self, gpEnv, gpArray, directives):
    """
    Copy segment directories to recovery targets via pg_basebackup.

    directives should be composed of GpCopySegmentDirectoryDirective values.

    Phases: (1) validate target directories on every host, (2) run
    gpconfigurenewsegment to populate them (tailing per-segment progress
    files), (3) copy any db_dumps directories from the old segments.
    """
    if len(directives) == 0:
        return

    srcSegments = []
    destSegments = []
    isTargetReusedLocation = []
    # One timestamp per run; it makes each run's progress files unique.
    timeStamp = datetime.datetime.today().strftime('%Y%m%d_%H%M%S')
    for directive in directives:
        srcSegment = directive.getSrcSegment()
        destSegment = directive.getDestSegment()
        destSegment.primaryHostname = srcSegment.getSegmentHostName()
        destSegment.primarySegmentPort = srcSegment.getSegmentPort()
        destSegment.progressFile = '%s/pg_basebackup.%s.dbid%s.out' % (
            gplog.get_logger_dir(), timeStamp, destSegment.getSegmentDbId())
        srcSegments.append(srcSegment)
        destSegments.append(destSegment)
        isTargetReusedLocation.append(directive.isTargetReusedLocation())

    destSegmentByHost = GpArray.getSegmentsByHostName(destSegments)
    newSegmentInfo = gp.ConfigureNewSegment.buildSegmentInfoForNewSegment(
        destSegments, isTargetReusedLocation)

    def createConfigureNewSegmentCommand(hostName, cmdLabel, validationOnly):
        # Build a remote ConfigureNewSegment command for one host.
        segmentInfo = newSegmentInfo[hostName]
        checkNotNone("segmentInfo for %s" % hostName, segmentInfo)
        return gp.ConfigureNewSegment(cmdLabel, segmentInfo,
                                      gplog.get_logger_dir(),
                                      newSegments=True,
                                      verbose=gplog.logging_is_verbose(),
                                      batchSize=self.__parallelDegree,
                                      ctxt=gp.REMOTE,
                                      remoteHost=hostName,
                                      validationOnly=validationOnly,
                                      forceoverwrite=self.__forceoverwrite)

    #
    # validate directories for target segments
    #
    self.__logger.info('Validating remote directories')
    cmds = []
    for hostName in list(destSegmentByHost.keys()):
        cmds.append(
            createConfigureNewSegmentCommand(hostName, 'validate blank segments', True))
    for cmd in cmds:
        self.__pool.addCommand(cmd)

    if self.__quiet:
        self.__pool.join()
    else:
        base.join_and_indicate_progress(self.__pool)

    # Collect validation failures from every host before raising, so the
    # user sees them all at once.
    validationErrors = []
    for item in self.__pool.getCompletedItems():
        results = item.get_results()
        if not results.wasSuccessful():
            if results.rc == 1:
                # stdoutFromFailure = results.stdout.replace("\n", " ").strip()
                lines = results.stderr.split("\n")
                for line in lines:
                    if len(line.strip()) > 0:
                        validationErrors.append(
                            "Validation failure on host %s %s" % (item.remoteHost, line))
            else:
                validationErrors.append(str(item))
    self.__pool.empty_completed_items()
    if validationErrors:
        raise ExceptionNoStackTraceNeeded("\n" + ("\n".join(validationErrors)))

    # Configure a new segment
    #
    # Recover segments using gpconfigurenewsegment, which
    # uses pg_basebackup. gprecoverseg generates a log filename which is
    # passed to gpconfigurenewsegment as a confinfo parameter. gprecoverseg
    # tails this file to show recovery progress to the user, and removes the
    # file when done. A new file is generated for each run of
    # gprecoverseg based on a timestamp.
    self.__logger.info('Configuring new segments')
    cmds = []
    progressCmds = []
    removeCmds = []
    for hostName in list(destSegmentByHost.keys()):
        for segment in destSegmentByHost[hostName]:
            progressCmd, removeCmd = self.__getProgressAndRemoveCmds(
                segment.progressFile, segment.getSegmentDbId(), hostName)
            removeCmds.append(removeCmd)
            if progressCmd:
                progressCmds.append(progressCmd)
        cmds.append(
            createConfigureNewSegmentCommand(hostName, 'configure blank segments', False))

    self.__runWaitAndCheckWorkerPoolForErrorsAndClear(
        cmds, "unpacking basic segment directory",
        suppressErrorCheck=False, progressCmds=progressCmds)

    self.__runWaitAndCheckWorkerPoolForErrorsAndClear(
        removeCmds, "removing pg_basebackup progress logfiles",
        suppressErrorCheck=False)

    #
    # copy dump files from old segment to new segment
    #
    for srcSeg in srcSegments:
        for destSeg in destSegments:
            if srcSeg.content == destSeg.content:
                src_dump_dir = os.path.join(srcSeg.getSegmentDataDirectory(), 'db_dumps')
                cmd = base.Command('check existence of db_dumps directory',
                                   'ls %s' % (src_dump_dir),
                                   ctxt=base.REMOTE,
                                   remoteHost=destSeg.getSegmentAddress())
                cmd.run()
                if cmd.results.rc == 0:  # Only try to copy directory if it exists
                    cmd = Scp('copy db_dumps from old segment to new segment',
                              os.path.join(srcSeg.getSegmentDataDirectory(), 'db_dumps*', '*'),
                              os.path.join(destSeg.getSegmentDataDirectory(), 'db_dumps'),
                              srcSeg.getSegmentAddress(),
                              destSeg.getSegmentAddress(),
                              recursive=True)
                    cmd.run(validateAfter=True)
                # Matching content found; stop scanning destSegments.
                # NOTE(review): break placement reconstructed from mangled
                # source — confirm against upstream history.
                break
def get_recovery_progress_file(gplog):
    """Return the path of the coordinator-side recovery progress file.

    gpstate reads this file to display recovery progress. The *gplog*
    argument supplies get_logger_dir(), which determines the directory.
    """
    logger_dir = gplog.get_logger_dir()
    return "{}/recovery_progress.file".format(logger_dir)
def __copySegmentDirectories(self, gpEnv, gpArray, directives):
    """
    Copy segment directories to recovery targets via pg_basebackup.

    directives should be composed of GpCopySegmentDirectoryDirective values.

    NOTE(review): near-duplicate of the earlier __copySegmentDirectories —
    this variant builds the progress/remove commands inline; confirm which
    revision is current and consolidate.
    """
    if len(directives) == 0:
        return

    srcSegments = []
    destSegments = []
    isTargetReusedLocation = []
    # One timestamp per run; it makes each run's progress files unique.
    timeStamp = datetime.datetime.today().strftime('%Y%m%d_%H%M%S')
    for directive in directives:
        srcSegment = directive.getSrcSegment()
        destSegment = directive.getDestSegment()
        destSegment.primaryHostname = srcSegment.getSegmentHostName()
        destSegment.primarySegmentPort = srcSegment.getSegmentPort()
        destSegment.progressFile = '%s/pg_basebackup.%s.dbid%s.out' % (
            gplog.get_logger_dir(), timeStamp, destSegment.getSegmentDbId())
        srcSegments.append(srcSegment)
        destSegments.append(destSegment)
        isTargetReusedLocation.append(directive.isTargetReusedLocation())

    destSegmentByHost = GpArray.getSegmentsByHostName(destSegments)
    newSegmentInfo = gp.ConfigureNewSegment.buildSegmentInfoForNewSegment(
        destSegments, isTargetReusedLocation)

    def createConfigureNewSegmentCommand(hostName, cmdLabel, validationOnly):
        # Build a remote ConfigureNewSegment command for one host.
        segmentInfo = newSegmentInfo[hostName]
        checkNotNone("segmentInfo for %s" % hostName, segmentInfo)
        return gp.ConfigureNewSegment(cmdLabel, segmentInfo,
                                      gplog.get_logger_dir(),
                                      newSegments=True,
                                      verbose=gplog.logging_is_verbose(),
                                      batchSize=self.__parallelDegree,
                                      ctxt=gp.REMOTE,
                                      remoteHost=hostName,
                                      validationOnly=validationOnly,
                                      forceoverwrite=self.__forceoverwrite)

    #
    # validate directories for target segments
    #
    self.__logger.info('Validating remote directories')
    cmds = []
    for hostName in destSegmentByHost.keys():
        cmds.append(createConfigureNewSegmentCommand(hostName, 'validate blank segments', True))
    for cmd in cmds:
        self.__pool.addCommand(cmd)

    if self.__quiet:
        self.__pool.join()
    else:
        base.join_and_indicate_progress(self.__pool)

    # Collect validation failures from every host before raising, so the
    # user sees them all at once.
    validationErrors = []
    for item in self.__pool.getCompletedItems():
        results = item.get_results()
        if not results.wasSuccessful():
            if results.rc == 1:
                # stdoutFromFailure = results.stdout.replace("\n", " ").strip()
                lines = results.stderr.split("\n")
                for line in lines:
                    if len(line.strip()) > 0:
                        validationErrors.append(
                            "Validation failure on host %s %s" % (item.remoteHost, line))
            else:
                validationErrors.append(str(item))
    self.__pool.empty_completed_items()
    if validationErrors:
        raise ExceptionNoStackTraceNeeded("\n" + ("\n".join(validationErrors)))

    # Configure a new segment
    #
    # Recover segments using gpconfigurenewsegment, which
    # uses pg_basebackup. gprecoverseg generates a log filename which is
    # passed to gpconfigurenewsegment as a confinfo parameter. gprecoverseg
    # tails this file to show recovery progress to the user, and removes the
    # file when done. A new file is generated for each run of
    # gprecoverseg based on a timestamp.
    #
    # There is race between when the pg_basebackup log file is created and
    # when the progress command is run. Thus, the progress command touches
    # the file to ensure its present before tailing.
    self.__logger.info('Configuring new segments')
    cmds = []
    progressCmds = []
    removeCmds = []
    for hostName in destSegmentByHost.keys():
        for segment in destSegmentByHost[hostName]:
            if self.__progressMode != GpMirrorListToBuild.Progress.NONE:
                # touch -a guards against the tail racing the file creation;
                # tr converts pg_basebackup's \r progress updates to lines.
                progressCmds.append(
                    GpMirrorListToBuild.ProgressCommand(
                        "tail the last line of the file",
                        "set -o pipefail; touch -a {0}; tail -1 {0} | tr '\\r' '\\n' | tail -1".format(
                            pipes.quote(segment.progressFile)),
                        segment.getSegmentDbId(),
                        segment.progressFile,
                        ctxt=base.REMOTE,
                        remoteHost=hostName))
            removeCmds.append(
                base.Command("remove file",
                             "rm -f %s" % pipes.quote(segment.progressFile),
                             ctxt=base.REMOTE,
                             remoteHost=hostName))
        cmds.append(
            createConfigureNewSegmentCommand(hostName, 'configure blank segments', False))

    self.__runWaitAndCheckWorkerPoolForErrorsAndClear(
        cmds, "unpacking basic segment directory",
        suppressErrorCheck=False, progressCmds=progressCmds)

    self.__runWaitAndCheckWorkerPoolForErrorsAndClear(
        removeCmds, "removing pg_basebackup progress logfiles",
        suppressErrorCheck=False)

    #
    # copy dump files from old segment to new segment
    #
    for srcSeg in srcSegments:
        for destSeg in destSegments:
            if srcSeg.content == destSeg.content:
                src_dump_dir = os.path.join(srcSeg.getSegmentDataDirectory(), 'db_dumps')
                cmd = base.Command('check existence of db_dumps directory',
                                   'ls %s' % (src_dump_dir),
                                   ctxt=base.REMOTE,
                                   remoteHost=destSeg.getSegmentAddress())
                cmd.run()
                if cmd.results.rc == 0:  # Only try to copy directory if it exists
                    cmd = Scp('copy db_dumps from old segment to new segment',
                              os.path.join(srcSeg.getSegmentDataDirectory(), 'db_dumps*', '*'),
                              os.path.join(destSeg.getSegmentDataDirectory(), 'db_dumps'),
                              srcSeg.getSegmentAddress(),
                              destSeg.getSegmentAddress(),
                              recursive=True)
                    cmd.run(validateAfter=True)
                # Matching content found; stop scanning destSegments.
                # NOTE(review): break placement reconstructed from mangled
                # source — confirm against upstream history.
                break