def __init__(self, dblist, gpversion, collation, targetMirroringMode,
             pickledTransitionData):
    """Validate the local install and collation, then store state.

    Args:
        dblist: segments to operate on; its length sizes the worker pool.
        gpversion: version the local software is expected to report.
        collation: colon-separated string with exactly three fields, stored
            as expected_lc_collate, expected_lc_monetary and
            expected_lc_numeric (in that order).
        targetMirroringMode: stored unchanged on the instance.
        pickledTransitionData: stored unchanged on the instance.

    Raises:
        Exception: if the local software version differs from ``gpversion``
            or ``collation`` does not split into exactly three fields.
    """
    self.dblist = dblist
    self.expected_gpversion = gpversion
    self.targetMirroringMode = targetMirroringMode
    self.pickledTransitionData = pickledTransitionData

    # The version is looked up relative to the parent of the current
    # working directory.
    self.gphome = os.path.abspath(os.pardir)
    self.actual_gpversion = gp.GpVersion.local(
        'local GP software version check', self.gphome)
    if self.actual_gpversion != self.expected_gpversion:
        raise Exception(
            "Local Software Version does not match what is expected.\n"
            "The local software version is: '%s'\n"
            "But we were expecting it to be: '%s'\n"
            "Please review and correct" %
            (self.actual_gpversion, self.expected_gpversion))

    collation_strings = collation.split(':')
    if len(collation_strings) != 3:
        raise Exception("Invalid collation string specified!")
    (self.expected_lc_collate,
     self.expected_lc_monetary,
     self.expected_lc_numeric) = collation_strings

    # Build the pool only after every check that can raise has passed, so a
    # failed construction never leaves a pool behind that nobody can halt.
    # NOTE(review): presumably WorkerPool starts its workers on
    # construction -- confirm against base.WorkerPool.
    self.pool = base.WorkerPool(numWorkers=len(dblist))
def run(self):
    """Issue a SegStop command for every entry in ``self.dblist``.

    A fresh worker pool is stored on ``self.pool``, one command per segment
    is queued, and the collected results are logged in a single message.

    Returns:
        1 if any completed command's result has a false ``stopped``
        attribute, otherwise 0.
    """
    self.logger.info("Issuing shutdown commands to local segments...")

    self.pool = base.WorkerPool()
    for segment in self.dblist:
        self.pool.addCommand(
            SegStop('segment shutdown', db=segment, mode=self.mode,
                    timeout=self.timeout))
    self.pool.join()

    outcomes = [item.get_results()
                for item in self.pool.getCompletedItems()]
    # Inspect every result (no short-circuit) before deciding the exit code.
    stopped_flags = [bool(outcome.stopped) for outcome in outcomes]
    any_failed = not all(stopped_flags)

    # Log the results!
    report = '\nCOMMAND RESULTS\n' + ''.join(
        str(outcome) + "\n" for outcome in outcomes)
    self.logger.info(report)

    if any_failed:
        return 1
    return 0
def get_unreachable_segment_hosts(hosts, num_workers):
    """Return the sorted list of hosts that did not answer an SSH echo.

    Each host is asked, over ssh, to echo its own name. A host counts as
    reachable when its command exits with rc 0; the reachable name is taken
    from the command's stripped stdout.

    Args:
        hosts: iterable of host names to probe.
        num_workers: number of workers for the pool running the probes.

    Returns:
        Sorted list of the hosts in ``hosts`` that were not seen as
        reachable (empty when all answered). A warning is logged for each.
    """
    pool = base.WorkerPool(numWorkers=num_workers)
    try:
        for host in hosts:
            pool.addCommand(
                Command(name='check %s is up' % host,
                        cmdStr="ssh %s 'echo %s'" % (host, host)))
        pool.join()
    finally:
        # Always stop the workers, even if queuing or joining failed.
        pool.haltWork()
        pool.joinWorkers()

    # There's no good way to map a CommandResult back to its originating
    # Command. To determine reachable hosts parse the stdout of the
    # successful commands.
    reachable_hosts = set()
    for item in pool.getCompletedItems():
        result = item.get_results()
        if result.rc == 0:
            reachable_hosts.add(result.stdout.strip())

    # Sort once; the same ordered list is both logged and returned.
    unreachable_hosts = sorted(set(hosts) - reachable_hosts)
    if unreachable_hosts:
        logger.warning("One or more hosts are not reachable via SSH.")
        for host in unreachable_hosts:
            logger.warning("Host %s is unreachable" % host)
    return unreachable_hosts
def run(self):
    """Add mirrors to the cluster, or only write a sample config file.

    Returns:
        0 on success.

    Raises:
        ProgramArgumentValidationException: parallelDegree is outside 1..64.
        ExceptionNoStackTraceNeeded: the loaded configuration reports a
            fault strategy other than FAULT_STRATEGY_NONE.
        UserAbortedException: the interactive confirmation was declined.
    """
    degree = self.__options.parallelDegree
    if degree < 1 or degree > 64:
        raise ProgramArgumentValidationException(
            "Invalid parallelDegree provided with -B argument: %d" % degree)

    self.__pool = base.WorkerPool(degree)
    gpEnv = GpMasterEnvironment(self.__options.masterDataDirectory, True)

    prober = faultProberInterface.getFaultProber()
    prober.initializeProber(gpEnv.getMasterPort())
    provider = configInterface.getConfigurationProvider()
    confProvider = provider.initializeProvider(gpEnv.getMasterPort())
    gpArray = confProvider.loadSystemConfig(useUtilityMode=False)

    # heap_checksums must be consistent across the cluster; fail immediately
    # if not
    self.validate_heap_checksums(gpArray)

    # mirrors can only be added to a cluster that has no fault strategy yet
    strategy = gpArray.getFaultStrategy()
    if strategy != gparray.FAULT_STRATEGY_NONE:
        raise ExceptionNoStackTraceNeeded(
            "GPDB physical mirroring cannot be added. The cluster is already configured with %s."
            % gparray.getFaultStrategyLabel(strategy))

    # figure out what needs to be done
    mirrorBuilder = self.__getMirrorsToBuildBasedOnOptions(gpEnv, gpArray)
    mirrorBuilder.checkForPortAndDirectoryConflicts(gpArray)

    sample_file = self.__options.outputSampleConfigFile
    if sample_file is not None:
        # just output config file and done
        self.__outputToFile(mirrorBuilder, sample_file, gpArray)
        logger.info('Configuration file output to %s successfully.'
                    % sample_file)
        return 0  # success -- exit code 0!

    self.__displayAddMirrors(gpEnv, mirrorBuilder, gpArray)
    if self.__options.interactive and not userinput.ask_yesno(
            None, "\nContinue with add mirrors procedure", 'N'):
        raise UserAbortedException()

    gpArray.setFaultStrategy(gparray.FAULT_STRATEGY_FILE_REPLICATION)
    mirrorBuilder.buildMirrors("add", gpEnv, gpArray)

    banner = "******************************************************************"
    summary = (
        banner,
        "Mirror segments have been added; data synchronization is in progress.",
        "Data synchronization will continue in the background.",
        "",
        "Use gpstate -s to check the resynchronization progress.",
        banner,
    )
    for line in summary:
        logger.info(line)

    return 0  # success -- exit code 0!
def __init__(self, dblist, gpversion, collation, mirroringMode, num_cids,
             era, timeout, pickledTransitionData, specialMode, wrapper,
             wrapper_args, logfileDirectory=False):
    """Parse the segment list, validate the arguments and set up state.

    Args:
        dblist: iterable of strings, each turned into a segment object via
            gparray.GpDB.initFromString.
        gpversion: version the local software is expected to report.
        collation: colon-separated string with exactly three fields, stored
            as expected_lc_collate, expected_lc_monetary and
            expected_lc_numeric (in that order).
        mirroringMode, num_cids, era, timeout, pickledTransitionData,
        wrapper, wrapper_args, logfileDirectory: stored unchanged.
        specialMode: one of None, 'upgrade' or 'maintenance'.

    Raises:
        Exception: if the local software version differs from ``gpversion``
            or ``collation`` does not split into exactly three fields.
        AssertionError: if ``specialMode`` is not an accepted value.
    """
    # validate/store arguments
    #
    # Materialize the parsed segments: a bare map() is a one-shot iterator
    # on Python 3, which would be exhausted after the first traversal.
    self.dblist = list(map(gparray.GpDB.initFromString, dblist))

    expected_gpversion = gpversion
    actual_gpversion = gp.GpVersion.local(
        'local GP software version check', os.path.abspath(os.pardir))
    if actual_gpversion != expected_gpversion:
        raise Exception(
            "Local Software Version does not match what is expected.\n"
            "The local software version is: '%s'\n"
            "But we were expecting it to be: '%s'\n"
            "Please review and correct" %
            (actual_gpversion, expected_gpversion))

    collation_strings = collation.split(':')
    if len(collation_strings) != 3:
        raise Exception("Invalid collation string specified!")
    (self.expected_lc_collate,
     self.expected_lc_monetary,
     self.expected_lc_numeric) = collation_strings

    self.mirroringMode = mirroringMode
    self.num_cids = num_cids
    self.era = era
    self.timeout = timeout
    self.pickledTransitionData = pickledTransitionData

    assert (specialMode in [None, 'upgrade', 'maintenance'])
    self.specialMode = specialMode

    self.wrapper = wrapper
    self.wrapper_args = wrapper_args

    # initialize state
    #
    # One worker per entry of the caller-supplied list.
    self.pool = base.WorkerPool(numWorkers=len(dblist))
    self.logger = logger
    self.overall_status = None
    self.logfileDirectory = logfileDirectory
def __init__(self, dblist, gpversion, mirroringMode, num_cids, era, timeout,
             pickledTransitionData, specialMode, wrapper, wrapper_args,
             coordinator_checksum_version, segment_batch_size,
             logfileDirectory=False):
    """Parse the segment list, check the local version and set up state.

    Args:
        dblist: iterable of strings, each turned into a segment object via
            gparray.Segment.initFromString.
        gpversion: version the local software is expected to report.
        mirroringMode, num_cids, era, timeout, pickledTransitionData,
        wrapper, wrapper_args, coordinator_checksum_version,
        logfileDirectory: stored unchanged on the instance.
        specialMode: one of None, 'upgrade' or 'maintenance'.
        segment_batch_size: upper bound on the number of pool workers.

    Raises:
        Exception: if the local software version differs from ``gpversion``.
        AssertionError: if ``specialMode`` is not an accepted value.
    """
    # validate/store arguments
    #
    parse_segment = gparray.Segment.initFromString
    self.dblist = [parse_segment(entry) for entry in dblist]

    local_version = gp.GpVersion.local('local GP software version check',
                                       os.path.abspath(os.pardir))
    if local_version != gpversion:
        mismatch = (
            "Local Software Version does not match what is expected.\n"
            "The local software version is: '%s'\n"
            "But we were expecting it to be: '%s'\n"
            "Please review and correct")
        raise Exception(mismatch % (local_version, gpversion))

    self.mirroringMode = mirroringMode
    self.num_cids = num_cids
    self.era = era
    self.timeout = timeout
    self.pickledTransitionData = pickledTransitionData

    assert specialMode in [None, 'upgrade', 'maintenance']
    self.specialMode = specialMode

    self.wrapper = wrapper
    self.wrapper_args = wrapper_args

    # initialize state
    #
    # Never more workers than segments, and never more than the batch size.
    worker_count = min(len(dblist), segment_batch_size)
    self.pool = base.WorkerPool(numWorkers=worker_count)
    self.logger = logger
    self.overall_status = None
    self.logfileDirectory = logfileDirectory
    self.coordinator_checksum_version = coordinator_checksum_version
def validate(self):
    """Run ``gpcheckresgroupimpl`` on every distinct host of the cluster.

    The host list comes from the catalog (utility-mode connection); one
    remote command is queued per distinct host.

    Returns:
        None when every command succeeded; otherwise a comma-separated
        string of ``[host:stderr]`` entries, one per failed command.
    """
    pool = base.WorkerPool()
    try:
        gp_array = GpArray.initFromCatalog(dbconn.DbURL(), utility=True)
        # De-duplicate so each host is checked once.
        for host in set(gp_array.get_hostlist(True)):
            pool.addCommand(
                Command(host, "gpcheckresgroupimpl", REMOTE, host))
        pool.join()

        failed = ["[%s:%s]" % (item.remoteHost, item.get_stderr().rstrip())
                  for item in pool.getCompletedItems()
                  if not item.was_successful()]
    finally:
        # Stop the workers even when the catalog lookup, queuing or joining
        # raised; otherwise the pool would never be halted.
        pool.haltWork()
        pool.joinWorkers()

    if failed:
        return ",".join(failed)
    return None
def __init__(self, dblist, gpversion, targetMirroringMode,
             pickledTransitionData):
    """Validate the local software version, then store state.

    Args:
        dblist: segments to operate on; its length sizes the worker pool.
        gpversion: version the local software is expected to report.
        targetMirroringMode: stored unchanged on the instance.
        pickledTransitionData: stored unchanged on the instance.

    Raises:
        Exception: if the local software version differs from ``gpversion``.
    """
    self.dblist = dblist
    self.expected_gpversion = gpversion
    self.targetMirroringMode = targetMirroringMode
    self.pickledTransitionData = pickledTransitionData

    # The version is looked up relative to the parent of the current
    # working directory.
    self.gphome = os.path.abspath(os.pardir)
    self.actual_gpversion = gp.GpVersion.local(
        'local GP software version check', self.gphome)
    if self.actual_gpversion != self.expected_gpversion:
        raise Exception(
            "Local Software Version does not match what is expected.\n"
            "The local software version is: '%s'\n"
            "But we were expecting it to be: '%s'\n"
            "Please review and correct" %
            (self.actual_gpversion, self.expected_gpversion))

    # Build the pool only after the version check has passed, so a failed
    # construction never leaves a pool behind that nobody can halt.
    # NOTE(review): presumably WorkerPool starts its workers on
    # construction -- confirm against base.WorkerPool.
    self.pool = base.WorkerPool(numWorkers=len(dblist))
def rebalance(self):
    """Stop unbalanced primaries, reconfigure, then run ``gprecoverseg -a``.

    Only pairs that are up, reachable and synchronized are rebalanced;
    other unbalanced pairs are skipped with a warning.

    Returns:
        True when there was nothing to rebalance or every stop command
        succeeded (full rebalance); False when at least one stop command
        failed (partial rebalance).

    Raises:
        Exception: if the nested gprecoverseg run exits with a non-zero
            code.
    """
    self.logger.info("Determining primary and mirror segment pairs to rebalance")

    # The current implementation of rebalance calls "gprecoverseg -a" below.
    # Thus, if another balanced pair is not synchronized, or has a down
    # mirror that pair will be recovered as a side-effect of rebalancing.
    unbalanced_primary_segs = []
    for segmentPair in self.gpArray.segmentPairs:
        if segmentPair.balanced():
            continue

        if segmentPair.up() and segmentPair.reachable() and segmentPair.synchronized():
            unbalanced_primary_segs.append(segmentPair.primaryDB)
        else:
            self.logger.warning(
                "Not rebalancing primary segment dbid %d with its mirror dbid %d because one is either down, unreachable, or not synchronized"
                % (segmentPair.primaryDB.dbid, segmentPair.mirrorDB.dbid))

    if not unbalanced_primary_segs:
        self.logger.info("No segments to rebalance")
        return True

    # Regroup by host: one stop command is issued per host.
    unbalanced_primary_segs = GpArray.getSegmentsByHostName(unbalanced_primary_segs)

    pool = base.WorkerPool(min(len(unbalanced_primary_segs), self.batch_size))
    try:
        # Disable ctrl-c
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        self.logger.info("Stopping unbalanced primary segments...")
        for hostname in list(unbalanced_primary_segs.keys()):
            cmd = GpSegStopCmd("stop unbalanced primary segs",
                               self.gpEnv.getGpHome(),
                               self.gpEnv.getGpVersion(),
                               'fast',
                               unbalanced_primary_segs[hostname],
                               ctxt=base.REMOTE,
                               remoteHost=hostname,
                               timeout=600,
                               segment_batch_size=self.segment_batch_size)
            pool.addCommand(cmd)

        base.join_and_indicate_progress(pool)

        failed_count = sum(
            1 for res in pool.getCompletedItems()
            if not res.get_results().wasSuccessful())

        allSegmentsStopped = (failed_count == 0)

        if not allSegmentsStopped:
            self.logger.warning("%d segments failed to stop. A full rebalance of the" % failed_count)
            self.logger.warning("system is not possible at this time. Please check the")
            self.logger.warning("log files, correct the problem, and run gprecoverseg -r")
            self.logger.warning("again.")
            self.logger.info("gprecoverseg will continue with a partial rebalance.")

        pool.empty_completed_items()
        segment_reconfigurer = SegmentReconfigurer(logger=self.logger,
                                                   worker_pool=pool,
                                                   timeout=MIRROR_PROMOTION_TIMEOUT)
        segment_reconfigurer.reconfigure()

        # Final step is to issue a recoverseg operation to resync segments
        self.logger.info("Starting segment synchronization")
        original_sys_args = sys.argv[:]
        self.logger.info("=============================START ANOTHER RECOVER=========================================")
        # import here because GpRecoverSegmentProgram and
        # GpSegmentRebalanceOperation have a circular dependency
        from gppylib.programs.clsRecoverSegment import GpRecoverSegmentProgram
        cmd_args = ['gprecoverseg', '-a', '-B', str(self.batch_size),
                    '-b', str(self.segment_batch_size)]

        recover_cmd = None
        try:
            # parse_args() is called without arguments, so sys.argv is
            # swapped for the duration of the nested program. Everything
            # from the swap onwards sits inside this try so sys.argv is
            # restored even when parser/program construction fails.
            sys.argv = cmd_args[:]
            local_parser = GpRecoverSegmentProgram.createParser()
            local_options, args = local_parser.parse_args()
            recover_cmd = GpRecoverSegmentProgram.createProgram(local_options, args)
            try:
                recover_cmd.run()
            except SystemExit as e:
                # A zero exit code is treated as success and swallowed.
                if e.code != 0:
                    self.logger.error("Failed to start the synchronization step of the segment rebalance.")
                    self.logger.error("Check the gprecoverseg log file, correct any problems, and re-run")
                    self.logger.error(' '.join(cmd_args))
                    raise Exception("Error synchronizing.\nError: %s" % str(e))
        finally:
            if recover_cmd:
                recover_cmd.cleanup()
            sys.argv = original_sys_args
            self.logger.info("==============================END ANOTHER RECOVER==========================================")
    finally:
        pool.join()
        pool.haltWork()
        pool.joinWorkers()
        # NOTE(review): this installs the default handler rather than the
        # handler that was active on entry -- confirm that is intended.
        signal.signal(signal.SIGINT, signal.default_int_handler)

    return allSegmentsStopped  # if all segments stopped, then a full rebalance was done
def run(self):
    """Add mirrors to the cluster, or only write a sample config file.

    Returns:
        0 on success (including the sample-config-only path), 1 when
        ``buildMirrors`` reports failure.

    Raises:
        ProgramArgumentValidationException: batch_size or
            segment_batch_size is outside its allowed range.
        ExceptionNoStackTraceNeeded: the cluster already has mirrors.
        UserAbortedException: the interactive confirmation was declined.
    """
    options = self.__options

    batch_size = options.batch_size
    if batch_size < 1 or batch_size > gp.MAX_COORDINATOR_NUM_WORKERS:
        raise ProgramArgumentValidationException(
            "Invalid batch_size provided with -B argument: %d" % batch_size)

    segment_batch_size = options.segment_batch_size
    if segment_batch_size < 1 or segment_batch_size > gp.MAX_SEGHOST_NUM_WORKERS:
        raise ProgramArgumentValidationException(
            "Invalid segment_batch_size provided with -b argument: %d"
            % segment_batch_size)

    self.__pool = base.WorkerPool(batch_size)
    gpEnv = GpCoordinatorEnvironment(options.coordinatorDataDirectory, True)

    prober = faultProberInterface.getFaultProber()
    prober.initializeProber(gpEnv.getCoordinatorPort())
    provider = configInterface.getConfigurationProvider()
    confProvider = provider.initializeProvider(gpEnv.getCoordinatorPort())
    gpArray = confProvider.loadSystemConfig(useUtilityMode=False)

    # heap_checksums must be consistent across the cluster; fail immediately
    # if not
    self.validate_heap_checksums(gpArray)

    # the mirror offset is only checked when no mirror config file is given
    if options.mirrorConfigFile is None:
        self.checkMirrorOffset(gpArray)

    # mirrors can only be added to a cluster that does not have them yet
    if gpArray.hasMirrors:
        raise ExceptionNoStackTraceNeeded(
            "GPDB physical mirroring cannot be added. The cluster is already configured with Mirrors.")

    # figure out what needs to be done (AND update the gpArray!)
    mirrorBuilder = self.__getMirrorsToBuildBasedOnOptions(gpEnv, gpArray)
    mirrorBuilder.checkForPortAndDirectoryConflicts(gpArray)

    sample_file = options.outputSampleConfigFile
    if sample_file is not None:
        # just output config file and done
        self.__outputToFile(mirrorBuilder, sample_file, gpArray)
        logger.info('Configuration file output to %s successfully.'
                    % sample_file)
        return 0  # success -- exit code 0!

    self.__displayAddMirrors(gpEnv, mirrorBuilder, gpArray)
    if options.interactive and not userinput.ask_yesno(
            None, "\nContinue with add mirrors procedure", 'N'):
        raise UserAbortedException()

    update_pg_hba_on_segments(gpArray, options.hba_hostnames,
                              options.batch_size)

    built = mirrorBuilder.buildMirrors("add", gpEnv, gpArray)
    if not built:
        return 1

    banner = "******************************************************************"
    summary = (
        banner,
        "Mirror segments have been added; data synchronization is in progress.",
        "Data synchronization will continue in the background.",
        "Use gpstate -s to check the resynchronization progress.",
        banner,
    )
    for line in summary:
        logger.info(line)

    return 0  # success -- exit code 0!
def rebalance(self):
    """Stop unbalanced primaries, reconfigure, then run ``gprecoverseg -a``.

    Returns:
        True when every stop command succeeded (full rebalance), False
        when at least one failed (partial rebalance).

    Raises:
        Exception: if the nested gprecoverseg run exits with a non-zero
            code.
    """
    # Get the unbalanced primary segments grouped by hostname
    # These segments are what we will shutdown.
    self.logger.info("Getting unbalanced segments")
    unbalanced_primary_segs = GpArray.getSegmentsByHostName(
        self.gpArray.get_unbalanced_primary_segdbs())
    pool = base.WorkerPool()

    try:
        # Disable ctrl-c
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        self.logger.info("Stopping unbalanced primary segments...")
        for hostname in unbalanced_primary_segs.keys():
            cmd = GpSegStopCmd("stop unbalanced primary segs",
                               self.gpEnv.getGpHome(),
                               self.gpEnv.getGpVersion(),
                               'fast',
                               unbalanced_primary_segs[hostname],
                               ctxt=base.REMOTE,
                               remoteHost=hostname,
                               timeout=600)
            pool.addCommand(cmd)

        base.join_and_indicate_progress(pool)

        failed_count = 0
        for res in pool.getCompletedItems():
            if not res.get_results().wasSuccessful():
                failed_count += 1

        allSegmentsStopped = (failed_count == 0)

        if not allSegmentsStopped:
            # Interpolate the count; without it the log shows a raw "%d".
            self.logger.warn(
                "%d segments failed to stop. A full rebalance of the" % failed_count)
            self.logger.warn(
                "system is not possible at this time. Please check the")
            self.logger.warn(
                "log files, correct the problem, and run gprecoverseg -r")
            self.logger.warn("again.")
            self.logger.info(
                "gprecoverseg will continue with a partial rebalance.")

        pool.empty_completed_items()
        segment_reconfigurer = SegmentReconfigurer(
            logger=self.logger, worker_pool=pool,
            timeout=MIRROR_PROMOTION_TIMEOUT)
        segment_reconfigurer.reconfigure()

        # Final step is to issue a recoverseg operation to resync segments
        self.logger.info("Starting segment synchronization")
        original_sys_args = sys.argv[:]
        # Dedicated name for the nested program so the cleanup below never
        # touches a leftover segment-stop command (or an unbound name).
        recover_cmd = None
        try:
            self.logger.info(
                "=============================START ANOTHER RECOVER========================================="
            )
            # import here because GpRecoverSegmentProgram and
            # GpSegmentRebalanceOperation have a circular dependency
            from gppylib.programs.clsRecoverSegment import GpRecoverSegmentProgram
            # parse_args() is called without arguments, so sys.argv is
            # swapped for the duration of the nested program.
            sys.argv = ['gprecoverseg', '-a']
            local_parser = GpRecoverSegmentProgram.createParser()
            local_options, args = local_parser.parse_args()
            recover_cmd = GpRecoverSegmentProgram.createProgram(
                local_options, args)
            recover_cmd.run()
        except SystemExit as e:
            # A zero exit code is treated as success and swallowed.
            if e.code != 0:
                self.logger.error(
                    "Failed to start the synchronization step of the segment rebalance."
                )
                self.logger.error(
                    "Check the gprecoverseg log file, correct any problems, and re-run"
                )
                self.logger.error("'gprecoverseg -a'.")
                raise Exception("Error synchronizing.\nError: %s" % str(e))
        finally:
            if recover_cmd:
                recover_cmd.cleanup()
            sys.argv = original_sys_args
            self.logger.info(
                "==============================END ANOTHER RECOVER=========================================="
            )
    finally:
        # Release the workers and re-enable ctrl-c on every exit path.
        pool.join()
        pool.haltWork()
        pool.joinWorkers()
        signal.signal(signal.SIGINT, signal.default_int_handler)

    return allSegmentsStopped  # if all segments stopped, then a full rebalance was done
gphostcachelookup.py -- look up the hostname for a list of interfaces Usage: gphostcachelookup.py interface-name Input is taken from stdin. Each line from stdin is considered as interface name. Output is the hostname, gets printed to stdout. ''' import sys from gppylib.gphostcache import GpInterfaceToHostNameCache from gppylib.commands import base #------------------------------------------------------------------------- if __name__ == '__main__': pool = base.WorkerPool(1) retCode = 0 try: interfaces = [] hostNames = [] for line in sys.stdin: interfaces.append(line.strip()) hostNames.append(None) lookup = GpInterfaceToHostNameCache(pool, interfaces, hostNames) for interface in interfaces: hostname = lookup.getHostName(interface) if hostname is None: sys.stdout.write("__lookup_of_hostname_failed__\n")