def setUp(self):
    """Create the GpRecoverSegmentProgram under test with mocked collaborators.

    The program is built from options parsed out of an explicitly empty
    argument list, then the database, environment, fault prober,
    configuration provider and WorkerPool entry points are replaced with
    mocks through ``self.apply_patches``.
    """
    parser = GpRecoverSegmentProgram.createParser()
    options, _unused_args = parser.parse_args(args=[])
    options.spareDataDirectoryFile = None
    options.newRecoverHosts = None
    self.subject = GpRecoverSegmentProgram(options)

    self.execSqlResult = Mock(spec=['fetchall'])

    # Any construction of the (patched) coordinator environment yields self.gp_env.
    self.gp_env = Mock()
    coordinator_env_factory = Mock(return_value=self.gp_env)

    self.gparray = Mock(spec=GpArray)
    self.gparray.getDbList.return_value = self._segments_mock()

    # The provider returns itself from initializeProvider and hands out the
    # mocked array from loadSystemConfig.
    provider = Mock(spec=GpConfigurationProvider)
    provider.initializeProvider.return_value = provider
    provider.loadSystemConfig.return_value = self.gparray
    self.getConfigProviderFunctionMock = Mock(
        spec=GpConfigurationProvider, return_value=provider)

    self.subject.logger = Mock()

    # Calling this mock returns None, so it can stand in for WorkerPool.__init__.
    self.pool_completed = []
    self.worker_pool = Mock(spec=WorkerPool, return_value=None)
    self.worker_pool.logger = self.subject.logger
    self.worker_pool.addCommand.return_value = None
    self.worker_pool.getCompletedItems.return_value = []

    patchers = [
        patch("gppylib.db.dbconn.connect"),
        patch("gppylib.db.dbconn.DbURL"),
        patch("gppylib.db.dbconn.execSQL", return_value=self.execSqlResult),
        patch('time.sleep'),
        patch('gppylib.programs.clsRecoverSegment.GpCoordinatorEnvironment',
              coordinator_env_factory),
        patch('gppylib.system.faultProberInterface.getFaultProber'),
        patch('gppylib.system.configurationInterface.getConfigurationProvider',
              self.getConfigProviderFunctionMock),
        patch('gppylib.commands.base.WorkerPool.__init__', self.worker_pool),
        patch('gppylib.commands.base.WorkerPool.getCompletedItems',
              return_value=self.pool_completed),
        patch('gppylib.commands.base.WorkerPool.addCommand'),
        patch('gppylib.commands.base.WorkerPool.join'),
    ]
    # NOTE(review): apply_patches is defined outside this block; presumably it
    # starts each patcher and undoes it at teardown -- confirm.
    self.apply_patches(patchers)

    # Tests make use of a workaround to access a python attribute that is
    # normally name mangled when specified with a "__" prefix. That workaround
    # is to use _<class>__<attribute>.
    self.subject._GpRecoverSegmentProgram__pool = self.worker_pool
def setUp(self):
    """Create the GpRecoverSegmentProgram under test with mocked collaborators.

    The program is built from parsed default options, then the database,
    environment, fault prober, configuration provider and WorkerPool entry
    points are replaced with mocks through ``self.apply_patches``.
    """
    raw_options = GpRecoverSegmentProgram.createParser()
    # Parse an explicit empty argument list. With no ``args`` the parser
    # falls back to sys.argv[1:], which under a test runner holds the
    # runner's own flags and can make the fixture fail or pick up stray
    # options.
    (options, _) = raw_options.parse_args(args=[])
    options.spareDataDirectoryFile = None
    options.newRecoverHosts = None
    self.subject = GpRecoverSegmentProgram(options)

    self.execSqlResult = Mock(spec=['fetchall'])

    # Any construction of the (patched) master environment yields self.gp_env.
    self.gp_env = Mock()
    GpMasterEnvironmentMock = Mock(return_value=self.gp_env)

    self.gparray = Mock(spec=GpArray)
    self.gparray.getDbList.return_value = self._segments_mock()

    # The provider returns itself from initializeProvider and hands out the
    # mocked array from loadSystemConfig.
    configProviderMock = Mock(spec=GpConfigurationProvider)
    configProviderMock.initializeProvider.return_value = configProviderMock
    configProviderMock.loadSystemConfig.return_value = self.gparray
    self.getConfigProviderFunctionMock = Mock(GpConfigurationProvider)
    self.getConfigProviderFunctionMock.return_value = configProviderMock

    self.subject.logger = Mock()

    # Calling this mock returns None, so it can stand in for WorkerPool.__init__.
    self.worker_pool = Mock(spec=WorkerPool, return_value=None)
    self.worker_pool.getCompletedItems.return_value = []
    self.worker_pool.logger = self.subject.logger
    self.worker_pool.addCommand.return_value = None
    self.pool_completed = []

    # NOTE(review): apply_patches is defined outside this block; presumably it
    # starts each patcher and undoes it at teardown -- confirm.
    self.apply_patches([
        patch("gppylib.db.dbconn.connect"),
        patch("gppylib.db.dbconn.DbURL"),
        patch("gppylib.db.dbconn.execSQL", return_value=self.execSqlResult),
        patch('time.sleep'),
        patch('gppylib.programs.clsRecoverSegment.GpMasterEnvironment', GpMasterEnvironmentMock),
        patch('gppylib.system.faultProberInterface.getFaultProber'),
        patch('gppylib.system.configurationInterface.getConfigurationProvider',
              self.getConfigProviderFunctionMock),
        patch('gppylib.commands.base.WorkerPool.__init__', self.worker_pool),
        patch('gppylib.commands.base.WorkerPool.getCompletedItems', return_value=self.pool_completed),
        patch('gppylib.commands.base.WorkerPool.addCommand'),
        patch('gppylib.commands.base.WorkerPool.join'),
    ])

    # tests make use of a workaround to access a python attribute that is normally
    # name mangled when specified with a "__" prefix. That workaround is to use _<class>__<attribute>
    # such as  self.subject._GpRecoverSegmentProgram__pool = mock_pool
    self.subject._GpRecoverSegmentProgram__pool = self.worker_pool
def rebalance(self):
    """Stop the unbalanced primaries, trigger reconfiguration, then resync.

    Segment pairs that are not balanced are collected; a pair is only
    rebalanced when it is up, reachable and synchronized, otherwise a
    warning is logged and the pair is left alone. The selected primaries
    are stopped in parallel, a segment reconfiguration is triggered, and an
    in-process ``gprecoverseg -a`` run resynchronizes the segments.

    Returns:
        True when there was nothing to rebalance or every stop command
        succeeded (full rebalance); False when at least one stop command
        failed (partial rebalance).

    Raises:
        Exception: when the in-process gprecoverseg run exits with a
            non-zero code.
    """
    self.logger.info("Determining primary and mirror segment pairs to rebalance")

    # The current implementation of rebalance calls "gprecoverseg -a" below.
    # Thus, if another balanced pair is not synchronized, or has a down mirror
    # that pair will be recovered as a side-effect of rebalancing.
    unbalanced_primary_segs = []
    for segmentPair in self.gpArray.segmentPairs:
        if segmentPair.balanced():
            continue

        if segmentPair.up() and segmentPair.reachable() and segmentPair.synchronized():
            unbalanced_primary_segs.append(segmentPair.primaryDB)
        else:
            self.logger.warning(
                "Not rebalancing primary segment dbid %d with its mirror dbid %d because one is either down, unreachable, or not synchronized" \
                % (segmentPair.primaryDB.dbid, segmentPair.mirrorDB.dbid))

    if not unbalanced_primary_segs:
        self.logger.info("No segments to rebalance")
        return True

    # From here on the name refers to a mapping of hostname -> segments.
    unbalanced_primary_segs = GpArray.getSegmentsByHostName(unbalanced_primary_segs)

    pool = base.WorkerPool(min(len(unbalanced_primary_segs), self.batch_size))
    try:
        # Disable ctrl-c
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        self.logger.info("Stopping unbalanced primary segments...")
        for hostname, host_segments in unbalanced_primary_segs.items():
            cmd = GpSegStopCmd("stop unbalanced primary segs",
                               self.gpEnv.getGpHome(),
                               self.gpEnv.getGpVersion(),
                               'fast',
                               host_segments,
                               ctxt=base.REMOTE,
                               remoteHost=hostname,
                               timeout=600,
                               segment_batch_size=self.segment_batch_size)
            pool.addCommand(cmd)

        base.join_and_indicate_progress(pool)

        failed_count = 0
        completed = pool.getCompletedItems()
        for res in completed:
            if not res.get_results().wasSuccessful():
                failed_count += 1

        allSegmentsStopped = (failed_count == 0)

        if not allSegmentsStopped:
            self.logger.warn("%d segments failed to stop. A full rebalance of the" % failed_count)
            self.logger.warn("system is not possible at this time. Please check the")
            self.logger.warn("log files, correct the problem, and run gprecoverseg -r")
            self.logger.warn("again.")
            self.logger.info("gprecoverseg will continue with a partial rebalance.")

        pool.empty_completed_items()
        segment_reconfigurer = SegmentReconfigurer(logger=self.logger,
                                                   worker_pool=pool,
                                                   timeout=MIRROR_PROMOTION_TIMEOUT)
        segment_reconfigurer.reconfigure()

        # Final step is to issue a recoverseg operation to resync segments
        self.logger.info("Starting segment synchronization")
        original_sys_args = sys.argv[:]
        self.logger.info("=============================START ANOTHER RECOVER=========================================")
        # import here because GpRecoverSegmentProgram and GpSegmentRebalanceOperation have a circular dependency
        from gppylib.programs.clsRecoverSegment import GpRecoverSegmentProgram

        cmd_args = ['gprecoverseg', '-a', '-B', str(self.batch_size), '-b', str(self.segment_batch_size)]

        # sys.argv is replaced only inside the try so that a failure while
        # building the parser or the program still restores the caller's
        # argv and logs the END banner.
        recover_cmd = None
        try:
            sys.argv = cmd_args[:]
            local_parser = GpRecoverSegmentProgram.createParser()
            local_options, args = local_parser.parse_args()
            recover_cmd = GpRecoverSegmentProgram.createProgram(local_options, args)
            recover_cmd.run()
        except SystemExit as e:
            # An exit code of 0 is a normal completion of the nested program.
            if e.code != 0:
                self.logger.error("Failed to start the synchronization step of the segment rebalance.")
                self.logger.error("Check the gprecoverseg log file, correct any problems, and re-run")
                self.logger.error(' '.join(cmd_args))
                raise Exception("Error synchronizing.\nError: %s" % str(e))
        finally:
            if recover_cmd:
                recover_cmd.cleanup()
            sys.argv = original_sys_args
            self.logger.info("==============================END ANOTHER RECOVER==========================================")
    finally:
        # Always shut the pool down and re-enable ctrl-c, whatever happened above.
        pool.join()
        pool.haltWork()
        pool.joinWorkers()
        signal.signal(signal.SIGINT, signal.default_int_handler)

    return allSegmentsStopped  # if all segments stopped, then a full rebalance was done
def rebalance(self):
    """Stop the unbalanced primaries, trigger reconfiguration, then resync.

    The unbalanced primary segments are stopped in parallel (one command
    per host), a query is issued so the resulting fault is detected, and an
    in-process ``gprecoverseg -a`` run resynchronizes the segments.

    Returns:
        True when every stop command succeeded (full rebalance); False when
        at least one failed (partial rebalance).

    Raises:
        Exception: when the in-process gprecoverseg run exits with a
            non-zero code.
    """
    # Get the unbalanced primary segments grouped by hostname
    # These segments are what we will shutdown.
    self.logger.info("Getting unbalanced segments")
    unbalanced_primary_segs = GpArray.getSegmentsByHostName(self.gpArray.get_unbalanced_primary_segdbs())
    pool = base.WorkerPool()

    count = 0

    try:
        # Disable ctrl-c
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        self.logger.info("Stopping unbalanced primary segments...")
        for hostname, host_segments in unbalanced_primary_segs.items():
            stop_cmd = GpSegStopCmd("stop unbalanced primary segs",
                                    self.gpEnv.getGpHome(),
                                    self.gpEnv.getGpVersion(),
                                    'fast',
                                    host_segments,
                                    ctxt=base.REMOTE,
                                    remoteHost=hostname,
                                    timeout=600)
            pool.addCommand(stop_cmd)
            count += 1

        pool.wait_and_printdots(count, False)

        failed_count = 0
        completed = pool.getCompletedItems()
        for res in completed:
            if not res.get_results().wasSuccessful():
                failed_count += 1

        allSegmentsStopped = (failed_count == 0)

        if not allSegmentsStopped:
            # The count must be interpolated; without it a literal "%d" is logged.
            self.logger.warn("%d segments failed to stop. A full rebalance of the" % failed_count)
            self.logger.warn("system is not possible at this time. Please check the")
            self.logger.warn("log files, correct the problem, and run gprecoverseg -r")
            self.logger.warn("again.")
            self.logger.info("gprecoverseg will continue with a partial rebalance.")

        pool.empty_completed_items()
        # issue a distributed query to make sure we pick up the fault
        # that we just caused by shutting down segments
        conn = None
        try:
            self.logger.info("Triggering segment reconfiguration")
            dburl = dbconn.DbURL()
            conn = dbconn.connect(dburl)
            reconfig_cmd = ReconfigDetectionSQLQueryCommand(conn)
            pool.addCommand(reconfig_cmd)
            pool.wait_and_printdots(1, False)
        except Exception:
            # This exception is expected
            pass
        finally:
            if conn:
                conn.close()

        # Final step is to issue a recoverseg operation to resync segments
        self.logger.info("Starting segment synchronization")
        original_sys_args = sys.argv[:]
        # A dedicated name, initialised to None, keeps the cleanup below from
        # acting on one of the earlier stop/reconfig commands when the
        # recover program could not be created.
        recover_cmd = None
        try:
            self.logger.info("=============================START ANOTHER RECOVER=========================================")
            # import here because GpRecoverSegmentProgram and GpSegmentRebalanceOperation have a circular dependency
            from gppylib.programs.clsRecoverSegment import GpRecoverSegmentProgram
            sys.argv = ['gprecoverseg', '-a']
            local_parser = GpRecoverSegmentProgram.createParser()
            local_options, args = local_parser.parse_args()
            recover_cmd = GpRecoverSegmentProgram.createProgram(local_options, args)
            recover_cmd.run()
        except SystemExit as e:
            # An exit code of 0 is a normal completion of the nested program.
            if e.code != 0:
                self.logger.error("Failed to start the synchronization step of the segment rebalance.")
                self.logger.error("Check the gprecoverseg log file, correct any problems, and re-run")
                self.logger.error("'gprecoverseg -a'.")
                raise Exception("Error synchronizing.\nError: %s" % str(e))
        finally:
            if recover_cmd:
                recover_cmd.cleanup()
            sys.argv = original_sys_args
            self.logger.info("==============================END ANOTHER RECOVER==========================================")
    finally:
        # Always shut the pool down and re-enable ctrl-c; otherwise SIGINT
        # stays ignored and the pool is never halted after this method returns.
        pool.join()
        pool.haltWork()
        pool.joinWorkers()
        signal.signal(signal.SIGINT, signal.default_int_handler)

    return allSegmentsStopped  # if all segments stopped, then a full rebalance was done
def rebalance(self):
    """Stop the unbalanced primaries, trigger reconfiguration, then resync.

    The unbalanced primary segments are stopped in parallel (one command
    per host), a SegmentReconfigurer is run, and an in-process
    ``gprecoverseg -a`` run resynchronizes the segments.

    Raises:
        Exception: when the in-process gprecoverseg run exits with a
            non-zero code.
    """
    # Get the unbalanced primary segments grouped by hostname
    # These segments are what we will shutdown.
    self.logger.info("Getting unbalanced segments")
    unbalanced_primary_segs = GpArray.getSegmentsByHostName(
        self.gpArray.get_unbalanced_primary_segdbs())
    pool = base.WorkerPool()

    try:
        # Disable ctrl-c
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        self.logger.info("Stopping unbalanced primary segments...")
        for hostname, host_segments in unbalanced_primary_segs.items():
            stop_cmd = GpSegStopCmd("stop unbalanced primary segs",
                                    self.gpEnv.getGpHome(),
                                    self.gpEnv.getGpVersion(),
                                    'fast',
                                    host_segments,
                                    ctxt=base.REMOTE,
                                    remoteHost=hostname,
                                    timeout=600)
            pool.addCommand(stop_cmd)

        base.join_and_indicate_progress(pool)

        failed_count = 0
        completed = pool.getCompletedItems()
        for res in completed:
            if not res.get_results().wasSuccessful():
                failed_count += 1

        allSegmentsStopped = (failed_count == 0)

        if not allSegmentsStopped:
            # The count must be interpolated; without it a literal "%d" is logged.
            self.logger.warn(
                "%d segments failed to stop. A full rebalance of the" % failed_count)
            self.logger.warn(
                "system is not possible at this time. Please check the")
            self.logger.warn(
                "log files, correct the problem, and run gprecoverseg -r")
            self.logger.warn("again.")
            self.logger.info(
                "gprecoverseg will continue with a partial rebalance.")

        pool.empty_completed_items()
        segment_reconfigurer = SegmentReconfigurer(
            logger=self.logger,
            worker_pool=pool,
            timeout=MIRROR_PROMOTION_TIMEOUT)
        segment_reconfigurer.reconfigure()

        # Final step is to issue a recoverseg operation to resync segments
        self.logger.info("Starting segment synchronization")
        original_sys_args = sys.argv[:]
        # A dedicated name, initialised to None, keeps the cleanup below from
        # acting on one of the earlier stop commands when the recover program
        # could not be created.
        recover_cmd = None
        try:
            self.logger.info(
                "=============================START ANOTHER RECOVER========================================="
            )
            # import here because GpRecoverSegmentProgram and GpSegmentRebalanceOperation have a circular dependency
            from gppylib.programs.clsRecoverSegment import GpRecoverSegmentProgram
            sys.argv = ['gprecoverseg', '-a']
            local_parser = GpRecoverSegmentProgram.createParser()
            local_options, args = local_parser.parse_args()
            recover_cmd = GpRecoverSegmentProgram.createProgram(
                local_options, args)
            recover_cmd.run()
        except SystemExit as e:
            # An exit code of 0 is a normal completion of the nested program.
            if e.code != 0:
                self.logger.error(
                    "Failed to start the synchronization step of the segment rebalance."
                )
                self.logger.error(
                    "Check the gprecoverseg log file, correct any problems, and re-run"
                )
                self.logger.error("'gprecoverseg -a'.")
                raise Exception("Error synchronizing.\nError: %s" % str(e))
        finally:
            if recover_cmd:
                recover_cmd.cleanup()
            sys.argv = original_sys_args
            self.logger.info(
                "==============================END ANOTHER RECOVER=========================================="
            )
    except Exception:
        # Bare raise re-raises with the original traceback intact.
        raise
    # NOTE(review): nothing visible in this block restores the SIGINT handler,
    # halts the worker pool, or returns allSegmentsStopped -- confirm whether
    # that cleanup follows this point in the full file.