def test_batch_size_4(self):
    """
    Check the batch size option -B of gpaddmirrors.

    Runs gpaddmirrors with ``-B 4 --verbose`` and collects the distinct
    worker ids found on the stdout lines that mention both 'worker' and
    'haltWork'. The number of distinct workers must equal the requested
    batch size. (Original note: the worker count depends on how many
    mirror segments are set up; otherwise it will start up to 10.)
    """
    gprecover = GpRecover()
    self._setup_gpaddmirrors()
    self._cleanup_segment_data_dir(self.host_file, self.mirror_data_dir)

    # Built-in set replaces the legacy Set class; duplicates collapse the same way.
    workers = set()
    batch_size = 4
    res = {'rc': 0, 'stdout': '', 'stderr': ''}
    run_shell_command(
        "gpaddmirrors -a -i %s -B %s -d %s --verbose" %
        (self.mirror_config_file, batch_size, self.mdd),
        'run gpaddmirrros batch size %s' % batch_size, res)
    self.assertEqual(0, res['rc'])

    for line in res['stdout'].split('\n'):
        if 'worker' in line and 'haltWork' in line:
            # The second space-separated token carries the worker name;
            # the worker id is whatever follows its last '-'.
            elems = line.split(' ')[1]
            workers.add(elems.split('-')[-1])

    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(len(workers), batch_size)
    gprecover.wait_till_insync_transition()
    self.verify_config_file_with_gp_config()
    self.check_mirror_seg()
def test_mirror_spread(self):
    """
    Mirror spreading will place each mirror on a different host within
    the Greenplum Database array.

    Runs gpaddmirrors with -s, then queries gp_segment_configuration for
    primary/mirror pairs of the same content whose hostnames differ and
    expects exactly self.number_of_segments such rows. The test is
    skipped when there are fewer hosts than segments per host.
    """
    recover = GpRecover()
    if self.number_of_segments_per_host > len(self.hosts):
        self.skipTest(
            'skipping test since the number of host is less than number of segments per hosts'
        )

    self._setup_gpaddmirrors()
    self._cleanup_segment_data_dir(self.host_file, self.mirror_data_dir)

    cmd_result = {'rc': 0, 'stdout': '', 'stderr': ''}
    spread_cmd = "gpaddmirrors -a -i %s -s -d %s --verbose" % (
        self.mirror_config_file, self.mdd)
    run_shell_command(spread_cmd, 'run gpaddmirrros with mirror spreading',
                      cmd_result)
    self.assertEqual(0, cmd_result['rc'])

    # One row per content id whose primary and mirror live on different hosts.
    check_mirror_spreading = '''SELECT A.hostname, B.hostname FROM gp_segment_configuration A, gp_segment_configuration B WHERE A.preferred_role = \'p\' AND B.preferred_role = \'m\' AND A.content = B.content AND A.hostname <> B.hostname;'''
    output = PSQL.run_sql_command(check_mirror_spreading,
                                  flags='-q -t',
                                  dbname='template1').strip()
    self.assertNotEqual(0, len(output))
    self.assertEqual(self.number_of_segments, len(output.split('\n')))

    recover.wait_till_insync_transition()
    self.verify_config_file_with_gp_config()
    self.check_mirror_seg()
def test_with_fault_injection(self):
    """
    Add new mirrors, run a workload to verify the cluster is functioning
    correctly, then inject a fault on the mirrors to bring the cluster
    into change tracking and recover it with an incremental recoverseg.

    The injected 'filerep_consumer' fault is always reset at the end,
    even when one of the intermediate steps fails.
    """
    filerepUtil = Filerepe2e_Util()
    gprecover = GpRecover()
    self._setup_gpaddmirrors()
    self._cleanup_segment_data_dir(self.host_file, self.mirror_data_dir)

    res = {'rc': 0, 'stdout': '', 'stderr': ''}
    run_shell_command("gpaddmirrors -a -i %s -d %s --verbose" % (self.mirror_config_file, self.mdd),
                      'run gpaddmirrros with fault injection', res)
    # Check the exit status first: waiting for in-sync is pointless (and
    # hides the real failure) when gpaddmirrors itself did not succeed.
    self.assertEqual(0, res['rc'])
    gprecover.wait_till_insync_transition()

    self.run_simple_ddl_dml()

    # after adding new mirrors, check the integrity between primary and mirror
    self.check_mirror_seg()

    out_file = local_path('inject_fault_into_ct')
    filerepUtil.inject_fault(f='filerep_consumer', m='async', y='fault', r='mirror', H='ALL', outfile=out_file)
    try:
        # trigger the transition to change tracking
        PSQL.run_sql_command('drop table if exists foo;', dbname='template1')
        filerepUtil.wait_till_change_tracking_transition()
        gprecover.incremental()
        gprecover.wait_till_insync_transition()
    finally:
        # Never leave the fault armed for the tests that run afterwards.
        out_file = local_path('reset_fault')
        filerepUtil.inject_fault(f='filerep_consumer', m='async', y='reset', r='mirror', H='ALL', outfile=out_file)
def test_option_port_offset(self):
    """
    Verify the port layout produced with a non-default port offset:

    primary port + offset       = mirror database port
    primary port + (2 * offset) = mirror replication port
    primary port + (3 * offset) = primary replication port

    The ports are read from gp_segment_configuration for content 0.
    """
    recover = GpRecover()
    port_offset = 500
    self._setup_gpaddmirrors(port_offset = port_offset)
    self._cleanup_segment_data_dir(self.host_file, self.mirror_data_dir)

    cmd_result = {'rc': 0, 'stdout' : '', 'stderr': ''}
    addmirrors_cmd = "gpaddmirrors -a -i %s -d %s --verbose" % (self.mirror_config_file, self.mdd)
    run_shell_command(addmirrors_cmd, 'run gpaddmirrros with non default port_offset', cmd_result)
    self.assertEqual(0, cmd_result['rc'])

    query_ports = 'SELECT port, replication_port FROM gp_segment_configuration WHERE content = 0 ORDER BY preferred_role DESC;'
    output = PSQL.run_sql_command(query_ports, flags='-q -t', dbname='template1')
    rows = output.strip().split('\n')
    # ORDER BY preferred_role DESC: the 'p' row sorts before the 'm' row.
    primary_row, mirror_row = rows[0], rows[1]

    # Each row is "port | replication_port".
    primary_fields = [field.strip() for field in primary_row.split('|')]
    primary_db_port = int(primary_fields[0])
    primary_replic_port = int(primary_fields[1])

    mirror_fields = [field.strip() for field in mirror_row.split('|')]
    mirror_db_port = int(mirror_fields[0])
    mirror_replic_port = int(mirror_fields[1])

    self.assertEqual(primary_db_port + port_offset, mirror_db_port)
    self.assertEqual(primary_db_port + 2*port_offset, mirror_replic_port)
    self.assertEqual(primary_db_port + 3*port_offset, primary_replic_port)

    recover.wait_till_insync_transition()
    self.verify_config_file_with_gp_config()
    self.check_mirror_seg()
def test_with_concurrent_workload(self):
    """
    Add new mirrors while a concurrent workload is in progress and check
    that the mirrors are added and the workload is not affected.

    The workload (psql -f) and gpaddmirrors are started as background
    processes. The test polls pg_stat_activity until only one row is
    left, reaps both child processes, requires gpaddmirrors to have
    exited with status 0 and finally waits for the in-sync state.

    check_mirror_seg is not run here; the original note says that adding
    mirrors while running workload has a checkmirrorseg issue (MPP-24311).
    """
    gprecover = GpRecover()
    self._setup_gpaddmirrors()
    self._cleanup_segment_data_dir(self.host_file, self.mirror_data_dir)
    sql_setup_file = local_path('sql/ao_heap_table_setup.sql')
    sql_file = local_path('sql/ao_heap_table.sql')
    pg_stat_activity = 'SELECT * FROM pg_stat_activity;'
    PSQL.run_sql_file(sql_setup_file)

    # Keep the Popen handles so the children can be reaped and checked later.
    workload_proc = subprocess.Popen(["psql", "-f", sql_file])
    time.sleep(15)
    addmirrors_proc = subprocess.Popen(
        ["gpaddmirrors", "-ai", self.mirror_config_file, "-d", self.mdd])
    time.sleep(15)

    result = PSQL.run_sql_command(pg_stat_activity,
                                  flags='-q -t',
                                  dbname='template1')
    rows = result.strip().split('\n')
    # The workload must still be visible at this point.
    self.assertTrue(len(rows) > 1)
    while len(rows) > 1:
        result = PSQL.run_sql_command(pg_stat_activity,
                                      flags='-q -t',
                                      dbname='template1')
        rows = result.strip().split('\n')
        time.sleep(3)

    # Reap both children instead of leaving them behind as zombies, and
    # make a failed gpaddmirrors run visible as a test failure.
    workload_proc.wait()
    self.assertEqual(0, addmirrors_proc.wait())

    gprecover.wait_till_insync_transition()
    self.verify_config_file_with_gp_config()
def check_insync_transition(self, dbname='template1'):
    """
    Confirm that the cluster is in sync before gpcheckmirrorseg is run.

    Waits via GpRecover.wait_till_insync_transition() and fails the test
    with 'Segments are not in sync' when that call returns a false value.
    The dbname argument is accepted but not used by this method.
    """
    if not GpRecover().wait_till_insync_transition():
        self.fail('Segments are not in sync')
def __init__(self, methodName):
    """
    Create the helper objects used by the tests, then initialise the
    parent test case with methodName.

    The GPDBConfig instance is shared by the recover helper and the
    storage base test case.
    """
    self.pgport = os.getenv('PGPORT')
    self.fileutil = Filerepe2e_Util()
    cluster_config = GPDBConfig()
    self.gpconfig = cluster_config
    self.gprecover = GpRecover(cluster_config)
    self.gpstate = Gpstate()
    self.gpprimarymirror = Gpprimarymirror()
    self.base = GPDBStorageBaseTestCase(cluster_config)
    super(FtsTransitions, self).__init__(methodName)
def __init__(self, methodName):
    """
    Create the helper objects used by the tests, then initialise the
    parent test case with methodName.

    A single GPDBConfig instance is shared by the recover, verify and
    db-state helpers; the PGPORT environment value is kept in self.port.
    """
    self.filereputil = Filerepe2e_Util()
    cluster_config = GPDBConfig()
    self.config = cluster_config
    self.gprecover = GpRecover(cluster_config)
    self.gpstop = GpStop()
    self.gpstart = GpStart()
    self.gpverify = GpdbVerify(config=cluster_config)
    self.dbstate = DbStateClass('run_validation', cluster_config)
    self.port = os.environ.get('PGPORT')
    super(PgtwoPhaseClass, self).__init__(methodName)
def __init__(self, methodName):
    """
    Create the helper objects used by the tests, then initialise the
    parent test case with methodName.

    self.gpconfig holds a GpConfig instance, while self.config holds the
    GPDBConfig instance shared by the recover and db-state helpers.
    """
    self.pgport = os.getenv('PGPORT')
    self.util = Filerepe2e_Util()
    self.gpconfig = GpConfig()
    cluster_config = GPDBConfig()
    self.config = cluster_config
    self.gpr = GpRecover(cluster_config)
    self.dbstate = DbStateClass('run_validation', cluster_config)
    self.gpstart = GpStart()
    self.gpstop = GpStop()
    super(FilerepTestCase, self).__init__(methodName)
def __init__(self,methodName):
    """
    Create the helper objects used by the tests, then initialise the
    parent test case with methodName.

    A single GPDBConfig instance is shared by the recover, filespace and
    db-state helpers; the PGPORT environment value is kept in self.port.
    """
    self.fileutil = Filerepe2e_Util()
    cluster_config = GPDBConfig()
    self.config = cluster_config
    self.gprecover = GpRecover(cluster_config)
    self.gpstart = GpStart()
    self.gpstop = GpStop()
    self.gpfile = Gpfilespace(cluster_config)
    self.dbstate = DbStateClass('run_validation', cluster_config)
    self.port = os.environ.get('PGPORT')
    self.base = GPDBStorageBaseTestCase()
    super(SuspendCheckpointCrashRecovery,self).__init__(methodName)
def test_option_d(self):
    """
    Check the -d option of gpaddmirrors.

    MASTER_DATA_DIRECTORY is removed from the environment while
    gpaddmirrors runs, so the master data directory has to come from -d.
    The variable is set back to self.mdd afterwards, also when the
    command fails, so a failure here does not leak into later tests.
    """
    gprecover = GpRecover()
    self._setup_gpaddmirrors()
    self._cleanup_segment_data_dir(self.host_file, self.mirror_data_dir)

    # pop() tolerates the variable being unset already (del would raise KeyError).
    os.environ.pop('MASTER_DATA_DIRECTORY', None)
    try:
        Command('run gpaddmirrors -i -d',
                'gpaddmirrors -a -i %s -d %s' % (self.mirror_config_file, self.mdd)).run(validateAfter=True)
    finally:
        # validateAfter=True raises on failure; restore the variable regardless.
        os.environ['MASTER_DATA_DIRECTORY'] = self.mdd

    gprecover.wait_till_insync_transition()
    self.verify_config_file_with_gp_config()
    self.check_mirror_seg()
def __init__(self, config=None):
    """
    Create the helper objects used by this class.

    config -- configuration object to share with the helpers; when it is
              None a new GPDBConfig() is created.
    """
    cluster_config = GPDBConfig() if config is None else config
    self.config = cluster_config
    self.filereputil = Filerepe2e_Util()
    self.gprecover = GpRecover(cluster_config)
    self.gpstop = GpStop()
    self.gpstart = GpStart()
    self.gpverify = GpdbVerify(config=cluster_config)
    self.dbstate = DbStateClass('run_validation', cluster_config)
    self.port = os.environ.get('PGPORT')
def run_gprecoverseg(self, recover_option):
    '''
    @summary : Call gprecoverseg full or incremental to bring back the cluster to sync

    recover_option -- 'full' selects a full recovery; any other value
                      selects an incremental recovery.
    The GpRecover instance used is also stored in self.gpr.
    '''
    recover = GpRecover()
    self.gpr = recover
    tinctest.logger.info("[STLRTest] Running run_gprecoverseg")
    run_recovery = recover.full if recover_option == 'full' else recover.incremental
    run_recovery()
    recover.wait_till_insync_transition()
def test_interview(self):
    """
    Drive gpaddmirrors in interactive mode through pexpect.

    Answers one data-directory prompt per segment on a host with
    self.mirror_data_dir, confirms the final prompt with 'Y', waits for
    the process to finish and for the cluster to be in sync, runs the
    mirror check, and finally deletes and re-initialises the system.
    """
    gprecover = GpRecover()
    child = pexpect.spawn('gpaddmirrors')
    #child.logfile = sys.stdout
    for _ in range(0, self.number_of_segments_per_host):
        child.expect('Enter mirror segment data directory location.*.\r\n')
        child.sendline(self.mirror_data_dir)
    # The prompt contains '|' and parentheses, which expect() would treat
    # as regex alternation/grouping; match it literally instead.
    child.expect_exact('Continue with add mirrors procedure Yy|Nn (default=N):')
    child.sendline('Y')
    child.expect(pexpect.EOF)
    # Release the pty and reap the finished child.
    child.close()
    # wait until the cluster is totally synced, then run gpcheckmirrorseg
    gprecover.wait_till_insync_transition()
    self.check_mirror_seg()
    self._do_gpdeletesystem()
    self._do_gpinitsystem()
def test_gpaddmirrors_with_workload(self):
    """
    Add new mirrors after a workload has been run and check that the
    mirrors are added and checkmirrorseg passes.

    Both SQL files are run to completion before gpaddmirrors is started.
    """
    gprecover = GpRecover()
    self._setup_gpaddmirrors()
    self._cleanup_segment_data_dir(self.host_file, self.mirror_data_dir)
    sql_setup_file = local_path('sql/ao_heap_table_setup.sql')
    sql_file = local_path('sql/ao_heap_table.sql')
    PSQL.run_sql_file(sql_setup_file)
    PSQL.run_sql_file(sql_file)

    res = {'rc': 0, 'stdout': '', 'stderr': ''}
    # The description previously said "with fault injection" (copy-paste);
    # no fault is injected in this test.
    run_shell_command("gpaddmirrors -a -i %s -d %s --verbose" % (self.mirror_config_file, self.mdd),
                      'run gpaddmirrors after workload', res)
    self.assertEqual(0, res['rc'])

    gprecover.wait_till_insync_transition()
    self.verify_config_file_with_gp_config()
    self.check_mirror_seg()
def run_gprecoverseg(self,recover_option):
    '''
    @summary : Call gprecoverseg full or incremental to bring back the cluster to sync

    recover_option -- 'full' selects a full recovery; any other value
                      selects an incremental recovery.
    The contents of gp_segment_configuration are logged before the
    recovery, after it has been started, and once the cluster is in sync.
    The GpRecover instance used is also stored in self.gpr.
    '''
    def log_segment_configuration(stage):
        # Dump gp_segment_configuration to the log, labelled with the stage.
        tinctest.logger.info("[STLRTest] %s printing gp segment configuration" % stage)
        gp_seg_conf = PSQL.run_sql_command("select * from gp_segment_configuration order by dbid")
        tinctest.logger.info(gp_seg_conf)

    recover = GpRecover()
    self.gpr = recover
    tinctest.logger.info("[STLRTest] Running run_gprecoverseg")
    log_segment_configuration('START')

    if recover_option == 'full':
        recover.full()
    else:
        recover.incremental()

    log_segment_configuration('Middle')
    # Wait till the primary and mirror are in sync
    recover.wait_till_insync_transition()
    log_segment_configuration('END')
def setUpClass(cls):
    """
    Run the parent class setup, then call recover_rebalance_segs() on a
    fresh GpRecover instance before any test of this class runs.
    """
    super(mpp23395, cls).setUpClass()
    GpRecover().recover_rebalance_segs()
def setUp(self):
    """
    Per-test setup: create the GpRecover helper and remember the value of
    the GPHOME environment variable (None when it is not set).
    """
    self.gprec = GpRecover()
    self.gphome = os.getenv('GPHOME')
def wait_till_insync_transition(self):
    """
    Wait for the in-sync transition using a new GpRecover instance, which
    is also stored in self.gpr. The result of the wait is not returned.
    """
    recover = GpRecover()
    self.gpr = recover
    recover.wait_till_insync_transition()
def wait_till_insync(self):
    """
    Wait for the in-sync transition using a GpRecover built on a fresh
    GPDBConfig. The result of the wait is not returned.
    """
    GpRecover(GPDBConfig()).wait_till_insync_transition()
def __init__(self):
    """
    Create the fault-injection, recover and config helpers and record in
    self.base_dir the directory of the file that defines this instance's
    class.
    """
    self.fileutil = Filerepe2e_Util()
    self.gprecover = GpRecover()
    self.config = GpConfig()
    # Look the module up through the instance's class so subclasses get
    # the directory of their own defining module.
    defining_module = sys.modules[self.__class__.__module__]
    self.base_dir = os.path.dirname(defining_module.__file__)
def test_recovery(self):
    """Run an incremental recovery and wait for the in-sync transition."""
    recover = GpRecover()
    recover.incremental()
    recover.wait_till_insync_transition()
def incremental_recoverseg(self, workerPool=False):
    """
    Run an incremental recovery through a GpRecover built on a fresh
    GPDBConfig.

    workerPool -- passed unchanged to GpRecover.incremental().
    """
    GpRecover(GPDBConfig()).incremental(workerPool)
def test_recovery_full(self):
    """Run a full recovery and wait for the in-sync transition."""
    recover = GpRecover()
    recover.full()
    recover.wait_till_insync_transition()
def __init__(self,methodName):
    """
    Create the fault-injection and recover helpers, then initialise the
    parent test case with methodName.
    """
    self.filereputil, self.gprecover = Filerepe2e_Util(), GpRecover()
    super(BaseClass,self).__init__(methodName)
def test_resync_ct_blocks_per_query(self):
    '''Catch a bug in resync that manifests only after rebalance.

    The logic used by a resync worker to obtain changed blocks from
    CT log had a bug.  The SQL query used to obtain a batch of
    changed blocks from CT log was incorrectly using LSN to filter
    out changed blocks.  All of the following must be true for the
    bug to occur:

     * More than gp_filerep_ct_batch_size blocks of a relation are
       changed on a segment in changetracking.

     * A block with a higher number is changed earlier (lower LSN)
       than lower numbered blocks.

     * The first batch of changed blocks obtained by resync worker
       from CT log for this relation contains only lower (according
       to block number) blocks.  The higher block with lower LSN is
       not included in this batch.  Another query must be run
       against CT log to obtain this block.

     * The SQL query used to obtain next batch of changed blocks for
       this relation contains incorrect WHERE clause involving a
       filter based on LSN of previously obtained blocks.  The
       higher numbered block is missed out - not returned by the
       query as changed block for the relation.  The block is never
       shipped from primary to mirror, resulting in data loss.

    The test aims to verify that this doesn't happen as the bug is
    now fixed.
    '''
    config = GPDBConfig()
    # Logical 'and' (the original used bitwise '&') for the precondition.
    assert (config.is_not_insync_segments() and
            config.is_balanced_segments()), 'cluster not in-sync and balanced'

    # Create table and insert data so that adequate number of
    # blocks are occupied.
    self.run_sql('resync_bug_setup')

    # Bring down primaries and transition mirrors to
    # changetracking.
    filerep = Filerepe2e_Util()
    filerep.inject_fault(y='fault', f='segment_probe_response',
                         r='primary')
    # Trigger the fault by running a sql file.
    PSQL.run_sql_file(local_path('test_ddl.sql'))
    filerep.wait_till_change_tracking_transition()

    # Set gp_filerep_ct_batch_size = 3.
    cmd = Command('reduce resync batch size',
                  'gpconfig -c gp_filerep_ct_batch_size -v 3')
    cmd.run()
    assert cmd.get_results().rc == 0, 'gpconfig failed'
    cmd = Command('load updated config', 'gpstop -au')
    cmd.run()
    assert cmd.get_results().rc == 0, '"gpstop -au" failed'

    self.run_sql('change_blocks_in_ct')

    # Capture change tracking log contents from the segment of
    # interest for debugging, in case the test fails.
    (host, port) = GPDBConfig().get_hostandport_of_segment(0, 'p')
    # Bind the path to a local name: the assertion message used to refer
    # to an undefined 'sql_file' and raised NameError exactly when the
    # assertion failed.
    ct_log_sql = local_path('sql/ct_log_contents.sql')
    assert PSQL.run_sql_file_utility_mode(
        sql_file=ct_log_sql,
        out_file=local_path('output/ct_log_contents.out'),
        host=host, port=port), ct_log_sql

    gprecover = GpRecover(GPDBConfig())
    gprecover.incremental(False)
    gprecover.wait_till_insync_transition()

    # Rebalance, so that original primary is back in the role
    gprecover = GpRecover(GPDBConfig())
    gprecover.rebalance()
    gprecover.wait_till_insync_transition()

    # Reset gp_filerep_ct_batch_size
    cmd = Command('reset resync batch size',
                  'gpconfig -r gp_filerep_ct_batch_size')
    cmd.run()
    assert cmd.get_results().rc == 0, 'gpconfig failed'
    cmd = Command('load updated config', 'gpstop -au')
    cmd.run()
    assert cmd.get_results().rc == 0, '"gpstop -au" failed'

    self.run_sql('select_after_rebalance')
def full_recoverseg(self):
    """Run a full recovery through a GpRecover built on a fresh GPDBConfig."""
    GpRecover(GPDBConfig()).full()
def test_with_standby(self):
    """
    Check that the cluster's mirror host addresses are the same with and
    without a standby master.

    Mirrors are added (with spreading) to the cluster and the mirror
    address per content id is recorded. The cluster is then deleted and
    re-initialised, a standby is created if the new cluster has none,
    mirrors are added again and the addresses are compared per content
    id. The standby is removed at the end. The test is skipped on a
    cluster that is not multinode.
    """
    if not self.config.is_multinode():
        self.skipTest('skipping test since the cluster is not multinode')
    gprecover = GpRecover()

    self._setup_gpaddmirrors()
    # adding mirrors first
    # NOTE(review): _setup_gpaddmirrors() is called twice in a row; this
    # looks redundant but is kept as in the original - confirm.
    self._setup_gpaddmirrors()
    self._generate_gpinit_config_files()
    self._cleanup_segment_data_dir(self.host_file, self.mirror_data_dir)

    res = {'rc': 0, 'stdout': '', 'stderr': ''}
    run_shell_command(
        "gpaddmirrors -a -i %s -s -d %s --verbose" %
        (self.mirror_config_file, self.mdd),
        'run gpaddmirrros with mirror spreading', res)
    self.assertEqual(0, res['rc'])
    gprecover.wait_till_insync_transition()

    get_mirror_address = 'SELECT content, address FROM gp_segment_configuration WHERE preferred_role = \'m\';'
    rows = self.format_sql_result(get_mirror_address)
    # map each mirror's content id to its host address
    mirror_hosts_wo_stdby = dict((row[0], row[1]) for row in rows)

    # delete and reinitialize cluster again
    self._do_gpdeletesystem()
    self._do_gpinitsystem()
    gprecover.wait_till_insync_transition()

    # create standby, needs to get a new config_info instance for new cluster
    config_info = GPDBConfig()
    if not config_info.has_master_mirror():
        self._do_gpinitstandby()

    self._setup_gpaddmirrors()
    self._generate_gpinit_config_files()
    self._cleanup_segment_data_dir(self.host_file, self.mirror_data_dir)

    # add mirror for the new cluster which has standby configured
    res = {'rc': 0, 'stdout': '', 'stderr': ''}
    run_shell_command(
        "gpaddmirrors -a -i %s -s -d %s --verbose" %
        (self.mirror_config_file, self.mdd),
        'run gpaddmirrros with mirror spreading', res)
    self.assertEqual(0, res['rc'])
    gprecover.wait_till_insync_transition()

    # verify that the configuration will be same as mirror_config_file specified
    self.verify_config_file_with_gp_config()
    self.check_mirror_seg()

    rows = self.format_sql_result(get_mirror_address)
    mirror_hosts_with_stdby = dict((row[0], row[1]) for row in rows)
    for key in mirror_hosts_wo_stdby:
        self.assertEqual(mirror_hosts_wo_stdby[key],
                         mirror_hosts_with_stdby[key])

    res = {'rc': 0, 'stdout': '', 'stderr': ''}
    run_shell_command("gpinitstandby -ar", 'remove standby', res)
    if res['rc'] > 0:
        raise GPAddmirrorsTestCaseException("Failed to remove the standby")