def test_ao_malloc_failure(self):
    """
    @product_version gpdb: [4.3.5.1 -]
    """
    # Start from a clean slate, then build a small compressed AO table.
    PSQL.run_sql_command('DROP table if exists ao_read_malloc')
    PSQL.run_sql_command(
        'create table ao_read_malloc (a int) with (appendonly=true, compresstype=quicklz)'
    )
    PSQL.run_sql_command('insert into ao_read_malloc '
                         'select * from generate_series(1, 1000)')

    # Arm the malloc_failure fault on every primary segment.
    fault_cmd = Command(
        'fault injector',
        'source $GPHOME/greenplum_path.sh; '
        'gpfaultinjector -f malloc_failure '
        '-y error -H ALL -r primary')
    fault_cmd.run()

    # Reading the table must trip the fault as an ERROR, without the
    # segments becoming unreachable.
    results = {'rc': 0, 'stdout': '', 'stderr': ''}
    PSQL.run_sql_command(
        sql_cmd='select count(*) from ao_read_malloc', results=results)
    logger.info(results)

    self.assertTrue("ERROR: fault triggered" in results['stderr'])
    self.assertFalse(
        "ERROR: could not temporarily connect to one or more segments"
        in results['stderr'])
    logger.info('Pass')
def template0_wrap_around_on_segment(self, primary):
    """
    Same as template0_wrap_around, but on segment.
    """
    logger.info("template0_wrap_around_on_segment: dbid(%d) %s:%d'" %
                (primary.dbid, primary.hostname, primary.port))
    self._raise_template0_age(self.WRAP_LIMIT, primary)
    PSQL(sql_cmd="CREATE DATABASE newdb TEMPLATE template0").run(
        validateAfter=True)

    age_sql = "SELECT age(datfrozenxid) FROM pg_database WHERE datname='newdb'"
    seg_url = dbconn.DbURL(hostname=primary.hostname, port=primary.port)

    # Verify that age of newdb on the segment is negative (wrapped).
    with dbconn.connect(seg_url, utility=True) as conn:
        wrapped_age = int(dbconn.execSQLForSingleton(conn, age_sql))
    self.assertTrue(wrapped_age < 0)

    # Reset newdb age so as to recover from wrap around.
    self._reset_age("newdb", primary)

    # Normal operations must succeed on newdb after the age reset.
    self._basic_sanity_check("clean", {"dbname": "newdb"})

    # Verify that age of newdb on the segment is valid again.
    with dbconn.connect(seg_url, utility=True) as conn:
        recovered_age = int(dbconn.execSQLForSingleton(conn, age_sql))
    self.assertTrue(recovered_age > 0)
    PSQL.drop_database(dbname="newdb")
def template0_wrap_around_on_segment(self, primary):
    """
    Same as template0_wrap_around, but on segment.
    """
    logger.info("template0_wrap_around_on_segment: dbid(%d) %s:%d'" %
                (primary.dbid, primary.hostname, primary.port))
    self._raise_template0_age(self.WRAP_LIMIT, primary)
    PSQL(sql_cmd="CREATE DATABASE newdb TEMPLATE template0").run(
        validateAfter=True)

    sql = "SELECT age(datfrozenxid) FROM pg_database WHERE datname='newdb'"
    dburl = dbconn.DbURL(hostname=primary.hostname, port=primary.port)

    # The wrapped-around xid must show up as a negative age on the segment.
    with dbconn.connect(dburl, utility=True) as conn:
        self.assertTrue(int(dbconn.execSQLForSingleton(conn, sql)) < 0)

    # Reset newdb age so as to recover from wrap around, then confirm
    # normal operations work on the recovered database.
    self._reset_age("newdb", primary)
    self._basic_sanity_check("clean", {"dbname": "newdb"})

    # After recovery the age on the segment must be positive again.
    with dbconn.connect(dburl, utility=True) as conn:
        self.assertTrue(int(dbconn.execSQLForSingleton(conn, sql)) > 0)
    PSQL.drop_database(dbname="newdb")
def test_autovacuum_signaling(self):
    """
    Raise the nextXid to oldest_frozenxid + autovacuum_freeze_max_age.
    Run a transaction.
    Ensure that no autovacuum daemon is started.
    """
    dburl = dbconn.DbURL()
    with dbconn.connect(dburl) as conn:
        oldest_xid = int(
            dbconn.execSQLForSingleton(conn, 'select get_oldest_xid()'))
        autovacuum_freeze_max_age = int(
            dbconn.execSQLForSingleton(conn, 'show autovacuum_freeze_max_age'))
        autovacuum_xid_limit = xid_sum(oldest_xid, autovacuum_freeze_max_age)
        logger.info('Raising master xid to autovacuum_xid_limit %d' %
                    autovacuum_xid_limit)
        dbconn.execSQLForSingleton(
            conn, "select spoof_next_xid('%d'::xid)" % autovacuum_xid_limit)

    # A new connection to the postmaster, at this point, will ensure that we roll through
    # the ServerLoop and potentially fork an autovacuum process... if enabled.
    # Burn a transaction to trigger any undesirable behavior that we're disabling.
    with dbconn.connect(dburl) as conn:
        self.assertEqual(1, int(dbconn.execSQLForSingleton(conn, 'select 1')))

    ps_cmd = Command('check for autovacuum',
                     'ps -ef | grep -v grep | grep postgres | grep autovacuum')
    ps_cmd.run()
    self.assertEqual(ps_cmd.get_results().stdout, "",
                     "Seriously? Found a postgres autovacuum process!")

    self._basic_sanity_check('clean')
def test_autovacuum_signaling_on_segment(self):
    """
    Same as above, but on a segment.
    """
    # connect to the master to build gparray
    primary, _ = self._get_primary_mirror_pair()
    logger.info('Isolated segment %d at %s:%d' %
                (primary.dbid, primary.hostname, primary.port))

    seg_url = dbconn.DbURL(hostname=primary.hostname, port=primary.port)
    with dbconn.connect(seg_url, utility=True) as conn:
        oldest_xid = int(
            dbconn.execSQLForSingleton(conn, 'select get_oldest_xid()'))
        autovacuum_freeze_max_age = int(
            dbconn.execSQLForSingleton(conn, 'show autovacuum_freeze_max_age'))
        autovacuum_xid_limit = xid_sum(oldest_xid, autovacuum_freeze_max_age)
        logger.info('Raising segment xid to autovacuum_xid_limit %d' %
                    autovacuum_xid_limit)
        dbconn.execSQLForSingleton(
            conn, "select spoof_next_xid('%d'::xid)" % autovacuum_xid_limit)

    # A new connection to the postmaster, at this point, will ensure that we roll through
    # the ServerLoop and potentially fork an autovacuum process... if enabled.
    with dbconn.connect(seg_url, utility=True) as conn:
        self.assertEqual(1, int(dbconn.execSQLForSingleton(conn, 'select 1')))

    ps_cmd = Command(
        'check for autovacuum',
        'ssh %s ps -ef | grep -v grep | grep postgres | grep autovacuum' %
        primary.hostname)
    ps_cmd.run()
    self.assertEqual(ps_cmd.get_results().stdout, "",
                     "Seriously? Found a postgres autovacuum process!")

    self._basic_sanity_check('clean')
def template0_warn_limit_on_segment(self, primary):
    """
    Same as template0_warn_limit, but on a segment.
    """
    logger.info("template0_warn_limit_on_segment: dbid(%d) %s:%d'" %
                (primary.dbid, primary.hostname, primary.port))

    # Bump up age of template0 to cause warn limit violation.
    self._raise_template0_age(self.WARN_LIMIT, primary)

    # All is well until we create a new db off template0.
    self._basic_sanity_check("clean")

    # Create database newdb off template0.
    PSQL(sql_cmd="CREATE DATABASE newdb TEMPLATE template0").run(
        validateAfter=True)
    logger.info("newdb created off template0")

    # The xid limits in shared memory are only updated at a VACUUM,
    # so run one now.
    PSQL(sql_cmd='VACUUM FREEZE', dbname='postgres',
         out_file='vacuum_postgres.out').run(validateAfter=True)

    # newdb is now the oldest database, older than warn limit.
    self._basic_sanity_check("warn_segment")

    # Ensure that vacuum freeze on newdb stops the warnings.
    PSQL(sql_cmd="VACUUM FREEZE", dbname="newdb",
         out_file="vacuum_newdb_warn_seg.out").run(validateAfter=True)
    self._basic_sanity_check("clean")
    PSQL.drop_database(dbname="newdb")
def template0_wrap_around(self):
    """
    Raise next xid so that age(template0) suffers a wrap around and
    becomes negative.  Create a new database off template0, which
    also suffers wrap around.  Reset the new db's age.  Sanity must
    succeed on the new db.
    """
    self._raise_template0_age(self.WRAP_LIMIT, self.gparray.master)
    PSQL(sql_cmd="CREATE DATABASE newdb TEMPLATE template0").run(
        validateAfter=True)

    age_sql = "SELECT age(datfrozenxid) FROM pg_database WHERE datname='newdb'"
    master_url = dbconn.DbURL()

    # Xid wrap-around should cause template0 and newdb's age to be negative.
    with dbconn.connect(master_url, utility=True) as conn:
        self.assertTrue(int(dbconn.execSQLForSingleton(conn, age_sql)) < 0)

    # All xids in newdb are frozen at this point. Therefore, we
    # can reset its age so that it is not negative.
    self._reset_age("newdb")
    with dbconn.connect(master_url, utility=True) as conn:
        self.assertTrue(int(dbconn.execSQLForSingleton(conn, age_sql)) > 0)

    # Verify that normal operations can be performed on newdb post recovery
    # from wraparound.
    self._basic_sanity_check("clean", {"dbname": "newdb"})
    logger.info("Sanity succeeded on newdb, dropping it.")
    PSQL.drop_database(dbname="newdb")
def test_ao_malloc_failure(self):
    """
    @product_version gpdb: [4.3.5.1 -]
    """
    # Recreate a small quicklz-compressed append-only table.
    PSQL.run_sql_command('DROP table if exists ao_read_malloc')
    PSQL.run_sql_command(
        'create table ao_read_malloc (a int) with (appendonly=true, compresstype=quicklz)'
    )
    PSQL.run_sql_command('insert into ao_read_malloc '
                         'select * from generate_series(1, 1000)')

    # Inject the malloc_failure fault on all primaries.
    injector = Command(
        'fault injector',
        'source $GPHOME/greenplum_path.sh; '
        'gpfaultinjector -f malloc_failure '
        '-y error -H ALL -r primary')
    injector.run()

    res = {'rc': 0, 'stdout': '', 'stderr': ''}
    PSQL.run_sql_command(
        sql_cmd='select count(*) from ao_read_malloc', results=res)
    logger.info(res)

    # The scan must fail with the injected fault error, and the segments
    # must remain reachable.
    self.assertTrue("ERROR: fault triggered" in res['stderr'])
    self.assertFalse(
        "ERROR: could not temporarily connect to one or more segments"
        in res['stderr'])
    logger.info('Pass')
def template0_wrap_around(self):
    """
    Raise next xid so that age(template0) suffers a wrap around and
    becomes negative.  Create a new database off template0, which
    also suffers wrap around.  Reset the new db's age.  Sanity must
    succeed on the new db.
    """
    self._raise_template0_age(self.WRAP_LIMIT, self.gparray.master)
    PSQL(sql_cmd="CREATE DATABASE newdb TEMPLATE template0").run(
        validateAfter=True)

    sql = "SELECT age(datfrozenxid) FROM pg_database WHERE datname='newdb'"
    dburl = dbconn.DbURL()
    with dbconn.connect(dburl, utility=True) as conn:
        age_before = int(dbconn.execSQLForSingleton(conn, sql))
    # Xid wrap-around should cause template0 and newdb's age to be negative.
    self.assertTrue(age_before < 0)

    # All xids in newdb are frozen at this point. Therefore, we
    # can reset its age so that it is not negative.
    self._reset_age("newdb")
    with dbconn.connect(dburl, utility=True) as conn:
        age_after = int(dbconn.execSQLForSingleton(conn, sql))
    self.assertTrue(age_after > 0)

    # Verify that normal operations can be performed on newdb post recovery
    # from wraparound.
    self._basic_sanity_check("clean", {"dbname": "newdb"})
    logger.info("Sanity succeeded on newdb, dropping it.")
    PSQL.drop_database(dbname="newdb")
def test_function(my_self):
    """
    Time repeated runs of the wrapped test method, compare the average
    runtime against a recorded baseline, and append the statistics to
    runtime_stats.csv.

    @param my_self: the test-case instance carrying repetitions, baseline,
        threshold and the original test method name.
    @raise AssertionError: if the runtime deviation exceeds the threshold.
    """
    orig_test_method = getattr(my_self, my_self._orig_testMethodName)
    runtime_list = []
    for i in range(my_self.repetitions):
        # Get time before and after this function to time the test
        start = datetime.now()
        orig_test_method()
        end = datetime.now()
        delta = end - start
        milli = delta.seconds * 1000 + (float(delta.microseconds) / 1000)
        runtime_list.append(milli)

    total_runtime = sum(runtime_list)
    min_runtime = min(runtime_list)
    max_runtime = max(runtime_list)
    avg_runtime = total_runtime / my_self.repetitions
    std_dev = sqrt(
        sum((runtime - avg_runtime)**2 for runtime in runtime_list) /
        my_self.repetitions)
    std_dev_pct = std_dev * 100 / float(avg_runtime)
    logger.info("%s - %s" % (my_self, runtime_list))

    # Find the baseline file. For now, we assume that there is only
    # one baseline version specified
    current_dir = os.path.dirname(inspect.getfile(my_self.__class__))
    baseline_file = 'baseline_' + my_self.baseline + '.csv'
    baseline_file_path = os.path.join(current_dir, baseline_file)
    (baseline_runtime, delta) = GPPerfDiff.check_perf_deviation(
        my_self._orig_testMethodName, baseline_file_path, avg_runtime,
        my_self.threshold)

    # compose statistics
    # BUG FIX: this block (and the final assertion) referenced the
    # undefined name 'self'; the function's parameter is 'my_self'.
    stats = [('Test Name', "%s.%s" % (my_self.__class__.__name__,
                                      my_self._orig_testMethodName)),
             ('Average Runtime', "%0.2f" % avg_runtime),
             ('Baseline Runtime', "%0.2f" % baseline_runtime),
             ('% Difference', "%0.2f" % delta),
             ('Allowable Threshold', "%0.2f" % my_self.threshold),
             ('Repetitions Performed', "%d" % my_self.repetitions),
             ('Total Runtime', "%0.2f" % total_runtime),
             ('Min Runtime', "%0.2f" % min_runtime),
             ('Max Runtime', "%0.2f" % max_runtime),
             ('Std Dev', "%0.2f" % std_dev),
             ('% Std Dev', "%0.2f" % std_dev_pct)]
    header = [x[0] for x in stats]
    data = [x[1] for x in stats]

    # dump statistics to a runtime_stats.csv file
    output_file_path = os.path.join(current_dir, 'runtime_stats.csv')
    existing = os.path.exists(output_file_path)
    mode = 'a' if existing else 'w'
    with open(output_file_path, mode) as f:
        if not existing:
            f.write("%s\n" % ",".join(header))
        f.write("%s\n" % ",".join(data))

    my_self.assertGreater(my_self.threshold, delta,
                          "assert delta < my_self.threshold")
def run_expansion(self, mapfile):
    """
    Run an expansion test based on the mapping file
    """
    mapfile = self._get_absolute_filename(mapfile)
    # Remember the map file; later validations parse it.
    self.expansion_map_file = mapfile
    self.distribution_policy_snapshot(diff=False)
    logger.info("Running expansion setup ...")
    expand_cmd = Command(name='run gpexpand',
                         cmdStr='gpexpand -i %s -D %s' %
                         (mapfile, self.test_database),
                         ctxt=REMOTE,
                         remoteHost='localhost')
    expand_cmd.run(validateAfter=True)
    return True
def test_function(my_self):
    """
    Benchmark the wrapped test method over several repetitions, compare
    the average runtime with the stored baseline CSV, and log stats to
    runtime_stats.csv.

    @param my_self: the test-case instance carrying repetitions, baseline,
        threshold and the original test method name.
    @raise AssertionError: if the runtime deviation exceeds the threshold.
    """
    orig_test_method = getattr(my_self, my_self._orig_testMethodName)
    runtime_list = []
    for i in range(my_self.repetitions):
        # Get time before and after this function to time the test
        start = datetime.now()
        orig_test_method()
        end = datetime.now()
        delta = end - start
        milli = delta.seconds * 1000 + (float(delta.microseconds) / 1000)
        runtime_list.append(milli)

    total_runtime = sum(runtime_list)
    min_runtime = min(runtime_list)
    max_runtime = max(runtime_list)
    avg_runtime = total_runtime / my_self.repetitions
    std_dev = sqrt(
        sum((runtime - avg_runtime)**2 for runtime in runtime_list) /
        my_self.repetitions)
    std_dev_pct = std_dev * 100 / float(avg_runtime)
    logger.info("%s - %s" % (my_self, runtime_list))

    # Find the baseline file. For now, we assume that there is only
    # one baseline version specified
    current_dir = os.path.dirname(inspect.getfile(my_self.__class__))
    baseline_file = 'baseline_' + my_self.baseline + '.csv'
    baseline_file_path = os.path.join(current_dir, baseline_file)
    (baseline_runtime, delta) = GPPerfDiff.check_perf_deviation(
        my_self._orig_testMethodName, baseline_file_path, avg_runtime,
        my_self.threshold)

    # compose statistics
    # BUG FIX: the original used the undefined name 'self' here and in
    # the closing assertion; the parameter is 'my_self'.
    stats = [('Test Name', "%s.%s" % (my_self.__class__.__name__,
                                      my_self._orig_testMethodName)),
             ('Average Runtime', "%0.2f" % avg_runtime),
             ('Baseline Runtime', "%0.2f" % baseline_runtime),
             ('% Difference', "%0.2f" % delta),
             ('Allowable Threshold', "%0.2f" % my_self.threshold),
             ('Repetitions Performed', "%d" % my_self.repetitions),
             ('Total Runtime', "%0.2f" % total_runtime),
             ('Min Runtime', "%0.2f" % min_runtime),
             ('Max Runtime', "%0.2f" % max_runtime),
             ('Std Dev', "%0.2f" % std_dev),
             ('% Std Dev', "%0.2f" % std_dev_pct)]
    header = [x[0] for x in stats]
    data = [x[1] for x in stats]

    # dump statistics to a runtime_stats.csv file
    output_file_path = os.path.join(current_dir, 'runtime_stats.csv')
    existing = os.path.exists(output_file_path)
    mode = 'a' if existing else 'w'
    with open(output_file_path, mode) as f:
        if not existing:
            f.write("%s\n" % ",".join(header))
        f.write("%s\n" % ",".join(data))

    my_self.assertGreater(my_self.threshold, delta,
                          "assert delta < my_self.threshold")
def run_data_redistribution(self, duration='60:00:00'):
    """
    Run expansion to perform data redistribution
    """
    logger.info("Running expansion redistribution ...")
    redistribute_cmd = Command(name='run data distribution',
                               cmdStr='gpexpand -d %s -D %s' %
                               (duration, self.test_database),
                               ctxt=REMOTE,
                               remoteHost='localhost')
    redistribute_cmd.run(validateAfter=True)
    # Confirm the data survived redistribution.
    self.run_data_validation()
    return True
def set_guc(self, guc_name, guc_value):
    """Set a GUC via gpconfig, then reload config files with gpstop -u."""
    logger.info('Configuring ' + guc_name + ' ...')
    config_cmd = Command("gpconfig " + guc_name,
                         "gpconfig -c " + guc_name + " -v " + guc_value)
    config_cmd.run()
    self.assertEqual(config_cmd.get_results().rc, 0, str(config_cmd))

    logger.info('gpstop -u to reload config files...')
    reload_cmd = Command("gpstop -u", "gpstop -u")
    reload_cmd.run()
    self.assertEqual(reload_cmd.get_results().rc, 0, str(reload_cmd))
def set_guc(self, guc_name, guc_value):
    """Apply a GUC with gpconfig and make the cluster reload its config."""
    logger.info('Configuring ' + guc_name + ' ...')
    cmd = Command("gpconfig " + guc_name,
                  "gpconfig -c " + guc_name + " -v " + guc_value)
    cmd.run()
    self.assertEqual(cmd.get_results().rc, 0, str(cmd))

    logger.info('gpstop -u to reload config files...')
    cmd = Command("gpstop -u", "gpstop -u")
    cmd.run()
    self.assertEqual(cmd.get_results().rc, 0, str(cmd))
def check_pg_stat_activity(self, query):
    """
    Poll pg_stat_activity until the given query text appears.

    @param query: substring of the query to look for in pg_stat_activity.
    @return: True if the query was found within the retry budget,
        False otherwise.
    """
    import time

    logger.info('checking for query in pg_stat_activity')
    MAX_TRIES = 300
    tries = 0
    while tries < MAX_TRIES:
        output = PSQL.run_sql_command('SELECT * FROM pg_stat_activity')
        if query in output:
            logger.info('found query in pg_stat_activity')
            return True
        tries += 1
        # BUG FIX: the original busy-looped with no delay, so all 300
        # tries could elapse before the target transaction even started.
        # Sleep briefly between polls so this actually waits.
        time.sleep(1)
    return False
def run_test(self):
    """
    Override of SQLTestCase.  Create a base backup and start standby,
    run some SQL in primary side then promote, check if the data is
    streamed correctly.
    """
    sql_file = self.sql_file
    Command('gpinitstandby -r', 'gpinitstandby -ra').run()
    self.assertEqual(self.standby.create(), 0)
    gpact_stdby = GpactivateStandby()
    res = self.standby.start()
    self.assertTrue(res.wasSuccessful())

    # wait for the walreceiver to start
    num_walsender = self.wait_for_walsender()
    self.assertEqual(num_walsender, 1)

    # setup script is run on primary while standby is running.
    # .in file will be substitute with runtime information, if any.
    setup_file = sql_file.replace('.sql', '_setup.sql')
    if os.path.exists(setup_file + '.in'):
        self.preprocess_file(setup_file + '.in')
    self.assertTrue(PSQL.run_sql_file(setup_file, dbname=self.db_name))

    if self.promote_using_pg_ctl:
        self.assertTrue(self.standby.promote())
    else:
        self.assertTrue(self.standby.promote_manual())

    # fetch timelineids for both primary and standby (post-promote)
    primary_tli = self.fetch_tli(os.environ.get('MASTER_DATA_DIRECTORY'))
    standby_tli = self.fetch_tli(self.standby_datadir)
    logger.info("primary tli = " + primary_tli)
    logger.info("standby tli after promote = " + standby_tli)
    # primary_tli should be less than standby_tli by 1
    self.assertTrue(int(primary_tli) + 1 == int(standby_tli))

    # BUG FIX: the failback call used to sit after 'return result' and
    # was unreachable; run it in a finally block so we always fail back
    # to the original master after the test completes.
    try:
        # SQLTestCase doesn't allow to set port. Use environ to tell it.
        with NewEnv(PGPORT=self.standby_port,
                    MASTER_DATA_DIRECTORY=self.standby_datadir) as env:
            result = super(PromoteTestCase, self).run_test()
        return result
    finally:
        gpact_stdby.failback_to_original_master()
def _restore_stop_limit_guc(self, datadir):
    """
    Reset xid_stop_limit GUC to default value, by removing the setting
    from postgresql.conf.

    @param datadir: PGDATA directory containing postgresql.conf that
        needs to be restored.
    """
    logger.info("Undo the stop limit GUC change")

    stop_cmd = "source $GPHOME/greenplum_path.sh && gpstop -a"
    Command("stop system", stop_cmd).run(validateAfter=True)

    # Strip the xid_stop_limit line from postgresql.conf (keeps a .bk copy).
    sed_cmd = ('sed -i".bk" "s|xid_stop_limit=.*||g" %s/postgresql.conf' %
               datadir)
    Command("undo xid_stop_limit change", sed_cmd).run(validateAfter=True)

    start_cmd = "source $GPHOME/greenplum_path.sh && gpstart -a"
    Command("start system", start_cmd).run(validateAfter=True)
def run_test(self):
    """
    Override of SQLTestCase.  Create a base backup and start standby,
    run some SQL in primary side then promote, check if the data is
    streamed correctly.
    """
    sql_file = self.sql_file
    Command('gpinitstandby -r', 'gpinitstandby -ra').run()
    self.assertEqual(self.standby.create(), 0)
    gpact_stdby = GpactivateStandby()
    res = self.standby.start()
    self.assertTrue(res.wasSuccessful())

    # wait for the walreceiver to start
    num_walsender = self.wait_for_walsender()
    self.assertEqual(num_walsender, 1)

    # setup script is run on primary while standby is running.
    # .in file will be substitute with runtime information, if any.
    setup_file = sql_file.replace('.sql', '_setup.sql')
    if os.path.exists(setup_file + '.in'):
        self.preprocess_file(setup_file + '.in')
    self.assertTrue(PSQL.run_sql_file(setup_file, dbname=self.db_name))

    if self.promote_using_pg_ctl:
        self.assertTrue(self.standby.promote())
    else:
        self.assertTrue(self.standby.promote_manual())

    # fetch timelineids for both primary and standby (post-promote)
    primary_tli = self.fetch_tli(os.environ.get('MASTER_DATA_DIRECTORY'))
    standby_tli = self.fetch_tli(self.standby_datadir)
    logger.info("primary tli = " + primary_tli)
    logger.info("standby tli after promote = " + standby_tli)
    # primary_tli should be less than standby_tli by 1
    self.assertTrue(int(primary_tli) + 1 == int(standby_tli))

    # BUG FIX: failback_to_original_master() was dead code placed after
    # 'return result'; a finally block guarantees we always fail back to
    # the original master once the test completes.
    try:
        # SQLTestCase doesn't allow to set port. Use environ to tell it.
        with NewEnv(PGPORT=self.standby_port,
                    MASTER_DATA_DIRECTORY=self.standby_datadir) as env:
            result = super(PromoteTestCase, self).run_test()
        return result
    finally:
        gpact_stdby.failback_to_original_master()
def template0_stop_limit_on_segment(self, primary):
    """
    Same as template0_stop_limit, but on segment.
    """
    logger.info("template0_stop_limit_on_segment: dbid(%d) %s:%d'" %
                (primary.dbid, primary.hostname, primary.port))
    dburl = dbconn.DbURL(hostname=primary.hostname, port=primary.port)
    with dbconn.connect(dburl, utility=True) as conn:
        slimit_guc = int(
            dbconn.execSQLForSingleton(conn, "SHOW xid_stop_limit"))
    new_limit = xid_sum(slimit_guc, -(10**6))

    # Raise nextXid so that template0 age would cross stop limit.
    self._raise_template0_age(self.STOP_LIMIT, primary)

    # newdb's age crosses stop limit and GPDB stops accepting commands.
    PSQL(sql_cmd="CREATE DATABASE newdb TEMPLATE template0").run(
        validateAfter=True)
    logger.info("newdb created off template0")

    # The xid limits in shared memory are only updated at a VACUUM,
    # so run one now.
    PSQL(sql_cmd='VACUUM FREEZE', dbname='postgres',
         out_file='vacuum_postgres.out').run(validateAfter=True)

    # Ensure that utility connections to the segment fail with error.
    psql_args = {"PGOPTIONS": "-c 'gp_session_role=utility'",
                 "host": primary.hostname,
                 "port": primary.port}
    self._basic_sanity_check("error", psql_args)
    logger.info("Utility connection to dbid(%d) reported stop limit "
                "error, as expected." % primary.dbid)

    try:
        # Verify that SQL commands from master fail.
        PSQL(sql_cmd="CREATE TABLE test (a int, b int)").run(
            validateAfter=True)
        self.fail("CREATE TABLE succeeded from master, when expecting "
                  "stop limit error on segment.")
    except ExecutionError:
        logger.info("CREATE TABLE failed from master, as expected.")

    # Reduce xid_stop_limit as per the standard procedure.
    self._reduce_stop_limit_guc(primary, new_limit)

    # Vacuum freezing newdb should be suffice to recover.
    PSQL(sql_cmd="VACUUM FREEZE", dbname="newdb",
         out_file="vacuum_newdb_wl.out").run(validateAfter=True)

    # Ensure that utility connections to the segment are successful.
    sql = "SELECT age(datfrozenxid) FROM pg_database WHERE datname='newdb'"
    with dbconn.connect(dburl, utility=True) as conn:
        age_newdb = int(dbconn.execSQLForSingleton(conn, sql))
    self.assertTrue(age_newdb > 0)

    # Verify SQL commands from master are successful.
    self._basic_sanity_check("clean")
    self._restore_stop_limit_guc(primary.datadir)

    # Verify SQL commands after restoring xid_stop_limit GUC.
    self._basic_sanity_check("clean")
    PSQL.drop_database(dbname="newdb")
def gpdb_functionality_validation(self):
    """
    Verify that we can create a table, insert data, select data and drop
    data.

    @return: True if the smoke-test SQL output matches its answer file,
        False if the output differs.
    @raise Exception: if running the SQL file itself fails.
    """
    filename = self._get_absolute_filename('test_basic_gpdb_functionality.sql')
    logger.info('Validating that Greenplum Database is still functional ...')
    # BUG FIX: this was a bare 'assert', which is stripped under
    # 'python -O'; raise explicitly instead (matches run_data_validation).
    if not PSQL.run_sql_file(sql_file=filename, dbname=self.test_database):
        raise Exception("failed running '%s'" % filename)
    answer_file = re.sub('sql$', 'ans', filename)
    output_file = re.sub('sql$', 'out', filename)
    if not Gpdiff.are_files_equal(output_file, answer_file):
        logger.error('Could not validate gpdb functionality')
        return False
    return True
def _set_allowconn_template0(self, flag):
    """
    Enable or disable connections to template0 on master and all
    primary segments, per the boolean flag.
    """
    if flag:
        logger.info("Enabling connections to template0")
    else:
        logger.info("Disabling connections to template0")

    for seg in self.gparray.getDbList(True):
        # Only primaries carry the authoritative catalog entry.
        if seg.role != 'p':
            continue
        seg_url = dbconn.DbURL(hostname=seg.hostname, port=seg.port)
        with dbconn.connect(seg_url, utility=True,
                            allowSystemTableMods='dml') as conn:
            dbconn.execSQL(
                conn,
                "update pg_database set datallowconn=%s "
                "where datname='template0'" % flag)
            conn.commit()
def run_gppkg_uninstall(self, pkgname):
    """
    @summary: Runs gppkg -r to uninstall a gppkg. Output is written to gppkg_r.log file in current directory.
    @param pkgfile: The name of the .gppkg file
    @raise GppkgUtilError: If gppkg uninstall fails
    """
    (existed, pkg) = self.check_pkg_exists(pkgname)
    if not existed:
        logger.info('the package does not exist, no need to remove, %s' %
                    pkgname)
        return True

    logger.debug('\nGppkgUtil: Uninstalling gppkg using gppkg file: %s' %
                 (pkg))
    res = {'rc': 0, 'stderr': '', 'stdout': ''}
    run_shell_command('gppkg -r %s' % pkg, 'run gppkg', res)
    logger.debug(res)

    if res['rc'] > 0:
        logger.info('Failed to Uninstall the package, %s' % pkgname)
        return False
    return True
def check_random_dist_tuple_count_skew(self, tname, values):
    """
    max - min should not exceed 5% of the Maximum number of tuples.

    @param tname: table name (used only for logging).
    @param values: per-segment tuple counts.
    @return: False if there is any error
             True otherwise
    """
    if not values:
        return True
    max_tuple_count, min_tuple_count = max(values), min(values)
    # BUG FIX: guard against an empty table (max == 0), which previously
    # raised ZeroDivisionError when computing the percentage.
    if max_tuple_count == 0:
        logger.info("OK: Table (%s) Max (%d) Min (%d) tuples per segdb" %
                    (tname, max_tuple_count, min_tuple_count))
        return True
    diff = max_tuple_count - min_tuple_count
    pct = float(diff) / float(max_tuple_count) * 100.0
    if pct > 5:
        logger.error("MAX (%d) MIN (%d) DIFF (%d) PCT(%f)" %
                     (max_tuple_count, min_tuple_count, diff, pct))
        return False
    logger.info("OK: Table (%s) Max (%d) Min (%d) tuples per segdb" %
                (tname, max_tuple_count, min_tuple_count))
    return True
def run_data_validation(self):
    """
    Validate data by executing a SQL file and comparing results with the
    answer file.
    """
    sql_path = self._get_absolute_filename(self.select_file)
    logger.info("Validating data using '%s' ..." % sql_path)

    if not PSQL.run_sql_file(sql_file=sql_path, dbname=self.dbname):
        raise Exception("failed querying data pre-expansion: '%s'" % sql_path)
    if not sql_path.endswith('sql'):
        raise Exception("The filename must end in .sql extension")

    answer_file = re.sub('sql$', 'ans', sql_path)
    output_file = re.sub('sql$', 'out', sql_path)
    if Gpdiff.are_files_equal(output_file, answer_file):
        return True
    logger.error("files don't match pre-expansion: '%s' and '%s'" %
                 (answer_file, output_file))
    return False
def template0_warn_limit_on_segment(self, primary):
    """
    Same as template0_warn_limit, but on a segment.
    """
    logger.info("template0_warn_limit_on_segment: dbid(%d) %s:%d'" %
                (primary.dbid, primary.hostname, primary.port))

    # Bump up age of template0 to cause warn limit violation.
    self._raise_template0_age(self.WARN_LIMIT, primary)

    # All is well until we create a new db off template0.
    self._basic_sanity_check("clean")

    # Create database newdb off template0.
    PSQL(sql_cmd="CREATE DATABASE newdb TEMPLATE template0").run(
        validateAfter=True)
    logger.info("newdb created off template0")

    # newdb is now the oldest database, older than warn limit.
    self._basic_sanity_check("warn_segment")

    # Ensure that vacuum freeze on newdb stops the warnings.
    PSQL(sql_cmd="VACUUM FREEZE", dbname="newdb",
         out_file="vacuum_newdb_warn_seg.out").run(validateAfter=True)
    self._basic_sanity_check("clean")
    PSQL.drop_database(dbname="newdb")
def _reduce_stop_limit_guc(self, segdb, new_slimit):
    """
    Lower the xid_stop_limit GUC on the given segment and restart the
    cluster.

    @param segdb: segment whose postgresql.conf is to be modified.
    @param new_slimit: New value of xid_stop_limit GUC, less than the
        default value of 10**9.
    """
    # Stop every segment individually before editing the conf file.
    for seg in self.gparray.getDbList(True):
        logger.info("Stopping segment %d at %s" % (seg.dbid, seg.datadir))
        Command("stop segment",
                "pg_ctl -D %s stop" % seg.datadir).run(validateAfter=True)

    logger.info("New xid_stop_limit: %s" % new_slimit)
    append_cmd = ('echo "xid_stop_limit=%d" >> %s/postgresql.conf' %
                  (new_slimit, segdb.datadir))
    Command("revise xid_stop_limit", append_cmd).run(validateAfter=True)

    logger.info("Starting the cluster")
    Command("start cluster",
            "source $GPHOME/greenplum_path.sh && gpstart -a").run(
                validateAfter=True)

    # Confirm the segment actually picked up the new limit.
    dburl = dbconn.DbURL(hostname=segdb.hostname, port=segdb.port)
    with dbconn.connect(dburl, utility=True) as conn:
        stop_limit = int(
            dbconn.execSQLForSingleton(conn, "SHOW xid_stop_limit"))
    self.assertEqual(stop_limit, new_slimit, "Failed to set xid_stop_limit")
def hosts_gpsegconf_validation(self):
    """
    Validate if the new hosts are added to gp_segment_configuration table.

    Parses the expansion map (one colon-separated segment spec per line)
    and checks each entry against the catalog.

    @return: True if every mapped segment is present, False otherwise.
    """
    logger.info("Verifying expanded segments in gp_segment_configuration table ...")
    with open(self.expansion_map_file) as fp:
        replication_port_list = []
        for line in fp:
            fields = line.split(':')
            # BUG FIX: skip blank/short lines; the original indexed
            # fields[0..6] unconditionally and raised IndexError on them.
            if len(fields) < 7:
                continue
            if len(fields) == 8:
                replication_port_list.append(fields[7])
                cmd = """select count(*) from gp_segment_configuration where hostname = '%s' and address = '%s' and port = %s and dbid = %s and content = %s and role = '%s' and replication_port = %s""" % (
                    fields[0], fields[1], fields[2], fields[4], fields[5],
                    fields[6], fields[7])
            else:
                cmd = """select count(*) from gp_segment_configuration where hostname = '%s' and address = '%s' and port = %s and dbid = %s and content = %s and role = '%s'""" % (
                    fields[0], fields[1], fields[2], fields[4], fields[5],
                    fields[6])
            with dbconn.connect(dbconn.DbURL()) as conn:
                row = dbconn.execSQLForSingleton(conn, cmd)
            if row != 1:
                return False
    return True
def test_autovacuum_signaling_on_segment(self):
    """
    Same as above, but on a segment.
    """
    # connect to the master to build gparray
    primary, _ = self._get_primary_mirror_pair()
    logger.info('Isolated segment %d at %s:%d' %
                (primary.dbid, primary.hostname, primary.port))

    dburl = dbconn.DbURL(hostname=primary.hostname, port=primary.port)
    with dbconn.connect(dburl, utility=True) as conn:
        oldest_xid = int(
            dbconn.execSQLForSingleton(conn, 'select get_oldest_xid()'))
        autovacuum_freeze_max_age = int(
            dbconn.execSQLForSingleton(conn, 'show autovacuum_freeze_max_age'))
        autovacuum_xid_limit = xid_sum(oldest_xid, autovacuum_freeze_max_age)
        logger.info('Raising segment xid to autovacuum_xid_limit %d' %
                    autovacuum_xid_limit)
        dbconn.execSQLForSingleton(
            conn, "select spoof_next_xid('%d'::xid)" % autovacuum_xid_limit)

    # A new connection to the postmaster, at this point, will ensure that we roll through
    # the ServerLoop and potentially fork an autovacuum process... if enabled.
    with dbconn.connect(dburl, utility=True) as conn:
        self.assertEqual(1, int(dbconn.execSQLForSingleton(conn, 'select 1')))

    check_cmd = Command(
        'check for autovacuum',
        'ssh %s ps -ef | grep -v grep | grep postgres | grep autovacuum' %
        primary.hostname)
    check_cmd.run()
    self.assertEqual(check_cmd.get_results().stdout, "",
                     "Seriously? Found a postgres autovacuum process!")

    self._basic_sanity_check('clean')
def distribution_policy_snapshot(self, diff=False):
    """
    Take a snapshot of the distribution policies of the tables.

    @return: True if we succesfully collected the distribution policy
        for the tables.  False even if we are unable to collect the
        distribution policy for a single table.
    """
    logger.info("Collecting distribution policies of all tables on all DBs ...")
    with dbconn.connect(dbconn.DbURL()) as conn:
        cmd = "select datname from pg_database where datname not in ('postgres', 'template1', 'template0')"
        rows = dbconn.execSQL(conn, cmd)
        for row in rows:
            dbname = row[0].strip()
            dbname_sql = 'distribution_%s.sql' % dbname
            if diff:
                dbname_ans = 'distribution_%s.out' % dbname
            else:
                dbname_ans = 'distribution_%s.ans' % dbname

            # Copy the template SQL and substitute the database name.
            shutil.copyfile(self.distribution_sql, dbname_sql)
            with open(dbname_sql, 'r') as fp:
                data = fp.read()
            with open(dbname_sql, 'w') as fp:
                fp.write(data.replace('$DBNAME', dbname))

            if not PSQL.run_sql_file(dbname=self.test_database,
                                     sql_file=dbname_sql,
                                     out_file=dbname_ans):
                raise Exception("failed to get the distribution policy: '%s'"
                                % dbname_sql)
            if diff:
                dbname_out = "%s.out" % dbname_ans[:-4]
                if not Gpdiff.are_files_equal(dbname_ans, dbname_out):
                    return False
    return True
def template0_stop_limit_on_segment(self, primary):
    """
    Same as template0_stop_limit, but on segment.

    Drives the segment past xid_stop_limit by creating newdb off an aged
    template0, verifies both utility-mode and master-dispatched commands
    fail, then recovers via the standard procedure (lower the GUC,
    VACUUM FREEZE, restore the GUC).

    @param primary: GpDB instance for the primary segment under test.
    """
    logger.info("template0_stop_limit_on_segment: dbid(%d) %s:%d'" %
                (primary.dbid, primary.hostname, primary.port))

    # Record the current stop limit so we can compute a reduced value
    # for the recovery step below.
    dburl = dbconn.DbURL(hostname=primary.hostname, port=primary.port)
    with dbconn.connect(dburl, utility=True) as conn:
        sql = "SHOW xid_stop_limit"
        slimit_guc = int(dbconn.execSQLForSingleton(conn, sql))
    new_limit = xid_sum(slimit_guc, -(10**6))

    # Raise nextXid so that template0 age would cross stop limit.
    self._raise_template0_age(self.STOP_LIMIT, primary)

    # newdb's age crosses stop limit and GPDB stops accepting commands.
    PSQL(sql_cmd="CREATE DATABASE newdb TEMPLATE template0").run(
        validateAfter=True)
    logger.info("newdb created off template0")

    # Ensure that utility connections to the segment fail with error.
    psql_args = {"PGOPTIONS": "-c 'gp_session_role=utility'",
                 "host": primary.hostname,
                 "port": primary.port}
    self._basic_sanity_check("error", psql_args)
    logger.info("Utility connection to dbid(%d) reported stop limit "
                "error, as expected." % primary.dbid)

    try:
        # Verify that SQL commands from master fail.
        PSQL(sql_cmd="CREATE TABLE test (a int, b int)").run(
            validateAfter=True)
        self.fail("CREATE TABLE succeeded from master, when expecting "
                  "stop limit error on segment.")
    except ExecutionError:
        logger.info("CREATE TABLE failed from master, as expected.")

    # Reduce xid_stop_limit as per the standard procedure.
    self._reduce_stop_limit_guc(primary, new_limit)

    # Vacuum freezing newdb should be suffice to recover.
    PSQL(sql_cmd="VACUUM FREEZE",
         dbname="newdb",
         out_file="vacuum_newdb_wl.out").run(validateAfter=True)

    # Ensure that utility connections to the segment are successful.
    sql = "SELECT age(datfrozenxid) FROM pg_database WHERE datname='newdb'"
    with dbconn.connect(dburl, utility=True) as conn:
        age_newdb = int(dbconn.execSQLForSingleton(conn, sql))
    self.assertTrue(age_newdb > 0)

    # Verify SQL commands from master are successful.
    self._basic_sanity_check("clean")
    self._restore_stop_limit_guc(primary.datadir)

    # Verify SQL commands after restoring xid_stop_limit GUC.
    self._basic_sanity_check("clean")
    PSQL.drop_database(dbname="newdb")
def setup_cluster(self, initsystem_file, data_load_file, select_file,
                  dbname=None):
    """
    Runs gpinitsystem to initialize the cluster and creates the test
    database, then loads the test data and validates it.

    @param initsystem_file: gpinitsystem config file (resolved to an
                            absolute path).
    @param data_load_file: SQL file with the data to load.
    @param select_file: SQL file used later by run_data_validation().
    @param dbname: target database name; defaults to self.test_database.
    @raise Exception: if loading data_load_file fails.
    """
    if dbname is None:
        dbname = self.test_database
    initsystem_file = self._get_absolute_filename(initsystem_file)
    self.run_initsystem(initsystem_file)
    logger.info("Creating DB %s ..." % dbname)
    # BUG FIX: create the database named by the dbname parameter, not
    # unconditionally self.test_database — the data below is loaded into
    # dbname, which otherwise would not exist when a caller overrides it.
    cmd = Command('create a test database', 'createdb %s' % dbname)
    cmd.run(validateAfter=True)
    data_load_file = self._get_absolute_filename(data_load_file)
    logger.info("Loading data '%s' ..." % data_load_file)
    if not PSQL.run_sql_file(sql_file=data_load_file, dbname=dbname,
                             output_to_file=False):
        raise Exception("data load of %s failed" % data_load_file)
    self.select_file = select_file
    self.dbname = dbname
    assert self.run_data_validation()
def cleanup_expansion(self):
    """
    Run gpexpand to cleanup the expansion catalog

    @return: True if the cleanup of gpexpand schema suceeded
             False otherwise
    """
    logger.info("Running expansion cleanup ...")
    query = "SELECT count(*) FROM information_schema.schemata where schema_name='gpexpand';"
    cmd = Command(name='run gpexpand cleanup',
                  cmdStr='echo -e \"y\n\" | gpexpand -c -D %s' %
                  self.test_database,
                  ctxt=REMOTE,
                  remoteHost='localhost')
    cmd.run(validateAfter=True)
    with dbconn.connect(dbconn.DbURL(dbname=self.test_database)) as conn:
        try:
            row = dbconn.execSQLForSingleton(conn, query)
        # BUG FIX: 'except E, e' is Python-2-only syntax and bound an
        # unused name; use the portable form.
        except UnexpectedRowsError:
            return False
        if row != 0:
            # gpexpand schema still present: cleanup did not remove it.
            return False
    # BUG FIX: the original fell off the end and returned None on the
    # success path, contradicting the documented True return.
    return True
def test_autovacuum_signaling(self):
    """
    Raise the nextXid to oldest_frozenxid + autovacuum_freeze_max_age.
    Run a transaction.
    Ensure that no autovacuum daemon is started.
    """
    dburl = dbconn.DbURL()
    with dbconn.connect(dburl) as conn:
        # Remember where nextXid should be restored to after the test.
        teardown_next_xid = int(
            dbconn.execSQLForSingleton(conn, 'select get_next_xid()'))
        # we burned a xid when trying to ascertain nextXid
        teardown_next_xid = xid_sum(teardown_next_xid, 1)
        logger.info('Intending to return master to xid %d' % teardown_next_xid)

        def cleanup():
            # Registered via addCleanup: restore nextXid and checkpoint so
            # the spoofed value does not leak into later tests.
            with dbconn.connect(dburl) as myconn:
                dbconn.execSQLForSingleton(
                    myconn,
                    "select spoof_next_xid('%d'::xid)" % teardown_next_xid)
                dbconn.execSQL(myconn, "checkpoint")
            self._basic_sanity_check('clean')
            logger.info('Returned master to xid %d' % teardown_next_xid)
        self.addCleanup(cleanup)

        oldest_xid = int(
            dbconn.execSQLForSingleton(conn, 'select get_oldest_xid()'))
        autovacuum_freeze_max_age = int(
            dbconn.execSQLForSingleton(conn, 'show autovacuum_freeze_max_age'))
        autovacuum_xid_limit = xid_sum(oldest_xid, autovacuum_freeze_max_age)
        logger.info('Raising master xid to autovacuum_xid_limit %d' %
                    autovacuum_xid_limit)
        dbconn.execSQLForSingleton(
            conn, "select spoof_next_xid('%d'::xid)" % autovacuum_xid_limit)

    # A new connection to the postmaster, at this point, will ensure that we roll through
    # the ServerLoop and potentially fork an autovacuum process... if enabled.
    # Burn a transaction to trigger any undesirable behavior that we're disabling.
    with dbconn.connect(dburl) as conn:
        self.assertEqual(1, int(dbconn.execSQLForSingleton(conn, 'select 1')))

    # Expect no autovacuum worker process on the master host.
    cmd = Command('check for autovacuum',
                  'ps -ef | grep -v grep | grep postgres | grep autovacuum')
    cmd.run()
    self.assertEqual(cmd.get_results().stdout, "",
                     "Seriously? Found a postgres autovacuum process!")

    self._basic_sanity_check('clean')
def check_skew(self):
    """
    Check that all tables have been distributed reasonably.

    Reads the gpexpand status tables for this test database and, for each
    redistributed table, fetches per-segment tuple counts from
    gp_toolkit.gp_skew_details and validates them with
    check_random_dist_tuple_count_skew().

    @return: True if no table shows unreasonable skew.
    @raise Exception: naming the first table whose skew check fails.
    """
    tables = []
    logger.info("Checking skew ...")
    with dbconn.connect(dbconn.DbURL(dbname=self.test_database)) as conn:
        query = "select fq_name from gpexpand.status_detail where dbname = '%s'" % self.test_database
        rows = dbconn.execSQL(conn, query)
        for row in rows:
            # fq_name is schema-qualified; keep only the relation name.
            tables.append(row[0].partition(".")[2])

        for table in tables:
            # BUG FIX: the skew query hard-coded relname = 't1', so every
            # iteration measured the same table instead of the one under
            # test; interpolate the current table name.
            query = ("select data.segid, data.segtupcount "
                     "from gp_toolkit.gp_skew_details( "
                     "(select oid from pg_class where relname = '%s')) as data"
                     % table)
            rows = dbconn.execSQL(conn, query)
            tuplecounts = [row[1] for row in rows]
            if not self.check_random_dist_tuple_count_skew(table, tuplecounts):
                raise Exception("Table %s has not been redistributed well. Check skew." % table)
    return True
def catalog_validation(self):
    """
    Validate that there are no inconsistencies in the catalog

    Runs gpcheckcat against every database and compares the number of
    clean reports with the number of databases (minus template0, which
    gpcheckcat skips).

    @return: True if there are no inconsistencies
             False otherwise
    """
    logger.info("Running gpcheckcat to validate catalog ...")

    # Fetch the count of databases using gpcheckcat that pass the catalog check test
    out_file = self._get_absolute_filename('gpcheckcat.out')
    assert self.db_port is not None
    checkcat = Command(
        'run gpcheckcat',
        '$GPHOME/bin/lib/gpcheckcat -A -O -p %s &> %s' % (self.db_port,
                                                          out_file))
    checkcat.run(validateAfter=True)

    # Count how many databases were reported clean in the output.
    with open(out_file) as report:
        clean_reports = sum(1 for text in report
                            if 'Found no catalog issue' in text)

    # fetch the database count on the host using pg_catalog
    with dbconn.connect(dbconn.DbURL()) as conn:
        total = dbconn.execSQLForSingleton(
            conn, "select count(*) from pg_database")
    # -1 because gpcheckcat does not run against template0
    expected = total - 1

    # Check if the numbers match else expansion dint go through fine return false
    if clean_reports == expected:
        return True
    failed_dbs = self._get_checkcat_failed_dbs(out_file)
    logger.error('gpcheckcat failed for the following databases %s' % failed_dbs)
    return False
def test_xansrep(self):
    """
    Test for distributed transactions.  Here are two cases:
      A. a transaction in prepared state
      B. a transaction after broadcast commit prepared
    In case A, the transaction will be aborted, and B should be visible.

    The flow of this test is as follows.
    1. Initiate the Standby using the Master (primary) postmaster
       paramerters.
    2. B: Inject the fault to suspend Master after Commit done.
    3. B: Now execute a transaction and commit it.  This master will be
       blocked.
    4. A: Inject the fault to suspend Master after Prepare done.
    5. A: Now execute a transaction and commit it.  This transaction will
       be blocked.
    6. Promote the standby.
    7. Verify the result, transaction A results should not be visible and
       transaction B results should be visible.
    """
    # Start from a clean slate: drop leftovers and seed the dummy table.
    PSQL.run_sql_command('DROP table if exists xansrep_prepare')
    PSQL.run_sql_command('DROP table if exists xansrep_commit')
    PSQL.run_sql_command('DROP table if exists xansrep_dummy')
    PSQL.run_sql_command('create table xansrep_dummy (a int)')
    PSQL.run_sql_command('insert into xansrep_dummy '
                         'select * from generate_series(1, 1000)')
    Command('remove standby', 'gpinitstandby -ra').run()
    fault = Gpfault()

    # 1. Initial setup
    res = self.standby.create()
    self.assertEqual(res, 0)
    res = self.standby.start()
    self.assertTrue(res.wasSuccessful())

    # wait for the walreceiver to start
    num_walsender = self.wait_for_walsender()
    self.assertEqual(num_walsender, 1)

    # 2. Inject fault at commit prepared state
    result = fault.suspend_at('dtm_broadcast_commit_prepared')
    logger.info(result.stdout)
    self.assertEqual(result.rc, 0, result.stdout)

    # 3. Now execute a transaction and commit it.  The backend is expected
    #    to be blocked.
    logger.info('Create table xansrep_commit...')

    # Due to the suspend, we don't wait for the result
    proc = self.run_sql('create table xansrep_commit as '
                        'select * from xansrep_dummy')

    logger.info('Check if suspend fault is hit after commit...')
    triggered = fault.wait_triggered('dtm_broadcast_commit_prepared')
    self.assertTrue(triggered, 'Fault was not triggered')

    # 4. Inject fault at prepared state
    result = fault.suspend_at(
        'transaction_abort_after_distributed_prepared')
    logger.info(result.stdout)
    self.assertEqual(result.rc, 0, result.stdout)

    # 5. Now execute a transaction and commit it.  The backend is expected
    #    to be blocked.
    logger.info('Create table xansrep_prepare...')
    # Due to the suspend, we don't wait for the result
    proc = self.run_sql('create table xansrep_prepare (a int)')

    logger.info('Check if suspend fault is hit ...')
    triggered = fault.wait_triggered(
        'transaction_abort_after_distributed_prepared')
    self.assertTrue(triggered, 'Fault was not triggered')

    # 6. Promote standby
    # We don't kill/stop the primary, as it's convenient for
    # next testing, and the outcome wouldn't change.
    self.standby.promote()

    # 7. Verify the result replicated to the standby.
    logger.info('Verify if table xansrep_prepare exists...')
    proc = self.run_sql('select * from xansrep_prepare',
                        str(self.standby.port))
    # the table should not exist (case A was only prepared, so aborted)
    stderr = proc.communicate()[1]
    logger.info(stderr)
    search = "ERROR: relation \"xansrep_prepare\" does not exist"
    self.assertTrue(stderr.find(search) >= 0)

    logger.info('Verify if table xansrep_commit exists...')
    proc = self.run_sql('select count(*) from xansrep_commit',
                        str(self.standby.port))
    # the table should exist (case B had commit-prepared broadcast)
    stdout = proc.communicate()[0]
    logger.info(stdout)
    search = "1000"
    self.assertTrue(stdout.find(search) >= 0)

    logger.info('Pass')
def test_tli_mismatch(self):
    """
    Test to verify if TLI mismatch issue during Pass 3 of xlog record
    (checkpoint) replay occurs or not.

    A set of checkpoints one after the other when replayed on the standby
    and then if the standby is promoted, it should go through the
    prmotion just fine.

    The flow of this test is as follows.
    1. Initiate the Standby using the Master (primary) postmaster
       paramerters.
    2. Perform explicit checkpoints and wait so that they get replicated.
    3. Then promote the standby, wait and then try to access it.
    4. If we can successfully access it, its a Pass otherwise a Fail.
    """
    PSQL.run_sql_command('DROP table if exists foo')
    PSQL.run_sql_command('create table foo (a int)')
    PSQL.run_sql_command('insert into foo '
                         'select * from generate_series(1, 1000)')

    # Initial setup, forcibly remove standby and install new one
    pgutil = GpUtility()
    pgutil.remove_standby()
    logger.info('\nCreate a standby...')
    self.assertEqual(self.standby.create(), 0)
    start_result = self.standby.start()
    self.assertTrue(start_result.wasSuccessful())

    # Wait for the walreceiver to start
    self.assertEqual(self.wait_for_walsender(), 1)

    logger.info('Standby activated...')
    logger.info('Give the standby some time to catchup...')
    time.sleep(3)

    # Fire a burst of explicit checkpoints and let them stream across.
    logger.info('Create checkpoints and let them get replicated...')
    for _ in range(22):
        PSQL.run_sql_command('checkpoint')
    time.sleep(2)

    # Promote standby
    # We don't kill/stop the primary, as it's convenient for
    # next testing
    logger.info('Promote the standby immediatly')
    self.standby.promote()
    logger.info(
        'Wait for the standby to be ready to accept connections ...')
    time.sleep(3)

    # Verify the result replicated to the standby.
    logger.info('Verify if table foo exists...')
    proc = self.run_sql('select count(*) from foo',
                        str(self.standby.port))

    # The table should exist
    stdout = proc.communicate()[0]
    logger.info(stdout)
    search = "1000"
    self.assertTrue(stdout.find(search) >= 0)

    logger.info('Pass')
def test_block_while_catchup_within_range(self):
    """
    This test verifies if a backend gets blocked in case
    the WAL sender is still in catchup mode.
    """
    with WalClient("replication=true") as client:
        (sysid, tli, xpos) = client.identify_system()

        # Set the guc to > 1 so that we can verify the test
        # using less amount of xlog
        self.set_guc('repl_catchup_within_range', '3')

        # Generate enough xlog in WAL sender startup phase. None of the sql statements
        # should get blocked. If blocked we have some issue.
        # Checkpointing causes full page writes on updates/inserts. Hence helps
        # xlog generation.
        i = 0
        logger.info('Running a bunch of SQLs to generate enough xlog to maintain catchup phase...')
        while (i < 10):
            PSQL.run_sql_command('DROP TABLE IF EXISTS foo; CREATE TABLE foo(a int, b int); CHECKPOINT;')
            i = i + 1

        xpos_ptr = XLogRecPtr.from_string(xpos)
        client.start_replication(xpos_ptr)

        while True:
            msg = client.receive(1000)
            if isinstance(msg, WalMessageData):
                header = msg.header

                # walsender must be still in catchup phase as a lot xlog needs to be sent
                sql_catchup = "SELECT count(*) FROM pg_stat_replication where state = 'catchup'"
                sql_table_present = "SELECT count(*) from pg_class where relname = 'foo'"
                sql_bkd_count = ("SELECT count(*) from pg_stat_activity where waiting ='t' and waiting_reason = 'replication'")

                with dbconn.connect(dbconn.DbURL(), utility=True) as conn:
                    curs = dbconn.execSQL(conn, sql_catchup)
                    results = curs.fetchall()
                    self.assertEqual(int(results[0][0]), 1,
                                     "No Catchup WAL sender found")
                logger.info('WAL sender is alive and now is in catchup phase...')

                # Fire a DDL from a separate backend; it should block on
                # synchronous replication while catchup is in range.
                logger.info('In catchup phase, create table...')
                subprocess.Popen(['psql', '-c',
                                  'DROP TABLE IF EXISTS raghav; create table raghav (a int);'],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)

                with dbconn.connect(dbconn.DbURL(), utility=True) as conn:
                    # verify if WAL sender is still in catchup phase
                    curs = dbconn.execSQL(conn, sql_catchup)
                    results = curs.fetchall()
                    self.assertEqual(int(results[0][0]), 1,
                                     "WAL sender catchup phase over before verification")
                    logger.info('WAL sender is alive, still in catchup phase ..')

                # BUG FIX: the original reused loop counter 'i', which was
                # already 10 after the xlog-generation loop above, so this
                # 'while (i < 5)' verification never executed.  Use a fresh
                # counter so the blocked-backend check actually runs.
                retries = 0
                while (retries < 5):
                    with dbconn.connect(dbconn.DbURL(), utility=True) as conn:
                        # verify if the previous backend is blocked
                        curs = dbconn.execSQL(conn, sql_bkd_count)
                        results = curs.fetchall()
                    if (int(results[0][0]) == 1):
                        break
                    if (retries == 4):
                        self.assertTrue(0, "Previous backend not blocked ...")
                    retries = retries + 1
                logger.info('But, create table is blocked...')

                with dbconn.connect(dbconn.DbURL(), utility=True) as conn:
                    # verify if WAL sender is still in catchup phase
                    curs = dbconn.execSQL(conn, sql_catchup)
                    results = curs.fetchall()
                    self.assertEqual(int(results[0][0]), 1,
                                     "WAL sender catchup phase over before verification")
                    logger.info('WAL sender is alive, in catchup phase and backend is blocked...')

                # sync replication needs a reply otherwise backend blocks
                client.reply(header.walEnd, header.walEnd, header.walEnd)

                # success, should get some 'w' message
                logger.info("Pass - Backends block if WAL sender is alive and the catchup is within-range")
                break
            elif isinstance(msg, WalMessageNoData):
                # could be timeout
                client.reply(xpos_ptr, xpos_ptr, xpos_ptr)
            else:
                raise StandardError(msg.errmsg)

    logger.info("Pass")
    # Restore the GUC to its default so later tests are unaffected.
    self.set_guc('repl_catchup_within_range', '1')
def _reset_age(self, dbname, segdb=None):
    """
    Resets datfrozenxid and relfrozenxid's in pg_class of the specified
    dbname to a value close to the current xid.  This is a recommended
    way of resetting age of dbname or a database that is created off
    template0.

    @param segdb: identifies the segment on which to operate.  It is an
                  instance of GpDB class.

    Note that the database dbname must have all tuples frozen (xmin=2).
    This holds true of template0 and of a database created off template0,
    only if there are no modifications done to the database.
    """
    if segdb is None:
        segdb = self.gparray.master
    dburl = dbconn.DbURL(hostname=segdb.hostname, port=segdb.port)
    dburl_dbname = dbconn.DbURL(hostname=segdb.hostname,
                                port=segdb.port,
                                dbname=dbname)

    # Step 1: point datfrozenxid at the current nextXid (system table
    # mods allowed so we can UPDATE pg_database directly).
    with dbconn.connect(dburl, utility=True,
                        allowSystemTableMods="dml") as conn:
        sql = "SELECT get_next_xid()"
        next_xid = int(dbconn.execSQLForSingleton(conn, sql))
        sql = "UPDATE pg_database SET datfrozenxid='%d'::xid WHERE datname='%s'"
        dbconn.execSQL(conn, sql % (next_xid, dbname))
        conn.commit()

    # template0 normally rejects connections; temporarily allow them.
    if dbname == "template0":
        self._set_allowconn_template0(True)

    # Step 2: bump relfrozenxid for every relation that tracks one, then
    # vacuum-freeze pg_class so the change sticks.
    with dbconn.connect(dburl_dbname, utility=True,
                        allowSystemTableMods="dml") as conn:
        sql = ("UPDATE pg_class SET relfrozenxid='%d'::xid WHERE "
               "int8in(xidout(relfrozenxid)) > 0")
        dbconn.execSQL(conn, sql % next_xid)
        conn.commit()
    PSQL(sql_cmd="VACUUM FREEZE pg_class",
         dbname=dbname,
         PGOPTIONS="-c 'gp_session_role=utility'",
         host=segdb.hostname,
         port=segdb.port,
         out_file="vacuum_%s.out" % dbname).run(validateAfter=True)

    # Step 3: pg_stat_last_operation may hold unfrozen tuples; clear and
    # freeze it too.
    with dbconn.connect(dburl_dbname, utility=True,
                        allowSystemTableMods="dml") as conn:
        dbconn.execSQL(conn, "DELETE FROM pg_stat_last_operation")
        conn.commit()
    PSQL(sql_cmd="VACUUM FREEZE pg_stat_last_operation",
         dbname=dbname,
         PGOPTIONS="-c 'gp_session_role=utility'",
         host=segdb.hostname,
         port=segdb.port,
         out_file="vacuum_%s.out" % dbname).run(validateAfter=True)

    if dbname == "template0":
        self._set_allowconn_template0(False)

    # Sanity-check the resulting age.
    with dbconn.connect(dburl, utility=True) as conn:
        sql = "SELECT age(datfrozenxid) FROM pg_database WHERE datname='%s'"
        age_dbname = dbconn.execSQLForSingleton(conn, sql % dbname)
    age_dbname = int(age_dbname)
    logger.info("Age of %s reset to %d" % (dbname, age_dbname))
    # We are OK as long as dbname age is less than xid_warn_limit.  The
    # 10000 is just a number assumed to be less than xid_warn_limit.
    self.assertTrue(
        age_dbname > 0 and age_dbname < 10000,
        "age(%s) = %d, next xid = %d" % (dbname, age_dbname, next_xid))
def _raise_template0_age(self, limit, segdb):
    """
    Increase age of template0 beyond the specified limit on the specified
    segment.  When a new database is created off template0, the limit
    will be exceeded.

    Assumption: template0 age =~ 0 or at least not already crossing any
    of the xid limits.  Because this function can only raise the age,
    cannot decrease it.

    @param limit: one of WARN_LIMIT, STOP_LIMIT and WRAP_LIMIT.
    @param segdb: an instance of GpDB class representing the segment on
                  which the limit will be exceeded.
    """
    dburl = dbconn.DbURL(hostname=segdb.hostname, port=segdb.port)
    databases = []
    with dbconn.connect(dburl, utility=True) as conn:
        sql = "SELECT datname FROM pg_database WHERE datallowconn='t'"
        for row in dbconn.execSQL(conn, sql):
            databases.append(row[0])
        sql = "SHOW xid_stop_limit"
        stop_limit_guc = int(dbconn.execSQLForSingleton(conn, sql))
        sql = "SHOW xid_warn_limit"
        warn_limit_guc = int(dbconn.execSQLForSingleton(conn, sql))
        sql = ("SELECT datfrozenxid, age(datfrozenxid) FROM pg_database "
               "WHERE datname='template0'")
        row = dbconn.execSQL(conn, sql).fetchone()
        datfxid, age = int(row[0]), int(row[1])
        sql = "SELECT get_next_xid()"
        current_xid = int(dbconn.execSQLForSingleton(conn, sql))
    # Estimate of XIDs consumed by vacuum freeze operation on all databases.
    vacuum_xids = len(databases) * 500
    logger.info("Estimated xids for vacuume freeze: %d" % vacuum_xids)
    # Pick the target age/xid and the loop-continuation predicate for the
    # requested limit.
    if limit == self.WARN_LIMIT:
        target_age = (2**31) - stop_limit_guc - warn_limit_guc
        target_xid = xid_sum(datfxid, target_age)
        keep_raising = lambda x: x < target_age
    elif limit == self.STOP_LIMIT:
        target_age = (2**31) - stop_limit_guc
        target_xid = xid_sum(datfxid, target_age)
        keep_raising = lambda x: x < target_age
    elif limit == self.WRAP_LIMIT:
        # Past the wrap point, age(datfrozenxid) turns negative, hence
        # "keep raising while age is still positive".
        target_xid = xid_sum(datfxid, 2**31)
        keep_raising = lambda x: x > 0
    logger.info("Target xid = %d, limit = %d" % (target_xid, limit))
    self.assertEqual(
        preceding_xid(target_xid, current_xid), current_xid,
        "Target xid (%d) precedes current xid (%d)" %
        (target_xid, current_xid))
    # Iteratively spoof nextXid towards the target; each pass also vacuum
    # freezes every database so stop_limit keeps advancing and template0
    # remains the oldest database.
    while keep_raising(age):
        with dbconn.connect(dburl, utility=True) as conn:
            sql = "SELECT get_stop_limit()"
            stop_limit = int(dbconn.execSQLForSingleton(conn, sql))
            # GPDB may stop accepting connections if we spoof nextXid beyond
            # max_xid.
            max_xid = xid_sum(stop_limit, -vacuum_xids)
            new_xid = preceding_xid(target_xid, max_xid)
            logger.info(
                "Spoofing next xid to %d, current stop limit = %d" %
                (new_xid, stop_limit))
            sql = "SELECT spoof_next_xid('%d'::xid)"
            dbconn.execSQL(conn, sql % new_xid)
            conn.commit()
            sql = ("SELECT age(datfrozenxid) FROM pg_database "
                   "WHERE datname='template0'")
            age = int(dbconn.execSQLForSingleton(conn, sql))
        logger.info("template0 age raised to %d" % age)
        # The vacuum freeze of all databases advances stop_limit further,
        # necessary for iterating the while loop.  And template0 becomes the
        # oldest database aka the only culprit to violate the specified
        # limit.
        PSQL(sql_cmd='VACUUM FREEZE', dbname='postgres',
             out_file='vacuum_postgres.out').run(validateAfter=True)
        for datname in databases:
            logger.info('vacuum freeze %s' % datname)
            PSQL(sql_cmd='VACUUM FREEZE', dbname=datname,
                 out_file='vacuum_%s.out' % datname).run(validateAfter=True)
def test_syncrep(self):
    """
    Verify that a backend blocks on synchronous replication while the WAL
    receiver is suspended, and unblocks once it resumes.
    """
    # 1. Initiate the Standby
    # 2. Once the WAL receiver starts, signal it to suspend post xlog flush
    #    but before sending the ack.
    # 3. Now execute a transaction and commit it.  The backend is expected
    #    be blocked.
    # 4. Resume the WALReceiver and see the transaction passed and its
    #    results are visible.

    # cleanup
    PSQL.run_sql_command('DROP table if exists foo')

    # 1. create standby and start
    res = self.standby.create()
    self.assertEqual(res, 0)
    res = self.standby.start()
    self.assertTrue(res.wasSuccessful())

    # wait for the walreceiver to start
    num_walsender = self.wait_for_walsender()
    self.assertEqual(num_walsender, 1)

    # 2. Once the WAL receiver starts, signal it to suspend post xlog flush
    #    but before sending the ack.
    # Find the wal receiver pid from ps output.
    # NOTE(review): if no "wal receiver process" line is found, split_line
    # is never bound and len(split_line) raises NameError rather than
    # failing the assert — confirm that is acceptable here.
    proc = subprocess.Popen(['ps', '-ef'], stdout=subprocess.PIPE)
    stdout = proc.communicate()[0]
    search = "wal receiver process"
    for line in stdout.split('\n'):
        if (line.find(search) > 0):
            split_line = re.split(r'\s+', line.strip())
            break
    self.assertTrue(len(split_line) > 0)

    wal_rcv_pid = int(split_line[1])
    logger.info('Suspending WAL Receiver(' + str(wal_rcv_pid) + ')...')
    # The trigger file tells the receiver what to do on SIGUSR2.
    self.generate_trigger_file('wait_before_send_ack')
    os.kill(wal_rcv_pid, signal.SIGUSR2)

    # 3. Now execute a transaction and commit it.  The backend is expected
    #    be blocked.
    logger.info('Create table foo...')

    # we use subprocess since we expect it'll be blocked.
    proc = subprocess.Popen(['psql', '-c', 'create table foo (a int)'],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    readable = self.wait_stdout(proc, 5.0)
    self.assertFalse(readable, 'psql did not block')

    # 4. Resume the WALReceiver and see the transaction passed and its
    #    results are visible.
    logger.info('Resume the WAL Receiver...')
    self.generate_trigger_file('resume')
    os.kill(wal_rcv_pid, signal.SIGUSR2)
    readable = self.wait_stdout(proc, 5.0)
    self.assertTrue(readable, 'psql still blocks')
    proc.communicate()

    logger.info('No blocked backend found!')

    logger.info('Verifying if table exists ? ...')
    PSQL(sql_cmd='select * from foo').run(validateAfter=True)

    logger.info('Pass')
def _raise_template0_age(self, limit, segdb):
    """
    Increase age of template0 beyond the specified limit on the specified
    segment.  When a new database is created off template0, the limit
    will be exceeded.

    Assumption: template0 age =~ 0 or at least not already crossing any
    of the xid limits.  Because this function can only raise the age,
    cannot decrease it.

    @param limit: one of WARN_LIMIT, STOP_LIMIT and WRAP_LIMIT.
    @param segdb: an instance of GpDB class representing the segment on
                  which the limit will be exceeded.

    NOTE(review): this appears to duplicate a _raise_template0_age defined
    earlier in this file — confirm whether one copy can be removed.
    """
    dburl = dbconn.DbURL(hostname=segdb.hostname, port=segdb.port)
    databases = []
    with dbconn.connect(dburl, utility=True) as conn:
        sql = "SELECT datname FROM pg_database WHERE datallowconn='t'"
        for row in dbconn.execSQL(conn, sql):
            databases.append(row[0])
        sql = "SHOW xid_stop_limit"
        stop_limit_guc = int(dbconn.execSQLForSingleton(conn, sql))
        sql = "SHOW xid_warn_limit"
        warn_limit_guc = int(dbconn.execSQLForSingleton(conn, sql))
        sql = ("SELECT datfrozenxid, age(datfrozenxid) FROM pg_database "
               "WHERE datname='template0'")
        row = dbconn.execSQL(conn, sql).fetchone()
        datfxid, age = int(row[0]), int(row[1])
        sql = "SELECT get_next_xid()"
        current_xid = int(dbconn.execSQLForSingleton(conn, sql))
    # Estimate of XIDs consumed by vacuum freeze operation on all databases.
    vacuum_xids = len(databases) * 500
    logger.info("Estimated xids for vacuume freeze: %d" % vacuum_xids)
    # Pick the target age/xid and the loop-continuation predicate for the
    # requested limit.
    if limit == self.WARN_LIMIT:
        target_age = (2**31) - stop_limit_guc - warn_limit_guc
        target_xid = xid_sum(datfxid, target_age)
        keep_raising = lambda x: x < target_age
    elif limit == self.STOP_LIMIT:
        target_age = (2**31) - stop_limit_guc
        target_xid = xid_sum(datfxid, target_age)
        keep_raising = lambda x: x < target_age
    elif limit == self.WRAP_LIMIT:
        # Past the wrap point, age(datfrozenxid) turns negative, hence
        # "keep raising while age is still positive".
        target_xid = xid_sum(datfxid, 2**31)
        keep_raising = lambda x: x > 0
    logger.info("Target xid = %d, limit = %d" % (target_xid, limit))
    self.assertEqual(preceding_xid(target_xid, current_xid), current_xid,
                     "Target xid (%d) precedes current xid (%d)" %
                     (target_xid, current_xid))
    # Iteratively spoof nextXid towards the target; each pass also vacuum
    # freezes every database so stop_limit keeps advancing and template0
    # remains the oldest database.
    while keep_raising(age):
        with dbconn.connect(dburl, utility=True) as conn:
            sql = "SELECT get_stop_limit()"
            stop_limit = int(dbconn.execSQLForSingleton(conn, sql))
            # GPDB may stop accepting connections if we spoof nextXid beyond
            # max_xid.
            max_xid = xid_sum(stop_limit, -vacuum_xids)
            new_xid = preceding_xid(target_xid, max_xid)
            logger.info("Spoofing next xid to %d, current stop limit = %d" %
                        (new_xid, stop_limit))
            sql = "SELECT spoof_next_xid('%d'::xid)"
            dbconn.execSQL(conn, sql % new_xid)
            conn.commit()
            sql = ("SELECT age(datfrozenxid) FROM pg_database "
                   "WHERE datname='template0'")
            age = int(dbconn.execSQLForSingleton(conn, sql))
        logger.info("template0 age raised to %d" % age)
        # The vacuum freeze of all databases advances stop_limit further,
        # necessary for iterating the while loop.  And template0 becomes the
        # oldest database aka the only culprit to violate the specified
        # limit.
        PSQL(sql_cmd='VACUUM FREEZE', dbname='postgres',
             out_file='vacuum_postgres.out').run(validateAfter=True)
        for datname in databases:
            logger.info('vacuum freeze %s' % datname)
            PSQL(sql_cmd='VACUUM FREEZE', dbname=datname,
                 out_file='vacuum_%s.out' % datname).run(validateAfter=True)
def _reset_age(self, dbname, segdb=None):
    """
    Resets datfrozenxid and relfrozenxid's in pg_class of the specified
    dbname to a value close to the current xid.  This is a recommended
    way of resetting age of dbname or a database that is created off
    template0.

    @param segdb: identifies the segment on which to operate.  It is an
                  instance of GpDB class.

    Note that the database dbname must have all tuples frozen (xmin=2).
    This holds true of template0 and of a database created off template0,
    only if there are no modifications done to the database.

    NOTE(review): this appears to duplicate a _reset_age defined earlier
    in this file — confirm whether one copy can be removed.
    """
    if segdb is None:
        segdb = self.gparray.master
    dburl = dbconn.DbURL(hostname=segdb.hostname, port=segdb.port)
    dburl_dbname = dbconn.DbURL(hostname=segdb.hostname,
                                port=segdb.port,
                                dbname=dbname)

    # Step 1: point datfrozenxid at the current nextXid (system table
    # mods allowed so we can UPDATE pg_database directly).
    with dbconn.connect(dburl, utility=True,
                        allowSystemTableMods="dml") as conn:
        sql = "SELECT get_next_xid()"
        next_xid = int(dbconn.execSQLForSingleton(conn, sql))
        sql = "UPDATE pg_database SET datfrozenxid='%d'::xid WHERE datname='%s'"
        dbconn.execSQL(conn, sql % (next_xid, dbname))
        conn.commit()

    # template0 normally rejects connections; temporarily allow them.
    if dbname == "template0":
        self._set_allowconn_template0(True)

    # Step 2: bump relfrozenxid for every relation that tracks one, then
    # vacuum-freeze pg_class so the change sticks.
    with dbconn.connect(dburl_dbname, utility=True,
                        allowSystemTableMods="dml") as conn:
        sql = ("UPDATE pg_class SET relfrozenxid='%d'::xid WHERE "
               "int8in(xidout(relfrozenxid)) > 0")
        dbconn.execSQL(conn, sql % next_xid)
        conn.commit()
    PSQL(sql_cmd="VACUUM FREEZE pg_class",
         dbname=dbname,
         PGOPTIONS="-c 'gp_session_role=utility'",
         host=segdb.hostname,
         port=segdb.port,
         out_file="vacuum_%s.out" % dbname).run(validateAfter=True)

    # Step 3: pg_stat_last_operation may hold unfrozen tuples; clear and
    # freeze it too.
    with dbconn.connect(dburl_dbname, utility=True,
                        allowSystemTableMods="dml") as conn:
        dbconn.execSQL(conn, "DELETE FROM pg_stat_last_operation")
        conn.commit()
    PSQL(sql_cmd="VACUUM FREEZE pg_stat_last_operation",
         dbname=dbname,
         PGOPTIONS="-c 'gp_session_role=utility'",
         host=segdb.hostname,
         port=segdb.port,
         out_file="vacuum_%s.out" % dbname).run(validateAfter=True)

    if dbname == "template0":
        self._set_allowconn_template0(False)

    # Sanity-check the resulting age.
    with dbconn.connect(dburl, utility=True) as conn:
        sql = "SELECT age(datfrozenxid) FROM pg_database WHERE datname='%s'"
        age_dbname = dbconn.execSQLForSingleton(conn, sql % dbname)
    age_dbname = int(age_dbname)
    logger.info("Age of %s reset to %d" % (dbname, age_dbname))
    # We are OK as long as dbname age is less than xid_warn_limit.  The
    # 10000 is just a number assumed to be less than xid_warn_limit.
    self.assertTrue(age_dbname > 0 and age_dbname < 10000,
                    "age(%s) = %d, next xid = %d" %
                    (dbname, age_dbname, next_xid))