class WalRecTestCase(MPPTestCase):
    '''Testcases for SIGTERM on wal receiver.'''

    def __init__(self, methodName):
        self.gp = GpinitStandby()
        self.mdd = os.environ.get('MASTER_DATA_DIRECTORY')
        self.pgutil = GpUtility()
        super(WalRecTestCase, self).__init__(methodName)

    def setUp(self):
        # Remove standby if present
        self.gp.run(option='-r')

    def _wal_receiver_pid(self, host, port):
        '''Return the pid reported for the wal receiver on host:port as an int.

        -1 is treated as "no such process" by the callers below.
        '''
        return int(self.pgutil.get_pid_by_keyword(host=host,
                                                  pgport=port,
                                                  keyword='wal receiver process',
                                                  option=''))

    def invoke_sigterm_and_verify(self, retries=10):
        '''
        Invoke sigterm on wal receiver and verify that a new process is spawned after.

        retries: how many times (one second apart) to look for the new
                 wal receiver before giving up.
        Returns True only when a wal receiver with a different, valid pid
        shows up after the signal; False otherwise.
        '''
        # Local import keeps this block self-contained regardless of the
        # file-level imports.
        import time

        gpact_stdby = GpactivateStandby()
        standby_host = gpact_stdby.get_current_standby()
        standby_port = gpact_stdby.get_standby_port()

        old_pid = self._wal_receiver_pid(standby_host, standby_port)
        if old_pid == -1:
            # Never build "kill -15 -1": that would signal every process
            # owned by the user on the standby host.
            tinctest.logger.error('wal receiver process not found on %s' % standby_host)
            return False

        sig_cmd = "gpssh -h %s -e 'kill -15 %s'" % (standby_host, old_pid)
        cmd = Command('Issue SIGTERM to wal receiver process', cmdStr=sig_cmd)
        tinctest.logger.info('%s' % cmd)
        cmd.run(validateAfter=True)
        if cmd.get_results().rc != 0:
            return False

        # The replacement process may take a moment to appear; a missing
        # process (-1) must not be mistaken for a freshly spawned one.
        for _ in range(retries):
            new_pid = self._wal_receiver_pid(standby_host, standby_port)
            if new_pid != -1 and new_pid != old_pid:
                return True
            time.sleep(1)
        return False

    @unittest.skipIf(not config.is_multinode(), "Test applies only to a multinode cluster")
    def test_sigterm_on_walreceiver(self):
        self.pgutil.install_standby()
        self.assertTrue(self.invoke_sigterm_and_verify())
class PromoteTestCase(MPPTestCase):
    '''Promotion of a standby whose data directory is incomplete.'''

    def __init__(self, methodName):
        self.pgutil = GpUtility()
        super(PromoteTestCase, self).__init__(methodName)

    def setUp(self):
        self.pgutil.check_and_start_gpdb()
        # Always rebuild the standby: an earlier test may have promoted it.
        self.pgutil.remove_standby()
        self.pgutil.install_standby()

    def tearDown(self):
        self.pgutil.remove_standby()

    def test_promote_incomplete_stdby(self):
        '''
        Wipe the standby "base" directory, issue a promote and check that
        no master process ends up running on the standby.
        '''
        activator = GpactivateStandby()
        initializer = GpinitStandby()

        standby_dd = activator.get_standby_dd()
        standby_host = initializer.get_standbyhost()
        standby_port = activator.get_standby_port()

        # Break the standby by emptying its base directory.
        self.pgutil.clean_dir(standby_host, os.path.join(standby_dd, 'base'))

        promote_cmd = "pg_ctl promote -D %s" % standby_dd
        rc, _output = activator.run_remote(standby_host, promote_cmd,
                                           standby_port, standby_dd)
        self.assertEqual(rc, 0)

        master_pid = self.pgutil.get_pid_by_keyword(host=standby_host,
                                                    pgport=standby_port,
                                                    keyword='master',
                                                    option='bin')
        no_master_running = int(master_pid) == -1
        self.assertTrue(no_master_running,
                        'incomplete standby data directory promote succeeds.')
def test_run_five(self):
    '''Install and activate a standby five times in a row, each time from the newly promoted master.'''
    for attempt in xrange(5):
        with NewEnv(PGPORT=self.port, MASTER_DATA_DIRECTORY=self.mdd):
            utility = GpUtility()
            # From the second round on, the standby is initialised from the
            # new master, so the directory grows like
            # master_newstandby_newstandby...
            if attempt != 0:
                utility.install_standby(socket.gethostname(),
                                        os.path.join(self.mdd, 'newstandby'))
            else:
                utility.install_standby(socket.gethostname(), self.mdd)

            activator = GpactivateStandby()
            self.mdd = activator.get_standby_dd()
            self.port = activator.get_standby_port()
            activator.activate()
            tinctest.logger.info("self.mdd is %s, self.port is %s" % (self.mdd, self.port))
def test_run_five(self):
    '''Run five consecutive failovers, re-creating the standby before each activation.'''
    for round_no in xrange(5):
        with NewEnv(PGPORT=self.port, MASTER_DATA_DIRECTORY=self.mdd):
            gp_utility = GpUtility()
            first_round = (round_no == 0)
            if first_round:
                gp_utility.install_standby(socket.gethostname(), self.mdd)
            else:
                # Later rounds init the standby from the new master, so the
                # standby dir looks like master_newstandby_newstandby...
                nested_dir = os.path.join(self.mdd, 'newstandby')
                gp_utility.install_standby(socket.gethostname(), nested_dir)

            gp_activate = GpactivateStandby()
            self.mdd = gp_activate.get_standby_dd()
            self.port = gp_activate.get_standby_port()
            gp_activate.activate()
            tinctest.logger.info("self.mdd is %s, self.port is %s" % (self.mdd, self.port))
class PromoteTestCase(MPPTestCase):
    '''Testcase for promoting a standby with a damaged data directory.'''

    def __init__(self, methodName):
        self.pgutil = GpUtility()
        super(PromoteTestCase, self).__init__(methodName)

    def setUp(self):
        self.pgutil.check_and_start_gpdb()
        # The standby may have been promoted by a previous test, so it is
        # recreated unconditionally.
        self.pgutil.remove_standby()
        self.pgutil.install_standby()

    def tearDown(self):
        self.pgutil.remove_standby()

    def test_promote_incomplete_stdby(self):
        '''
        Remove the standby base dir, try to promote, and expect that the
        standby does not come up as a master.
        '''
        gp_activate = GpactivateStandby()
        gp_init = GpinitStandby()
        data_dir = gp_activate.get_standby_dd()
        host = gp_init.get_standbyhost()
        port = gp_activate.get_standby_port()

        base_dir = os.path.join(data_dir, 'base')
        self.pgutil.clean_dir(host, base_dir)

        result = gp_activate.run_remote(host,
                                        "pg_ctl promote -D %s" % data_dir,
                                        port,
                                        data_dir)
        (rc, output) = result
        self.assertEqual(rc, 0)

        pid = self.pgutil.get_pid_by_keyword(
            host=host, pgport=port, keyword='master', option='bin')
        self.assertTrue(
            -1 == int(pid),
            'incomplete standby data directory promote succeeds.')
def init_standby(self):
    '''Install a standby through a fresh GpUtility instance.'''
    GpUtility().install_standby()
class WalReplKillProcessTestCase(TINCTestCase):
    '''Helpers that kill WAL-replication related processes and check the outcome.'''

    # this is not hard code, will be updated
    stdby_host = 'localhost'
    stdby_port = '5432'

    def __init__(self, methodName):
        self.gphome = os.environ.get('GPHOME')
        self.pgport = os.environ.get('PGPORT')
        self.pgdatabase = os.environ.get('PGDATABASE')
        self.stdby_host = 'localhost'
        self.master_dd = os.environ.get('MASTER_DATA_DIRECTORY')
        self.pgutil = GpUtility()
        self.stdby = StandbyVerify()
        super(WalReplKillProcessTestCase, self).__init__(methodName)

    def killProcess_byPid(self, signal=9, pid_toKill=None, host="localhost"):
        '''Send `signal` to every pid in `pid_toKill` on `host` through gpssh.

        Returns True when the gpssh command exits with 0, False otherwise
        (including when no pid was supplied).
        '''
        pids = [str(pid).strip() for pid in (pid_toKill or [])]
        if not pids:
            # "kill -9" without a pid is always an error; do not run it.
            tinctest.logger.error("Killing process error, no pid given")
            return False
        kill_cmd = "%s/bin/gpssh -h %s -e 'kill -%s %s'" % (
            os.environ.get('GPHOME'), host, signal, " ".join(pids))
        (rc, result) = self.pgutil.run(kill_cmd)
        if rc == 0:
            tinctest.logger.info("Process killed, %s" % result)
            return True
        tinctest.logger.error(
            "Killing process error, Status Code non zero, cmd: %s\n" % kill_cmd)
        return False

    def kill_walstartup(self):
        '''Kill the startup process on the standby; returns the kill outcome.'''
        stdby_host = GpinitStandby().get_standbyhost()
        stdby_port = GpactivateStandby().get_standby_port()
        startup_pid = self.pgutil.get_pid_by_keyword(host=stdby_host,
                                                     pgport=stdby_port,
                                                     keyword="startup process")
        if int(startup_pid) == -1:
            tinctest.logger.error("error:startup process does not exist!")
            return False
        # Propagate the result, as kill_standby_postmaster does.
        return self.killProcess_byPid(pid_toKill=[startup_pid], host=stdby_host)

    def kill_walreceiver(self):
        '''Kill the wal receiver process on the standby; returns the kill outcome.'''
        stdby_host = GpinitStandby().get_standbyhost()
        stdby_port = GpactivateStandby().get_standby_port()
        walreceiver_pid = self.pgutil.get_pid_by_keyword(
            host=stdby_host, pgport=stdby_port, keyword="wal receiver process")
        if int(walreceiver_pid) == -1:
            tinctest.logger.error(
                "error: wal receiver process does not exist!")
            return False
        # Propagate the result, as kill_standby_postmaster does.
        return self.killProcess_byPid(pid_toKill=[walreceiver_pid], host=stdby_host)

    def kill_walsender_check_postmaster_reset(self):
        '''Kill the wal sender and require that a different pid replaces it.

        Returns False if no wal sender exists, True on success; raises
        Exception when the pid is unchanged two seconds after the kill.
        '''
        walsender_old_pid = self.pgutil.get_pid_by_keyword(
            pgport=self.pgport, keyword="wal sender process")
        if int(walsender_old_pid) == -1:
            tinctest.logger.error(
                "error: process wal sender does not exist on host")
            return False
        self.killProcess_byPid(pid_toKill=[walsender_old_pid])
        sleep(2)
        walsender_new_pid = self.pgutil.get_pid_by_keyword(
            pgport=self.pgport, keyword="wal sender process")
        if walsender_old_pid == walsender_new_pid:
            raise Exception(
                "Killing walsender failed to force postmaster reset")
        return True

    def kill_transc_backend_check_reset(self):
        '''Kill a transaction backend and check every postmaster child got a new pid.

        Raises Exception for the first child process whose pid is still
        unchanged after the polling window.
        '''
        dict_process = {
            'stats collector process': -1,
            'writer process': -1,
            'checkpointer process': -1,
            'seqserver process': -1,
            'ftsprobe process': -1,
            'sweeper process': -1,
            'wal sender process': -1
        }
        for process in dict_process:
            dict_process[process] = self.pgutil.get_pid_by_keyword(
                pgport=self.pgport, keyword=process)
        self.kill_transc_backend()
        for process in dict_process:
            pid = self.pgutil.get_pid_by_keyword(pgport=self.pgport,
                                                 keyword=process)
            delay = 1
            while dict_process[process] == pid and delay < 5:
                sleep(1)
                delay = delay + 1
                pid = self.pgutil.get_pid_by_keyword(pgport=self.pgport,
                                                     keyword=process)
            # Decide on the pid itself, not on the retry counter: a pid that
            # changed on the very last poll is still a success.
            if dict_process[process] == pid:
                tinctest.logger.error(
                    "Killing transaction backend process failed to force postmaster reset: %s"
                    % process)
                raise Exception(
                    "Killing transaction backend process failed to force postmaster reset child process"
                )

    def kill_transc_backend(self):
        '''Find backend(s) running an INSERT in self.pgdatabase and kill them.'''
        sql = "SELECT procpid FROM pg_stat_activity WHERE datname='{0}' AND current_query like 'INSERT INTO%'".format(
            self.pgdatabase)
        tinctest.logger.info(
            "running sql command to get transaction backend process: --- %s"
            % sql)
        procid = PSQL.run_sql_command(sql, flags='-q -t',
                                      dbname=self.pgdatabase)
        count = 1
        while not procid.strip() and count < 5:
            sleep(1)
            count += 1
            procid = PSQL.run_sql_command(sql, flags='-q -t',
                                          dbname=self.pgdatabase)
        if procid.strip():
            tinctest.logger.info("got procid to kill: %s " % procid)
            # The psql output may carry surrounding whitespace or several
            # rows; hand over clean, individual pids.
            self.killProcess_byPid(pid_toKill=procid.split())
        else:
            tinctest.logger.error("There is no active backend process")

    def check_stdby_stop(self):
        '''Raise Exception if a master process is still found on the standby.'''
        stdby_host = GpinitStandby().get_standbyhost()
        stdby_port = GpactivateStandby().get_standby_port()
        master_pid = self.pgutil.get_pid_by_keyword(host=stdby_host,
                                                    pgport=stdby_port,
                                                    keyword="master",
                                                    option="bin")
        if int(master_pid) != -1:
            raise Exception("standby should stop but failed!")

    def start_stdby(self):
        '''Start the standby postmaster remotely with pg_ctl.'''
        stdby_host = GpinitStandby().get_standbyhost()
        stdby_dbid = self.get_standby_dbid()
        activate_stdby = GpactivateStandby()
        stdby_mdd = activate_stdby.get_standby_dd()
        stdby_port = activate_stdby.get_standby_port()
        cmd = "pg_ctl -D %s -o '-p %s --gp_dbid=%s --gp_num_contents_in_cluster=2 --silent-mode=true -i -M master --gp_contentid=-1 -E' start &" % (
            stdby_mdd, stdby_port, stdby_dbid)
        self.run_remote(stdby_host, cmd, stdby_port, stdby_mdd)

    def run_remote(self, standbyhost, rmt_cmd, pgport='', standbydd=''):
        '''Runs remote command and returns rc, result '''
        export_cmd = "source %s/greenplum_path.sh;export PGPORT=%s;export MASTER_DATA_DIRECTORY=%s" % (
            self.gphome, pgport, standbydd)
        remote_cmd = "gpssh -h %s -e \"%s; %s\"" % (standbyhost, export_cmd,
                                                    rmt_cmd)
        cmd = Command(name='Running Remote command', cmdStr='%s' % remote_cmd)
        tinctest.logger.info(" %s" % cmd)
        cmd.run(validateAfter=False)
        result = cmd.get_results()
        return result.rc, result.stdout

    def check_mirror_seg(self):
        '''Run the mirror integrity check including the master.'''
        dbstate = DbStateClass('run_validation')
        dbstate.check_mirrorintegrity(master=True)

    def get_standby_dbid(self):
        '''Return the dbid of the content -1 / role 'm' entry as a stripped string.'''
        std_sql = "select dbid from gp_segment_configuration where content='-1' and role='m';"
        standby_dbid = PSQL.run_sql_command(std_sql, flags='-q -t',
                                            dbname='template1')
        return standby_dbid.strip()

    def run_transaction_backend(self):
        '''Run backend.sql from the local test directory, if present.'''
        tinctest.logger.info("local path for backend.sql is %s" % local_path(''))
        for entry in os.listdir(local_path('')):
            if fnmatch.fnmatch(entry, 'backend.sql'):
                PSQL.run_sql_file(local_path(entry))

    def get_down_segment(self):
        '''Return the psql output listing segments that are not in sync and up.'''
        query = 'select * from gp_segment_configuration where mode <> \'s\' and status <>\'u\''
        result = PSQL.run_sql_command(query, flags='-q -t', dbname='template1')
        return result.strip()

    def check_gpdb_status(self):
        '''Assert that get_down_segment() reports nothing.'''
        down_segments = self.get_down_segment()
        self.assertEqual(down_segments, '')

    def gpstart_helper(self):
        '''helper method to run in scenario test'''
        (rc, result) = self.pgutil.run('gpstart -a')
        self.assertIn(rc, (0, 1))

    def gpstop_helper(self):
        '''helper method to run in scenario test'''
        cmd = Command('run gpstop', cmdStr='gpstop -a')
        cmd.run(validateAfter=True)

    def gpinitstandby_helper(self):
        '''helper method to create a new standby'''
        self.pgutil.install_standby()

    def removestandby_helper(self):
        ''' helper method to remove standby'''
        self.pgutil.remove_standby()

    def verify_standby_sync(self):
        '''Return True when all three standby checks pass; raise Exception otherwise.'''
        if (self.stdby.check_gp_segment_config()) and (
                self.stdby.check_pg_stat_replication()) and (
                    self.stdby.check_standby_processes()):
            return True
        raise Exception('standby and master out of sync!')

    def kill_standby_postmaster(self):
        '''Wait up to ~20s for the standby postmaster, then kill it.

        Returns the kill outcome, or False when the process never appears.
        '''
        def lookup_pid():
            return self.pgutil.get_pid_by_keyword(
                host=WalReplKillProcessTestCase.stdby_host,
                pgport=WalReplKillProcessTestCase.stdby_port,
                keyword="master",
                option="bin")

        delay = 0
        postmaster_pid = lookup_pid()
        while int(postmaster_pid) == -1 and delay < 20:
            sleep(1)
            delay = delay + 1
            postmaster_pid = lookup_pid()
        # Only the pid matters here: a process found on the final retry is
        # a valid target even though the retry budget is used up.
        if int(postmaster_pid) == -1:
            tinctest.logger.error(
                "error: standby postmaster process does not exist!")
            return False
        return self.killProcess_byPid(
            pid_toKill=[postmaster_pid],
            host=WalReplKillProcessTestCase.stdby_host)

    @staticmethod
    def _render_template(src_path, dst_path, transform):
        '''Write dst_path from src_path, passing every line through transform.'''
        with open(src_path, 'r') as src:
            with open(dst_path, 'w') as dst:
                for line in src:
                    dst.write(transform(line))

    def initial_setup(self):
        '''Generate the dml .sql/.ans/.yaml files from their ".in" templates
        and copy quote.csv to /tmp on the first primary segment host.
        '''
        keyword = 'rh55-qavm65'
        config = GPDBConfig()
        (seg_host, _seg_port) = config.get_hostandport_of_segment(
            psegmentNumber=0, pRole='p')
        cur_path = local_path('')
        sql_dir = os.path.join(cur_path, 'dml', 'sql')
        expected_dir = os.path.join(cur_path, 'dml', 'expected')
        config_dir = os.path.join(cur_path, 'dml', 'sql', 'config')
        yaml_path = local_path('dml/sql/config/gpl.yaml')
        tenk_path = local_path('dml/sql/_data/tenk.data')
        mydata_path = local_path('dml/sql/gpload/mydata')

        def use_segment_host(line):
            return line.replace(keyword, seg_host)

        def use_yaml_path(line):
            return line.replace('gpl.yaml', yaml_path)

        def use_tenk_path(line):
            return line.replace('tenk.data', tenk_path)

        def fill_gpload_config(line):
            # Exactly one substitution per line, chosen by the first
            # matching marker, in this order.
            if 'DATABASE' in line:
                return line.replace('tangp3', os.environ.get('PGDATABASE'))
            if 'USER' in line:
                return line.replace('tangp3', os.environ.get('USER'))
            if 'HOST' in line:
                return line.replace('rh55-qavm61', socket.gethostname())
            if 'PORT' in line and '5432' in line:
                return line.replace('5432', os.environ.get('PGPORT'))
            if 'mydata' in line:
                return line.replace('mydata', mydata_path)
            return line

        # (directory, generated file name, per-line transform); the template
        # is always "<generated file>.in" in the same directory.
        templates = (
            (sql_dir, 'insert_from_external.sql', use_segment_host),
            (expected_dir, 'insert_from_external.ans', use_segment_host),
            (sql_dir, 'insert_with_gpload.sql', use_yaml_path),
            (config_dir, 'gpl.yaml', fill_gpload_config),
            (expected_dir, 'insert_with_gpload.ans', use_yaml_path),
            (sql_dir, 'select_from_copy_table.sql', use_tenk_path),
            (expected_dir, 'select_from_copy_table.ans', use_tenk_path),
        )
        for directory, name, transform in templates:
            target = os.path.join(directory, name)
            self._render_template(target + '.in', target, transform)

        external_table = local_path('dml/sql/_data/quote.csv')
        clean_file = 'rm -rf /tmp/quote.csv'
        rmt_cmd = "gpssh -h %s -e '%s' " % (seg_host, clean_file)
        cmd = Command(name='Running a remote command', cmdStr=rmt_cmd)
        cmd.run(validateAfter=False)
        command = 'scp %s %s:/tmp' % (external_table, seg_host)
        cmd = Command(name='run %s' % command, cmdStr='%s' % command)
        try:
            cmd.run(validateAfter=True)
        except Exception as e:
            # Best effort: a failed copy is logged, not raised.
            tinctest.logger.error("Error running command %s\n" % e)
class GpstopTestCase(MPPTestCase):
    '''Runs gpstop with different options while a standby is configured.'''

    origin_mdd = os.environ.get('MASTER_DATA_DIRECTORY')

    def __init__(self, methodName):
        self.gputil = GpUtility()
        super(GpstopTestCase, self).__init__(methodName)

    def setUp(self):
        self.gputil.check_and_start_gpdb()
        # Drop any existing standby first, then create a fresh one.
        if self.gputil.check_standby_presence():
            self.gputil.remove_standby()
        self.gputil.install_standby()

    def tearDown(self):
        self.gputil.remove_standby()
        for recovery_cmd in ('gpstart -a', 'gprecoverseg -a'):
            self.gputil.run(recovery_cmd)

    def test_gpstop_from_master(self):
        stopped = self.gputil.gpstop_and_verify()
        self.assertTrue(stopped)
        self.gputil.run('gpstart -a')

    def test_gpstop_master_only(self):
        stopped = self.gputil.gpstop_and_verify(option='-m')
        self.assertTrue(stopped)
        self.gputil.run('gpstart -a')

    def test_gpstop_fast(self):
        # run transactions, and stop fast, check if transaction aborted,
        # and the cluster was stopped
        stopped = self.gputil.gpstop_and_verify(option='-M fast')
        self.assertTrue(stopped)
        self.gputil.run('gpstart -a')

    def test_gpstop_immediate(self):
        stopped = self.gputil.gpstop_and_verify(option='-M immediate')
        self.assertTrue(stopped)
        self.gputil.run('gpstart -a')

    def test_gpstop_smart(self):
        stopped = self.gputil.gpstop_and_verify(option='-M smart')
        self.assertTrue(stopped)
        self.gputil.run('gpstart -a')

    def test_gpdb_restart(self):
        restarted = self.gputil.gpstop_and_verify('-r')
        self.assertTrue(restarted)

    def test_gpdb_reload(self):
        reloaded = self.gputil.gpstop_and_verify('-u')
        self.assertTrue(reloaded)

    def test_gpstop_except_stdby(self):
        stopped = self.gputil.gpstop_and_verify('-y')
        self.assertTrue(stopped)
        self.gputil.run('gpstart -y')

    def test_gpstop_after_failover(self):
        tinctest.logger.info("test gpstop from new master after failover")
        activator = GpactivateStandby()
        new_master_host = activator.get_current_standby()
        new_master_port = activator.get_standby_port()
        new_master_dd = activator.get_standby_dd()
        activator.activate()

        # Stop, then restart, the cluster from the promoted standby.
        rc, _stdout = activator.run_remote(new_master_host,
                                           rmt_cmd='gpstop -a -M fast',
                                           pgport=new_master_port,
                                           standbydd=new_master_dd)
        self.assertEqual(0, rc)
        activator.run_remote(new_master_host,
                             rmt_cmd='gpstart -a',
                             pgport=new_master_port,
                             standbydd=new_master_dd)
        self.gputil.failback_to_original_master(self.origin_mdd,
                                                new_master_host,
                                                new_master_dd,
                                                new_master_port)
class GpstateTestCase(MPPTestCase):
    '''Testcases for gpstate output with a standby configured and after failover.'''

    origin_mdd = os.environ.get('MASTER_DATA_DIRECTORY')

    def __init__(self, methodName):
        self.gputil = GpUtility()
        # Flat list of port, hostname, fselocation values of all mirrors.
        self.mirrorConfig = []
        self.master_port = os.environ.get('PGPORT')
        self.masterdd = os.environ.get('MASTER_DATA_DIRECTORY')
        self.activatestdby = ""
        super(GpstateTestCase, self).__init__(methodName)

    def setUp(self):
        self.gputil.check_and_start_gpdb()
        if self.gputil.check_standby_presence():
            self.gputil.remove_standby()
        self.gputil.install_standby()
        get_mirror_sql = '''select port, hostname, fselocation from gp_segment_configuration, pg_filespace_entry where dbid = fsedbid and content != -1 and preferred_role=\'m\' ;'''
        segments = self.gputil.run_SQLQuery(get_mirror_sql, dbname='template1')
        for seg in segments:
            # port, hostname, data directory (in query column order)
            self.mirrorConfig.extend([seg[0], seg[1], seg[2]])
        self.activatestdby = GpactivateStandby()

    def tearDown(self):
        del self.mirrorConfig[:]
        self.gputil.remove_standby()

    @staticmethod
    def _settings(stdout):
        '''Yield (line, value) for each "key = value" line of gpstate output.'''
        for line in stdout.split('\n'):
            if "=" in line:
                yield line, line.split('=')[1].strip()

    def test_gpstate_disp_recovery(self):
        ''' run gpstate with -f option'''
        standby_host = self.activatestdby.get_current_standby()
        standby_port = self.activatestdby.get_standby_port()
        standby_dir = self.activatestdby.get_standby_dd()
        standby_pid = self.gputil.get_pid_by_keyword(host=standby_host,
                                                     user=os.environ.get('USER'),
                                                     pgport=standby_port,
                                                     keyword='master',
                                                     option='bin')
        (rc, stdout) = self.gputil.run('gpstate -f')
        self.assertEqual(rc, 0)
        for line, value in self._settings(stdout):
            if "Standby address" in line:
                self.assertEqual(value, standby_host)
            elif "Standby data directory" in line:
                self.assertEqual(value, standby_dir)
            elif "Standby port" in line:
                self.assertEqual(int(value), int(standby_port))
            elif "Standby PID" in line:
                self.assertEqual(value, standby_pid)

    def test_gpstate_disp_failover(self):
        '''test if the master configuration detail changed after activating standby'''
        standby_host = self.activatestdby.get_current_standby()
        standby_port = self.activatestdby.get_standby_port()
        standby_dir = self.activatestdby.get_standby_dd()
        standby_pid = self.gputil.get_pid_by_keyword(host=standby_host,
                                                     user=os.environ.get('USER'),
                                                     pgport=standby_port,
                                                     keyword='master',
                                                     option='bin')
        self.activatestdby.activate()
        (rc, stdout) = self.activatestdby.run_remote(standby_host,
                                                     rmt_cmd='gpstate -s',
                                                     pgport=standby_port,
                                                     standbydd=standby_dir)
        self.assertIn(rc, (0, 1))
        for line, value in self._settings(stdout):
            if "Master host" in line:
                self.assertEqual(value, standby_host)
            elif "Master postgres process ID" in line:
                self.assertEqual(value, standby_pid)
            elif "Master data directory" in line:
                self.assertEqual(value, standby_dir)
            elif "Master port" in line:
                self.assertEqual(int(value), int(standby_port))
        self.gputil.failback_to_original_master(self.origin_mdd, standby_host,
                                                standby_dir, standby_port)

    def test_gpstate_active_segment_failover(self):
        ''' test if gpstate show correct # of up and down nodes after failover'''
        count_up_seg = '''select count(*) from gp_segment_configuration where content != -1 and status = \'u\';'''
        count_down_seg = '''select count(*) from gp_segment_configuration where content != -1 and status = \'d\';'''
        number_up_segment = PSQL.run_sql_command(count_up_seg, flags='-q -t',
                                                 dbname='template1')
        number_down_segment = PSQL.run_sql_command(count_down_seg,
                                                   flags='-q -t',
                                                   dbname='template1')
        standby_host = self.activatestdby.get_current_standby()
        standby_port = self.activatestdby.get_standby_port()
        standby_dir = self.activatestdby.get_standby_dd()
        self.activatestdby.activate()
        (rc, stdout) = self.activatestdby.run_remote(standby_host,
                                                     rmt_cmd='gpstate -Q',
                                                     pgport=standby_port,
                                                     standbydd=standby_dir)
        self.assertIn(rc, (0, 1))
        for line, value in self._settings(stdout):
            if "up segments" in line:
                self.assertEqual(number_up_segment.strip(), value)
            elif "down segments" in line:
                self.assertEqual(number_down_segment.strip(), value)
        self.gputil.failback_to_original_master(self.origin_mdd, standby_host,
                                                standby_dir, standby_port)

    def test_gpstate_disp_mirror_failover(self):
        ''' check if new master is able to get correct mirror configuration with gpstate -m'''
        keywords = ("Mirror", "Datadir", "Port")
        standby_host = self.activatestdby.get_current_standby()
        standby_port = self.activatestdby.get_standby_port()
        standby_dir = self.activatestdby.get_standby_dd()
        self.activatestdby.activate()
        (rc, stdout) = self.activatestdby.run_remote(standby_host,
                                                     rmt_cmd='gpstate -m',
                                                     pgport=standby_port,
                                                     standbydd=standby_dir)
        self.assertEqual(rc, 0)

        # Compare as text: parsed fields are strings, while the values read
        # from the catalog may not be.
        known_values = set(str(value) for value in self.mirrorConfig)

        # Index of the "Mirror" column; None until the header line is seen.
        # NOTE(review): assumes data rows carry the same leading fields as
        # the header row, so columns line up by position -- confirm against
        # real gpstate -m output.
        first_col = None
        # stdout is one string: walk it line by line (iterating it directly
        # would visit single characters and never match the header).
        for line in stdout.split('\n'):
            fields = line.split()
            if first_col is None:
                if all(s in line for s in keywords):
                    first_col = fields.index('Mirror') if 'Mirror' in fields else 2
                continue
            if len(fields) < first_col + 3:
                # separator / trailing lines carry no mirror row
                continue
            mirror_host = fields[first_col]
            mirror_dir = fields[first_col + 1]
            mirror_port = fields[first_col + 2]
            self.assertTrue(mirror_host in known_values)
            self.assertTrue(mirror_dir in known_values)
            self.assertTrue(mirror_port in known_values)
        self.gputil.failback_to_original_master(self.origin_mdd, standby_host,
                                                standby_dir, standby_port)
class WalReplKillProcessTestCase(TINCTestCase):
    '''Helpers that kill WAL-replication related processes and check the outcome.'''

    # this is not hard code, will be updated
    stdby_host = 'localhost'
    stdby_port = '5432'

    def __init__(self, methodName):
        self.gphome = os.environ.get('GPHOME')
        self.pgport = os.environ.get('PGPORT')
        self.pgdatabase = os.environ.get('PGDATABASE')
        self.stdby_host = 'localhost'
        self.master_dd = os.environ.get('MASTER_DATA_DIRECTORY')
        self.pgutil = GpUtility()
        self.stdby = StandbyVerify()
        super(WalReplKillProcessTestCase, self).__init__(methodName)

    def killProcess_byPid(self, signal=9, pid_toKill=None, host="localhost"):
        '''Send `signal` to every pid in `pid_toKill` on `host` through gpssh.

        Returns True when the gpssh command exits with 0, False otherwise
        (including when no pid was supplied).
        '''
        pids = [str(pid).strip() for pid in (pid_toKill or [])]
        if not pids:
            # "kill -9" without a pid is always an error; do not run it.
            tinctest.logger.error("Killing process error, no pid given")
            return False
        kill_cmd = "%s/bin/gpssh -h %s -e 'kill -%s %s'" % (
            os.environ.get('GPHOME'), host, signal, " ".join(pids))
        (rc, result) = self.pgutil.run(kill_cmd)
        if rc == 0:
            tinctest.logger.info("Process killed, %s" % result)
            return True
        tinctest.logger.error(
            "Killing process error, Status Code non zero, cmd: %s\n" % kill_cmd)
        return False

    def kill_walstartup(self):
        '''Kill the startup process on the standby; returns the kill outcome.'''
        stdby_host = GpinitStandby().get_standbyhost()
        stdby_port = GpactivateStandby().get_standby_port()
        startup_pid = self.pgutil.get_pid_by_keyword(host=stdby_host,
                                                     pgport=stdby_port,
                                                     keyword="startup process")
        if int(startup_pid) == -1:
            tinctest.logger.error("error:startup process does not exist!")
            return False
        # Propagate the result, as kill_standby_postmaster does.
        return self.killProcess_byPid(pid_toKill=[startup_pid], host=stdby_host)

    def kill_walreceiver(self):
        '''Kill the wal receiver process on the standby; returns the kill outcome.'''
        stdby_host = GpinitStandby().get_standbyhost()
        stdby_port = GpactivateStandby().get_standby_port()
        walreceiver_pid = self.pgutil.get_pid_by_keyword(
            host=stdby_host, pgport=stdby_port, keyword="wal receiver process")
        if int(walreceiver_pid) == -1:
            tinctest.logger.error("error: wal receiver process does not exist!")
            return False
        # Propagate the result, as kill_standby_postmaster does.
        return self.killProcess_byPid(pid_toKill=[walreceiver_pid], host=stdby_host)

    def kill_walsender_check_postmaster_reset(self):
        '''Kill the wal sender and require that a different pid replaces it.

        Returns False if no wal sender exists, True on success; raises
        Exception when the pid is unchanged two seconds after the kill.
        '''
        walsender_old_pid = self.pgutil.get_pid_by_keyword(
            pgport=self.pgport, keyword="wal sender process")
        if int(walsender_old_pid) == -1:
            tinctest.logger.error("error: process wal sender does not exist on host")
            return False
        self.killProcess_byPid(pid_toKill=[walsender_old_pid])
        sleep(2)
        walsender_new_pid = self.pgutil.get_pid_by_keyword(
            pgport=self.pgport, keyword="wal sender process")
        if walsender_old_pid == walsender_new_pid:
            raise Exception("Killing walsender failed to force postmaster reset")
        return True

    def kill_transc_backend_check_reset(self):
        '''Kill a transaction backend and check every postmaster child got a new pid.

        Raises Exception for the first child process whose pid is still
        unchanged after the polling window.
        '''
        dict_process = {
            'stats collector process': -1,
            'writer process': -1,
            'checkpointer process': -1,
            'seqserver process': -1,
            'ftsprobe process': -1,
            'sweeper process': -1,
            'wal sender process': -1
        }
        for process in dict_process:
            dict_process[process] = self.pgutil.get_pid_by_keyword(
                pgport=self.pgport, keyword=process)
        self.kill_transc_backend()
        for process in dict_process:
            pid = self.pgutil.get_pid_by_keyword(pgport=self.pgport,
                                                 keyword=process)
            delay = 1
            while dict_process[process] == pid and delay < 5:
                sleep(1)
                delay = delay + 1
                pid = self.pgutil.get_pid_by_keyword(pgport=self.pgport,
                                                     keyword=process)
            # Decide on the pid itself, not on the retry counter: a pid that
            # changed on the very last poll is still a success.
            if dict_process[process] == pid:
                tinctest.logger.error(
                    "Killing transaction backend process failed to force postmaster reset: %s"
                    % process)
                raise Exception(
                    "Killing transaction backend process failed to force postmaster reset child process")

    def kill_transc_backend(self):
        '''Find backend(s) running an INSERT in self.pgdatabase and kill them.'''
        sql = "SELECT procpid FROM pg_stat_activity WHERE datname='{0}' AND current_query like 'INSERT INTO%'".format(
            self.pgdatabase)
        tinctest.logger.info(
            "running sql command to get transaction backend process: --- %s" % sql)
        procid = PSQL.run_sql_command(sql, flags='-q -t',
                                      dbname=self.pgdatabase)
        count = 1
        while not procid.strip() and count < 5:
            sleep(1)
            count += 1
            procid = PSQL.run_sql_command(sql, flags='-q -t',
                                          dbname=self.pgdatabase)
        if procid.strip():
            tinctest.logger.info("got procid to kill: %s " % procid)
            # The psql output may carry surrounding whitespace or several
            # rows; hand over clean, individual pids.
            self.killProcess_byPid(pid_toKill=procid.split())
        else:
            tinctest.logger.error("There is no active backend process")

    def check_stdby_stop(self):
        '''Raise Exception if a master process is still found on the standby.'''
        stdby_host = GpinitStandby().get_standbyhost()
        stdby_port = GpactivateStandby().get_standby_port()
        master_pid = self.pgutil.get_pid_by_keyword(host=stdby_host,
                                                    pgport=stdby_port,
                                                    keyword="master",
                                                    option="bin")
        if int(master_pid) != -1:
            raise Exception("standby should stop but failed!")

    def start_stdby(self):
        '''Start the standby postmaster remotely with pg_ctl.'''
        stdby_host = GpinitStandby().get_standbyhost()
        stdby_dbid = self.get_standby_dbid()
        activate_stdby = GpactivateStandby()
        stdby_mdd = activate_stdby.get_standby_dd()
        stdby_port = activate_stdby.get_standby_port()
        cmd = "pg_ctl -D %s -o '-p %s --gp_dbid=%s --gp_num_contents_in_cluster=2 --silent-mode=true -i -M master --gp_contentid=-1 -x 0 -E' start &" % (
            stdby_mdd, stdby_port, stdby_dbid)
        self.run_remote(stdby_host, cmd, stdby_port, stdby_mdd)

    def run_remote(self, standbyhost, rmt_cmd, pgport='', standbydd=''):
        '''Runs remote command and returns rc, result '''
        export_cmd = "source %s/greenplum_path.sh;export PGPORT=%s;export MASTER_DATA_DIRECTORY=%s" % (
            self.gphome, pgport, standbydd)
        remote_cmd = "gpssh -h %s -e \"%s; %s\"" % (standbyhost, export_cmd,
                                                    rmt_cmd)
        cmd = Command(name='Running Remote command', cmdStr='%s' % remote_cmd)
        tinctest.logger.info(" %s" % cmd)
        cmd.run(validateAfter=False)
        result = cmd.get_results()
        return result.rc, result.stdout

    def check_mirror_seg(self):
        '''Run the mirror integrity check including the master.'''
        dbstate = DbStateClass('run_validation')
        dbstate.check_mirrorintegrity(master=True)

    def get_standby_dbid(self):
        '''Return the dbid of the content -1 / role 'm' entry as a stripped string.'''
        std_sql = "select dbid from gp_segment_configuration where content='-1' and role='m';"
        standby_dbid = PSQL.run_sql_command(std_sql, flags='-q -t',
                                            dbname='template1')
        return standby_dbid.strip()

    def run_transaction_backend(self):
        '''Run backend.sql from the local test directory, if present.'''
        tinctest.logger.info("local path for backend.sql is %s" % local_path(''))
        for entry in os.listdir(local_path('')):
            if fnmatch.fnmatch(entry, 'backend.sql'):
                PSQL.run_sql_file(local_path(entry))

    def get_down_segment(self):
        '''Return the psql output listing segments that are not in sync and up.'''
        query = 'select * from gp_segment_configuration where mode <> \'s\' and status <>\'u\''
        result = PSQL.run_sql_command(query, flags='-q -t', dbname='template1')
        return result.strip()

    def check_gpdb_status(self):
        '''Assert that get_down_segment() reports nothing.'''
        down_segments = self.get_down_segment()
        self.assertEqual(down_segments, '')

    def gpstart_helper(self):
        '''helper method to run in scenario test'''
        (rc, result) = self.pgutil.run('gpstart -a')
        self.assertIn(rc, (0, 1))

    def gpstop_helper(self):
        '''helper method to run in scenario test'''
        cmd = Command('run gpstop', cmdStr='gpstop -a')
        cmd.run(validateAfter=True)

    def gpinitstandby_helper(self):
        '''helper method to create a new standby'''
        self.pgutil.install_standby()

    def removestandby_helper(self):
        ''' helper method to remove standby'''
        self.pgutil.remove_standby()

    def verify_standby_sync(self):
        '''Return True when all three standby checks pass; raise Exception otherwise.'''
        if (self.stdby.check_gp_segment_config()) and (
                self.stdby.check_pg_stat_replication()) and (
                    self.stdby.check_standby_processes()):
            return True
        raise Exception('standby and master out of sync!')

    def kill_standby_postmaster(self):
        '''Wait up to ~20s for the standby postmaster, then kill it.

        Returns the kill outcome, or False when the process never appears.
        '''
        def lookup_pid():
            return self.pgutil.get_pid_by_keyword(
                host=WalReplKillProcessTestCase.stdby_host,
                pgport=WalReplKillProcessTestCase.stdby_port,
                keyword="master",
                option="bin")

        delay = 0
        postmaster_pid = lookup_pid()
        while int(postmaster_pid) == -1 and delay < 20:
            sleep(1)
            delay = delay + 1
            postmaster_pid = lookup_pid()
        # Only the pid matters here: a process found on the final retry is
        # a valid target even though the retry budget is used up.
        if int(postmaster_pid) == -1:
            tinctest.logger.error("error: standby postmaster process does not exist!")
            return False
        return self.killProcess_byPid(
            pid_toKill=[postmaster_pid],
            host=WalReplKillProcessTestCase.stdby_host)

    @staticmethod
    def _render_template(src_path, dst_path, transform):
        '''Write dst_path from src_path, passing every line through transform.'''
        with open(src_path, 'r') as src:
            with open(dst_path, 'w') as dst:
                for line in src:
                    dst.write(transform(line))

    def initial_setup(self):
        '''Generate the dml .sql/.ans/.yaml files from their ".in" templates
        and copy quote.csv to /tmp on the first primary segment host.
        '''
        keyword = 'rh55-qavm65'
        config = GPDBConfig()
        (seg_host, _seg_port) = config.get_hostandport_of_segment(
            psegmentNumber=0, pRole='p')
        cur_path = local_path('')
        sql_dir = os.path.join(cur_path, 'dml', 'sql')
        expected_dir = os.path.join(cur_path, 'dml', 'expected')
        config_dir = os.path.join(cur_path, 'dml', 'sql', 'config')
        yaml_path = local_path('dml/sql/config/gpl.yaml')
        tenk_path = local_path('dml/sql/_data/tenk.data')
        mydata_path = local_path('dml/sql/gpload/mydata')

        def use_segment_host(line):
            return line.replace(keyword, seg_host)

        def use_yaml_path(line):
            return line.replace('gpl.yaml', yaml_path)

        def use_tenk_path(line):
            return line.replace('tenk.data', tenk_path)

        def fill_gpload_config(line):
            # Exactly one substitution per line, chosen by the first
            # matching marker, in this order.
            if 'DATABASE' in line:
                return line.replace('tangp3', os.environ.get('PGDATABASE'))
            if 'USER' in line:
                return line.replace('tangp3', os.environ.get('USER'))
            if 'HOST' in line:
                return line.replace('rh55-qavm61', socket.gethostname())
            if 'PORT' in line and '5432' in line:
                return line.replace('5432', os.environ.get('PGPORT'))
            if 'mydata' in line:
                return line.replace('mydata', mydata_path)
            return line

        # (directory, generated file name, per-line transform); the template
        # is always "<generated file>.in" in the same directory.
        templates = (
            (sql_dir, 'insert_from_external.sql', use_segment_host),
            (expected_dir, 'insert_from_external.ans', use_segment_host),
            (sql_dir, 'insert_with_gpload.sql', use_yaml_path),
            (config_dir, 'gpl.yaml', fill_gpload_config),
            (expected_dir, 'insert_with_gpload.ans', use_yaml_path),
            (sql_dir, 'select_from_copy_table.sql', use_tenk_path),
            (expected_dir, 'select_from_copy_table.ans', use_tenk_path),
        )
        for directory, name, transform in templates:
            target = os.path.join(directory, name)
            self._render_template(target + '.in', target, transform)

        external_table = local_path('dml/sql/_data/quote.csv')
        clean_file = 'rm -rf /tmp/quote.csv'
        rmt_cmd = "gpssh -h %s -e '%s' " % (seg_host, clean_file)
        cmd = Command(name='Running a remote command', cmdStr=rmt_cmd)
        cmd.run(validateAfter=False)
        command = 'scp %s %s:/tmp' % (external_table, seg_host)
        cmd = Command(name='run %s' % command, cmdStr='%s' % command)
        try:
            cmd.run(validateAfter=True)
        except Exception as e:
            # Best effort: a failed copy is logged, not raised.
            tinctest.logger.error("Error running command %s\n" % e)
class GpstartTestCase(MPPTestCase):
    '''
    Testcases for gpstart on a cluster that has a standby master.

    gpstart may return status code 1 as well as 0 in the success case.
    The difference is whether it produces WARNING or not, but here we
    don't care, so the tests accept either code.
    '''

    # Evaluated once, when the class is defined; handed to
    # failback_to_original_master() by the failover tests.
    origin_mdd = os.environ.get('MASTER_DATA_DIRECTORY')

    def __init__(self, methodName):
        self.gputil = GpUtility()
        self.stdby = StandbyVerify()
        super(GpstartTestCase, self).__init__(methodName)

    def setUp(self):
        '''Start the cluster if needed and (re)create the standby.'''
        self.gputil.check_and_start_gpdb()
        # We should forcibly recreate the standby, as it might have been
        # promoted.
        if self.gputil.check_standby_presence():
            self.gputil.remove_standby()
        self.gputil.install_standby()

    def tearDown(self):
        '''Remove the standby that setUp installed.'''
        self.gputil.remove_standby()

    def _activate_standby(self):
        '''
        Activate the current standby.

        The standby's coordinates are read before activation and returned
        as a (host, data directory, port) tuple.
        '''
        activatestdby = GpactivateStandby()
        standby_host = activatestdby.get_current_standby()
        standby_mdd = activatestdby.get_standby_dd()
        standby_port = activatestdby.get_standby_port()
        activatestdby.activate()
        return (standby_host, standby_mdd, standby_port)

    # ------------------------------------------------------------------
    # Gpstart test cases in recovery mode
    # ------------------------------------------------------------------

    def test_gpstart_from_master(self):
        '''Plain "gpstart -a": the standby must be configured and replicating.'''
        self.gputil.check_and_stop_gpdb()
        (rc, stdout) = self.gputil.run('gpstart -a ')
        self.assertIn(rc, (0, 1))
        self.assertTrue(self.gputil.gpstart_and_verify())
        # NOTE(review): presumably gives replication a moment to come up
        # before the standby checks below -- confirm.
        sleep(2)
        self.assertTrue(self.stdby.check_gp_segment_config(),
                        'standby master not cofigured')
        self.assertTrue(self.stdby.check_pg_stat_replication(),
                        'standby not in replication status')
        self.assertTrue(self.stdby.check_standby_processes(),
                        'standby processes not running')
        (rc, output) = self.gputil.run(
            command='ps -ef|grep "wal sender "|grep -v grep')
        # NOTE(review): this only checks that some output object came back;
        # it does not look at rc or at the content of the ps listing.
        self.assertIsNotNone(output)

    def test_gpstart_master_only(self):
        '''Test -m: utility-mode psql must work, a normal psql must not.'''
        self.gputil.check_and_stop_gpdb()
        (rc, stdout) = self.gputil.run(
            'export GPSTART_INTERNAL_MASTER_ONLY=1; '
            'gpstart -a -m ')
        self.assertIn(rc, (0, 1))
        self.assertTrue(self.gputil.gpstart_and_verify())
        (rc, output) = self.gputil.run(
            'PGDATABASE=template1 '
            "PGOPTIONS='-c gp_session_role=utility' "
            'psql')
        self.assertEqual(rc, 0)
        (rc, output) = self.gputil.run('psql template1')
        # should fail due to master only mode
        self.assertEqual(rc, 2)
        # Leave master-only mode and bring the whole cluster back.
        self.gputil.run('gpstop -a -m')
        self.gputil.run('gpstart -a')

    def test_gpstart_restricted_mode_master(self):
        """Test -R option with standby."""
        self.gputil.check_and_stop_gpdb()
        (rc, stdout) = self.gputil.run('gpstart -a -R')
        self.assertIn(rc, (0, 1))
        self.assertTrue(self.gputil.gpstart_and_verify())
        (rc, output) = self.gputil.run(command='psql template1')
        self.assertIn(rc, (0, 1))
        self.gputil.run('gpstop -ar')

    def test_gpstart_master_w_timeout(self):
        """Test -t option with standby."""
        self.gputil.check_and_stop_gpdb()
        (rc, output) = self.gputil.run('gpstart -a -t 30')
        self.assertIn(rc, (0, 1))
        self.assertTrue(self.gputil.gpstart_and_verify())
        self.gputil.run('gpstop -ar')

    def test_gpstart_no_standby(self):
        """Test -y with standby configured."""
        self.gputil.check_and_stop_gpdb()
        (rc, stdout) = self.gputil.run('gpstart -a -y')
        self.assertIn(rc, (0, 1))
        self.assertTrue(self.gputil.gpstart_and_verify())
        self.assertFalse(self.stdby.check_standby_processes(),
                         'gpstart without standby failed, standby was running')
        self.gputil.run('gpstop -ar')

    def test_gpstart_wo_standby(self):
        """Test -y without standby configured."""
        # setUp always installs a standby, so drop it again first.
        self.gputil.remove_standby()
        self.gputil.check_and_stop_gpdb()
        (rc, stdout) = self.gputil.run('gpstart -a -y')
        self.assertIn(rc, (0, 1))
        self.assertTrue(self.gputil.gpstart_and_verify())
        self.assertFalse(self.stdby.check_standby_processes(),
                         'standby processes presented')
        self.gputil.run('gpstop -ar')

    # ------------------------------------------------------------------
    # Gpstart, test case in failover mode
    # ------------------------------------------------------------------

    def test_gpstart_master_only_after_failover(self):
        """
        for test purpose, failing back to old master should
        remove standby from primary after activate standby
        """
        tinctest.logger.info("start master only with -m option after failover")
        (standby_host, standby_mdd, standby_port) = self._activate_standby()
        self.stdby._run_remote_command(standby_host, command='gpstop -a')
        stdout = self.stdby._run_remote_command(
            standby_host,
            command='export GPSTART_INTERNAL_MASTER_ONLY=1; gpstart -a -m')
        self.assertNotRegexpMatches(
            stdout, "ERROR", "Start master only after failover failed")
        self.assertTrue(self.gputil.gpstart_and_verify(master_dd=standby_mdd,
                                                       host=standby_host))
        self.stdby._run_remote_command(standby_host, command='gpstop -a -m')
        self.gputil.run(command='gpstop -ar')
        self.gputil.failback_to_original_master(
            self.origin_mdd, standby_host, standby_mdd, standby_port)

    def test_gpstart_master_after_failover(self):
        """
        failover, start from new master, then recover the cluster back to
        have the old master active.
        """
        tinctest.logger.info("failover, and run gpstart master test")
        self.gputil.check_and_start_gpdb()
        (standby_host, standby_mdd, standby_port) = self._activate_standby()
        self.stdby._run_remote_command(standby_host, command='gpstop -a')
        stdout = self.stdby._run_remote_command(standby_host,
                                                command='gpstart -a')
        # The third positional argument of assertNotRegexpMatches is the
        # failure message, so both keywords must live in the pattern itself.
        self.assertNotRegexpMatches(
            stdout, "FATAL|ERROR",
            "gpstart from new master after failover reported FATAL/ERROR")
        self.assertTrue(self.gputil.gpstart_and_verify(master_dd=standby_mdd,
                                                       host=standby_host))
        self.gputil.failback_to_original_master(
            self.origin_mdd, standby_host, standby_mdd, standby_port)

    def test_gpstart_original_master_after_promote(self):
        """
        activate the standby, then check that gpstart on the original
        master is rejected, and finally fail back to the original master.
        """
        tinctest.logger.info("activate and run gpstart for original master")
        (standby_host, standby_mdd, standby_port) = self._activate_standby()
        (rc, stdout) = self.gputil.run('gpstart -a -v')
        # The pg_controldata results are not inspected here.
        # NOTE(review): presumably run only so the output lands in the logs.
        self.gputil.run('pg_controldata %s' % self.origin_mdd)
        self.stdby._run_remote_command(
            standby_host, command='pg_controldata %s' % standby_mdd)
        self.assertNotEqual(rc, 0)
        # This below error message comes from gpstart product code
        # (if its modified change it here as well.)
        self.assertRegexpMatches(
            stdout, "Standby activated, this node no more can act as master.")
        self.gputil.failback_to_original_master(
            self.origin_mdd, standby_host, standby_mdd, standby_port)
class GpstopTestCase(MPPTestCase):
    '''
    Testcases for gpstop on a cluster that has a standby master.

    Each test stops (or restarts/reloads) the cluster through
    GpUtility.gpstop_and_verify() and, where the cluster is left down,
    starts it again so the next test finds a running system.
    '''

    # Evaluated once, when the class is defined; handed to
    # failback_to_original_master() by the failover test.
    origin_mdd = os.environ.get('MASTER_DATA_DIRECTORY')

    def __init__(self, methodName):
        self.gputil = GpUtility()
        super(GpstopTestCase, self).__init__(methodName)

    def setUp(self):
        '''Start the cluster if needed and (re)create the standby.'''
        self.gputil.check_and_start_gpdb()
        if self.gputil.check_standby_presence():
            self.gputil.remove_standby()
        self.gputil.install_standby()

    def tearDown(self):
        '''Remove the standby and issue a final "gpstart -a".'''
        self.gputil.remove_standby()
        self.gputil.run('gpstart -a')

    def test_gpstop_from_master(self):
        '''gpstop with default options.'''
        self.assertTrue(self.gputil.gpstop_and_verify())
        self.gputil.run('gpstart -a')

    def test_gpstop_master_only(self):
        '''gpstop -m.'''
        self.assertTrue(self.gputil.gpstop_and_verify(option='-m'))
        self.gputil.run('gpstart -a')

    def test_gpstop_fast(self):
        '''gpstop -M fast.'''
        # run transactions, and stop fast, check if transaction aborted,
        # and the cluster was stopped
        # NOTE(review): no transactions are started by this method itself;
        # confirm whether gpstop_and_verify covers that part.
        self.assertTrue(self.gputil.gpstop_and_verify(option='-M fast'))
        self.gputil.run('gpstart -a')

    def test_gpstop_immediate(self):
        '''gpstop -M immediate.'''
        self.assertTrue(self.gputil.gpstop_and_verify(option='-M immediate'))
        self.gputil.run('gpstart -a')

    def test_gpstop_smart(self):
        '''gpstop -M smart.'''
        self.assertTrue(self.gputil.gpstop_and_verify(option='-M smart'))
        self.gputil.run('gpstart -a')

    def test_gpdb_restart(self):
        '''gpstop -r; no explicit gpstart afterwards.'''
        self.assertTrue(self.gputil.gpstop_and_verify('-r'))

    def test_gpdb_reload(self):
        '''gpstop -u; no explicit gpstart afterwards.'''
        self.assertTrue(self.gputil.gpstop_and_verify('-u'))

    def test_gpstop_except_stdby(self):
        '''gpstop -y, then restart with the matching gpstart -y.'''
        self.assertTrue(self.gputil.gpstop_and_verify('-y'))
        # -a added: every other gpstart call in this file passes it, and
        # without it gpstart asks for interactive confirmation.
        self.gputil.run('gpstart -a -y')

    def test_gpstop_after_failover(self):
        '''Activate the standby, stop and restart from it, then fail back.'''
        tinctest.logger.info("test gpstop from new master after failover")
        activatestdby = GpactivateStandby()
        standby_host = activatestdby.get_current_standby()
        standby_port = activatestdby.get_standby_port()
        standby_mdd = activatestdby.get_standby_dd()
        activatestdby.activate()
        (rc, stdout) = activatestdby.run_remote(standby_host,
                                                rmt_cmd='gpstop -a -M fast',
                                                pgport=standby_port,
                                                standbydd=standby_mdd)
        self.assertEqual(0, rc)
        # Restart on the activated standby before failing back; the result
        # of this gpstart is not checked.
        activatestdby.run_remote(standby_host,
                                 rmt_cmd='gpstart -a',
                                 pgport=standby_port,
                                 standbydd=standby_mdd)
        self.gputil.failback_to_original_master(
            self.origin_mdd, standby_host, standby_mdd, standby_port)