def test_13_gp_persistent_relation_node_uaotable_del(self):
    """Verify that pg_aoseg and gp_persistent_relation_node agree on the
    segment-file EOF for a UAO relation after a delete plus VACUUM FULL.

    Steps: create the table from the SQL fixture, delete the minimum row
    in utility mode on the segment holding the data, VACUUM FULL, then
    assert the EOF marks still match on that segment.
    """
    tablename = "uao_table_test13"
    tinctest.logger.info("-------------------------------")
    tinctest.logger.info(
        "test_13 Verify the eof mark in pg_aoseg and gp_persistant_rel_node table for uao relation after delete "
    )
    tinctest.logger.info("-------------------------------\n")
    out_file = os.path.join(self.outpath, "create_tab_gp_persistent_relation_node_uaotable_del_13.out")
    sql_file = os.path.join(self.sqlpath, "create_tab_gp_persistent_relation_node_uaotable_del_13.sql")
    ans_file = os.path.join(self.anspath, "create_tab_gp_persistent_relation_node_uaotable_del_13.ans")
    # Create the UAO table and insert 10 rows; diff output against the answer file.
    PSQL.run_sql_file(sql_file=sql_file, out_file=out_file)
    assert Gpdiff.are_files_equal(out_file, ans_file)
    # Resolve the oid of the newly created table.
    relid = self.get_relid(file_name=tablename)
    # Host/port of the segment to connect to in utility mode.
    utilitymodeinfo = self.get_utilitymode_conn_info(relid=relid)
    u_port = utilitymodeinfo[0]
    u_host = utilitymodeinfo[1]
    # EOF marks must agree before the delete.
    assert self.is_same_eof_uao_on_segment(relid=relid, host=u_host, port=u_port)
    # Delete a single row directly on the segment, then compact.
    sql_cmd3 = "delete from %s where i = (select min(i) from %s );" % (tablename, tablename)
    PSQL.run_sql_command_utility_mode(sql_cmd=sql_cmd3, host=u_host, port=u_port, flags="-q -t")
    self.vacuum_full(tablename=tablename)
    # EOF marks must still agree after delete + VACUUM FULL.
    # (Removed: commented-out dead code comparing raw pg_aoseg /
    # gp_persistent_relation_node EOFs -- is_same_eof_uao_on_segment
    # already performs that comparison.)
    assert self.is_same_eof_uao_on_segment(relid=relid, host=u_host, port=u_port)
def test_17_gp_persistent_relation_node_uaocs_table_eof_del(self):
    """Verify the EOF mark in pg_aoseg matches gp_persistent_relation_node
    for a UAOCS relation after modifying one row in utility mode and
    running vacuum.

    NOTE(review): the test name and log message say "delete", but the SQL
    below performs an UPDATE -- confirm which operation was intended.
    """
    # Shares the *_14 fixture files and table with the test_14/18/20 family.
    tablename = "uaocs_table_test14"
    tinctest.logger.info("-------------------------------")
    tinctest.logger.info(
        "test_17 Verify the eof mark in pg_aoseg and gp_persistant_rel_node table for uaocs relation after delete "
    )
    tinctest.logger.info("-------------------------------\n")
    out_file = os.path.join(self.outpath, "create_tab_gp_persistent_relation_node_uaocs_table_upd_14.out")
    sql_file = os.path.join(self.sqlpath, "create_tab_gp_persistent_relation_node_uaocs_table_upd_14.sql")
    ans_file = os.path.join(self.anspath, "create_tab_gp_persistent_relation_node_uaocs_table_upd_14.ans")
    # create uaocs table and insert 10 rows
    sql_out = PSQL.run_sql_file(sql_file=sql_file, out_file=out_file)
    assert Gpdiff.are_files_equal(out_file, ans_file)
    # get relid for newly created table
    relid = self.get_relid(file_name=tablename)
    # get utility mode connection info
    utilitymodeinfo = self.get_utilitymode_conn_uaocs_info(relid=relid)
    u_port = utilitymodeinfo[0]
    u_host = utilitymodeinfo[1]
    # EOF marks must agree before the change.
    assert self.is_same_eof_uaocs_on_segment(relid=relid, host=u_host, port=u_port)
    # update 1 row
    sql_cmd3 = "update %s set k = k+ 100 where i = (select min(i) from %s );" % (tablename, tablename)
    PSQL.run_sql_command_utility_mode(sql_cmd=sql_cmd3, host=u_host, port=u_port, flags="-q -t")
    # NOTE(review): plain vacuum here, unlike the per-table vacuum_full used
    # by sibling tests (a per-table variant is left commented out below).
    self.vacuum()
    # self.vacuum(tablename=tablename)
    assert self.is_same_eof_uaocs_on_segment(relid=relid, host=u_host, port=u_port)
def test_10_use_udf_gp_aovisimap_hidden_info_uao_del_vacuum(self):
    """Verify gp_aovisimap_hidden_info(oid) hidden tuple counts for a UAO
    relation: 0 before a delete, > 0 afterwards, and 0 again after
    VACUUM FULL."""
    tablename = "uao_table_test11"
    tinctest.logger.info("-------------------------------")
    tinctest.logger.info(
        "test_10 Verify the hidden tup_count using udf gp_aovisimap_hidden_info(oid) for uao relation after delete and vacuum"
    )
    tinctest.logger.info("-------------------------------\n")
    out_file = os.path.join(self.outpath, "create_tab_tupcount_in_pg_aoseg_uaotable_upd_11.out")
    sql_file = os.path.join(self.sqlpath, "create_tab_tupcount_in_pg_aoseg_uaotable_upd_11.sql")
    ans_file = os.path.join(self.anspath, "create_tab_tupcount_in_pg_aoseg_uaotable_upd_11.ans")
    # Create the UAO table and insert 10 rows; diff against the answer file.
    PSQL.run_sql_file(sql_file=sql_file, out_file=out_file)
    assert Gpdiff.are_files_equal(out_file, ans_file)
    # Resolve the table oid (the original fetched it twice; once suffices).
    relid = self.get_relid(file_name=tablename)
    # Utility-mode connection info for the segment holding the data.
    utilitymodeinfo = self.get_utilitymode_conn_info(relid=relid)
    u_port = utilitymodeinfo[0]
    u_host = utilitymodeinfo[1]
    # No hidden tuples before the delete.
    assert 0 == int(self.get_hidden_tup_cnt(relid=relid, host=u_host, port=u_port))
    # Delete every row directly on the segment.
    sql_cmd3 = "delete from %s ;" % (tablename)
    PSQL.run_sql_command_utility_mode(sql_cmd=sql_cmd3, host=u_host, port=u_port, flags="-q -t")
    # Deleted rows are now hidden.
    assert int(self.get_hidden_tup_cnt(relid=relid, host=u_host, port=u_port)) > 0
    # VACUUM FULL reclaims the hidden tuples.
    self.vacuum_full(tablename=tablename)
    assert 0 == int(self.get_hidden_tup_cnt(relid=relid, host=u_host, port=u_port))
def test_09_call_udf_gp_aovisimap_forupdate(self):
    """Verify gp_aovisimap(oid) row counts in utility mode around an UPDATE:
    the visimap is empty before the update and non-empty afterwards."""
    tablename='uao_visimap_test09'
    tinctest.logger.info("-------------------------------")
    tinctest.logger.info('test_09 Verify the usage of UDF gp_aovisimap in utility mode for update tuple')
    tinctest.logger.info("-------------------------------\n")
    out_file = os.path.join(self.outpath,'create_tab_gp_aovisimap_upd_09.out')
    sql_file = os.path.join(self.sqlpath,'create_tab_gp_aovisimap_upd_09.sql')
    ans_file= os.path.join(self.anspath,'create_tab_gp_aovisimap_upd_09.ans')
    # create uao table and insert 10 rows
    sql_out=PSQL.run_sql_file(sql_file = sql_file,out_file=out_file)
    assert Gpdiff.are_files_equal(out_file, ans_file)
    # get relid for newly created table
    relid = self.get_relid(file_name=tablename)
    utilitymodeinfo=self.get_utilitymode_conn_info( relid=relid)
    u_port=utilitymodeinfo[0]
    u_host=utilitymodeinfo[1]
    # login to segment in utility mode and execute the gp_aovisimap(relid) UDF
    # NOTE(review): before_tablerowcnt is assigned but never used below.
    before_tablerowcnt=self.get_rowcnt_table_on_segment(tablename=tablename, host=u_host,port=u_port)
    before_visimaprowcnt=self.get_visimap_cnt_on_segment(relid=relid,host=u_host,port=u_port)
    # visimap must be empty before the update
    assert(int(before_visimaprowcnt) == 0)
    sql_cmd="update %s set j = j || '_9';" % (tablename)
    PSQL.run_sql_command_utility_mode(sql_cmd=sql_cmd,host=u_host, port=u_port,flags='-q -t')
    # updated tuples leave entries in the visimap
    after_visimaprowcnt=self.get_visimap_cnt_on_segment(relid=relid,host=u_host,port=u_port)
    assert(int(after_visimaprowcnt) > 0)
def test_18_gp_persistent_relation_node_uaocs_table_eof_upd(self):
    """Verify the EOF mark in pg_aoseg matches gp_persistent_relation_node
    for a UAOCS relation after removing one row and running VACUUM FULL.

    NOTE(review): the test name and log message say "update", but the SQL
    below performs a DELETE -- confirm which operation was intended
    (compare test_17, whose name says "del" but runs an update).
    """
    # Shares the *_14 fixture files and table with the test_14/17/20 family.
    tablename ='uaocs_table_test14'
    tinctest.logger.info("-------------------------------")
    tinctest.logger.info('test_18 Verify the eof mark in pg_aoseg and gp_persistant_rel_node table for uaocs relation after update ')
    tinctest.logger.info("-------------------------------\n")
    out_file = os.path.join(self.outpath,'create_tab_gp_persistent_relation_node_uaocs_table_upd_14.out')
    sql_file = os.path.join(self.sqlpath,'create_tab_gp_persistent_relation_node_uaocs_table_upd_14.sql')
    ans_file= os.path.join(self.anspath,'create_tab_gp_persistent_relation_node_uaocs_table_upd_14.ans')
    #create uaocs table and insert 10 rows
    sql_out=PSQL.run_sql_file(sql_file = sql_file,out_file=out_file)
    assert Gpdiff.are_files_equal(out_file, ans_file)
    #get relid for newly created table
    relid = self.get_relid(file_name=tablename )
    #get utility mode connection info
    utilitymodeinfo=self.get_utilitymode_conn_uaocs_info( relid=relid)
    u_port=utilitymodeinfo[0]
    u_host=utilitymodeinfo[1]
    # EOF marks must agree before the change.
    assert (self.is_same_eof_uaocs_on_segment(relid=relid,host=u_host,port= u_port))
    # delete 1 row
    sql_cmd3="delete from %s where i = (select min(i) from %s );" % (tablename, tablename)
    PSQL.run_sql_command_utility_mode(sql_cmd= sql_cmd3,host=u_host, port=u_port,flags='-q -t')
    self.vacuum_full(tablename=tablename)
    # EOF marks must still agree after delete + VACUUM FULL.
    assert (self.is_same_eof_uaocs_on_segment(relid=relid,host=u_host,port= u_port))
def test_20_use_udf_gp_aovisimap_hidden_info_uaocs_del(self):
    """Verify gp_aovisimap_hidden_info(oid) hidden tuple counts for a UAOCS
    relation: 0 before a single-row delete, exactly 1 afterwards, and 0
    again after VACUUM FULL."""
    # Shares the *_14 fixture files and table with the test_14/17/18 family.
    tablename ='uaocs_table_test14'
    tinctest.logger.info("-------------------------------")
    tinctest.logger.info('test_20 Verify the hidden tup_count using UDF gp_aovisimap_hidden_info(oid) for uaocs relation after delete ')
    tinctest.logger.info("-------------------------------\n")
    out_file = os.path.join(self.outpath,'create_tab_gp_persistent_relation_node_uaocs_table_upd_14.out')
    sql_file = os.path.join(self.sqlpath,'create_tab_gp_persistent_relation_node_uaocs_table_upd_14.sql')
    ans_file= os.path.join(self.anspath,'create_tab_gp_persistent_relation_node_uaocs_table_upd_14.ans')
    #create uaocs table and insert 10 rows
    sql_out=PSQL.run_sql_file(sql_file = sql_file,out_file=out_file)
    assert Gpdiff.are_files_equal(out_file, ans_file)
    #get relid for newly created table
    relid = self.get_relid(file_name=tablename )
    #get utility mode connection info
    utilitymodeinfo=self.get_utilitymode_conn_uaocs_info( relid=relid)
    u_port=utilitymodeinfo[0]
    u_host=utilitymodeinfo[1]
    # no hidden tuples before the delete
    assert(0 == int(self.get_hidden_tup_cnt(relid=relid,host=u_host,port= u_port)))
    # delete 1 row
    sql_cmd3="delete from %s where i = (select min(i) from %s );" % (tablename, tablename)
    PSQL.run_sql_command_utility_mode(sql_cmd= sql_cmd3,host=u_host, port=u_port,flags='-q -t')
    # exactly the one deleted tuple is hidden
    assert(1 == int(self.get_hidden_tup_cnt(relid=relid,host=u_host,port= u_port)))
    # VACUUM FULL reclaims the hidden tuple
    self.vacuum_full(tablename=tablename)
    assert(0 == int(self.get_hidden_tup_cnt(relid=relid,host=u_host,port= u_port)))
def test_08_call_udf_gp_aovisimap_fordelete(self):
    """Verify gp_aovisimap(oid) in utility mode around a DELETE: the visimap
    is empty before the delete and the table reads as empty afterwards."""
    tinctest.logger.info("-------------------------------")
    tinctest.logger.info('test_08 Verify the usage of UDF gp_aovisimap in utility mode for deleted tuple')
    tinctest.logger.info("-------------------------------\n")
    out_file = os.path.join(self.outpath, 'create_tab_gp_aovisimap_del_08.out')
    sql_file = os.path.join(self.sqlpath, 'create_tab_gp_aovisimap_del_08.sql')
    ans_file = os.path.join(self.anspath, 'create_tab_gp_aovisimap_del_08.ans')
    tablename = 'uao_visimap_test08'
    # Create the UAO table and insert 10 rows; diff against the answer file.
    PSQL.run_sql_file(sql_file=sql_file, out_file=out_file)
    assert Gpdiff.are_files_equal(out_file, ans_file)
    # Resolve the oid of the newly created table.
    relid = self.get_relid(file_name=tablename)
    # Pick the segment to log into in utility mode (host and port).
    utilitymodeinfo = self.get_utilitymode_conn_info(relid=relid)
    u_port = utilitymodeinfo[0]
    u_host = utilitymodeinfo[1]
    # Baseline reads on the segment (row count result intentionally unused).
    self.get_rowcnt_table_on_segment(tablename=tablename, host=u_host, port=u_port)
    before_visimaprowcnt = self.get_visimap_cnt_on_segment(relid=relid, host=u_host, port=u_port)
    assert int(before_visimaprowcnt) == 0
    # Use the tablename variable instead of a hardcoded table name so the
    # statement stays consistent if the fixture table is ever renamed.
    sql_cmd = "delete from %s ;" % tablename
    PSQL.run_sql_command_utility_mode(sql_cmd=sql_cmd, host=u_host, port=u_port, flags='-q -t')
    after_tablerowcnt = self.get_rowcnt_table_on_segment(tablename=tablename, host=u_host, port=u_port)
    self.get_visimap_cnt_on_segment(relid=relid, host=u_host, port=u_port)
    # All rows deleted: the segment-local count must be zero.
    assert int(after_tablerowcnt) == 0
def test_21_use_udf_gp_aovisimap_hidden_info_uao_upd_vacuum(self):
    """Verify gp_aovisimap_hidden_info(oid) hidden tuple counts for a UAO
    relation: 0 before an update, > 0 afterwards, and 0 again after
    VACUUM FULL."""
    tablename = 'uao_table_test11'
    tinctest.logger.info("-------------------------------")
    tinctest.logger.info('test_21 Verify the hidden tup_count using UDF gp_aovisimap_hidden_info(oid) for uao relation after update_vacuum')
    tinctest.logger.info("-------------------------------\n")
    out_file = os.path.join(self.outpath, 'create_tab_tupcount_in_pg_aoseg_uaotable_upd_11.out')
    sql_file = os.path.join(self.sqlpath, 'create_tab_tupcount_in_pg_aoseg_uaotable_upd_11.sql')
    ans_file = os.path.join(self.anspath, 'create_tab_tupcount_in_pg_aoseg_uaotable_upd_11.ans')
    # Create the UAO table and insert 10 rows; diff against the answer file.
    PSQL.run_sql_file(sql_file=sql_file, out_file=out_file)
    assert Gpdiff.are_files_equal(out_file, ans_file)
    # Resolve the table oid (the original fetched it twice; once suffices).
    relid = self.get_relid(file_name=tablename)
    # Utility-mode connection info for the segment holding the data.
    utilitymodeinfo = self.get_utilitymode_conn_info(relid=relid)
    u_port = utilitymodeinfo[0]
    u_host = utilitymodeinfo[1]
    # No hidden tuples before the update.
    assert 0 == int(self.get_hidden_tup_cnt(relid=relid, host=u_host, port=u_port))
    # Update every row directly on the segment.
    sql_cmd3 = "update %s set j = 'test11' ;" % (tablename)
    PSQL.run_sql_command_utility_mode(sql_cmd=sql_cmd3, host=u_host, port=u_port, flags='-q -t')
    # Old tuple versions are now hidden.
    assert int(self.get_hidden_tup_cnt(relid=relid, host=u_host, port=u_port)) > 0
    # VACUUM FULL reclaims the hidden tuples.
    self.vacuum_full(tablename=tablename)
    assert 0 == int(self.get_hidden_tup_cnt(relid=relid, host=u_host, port=u_port))
def switch_ckpt_switch_xlog(self):
    """Run pg_switch_xlog() on every primary segment via utility mode."""
    sql_cmd = 'select * from pg_switch_xlog();'
    for seg_no in range(self.config.get_countprimarysegments()):
        seg_host, seg_port = self.config.get_hostandport_of_segment(psegmentNumber=seg_no)
        PSQL.run_sql_command_utility_mode(sql_cmd, host=seg_host, port=seg_port)
def test_run_sql_command_utility_mode(self):
    """run_sql_command_utility_mode must open a utility-role session and
    write its output (containing 'utility') to the requested file."""
    sql_cmd = 'show gp_session_role'
    out_file = os.path.join(os.path.dirname(inspect.getfile(self.__class__)), 'test_utility_mode.out')
    self.assertFalse(os.path.exists(out_file))
    try:
        PSQL.run_sql_command_utility_mode(sql_cmd=sql_cmd, out_file=out_file)
        self.assertTrue(os.path.exists(out_file))
        with open(out_file, 'r') as f:
            output = f.read()
        self.assertIsNotNone(re.search('utility', output))
    finally:
        # Guard the cleanup: if the command failed before creating the file,
        # an unconditional os.remove here would raise OSError and mask the
        # original assertion failure.
        if os.path.exists(out_file):
            os.remove(out_file)
        self.assertFalse(os.path.exists(out_file))
def get_eofuncompressed_pgaoseg_on_segment(self, relid=0, host=None, port=None):
    """Return eofuncompressed from pg_aoseg.pg_aoseg_<relid> (first segment
    file with eof > 0), queried in utility mode on host:port.

    Accepts relid as int or str: the original called relid.strip() and
    concatenated relid into the log line, both of which crash on the int
    default of 0.
    """
    relid = str(relid).strip()
    tinctest.logger.info("-------------------------------")
    tinctest.logger.info('getting eofuncompressed from pg_aoseg.pg_aoseg_ ' + relid)
    tinctest.logger.info("-------------------------------\n")
    sql_cmd = "select eofuncompressed from pg_aoseg.pg_aoseg_%s where eof > 0 LIMIT 1;" % relid
    eof = PSQL.run_sql_command_utility_mode(sql_cmd=sql_cmd, host=host, port=port, flags='-q -t')
    return eof
def test_run_sql_command_utility_mode(self):
    """run_sql_command_utility_mode must open a utility-role session and
    write its output (containing 'utility') to the requested file.

    NOTE(review): duplicates the test of the same name elsewhere in this
    source -- consider consolidating.
    """
    sql_cmd = 'show gp_session_role'
    out_file = os.path.join(
        os.path.dirname(inspect.getfile(self.__class__)),
        'test_utility_mode.out')
    self.assertFalse(os.path.exists(out_file))
    try:
        PSQL.run_sql_command_utility_mode(sql_cmd=sql_cmd, out_file=out_file)
        self.assertTrue(os.path.exists(out_file))
        with open(out_file, 'r') as f:
            output = f.read()
        self.assertIsNotNone(re.search('utility', output))
    finally:
        # Guard the cleanup: if the command failed before creating the file,
        # an unconditional os.remove here would raise OSError and mask the
        # original assertion failure.
        if os.path.exists(out_file):
            os.remove(out_file)
        self.assertFalse(os.path.exists(out_file))
def get_rowcnt_table_on_segment(self, tablename=None, host=None, port=None):
    """Return count(*) for *tablename*, queried in utility mode on host:port."""
    tinctest.logger.info("-------------------------------")
    tinctest.logger.info('getting rowcount from table ' + tablename + ' on segment host=' + host + ' port=' + port)
    tinctest.logger.info("-------------------------------\n")
    count_query = "select count(*) from %s;" % tablename
    return PSQL.run_sql_command_utility_mode(sql_cmd=count_query, host=host, port=port, flags='-q -t')
def has_rows(self, sql, utility=False):
    """Return True when the psql output for *sql* reports at least one row.

    Matches either a multi-row footer ("N rows)" other than "(0 rows)")
    or the single-row footer "(1 row)".
    """
    if utility:
        output = PSQL.run_sql_command_utility_mode(sql, port=self.utility_port)
    else:
        output = PSQL.run_sql_command(sql)
    many = "rows)" in output and "(0 rows)" not in output
    one = "(1 row)" in output
    return many or one
def get_visimap_cnt_on_segment(self, relid=0, host=None, port=None):
    """Return count(*) from gp_aovisimap(relid), queried in utility mode
    on host:port."""
    tinctest.logger.info("-------------------------------")
    tinctest.logger.info('getting rowcount from relid '+ relid +' on segment host='+host +' port=' + port)
    tinctest.logger.info("-------------------------------\n")
    visimap_query = "select count(*) from gp_aovisimap(%s);" % relid.strip()
    return PSQL.run_sql_command_utility_mode(sql_cmd=visimap_query, host=host, port=port, flags='-q -t')
def get_hidden_tup_cnt(self, relid=0, host=None, port=None):
    """Return sum(hidden_tupcount) from gp_aovisimap_hidden_info(relid) as a
    string; '0' when the query produces no output.

    Accepts relid as int or str: the original called relid.strip(), which
    crashes on the int default of 0.
    """
    relid = str(relid).strip()
    tinctest.logger.info("-------------------------------")
    tinctest.logger.info('get hidden_tupcount from gp_aovisimap_hidden_info ')
    tinctest.logger.info("-------------------------------\n")
    sql_cmd = "select sum(hidden_tupcount) from gp_aovisimap_hidden_info(%s);" % relid
    tupcnt = PSQL.run_sql_command_utility_mode(sql_cmd=sql_cmd, host=host, port=port, flags='-q -t')
    # Empty output (e.g. a NULL sum printed as nothing under -t) counts as zero.
    if len(tupcnt.strip()) == 0:
        tupcnt = '0'
    return tupcnt
def is_same_eof_uao_on_segment(self, relid=0, host=None, port=None):
    """Return True when mirror_append_only_new_eof in
    gp_persistent_relation_node matches eofuncompressed in
    pg_aoseg.pg_aoseg_<relid> for every segment file of the relation.

    Accepts relid as int or str: the original called relid.strip(), which
    crashes on the int default of 0.
    """
    relid = str(relid).strip()
    tinctest.logger.info("-------------------------------")
    tinctest.logger.info('getting eofuncompressed from gp_persistent_relation_node and pg_aoseg.pg_aoseg_oid ')
    tinctest.logger.info("-------------------------------\n")
    sql_cmd = "select count(*) from gp_persistent_relation_node a , pg_aoseg.pg_aoseg_%s b where a.segment_file_num = b.segno and relfilenode_oid = %s and mirror_append_only_new_eof != eofuncompressed;" % (relid, relid)
    cnt = PSQL.run_sql_command_utility_mode(sql_cmd=sql_cmd, host=host, port=port, flags='-q -t')
    # Zero mismatching rows means the EOF marks agree.
    return int(cnt) == 0
def get_segment_cnt(self, relid=0, host=None, port=None):
    """Return, as a string, the first per-column segment-file count > 1
    reported by gp_toolkit.__gp_aocsseg(relid); '0' when the query yields
    no output."""
    aocsseg_query = "select count(*) from gp_toolkit.__gp_aocsseg(%s) group by column_num having count(*) > 1 limit 1" % (
        relid)
    result = PSQL.run_sql_command_utility_mode(sql_cmd=aocsseg_query,
                                               host=host,
                                               port=port,
                                               flags='-q -t')
    return result if result.strip() else '0'
def is_same_eof_uaocs_on_segment(self, relid=0, host=None, port=None):
    """Return True when mirror_append_only_new_eof in
    gp_persistent_relation_node matches the eof reported by
    get_gp_aocsseg(relid) for every physical segment file of the relation."""
    tinctest.logger.info("-------------------------------")
    tinctest.logger.info("getting eofuncompressed from gp_persistent_relation_node and udf get_gp_aocsseg(oid) ")
    tinctest.logger.info("-------------------------------\n")
    sql_cmd = (
        "select count(*) from gp_persistent_relation_node a , get_gp_aocsseg(%s) b where a.segment_file_num = b.physical_segno and relfilenode_oid = %s and mirror_append_only_new_eof != eof;"
        % (relid, relid)
    )
    cnt = PSQL.run_sql_command_utility_mode(sql_cmd=sql_cmd, host=host, port=port, flags="-q -t")
    # Zero mismatching rows means the EOF marks agree.
    return int(cnt) == 0
def has_zero_rows(self, sql, utility=False):
    """Return True when the psql output for *sql* reports "(0 rows)"."""
    if utility:
        output = PSQL.run_sql_command_utility_mode(sql, port=self.utility_port)
    else:
        output = PSQL.run_sql_command(sql)
    return "(0 rows)" in output
def test_xlogPreparedXactSeg(self):
    """
    Test to verify the xlog on segment gets cleaned-up only till
    point of oldest prepared transaction.

    The flow of this test is as follows.
    1. Initiate the Standby using the Master (primary) postmaster
       paramerters.
    2. A: Inject the fault to suspend Mater after Prepare done.
    3. A: Now execute a transaction and commit it. This transaction
       will be blocked.
    4. B: Inject the fault to suspend Mater after Commit done.
    5. B: Now execute a transaction and commit it. This master will
       be blocked.
    6. Promote the standby.
    7. Verify the result, transaction A results should not be
       visible and transaction B results should be visible.

    NOTE(review): only steps 2-3 plus xlog switching and a cluster restart
    are visible here; steps 4-7 do not appear in this method -- confirm
    they are implemented elsewhere.
    """
    PSQL.run_sql_command('DROP table if exists xansrep_prepare')
    PSQL.run_sql_command('DROP table if exists xansrep1, xansrep2, xansrep3, xansrep4')
    fault = Gpfault()
    # 2. Inject fault at prepared state
    result = fault.suspend_at(
        'transaction_abort_after_distributed_prepared')
    logger.info(result.stdout)
    self.assertEqual(result.rc, 0, result.stdout)
    # 3. Now execute a transaction and commit it. The backend is expected
    # be blocked.
    logger.info('Create table xansrep_prepare...')
    # NOTE(review): the table created is 'xansprep_prepare' but the DROP
    # above targets 'xansrep_prepare' (different spelling) -- confirm
    # which name is intended.
    create_xansprep_prepare_query = 'create table xansprep_prepare (a int)'
    # Due to the suspend, we don't wait for the result
    subprocess.Popen(['psql', '-c', create_xansprep_prepare_query,
                      '-p', os.environ.get('PGPORT')],
                     stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    logger.info('Check if suspend fault is hit ...')
    if not self.check_pg_stat_activity(create_xansprep_prepare_query):
        logger.error('transaction has not been started yet')
    triggered = fault.wait_triggered(
        'transaction_abort_after_distributed_prepared')
    self.assertTrue(triggered, 'Fault was not triggered')
    # Lets trigger switch to xlog on segment
    # NOTE(review): segment port 2100 is hardcoded -- presumably the demo
    # cluster layout; confirm.
    PSQL.run_sql_command_utility_mode(sql_cmd='select pg_switch_xlog()', port=2100)
    PSQL.run_sql_command_utility_mode(sql_cmd='checkpoint', port=2100)
    # Generate more records on xlog
    PSQL.run_sql_command('create table xansrep1 (a int)')
    PSQL.run_sql_command('create table xansrep2 (a int)')
    PSQL.run_sql_command_utility_mode(sql_cmd='select pg_switch_xlog()', port=2100)
    PSQL.run_sql_command_utility_mode(sql_cmd='checkpoint', port=2100)
    PSQL.run_sql_command('create table xansrep3 (a int)')
    PSQL.run_sql_command('create table xansrep4 (a int)')
    PSQL.run_sql_command_utility_mode(sql_cmd='select pg_switch_xlog()', port=2100)
    PSQL.run_sql_command_utility_mode(sql_cmd='checkpoint', port=2100)
    # Immediate restart of the whole cluster.
    cmd = Command('gpstop', 'gpstop -air')
    cmd.run(validateAfter=True)
def test_xlogPreparedXactSeg(self):
    """
    Test to verify the xlog on segment gets cleaned-up only till
    point of oldest prepared transaction.

    The flow of this test is as follows.
    1. Initiate the Standby using the Master (primary) postmaster
       paramerters.
    2. A: Inject the fault to suspend Mater after Prepare done.
    3. A: Now execute a transaction and commit it. This transaction
       will be blocked.
    4. B: Inject the fault to suspend Mater after Commit done.
    5. B: Now execute a transaction and commit it. This master will
       be blocked.
    6. Promote the standby.
    7. Verify the result, transaction A results should not be
       visible and transaction B results should be visible.

    NOTE(review): duplicate of the identically named test elsewhere in this
    source. Only steps 2-3 plus xlog switching and a cluster restart are
    visible here; steps 4-7 do not appear in this method -- confirm they
    are implemented elsewhere.
    """
    PSQL.run_sql_command('DROP table if exists xansrep_prepare')
    PSQL.run_sql_command(
        'DROP table if exists xansrep1, xansrep2, xansrep3, xansrep4')
    fault = Gpfault()
    # 2. Inject fault at prepared state
    result = fault.suspend_at(
        'transaction_abort_after_distributed_prepared')
    logger.info(result.stdout)
    self.assertEqual(result.rc, 0, result.stdout)
    # 3. Now execute a transaction and commit it. The backend is expected
    # be blocked.
    logger.info('Create table xansrep_prepare...')
    # NOTE(review): the table created is 'xansprep_prepare' but the DROP
    # above targets 'xansrep_prepare' (different spelling) -- confirm
    # which name is intended.
    create_xansprep_prepare_query = 'create table xansprep_prepare (a int)'
    # Due to the suspend, we don't wait for the result
    subprocess.Popen([
        'psql', '-c', create_xansprep_prepare_query, '-p',
        os.environ.get('PGPORT')
    ],
                     stdout=subprocess.PIPE,
                     stderr=subprocess.PIPE)
    logger.info('Check if suspend fault is hit ...')
    if not self.check_pg_stat_activity(create_xansprep_prepare_query):
        logger.error('transaction has not been started yet')
    triggered = fault.wait_triggered(
        'transaction_abort_after_distributed_prepared')
    self.assertTrue(triggered, 'Fault was not triggered')
    # Lets trigger switch to xlog on segment
    # NOTE(review): segment port 2100 is hardcoded -- presumably the demo
    # cluster layout; confirm.
    PSQL.run_sql_command_utility_mode(sql_cmd='select pg_switch_xlog()', port=2100)
    PSQL.run_sql_command_utility_mode(sql_cmd='checkpoint', port=2100)
    # Generate more records on xlog
    PSQL.run_sql_command('create table xansrep1 (a int)')
    PSQL.run_sql_command('create table xansrep2 (a int)')
    PSQL.run_sql_command_utility_mode(sql_cmd='select pg_switch_xlog()', port=2100)
    PSQL.run_sql_command_utility_mode(sql_cmd='checkpoint', port=2100)
    PSQL.run_sql_command('create table xansrep3 (a int)')
    PSQL.run_sql_command('create table xansrep4 (a int)')
    PSQL.run_sql_command_utility_mode(sql_cmd='select pg_switch_xlog()', port=2100)
    PSQL.run_sql_command_utility_mode(sql_cmd='checkpoint', port=2100)
    # Immediate restart of the whole cluster.
    cmd = Command('gpstop', 'gpstop -air')
    cmd.run(validateAfter=True)
def test_master_panic_after_phase1(self):
    """PANIC master after recording distributed commit.

    Trigger PANIC in master after completing phase 1 of 2PC,
    right after recording distributed commit in xlog but before
    broadcasting COMMIT PREPARED to segments.  Master's recovery
    cycle should correctly broadcast COMMIT PREPARED because
    master should find distributed commit record in its xlog
    during recovery.  Verify that the transaction is committed
    after recovery.

    JIRA: MPP-19044
    """
    tinctest.logger.info("running test: test_crash_master_after_phase1")
    gparray = GpArray.initFromCatalog(dbconn.DbURL(), utility=True)
    assert len(gparray.getHostList()) == 1, "cannot run on multi-node"
    host = gparray.getHostList()[0]
    # Must have at least one in-sync and up segment.
    primaries = [
        p for p in gparray.get_list_of_primary_segments_on_host(host)
        if p.getSegmentMode() == "s" and p.getSegmentStatus() == "u"]
    assert len(primaries) > 0, "in-sync and up primary not found"
    primary = primaries[0]
    tinctest.logger.info("chose primary: %s" % primary.datadir)
    # Inject suspend fault after recording distributed commit on master.
    cmd = Command("Suspend master post distributed commit",
                  self.faultcmd % "suspend")
    cmd.run(validateAfter=True)
    tinctest.logger.info(cmd.get_results().printResult())
    # Trigger the fault.
    cmd = Command("run DDL", "psql -f %s" % local_path('sql/ao_create.sql'))
    self.proc = cmd.runNoWait()
    tinctest.logger.info("runNoWait: %s, pid: %d" % (cmd.cmdStr, self.proc.pid))
    commitBlocked = self.filereputil.check_fault_status(
        fault_name='dtm_xlog_distributed_commit', status="triggered",
        seg_id='1', num_times_hit=1)
    # Shutdown of primary (and mirror) should happen only after
    # the commit is blocked due to suspend fault.
    assert commitBlocked, "timeout waiting for commit to be blocked"
    tinctest.logger.info("commit is blocked due to suspend fault")
    # At this point, segments have already recorded the
    # transaction as prepared by writing PREPARE record in xlog.
    # Crash one primary (and its mirror).
    mirror = None
    mirrors = [
        m for m in gparray.get_list_of_mirror_segments_on_host(host)
        if m.getSegmentMode() == "s" and m.getSegmentStatus() == "u"
        and primary.getSegmentContentId() == m.getSegmentContentId()]
    if len(mirrors) > 0:
        mirror = mirrors[0]
        tinctest.logger.info("chose mirror: %s" % mirror.datadir)
    # Pause FTS probes to avoid a failover while we bring down
    # segments. Note that we bring down both primary and its
    # mirror, thereby causing double failure. This prevents
    # FTS from making changes to segment configuration, even
    # if FTS probes are unpaused. It is necessary to unpause
    # FTS probes to prevent gang creation from being blocked.
    PSQL.run_sql_command_utility_mode("SET gp_fts_probe_pause = on")
    tinctest.logger.info("FTS probes paused")
    # FIX: only stop the mirror when one exists; the original dereferenced
    # mirror.datadir unconditionally, which raises AttributeError on a
    # mirrorless configuration.  Also label the log/command as "mirror" --
    # the original said "primary" for this shutdown too.
    if mirror is not None:
        cmdstr = 'pg_ctl -D %s stop -m immediate' % mirror.datadir
        tinctest.logger.info("bringing down mirror: %s" % cmdstr)
        cmd = Command("Shutdown a mirror segment", cmdstr)
        cmd.run(validateAfter=True)
    cmdstr = 'pg_ctl -D %s stop -m immediate' % primary.datadir
    tinctest.logger.info("bringing down primary: %s" % cmdstr)
    cmd = Command("Shutdown a primary segment", cmdstr)
    cmd.run(validateAfter=True)
    if mirror is not None:
        PSQL.run_sql_command_utility_mode("SET gp_fts_probe_pause = off")
        tinctest.logger.info("FTS probes unpaused")
    # Resume master. Master should PANIC and go through crash recovery.
    cmd = Command("resume master", self.faultcmd % "resume")
    cmd.run(validateAfter=True)
    tinctest.logger.info(cmd.get_results().printResult())
    (rc, out, err) = self.proc.communicate2()
    self.proc = None
    tinctest.logger.info("runNoWait rc: %d, output: %s, err: %s" %
                         (rc, out, err))
    # Fail if QD did not PANIC.
    assert (out.find("commit succeeded") == -1
            and err.find("commit succeeded") == -1
            and err.find("PANIC") != -1)
    # Wait for a few seconds to ensure that postmaster reset has started
    time.sleep(5)
    # Wait for recovery to complete, timeout after ~ 5 mins.
    attempts = 1
    recoveryComplete = False
    while attempts < 600 and not recoveryComplete:
        recoveryComplete = "aaa150" in PSQL.run_sql_command_utility_mode(
            "select 'aaa' || (100+50)")
        time.sleep(0.5)
        attempts = attempts + 1
    assert recoveryComplete, "timeout waiting for master to recover"
    cmdstr = "gpstop -ar"
    cmd = Command("restart", cmdstr)
    tinctest.logger.info("restarting the cluster with '%s'" % cmdstr)
    cmd.run(validateAfter=True)
    tinctest.logger.info("restart complete")
    # Verify table got created (commit was successful).
    assert PSQL.run_sql_file(local_path('sql/ao_select.sql'))
    gpverify = GpdbVerify()
    (errorCode, hasError, gpcheckcat_output, repairScript) = gpverify.gpcheckcat()
    assert errorCode == 0, ("gpcheckcat failed: %s" % gpcheckcat_output[0])
    # No need to restart GPDB again in tearDown()
    self.skipRestart = True
def test_master_panic_after_phase1(self):
    """PANIC master after recording distributed commit.

    Trigger PANIC in master after completing phase 1 of 2PC,
    right after recording distributed commit in xlog but before
    broadcasting COMMIT PREPARED to segments.  Master's recovery
    cycle should correctly broadcast COMMIT PREPARED because
    master should find distributed commit record in its xlog
    during recovery.  Verify that the transaction is committed
    after recovery.

    JIRA: MPP-19044

    NOTE(review): duplicate of the identically named test elsewhere in
    this source (that copy also sleeps 5s before polling for recovery).
    """
    tinctest.logger.info("running test: test_crash_master_after_phase1")
    gparray = GpArray.initFromCatalog(dbconn.DbURL(), utility=True)
    assert len(gparray.getHostList()) == 1, "cannot run on multi-node"
    host = gparray.getHostList()[0]
    # Must have at least one in-sync and up segment.
    primaries = [
        p for p in gparray.get_list_of_primary_segments_on_host(host)
        if p.getSegmentMode() == "s" and p.getSegmentStatus() == "u"
    ]
    assert len(primaries) > 0, "in-sync and up primary not found"
    primary = primaries[0]
    tinctest.logger.info("chose primary: %s" % primary.datadir)
    # Inject suspend fault after recording distributed commit on master.
    cmd = Command("Suspend master post distributed commit",
                  self.faultcmd % "suspend")
    cmd.run(validateAfter=True)
    tinctest.logger.info(cmd.get_results().printResult())
    # Trigger the fault.
    cmd = Command("run DDL", "psql -f %s" % local_path('sql/ao_create.sql'))
    self.proc = cmd.runNoWait()
    tinctest.logger.info("runNoWait: %s, pid: %d" % (cmd.cmdStr, self.proc.pid))
    commitBlocked = self.filereputil.check_fault_status(
        fault_name='dtm_xlog_distributed_commit', status="triggered",
        seg_id='1', num_times_hit=1)
    # Shutdown of primary (and mirror) should happen only after
    # the commit is blocked due to suspend fault.
    assert commitBlocked, "timeout waiting for commit to be blocked"
    tinctest.logger.info("commit is blocked due to suspend fault")
    # At this point, segments have already recorded the
    # transaction as prepared by writing PREPARE record in xlog.
    # Crash one primary (and its mirror).
    mirror = None
    mirrors = [
        m for m in gparray.get_list_of_mirror_segments_on_host(host)
        if m.getSegmentMode() == "s" and m.getSegmentStatus() == "u"
        and primary.getSegmentContentId() == m.getSegmentContentId()
    ]
    if len(mirrors) > 0:
        mirror = mirrors[0]
        tinctest.logger.info("chose mirror: %s" % mirror.datadir)
    # Pause FTS probes to avoid a failover while we bring down
    # segments. Note that we bring down both primary and its
    # mirror, thereby causing double failure. This prevents
    # FTS from making changes to segment configuration, even
    # if FTS probes are unpaused. It is necessary to unpause
    # FTS probes to prevent gang creation from being blocked.
    PSQL.run_sql_command_utility_mode("SET gp_fts_probe_pause = on")
    tinctest.logger.info("FTS probes paused")
    # FIX: only stop the mirror when one exists; the original dereferenced
    # mirror.datadir unconditionally, which raises AttributeError on a
    # mirrorless configuration.  Also label the log/command as "mirror" --
    # the original said "primary" for this shutdown too.
    if mirror is not None:
        cmdstr = 'pg_ctl -D %s stop -m immediate' % mirror.datadir
        tinctest.logger.info("bringing down mirror: %s" % cmdstr)
        cmd = Command("Shutdown a mirror segment", cmdstr)
        cmd.run(validateAfter=True)
    cmdstr = 'pg_ctl -D %s stop -m immediate' % primary.datadir
    tinctest.logger.info("bringing down primary: %s" % cmdstr)
    cmd = Command("Shutdown a primary segment", cmdstr)
    cmd.run(validateAfter=True)
    if mirror is not None:
        PSQL.run_sql_command_utility_mode("SET gp_fts_probe_pause = off")
        tinctest.logger.info("FTS probes unpaused")
    # Resume master. Master should PANIC and go through crash recovery.
    cmd = Command("resume master", self.faultcmd % "resume")
    cmd.run(validateAfter=True)
    tinctest.logger.info(cmd.get_results().printResult())
    (rc, out, err) = self.proc.communicate2()
    self.proc = None
    tinctest.logger.info("runNoWait rc: %d, output: %s, err: %s" %
                         (rc, out, err))
    # Fail if QD did not PANIC.
    assert (out.find("commit succeeded") == -1
            and err.find("commit succeeded") == -1
            and err.find("PANIC") != -1)
    # Wait for recovery to complete, timeout after ~ 5 mins.
    attempts = 1
    recoveryComplete = False
    while attempts < 600 and not recoveryComplete:
        recoveryComplete = "aaa150" in PSQL.run_sql_command_utility_mode(
            "select 'aaa' || (100+50)")
        time.sleep(0.5)
        attempts = attempts + 1
    assert recoveryComplete, "timeout waiting for master to recover"
    cmdstr = "gpstop -ar"
    cmd = Command("restart", cmdstr)
    tinctest.logger.info("restarting the cluster with '%s'" % cmdstr)
    cmd.run(validateAfter=True)
    tinctest.logger.info("restart complete")
    # Verify table got created (commit was successful).
    assert PSQL.run_sql_file(local_path('sql/ao_select.sql'))
    gpverify = GpdbVerify()
    (errorCode, hasError, gpcheckcat_output, repairScript) = gpverify.gpcheckcat()
    assert errorCode == 0, ("gpcheckcat failed: %s" % gpcheckcat_output[0])
    # No need to restart GPDB again in tearDown()
    self.skipRestart = True
def get_segment_cnt(self, relid=0, host=None, port=None):
    """Return, as a string, the first per-column segment-file count > 1
    reported by gp_toolkit.__gp_aocsseg(relid); '0' when the query yields
    no output.  (Duplicate of get_segment_cnt defined elsewhere in this
    source.)"""
    query = "select count(*) from gp_toolkit.__gp_aocsseg(%s) group by column_num having count(*) > 1 limit 1" % (relid)
    output = PSQL.run_sql_command_utility_mode(sql_cmd=query, host=host, port=port, flags='-q -t')
    if not output.strip():
        output = '0'
    return output