def test_get_host_for_command_for_local_uses_local_hostname(self):
    """get_host_for_command(True, cmd) must equal the output of `hostname`."""
    hostname_cmd = Command('name', 'hostname')
    hostname_cmd.run(validateAfter=True)
    local_hostname = hostname_cmd.get_results().stdout.strip()

    # The first argument is True here; presumably it selects the "local"
    # case named in the test title -- confirm against get_host_for_command.
    self.assertEqual(get_host_for_command(True, hostname_cmd), local_hostname)
def test00_pg_hba_conf_file(self):
    """
    Run gpexpand with a fault point set, then inspect the temporary pg_hba.conf.

    The test expects gpexpand to fail with a 'Fault Injection' error, checks
    that the addresses following the primary/mirror host-name marker lines in
    the temporary pg_hba.conf match the expected host addresses, and finally
    rolls the expansion back and restarts the database.
    """
    os.environ[self.GP_COMMAND_FAULT_POINT] = 'gpexpand tar segment template'

    expand_cmd = Command(name='run gpexpand',
                         cmdStr='gpexpand -D %s -i %s' % (self.TEST_DB, self.EXPANSION_INPUT_FILE))
    with self.assertRaisesRegexp(ExecutionError, 'Fault Injection'):
        expand_cmd.run(validateAfter=True)

    # Read from the pg_hba.conf file and ensure that
    # the address of the new hosts is present.
    hba_pattern = os.path.join(os.path.dirname(self.MASTER_DATA_DIRECTORY), 'gpexpand*', 'pg_hba.conf')
    ls_cmd = Command(name='get the temp pg_hba.conf file', cmdStr="ls %s" % hba_pattern)
    ls_cmd.run(validateAfter=True)
    temp_pg_hba_conf = ls_cmd.get_results().stdout.strip()

    actual_values = set()
    expected_values = set([self.primary_host_address, self.mirror_host_address])
    with open(temp_pg_hba_conf) as hba_file:
        for line in hba_file:
            markers = ('# %s' % self.primary_host_name, '# %s' % self.mirror_host_name)
            if line.strip() in markers:
                # The entry for a host is on the line right after its marker;
                # its fourth field is "address/mask", of which only the
                # address part is kept.
                fields = next(hba_file).strip().split()
                cidr = fields[3]
                actual_values.add(cidr[:cidr.rfind('/')])

    self.assertEqual(actual_values, expected_values)

    GpStart(name='start the database in master only mode', masterOnly=True).run(validateAfter=True)
    Command(name='rollback the expansion', cmdStr='gpexpand -r -D %s' % self.TEST_DB).run(validateAfter=True)
    GpStart(name='start the database').run(validateAfter=True)
def _run_remote_command(self, host, command):
    """
    Run a shell command on a host through gpssh.

    @param host: host name handed to `gpssh -h`
    @param command: command text placed inside single quotes after `gpssh -e`
    @return: stdout of the gpssh invocation (the exit status is not validated)
    """
    gpssh_cmdline = "gpssh -h %s -e '%s' " % (host, command)
    remote = Command(name='Running a remote command', cmdStr=gpssh_cmdline)
    remote.run(validateAfter=False)
    output = remote.get_results().stdout
    tinctest.logger.info('%s\n%s' % (gpssh_cmdline, output))
    return output
def get_full_timestamp_for_incremental_with_nbu(netbackup_service_host, netbackup_block_size, incremental_timestamp):
    """
    Find the full-backup timestamp that an incremental backup belongs to.

    Lists the "*_increments" objects known to the NetBackup server, restores
    each listed file and returns the timestamp derived from the first file
    whose contents include incremental_timestamp.

    NOTE(review): dump_prefix is not a parameter of this function; it is read
    from the enclosing scope -- presumably a module-level global. Confirm that
    it is defined wherever this function lives.

    @param netbackup_service_host: host passed to gp_bsa_query_agent and to restore_file_with_nbu
    @param netbackup_block_size: block size passed to restore_file_with_nbu
    @param incremental_timestamp: timestamp looked up in each increments file
    @return: result of get_timestamp_from_increments_filename for the matching
             file, or None when no listed file contains the timestamp
    """
    if dump_prefix:
        get_inc_files_cmd = (
            "gp_bsa_query_agent --netbackup-service-host=%s --netbackup-list-dumped-objects=%sgp_dump_*_increments"
            % (netbackup_service_host, dump_prefix)
        )
    else:
        get_inc_files_cmd = (
            "gp_bsa_query_agent --netbackup-service-host=%s --netbackup-list-dumped-objects=gp_dump_*_increments"
            % netbackup_service_host
        )

    cmd = Command("Query NetBackup server to get the list of increments files backed up", get_inc_files_cmd)
    cmd.run(validateAfter=True)

    for line in cmd.get_results().stdout.split("\n"):
        fname = line.strip()
        # split("\n") produces an empty entry for blank lines and for a
        # trailing newline; an empty name is not a file to restore.
        if not fname:
            continue
        restore_file_with_nbu(netbackup_service_host, netbackup_block_size, fname)
        contents = get_lines_from_file(fname)
        if incremental_timestamp in contents:
            return get_timestamp_from_increments_filename(fname)
    return None
def drop_database(dbname, retries=5, sleep_interval=5):
    """
    Drop a database by running dropdb, retrying while it fails.

    @type dbname: string
    @param dbname: Name of the database to be deleted

    @type retries: integer
    @param retries: Number of attempts to drop the database.

    @type sleep_interval: integer
    @param sleep_interval: Time in seconds slept after each failed attempt

    @rtype: boolean
    @return: True as soon as dropdb exits 0 with empty stderr, False when
             every attempt failed

    @raise PSQLException: When the database does not exist
    """
    # TBD: Use shell when available
    if not PSQL.database_exists(dbname):
        tinctest.logger.error("Database %s does not exist." % dbname)
        raise PSQLException("Database %s does not exist" % dbname)

    dropdb = Command(name="drop database", cmdStr="dropdb %s" % (dbname))
    tinctest.logger.debug("Dropping database: %s" % dropdb)

    for _attempt in range(retries):
        dropdb.run(validateAfter=False)
        outcome = dropdb.get_results()
        tinctest.logger.debug("Output - %s" % outcome)
        if outcome.rc == 0 and not outcome.stderr:
            return True
        time.sleep(sleep_interval)
    return False
def get_latest_full_ts_with_nbu(dbname, backup_dir, dump_prefix, netbackup_service_host, netbackup_block_size):
    """
    Look up a full-backup timestamp from the report files stored on NetBackup.

    Lists the "*.rpt" objects on the NetBackup server, restores each one whose
    name contains backup_dir and returns the first non-None timestamp that
    get_full_ts_from_report_file yields for it.

    @param dbname: database name forwarded to get_full_ts_from_report_file
    @param backup_dir: only listed names containing this string are considered
    @param dump_prefix: optional prefix put in front of the gp_dump_*.rpt pattern
    @param netbackup_service_host: NetBackup server host
    @param netbackup_block_size: block size used for restores
    @return: the timestamp found, or None when a considered line carries one
             of the "no object" messages
    @raise Exception: when no listed report file yields a timestamp
    """
    if dump_prefix:
        object_pattern = "%sgp_dump_*.rpt" % dump_prefix
    else:
        object_pattern = "gp_dump_*.rpt"
    get_rpt_files_cmd = "gp_bsa_query_agent --netbackup-service-host=%s --netbackup-list-dumped-objects=%s" % (
        netbackup_service_host, object_pattern)

    query = Command("Query NetBackup server to get the list of report files backed up", get_rpt_files_cmd)
    query.run(validateAfter=True)

    for raw_line in query.get_results().stdout.split('\n'):
        report_file = raw_line.strip()
        if report_file == '' or backup_dir not in report_file:
            continue
        no_match = "No object matched the specified predicate" in report_file
        if no_match or "No objects of the format" in report_file:
            return None

        restore_file_with_nbu(netbackup_service_host, netbackup_block_size, report_file)
        timestamp = get_full_ts_from_report_file(dbname, report_file, dump_prefix,
                                                 netbackup_service_host=netbackup_service_host,
                                                 netbackup_block_size=netbackup_block_size)
        logger.debug('Timestamp = %s' % timestamp)
        if timestamp is not None:
            return timestamp

    raise Exception('No full backup found for given incremental on the specified NetBackup server')
def setUpClass(cls):
    """
    Recreate the 'wet' database, start gpfdist and prepare its output directory.

    Stores the GPDBConfig instance as cls.config and the running GPFDIST
    helper as cls.gpfdist.
    """
    # we need an empty db to run the tests
    tinctest.logger.info("recreate database wet using dropdb/createdb")
    Command('recreatedb', 'dropdb wet; createdb wet').run(validateAfter=False)

    cls.drop_roles()
    super(LegacyWETTestCase, cls).setUpClass()

    source_dir = cls.get_source_dir()
    gpdb_config = GPDBConfig()
    host, _ = gpdb_config.get_hostandport_of_segment(0)
    port = mppUtil.getOpenPort(8080)
    tinctest.logger.info("gpfdist host = {0}, port = {1}".format(host, port))

    cls.config = gpdb_config
    cls.gpfdist = GPFDIST(port, host, directory=os.path.join(source_dir, 'data'))
    cls.gpfdist.startGpfdist()

    # WET writes into this directory, so start from an empty one.
    output_dir = os.path.join(cls.gpfdist.getdir(), 'output')
    shutil.rmtree(output_dir, ignore_errors=True)
    os.mkdir(output_dir)
def test_autovacuum_signaling(self):
    """
    Push the master's next xid to oldest xid + autovacuum_freeze_max_age,
    run one more transaction, and check that no autovacuum process shows
    up in `ps`.
    """
    master_url = dbconn.DbURL()
    with dbconn.connect(master_url) as conn:
        oldest_xid = int(dbconn.execSQLForSingleton(conn, 'select get_oldest_xid()'))
        freeze_max_age = int(dbconn.execSQLForSingleton(conn, 'show autovacuum_freeze_max_age'))
        xid_limit = xid_sum(oldest_xid, freeze_max_age)
        logger.info('Raising master xid to autovacuum_xid_limit %d' % xid_limit)
        dbconn.execSQLForSingleton(conn, "select spoof_next_xid('%d'::xid)" % xid_limit)

    # A new connection to the postmaster, at this point, will ensure that we roll through
    # the ServerLoop and potentially fork an autovacuum process... if enabled.
    # Burn a transaction to trigger any undesirable behavior that we're disabling.
    with dbconn.connect(master_url) as conn:
        self.assertEqual(1, int(dbconn.execSQLForSingleton(conn, 'select 1')))

    ps_check = Command('check for autovacuum', 'ps -ef | grep -v grep | grep postgres | grep autovacuum')
    ps_check.run()
    self.assertEqual(ps_check.get_results().stdout, "", "Seriously? Found a postgres autovacuum process!")

    self._basic_sanity_check('clean')
def test_autovacuum_signaling_on_segment(self):
    """
    Segment variant of the autovacuum signaling check: raise the next xid on
    one primary segment (utility-mode connection), run a transaction there,
    and check over ssh that no autovacuum process is listed.
    """
    # connect to the master to build gparray
    primary, _ = self._get_primary_mirror_pair()
    logger.info('Isolated segment %d at %s:%d' % (primary.dbid, primary.hostname, primary.port))
    segment_url = dbconn.DbURL(hostname=primary.hostname, port=primary.port)

    with dbconn.connect(segment_url, utility=True) as conn:
        oldest_xid = int(dbconn.execSQLForSingleton(conn, 'select get_oldest_xid()'))
        freeze_max_age = int(dbconn.execSQLForSingleton(conn, 'show autovacuum_freeze_max_age'))
        xid_limit = xid_sum(oldest_xid, freeze_max_age)
        logger.info('Raising segment xid to autovacuum_xid_limit %d' % xid_limit)
        dbconn.execSQLForSingleton(conn, "select spoof_next_xid('%d'::xid)" % xid_limit)

    # A new connection to the postmaster, at this point, will ensure that we roll through
    # the ServerLoop and potentially fork an autovacuum process... if enabled.
    with dbconn.connect(segment_url, utility=True) as conn:
        self.assertEqual(1, int(dbconn.execSQLForSingleton(conn, 'select 1')))

    # Only `ps -ef` runs on the segment host; the grep pipeline filters the
    # ssh output locally.
    ps_check = Command('check for autovacuum',
                       'ssh %s ps -ef | grep -v grep | grep postgres | grep autovacuum' % primary.hostname)
    ps_check.run()
    self.assertEqual(ps_check.get_results().stdout, "", "Seriously? Found a postgres autovacuum process!")

    self._basic_sanity_check('clean')
def check_integrityresults(self):
    """
    Check gpverify results from the last token

    @return: True when the last verification succeeded, False when it failed,
             -1 while it is still running, and 0 when there is no gpverify
             record or no known state is found in the output
    """
    sql = "select vertoken from gp_verification_history order by 1 desc limit 1"
    out = PSQL.run_sql_command(sql, flags='-q -t', dbname='postgres')
    last_token = out.strip()
    if not last_token:
        return 0  # No records of gpverify

    cmd = Command(name='gpverify', cmdStr="gpverify --results --token %s" % (last_token))
    cmd.run(validateAfter=False)
    result = cmd.get_results()

    # stdout is a single string, so indexing it directly would select one
    # character rather than a line. Split into lines first and guard against
    # output that is too short.
    # NOTE(review): the state is taken from the second-to-last line, mirroring
    # the original `len - 2` index -- confirm against real gpverify output.
    lines = result.stdout.splitlines()
    state = lines[-2] if len(lines) >= 2 else ""

    if self.GPVERIFY_STATE["success"] in state:
        tinctest.logger.info("gpverify at %s: Successful" % last_token)
        return True
    elif self.GPVERIFY_STATE["running"] in state:
        tinctest.logger.info("gpverify at %s: Running" % last_token)
        return -1
    elif self.GPVERIFY_STATE["fail"] in state:
        tinctest.logger.info("gpverify at %s: Failed" % last_token)
        return False
    else:
        tinctest.logger.info("gpverify has not start")
        return 0
def impl(context, filetype, dir):
    """
    Verify that a restore report file or restore status files exist.

    @param context: object carrying backup_timestamp
    @param filetype: 'report' or 'status'; any other value checks nothing
    @param dir: directory to look in; 'master_data_directory' is replaced by
                master_data_dir, and 'segment_data_directory' means each
                primary segment's own data directory
    @raise Exception: when an expected file is missing
    """
    if dir == 'master_data_directory':
        dir = master_data_dir

    if filetype == 'report':
        report_path = '%s/gp_restore_%s.rpt' % (dir, context.backup_timestamp)
        if not os.path.isfile(report_path):
            raise Exception('Report file %s is not present in master data directory' % report_path)
        return

    if filetype != 'status':
        return

    gparray = GpArray.initFromCatalog(dbconn.DbURL())
    primaries = [seg for seg in gparray.getDbList() if seg.isSegmentPrimary()]

    if dir == 'segment_data_directory':
        for seg in primaries:
            host = seg.getSegmentHostName()
            seg_data_dir = seg.getSegmentDataDirectory()
            ls_cmd = Command('check status file',
                             "ls %s/gp_restore_status_*_%s" % (seg_data_dir, context.backup_timestamp),
                             ctxt=REMOTE, remoteHost=host)
            ls_cmd.run(validateAfter=True)
            if not ls_cmd.get_results().stdout.strip():
                raise Exception('Status file ending with timestamp %s is not present in segment %s data directory'
                                % (context.backup_timestamp, host))
        return

    # Same directory path on every primary's host: count the hosts that have it.
    found = 0
    for seg in primaries:
        host = seg.getSegmentHostName()
        ls_cmd = Command('check status file',
                         "ls %s/gp_restore_status_*_%s" % (dir, context.backup_timestamp),
                         ctxt=REMOTE, remoteHost=host)
        ls_cmd.run(validateAfter=True)
        if not ls_cmd.get_results().stdout.strip():
            raise Exception('Status file not found in segment: %s' % host)
        found += 1

    expected = len(primaries)
    if found != expected:
        raise Exception('Expected %d status file but found %d' % (expected, found))
def impl(context):
    """
    Check that a postgres process for the mirror data directory is running
    on the remote host.

    Reads context.remote_mirror_datadir and context.remote_mirror_segdbname.

    @raise Exception: when the ps/grep pipeline prints nothing
    """
    datadir = context.remote_mirror_datadir
    # "\\-D" yields a literal backslash followed by "-D" in the shell command.
    ps_cmdline = 'ps ux | grep "/bin/postgres \\-D %s " | grep -v grep' % (datadir)
    ps_cmd = Command(name='user command', cmdStr=ps_cmdline, ctxt=REMOTE,
                     remoteHost=context.remote_mirror_segdbname)
    ps_cmd.run(validateAfter=True)

    if ps_cmd.get_results().stdout.strip():
        return
    raise Exception('Mirror segment "%s" not active on "%s"' % (context.remote_mirror_datadir,
                                                                  context.remote_mirror_segdbname))
def run_repair_script(self, repair_script_dir, dbname=None, alldb=True, online=False, testname=None, outputFile=None, host=None, port=None):
    '''
    @summary : Run every *.sh repair script found in a gpcheckcat repair directory.

    @param repair_script_dir: directory holding the scripts; when it does not
                              exist it is looked up under self.tinchome
    @param host: when set and not the local host name or 'localhost', the
                 scripts are run on that host
    @return: False if any script exited non-zero, True otherwise

    dbname, alldb, online, testname, outputFile and port are accepted but not
    used in this method.
    '''
    if not os.path.exists(repair_script_dir):
        repair_script_dir = '%s/%s' % (self.tinchome, repair_script_dir)
    tinctest.logger.debug('Using repair script dir ... %s' % repair_script_dir)

    any_failed = False
    for script_path in glob.glob(repair_script_dir + '/*.sh'):
        repair_cmd = "/bin/bash %s" % str(script_path).strip()
        tinctest.logger.info('Running repair script ... %s' % repair_cmd)

        if host and host not in (socket.gethostname(), 'localhost'):
            runner = Command(name=' Running Gpcheckcat.. ', cmdStr=repair_cmd, ctxt=REMOTE, remoteHost=host)
        else:
            runner = Command(name=' Running Gpcheckcat.. ', cmdStr=repair_cmd)
        runner.run(validateAfter=False)

        # Keep going after a failure; all scripts run, the failure is
        # only reflected in the return value.
        if runner.get_results().rc != 0:
            any_failed = True

    return not any_failed
def test_uao_crash_compaction_before_cleanup_phase_master_with_aocs(self):
    """
    Inject a panic at compaction_before_cleanup_phase on dbid 1, run the first
    SQL file, reset the fault, run the second SQL file and require that the
    second output matches its answer file.
    """
    case = "uao_crash_compaction_before_cleanup_phase_master_with_aocs"
    setup_file = self.get_sql_files(case + "_setup")[0]
    sql_file1, out_file1, ans_file1 = self.get_sql_files(case + "1")
    sql_file2, out_file2, ans_file2 = self.get_sql_files(case + "2")

    out_dir = os.path.dirname(out_file1)
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    fault_template = ('source %s/greenplum_path.sh;gpfaultinjector -p %s '
                      '-f compaction_before_cleanup_phase -y %s --seg_dbid 1')
    gphome, pgport = os.getenv('GPHOME'), os.getenv('PGPORT')
    panic_cmdline = fault_template % (gphome, pgport, 'panic')
    reset_cmdline = fault_template % (gphome, pgport, 'reset')

    PSQL.run_sql_file(setup_file)
    Command('fault injector', panic_cmdline).run()
    PSQL.run_sql_file(sql_file1, out_file=out_file1)
    # The first diff is computed but its outcome is not asserted.
    Gpdiff.are_files_equal(out_file1, ans_file1)

    # The connection is accepted before it is actually ready, leading to panic messages
    # Therefore we have to sleep here.
    sleep(5)

    Command('fault injector', reset_cmdline).run()
    PSQL.run_sql_file(sql_file2, out_file=out_file2)
    self.assertTrue(Gpdiff.are_files_equal(out_file2, ans_file2))
def run_gpfaultinjector(self, fault_type, fault_name):
    """
    Run gpfaultinjector against dbid 1 (-s 1).

    @param fault_type: value for the -y option
    @param fault_name: value for the -f option
    @return: the results object of the executed command
    """
    injector_cmdline = 'gpfaultinjector -s 1 -y {0} -f {1}'.format(fault_type, fault_name)
    # The command line doubles as the command's name.
    injector = Command(injector_cmdline, injector_cmdline)
    injector.run()
    return injector.get_results()
def test_uao_crash_compaction_before_cleanup_phase(self):
    """
    Inject a panic at compaction_before_cleanup_phase on dbid 2, run the first
    SQL file, reset the fault, run the second SQL file and require that both
    outputs match their answer files.
    """
    case = "uao_crash_compaction_before_cleanup_phase"
    setup_file = self.get_sql_files(case + "_setup")[0]
    sql_file1, out_file1, ans_file1 = self.get_sql_files(case + "1")
    sql_file2, out_file2, ans_file2 = self.get_sql_files(case + "2")

    out_dir = os.path.dirname(out_file1)
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    PSQL.run_sql_file(setup_file)

    fault_template = ('source %s/greenplum_path.sh;gpfaultinjector -p %s '
                      '-f compaction_before_cleanup_phase -y %s --seg_dbid 2')
    gphome, pgport = os.getenv('GPHOME'), os.getenv('PGPORT')
    panic_cmdline = fault_template % (gphome, pgport, 'panic')
    reset_cmdline = fault_template % (gphome, pgport, 'reset')

    Command('fault injector', panic_cmdline).run()
    PSQL.run_sql_file(sql_file1, out_file=out_file1)
    first_matches = Gpdiff.are_files_equal(out_file1, ans_file1)

    Command('fault injector', reset_cmdline).run()
    PSQL.run_sql_file(sql_file2, out_file=out_file2)
    second_matches = Gpdiff.are_files_equal(out_file2, ans_file2)

    self.assertTrue(first_matches)
    self.assertTrue(second_matches)
def test_uaocs_crash_alterdropcol(self):
    """
    Inject a panic at appendonly_delete for table foo on dbid 2, run the first
    SQL file, reset the fault, run the second SQL file and require that the
    second output matches its answer file.
    """
    setup_file = self.get_sql_files("uaocs_crash_update_setup")[0]
    case = "uaocs_crash_alterdropcol"
    sql_file1, out_file1, ans_file1 = self.get_sql_files(case + "1")
    sql_file2, out_file2, ans_file2 = self.get_sql_files(case + "2")

    out_dir = os.path.dirname(out_file1)
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    fault_template = ('source %s/greenplum_path.sh;gpfaultinjector -p %s '
                      '-f appendonly_delete -t foo -y %s --seg_dbid 2')
    gphome, pgport = os.getenv('GPHOME'), os.getenv('PGPORT')
    panic_cmdline = fault_template % (gphome, pgport, 'panic')
    reset_cmdline = fault_template % (gphome, pgport, 'reset')

    PSQL.run_sql_file(setup_file)
    Command('fault injector', panic_cmdline).run()
    PSQL.run_sql_file(sql_file1, out_file=out_file1)
    # The first diff is computed but its outcome is not asserted.
    Gpdiff.are_files_equal(out_file1, ans_file1)

    # The connection is accepted before it is actually ready, leading to panic messages
    # Therefore we have to sleep here.
    sleep(5)

    Command('fault injector', reset_cmdline).run()
    PSQL.run_sql_file(sql_file2, out_file=out_file2)
    self.assertTrue(Gpdiff.are_files_equal(out_file2, ans_file2))
def wait_kill_and_verify_dump_agent_on_master(self, datadir, wait_log_msg, verify_log_msg):
    """
    Kill the dump agent with `kill -9` once its status log shows a message,
    then wait for gpcrondump to exit and verify that the crash was detected.

    @param datadir: data directory used to locate the dump folder and the dump agent pid
    @param wait_log_msg: message to wait for in the newest status file before killing
    @param verify_log_msg: message handed to verify_dump_crash_detected

    Also stores the newest log timestamp in self.backup_timestamp.
    """
    status_file_prefix = self.dump_file_prefix + '_status_*_1_'
    cur_dump_date_folder = self.get_cur_date_as_dump_folder()
    log_dir = os.path.join(datadir, DEFAULT_DUMP_LOC, cur_dump_date_folder)

    previous_timestamp = self.get_latest_matching_file(log_dir, status_file_prefix)
    if not previous_timestamp:
        tinctest.logger.info('Found no existing file matching %s, wait for new status file' % status_file_prefix)
    else:
        tinctest.logger.info('The latest timestamp matched for file: %s is %s, wait for new status file'
                             % (status_file_prefix, previous_timestamp))

    dump_agent_pid = self.get_dump_proc_pid(datadir)
    tinctest.logger.info("Obtained segment dump agent process id %s" % dump_agent_pid)

    self.backup_timestamp = self.get_latest_log_timestamp(log_dir, status_file_prefix, previous_timestamp)
    status_log_path = self.get_latest_matching_file_path(log_dir, status_file_prefix)
    self.wait_for_log_msg(status_log_path, wait_log_msg)

    tinctest.logger.info("Crash segment dump agent with kill -9 %s" % dump_agent_pid)
    Command(name='kill dump_agent', cmdStr='kill -9 %s' % dump_agent_pid).run(validateAfter=True)

    self.wait_gpcrondump_exit()
    self.verify_dump_crash_detected(datadir, cur_dump_date_folder, verify_log_msg)
def impl(context, filetype, dir):
    """
    Remove restore report files or restore status files.

    @param context: not read by this function
    @param filetype: "report" removes local gp_restore*.rpt files from dir;
                     "status" removes gp_restore_status_* files on the host of
                     every primary segment
    @param dir: directory to clean; "master_data_directory" is replaced by
                master_data_dir for reports, and "segment_data_directory"
                means each primary's own data directory for status files
    """
    if filetype == "report":
        if dir == "master_data_directory":
            dir = master_data_dir
        for entry in os.listdir(dir):
            if entry.startswith("gp_restore") and entry.endswith(".rpt"):
                os.remove("%s/%s" % (dir, entry))

    if filetype == "status":
        gparray = GpArray.initFromCatalog(dbconn.DbURL())
        primaries = [seg for seg in gparray.getDbList() if seg.isSegmentPrimary()]
        for seg in primaries:
            host = seg.getSegmentHostName()
            if dir == "segment_data_directory":
                target_dir = seg.getSegmentDataDirectory()
            else:
                target_dir = dir
            rm_cmd = Command("remove status file", "rm -f %s/gp_restore_status_*" % target_dir,
                             ctxt=REMOTE, remoteHost=host)
            rm_cmd.run(validateAfter=True)
def test_uao_crash_compaction_before_cleanup_phase_master_with_aocs(self):
    """
    Inject a panic at compaction_before_cleanup_phase on dbid 1, run the first
    SQL file, wait for the database to come back, reset the fault, run the
    second SQL file and require that both outputs match their answer files
    (diffed with gpdiff_init_file as match_sub).
    """
    case = "uao_crash_compaction_before_cleanup_phase_master_with_aocs"
    setup_file = self.get_sql_files(case + "_setup")[0]
    sql_file1, out_file1, ans_file1 = self.get_sql_files(case + "1")
    sql_file2, out_file2, ans_file2 = self.get_sql_files(case + "2")

    out_dir = os.path.dirname(out_file1)
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    fault_template = ('source %s/greenplum_path.sh;gpfaultinjector -p %s '
                      '-f compaction_before_cleanup_phase -y %s --seg_dbid 1')
    gphome, pgport = os.getenv('GPHOME'), os.getenv('PGPORT')
    panic_cmdline = fault_template % (gphome, pgport, 'panic')
    reset_cmdline = fault_template % (gphome, pgport, 'reset')

    PSQL.run_sql_file(setup_file)
    Command('fault injector', panic_cmdline).run()
    PSQL.run_sql_file(sql_file1, out_file=out_file1)
    first_matches = Gpdiff.are_files_equal(out_file1, ans_file1, match_sub=[gpdiff_init_file])

    PSQL.wait_for_database_up()

    Command('fault injector', reset_cmdline).run()
    PSQL.run_sql_file(sql_file2, out_file=out_file2)
    second_matches = Gpdiff.are_files_equal(out_file2, ans_file2, match_sub=[gpdiff_init_file])

    self.assertTrue(first_matches)
    self.assertTrue(second_matches)
def GetSegmentInSync(self, sleepTime=60, repeatCnt=120, greenplum_path=""):
    """
    Poll the segment configuration until no segment is in resync mode.

    @param sleepTime: Number of seconds to sleep before retry
    @param repeatCnt: Number of times to repeat retry. Default is 2 hours
    @param greenplum_path: optional text put in front of the psql command
                           (presumably a command that sources the Greenplum
                           environment -- confirm with callers)
    @return: the tuple (rc, True) as soon as the query for segments in mode
             'r' reports "(0 rows)"; None when every attempt was used up
    """
    for cnt in range(repeatCnt):
        data = ""
        try:
            cmd = "psql gptest -c \"SELECT dbid, content, role, preferred_role, status, mode, address, fselocation, port, replication_port FROM gp_segment_configuration, pg_filespace_entry where dbid = fsedbid and mode = 'r'\""
            if greenplum_path:
                cmd = "%s %s" % (greenplum_path, cmd)

            # Command is used here instead of the older ShellCommand helper.
            generalUtil = Command(name='psql gptest -c', cmdStr=cmd)
            generalUtil.run()
            results = generalUtil.get_results()
            rc = results.rc
            data = results.stdout

            # stdout is one string: iterating it directly walks single
            # characters, which can never contain "(0 rows)". Check line
            # by line instead.
            if rc == 0 and any('(0 rows)' in line for line in data.splitlines()):
                return rc, True
            time.sleep(sleepTime)
        except Exception as e:
            # Best effort: report the problem and move on to the next attempt.
            traceback.print_exc()
            print("ERRORFOUND GetSegmentInSync %s" % (str(e)))
            # TODO: pretty-print data instead of dumping it raw.
            print("ERRORFOUND GetSegmentInSync %s" % (data,))
    return None
def create_standby_master(self, gparr):
    """
    Initialize a standby master with gpinitstandby.

    @param gparr: array object handed to self.get_standby_host
    @return: the host the standby was created on
    @raise Exception: when no standby host can be determined
    """
    target_host = self.get_standby_host(gparr)
    if target_host is None:
        raise Exception('Unable to get standby host')

    init_cmd = Command('create a standby master', cmdStr='gpinitstandby -s %s -a' % target_host)
    init_cmd.run(validateAfter=True)
    return target_host
def impl(context):
    """
    Connect to template1 on the local host name as the kerberos test user and
    run `select 1`; the command is validated after it runs.
    """
    local_host = gethostname()
    psql_cmdline = 'psql -U "gpadmin/kerberos-test" -h %s template1 -c """select 1;"""' % local_host
    psql = Command(name='psql connection with kerberos user', cmdStr=psql_cmdline)
    psql.run(validateAfter=True)
    # Results are fetched but not inspected.
    outcome = psql.get_results()
def check_gpfdist_process(self, wait=60, port=None, raise_assert=True):
    """
    Look for a running gpfdist process serving self.dir on the given port.

    The ps/grep check is repeated up to `wait` times, one second apart.

    @param wait: maximum number of checks
    @param port: port to look for; defaults to self.port
    @param raise_assert: raise instead of returning False when nothing is found
    @return: True when the check command exits 0, False when it never does
             and raise_assert is false
    @raise GPFDISTError: when the process is not found and raise_assert is true
    """
    if port is None:
        port = self.port

    # handle escape of string's quotation for localhost and remote host
    local_names = ('127.0.0.1', socket.gethostbyname(socket.gethostname()), socket.gethostname(), 'localhost')
    if self.host in local_names:
        cmdStr = "%s -ef | grep 'gpfdist -d %s -p %s' | grep -v grep" % (self.ps_command, self.dir, port)
    else:
        cmdStr = 'gpssh -h %s -e "%s -ef | grep \'gpfdist -d %s -p %s\' |grep -v grep"' % (
            self.host, self.ps_command, self.dir, port)
    checker = Command(self.name, cmdStr, self.ctxt, self.host)

    attempts = 0
    while attempts < wait:
        checker.run()
        if checker.get_results().rc == 0:
            return True
        attempts += 1
        time.sleep(1)

    if not raise_assert:
        return False
    raise GPFDISTError("Could not start gpfdist process")
def remove_remote_symlink(self, host, datadir):
    """
    Undo the data-directory symlink on a remote host: delete the link at
    datadir, move <parent>/link/<name> back to <parent>/<name>, and remove
    the then-empty <parent>/link directory.

    @param host: host on which the command runs
    @param datadir: path of the data directory
    """
    # os.path.split returns the (dirname, basename) pair.
    datadir_root, segdir = os.path.split(datadir)
    restore_cmdline = (
        'rm -f {datadir}; mv {datadir_root}/link/{segdir} {datadir_root}/{segdir}; rmdir {datadir_root}/link;'
    ).format(datadir_root=datadir_root, datadir=datadir, segdir=segdir)
    Command('remove symlinks and restore the data directory', cmdStr=restore_cmdline,
            ctxt=REMOTE, remoteHost=host).run(validateAfter=True)
def setUp(self):
    """
    Remove any configured standby, create a fresh Standby object (stopped),
    and recreate an empty self.basepath directory.
    """
    # Remove standby if present.
    # Though the initial intention of tests was to verify
    # without depending on management utility scripts,
    # the reality after all is some other tests might have
    # left a standby and there is not good way other than
    # the gp management script to remove it.
    removal_cmdline = 'gpinitstandby -a -r'
    removal = Command(name='gpinitstandby -r', cmdStr=removal_cmdline)
    tinctest.logger.info(removal_cmdline)
    removal.run(validateAfter=False)

    # For each test case we create a fresh standby and start it.
    self.db_name = self.__class__.db_name
    self.standby = Standby(self.standby_datadir, self.standby_port)
    self.standby.stop()

    # Second positional argument is ignore_errors.
    shutil.rmtree(self.basepath, True)
    try:
        os.makedirs(self.basepath)
    except OSError as err:
        # errno 17 is EEXIST on Linux: an already existing directory is fine.
        if err.errno != 17:
            raise
def check_gpfdist_process(self, wait=60, port=None, raise_assert=True):
    """
    Look for a running gpfdist process on the given port and confirm that it
    accepts connections.

    The check is repeated up to `wait` times, one second apart.

    @var wait: maximum number of checks
    @var port: Port Number; defaults to self.port
    @var raise_assert: raise gpfdist error by default
    @return: True once the process is listed and is_gpfdist_connected(port)
             is truthy; nothing (None) after printing a message when it never
             is and raise_assert is false
    @raise GpfdistError: when the process is never found and raise_assert is true
    """
    if port is None:
        port = self.port

    attempts = 0
    while attempts < wait:
        ps_cmdline = "%s -ef | grep \"gpfdist -p %s\" | grep -v grep" % (self.ps_command, port)
        ps_cmd = Command(name='check for gpfdist', cmdStr=ps_cmdline)
        ps_cmd.run()
        listing = ps_cmd.get_results().stdout

        # A match at offset 0 is not counted, as in the `> 0` comparison.
        if len(listing) > 0 and listing.find("gpfdist -p %s" % port) > 0:
            if self.is_gpfdist_connected(port):
                return True

        attempts = attempts + 1
        time.sleep(1)

    if raise_assert:
        raise GpfdistError("Could not start gpfdist process")
    print("Could not start gpfdist process")
def create_remote_symlink(self, host, datadir):
    """
    Replace a remote data directory by a symlink: create <parent>/link, move
    datadir to <parent>/link/<name>, and link datadir to the moved directory.

    @param host: host on which the command runs
    @param datadir: path of the data directory
    """
    # os.path.split returns the (dirname, basename) pair.
    datadir_root, segdir = os.path.split(datadir)
    link_cmdline = (
        'mkdir -p {datadir_root}/link; mv {datadir} {datadir_root}/link/{segdir}; ln -s {datadir_root}/link/{segdir} {datadir}'
    ).format(datadir_root=datadir_root, datadir=datadir, segdir=segdir)
    Command('create remote symlinks', cmdStr=link_cmdline,
            ctxt=REMOTE, remoteHost=host).run(validateAfter=True)
def test_uao_crash_vacuum_with_ins_fault(self):
    """
    Inject a panic at appendonly_insert for table foo on dbid 2, run the first
    SQL file, wait for the database to come back, reset the fault, run the
    second SQL file and require that both outputs match their answer files
    (diffed with gpdiff_init_file as match_sub).
    """
    setup_file = self.get_sql_files("uaocs_crash_update_setup")[0]
    case = "uao_crash_vacuum"
    sql_file1, out_file1, ans_file1 = self.get_sql_files(case + "1")
    sql_file2, out_file2, ans_file2 = self.get_sql_files(case + "2")

    out_dir = os.path.dirname(out_file1)
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    fault_template = ('source %s/greenplum_path.sh;gpfaultinjector -p %s '
                      '-f appendonly_insert -t foo -y %s --seg_dbid 2')
    gphome, pgport = os.getenv('GPHOME'), os.getenv('PGPORT')
    panic_cmdline = fault_template % (gphome, pgport, 'panic')
    reset_cmdline = fault_template % (gphome, pgport, 'reset')

    PSQL.run_sql_file(setup_file)
    Command('fault injector', panic_cmdline).run()
    PSQL.run_sql_file(sql_file1, out_file=out_file1)
    first_matches = Gpdiff.are_files_equal(out_file1, ans_file1, match_sub=[gpdiff_init_file])

    PSQL.wait_for_database_up()

    Command('fault injector', reset_cmdline).run()
    PSQL.run_sql_file(sql_file2, out_file=out_file2)
    second_matches = Gpdiff.are_files_equal(out_file2, ans_file2, match_sub=[gpdiff_init_file])

    self.assertTrue(first_matches)
    self.assertTrue(second_matches)
def get_info():
    """
    Get the current platform

    @return: 'OSX' on Darwin, 'SOL' on SunOS, and on Linux 'SUSE' when
             /etc/SuSE-release exists, otherwise 'RHEL5' or 'RHEL6' when
             /etc/redhat-release exists ('RHEL5' only for major release 5,
             'RHEL6' for everything else); None for any other system
    @rtype : String
    """
    import re  # local import so the block does not depend on file-level imports

    myos = platform.system()
    if myos == "Darwin":
        return 'OSX'
    if myos == "SunOS":
        return 'SOL'
    if myos != "Linux":
        return None

    if os.path.exists("/etc/SuSE-release"):
        return 'SUSE'
    if not os.path.exists("/etc/redhat-release"):
        return None

    try:
        with open("/etc/redhat-release") as release_file:
            msg = release_file.read()
    except (IOError, OSError):
        # Unreadable file: fall through to the default label below.
        msg = ""

    # Look at the major version after the word "release" only. Searching the
    # whole text for a "5" would also match minor versions such as "6.5".
    match = re.search(r"release\s+(\d+)", msg)
    if match and match.group(1) == "5":
        return 'RHEL5'
    return 'RHEL6'
def remove_standby_master(self, gparr):
    """
    Remove the configured standby master with `gpinitstandby -r -a`.

    @param gparr: array object whose standbyMaster attribute is checked
    @raise Exception: when gparr has no standby master
    """
    if gparr.standbyMaster:
        Command('remove standby master', cmdStr='gpinitstandby -r -a').run(validateAfter=True)
        return
    raise Exception(
        'Standby master not configured. Cannot remove standby')
def run(self, validate=True):
    """
    Print a notice, run this command through Command.run and return its results.

    @param validate: forwarded to Command.run as validateAfter
    @return: the value of self.get_results()
    """
    print("Running delete system: %s" % self)
    Command.run(self, validateAfter=validate)
    return self.get_results()
def setUp(self):
    """Run `gpinitstandby -ar` as cleanup before each test."""
    # cleanup
    # don't care the result in case standby is not configured
    Command('gpinitstandby', 'gpinitstandby -ar').run()
def run_cmd(command):
    """
    Run a shell command with validation; an ExecutionError is printed
    instead of being propagated.

    @param command: command text to execute
    """
    runner = Command(name='run %s' % command, cmdStr='%s' % command)
    try:
        runner.run(validateAfter=True)
    except ExecutionError as err:
        print('caught exception %s' % err)
def test_smart_shutdown(self):
    """
    Verify that the shutdown checkpoint of a smart shutdown reaches the standby.

    Steps:
      1. Verify the system is up and there is no WAL sender running.
      2. Perform a base backup and deploy it into a destination directory.
      3. Copy recovery.conf into that directory for the standby.
      4. Start the standby using the master (primary) postmaster parameters.
      5. Run a transaction to generate xlog, then do a smart shutdown.
      6. Read the last checkpoint from pg_control on the primary and look for
         it in the last modified xlog segment of the standby.
      7. It should be present there.
    Afterwards the database is restarted and the standby processes are killed.
    """
    # 0. Stop standby if it's running
    PSQL.run_sql_command('DROP table if exists foo')
    standby = Standby('base', 5433)
    standby.stop()

    # 1. Verify if the system is UP and there is no WAL sender running
    self.assertEqual(self.count_walsender(), 0)
    logger.info('No active WAL Receiver found')

    # 2. Perform basebackup and deploy it into some dest.
    #    (currently hardcoded 'base') directory
    shutil.rmtree('base', True)
    logger.info('Performing and deploying base backup ...')
    standby.create()

    # 3. Copy recovery.conf into the dest. directory to be used by the standby
    logger.info('Deploying recovery.conf...')

    # 4. Initiate the standby using the master (primary) postmaster
    #    parameters
    logger.info('Initiating Standby...')
    res = standby.start()
    self.assertTrue(res.wasSuccessful())

    num_walsender = 0
    for _ in polling(10, 0.5):
        num_walsender = self.count_walsender()
        if num_walsender > 0:
            break
    self.assertEqual(num_walsender, 1)
    logger.info('Activated WAL Receiver...')

    # 5. Perform some transaction to generate xlog. Then do a smart shutdown
    logger.info('Perform some transaction to generate some XLOG')
    PSQL.run_sql_command('Create table foo (a int)')
    logger.info('Now perform smart shutdown (gpstop -a)')
    cmd = Command(name="gpstop smart",
                  cmdStr="source %s/greenplum_path.sh; gpstop -a" % os.environ["GPHOME"])
    cmd.run(validateAfter=True)

    # 6. Once the primary DB is down, find the last checkpoint from pg_control
    #    on primary. Check the last modified xlog seg from the standby and find
    #    if the last checkpoint from primary exists
    logger.info(
        'Read the pg_control from primary, find the last checkpoint & see if it made to standby'
    )
    standby_xlog_path = os.path.join('base', 'pg_xlog')
    cmd = Command(
        name='pg_controldata ' + os.environ.get('MASTER_DATA_DIRECTORY'),
        cmdStr='pg_controldata ' + os.environ.get('MASTER_DATA_DIRECTORY'))
    cmd.run(validateAfter=True)
    primary_last_ckpt_lsn = self.last_ckpt_lsn((cmd.get_results()).stdout)
    logger.info("Primary last checkpoint LSN = " + primary_last_ckpt_lsn)

    standby_last_mod_xlog = self.last_mod_file(standby_xlog_path)
    logger.info("Last mod standby XLOG = " + standby_last_mod_xlog)
    cmd = Command(name='xlogdump standby last modifiled xlog',
                  cmdStr="xlogdump " + standby_last_mod_xlog)
    cmd.run(validateAfter=True)

    logger.info('See if we find the shutdown LSN in the XLOG seg file')
    # 7. Every line is examined, including the last one: the previous index
    #    loop stopped one short and could never match a record on the final
    #    line of the dump.
    flag = False
    for line in (cmd.get_results()).stdout.splitlines():
        if primary_last_ckpt_lsn in line:
            self.assertTrue("checkpoint" in line)
            self.assertTrue("shutdown" in line)
            flag = True
            break
    self.assertTrue(flag)
    logger.info('PASS')

    # Re-start the database
    logger.info('Now restart the DB (gpstart -a)')
    cmd = Command(name="gpstart",
                  cmdStr="source %s/greenplum_path.sh; gpstart -a" % os.environ["GPHOME"])
    cmd.run(validateAfter=True)

    # Cleanup. Currently we dont have a clean way of WAL rcv dying
    logger.info(
        'Kill the standby processes as clean standby killing is not supported'
    )
    # 5433 is the standby port used at the top of this test.
    cmd = Command(
        name="kill standby",
        cmdStr="kill -9 `ps -ef | grep 5433 | grep -v grep | awk '{print $2}'`")
    cmd.run(validateAfter=True)
def run(self, validate=True):
    """
    Log a notice, run this command through Command.run and return its results.

    @param validate: forwarded to Command.run as validateAfter
    @return: the value of self.get_results()
    """
    tinctest.logger.info("Running gpinitstandby: %s" % self)
    Command.run(self, validateAfter=validate)
    return self.get_results()
def run(self, validate=True):
    """
    Log a notice, run this command through Command.run and return its results.

    @param validate: forwarded to Command.run as validateAfter
    @return: the value of self.get_results()
    """
    tinctest.logger.info("Running delete system: %s" % self)
    Command.run(self, validateAfter=validate)
    return self.get_results()
def impl(context, cmd, cid):
    """Run `<cmd> -s <dbid>` where dbid is looked up from content id `cid`.

    The dbid comes from getPrimaryDbIdFromCid(context, cid) and is coerced
    to int before being placed on the command line. The command is run with
    validateAfter=True.
    """
    primary_dbid = getPrimaryDbIdFromCid(context, cid)
    user_command = Command(name='user command',
                           cmdStr='%s -s %s' % (cmd, int(primary_dbid)))
    user_command.run(validateAfter=True)
def test_fail_back(self):
    """
    This test verifies that the fail-back mode is not allowed.
    Fail-back means original master acting as the new standby.

    Outline: create and start a standby, promote it, stop the original
    master, give the original master a recovery.conf derived from the
    standby's recovery.done (with the port rewritten), start it again and
    assert that its wal_rcv.pid file is still present afterwards. Finally
    the original master is stopped and the cluster restarted with gpstart.
    """
    # Verify if the database is up. Run some sql.
    PSQL.run_sql_command('DROP table if exists foo')
    Command('remove standby', 'gpinitstandby -ra').run()
    self.assertEqual(self.standby.create(), 0)
    res = self.standby.start()
    self.assertTrue(res.wasSuccessful())

    # Wait for the walreceiver to start
    num_walsender = self.wait_for_walsender()
    self.assertEqual(num_walsender, 1)
    logger.info('Activated WAL Receiver...')

    # Promote the standby & shutdown the old Master
    # Generate a recovery.conf file for the old Master so
    # to make him the new standby that connects to the new
    # master (originally standby)
    logger.info('Promoting the standby...')
    self.standby.promote()

    dburl = dbconn.DbURL()
    gparray = GpArray.initFromCatalog(dburl, utility=True)
    numcontent = gparray.getNumSegmentContents()
    orig_master = gparray.master

    self.standby.remove_catalog_standby(dburl)

    wal_rcv_pid_file = os.path.join(orig_master.datadir, 'wal_rcv.pid')
    recovery_conf = os.path.join(orig_master.datadir, 'recovery.conf')

    if os.path.exists(wal_rcv_pid_file):
        os.remove(wal_rcv_pid_file)

    logger.info('Stop the original master...')
    cmd = Command("gpstop", "gpstop -aim")
    cmd.run()
    self.assertEqual(cmd.get_results().rc, 0, str(cmd))

    logger.info(
        'Generate recovery.conf for original master to make a new standby...'
    )
    # Context managers guarantee both handles are closed even when the
    # copy loop raises; the open order (target first, then source) is kept.
    with open(recovery_conf, 'w') as master_recv_conf:
        with open(os.path.join(self.standby.datadir,
                               'recovery.done')) as standby_recv_done:
            master_port = "port=" + str(os.environ.get('PGPORT'))
            standby_port = "port=" + str(self.standby.port)
            for line in standby_recv_done:
                master_recv_conf.write(
                    line.replace(master_port, standby_port))

    logger.info(
        'Start the old master again (to act as the new standby)...')
    master = gp.MasterStart("Starting orig Master in standby mode",
                            orig_master.datadir, orig_master.port,
                            orig_master.dbid, numcontent, None, None, None)

    # -w option would wait forever.
    master.cmdStr = master.cmdStr.replace(' -w', '')
    master.run(validateAfter=True)
    self.assertTrue((master.get_results()).wasSuccessful())

    # Have to do this to give the new standby some time to be active
    subprocess.check_call("psql -c 'create database foo' -p " +
                          str(self.standby.port),
                          shell=True,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
    subprocess.check_call("psql -c 'drop database foo' -p " +
                          str(self.standby.port),
                          shell=True,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)

    time.sleep(3)

    # The new standby can re-start but should not be able to connect to the new
    # master (originally standby). Thats the test
    self.assertTrue(os.path.exists(wal_rcv_pid_file))
    logger.info(
        'The WAL receiver pid file exists which means the new standby started\n'
        'but still could not connect to the new Master (originally standby) and hence the\n'
        'pid file was not cleared')

    # Remove the recovery.conf file from the new standby directory
    # as its no more needed
    os.remove(recovery_conf)

    logger.info('Stop the original master again...')
    subprocess.check_call('pg_ctl stop -D ' + orig_master.datadir +
                          ' -m immediate',
                          shell=True,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.STDOUT)

    # Perform gpstart to get the original master (& cluster) back again
    cmd = Command("gpstart", "gpstart -a")
    cmd.run()
    self.assertTrue(cmd.get_results().rc in (0, 1), str(cmd))

    logger.info('Pass')
def run(self, validate=True):
    """Print this gpinitstandby command, execute it and return its results.

    @param validate: forwarded to Command.run as validateAfter
    @return: the object returned by self.get_results() after the run
    """
    # Parenthesised single-argument form prints the same text as the
    # statement form.
    print("Running gpinitstandby: %s" % self)
    Command.run(self, validateAfter=validate)
    return self.get_results()
def initial_setup(self):
    """Generate the dml sql/ans/yaml files from their `.in` templates.

    Each `<name>.in` template under `dml/` is copied line by line to
    `<name>` with host names, paths and connection settings substituted.
    Afterwards /tmp/quote.csv is removed on the first primary segment's
    host (via gpssh) and the local quote.csv is copied there with scp; a
    failing scp is logged, not raised.
    """
    keyword = 'rh55-qavm65'
    config = GPDBConfig()
    (seg_host, _seg_port) = config.get_hostandport_of_segment(
        psegmentNumber=0, pRole='p')
    cur_path = local_path('')

    # Resolved once up front, in this function's own frame.
    yaml_path = local_path('dml/sql/config/gpl.yaml')
    mydata_path = local_path('dml/sql/gpload/mydata')
    tenk_path = local_path('dml/sql/_data/tenk.data')

    def render(relative_parts, transform):
        """Write `<target>.in` to `<target>`, passing every line through transform."""
        target = os.path.join(cur_path, 'dml', *relative_parts)
        # `with` closes both files even if a substitution raises.
        with open(target + '.in', 'r') as template:
            with open(target, 'w') as generated:
                for line in template:
                    generated.write(transform(line))

    def use_segment_host(line):
        return line.replace(keyword, seg_host)

    def use_yaml_path(line):
        return line.replace('gpl.yaml', yaml_path)

    def use_tenk_path(line):
        return line.replace('tenk.data', tenk_path)

    def fill_gpload_yaml(line):
        # Only the first matching rule applies to a line, as in an
        # if/elif chain.
        if 'DATABASE' in line:
            return line.replace('tangp3', os.environ.get('PGDATABASE'))
        if 'USER' in line:
            return line.replace('tangp3', os.environ.get('USER'))
        if 'HOST' in line:
            return line.replace('rh55-qavm61', socket.gethostname())
        if 'PORT' in line and '5432' in line:
            return line.replace('5432', os.environ.get('PGPORT'))
        if 'mydata' in line:
            return line.replace('mydata', mydata_path)
        return line

    render(('sql', 'insert_from_external.sql'), use_segment_host)
    render(('expected', 'insert_from_external.ans'), use_segment_host)
    render(('sql', 'insert_with_gpload.sql'), use_yaml_path)
    render(('sql', 'config', 'gpl.yaml'), fill_gpload_yaml)
    render(('expected', 'insert_with_gpload.ans'), use_yaml_path)
    render(('sql', 'select_from_copy_table.sql'), use_tenk_path)
    render(('expected', 'select_from_copy_table.ans'), use_tenk_path)

    external_table = local_path('dml/sql/_data/quote.csv')
    clean_file = 'rm -rf /tmp/quote.csv'
    rmt_cmd = "gpssh -h %s -e '%s' " % (seg_host, clean_file)
    cmd = Command(name='Running a remote command', cmdStr=rmt_cmd)
    # Best effort: the result of the remote cleanup is not validated.
    cmd.run(validateAfter=False)

    command = 'scp %s %s:/tmp' % (external_table, seg_host)
    cmd = Command(name='run %s' % command, cmdStr='%s' % command)
    try:
        cmd.run(validateAfter=True)
    except Exception as e:
        tinctest.logger.error("Error running command %s\n" % e)
def get_nic_up(hostname, nic):
    """Report whether `ifconfig <nic>` output on host `<hostname>-cm` contains 'UP'.

    The command is run remotely with validateAfter=True and its stdout is
    searched for the substring 'UP'.
    """
    remote_host = hostname + '-cm'
    ifconfig = Command(name='ifconfig nic',
                       cmdStr='sudo /sbin/ifconfig %s' % nic,
                       remoteHost=remote_host,
                       ctxt=REMOTE)
    ifconfig.run(validateAfter=True)
    output = ifconfig.get_results().stdout
    return 'UP' in output
def killProcessUnix(self, segment, processes=[], signal="9"):
    """Kill a process for a database segment

    Builds ps/grep/awk pipelines and runs them on self.host through
    Command. On SunOS (platform.uname()[0] == 'sunos') the parent process is
    located first and `pargs` output is parsed; on every other platform a
    single `ps -ef | ... | xargs kill` pipeline is run.

    @param segment: segment info, as returned by getSegmentInfo or getFirstSegmentInfo;
                    only segment[0] is used, as a grep pattern
    @param processes: list of process pids to kill or 'all'
                      (processes[0] == "all" selects every postgres process)
    @param signal: UNIX signal to send processes, via kill(1)
    @return: rc of the last remote command that was run, or None on SunOS
             when the first lookup produced no output
    @raise Exception: when a remote command returns a non-zero rc (except
                      inside the SunOS parsing loop, see notes below)

    NOTE(review): `processes` has a mutable default and the non-SunOS
    branch indexes processes[0] unconditionally, so calling with the
    default empty list raises IndexError -- confirm callers always pass a
    non-empty list.
    """
    if (platform.uname()[0].lower() == 'sunos'):
        # Get the Parent Process ID first
        cmd = "/usr/ucb/ps auxwww |grep postgres | grep %s |grep -v grep | awk '{print \$2}'" % segment[0]
        # use class Command in gppylib.commands.base
        remote = Command(name='get parent process ID',
                         cmdStr=cmd,
                         remoteHost=self.host)
        remote.run()
        rc = remote.get_results().rc
        if rc != 0:
            raise Exception("get parent process ID failed with rc: (%d)" %
                            (rc))
        parent_id = remote.get_results().stdout
        if len(parent_id) == 0:
            # Nothing Found
            return None
        # NOTE(review): parent_id[0] is the first line only if stdout is a
        # list of lines; if stdout is a plain string this is its first
        # character -- confirm the type Command returns here.
        cmd = "ps -ef |grep %s |grep -v grep | awk '{print $3}' | xargs pargs" % parent_id[0].strip()
        remote = Command(name='ps -ef', cmdStr=cmd, remoteHost=self.host)
        remote.run()
        rc = remote.get_results().rc
        if rc != 0:
            raise Exception("get process id failed with rc: (%d)" % (rc))
        process_out = remote.get_results().stdout
        # NOTE(review): same stdout-type question as above -- iterating a
        # plain string would yield single characters, not lines.
        for line in process_out:
            cur = line.split(":")
            # The bare except below doubles as control flow: any failure in
            # the try body (cur[2] missing, curproc not yet assigned, the
            # "kill failed" Exception raised inside, ...) ends up recording
            # cur[0] as the current process id instead of propagating.
            try:
                processName = cur[2]
                if cur[0] == "argv[0]":
                    cmd = "kill -%s %s" % (signal, curproc)
                    remote = Command(name='kill',
                                     cmdStr=cmd,
                                     remoteHost=self.host)
                    if processes[0] == "all":
                        remote.run()
                        rc = remote.get_results().rc
                        if rc != 0:
                            raise Exception("kill failed with rc: (%d)" %
                                            (rc))
                        out = remote.get_results().stdout
                    else:
                        for process in processes:
                            # NOTE(review): Python str has no .trim()
                            # method; if entries are plain strings this
                            # raises AttributeError, which the bare except
                            # swallows -- .strip() was probably intended.
                            if processName.find(process.trim()) >= 0:
                                remote.run()
                                rc = remote.get_results().rc
                                if rc != 0:
                                    raise Exception(
                                        "kill failed with rc: (%d)" % (rc))
                                out = remote.get_results().stdout
            except:
                curproc = cur[0]
    else:
        # Get the Process ID first
        if processes[0] == "all":
            # Kill all Postgres process
            cmd = "ps -ef |grep postgres | grep %s |grep -v grep | awk '{print $3}' | xargs kill -%s" % (
                segment[0], signal)
            # The pipeline is echoed to stdout before it is run.
            print cmd
            remote = Command(name='ps -ef',
                             cmdStr=cmd,
                             remoteHost=self.host)
            remote.run()
            rc = remote.get_results().rc
            if rc != 0:
                raise Exception("ps -ef failed with rc: (%d)" % (rc))
            # `out` is assigned but not used afterwards in this function.
            out = remote.get_results().stdout
        else:
            # Kill the specific process for the segment
            for process in processes:
                cmd = "ps -ef |grep postgres | grep %s |grep -v grep | grep \"%s\" | awk '{print $3}' | xargs kill -%s" % (
                    segment[0], process, signal)
                remote = Command(name='ps -ef',
                                 cmdStr=cmd,
                                 remoteHost=self.host)
                remote.run()
                rc = remote.get_results().rc
                if rc != 0:
                    raise Exception("ps -ef failed with rc: (%d)" % (rc))
                out = remote.get_results().stdout
    return rc
def run_pg_ctl_reload(datadir):
    """Run `$GPHOME/bin/pg_ctl reload -D <datadir>` in the LOCAL context.

    The command is executed with validateAfter=True.
    """
    reload_cmd = Command("pg_ctl reload",
                         "$GPHOME/bin/pg_ctl reload -D %s" % datadir,
                         ctxt=LOCAL)
    reload_cmd.run(validateAfter=True)
def gpstop_helper(self):
    '''Scenario-test helper: run `gpstop -a` with validateAfter=True'''
    Command('run gpstop', cmdStr='gpstop -a').run(validateAfter=True)
def config_primaries_for_replication(gpArray, hba_hostnames):
    """Append replication entries to pg_hba.conf on every primary and reload it.

    For each segment pair from gpArray.getSegmentList() a block of `host`
    lines is built and appended (via a remote shell `echo ... >>`) to the
    primary's pg_hba.conf, followed by `pg_ctl reload`.

    hba_hostnames truthy: entries use the names returned by
    socket.gethostbyaddr for the mirror (and the primary, when different).
    hba_hostnames falsy: entries use one CIDR per address returned by
    gp.IfAddrs.list_addrs.

    Any exception is logged and re-raised.
    """

    def as_cidr(address):
        # An address containing ':' gets the /128 suffix, anything else /32.
        return address + ('/128' if ':' in address else '/32')

    logger.info(
        "Starting to modify pg_hba.conf on primary segments to allow replication connections"
    )

    try:
        for pair in gpArray.getSegmentList():
            # The leading empty string makes the "\n".join below start the
            # appended text with a newline.
            hba_lines = ['']
            # samehost replication entry supports single-host development
            hba_lines.append(
                'host replication {username} samehost trust'.format(
                    username=unix.getUserName()))

            if not hba_hostnames:
                for ip in gp.IfAddrs.list_addrs(
                        pair.mirrorDB.getSegmentHostName()):
                    cidr = as_cidr(ip)
                    hba_lines.append(
                        "host all {username} {cidr} trust".format(
                            username=unix.getUserName(), cidr=cidr))

                mirror_hostname = pair.mirrorDB.getSegmentHostName()
                replication_ips = gp.IfAddrs.list_addrs(mirror_hostname)
                primary_hostname = pair.primaryDB.getSegmentHostName()
                if mirror_hostname != primary_hostname:
                    replication_ips.extend(
                        gp.IfAddrs.list_addrs(primary_hostname))

                for ip in replication_ips:
                    cidr = as_cidr(ip)
                    hba_lines.append(
                        "host replication {username} {cidr} trust".format(
                            username=unix.getUserName(), cidr=cidr))
            else:
                mirror_hostname, _, _ = socket.gethostbyaddr(
                    pair.mirrorDB.getSegmentHostName())
                hba_lines.append(
                    "host all {username} {hostname} trust".format(
                        username=unix.getUserName(),
                        hostname=mirror_hostname))
                hba_lines.append(
                    "host replication {username} {hostname} trust".format(
                        username=unix.getUserName(),
                        hostname=mirror_hostname))
                primary_hostname, _, _ = socket.gethostbyaddr(
                    pair.primaryDB.getSegmentHostName())
                if mirror_hostname != primary_hostname:
                    hba_lines.append(
                        "host replication {username} {hostname} trust".format(
                            username=unix.getUserName(),
                            hostname=primary_hostname))

            append_and_reload = ". {gphome}/greenplum_path.sh; echo '{entries}' >> {datadir}/pg_hba.conf; pg_ctl -D {datadir} reload".format(
                gphome=os.environ["GPHOME"],
                entries="\n".join(hba_lines),
                datadir=pair.primaryDB.datadir)
            logger.debug(append_and_reload)
            remote_cmd = Command(name="append to pg_hba.conf",
                                 cmdStr=append_and_reload,
                                 ctxt=base.REMOTE,
                                 remoteHost=pair.primaryDB.hostname)
            remote_cmd.run(validateAfter=True)

    except Exception as e:
        logger.error(
            "Failed while modifying pg_hba.conf on primary segments to allow replication connections: %s"
            % str(e))
        raise

    else:
        logger.info(
            "Successfully modified pg_hba.conf on primary segments to allow replication connections"
        )
def connection_scenario(self, trigger_content, master_shutdown_mode):
    """Suspend the WAL receiver, stop the master, resume and check the pid file.

    @param trigger_content: text written to the standby's `wal_rcv_test`
                            trigger file to tell the WAL receiver where to
                            suspend
    @param master_shutdown_mode: 'immediate', 'smart' or 'fast'; selects the
                                 gpstop flags used to stop the master
    @raise ValueError: for any other master_shutdown_mode (checked before
                       the cluster is touched)
    """
    # Verify if the system is UP
    # Setup a standby
    # Once the WAL receiver starts, signal it to suspend based on where the
    # input parameter wants
    # Once suspended, shutdown the Master(primary) based on the input mode.
    # Release the WAL receiver and it should fail (dead). But later after waiting
    # for some time it should re-try to connect to the Master and fail again
    # till the actual Master comes up again.
    # Note :- Sleeps used in this test are a little larger than normal times
    # to cope up with events like for e.g. spawning of WAL Receiver which entirely
    # depends on when the startup process signals the Postmaster to do it

    gpstop_by_mode = {
        'immediate': ("gpstop master immediate", "gpstop -aim"),
        'smart': ("gpstop master smart", "gpstop -am"),
        'fast': ("gpstop master fast", "gpstop -afm"),
    }
    # Fail fast with a clear message rather than hitting an unbound `cmd`
    # half-way through the scenario.
    if master_shutdown_mode not in gpstop_by_mode:
        raise ValueError("Unsupported master shutdown mode: %r" %
                         (master_shutdown_mode,))

    wal_rcv_pid_file = os.path.join(self.standby.datadir, 'wal_rcv.pid')
    wal_rcv_trigger_file = os.path.join(self.standby.datadir,
                                        'wal_rcv_test')

    # Verify if the database is up. Run some sql.
    PSQL.run_sql_command('DROP table if exists foo')
    Command('remove standby', 'gpinitstandby -ra').run()
    self.assertEqual(self.standby.create(), 0)

    # Trigger & evidence files cleanup
    if os.path.exists(wal_rcv_pid_file):
        os.remove(wal_rcv_pid_file)
    if os.path.exists(wal_rcv_trigger_file):
        os.remove(wal_rcv_trigger_file)

    # Setup a standby
    res = self.standby.start()
    self.assertTrue(res.wasSuccessful())

    # Wait for the walreceiver to start
    num_walsender = self.wait_for_walsender()
    self.assertEqual(num_walsender, 1)
    logger.info('Activated WAL Receiver...')

    # Cleanup the standby configuration from Master catalog
    # This is to avoid re-start of the standby on Master re-start
    dburl = dbconn.DbURL()
    self.standby.remove_catalog_standby(dburl)

    # Once the WAL receiver starts, signal it to suspend based on where the
    # input parameter wants
    wal_rcv_pid = self.get_pid_having_keyword('wal receiver process')
    logger.info('Suspending WAL Receiver(' + str(wal_rcv_pid) + ') ' +
                'with ' + trigger_content)
    self.generate_trigger_file(self.standby.datadir, 'wal_rcv_test',
                               trigger_content)
    os.kill(wal_rcv_pid, signal.SIGUSR2)

    time.sleep(10)

    self.assertTrue(not os.path.exists(wal_rcv_pid_file))

    # Once suspended, shutdown the Master(primary) based on the input mode.
    logger.info('Shutdown the Master in ' + master_shutdown_mode + ' mode')
    stop_name, stop_cmd = gpstop_by_mode[master_shutdown_mode]
    cmd = Command(stop_name, stop_cmd)
    cmd.run()
    self.assertEqual(cmd.get_results().rc, 0, str(cmd))

    # Release (resume) the WAL receiver and it should fail (dead). But later after waiting
    # for some time it should re-try to connect to the Master and fail again
    # till the actual Master comes up again.
    logger.info('Resume the WAL Receiver(' + str(wal_rcv_pid) + ')')
    self.generate_trigger_file(self.standby.datadir, 'wal_rcv_test',
                               "resume")
    os.kill(wal_rcv_pid, signal.SIGUSR2)

    time.sleep(10)

    # The pid file should exist. This is a proof that the WAL receiver came up
    # but did not get a chance to connect to the Master and hence did not clean up
    # the pid file
    self.assertTrue(os.path.exists(wal_rcv_pid_file))
    logger.info(
        'The WAL receiver pid file exists which means it restarted\n'
        'but still could not connect to the Master (primary) and hence the\n'
        'pid file was not cleared')

    # Stop the standby as its of no use anymore
    standby_stop = subprocess.Popen('pg_ctl stop -D ' +
                                    self.standby.datadir + ' -m immediate',
                                    shell=True,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT)
    # Wait for pg_ctl to finish (and drain its pipe) so that the gpstart
    # and the data-directory removal below do not race with it. The exit
    # status is deliberately not checked.
    standby_stop.communicate()

    # TODO RKT - Ideally, only the Primary should have been started here. But given the current nature of
    # gpstart supporting Master start only in utility mode and WAL repl not supporting utility
    # connection to the Master, normal gpstart (Master, Standby and Segment restart) will be used
    # for time being. This will be changed once utility support is added to WAL based repl.
    cmd = Command("gpstart", "gpstart -a")
    cmd.run()
    self.assertTrue(cmd.get_results().rc in (0, 1), str(cmd))

    logger.info('Pass (' + trigger_content + ',' + master_shutdown_mode +
                ')')

    # Cleanup for the next iteration
    shutil.rmtree(self.standby.datadir, True)
def tearDown(self):
    """Run gpfaultinjector to reset the malloc_failure fault (-H ALL -r primary)."""
    reset_fault = ('source $GPHOME/greenplum_path.sh; '
                   'gpfaultinjector -f malloc_failure '
                   '-y reset -H ALL -r primary')
    Command('fault injector', reset_fault).run()
def tearDown(self):
    """Cleanup: run `gpinitstandby -ar` after the test."""
    Command('gpinitstandby', 'gpinitstandby -ar').run()
def run_command(self, command, expected_rc=0):
    """Run `command` after sourcing greenplum_path.sh from self.gphome.

    Any exception raised by the run is logged through tinctest.logger and
    not propagated. `expected_rc` is accepted but not consulted here.
    """
    runner = Command(name='run %s' % command,
                     cmdStr='source %s/greenplum_path.sh;%s' %
                     (self.gphome, command))
    try:
        runner.run(validateAfter=True)
    except Exception as err:
        tinctest.logger.error("Error running command %s\n" % err)
def run(self,
        config=None,
        logdir=None,
        output=None,
        host=None,
        port=None,
        username=None,
        password=None,
        movetemp=None,
        movetrans=None,
        showtemp=None,
        showtrans=None,
        help=None):
    '''
    Build and run a gpfilespace command line from the given options.

    @param config: Config File (-c)
    @param logdir: Log dirctory, default gpAdminLogs (-l)
    @param output: Output directory (-o)
    @param host: hostname (-h)
    @param port: port number (-p)
    @param username: username (-U)
    @param password: password (-W)
    @param movetemp: value for --movetempfilespace
    @param movetrans: value for --movetransfilespace
    @param showtemp: when truthy, add --showtempfilespace
    @param showtrans: when truthy, add --showtransfilespace
    @param help: when truthy, replace every other option with -?
    @return result object (result.rc, result.stdout, result.stderr)
    @raise GPfilespaceException: when the command's rc is non-zero
    '''
    cmd_opt = ""
    if config:
        cmd_opt += " -c %s " % config
    if logdir:
        cmd_opt += " -l %s " % logdir
    if output:
        # Fixed: the output directory used to be formatted from `logdir`.
        cmd_opt += " -o %s " % output
    if host:
        cmd_opt += " -h %s " % host
    if port:
        cmd_opt += " -p %s " % port
    if username:
        cmd_opt += " -U %s " % username
    if password:
        cmd_opt += " -W %s " % password
    if movetemp:
        cmd_opt += " --movetempfilespace %s " % movetemp
    if movetrans:
        cmd_opt += " --movetransfilespace %s " % movetrans
    if showtemp:
        cmd_opt += " --showtempfilespace "
    if showtrans:
        cmd_opt += " --showtransfilespace "
    if help:
        # Help overrides (not extends) whatever was collected above.
        cmd_opt = " -? "

    filespace_cmd = '%s/bin/gpfilespace %s' % (self.gphome, cmd_opt)
    # @todo: We shouldn't source greenplum_path.sh
    cmd = Command(name='Gpfilespace command',
                  cmdStr="source %s/greenplum_path.sh;%s" %
                  (self.gphome, filespace_cmd))
    tinctest.logger.info(" %s" % cmd)
    cmd.run(validateAfter=True)
    result = cmd.get_results()
    if result.rc != 0:
        raise GPfilespaceException('Issue with Gpfilespace Command')
    return result
def start_db_with_no_rc_check(self):
    '''Run `gpstart -a`; the return code is not inspected.'''
    gpstart = Command('Gpstart_a', 'gpstart -a')
    tinctest.logger.info('Executing command: gpstart -a')
    gpstart.run()
def gpcheckcat(self,
               dbname=None,
               alldb=False,
               online=False,
               testname=None,
               outputFile=None,
               host=None,
               port=None):
    """
    gpcheckcat wrapper

    Runs $GPHOME/bin/lib/gpcheckcat with stdout/stderr redirected into
    outputFile, locally or on `host`. For a remote host the output file is
    copied back with scp into the same directory before it is read.

    @param dbname: Database name (default self.dbname)
    @param alldb: Check all database; anything other than the literal
                  False adds -A
    @param online: Activity (True) vs No Activity (False); truthy adds -O
    @param testname: when not None, passed as `-R <testname>`
    @param outputFile: file receiving the gpcheckcat output; defaults to
                       <self.LOGPATH>/checkcatoutput_<timestamp>
    @param host: host to run on; None, 'localhost' or the local hostname
                 run locally
    @param port: port for -p (default self.pgport)
    @return: errorCode, hasError, gpcheckcat output, repairScriptDir

    # errorCode from gpcheckcat
    # SUCCESS=0
    # ERROR_REMOVE=1
    # ERROR_RESYNC=2
    # ERROR_NOREPAIR=3
    """
    if dbname is None:
        dbname = self.dbname

    port_opt = "-p %s" % (self.pgport if port is None else port)
    alldb_opt = "" if alldb is False else "-A"
    online_opt = "-O" if online else ""
    testname_opt = "" if testname is None else "-R %s" % testname

    if outputFile is None:
        outputFile = self.LOGPATH + '/checkcatoutput_' + time.strftime(
            "%Y%m%d%H%M%S")

    tinctest.logger.info("Running gpcheckcat ...")
    checkcat_cmd = "%s/bin/lib/gpcheckcat %s %s %s %s %s > %s 2>&1;" \
        % (self.gphome, port_opt, alldb_opt, online_opt, testname_opt,
           dbname, outputFile)

    # Decide once whether the command runs on another machine.
    run_remotely = bool(host) and host not in (socket.gethostname(),
                                               'localhost')
    if run_remotely:
        cmd = Command(name=' Running Gpcheckcat.. ',
                      cmdStr=checkcat_cmd,
                      ctxt=REMOTE,
                      remoteHost=host)
    else:
        cmd = Command(name=' Running Gpcheckcat.. ', cmdStr=checkcat_cmd)
    # Not validated: a non-zero rc is the error code reported to the caller.
    cmd.run(validateAfter=False)

    # Get Error Code from gpcheckcat
    errorCode = cmd.get_results().rc

    if run_remotely:
        parent_dir = os.path.dirname(outputFile)
        cmd = Command(name='scp remote gpcheckcat output file to local',
                      cmdStr='scp %s:%s %s' %
                      (host, outputFile, parent_dir))
        cmd.run(validateAfter=False)

    # `with` closes the output file instead of leaking the handle.
    with open(outputFile) as output_handle:
        gpcheckcat_output = output_handle.readlines()
    (hasError, repairScriptDir) = self.check_catalogresults(outputFile)
    return (errorCode, hasError, gpcheckcat_output, repairScriptDir)
def get_backup_files(self, timestamp, dbname=None):
    """Query CVBkpRstWrapper for backup files and cache their GUIDs.

    Three query shapes are used, chosen from the arguments:
      * timestamp > 0: first look up "*<timestamp>.rpt", then browse "/"
        between the from/to times reported for that file;
      * dbname given: list files matching "*<dbname>*";
      * otherwise: list every file ("*").

    Each non-blank output line of the final query is split on ':' into
    (fname, oguid, cvguid, commcellid, subclient_id); fname -> cvguid is
    stored in self._backup_file_guids and self.cv_appid is rebuilt from the
    last two fields. self._backup_file_list becomes the file names in
    reverse sorted order.

    When timestamp == 0 the timestamp is taken from the first ".rpt" name
    in that list (the text after the last '_' of the part before the
    first '.'), and self.cv_prefix is set to "cv<dbname>_".

    @param timestamp: backup timestamp, or 0 to derive it from the newest
                      report file name
    @param dbname: database name used for filtering / prefix
    @return: the timestamp (possibly derived, in which case it is a string)

    NOTE(review): with timestamp > 0, command_string is only assigned
    inside the per-line loop, so an all-blank query result would leave it
    unbound at the second query -- confirm whether that can happen.
    NOTE(review): with timestamp == 0 and dbname None, the final
    "cv" + dbname concatenation raises TypeError -- confirm callers.
    """
    if timestamp > 0:
        logger.debug("Searching for browse times for timestamp %s" %
                     timestamp)
        query_string = "CVBkpRstWrapper -query --cv-proxy-host %s --cv-proxy-port %s --cv-appid %s --cv-apptype Q_DISTRIBUTED_IDA --cv-clientid %s --cv-instanceId %s --cv-backupsetId %s --cv-filename \"*%s.rpt\" --cv-debuglvl %s --cv-search-allcycles 1" % (
            self.cv_proxy_host, self.cv_proxy_port, self.cv_appid,
            self.cv_clientid, self.cv_instanceid, self.cv_backupsetid,
            timestamp, self.cv_debuglvl)
        logger.debug("Command string for get_backup_files: %s\n",
                     query_string)
        query = Command("Getting file info from the Commserve",
                        query_string)
        query.run(validateAfter=True)
        file_info = query.get_results().stdout.split('\n')
        # str.split always returns at least one element, so the else
        # branch below is not reached for string output.
        if len(file_info) > 0:
            for line in file_info:
                if len(line.strip()) > 0:
                    # Seven colon-separated fields per report line.
                    (fname, oguid, cvguid, fromtime, totime,
                     self._commcellid,
                     self._cv_subclient_id) = line.strip().split(':')
                    # Prefix is the text between the last '/' and "gp_".
                    self.cv_prefix = fname[(fname.rfind("/") +
                                            1):fname.rfind("gp_")]
                    # Subclient name is the prefix minus its last character.
                    self.cv_subclient = self.cv_prefix[:-1]
                    self.cv_appid = self._commcellid + ":" + self._cv_subclient_id
                    # (An earlier variant of this query also passed
                    # --cv-instanceId and --cv-backupsetId.)
                    command_string = "CVBkpRstWrapper -query --cv-proxy-host %s --cv-proxy-port %s --cv-appid %s --cv-apptype Q_DISTRIBUTED_IDA --cv-clientid %s --cv-filename \"/\" --cv-browse-fromtime %s --cv-browse-totime %s --cv-debuglvl %s" % (
                        self.cv_proxy_host, self.cv_proxy_port,
                        self.cv_appid, self.cv_clientid, fromtime, totime,
                        self.cv_debuglvl)
        else:
            raise Exception("No backup files found for timestamp %s" %
                            timestamp)
    elif dbname is not None:
        command_string = "CVBkpRstWrapper -query --cv-proxy-host %s --cv-proxy-port %s --cv-appid %s --cv-instanceId %s --cv-backupsetId %s --cv-apptype Q_DISTRIBUTED_IDA --cv-clientid %s --cv-filename \"*%s*\" --cv-debuglvl %s" % (
            self.cv_proxy_host, self.cv_proxy_port, self.cv_appid,
            self.cv_instanceid, self.cv_backupsetid, self.cv_clientid,
            dbname, self.cv_debuglvl)
    else:
        command_string = "CVBkpRstWrapper -query --cv-proxy-host %s --cv-proxy-port %s --cv-appid %s --cv-apptype Q_DISTRIBUTED_IDA --cv-clientid %s --cv-filename \"*\" --cv-debuglvl %s" % (
            self.cv_proxy_host, self.cv_proxy_port, self.cv_appid,
            self.cv_clientid, self.cv_debuglvl)

    logger.debug("Command string for get_backup_files': %s\n",
                 command_string)
    cmd = Command("Getting list of backup files from the Commserve",
                  command_string)
    cmd.run(validateAfter=True)
    files_list = cmd.get_results().stdout.split('\n')
    for line in files_list:
        if len(line.strip()) > 0:
            # Five colon-separated fields per listing line.
            (fname, oguid, cvguid, self._commcellid,
             self._cv_subclient_id) = line.strip().split(':')
            self._backup_file_guids[fname] = cvguid
            logger.debug("Caching file [%s] with GUID [%s]" %
                         (fname, cvguid))
            self.cv_appid = self._commcellid + ":" + self._cv_subclient_id
    # Python 2 positional form of sorted(iterable, cmp, key, reverse):
    # no cmp, no key, reverse=True.
    self._backup_file_list = sorted(self._backup_file_guids.keys(), None,
                                    None, True)

    # For restore scenarios with -s <dbname>
    # look for the restore timestamp in the latest backup report file name
    if timestamp == 0:
        # NOTE(review): `file` and `list` shadow builtins within this
        # function.
        for file in self._backup_file_list:
            if ".rpt" in file:
                list = file.split('.')
                list2 = list[0].split('_')
                timestamp = list2.pop()
                logger.debug("Found restore timestamp=%s for database=%s",
                             timestamp, dbname)
                break
        self.cv_prefix = "cv" + dbname + "_"
    return timestamp
def run(self):
    """Carry out the single gppkg operation selected on this instance.

    Order of precedence: build (returns immediately), then after the
    packaging-tool check and host discovery: migrate, install, query,
    remove, update, clean.

    Raises ExceptionNoStackTraceNeeded when the packaging tools are
    missing, the rpm version is not 4.x, or a remove request matches
    zero or more than one installed package.
    """
    if self.build:
        BuildGppkg(self.build, self.filename if self.filename else None).run()
        return

    on_ubuntu = platform.linux_distribution()[0] == 'Ubuntu'
    if not on_ubuntu:
        try:
            rpm_check = Command(name='Check for rpm', cmdStr='rpm --version')
            rpm_check.run(validateAfter=True)
            version_output = rpm_check.get_results().stdout.strip()
            # The version is taken to be the last space-separated word.
            rpm_version_string = version_output.split(' ')[-1]
            if not rpm_version_string.startswith('4.'):
                raise ExceptionNoStackTraceNeeded(
                    'gppkg requires rpm version 4.x')
        except ExecutionError as ex:
            # Only a "not found" failure is fatal; any other execution
            # error is ignored.
            stderr_text = ex.cmd.get_results().stderr.strip()
            if len(stderr_text) != 0 and 'not found' in stderr_text:
                raise ExceptionNoStackTraceNeeded(
                    'gppkg requires RPM to be available in PATH')
    else:
        required_tools = (
            ('Check for dpkg', 'dpkg --version'),
            ('Check for fakeroot', 'fakeroot --version'),
        )
        try:
            for check_name, check_cmd in required_tools:
                Command(name=check_name,
                        cmdStr=check_cmd).run(validateAfter=True)
        except Exception:
            raise ExceptionNoStackTraceNeeded(
                'fakeroot and dpkg are both required by gppkg')

    if self.master_datadir is None:
        self.master_datadir = gp.get_masterdatadir()
    self.master_port = self._get_master_port(self.master_datadir)

    self._get_gpdb_host_list()

    if self.migrate:
        MigratePackages(from_gphome=self.migrate[0],
                        to_gphome=self.migrate[1],
                        standby_host=self.standby_host,
                        segment_host_list=self.segment_host_list).run()
        return

    if self.install:
        pkg = Gppkg.from_package_path(self.install)
        InstallPackage(pkg, self.master_host, self.standby_host,
                       self.segment_host_list).run()
        return

    if self.query:
        query_type, package_path = self.query
        QueryPackage(query_type, package_path).run()
        return

    if self.remove:
        # Check for exact match first, then use wildcard for what will be removed.
        matches = ListFilesByPattern(GPPKG_ARCHIVE_PATH,
                                     self.remove + GPPKG_EXTENSION).run()
        if len(matches) == 0:
            # now try wildcard
            matches = ListFilesByPattern(
                GPPKG_ARCHIVE_PATH,
                self.remove + '*' + GPPKG_EXTENSION).run()
            if len(matches) == 0:
                raise ExceptionNoStackTraceNeeded(
                    'Package %s has not been installed.' % self.remove)

        # refuse to remove at all if the match is too broad, i.e., > 1
        if len(matches) > 1:
            err_msg = "Remove request '%s' too broad. " \
                      "Multiple packages match remove request: ( %s )." % (self.remove, ", ".join(matches))
            raise ExceptionNoStackTraceNeeded(err_msg)

        pkg = Gppkg.from_package_path(
            os.path.join(GPPKG_ARCHIVE_PATH, matches[0]))
        UninstallPackage(pkg, self.master_host, self.standby_host,
                         self.segment_host_list).run()
        return

    if self.update:
        update_warning = (
            'WARNING: The process of updating a package includes removing all',
            'previous versions of the system objects related to the package. For',
            'example, previous versions of shared libraries are removed.',
            'After the update process, a database function will fail when it is',
            'called if the function references a package file that has been removed.',
        )
        for warning_line in update_warning:
            logger.warning(warning_line)
        if self.interactive and not ask_yesno(
                None, 'Do you still want to continue ?', 'N'):
            logger.info('Skipping update of gppkg based on user input')
            return
        pkg = Gppkg.from_package_path(self.update)
        UpdatePackage(pkg, self.master_host, self.standby_host,
                      self.segment_host_list).run()
        return

    if self.clean:
        CleanGppkg(self.standby_host, self.segment_host_list).run()
def run_gpstop_cmd(self,
                   flag='-a',
                   mdd=None,
                   logdir=None,
                   masteronly=None,
                   immediate=None,
                   fast=None,
                   smart=None,
                   quietmode=None,
                   restart=None,
                   timeout=None,
                   parallelproc=None,
                   notstandby=None,
                   verbose=None,
                   version=None,
                   standby=None,
                   reload=None,
                   validate=True):
    '''
    GpStop function

    When `version` is given, `gpstop --version` is run and the method
    returns True if rc is 0 and stderr is empty, False otherwise.
    Otherwise the option strings for -d, -q, -v, -y and -t are prepared and
    `timeout` is validated.

    NOTE(review): as visible here the prepared option strings are not used
    afterwards and the non-version path returns None -- the remainder of
    the method appears to be missing; confirm against the full source.

    @param flag: '-a' is the default option considered .Do not prompt the user for confirmation
    @param mdd: when not None, " -d <self.master_dir>" is prepared (the passed value itself is not used)
    @param logdir: The directory to write the log file. Defaults to ~/gpAdminLogs.
    @param masteronly: Shuts down only the master node
    @param immediate: Immediate shut down.
    @param fast: Fast shut down.
    @param smart: Smart shut down.
    @param quietmode: Command output is not displayed on the screen
    @param restart: Restart after shutdown is complete
    @param timeout: Specifies a timeout threshold (in seconds) to wait for a segment instance to shutdown
    @type timeout: Integer
    @param parallelproc: The number of segments to stop in parallel
    @type parallelproc: Integer
    @param verbose: Displays detailed status, progress and error messages output by the utility
    @param notstandby: Do not stop the standby master process
    @param version: Displays the version of this utility.
    @param standby: Do not stop the standby master process
    @param reload: This option reloads the pg_hba.conf files of the master and segments and the runtime parameters of the postgresql.conf files but does not shutdown the Greenplum Database array
    @param validate: forwarded to Command.run as validateAfter for the --version run
    @raise GPstopException: when timeout cannot be converted with int()
    '''
    make_cmd = ''.join([self.gphome, '/bin/gpstop'])

    # Check the version of gpstop
    if version is not None:
        make_cmd = ' '.join([make_cmd, '--version'])
        cmd = Command(name='Run gpstop',
                      cmdStr='source %s/greenplum_path.sh;%s' %
                      (self.gphome, make_cmd))
        tinctest.logger.info("Running gpstop : %s" % cmd)
        cmd.run(validateAfter=validate)
        result = cmd.get_results()
        if result.rc != 0 or result.stderr:
            return False
        tinctest.logger.info((result))
        return True

    # -d The master host data directory
    mdd = "" if mdd is None else " -d %s" % self.master_dir

    # -q Quietmode
    quietmode = "" if quietmode is None else "-q"

    # -v Verbose
    verbose = "" if verbose is None else " -v"

    # -y notstandby
    notstandby = "" if notstandby is None else " -y"

    # -t nnn Timeout
    if timeout is None:
        timeout = ""
    else:
        # Check if timeout is an integer
        try:
            int(timeout)
            timeout = " -t %s" % timeout
        except ValueError:
            raise GPstopException("Gpstop timeout is not set correctly!")
def inject_fault(self,
                 y=None,
                 f=None,
                 r='mirror',
                 seg_id=None,
                 H='ALL',
                 m='async',
                 sleeptime=None,
                 o=None,
                 p=None,
                 outfile=None,
                 table=None,
                 database=None):
    '''
    PURPOSE : Inject the fault using gpfaultinjector

    @param y : suspend/resume/reset/panic/fault (fault type, required)
    @param f : Name of the fault (required)
    @param r : role, passed with -r when seg_id is None
    @param seg_id : passed with -s; when None, -H and -r are used instead
    @param H : host selector, passed with -H when seg_id is None
    @param m : mode, passed with -m
    @param sleeptime : passed with both -z and --sleep_time_s
    @param o : passed with -o whenever it is not None (so 0 is passed)
    @param p : passed with -p
    @param outfile : output of the command is placed in this file
    @param table : passed with -t
    @param database : passed with -D
    @return (True, stdout of the command)
    @raise Exception : when y or f is missing, MASTER_DATA_DIRECTORY is
                       unset, or the command fails with y other than
                       'status'
    '''
    if (not y) or (not f):
        raise Exception("Need a value for type and name to continue")

    if not os.getenv('MASTER_DATA_DIRECTORY'):
        raise Exception(
            'MASTER_DATA_DIRECTORY environment variable is not set.')

    fault_cmd = "gpfaultinjector -f %s -m %s -y %s " % (f, m, y)
    if seg_id:
        fault_cmd = fault_cmd + " -s %s" % seg_id
    if sleeptime:
        fault_cmd = fault_cmd + " -z %s" % sleeptime
    if o is not None:
        fault_cmd = fault_cmd + " -o %s" % o
    if p:
        fault_cmd = fault_cmd + " -p %s" % p
    if seg_id is None:
        fault_cmd = fault_cmd + " -H %s -r %s" % (H, r)
    if sleeptime:
        fault_cmd = fault_cmd + " --sleep_time_s %s " % sleeptime
    if table:
        fault_cmd = fault_cmd + " -t %s " % table
    if database:
        fault_cmd = fault_cmd + " -D %s " % database
    if outfile is not None:
        fault_cmd = fault_cmd + ">" + outfile

    cmd = Command('fault_command', fault_cmd)
    cmd.run()
    result = cmd.get_results()

    # A non-zero rc is tolerated only for 'status' queries.
    if result.rc != 0 and y != 'status':
        raise Exception("Cmd %s Failed to inject fault %s to %s" %
                        (fault_cmd, f, y))

    tinctest.logger.info('Injected fault %s ' % fault_cmd)
    return (True, result.stdout)