def start(self):
    """
    Launch the standby master's postmaster.

    The segment count and the standby's dbid are looked up in the catalog
    (utility-mode connection) and handed to ``gp.MasterStart``, so the
    pg_ctl options are decided by gppylib rather than spelled out here.

    Returns the result object of the executed start command.
    """
    array = GpArray.initFromCatalog(dbconn.DbURL(), utility=True)
    segment_count = array.getNumSegmentContents()
    standby_entry = array.standbyMaster

    start_cmd = gp.MasterStart(
        "Starting Master Standby",
        self.datadir,
        self.port,
        standby_entry.dbid,
        0,
        segment_count,
        None,
        None,
        None,
    )
    # Drop pg_ctl's -w flag from the generated command line: with it the
    # start would wait forever.
    start_cmd.cmdStr = start_cmd.cmdStr.replace(' -w', '')
    start_cmd.run(validateAfter=True)
    return start_cmd.get_results()
def test_fail_back(self):
    """
    This test verifies that the fail-back mode is not allowed.

    Fail-back means the original master acting as the new standby.

    Flow: re-create and start the standby, wait for one WAL sender,
    promote the standby, stop the original master, give the original
    master a recovery.conf derived from the standby's recovery.done
    (with the port rewritten to the standby's port), start the original
    master again and check that its 'wal_rcv.pid' file is present.
    Finally the original master is stopped and the cluster is brought
    back with gpstart.
    """
    # Verify if the database is up. Run some sql.
    PSQL.run_sql_command('DROP table if exists foo')
    Command('remove standby', 'gpinitstandby -ra').run()
    self.assertEqual(self.standby.create(), 0)
    res = self.standby.start()
    self.assertTrue(res.wasSuccessful())

    # Wait for the walreceiver to start
    num_walsender = self.wait_for_walsender()
    self.assertEqual(num_walsender, 1)
    logger.info('Activated WAL Receiver...')

    # Promote the standby & shutdown the old Master.
    # Generate a recovery.conf file for the old Master so as to make it
    # the new standby that connects to the new master (originally standby).
    logger.info('Promoting the standby...')
    self.standby.promote()

    dburl = dbconn.DbURL()
    gparray = GpArray.initFromCatalog(dburl, utility=True)
    numcontent = gparray.getNumSegmentContents()
    orig_master = gparray.master

    self.standby.remove_catalog_standby(dburl)

    # Start from a clean slate so the pid-file assertion further down
    # reflects only the restart performed by this test.
    wal_rcv_pid = os.path.join(orig_master.datadir, 'wal_rcv.pid')
    if os.path.exists(wal_rcv_pid):
        os.remove(wal_rcv_pid)

    logger.info('Stop the original master...')
    cmd = Command("gpstop", "gpstop -aim")
    cmd.run()
    self.assertEqual(cmd.get_results().rc, 0, str(cmd))

    logger.info(
        'Generate recovery.conf for original master to make a new standby...'
    )
    recovery_conf = os.path.join(orig_master.datadir, 'recovery.conf')
    recovery_done = os.path.join(self.standby.datadir, 'recovery.done')
    # The replacement strings do not change per line, so build them once.
    old_port = "port=" + str(os.environ.get('PGPORT'))
    new_port = "port=" + str(self.standby.port)
    # Context managers guarantee both files are closed even if the copy
    # fails part-way through.
    with open(recovery_conf, 'w') as master_recv_conf:
        with open(recovery_done) as standby_recv_done:
            for line in standby_recv_done:
                master_recv_conf.write(line.replace(old_port, new_port))

    logger.info(
        'Start the old master again (to act as the new standby)...')
    master = gp.MasterStart("Starting orig Master in standby mode",
                            orig_master.datadir, orig_master.port,
                            orig_master.dbid, 0, numcontent,
                            None, None, None)
    # -w option would wait forever.
    master.cmdStr = master.cmdStr.replace(' -w', '')
    master.run(validateAfter=True)
    self.assertTrue((master.get_results()).wasSuccessful())

    # Have to do this to give the new standby some time to be active
    subprocess.check_call("psql -c 'create database foo' -p " +
                          str(self.standby.port),
                          shell=True,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
    subprocess.check_call("psql -c 'drop database foo' -p " +
                          str(self.standby.port),
                          shell=True,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
    time.sleep(3)

    # The new standby can re-start but should not be able to connect to
    # the new master (originally standby). That's the test.
    self.assertTrue(os.path.exists(wal_rcv_pid))
    logger.info(
        'The WAL receiver pid file exists which means the new standby started\n'
        'but still could not connect to the new Master (originally standby) and hence the\n'
        'pid file was not cleared')

    # Remove the recovery.conf file from the new standby directory
    # as it is no longer needed
    os.remove(recovery_conf)

    logger.info('Stop the original master again...')
    stop_proc = subprocess.Popen('pg_ctl stop -D ' + orig_master.datadir +
                                 ' -m immediate',
                                 shell=True,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT)
    # Wait for pg_ctl to finish (and drain its pipe) so that gpstart below
    # does not race with a shutdown that is still in progress.
    stop_output, _ = stop_proc.communicate()
    logger.info('pg_ctl stop exited with rc %s: %s' %
                (stop_proc.returncode, stop_output))

    # Perform gpstart to get the original master (& cluster) back again
    cmd = Command("gpstart", "gpstart -a")
    cmd.run()
    self.assertTrue(cmd.get_results().rc in (0, 1), str(cmd))

    logger.info('Pass')
class GpactivateStandby(object):
    '''Class for gpactivatestandby operations.

    Wraps the command-line steps used to activate a standby master and to
    fail back to the original master.  ``activate`` relies on
    ``get_current_standby()``, ``get_standby_port()`` and
    ``get_standby_dd()``, which are expected to be defined elsewhere on
    this class.
    '''

    standby_port = '5656'
    db_name = 'walrepl'

    def __init__(self):
        '''Create the helper objects and snapshot the current master.

        Reads GPHOME, PGPORT and MASTER_DATA_DIRECTORY from the
        environment and queries the catalog (utility mode) for the number
        of segment contents and the current master entry.
        '''
        self.gpinit = GpinitStandby()
        self.pgutil = GpUtility()
        self.runmixin = StandbyRunMixin()
        # Use the class-level name so the database created here is the
        # same one failback_to_original_master() later runs SQL against.
        self.runmixin.createdb(dbname=self.db_name)
        self.gphome = os.environ.get('GPHOME')
        self.pgport = os.environ.get('PGPORT')
        self.mdd = os.environ.get('MASTER_DATA_DIRECTORY')
        self.config = GPDBConfig()
        self.host = socket.gethostname()
        dburl = dbconn.DbURL()
        gparray = GpArray.initFromCatalog(dburl, utility=True)
        self.numcontent = gparray.getNumSegmentContents()
        self.orig_master = gparray.master

    def run_remote(self, standbyhost, rmt_cmd, pgport='', standbydd=''):
        '''Run ``rmt_cmd`` on ``standbyhost`` via gpssh.

        The remote shell first sources greenplum_path.sh and exports
        PGPORT and MASTER_DATA_DIRECTORY from ``pgport`` / ``standbydd``.
        The command is not validated; callers inspect the return code.

        Returns a ``(rc, stdout)`` tuple.
        '''
        export_cmd = "source %s/greenplum_path.sh;export PGPORT=%s;export MASTER_DATA_DIRECTORY=%s" % (
            self.gphome, pgport, standbydd)
        remote_cmd = "gpssh -h %s -e '%s; %s'" % (standbyhost, export_cmd,
                                                  rmt_cmd)
        cmd = Command(name='Running Remote command', cmdStr=remote_cmd)
        tinctest.logger.info(" %s" % cmd)
        cmd.run(validateAfter=False)
        result = cmd.get_results()
        return result.rc, result.stdout

    def activate(self, option=''):
        '''Stop the master and activate current standby to master.

        ``option`` is appended verbatim to the gpactivatestandby command
        line.  If the first attempt fails and its output contains
        'Force activation required', the command is retried with ``-f``.

        Returns False when the forced retry fails, True otherwise.
        '''
        standby_host = self.get_current_standby()
        standby_port = self.get_standby_port()
        standby_loc = self.get_standby_dd()

        self.run_remote(self.host, 'gpstop -aim',
                        pgport=self.pgport, standbydd=self.mdd)

        gpactivate_cmd = 'gpactivatestandby -a -d %s %s' % (standby_loc,
                                                            option)
        (rc, result) = self.run_remote(standby_host, gpactivate_cmd,
                                       pgport=standby_port,
                                       standbydd=standby_loc)
        tinctest.logger.info(
            'Result without force option to activate standby %s' % result)
        # NOTE(review): a non-zero rc *without* the force hint still falls
        # through to the success path below -- confirm that is intended.
        if (rc != 0) and 'Force activation required' in result:
            tinctest.logger.info(
                'activating standby failed, try force activation...')
            gpactivate_cmd = 'gpactivatestandby -a -f -d %s %s' % (
                standby_loc, option)
            (rc, result) = self.run_remote(standby_host, gpactivate_cmd,
                                           pgport=standby_port,
                                           standbydd=standby_loc)
            if rc != 0:
                tinctest.logger.error('Force activating standby failed!')
                return False
        tinctest.logger.info('standby acvitated, host value %s' % standby_host)
        return True

    def remove_standby(self):
        '''Remove the standby via ``self.gpinit`` (option ``-r``) and return its result.'''
        return self.gpinit.run(option='-r')

    def failback_to_original_master(self):
        '''Bring the original master back if it is not currently running.

        Checks ``pg_ctl status`` for $MASTER_DATA_DIRECTORY; when nothing
        reports as running, starts the original master, runs
        ``gpinitstandby -ra`` and, if a local 'drop_filespace.sql' exists,
        executes it against ``self.db_name``.

        Returns None.  If the status command itself raises, the error is
        logged and the method returns without doing anything else.

        Raises WalReplException if the original master start reports a
        non-zero return code.
        '''
        # Check if master is running.
        bashCmd = (
            self.gphome
        ) + '/bin/pg_ctl status -D $MASTER_DATA_DIRECTORY | grep \'pg_ctl: server is running\''
        cmd = Command(name='Running cmd %s' % bashCmd,
                      cmdStr="source %s/greenplum_path.sh; %s" %
                      (self.gphome, bashCmd))
        try:
            cmd.run()
        except Exception as e:
            tinctest.logger.error("Error running command %s\n" % e)
            return

        result = cmd.get_results()
        out = result.stdout
        # Empty grep output means pg_ctl did not report a running server.
        # NOTE(review): everything below is treated as part of this branch;
        # confirm the nesting against the original layout.
        if not out:
            tinctest.logger.info('Start the old master again ...')
            master = gp.MasterStart("Starting orig Master",
                                    self.orig_master.datadir,
                                    self.orig_master.port,
                                    self.orig_master.dbid, 0,
                                    self.numcontent, None, None, None)
            master.run(validateAfter=True)
            result = master.get_results()
            tinctest.logger.info('orig Master started result : %s' %
                                 result.stdout)
            if result.rc != 0:
                raise WalReplException(
                    'Unable to start original master process')
            Command('gpinitstandby -ra', 'gpinitstandby -ra').run()
            # failing back to old master, it takes a little bit to prepare
            # the cluster ready for connection
            if os.path.exists(local_path('drop_filespace.sql')):
                PSQL.run_sql_file(local_path('drop_filespace.sql'),
                                  dbname=self.db_name)