def __init__(self, config):
    """Build the daemon's collaborators from ``config``.

    Keys read from ``config``: ``'loop_wait'``, ``'postgresql'``,
    ``'restapi'`` (which must contain ``'listen'``) and, optionally,
    ``'skydns2'``.
    """
    self.nap_time = config['loop_wait']
    self.postgresql = Postgresql(config['postgresql'])

    dcs = self.get_dcs(self.postgresql.name, config)
    self.ha = Ha(self.postgresql, dcs)

    # The two parts are not used afterwards; the unpacking itself raises
    # ValueError unless 'listen' contains exactly one ':'.
    _host, _port = config['restapi']['listen'].split(':')
    self.api = RestApiServer(self, config['restapi'])

    self.skydns2 = config.get('skydns2')
    self.next_run = time.time()
    self.shutdown_member_ttl = 300
def set_up(self):
    """Create the mocked PostgreSQL, Etcd and Ha objects used by the tests."""
    self.p = MockPostgresql()
    etcd_settings = {'ttl': 30, 'host': 'remotehost:2379', 'scope': 'test'}

    # NOTE(review): the flattened source does not show where the ``with``
    # body ends; everything is kept inside the patch scope -- confirm.
    with patch.object(Client, 'machines') as mock_machines:
        mock_machines.__get__ = Mock(
            return_value=['http://remotehost:2379'])
        self.e = Etcd('foo', etcd_settings)

        # Replace the client's read/write with the module's stand-ins.
        self.e.client.read = etcd_read
        self.e.client.write = etcd_write

        self.ha = Ha(self.p, self.e)
        self.ha.load_cluster_from_dcs()
        # After one real load, pin the cluster and turn further loads
        # into calls to ``nop``.
        self.ha.cluster = get_unlocked_cluster()
        self.ha.load_cluster_from_dcs = nop
def run(config):
    """Bring the local PostgreSQL node up, then run the HA loop indefinitely.

    Reads ``config["etcd"]``, ``config["postgresql"]`` and
    ``config["loop_wait"]``. The final ``while True`` loop has no exit, so
    this function does not return normally; inside the loop only
    ``urllib2.URLError`` is handled, any other exception propagates.
    """
    etcd = Etcd(config["etcd"])
    postgresql = Postgresql(config["postgresql"])
    ha = Ha(postgresql, etcd)
    # Have stop_postgresql(postgresql) called at normal interpreter exit.
    atexit.register(stop_postgresql, postgresql)
    logging.info("Governor Starting up")
    # is data directory empty?
    if postgresql.data_directory_empty():
        logging.info("Governor Starting up: Empty Data Dir")
        # racing to initialize
        wait_for_etcd("cannot initialize member without ETCD", etcd, postgresql)
        # Initialise locally when the race call returns a truthy value, or
        # when etcd.members() is falsy.
        if etcd.race("/initialize", postgresql.name) or not etcd.members():
            logging.info(
                "Governor Starting up: Initialisation Race ... WON!!!")
            logging.info("Governor Starting up: Initialise Postgres")
            postgresql.initialize()
            logging.info("Governor Starting up: Initialise Complete")
            etcd.take_leader(postgresql.name)
            logging.info("Governor Starting up: Starting Postgres")
            postgresql.start()
        else:
            logging.info("Governor Starting up: Initialisation Race ... LOST")
            logging.info("Governor Starting up: Sync Postgres from Leader")
            synced_from_leader = False
            # Retry every 5 seconds until a leader is reported and
            # sync_from_leader() returns a truthy value; there is no
            # upper bound on the number of attempts.
            while not synced_from_leader:
                leader = etcd.current_leader()
                if not leader:
                    time.sleep(5)
                    continue
                if postgresql.sync_from_leader(leader):
                    logging.info("Governor Starting up: Sync Completed")
                    postgresql.write_recovery_conf(leader)
                    logging.info("Governor Starting up: Starting Postgres")
                    postgresql.start()
                    synced_from_leader = True
                else:
                    time.sleep(5)
    else:
        logging.info("Governor Starting up: Existing Data Dir")
        postgresql.follow_no_leader()
        logging.info("Governor Starting up: Starting Postgres")
        postgresql.start()
    # NOTE(review): nesting was reconstructed from a flattened source line;
    # this call is assumed to run after both branches above -- confirm.
    wait_for_etcd(
        "running in readonly mode; cannot participate in cluster HA without etcd",
        etcd, postgresql)
    logging.info("Governor Running: Starting Running Loop")
    while True:
        try:
            ha.run_cycle()
            # create replication slots
            if postgresql.is_leader():
                logging.info("Governor Running: I am the Leader")
                for node in etcd.get_client_path(
                        "/members?recursive=true").get("node", {}).get("nodes", []):
                    # The member name is the last '/'-separated segment
                    # of the node's key.
                    member = node["key"].split('/')[-1]
                    if member != postgresql.name:
                        # NOTE(review): ``member`` is interpolated
                        # straight into the SQL text -- confirm that
                        # member names can be trusted.
                        postgresql.query(
                            "DO LANGUAGE plpgsql $$DECLARE somevar VARCHAR; BEGIN SELECT slot_name INTO somevar FROM pg_replication_slots WHERE slot_name = '%(slot)s' LIMIT 1; IF NOT FOUND THEN PERFORM pg_create_physical_replication_slot('%(slot)s'); END IF; END$$;"
                            % {"slot": member})
            etcd.touch_member(postgresql.name, postgresql.connection_string)
            time.sleep(config["loop_wait"])
        except urllib2.URLError:
            logging.info(
                "Lost connection to etcd, setting no leader and waiting on etcd"
            )
            postgresql.follow_no_leader()
            wait_for_etcd(
                "running in readonly mode; cannot participate in cluster HA without etcd",
                etcd, postgresql)
# A non-empty environment variable replaces the matching config entry.
env_value = os.getenv('GOVERNOR_POSTGRESQL_READ_ONLY_PORT')
if env_value:
    config['postgresql']['read_only_port'] = env_value

env_value = os.getenv('GOVERNOR_POSTGRESQL_DATA_DIR')
if env_value:
    config['postgresql']['data_dir'] = env_value

env_value = os.getenv('GOVERNOR_POSTGRESQL_REPLICATION_NETWORK')
if env_value:
    config['postgresql']['replication']['network'] = env_value

etcd = Etcd(config["etcd"])
postgresql = Postgresql(config["postgresql"])
ha = Ha(postgresql, etcd)


# leave things clean when shutting down, if possible
def shutdown(signal, frame):
    """Best-effort cleanup: abdicate if holding the lock, then deregister.

    The parameters are not used by the body; the (signal, frame) shape
    suggests a signal handler -- TODO confirm against the registration site.
    """
    logging.info("Governor Shutting Down: Received Shutdown Signal")
    try:
        if ha.has_lock():
            logging.info("Governor Shutting Down: Abdicating Leadership")
            etcd.abdicate(postgresql.name)
        # NOTE(review): nesting reconstructed from a flattened line; the
        # membership removal is assumed to run whether or not the lock is
        # held -- confirm.
        logging.info("Governor Shutting Down: Removing Membership")
        etcd.delete_member(postgresql.name)
    except:
        # Deliberately catches everything so a cleanup failure is only logged.
        logging.exception("Error during Abdication")
def run(config):
    """Bring the local PostgreSQL node up, then run the HA loop indefinitely.

    Reads ``config["etcd"]``, ``config["postgresql"]`` and
    ``config["loop_wait"]``. The final ``while True`` loop has no exit, so
    this function does not return normally; inside the loop only
    ``urllib2.URLError`` and ``socket.timeout`` are handled, any other
    exception propagates.
    """
    etcd = Etcd(config["etcd"])
    postgresql = Postgresql(config["postgresql"])
    ha = Ha(postgresql, etcd)
    # Have stop_postgresql(postgresql) called at normal interpreter exit.
    atexit.register(stop_postgresql, postgresql)
    # Install ``signalhandler`` for SIGTERM.
    signal.signal(signal.SIGTERM, signalhandler)
    logging.info("Governor Starting up")
    # is data directory empty?
    if postgresql.data_directory_empty():
        logging.info("Governor Starting up: Empty Data Dir")
        # racing to initialize
        wait_for_etcd("cannot initialize member without ETCD", etcd, postgresql)
        if etcd.race("/initialize", postgresql.name):
            logging.info("Governor Starting up: Initialisation Race ... WON!!!")
            logging.info("Governor Starting up: Initialise Postgres")
            postgresql.initialize()
            logging.info("Governor Starting up: Initialise Complete")
            etcd.take_leader(postgresql.name)
            logging.info("Governor Starting up: Starting Postgres")
            postgresql.start()
        else:
            logging.info("Governor Starting up: Initialisation Race ... LOST")
            logging.info("Governor Starting up: Sync Postgres from Leader")
            synced_from_leader = False
            # Retry every 5 seconds until a leader is reported and
            # sync_from_leader() returns a truthy value; there is no
            # upper bound on the number of attempts.
            while not synced_from_leader:
                leader = etcd.current_leader()
                if not leader:
                    time.sleep(5)
                    continue
                if postgresql.sync_from_leader(leader):
                    logging.info("Governor Starting up: Sync Completed")
                    postgresql.write_recovery_conf(leader)
                    logging.info("Governor Starting up: Starting Postgres")
                    postgresql.start()
                    synced_from_leader = True
                else:
                    time.sleep(5)
    else:
        logging.info("Governor Starting up: Existing Data Dir")
        postgresql.follow_no_leader()
        logging.info("Governor Starting up: Starting Postgres")
        postgresql.start()
    # NOTE(review): nesting was reconstructed from a flattened source line;
    # this call is assumed to run after both branches above -- confirm.
    wait_for_etcd("running in readonly mode; cannot participate in cluster HA without etcd", etcd, postgresql)
    logging.info("Governor Running: Starting Running Loop")
    while True:
        try:
            # Logs whatever ha.run_cycle() returns.
            logging.info("Governor Running: %s" % ha.run_cycle())
            # create replication slots
            if postgresql.is_leader():
                logging.info("Governor Running: I am the Leader")
            # NOTE(review): the flattened source does not show whether this
            # loop sits inside the leader check above, nor which ``if`` the
            # ``else`` below pairs with. It is laid out here so that the
            # drop branch is reachable on a non-leader -- confirm against
            # the original file.
            for node in etcd.members():
                member = node["hostname"]
                if member != postgresql.name:
                    if postgresql.is_leader():
                        postgresql.ensure_replication_slot(
                            postgresql.replication_slot_name(member)
                        )
                    else:
                        postgresql.drop_replication_slot(
                            postgresql.replication_slot_name(member)
                        )
            etcd.touch_member(postgresql.name, postgresql.connection_string)
            time.sleep(config["loop_wait"])
        except (urllib2.URLError, socket.timeout):
            logging.info("Lost connection to etcd, setting no leader and waiting on etcd")
            postgresql.follow_no_leader()
            wait_for_etcd("running in readonly mode; cannot participate in cluster HA without etcd", etcd, postgresql)
def test_load_cluster_from_dcs(self):
    """Call ``load_cluster_from_dcs`` before and after stubbing ``get_cluster``."""
    subject = Ha(self.p, self.e)

    # First load uses the DCS object exactly as the fixture built it.
    subject.load_cluster_from_dcs()

    # Second load runs with ``get_cluster`` replaced by
    # ``get_unlocked_cluster``.
    self.e.get_cluster = get_unlocked_cluster
    subject.load_cluster_from_dcs()