def __init__(self, config):
    """Build the PostgreSQL wrapper, HA controller and REST API from ``config``.

    ``config`` is indexed like a dict. The keys read here are ``loop_wait``,
    ``postgresql``, ``restapi`` (with a ``listen`` entry) and, optionally,
    ``skydns2``.
    """
    self.nap_time = config['loop_wait']

    pg = Postgresql(config['postgresql'])
    self.postgresql = pg
    self.ha = Ha(pg, self.get_dcs(pg.name, config))

    # The unpacked values are not used afterwards; the unpacking itself
    # raises ValueError unless 'listen' contains exactly one ':'.
    _host, _port = config['restapi']['listen'].split(':')
    self.api = RestApiServer(self, config['restapi'])

    self.skydns2 = config.get('skydns2')
    self.next_run = time.time()
    self.shutdown_member_ttl = 300
def set_up(self):
    """Create the fixture objects used by the tests.

    Replaces ``os.system`` and ``psycopg2.connect`` with the stand-ins
    ``os_system`` and ``psycopg2_connect``, builds ``self.p`` from a fixed
    configuration, makes sure its data directory exists and creates
    ``self.leader``.
    """
    os.system = os_system

    replication = {
        'username': '******',
        'password': '******',
        'network': '127.0.0.1/32',
    }
    pg_config = {
        'name': 'test0',
        'data_dir': 'data/test0',
        'listen': '127.0.0.1, 127.0.0.2:5432',
        'connect_address': '127.0.0.2:5432',
        'replication': replication,
        'parameters': {'foo': 'bar'},
        'recovery_conf': {'foo': 'bar'},
    }
    self.p = Postgresql(pg_config)

    psycopg2.connect = psycopg2_connect

    data_dir = self.p.data_dir
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    self.leader = Member('leader', 'postgres://*****:*****@127.0.0.1:5434/postgres', 28)
def __init__(self, config):
    """Build the etcd client, AWS hook, PostgreSQL wrapper, HA logic and REST API.

    ``config`` is indexed like a dict. The keys read here are ``etcd``
    (including ``ttl``), ``loop_wait``, ``postgresql`` and ``restapi``
    (with a ``listen`` entry). The whole ``config`` is handed to
    ``AWSConnection``.
    """
    # The etcd TTL has to be more than twice the loop wait.
    assert config["etcd"]["ttl"] > 2 * config["loop_wait"]

    self.nap_time = config['loop_wait']
    self.etcd = Etcd(config['etcd'])
    self.aws = AWSConnection(config)

    pg_config = config['postgresql']
    role_change_hook = self.aws.on_role_change
    self.postgresql = Postgresql(pg_config, role_change_hook)
    self.ha = Ha(self.postgresql, self.etcd)

    # The unpacked values are not used afterwards; the unpacking itself
    # raises ValueError unless 'listen' contains exactly one ':'.
    _host, _port = config['restapi']['listen'].split(':')
    self.api = RestApiServer(self, config['restapi'])

    self.next_run = time.time()
def run(config):
    """Serve the status endpoint until the process is interrupted.

    Reads ``config["etcd"]``, ``config["postgresql"]`` and
    ``config["haproxy_status"]["listen"]`` (a ``host:port`` string), then
    runs an ``HTTPServer`` whose request handler comes from
    ``handler(postgresql, etcd)``.

    The server is created before the ``try`` block so that the cleanup code
    never refers to an unbound ``server``; the listening socket is closed on
    every exit path, not only on Ctrl-C.
    """
    etcd = Etcd(config["etcd"])
    postgresql = Postgresql(config["postgresql"])

    from BaseHTTPServer import HTTPServer

    host, port = config["haproxy_status"]["listen"].split(":")
    server = HTTPServer((host, int(port)), handler(postgresql, etcd))
    logging.info('listening on %s:%s', host, port)

    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print('^C received, shutting down server')
    finally:
        server.socket.close()
def set_up(self):
    """Create the fixture objects used by the tests.

    Replaces ``subprocess.call``, ``shutil.copy`` and ``psycopg2.connect``
    with the stand-ins ``subprocess_call``, ``nop`` and ``psycopg2_connect``,
    builds ``self.p`` from a fixed configuration, makes sure its data
    directory exists and creates the ``leader``, ``other`` and ``me`` members.
    """
    subprocess.call = subprocess_call
    shutil.copy = nop

    callbacks = {
        'on_start': 'true',
        'on_stop': 'true',
        'on_restart': 'true',
        'on_role_change': 'true',
        'on_reload': 'true',
    }
    pg_config = {
        'name': 'test0',
        'scope': 'batman',
        'data_dir': 'data/test0',
        'listen': '127.0.0.1, *:5432',
        'connect_address': '127.0.0.2:5432',
        'pg_hba': ['hostssl all all 0.0.0.0/0 md5', 'host all all 0.0.0.0/0 md5'],
        'superuser': {'password': ''},
        'admin': {'username': '******', 'password': '******'},
        'replication': {'username': '******', 'password': '******', 'network': '127.0.0.1/32'},
        'parameters': {'foo': 'bar'},
        'recovery_conf': {'foo': 'bar'},
        'callbacks': callbacks,
        'restore': 'true',
    }
    self.p = Postgresql(pg_config)

    psycopg2.connect = psycopg2_connect

    data_dir = self.p.data_dir
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    self.leader = Member(0, 'leader', 'postgres://*****:*****@127.0.0.1:5435/postgres', None, None, 28)
    self.other = Member(0, 'test1', 'postgres://*****:*****@127.0.0.1:5433/postgres', None, None, 28)
    self.me = Member(0, 'test0', 'postgres://*****:*****@127.0.0.1:5434/postgres', None, None, 28)
def set_up(self):
    """Create the fixture objects used by the tests.

    Replaces ``subprocess.call``, ``shutil.copy`` and ``psycopg2.connect``
    with the stand-ins ``subprocess_call``, ``nop`` and ``psycopg2_connect``,
    builds ``self.p`` (with a change callback that always returns True),
    makes sure its data directory exists and creates ``self.leader``.
    """
    subprocess.call = subprocess_call
    shutil.copy = nop

    pg_config = {
        'name': 'test0',
        'data_dir': 'data/test0',
        'listen': '127.0.0.1, *:5432',
        'connect_address': '127.0.0.2:5432',
        'pg_hba': ['hostssl all all 0.0.0.0/0 md5', 'host all all 0.0.0.0/0 md5'],
        'superuser': {'password': ''},
        'admin': {'username': '******', 'password': '******'},
        'replication': {'username': '******', 'password': '******', 'network': '127.0.0.1/32'},
        'parameters': {'foo': 'bar'},
        'recovery_conf': {'foo': 'bar'},
    }
    self.p = Postgresql(pg_config, on_change_callback=lambda state: True)

    psycopg2.connect = psycopg2_connect

    data_dir = self.p.data_dir
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    self.leader = Member('leader', 'postgres://*****:*****@127.0.0.1:5434/postgres', None, 28)
class Patroni:
    """Top-level daemon object tying PostgreSQL, the DCS and the REST API together."""

    def __init__(self, config):
        """Build all collaborators from ``config``.

        The keys read here are ``loop_wait``, ``postgresql``, ``restapi``
        (with a ``listen`` entry) and, optionally, ``skydns2``; the DCS is
        chosen by :meth:`get_dcs`.
        """
        self.nap_time = config['loop_wait']
        self.postgresql = Postgresql(config['postgresql'])
        self.ha = Ha(self.postgresql, self.get_dcs(self.postgresql.name, config))
        # The unpacked values are unused; the unpacking raises ValueError
        # unless 'listen' contains exactly one ':'.
        host, port = config['restapi']['listen'].split(':')
        self.api = RestApiServer(self, config['restapi'])
        self.skydns2 = config.get('skydns2')
        self.next_run = time.time()
        self.shutdown_member_ttl = 300

    @staticmethod
    def get_dcs(name, config):
        """Return the DCS client selected by ``config``.

        ``etcd`` is preferred over ``zookeeper`` when both keys are present.

        Raises:
            Exception: if neither ``etcd`` nor ``zookeeper`` is configured.
        """
        if 'etcd' in config:
            # NOTE: this check disappears when Python runs with -O.
            assert config['etcd']['ttl'] > 2 * config['loop_wait']
            return Etcd(name, config['etcd'])
        if 'zookeeper' in config:
            return ZooKeeper(name, config['zookeeper'])
        raise Exception('Can not find sutable configuration of distributed configuration store')

    def touch_member(self, ttl=None):
        """Refresh this node's member entry in the DCS.

        Returns True without contacting the DCS when the cluster view already
        holds an entry for this node whose ``real_ttl()`` is greater than
        ``shutdown_member_ttl``; otherwise returns whatever the DCS
        ``touch_member`` call returns.
        """
        connection_string = self.postgresql.connection_string + '?application_name=' + self.api.connection_string
        if self.ha.cluster:
            for m in self.ha.cluster.members:
                # Do not update member TTL when it is far from being expired
                if m.name == self.postgresql.name and m.real_ttl() > self.shutdown_member_ttl:
                    return True
        return self.ha.dcs.touch_member(connection_string, ttl)

    def initialize(self):
        """Wait for the DCS, then bootstrap or attach the local PostgreSQL."""
        # wait for the DCS to be available
        while not self.touch_member():
            logger.info('waiting on DCS')
            sleep(5)

        # is data directory empty?
        if self.postgresql.data_directory_empty():
            # racing to initialize
            if self.ha.dcs.race('/initialize'):
                self.postgresql.initialize()
                self.ha.dcs.take_leader()
                self.postgresql.start()
            else:
                # Lost the race: retry every 5 seconds until a leader exists
                # and syncing from it succeeds.
                while True:
                    leader = self.ha.dcs.current_leader()
                    if leader and self.postgresql.sync_from_leader(leader):
                        self.postgresql.write_recovery_conf(leader)
                        self.postgresql.start()
                        break
                    sleep(5)
        elif self.postgresql.is_running():
            self.postgresql.load_replication_slots()

    def schedule_next_run(self):
        """Advance ``next_run`` by ``nap_time`` and wait until then.

        When the new deadline is already in the past, no waiting happens and
        ``next_run`` is reset to the current time.
        """
        self.next_run += self.nap_time
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
        else:
            self.ha.dcs.sleep(nap_time)

    def run(self):
        """Start the REST API and run the HA loop forever."""
        self.api.start()
        self.next_run = time.time()

        while True:
            self.touch_member()
            logger.info(self.ha.run_cycle())
            try:
                if self.ha.state_handler.is_leader():
                    if self.ha.cluster:
                        self.ha.state_handler.create_replication_slots(self.ha.cluster)
                    # SkyDNS2 support: publish leader
                    if self.skydns2:
                        self.ha.dcs.client.set(self.skydns2['publish_leader'],
                                               '{{"host": "{0}", "port": {1}}}'.format(*self.postgresql.connect_address),
                                               ttl=self.skydns2['ttl'])
                else:
                    self.ha.state_handler.drop_replication_slots()
            except Exception:
                # Only ordinary errors are logged and ignored here;
                # KeyboardInterrupt and SystemExit must still stop the loop.
                logger.exception('Exception when changing replication slots')
            reap_children()
            self.schedule_next_run()
def test_create_connection_users(self):
    """Call ``create_connection_users`` on an instance whose superuser has a username."""
    config = self.p.config
    config['superuser']['username'] = '******'
    Postgresql(config).create_connection_users()
class TestPostgresql(unittest.TestCase):
    """Tests for ``Postgresql`` that run against stand-in system and database calls."""

    def __init__(self, method_name='runTest'):
        # Expose the snake_case fixture methods under the names unittest calls.
        self.setUp = self.set_up
        self.tearDown = self.tear_down
        super(TestPostgresql, self).__init__(method_name)

    def set_up(self):
        """Install the stand-ins and build ``self.p`` and ``self.leader``."""
        subprocess.call = subprocess_call
        shutil.copy = nop
        self.p = Postgresql({'name': 'test0', 'data_dir': 'data/test0', 'listen': '127.0.0.1, *:5432',
                             'connect_address': '127.0.0.2:5432',
                             'pg_hba': ['hostssl all all 0.0.0.0/0 md5', 'host all all 0.0.0.0/0 md5'],
                             'superuser': {'password': ''},
                             'admin': {'username': '******', 'password': '******'},
                             'replication': {'username': '******',
                                             'password': '******',
                                             'network': '127.0.0.1/32'},
                             'parameters': {'foo': 'bar'}, 'recovery_conf': {'foo': 'bar'}},
                            on_change_callback=lambda state: True)
        psycopg2.connect = psycopg2_connect
        if not os.path.exists(self.p.data_dir):
            os.makedirs(self.p.data_dir)
        self.leader = Member('leader', 'postgres://*****:*****@127.0.0.1:5434/postgres', None, 28)

    def tear_down(self):
        """Remove the ``data`` directory tree."""
        shutil.rmtree('data')

    def test_data_directory_empty(self):
        self.assertTrue(self.p.data_directory_empty())

    def test_initialize(self):
        self.assertTrue(self.p.initialize())
        self.assertTrue(os.path.exists(os.path.join(self.p.data_dir, 'pg_hba.conf')))

    def test_start(self):
        self.assertFalse(self.p.start())
        self.p.is_running = is_running
        # Create an empty postmaster.pid before starting a second time.
        with open(os.path.join(self.p.data_dir, 'postmaster.pid'), 'w'):
            pass
        self.assertTrue(self.p.start())

    def test_sync_from_leader(self):
        self.assertTrue(self.p.sync_from_leader(self.leader))

    def test_follow_the_leader(self):
        self.p.demote(self.leader)
        self.p.follow_the_leader(None)
        self.p.demote(self.leader)
        self.p.follow_the_leader(self.leader)
        self.p.follow_the_leader(Member('leader', 'postgres://*****:*****@127.0.0.1:5435/postgres', None, 28))

    def test_create_replication_slots(self):
        self.p.start()
        me = Member('test0', 'postgres://*****:*****@127.0.0.1:5434/postgres', None, 28)
        other = Member('test1', 'postgres://*****:*****@127.0.0.1:5433/postgres', None, 28)
        cluster = Cluster(True, self.leader, 0, [me, other, self.leader])
        self.p.create_replication_slots(cluster)

    def test_query(self):
        self.p.query('select 1')
        self.assertRaises(psycopg2.InterfaceError, self.p.query, 'InterfaceError')
        self.assertRaises(psycopg2.OperationalError, self.p.query, 'blabla')
        self.p._connection.closed = 2
        self.assertRaises(psycopg2.OperationalError, self.p.query, 'blabla')
        self.p._connection.closed = 2
        self.p.disconnect = false
        self.assertRaises(psycopg2.OperationalError, self.p.query, 'blabla')

    def test_is_healthiest_node(self):
        leader = Member('leader', 'postgres://*****:*****@127.0.0.1:5435/postgres', None, 28)
        me = Member('test0', 'postgres://*****:*****@127.0.0.1:5434/postgres', None, 28)
        other = Member('test1', 'postgres://*****:*****@127.0.0.1:5433/postgres', None, 28)
        cluster = Cluster(True, leader, 0, [me, other, leader])
        self.assertTrue(self.p.is_healthiest_node(cluster))
        self.p.is_leader = false
        self.assertFalse(self.p.is_healthiest_node(cluster))
        self.p.xlog_position = xlog_position
        self.assertTrue(self.p.is_healthiest_node(cluster))
        self.p.config['maximum_lag_on_failover'] = -2
        self.assertFalse(self.p.is_healthiest_node(cluster))

    def test_is_leader(self):
        self.p.is_promoted = True
        self.assertTrue(self.p.is_leader())
        self.assertFalse(self.p.is_promoted)

    def test_reload(self):
        self.assertTrue(self.p.reload())

    def test_is_healthy(self):
        self.assertTrue(self.p.is_healthy())
        self.p.is_running = is_running
        self.assertFalse(self.p.is_healthy())

    def test_promote(self):
        self.assertTrue(self.p.promote())

    def test_last_operation(self):
        # assertEqual instead of the deprecated alias assertEquals.
        self.assertEqual(self.p.last_operation(), 0)
def run(config):
    """Bring PostgreSQL up under governor control, then loop forever.

    Start-up takes one of three paths: initialise an empty data directory
    when the etcd "/initialize" race is won, sync it from the current leader
    when the race is lost, or start an existing data directory with no
    leader set. After that every iteration runs one HA cycle, creates a
    physical replication slot for each other member while this node is the
    leader, refreshes the member entry and sleeps ``config["loop_wait"]``
    seconds. On ``urllib2.URLError`` the node follows no leader and waits
    for etcd again.
    """
    readonly_msg = "running in readonly mode; cannot participate in cluster HA without etcd"
    slot_sql = "DO LANGUAGE plpgsql $$DECLARE somevar VARCHAR; BEGIN SELECT slot_name INTO somevar FROM pg_replication_slots WHERE slot_name = '%(slot)s' LIMIT 1; IF NOT FOUND THEN PERFORM pg_create_physical_replication_slot('%(slot)s'); END IF; END$$;"

    etcd = Etcd(config["etcd"])
    postgresql = Postgresql(config["postgresql"])
    ha = Ha(postgresql, etcd)

    atexit.register(stop_postgresql, postgresql)
    logging.info("Governor Starting up")

    if not postgresql.data_directory_empty():
        logging.info("Governor Starting up: Existing Data Dir")
        postgresql.follow_no_leader()
        logging.info("Governor Starting up: Starting Postgres")
        postgresql.start()
    else:
        logging.info("Governor Starting up: Empty Data Dir")
        # racing to initialize
        wait_for_etcd("cannot initialize member without ETCD", etcd, postgresql)
        if etcd.race("/initialize", postgresql.name):
            logging.info("Governor Starting up: Initialisation Race ... WON!!!")
            logging.info("Governor Starting up: Initialise Postgres")
            postgresql.initialize()
            logging.info("Governor Starting up: Initialise Complete")
            etcd.take_leader(postgresql.name)
            logging.info("Governor Starting up: Starting Postgres")
            postgresql.start()
        else:
            logging.info("Governor Starting up: Initialisation Race ... LOST")
            logging.info("Governor Starting up: Sync Postgres from Leader")
            # Retry every 5 seconds until a leader exists and the sync works.
            while True:
                leader = etcd.current_leader()
                if leader and postgresql.sync_from_leader(leader):
                    logging.info("Governor Starting up: Sync Completed")
                    postgresql.write_recovery_conf(leader)
                    logging.info("Governor Starting up: Starting Postgres")
                    postgresql.start()
                    break
                time.sleep(5)

    wait_for_etcd(readonly_msg, etcd, postgresql)
    logging.info("Governor Running: Starting Running Loop")

    while True:
        try:
            cycle_result = ha.run_cycle()
            logging.info("Governor Running: %s" % cycle_result)

            # create replication slots
            if postgresql.is_leader():
                logging.info("Governor Running: I am the Leader")
                nodes = etcd.get_client_path("/members?recursive=true")["node"]["nodes"]
                for node in nodes:
                    # The member name is the last path component of the key.
                    member = node["key"].rsplit('/', 1)[-1]
                    if member == postgresql.name:
                        continue
                    postgresql.query(slot_sql % {"slot": member})

            etcd.touch_member(postgresql.name, postgresql.connection_string)
            time.sleep(config["loop_wait"])
        except urllib2.URLError:
            logging.info("Lost connection to etcd, setting no leader and waiting on etcd")
            postgresql.follow_no_leader()
            wait_for_etcd(readonly_msg, etcd, postgresql)
def run(config):
    """Bring PostgreSQL up under governor control, then loop forever.

    Start-up takes one of three paths: initialise an empty data directory
    when the etcd "/initialize" race is won or etcd reports no members, sync
    it from the current leader otherwise, or start an existing data
    directory with no leader set. After that every iteration runs one HA
    cycle, creates a physical replication slot for each other member while
    this node is the leader, refreshes the member entry and sleeps
    ``config["loop_wait"]`` seconds. On ``urllib2.URLError`` the node follows
    no leader and waits for etcd again.
    """
    readonly_msg = "running in readonly mode; cannot participate in cluster HA without etcd"
    slot_sql = "DO LANGUAGE plpgsql $$DECLARE somevar VARCHAR; BEGIN SELECT slot_name INTO somevar FROM pg_replication_slots WHERE slot_name = '%(slot)s' LIMIT 1; IF NOT FOUND THEN PERFORM pg_create_physical_replication_slot('%(slot)s'); END IF; END$$;"

    etcd = Etcd(config["etcd"])
    postgresql = Postgresql(config["postgresql"])
    ha = Ha(postgresql, etcd)

    atexit.register(stop_postgresql, postgresql)
    logging.info("Governor Starting up")

    if not postgresql.data_directory_empty():
        logging.info("Governor Starting up: Existing Data Dir")
        postgresql.follow_no_leader()
        logging.info("Governor Starting up: Starting Postgres")
        postgresql.start()
    else:
        logging.info("Governor Starting up: Empty Data Dir")
        # racing to initialize
        wait_for_etcd("cannot initialize member without ETCD", etcd, postgresql)
        if etcd.race("/initialize", postgresql.name) or not etcd.members():
            logging.info("Governor Starting up: Initialisation Race ... WON!!!")
            logging.info("Governor Starting up: Initialise Postgres")
            postgresql.initialize()
            logging.info("Governor Starting up: Initialise Complete")
            etcd.take_leader(postgresql.name)
            logging.info("Governor Starting up: Starting Postgres")
            postgresql.start()
        else:
            logging.info("Governor Starting up: Initialisation Race ... LOST")
            logging.info("Governor Starting up: Sync Postgres from Leader")
            # Retry every 5 seconds until a leader exists and the sync works.
            while True:
                leader = etcd.current_leader()
                if leader and postgresql.sync_from_leader(leader):
                    logging.info("Governor Starting up: Sync Completed")
                    postgresql.write_recovery_conf(leader)
                    logging.info("Governor Starting up: Starting Postgres")
                    postgresql.start()
                    break
                time.sleep(5)

    wait_for_etcd(readonly_msg, etcd, postgresql)
    logging.info("Governor Running: Starting Running Loop")

    while True:
        try:
            ha.run_cycle()

            # create replication slots
            if postgresql.is_leader():
                logging.info("Governor Running: I am the Leader")
                listing = etcd.get_client_path("/members?recursive=true")
                # A missing "node" or "nodes" entry means there is nothing to do.
                nodes = listing.get("node", {}).get("nodes", [])
                for node in nodes:
                    # The member name is the last path component of the key.
                    member = node["key"].rsplit('/', 1)[-1]
                    if member == postgresql.name:
                        continue
                    postgresql.query(slot_sql % {"slot": member})

            etcd.touch_member(postgresql.name, postgresql.connection_string)
            time.sleep(config["loop_wait"])
        except urllib2.URLError:
            logging.info("Lost connection to etcd, setting no leader and waiting on etcd")
            postgresql.follow_no_leader()
            wait_for_etcd(readonly_msg, etcd, postgresql)
class TestPostgresql(unittest.TestCase):
    """Tests for ``Postgresql`` that run against stand-in system and database calls."""

    def __init__(self, method_name='runTest'):
        # Expose the snake_case fixture methods under the names unittest calls.
        self.setUp = self.set_up
        self.tearDown = self.tear_down
        super(TestPostgresql, self).__init__(method_name)

    def set_up(self):
        """Install the stand-ins and build ``self.p`` plus three members."""
        subprocess.call = subprocess_call
        shutil.copy = nop
        self.p = Postgresql({'name': 'test0', 'scope': 'batman', 'data_dir': 'data/test0',
                             'listen': '127.0.0.1, *:5432', 'connect_address': '127.0.0.2:5432',
                             'pg_hba': ['hostssl all all 0.0.0.0/0 md5', 'host all all 0.0.0.0/0 md5'],
                             'superuser': {'password': ''},
                             'admin': {'username': '******', 'password': '******'},
                             'replication': {'username': '******',
                                             'password': '******',
                                             'network': '127.0.0.1/32'},
                             'parameters': {'foo': 'bar'}, 'recovery_conf': {'foo': 'bar'},
                             'callbacks': {'on_start': 'true', 'on_stop': 'true',
                                           'on_restart': 'true', 'on_role_change': 'true',
                                           'on_reload': 'true'
                                           },
                             'restore': 'true'})
        psycopg2.connect = psycopg2_connect
        if not os.path.exists(self.p.data_dir):
            os.makedirs(self.p.data_dir)
        self.leader = Member(0, 'leader', 'postgres://*****:*****@127.0.0.1:5435/postgres', None, None, 28)
        self.other = Member(0, 'test1', 'postgres://*****:*****@127.0.0.1:5433/postgres', None, None, 28)
        self.me = Member(0, 'test0', 'postgres://*****:*****@127.0.0.1:5434/postgres', None, None, 28)

    def tear_down(self):
        """Remove the ``data`` directory tree."""
        shutil.rmtree('data')

    def mock_query(self, p):
        """Stand-in for ``Postgresql.query`` that always fails."""
        raise psycopg2.OperationalError("not supported")

    def test_data_directory_empty(self):
        self.assertTrue(self.p.data_directory_empty())

    def test_initialize(self):
        self.assertTrue(self.p.initialize())
        self.assertTrue(os.path.exists(os.path.join(self.p.data_dir, 'pg_hba.conf')))

    def test_start_stop(self):
        self.assertFalse(self.p.start())
        self.p.is_running = is_running
        # Create an empty postmaster.pid before starting a second time.
        with open(os.path.join(self.p.data_dir, 'postmaster.pid'), 'w'):
            pass
        self.assertTrue(self.p.start())
        self.assertTrue(self.p.stop())

    def test_sync_from_leader(self):
        self.assertTrue(self.p.sync_from_leader(self.leader))

    def test_follow_the_leader(self):
        self.p.demote(self.leader)
        self.p.follow_the_leader(None)
        self.p.demote(self.leader)
        self.p.follow_the_leader(self.leader)
        self.p.follow_the_leader(self.other)

    def test_create_connection_users(self):
        cfg = self.p.config
        cfg['superuser']['username'] = '******'
        p = Postgresql(cfg)
        p.create_connection_users()

    def test_create_replication_slots(self):
        self.p.start()
        cluster = Cluster(True, self.leader, 0, [self.me, self.other, self.leader])
        self.p.create_replication_slots(cluster)

    def test_query(self):
        self.p.query('select 1')
        self.assertRaises(psycopg2.InterfaceError, self.p.query, 'InterfaceError')
        self.assertRaises(psycopg2.OperationalError, self.p.query, 'blabla')
        self.p._connection.closed = 2
        self.assertRaises(psycopg2.OperationalError, self.p.query, 'blabla')
        self.p._connection.closed = 2
        self.p.disconnect = false
        self.assertRaises(psycopg2.OperationalError, self.p.query, 'blabla')

    def test_is_healthiest_node(self):
        cluster = Cluster(True, self.leader, 0, [self.me, self.other, self.leader])
        self.assertTrue(self.p.is_healthiest_node(cluster))
        self.p.is_leader = false
        self.assertFalse(self.p.is_healthiest_node(cluster))
        self.p.xlog_position = lambda: 1
        self.assertTrue(self.p.is_healthiest_node(cluster))
        self.p.xlog_position = lambda: 2
        self.assertFalse(self.p.is_healthiest_node(cluster))
        self.p.config['maximum_lag_on_failover'] = -2
        self.assertFalse(self.p.is_healthiest_node(cluster))

    def test_is_leader(self):
        self.p.is_promoted = True
        self.assertTrue(self.p.is_leader())
        self.assertFalse(self.p.is_promoted)

    def test_reload(self):
        self.assertTrue(self.p.reload())

    def test_is_healthy(self):
        self.assertTrue(self.p.is_healthy())
        self.p.is_running = is_running
        self.assertFalse(self.p.is_healthy())

    def test_promote(self):
        self.assertTrue(self.p.promote())

    def test_last_operation(self):
        # assertEqual instead of the deprecated alias assertEquals.
        self.assertEqual(self.p.last_operation(), '0')

    def test_non_existing_callback(self):
        self.assertFalse(self.p.call_nowait('foobar'))

    def test_is_leader_exception(self):
        self.p.start()
        # From here on every query raises OperationalError.
        self.p.query = self.mock_query
        self.assertTrue(self.p.stop())
config['postgresql']['listen'] = os.getenv('GOVERNOR_POSTGRESQL_LISTEN') if os.getenv('GOVERNOR_POSTGRESQL_READ_ONLY_PORT'): config['postgresql']['read_only_port'] = os.getenv( 'GOVERNOR_POSTGRESQL_READ_ONLY_PORT') if os.getenv('GOVERNOR_POSTGRESQL_DATA_DIR'): config['postgresql']['data_dir'] = os.getenv( 'GOVERNOR_POSTGRESQL_DATA_DIR') if os.getenv('GOVERNOR_POSTGRESQL_REPLICATION_NETWORK'): config['postgresql']['replication']['network'] = os.getenv( 'GOVERNOR_POSTGRESQL_REPLICATION_NETWORK') etcd = Etcd(config["etcd"]) postgresql = Postgresql(config["postgresql"]) ha = Ha(postgresql, etcd) # leave things clean when shutting down, if possible def shutdown(signal, frame): logging.info("Governor Shutting Down: Received Shutdown Signal") try: if ha.has_lock(): logging.info("Governor Shutting Down: Abdicating Leadership") etcd.abdicate(postgresql.name) logging.info("Governor Shutting Down: Removing Membership") etcd.delete_member(postgresql.name) except: logging.exception("Error during Abdication")
class Governor:
    """Daemon object tying etcd, AWS, PostgreSQL, HA logic and the REST API together."""

    def __init__(self, config):
        """Build all collaborators from ``config``.

        The keys read here are ``etcd`` (including ``ttl``), ``loop_wait``,
        ``postgresql`` and ``restapi`` (with a ``listen`` entry).
        """
        # The etcd TTL has to be more than twice the loop wait.
        assert config["etcd"]["ttl"] > 2 * config["loop_wait"]

        self.nap_time = config['loop_wait']
        self.etcd = Etcd(config['etcd'])
        self.aws = AWSConnection(config)

        pg_config = config['postgresql']
        role_change_hook = self.aws.on_role_change
        self.postgresql = Postgresql(pg_config, role_change_hook)
        self.ha = Ha(self.postgresql, self.etcd)

        # The unpacked values are unused; the unpacking raises ValueError
        # unless 'listen' contains exactly one ':'.
        _host, _port = config['restapi']['listen'].split(':')
        self.api = RestApiServer(self, config['restapi'])
        self.next_run = time.time()

    def touch_member(self, ttl=None):
        """Refresh this node's member entry in etcd and return the result."""
        pg = self.postgresql
        conn = pg.connection_string + '?application_name=' + self.api.connection_string
        return self.etcd.touch_member(pg.name, conn, ttl)

    def initialize(self):
        """Wait for etcd, then bootstrap or attach the local PostgreSQL."""
        # FIXME: isn't there a better way testing if etcd is writable?
        # wait for etcd to be available
        while not self.touch_member():
            logging.info('waiting on etcd')
            sleep(5)

        pg = self.postgresql

        # is data directory empty?
        if not pg.data_directory_empty():
            if pg.is_running():
                pg.load_replication_slots()
            return

        # racing to initialize
        if self.etcd.race('/initialize', pg.name):
            pg.initialize()
            self.etcd.take_leader(pg.name)
            pg.start()
            return

        # FIXME: touch_member?
        # Lost the race: retry every 5 seconds until a leader exists and
        # syncing from it succeeds.
        while True:
            leader = self.etcd.current_leader()
            if leader and pg.sync_from_leader(leader):
                pg.write_recovery_conf(leader)
                pg.start()
                return
            sleep(5)

    def schedule_next_run(self):
        """Advance ``next_run`` by ``nap_time`` and sleep until then.

        When the new deadline is already in the past, no sleep happens and
        ``next_run`` is reset to the current time.
        """
        self.next_run += self.nap_time
        now = time.time()
        remaining = self.next_run - now
        if remaining > 0:
            sleep(remaining)
        else:
            self.next_run = now

    def run(self):
        """Start the REST API and run the HA loop forever."""
        self.api.start()
        self.next_run = time.time()

        while True:
            self.touch_member()
            cycle_result = self.ha.run_cycle()
            logging.info(cycle_result)
            self.schedule_next_run()
#!/usr/bin/env python
from BaseHTTPServer import BaseHTTPRequestHandler
from helpers.etcd import Etcd
from helpers.postgresql import Postgresql
import sys, yaml, socket

# Load the configuration named by the first command-line argument. The
# ``with`` block closes the file even when parsing raises.
# NOTE(review): yaml.load can construct arbitrary Python objects; this is only
# acceptable while the config file is trusted -- consider yaml.safe_load.
with open(sys.argv[1], "r") as f:
    config = yaml.load(f.read())

etcd = Etcd(config["etcd"])
postgresql = Postgresql(config["postgresql"])


class StatusHandler(BaseHTTPRequestHandler):
    """Answer 200 when this node is the leader recorded in etcd, 503 otherwise."""

    def do_GET(self):
        return self.do_ANY()

    def do_OPTIONS(self):
        return self.do_ANY()

    def do_ANY(self):
        """Send the status code for the current leadership state."""
        leader = etcd.current_leader()
        # No leader known: report "not the leader" instead of failing on the
        # subscript below.
        if leader and postgresql.name == leader["hostname"]:
            self.send_response(200)
        else:
            self.send_response(503)
        self.end_headers()
        self.wfile.write('\r\n')
        return
#!/usr/bin/env python
import sys, os, yaml, time, urllib2, atexit, syslog
from socket import gethostname
from helpers.postgresql import Postgresql
from helpers.kms import Kms
from helpers.ec2 import Ec2

# add the PostgreSQL binaries to the system path
psql_bin_path = "/usr/pgsql-9.4/bin"
os.environ['PATH'] += os.pathsep + psql_bin_path

# read the config; the ``with`` block closes the file even when parsing raises
# NOTE(review): yaml.load can construct arbitrary Python objects; this is only
# acceptable while the config file is trusted -- consider yaml.safe_load.
with open(sys.argv[1], "r") as f:
    config = yaml.load(f.read())

# kms is needed to decrypt config
kms = Kms(config["kms"])

# configure postgres: the node name is the short host name and the listen
# address is this instance's IP plus the configured port
ec2 = Ec2()
our_ip = ec2.ec2_ip()
hostname = gethostname()
config["postgresql"]["name"] = hostname.split('.')[0]
config["postgresql"]["listen"] = our_ip + ":" + str(config["postgresql"]["port"])
postgresql = Postgresql(config["postgresql"], kms, hostname)

# start postgres
postgresql.start()
def run(config):
    """Bring PostgreSQL up under governor control, then loop forever.

    Start-up takes one of three paths: initialise an empty data directory
    when the etcd "/initialize" race is won, sync it from the current leader
    when the race is lost, or start an existing data directory with no
    leader set. After that every iteration runs one HA cycle, ensures (as
    leader) or drops (otherwise) the replication slot of each other member,
    refreshes the member entry and sleeps ``config["loop_wait"]`` seconds.
    On ``urllib2.URLError`` or ``socket.timeout`` the node follows no leader
    and waits for etcd again.
    """
    readonly_msg = "running in readonly mode; cannot participate in cluster HA without etcd"

    etcd = Etcd(config["etcd"])
    postgresql = Postgresql(config["postgresql"])
    ha = Ha(postgresql, etcd)

    atexit.register(stop_postgresql, postgresql)
    signal.signal(signal.SIGTERM, signalhandler)
    logging.info("Governor Starting up")

    if not postgresql.data_directory_empty():
        logging.info("Governor Starting up: Existing Data Dir")
        postgresql.follow_no_leader()
        logging.info("Governor Starting up: Starting Postgres")
        postgresql.start()
    else:
        logging.info("Governor Starting up: Empty Data Dir")
        # racing to initialize
        wait_for_etcd("cannot initialize member without ETCD", etcd, postgresql)
        if etcd.race("/initialize", postgresql.name):
            logging.info("Governor Starting up: Initialisation Race ... WON!!!")
            logging.info("Governor Starting up: Initialise Postgres")
            postgresql.initialize()
            logging.info("Governor Starting up: Initialise Complete")
            etcd.take_leader(postgresql.name)
            logging.info("Governor Starting up: Starting Postgres")
            postgresql.start()
        else:
            logging.info("Governor Starting up: Initialisation Race ... LOST")
            logging.info("Governor Starting up: Sync Postgres from Leader")
            # Retry every 5 seconds until a leader exists and the sync works.
            while True:
                leader = etcd.current_leader()
                if leader and postgresql.sync_from_leader(leader):
                    logging.info("Governor Starting up: Sync Completed")
                    postgresql.write_recovery_conf(leader)
                    logging.info("Governor Starting up: Starting Postgres")
                    postgresql.start()
                    break
                time.sleep(5)

    wait_for_etcd(readonly_msg, etcd, postgresql)
    logging.info("Governor Running: Starting Running Loop")

    while True:
        try:
            cycle_result = ha.run_cycle()
            logging.info("Governor Running: %s" % cycle_result)

            # create replication slots
            if postgresql.is_leader():
                logging.info("Governor Running: I am the Leader")

            # NOTE(review): nesting reconstructed from flattened source; the
            # member loop is taken to run on every node so that the drop
            # branch below is reachable -- confirm against the original file.
            for node in etcd.members():
                member = node["hostname"]
                if member == postgresql.name:
                    continue
                slot_name = postgresql.replication_slot_name(member)
                if postgresql.is_leader():
                    postgresql.ensure_replication_slot(slot_name)
                else:
                    postgresql.drop_replication_slot(slot_name)

            etcd.touch_member(postgresql.name, postgresql.connection_string)
            time.sleep(config["loop_wait"])
        except (urllib2.URLError, socket.timeout):
            logging.info("Lost connection to etcd, setting no leader and waiting on etcd")
            postgresql.follow_no_leader()
            wait_for_etcd(readonly_msg, etcd, postgresql)
import logging from helpers.etcd import Etcd from helpers.postgresql import Postgresql from helpers.ha import Ha LOG_LEVEL = logging.DEBUG if os.getenv('DEBUG', None) else logging.INFO logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', level=LOG_LEVEL) f = open(sys.argv[1], "r") config = yaml.load(f.read()) f.close() etcd = Etcd(config["etcd"]) postgresql = Postgresql(config["postgresql"]) ha = Ha(postgresql, etcd) # stop postgresql on script exit def stop_postgresql(): postgresql.stop() atexit.register(stop_postgresql) # wait for etcd to be available def wait_for_etcd(message): etcd_ready = False while not etcd_ready: try: etcd.touch_member(postgresql.name, postgresql.connection_string) etcd_ready = True except urllib2.URLError:
import sys, os, yaml, time, urllib2, atexit import logging from helpers.etcd import Etcd from helpers.postgresql import Postgresql from helpers.ha import Ha logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', level=logging.INFO) f = open(sys.argv[1], "r") config = yaml.load(f.read()) f.close() etcd = Etcd(config["etcd"]) postgresql = Postgresql(config["postgresql"]) ha = Ha(postgresql, etcd) # stop postgresql on script exit def stop_postgresql(): postgresql.stop() atexit.register(stop_postgresql) # wait for etcd to be available etcd_ready = False while not etcd_ready: try: etcd.touch_member(postgresql.name, postgresql.connection_string)