class TestEtcd(unittest.TestCase):
    """Exercise the ``Etcd`` helper with the ``requests`` verbs and ``time.sleep`` replaced."""

    def __init__(self, method_name='runTest'):
        # Register the snake_case fixture as unittest's setUp hook before
        # the base class is initialised.
        self.setUp = self.set_up
        super(TestEtcd, self).__init__(method_name)

    def set_up(self):
        """Install the substitute HTTP verbs and sleep, then build the client under test."""
        replacements = (
            ('get', requests_get),
            ('put', requests_put),
            ('delete', requests_delete),
        )
        for verb, substitute in replacements:
            setattr(requests, verb, substitute)
        time.sleep = time_sleep

        settings = {'ttl': 30, 'host': 'localhost', 'scope': 'test'}
        self.etcd = Etcd(settings)

    def test_get_client_path(self):
        with self.assertRaises(Exception):
            self.etcd.get_client_path('', 2)

    def test_put_client_path(self):
        outcome = self.etcd.put_client_path('')
        self.assertFalse(outcome)

    def test_delete_client_path(self):
        outcome = self.etcd.delete_client_path('')
        self.assertFalse(outcome)

    def test_get_cluster(self):
        # With the initial base URL the lookup is expected to fail.
        with self.assertRaises(EtcdError):
            self.etcd.get_cluster()

        # Pointing at the 'remote' URL must yield a Cluster object.
        self.etcd.base_client_url = self.etcd.base_client_url.replace('local', 'remote')
        self.assertIsInstance(self.etcd.get_cluster(), Cluster)

        # The 'other' URL only needs to complete without raising.
        self.etcd.base_client_url = self.etcd.base_client_url.replace('remote', 'other')
        self.etcd.get_cluster()

    def test_current_leader(self):
        with self.assertRaises(CurrentLeaderError):
            self.etcd.current_leader()
def set_up(self):
    """Create ``self.etcd`` with ``Client.machines`` patched and read/write replaced."""
    time.sleep = time_sleep

    machine_urls = ['http://localhost:2379', 'http://localhost:4001']
    settings = {'ttl': 30, 'host': 'localhost:2379', 'scope': 'test'}

    # NOTE(review): source indentation was lost; the client attribute
    # assignments are kept inside the patch context -- confirm.
    with patch.object(Client, 'machines') as mock_machines:
        mock_machines.__get__ = Mock(return_value=machine_urls)
        self.etcd = Etcd('foo', settings)
        client = self.etcd.client
        client.write = etcd_write
        client.read = etcd_read
class TestEtcd(unittest.TestCase):
    """Tests for the ``Etcd`` DCS wrapper using a patched python-etcd ``Client``."""

    def __init__(self, method_name='runTest'):
        # Register the snake_case fixture as unittest's setUp hook before
        # the base class is initialised.
        self.setUp = self.set_up
        super(TestEtcd, self).__init__(method_name)

    def set_up(self):
        """Create ``self.etcd`` with ``Client.machines`` patched and read/write replaced."""
        time.sleep = time_sleep
        machine_urls = ['http://localhost:2379', 'http://localhost:4001']
        settings = {'ttl': 30, 'host': 'localhost:2379', 'scope': 'test'}
        # NOTE(review): source indentation was lost; the client attribute
        # assignments are kept inside the patch context -- confirm.
        with patch.object(Client, 'machines') as mock_machines:
            mock_machines.__get__ = Mock(return_value=machine_urls)
            self.etcd = Etcd('foo', settings)
            self.etcd.client.write = etcd_write
            self.etcd.client.read = etcd_read

    def test_get_etcd_client(self):
        time.sleep = time_sleep_exception
        with patch.object(etcd.Client, 'machines') as mock_machines:
            mock_machines.__get__ = Mock(side_effect=etcd.EtcdException)
            with self.assertRaises(Exception):
                self.etcd.get_etcd_client({'discovery_srv': 'test'})

    def test_get_cluster(self):
        initial = self.etcd.get_cluster()
        self.assertIsInstance(initial, Cluster)

        # Switch to the '/service/nocluster' path: still a Cluster, but no leader.
        self.etcd._base_path = '/service/nocluster'
        leaderless = self.etcd.get_cluster()
        self.assertIsInstance(leaderless, Cluster)
        self.assertIsNone(leaderless.leader)

    def test_current_leader(self):
        leader = self.etcd.current_leader()
        self.assertIsInstance(leader, Member)

        self.etcd._base_path = '/service/noleader'
        self.assertIsNone(self.etcd.current_leader())

    def test_touch_member(self):
        touched = self.etcd.touch_member('', '')
        self.assertFalse(touched)

    def test_take_leader(self):
        taken = self.etcd.take_leader()
        self.assertFalse(taken)

    def test_update_leader(self):
        updated = self.etcd.update_leader(MockPostgresql())
        self.assertTrue(updated)

    def test_race(self):
        won = self.etcd.race('')
        self.assertFalse(won)

    def test_delete_leader(self):
        self.etcd.client.delete = etcd_delete
        deleted = self.etcd.delete_leader()
        self.assertFalse(deleted)
def set_up(self):
    """Build the ``Etcd`` instance under test while ``Client.machines`` is patched."""
    time.sleep = time_sleep

    etcd_settings = {'ttl': 30, 'host': 'localhost:2379', 'scope': 'test'}

    # NOTE(review): source indentation was lost; the read/write overrides
    # are kept inside the patch context -- confirm.
    with patch.object(Client, 'machines') as mock_machines:
        known_machines = ['http://localhost:2379', 'http://localhost:4001']
        mock_machines.__get__ = Mock(return_value=known_machines)
        self.etcd = Etcd('foo', etcd_settings)
        for attr, substitute in (('write', etcd_write), ('read', etcd_read)):
            setattr(self.etcd.client, attr, substitute)
def get_dcs(name, config):
    """Build the distributed configuration store client selected by *config*.

    An ``'etcd'`` section is checked first and wins over ``'zookeeper'`` when
    both are present.

    :param name: passed through as the first argument of the store class.
    :param config: full configuration mapping; for etcd it must also contain
        the top-level ``'loop_wait'`` key.
    :returns: ``Etcd(name, config['etcd'])`` or
        ``ZooKeeper(name, config['zookeeper'])``.
    :raises AssertionError: the etcd ``ttl`` is not greater than twice
        ``loop_wait``.
    :raises Exception: neither an ``'etcd'`` nor a ``'zookeeper'`` section exists.
    """
    if 'etcd' in config:
        ttl = config['etcd']['ttl']
        loop_wait = config['loop_wait']
        # Raised explicitly rather than through an ``assert`` statement so the
        # check is not compiled away under ``python -O``. The exception type
        # is kept as AssertionError for backward compatibility.
        if not ttl > 2 * loop_wait:
            raise AssertionError(
                'etcd ttl (%r) must be greater than twice loop_wait (%r)' % (ttl, loop_wait))
        return Etcd(name, config['etcd'])

    if 'zookeeper' in config:
        return ZooKeeper(name, config['zookeeper'])

    raise Exception('Can not find suitable configuration of distributed configuration store')
class TestEtcd(unittest.TestCase):
    """Unit tests for ``Etcd`` run against a patched python-etcd ``Client``."""

    def __init__(self, method_name='runTest'):
        # unittest looks up ``setUp``; alias it to the snake_case fixture
        # before handing over to the base initialiser.
        self.setUp = self.set_up
        super(TestEtcd, self).__init__(method_name)

    def set_up(self):
        """Replace ``time.sleep`` and construct ``self.etcd`` under a ``machines`` patch."""
        time.sleep = time_sleep
        etcd_settings = {'ttl': 30, 'host': 'localhost:2379', 'scope': 'test'}
        # NOTE(review): source indentation was lost; the read/write overrides
        # are kept inside the patch context -- confirm.
        with patch.object(Client, 'machines') as mock_machines:
            known_machines = ['http://localhost:2379', 'http://localhost:4001']
            mock_machines.__get__ = Mock(return_value=known_machines)
            self.etcd = Etcd('foo', etcd_settings)
            client = self.etcd.client
            client.write = etcd_write
            client.read = etcd_read

    def test_get_etcd_client(self):
        time.sleep = time_sleep_exception
        discovery_config = {'discovery_srv': 'test'}
        with patch.object(etcd.Client, 'machines') as mock_machines:
            mock_machines.__get__ = Mock(side_effect=etcd.EtcdException)
            with self.assertRaises(Exception):
                self.etcd.get_etcd_client(discovery_config)

    def test_get_cluster(self):
        self.assertIsInstance(self.etcd.get_cluster(), Cluster)

        self.etcd._base_path = '/service/nocluster'
        result = self.etcd.get_cluster()
        self.assertIsInstance(result, Cluster)
        self.assertIsNone(result.leader)

    def test_current_leader(self):
        self.assertIsInstance(self.etcd.current_leader(), Member)

        self.etcd._base_path = '/service/noleader'
        missing = self.etcd.current_leader()
        self.assertIsNone(missing)

    def test_touch_member(self):
        result = self.etcd.touch_member('', '')
        self.assertFalse(result)

    def test_take_leader(self):
        result = self.etcd.take_leader()
        self.assertFalse(result)

    def test_update_leader(self):
        postgresql = MockPostgresql()
        self.assertTrue(self.etcd.update_leader(postgresql))

    def test_race(self):
        result = self.etcd.race('')
        self.assertFalse(result)

    def test_delete_leader(self):
        self.etcd.client.delete = etcd_delete
        result = self.etcd.delete_leader()
        self.assertFalse(result)
def __init__(self, config):
    """Wire up the etcd client, AWS connection, PostgreSQL wrapper, HA logic and REST API.

    :param config: full configuration mapping with at least the ``'etcd'``,
        ``'loop_wait'``, ``'postgresql'`` and ``'restapi'`` keys.
    :raises AssertionError: the etcd ``ttl`` is not greater than twice
        ``loop_wait``.
    """
    ttl = config["etcd"]["ttl"]
    loop_wait = config["loop_wait"]
    # Raised explicitly rather than through an ``assert`` statement so the
    # check is not compiled away under ``python -O``. The exception type is
    # kept as AssertionError for backward compatibility.
    if not ttl > 2 * loop_wait:
        raise AssertionError(
            'etcd ttl (%r) must be greater than twice loop_wait (%r)' % (ttl, loop_wait))

    self.nap_time = loop_wait
    self.etcd = Etcd(config['etcd'])
    self.aws = AWSConnection(config)
    self.postgresql = Postgresql(config['postgresql'], self.aws.on_role_change)
    self.ha = Ha(self.postgresql, self.etcd)
    # The two values are not used here; the unpacking is kept because it
    # raises ValueError early when 'listen' is not of the form 'host:port'.
    _host, _port = config['restapi']['listen'].split(':')
    self.api = RestApiServer(self, config['restapi'])
    self.next_run = time.time()
def run(config):
    """Serve the status HTTP endpoint until the process is interrupted.

    Builds the etcd and PostgreSQL helpers from *config*, binds an
    ``HTTPServer`` to ``config["haproxy_status"]["listen"]`` (``host:port``)
    and serves requests with the class returned by
    ``handler(postgresql, etcd)``.

    :param config: configuration mapping with ``"etcd"``, ``"postgresql"`` and
        ``"haproxy_status"`` sections.
    """
    etcd = Etcd(config["etcd"])
    postgresql = Postgresql(config["postgresql"])

    from BaseHTTPServer import HTTPServer
    host, port = config["haproxy_status"]["listen"].split(":")
    # The server is created before the ``try`` so the cleanup below can never
    # hit an unbound ``server`` when the interrupt arrives early.
    server = HTTPServer((host, int(port)), handler(postgresql, etcd))
    logging.info('listening on %s:%s', host, port)
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print('^C received, shutting down server')
    finally:
        # Release the listening socket on every exit path, not only on ^C.
        server.socket.close()
def set_up(self):
    """Create the mock PostgreSQL, a patched ``Etcd`` and the ``Ha`` instance under test."""
    self.p = MockPostgresql()
    etcd_settings = {'ttl': 30, 'host': 'remotehost:2379', 'scope': 'test'}

    # NOTE(review): source indentation was lost; everything after the Etcd
    # construction is kept inside the patch context -- confirm.
    with patch.object(Client, 'machines') as mock_machines:
        mock_machines.__get__ = Mock(return_value=['http://remotehost:2379'])

        dcs = Etcd('foo', etcd_settings)
        self.e = dcs
        dcs.client.read = etcd_read
        dcs.client.write = etcd_write

        ha = Ha(self.p, dcs)
        self.ha = ha
        # Load once through the real method, then pin an unlocked cluster and
        # replace the loader with ``nop``.
        ha.load_cluster_from_dcs()
        ha.cluster = get_unlocked_cluster()
        ha.load_cluster_from_dcs = nop
def run(config):
    """Bring up the local PostgreSQL node and then run the HA loop forever.

    Start-up: with an empty data directory the node races for
    ``/initialize`` in etcd; the winner (or a node that sees no members)
    initialises PostgreSQL and takes the leader key, everyone else syncs from
    the current leader. With an existing data directory PostgreSQL is started
    following no leader.

    Main loop: runs one HA cycle, creates a physical replication slot for
    every other member when this node is the leader, refreshes this node's
    member key and sleeps ``config["loop_wait"]`` seconds.

    :param config: mapping with ``"etcd"``, ``"postgresql"`` and
        ``"loop_wait"`` entries.

    NOTE(review): indentation was reconstructed from a whitespace-collapsed
    copy of this function -- confirm the nesting against the original file.
    """
    etcd = Etcd(config["etcd"])
    postgresql = Postgresql(config["postgresql"])
    ha = Ha(postgresql, etcd)
    # stop_postgresql(postgresql) is registered to run at interpreter exit.
    atexit.register(stop_postgresql, postgresql)
    logging.info("Governor Starting up")
    # is data directory empty?
    if postgresql.data_directory_empty():
        logging.info("Governor Starting up: Empty Data Dir")
        # racing to initialize
        wait_for_etcd("cannot initialize member without ETCD", etcd, postgresql)
        # Initialise here when the race is won or when etcd lists no members.
        if etcd.race("/initialize", postgresql.name) or not etcd.members():
            logging.info(
                "Governor Starting up: Initialisation Race ... WON!!!")
            logging.info("Governor Starting up: Initialise Postgres")
            postgresql.initialize()
            logging.info("Governor Starting up: Initialise Complete")
            etcd.take_leader(postgresql.name)
            logging.info("Governor Starting up: Starting Postgres")
            postgresql.start()
        else:
            logging.info("Governor Starting up: Initialisation Race ... LOST")
            logging.info("Governor Starting up: Sync Postgres from Leader")
            synced_from_leader = False
            # Retry every 5 seconds until a leader exists and the sync succeeds.
            while not synced_from_leader:
                leader = etcd.current_leader()
                if not leader:
                    time.sleep(5)
                    continue
                if postgresql.sync_from_leader(leader):
                    logging.info("Governor Starting up: Sync Completed")
                    postgresql.write_recovery_conf(leader)
                    logging.info("Governor Starting up: Starting Postgres")
                    postgresql.start()
                    synced_from_leader = True
                else:
                    time.sleep(5)
    else:
        logging.info("Governor Starting up: Existing Data Dir")
        postgresql.follow_no_leader()
        logging.info("Governor Starting up: Starting Postgres")
        postgresql.start()
    wait_for_etcd(
        "running in readonly mode; cannot participate in cluster HA without etcd",
        etcd, postgresql)
    logging.info("Governor Running: Starting Running Loop")
    while True:
        try:
            ha.run_cycle()
            # create replication slots
            if postgresql.is_leader():
                logging.info("Governor Running: I am the Leader")
                # Missing "node"/"nodes" keys are treated as "no members".
                for node in etcd.get_client_path(
                        "/members?recursive=true").get("node", {}).get("nodes", []):
                    # The member name is the last path segment of the etcd key.
                    member = node["key"].split('/')[-1]
                    if member != postgresql.name:
                        # NOTE(review): the member name is interpolated
                        # directly into the SQL text -- confirm names are
                        # restricted to safe characters.
                        postgresql.query(
                            "DO LANGUAGE plpgsql $$DECLARE somevar VARCHAR; BEGIN SELECT slot_name INTO somevar FROM pg_replication_slots WHERE slot_name = '%(slot)s' LIMIT 1; IF NOT FOUND THEN PERFORM pg_create_physical_replication_slot('%(slot)s'); END IF; END$$;" % {"slot": member})
            etcd.touch_member(postgresql.name, postgresql.connection_string)
            time.sleep(config["loop_wait"])
        except urllib2.URLError:
            # etcd is unreachable: stop following any leader and wait for etcd.
            logging.info(
                "Lost connection to etcd, setting no leader and waiting on etcd"
            )
            postgresql.follow_no_leader()
            wait_for_etcd(
                "running in readonly mode; cannot participate in cluster HA without etcd",
                etcd, postgresql)
def run(config):
    """Bring up the local PostgreSQL node and then run the HA loop forever.

    Start-up: with an empty data directory the node races for
    ``/initialize`` in etcd; the winner initialises PostgreSQL and takes the
    leader key, everyone else syncs from the current leader. With an existing
    data directory PostgreSQL is started following no leader.

    Main loop: runs and logs one HA cycle, creates a physical replication
    slot for every other member when this node is the leader, refreshes this
    node's member key and sleeps ``config["loop_wait"]`` seconds.

    :param config: mapping with ``"etcd"``, ``"postgresql"`` and
        ``"loop_wait"`` entries.

    NOTE(review): indentation was reconstructed from a whitespace-collapsed
    copy of this function -- confirm the nesting against the original file.
    """
    etcd = Etcd(config["etcd"])
    postgresql = Postgresql(config["postgresql"])
    ha = Ha(postgresql, etcd)
    # stop_postgresql(postgresql) is registered to run at interpreter exit.
    atexit.register(stop_postgresql, postgresql)
    logging.info("Governor Starting up")

    # is data directory empty?
    if postgresql.data_directory_empty():
        logging.info("Governor Starting up: Empty Data Dir")
        # racing to initialize
        wait_for_etcd("cannot initialize member without ETCD", etcd, postgresql)
        if etcd.race("/initialize", postgresql.name):
            logging.info("Governor Starting up: Initialisation Race ... WON!!!")
            logging.info("Governor Starting up: Initialise Postgres")
            postgresql.initialize()
            logging.info("Governor Starting up: Initialise Complete")
            etcd.take_leader(postgresql.name)
            logging.info("Governor Starting up: Starting Postgres")
            postgresql.start()
        else:
            logging.info("Governor Starting up: Initialisation Race ... LOST")
            logging.info("Governor Starting up: Sync Postgres from Leader")
            synced_from_leader = False
            # Retry every 5 seconds until a leader exists and the sync succeeds.
            while not synced_from_leader:
                leader = etcd.current_leader()
                if not leader:
                    time.sleep(5)
                    continue
                if postgresql.sync_from_leader(leader):
                    logging.info("Governor Starting up: Sync Completed")
                    postgresql.write_recovery_conf(leader)
                    logging.info("Governor Starting up: Starting Postgres")
                    postgresql.start()
                    synced_from_leader = True
                else:
                    time.sleep(5)
    else:
        logging.info("Governor Starting up: Existing Data Dir")
        postgresql.follow_no_leader()
        logging.info("Governor Starting up: Starting Postgres")
        postgresql.start()

    wait_for_etcd("running in readonly mode; cannot participate in cluster HA without etcd", etcd, postgresql)
    logging.info("Governor Running: Starting Running Loop")
    while True:
        try:
            # Lazy logging arguments: a tuple result from run_cycle() cannot
            # break the format string.
            logging.info("Governor Running: %s", ha.run_cycle())

            # create replication slots
            if postgresql.is_leader():
                logging.info("Governor Running: I am the Leader")
                # A response without "node"/"nodes" is treated as "no members"
                # so it cannot raise an uncaught KeyError and end the loop.
                members = etcd.get_client_path("/members?recursive=true")
                for node in members.get("node", {}).get("nodes", []):
                    # The member name is the last path segment of the etcd key.
                    member = node["key"].split('/')[-1]
                    if member != postgresql.name:
                        # NOTE(review): the member name is interpolated
                        # directly into the SQL text -- confirm names are
                        # restricted to safe characters.
                        postgresql.query("DO LANGUAGE plpgsql $$DECLARE somevar VARCHAR; BEGIN SELECT slot_name INTO somevar FROM pg_replication_slots WHERE slot_name = '%(slot)s' LIMIT 1; IF NOT FOUND THEN PERFORM pg_create_physical_replication_slot('%(slot)s'); END IF; END$$;" % {"slot": member})

            etcd.touch_member(postgresql.name, postgresql.connection_string)
            time.sleep(config["loop_wait"])
        except urllib2.URLError:
            # etcd is unreachable: stop following any leader and wait for etcd.
            logging.info("Lost connection to etcd, setting no leader and waiting on etcd")
            postgresql.follow_no_leader()
            wait_for_etcd("running in readonly mode; cannot participate in cluster HA without etcd", etcd, postgresql)
class Governor:
    """Owns the etcd client, PostgreSQL wrapper, HA logic and REST API of one node."""

    def __init__(self, config):
        """Build all collaborators from *config*.

        :param config: full configuration mapping with at least the
            ``'etcd'``, ``'loop_wait'``, ``'postgresql'`` and ``'restapi'`` keys.
        :raises AssertionError: the etcd ``ttl`` is not greater than twice
            ``loop_wait``.
        """
        ttl = config["etcd"]["ttl"]
        loop_wait = config["loop_wait"]
        # Raised explicitly rather than through an ``assert`` statement so the
        # check is not compiled away under ``python -O``. The exception type
        # is kept as AssertionError for backward compatibility.
        if not ttl > 2 * loop_wait:
            raise AssertionError(
                'etcd ttl (%r) must be greater than twice loop_wait (%r)' % (ttl, loop_wait))

        self.nap_time = loop_wait
        self.etcd = Etcd(config['etcd'])
        self.aws = AWSConnection(config)
        self.postgresql = Postgresql(config['postgresql'], self.aws.on_role_change)
        self.ha = Ha(self.postgresql, self.etcd)
        # The two values are not used here; the unpacking is kept because it
        # raises ValueError early when 'listen' is not of the form 'host:port'.
        _host, _port = config['restapi']['listen'].split(':')
        self.api = RestApiServer(self, config['restapi'])
        self.next_run = time.time()

    def touch_member(self, ttl=None):
        """Refresh this node's member entry in etcd and return etcd's result.

        The stored value is the PostgreSQL connection string with the REST
        API connection string appended as ``application_name``.
        """
        connection_string = self.postgresql.connection_string + '?application_name=' + self.api.connection_string
        return self.etcd.touch_member(self.postgresql.name, connection_string, ttl)

    def initialize(self):
        """Wait until etcd accepts a member update, then bootstrap PostgreSQL if needed."""
        # FIXME: isn't there a better way testing if etcd is writable?
        # wait for etcd to be available
        while not self.touch_member():
            logging.info('waiting on etcd')
            sleep(5)

        # is data directory empty?
        if self.postgresql.data_directory_empty():
            # racing to initialize
            if self.etcd.race('/initialize', self.postgresql.name):
                self.postgresql.initialize()
                self.etcd.take_leader(self.postgresql.name)
                self.postgresql.start()
            else:
                # FIXME: touch_member?
                # Retry every 5 seconds until a leader exists and syncing
                # from it succeeds.
                while True:
                    leader = self.etcd.current_leader()
                    if leader and self.postgresql.sync_from_leader(leader):
                        self.postgresql.write_recovery_conf(leader)
                        self.postgresql.start()
                        break
                    sleep(5)
        elif self.postgresql.is_running():
            self.postgresql.load_replication_slots()

    def schedule_next_run(self):
        """Sleep until the next scheduled cycle, or reset the schedule if it is overdue."""
        self.next_run += self.nap_time
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            # Already late: restart the schedule from now instead of sleeping.
            self.next_run = current_time
        else:
            sleep(nap_time)

    def run(self):
        """Start the REST API and run HA cycles forever, one per ``nap_time`` interval."""
        self.api.start()
        self.next_run = time.time()

        while True:
            self.touch_member()
            logging.info(self.ha.run_cycle())
            self.schedule_next_run()
#!/usr/bin/env python
from BaseHTTPServer import BaseHTTPRequestHandler
from helpers.etcd import Etcd
from helpers.postgresql import Postgresql
import sys, yaml, socket

# The configuration file path is the first command-line argument. The
# ``with`` block closes the handle even when reading or parsing fails.
# NOTE(review): yaml.load can construct arbitrary Python objects; prefer
# yaml.safe_load unless the config relies on custom tags.
with open(sys.argv[1], "r") as f:
    config = yaml.load(f.read())

etcd = Etcd(config["etcd"])
postgresql = Postgresql(config["postgresql"])


class StatusHandler(BaseHTTPRequestHandler):
    """Answer 200 when this node is the current leader in etcd, 503 otherwise."""

    def do_GET(self):
        return self.do_ANY()

    def do_OPTIONS(self):
        return self.do_ANY()

    def do_ANY(self):
        """Shared handler for GET and OPTIONS: report leader status via the HTTP code."""
        leader = etcd.current_leader()
        # A missing leader is reported as 503 instead of failing on the
        # ``["hostname"]`` lookup.
        if leader and postgresql.name == leader["hostname"]:
            self.send_response(200)
        else:
            self.send_response(503)
        self.end_headers()
        self.wfile.write('\r\n')
        return
def run(config):
    """Bring up the local PostgreSQL node and then run the HA loop forever.

    Start-up: with an empty data directory the node races for
    ``/initialize`` in etcd; the winner initialises PostgreSQL and takes the
    leader key, everyone else syncs from the current leader. With an existing
    data directory PostgreSQL is started following no leader.

    Main loop: runs and logs one HA cycle, ensures or drops a replication
    slot per other member depending on leadership, refreshes this node's
    member key and sleeps ``config["loop_wait"]`` seconds.

    :param config: mapping with ``"etcd"``, ``"postgresql"`` and
        ``"loop_wait"`` entries.

    NOTE(review): indentation was reconstructed from a whitespace-collapsed
    copy of this function -- confirm the nesting against the original file.
    """
    etcd = Etcd(config["etcd"])
    postgresql = Postgresql(config["postgresql"])
    ha = Ha(postgresql, etcd)
    # stop_postgresql(postgresql) is registered to run at interpreter exit;
    # SIGTERM is routed to ``signalhandler``.
    atexit.register(stop_postgresql, postgresql)
    signal.signal(signal.SIGTERM, signalhandler)
    logging.info("Governor Starting up")
    # is data directory empty?
    if postgresql.data_directory_empty():
        logging.info("Governor Starting up: Empty Data Dir")
        # racing to initialize
        wait_for_etcd("cannot initialize member without ETCD", etcd, postgresql)
        if etcd.race("/initialize", postgresql.name):
            logging.info("Governor Starting up: Initialisation Race ... WON!!!")
            logging.info("Governor Starting up: Initialise Postgres")
            postgresql.initialize()
            logging.info("Governor Starting up: Initialise Complete")
            etcd.take_leader(postgresql.name)
            logging.info("Governor Starting up: Starting Postgres")
            postgresql.start()
        else:
            logging.info("Governor Starting up: Initialisation Race ... LOST")
            logging.info("Governor Starting up: Sync Postgres from Leader")
            synced_from_leader = False
            # Retry every 5 seconds until a leader exists and the sync succeeds.
            while not synced_from_leader:
                leader = etcd.current_leader()
                if not leader:
                    time.sleep(5)
                    continue
                if postgresql.sync_from_leader(leader):
                    logging.info("Governor Starting up: Sync Completed")
                    postgresql.write_recovery_conf(leader)
                    logging.info("Governor Starting up: Starting Postgres")
                    postgresql.start()
                    synced_from_leader = True
                else:
                    time.sleep(5)
    else:
        logging.info("Governor Starting up: Existing Data Dir")
        postgresql.follow_no_leader()
        logging.info("Governor Starting up: Starting Postgres")
        postgresql.start()
    wait_for_etcd("running in readonly mode; cannot participate in cluster HA without etcd", etcd, postgresql)
    logging.info("Governor Running: Starting Running Loop")
    while True:
        try:
            logging.info("Governor Running: %s" % ha.run_cycle())
            # create replication slots
            if postgresql.is_leader():
                logging.info("Governor Running: I am the Leader")
            # NOTE(review): the member loop is placed at the same level as
            # the leader check so that the ``else: drop_replication_slot``
            # branch below is reachable on non-leaders. If the loop was
            # originally nested under the leader check, that branch would
            # only run when leadership changes mid-loop -- confirm.
            for node in etcd.members():
                member = node["hostname"]
                if member != postgresql.name:
                    if postgresql.is_leader():
                        postgresql.ensure_replication_slot(
                            postgresql.replication_slot_name(member)
                        )
                    else:
                        postgresql.drop_replication_slot(
                            postgresql.replication_slot_name(member)
                        )
            etcd.touch_member(postgresql.name, postgresql.connection_string)
            time.sleep(config["loop_wait"])
        except (urllib2.URLError, socket.timeout):
            # etcd is unreachable or timed out: stop following any leader and
            # wait for etcd.
            logging.info("Lost connection to etcd, setting no leader and waiting on etcd")
            postgresql.follow_no_leader()
            wait_for_etcd("running in readonly mode; cannot participate in cluster HA without etcd", etcd, postgresql)
def set_up(self):
    """Install the substitute ``requests`` verbs and ``time.sleep``, then build ``self.etcd``."""
    replacements = (
        ('get', requests_get),
        ('put', requests_put),
        ('delete', requests_delete),
    )
    for verb, substitute in replacements:
        setattr(requests, verb, substitute)
    time.sleep = time_sleep

    settings = {'ttl': 30, 'host': 'localhost', 'scope': 'test'}
    self.etcd = Etcd(settings)
import sys, os, yaml, time, urllib2, atexit import logging from helpers.etcd import Etcd from helpers.postgresql import Postgresql from helpers.ha import Ha logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', level=logging.INFO) f = open(sys.argv[1], "r") config = yaml.load(f.read()) f.close() etcd = Etcd(config["etcd"]) postgresql = Postgresql(config["postgresql"]) ha = Ha(postgresql, etcd) # wait for etcd to be available etcd_ready = False while not etcd_ready: try: etcd.touch_member(postgresql.name, postgresql.connection_string) etcd_ready = True except urllib2.URLError: logging.info("waiting on etcd") time.sleep(5) # is data directory empty? if postgresql.data_directory_empty():
from helpers.kms import Kms
from helpers.ec2 import Ec2
from helpers.postgresql import Postgresql
from socket import gethostname

# Make binaries under /usr/sbin reachable for commands started from here.
os.environ['PATH'] += os.pathsep + '/usr/sbin'

governor_start_cmd = ['/bin/systemctl', 'start', 'governor']
governor_stop_cmd = ['/bin/systemctl', 'stop', 'governor']

# The configuration file path is the first command-line argument. The
# ``with`` block closes the handle even when reading or parsing fails.
# NOTE(review): yaml.load can construct arbitrary Python objects; prefer
# yaml.safe_load unless the config relies on custom tags.
with open(sys.argv[1], "r") as f:
    config = yaml.load(f.read())

etcd = Etcd(config["etcd"])
kms = Kms(config["kms"])
sns = Sns(config["sns"], kms)

# configure the postgres
ec2 = Ec2()
our_ip = ec2.ec2_ip()
hostname = gethostname()
# Listen address is "<ip from ec2.ec2_ip()>:<configured port>"; the node name
# is the host name up to the first dot.
config["postgresql"]["listen"] = our_ip + ":" + str(config["postgresql"]["port"])
config["postgresql"]["name"] = hostname.split('.')[0]
postgresql = Postgresql(config["postgresql"], kms, hostname)

# vars
data_dir = "/pg_cluster/pgsql/9.4/data/"
archive_file = "/pg_cluster/pgsql/9.4/data.tar.gz"

# make a tar.gz backup of a directory
import sys, os, yaml, time, urllib2, atexit import logging from helpers.etcd import Etcd from helpers.postgresql import Postgresql from helpers.ha import Ha LOG_LEVEL = logging.DEBUG if os.getenv('DEBUG', None) else logging.INFO logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', level=LOG_LEVEL) f = open(sys.argv[1], "r") config = yaml.load(f.read()) f.close() etcd = Etcd(config["etcd"]) postgresql = Postgresql(config["postgresql"]) ha = Ha(postgresql, etcd) # stop postgresql on script exit def stop_postgresql(): postgresql.stop() atexit.register(stop_postgresql) # wait for etcd to be available def wait_for_etcd(message): etcd_ready = False while not etcd_ready: try: etcd.touch_member(postgresql.name, postgresql.connection_string) etcd_ready = True
import sys, os, yaml, time, urllib2, atexit import logging from helpers.etcd import Etcd from helpers.postgresql import Postgresql from helpers.ha import Ha logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', level=logging.INFO) f = open(sys.argv[1], "r") config = yaml.load(f.read()) f.close() etcd = Etcd(config["etcd"]) postgresql = Postgresql(config["postgresql"]) ha = Ha(postgresql, etcd) # stop postgresql on script exit def stop_postgresql(): postgresql.stop() atexit.register(stop_postgresql) # wait for etcd to be available etcd_ready = False while not etcd_ready: try: