예제 #1
0
else:
    leader = etcd.current_leader()
    if leader is not None:
        if leader['hostname'] == postgresql.name:
            # still a leader
            postgresql.start()
        else:
            postgresql.follow_the_leader(leader)
    else:
        postgresql.follow_no_leader()
    postgresql.start()


while True:
    etcd.touch_member(postgresql.name, postgresql.connection_string)
    logging.info(ha.run_cycle())

    # create replication slots
    if postgresql.is_leader():
        for member in etcd.members():
            if member['hostname'] != postgresql.name:
                postgresql.query("""
                    DO LANGUAGE plpgsql $$
                    DECLARE somevar VARCHAR;
                    BEGIN
                        SELECT slot_name INTO somevar
                        FROM pg_replication_slots
                        WHERE slot_name = '%(slot)s'
                        LIMIT 1;
                        IF NOT FOUND THEN
                            PERFORM pg_create_physical_replication_slot('%(slot)s');
예제 #2
0
                logging.info("Governor Starting up: Starting Postgres")
                postgresql.start()
                synced_from_leader = True
            else:
                time.sleep(5)
else:
    logging.info("Governor Starting up: Existing Data Dir")
    postgresql.follow_no_leader()
    logging.info("Governor Starting up: Starting Postgres")
    postgresql.start()

wait_for_etcd("running in readonly mode; cannot participate in cluster HA without etcd")
logging.info("Governor Running: Starting Running Loop")
while True:
    try:
        logging.info("Governor Running: %s" % ha.run_cycle())

        # create replication slots
        if postgresql.is_leader():
            logging.info("Governor Running: I am the Leader")
            for node in etcd.get_client_path("/members?recursive=true")["node"]["nodes"]:
                member = node["key"].split('/')[-1]
                if member != postgresql.name:
                    postgresql.query("DO LANGUAGE plpgsql $$DECLARE somevar VARCHAR; BEGIN SELECT slot_name INTO somevar FROM pg_replication_slots WHERE slot_name = '%(slot)s' LIMIT 1; IF NOT FOUND THEN PERFORM pg_create_physical_replication_slot('%(slot)s'); END IF; END$$;" % {"slot": member})
        etcd.touch_member(postgresql.name, postgresql.connection_string)

        time.sleep(config["loop_wait"])
    except urllib2.URLError:
        logging.info("Lost connection to etcd, setting no leader and waiting on etcd")
        postgresql.follow_no_leader()
        wait_for_etcd("running in readonly mode; cannot participate in cluster HA without etcd")
예제 #3
0
def run(config):
    """Bring the local PostgreSQL node up, then run HA cycles forever.

    Startup depends on the state of the data directory:

    * not empty: follow no leader and start PostgreSQL;
    * empty and the ``/initialize`` race in etcd is won: initialise
      PostgreSQL, call ``etcd.take_leader`` and start the server;
    * empty and the race is lost: poll etcd every 5 seconds until a leader
      is known and a sync from it succeeds, then write the recovery
      configuration and start the server.

    Every iteration of the running loop executes one HA cycle, creates a
    physical replication slot for each other member while this node is the
    leader, refreshes this member's entry in etcd and sleeps for
    ``config["loop_wait"]``.  A ``urllib2.URLError`` raised inside an
    iteration makes the node follow no leader and wait for etcd again.

    ``config`` is indexed with the keys "etcd", "postgresql" and
    "loop_wait".  The loop has no exit condition.
    """
    etcd = Etcd(config["etcd"])
    postgresql = Postgresql(config["postgresql"])
    ha = Ha(postgresql, etcd)

    atexit.register(stop_postgresql, postgresql)
    logging.info("Governor Starting up")

    # Texts reused in several places below.
    readonly_notice = "running in readonly mode; cannot participate in cluster HA without etcd"
    ensure_slot_sql = "DO LANGUAGE plpgsql $$DECLARE somevar VARCHAR; BEGIN SELECT slot_name INTO somevar FROM pg_replication_slots WHERE slot_name = '%(slot)s' LIMIT 1; IF NOT FOUND THEN PERFORM pg_create_physical_replication_slot('%(slot)s'); END IF; END$$;"

    if not postgresql.data_directory_empty():
        logging.info("Governor Starting up: Existing Data Dir")
        postgresql.follow_no_leader()
        logging.info("Governor Starting up: Starting Postgres")
        postgresql.start()
    else:
        logging.info("Governor Starting up: Empty Data Dir")
        # The initialisation race is decided in etcd, so wait for it first.
        wait_for_etcd("cannot initialize member without ETCD", etcd, postgresql)
        if etcd.race("/initialize", postgresql.name):
            logging.info("Governor Starting up: Initialisation Race ... WON!!!")
            logging.info("Governor Starting up: Initialise Postgres")
            postgresql.initialize()
            logging.info("Governor Starting up: Initialise Complete")
            etcd.take_leader(postgresql.name)
            logging.info("Governor Starting up: Starting Postgres")
            postgresql.start()
        else:
            logging.info("Governor Starting up: Initialisation Race ... LOST")
            logging.info("Governor Starting up: Sync Postgres from Leader")
            # Retry until a leader is published and the sync from it works.
            while True:
                leader = etcd.current_leader()
                if leader and postgresql.sync_from_leader(leader):
                    logging.info("Governor Starting up: Sync Completed")
                    postgresql.write_recovery_conf(leader)
                    logging.info("Governor Starting up: Starting Postgres")
                    postgresql.start()
                    break
                time.sleep(5)

    wait_for_etcd(readonly_notice, etcd, postgresql)
    logging.info("Governor Running: Starting Running Loop")
    while True:
        try:
            logging.info("Governor Running: %s" % ha.run_cycle())

            if postgresql.is_leader():
                logging.info("Governor Running: I am the Leader")
                listing = etcd.get_client_path("/members?recursive=true")
                for entry in listing["node"]["nodes"]:
                    # The member name is the last path component of the key.
                    peer = entry["key"].split('/')[-1]
                    if peer != postgresql.name:
                        postgresql.query(ensure_slot_sql % {"slot": peer})
            etcd.touch_member(postgresql.name, postgresql.connection_string)

            time.sleep(config["loop_wait"])
        except urllib2.URLError:
            logging.info("Lost connection to etcd, setting no leader and waiting on etcd")
            postgresql.follow_no_leader()
            wait_for_etcd(readonly_notice, etcd, postgresql)
예제 #4
0
def run(config):
    """Start the local PostgreSQL node and then loop over HA cycles forever.

    With an empty data directory the node waits for etcd and then either
    initialises a new cluster (when it wins the ``/initialize`` race, or
    when ``etcd.members()`` is empty) or keeps retrying every 5 seconds to
    sync from the current leader.  With an existing data directory it
    follows no leader and simply starts PostgreSQL.

    Each running-loop iteration executes one HA cycle, creates a physical
    replication slot for every other member while this node is the leader,
    refreshes this member's entry in etcd and sleeps for
    ``config["loop_wait"]``.  On ``urllib2.URLError`` the node follows no
    leader and waits for etcd before the loop continues.

    ``config`` is indexed with the keys "etcd", "postgresql" and
    "loop_wait".  The loop has no exit condition.
    """
    etcd = Etcd(config["etcd"])
    postgresql = Postgresql(config["postgresql"])
    ha = Ha(postgresql, etcd)

    atexit.register(stop_postgresql, postgresql)
    logging.info("Governor Starting up")

    # Texts reused in several places below.
    readonly_notice = "running in readonly mode; cannot participate in cluster HA without etcd"
    ensure_slot_sql = "DO LANGUAGE plpgsql $$DECLARE somevar VARCHAR; BEGIN SELECT slot_name INTO somevar FROM pg_replication_slots WHERE slot_name = '%(slot)s' LIMIT 1; IF NOT FOUND THEN PERFORM pg_create_physical_replication_slot('%(slot)s'); END IF; END$$;"

    if not postgresql.data_directory_empty():
        logging.info("Governor Starting up: Existing Data Dir")
        postgresql.follow_no_leader()
        logging.info("Governor Starting up: Starting Postgres")
        postgresql.start()
    else:
        logging.info("Governor Starting up: Empty Data Dir")
        # The initialisation race is decided in etcd, so wait for it first.
        wait_for_etcd("cannot initialize member without ETCD", etcd, postgresql)
        # An empty member list is treated the same as winning the race.
        should_initialise = (etcd.race("/initialize", postgresql.name)
                             or not etcd.members())
        if should_initialise:
            logging.info("Governor Starting up: Initialisation Race ... WON!!!")
            logging.info("Governor Starting up: Initialise Postgres")
            postgresql.initialize()
            logging.info("Governor Starting up: Initialise Complete")
            etcd.take_leader(postgresql.name)
            logging.info("Governor Starting up: Starting Postgres")
            postgresql.start()
        else:
            logging.info("Governor Starting up: Initialisation Race ... LOST")
            logging.info("Governor Starting up: Sync Postgres from Leader")
            # Retry until a leader is published and the sync from it works.
            while True:
                leader = etcd.current_leader()
                if leader and postgresql.sync_from_leader(leader):
                    logging.info("Governor Starting up: Sync Completed")
                    postgresql.write_recovery_conf(leader)
                    logging.info("Governor Starting up: Starting Postgres")
                    postgresql.start()
                    break
                time.sleep(5)

    wait_for_etcd(readonly_notice, etcd, postgresql)
    logging.info("Governor Running: Starting Running Loop")
    while True:
        try:
            ha.run_cycle()
            if postgresql.is_leader():
                logging.info("Governor Running: I am the Leader")
                listing = etcd.get_client_path("/members?recursive=true")
                # A response without "node"/"nodes" yields no members at all.
                for entry in listing.get("node", {}).get("nodes", []):
                    # The member name is the last path component of the key.
                    peer = entry["key"].split('/')[-1]
                    if peer != postgresql.name:
                        postgresql.query(ensure_slot_sql % {"slot": peer})
            etcd.touch_member(postgresql.name, postgresql.connection_string)

            time.sleep(config["loop_wait"])
        except urllib2.URLError:
            logging.info("Lost connection to etcd, setting no leader and waiting on etcd")
            postgresql.follow_no_leader()
            wait_for_etcd(readonly_notice, etcd, postgresql)
예제 #5
0
        postgresql.start()
        postgresql.create_replication_user()
    else:
        synced_from_leader = False
        while not synced_from_leader:
            leader = etcd.current_leader()
            if not leader:
                time.sleep(5)
                continue
            if postgresql.sync_from_leader(leader):
                postgresql.write_recovery_conf(leader)
                postgresql.start()
                synced_from_leader = True
            else:
                time.sleep(5)
else:
    postgresql.write_recovery_conf({"address": "postgres://169.0.0.1:5432"})
    postgresql.start()

while True:
    print ha.run_cycle()

    # create replication slots
    if postgresql.is_leader():
        for node in etcd.get_client_path("/members?recursive=true")["node"]["nodes"]:
            member = node["key"].split('/')[-1]
            if member != postgresql.name:
                postgresql.query("DO LANGUAGE plpgsql $$DECLARE somevar VARCHAR; BEGIN SELECT slot_name INTO somevar FROM pg_replication_slots WHERE slot_name = '%(slot)s' LIMIT 1; IF NOT FOUND THEN PERFORM pg_create_physical_replication_slot('%(slot)s'); END IF; END$$;" % {"slot": member})

    time.sleep(config["loop_wait"])
예제 #6
0
                postgresql.start(master=False)
                synced_from_leader = True
            else:
                time.sleep(5)
else:
    logging.info("Governor Starting up: Existing Data Dir")
    postgresql.copy_pg_hba()
    postgresql.follow_no_leader()
    logging.info("Governor Starting up: Starting Postgres")
    postgresql.start(master=False)

showtime()
logging.info("Governor Running: Starting Running Loop")
while True:
    try:
        logging.info("Governor Running: %s" % ha.run_cycle())

        # create replication slots
        if postgresql.is_leader():
            logging.debug("Governor Running: I am the Leader")

            for member in etcd.members():
                member = member['hostname']
                if member != postgresql.name:
                    postgresql.create_replication_slot(member)

        etcd.touch_member(postgresql.name,
                          postgresql.advertised_connection_string)

    except SystemExit as e:
        logging.info("Governor Shutting Down: Exiting Running Loop")
예제 #7
0
파일: governor.py 프로젝트: cigan1/governor
            if not leader:
                time.sleep(5)
                continue
            if postgresql.sync_from_leader(leader):
                postgresql.write_recovery_conf(leader)
                postgresql.start()
                synced_from_leader = True
            else:
                time.sleep(5)
else:
    postgresql.write_recovery_conf({"address": "postgres://169.0.0.1:5432"})
    postgresql.start()

while True:
    try:
        syslog.syslog(str(ha.run_cycle()))
    except Exception as e:
        syslog.syslog(str(e))
        break

    # create replication slots
    if postgresql.is_leader():
        try:
            nodes = etcd.get_client_path("/members?recursive=true")["node"]["nodes"]
        except Exception as e:
            syslog.syslog(str(e))
            syslog.syslog("Shutting down postgresql!")
            postgresql.stop()
            break

        for node in nodes:
예제 #8
0
class Patroni:
    """Drive one PostgreSQL node through bootstrap and the periodic HA loop.

    The instance wires together a ``Postgresql`` wrapper, an ``Ha`` object
    bound to a distributed configuration store (DCS) and a REST API server,
    all built from the ``config`` mapping passed to the constructor.
    """

    def __init__(self, config):
        """Build all collaborators from ``config``.

        Reads the keys 'loop_wait', 'postgresql' and 'restapi', the optional
        key 'skydns2', and whatever ``get_dcs`` needs ('etcd' or 'zookeeper').
        """
        self.nap_time = config['loop_wait']
        self.postgresql = Postgresql(config['postgresql'])
        self.ha = Ha(self.postgresql, self.get_dcs(self.postgresql.name, config))
        # The two values are not used afterwards; the unpacking is kept
        # because it raises ValueError early when 'listen' is not exactly
        # "host:port".
        host, port = config['restapi']['listen'].split(':')
        self.api = RestApiServer(self, config['restapi'])
        self.skydns2 = config.get('skydns2')
        self.next_run = time.time()
        # touch_member() skips the DCS write while the member's remaining
        # TTL is above this threshold.
        self.shutdown_member_ttl = 300

    @staticmethod
    def get_dcs(name, config):
        """Return the DCS client selected by ``config``.

        'etcd' takes precedence over 'zookeeper'.  Raises ``Exception`` when
        neither section is present.
        """
        if 'etcd' in config:
            # The TTL must cover more than two loop iterations.
            assert config['etcd']['ttl'] > 2 * config['loop_wait']

            return Etcd(name, config['etcd'])
        if 'zookeeper' in config:
            return ZooKeeper(name, config['zookeeper'])
        raise Exception('Can not find suitable configuration of distributed configuration store')

    def touch_member(self, ttl=None):
        """Refresh this member's entry in the DCS.

        Returns True without contacting the DCS when the cached cluster view
        lists this member with ``real_ttl()`` above ``shutdown_member_ttl``;
        otherwise returns the result of ``dcs.touch_member``.
        """
        connection_string = self.postgresql.connection_string + '?application_name=' + self.api.connection_string
        if self.ha.cluster:
            for m in self.ha.cluster.members:
                # Do not update member TTL when it is far from being expired
                if m.name == self.postgresql.name and m.real_ttl() > self.shutdown_member_ttl:
                    return True
        return self.ha.dcs.touch_member(connection_string, ttl)

    def initialize(self):
        """Wait for the DCS, then bootstrap PostgreSQL if its data dir is empty.

        The winner of the '/initialize' race initialises a new cluster and
        takes the leader key; every other node retries every 5 seconds until
        it has synced from the current leader.  With a non-empty data
        directory and a running server, only the replication slots are
        loaded.
        """
        # wait for the DCS to be available
        while not self.touch_member():
            logger.info('waiting on DCS')
            sleep(5)

        # is data directory empty?
        if self.postgresql.data_directory_empty():
            # racing to initialize
            if self.ha.dcs.race('/initialize'):
                self.postgresql.initialize()
                self.ha.dcs.take_leader()
                self.postgresql.start()
            else:
                while True:
                    leader = self.ha.dcs.current_leader()
                    if leader and self.postgresql.sync_from_leader(leader):
                        self.postgresql.write_recovery_conf(leader)
                        self.postgresql.start()
                        break
                    sleep(5)
        elif self.postgresql.is_running():
            self.postgresql.load_replication_slots()

    def schedule_next_run(self):
        """Sleep until the next scheduled cycle, or reset the schedule if late."""
        self.next_run += self.nap_time
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            # Already behind schedule: restart the schedule from now instead
            # of trying to catch up.
            self.next_run = current_time
        else:
            self.ha.dcs.sleep(nap_time)

    def run(self):
        """Start the REST API and run HA cycles forever."""
        self.api.start()
        self.next_run = time.time()

        while True:
            self.touch_member()
            logger.info(self.ha.run_cycle())
            try:
                if self.ha.state_handler.is_leader():
                    if self.ha.cluster:
                        self.ha.state_handler.create_replication_slots(self.ha.cluster)

                    # SkyDNS2 support: publish leader
                    if self.skydns2:
                        self.ha.dcs.client.set(self.skydns2['publish_leader'],
                            '{{"host": "{0}", "port": {1}}}'.format(*self.postgresql.connect_address), ttl=self.skydns2['ttl'])
                else:
                    self.ha.state_handler.drop_replication_slots()
            except Exception:
                # Only Exception is caught, so SystemExit and
                # KeyboardInterrupt still stop the loop.
                logger.exception('Exception when changing replication slots')
            reap_children()
            self.schedule_next_run()
예제 #9
0
def run(config):
    """Start the local PostgreSQL node and then loop over HA cycles forever.

    ``stop_postgresql`` is registered with ``atexit`` and ``signalhandler``
    is installed for SIGTERM before anything else happens.

    Startup depends on the state of the data directory:

    * not empty: follow no leader and start PostgreSQL;
    * empty and the ``/initialize`` race in etcd is won: initialise
      PostgreSQL, call ``etcd.take_leader`` and start the server;
    * empty and the race is lost: retry every 5 seconds until a leader is
      known and a sync from it succeeds, then write the recovery
      configuration and start the server.

    Each running-loop iteration executes one HA cycle and then, for every
    other member listed in etcd, ensures that member's replication slot
    when this node is the leader or drops it otherwise.  It finally
    refreshes this member's entry in etcd and sleeps for
    ``config["loop_wait"]``.  ``urllib2.URLError`` and ``socket.timeout``
    make the node follow no leader and wait for etcd again.

    ``config`` is indexed with the keys "etcd", "postgresql" and
    "loop_wait".  The loop has no exit condition.
    """
    etcd = Etcd(config["etcd"])
    postgresql = Postgresql(config["postgresql"])
    ha = Ha(postgresql, etcd)

    atexit.register(stop_postgresql, postgresql)
    signal.signal(signal.SIGTERM, signalhandler)
    logging.info("Governor Starting up")

    # Text reused after startup and after every lost etcd connection.
    readonly_notice = "running in readonly mode; cannot participate in cluster HA without etcd"

    if not postgresql.data_directory_empty():
        logging.info("Governor Starting up: Existing Data Dir")
        postgresql.follow_no_leader()
        logging.info("Governor Starting up: Starting Postgres")
        postgresql.start()
    else:
        logging.info("Governor Starting up: Empty Data Dir")
        # The initialisation race is decided in etcd, so wait for it first.
        wait_for_etcd("cannot initialize member without ETCD", etcd, postgresql)
        if etcd.race("/initialize", postgresql.name):
            logging.info("Governor Starting up: Initialisation Race ... WON!!!")
            logging.info("Governor Starting up: Initialise Postgres")
            postgresql.initialize()
            logging.info("Governor Starting up: Initialise Complete")
            etcd.take_leader(postgresql.name)
            logging.info("Governor Starting up: Starting Postgres")
            postgresql.start()
        else:
            logging.info("Governor Starting up: Initialisation Race ... LOST")
            logging.info("Governor Starting up: Sync Postgres from Leader")
            # Retry until a leader is published and the sync from it works.
            while True:
                leader = etcd.current_leader()
                if leader and postgresql.sync_from_leader(leader):
                    logging.info("Governor Starting up: Sync Completed")
                    postgresql.write_recovery_conf(leader)
                    logging.info("Governor Starting up: Starting Postgres")
                    postgresql.start()
                    break
                time.sleep(5)

    wait_for_etcd(readonly_notice, etcd, postgresql)
    logging.info("Governor Running: Starting Running Loop")
    while True:
        try:
            logging.info("Governor Running: %s" % ha.run_cycle())

            if postgresql.is_leader():
                logging.info("Governor Running: I am the Leader")
            for entry in etcd.members():
                peer = entry["hostname"]
                if peer != postgresql.name:
                    # Leadership is re-checked for every member, so a role
                    # change in the middle of the loop is picked up.
                    if postgresql.is_leader():
                        slot_action = postgresql.ensure_replication_slot
                    else:
                        slot_action = postgresql.drop_replication_slot
                    slot_action(postgresql.replication_slot_name(peer))
            etcd.touch_member(postgresql.name, postgresql.connection_string)

            time.sleep(config["loop_wait"])
        except (urllib2.URLError, socket.timeout):
            logging.info("Lost connection to etcd, setting no leader and waiting on etcd")
            postgresql.follow_no_leader()
            wait_for_etcd(readonly_notice, etcd, postgresql)
예제 #10
0
파일: test_ha.py 프로젝트: billcap/governor
class TestHa(unittest.TestCase):
    # Checks the status string returned by Ha.run_cycle() for different
    # combinations of PostgreSQL state and cluster lock state.
    #
    # `true`, `false`, `nop` and `dead_etcd` are helpers defined elsewhere in
    # this module; presumably callables that return True / return False / do
    # nothing / simulate an unreachable etcd -- confirm against their
    # definitions.

    def __init__(self, method_name='runTest'):
        # Expose the snake_case set_up() under the name unittest calls.
        self.setUp = self.set_up
        super(TestHa, self).__init__(method_name)

    def set_up(self):
        # Replace the HTTP functions of `requests` with the module's stand-ins.
        requests.get = requests_get
        requests.put = requests_put
        requests.delete = requests_delete
        self.p = MockPostgresql()
        self.e = Etcd({'ttl': 30, 'host': 'remotehost', 'scope': 'test'})
        self.ha = Ha(self.p, self.e)
        # Start every test from an empty cluster and keep run_cycle() from
        # reloading it.
        self.ha.cluster = Cluster(None, None, [])
        self.ha.load_cluster_from_etcd = nop

    def test_start_as_slave(self):
        self.p.is_healthy = false
        self.assertEqual(self.ha.run_cycle(), 'started as a secondary')

    def test_start_as_readonly(self):
        self.ha.cluster.is_unlocked = false
        self.p.is_leader = self.p.is_healthy = false
        self.ha.has_lock = true
        self.assertEqual(self.ha.run_cycle(), 'promoted self to leader because i had the session lock')

    def test_acquire_lock_as_master(self):
        self.assertEqual(self.ha.run_cycle(), 'acquired session lock as a leader')

    def test_promoted_by_acquiring_lock(self):
        self.p.is_leader = false
        self.assertEqual(self.ha.run_cycle(), 'promoted self to leader by acquiring session lock')

    def test_demote_after_failing_to_obtain_lock(self):
        self.ha.acquire_lock = false
        self.assertEqual(self.ha.run_cycle(), 'demoted self due after trying and failing to obtain lock')

    def test_follow_new_leader_after_failing_to_obtain_lock(self):
        self.ha.acquire_lock = false
        self.p.is_leader = false
        self.assertEqual(self.ha.run_cycle(), 'following new leader after trying and failing to obtain lock')

    def test_demote_because_not_healthiest(self):
        self.p.is_healthiest_node = false
        self.assertEqual(self.ha.run_cycle(), 'demoting self because i am not the healthiest node')

    def test_follow_new_leader_because_not_healthiest(self):
        self.p.is_healthiest_node = false
        self.p.is_leader = false
        self.assertEqual(self.ha.run_cycle(), 'following a different leader because i am not the healthiest node')

    def test_promote_because_have_lock(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.p.is_leader = false
        self.assertEqual(self.ha.run_cycle(), 'promoted self to leader because i had the session lock')

    def test_leader_with_lock(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.assertEqual(self.ha.run_cycle(), 'no action.  i am the leader with the lock')

    def test_demote_because_not_having_lock(self):
        self.ha.cluster.is_unlocked = false
        self.assertEqual(self.ha.run_cycle(), 'demoting self because i do not have the lock and i was a leader')

    def test_follow_the_leader(self):
        self.ha.cluster.is_unlocked = false
        self.p.is_leader = false
        self.assertEqual(self.ha.run_cycle(), 'no action.  i am a secondary and i am following a leader')

    def test_no_etcd_connection_master_demote(self):
        self.ha.load_cluster_from_etcd = dead_etcd
        self.assertEqual(self.ha.run_cycle(), 'demoted self because etcd is not accessible and i was a leader')
예제 #11
0
class TestHa(unittest.TestCase):
    # Checks the status string returned by Ha.run_cycle() for different
    # combinations of PostgreSQL state and cluster lock state.
    #
    # `true`, `false`, `nop`, `dead_etcd` and `get_unlocked_cluster` are
    # helpers defined elsewhere in this module; presumably callables that
    # return True / return False / do nothing / simulate an unreachable DCS /
    # build a cluster without a leader lock -- confirm against their
    # definitions.

    def __init__(self, method_name='runTest'):
        # Expose the snake_case set_up() under the name unittest calls.
        self.setUp = self.set_up
        super(TestHa, self).__init__(method_name)

    def set_up(self):
        self.p = MockPostgresql()
        # Client.machines is patched while the Etcd object is constructed.
        with patch.object(Client, 'machines') as mock_machines:
            mock_machines.__get__ = Mock(
                return_value=['http://remotehost:2379'])
            self.e = Etcd('foo', {
                'ttl': 30,
                'host': 'remotehost:2379',
                'scope': 'test'
            })
            self.e.client.read = etcd_read
            self.e.client.write = etcd_write
            self.ha = Ha(self.p, self.e)
            self.ha.load_cluster_from_dcs()
            # Start every test from an unlocked cluster and keep run_cycle()
            # from reloading it.
            self.ha.cluster = get_unlocked_cluster()
            self.ha.load_cluster_from_dcs = nop

    def test_load_cluster_from_dcs(self):
        # Only checks that loading does not raise, before and after
        # get_cluster is replaced.
        ha = Ha(self.p, self.e)
        ha.load_cluster_from_dcs()
        self.e.get_cluster = get_unlocked_cluster
        ha.load_cluster_from_dcs()

    def test_start_as_slave(self):
        self.p.is_healthy = false
        self.assertEqual(self.ha.run_cycle(), 'started as a secondary')

    def test_start_as_readonly(self):
        self.ha.cluster.is_unlocked = false
        self.p.is_leader = self.p.is_healthy = false
        self.ha.has_lock = true
        self.assertEqual(
            self.ha.run_cycle(),
            'promoted self to leader because i had the session lock')

    def test_acquire_lock_as_master(self):
        self.assertEqual(self.ha.run_cycle(),
                         'acquired session lock as a leader')

    def test_promoted_by_acquiring_lock(self):
        self.p.is_leader = false
        self.assertEqual(self.ha.run_cycle(),
                         'promoted self to leader by acquiring session lock')

    def test_demote_after_failing_to_obtain_lock(self):
        self.ha.acquire_lock = false
        self.assertEqual(
            self.ha.run_cycle(),
            'demoted self due after trying and failing to obtain lock')

    def test_follow_new_leader_after_failing_to_obtain_lock(self):
        self.ha.acquire_lock = false
        self.p.is_leader = false
        self.assertEqual(
            self.ha.run_cycle(),
            'following new leader after trying and failing to obtain lock')

    def test_demote_because_not_healthiest(self):
        self.p.is_healthiest_node = false
        self.assertEqual(
            self.ha.run_cycle(),
            'demoting self because i am not the healthiest node')

    def test_follow_new_leader_because_not_healthiest(self):
        self.p.is_healthiest_node = false
        self.p.is_leader = false
        self.assertEqual(
            self.ha.run_cycle(),
            'following a different leader because i am not the healthiest node'
        )

    def test_promote_because_have_lock(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.p.is_leader = false
        self.assertEqual(
            self.ha.run_cycle(),
            'promoted self to leader because i had the session lock')

    def test_leader_with_lock(self):
        self.ha.cluster.is_unlocked = false
        self.ha.has_lock = true
        self.assertEqual(self.ha.run_cycle(),
                         'no action.  i am the leader with the lock')

    def test_demote_because_not_having_lock(self):
        self.ha.cluster.is_unlocked = false
        self.assertEqual(
            self.ha.run_cycle(),
            'demoting self because i do not have the lock and i was a leader')

    def test_follow_the_leader(self):
        self.ha.cluster.is_unlocked = false
        self.p.is_leader = false
        self.assertEqual(
            self.ha.run_cycle(),
            'no action.  i am a secondary and i am following a leader')

    def test_no_etcd_connection_master_demote(self):
        self.ha.load_cluster_from_dcs = dead_etcd
        self.assertEqual(
            self.ha.run_cycle(),
            'demoted self because DCS is not accessible and i was a leader')
예제 #12
0
class Governor:
    """Run one PostgreSQL node under etcd-coordinated high availability.

    The constructor builds an ``Etcd`` client, an ``AWSConnection``, a
    ``Postgresql`` wrapper (given ``aws.on_role_change`` as its second
    argument), an ``Ha`` object and a REST API server from ``config``.
    """

    def __init__(self, config):
        """Build all collaborators from ``config``.

        Reads the keys 'etcd', 'loop_wait', 'postgresql' and 'restapi'; the
        whole mapping is also handed to ``AWSConnection``.
        """
        # The etcd TTL must cover more than two loop iterations.
        assert config['etcd']['ttl'] > 2 * config['loop_wait']

        self.nap_time = config['loop_wait']
        self.etcd = Etcd(config['etcd'])
        self.aws = AWSConnection(config)
        self.postgresql = Postgresql(config['postgresql'], self.aws.on_role_change)
        self.ha = Ha(self.postgresql, self.etcd)
        # Neither value is used afterwards; the unpacking fails with
        # ValueError when 'listen' is not exactly "host:port".
        _host, _port = config['restapi']['listen'].split(':')
        self.api = RestApiServer(self, config['restapi'])
        self.next_run = time.time()

    def touch_member(self, ttl=None):
        """Refresh this member's etcd entry and return etcd's result.

        The stored value is the PostgreSQL connection string with the REST
        API connection string appended as ``application_name``.
        """
        member_value = (self.postgresql.connection_string
                        + '?application_name='
                        + self.api.connection_string)
        return self.etcd.touch_member(self.postgresql.name, member_value, ttl)

    def initialize(self):
        """Wait for etcd, then bootstrap PostgreSQL if its data dir is empty.

        The winner of the '/initialize' race initialises a new cluster and
        takes the leader key; every other node retries every 5 seconds until
        it has synced from the current leader.  With a non-empty data
        directory and a running server, only the replication slots are
        loaded.
        """
        # FIXME: isn't there a better way testing if etcd is writable?
        # Block until a member update goes through.
        while True:
            if self.touch_member():
                break
            logging.info('waiting on etcd')
            sleep(5)

        pg = self.postgresql

        if not pg.data_directory_empty():
            if pg.is_running():
                pg.load_replication_slots()
            return

        # Empty data directory: race the other nodes for initialisation.
        if self.etcd.race('/initialize', pg.name):
            pg.initialize()
            self.etcd.take_leader(pg.name)
            pg.start()
            return

        # FIXME: touch_member?
        while True:
            leader = self.etcd.current_leader()
            if leader and pg.sync_from_leader(leader):
                pg.write_recovery_conf(leader)
                pg.start()
                return
            sleep(5)

    def schedule_next_run(self):
        """Sleep until the next scheduled cycle, or reset the schedule if late."""
        self.next_run += self.nap_time
        now = time.time()
        remaining = self.next_run - now
        if remaining <= 0:
            # Behind schedule: restart the schedule from now.
            self.next_run = now
            return
        sleep(remaining)

    def run(self):
        """Start the REST API and run HA cycles forever."""
        self.api.start()
        self.next_run = time.time()

        while True:
            self.touch_member()
            cycle_result = self.ha.run_cycle()
            logging.info(cycle_result)

            self.schedule_next_run()
예제 #13
0
        synced_from_leader = False
        while not synced_from_leader:
            leader = etcd.current_leader()
            if not leader:
                time.sleep(5)
                continue
            if postgresql.sync_from_leader(leader):
                postgresql.write_recovery_conf(leader)
                postgresql.start()
                synced_from_leader = True
            else:
                time.sleep(5)
else:
    postgresql.write_recovery_conf({"address": "postgres://169.0.0.1:5432"})
    postgresql.start()

while True:
    logging.info(ha.run_cycle())

    # create replication slots
    if postgresql.is_leader():
        for node in etcd.get_client_path(
                "/members?recursive=true")["node"]["nodes"]:
            member = node["key"].split('/')[-1]
            if member != postgresql.name:
                postgresql.query(
                    "DO LANGUAGE plpgsql $$DECLARE somevar VARCHAR; BEGIN SELECT slot_name INTO somevar FROM pg_replication_slots WHERE slot_name = '%(slot)s' LIMIT 1; IF NOT FOUND THEN PERFORM pg_create_physical_replication_slot('%(slot)s'); END IF; END$$;"
                    % {"slot": member})

    time.sleep(config["loop_wait"])