예제 #1
0
 def __init__(self, config):
     """Build the PostgreSQL, HA and REST API components from ``config``.

     ``config`` must provide ``loop_wait``, ``postgresql`` and ``restapi``
     (the latter with a ``listen`` entry); ``skydns2`` is optional and
     stored as ``None`` when absent.
     """
     self.nap_time = config['loop_wait']

     self.postgresql = Postgresql(config['postgresql'])
     dcs = self.get_dcs(self.postgresql.name, config)
     self.ha = Ha(self.postgresql, dcs)

     restapi_config = config['restapi']
     # The two parts are not used afterwards, but the unpacking raises
     # ValueError unless 'listen' splits into exactly two ':'-separated parts.
     _host, _port = restapi_config['listen'].split(':')
     self.api = RestApiServer(self, restapi_config)

     self.skydns2 = config.get('skydns2')
     self.next_run = time.time()
     self.shutdown_member_ttl = 300
예제 #2
0
 def set_up(self):
     """Create the fixtures: ``self.p``, ``self.e`` and ``self.ha``.

     Everything after the mock PostgreSQL is built while ``Client.machines``
     is patched to yield a single fixed URL.
     """
     self.p = MockPostgresql()
     etcd_settings = {'ttl': 30, 'host': 'remotehost:2379', 'scope': 'test'}

     with patch.object(Client, 'machines') as machines_patch:
         machines_patch.__get__ = Mock(
             return_value=['http://remotehost:2379'])

         self.e = Etcd('foo', etcd_settings)
         # Route the client's reads and writes through the test doubles.
         self.e.client.read = etcd_read
         self.e.client.write = etcd_write

         self.ha = Ha(self.p, self.e)
         # Load once for real, then pin the cluster to the unlocked fixture
         # and turn any further reload into a no-op.
         self.ha.load_cluster_from_dcs()
         self.ha.cluster = get_unlocked_cluster()
         self.ha.load_cluster_from_dcs = nop
예제 #3
0
def _initialize_or_sync(etcd, postgresql):
    """Fill an empty data directory and start PostgreSQL.

    Waits for etcd, then races for the "/initialize" key. The winner (or a
    node that sees no members in etcd) initialises PostgreSQL and takes the
    leader key; every other node retries every 5 seconds until it has
    synced from the current leader.
    """
    logging.info("Governor Starting up: Empty Data Dir")
    # racing to initialize
    wait_for_etcd("cannot initialize member without ETCD", etcd, postgresql)

    if etcd.race("/initialize", postgresql.name) or not etcd.members():
        logging.info("Governor Starting up: Initialisation Race ... WON!!!")
        logging.info("Governor Starting up: Initialise Postgres")
        postgresql.initialize()
        logging.info("Governor Starting up: Initialise Complete")
        etcd.take_leader(postgresql.name)
        logging.info("Governor Starting up: Starting Postgres")
        postgresql.start()
        return

    logging.info("Governor Starting up: Initialisation Race ... LOST")
    logging.info("Governor Starting up: Sync Postgres from Leader")
    while True:
        leader = etcd.current_leader()
        # Retry when there is no leader yet or when the sync attempt failed.
        if leader and postgresql.sync_from_leader(leader):
            break
        time.sleep(5)

    logging.info("Governor Starting up: Sync Completed")
    postgresql.write_recovery_conf(leader)
    logging.info("Governor Starting up: Starting Postgres")
    postgresql.start()


def _create_missing_replication_slots(etcd, postgresql):
    """Create a physical replication slot for every other member in etcd.

    The slot name is the last path component of each member's etcd key.
    """
    listing = etcd.get_client_path("/members?recursive=true")
    for node in listing.get("node", {}).get("nodes", []):
        member = node["key"].split('/')[-1]
        if member == postgresql.name:
            continue
        # NOTE(review): the member name is interpolated straight into the
        # SQL text. It comes from etcd keys; confirm those are trusted, or
        # validate the name before it reaches this statement.
        postgresql.query(
            "DO LANGUAGE plpgsql $$DECLARE somevar VARCHAR; BEGIN SELECT slot_name INTO somevar FROM pg_replication_slots WHERE slot_name = '%(slot)s' LIMIT 1; IF NOT FOUND THEN PERFORM pg_create_physical_replication_slot('%(slot)s'); END IF; END$$;"
            % {"slot": member})


def run(config):
    """Start PostgreSQL and run the HA loop forever.

    ``config`` must provide the "etcd" and "postgresql" sections and a
    "loop_wait" value, which is the sleep between cycles. The function
    does not return: after start-up it repeats the HA cycle, and on
    ``urllib2.URLError`` it drops the leader and waits for etcd before
    resuming.
    """
    etcd = Etcd(config["etcd"])
    postgresql = Postgresql(config["postgresql"])
    ha = Ha(postgresql, etcd)
    readonly_message = (
        "running in readonly mode; cannot participate in cluster HA without etcd"
    )

    atexit.register(stop_postgresql, postgresql)
    logging.info("Governor Starting up")
    if postgresql.data_directory_empty():
        _initialize_or_sync(etcd, postgresql)
    else:
        logging.info("Governor Starting up: Existing Data Dir")
        postgresql.follow_no_leader()
        logging.info("Governor Starting up: Starting Postgres")
        postgresql.start()

    wait_for_etcd(readonly_message, etcd, postgresql)
    logging.info("Governor Running: Starting Running Loop")
    while True:
        try:
            ha.run_cycle()
            # Only the leader creates replication slots.
            if postgresql.is_leader():
                logging.info("Governor Running: I am the Leader")
                _create_missing_replication_slots(etcd, postgresql)
            etcd.touch_member(postgresql.name, postgresql.connection_string)

            time.sleep(config["loop_wait"])
        except urllib2.URLError:
            logging.info(
                "Lost connection to etcd, setting no leader and waiting on etcd"
            )
            postgresql.follow_no_leader()
            wait_for_etcd(readonly_message, etcd, postgresql)
예제 #4
0
# Environment variables that are set to a non-empty value override the
# matching PostgreSQL settings in ``config``.
read_only_port_override = os.getenv('GOVERNOR_POSTGRESQL_READ_ONLY_PORT')
if read_only_port_override:
    config['postgresql']['read_only_port'] = read_only_port_override

data_dir_override = os.getenv('GOVERNOR_POSTGRESQL_DATA_DIR')
if data_dir_override:
    config['postgresql']['data_dir'] = data_dir_override

replication_network_override = os.getenv(
    'GOVERNOR_POSTGRESQL_REPLICATION_NETWORK')
if replication_network_override:
    config['postgresql']['replication']['network'] = (
        replication_network_override)

# Build the collaborators from the (possibly overridden) configuration.
etcd = Etcd(config["etcd"])
postgresql = Postgresql(config["postgresql"])
ha = Ha(postgresql, etcd)


# leave things clean when shutting down, if possible
def shutdown(signal, frame):
    """Give up leadership and membership in etcd, on a best-effort basis.

    The ``(signal, frame)`` signature matches what a ``signal.signal``
    handler receives; neither argument is used. Relies on the module-level
    ``ha``, ``etcd`` and ``postgresql`` objects.

    Ordinary errors are logged and suppressed so that shutdown can carry
    on. ``SystemExit`` and ``KeyboardInterrupt`` are deliberately not
    caught, so a second termination request is not swallowed.
    """
    logging.info("Governor Shutting Down: Received Shutdown Signal")
    try:
        if ha.has_lock():
            logging.info("Governor Shutting Down: Abdicating Leadership")
            etcd.abdicate(postgresql.name)

        logging.info("Governor Shutting Down: Removing Membership")
        etcd.delete_member(postgresql.name)
    except Exception:
        # Best effort only: record the failure with its traceback and go on.
        logging.exception("Error during Abdication")
예제 #5
0
def _bootstrap_empty_data_dir(etcd, postgresql):
    """Fill an empty data directory and start PostgreSQL.

    Waits for etcd, then races for the "/initialize" key. The winner
    initialises PostgreSQL and takes the leader key; every other node
    retries every 5 seconds until it has synced from the current leader.
    """
    logging.info("Governor Starting up: Empty Data Dir")
    # racing to initialize
    wait_for_etcd("cannot initialize member without ETCD", etcd, postgresql)

    if etcd.race("/initialize", postgresql.name):
        logging.info("Governor Starting up: Initialisation Race ... WON!!!")
        logging.info("Governor Starting up: Initialise Postgres")
        postgresql.initialize()
        logging.info("Governor Starting up: Initialise Complete")
        etcd.take_leader(postgresql.name)
        logging.info("Governor Starting up: Starting Postgres")
        postgresql.start()
        return

    logging.info("Governor Starting up: Initialisation Race ... LOST")
    logging.info("Governor Starting up: Sync Postgres from Leader")
    while True:
        leader = etcd.current_leader()
        # Retry when there is no leader yet or when the sync attempt failed.
        if leader and postgresql.sync_from_leader(leader):
            break
        time.sleep(5)

    logging.info("Governor Starting up: Sync Completed")
    postgresql.write_recovery_conf(leader)
    logging.info("Governor Starting up: Starting Postgres")
    postgresql.start()


def _reconcile_replication_slots(etcd, postgresql, is_leader):
    """Ensure (as leader) or drop (otherwise) a slot for each other member."""
    for node in etcd.members():
        member = node["hostname"]
        if member == postgresql.name:
            continue
        slot = postgresql.replication_slot_name(member)
        if is_leader:
            postgresql.ensure_replication_slot(slot)
        else:
            postgresql.drop_replication_slot(slot)


def run(config):
    """Start PostgreSQL and run the HA loop forever.

    ``config`` must provide the "etcd" and "postgresql" sections and a
    "loop_wait" value, which is the sleep between cycles. The function
    does not return: after start-up it repeats the HA cycle, and on
    ``urllib2.URLError`` or ``socket.timeout`` it drops the leader and
    waits for etcd before resuming.
    """
    etcd = Etcd(config["etcd"])
    postgresql = Postgresql(config["postgresql"])
    ha = Ha(postgresql, etcd)
    readonly_message = (
        "running in readonly mode; cannot participate in cluster HA without etcd"
    )

    atexit.register(stop_postgresql, postgresql)
    signal.signal(signal.SIGTERM, signalhandler)
    logging.info("Governor Starting up")
    if postgresql.data_directory_empty():
        _bootstrap_empty_data_dir(etcd, postgresql)
    else:
        logging.info("Governor Starting up: Existing Data Dir")
        postgresql.follow_no_leader()
        logging.info("Governor Starting up: Starting Postgres")
        postgresql.start()

    wait_for_etcd(readonly_message, etcd, postgresql)
    logging.info("Governor Running: Starting Running Loop")
    while True:
        try:
            logging.info("Governor Running: %s", ha.run_cycle())

            # Query the role once per cycle so the log line and every slot
            # action below are based on the same answer.
            is_leader = postgresql.is_leader()
            if is_leader:
                logging.info("Governor Running: I am the Leader")
            _reconcile_replication_slots(etcd, postgresql, is_leader)
            etcd.touch_member(postgresql.name, postgresql.connection_string)

            time.sleep(config["loop_wait"])
        except (urllib2.URLError, socket.timeout):
            logging.info("Lost connection to etcd, setting no leader and waiting on etcd")
            postgresql.follow_no_leader()
            wait_for_etcd(readonly_message, etcd, postgresql)
예제 #6
0
 def test_load_cluster_from_dcs(self):
     """Load the cluster twice: as set up, then with ``get_cluster`` swapped.

     There are no assertions; the test passes when neither load raises.
     """
     manager = Ha(self.p, self.e)
     manager.load_cluster_from_dcs()

     # Second pass: make the DCS report the unlocked-cluster fixture.
     self.e.get_cluster = get_unlocked_cluster
     manager.load_cluster_from_dcs()