예제 #1
0
파일: __init__.py 프로젝트: forestz/patroni
class Patroni(object):
    PATRONI_CONFIG_VARIABLE = 'PATRONI_CONFIGURATION'

    def __init__(self, config):
        self.nap_time = config['loop_wait']
        self.tags = {tag: value for tag, value in config.get('tags', {}).items()
                     if tag not in ('clonefrom', 'nofailover', 'noloadbalance') or value}
        self.postgresql = Postgresql(config['postgresql'])
        self.dcs = self.get_dcs(self.postgresql.name, config)
        self.version = __version__
        self.api = RestApiServer(self, config['restapi'])
        self.ha = Ha(self)
        self.next_run = time.time()

    @property
    def noloadbalance(self):
        return self.tags.get('noloadbalance', False)

    @property
    def nofailover(self):
        return self.tags.get('nofailover', False)

    @property
    def replicatefrom(self):
        return self.tags.get('replicatefrom')

    @staticmethod
    def get_dcs(name, config):
        if 'etcd' in config:
            from patroni.etcd import Etcd
            return Etcd(name, config['etcd'])
        if 'zookeeper' in config:
            from patroni.zookeeper import ZooKeeper
            return ZooKeeper(name, config['zookeeper'])
        if 'consul' in config:
            from patroni.consul import Consul
            return Consul(name, config['consul'])
        raise PatroniException('Can not find suitable configuration of distributed configuration store')

    def schedule_next_run(self):
        self.next_run += self.nap_time
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
        elif self.dcs.watch(nap_time):
            self.next_run = time.time()

    def run(self):
        self.api.start()
        self.next_run = time.time()

        while True:
            logger.info(self.ha.run_cycle())
            reap_children()
            self.schedule_next_run()
class Patroni(object):
    PATRONI_CONFIG_VARIABLE = 'PATRONI_CONFIGURATION'

    def __init__(self, config):
        self.nap_time = config['loop_wait']
        self.tags = {
            tag: value
            for tag, value in config.get('tags', {}).items()
            if tag not in ('clonefrom', 'nofailover', 'noloadbalance') or value
        }
        self.postgresql = Postgresql(config['postgresql'])
        self.dcs = get_dcs(self.postgresql.name, config)
        self.version = __version__
        self.api = RestApiServer(self, config['restapi'])
        self.ha = Ha(self)
        self.next_run = time.time()

    @property
    def noloadbalance(self):
        return self.tags.get('noloadbalance', False)

    @property
    def nofailover(self):
        return self.tags.get('nofailover', False)

    @property
    def replicatefrom(self):
        return self.tags.get('replicatefrom')

    def schedule_next_run(self):
        self.next_run += self.nap_time
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
        elif self.dcs.watch(nap_time):
            self.next_run = time.time()

    def run(self):
        self.api.start()
        self.next_run = time.time()

        while True:
            logger.info(self.ha.run_cycle())
            reap_children()
            self.schedule_next_run()
class Patroni(object):
    PATRONI_CONFIG_VARIABLE = 'PATRONI_CONFIGURATION'

    def __init__(self, config):
        self.nap_time = config['loop_wait']
        self.tags = {tag: value for tag, value in config.get('tags', {}).items()
                     if tag not in ('clonefrom', 'nofailover', 'noloadbalance') or value}
        self.postgresql = Postgresql(config['postgresql'])
        self.dcs = get_dcs(self.postgresql.name, config)
        self.version = __version__
        self.api = RestApiServer(self, config['restapi'])
        self.ha = Ha(self)
        self.next_run = time.time()

    @property
    def noloadbalance(self):
        return self.tags.get('noloadbalance', False)

    @property
    def nofailover(self):
        return self.tags.get('nofailover', False)

    @property
    def replicatefrom(self):
        return self.tags.get('replicatefrom')

    def schedule_next_run(self):
        self.next_run += self.nap_time
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
        elif self.dcs.watch(nap_time):
            self.next_run = time.time()

    def run(self):
        self.api.start()
        self.next_run = time.time()

        while True:
            logger.info(self.ha.run_cycle())
            reap_children()
            self.schedule_next_run()
예제 #4
0
class Patroni:
    def __init__(self, config):
        self.nap_time = config['loop_wait']
        self.tags = config.get('tags', dict())
        self.postgresql = Postgresql(config['postgresql'])
        self.dcs = self.get_dcs(self.postgresql.name, config)
        self.api = RestApiServer(self, config['restapi'])
        self.ha = Ha(self)
        self.next_run = time.time()

    @property
    def nofailover(self):
        return self.tags.get('nofailover', False)

    @staticmethod
    def get_dcs(name, config):
        if 'etcd' in config:
            return Etcd(name, config['etcd'])
        if 'zookeeper' in config:
            return ZooKeeper(name, config['zookeeper'])
        raise Exception(
            'Can not find suitable configuration of distributed configuration store'
        )

    def schedule_next_run(self):
        self.next_run += self.nap_time
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
        elif self.dcs.watch(nap_time):
            self.next_run = time.time()

    def run(self):
        self.api.start()
        self.next_run = time.time()

        while True:
            logger.info(self.ha.run_cycle())
            reap_children()
            self.schedule_next_run()
예제 #5
0
class Patroni:
    def __init__(self, config):
        self.nap_time = config["loop_wait"]
        self.tags = config.get("tags", dict())
        self.postgresql = Postgresql(config["postgresql"])
        self.dcs = self.get_dcs(self.postgresql.name, config)
        self.api = RestApiServer(self, config["restapi"])
        self.ha = Ha(self)
        self.next_run = time.time()

    @property
    def nofailover(self):
        return self.tags.get("nofailover", False)

    @staticmethod
    def get_dcs(name, config):
        if "etcd" in config:
            return Etcd(name, config["etcd"])
        if "zookeeper" in config:
            return ZooKeeper(name, config["zookeeper"])
        raise Exception("Can not find suitable configuration of distributed configuration store")

    def schedule_next_run(self):
        self.next_run += self.nap_time
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
        elif self.dcs.watch(nap_time):
            self.next_run = time.time()

    def run(self):
        self.api.start()
        self.next_run = time.time()

        while True:
            logger.info(self.ha.run_cycle())
            reap_children()
            self.schedule_next_run()
예제 #6
0
class Patroni:

    def __init__(self, config):
        self.nap_time = config['loop_wait']
        self.postgresql = Postgresql(config['postgresql'])
        self.dcs = self.get_dcs(self.postgresql.name, config)
        host, port = config['restapi']['listen'].split(':')
        self.api = RestApiServer(self, config['restapi'])
        self.ha = Ha(self)
        self.next_run = time.time()

    @staticmethod
    def get_dcs(name, config):
        if 'etcd' in config:
            return Etcd(name, config['etcd'])
        if 'zookeeper' in config:
            return ZooKeeper(name, config['zookeeper'])
        raise Exception('Can not find sutable configuration of distributed configuration store')

    def schedule_next_run(self):
        self.next_run += self.nap_time
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
        elif self.dcs.watch(nap_time):
            self.next_run = time.time()

    def run(self):
        self.api.start()
        self.next_run = time.time()

        while True:
            logger.info(self.ha.run_cycle())
            reap_children()
            self.schedule_next_run()
예제 #7
0
파일: __init__.py 프로젝트: forestz/patroni
class Patroni(object):
    PATRONI_CONFIG_VARIABLE = 'PATRONI_CONFIGURATION'

    def __init__(self, config):
        self.nap_time = config['loop_wait']
        self.tags = {
            tag: value
            for tag, value in config.get('tags', {}).items()
            if tag not in ('clonefrom', 'nofailover', 'noloadbalance') or value
        }
        self.postgresql = Postgresql(config['postgresql'])
        self.dcs = self.get_dcs(self.postgresql.name, config)
        self.version = __version__
        self.api = RestApiServer(self, config['restapi'])
        self.ha = Ha(self)
        self.next_run = time.time()

    @property
    def noloadbalance(self):
        return self.tags.get('noloadbalance', False)

    @property
    def nofailover(self):
        return self.tags.get('nofailover', False)

    @property
    def replicatefrom(self):
        return self.tags.get('replicatefrom')

    @staticmethod
    def get_dcs(name, config):
        if 'etcd' in config:
            from patroni.etcd import Etcd
            return Etcd(name, config['etcd'])
        if 'zookeeper' in config:
            from patroni.zookeeper import ZooKeeper
            return ZooKeeper(name, config['zookeeper'])
        if 'consul' in config:
            from patroni.consul import Consul
            return Consul(name, config['consul'])
        raise PatroniException(
            'Can not find suitable configuration of distributed configuration store'
        )

    def schedule_next_run(self):
        self.next_run += self.nap_time
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
        elif self.dcs.watch(nap_time):
            self.next_run = time.time()

    def run(self):
        self.api.start()
        self.next_run = time.time()

        while True:
            logger.info(self.ha.run_cycle())
            reap_children()
            self.schedule_next_run()
예제 #8
0
class Patroni(object):
    def __init__(self):
        from patroni.api import RestApiServer
        from patroni.config import Config
        from patroni.dcs import get_dcs
        from patroni.ha import Ha
        from patroni.postgresql import Postgresql
        from patroni.version import __version__
        from patroni.watchdog import Watchdog

        self.setup_signal_handlers()

        self.version = __version__
        self.config = Config()
        self.dcs = get_dcs(self.config)
        self.watchdog = Watchdog(self.config)
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}

    def load_dynamic_configuration(self):
        from patroni.exceptions import DCSError
        while True:
            try:
                cluster = self.dcs.get_cluster()
                if cluster and cluster.config and cluster.config.data:
                    if self.config.set_dynamic_configuration(cluster.config):
                        self.dcs.reload_config(self.config)
                        self.watchdog.reload_config(self.config)
                elif not self.config.dynamic_configuration and 'bootstrap' in self.config:
                    if self.config.set_dynamic_configuration(
                            self.config['bootstrap']['dcs']):
                        self.dcs.reload_config(self.config)
                break
            except DCSError:
                logger.warning('Can not get cluster from dcs')

    def get_tags(self):
        return {
            tag: value
            for tag, value in self.config.get('tags', {}).items()
            if tag not in ('clonefrom', 'nofailover', 'noloadbalance',
                           'nosync') or value
        }

    @property
    def nofailover(self):
        return bool(self.tags.get('nofailover', False))

    @property
    def nosync(self):
        return bool(self.tags.get('nosync', False))

    def reload_config(self):
        try:
            self.tags = self.get_tags()
            self.dcs.reload_config(self.config)
            self.watchdog.reload_config(self.config)
            self.api.reload_config(self.config['restapi'])
            self.postgresql.reload_config(self.config['postgresql'])
        except Exception:
            logger.exception('Failed to reload config_file=%s',
                             self.config.config_file)

    @property
    def replicatefrom(self):
        return self.tags.get('replicatefrom')

    def sighup_handler(self, *args):
        self._received_sighup = True

    def sigterm_handler(self, *args):
        if not self._received_sigterm:
            self._received_sigterm = True
            sys.exit()

    @property
    def noloadbalance(self):
        return bool(self.tags.get('noloadbalance', False))

    def schedule_next_run(self):
        self.next_run += self.dcs.loop_wait
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
            # Release the GIL so we don't starve anyone waiting on async_executor lock
            time.sleep(0.001)
            # Warn user that Patroni is not keeping up
            logger.warning("Loop time exceeded, rescheduling immediately.")
        elif self.ha.watch(nap_time):
            self.next_run = time.time()

    def run(self):
        self.api.start()
        self.next_run = time.time()

        while not self._received_sigterm:
            if self._received_sighup:
                self._received_sighup = False
                if self.config.reload_local_configuration():
                    self.reload_config()

            logger.info(self.ha.run_cycle())

            if self.dcs.cluster and self.dcs.cluster.config and self.dcs.cluster.config.data \
                    and self.config.set_dynamic_configuration(self.dcs.cluster.config):
                self.reload_config()

            if self.postgresql.role != 'uninitialized':
                self.config.save_cache()

            self.schedule_next_run()

    def setup_signal_handlers(self):
        self._received_sighup = False
        self._received_sigterm = False
        signal.signal(signal.SIGHUP, self.sighup_handler)
        signal.signal(signal.SIGTERM, self.sigterm_handler)

    def shutdown(self):
        self.api.shutdown()
        self.ha.shutdown()
예제 #9
0
파일: __init__.py 프로젝트: jinty/patroni
class Patroni:

    def __init__(self, config):
        self.nap_time = config['loop_wait']
        self.postgresql = Postgresql(config['postgresql'])
        self.ha = Ha(self.postgresql, self.get_dcs(self.postgresql.name, config))
        host, port = config['restapi']['listen'].split(':')
        self.api = RestApiServer(self, config['restapi'])
        self.next_run = time.time()
        self.shutdown_member_ttl = 300

    @staticmethod
    def get_dcs(name, config):
        if 'etcd' in config:
            return Etcd(name, config['etcd'])
        if 'zookeeper' in config:
            return ZooKeeper(name, config['zookeeper'])
        raise Exception('Can not find sutable configuration of distributed configuration store')

    def touch_member(self, ttl=None):
        connection_string = self.postgresql.connection_string + '?application_name=' + self.api.connection_string
        if self.ha.cluster:
            for m in self.ha.cluster.members:
                # Do not update member TTL when it is far from being expired
                if m.name == self.postgresql.name and m.real_ttl() > self.shutdown_member_ttl:
                    return True
        return self.ha.dcs.touch_member(connection_string, ttl)

    def cleanup_on_failed_initialization(self):
        """ cleanup the DCS if initialization was not successfull """
        logger.info("removing initialize key after failed attempt to initialize the cluster")
        self.ha.dcs.cancel_initialization()
        self.touch_member(self.shutdown_member_ttl)
        self.postgresql.stop()
        self.postgresql.move_data_directory()

    def initialize(self):
        # wait for etcd to be available
        while not self.touch_member():
            logger.info('waiting on DCS')
            sleep(5)

        # is data directory empty?
        if self.postgresql.data_directory_empty():
            while True:
                try:
                    cluster = self.ha.dcs.get_cluster()
                    if not cluster.is_unlocked():  # the leader already exists
                        if not cluster.initialize:
                            self.ha.dcs.initialize()
                        self.postgresql.bootstrap(cluster.leader)
                        break
                    # racing to initialize
                    elif not cluster.initialize and self.ha.dcs.initialize():
                        try:
                            self.postgresql.bootstrap()
                        except:
                            # bail out and clean the initialize flag.
                            self.cleanup_on_failed_initialization()
                            raise
                        self.ha.dcs.take_leader()
                        break
                except DCSError:
                    logger.info('waiting on DCS')
                sleep(5)
        elif self.postgresql.is_running():
            self.postgresql.load_replication_slots()

    def schedule_next_run(self):
        if self.postgresql.is_promoted:
            self.next_run = time.time()
        self.next_run += self.nap_time
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
        else:
            self.ha.dcs.watch(nap_time)

    def run(self):
        self.api.start()
        self.next_run = time.time()

        while True:
            self.touch_member()
            logger.info(self.ha.run_cycle())
            try:
                if self.ha.state_handler.is_leader():
                    self.ha.cluster and self.ha.state_handler.create_replication_slots(self.ha.cluster)
                else:
                    self.ha.state_handler.drop_replication_slots()
            except:
                logger.exception('Exception when changing replication slots')
            reap_children()
            self.schedule_next_run()
예제 #10
0
class Patroni(AbstractPatroniDaemon):
    def __init__(self, config):
        from patroni.api import RestApiServer
        from patroni.dcs import get_dcs
        from patroni.ha import Ha
        from patroni.postgresql import Postgresql
        from patroni.request import PatroniRequest
        from patroni.watchdog import Watchdog

        super(Patroni, self).__init__(config)

        self.version = __version__
        self.dcs = get_dcs(self.config)
        self.watchdog = Watchdog(self.config)
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.request = PatroniRequest(self.config, True)
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}

    def load_dynamic_configuration(self):
        from patroni.exceptions import DCSError
        while True:
            try:
                cluster = self.dcs.get_cluster()
                if cluster and cluster.config and cluster.config.data:
                    if self.config.set_dynamic_configuration(cluster.config):
                        self.dcs.reload_config(self.config)
                        self.watchdog.reload_config(self.config)
                elif not self.config.dynamic_configuration and 'bootstrap' in self.config:
                    if self.config.set_dynamic_configuration(
                            self.config['bootstrap']['dcs']):
                        self.dcs.reload_config(self.config)
                break
            except DCSError:
                logger.warning('Can not get cluster from dcs')
                time.sleep(5)

    def get_tags(self):
        return {
            tag: value
            for tag, value in self.config.get('tags', {}).items()
            if tag not in ('clonefrom', 'nofailover', 'noloadbalance',
                           'nosync') or value
        }

    @property
    def nofailover(self):
        return bool(self.tags.get('nofailover', False))

    @property
    def nosync(self):
        return bool(self.tags.get('nosync', False))

    def reload_config(self, sighup=False, local=False):
        try:
            super(Patroni, self).reload_config(sighup, local)
            if local:
                self.tags = self.get_tags()
                self.request.reload_config(self.config)
                self.api.reload_config(self.config['restapi'])
            self.watchdog.reload_config(self.config)
            self.postgresql.reload_config(self.config['postgresql'], sighup)
            self.dcs.reload_config(self.config)
        except Exception:
            logger.exception('Failed to reload config_file=%s',
                             self.config.config_file)

    @property
    def replicatefrom(self):
        return self.tags.get('replicatefrom')

    @property
    def noloadbalance(self):
        return bool(self.tags.get('noloadbalance', False))

    def schedule_next_run(self):
        self.next_run += self.dcs.loop_wait
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
            # Release the GIL so we don't starve anyone waiting on async_executor lock
            time.sleep(0.001)
            # Warn user that Patroni is not keeping up
            logger.warning("Loop time exceeded, rescheduling immediately.")
        elif self.ha.watch(nap_time):
            self.next_run = time.time()

    def run(self):
        self.api.start()
        self.next_run = time.time()
        super(Patroni, self).run()

    def _run_cycle(self):
        logger.info(self.ha.run_cycle())

        if self.dcs.cluster and self.dcs.cluster.config and self.dcs.cluster.config.data \
                and self.config.set_dynamic_configuration(self.dcs.cluster.config):
            self.reload_config()

        if self.postgresql.role != 'uninitialized':
            self.config.save_cache()

        self.schedule_next_run()

    def _shutdown(self):
        try:
            self.api.shutdown()
        except Exception:
            logger.exception('Exception during RestApi.shutdown')
        try:
            self.ha.shutdown()
        except Exception:
            logger.exception('Exception during Ha.shutdown')
예제 #11
0
class Patroni(object):
    def __init__(self):
        self.version = __version__
        self.config = Config()
        self.dcs = get_dcs(self.config)
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.nap_time = self.config['loop_wait']
        self.next_run = time.time()

        self._reload_config_scheduled = False
        self._received_sighup = False
        self._received_sigterm = False

    def load_dynamic_configuration(self):
        while True:
            try:
                cluster = self.dcs.get_cluster()
                if cluster and cluster.config:
                    self.config.set_dynamic_configuration(cluster.config)
                elif not self.config.dynamic_configuration and 'bootstrap' in self.config:
                    self.config.set_dynamic_configuration(
                        self.config['bootstrap']['dcs'])
                break
            except DCSError:
                logger.warning('Can not get cluster from dcs')

    def get_tags(self):
        return {
            tag: value
            for tag, value in self.config.get('tags', {}).items()
            if tag not in ('clonefrom', 'nofailover', 'noloadbalance') or value
        }

    def reload_config(self):
        try:
            self.tags = self.get_tags()
            self.nap_time = self.config['loop_wait']
            self.dcs.set_ttl(self.config.get('ttl') or 30)
            self.dcs.set_retry_timeout(
                self.config.get('retry_timeout') or self.nap_time)
            self.api.reload_config(self.config['restapi'])
            self.postgresql.reload_config(self.config['postgresql'])
        except Exception:
            logger.exception('Failed to reload config_file=%s',
                             self.config.config_file)

    def sighup_handler(self, *args):
        self._received_sighup = True

    def sigterm_handler(self, *args):
        if not self._received_sigterm:
            self._received_sigterm = True
            sys.exit()

    @property
    def noloadbalance(self):
        return self.tags.get('noloadbalance', False)

    @property
    def nofailover(self):
        return self.tags.get('nofailover', False)

    @property
    def replicatefrom(self):
        return self.tags.get('replicatefrom')

    def schedule_next_run(self):
        self.next_run += self.nap_time
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
        elif self.dcs.watch(nap_time):
            self.next_run = time.time()

    def run(self):
        self.api.start()
        self.next_run = time.time()

        while not self._received_sigterm:
            if self._received_sighup:
                self._received_sighup = False
                if self.config.reload_local_configuration():
                    self.reload_config()

            logger.info(self.ha.run_cycle())

            cluster = self.dcs.cluster
            if cluster and cluster.config and self.config.set_dynamic_configuration(
                    cluster.config):
                self.reload_config()

            if not self.postgresql.data_directory_empty():
                self.config.save_cache()

            reap_children()
            self.schedule_next_run()

    def setup_signal_handlers(self):
        signal.signal(signal.SIGHUP, self.sighup_handler)
        signal.signal(signal.SIGTERM, self.sigterm_handler)
        signal.signal(signal.SIGCHLD, sigchld_handler)
예제 #12
0
파일: __init__.py 프로젝트: zalando/patroni
class Patroni(object):

    def __init__(self):
        from patroni.api import RestApiServer
        from patroni.config import Config
        from patroni.dcs import get_dcs
        from patroni.ha import Ha
        from patroni.postgresql import Postgresql
        from patroni.version import __version__

        self.setup_signal_handlers()

        self.version = __version__
        self.config = Config()
        self.dcs = get_dcs(self.config)
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}

    def load_dynamic_configuration(self):
        from patroni.exceptions import DCSError
        while True:
            try:
                cluster = self.dcs.get_cluster()
                if cluster and cluster.config:
                    if self.config.set_dynamic_configuration(cluster.config):
                        self.dcs.reload_config(self.config)
                elif not self.config.dynamic_configuration and 'bootstrap' in self.config:
                    if self.config.set_dynamic_configuration(self.config['bootstrap']['dcs']):
                        self.dcs.reload_config(self.config)
                break
            except DCSError:
                logger.warning('Can not get cluster from dcs')

    def get_tags(self):
        return {tag: value for tag, value in self.config.get('tags', {}).items()
                if tag not in ('clonefrom', 'nofailover', 'noloadbalance', 'nosync') or value}

    @property
    def nofailover(self):
        return bool(self.tags.get('nofailover', False))

    @property
    def nosync(self):
        return bool(self.tags.get('nosync', False))

    def reload_config(self):
        try:
            self.tags = self.get_tags()
            self.dcs.reload_config(self.config)
            self.api.reload_config(self.config['restapi'])
            self.postgresql.reload_config(self.config['postgresql'])
        except Exception:
            logger.exception('Failed to reload config_file=%s', self.config.config_file)

    @property
    def replicatefrom(self):
        return self.tags.get('replicatefrom')

    def sighup_handler(self, *args):
        self._received_sighup = True

    def sigterm_handler(self, *args):
        if not self._received_sigterm:
            self._received_sigterm = True
            sys.exit()

    @property
    def noloadbalance(self):
        return bool(self.tags.get('noloadbalance', False))

    def schedule_next_run(self):
        self.next_run += self.dcs.loop_wait
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
            # Release the GIL so we don't starve anyone waiting on async_executor lock
            time.sleep(0.001)
            # Warn user that Patroni is not keeping up
            logger.warning("Loop time exceeded, rescheduling immediately.")
        elif self.ha.watch(nap_time):
            self.next_run = time.time()

    def run(self):
        self.api.start()
        self.next_run = time.time()

        while not self._received_sigterm:
            if self._received_sighup:
                self._received_sighup = False
                if self.config.reload_local_configuration():
                    self.reload_config()

            logger.info(self.ha.run_cycle())

            cluster = self.dcs.cluster
            if cluster and cluster.config and self.config.set_dynamic_configuration(cluster.config):
                self.reload_config()

            if not self.postgresql.data_directory_empty():
                self.config.save_cache()

            self.schedule_next_run()

    def setup_signal_handlers(self):
        self._received_sighup = False
        self._received_sigterm = False
        signal.signal(signal.SIGHUP, self.sighup_handler)
        signal.signal(signal.SIGTERM, self.sigterm_handler)
예제 #13
0
class Patroni(object):

    def __init__(self):
        self.setup_signal_handlers()

        self.version = __version__
        self.config = Config()
        self.dcs = get_dcs(self.config)
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}

    def load_dynamic_configuration(self):
        while True:
            try:
                cluster = self.dcs.get_cluster()
                if cluster and cluster.config:
                    self.config.set_dynamic_configuration(cluster.config)
                elif not self.config.dynamic_configuration and 'bootstrap' in self.config:
                    self.config.set_dynamic_configuration(self.config['bootstrap']['dcs'])
                break
            except DCSError:
                logger.warning('Can not get cluster from dcs')

    def get_tags(self):
        return {tag: value for tag, value in self.config.get('tags', {}).items()
                if tag not in ('clonefrom', 'nofailover', 'noloadbalance') or value}

    @property
    def nofailover(self):
        return self.tags.get('nofailover', False)

    def reload_config(self):
        try:
            self.tags = self.get_tags()
            self.dcs.reload_config(self.config)
            self.api.reload_config(self.config['restapi'])
            self.postgresql.reload_config(self.config['postgresql'])
        except Exception:
            logger.exception('Failed to reload config_file=%s', self.config.config_file)

    @property
    def replicatefrom(self):
        return self.tags.get('replicatefrom')

    def sighup_handler(self, *args):
        self._received_sighup = True

    def sigterm_handler(self, *args):
        if not self._received_sigterm:
            self._received_sigterm = True
            sys.exit()

    @property
    def noloadbalance(self):
        return self.tags.get('noloadbalance', False)

    def schedule_next_run(self):
        self.next_run += self.dcs.loop_wait
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
        elif self.dcs.watch(nap_time):
            self.next_run = time.time()

    def run(self):
        self.api.start()
        self.next_run = time.time()

        while not self._received_sigterm:
            if self._received_sighup:
                self._received_sighup = False
                if self.config.reload_local_configuration():
                    self.reload_config()

            logger.info(self.ha.run_cycle())

            cluster = self.dcs.cluster
            if cluster and cluster.config and self.config.set_dynamic_configuration(cluster.config):
                self.reload_config()

            if not self.postgresql.data_directory_empty():
                self.config.save_cache()

            reap_children()
            self.schedule_next_run()

    def setup_signal_handlers(self):
        self._received_sighup = False
        self._received_sigterm = False
        signal.signal(signal.SIGHUP, self.sighup_handler)
        signal.signal(signal.SIGTERM, self.sigterm_handler)
        signal.signal(signal.SIGCHLD, sigchld_handler)
예제 #14
0
class Patroni(object):
    def __init__(self, conf):
        from patroni.api import RestApiServer
        from patroni.dcs import get_dcs
        from patroni.ha import Ha
        from patroni.log import PatroniLogger
        from patroni.postgresql import Postgresql
        from patroni.request import PatroniRequest
        from patroni.watchdog import Watchdog

        self.setup_signal_handlers()

        self.version = __version__
        self.logger = PatroniLogger()
        self.config = conf
        self.logger.reload_config(self.config.get('log', {}))
        self.dcs = get_dcs(self.config)
        self.watchdog = Watchdog(self.config)
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.request = PatroniRequest(self.config, True)
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}

    def load_dynamic_configuration(self):
        from patroni.exceptions import DCSError
        while True:
            try:
                cluster = self.dcs.get_cluster()
                #TODO: 动态参数 cluster.config 有哪些?如何设置?
                if cluster and cluster.config and cluster.config.data:
                    if self.config.set_dynamic_configuration(cluster.config):
                        self.dcs.reload_config(self.config)
                        self.watchdog.reload_config(self.config)
                elif not self.config.dynamic_configuration and 'bootstrap' in self.config:
                    if self.config.set_dynamic_configuration(
                            self.config['bootstrap']['dcs']):
                        self.dcs.reload_config(self.config)
                break
            except DCSError:
                logger.warning('Can not get cluster from dcs')
                time.sleep(5)

    def get_tags(self):
        return {
            tag: value
            for tag, value in self.config.get('tags', {}).items()
            if tag not in ('clonefrom', 'nofailover', 'noloadbalance',
                           'nosync') or value
        }

    @property
    def nofailover(self):
        return bool(self.tags.get('nofailover', False))

    @property
    def nosync(self):
        return bool(self.tags.get('nosync', False))

    def reload_config(self, sighup=False):
        try:
            self.tags = self.get_tags()
            self.logger.reload_config(self.config.get('log', {}))
            self.watchdog.reload_config(self.config)
            if sighup:
                self.request.reload_config(self.config)
            self.api.reload_config(self.config['restapi'])
            self.postgresql.reload_config(self.config['postgresql'], sighup)
            self.dcs.reload_config(self.config)
        except Exception:
            logger.exception('Failed to reload config_file=%s',
                             self.config.config_file)

    @property
    def replicatefrom(self):
        return self.tags.get('replicatefrom')

    def sighup_handler(self, *args):
        self._received_sighup = True

    def sigterm_handler(self, *args):
        with self._sigterm_lock:
            if not self._received_sigterm:
                self._received_sigterm = True
                sys.exit()

    @property
    def noloadbalance(self):
        return bool(self.tags.get('noloadbalance', False))

    def schedule_next_run(self):
        self.next_run += self.dcs.loop_wait
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
            # Release the GIL so we don't starve anyone waiting on async_executor lock
            time.sleep(0.001)
            # Warn user that Patroni is not keeping up
            logger.warning("Loop time exceeded, rescheduling immediately.")
        elif self.ha.watch(nap_time):
            self.next_run = time.time()

    @property
    def received_sigterm(self):
        with self._sigterm_lock:
            return self._received_sigterm

    def run(self):
        # SGS: 启动API
        self.api.start()
        # SGS: 启动日志记录
        self.logger.start()
        self.next_run = time.time()

        while not self.received_sigterm:
            # SGS: 是否收到SIGHUP信号 如何触发这个信息,直接用kill命令下发?
            if self._received_sighup:
                self._received_sighup = False
                if self.config.reload_local_configuration():
                    self.reload_config(True)
                else:
                    self.postgresql.config.reload_config(
                        self.config['postgresql'], True)
            # SGS: 定时检查本patroni关联的member的状态并做响应处理,具体见 run_cycle 函数里的注释
            logger.info(self.ha.run_cycle())

            if self.dcs.cluster and self.dcs.cluster.config and self.dcs.cluster.config.data \
                    and self.config.set_dynamic_configuration(self.dcs.cluster.config):
                self.reload_config()

            if self.postgresql.role != 'uninitialized':
                self.config.save_cache()

            self.schedule_next_run()

    def setup_signal_handlers(self):
        from threading import Lock

        self._received_sighup = False
        self._sigterm_lock = Lock()
        self._received_sigterm = False
        if os.name != 'nt':
            signal.signal(signal.SIGHUP, self.sighup_handler)
        signal.signal(signal.SIGTERM, self.sigterm_handler)

    def shutdown(self):
        with self._sigterm_lock:
            self._received_sigterm = True
        try:
            self.api.shutdown()
        except Exception:
            logger.exception('Exception during RestApi.shutdown')
        try:
            self.ha.shutdown()
        except Exception:
            logger.exception('Exception during Ha.shutdown')
        self.logger.shutdown()