def test_config_reload(self): watchdog = Watchdog({'ttl': 30, 'loop_wait': 15, 'watchdog': {'mode': 'required'}}) self.assertTrue(watchdog.activate()) self.assertTrue(watchdog.is_running) watchdog.reload_config({'ttl': 30, 'loop_wait': 15, 'watchdog': {'mode': 'off'}}) self.assertFalse(watchdog.is_running) watchdog.reload_config({'ttl': 30, 'loop_wait': 15, 'watchdog': {'mode': 'required'}}) self.assertFalse(watchdog.is_running) watchdog.keepalive() self.assertTrue(watchdog.is_running) watchdog.disable() watchdog.reload_config({'ttl': 30, 'loop_wait': 15, 'watchdog': {'mode': 'required', 'driver': 'unknown'}}) self.assertFalse(watchdog.is_healthy) self.assertFalse(watchdog.activate()) watchdog.reload_config({'ttl': 30, 'loop_wait': 15, 'watchdog': {'mode': 'required'}}) self.assertFalse(watchdog.is_running) watchdog.keepalive() self.assertTrue(watchdog.is_running) watchdog.reload_config({'ttl': 60, 'loop_wait': 15, 'watchdog': {'mode': 'required'}}) watchdog.keepalive()
class Patroni(object): def __init__(self): from patroni.api import RestApiServer from patroni.config import Config from patroni.dcs import get_dcs from patroni.ha import Ha from patroni.postgresql import Postgresql from patroni.version import __version__ from patroni.watchdog import Watchdog self.setup_signal_handlers() self.version = __version__ self.config = Config() self.dcs = get_dcs(self.config) self.watchdog = Watchdog(self.config) self.load_dynamic_configuration() self.postgresql = Postgresql(self.config['postgresql']) self.api = RestApiServer(self, self.config['restapi']) self.ha = Ha(self) self.tags = self.get_tags() self.next_run = time.time() self.scheduled_restart = {} def load_dynamic_configuration(self): from patroni.exceptions import DCSError while True: try: cluster = self.dcs.get_cluster() if cluster and cluster.config and cluster.config.data: if self.config.set_dynamic_configuration(cluster.config): self.dcs.reload_config(self.config) self.watchdog.reload_config(self.config) elif not self.config.dynamic_configuration and 'bootstrap' in self.config: if self.config.set_dynamic_configuration( self.config['bootstrap']['dcs']): self.dcs.reload_config(self.config) break except DCSError: logger.warning('Can not get cluster from dcs') def get_tags(self): return { tag: value for tag, value in self.config.get('tags', {}).items() if tag not in ('clonefrom', 'nofailover', 'noloadbalance', 'nosync') or value } @property def nofailover(self): return bool(self.tags.get('nofailover', False)) @property def nosync(self): return bool(self.tags.get('nosync', False)) def reload_config(self): try: self.tags = self.get_tags() self.dcs.reload_config(self.config) self.watchdog.reload_config(self.config) self.api.reload_config(self.config['restapi']) self.postgresql.reload_config(self.config['postgresql']) except Exception: logger.exception('Failed to reload config_file=%s', self.config.config_file) @property def replicatefrom(self): return self.tags.get('replicatefrom') def sighup_handler(self, *args): self._received_sighup = True def sigterm_handler(self, *args): if not self._received_sigterm: self._received_sigterm = True sys.exit() @property def noloadbalance(self): return bool(self.tags.get('noloadbalance', False)) def schedule_next_run(self): self.next_run += self.dcs.loop_wait current_time = time.time() nap_time = self.next_run - current_time if nap_time <= 0: self.next_run = current_time # Release the GIL so we don't starve anyone waiting on async_executor lock time.sleep(0.001) # Warn user that Patroni is not keeping up logger.warning("Loop time exceeded, rescheduling immediately.") elif self.ha.watch(nap_time): self.next_run = time.time() def run(self): self.api.start() self.next_run = time.time() while not self._received_sigterm: if self._received_sighup: self._received_sighup = False if self.config.reload_local_configuration(): self.reload_config() logger.info(self.ha.run_cycle()) if self.dcs.cluster and self.dcs.cluster.config and self.dcs.cluster.config.data \ and self.config.set_dynamic_configuration(self.dcs.cluster.config): self.reload_config() if self.postgresql.role != 'uninitialized': self.config.save_cache() self.schedule_next_run() def setup_signal_handlers(self): self._received_sighup = False self._received_sigterm = False signal.signal(signal.SIGHUP, self.sighup_handler) signal.signal(signal.SIGTERM, self.sigterm_handler) def shutdown(self): self.api.shutdown() self.ha.shutdown()
class Patroni(AbstractPatroniDaemon): def __init__(self, config): from patroni.api import RestApiServer from patroni.dcs import get_dcs from patroni.ha import Ha from patroni.postgresql import Postgresql from patroni.request import PatroniRequest from patroni.watchdog import Watchdog super(Patroni, self).__init__(config) self.version = __version__ self.dcs = get_dcs(self.config) self.watchdog = Watchdog(self.config) self.load_dynamic_configuration() self.postgresql = Postgresql(self.config['postgresql']) self.api = RestApiServer(self, self.config['restapi']) self.request = PatroniRequest(self.config, True) self.ha = Ha(self) self.tags = self.get_tags() self.next_run = time.time() self.scheduled_restart = {} def load_dynamic_configuration(self): from patroni.exceptions import DCSError while True: try: cluster = self.dcs.get_cluster() if cluster and cluster.config and cluster.config.data: if self.config.set_dynamic_configuration(cluster.config): self.dcs.reload_config(self.config) self.watchdog.reload_config(self.config) elif not self.config.dynamic_configuration and 'bootstrap' in self.config: if self.config.set_dynamic_configuration( self.config['bootstrap']['dcs']): self.dcs.reload_config(self.config) break except DCSError: logger.warning('Can not get cluster from dcs') time.sleep(5) def get_tags(self): return { tag: value for tag, value in self.config.get('tags', {}).items() if tag not in ('clonefrom', 'nofailover', 'noloadbalance', 'nosync') or value } @property def nofailover(self): return bool(self.tags.get('nofailover', False)) @property def nosync(self): return bool(self.tags.get('nosync', False)) def reload_config(self, sighup=False, local=False): try: super(Patroni, self).reload_config(sighup, local) if local: self.tags = self.get_tags() self.request.reload_config(self.config) self.api.reload_config(self.config['restapi']) self.watchdog.reload_config(self.config) self.postgresql.reload_config(self.config['postgresql'], sighup) self.dcs.reload_config(self.config) except Exception: logger.exception('Failed to reload config_file=%s', self.config.config_file) @property def replicatefrom(self): return self.tags.get('replicatefrom') @property def noloadbalance(self): return bool(self.tags.get('noloadbalance', False)) def schedule_next_run(self): self.next_run += self.dcs.loop_wait current_time = time.time() nap_time = self.next_run - current_time if nap_time <= 0: self.next_run = current_time # Release the GIL so we don't starve anyone waiting on async_executor lock time.sleep(0.001) # Warn user that Patroni is not keeping up logger.warning("Loop time exceeded, rescheduling immediately.") elif self.ha.watch(nap_time): self.next_run = time.time() def run(self): self.api.start() self.next_run = time.time() super(Patroni, self).run() def _run_cycle(self): logger.info(self.ha.run_cycle()) if self.dcs.cluster and self.dcs.cluster.config and self.dcs.cluster.config.data \ and self.config.set_dynamic_configuration(self.dcs.cluster.config): self.reload_config() if self.postgresql.role != 'uninitialized': self.config.save_cache() self.schedule_next_run() def _shutdown(self): try: self.api.shutdown() except Exception: logger.exception('Exception during RestApi.shutdown') try: self.ha.shutdown() except Exception: logger.exception('Exception during Ha.shutdown')
class Patroni(object): def __init__(self, conf): from patroni.api import RestApiServer from patroni.dcs import get_dcs from patroni.ha import Ha from patroni.log import PatroniLogger from patroni.postgresql import Postgresql from patroni.request import PatroniRequest from patroni.watchdog import Watchdog self.setup_signal_handlers() self.version = __version__ self.logger = PatroniLogger() self.config = conf self.logger.reload_config(self.config.get('log', {})) self.dcs = get_dcs(self.config) self.watchdog = Watchdog(self.config) self.load_dynamic_configuration() self.postgresql = Postgresql(self.config['postgresql']) self.api = RestApiServer(self, self.config['restapi']) self.request = PatroniRequest(self.config, True) self.ha = Ha(self) self.tags = self.get_tags() self.next_run = time.time() self.scheduled_restart = {} def load_dynamic_configuration(self): from patroni.exceptions import DCSError while True: try: cluster = self.dcs.get_cluster() #TODO: 动态参数 cluster.config 有哪些?如何设置? if cluster and cluster.config and cluster.config.data: if self.config.set_dynamic_configuration(cluster.config): self.dcs.reload_config(self.config) self.watchdog.reload_config(self.config) elif not self.config.dynamic_configuration and 'bootstrap' in self.config: if self.config.set_dynamic_configuration( self.config['bootstrap']['dcs']): self.dcs.reload_config(self.config) break except DCSError: logger.warning('Can not get cluster from dcs') time.sleep(5) def get_tags(self): return { tag: value for tag, value in self.config.get('tags', {}).items() if tag not in ('clonefrom', 'nofailover', 'noloadbalance', 'nosync') or value } @property def nofailover(self): return bool(self.tags.get('nofailover', False)) @property def nosync(self): return bool(self.tags.get('nosync', False)) def reload_config(self, sighup=False): try: self.tags = self.get_tags() self.logger.reload_config(self.config.get('log', {})) self.watchdog.reload_config(self.config) if sighup: self.request.reload_config(self.config) self.api.reload_config(self.config['restapi']) self.postgresql.reload_config(self.config['postgresql'], sighup) self.dcs.reload_config(self.config) except Exception: logger.exception('Failed to reload config_file=%s', self.config.config_file) @property def replicatefrom(self): return self.tags.get('replicatefrom') def sighup_handler(self, *args): self._received_sighup = True def sigterm_handler(self, *args): with self._sigterm_lock: if not self._received_sigterm: self._received_sigterm = True sys.exit() @property def noloadbalance(self): return bool(self.tags.get('noloadbalance', False)) def schedule_next_run(self): self.next_run += self.dcs.loop_wait current_time = time.time() nap_time = self.next_run - current_time if nap_time <= 0: self.next_run = current_time # Release the GIL so we don't starve anyone waiting on async_executor lock time.sleep(0.001) # Warn user that Patroni is not keeping up logger.warning("Loop time exceeded, rescheduling immediately.") elif self.ha.watch(nap_time): self.next_run = time.time() @property def received_sigterm(self): with self._sigterm_lock: return self._received_sigterm def run(self): # SGS: 启动API self.api.start() # SGS: 启动日志记录 self.logger.start() self.next_run = time.time() while not self.received_sigterm: # SGS: 是否收到SIGHUP信号 如何触发这个信息,直接用kill命令下发? if self._received_sighup: self._received_sighup = False if self.config.reload_local_configuration(): self.reload_config(True) else: self.postgresql.config.reload_config( self.config['postgresql'], True) # SGS: 定时检查本patroni关联的member的状态并做响应处理,具体见 run_cycle 函数里的注释 logger.info(self.ha.run_cycle()) if self.dcs.cluster and self.dcs.cluster.config and self.dcs.cluster.config.data \ and self.config.set_dynamic_configuration(self.dcs.cluster.config): self.reload_config() if self.postgresql.role != 'uninitialized': self.config.save_cache() self.schedule_next_run() def setup_signal_handlers(self): from threading import Lock self._received_sighup = False self._sigterm_lock = Lock() self._received_sigterm = False if os.name != 'nt': signal.signal(signal.SIGHUP, self.sighup_handler) signal.signal(signal.SIGTERM, self.sigterm_handler) def shutdown(self): with self._sigterm_lock: self._received_sigterm = True try: self.api.shutdown() except Exception: logger.exception('Exception during RestApi.shutdown') try: self.ha.shutdown() except Exception: logger.exception('Exception during Ha.shutdown') self.logger.shutdown()
class Patroni(object): def __init__(self): from patroni.api import RestApiServer from patroni.config import Config from patroni.dcs import get_dcs from patroni.ha import Ha from patroni.postgresql import Postgresql from patroni.version import __version__ from patroni.watchdog import Watchdog self.setup_signal_handlers() self.version = __version__ self.config = Config() self.dcs = get_dcs(self.config) self.watchdog = Watchdog(self.config) self.load_dynamic_configuration() self.postgresql = Postgresql(self.config['postgresql']) self.api = RestApiServer(self, self.config['restapi']) self.ha = Ha(self) self.tags = self.get_tags() self.next_run = time.time() self.scheduled_restart = {} def load_dynamic_configuration(self): from patroni.exceptions import DCSError while True: try: cluster = self.dcs.get_cluster() if cluster and cluster.config and cluster.config.data: if self.config.set_dynamic_configuration(cluster.config): self.dcs.reload_config(self.config) self.watchdog.reload_config(self.config) elif not self.config.dynamic_configuration and 'bootstrap' in self.config: if self.config.set_dynamic_configuration(self.config['bootstrap']['dcs']): self.dcs.reload_config(self.config) break except DCSError: logger.warning('Can not get cluster from dcs') def get_tags(self): return {tag: value for tag, value in self.config.get('tags', {}).items() if tag not in ('clonefrom', 'nofailover', 'noloadbalance', 'nosync') or value} @property def nofailover(self): return bool(self.tags.get('nofailover', False)) @property def nosync(self): return bool(self.tags.get('nosync', False)) def reload_config(self): try: self.tags = self.get_tags() self.dcs.reload_config(self.config) self.watchdog.reload_config(self.config) self.api.reload_config(self.config['restapi']) self.postgresql.reload_config(self.config['postgresql']) except Exception: logger.exception('Failed to reload config_file=%s', self.config.config_file) @property def replicatefrom(self): return self.tags.get('replicatefrom') def sighup_handler(self, *args): self._received_sighup = True def sigterm_handler(self, *args): if not self._received_sigterm: self._received_sigterm = True sys.exit() @property def noloadbalance(self): return bool(self.tags.get('noloadbalance', False)) def schedule_next_run(self): self.next_run += self.dcs.loop_wait current_time = time.time() nap_time = self.next_run - current_time if nap_time <= 0: self.next_run = current_time # Release the GIL so we don't starve anyone waiting on async_executor lock time.sleep(0.001) # Warn user that Patroni is not keeping up logger.warning("Loop time exceeded, rescheduling immediately.") elif self.ha.watch(nap_time): self.next_run = time.time() def run(self): self.api.start() self.next_run = time.time() while not self._received_sigterm: if self._received_sighup: self._received_sighup = False if self.config.reload_local_configuration(): self.reload_config() logger.info(self.ha.run_cycle()) if self.dcs.cluster and self.dcs.cluster.config and self.dcs.cluster.config.data \ and self.config.set_dynamic_configuration(self.dcs.cluster.config): self.reload_config() if self.postgresql.role != 'uninitialized': self.config.save_cache() self.schedule_next_run() def setup_signal_handlers(self): self._received_sighup = False self._received_sigterm = False signal.signal(signal.SIGHUP, self.sighup_handler) signal.signal(signal.SIGTERM, self.sigterm_handler) def shutdown(self): self.api.shutdown() self.ha.shutdown()