def __init__(self, conf): from patroni.api import RestApiServer from patroni.dcs import get_dcs from patroni.ha import Ha from patroni.log import PatroniLogger from patroni.postgresql import Postgresql from patroni.request import PatroniRequest from patroni.watchdog import Watchdog self.setup_signal_handlers() self.version = __version__ self.logger = PatroniLogger() self.config = conf self.logger.reload_config(self.config.get('log', {})) self.dcs = get_dcs(self.config) self.watchdog = Watchdog(self.config) self.load_dynamic_configuration() self.postgresql = Postgresql(self.config['postgresql']) self.api = RestApiServer(self, self.config['restapi']) self.request = PatroniRequest(self.config, True) self.ha = Ha(self) self.tags = self.get_tags() self.next_run = time.time() self.scheduled_restart = {}
def test_exceptions(self): wd = Watchdog({ 'ttl': 30, 'loop_wait': 10, 'watchdog': { 'mode': 'bad' } }) wd.impl.close = wd.impl.keepalive = Mock(side_effect=WatchdogError('')) self.assertIsNone(wd.disable()) self.assertIsNone(wd.keepalive())
def test_invalid_timings(self): watchdog = Watchdog({ 'ttl': 30, 'loop_wait': 20, 'watchdog': { 'mode': 'automatic' } }) watchdog.activate() self.assertEquals(len(mock_devices), 1) self.assertFalse(watchdog.is_running)
def test_unsafe_timeout_disable_watchdog_and_exit(self): watchdog = Watchdog({ 'ttl': 30, 'loop_wait': 15, 'watchdog': { 'mode': 'required', 'safety_margin': -1 } }) self.assertEqual(watchdog.activate(), False) self.assertEqual(watchdog.is_running, False)
def test_basic_operation(self): if platform.system() != 'Linux': return watchdog = Watchdog({ 'ttl': 30, 'loop_wait': 10, 'watchdog': { 'mode': 'required' } }) watchdog.activate() self.assertEquals(len(mock_devices), 2) device = mock_devices[-1] self.assertTrue(device.open) self.assertEquals(device.timeout, 15) watchdog.keepalive() self.assertEquals(len(device.writes), 1) watchdog.disable() self.assertFalse(device.open) self.assertEquals(device.writes[-1], b'V')
def test_timeout_does_not_ensure_safe_termination(self): Watchdog({ 'ttl': 30, 'loop_wait': 15, 'watchdog': { 'mode': 'auto' } }).activate() self.assertEquals(len(mock_devices), 2)
def test_watchdog_not_activated(self): self.assertFalse( Watchdog({ 'ttl': 30, 'loop_wait': 10, 'watchdog': { 'mode': 'required' } }).activate())
def test_watchdog_activate(self): with patch.object(LinuxWatchdogDevice, 'open', Mock(side_effect=WatchdogError(''))): self.assertTrue( Watchdog({ 'ttl': 30, 'loop_wait': 10, 'watchdog': { 'mode': 'auto' } }).activate()) self.assertFalse( Watchdog({ 'ttl': 30, 'loop_wait': 10, 'watchdog': { 'mode': 'required' } }).activate())
def test_unsafe_timeout_disable_watchdog_and_exit(self): self.assertRaises( SystemExit, Watchdog({ 'ttl': 30, 'loop_wait': 15, 'watchdog': { 'mode': 'required' } }).activate)
def test_watchdog_not_activated(self): self.assertRaises( SystemExit, Watchdog({ 'ttl': 30, 'loop_wait': 10, 'watchdog': { 'mode': 'required' } }).activate)
def test_parse_mode(self): with patch('patroni.watchdog.base.logger.warning', new_callable=Mock()) as warning_mock: watchdog = Watchdog({ 'ttl': 30, 'loop_wait': 10, 'watchdog': { 'mode': 'bad' } }) self.assertEquals(watchdog.mode, 'off') warning_mock.assert_called_once()
def test_basic_operation(self): watchdog = Watchdog({'ttl': 30, 'loop_wait': 10, 'watchdog': {'mode': 'required'}}) watchdog.activate() self.assertEqual(len(mock_devices), 2) device = mock_devices[-1] self.assertTrue(device.open) self.assertEqual(device.timeout, 24) watchdog.keepalive() self.assertEqual(len(device.writes), 1) watchdog.disable() self.assertFalse(device.open) self.assertEqual(device.writes[-1], b'V')
def __init__(self, config): from patroni.api import RestApiServer from patroni.dcs import get_dcs from patroni.ha import Ha from patroni.postgresql import Postgresql from patroni.request import PatroniRequest from patroni.watchdog import Watchdog super(Patroni, self).__init__(config) self.version = __version__ self.dcs = get_dcs(self.config) self.watchdog = Watchdog(self.config) self.load_dynamic_configuration() self.postgresql = Postgresql(self.config['postgresql']) self.api = RestApiServer(self, self.config['restapi']) self.request = PatroniRequest(self.config, True) self.ha = Ha(self) self.tags = self.get_tags() self.next_run = time.time() self.scheduled_restart = {}
def __init__(self): from patroni.api import RestApiServer from patroni.config import Config from patroni.dcs import get_dcs from patroni.ha import Ha from patroni.postgresql import Postgresql from patroni.version import __version__ from patroni.watchdog import Watchdog self.setup_signal_handlers() self.version = __version__ self.config = Config() self.dcs = get_dcs(self.config) self.watchdog = Watchdog(self.config) self.load_dynamic_configuration() self.postgresql = Postgresql(self.config['postgresql']) self.api = RestApiServer(self, self.config['restapi']) self.ha = Ha(self) self.tags = self.get_tags() self.next_run = time.time() self.scheduled_restart = {}
def test_basic_operation(self): watchdog = Watchdog({'ttl': 30, 'loop_wait': 10, 'watchdog': {'mode': 'required'}}) watchdog.activate() self.assertEquals(len(mock_devices), 2) device = mock_devices[-1] self.assertTrue(device.open) self.assertEquals(device.timeout, 24) watchdog.keepalive() self.assertEquals(len(device.writes), 1) watchdog.disable() self.assertFalse(device.open) self.assertEquals(device.writes[-1], b'V')
def __init__(self, p, d): os.environ[Config.PATRONI_CONFIG_VARIABLE] = """ restapi: listen: 0.0.0.0:8008 bootstrap: users: replicator: password: rep-pass options: - replication postgresql: name: foo data_dir: data/postgresql0 pg_rewind: username: postgres password: postgres watchdog: mode: off zookeeper: exhibitor: hosts: [localhost] port: 8181 """ # We rely on sys.argv in Config, so it's necessary to reset # all the extra values that are coming from py.test sys.argv = sys.argv[:1] self.config = Config() self.postgresql = p self.dcs = d self.api = Mock() self.tags = {'foo': 'bar'} self.nofailover = None self.replicatefrom = None self.api.connection_string = 'http://127.0.0.1:8008' self.clonefrom = None self.nosync = False self.scheduled_restart = {'schedule': future_restart_time, 'postmaster_start_time': str(postmaster_start_time)} self.watchdog = Watchdog(self.config)
class Patroni(object): def __init__(self, conf): from patroni.api import RestApiServer from patroni.dcs import get_dcs from patroni.ha import Ha from patroni.log import PatroniLogger from patroni.postgresql import Postgresql from patroni.request import PatroniRequest from patroni.watchdog import Watchdog self.setup_signal_handlers() self.version = __version__ self.logger = PatroniLogger() self.config = conf self.logger.reload_config(self.config.get('log', {})) self.dcs = get_dcs(self.config) self.watchdog = Watchdog(self.config) self.load_dynamic_configuration() self.postgresql = Postgresql(self.config['postgresql']) self.api = RestApiServer(self, self.config['restapi']) self.request = PatroniRequest(self.config, True) self.ha = Ha(self) self.tags = self.get_tags() self.next_run = time.time() self.scheduled_restart = {} def load_dynamic_configuration(self): from patroni.exceptions import DCSError while True: try: cluster = self.dcs.get_cluster() #TODO: 动态参数 cluster.config 有哪些?如何设置? if cluster and cluster.config and cluster.config.data: if self.config.set_dynamic_configuration(cluster.config): self.dcs.reload_config(self.config) self.watchdog.reload_config(self.config) elif not self.config.dynamic_configuration and 'bootstrap' in self.config: if self.config.set_dynamic_configuration( self.config['bootstrap']['dcs']): self.dcs.reload_config(self.config) break except DCSError: logger.warning('Can not get cluster from dcs') time.sleep(5) def get_tags(self): return { tag: value for tag, value in self.config.get('tags', {}).items() if tag not in ('clonefrom', 'nofailover', 'noloadbalance', 'nosync') or value } @property def nofailover(self): return bool(self.tags.get('nofailover', False)) @property def nosync(self): return bool(self.tags.get('nosync', False)) def reload_config(self, sighup=False): try: self.tags = self.get_tags() self.logger.reload_config(self.config.get('log', {})) self.watchdog.reload_config(self.config) if sighup: self.request.reload_config(self.config) self.api.reload_config(self.config['restapi']) self.postgresql.reload_config(self.config['postgresql'], sighup) self.dcs.reload_config(self.config) except Exception: logger.exception('Failed to reload config_file=%s', self.config.config_file) @property def replicatefrom(self): return self.tags.get('replicatefrom') def sighup_handler(self, *args): self._received_sighup = True def sigterm_handler(self, *args): with self._sigterm_lock: if not self._received_sigterm: self._received_sigterm = True sys.exit() @property def noloadbalance(self): return bool(self.tags.get('noloadbalance', False)) def schedule_next_run(self): self.next_run += self.dcs.loop_wait current_time = time.time() nap_time = self.next_run - current_time if nap_time <= 0: self.next_run = current_time # Release the GIL so we don't starve anyone waiting on async_executor lock time.sleep(0.001) # Warn user that Patroni is not keeping up logger.warning("Loop time exceeded, rescheduling immediately.") elif self.ha.watch(nap_time): self.next_run = time.time() @property def received_sigterm(self): with self._sigterm_lock: return self._received_sigterm def run(self): # SGS: 启动API self.api.start() # SGS: 启动日志记录 self.logger.start() self.next_run = time.time() while not self.received_sigterm: # SGS: 是否收到SIGHUP信号 如何触发这个信息,直接用kill命令下发? if self._received_sighup: self._received_sighup = False if self.config.reload_local_configuration(): self.reload_config(True) else: self.postgresql.config.reload_config( self.config['postgresql'], True) # SGS: 定时检查本patroni关联的member的状态并做响应处理,具体见 run_cycle 函数里的注释 logger.info(self.ha.run_cycle()) if self.dcs.cluster and self.dcs.cluster.config and self.dcs.cluster.config.data \ and self.config.set_dynamic_configuration(self.dcs.cluster.config): self.reload_config() if self.postgresql.role != 'uninitialized': self.config.save_cache() self.schedule_next_run() def setup_signal_handlers(self): from threading import Lock self._received_sighup = False self._sigterm_lock = Lock() self._received_sigterm = False if os.name != 'nt': signal.signal(signal.SIGHUP, self.sighup_handler) signal.signal(signal.SIGTERM, self.sigterm_handler) def shutdown(self): with self._sigterm_lock: self._received_sigterm = True try: self.api.shutdown() except Exception: logger.exception('Exception during RestApi.shutdown') try: self.ha.shutdown() except Exception: logger.exception('Exception during Ha.shutdown') self.logger.shutdown()
class Patroni(object): def __init__(self): from patroni.api import RestApiServer from patroni.config import Config from patroni.dcs import get_dcs from patroni.ha import Ha from patroni.postgresql import Postgresql from patroni.version import __version__ from patroni.watchdog import Watchdog self.setup_signal_handlers() self.version = __version__ self.config = Config() self.dcs = get_dcs(self.config) self.watchdog = Watchdog(self.config) self.load_dynamic_configuration() self.postgresql = Postgresql(self.config['postgresql']) self.api = RestApiServer(self, self.config['restapi']) self.ha = Ha(self) self.tags = self.get_tags() self.next_run = time.time() self.scheduled_restart = {} def load_dynamic_configuration(self): from patroni.exceptions import DCSError while True: try: cluster = self.dcs.get_cluster() if cluster and cluster.config and cluster.config.data: if self.config.set_dynamic_configuration(cluster.config): self.dcs.reload_config(self.config) self.watchdog.reload_config(self.config) elif not self.config.dynamic_configuration and 'bootstrap' in self.config: if self.config.set_dynamic_configuration( self.config['bootstrap']['dcs']): self.dcs.reload_config(self.config) break except DCSError: logger.warning('Can not get cluster from dcs') def get_tags(self): return { tag: value for tag, value in self.config.get('tags', {}).items() if tag not in ('clonefrom', 'nofailover', 'noloadbalance', 'nosync') or value } @property def nofailover(self): return bool(self.tags.get('nofailover', False)) @property def nosync(self): return bool(self.tags.get('nosync', False)) def reload_config(self): try: self.tags = self.get_tags() self.dcs.reload_config(self.config) self.watchdog.reload_config(self.config) self.api.reload_config(self.config['restapi']) self.postgresql.reload_config(self.config['postgresql']) except Exception: logger.exception('Failed to reload config_file=%s', self.config.config_file) @property def replicatefrom(self): return self.tags.get('replicatefrom') def sighup_handler(self, *args): self._received_sighup = True def sigterm_handler(self, *args): if not self._received_sigterm: self._received_sigterm = True sys.exit() @property def noloadbalance(self): return bool(self.tags.get('noloadbalance', False)) def schedule_next_run(self): self.next_run += self.dcs.loop_wait current_time = time.time() nap_time = self.next_run - current_time if nap_time <= 0: self.next_run = current_time # Release the GIL so we don't starve anyone waiting on async_executor lock time.sleep(0.001) # Warn user that Patroni is not keeping up logger.warning("Loop time exceeded, rescheduling immediately.") elif self.ha.watch(nap_time): self.next_run = time.time() def run(self): self.api.start() self.next_run = time.time() while not self._received_sigterm: if self._received_sighup: self._received_sighup = False if self.config.reload_local_configuration(): self.reload_config() logger.info(self.ha.run_cycle()) if self.dcs.cluster and self.dcs.cluster.config and self.dcs.cluster.config.data \ and self.config.set_dynamic_configuration(self.dcs.cluster.config): self.reload_config() if self.postgresql.role != 'uninitialized': self.config.save_cache() self.schedule_next_run() def setup_signal_handlers(self): self._received_sighup = False self._received_sigterm = False signal.signal(signal.SIGHUP, self.sighup_handler) signal.signal(signal.SIGTERM, self.sigterm_handler) def shutdown(self): self.api.shutdown() self.ha.shutdown()
def test_exceptions(self): wd = Watchdog({'ttl': 30, 'loop_wait': 10, 'watchdog': {'mode': 'bad'}}) wd.impl.close = wd.impl.keepalive = Mock(side_effect=WatchdogError('')) self.assertTrue(wd.activate()) self.assertIsNone(wd.keepalive()) self.assertIsNone(wd.disable())
def test_invalid_timings(self): watchdog = Watchdog({'ttl': 30, 'loop_wait': 20, 'watchdog': {'mode': 'automatic', 'safety_margin': -1}}) watchdog.activate() self.assertEquals(len(mock_devices), 1) self.assertFalse(watchdog.is_running)
def test_config_reload(self): watchdog = Watchdog({'ttl': 30, 'loop_wait': 15, 'watchdog': {'mode': 'required'}}) self.assertTrue(watchdog.activate()) self.assertTrue(watchdog.is_running) watchdog.reload_config({'ttl': 30, 'loop_wait': 15, 'watchdog': {'mode': 'off'}}) self.assertFalse(watchdog.is_running) watchdog.reload_config({'ttl': 30, 'loop_wait': 15, 'watchdog': {'mode': 'required'}}) self.assertFalse(watchdog.is_running) watchdog.keepalive() self.assertTrue(watchdog.is_running) watchdog.disable() watchdog.reload_config({'ttl': 30, 'loop_wait': 15, 'watchdog': {'mode': 'required', 'driver': 'unknown'}}) self.assertFalse(watchdog.is_healthy) self.assertFalse(watchdog.activate()) watchdog.reload_config({'ttl': 30, 'loop_wait': 15, 'watchdog': {'mode': 'required'}}) self.assertFalse(watchdog.is_running) watchdog.keepalive() self.assertTrue(watchdog.is_running) watchdog.reload_config({'ttl': 60, 'loop_wait': 15, 'watchdog': {'mode': 'required'}}) watchdog.keepalive()
def test_unsafe_timeout_disable_watchdog_and_exit(self): watchdog = Watchdog({'ttl': 30, 'loop_wait': 15, 'watchdog': {'mode': 'required', 'safety_margin': -1}}) self.assertEquals(watchdog.activate(), False) self.assertEquals(watchdog.is_running, False)
class Patroni(AbstractPatroniDaemon): def __init__(self, config): from patroni.api import RestApiServer from patroni.dcs import get_dcs from patroni.ha import Ha from patroni.postgresql import Postgresql from patroni.request import PatroniRequest from patroni.watchdog import Watchdog super(Patroni, self).__init__(config) self.version = __version__ self.dcs = get_dcs(self.config) self.watchdog = Watchdog(self.config) self.load_dynamic_configuration() self.postgresql = Postgresql(self.config['postgresql']) self.api = RestApiServer(self, self.config['restapi']) self.request = PatroniRequest(self.config, True) self.ha = Ha(self) self.tags = self.get_tags() self.next_run = time.time() self.scheduled_restart = {} def load_dynamic_configuration(self): from patroni.exceptions import DCSError while True: try: cluster = self.dcs.get_cluster() if cluster and cluster.config and cluster.config.data: if self.config.set_dynamic_configuration(cluster.config): self.dcs.reload_config(self.config) self.watchdog.reload_config(self.config) elif not self.config.dynamic_configuration and 'bootstrap' in self.config: if self.config.set_dynamic_configuration( self.config['bootstrap']['dcs']): self.dcs.reload_config(self.config) break except DCSError: logger.warning('Can not get cluster from dcs') time.sleep(5) def get_tags(self): return { tag: value for tag, value in self.config.get('tags', {}).items() if tag not in ('clonefrom', 'nofailover', 'noloadbalance', 'nosync') or value } @property def nofailover(self): return bool(self.tags.get('nofailover', False)) @property def nosync(self): return bool(self.tags.get('nosync', False)) def reload_config(self, sighup=False, local=False): try: super(Patroni, self).reload_config(sighup, local) if local: self.tags = self.get_tags() self.request.reload_config(self.config) self.api.reload_config(self.config['restapi']) self.watchdog.reload_config(self.config) self.postgresql.reload_config(self.config['postgresql'], sighup) self.dcs.reload_config(self.config) except Exception: logger.exception('Failed to reload config_file=%s', self.config.config_file) @property def replicatefrom(self): return self.tags.get('replicatefrom') @property def noloadbalance(self): return bool(self.tags.get('noloadbalance', False)) def schedule_next_run(self): self.next_run += self.dcs.loop_wait current_time = time.time() nap_time = self.next_run - current_time if nap_time <= 0: self.next_run = current_time # Release the GIL so we don't starve anyone waiting on async_executor lock time.sleep(0.001) # Warn user that Patroni is not keeping up logger.warning("Loop time exceeded, rescheduling immediately.") elif self.ha.watch(nap_time): self.next_run = time.time() def run(self): self.api.start() self.next_run = time.time() super(Patroni, self).run() def _run_cycle(self): logger.info(self.ha.run_cycle()) if self.dcs.cluster and self.dcs.cluster.config and self.dcs.cluster.config.data \ and self.config.set_dynamic_configuration(self.dcs.cluster.config): self.reload_config() if self.postgresql.role != 'uninitialized': self.config.save_cache() self.schedule_next_run() def _shutdown(self): try: self.api.shutdown() except Exception: logger.exception('Exception during RestApi.shutdown') try: self.ha.shutdown() except Exception: logger.exception('Exception during Ha.shutdown')
class Patroni(object): def __init__(self): from patroni.api import RestApiServer from patroni.config import Config from patroni.dcs import get_dcs from patroni.ha import Ha from patroni.postgresql import Postgresql from patroni.version import __version__ from patroni.watchdog import Watchdog self.setup_signal_handlers() self.version = __version__ self.config = Config() self.dcs = get_dcs(self.config) self.watchdog = Watchdog(self.config) self.load_dynamic_configuration() self.postgresql = Postgresql(self.config['postgresql']) self.api = RestApiServer(self, self.config['restapi']) self.ha = Ha(self) self.tags = self.get_tags() self.next_run = time.time() self.scheduled_restart = {} def load_dynamic_configuration(self): from patroni.exceptions import DCSError while True: try: cluster = self.dcs.get_cluster() if cluster and cluster.config and cluster.config.data: if self.config.set_dynamic_configuration(cluster.config): self.dcs.reload_config(self.config) self.watchdog.reload_config(self.config) elif not self.config.dynamic_configuration and 'bootstrap' in self.config: if self.config.set_dynamic_configuration(self.config['bootstrap']['dcs']): self.dcs.reload_config(self.config) break except DCSError: logger.warning('Can not get cluster from dcs') def get_tags(self): return {tag: value for tag, value in self.config.get('tags', {}).items() if tag not in ('clonefrom', 'nofailover', 'noloadbalance', 'nosync') or value} @property def nofailover(self): return bool(self.tags.get('nofailover', False)) @property def nosync(self): return bool(self.tags.get('nosync', False)) def reload_config(self): try: self.tags = self.get_tags() self.dcs.reload_config(self.config) self.watchdog.reload_config(self.config) self.api.reload_config(self.config['restapi']) self.postgresql.reload_config(self.config['postgresql']) except Exception: logger.exception('Failed to reload config_file=%s', self.config.config_file) @property def replicatefrom(self): return self.tags.get('replicatefrom') def sighup_handler(self, *args): self._received_sighup = True def sigterm_handler(self, *args): if not self._received_sigterm: self._received_sigterm = True sys.exit() @property def noloadbalance(self): return bool(self.tags.get('noloadbalance', False)) def schedule_next_run(self): self.next_run += self.dcs.loop_wait current_time = time.time() nap_time = self.next_run - current_time if nap_time <= 0: self.next_run = current_time # Release the GIL so we don't starve anyone waiting on async_executor lock time.sleep(0.001) # Warn user that Patroni is not keeping up logger.warning("Loop time exceeded, rescheduling immediately.") elif self.ha.watch(nap_time): self.next_run = time.time() def run(self): self.api.start() self.next_run = time.time() while not self._received_sigterm: if self._received_sighup: self._received_sighup = False if self.config.reload_local_configuration(): self.reload_config() logger.info(self.ha.run_cycle()) if self.dcs.cluster and self.dcs.cluster.config and self.dcs.cluster.config.data \ and self.config.set_dynamic_configuration(self.dcs.cluster.config): self.reload_config() if self.postgresql.role != 'uninitialized': self.config.save_cache() self.schedule_next_run() def setup_signal_handlers(self): self._received_sighup = False self._received_sigterm = False signal.signal(signal.SIGHUP, self.sighup_handler) signal.signal(signal.SIGTERM, self.sigterm_handler) def shutdown(self): self.api.shutdown() self.ha.shutdown()