class TestPostgresql(unittest.TestCase):
    """Unit tests for the Postgresql wrapper (pg_rewind-aware version).

    ``setUp`` builds a throw-away ``Postgresql`` instance rooted at
    ``data/test0``; ``tearDown`` removes the whole ``data`` tree, so every
    test starts from a clean slate.  All credentials in the config are
    placeholders.
    """

    @patch('subprocess.call', Mock(return_value=0))
    @patch('psycopg2.connect', psycopg2_connect)
    def setUp(self):
        # Minimal but complete 'postgresql' section of a patroni config.
        self.p = Postgresql({'name': 'test0',
                             'scope': 'batman',
                             'data_dir': 'data/test0',
                             'listen': '127.0.0.1, *:5432',
                             'connect_address': '127.0.0.2:5432',
                             'pg_hba': ['hostssl all all 0.0.0.0/0 md5', 'host all all 0.0.0.0/0 md5'],
                             'superuser': {'password': ''},
                             'admin': {'username': '******', 'password': '******'},
                             'pg_rewind': {'username': '******', 'password': '******'},
                             'replication': {'username': '******', 'password': '******',
                                             'network': '127.0.0.1/32'},
                             'parameters': {'foo': 'bar'},
                             'recovery_conf': {'foo': 'bar'},
                             'callbacks': {'on_start': 'true', 'on_stop': 'true', 'on_restart': 'true',
                                           'on_role_change': 'true', 'on_reload': 'true'},
                             'restore': 'true'})
        if not os.path.exists(self.p.data_dir):
            os.makedirs(self.p.data_dir)
        self.leadermem = Member(0, 'leader', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5435/postgres'})
        self.leader = Leader(-1, 28, self.leadermem)
        self.other = Member(0, 'test1', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5433/postgres'})
        self.me = Member(0, 'test0', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5434/postgres'})

    def tearDown(self):
        shutil.rmtree('data')

    def test_data_directory_empty(self):
        self.assertTrue(self.p.data_directory_empty())

    def test_initialize(self):
        self.assertTrue(self.p.initialize())
        # initialize() must write out pg_hba.conf into the data directory
        self.assertTrue(os.path.exists(os.path.join(self.p.data_dir, 'pg_hba.conf')))

    def test_start(self):
        self.assertTrue(self.p.start())
        self.p.is_running = false
        # A stale postmaster.pid must not prevent a successful start.
        open(os.path.join(self.p.data_dir, 'postmaster.pid'), 'w').close()
        self.assertTrue(self.p.start())

    def test_stop(self):
        self.assertTrue(self.p.stop())
        with patch('subprocess.call', Mock(return_value=1)):
            # pg_ctl failing is fine when the server is not running...
            self.assertTrue(self.p.stop())
            # ...but not when it is.
            self.p.is_running = Mock(return_value=True)
            self.assertFalse(self.p.stop())

    def test_restart(self):
        self.p.start = false
        self.p.is_running = false
        self.assertFalse(self.p.restart())
        self.assertEqual(self.p.state, 'restart failed (restarting)')

    def test_sync_from_leader(self):
        self.assertTrue(self.p.sync_from_leader(self.leader))

    @patch('subprocess.call', side_effect=Exception("Test"))
    def test_pg_rewind(self, mock_call):
        self.assertTrue(self.p.rewind(self.leader))
        subprocess.call = mock_call
        self.assertFalse(self.p.rewind(self.leader))

    @patch('patroni.postgresql.Postgresql.rewind', return_value=False)
    @patch('patroni.postgresql.Postgresql.remove_data_directory', MagicMock(return_value=True))
    @patch('patroni.postgresql.Postgresql.single_user_mode', MagicMock(return_value=1))
    def test_follow_the_leader(self, mock_pg_rewind):
        self.p.demote()
        self.p.follow_the_leader(None)
        self.p.demote()
        self.p.follow_the_leader(self.leader)
        self.p.follow_the_leader(Leader(-1, 28, self.other))
        self.p.rewind = mock_pg_rewind
        self.p.follow_the_leader(self.leader)
        self.p.require_rewind()
        with mock.patch('os.path.islink', MagicMock(return_value=True)):
            with mock.patch('os.unlink', MagicMock(return_value=True)):
                with mock.patch('patroni.postgresql.Postgresql.can_rewind',
                                new_callable=PropertyMock(return_value=True)):
                    self.p.follow_the_leader(self.leader, recovery=True)
        self.p.require_rewind()
        with mock.patch('patroni.postgresql.Postgresql.can_rewind',
                        new_callable=PropertyMock(return_value=True)):
            # Exercise both the successful and the failed rewind paths.
            self.p.rewind.return_value = True
            self.p.follow_the_leader(self.leader, recovery=True)
            self.p.rewind.return_value = False
            self.p.follow_the_leader(self.leader, recovery=True)

    def test_can_rewind(self):
        tmp = self.p.pg_rewind
        self.p.pg_rewind = None
        self.assertFalse(self.p.can_rewind)
        self.p.pg_rewind = tmp
        with mock.patch('subprocess.call', MagicMock(return_value=1)):
            self.assertFalse(self.p.can_rewind)
        with mock.patch('subprocess.call', side_effect=OSError("foo")):
            self.assertFalse(self.p.can_rewind)
        # FIX: save the bound method itself, not its return value, so the
        # restore below actually puts the method back.
        tmp = self.p.controldata
        self.p.controldata = lambda: {'wal_log_hints setting': 'on'}
        self.assertTrue(self.p.can_rewind)
        self.p.controldata = tmp

    def test_create_replica(self):
        self.p.delete_trigger_file = Mock(side_effect=OSError())
        self.assertEqual(self.p.create_replica({'host': '', 'port': '', 'user': ''}, ''), 1)

    def test_create_connection_users(self):
        cfg = self.p.config
        cfg['superuser']['username'] = '******'
        p = Postgresql(cfg)
        p.create_connection_users()

    def test_sync_replication_slots(self):
        self.p.start()
        cluster = Cluster(True, self.leader, 0, [self.me, self.other, self.leadermem], None)
        self.p.sync_replication_slots(cluster)
        # A failing query must not blow up slot synchronization.
        self.p.query = Mock(side_effect=psycopg2.OperationalError)
        self.p.schedule_load_slots = True
        self.p.sync_replication_slots(cluster)

    @patch.object(MockConnect, 'closed', 2)
    def test__query(self):
        self.assertRaises(PostgresConnectionException, self.p._query, 'blabla')
        self.p._state = 'restarting'
        self.assertRaises(RetryFailedError, self.p._query, 'blabla')

    def test_query(self):
        self.p.query('select 1')
        self.assertRaises(PostgresConnectionException, self.p.query, 'RetryFailedError')
        self.assertRaises(psycopg2.OperationalError, self.p.query, 'blabla')

    def test_is_leader(self):
        self.assertTrue(self.p.is_leader())

    def test_reload(self):
        self.assertTrue(self.p.reload())

    def test_is_healthy(self):
        self.assertTrue(self.p.is_healthy())
        self.p.is_running = false
        self.assertFalse(self.p.is_healthy())

    def test_promote(self):
        self.p._role = 'replica'
        self.assertTrue(self.p.promote())
        # Promoting an already-promoted node must still report success.
        self.assertTrue(self.p.promote())

    def test_last_operation(self):
        self.assertEqual(self.p.last_operation(), '0')

    @patch('subprocess.Popen', Mock(side_effect=OSError()))
    def test_call_nowait(self):
        self.assertFalse(self.p.call_nowait('on_start'))

    def test_non_existing_callback(self):
        self.assertFalse(self.p.call_nowait('foobar'))

    def test_is_leader_exception(self):
        self.p.start()
        self.p.query = Mock(side_effect=psycopg2.OperationalError("not supported"))
        self.assertTrue(self.p.stop())

    def test_check_replication_lag(self):
        self.assertTrue(self.p.check_replication_lag(0))

    @patch('os.rename', Mock())
    @patch('os.path.isdir', Mock(return_value=True))
    def test_move_data_directory(self):
        self.p.is_running = false
        self.p.move_data_directory()
        # A failed rename must be handled gracefully.
        with patch('os.rename', Mock(side_effect=OSError())):
            self.p.move_data_directory()

    def test_bootstrap(self):
        with patch('subprocess.call', Mock(return_value=1)):
            self.assertRaises(PostgresException, self.p.bootstrap)
        self.p.bootstrap()
        self.p.bootstrap(self.leader)

    def test_remove_data_directory(self):
        self.p.data_dir = 'data_dir'
        self.p.remove_data_directory()          # nothing there: no-op
        os.mkdir(self.p.data_dir)
        self.p.remove_data_directory()          # plain directory
        open(self.p.data_dir, 'w').close()
        self.p.remove_data_directory()          # regular file
        os.symlink('unexisting', self.p.data_dir)
        with patch('os.unlink', Mock(side_effect=Exception)):
            self.p.remove_data_directory()      # unlink failure is swallowed
        self.p.remove_data_directory()          # dangling symlink

    @patch('subprocess.check_output', MagicMock(return_value=0, side_effect=pg_controldata_string))
    @patch('subprocess.check_output', side_effect=subprocess.CalledProcessError)
    @patch('subprocess.check_output', side_effect=Exception('Failed'))
    def test_controldata(self, check_output_call_error, check_output_generic_exception):
        data = self.p.controldata()
        self.assertEqual(len(data), 50)
        self.assertEqual(data['Database cluster state'], 'shut down in recovery')
        self.assertEqual(data['wal_log_hints setting'], 'on')
        self.assertEqual(int(data['Database block size']), 8192)
        subprocess.check_output = check_output_call_error
        data = self.p.controldata()
        self.assertEqual(data, dict())
        subprocess.check_output = check_output_generic_exception
        # FIX: pass the callable to assertRaises instead of calling it
        # in-place; the old form evaluated controldata() eagerly and the
        # assertion never exercised the raise.
        self.assertRaises(Exception, self.p.controldata)

    def test_read_postmaster_opts(self):
        m = mock_open(read_data=postmaster_opts_string())
        with patch.object(builtins, 'open', m):
            data = self.p.read_postmaster_opts()
            self.assertEqual(data['wal_level'], 'hot_standby')
            self.assertEqual(int(data['max_replication_slots']), 5)
            self.assertEqual(data.get('D'), None)
            # IOError is caught and an empty dict is returned...
            m.side_effect = IOError("foo")
            data = self.p.read_postmaster_opts()
            self.assertEqual(data, dict())
            # ...while any other exception propagates.
            m.side_effect = Exception("foo")
            # FIX: pass the callable, do not call it (same defect as above).
            self.assertRaises(Exception, self.p.read_postmaster_opts)

    @patch('subprocess.Popen')
    @patch.object(builtins, 'open', MagicMock(return_value=42))
    def test_single_user_mode(self, subprocess_popen_mock):
        subprocess_popen_mock.return_value.wait.return_value = 0
        self.assertEqual(self.p.single_user_mode(options=dict(archive_mode='on', archive_command='false')), 0)
        subprocess_popen_mock.assert_called_once_with(['postgres', '--single', '-D', self.p.data_dir,
                                                       '-c', 'archive_command=false',
                                                       '-c', 'archive_mode=on', 'postgres'],
                                                      stdin=subprocess.PIPE, stdout=42,
                                                      stderr=subprocess.STDOUT)
        subprocess_popen_mock.reset_mock()
        self.assertEqual(self.p.single_user_mode(command="CHECKPOINT"), 0)
        subprocess_popen_mock.assert_called_once_with(['postgres', '--single', '-D', self.p.data_dir,
                                                       'postgres'],
                                                      stdin=subprocess.PIPE, stdout=42,
                                                      stderr=subprocess.STDOUT)
        subprocess_popen_mock.return_value = None
        self.assertEqual(self.p.single_user_mode(), 1)

    def fake_listdir(path):
        # side_effect helper for the os.listdir patch below: only the
        # archive_status directory appears to contain files.
        if path.endswith(os.path.join('pg_xlog', 'archive_status')):
            return ["a", "b", "c"]
        return []

    @patch('os.listdir', MagicMock(side_effect=fake_listdir))
    @patch('os.path.isdir', MagicMock(return_value=True))
    @patch('os.unlink', return_value=True)
    @patch('os.remove', return_value=True)
    @patch('os.path.islink', return_value=False)
    @patch('os.path.isfile', return_value=True)
    def test_cleanup_archive_status(self, mock_file, mock_link, mock_remove, mock_unlink):
        ap = os.path.join(self.p.data_dir, 'pg_xlog', 'archive_status/')
        # Regular files are removed with os.remove ...
        self.p.cleanup_archive_status()
        mock_remove.assert_has_calls([mock.call(ap + 'a'), mock.call(ap + 'b'), mock.call(ap + 'c')])
        mock_unlink.assert_not_called()
        mock_remove.reset_mock()
        # ... symlinks with os.unlink ...
        mock_file.return_value = False
        mock_link.return_value = True
        self.p.cleanup_archive_status()
        mock_unlink.assert_has_calls([mock.call(ap + 'a'), mock.call(ap + 'b'), mock.call(ap + 'c')])
        mock_remove.assert_not_called()
        mock_unlink.reset_mock()
        mock_remove.reset_mock()
        # ... and stat failures leave everything untouched.
        mock_file.side_effect = Exception("foo")
        mock_link.side_effect = Exception("foo")
        self.p.cleanup_archive_status()
        mock_unlink.assert_not_called()
        mock_remove.assert_not_called()
class TestPostgresql(unittest.TestCase):
    """Legacy unit tests for the Postgresql wrapper.

    NOTE(review): this version patches module globals directly
    (``subprocess.call``, ``psycopg2.connect``, ``os.rename`` ...) and never
    restores them, so these tests pollute global state for anything that runs
    after them in the same process.  ``setUp``/``tearDown`` are wired up via
    ``__init__`` instead of being defined directly.
    """

    def __init__(self, method_name='runTest'):
        # Redirect the unittest fixture hooks to the snake_case helpers below.
        self.setUp = self.set_up
        self.tearDown = self.tear_down
        super(TestPostgresql, self).__init__(method_name)

    def set_up(self):
        # Global monkeypatching (no restore) -- see class docstring.
        subprocess.call = subprocess_call
        shutil.copy = nop
        # All credentials are placeholders; minimal patroni 'postgresql' config.
        self.p = Postgresql({'name': 'test0',
                             'scope': 'batman',
                             'data_dir': 'data/test0',
                             'listen': '127.0.0.1, *:5432',
                             'connect_address': '127.0.0.2:5432',
                             'pg_hba': ['hostssl all all 0.0.0.0/0 md5', 'host all all 0.0.0.0/0 md5'],
                             'superuser': {'password': ''},
                             'admin': {'username': '******', 'password': '******'},
                             'replication': {'username': '******', 'password': '******',
                                             'network': '127.0.0.1/32'},
                             'parameters': {'foo': 'bar'},
                             'recovery_conf': {'foo': 'bar'},
                             'callbacks': {'on_start': 'true', 'on_stop': 'true', 'on_restart': 'true',
                                           'on_role_change': 'true', 'on_reload': 'true'},
                             'restore': 'true'})
        psycopg2.connect = psycopg2_connect
        if not os.path.exists(self.p.data_dir):
            os.makedirs(self.p.data_dir)
        # Member signature here is positional: (index, name, conn_url, api_url, ttl?, session).
        self.leadermem = Member(0, 'leader', 'postgres://*****:*****@127.0.0.1:5435/postgres', None, None, 28)
        self.leader = Leader(-1, None, 28, self.leadermem)
        self.other = Member(0, 'test1', 'postgres://*****:*****@127.0.0.1:5433/postgres', None, None, 28)
        self.me = Member(0, 'test0', 'postgres://*****:*****@127.0.0.1:5434/postgres', None, None, 28)

    def tear_down(self):
        shutil.rmtree('data')

    def mock_query(self, p):
        # Stand-in for Postgresql.query that always fails with a connection error.
        raise psycopg2.OperationalError("not supported")

    def test_data_directory_empty(self):
        self.assertTrue(self.p.data_directory_empty())

    def test_initialize(self):
        self.assertTrue(self.p.initialize())
        # initialize() must have written pg_hba.conf into the data directory.
        self.assertTrue(os.path.exists(os.path.join(self.p.data_dir, 'pg_hba.conf')))

    def test_start_stop(self):
        self.assertFalse(self.p.start())
        self.p.is_running = false
        # A stale postmaster.pid must not prevent a successful start.
        with open(os.path.join(self.p.data_dir, 'postmaster.pid'), 'w'):
            pass
        self.assertTrue(self.p.start())
        self.assertTrue(self.p.stop())

    def test_sync_from_leader(self):
        self.assertTrue(self.p.sync_from_leader(self.leader))

    def test_follow_the_leader(self):
        self.p.demote(self.leader)
        self.p.follow_the_leader(None)
        self.p.demote(self.leader)
        self.p.follow_the_leader(self.leader)
        self.p.follow_the_leader(Leader(-1, None, 28, self.other))

    def test_create_replica(self):
        # Failure to delete the trigger file makes replica creation fail (rc 1).
        self.p.delete_trigger_file = raise_exception
        self.assertEquals(self.p.create_replica({'host': '', 'port': '', 'user': ''}, ''), 1)

    def test_create_connection_users(self):
        cfg = self.p.config
        cfg['superuser']['username'] = '******'
        p = Postgresql(cfg)
        p.create_connection_users()

    def test_sync_replication_slots(self):
        self.p.start()
        cluster = Cluster(True, self.leader, 0, [self.me, self.other, self.leadermem])
        self.p.sync_replication_slots(cluster)

    def test_query(self):
        self.p.query('select 1')
        self.assertRaises(psycopg2.InterfaceError, self.p.query, 'InterfaceError')
        self.assertRaises(psycopg2.OperationalError, self.p.query, 'blabla')
        # A closed connection (closed == 2) triggers a reconnect attempt.
        self.p._connection.closed = 2
        self.assertRaises(psycopg2.OperationalError, self.p.query, 'blabla')
        self.p._connection.closed = 2
        self.p.disconnect = false
        self.assertRaises(psycopg2.OperationalError, self.p.query, 'blabla')

    def test_is_healthiest_node(self):
        cluster = Cluster(True, self.leader, 0, [self.me, self.other, self.leadermem])
        self.assertTrue(self.p.is_healthiest_node(cluster))
        self.p.is_leader = false
        self.assertFalse(self.p.is_healthiest_node(cluster))
        self.p.xlog_position = lambda: 1
        self.assertTrue(self.p.is_healthiest_node(cluster))
        self.p.xlog_position = lambda: 2
        self.assertFalse(self.p.is_healthiest_node(cluster))
        # A negative maximum_lag_on_failover can never be satisfied.
        self.p.config['maximum_lag_on_failover'] = -3
        self.assertFalse(self.p.is_healthiest_node(cluster))

    def test_reload(self):
        self.assertTrue(self.p.reload())

    def test_is_healthy(self):
        self.assertTrue(self.p.is_healthy())
        self.p.is_running = false
        self.assertFalse(self.p.is_healthy())

    def test_promote(self):
        self.assertTrue(self.p.promote())
        # Promoting an already-promoted node must still report success.
        self.assertTrue(self.p.promote())

    def test_last_operation(self):
        self.assertEquals(self.p.last_operation(), '0')

    def test_call_nowait(self):
        # Temporarily replace Popen so the callback invocation fails; restored after.
        popen = subprocess.Popen
        subprocess.Popen = raise_exception
        self.assertFalse(self.p.call_nowait('on_start'))
        subprocess.Popen = popen

    def test_non_existing_callback(self):
        self.assertFalse(self.p.call_nowait('foobar'))

    def test_is_leader_exception(self):
        self.p.start()
        self.p.query = self.mock_query
        self.assertTrue(self.p.stop())

    def test_move_data_directory(self):
        self.p.is_running = false
        # NOTE(review): os.rename / os.path.isdir are replaced globally and
        # never restored.
        os.rename = nop
        os.path.isdir = true
        self.p.move_data_directory()
        # A failing rename must be handled gracefully.
        os.rename = raise_exception
        self.p.move_data_directory()
class Patroni:
    """Main daemon object: wires together Postgresql, the DCS client,
    the HA state machine and the REST API server, and drives the HA loop.
    """

    def __init__(self, config):
        self.nap_time = config['loop_wait']
        self.postgresql = Postgresql(config['postgresql'])
        self.ha = Ha(self.postgresql, self.get_dcs(self.postgresql.name, config))
        # NOTE: the values are unused, but the unpack validates that the
        # restapi 'listen' setting has the expected host:port form.
        host, port = config['restapi']['listen'].split(':')
        self.api = RestApiServer(self, config['restapi'])
        self.next_run = time.time()
        self.shutdown_member_ttl = 300

    @staticmethod
    def get_dcs(name, config):
        """Return the configured DCS client (etcd or ZooKeeper).

        :raises Exception: when neither 'etcd' nor 'zookeeper' is configured.
        """
        if 'etcd' in config:
            return Etcd(name, config['etcd'])
        if 'zookeeper' in config:
            return ZooKeeper(name, config['zookeeper'])
        # FIX: corrected 'sutable' -> 'suitable' in the error message.
        raise Exception('Can not find suitable configuration of distributed configuration store')

    def touch_member(self, ttl=None):
        """Refresh this member's key in the DCS; returns True on success."""
        connection_string = self.postgresql.connection_string + '?application_name=' + self.api.connection_string
        if self.ha.cluster:
            for m in self.ha.cluster.members:
                # Do not update member TTL when it is far from being expired
                if m.name == self.postgresql.name and m.real_ttl() > self.shutdown_member_ttl:
                    return True
        return self.ha.dcs.touch_member(connection_string, ttl)

    def cleanup_on_failed_initialization(self):
        """Clean up the DCS if cluster initialization was not successful."""
        logger.info("removing initialize key after failed attempt to initialize the cluster")
        self.ha.dcs.cancel_initialization()
        self.touch_member(self.shutdown_member_ttl)
        self.postgresql.stop()
        self.postgresql.move_data_directory()

    def initialize(self):
        """Bootstrap or join the cluster before entering the run loop."""
        # wait for the DCS to be available
        while not self.touch_member():
            logger.info('waiting on DCS')
            sleep(5)
        # is the data directory empty?
        if self.postgresql.data_directory_empty():
            while True:
                try:
                    cluster = self.ha.dcs.get_cluster()
                    if not cluster.is_unlocked():  # the leader already exists
                        if not cluster.initialize:
                            self.ha.dcs.initialize()
                        self.postgresql.bootstrap(cluster.leader)
                        break
                    # racing to initialize
                    elif not cluster.initialize and self.ha.dcs.initialize():
                        try:
                            self.postgresql.bootstrap()
                        except:  # noqa: E722 -- deliberately broad: clean up then re-raise
                            # bail out and clean the initialize flag.
                            self.cleanup_on_failed_initialization()
                            raise
                        self.ha.dcs.take_leader()
                        break
                except DCSError:
                    logger.info('waiting on DCS')
                    sleep(5)
        elif self.postgresql.is_running():
            self.postgresql.load_replication_slots()

    def schedule_next_run(self):
        """Sleep (via DCS watch) until the next HA cycle is due."""
        if self.postgresql.is_promoted:
            # After a promotion run the next cycle immediately.
            self.next_run = time.time()
        self.next_run += self.nap_time
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
        else:
            self.ha.dcs.watch(nap_time)

    def run(self):
        """The main HA loop: refresh membership, run a cycle, maintain slots."""
        self.api.start()
        self.next_run = time.time()
        while True:
            self.touch_member()
            logger.info(self.ha.run_cycle())
            try:
                if self.ha.state_handler.is_leader():
                    # FIX: explicit 'if' instead of a short-circuit 'and'
                    # expression used as a statement.
                    if self.ha.cluster:
                        self.ha.state_handler.create_replication_slots(self.ha.cluster)
                else:
                    self.ha.state_handler.drop_replication_slots()
            # FIX: narrowed from a bare 'except:' so KeyboardInterrupt /
            # SystemExit are no longer swallowed by the slot-maintenance guard.
            except Exception:
                logger.exception('Exception when changing replication slots')
            reap_children()
            self.schedule_next_run()
class Patroni(object):
    """Top-level daemon: owns the configuration, the DCS client, the
    Postgresql wrapper, the HA state machine and the REST API server, and
    runs the main loop until SIGTERM is received.
    """

    def __init__(self):
        self.setup_signal_handlers()
        self.version = __version__
        self.config = Config()
        self.dcs = get_dcs(self.config)
        # Dynamic configuration must be loaded before Postgresql/API start.
        self.load_dynamic_configuration()
        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.ha = Ha(self)
        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}

    def load_dynamic_configuration(self):
        """Pull dynamic configuration from the DCS (or fall back to the
        bootstrap section), retrying until the DCS answers."""
        while True:
            try:
                cluster = self.dcs.get_cluster()
                if cluster and cluster.config:
                    if self.config.set_dynamic_configuration(cluster.config):
                        self.dcs.reload_config(self.config)
                elif not self.config.dynamic_configuration and 'bootstrap' in self.config:
                    if self.config.set_dynamic_configuration(self.config['bootstrap']['dcs']):
                        self.dcs.reload_config(self.config)
                break
            except DCSError:
                logger.warning('Can not get cluster from dcs')

    def get_tags(self):
        # Boolean-ish tags are dropped when falsy; all other tags pass through.
        return {tag: value for tag, value in self.config.get('tags', {}).items()
                if tag not in ('clonefrom', 'nofailover', 'noloadbalance') or value}

    @property
    def nofailover(self):
        return bool(self.tags.get('nofailover', False))

    def reload_config(self):
        """Re-apply local configuration to tags, DCS, API and Postgresql."""
        try:
            self.tags = self.get_tags()
            self.dcs.reload_config(self.config)
            self.api.reload_config(self.config['restapi'])
            self.postgresql.reload_config(self.config['postgresql'])
        except Exception:
            logger.exception('Failed to reload config_file=%s', self.config.config_file)

    @property
    def replicatefrom(self):
        return self.tags.get('replicatefrom')

    def sighup_handler(self, *args):
        # Only set a flag here; the actual reload happens in run().
        self._received_sighup = True

    def sigterm_handler(self, *args):
        if not self._received_sigterm:
            self._received_sigterm = True
            sys.exit()

    @property
    def noloadbalance(self):
        return bool(self.tags.get('noloadbalance', False))

    def schedule_next_run(self):
        """Wait (via DCS watch) until the next HA cycle is due."""
        self.next_run += self.dcs.loop_wait
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
            # Release the GIL so we don't starve anyone waiting on async_executor lock
            time.sleep(0.001)
            # Warn user that Patroni is not keeping up
            logger.warning("Loop time exceeded, rescheduling immediately.")
        elif self.dcs.watch(nap_time):
            # The watch fired early (cluster event): run the next cycle now.
            self.next_run = time.time()

    def run(self):
        """Main loop: handle SIGHUP reloads, run HA cycles, apply dynamic
        configuration updates, until SIGTERM."""
        self.api.start()
        self.next_run = time.time()
        while not self._received_sigterm:
            if self._received_sighup:
                self._received_sighup = False
                if self.config.reload_local_configuration():
                    self.reload_config()
            reap_children()
            logger.info(self.ha.run_cycle())
            cluster = self.dcs.cluster
            if cluster and cluster.config and self.config.set_dynamic_configuration(cluster.config):
                self.reload_config()
            if not self.postgresql.data_directory_empty():
                self.config.save_cache()
            reap_children()
            self.schedule_next_run()

    def setup_signal_handlers(self):
        self._received_sighup = False
        self._received_sigterm = False
        signal.signal(signal.SIGHUP, self.sighup_handler)
        signal.signal(signal.SIGTERM, self.sigterm_handler)
        signal.signal(signal.SIGCHLD, sigchld_handler)
class TestPostgresql(unittest.TestCase):
    """Unit tests for the Postgresql wrapper (initdb-options / pgpass era).

    ``setUp`` builds a throw-away ``Postgresql`` instance rooted at
    ``data/test0``; ``tearDown`` removes the whole ``data`` tree, so every
    test starts from a clean slate.  All credentials are placeholders.
    """

    @patch('subprocess.call', Mock(return_value=0))
    @patch('psycopg2.connect', psycopg2_connect)
    def setUp(self):
        self.p = Postgresql({'name': 'test0',
                             'scope': 'batman',
                             'data_dir': 'data/test0',
                             'listen': '127.0.0.1, *:5432',
                             'connect_address': '127.0.0.2:5432',
                             'pg_hba': ['hostssl all all 0.0.0.0/0 md5', 'host all all 0.0.0.0/0 md5'],
                             'superuser': {'password': '******'},
                             'admin': {'username': '******', 'password': '******'},
                             'pg_rewind': {'username': '******', 'password': '******'},
                             'replication': {'username': '******', 'password': '******',
                                             'network': '127.0.0.1/32'},
                             'parameters': {'foo': 'bar'},
                             'recovery_conf': {'foo': 'bar'},
                             'callbacks': {'on_start': 'true', 'on_stop': 'true', 'on_restart': 'true',
                                           'on_role_change': 'true', 'on_reload': 'true'},
                             'restore': 'true'})
        if not os.path.exists(self.p.data_dir):
            os.makedirs(self.p.data_dir)
        self.leadermem = Member(0, 'leader', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5435/postgres'})
        self.leader = Leader(-1, 28, self.leadermem)
        self.other = Member(0, 'test1', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5433/postgres'})
        self.me = Member(0, 'test0', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5434/postgres'})

    def tearDown(self):
        shutil.rmtree('data')

    def test_data_directory_empty(self):
        self.assertTrue(self.p.data_directory_empty())

    def test_get_initdb_options(self):
        self.p.initdb_options = [{'encoding': 'UTF8'}, 'data-checksums']
        self.assertEqual(self.p.get_initdb_options(), ['--encoding=UTF8', '--data-checksums'])
        # 'pgdata' is managed by patroni and must be rejected.
        self.p.initdb_options = [{'pgdata': 'bar'}]
        self.assertRaises(Exception, self.p.get_initdb_options)
        # Multi-key dicts and non-str/dict entries are invalid.
        self.p.initdb_options = [{'foo': 'bar', 1: 2}]
        self.assertRaises(Exception, self.p.get_initdb_options)
        self.p.initdb_options = [1]
        self.assertRaises(Exception, self.p.get_initdb_options)

    def test_initialize(self):
        self.assertTrue(self.p.initialize())
        # initialize() must write out pg_hba.conf into the data directory
        self.assertTrue(os.path.exists(os.path.join(self.p.data_dir, 'pg_hba.conf')))

    def test_start(self):
        self.assertTrue(self.p.start())
        self.p.is_running = false
        # A stale postmaster.pid must not prevent a successful start.
        open(os.path.join(self.p.data_dir, 'postmaster.pid'), 'w').close()
        self.assertTrue(self.p.start())

    def test_stop(self):
        self.assertTrue(self.p.stop())
        with patch('subprocess.call', Mock(return_value=1)):
            # pg_ctl failing is fine when the server is not running...
            self.assertTrue(self.p.stop())
            # ...but not when it is.
            self.p.is_running = Mock(return_value=True)
            self.assertFalse(self.p.stop())

    def test_restart(self):
        self.p.start = false
        self.p.is_running = false
        self.assertFalse(self.p.restart())
        self.assertEqual(self.p.state, 'restart failed (restarting)')

    @patch.object(builtins, 'open', MagicMock())
    def test_write_pgpass(self):
        self.p.write_pgpass({'host': 'localhost', 'port': '5432', 'user': '******', 'password': '******'})

    @patch('patroni.postgresql.Postgresql.write_pgpass', MagicMock(return_value=dict()))
    def test_sync_from_leader(self):
        self.assertTrue(self.p.sync_from_leader(self.leader))

    @patch('subprocess.call', side_effect=Exception("Test"))
    @patch('patroni.postgresql.Postgresql.write_pgpass', MagicMock(return_value=dict()))
    def test_pg_rewind(self, mock_call):
        self.assertTrue(self.p.rewind(self.leader))
        subprocess.call = mock_call
        self.assertFalse(self.p.rewind(self.leader))

    @patch('patroni.postgresql.Postgresql.rewind', return_value=False)
    @patch('patroni.postgresql.Postgresql.remove_data_directory', MagicMock(return_value=True))
    @patch('patroni.postgresql.Postgresql.single_user_mode', MagicMock(return_value=1))
    @patch('patroni.postgresql.Postgresql.write_pgpass', MagicMock(return_value=dict()))
    def test_follow_the_leader(self, mock_pg_rewind):
        self.p.demote()
        self.p.follow_the_leader(None)
        self.p.demote()
        self.p.follow_the_leader(self.leader)
        self.p.follow_the_leader(Leader(-1, 28, self.other))
        self.p.rewind = mock_pg_rewind
        self.p.follow_the_leader(self.leader)
        self.p.require_rewind()
        with mock.patch('os.path.islink', MagicMock(return_value=True)):
            with mock.patch('patroni.postgresql.Postgresql.can_rewind',
                            new_callable=PropertyMock(return_value=True)):
                with mock.patch('os.unlink', MagicMock(return_value=True)):
                    self.p.follow_the_leader(self.leader, recovery=True)
        self.p.require_rewind()
        with mock.patch('patroni.postgresql.Postgresql.can_rewind',
                        new_callable=PropertyMock(return_value=True)):
            # Exercise both the successful and the failed rewind paths.
            self.p.rewind.return_value = True
            self.p.follow_the_leader(self.leader, recovery=True)
            self.p.rewind.return_value = False
            self.p.follow_the_leader(self.leader, recovery=True)

    def test_can_rewind(self):
        tmp = self.p.pg_rewind
        self.p.pg_rewind = None
        self.assertFalse(self.p.can_rewind)
        self.p.pg_rewind = tmp
        with mock.patch('subprocess.call', MagicMock(return_value=1)):
            self.assertFalse(self.p.can_rewind)
        with mock.patch('subprocess.call', side_effect=OSError("foo")):
            self.assertFalse(self.p.can_rewind)
        # FIX: save the bound method itself, not its return value, so the
        # restore below actually puts the method back.
        tmp = self.p.controldata
        self.p.controldata = lambda: {'wal_log_hints setting': 'on'}
        self.assertTrue(self.p.can_rewind)
        self.p.controldata = tmp

    @patch('time.sleep', Mock())
    def test_create_replica(self):
        self.p.delete_trigger_file = Mock(side_effect=OSError())
        # First attempt fails, the retry succeeds.
        with patch('subprocess.call', Mock(side_effect=[1, 0])):
            self.assertEqual(self.p.create_replica(self.leader, ''), 0)
        with patch('subprocess.call', Mock(side_effect=[Exception(), 0])):
            self.assertEqual(self.p.create_replica(self.leader, ''), 0)
        # Replica creation falls back from 'wale' to 'basebackup'.
        self.p.config['create_replica_method'] = ['wale', 'basebackup']
        self.p.config['wale'] = {'command': 'foo'}
        with patch('subprocess.call', Mock(return_value=0)):
            self.assertEqual(self.p.create_replica(self.leader, ''), 0)
            del self.p.config['wale']
            self.assertEqual(self.p.create_replica(self.leader, ''), 0)
        with patch('subprocess.call', Mock(side_effect=Exception("foo"))):
            self.assertEqual(self.p.create_replica(self.leader, ''), 1)

    def test_create_connection_users(self):
        cfg = self.p.config
        cfg['superuser']['username'] = '******'
        p = Postgresql(cfg)
        p.create_connection_users()

    def test_sync_replication_slots(self):
        self.p.start()
        cluster = Cluster(True, self.leader, 0, [self.me, self.other, self.leadermem], None)
        self.p.sync_replication_slots(cluster)
        # A failing query must not blow up slot synchronization.
        self.p.query = Mock(side_effect=psycopg2.OperationalError)
        self.p.schedule_load_slots = True
        self.p.sync_replication_slots(cluster)

    @patch.object(MockConnect, 'closed', 2)
    def test__query(self):
        self.assertRaises(PostgresConnectionException, self.p._query, 'blabla')
        self.p._state = 'restarting'
        self.assertRaises(RetryFailedError, self.p._query, 'blabla')

    def test_query(self):
        self.p.query('select 1')
        self.assertRaises(PostgresConnectionException, self.p.query, 'RetryFailedError')
        self.assertRaises(psycopg2.OperationalError, self.p.query, 'blabla')

    def test_is_leader(self):
        self.assertTrue(self.p.is_leader())

    def test_reload(self):
        self.assertTrue(self.p.reload())

    def test_is_healthy(self):
        self.assertTrue(self.p.is_healthy())
        self.p.is_running = false
        self.assertFalse(self.p.is_healthy())

    def test_promote(self):
        self.p._role = 'replica'
        self.assertTrue(self.p.promote())
        # Promoting an already-promoted node must still report success.
        self.assertTrue(self.p.promote())

    def test_last_operation(self):
        self.assertEqual(self.p.last_operation(), '0')

    @patch('subprocess.Popen', Mock(side_effect=OSError()))
    def test_call_nowait(self):
        self.assertFalse(self.p.call_nowait('on_start'))

    def test_non_existing_callback(self):
        self.assertFalse(self.p.call_nowait('foobar'))

    def test_is_leader_exception(self):
        self.p.start()
        self.p.query = Mock(side_effect=psycopg2.OperationalError("not supported"))
        self.assertTrue(self.p.stop())

    def test_check_replication_lag(self):
        self.assertTrue(self.p.check_replication_lag(0))

    @patch('os.rename', Mock())
    @patch('os.path.isdir', Mock(return_value=True))
    def test_move_data_directory(self):
        self.p.is_running = false
        self.p.move_data_directory()
        # A failed rename must be handled gracefully.
        with patch('os.rename', Mock(side_effect=OSError())):
            self.p.move_data_directory()

    @patch('patroni.postgresql.Postgresql.write_pgpass', MagicMock(return_value=dict()))
    def test_bootstrap(self):
        with patch('subprocess.call', Mock(return_value=1)):
            self.assertRaises(PostgresException, self.p.bootstrap)
        self.p.bootstrap()
        self.p.bootstrap(self.leader)

    def test_remove_data_directory(self):
        self.p.data_dir = 'data_dir'
        self.p.remove_data_directory()          # nothing there: no-op
        os.mkdir(self.p.data_dir)
        self.p.remove_data_directory()          # plain directory
        open(self.p.data_dir, 'w').close()
        self.p.remove_data_directory()          # regular file
        os.symlink('unexisting', self.p.data_dir)
        with patch('os.unlink', Mock(side_effect=Exception)):
            self.p.remove_data_directory()      # unlink failure is swallowed
        self.p.remove_data_directory()          # dangling symlink

    @patch('subprocess.check_output', MagicMock(return_value=0, side_effect=pg_controldata_string))
    @patch('subprocess.check_output', side_effect=subprocess.CalledProcessError)
    @patch('subprocess.check_output', side_effect=Exception('Failed'))
    def test_controldata(self, check_output_call_error, check_output_generic_exception):
        data = self.p.controldata()
        self.assertEqual(len(data), 50)
        self.assertEqual(data['Database cluster state'], 'shut down in recovery')
        self.assertEqual(data['wal_log_hints setting'], 'on')
        self.assertEqual(int(data['Database block size']), 8192)
        subprocess.check_output = check_output_call_error
        data = self.p.controldata()
        self.assertEqual(data, dict())
        subprocess.check_output = check_output_generic_exception
        # FIX: pass the callable to assertRaises instead of calling it
        # in-place; the old form evaluated controldata() eagerly and the
        # assertion never exercised the raise.
        self.assertRaises(Exception, self.p.controldata)

    def test_read_postmaster_opts(self):
        m = mock_open(read_data=postmaster_opts_string())
        with patch.object(builtins, 'open', m):
            data = self.p.read_postmaster_opts()
            self.assertEqual(data['wal_level'], 'hot_standby')
            self.assertEqual(int(data['max_replication_slots']), 5)
            self.assertEqual(data.get('D'), None)
            # IOError is caught and an empty dict is returned...
            m.side_effect = IOError("foo")
            data = self.p.read_postmaster_opts()
            self.assertEqual(data, dict())
            # ...while any other exception propagates.
            m.side_effect = Exception("foo")
            # FIX: pass the callable, do not call it (same defect as above).
            self.assertRaises(Exception, self.p.read_postmaster_opts)

    @patch('subprocess.Popen')
    @patch.object(builtins, 'open', MagicMock(return_value=42))
    def test_single_user_mode(self, subprocess_popen_mock):
        subprocess_popen_mock.return_value.wait.return_value = 0
        self.assertEqual(self.p.single_user_mode(options=dict(archive_mode='on', archive_command='false')), 0)
        subprocess_popen_mock.assert_called_once_with(['postgres', '--single', '-D', self.p.data_dir,
                                                       '-c', 'archive_command=false',
                                                       '-c', 'archive_mode=on', 'postgres'],
                                                      stdin=subprocess.PIPE, stdout=42,
                                                      stderr=subprocess.STDOUT)
        subprocess_popen_mock.reset_mock()
        self.assertEqual(self.p.single_user_mode(command="CHECKPOINT"), 0)
        subprocess_popen_mock.assert_called_once_with(['postgres', '--single', '-D', self.p.data_dir,
                                                       'postgres'],
                                                      stdin=subprocess.PIPE, stdout=42,
                                                      stderr=subprocess.STDOUT)
        subprocess_popen_mock.return_value = None
        self.assertEqual(self.p.single_user_mode(), 1)

    def fake_listdir(path):
        # side_effect helper for the os.listdir patch below: only the
        # archive_status directory appears to contain files.
        if path.endswith(os.path.join('pg_xlog', 'archive_status')):
            return ["a", "b", "c"]
        return []

    @patch('os.listdir', MagicMock(side_effect=fake_listdir))
    @patch('os.path.isdir', MagicMock(return_value=True))
    @patch('os.unlink', return_value=True)
    @patch('os.remove', return_value=True)
    @patch('os.path.islink', return_value=False)
    @patch('os.path.isfile', return_value=True)
    def test_cleanup_archive_status(self, mock_file, mock_link, mock_remove, mock_unlink):
        ap = os.path.join(self.p.data_dir, 'pg_xlog', 'archive_status/')
        # Regular files are removed with os.remove ...
        self.p.cleanup_archive_status()
        mock_remove.assert_has_calls([mock.call(ap + 'a'), mock.call(ap + 'b'), mock.call(ap + 'c')])
        mock_unlink.assert_not_called()
        mock_remove.reset_mock()
        # ... symlinks with os.unlink ...
        mock_file.return_value = False
        mock_link.return_value = True
        self.p.cleanup_archive_status()
        mock_unlink.assert_has_calls([mock.call(ap + 'a'), mock.call(ap + 'b'), mock.call(ap + 'c')])
        mock_remove.assert_not_called()
        mock_unlink.reset_mock()
        mock_remove.reset_mock()
        # ... and stat failures leave everything untouched.
        mock_file.side_effect = Exception("foo")
        mock_link.side_effect = Exception("foo")
        self.p.cleanup_archive_status()
        mock_unlink.assert_not_called()
        mock_remove.assert_not_called()

    @patch('subprocess.check_output', MagicMock(return_value=0, side_effect=pg_controldata_string))
    def test_sysid(self):
        self.assertEqual(self.p.sysid, "6200971513092291716")

    @patch('os.path.isfile', MagicMock(return_value=True))
    @patch('shutil.copy', side_effect=Exception)
    def test_save_configuration_files(self, mock_copy):
        shutil.copy = mock_copy
        self.p.save_configuration_files()

    @patch('os.path.isfile', MagicMock(side_effect=is_file_raise_on_backup))
    @patch('shutil.copy', side_effect=Exception)
    def test_restore_configuration_files(self, mock_copy):
        shutil.copy = mock_copy
        self.p.restore_configuration_files()
class Patroni(object):
    """Main Patroni daemon object.

    Wires together the DCS client, the managed PostgreSQL instance, the REST
    API server and the HA state machine, then drives the periodic HA loop in
    :meth:`run`.
    """

    def __init__(self):
        # Imports are deferred into __init__ (matching the original) to keep
        # module import cheap and avoid import cycles.
        from patroni.api import RestApiServer
        from patroni.config import Config
        from patroni.dcs import get_dcs
        from patroni.ha import Ha
        from patroni.postgresql import Postgresql
        from patroni.version import __version__

        # Install signal handlers before anything long-running starts so a
        # SIGTERM during startup is not lost.
        self.setup_signal_handlers()

        self.version = __version__
        self.config = Config()
        self.dcs = get_dcs(self.config)
        # Blocks until the cluster-wide (dynamic) configuration is available.
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}

    def load_dynamic_configuration(self):
        """Poll the DCS until the dynamic configuration has been loaded.

        Falls back to the local ``bootstrap.dcs`` section when the cluster has
        no stored configuration yet. Loops until one attempt succeeds.
        """
        from patroni.exceptions import DCSError
        while True:
            try:
                cluster = self.dcs.get_cluster()
                if cluster and cluster.config:
                    if self.config.set_dynamic_configuration(cluster.config):
                        self.dcs.reload_config(self.config)
                elif not self.config.dynamic_configuration and 'bootstrap' in self.config:
                    if self.config.set_dynamic_configuration(self.config['bootstrap']['dcs']):
                        self.dcs.reload_config(self.config)
                break
            except DCSError:
                logger.warning('Can not get cluster from dcs')
                # Fix: back off before retrying. The previous code retried
                # immediately, busy-looping and hammering an unreachable DCS.
                time.sleep(5)

    def get_tags(self):
        """Return configured tags, dropping boolean-like tags that are falsy.

        This makes e.g. ``nofailover: false`` behave exactly as if the tag
        were not set at all.
        """
        return {tag: value for tag, value in self.config.get('tags', {}).items()
                if tag not in ('clonefrom', 'nofailover', 'noloadbalance', 'nosync') or value}

    @property
    def nofailover(self):
        return bool(self.tags.get('nofailover', False))

    @property
    def nosync(self):
        return bool(self.tags.get('nosync', False))

    def reload_config(self):
        """Propagate a configuration change to every component.

        Failures are logged and swallowed so a bad reload never kills the
        daemon's main loop.
        """
        try:
            self.tags = self.get_tags()
            self.dcs.reload_config(self.config)
            self.api.reload_config(self.config['restapi'])
            self.postgresql.reload_config(self.config['postgresql'])
        except Exception:
            logger.exception('Failed to reload config_file=%s', self.config.config_file)

    @property
    def replicatefrom(self):
        return self.tags.get('replicatefrom')

    def sighup_handler(self, *args):
        # Only record the signal; the actual reload happens in run().
        self._received_sighup = True

    def sigterm_handler(self, *args):
        # Guard so a second SIGTERM arriving during shutdown does not
        # re-enter sys.exit().
        if not self._received_sigterm:
            self._received_sigterm = True
            sys.exit()

    @property
    def noloadbalance(self):
        return bool(self.tags.get('noloadbalance', False))

    def schedule_next_run(self):
        """Sleep (via ha.watch) until the next HA cycle is due."""
        self.next_run += self.dcs.loop_wait
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
            # Release the GIL so we don't starve anyone waiting on async_executor lock
            time.sleep(0.001)
            # Warn user that Patroni is not keeping up
            logger.warning("Loop time exceeded, rescheduling immediately.")
        elif self.ha.watch(nap_time):
            # Woken early by a watched DCS event: restart the schedule from now.
            self.next_run = time.time()

    def run(self):
        """Main loop: run HA cycles until a SIGTERM is received."""
        self.api.start()
        self.next_run = time.time()

        while not self._received_sigterm:
            # SIGHUP means "re-read the local configuration file".
            if self._received_sighup:
                self._received_sighup = False
                if self.config.reload_local_configuration():
                    self.reload_config()

            logger.info(self.ha.run_cycle())

            # Pick up dynamic configuration changes published in the DCS.
            cluster = self.dcs.cluster
            if cluster and cluster.config and self.config.set_dynamic_configuration(cluster.config):
                self.reload_config()

            # Only cache the configuration once a data directory actually exists.
            if not self.postgresql.data_directory_empty():
                self.config.save_cache()

            self.schedule_next_run()

    def setup_signal_handlers(self):
        self._received_sighup = False
        self._received_sigterm = False
        signal.signal(signal.SIGHUP, self.sighup_handler)
        signal.signal(signal.SIGTERM, self.sigterm_handler)
class Patroni(object):
    """Patroni daemon object (earlier variant): wires the DCS, PostgreSQL,
    REST API and HA state machine together and drives the main loop.

    NOTE(review): ``Patroni`` is defined twice in this file; at import time
    the later definition shadows the earlier one.
    """

    def __init__(self):
        # Install SIGHUP/SIGTERM/SIGCHLD handlers before anything
        # long-running starts.
        self.setup_signal_handlers()

        self.version = __version__
        self.config = Config()
        self.dcs = get_dcs(self.config)
        # Blocks until the cluster-wide (dynamic) configuration is available.
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}

    def load_dynamic_configuration(self):
        """Poll the DCS until the dynamic configuration has been loaded.

        Falls back to the local ``bootstrap.dcs`` section when the cluster
        has no stored configuration yet.
        """
        while True:
            try:
                cluster = self.dcs.get_cluster()
                if cluster and cluster.config:
                    self.config.set_dynamic_configuration(cluster.config)
                elif not self.config.dynamic_configuration and 'bootstrap' in self.config:
                    self.config.set_dynamic_configuration(self.config['bootstrap']['dcs'])
                break
            except DCSError:
                # NOTE(review): retries immediately with no delay -- a short
                # sleep here would avoid hammering an unreachable DCS.
                logger.warning('Can not get cluster from dcs')

    def get_tags(self):
        # Boolean-like tags are dropped when falsy, so e.g.
        # ``nofailover: false`` behaves as if the tag were not set at all.
        return {tag: value for tag, value in self.config.get('tags', {}).items()
                if tag not in ('clonefrom', 'nofailover', 'noloadbalance') or value}

    @property
    def nofailover(self):
        return self.tags.get('nofailover', False)

    def reload_config(self):
        """Propagate a configuration change to every component.

        Failures are logged and swallowed so a bad reload never kills the
        daemon's main loop.
        """
        try:
            self.tags = self.get_tags()
            self.dcs.reload_config(self.config)
            self.api.reload_config(self.config['restapi'])
            self.postgresql.reload_config(self.config['postgresql'])
        except Exception:
            logger.exception('Failed to reload config_file=%s', self.config.config_file)

    @property
    def replicatefrom(self):
        return self.tags.get('replicatefrom')

    def sighup_handler(self, *args):
        # Only record the signal; the actual reload happens in run().
        self._received_sighup = True

    def sigterm_handler(self, *args):
        # Guard so a second SIGTERM during shutdown does not re-enter
        # sys.exit().
        if not self._received_sigterm:
            self._received_sigterm = True
            sys.exit()

    @property
    def noloadbalance(self):
        return self.tags.get('noloadbalance', False)

    def schedule_next_run(self):
        """Wait (via dcs.watch) until the next HA cycle is due."""
        self.next_run += self.dcs.loop_wait
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            # Behind schedule: run the next cycle immediately.
            self.next_run = current_time
        elif self.dcs.watch(nap_time):
            # Woken early by a watched DCS event: restart the schedule from now.
            self.next_run = time.time()

    def run(self):
        """Main loop: run HA cycles until a SIGTERM is received."""
        self.api.start()
        self.next_run = time.time()
        while not self._received_sigterm:
            # SIGHUP means "re-read the local configuration file".
            if self._received_sighup:
                self._received_sighup = False
                if self.config.reload_local_configuration():
                    self.reload_config()

            logger.info(self.ha.run_cycle())

            # Pick up dynamic configuration changes published in the DCS.
            cluster = self.dcs.cluster
            if cluster and cluster.config and self.config.set_dynamic_configuration(cluster.config):
                self.reload_config()

            # Only cache the configuration once a data directory actually exists.
            if not self.postgresql.data_directory_empty():
                self.config.save_cache()

            # reap_children is presumably a module-level helper that collects
            # exited child processes flagged by the SIGCHLD handler -- defined
            # outside this view; verify against the module header.
            reap_children()
            self.schedule_next_run()

    def setup_signal_handlers(self):
        self._received_sighup = False
        self._received_sigterm = False
        signal.signal(signal.SIGHUP, self.sighup_handler)
        signal.signal(signal.SIGTERM, self.sigterm_handler)
        signal.signal(signal.SIGCHLD, sigchld_handler)