Exemple #1
0
class TestPostgresql(unittest.TestCase):
    """Tests for ``Postgresql`` run against a throw-away ``data`` directory.

    ``setUp`` creates ``data/test0`` and ``tearDown`` removes the whole ``data`` tree, so every
    test starts from a fresh ``Postgresql`` instance.  Helpers such as ``false``,
    ``psycopg2_connect``, ``MockConnect``, ``pg_controldata_string`` and
    ``postmaster_opts_string`` are defined outside this class.
    """

    @patch('subprocess.call', Mock(return_value=0))
    @patch('psycopg2.connect', psycopg2_connect)
    def setUp(self):
        """Create the ``Postgresql`` under test and the leader/member fixtures."""
        self.p = Postgresql({'name': 'test0', 'scope': 'batman', 'data_dir': 'data/test0',
                             'listen': '127.0.0.1, *:5432', 'connect_address': '127.0.0.2:5432',
                             'pg_hba': ['hostssl all all 0.0.0.0/0 md5', 'host all all 0.0.0.0/0 md5'],
                             'superuser': {'password': ''},
                             'admin': {'username': '******', 'password': '******'},
                             'pg_rewind': {'username': '******', 'password': '******'},
                             'replication': {'username': '******',
                                             'password': '******',
                                             'network': '127.0.0.1/32'},
                             'parameters': {'foo': 'bar'}, 'recovery_conf': {'foo': 'bar'},
                             'callbacks': {'on_start': 'true', 'on_stop': 'true',
                                           'on_restart': 'true', 'on_role_change': 'true',
                                           'on_reload': 'true'
                                           },
                             'restore': 'true'})
        if not os.path.exists(self.p.data_dir):
            os.makedirs(self.p.data_dir)
        self.leadermem = Member(0, 'leader', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5435/postgres'})
        self.leader = Leader(-1, 28, self.leadermem)
        self.other = Member(0, 'test1', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5433/postgres'})
        self.me = Member(0, 'test0', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5434/postgres'})

    def tearDown(self):
        """Remove the scratch ``data`` tree created by ``setUp``."""
        shutil.rmtree('data')

    def test_data_directory_empty(self):
        self.assertTrue(self.p.data_directory_empty())

    def test_initialize(self):
        self.assertTrue(self.p.initialize())
        self.assertTrue(os.path.exists(os.path.join(self.p.data_dir, 'pg_hba.conf')))

    def test_start(self):
        self.assertTrue(self.p.start())
        # `false` is a helper from outside this class (presumably a callable returning False).
        self.p.is_running = false
        # Leave an empty postmaster.pid behind before starting again.
        open(os.path.join(self.p.data_dir, 'postmaster.pid'), 'w').close()
        self.assertTrue(self.p.start())

    def test_stop(self):
        self.assertTrue(self.p.stop())
        with patch('subprocess.call', Mock(return_value=1)):
            self.assertTrue(self.p.stop())
            self.p.is_running = Mock(return_value=True)
            self.assertFalse(self.p.stop())

    def test_restart(self):
        self.p.start = false
        self.p.is_running = false
        self.assertFalse(self.p.restart())
        self.assertEqual(self.p.state, 'restart failed (restarting)')

    def test_sync_from_leader(self):
        self.assertTrue(self.p.sync_from_leader(self.leader))

    @patch('subprocess.call', side_effect=Exception("Test"))
    def test_pg_rewind(self, mock_call):
        self.assertTrue(self.p.rewind(self.leader))
        # Re-assign the raising mock explicitly before the failing call.
        subprocess.call = mock_call
        self.assertFalse(self.p.rewind(self.leader))

    @patch('patroni.postgresql.Postgresql.rewind', return_value=False)
    @patch('patroni.postgresql.Postgresql.remove_data_directory', MagicMock(return_value=True))
    @patch('patroni.postgresql.Postgresql.single_user_mode', MagicMock(return_value=1))
    def test_follow_the_leader(self, mock_pg_rewind):
        """Drive ``follow_the_leader`` with no leader, a leader, another member and rewind on/off."""
        self.p.demote()
        self.p.follow_the_leader(None)
        self.p.demote()
        self.p.follow_the_leader(self.leader)
        self.p.follow_the_leader(Leader(-1, 28, self.other))
        self.p.rewind = mock_pg_rewind
        self.p.follow_the_leader(self.leader)
        self.p.require_rewind()
        with mock.patch('os.path.islink', MagicMock(return_value=True)):
            with mock.patch('os.unlink', MagicMock(return_value=True)):
                with mock.patch('patroni.postgresql.Postgresql.can_rewind', new_callable=PropertyMock(return_value=True)):
                    self.p.follow_the_leader(self.leader, recovery=True)
        self.p.require_rewind()
        with mock.patch('patroni.postgresql.Postgresql.can_rewind', new_callable=PropertyMock(return_value=True)):
            # Exercise both outcomes of the (mocked) rewind.
            self.p.rewind.return_value = True
            self.p.follow_the_leader(self.leader, recovery=True)
            self.p.rewind.return_value = False
            self.p.follow_the_leader(self.leader, recovery=True)

    def test_can_rewind(self):
        saved_pg_rewind = self.p.pg_rewind
        self.p.pg_rewind = None
        self.assertFalse(self.p.can_rewind)
        self.p.pg_rewind = saved_pg_rewind
        with mock.patch('subprocess.call', MagicMock(return_value=1)):
            self.assertFalse(self.p.can_rewind)
        with mock.patch('subprocess.call', side_effect=OSError("foo")):
            self.assertFalse(self.p.can_rewind)
        # Keep the bound method itself (not the value it returns) so that it, rather than
        # a plain dict, is what gets put back after the stub has been used.
        saved_controldata = self.p.controldata
        self.p.controldata = lambda: {'wal_log_hints setting': 'on'}
        self.assertTrue(self.p.can_rewind)
        self.p.controldata = saved_controldata

    def test_create_replica(self):
        self.p.delete_trigger_file = Mock(side_effect=OSError())
        self.assertEqual(self.p.create_replica({'host': '', 'port': '', 'user': ''}, ''), 1)

    def test_create_connection_users(self):
        cfg = self.p.config
        cfg['superuser']['username'] = '******'
        p = Postgresql(cfg)
        p.create_connection_users()

    def test_sync_replication_slots(self):
        self.p.start()
        cluster = Cluster(True, self.leader, 0, [self.me, self.other, self.leadermem], None)
        self.p.sync_replication_slots(cluster)
        # Second pass: every query fails while a reload of the slots is scheduled.
        self.p.query = Mock(side_effect=psycopg2.OperationalError)
        self.p.schedule_load_slots = True
        self.p.sync_replication_slots(cluster)

    @patch.object(MockConnect, 'closed', 2)
    def test__query(self):
        self.assertRaises(PostgresConnectionException, self.p._query, 'blabla')
        self.p._state = 'restarting'
        self.assertRaises(RetryFailedError, self.p._query, 'blabla')

    def test_query(self):
        self.p.query('select 1')
        self.assertRaises(PostgresConnectionException, self.p.query, 'RetryFailedError')
        self.assertRaises(psycopg2.OperationalError, self.p.query, 'blabla')

    def test_is_leader(self):
        self.assertTrue(self.p.is_leader())

    def test_reload(self):
        self.assertTrue(self.p.reload())

    def test_is_healthy(self):
        self.assertTrue(self.p.is_healthy())
        self.p.is_running = false
        self.assertFalse(self.p.is_healthy())

    def test_promote(self):
        self.p._role = 'replica'
        self.assertTrue(self.p.promote())
        self.assertTrue(self.p.promote())

    def test_last_operation(self):
        self.assertEqual(self.p.last_operation(), '0')

    @patch('subprocess.Popen', Mock(side_effect=OSError()))
    def test_call_nowait(self):
        self.assertFalse(self.p.call_nowait('on_start'))

    def test_non_existing_callback(self):
        self.assertFalse(self.p.call_nowait('foobar'))

    def test_is_leader_exception(self):
        self.p.start()
        self.p.query = Mock(side_effect=psycopg2.OperationalError("not supported"))
        self.assertTrue(self.p.stop())

    def test_check_replication_lag(self):
        self.assertTrue(self.p.check_replication_lag(0))

    @patch('os.rename', Mock())
    @patch('os.path.isdir', Mock(return_value=True))
    def test_move_data_directory(self):
        self.p.is_running = false
        self.p.move_data_directory()
        # Same call again, this time with the rename failing.
        with patch('os.rename', Mock(side_effect=OSError())):
            self.p.move_data_directory()

    def test_bootstrap(self):
        with patch('subprocess.call', Mock(return_value=1)):
            self.assertRaises(PostgresException, self.p.bootstrap)
        self.p.bootstrap()
        self.p.bootstrap(self.leader)

    def test_remove_data_directory(self):
        """Call ``remove_data_directory`` on a missing path, a directory, a file and a dangling symlink."""
        self.p.data_dir = 'data_dir'
        self.p.remove_data_directory()
        os.mkdir(self.p.data_dir)
        self.p.remove_data_directory()
        open(self.p.data_dir, 'w').close()
        self.p.remove_data_directory()
        os.symlink('unexisting', self.p.data_dir)
        with patch('os.unlink', Mock(side_effect=Exception)):
            self.p.remove_data_directory()
        self.p.remove_data_directory()

    # NOTE(review): the same target is patched three times.  Mock arguments are injected
    # bottom-up, so `check_output_call_error` receives the mock raising Exception('Failed')
    # and `check_output_generic_exception` the one configured with CalledProcessError --
    # the parameter names look swapped.  Left as is; confirm the intent before renaming.
    @patch('subprocess.check_output', MagicMock(return_value=0, side_effect=pg_controldata_string))
    @patch('subprocess.check_output', side_effect=subprocess.CalledProcessError)
    @patch('subprocess.check_output', side_effect=Exception('Failed'))
    def test_controldata(self, check_output_call_error, check_output_generic_exception):
        data = self.p.controldata()
        self.assertEqual(len(data), 50)
        self.assertEqual(data['Database cluster state'], 'shut down in recovery')
        self.assertEqual(data['wal_log_hints setting'], 'on')
        self.assertEqual(int(data['Database block size']), 8192)

        subprocess.check_output = check_output_call_error
        data = self.p.controldata()
        self.assertEqual(data, dict())

        subprocess.check_output = check_output_generic_exception
        # NOTE(review): this passes the *result* of controldata() to assertRaises instead of
        # the callable.  Not changed here because whether controldata() is expected to
        # raise in this situation cannot be confirmed from this file.
        self.assertRaises(Exception, self.p.controldata())

    def test_read_postmaster_opts(self):
        m = mock_open(read_data=postmaster_opts_string())
        with patch.object(builtins, 'open', m):
            data = self.p.read_postmaster_opts()
            self.assertEqual(data['wal_level'], 'hot_standby')
            self.assertEqual(int(data['max_replication_slots']), 5)
            self.assertEqual(data.get('D'), None)

            m.side_effect = IOError("foo")
            data = self.p.read_postmaster_opts()
            self.assertEqual(data, dict())

            m.side_effect = Exception("foo")
            # NOTE(review): as in test_controldata, the method is called here rather than
            # handed to assertRaises; confirm the intended behaviour before changing it.
            self.assertRaises(Exception, self.p.read_postmaster_opts())

    @patch('subprocess.Popen')
    @patch.object(builtins, 'open', MagicMock(return_value=42))
    def test_single_user_mode(self, subprocess_popen_mock):
        """Check the ``postgres --single`` command line built by ``single_user_mode``."""
        subprocess_popen_mock.return_value.wait.return_value = 0
        self.assertEqual(self.p.single_user_mode(options=dict(archive_mode='on', archive_command='false')), 0)
        subprocess_popen_mock.assert_called_once_with(['postgres', '--single', '-D', self.p.data_dir,
                                                      '-c', 'archive_command=false', '-c', 'archive_mode=on',
                                                       'postgres'], stdin=subprocess.PIPE,
                                                      stdout=42,
                                                      stderr=subprocess.STDOUT)
        subprocess_popen_mock.reset_mock()
        self.assertEqual(self.p.single_user_mode(command="CHECKPOINT"), 0)
        subprocess_popen_mock.assert_called_once_with(['postgres', '--single', '-D', self.p.data_dir,
                                                      'postgres'], stdin=subprocess.PIPE,
                                                      stdout=42,
                                                      stderr=subprocess.STDOUT)
        # Popen yielding None makes the call report 1.
        subprocess_popen_mock.return_value = None
        self.assertEqual(self.p.single_user_mode(), 1)

    # Plain function (no ``self``): it is only used while the class body is being executed,
    # as the side effect of the ``os.listdir`` patch below.
    def fake_listdir(path):
        if path.endswith(os.path.join('pg_xlog', 'archive_status')):
            return ["a", "b", "c"]
        return []

    @patch('os.listdir', MagicMock(side_effect=fake_listdir))
    @patch('os.path.isdir', MagicMock(return_value=True))
    @patch('os.unlink', return_value=True)
    @patch('os.remove', return_value=True)
    @patch('os.path.islink', return_value=False)
    @patch('os.path.isfile', return_value=True)
    def test_cleanup_archive_status(self, mock_file, mock_link, mock_remove, mock_unlink):
        """Entries are removed via ``os.remove`` for files, ``os.unlink`` for links, and skipped on errors."""
        ap = os.path.join(self.p.data_dir, 'pg_xlog', 'archive_status/')
        self.p.cleanup_archive_status()
        mock_remove.assert_has_calls([mock.call(ap+'a'), mock.call(ap+'b'), mock.call(ap+'c')])
        mock_unlink.assert_not_called()

        mock_remove.reset_mock()

        # Now every entry looks like a symlink instead of a regular file.
        mock_file.return_value = False
        mock_link.return_value = True
        self.p.cleanup_archive_status()
        mock_unlink.assert_has_calls([mock.call(ap+'a'), mock.call(ap+'b'), mock.call(ap+'c')])
        mock_remove.assert_not_called()

        mock_unlink.reset_mock()
        mock_remove.reset_mock()

        # Both type checks blow up: nothing may be deleted.
        mock_file.side_effect = Exception("foo")
        mock_link.side_effect = Exception("foo")
        self.p.cleanup_archive_status()
        mock_unlink.assert_not_called()
        mock_remove.assert_not_called()
Exemple #2
0
class TestPostgresql(unittest.TestCase):
    """Tests for ``Postgresql`` that replace module-level functions by direct assignment.

    Helpers such as ``subprocess_call``, ``psycopg2_connect``, ``nop``, ``true``, ``false``
    and ``raise_exception`` are defined outside this class.
    """

    def __init__(self, method_name='runTest'):
        # Route unittest's setUp/tearDown hooks to the snake_case methods below.
        self.setUp = self.set_up
        self.tearDown = self.tear_down
        super(TestPostgresql, self).__init__(method_name)

    def set_up(self):
        """Install the stubs and build the ``Postgresql`` under test plus member fixtures."""
        # NOTE(review): these three assignments replace module attributes for the rest of
        # the process and are never undone; other test modules may depend on that, so it
        # is left as is here.
        subprocess.call = subprocess_call
        shutil.copy = nop
        self.p = Postgresql({'name': 'test0', 'scope': 'batman', 'data_dir': 'data/test0',
                             'listen': '127.0.0.1, *:5432', 'connect_address': '127.0.0.2:5432',
                             'pg_hba': ['hostssl all all 0.0.0.0/0 md5', 'host all all 0.0.0.0/0 md5'],
                             'superuser': {'password': ''},
                             'admin': {'username': '******', 'password': '******'},
                             'replication': {'username': '******',
                                             'password': '******',
                                             'network': '127.0.0.1/32'},
                             'parameters': {'foo': 'bar'}, 'recovery_conf': {'foo': 'bar'},
                             'callbacks': {'on_start': 'true', 'on_stop': 'true',
                                           'on_restart': 'true', 'on_role_change': 'true',
                                           'on_reload': 'true'
                                           },
                             'restore': 'true'})
        psycopg2.connect = psycopg2_connect
        if not os.path.exists(self.p.data_dir):
            os.makedirs(self.p.data_dir)
        self.leadermem = Member(0, 'leader', 'postgres://*****:*****@127.0.0.1:5435/postgres', None, None, 28)
        self.leader = Leader(-1, None, 28, self.leadermem)
        self.other = Member(0, 'test1', 'postgres://*****:*****@127.0.0.1:5433/postgres', None, None, 28)
        self.me = Member(0, 'test0', 'postgres://*****:*****@127.0.0.1:5434/postgres', None, None, 28)

    def tear_down(self):
        """Remove the scratch ``data`` tree created by ``set_up``."""
        shutil.rmtree('data')

    def mock_query(self, p):
        """Stand-in for ``Postgresql.query`` that always fails."""
        raise psycopg2.OperationalError("not supported")

    def test_data_directory_empty(self):
        self.assertTrue(self.p.data_directory_empty())

    def test_initialize(self):
        self.assertTrue(self.p.initialize())
        self.assertTrue(os.path.exists(os.path.join(self.p.data_dir, 'pg_hba.conf')))

    def test_start_stop(self):
        self.assertFalse(self.p.start())
        self.p.is_running = false
        # Leave an empty postmaster.pid behind before starting again.
        with open(os.path.join(self.p.data_dir, 'postmaster.pid'), 'w'):
            pass
        self.assertTrue(self.p.start())
        self.assertTrue(self.p.stop())

    def test_sync_from_leader(self):
        self.assertTrue(self.p.sync_from_leader(self.leader))

    def test_follow_the_leader(self):
        self.p.demote(self.leader)
        self.p.follow_the_leader(None)
        self.p.demote(self.leader)
        self.p.follow_the_leader(self.leader)
        self.p.follow_the_leader(Leader(-1, None, 28, self.other))

    def test_create_replica(self):
        self.p.delete_trigger_file = raise_exception
        self.assertEqual(self.p.create_replica({'host': '', 'port': '', 'user': ''}, ''), 1)

    def test_create_connection_users(self):
        cfg = self.p.config
        cfg['superuser']['username'] = '******'
        p = Postgresql(cfg)
        p.create_connection_users()

    def test_sync_replication_slots(self):
        self.p.start()
        cluster = Cluster(True, self.leader, 0, [self.me, self.other, self.leadermem])
        self.p.sync_replication_slots(cluster)

    def test_query(self):
        self.p.query('select 1')
        self.assertRaises(psycopg2.InterfaceError, self.p.query, 'InterfaceError')
        self.assertRaises(psycopg2.OperationalError, self.p.query, 'blabla')
        # Mark the connection as closed and query again, first with the real disconnect ...
        self.p._connection.closed = 2
        self.assertRaises(psycopg2.OperationalError, self.p.query, 'blabla')
        # ... then with disconnect replaced by the `false` helper.
        self.p._connection.closed = 2
        self.p.disconnect = false
        self.assertRaises(psycopg2.OperationalError, self.p.query, 'blabla')

    def test_is_healthiest_node(self):
        cluster = Cluster(True, self.leader, 0, [self.me, self.other, self.leadermem])
        self.assertTrue(self.p.is_healthiest_node(cluster))
        self.p.is_leader = false
        self.assertFalse(self.p.is_healthiest_node(cluster))
        self.p.xlog_position = lambda: 1
        self.assertTrue(self.p.is_healthiest_node(cluster))
        self.p.xlog_position = lambda: 2
        self.assertFalse(self.p.is_healthiest_node(cluster))
        self.p.config['maximum_lag_on_failover'] = -3
        self.assertFalse(self.p.is_healthiest_node(cluster))

    def test_reload(self):
        self.assertTrue(self.p.reload())

    def test_is_healthy(self):
        self.assertTrue(self.p.is_healthy())
        self.p.is_running = false
        self.assertFalse(self.p.is_healthy())

    def test_promote(self):
        self.assertTrue(self.p.promote())
        self.assertTrue(self.p.promote())

    def test_last_operation(self):
        self.assertEqual(self.p.last_operation(), '0')

    def test_call_nowait(self):
        popen = subprocess.Popen
        subprocess.Popen = raise_exception
        try:
            self.assertFalse(self.p.call_nowait('on_start'))
        finally:
            # Put the real Popen back even when the assertion fails.
            subprocess.Popen = popen

    def test_non_existing_callback(self):
        self.assertFalse(self.p.call_nowait('foobar'))

    def test_is_leader_exception(self):
        self.p.start()
        self.p.query = self.mock_query
        self.assertTrue(self.p.stop())

    def test_move_data_directory(self):
        self.p.is_running = false
        saved_rename, saved_isdir = os.rename, os.path.isdir
        try:
            os.rename = nop
            os.path.isdir = true
            self.p.move_data_directory()
            # Same call again, this time with the rename failing.
            os.rename = raise_exception
            self.p.move_data_directory()
        finally:
            # Do not leak the stubbed os functions into tear_down or later tests.
            os.rename, os.path.isdir = saved_rename, saved_isdir
Exemple #3
0
class Patroni:
    """Ties together the PostgreSQL wrapper, the HA logic and the REST API server."""

    def __init__(self, config):
        """Build all components from ``config``.

        Reads the keys ``loop_wait``, ``postgresql`` and ``restapi`` (including
        ``restapi['listen']``) and passes the whole mapping on to ``get_dcs``.
        """
        self.nap_time = config['loop_wait']
        self.postgresql = Postgresql(config['postgresql'])
        self.ha = Ha(self.postgresql, self.get_dcs(self.postgresql.name, config))
        # NOTE(review): host and port are never used; the unpacking only has the effect of
        # raising ValueError when 'listen' does not split into exactly two parts on ':'.
        host, port = config['restapi']['listen'].split(':')
        self.api = RestApiServer(self, config['restapi'])
        self.next_run = time.time()
        self.shutdown_member_ttl = 300

    @staticmethod
    def get_dcs(name, config):
        """Return the DCS client selected by ``config``.

        ``etcd`` takes precedence over ``zookeeper`` when both keys are present.

        :raises Exception: when neither key is found in ``config``.
        """
        if 'etcd' in config:
            return Etcd(name, config['etcd'])
        if 'zookeeper' in config:
            return ZooKeeper(name, config['zookeeper'])
        raise Exception('Can not find sutable configuration of distributed configuration store')

    def touch_member(self, ttl=None):
        """Publish this node's connection string in the DCS.

        The update is skipped (and ``True`` returned) when the known cluster already holds
        an entry for this node whose ``real_ttl()`` exceeds ``shutdown_member_ttl``.
        Otherwise the result of ``dcs.touch_member`` is returned.
        """
        connection_string = self.postgresql.connection_string + '?application_name=' + self.api.connection_string
        if self.ha.cluster:
            for m in self.ha.cluster.members:
                # Do not update member TTL when it is far from being expired
                if m.name == self.postgresql.name and m.real_ttl() > self.shutdown_member_ttl:
                    return True
        return self.ha.dcs.touch_member(connection_string, ttl)

    def cleanup_on_failed_initialization(self):
        """Clean up the DCS and the local instance if initialization was not successful."""
        logger.info("removing initialize key after failed attempt to initialize the cluster")
        self.ha.dcs.cancel_initialization()
        self.touch_member(self.shutdown_member_ttl)
        self.postgresql.stop()
        self.postgresql.move_data_directory()

    def initialize(self):
        """Register in the DCS and, if the data directory is empty, bootstrap this node.

        With an empty data directory the node either bootstraps from the existing leader
        or races to initialize a new cluster, retrying every 5 seconds until one of the
        two succeeds.  With a non-empty directory and a running PostgreSQL only the
        replication slots are loaded.
        """
        # wait for the DCS to be available
        while not self.touch_member():
            logger.info('waiting on DCS')
            sleep(5)

        # is data directory empty?
        if self.postgresql.data_directory_empty():
            while True:
                try:
                    cluster = self.ha.dcs.get_cluster()
                    if not cluster.is_unlocked():  # the leader already exists
                        if not cluster.initialize:
                            self.ha.dcs.initialize()
                        self.postgresql.bootstrap(cluster.leader)
                        break
                    # racing to initialize
                    elif not cluster.initialize and self.ha.dcs.initialize():
                        try:
                            self.postgresql.bootstrap()
                        except BaseException:
                            # bail out and clean the initialize flag; the error is always
                            # re-raised, so catching everything here hides nothing.
                            self.cleanup_on_failed_initialization()
                            raise
                        self.ha.dcs.take_leader()
                        break
                except DCSError:
                    logger.info('waiting on DCS')
                sleep(5)
        elif self.postgresql.is_running():
            self.postgresql.load_replication_slots()

    def schedule_next_run(self):
        """Advance ``next_run`` by ``nap_time`` and wait on the DCS until then.

        After a promotion the schedule restarts from the current time.  When the loop is
        already behind schedule no waiting happens and ``next_run`` is reset to now.
        """
        if self.postgresql.is_promoted:
            self.next_run = time.time()
        self.next_run += self.nap_time
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
        else:
            self.ha.dcs.watch(nap_time)

    def run(self):
        """Start the REST API and run the HA loop forever."""
        self.api.start()
        self.next_run = time.time()

        while True:
            self.touch_member()
            logger.info(self.ha.run_cycle())
            try:
                if self.ha.state_handler.is_leader():
                    if self.ha.cluster:
                        self.ha.state_handler.create_replication_slots(self.ha.cluster)
                else:
                    self.ha.state_handler.drop_replication_slots()
            except Exception:
                # Slot maintenance is best effort, but KeyboardInterrupt/SystemExit must
                # still be able to stop the loop, hence no bare except.
                logger.exception('Exception when changing replication slots')
            reap_children()
            self.schedule_next_run()
Exemple #4
0
class Patroni(object):
    """Top-level daemon object: owns the configuration, DCS client, PostgreSQL wrapper,
    REST API server and HA logic, and runs the main loop."""

    def __init__(self):
        """Install the signal handlers, load the configuration and build all components."""
        # Must come first: it also creates the _received_* flags read by run().
        self.setup_signal_handlers()

        self.version = __version__
        self.config = Config()
        self.dcs = get_dcs(self.config)
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}

    def load_dynamic_configuration(self):
        """Apply the dynamic configuration, retrying until the DCS can be read.

        The configuration stored in the cluster wins; when the cluster has none and no
        dynamic configuration is set locally, ``config['bootstrap']['dcs']`` is used.
        If applying it reports a change, the DCS client is told to reload its config.
        """
        while True:
            try:
                cluster = self.dcs.get_cluster()
                if cluster and cluster.config:
                    if self.config.set_dynamic_configuration(cluster.config):
                        self.dcs.reload_config(self.config)
                elif not self.config.dynamic_configuration and 'bootstrap' in self.config:
                    if self.config.set_dynamic_configuration(self.config['bootstrap']['dcs']):
                        self.dcs.reload_config(self.config)
                break
            except DCSError:
                logger.warning('Can not get cluster from dcs')
                # Back off before the next attempt instead of hammering an unreachable
                # DCS (and the log) in a tight loop.
                time.sleep(5)

    def get_tags(self):
        """Return the configured tags, dropping ``clonefrom``/``nofailover``/``noloadbalance``
        entries whose value is falsy."""
        return {tag: value for tag, value in self.config.get('tags', {}).items()
                if tag not in ('clonefrom', 'nofailover', 'noloadbalance') or value}

    @property
    def nofailover(self):
        """``True`` when the ``nofailover`` tag is set to a truthy value."""
        return bool(self.tags.get('nofailover', False))

    def reload_config(self):
        """Push the current configuration to the DCS client, REST API and PostgreSQL.

        Any failure is logged and otherwise ignored.
        """
        try:
            self.tags = self.get_tags()
            self.dcs.reload_config(self.config)
            self.api.reload_config(self.config['restapi'])
            self.postgresql.reload_config(self.config['postgresql'])
        except Exception:
            logger.exception('Failed to reload config_file=%s', self.config.config_file)

    @property
    def replicatefrom(self):
        """Value of the ``replicatefrom`` tag, or ``None`` when it is not set."""
        return self.tags.get('replicatefrom')

    def sighup_handler(self, *args):
        """SIGHUP: only record the request; run() performs the reload."""
        self._received_sighup = True

    def sigterm_handler(self, *args):
        """SIGTERM: record the request and exit; repeated signals are ignored."""
        if not self._received_sigterm:
            self._received_sigterm = True
            sys.exit()

    @property
    def noloadbalance(self):
        """``True`` when the ``noloadbalance`` tag is set to a truthy value."""
        return bool(self.tags.get('noloadbalance', False))

    def schedule_next_run(self):
        """Advance ``next_run`` by the DCS ``loop_wait`` and wait on the DCS until then.

        When the loop is already behind schedule it is rescheduled immediately.  When
        ``dcs.watch`` returns a truthy value the schedule restarts from the current time.
        """
        self.next_run += self.dcs.loop_wait
        current_time = time.time()
        nap_time = self.next_run - current_time
        if nap_time <= 0:
            self.next_run = current_time
            # Release the GIL so we don't starve anyone waiting on async_executor lock
            time.sleep(0.001)
            # Warn user that Patroni is not keeping up
            logger.warning("Loop time exceeded, rescheduling immediately.")
        elif self.dcs.watch(nap_time):
            self.next_run = time.time()

    def run(self):
        """Start the REST API and run the HA loop until SIGTERM has been received."""
        self.api.start()
        self.next_run = time.time()

        while not self._received_sigterm:
            if self._received_sighup:
                self._received_sighup = False
                if self.config.reload_local_configuration():
                    self.reload_config()

            reap_children()

            logger.info(self.ha.run_cycle())

            # Pick up dynamic configuration changes seen during the HA cycle.
            cluster = self.dcs.cluster
            if cluster and cluster.config and self.config.set_dynamic_configuration(cluster.config):
                self.reload_config()

            if not self.postgresql.data_directory_empty():
                self.config.save_cache()

            reap_children()
            self.schedule_next_run()

    def setup_signal_handlers(self):
        """Reset the signal flags and register the SIGHUP, SIGTERM and SIGCHLD handlers."""
        self._received_sighup = False
        self._received_sigterm = False
        signal.signal(signal.SIGHUP, self.sighup_handler)
        signal.signal(signal.SIGTERM, self.sigterm_handler)
        signal.signal(signal.SIGCHLD, sigchld_handler)
class TestPostgresql(unittest.TestCase):
    """Tests for ``Postgresql`` that run against mocks instead of a real PostgreSQL installation.

    ``setUp`` builds the object under test with ``subprocess.call`` and
    ``psycopg2.connect`` patched; individual tests add the patches they need.
    """

    @patch('subprocess.call', Mock(return_value=0))
    @patch('psycopg2.connect', psycopg2_connect)
    def setUp(self):
        # A fresh Postgresql object per test, rooted at data/test0 (the 'data' tree is removed in tearDown).
        self.p = Postgresql({'name': 'test0', 'scope': 'batman', 'data_dir': 'data/test0',
                             'listen': '127.0.0.1, *:5432', 'connect_address': '127.0.0.2:5432',
                             'pg_hba': ['hostssl all all 0.0.0.0/0 md5', 'host all all 0.0.0.0/0 md5'],
                             'superuser': {'password': '******'},
                             'admin': {'username': '******', 'password': '******'},
                             'pg_rewind': {'username': '******', 'password': '******'},
                             'replication': {'username': '******',
                                             'password': '******',
                                             'network': '127.0.0.1/32'},
                             'parameters': {'foo': 'bar'}, 'recovery_conf': {'foo': 'bar'},
                             'callbacks': {'on_start': 'true', 'on_stop': 'true',
                                           'on_restart': 'true', 'on_role_change': 'true',
                                           'on_reload': 'true'
                                           },
                             'restore': 'true'})
        if not os.path.exists(self.p.data_dir):
            os.makedirs(self.p.data_dir)
        # Cluster members shared by several tests: a leader, another node and this node itself.
        self.leadermem = Member(0, 'leader', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5435/postgres'})
        self.leader = Leader(-1, 28, self.leadermem)
        self.other = Member(0, 'test1', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5433/postgres'})
        self.me = Member(0, 'test0', 28, {'conn_url': 'postgres://*****:*****@127.0.0.1:5434/postgres'})

    def tearDown(self):
        # Drop everything setUp (and the tests) created below 'data'.
        shutil.rmtree('data')

    def test_data_directory_empty(self):
        self.assertTrue(self.p.data_directory_empty())

    def test_get_initdb_options(self):
        # Dict entries are expected as --key=value and bare strings as --flag; the other inputs must raise.
        self.p.initdb_options = [{'encoding': 'UTF8'}, 'data-checksums']
        self.assertEqual(self.p.get_initdb_options(), ['--encoding=UTF8', '--data-checksums'])
        self.p.initdb_options = [{'pgdata': 'bar'}]
        self.assertRaises(Exception, self.p.get_initdb_options)
        self.p.initdb_options = [{'foo': 'bar', 1: 2}]
        self.assertRaises(Exception, self.p.get_initdb_options)
        self.p.initdb_options = [1]
        self.assertRaises(Exception, self.p.get_initdb_options)

    def test_initialize(self):
        self.assertTrue(self.p.initialize())
        self.assertTrue(os.path.exists(os.path.join(self.p.data_dir, 'pg_hba.conf')))

    def test_start(self):
        self.assertTrue(self.p.start())
        # `false` is defined outside this class (presumably a stub returning False -- confirm).
        self.p.is_running = false
        # Second start happens with a postmaster.pid file already present in the data directory.
        open(os.path.join(self.p.data_dir, 'postmaster.pid'), 'w').close()
        self.assertTrue(self.p.start())

    def test_stop(self):
        self.assertTrue(self.p.stop())
        with patch('subprocess.call', Mock(return_value=1)):
            self.assertTrue(self.p.stop())
            # With a non-zero exit code *and* a server reported as running, stop() must report failure.
            self.p.is_running = Mock(return_value=True)
            self.assertFalse(self.p.stop())

    def test_restart(self):
        self.p.start = false
        self.p.is_running = false
        self.assertFalse(self.p.restart())
        self.assertEqual(self.p.state, 'restart failed (restarting)')

    @patch.object(builtins, 'open', MagicMock())
    def test_write_pgpass(self):
        self.p.write_pgpass({'host': 'localhost', 'port': '5432', 'user': '******', 'password': '******'})

    @patch('patroni.postgresql.Postgresql.write_pgpass', MagicMock(return_value=dict()))
    def test_sync_from_leader(self):
        self.assertTrue(self.p.sync_from_leader(self.leader))

    @patch('subprocess.call', side_effect=Exception("Test"))
    @patch('patroni.postgresql.Postgresql.write_pgpass', MagicMock(return_value=dict()))
    def test_pg_rewind(self, mock_call):
        self.assertTrue(self.p.rewind(self.leader))
        subprocess.call = mock_call
        self.assertFalse(self.p.rewind(self.leader))

    @patch('patroni.postgresql.Postgresql.rewind', return_value=False)
    @patch('patroni.postgresql.Postgresql.remove_data_directory', MagicMock(return_value=True))
    @patch('patroni.postgresql.Postgresql.single_user_mode', MagicMock(return_value=1))
    @patch('patroni.postgresql.Postgresql.write_pgpass', MagicMock(return_value=dict()))
    def test_follow_the_leader(self, mock_pg_rewind):
        # Walks follow_the_leader through: no leader, a leader, a different leader, and the rewind paths.
        self.p.demote()
        self.p.follow_the_leader(None)
        self.p.demote()
        self.p.follow_the_leader(self.leader)
        self.p.follow_the_leader(Leader(-1, 28, self.other))
        self.p.rewind = mock_pg_rewind
        self.p.follow_the_leader(self.leader)
        self.p.require_rewind()
        with mock.patch('os.path.islink', MagicMock(return_value=True)):
            with mock.patch('patroni.postgresql.Postgresql.can_rewind', new_callable=PropertyMock(return_value=True)):
                with mock.patch('os.unlink', MagicMock(return_value=True)):
                    self.p.follow_the_leader(self.leader, recovery=True)
        self.p.require_rewind()
        with mock.patch('patroni.postgresql.Postgresql.can_rewind', new_callable=PropertyMock(return_value=True)):
            self.p.rewind.return_value = True
            self.p.follow_the_leader(self.leader, recovery=True)
            self.p.rewind.return_value = False
            self.p.follow_the_leader(self.leader, recovery=True)

    def test_can_rewind(self):
        tmp = self.p.pg_rewind
        self.p.pg_rewind = None
        self.assertFalse(self.p.can_rewind)
        self.p.pg_rewind = tmp
        with mock.patch('subprocess.call', MagicMock(return_value=1)):
            self.assertFalse(self.p.can_rewind)
        with mock.patch('subprocess.call', side_effect=OSError("foo")):
            self.assertFalse(self.p.can_rewind)
        # Keep the method itself (not the result of calling it) so that it can be put back afterwards.
        original_controldata = self.p.controldata
        self.p.controldata = lambda: {'wal_log_hints setting': 'on'}
        self.assertTrue(self.p.can_rewind)
        self.p.controldata = original_controldata

    @patch('time.sleep', Mock())
    def test_create_replica(self):
        self.p.delete_trigger_file = Mock(side_effect=OSError())
        # subprocess.call fails once (exit code, then exception) before succeeding.
        with patch('subprocess.call', Mock(side_effect=[1, 0])):
            self.assertEqual(self.p.create_replica(self.leader, ''), 0)
        with patch('subprocess.call', Mock(side_effect=[Exception(), 0])):
            self.assertEqual(self.p.create_replica(self.leader, ''), 0)

        self.p.config['create_replica_method'] = ['wale', 'basebackup']
        self.p.config['wale'] = {'command': 'foo'}
        with patch('subprocess.call', Mock(return_value=0)):
            self.assertEqual(self.p.create_replica(self.leader, ''), 0)
            del self.p.config['wale']
            self.assertEqual(self.p.create_replica(self.leader, ''), 0)

        with patch('subprocess.call', Mock(side_effect=Exception("foo"))):
            self.assertEqual(self.p.create_replica(self.leader, ''), 1)

    def test_create_connection_users(self):
        cfg = self.p.config
        cfg['superuser']['username'] = '******'
        p = Postgresql(cfg)
        p.create_connection_users()

    def test_sync_replication_slots(self):
        self.p.start()
        cluster = Cluster(True, self.leader, 0, [self.me, self.other, self.leadermem], None)
        self.p.sync_replication_slots(cluster)
        # Second pass: every query fails with OperationalError while a slot reload is scheduled.
        self.p.query = Mock(side_effect=psycopg2.OperationalError)
        self.p.schedule_load_slots = True
        self.p.sync_replication_slots(cluster)

    @patch.object(MockConnect, 'closed', 2)
    def test__query(self):
        self.assertRaises(PostgresConnectionException, self.p._query, 'blabla')
        self.p._state = 'restarting'
        self.assertRaises(RetryFailedError, self.p._query, 'blabla')

    def test_query(self):
        self.p.query('select 1')
        self.assertRaises(PostgresConnectionException, self.p.query, 'RetryFailedError')
        self.assertRaises(psycopg2.OperationalError, self.p.query, 'blabla')

    def test_is_leader(self):
        self.assertTrue(self.p.is_leader())

    def test_reload(self):
        self.assertTrue(self.p.reload())

    def test_is_healthy(self):
        self.assertTrue(self.p.is_healthy())
        self.p.is_running = false
        self.assertFalse(self.p.is_healthy())

    def test_promote(self):
        self.p._role = 'replica'
        self.assertTrue(self.p.promote())
        self.assertTrue(self.p.promote())

    def test_last_operation(self):
        self.assertEqual(self.p.last_operation(), '0')

    @patch('subprocess.Popen', Mock(side_effect=OSError()))
    def test_call_nowait(self):
        self.assertFalse(self.p.call_nowait('on_start'))

    def test_non_existing_callback(self):
        self.assertFalse(self.p.call_nowait('foobar'))

    def test_is_leader_exception(self):
        self.p.start()
        self.p.query = Mock(side_effect=psycopg2.OperationalError("not supported"))
        self.assertTrue(self.p.stop())

    def test_check_replication_lag(self):
        self.assertTrue(self.p.check_replication_lag(0))

    @patch('os.rename', Mock())
    @patch('os.path.isdir', Mock(return_value=True))
    def test_move_data_directory(self):
        self.p.is_running = false
        self.p.move_data_directory()
        # A failing os.rename must not propagate out of move_data_directory.
        with patch('os.rename', Mock(side_effect=OSError())):
            self.p.move_data_directory()

    @patch('patroni.postgresql.Postgresql.write_pgpass', MagicMock(return_value=dict()))
    def test_bootstrap(self):
        with patch('subprocess.call', Mock(return_value=1)):
            self.assertRaises(PostgresException, self.p.bootstrap)
        self.p.bootstrap()
        self.p.bootstrap(self.leader)

    def test_remove_data_directory(self):
        # Exercised with a missing path, a directory, a regular file and a dangling symlink in turn.
        self.p.data_dir = 'data_dir'
        self.p.remove_data_directory()
        os.mkdir(self.p.data_dir)
        self.p.remove_data_directory()
        open(self.p.data_dir, 'w').close()
        self.p.remove_data_directory()
        os.symlink('unexisting', self.p.data_dir)
        with patch('os.unlink', Mock(side_effect=Exception)):
            self.p.remove_data_directory()
        self.p.remove_data_directory()

    @patch('subprocess.check_output', MagicMock(return_value=0, side_effect=pg_controldata_string))
    @patch('subprocess.check_output', side_effect=subprocess.CalledProcessError)
    @patch('subprocess.check_output', side_effect=Exception('Failed'))
    def test_controldata(self, check_output_call_error, check_output_generic_exception):
        # NOTE(review): stacked patch decorators hand their mocks over bottom-up, so the first
        # argument is the mock raising Exception('Failed') and the second is the one configured
        # with CalledProcessError -- the reverse of what the parameter names suggest. Left as is
        # because swapping them would change what the assertions below exercise.
        data = self.p.controldata()
        self.assertEqual(len(data), 50)
        self.assertEqual(data['Database cluster state'], 'shut down in recovery')
        self.assertEqual(data['wal_log_hints setting'], 'on')
        self.assertEqual(int(data['Database block size']), 8192)

        subprocess.check_output = check_output_call_error
        data = self.p.controldata()
        self.assertEqual(data, dict())

        subprocess.check_output = check_output_generic_exception
        # NOTE(review): controldata() is called here and its *result* is handed to assertRaises,
        # so this does not check that controldata itself raises -- confirm the intent.
        self.assertRaises(Exception, self.p.controldata())

    def test_read_postmaster_opts(self):
        m = mock_open(read_data=postmaster_opts_string())
        with patch.object(builtins, 'open', m):
            data = self.p.read_postmaster_opts()
            self.assertEqual(data['wal_level'], 'hot_standby')
            self.assertEqual(int(data['max_replication_slots']), 5)
            self.assertEqual(data.get('D'), None)

            m.side_effect = IOError("foo")
            data = self.p.read_postmaster_opts()
            self.assertEqual(data, dict())

            m.side_effect = Exception("foo")
            # NOTE(review): as in test_controldata, the call result (not the callable) is passed
            # to assertRaises -- confirm the intent.
            self.assertRaises(Exception, self.p.read_postmaster_opts())

    @patch('subprocess.Popen')
    @patch.object(builtins, 'open', MagicMock(return_value=42))
    def test_single_user_mode(self, subprocess_popen_mock):
        # open() is patched to return 42, which is why stdout=42 is expected in the Popen calls.
        subprocess_popen_mock.return_value.wait.return_value = 0
        self.assertEqual(self.p.single_user_mode(options=dict(archive_mode='on', archive_command='false')), 0)
        subprocess_popen_mock.assert_called_once_with(['postgres', '--single', '-D', self.p.data_dir,
                                                       '-c', 'archive_command=false', '-c', 'archive_mode=on',
                                                       'postgres'], stdin=subprocess.PIPE,
                                                      stdout=42,
                                                      stderr=subprocess.STDOUT)
        subprocess_popen_mock.reset_mock()
        self.assertEqual(self.p.single_user_mode(command="CHECKPOINT"), 0)
        subprocess_popen_mock.assert_called_once_with(['postgres', '--single', '-D', self.p.data_dir,
                                                       'postgres'], stdin=subprocess.PIPE,
                                                      stdout=42,
                                                      stderr=subprocess.STDOUT)
        subprocess_popen_mock.return_value = None
        self.assertEqual(self.p.single_user_mode(), 1)

    # Plain function (no ``self``): it is only used below, while the class body is being
    # evaluated, as the side effect of the os.listdir mock.
    def fake_listdir(path):
        if path.endswith(os.path.join('pg_xlog', 'archive_status')):
            return ["a", "b", "c"]
        return []

    @patch('os.listdir', MagicMock(side_effect=fake_listdir))
    @patch('os.path.isdir', MagicMock(return_value=True))
    @patch('os.unlink', return_value=True)
    @patch('os.remove', return_value=True)
    @patch('os.path.islink', return_value=False)
    @patch('os.path.isfile', return_value=True)
    def test_cleanup_archive_status(self, mock_file, mock_link, mock_remove, mock_unlink):
        ap = os.path.join(self.p.data_dir, 'pg_xlog', 'archive_status/')
        # Regular files: os.remove is used, os.unlink is not.
        self.p.cleanup_archive_status()
        mock_remove.assert_has_calls([mock.call(ap+'a'), mock.call(ap+'b'), mock.call(ap+'c')])
        mock_unlink.assert_not_called()

        mock_remove.reset_mock()

        # Symlinks: os.unlink is used, os.remove is not.
        mock_file.return_value = False
        mock_link.return_value = True
        self.p.cleanup_archive_status()
        mock_unlink.assert_has_calls([mock.call(ap+'a'), mock.call(ap+'b'), mock.call(ap+'c')])
        mock_remove.assert_not_called()

        mock_unlink.reset_mock()
        mock_remove.reset_mock()

        # When the file-type checks themselves fail, nothing may be deleted.
        mock_file.side_effect = Exception("foo")
        mock_link.side_effect = Exception("foo")
        self.p.cleanup_archive_status()
        mock_unlink.assert_not_called()
        mock_remove.assert_not_called()

    @patch('subprocess.check_output', MagicMock(return_value=0, side_effect=pg_controldata_string))
    def test_sysid(self):
        self.assertEqual(self.p.sysid, "6200971513092291716")

    @patch('os.path.isfile', MagicMock(return_value=True))
    @patch('shutil.copy', side_effect=Exception)
    def test_save_configuration_files(self, mock_copy):
        shutil.copy = mock_copy
        self.p.save_configuration_files()

    @patch('os.path.isfile', MagicMock(side_effect=is_file_raise_on_backup))
    @patch('shutil.copy', side_effect=Exception)
    def test_restore_configuration_files(self, mock_copy):
        shutil.copy = mock_copy
        self.p.restore_configuration_files()
Exemple #6
0
class Patroni(object):
    """Daemon object that wires together configuration, DCS client, PostgreSQL wrapper, REST API and HA loop."""

    def __init__(self):
        """Install the signal handlers, then build every component from the loaded configuration."""
        # Project modules are imported here, at construction time, rather than at module import.
        from patroni.api import RestApiServer
        from patroni.config import Config
        from patroni.dcs import get_dcs
        from patroni.ha import Ha
        from patroni.postgresql import Postgresql
        from patroni.version import __version__

        self.setup_signal_handlers()

        self.version = __version__
        self.config = Config()
        self.dcs = get_dcs(self.config)
        # The dynamic configuration is applied before the 'postgresql' and 'restapi' sections are read.
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}

    def load_dynamic_configuration(self):
        """Apply the dynamic configuration, retrying for as long as the DCS raises ``DCSError``.

        The configuration stored with the cluster is preferred; without one, and
        with no dynamic configuration loaded yet, the ``bootstrap.dcs`` section
        of the local configuration is used when present. If applying it reports
        a change, the DCS client is reconfigured.
        """
        from patroni.exceptions import DCSError
        while True:
            try:
                cluster = self.dcs.get_cluster()
                if cluster and cluster.config:
                    changed = self.config.set_dynamic_configuration(cluster.config)
                elif not self.config.dynamic_configuration and 'bootstrap' in self.config:
                    changed = self.config.set_dynamic_configuration(self.config['bootstrap']['dcs'])
                else:
                    changed = False
                if changed:
                    self.dcs.reload_config(self.config)
            except DCSError:
                logger.warning('Can not get cluster from dcs')
            else:
                return

    def get_tags(self):
        """Return the configured tags, leaving out the flag-style tags whose value is falsy."""
        flag_tags = ('clonefrom', 'nofailover', 'noloadbalance', 'nosync')
        selected = {}
        for tag, value in self.config.get('tags', {}).items():
            if tag not in flag_tags or value:
                selected[tag] = value
        return selected

    @property
    def nofailover(self):
        """Boolean value of the ``nofailover`` tag (``False`` when absent)."""
        flag = self.tags.get('nofailover', False)
        return bool(flag)

    @property
    def nosync(self):
        """Boolean value of the ``nosync`` tag (``False`` when absent)."""
        flag = self.tags.get('nosync', False)
        return bool(flag)

    def reload_config(self):
        """Refresh the tags and hand the current configuration to the DCS client, REST API and PostgreSQL.

        Any exception raised on the way is logged together with the
        configuration file name and is not propagated.
        """
        try:
            self.tags = self.get_tags()
            self.dcs.reload_config(self.config)
            restapi_section = self.config['restapi']
            self.api.reload_config(restapi_section)
            postgresql_section = self.config['postgresql']
            self.postgresql.reload_config(postgresql_section)
        except Exception:
            logger.exception('Failed to reload config_file=%s', self.config.config_file)

    @property
    def replicatefrom(self):
        """Value of the ``replicatefrom`` tag, or ``None`` when it is not set."""
        tag_value = self.tags.get('replicatefrom')
        return tag_value

    def sighup_handler(self, *args):
        """SIGHUP handler: only raises a flag that ``run`` checks on its next iteration."""
        self._received_sighup = True

    def sigterm_handler(self, *args):
        """SIGTERM handler: the first delivery sets the flag and calls ``sys.exit()``; later ones do nothing."""
        if self._received_sigterm:
            return
        self._received_sigterm = True
        sys.exit()

    @property
    def noloadbalance(self):
        """Boolean value of the ``noloadbalance`` tag (``False`` when absent)."""
        flag = self.tags.get('noloadbalance', False)
        return bool(flag)

    def schedule_next_run(self):
        """Move ``next_run`` forward by ``dcs.loop_wait`` and wait until then.

        If the new deadline is already in the past, ``next_run`` is reset to the
        current time, a warning is logged and no waiting happens. Otherwise
        ``ha.watch`` is called with the remaining time; when it returns a truthy
        value ``next_run`` is reset to the current time.
        """
        self.next_run += self.dcs.loop_wait
        now = time.time()
        remaining = self.next_run - now

        if remaining <= 0:
            self.next_run = now
            # Release the GIL so we don't starve anyone waiting on async_executor lock
            time.sleep(0.001)
            # Warn user that Patroni is not keeping up
            logger.warning("Loop time exceeded, rescheduling immediately.")
            return

        if self.ha.watch(remaining):
            self.next_run = time.time()

    def run(self):
        """Start the REST API and repeat the HA cycle until SIGTERM has been flagged.

        Each iteration: handle a pending SIGHUP by re-reading the local
        configuration, run and log one HA cycle, apply a changed dynamic
        configuration from the DCS cluster view, save the configuration cache
        when the data directory is not empty, then schedule the next iteration.
        """
        self.api.start()
        self.next_run = time.time()

        while True:
            if self._received_sigterm:
                break

            if self._received_sighup:
                self._received_sighup = False
                local_config_changed = self.config.reload_local_configuration()
                if local_config_changed:
                    self.reload_config()

            cycle_result = self.ha.run_cycle()
            logger.info(cycle_result)

            # Pick up dynamic configuration changes published in the DCS.
            snapshot = self.dcs.cluster
            if snapshot and snapshot.config:
                if self.config.set_dynamic_configuration(snapshot.config):
                    self.reload_config()

            directory_is_empty = self.postgresql.data_directory_empty()
            if not directory_is_empty:
                self.config.save_cache()

            self.schedule_next_run()

    def setup_signal_handlers(self):
        """Clear the SIGHUP/SIGTERM flags and register the handlers for both signals."""
        self._received_sighup = self._received_sigterm = False

        register = signal.signal
        register(signal.SIGHUP, self.sighup_handler)
        register(signal.SIGTERM, self.sigterm_handler)
Exemple #7
0
class Patroni(object):
    """Daemon object that wires together configuration, DCS client, PostgreSQL wrapper, REST API and HA loop."""

    def __init__(self):
        """Install the signal handlers, then build every component from the loaded configuration."""
        self.setup_signal_handlers()

        self.version = __version__
        self.config = Config()
        self.dcs = get_dcs(self.config)
        # The dynamic configuration is applied before the 'postgresql' and 'restapi' sections are read.
        self.load_dynamic_configuration()

        self.postgresql = Postgresql(self.config['postgresql'])
        self.api = RestApiServer(self, self.config['restapi'])
        self.ha = Ha(self)

        self.tags = self.get_tags()
        self.next_run = time.time()
        self.scheduled_restart = {}

    def load_dynamic_configuration(self):
        """Apply the dynamic configuration, retrying for as long as the DCS raises ``DCSError``.

        The configuration stored with the cluster is preferred; without one, and
        with no dynamic configuration loaded yet, the ``bootstrap.dcs`` section
        of the local configuration is used when present.
        """
        while True:
            try:
                cluster = self.dcs.get_cluster()
                if cluster and cluster.config:
                    dynamic = cluster.config
                elif not self.config.dynamic_configuration and 'bootstrap' in self.config:
                    dynamic = self.config['bootstrap']['dcs']
                else:
                    # Nothing to apply.
                    return
                self.config.set_dynamic_configuration(dynamic)
                return
            except DCSError:
                logger.warning('Can not get cluster from dcs')

    def get_tags(self):
        """Return the configured tags, leaving out the flag-style tags whose value is falsy."""
        flag_tags = ('clonefrom', 'nofailover', 'noloadbalance')
        selected = {}
        for tag, value in self.config.get('tags', {}).items():
            if tag not in flag_tags or value:
                selected[tag] = value
        return selected

    @property
    def nofailover(self):
        """Raw value of the ``nofailover`` tag, or ``False`` when the tag is absent."""
        tag_value = self.tags.get('nofailover', False)
        return tag_value

    def reload_config(self):
        """Refresh the tags and hand the current configuration to the DCS client, REST API and PostgreSQL.

        Any exception raised on the way is logged together with the
        configuration file name and is not propagated.
        """
        try:
            self.tags = self.get_tags()
            self.dcs.reload_config(self.config)
            restapi_section = self.config['restapi']
            self.api.reload_config(restapi_section)
            postgresql_section = self.config['postgresql']
            self.postgresql.reload_config(postgresql_section)
        except Exception:
            logger.exception('Failed to reload config_file=%s', self.config.config_file)

    @property
    def replicatefrom(self):
        """Value of the ``replicatefrom`` tag, or ``None`` when it is not set."""
        tag_value = self.tags.get('replicatefrom')
        return tag_value

    def sighup_handler(self, *args):
        """SIGHUP handler: only raises a flag that ``run`` checks on its next iteration."""
        self._received_sighup = True

    def sigterm_handler(self, *args):
        """SIGTERM handler: the first delivery sets the flag and calls ``sys.exit()``; later ones do nothing."""
        if self._received_sigterm:
            return
        self._received_sigterm = True
        sys.exit()

    @property
    def noloadbalance(self):
        """Raw value of the ``noloadbalance`` tag, or ``False`` when the tag is absent."""
        tag_value = self.tags.get('noloadbalance', False)
        return tag_value

    def schedule_next_run(self):
        """Move ``next_run`` forward by ``dcs.loop_wait`` and wait until then.

        If the new deadline is already in the past, ``next_run`` is simply reset
        to the current time. Otherwise ``dcs.watch`` is called with the remaining
        time; when it returns a truthy value ``next_run`` is reset to the
        current time.
        """
        self.next_run += self.dcs.loop_wait
        now = time.time()
        remaining = self.next_run - now

        if remaining <= 0:
            self.next_run = now
            return

        if self.dcs.watch(remaining):
            self.next_run = time.time()

    def run(self):
        """Start the REST API and repeat the HA cycle until SIGTERM has been flagged.

        Each iteration: handle a pending SIGHUP by re-reading the local
        configuration, run and log one HA cycle, apply a changed dynamic
        configuration from the DCS cluster view, save the configuration cache
        when the data directory is not empty, reap child processes and schedule
        the next iteration.
        """
        self.api.start()
        self.next_run = time.time()

        while True:
            if self._received_sigterm:
                break

            if self._received_sighup:
                self._received_sighup = False
                local_config_changed = self.config.reload_local_configuration()
                if local_config_changed:
                    self.reload_config()

            cycle_result = self.ha.run_cycle()
            logger.info(cycle_result)

            # Pick up dynamic configuration changes published in the DCS.
            snapshot = self.dcs.cluster
            if snapshot and snapshot.config:
                if self.config.set_dynamic_configuration(snapshot.config):
                    self.reload_config()

            directory_is_empty = self.postgresql.data_directory_empty()
            if not directory_is_empty:
                self.config.save_cache()

            reap_children()
            self.schedule_next_run()

    def setup_signal_handlers(self):
        """Clear the SIGHUP/SIGTERM flags and register the SIGHUP, SIGTERM and SIGCHLD handlers."""
        self._received_sighup = self._received_sigterm = False

        register = signal.signal
        register(signal.SIGHUP, self.sighup_handler)
        register(signal.SIGTERM, self.sigterm_handler)
        register(signal.SIGCHLD, sigchld_handler)