Example #1
 def test_random_host_selector_with_serverset(self):
     testutil.initialize_kazoo_client_manager(ZK_HOSTS)
     kazoo_client = KazooClientManager().get_client()
     kazoo_client.ensure_path(HostSelectorTestCase.SERVER_SET_PATH)
     host_provider = HostsProvider(HostSelectorTestCase.PORT_LIST,
                                   HostSelectorTestCase.SERVER_SET_PATH)
     self.assertTrue(host_provider.initialized)
     self.assertTrue(host_provider.hosts)
     # Since there are no live hosts in the server set, the host provider
     # should still use the static host list.
     self.assertEqual(host_provider._current_host_tuple,
                      host_provider._static_host_tuple)
     random_host_selector = RandomHostSelector(
         host_provider, expire_time=0, retry_time=0,
         invalidation_threshold=1.0)
     self.assertTrue(random_host_selector.get_host() in
                     HostSelectorTestCase.PORT_LIST)
     server_set = ServerSet(HostSelectorTestCase.SERVER_SET_PATH, ZK_HOSTS)
     g = server_set.join(HostSelectorTestCase.PORT_LIST[0], use_ip=False)
     g.get()
     no_of_iterations = 100
     # After the first endpoint joins, the random host selector should
     # start to use only the hosts in the server set.
     returned_hosts = [random_host_selector.get_host()
                       for i in xrange(no_of_iterations)]
     self.assertEqual(len(set(returned_hosts)), 1)
     self.assertEqual(len(host_provider.hosts), 1)
     g = server_set.join(HostSelectorTestCase.PORT_LIST[1], use_ip=False)
     g.get()
     # After the second endpoint joins the server set, the random host
     # selector should now return both endpoints.
     returned_hosts = [random_host_selector.get_host()
                       for i in xrange(no_of_iterations)]
     self.assertEqual(len(set(returned_hosts)), 2)
     self.assertEqual(len(host_provider.hosts), 2)
Example #2
 def test_get_zk_hosts_directly(self):
     """ Test passing zk_hosts in directly.
     """
     testutil.initialize_kazoo_client_manager(ZK_HOSTS)
     kz_client_manager = KazooClientManager(ZK_HOSTS)
     self.assertEqual(kz_client_manager.get_client().hosts,
                      ",".join(ZK_HOSTS))
Example #3
def initialize_kazoo_client_manager(zk_hosts):
    """ Initialize kazoo client manager. Note that all unit tests in kazoo_utils should
     use the initialization here."""
    kazoo_manager = KazooClientManager(zk_hosts,
                                       max_num_consecutive_failures=1,
                                       health_check_interval=0.0)
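    # Drop any client callbacks registered by earlier tests so they do not
    # leak across test cases.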
    kazoo_manager._client_callbacks = []
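
A minimal usage sketch of the helper above, following the pattern the test
examples use. The hosts and the znode path are hypothetical stand-ins, and
the import paths are assumptions about the package layout:

    import testutil                              # assumed import path
    from kazoo_utils import KazooClientManager   # assumed import path

    ZK_HOSTS = ['zk1.example.com:2181']  # hypothetical hosts

    testutil.initialize_kazoo_client_manager(ZK_HOSTS)
    # KazooClientManager is a singleton per cluster, so the no-arg
    # constructor returns the manager configured by the helper above.
    client = KazooClientManager().get_client()
    client.ensure_path('/test/some_path')  # hypothetical znode path
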
Example #4
 def test_serverset_destroy(self):
     testutil.initialize_kazoo_client_manager(ZK_HOSTS)
     client = KazooClientManager().get_client()
     client.start()
     fd, tmp_file = tempfile.mkstemp()
     server_set = ServerSet(
         ServerSetWithFileTestCase.SERVER_SET_DESTROY_PATH,
         ZK_HOSTS,
         waiting_in_secs=0.01)
     server_set.join(ServerSetWithFileTestCase.PORT_1, use_ip=False)
     server_set.join(ServerSetWithFileTestCase.PORT_2, use_ip=False)
     # Update the local file manually here, as a daemon would in practice.
     with open(tmp_file, 'w') as f:
         f.write(ServerSetWithFileTestCase.END_POINT_1 + "\n" +
                 ServerSetWithFileTestCase.END_POINT_2)
     # Give the server set joins some time to do their magic.
     gevent.sleep(1)
     server_set._destroy(ServerSetWithFileTestCase.END_POINT_1)
     # Update the local file manually here, as a daemon would in practice.
     with open(tmp_file, 'w') as f:
         f.write(ServerSetWithFileTestCase.END_POINT_2)
     gevent.sleep(1)
     children = client.get_children(
         ServerSetWithFileTestCase.SERVER_SET_DESTROY_PATH)
     for child in children:
         self.assertFalse(
             child.endswith(ServerSetWithFileTestCase.END_POINT_1))
     self.FILE_WATCH._clear_all_watches()
     os.remove(tmp_file)
Example #5
 def test_serverset_destroy(self):
     testutil.initialize_kazoo_client_manager(ZK_HOSTS)
     client = KazooClientManager().get_client()
     client.start()
     fd, tmp_file = tempfile.mkstemp()
     server_set = ServerSet(ServerSetWithFileTestCase.SERVER_SET_DESTROY_PATH,
                            ZK_HOSTS,
                            waiting_in_secs=0.01)
     server_set.join(ServerSetWithFileTestCase.PORT_1, use_ip=False)
     server_set.join(ServerSetWithFileTestCase.PORT_2, use_ip=False)
     # Update the local file manually here, as a daemon would in practice.
     with open(tmp_file, 'w') as f:
         f.write(ServerSetWithFileTestCase.END_POINT_1 +
                 "\n" +
                 ServerSetWithFileTestCase.END_POINT_2)
     # Give the server set joins some time to do their magic.
     gevent.sleep(1)
     server_set._destroy(ServerSetWithFileTestCase.END_POINT_1)
     # Update the local file manually here, as a daemon would in practice.
     with open(tmp_file, 'w') as f:
         f.write(ServerSetWithFileTestCase.END_POINT_2)
     gevent.sleep(1)
     children = client.get_children(
         ServerSetWithFileTestCase.SERVER_SET_DESTROY_PATH)
     for child in children:
         self.assertFalse(child.endswith(ServerSetWithFileTestCase.END_POINT_1))
     self.FILE_WATCH._clear_all_watches()
     os.remove(tmp_file)
Example #6
 def _kazoo_client(self):
     # Make the connection timeout long so the executive shell creating the
     # metaconfigs will not time out.
     kazoo_client = KazooClientManager(self.zk_hosts,
                                       start_timeout=200.0,
                                       session_timeout=200.0).get_client()
     if not kazoo_client:
         KazooClientManager(self.zk_hosts)._reconnect()
         kazoo_client = KazooClientManager(self.zk_hosts).get_client()
     return kazoo_client
Example #7
    def get_data(self):
        """Get the version(timestamp) from zk and read the data from S3 and return."""
        KazooClientManager(self.zk_hosts).get_client().ensure_path(
            self.zk_path)
        # znode_data stores the timestamp used to determine the s3 path.
        znode_data, znode = KazooClientManager(self.zk_hosts).get_client().get(
            self.zk_path)

        s3_path = self._get_s3_path_with_timestamp(znode_data)
        return s3config.S3Config(self.aws_keyfile, self.s3_bucket,
                                 self.s3_endpoint).get_config_string(s3_path)
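
The _get_s3_path_with_timestamp helper is not shown in these examples; a
hypothetical sketch of the timestamp-suffixed key scheme the docstring
describes, purely for illustration:

    def _get_s3_path_with_timestamp(self, timestamp_str):
        # The real implementation may differ; this only assumes the
        # timestamp is appended to a base s3 key, e.g.
        # 'configs/my_config/1400000000.0'.
        return '%s/%s' % (self.s3_file_path, timestamp_str)
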
Example #8
    def test_server_set(self):
        """Test various failure scenarios on server set implementation.

        1. When a new server joins the set, the watcher should be notified.
        2. When the underlying zk client disconnects and then recovers,
           the server set should be transparent to server set participants
           and watchers.
        3. When the underlying zk client messes up beyond recovery,
           the underlying client should be replaced, and this should be
           transparent to server set participants and watchers.

        """
        all_children = []
        watcher_triggered = Event()

        def server_set_watcher(children):
            while all_children:
                all_children.pop()
            for child in children:
                all_children.append(child)
            watcher_triggered.set()

        testutil.initialize_kazoo_client_manager(ZK_HOSTS)
        client = KazooClientManager().get_client()
        server_set = ServerSet(ServerSetTestCase.SERVER_SET_PATH,
                               ZK_HOSTS,
                               waiting_in_secs=0.01)
        server_set.join(ServerSetTestCase.PORT_1, use_ip=True).join()
        server_set.monitor(server_set_watcher).join()
        watcher_triggered.wait(1)
        # Now the server set should only contain end point 1
        self.assertEqual(all_children, [ServerSetTestCase.END_POINT_1])
        watcher_triggered.clear()
        server_set.join(ServerSetTestCase.PORT_2, use_ip=True).join()
        watcher_triggered.wait(1)
        all_children.sort()
        # Now the server set should contain both end point 1 and 2
        self.assertEqual(all_children, ServerSetTestCase.END_POINTS)
        # Test recoverable failure
        client.stop()
        watcher_triggered.clear()
        client.start()
        watcher_triggered.wait(1)
        # Server set should remain the same when the client recovers
        all_children.sort()
        self.assertEqual(all_children, ServerSetTestCase.END_POINTS)
        # Test client change
        client.stop()
        watcher_triggered.clear()
        # Give the monitoring greenlet a chance to detect failures.
        gevent.sleep(1)
        # Assert the client has been replaced with a new one
        self.assertFalse(KazooClientManager().get_client() is client)
        watcher_triggered.wait(1)
        # Server set should survive the underlying client being swapped out
        all_children.sort()
        self.assertEqual(all_children, ServerSetTestCase.END_POINTS)
Example #9
 def test_serverset_destroy(self):
     testutil.initialize_kazoo_client_manager(ZK_HOSTS)
     client = KazooClientManager().get_client()
     server_set = ServerSet(ServerSetTestCase.SERVER_SET_DESTROY_PATH,
                            ZK_HOSTS,
                            waiting_in_secs=0.01)
     server_set.join(ServerSetTestCase.PORT_1, use_ip=False)
     server_set.join(ServerSetTestCase.PORT_2, use_ip=False)
     # Give the server set joins some time to do their magic.
     gevent.sleep(1)
     server_set._destroy(ServerSetTestCase.END_POINT_1)
     gevent.sleep(1)
     children = client.get_children(
         ServerSetTestCase.SERVER_SET_DESTROY_PATH)
     for child in children:
         self.assertFalse(child.endswith(ServerSetTestCase.END_POINT_1))
Example #10
def get_data_from_zk(zk_path, mode):
    kazoo_client = KazooClientManager(ZK_HOSTS).get_client()

    if not kazoo_client.exists(zk_path):
        log.error("Path %s does not exist" % (zk_path))
        exit(_ZK_PATH_NOT_EXIST_CODE)

    try:
        if mode == "CONFIG":
            value, stat = kazoo_client.get(zk_path)
            return value, stat.version
        elif mode == 'SERVERSET':
            value = _get_hosts_from_zk(kazoo_client, zk_path)
            return value, None
        else:
            log.error("Unrecognized mode: %s" % mode)
            exit(_UNRECOGNIZED_MODE_CODE)
    except Exception:
        log.error("Failed to get value from zookeeper path %s with mode %s"
                  % (zk_path, mode))
        exit(_FAILED_TO_GET_DATA_FROM_ZK_CODE)
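
The _get_hosts_from_zk helper is not shown in these examples; a hedged
sketch, under the assumption that it lists the children of the server set
znode (whose names identify the live endpoints, as the serverset tests
show):

    def _get_hosts_from_zk(kazoo_client, zk_path):
        # The real helper may parse the child names further; each child of
        # a server set znode ends with its endpoint.
        return kazoo_client.get_children(zk_path)
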
Example #11
def get_data_from_zk(zk_path, mode):
    kazoo_client = KazooClientManager(ZK_HOSTS).get_client()

    if not kazoo_client.exists(zk_path):
        log.error("Path %s does not exist" % (zk_path))
        exit(_ZK_PATH_NOT_EXIST_CODE)

    try:
        if mode == "CONFIG":
            value, stat = kazoo_client.get(zk_path)
            return value, stat.version
        elif mode == 'SERVERSET':
            value = _get_hosts_from_zk(kazoo_client, zk_path)
            return value, None
        else:
            log.error("Unrecognized mode: %s" % mode)
            exit(_UNRECOGNIZED_MODE_CODE)
    except Exception:
        log.error("Failed to get value from zookeeper path %s with mode %s" %
                  (zk_path, mode))
        exit(_FAILED_TO_GET_DATA_FROM_ZK_CODE)
Example #12
 def test_random_host_selector_with_serverset(self):
     testutil.initialize_kazoo_client_manager(ZK_HOSTS)
     kazoo_client = KazooClientManager().get_client()
     kazoo_client.ensure_path(HostSelectorTestCase.SERVER_SET_PATH)
     host_provider = HostsProvider(HostSelectorTestCase.PORT_LIST,
                                   HostSelectorTestCase.SERVER_SET_PATH)
     self.assertTrue(host_provider.initialized)
     self.assertTrue(host_provider.hosts)
     # Since there are no live hosts in the server set, the host provider
     # should still use the static host list.
     self.assertEqual(host_provider._current_host_tuple,
                      host_provider._static_host_tuple)
     random_host_selector = RandomHostSelector(host_provider,
                                               expire_time=0,
                                               retry_time=0,
                                               invalidation_threshold=1.0)
     self.assertTrue(
         random_host_selector.get_host() in HostSelectorTestCase.PORT_LIST)
     server_set = ServerSet(HostSelectorTestCase.SERVER_SET_PATH, ZK_HOSTS)
     g = server_set.join(HostSelectorTestCase.PORT_LIST[0], use_ip=False)
     g.get()
     no_of_iterations = 100
     # After the first endpoint joins, the random host selector should
     # start to use only the hosts in the server set.
     returned_hosts = [
         random_host_selector.get_host() for i in xrange(no_of_iterations)
     ]
     self.assertEqual(len(set(returned_hosts)), 1)
     self.assertEqual(len(host_provider.hosts), 1)
     g = server_set.join(HostSelectorTestCase.PORT_LIST[1], use_ip=False)
     g.get()
     # After the second endpoint joins the server set, the random host
     # selector should now return both endpoints.
     returned_hosts = [
         random_host_selector.get_host() for i in xrange(no_of_iterations)
     ]
     self.assertEqual(len(set(returned_hosts)), 2)
     self.assertEqual(len(host_provider.hosts), 2)
Example #13
    def test_server_set(self):
        """Test various failure scenarios on server set implementation.

        1. When a new server joins the set, the watcher should be notified.
           In practice there is a daemon monitoring server set changes in
           zk and updating the local file.
        2. When the underlying zk client disconnects and then recovers,
           the server set should be transparent to server set participants
           and watchers.
        3. When the underlying zk client messes up beyond recovery,
           it should be transparent to server set participants and watchers.

        Although watching a local file affects all of the code paths
        involved in the behaviors above, we still want to test all the
        scenarios to make sure nothing breaks when a file is used.

        NOTE: to simulate the behavior in practice, we assume that when a
        server joins or leaves, a daemon makes the corresponding change to
        the local file.
        """
        fd, tmp_file = tempfile.mkstemp()
        all_children = []
        watcher_triggered = Event()

        def server_set_watcher(children):
            while all_children:
                all_children.pop()
            for child in children:
                all_children.append(child)
            watcher_triggered.set()

        testutil.initialize_kazoo_client_manager(ZK_HOSTS)
        client = KazooClientManager().get_client()
        server_set = ServerSet(ServerSetWithFileTestCase.SERVER_SET_PATH,
                               ZK_HOSTS,
                               waiting_in_secs=0.01,
                               file_path=tmp_file)
        server_set.join(ServerSetWithFileTestCase.PORT_1, use_ip=False).join()
        # Update the local file manually here, as a daemon would in practice.
        with open(tmp_file, 'w') as f:
            f.write(ServerSetWithFileTestCase.END_POINT_1)
        gevent.sleep(1)
        server_set.monitor(server_set_watcher).join()
        watcher_triggered.wait(1)
        # Now the server set should only contain end point 1
        self.assertEqual(all_children, [ServerSetWithFileTestCase.END_POINT_1])
        watcher_triggered.clear()
        server_set.join(ServerSetWithFileTestCase.PORT_2, use_ip=False).join()
        # Update the local file manually here, as a daemon would in practice.
        with open(tmp_file, 'w') as f:
            f.write(ServerSetWithFileTestCase.END_POINT_1 + "\n" +
                    ServerSetWithFileTestCase.END_POINT_2)
        gevent.sleep(1)
        watcher_triggered.wait(1)
        all_children.sort()
        # Now the server set should contain both end point 1 and 2
        self.assertEqual(all_children, ServerSetWithFileTestCase.END_POINTS)
        # Test recoverable failure
        client.stop()
        watcher_triggered.clear()
        client.start()
        watcher_triggered.wait(1)
        # Server set should remain the same when the client recovers
        all_children.sort()
        self.assertEqual(all_children, ServerSetWithFileTestCase.END_POINTS)
        # Test client change
        client.stop()
        watcher_triggered.clear()
        # Give the monitoring greenlet a chance to detect failures.
        gevent.sleep(1)
        watcher_triggered.wait(1)
        # Server set should survive the underlying client being swapped out
        all_children.sort()
        self.assertEqual(all_children, ServerSetWithFileTestCase.END_POINTS)

        self.FILE_WATCH._clear_all_watches()
        os.remove(tmp_file)
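
The daemon that the NOTE above assumes could be as small as a monitor
callback that mirrors the server set into the watched file. A hedged sketch
using only the APIs shown in these examples; the function name is
hypothetical and the newline-separated file format follows the writes in
the test above:

    def mirror_serverset_to_file(server_set, file_path):
        def on_change(children):
            # Rewrite the local file whenever the server set changes.
            with open(file_path, 'w') as f:
                f.write("\n".join(children))
        server_set.monitor(on_change)
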
Example #14
    def update_zk(self, old_value, value, force_update=True):
        """Update the s3 file and update the zk_node.

        All the operations inside this function is guarded by a distributed zk lock. It prevents
        race condition where multiple calls to try to update the config.
        Inside the lock, the given ``old_value`` is checked against the value in s3(located by zk
        node value) by default. Abort if they do not match unless force_update is True. Then the
        new data is uploaded to s3 whose key is suffixed with the current timestamp. Finally the
        zk node is updated with the current timestamp(which triggers zk_update_monitor to download).
        The last two steps cannot be reversed because we can only trigger zk_update_monitor to
        download when the new data is already in s3.

        If enable_audit_history is True, and author and comment are both given,
        we will log this change to audit history.

        Args:
            old_value: A string, which should be equal to the current value in zk.
                             old_value will be ignored if force_update is True
            value: A string, value to update to.
            force_update: Boolean, force update zk regardless if old_value matches s3_value or not. Default to be True.
        Returns:
            True if successfully updated, otherwise False.

        """
        # Avoid potential kazoo client problem.
        if not KazooClientManager(self.zk_hosts).get_client():
            KazooClientManager(self.zk_hosts)._reconnect()

        KazooClientManager(self.zk_hosts).get_client().ensure_path(self.zk_path)

        # Try to get the lock.
        lock = KazooClientManager(self.zk_hosts).get_client().Lock(self.zk_lock_path)
        if not lock.acquire(blocking=False):
            raise Exception('ZK lock is held by someone else. Try later.')

        try:
            znode_data, znode = KazooClientManager(self.zk_hosts).get_client().get(self.zk_path)
            # Only allow the update if the given old value is the current s3
            # value, or the zk node value is empty (which is the case when
            # the zk node is updated for the first time).
            if not force_update and znode_data:
                s3_path_with_timestamp = self._get_s3_path_with_timestamp(znode_data)
                try:
                    s3_value = s3config.S3Config(self.aws_keyfile, self.s3_bucket, self.s3_endpoint).get_config_string(
                        s3_path_with_timestamp)
                except ValueError as e:
                    log.error("Failed to get s3 value from s3 path %s: %s" %
                              (s3_path_with_timestamp, str(e)))
                    raise Exception('Old s3 key %s located by the zk node value does not exist: '
                                    '%s This is possibly due to s3 inconsistency. Try later.' %
                                    (s3_path_with_timestamp, str(e)))

                if old_value != s3_value:
                    raise Exception('Old value is not equal to s3 value for zk path %s, old_value: %s, s3_value: %s' %
                                    (self.zk_path, old_value, s3_value))

            update_time = time.time()
            current_timestamp_str = str(update_time)
            s3_path_with_timestamp = self._get_s3_path_with_timestamp(current_timestamp_str)

            result = s3config.S3Config(self.aws_keyfile, self.s3_bucket, self.s3_endpoint).put_config_string(s3_path_with_timestamp, value)
            if result is not None:
                raise Exception('Error writing to s3 path %s for zk path %s: %s' % (
                    s3_path_with_timestamp, self.zk_path, result))

            # Also write the index to S3 (this is the same data stored in
            # zk). It is used to provide easy access to the S3 data in case
            # zk is down.
            s3config.S3Config(self.aws_keyfile, self.s3_bucket, self.s3_endpoint).put_config_string(
                self.s3_file_path, current_timestamp_str)

            # Try up to 10 times in case the write to zk fails. We want to
            # make sure zk is updated because s3 has already been
            # overwritten; otherwise the s3 file and the local config file
            # would be inconsistent.
            for i in xrange(0, 10):
                try:
                    KazooClientManager(self.zk_hosts).get_client().set(
                        self.zk_path, current_timestamp_str)
                    return True
                except Exception as e:
                    print e
                    log.info('Zk write failed for zk path %s with %s for the %d time' % (
                        self.zk_path, e, i + 1))
                    KazooClientManager(self.zk_hosts)._reconnect()
            raise Exception('Failed to write to zk path %s even though we already wrote to s3.' % (
                self.zk_path))
        finally:
            lock.release()
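
A hedged usage sketch of the check-then-write path above: a
read-modify-write loop built on get_data() (Example #7) and
update_zk(force_update=False). Here ``config`` and ``transform`` are
hypothetical; the retry relies on the old-value check raising when a
concurrent writer wins the race:

    for _ in xrange(3):
        old = config.get_data()
        new = transform(old)  # hypothetical application-level edit
        try:
            if config.update_zk(old, new, force_update=False):
                break
        except Exception:
            gevent.sleep(1)  # back off, then retry with a fresh read
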
Example #15
def _kazoo_client(zk_hosts):
    return KazooClientManager(zk_hosts).get_client()
Example #16
def _zk_path_exists(zk_hosts, path):
    # KazooClientManager is a singleton per cluster so we don't need
    # to be concerned about unnecessary object creation.
    kazoo = KazooClientManager(zk_hosts)
    return kazoo.get_client().exists(path)
Example #17
    def test_data_watcher(self):
        """Test various scenarios for data watcher:

        1. When the data gets changed, the watcher callback should be
           invoked.
        2. When the underlying zk client disconnects and then recovers,
           the watcher callback should be invoked.
        3. When the underlying zk client messes up beyond recovery,
           the underlying client should be replaced, and once the new client
           is in place, the watcher callback should be invoked again.

        """
        data_stat = []
        watcher_triggered = Event()

        def data_watch(data, stat):
            while data_stat:
                data_stat.pop()
            data_stat.append(data)
            data_stat.append(stat)
            watcher_triggered.set()

        testutil.initialize_kazoo_client_manager(ZK_HOSTS)
        client = KazooClientManager().get_client()
        client.create(DataWatcherTestCase.TEST_PATH,
                      DataWatcherTestCase.DATA_0)
        data_watcher = DataWatcher(DataWatcherTestCase.TEST_PATH,
                                   ZK_HOSTS,
                                   waiting_in_secs=0.01)
        data_watcher.watch(data_watch).join()
        watcher_triggered.wait(1)
        # Now the data and version should be foo and 0.
        self.assertEqual(data_stat[0], DataWatcherTestCase.DATA_0)
        self.assertEqual(data_stat[1].version, 0)
        watcher_triggered.clear()
        client.set(DataWatcherTestCase.TEST_PATH, DataWatcherTestCase.DATA_1)
        watcher_triggered.wait(1)
        # Make sure that watch callback is triggered.
        self.assertEqual(data_stat[0], DataWatcherTestCase.DATA_1)
        self.assertEqual(data_stat[1].version, 1)
        data_stat.pop()
        data_stat.pop()
        # Test recoverable failure
        watcher_triggered.clear()
        client.stop()
        client.start()
        # Here the client will actually check the znode in the
        # background.
        watcher_triggered.wait(1)
        # Since nothing changed, there is no notification from the client.
        self.assertFalse(data_stat)
        # Test client change
        client.stop()
        watcher_triggered.clear()
        # Give the monitoring greenlet a chance to detect failures.
        gevent.sleep(1)
        # Assert the client has been replaced with a new one.
        self.assertFalse(KazooClientManager().get_client() is client)
        watcher_triggered.wait(1)
        # Make sure that watch callback is triggered when client is replaced.
        self.assertEqual(data_stat[0], DataWatcherTestCase.DATA_1)
        self.assertEqual(data_stat[1].version, 1)
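
Outside of tests, the same DataWatcher can keep an in-process cache fresh.
A minimal sketch; the znode path and the cache dict are hypothetical:

    _cached = {'data': None, 'version': None}

    def _on_update(data, stat):
        # Remember the latest data/version delivered by the watcher.
        _cached['data'] = data
        _cached['version'] = stat.version if stat else None

    watcher = DataWatcher('/config/my_key', ZK_HOSTS, waiting_in_secs=0.01)
    watcher.watch(_on_update).join()
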
Example #18
    def update_zk(self, old_value, value, force_update=True):
        """Update the s3 file and update the zk_node.

        All the operations inside this function is guarded by a distributed zk lock. It prevents
        race condition where multiple calls to try to update the config.
        Inside the lock, the given ``old_value`` is checked against the value in s3(located by zk
        node value) by default. Abort if they do not match unless force_update is True. Then the
        new data is uploaded to s3 whose key is suffixed with the current timestamp. Finally the
        zk node is updated with the current timestamp(which triggers zk_update_monitor to download).
        The last two steps cannot be reversed because we can only trigger zk_update_monitor to
        download when the new data is already in s3.

        If enable_audit_history is True, and author and comment are both given,
        we will log this change to audit history.

        Args:
            old_value: A string, which should be equal to the current value in zk.
                             old_value will be ignored if force_update is True
            value: A string, value to update to.
            force_update: Boolean, force update zk regardless if old_value matches s3_value or not. Default to be True.
        Returns:
            True if successfully updated, otherwise False.

        """
        # Avoid potential kazoo client problem.
        if not KazooClientManager(self.zk_hosts).get_client():
            KazooClientManager(self.zk_hosts)._reconnect()

        KazooClientManager(self.zk_hosts).get_client().ensure_path(
            self.zk_path)

        # Try to get the lock.
        lock = KazooClientManager(self.zk_hosts).get_client().Lock(
            self.zk_lock_path)
        if not lock.acquire(blocking=False):
            raise Exception('ZK lock is held by someone else. Try later.')

        try:
            znode_data, znode = KazooClientManager(
                self.zk_hosts).get_client().get(self.zk_path)
            # Only allow the update if the given old value is the current s3
            # value, or the zk node value is empty (which is the case when
            # the zk node is updated for the first time).
            if not force_update and znode_data:
                s3_path_with_timestamp = self._get_s3_path_with_timestamp(
                    znode_data)
                try:
                    s3_value = s3config.S3Config(
                        self.aws_keyfile, self.s3_bucket,
                        self.s3_endpoint).get_config_string(
                            s3_path_with_timestamp)
                except ValueError as e:
                    log.error("Failed to get s3 value from s3 path %s: %s" %
                              (s3_path_with_timestamp, str(e)))
                    raise Exception(
                        'Old s3 key %s located by the zk node value does not exist: '
                        '%s This is possibly due to s3 inconsistency. Try later.'
                        % (s3_path_with_timestamp, str(e)))

                if old_value != s3_value:
                    raise Exception(
                        'Old value is not equal to s3 value for zk path %s, old_value: %s, s3_value: %s'
                        % (self.zk_path, old_value, s3_value))

            update_time = time.time()
            current_timestamp_str = str(update_time)
            s3_path_with_timestamp = self._get_s3_path_with_timestamp(
                current_timestamp_str)

            result = s3config.S3Config(self.aws_keyfile, self.s3_bucket,
                                       self.s3_endpoint).put_config_string(
                                           s3_path_with_timestamp, value)
            if result is not None:
                raise Exception(
                    'Error writing to s3 path %s for zk path %s: %s' %
                    (s3_path_with_timestamp, self.zk_path, result))

            # Also write the index to S3 (this is the same data stored in
            # zk). It is used to provide easy access to the S3 data in case
            # zk is down.
            s3config.S3Config(self.aws_keyfile, self.s3_bucket,
                              self.s3_endpoint).put_config_string(
                                  self.s3_file_path, current_timestamp_str)

            # Try up to 10 times in case the write to zk fails. We want to
            # make sure zk is updated because s3 has already been
            # overwritten; otherwise the s3 file and the local config file
            # would be inconsistent.
            for i in xrange(0, 10):
                try:
                    KazooClientManager(self.zk_hosts).get_client().set(
                        self.zk_path, current_timestamp_str)
                    return True
                except Exception as e:
                    print e
                    log.info(
                        'Zk write failed for zk path %s with %s for the %d time'
                        % (self.zk_path, e, i + 1))
                    KazooClientManager(self.zk_hosts)._reconnect()
            raise Exception(
                'Failed to write to zk path %s even though we already wrote to s3.'
                % (self.zk_path))
        finally:
            lock.release()
Example #19
    def test_server_set(self):
        """Test various failure scenarios on server set implementation.

        1. When a new server joins the set, the watcher should be notified.
           In practice there is a daemon monitoring server set changes in
           zk and updating the local file.
        2. When the underlying zk client disconnects and then recovers,
           the server set should be transparent to server set participants
           and watchers.
        3. When the underlying zk client messes up beyond recovery,
           it should be transparent to server set participants and watchers.

        Although watching a local file affects all of the code paths
        involved in the behaviors above, we still want to test all the
        scenarios to make sure nothing breaks when a file is used.

        NOTE: to simulate the behavior in practice, we assume that when a
        server joins or leaves, a daemon makes the corresponding change to
        the local file.
        """
        fd, tmp_file = tempfile.mkstemp()
        all_children = []
        watcher_triggered = Event()

        def server_set_watcher(children):
            while all_children:
                all_children.pop()
            for child in children:
                all_children.append(child)
            watcher_triggered.set()

        testutil.initialize_kazoo_client_manager(ZK_HOSTS)
        client = KazooClientManager().get_client()
        server_set = ServerSet(ServerSetWithFileTestCase.SERVER_SET_PATH,
                               ZK_HOSTS,
                               waiting_in_secs=0.01,
                               file_path=tmp_file)
        server_set.join(ServerSetWithFileTestCase.PORT_1, use_ip=False).join()
        # Update the local file manually here, as a daemon would in practice.
        with open(tmp_file, 'w') as f:
            f.write(ServerSetWithFileTestCase.END_POINT_1)
        gevent.sleep(1)
        server_set.monitor(server_set_watcher).join()
        watcher_triggered.wait(1)
        # Now the server set should only contain end point 1
        self.assertEqual(all_children, [ServerSetWithFileTestCase.END_POINT_1])
        watcher_triggered.clear()
        server_set.join(ServerSetWithFileTestCase.PORT_2, use_ip=False).join()
        # Update the local file manually here, as a daemon would in practice.
        with open(tmp_file, 'w') as f:
            f.write(ServerSetWithFileTestCase.END_POINT_1 +
                    "\n" +
                    ServerSetWithFileTestCase.END_POINT_2)
        gevent.sleep(1)
        watcher_triggered.wait(1)
        all_children.sort()
        # Now the server set should contain both end point 1 and 2
        self.assertEqual(all_children, ServerSetWithFileTestCase.END_POINTS)
        # Test recoverable failure
        client.stop()
        watcher_triggered.clear()
        client.start()
        watcher_triggered.wait(1)
        # Server set should remain the same when the client recovers
        all_children.sort()
        self.assertEqual(all_children, ServerSetWithFileTestCase.END_POINTS)
        # Test client change
        client.stop()
        watcher_triggered.clear()
        # Give the monitoring greenlet a chance to detect failures.
        gevent.sleep(1)
        watcher_triggered.wait(1)
        # Server set should survive the underlying client being swapped out
        all_children.sort()
        self.assertEqual(all_children, ServerSetWithFileTestCase.END_POINTS)

        self.FILE_WATCH._clear_all_watches()
        os.remove(tmp_file)
Example #20
 def test_get_zk_hosts_directly(self):
     """ Test passing zk_hosts in directly.
     """
     testutil.initialize_kazoo_client_manager(ZK_HOSTS)
     kz_client_manager = KazooClientManager(ZK_HOSTS)
     self.assertEqual(kz_client_manager.get_client().hosts, ",".join(ZK_HOSTS))