import hashlib
import logging
import os
import struct
import sys
import traceback
from random import randint

import gevent
from kazoo.client import KazooClient, KazooState
from kazoo.handlers.gevent import SequentialGeventHandler

# Project-specific imports (module paths assumed): ConsistentHash comes from
# a consistent-hashing helper library; ConnectionState, ConnectionStatus and
# ConnectionType come from the sandesh connection-info machinery.


class ConsistentScheduler(object):
    '''
        LibPartitionHelper abstracts out workers and work items, and their
        mapping to partitions, so the application deals only with the work
        items it owns, without worrying about the partition mapping.

        This class also provides synchronization primitives to ensure that
        applications can clean up before giving up their partitions.
    '''
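    # Cap on the number of schedule() passes spent waiting for allocation
    # before giving up; randomized, presumably to stagger give-up times
    # across workers.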
    _MAX_WAIT_4_ALLOCATION = 6 + randint(0, 9)

    def __init__(self,
                 service_name=None,
                 zookeeper='127.0.0.1:2181',
                 delete_hndlr=None,
                 add_hndlr=None,
                 bucketsize=47,
                 item2part_func=None,
                 partitioner=None,
                 logger=None):
        if logger:
            self._logger = logger
        else:
            self._logger = logging.getLogger(__name__)
        self._service_name = service_name or os.path.basename(sys.argv[0])
        self._item2part_func = item2part_func or self._device2partition
        self._zookeeper_srvr = zookeeper
        self._bucketsize = bucketsize
        self._delete_hndlr = delete_hndlr
        self._add_hndlr = add_hndlr
        self._partitioner = partitioner or self._partitioner_func
        self._partitions = {}
        self._con_hash = None
        self._last_log = ''
        self._last_log_cnt = 0
        self._partition_set = list(map(str, range(self._bucketsize)))
        self._zk_path = '/'.join(['/contrail_cs', self._service_name])
        self._zk = KazooClient(self._zookeeper_srvr)
        self._zk.add_listener(self._zk_lstnr)
        self._zk.start()
        self._pc = self._zk.SetPartitioner(path=self._zk_path,
                                           set=self._partition_set,
                                           partition_func=self._partitioner)
        self._wait_allocation = 0
        gevent.sleep(0)

    def _zk_lstnr(self, state):
        self._supress_log('zk state change to %s' % str(state))

    def schedule(self, items, lock_timeout=30):
        gevent.sleep(0)
        ret = False
        if self._pc.failed:
            raise Exception("Lost or unable to acquire partition")
        elif self._pc.release:
            self._supress_log('Releasing...')
            self._release()
        elif self._pc.allocating:
            self._supress_log('Waiting for allocation...')
            self._pc.wait_for_acquire(lock_timeout)
            if self._wait_allocation < self._MAX_WAIT_4_ALLOCATION:
                self._wait_allocation += 1
            else:
                raise StopIteration('Giving up after %d tries!' %
                                    (self._wait_allocation))
        elif self._pc.acquired:
            self._supress_log('got work: ', list(self._pc))
            ret = True
            self._wait_allocation = 0
            self._populate_work_items(items)
            self._supress_log('work items: ',
                              self._items2name(self.work_items()),
                              'from the list', self._items2name(items))
        return ret

    def work_items(self):
        return sum(self._partitions.values(), [])

    def finish(self):
        self._inform_delete(self._partitions.keys())
        self._pc.finish()

    def _items2name(self, items):
        return [x.name for x in items]

    def _supress_log(self, *s):
        # Log a message only when it differs from the previous one; repeated
        # messages are counted and summarized on the next distinct message.
        slog = ' '.join(map(str, s))
        dl = ''
        if slog != self._last_log:
            if self._last_log_cnt:
                dl += ' ' * 4
                dl += '.' * 8
                dl += '[last print repeats %d times]' % self._last_log_cnt
                self._last_log_cnt = 0
            dl += slog
            self._last_log = slog
            self._logger.debug(dl)
        else:
            self._last_log_cnt += 1

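    # Build the consistent-hash ring lazily, then apply only the delta of
    # workers that joined or left. Incremental updates keep most
    # partition-to-worker assignments stable across membership changes,
    # which is the point of consistent hashing.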
    def _consistent_hash(self, members):
        if self._con_hash is None:
            self._con_hash = ConsistentHash(members)
            self._supress_log('members:', self._con_hash.nodes)
        cur, updtd = set(self._con_hash.nodes), set(members)
        if cur != updtd:
            newm = updtd - cur
            rmvd = cur - updtd
            if newm:
                self._supress_log('new workers:', newm)
                self._con_hash.add_nodes(list(newm))
            if rmvd:
                self._supress_log('workers left:', rmvd)
                self._con_hash.del_nodes(list(rmvd))
        return self._con_hash

    def _consistent_hash_get_node(self, members, partition):
        return self._consistent_hash(members).get_node(partition)

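    # Contract expected by kazoo's SetPartitioner: partition_func(identifier,
    # members, partitions) returns the subset of partitions this member
    # claims; here, the partitions the consistent-hash ring maps to us.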
    def _partitioner_func(self, identifier, members, _partitions):
        return [p for p in _partitions \
                if self._consistent_hash_get_node(members, p) == identifier]

    def _release(self):
        old = set(self._pc)
        new = set(
            self._partitioner(self._pc._identifier, list(self._pc._party),
                              self._partition_set))
        rmvd = old - new
        added = new - old
        if rmvd:
            self._inform_delete(list(rmvd))
        if added:
            self._inform_will_add(list(added))
        self._pc.release_set()

    def _list_items_in(self, partitions):
        return sum([self._partitions[k] for k in partitions if k in \
                    self._partitions], [])

    def _inform_will_add(self, partitions):
        if callable(self._add_hndlr):
            self._add_hndlr(self._list_items_in(partitions))

    def _inform_delete(self, partitions):
        if callable(self._delete_hndlr):
            self._delete_hndlr(self._list_items_in(partitions))

    def _populate_work_items(self, items):
        # Rebuild the partition -> work-items map, keeping only items whose
        # partition is currently owned by this worker.
        self._refresh_work_items()
        owned = set(self._pc)
        for i in items:
            part = str(self._item2part_func(i.name))
            if part in owned:
                if part not in self._partitions:
                    self._partitions[part] = []
                if i.name not in [x.name for x in self._partitions[part]]:
                    self._partitions[part].append(i)
        self._logger.debug('@populate_work_items(%s): done!' % ' '.join(
            '%s:%s' % (part, ','.join(x.name for x in items_))
            for part, items_ in self._partitions.items()))
        gevent.sleep(0)

    def _device2partition(self, key):
        # Last 8 bytes of the MD5 digest of the name, read as an unsigned
        # 64-bit integer, modulo the bucket size.
        return struct.unpack(
            'Q',
            hashlib.md5(key.encode('utf-8')).digest()[-8:])[0] % self._bucketsize

    def _refresh_work_items(self):
        for k in self._partitions:
            self._partitions[k] = []
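

# --- Usage sketch (illustrative; not part of the original module) ---
# A minimal sketch of how an application might drive ConsistentScheduler.
# `WorkItem` and both handlers are hypothetical stand-ins; any object
# exposing a `.name` attribute can serve as a work item.
#
#     class WorkItem(object):
#         def __init__(self, name):
#             self.name = name
#
#     def on_delete(items):
#         print('giving up: %s' % [i.name for i in items])
#
#     def on_add(items):
#         print('about to own: %s' % [i.name for i in items])
#
#     scheduler = ConsistentScheduler(service_name='demo',
#                                     delete_hndlr=on_delete,
#                                     add_hndlr=on_add)
#     work = [WorkItem('device-%d' % n) for n in range(10)]
#     while not scheduler.schedule(work):
#         gevent.sleep(1)
#     print('owned: %s' % [i.name for i in scheduler.work_items()])
#     scheduler.finish()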
class ConsistentScheduler(object):
    '''
        LibPartitionHelper abstracts out workers and work items, and their
        mapping to partitions, so the application deals only with the work
        items it owns, without worrying about the partition mapping.

        This class also provides synchronization primitives to ensure that
        applications can clean up before giving up their partitions.
    '''
    _MAX_WAIT_4_ALLOCATION = 6 + randint(0, 9)

    def __init__(self,
                 service_name=None,
                 zookeeper='127.0.0.1:2181',
                 delete_hndlr=None,
                 add_hndlr=None,
                 bucketsize=47,
                 item2part_func=None,
                 partitioner=None,
                 logger=None,
                 cluster_id=''):
        if logger:
            self._logger = logger
        else:
            self._logger = logging.getLogger(__name__)
        self._service_name = service_name or os.path.basename(sys.argv[0])
        self._item2part_func = item2part_func or self._device2partition
        self._zookeeper_srvr = zookeeper
        self._zk = None
        self._bucketsize = bucketsize
        self._delete_hndlr = delete_hndlr
        self._add_hndlr = add_hndlr
        self._partitioner = partitioner or self._partitioner_func
        self._partitions = {}
        self._con_hash = None
        self._last_log = ''
        self._last_log_cnt = 0
        self._partition_set = list(map(str, range(self._bucketsize)))

        self._cluster_id = cluster_id
        if self._cluster_id:
            self._zk_path = '/'.join(['', self._cluster_id, 'contrail_cs',
                                      self._service_name])
        else:
            self._zk_path = '/'.join(['/contrail_cs', self._service_name])
        self._conn_state = None
        self._sandesh_connection_info_update(status='INIT', message='')

        while True:
            self._logger.error("Consistent scheduler zk start")
            self._zk = KazooClient(self._zookeeper_srvr,
                                   handler=SequentialGeventHandler())
            self._zk.add_listener(self._zk_lstnr)
            try:
                self._zk.start()
                while self._conn_state != ConnectionStatus.UP:
                    gevent.sleep(1)
                break
            except Exception as e:
                # Update connection info
                self._sandesh_connection_info_update(status='DOWN',
                                                     message=str(e))
                self._zk.remove_listener(self._zk_lstnr)
                try:
                    self._zk.stop()
                    self._zk.close()
                except Exception as ex:
                    template = ("Exception {0} in Consistent scheduler zk "
                                "stop/close. Args:\n{1!r}")
                    msg = template.format(type(ex).__name__, ex.args)
                    self._logger.error("%s : traceback %s for %s" %
                                       (msg, traceback.format_exc(),
                                        self._service_name))
                finally:
                    self._zk = None
                gevent.sleep(1)
        self._pc = self._zk.SetPartitioner(path=self._zk_path,
                                           set=self._partition_set,
                                           partition_func=self._partitioner)
        self._wait_allocation = 0
        gevent.sleep(0)

    def _sandesh_connection_info_update(self, status, message):
        new_conn_state = getattr(ConnectionStatus, status)
        ConnectionState.update(conn_type=ConnectionType.ZOOKEEPER,
                               name='Zookeeper',
                               status=new_conn_state,
                               message=message,
                               server_addrs=self._zookeeper_srvr.split(','))

        if ((self._conn_state and self._conn_state != ConnectionStatus.DOWN)
                and new_conn_state == ConnectionStatus.DOWN):
            msg = 'Connection to Zookeeper down: %s' % (message)
            self._supress_log(msg)
        if (self._conn_state and self._conn_state != new_conn_state
                and new_conn_state == ConnectionStatus.UP):
            msg = 'Connection to Zookeeper ESTABLISHED'
            self._supress_log(msg)

        self._conn_state = new_conn_state

    # end _sandesh_connection_info_update

    def _zk_lstnr(self, state):
        self._logger.error("Consistent scheduler listen %s" % str(state))
        if state == KazooState.CONNECTED:
            # Update connection info
            self._sandesh_connection_info_update(status='UP', message='')
        elif state == KazooState.LOST:
            self._logger.error("Consistent scheduler connection LOST")
            # Lost the session with the ZooKeeper server.
            # The best option we have is to exit the process and start all
            # over again.
            self._sandesh_connection_info_update(
                status='DOWN', message='Connection to Zookeeper lost')
            os._exit(2)
        elif state == KazooState.SUSPENDED:
            self._logger.error("Consistent scheduler connection SUSPENDED")
            # Update connection info
            self._sandesh_connection_info_update(
                status='INIT',
                message='Connection to zookeeper lost. Retrying')

    def schedule(self, items, lock_timeout=30):
        gevent.sleep(0)
        ret = False
        if self._pc.failed:
            self._logger.error('Lost or unable to acquire partition')
            os._exit(2)
        elif self._pc.release:
            self._supress_log('Releasing...')
            self._release()
        elif self._pc.allocating:
            self._supress_log('Waiting for allocation...')
            self._pc.wait_for_acquire(lock_timeout)
            if self._wait_allocation < self._MAX_WAIT_4_ALLOCATION:
                self._wait_allocation += 1
            else:
                self._logger.error('Giving up after %d tries!' %
                                   (self._wait_allocation))
                os._exit(2)
        elif self._pc.acquired:
            self._supress_log('got work: ', list(self._pc))
            ret = True
            self._wait_allocation = 0
            self._populate_work_items(items)
            self._supress_log('work items: ',
                              self._items2name(self.work_items()),
                              'from the list', self._items2name(items))
        return ret

    def members(self):
        return list(self._con_hash.nodes)

    def partitions(self):
        return list(self._pc)

    def work_items(self):
        return sum(self._partitions.values(), [])

    def finish(self):
        self._inform_delete(self._partitions.keys())
        self._pc.finish()
        self._zk.remove_listener(self._zk_lstnr)
        gevent.sleep(1)
        try:
            self._zk.stop()
        except Exception:
            self._logger.error("Stopping kazooclient failed")
        else:
            self._logger.error("Stopping kazooclient successful")
        try:
            self._zk.close()
        except Exception:
            self._logger.error("Closing kazooclient failed")
        else:
            self._logger.error("Closing kazooclient successful")

    def _items2name(self, items):
        return [x.name for x in items]

    def _supress_log(self, *s):
        # Log a message only when it differs from the previous one; repeated
        # messages are counted and summarized on the next distinct message.
        slog = ' '.join(map(str, s))
        dl = ''
        if slog != self._last_log:
            if self._last_log_cnt:
                dl += ' ' * 4
                dl += '.' * 8
                dl += '[last print repeats %d times]' % self._last_log_cnt
                self._last_log_cnt = 0
            dl += slog
            self._last_log = slog
            self._logger.debug(dl)
        else:
            self._last_log_cnt += 1

    def _consistent_hash(self, members):
        if self._con_hash is None:
            self._con_hash = ConsistentHash(members)
            self._logger.error('members: %s' % (str(self._con_hash.nodes)))
        cur, updtd = set(self._con_hash.nodes), set(members)
        if cur != updtd:
            newm = updtd - cur
            rmvd = cur - updtd
            if newm:
                self._logger.error('new members: %s' % (str(newm)))
                self._con_hash.add_nodes(list(newm))
            if rmvd:
                self._logger.error('members left: %s' % (str(rmvd)))
                self._con_hash.del_nodes(list(rmvd))
        return self._con_hash

    def _consistent_hash_get_node(self, members, partition):
        return self._consistent_hash(members).get_node(partition)

    def _partitioner_func(self, identifier, members, _partitions):
        partitions = [p for p in _partitions \
            if self._consistent_hash_get_node(members, p) == identifier]
        self._logger.error('partitions: %s' % (str(partitions)))
        return partitions

    def _release(self):
        old = set(self._pc)
        new = set(
            self._partitioner(self._pc._identifier, list(self._pc._party),
                              self._partition_set))
        rmvd = old - new
        added = new - old
        if rmvd:
            self._inform_delete(list(rmvd))
        if added:
            self._inform_will_add(list(added))
        self._pc.release_set()

    def _list_items_in(self, partitions):
        return sum([self._partitions[k] for k in partitions if k in \
                    self._partitions], [])

    def _inform_will_add(self, partitions):
        if callable(self._add_hndlr):
            self._add_hndlr(self._list_items_in(partitions))

    def _inform_delete(self, partitions):
        if callable(self._delete_hndlr):
            self._delete_hndlr(self._list_items_in(partitions))

    def _populate_work_items(self, items):
        # Rebuild the partition -> work-items map, keeping only items whose
        # partition is currently owned by this worker.
        self._refresh_work_items()
        owned = set(self._pc)
        for i in items:
            part = str(self._item2part_func(i.name))
            if part in owned:
                if part not in self._partitions:
                    self._partitions[part] = []
                if i.name not in [x.name for x in self._partitions[part]]:
                    self._partitions[part].append(i)
        self._logger.debug('@populate_work_items(%s): done!' % ' '.join(
            '%s:%s' % (part, ','.join(x.name for x in items_))
            for part, items_ in self._partitions.items()))
        gevent.sleep(0)

    def _device2partition(self, key):
        # Last 8 bytes of the MD5 digest of the name, read as an unsigned
        # 64-bit integer, modulo the bucket size.
        return struct.unpack(
            'Q',
            hashlib.md5(key.encode('utf-8')).digest()[-8:])[0] % self._bucketsize

    def _refresh_work_items(self):
        for k in self._partitions:
            self._partitions[k] = []
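

# --- Illustration (added): the default item-to-partition mapping ---
# _device2partition takes the MD5 digest of the item name, reads its last
# 8 bytes as an unsigned 64-bit integer, and reduces it modulo bucketsize,
# so a given name always lands in the same partition on every worker:
#
#     import hashlib
#     import struct
#
#     def device2partition(key, bucketsize=47):
#         digest = hashlib.md5(key.encode('utf-8')).digest()
#         return struct.unpack('Q', digest[-8:])[0] % bucketsize
#
#     assert device2partition('vrouter-1') == device2partition('vrouter-1')
#     assert 0 <= device2partition('anything') < 47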
import copy
import logging
import time
import traceback
from collections import defaultdict

from kafka import KafkaClient
from kafka.util import kafka_bytestring
from kazoo.client import KazooClient, KazooState
from kazoo.recipe.partitioner import PartitionState
from kazoo.retry import KazooRetry

# The following names are project-specific helpers and constants defined
# elsewhere in this codebase (import paths assumed): build_zk_group_path,
# get_kafka_topics, PartitionerError, PartitionerZookeeperError,
# KAZOO_RETRY_DEFAULTS, PARTITIONS_REFRESH_TIMEOUT.


class Partitioner(object):
    """Partitioner is used to handle distributing a set of
    topics/partitions among a group of consumers.

    :param topics: kafka topics
    :type topics: list
    :param acquire: function to be called when a set of partitions
                    has been acquired. It should usually allocate the consumers.
    :param release: function to be called when the acquired
                    partitions have to be released. It should usually stop the consumers.

    """
    def __init__(self, config, topics, acquire, release):
        self.log = logging.getLogger(self.__class__.__name__)
        self.config = config
        # Clients
        self.kazoo_client = None
        self.kafka_client = None
        self.topics = topics
        self.acquired_partitions = defaultdict(list)
        self.partitions_set = set()
        # User callbacks
        self.acquire = acquire
        self.release = release
        # We guarantee that the user-defined release function is always
        # called after acquire, and never twice in a row. Initialized to
        # True because no partitions have been acquired at startup.
        self.released_flag = True
        # Kafka metadata refresh
        self.force_partitions_refresh = True
        self.last_partitions_refresh = 0
        # Kazoo partitioner
        self._partitioner = None
        # Map Kazoo partitioner state to actions
        self.actions = {
            PartitionState.ALLOCATING: self._allocating,
            PartitionState.ACQUIRED: self._acquire,
            PartitionState.RELEASE: self._release,
            PartitionState.FAILURE: self._fail
        }

        self.kazoo_retry = None
        self.zk_group_path = build_zk_group_path(
            self.config.group_path,
            self.topics,
        ) if self.config.use_group_sha else self.config.group_path

    def start(self):
        """Create a new group and wait until the partitions have been
        acquired. This function should never be called twice.

        :raises: PartitionerError upon partitioner failures

        .. note:: This is a blocking operation.
        """
        self.kazoo_retry = KazooRetry(**KAZOO_RETRY_DEFAULTS)
        self.kazoo_client = KazooClient(
            self.config.zookeeper,
            connection_retry=self.kazoo_retry,
        )
        self.kafka_client = KafkaClient(self.config.broker_list)

        self.log.debug("Starting a new group for topics %s", self.topics)
        self.released_flag = True
        self._refresh()

    def __enter__(self):
        self.start()

    def __exit__(self, exc_type, exc_value, traceback):
        self.stop()

    def stop(self):
        """Leave the group and release the partitions."""
        self.log.debug("Stopping group for topics %s", self.topics)
        self.release_and_finish()
        self._close_connections()

    def refresh(self):
        """Rebalance upon group changes, such as when a consumer
        joins/leaves the group, the partitions for a topics change, or the
        partitioner itself fails (connection to zookeeper lost).
        This method should be called periodically to make sure that the
        group is in sync.

        :raises: PartitionerError upon partitioner failures
        """
        self.log.debug("Refresh group for topics %s", self.topics)
        self._refresh()

    def _refresh(self):
        while True:
            partitioner = self._get_partitioner()
            self._handle_group(partitioner)
            if self.acquired_partitions:
                break

    def need_partitions_refresh(self):
        return (self.force_partitions_refresh or self.last_partitions_refresh <
                time.time() - PARTITIONS_REFRESH_TIMEOUT)

    def _get_partitioner(self):
        """Get an instance of the partitioner. When the partitions set changes
         we need to destroy the partitioner and create another one.
        If the partitioner does not exist yet, create a new partitioner.
        If the partitions set changed, destroy the partitioner and create a new
        partitioner. Different consumer will eventually use
        the same partitions set.

        :param partitions: the partitions set to use for partitioner.
        :type partitions: set
        """
        if self.need_partitions_refresh() or not self._partitioner:
            try:
                partitions = self.get_partitions_set()
            except Exception:
                self.log.exception("Failed to get partitions set from Kafka. "
                                   "Releasing the group.")
                self.release_and_finish()
                raise PartitionerError(
                    "Failed to get partitions set from Kafka")
            self.force_partitions_refresh = False
            self.last_partitions_refresh = time.time()
            if partitions != self.partitions_set:
                # If partitions changed we release the consumers, destroy the
                # partitioner and disconnect from zookeeper.
                self.log.info(
                    "Partitions set changed. New partitions: %s. "
                    "Old partitions: %s. Rebalancing...",
                    [p for p in partitions if p not in self.partitions_set],
                    [p for p in self.partitions_set if p not in partitions])
                # We need to destroy the existing partitioner before creating
                # a new one.
                self.release_and_finish()
                self._partitioner = self._create_partitioner(partitions)
                self.partitions_set = partitions
        return self._partitioner

    def _create_partitioner(self, partitions):
        """Connect to zookeeper and create a partitioner"""
        if self.kazoo_client.state != KazooState.CONNECTED:
            try:
                self.kazoo_client.start()
            except Exception:
                self.log.exception("Impossible to connect to zookeeper")
                self.release_and_finish()
                raise PartitionerError("Zookeeper connection failure")

        self.log.debug(
            "Creating partitioner for group %s, topic %s,"
            " partitions set %s", self.config.group_id, self.topics,
            partitions)
        return self.kazoo_client.SetPartitioner(
            path=self.zk_group_path,
            set=partitions,
            time_boundary=self.config.partitioner_cooldown,
        )

    def release_and_finish(self):
        """Release consumers and terminate the partitioner"""
        if self._partitioner:
            self._release(self._partitioner)
            self._partitioner.finish()
        self._partitioner = None

    def _close_connections(self):
        self.kafka_client.close()
        self.partitions_set = set()
        self.last_partitions_refresh = 0
        self.kazoo_client.stop()
        self.kazoo_client.close()
        self.kazoo_retry = None

    def _handle_group(self, partitioner):
        """Handle group status changes, for example when a new
        consumer joins or leaves the group.
        """
        if partitioner:
            try:
                self.actions[partitioner.state](partitioner)
            except KeyError:
                self.log.exception("Unexpected partitioner state.")
                self.release_and_finish()
                raise PartitionerError("Invalid partitioner state %s" %
                                       partitioner.state)

    def _allocating(self, partitioner):
        """Usually we don't want to do anything but waiting in
        allocating state.
        """
        partitioner.wait_for_acquire()

    def _acquire(self, partitioner):
        """Acquire kafka topics-[partitions] and start the
        consumers for them.
        """
        acquired_partitions = self._get_acquired_partitions(partitioner)
        if acquired_partitions != self.acquired_partitions:
            # TODO: Decrease logging level
            self.log.info(
                "Total number of acquired partitions = %s"
                "It was %s before. Added partitions %s. Removed partitions %s",
                len(acquired_partitions),
                len(self.acquired_partitions),
                [
                    p for p in acquired_partitions
                    if p not in self.acquired_partitions
                ],
                [
                    p for p in self.acquired_partitions
                    if p not in acquired_partitions
                ],
            )
            self.acquired_partitions = acquired_partitions
            try:
                self.acquire(copy.deepcopy(self.acquired_partitions))
                self.released_flag = False
            except Exception:
                self.log.exception("Acquire action failed.")
                trace = traceback.format_exc()
                self.release_and_finish()
                raise PartitionerError(
                    "Acquire action failed. "
                    "Acquire error: {trace}".format(trace=trace))

    def _release(self, partitioner):
        """Release the consumers and acquired partitions.
        This function is executed either at termination time or
        whenever there is a group change.
        """
        self.log.debug("Releasing partitions")
        try:
            if not self.released_flag:
                self.release(self.acquired_partitions)
                self.released_flag = True
        except Exception:
            trace = traceback.format_exc()
            self.log.exception("Release action failed.")
            raise PartitionerError(
                "Release action failed. "
                "Release error: {trace}".format(trace=trace))
        partitioner.release_set()
        self.acquired_partitions.clear()
        self.force_partitions_refresh = True

    def _fail(self, partitioner):
        """Handle zookeeper failures.
        Executed when the consumer group is not able to recover
        the connection. In this case, we cowardly stop
        the running consumers.
        """
        self.log.error("Lost or unable to acquire partitions")
        self.release_and_finish()
        raise PartitionerZookeeperError(
            "Internal partitioner error. "
            "Lost connection to zookeeper: {cluster}".format(
                cluster=self.config.zookeeper, ))

    def _get_acquired_partitions(self, partitioner):
        """Retrieve acquired partitions from a partitioner.

        :returns: acquired topic and partitions
        :rtype: dict {<topic>: <[partitions]>}
        """
        acquired_partitions = defaultdict(list)
        for partition in partitioner:
            topic, partition_id = partition.rsplit('-', 1)
            acquired_partitions[topic].append(int(partition_id))
        return acquired_partitions

    def get_partitions_set(self):
        """ Load partitions metadata from kafka and create
        a set containing "<topic>-<partition_id>"

        :returns: partitions for user topics
        :rtype: set
        :raises PartitionerError: if no partitions have been found
        """
        topic_partitions = get_kafka_topics(self.kafka_client)
        partitions = []
        missing_topics = set()
        for topic in self.topics:
            kafka_topic = kafka_bytestring(topic)
            if kafka_topic not in topic_partitions:
                missing_topics.add(topic)
            else:
                partitions += [
                    "{0}-{1}".format(topic, p)
                    for p in topic_partitions[kafka_topic]
                ]
        if missing_topics:
            self.log.info("Missing topics: %s", missing_topics)
        if not partitions:
            self.release_and_finish()
            raise PartitionerError(
                "No partitions found for topics: {topics}".format(
                    topics=self.topics))
        return set(partitions)
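
# --- Usage sketch (illustrative; not part of the original module) ---
# A minimal sketch, assuming a `config` object exposing the attributes this
# class reads (zookeeper, broker_list, group_id, group_path, use_group_sha,
# partitioner_cooldown). Both callbacks receive a {topic: [partition_ids]}
# mapping:
#
#     def acquire(partitions):
#         print('start consumers for %s' % dict(partitions))
#
#     def release(partitions):
#         print('stop consumers for %s' % dict(partitions))
#
#     partitioner = Partitioner(config, topics=['my_topic'],
#                               acquire=acquire, release=release)
#     partitioner.start()            # blocks until partitions are acquired
#     try:
#         while True:
#             partitioner.refresh()  # rebalance on group/partition changes
#             time.sleep(10)
#     finally:
#         partitioner.stop()         # release partitions and leave the group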