Example #1
    def _handle_fetch_response(self, request, send_time, response):
        """The callback for fetch completion"""
        fetch_offsets = {}
        for topic, partitions in request.topics:
            for partition_data in partitions:
                partition, offset = partition_data[:2]
                fetch_offsets[TopicPartition(topic, partition)] = offset

        partitions = set([
            TopicPartition(topic, partition_data[0])
            for topic, partitions in response.topics
            for partition_data in partitions
        ])
        metric_aggregator = FetchResponseMetricAggregator(
            self._sensors, partitions)

        # randomized ordering should improve balance for short-lived consumers
        random.shuffle(response.topics)
        for topic, partitions in response.topics:
            random.shuffle(partitions)
            for partition_data in partitions:
                tp = TopicPartition(topic, partition_data[0])
                completed_fetch = CompletedFetch(tp, fetch_offsets[tp],
                                                 response.API_VERSION,
                                                 partition_data[1:],
                                                 metric_aggregator)
                self._completed_fetches.append(completed_fetch)

        if response.API_VERSION >= 1:
            self._sensors.fetch_throttle_time_sensor.record(
                response.throttle_time_ms)
        self._sensors.fetch_latency.record((time.time() - send_time) * 1000)
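
For orientation: request.topics and response.topics are both walked as lists of (topic, [partition_data, ...]) pairs, with the partition id first in each tuple. A minimal sketch of the fetch_offsets bookkeeping, using a hypothetical topic and offsets:

    from kafka.structs import TopicPartition

    # Hypothetical request payload: one topic, two partitions, fetch
    # offsets 42 and 7 (the third field is max_bytes in older fetch versions).
    request_topics = [('my-topic', [(0, 42, 1048576), (1, 7, 1048576)])]

    fetch_offsets = {}
    for topic, partitions in request_topics:
        for partition_data in partitions:
            partition, offset = partition_data[:2]
            fetch_offsets[TopicPartition(topic, partition)] = offset

    assert fetch_offsets[TopicPartition('my-topic', 0)] == 42
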
Example #2
    def _handle_metadata_update(self, cluster):
        # if we encounter any unauthorized topics, raise an exception
        if cluster.unauthorized_topics:
            raise errors.TopicAuthorizationFailedError(
                cluster.unauthorized_topics)

        if self._subscription.subscribed_pattern:
            topics = []
            for topic in cluster.topics(
                    self.config['exclude_internal_topics']):
                if self._subscription.subscribed_pattern.match(topic):
                    topics.append(topic)

            if set(topics) != self._subscription.subscription:
                self._subscription.change_subscription(topics)
                self._client.set_topics(
                    self._subscription.group_subscription())

        # check if there are any changes to the metadata which should trigger
        # a rebalance
        if self._subscription.partitions_auto_assigned():
            metadata_snapshot = self._build_metadata_snapshot(
                self._subscription, cluster)
            if self._metadata_snapshot != metadata_snapshot:
                self._metadata_snapshot = metadata_snapshot

                # If we haven't got group coordinator support,
                # just assign all partitions locally
                if self._auto_assign_all_partitions():
                    self._subscription.assign_from_subscribed([
                        TopicPartition(topic, partition)
                        for topic in self._subscription.subscription
                        for partition in self._metadata_snapshot[topic]
                    ])
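
The regex branch above is exercised by pattern subscriptions. A hedged usage sketch (broker address, group id, and pattern are placeholders, and a reachable broker is assumed):

    from kafka import KafkaConsumer

    consumer = KafkaConsumer(bootstrap_servers='localhost:9092',
                             group_id='metrics-readers')
    # With a pattern subscription, every metadata update re-matches the
    # regex against the cluster's topics, as in _handle_metadata_update.
    consumer.subscribe(pattern='^metrics-.*')
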
Example #3
    def _handle_produce_response(self, node_id, send_time, batches, response):
        """Handle a produce response."""
        # if we have a response, parse it
        log.debug('Parsing produce response: %r', response)
        if response:
            batches_by_partition = dict([(batch.topic_partition, batch)
                                         for batch in batches])

            for topic, partitions in response.topics:
                for partition_info in partitions:
                    if response.API_VERSION < 2:
                        partition, error_code, offset = partition_info
                        ts = None
                    else:
                        partition, error_code, offset, ts = partition_info
                    tp = TopicPartition(topic, partition)
                    error = errors.for_code(error_code)
                    batch = batches_by_partition[tp]
                    self._complete_batch(batch, error, offset, ts)

            if response.API_VERSION > 0:
                self._sensors.record_throttle_time(response.throttle_time_ms,
                                                   node=node_id)

        else:
            # this is the acks = 0 case, just complete all requests
            for batch in batches:
                self._complete_batch(batch, None, -1, None)
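
The final branch covers fire-and-forget producers: with acks=0 the broker returns no produce response, so every batch is completed with no error, offset -1, and no timestamp. A sketch under the assumption of a local broker:

    from kafka import KafkaProducer

    producer = KafkaProducer(bootstrap_servers='localhost:9092', acks=0)
    # No broker response will arrive; the resulting RecordMetadata carries
    # offset -1, matching the acks=0 branch above.
    producer.send('my-topic', b'payload')
    producer.flush()
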
Example #4
    def _send_messages(self, topic, partition, *msg, **kwargs):
        key = kwargs.pop('key', None)

        # Guarantee that msg is actually a list or tuple (should always be true)
        if not isinstance(msg, (list, tuple)):
            raise TypeError("msg is not a list or tuple!")

        for m in msg:
            # The protocol allows both the key and the payload to be null
            # (https://goo.gl/o694yN), but a (null, null) pair doesn't make sense.
            if m is None:
                if key is None:
                    raise TypeError(
                        "key and payload can't both be null in one message")
            # Raise TypeError if any non-null message is not encoded as bytes
            elif not isinstance(m, six.binary_type):
                raise TypeError(
                    "all produce message payloads must be null or type bytes")

        # Raise TypeError if the key is not encoded as bytes
        if key is not None and not isinstance(key, six.binary_type):
            raise TypeError("the key must be type bytes")

        if self.async_send:
            for idx, m in enumerate(msg):
                try:
                    item = (TopicPartition(topic, partition), m, key)
                    if self.async_queue_put_timeout == 0:
                        self.queue.put_nowait(item)
                    else:
                        self.queue.put(item, True,
                                       self.async_queue_put_timeout)
                except Full:
                    raise AsyncProducerQueueFull(
                        msg[idx:], 'Producer async queue overfilled. '
                        'Current queue size %d.' % self.queue.qsize())
            resp = []
        else:
            messages = create_message_set([(m, key) for m in msg], self.codec,
                                          key, self.codec_compresslevel)
            req = ProduceRequestPayload(topic, partition, messages)
            try:
                resp = self.client.send_produce_request(
                    [req],
                    acks=self.req_acks,
                    timeout=self.ack_timeout,
                    fail_on_error=self.sync_fail_on_error)
            except Exception:
                log.exception("Unable to send messages")
                raise
        return resp
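
_send_messages is internal to the legacy producer API (removed in kafka-python 2.0). Against a 1.x install and a local broker, usage looks roughly like this; topic names and payloads are hypothetical:

    from kafka import SimpleClient, SimpleProducer

    client = SimpleClient('localhost:9092')

    # Synchronous path: create_message_set + send_produce_request under the hood.
    producer = SimpleProducer(client)
    producer.send_messages('my-topic', b'payload-1', b'payload-2')

    # Async path: items are queued for a background thread, and
    # AsyncProducerQueueFull is raised if the queue stays full past
    # async_queue_put_timeout.
    async_producer = SimpleProducer(client, async_send=True)
    async_producer.send_messages('my-topic', b'payload-3')
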
Example #5
    def with_partitions(self, partitions_to_add):
        """Returns a copy of cluster metadata with partitions added"""
        new_metadata = ClusterMetadata(**self.config)
        new_metadata._brokers = copy.deepcopy(self._brokers)
        new_metadata._partitions = copy.deepcopy(self._partitions)
        new_metadata._broker_partitions = copy.deepcopy(
            self._broker_partitions)
        new_metadata._groups = copy.deepcopy(self._groups)
        new_metadata.internal_topics = copy.deepcopy(self.internal_topics)
        new_metadata.unauthorized_topics = copy.deepcopy(
            self.unauthorized_topics)

        for partition in partitions_to_add:
            new_metadata._partitions[partition.topic][
                partition.partition] = partition

            if partition.leader is not None and partition.leader != -1:
                new_metadata._broker_partitions[partition.leader].add(
                    TopicPartition(partition.topic, partition.partition))

        return new_metadata
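
A sketch of extending metadata with a synthetic partition. This pokes at private state (_partitions) purely for illustration, and assumes the topic is already known, since with_partitions indexes into the existing per-topic dict:

    from kafka.cluster import ClusterMetadata
    from kafka.structs import PartitionMetadata

    cluster = ClusterMetadata()
    cluster._partitions['my-topic'] = {}  # seed the topic for the sketch

    extra = PartitionMetadata(topic='my-topic', partition=99, leader=1,
                              replicas=(1,), isr=(1,), error=0)
    patched = cluster.with_partitions([extra])

    assert 99 in patched._partitions['my-topic']
    assert 99 not in cluster._partitions['my-topic']  # deep copies: original untouched
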
Example #6
    def _handle_offset_fetch_response(self, future, response):
        offsets = {}
        for topic, partitions in response.topics:
            for partition, offset, metadata, error_code in partitions:
                tp = TopicPartition(topic, partition)
                error_type = errors.for_code(error_code)
                if error_type is not errors.NoError:
                    error = error_type()
                    log.debug(
                        "Group %s failed to fetch offset for partition"
                        " %s: %s", self.group_id, tp, error)
                    if error_type is errors.GroupLoadInProgressError:
                        # just retry
                        future.failure(error)
                    elif error_type is errors.NotCoordinatorForGroupError:
                        # re-discover the coordinator and retry
                        self.coordinator_dead(error_type())
                        future.failure(error)
                    elif error_type is errors.UnknownTopicOrPartitionError:
                        log.warning(
                            "OffsetFetchRequest -- unknown topic %s"
                            " (have you committed any offsets yet?)", topic)
                        continue
                    else:
                        log.error("Unknown error fetching offsets for %s: %s",
                                  tp, error)
                        future.failure(error)
                    return
                elif offset >= 0:
                    # record the position with the offset
                    # (-1 indicates no committed offset to fetch)
                    offsets[tp] = OffsetAndMetadata(offset, metadata)
                else:
                    log.debug(
                        "Group %s has no committed offset for partition"
                        " %s", self.group_id, tp)
        future.success(offsets)
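
On success the future resolves to a {TopicPartition: OffsetAndMetadata} dict. A minimal sketch that simulates the success path with kafka.future.Future (topic and offset are hypothetical):

    from kafka.future import Future
    from kafka.structs import TopicPartition, OffsetAndMetadata

    future = Future()
    future.add_callback(lambda offsets: print('fetched:', offsets))
    future.add_errback(lambda err: print('offset fetch failed:', err))

    # Drive the same success path the handler above would take.
    future.success({TopicPartition('my-topic', 0): OffsetAndMetadata(42, '')})
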
Example #7
    def _get_leader_for_partition(self, topic, partition):
        """
        Returns the leader for a partition or None if the partition exists
        but has no leader.

        Raises:
            UnknownTopicOrPartitionError: If the topic or partition is not part
                of the metadata.
            LeaderNotAvailableError: If the server has metadata, but there is
                no current leader.
        """

        key = TopicPartition(topic, partition)

        # Use cached metadata if it is there
        if self.topics_to_brokers.get(key) is not None:
            return self.topics_to_brokers[key]

        # Otherwise refresh metadata

        # If topic does not already exist, this will raise
        # UnknownTopicOrPartitionError if not auto-creating
        # LeaderNotAvailableError otherwise until partitions are created
        self.load_metadata_for_topics(topic)

        # If the partition doesn't actually exist, raise
        if partition not in self.topic_partitions.get(topic, []):
            raise UnknownTopicOrPartitionError(key)

        # If there's no leader for the partition, raise
        leader = self.topic_partitions[topic][partition]
        if leader == -1:
            raise LeaderNotAvailableError((topic, partition))

        # Otherwise return the BrokerMetadata
        return self.brokers[leader]
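
Because LeaderNotAvailableError is transient while auto-created partitions elect a leader, callers typically retry with a backoff. A hedged sketch (leader_with_retry is a hypothetical helper, and it calls a private method purely for illustration):

    import time
    from kafka.errors import LeaderNotAvailableError

    def leader_with_retry(client, topic, partition, attempts=5):
        for attempt in range(attempts):
            try:
                return client._get_leader_for_partition(topic, partition)
            except LeaderNotAvailableError:
                # Partition still initializing; back off and try again.
                time.sleep(0.5 * (attempt + 1))
        raise LeaderNotAvailableError((topic, partition))
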
Example #8
    def _handle_offset_response(self, future, response):
        """Callback for the response of the list offset call above.

        Arguments:
            future (Future): the future to update based on response
            response (OffsetResponse): response from the server

        Raises:
            AssertionError: if response does not match partition
        """
        timestamp_offset_map = {}
        for topic, part_data in response.topics:
            for partition_info in part_data:
                partition, error_code = partition_info[:2]
                partition = TopicPartition(topic, partition)
                error_type = for_code(error_code)
                if error_type is NoError:
                    if response.API_VERSION == 0:
                        offsets = partition_info[2]
                        assert len(offsets) <= 1, \
                            'Expected OffsetResponse with one offset'
                        if not offsets:
                            offset = UNKNOWN_OFFSET
                        else:
                            offset = offsets[0]
                        log.debug(
                            "Handling v0 ListOffsetResponse response for %s. "
                            "Fetched offset %s", partition, offset)
                        if offset != UNKNOWN_OFFSET:
                            timestamp_offset_map[partition] = (offset, None)
                    else:
                        timestamp, offset = partition_info[2:]
                        log.debug(
                            "Handling ListOffsetResponse response for %s. "
                            "Fetched offset %s, timestamp %s", partition,
                            offset, timestamp)
                        if offset != UNKNOWN_OFFSET:
                            timestamp_offset_map[partition] = (offset,
                                                               timestamp)
                elif error_type is UnsupportedForMessageFormatError:
                    # The message format on the broker side is before 0.10.0,
                    # we simply put None in the response.
                    log.debug(
                        "Cannot search by timestamp for partition %s because the"
                        " message format version is before 0.10.0", partition)
                elif error_type is NotLeaderForPartitionError:
                    log.debug(
                        "Attempt to fetch offsets for partition %s failed due"
                        " to obsolete leadership information, retrying.",
                        partition)
                    future.failure(error_type(partition))
                    return
                elif error_type is UnknownTopicOrPartitionError:
                    log.warning(
                        "Received unknown topic or partition error in ListOffset "
                        "request for partition %s. The topic/partition "
                        "may not exist or the user may not have Describe access "
                        "to it.", partition)
                    future.failure(error_type(partition))
                    return
                else:
                    log.warning(
                        "Attempt to fetch offsets for partition %s failed due to:"
                        " %s", partition, error_type)
                    future.failure(error_type(partition))
                    return
        if not future.is_done:
            future.success(timestamp_offset_map)
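
This callback ultimately backs timestamp-based offset lookups. Through the public consumer API that looks roughly like the following (broker address and timestamp are hypothetical, and the broker must support ListOffsets v1, i.e. kafka >= 0.10.1):

    from kafka import KafkaConsumer
    from kafka.structs import TopicPartition

    consumer = KafkaConsumer(bootstrap_servers='localhost:9092')
    tp = TopicPartition('my-topic', 0)
    # Map each partition to a target timestamp (epoch ms); the result maps
    # back to an OffsetAndTimestamp, or None where nothing was found.
    offsets = consumer.offsets_for_times({tp: 1600000000000})
    print(offsets[tp])
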
Example #9
    def partitions(self):
        return [
            TopicPartition(topic, partition)
            for topic, partitions in self.assignment  # pylint: disable-msg=no-member
            for partition in partitions
        ]
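
Here self.assignment is assumed to be a list of (topic, [partition, ...]) pairs, as in a consumer protocol member assignment. A minimal illustration with hypothetical data:

    from kafka.structs import TopicPartition

    assignment = [('my-topic', [0, 1]), ('other-topic', [3])]
    partitions = [TopicPartition(topic, partition)
                  for topic, parts in assignment
                  for partition in parts]
    # -> [TopicPartition('my-topic', 0), TopicPartition('my-topic', 1),
    #     TopicPartition('other-topic', 3)]
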
Example #10
    def load_metadata_for_topics(self, *topics, **kwargs):
        """Fetch broker and topic-partition metadata from the server.

        Updates internal data: broker list, topic/partition list, and
        topic/partition -> broker map. This method should be called after
        receiving any error.

        Note: Exceptions *will not* be raised in a full refresh (i.e. no topic
        list). In this case, error codes will be logged as errors.
        Partition-level errors will also not be raised here (a single partition
        w/o a leader, for example).

        Arguments:
            *topics (optional): If a list of topics is provided,
                the metadata refresh will be limited to the specified topics
                only.
            ignore_leadernotavailable (bool): suppress LeaderNotAvailableError
                so that metadata is loaded correctly during auto-create.
                Default: False.

        Raises:
            UnknownTopicOrPartitionError: Raised for topics that do not exist,
                unless the broker is configured to auto-create topics.
            LeaderNotAvailableError: Raised for topics that do not exist yet,
                when the broker is configured to auto-create topics. Retry
                after a short backoff (topics/partitions are initializing).
        """
        if 'ignore_leadernotavailable' in kwargs:
            ignore_leadernotavailable = kwargs['ignore_leadernotavailable']
        else:
            ignore_leadernotavailable = False

        if topics:
            self.reset_topic_metadata(*topics)
        else:
            self.reset_all_metadata()

        resp = self.send_metadata_request(topics)

        log.debug('Updating broker metadata: %s', resp.brokers)
        log.debug('Updating topic metadata: %s',
                  [topic for _, topic, _ in resp.topics])

        self.brokers = dict([(nodeId, BrokerMetadata(nodeId, host, port, None))
                             for nodeId, host, port in resp.brokers])

        for error, topic, partitions in resp.topics:
            # Errors expected for new topics
            if error:
                error_type = kafka.errors.kafka_errors.get(error, UnknownError)
                if error_type in (UnknownTopicOrPartitionError,
                                  LeaderNotAvailableError):
                    log.error('Error loading topic metadata for %s: %s (%s)',
                              topic, error_type, error)
                    if topic not in topics:
                        continue
                    elif (error_type is LeaderNotAvailableError
                          and ignore_leadernotavailable):
                        continue
                raise error_type(topic)

            self.topic_partitions[topic] = {}
            for error, partition, leader, _, _ in partitions:

                self.topic_partitions[topic][partition] = leader

                # Populate topics_to_brokers dict
                topic_part = TopicPartition(topic, partition)

                # Check for partition errors
                if error:
                    error_type = kafka.errors.kafka_errors.get(
                        error, UnknownError)

                    # If No Leader, topics_to_brokers topic_partition -> None
                    if error_type is LeaderNotAvailableError:
                        log.error('No leader for topic %s partition %d', topic,
                                  partition)
                        self.topics_to_brokers[topic_part] = None
                        continue

                    # If one of the replicas is unavailable -- ignore
                    # this error code is provided for admin purposes only
                    # we never talk to replicas, only the leader
                    elif error_type is ReplicaNotAvailableError:
                        log.debug(
                            'Some (non-leader) replicas not available for topic %s partition %d',
                            topic, partition)

                    else:
                        raise error_type(topic_part)

                # If Known Broker, topic_partition -> BrokerMetadata
                if leader in self.brokers:
                    self.topics_to_brokers[topic_part] = self.brokers[leader]

                # If Unknown Broker, fake BrokerMetadata so we don't lose the id
                # (not sure how this could happen. server could be in bad state)
                else:
                    self.topics_to_brokers[topic_part] = BrokerMetadata(
                        leader, None, None, None)
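
Per the docstring, a caller that expects topic auto-creation can suppress the transient error and retry shortly after. A hedged sketch against the legacy SimpleClient (host and topic are placeholders):

    from kafka import SimpleClient

    client = SimpleClient('localhost:9092')
    # The first refresh after auto-create may find the topic with no leader
    # elected yet, so suppress LeaderNotAvailableError instead of raising.
    client.load_metadata_for_topics('brand-new-topic',
                                    ignore_leadernotavailable=True)
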
Example #11
    def _handle_offset_commit_response(self, offsets, future, send_time,
                                       response):
        # TODO look at adding request_latency_ms to response (like java kafka)
        self.consumer_sensors.commit_latency.record(
            (time.time() - send_time) * 1000)
        unauthorized_topics = set()

        for topic, partitions in response.topics:
            for partition, error_code in partitions:
                tp = TopicPartition(topic, partition)
                offset = offsets[tp]

                error_type = errors.for_code(error_code)
                if error_type is errors.NoError:
                    log.debug("Group %s committed offset %s for partition %s",
                              self.group_id, offset, tp)
                    if self._subscription.is_assigned(tp):
                        self._subscription.assignment[
                            tp].committed = offset.offset
                elif error_type is errors.GroupAuthorizationFailedError:
                    log.error("Not authorized to commit offsets for group %s",
                              self.group_id)
                    future.failure(error_type(self.group_id))
                    return
                elif error_type is errors.TopicAuthorizationFailedError:
                    unauthorized_topics.add(topic)
                elif error_type in (errors.OffsetMetadataTooLargeError,
                                    errors.InvalidCommitOffsetSizeError):
                    # raise the error to the user
                    log.debug(
                        "OffsetCommit for group %s failed on partition %s"
                        " %s", self.group_id, tp, error_type.__name__)
                    future.failure(error_type())
                    return
                elif error_type is errors.GroupLoadInProgressError:
                    # just retry
                    log.debug("OffsetCommit for group %s failed: %s",
                              self.group_id, error_type.__name__)
                    future.failure(error_type(self.group_id))
                    return
                elif error_type in (errors.GroupCoordinatorNotAvailableError,
                                    errors.NotCoordinatorForGroupError,
                                    errors.RequestTimedOutError):
                    log.debug("OffsetCommit for group %s failed: %s",
                              self.group_id, error_type.__name__)
                    self.coordinator_dead(error_type())
                    future.failure(error_type(self.group_id))
                    return
                elif error_type in (errors.UnknownMemberIdError,
                                    errors.IllegalGenerationError,
                                    errors.RebalanceInProgressError):
                    # need to re-join group
                    error = error_type(self.group_id)
                    log.debug("OffsetCommit for group %s failed: %s",
                              self.group_id, error)
                    self.reset_generation()
                    future.failure(errors.CommitFailedError())
                    return
                else:
                    log.error(
                        "Group %s failed to commit partition %s at offset"
                        " %s: %s", self.group_id, tp, offset,
                        error_type.__name__)
                    future.failure(error_type())
                    return

        if unauthorized_topics:
            log.error("Not authorized to commit to topics %s for group %s",
                      unauthorized_topics, self.group_id)
            future.failure(
                errors.TopicAuthorizationFailedError(unauthorized_topics))
        else:
            future.success(None)
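
Through the public consumer API, a commit that lands in this handler looks roughly like the following (broker, group, and offset are hypothetical):

    from kafka import KafkaConsumer
    from kafka.structs import TopicPartition, OffsetAndMetadata

    consumer = KafkaConsumer(bootstrap_servers='localhost:9092',
                             group_id='my-group',
                             enable_auto_commit=False)
    tp = TopicPartition('my-topic', 0)
    # Synchronous commit: non-retriable errors surface as exceptions,
    # mirroring the future.failure(...) branches above.
    consumer.commit({tp: OffsetAndMetadata(42, '')})
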
Example #12
    def update_metadata(self, metadata):
        """Update cluster state given a MetadataResponse.

        Arguments:
            metadata (MetadataResponse): broker response to a metadata request

        Returns: None
        """
        # In the common case where we ask for a single topic and get back an
        # error, we should fail the future
        if len(metadata.topics) == 1 and metadata.topics[0][0] != 0:
            error_code, topic = metadata.topics[0][:2]
            error = for_code(error_code)(topic)
            return self.failed_update(error)

        if not metadata.brokers:
            log.warning("No broker metadata found in MetadataResponse")

        _new_brokers = {}
        for broker in metadata.brokers:
            if metadata.API_VERSION == 0:
                node_id, host, port = broker
                rack = None
            else:
                node_id, host, port, rack = broker
            _new_brokers.update(
                {node_id: BrokerMetadata(node_id, host, port, rack)})

        if metadata.API_VERSION == 0:
            _new_controller = None
        else:
            _new_controller = _new_brokers.get(metadata.controller_id)

        _new_partitions = {}
        _new_broker_partitions = collections.defaultdict(set)
        _new_unauthorized_topics = set()
        _new_internal_topics = set()

        for topic_data in metadata.topics:
            if metadata.API_VERSION == 0:
                error_code, topic, partitions = topic_data
                is_internal = False
            else:
                error_code, topic, is_internal, partitions = topic_data
            if is_internal:
                _new_internal_topics.add(topic)
            error_type = for_code(error_code)
            if error_type is NoError:
                _new_partitions[topic] = {}
                for p_error, partition, leader, replicas, isr in partitions:
                    _new_partitions[topic][partition] = PartitionMetadata(
                        topic=topic,
                        partition=partition,
                        leader=leader,
                        replicas=replicas,
                        isr=isr,
                        error=p_error)
                    if leader != -1:
                        _new_broker_partitions[leader].add(
                            TopicPartition(topic, partition))

            elif error_type is LeaderNotAvailableError:
                log.warning(
                    "Topic %s is not available during auto-create"
                    " initialization", topic)
            elif error_type is UnknownTopicOrPartitionError:
                log.error("Topic %s not found in cluster metadata", topic)
            elif error_type is TopicAuthorizationFailedError:
                log.error("Topic %s is not authorized for this client", topic)
                _new_unauthorized_topics.add(topic)
            elif error_type is InvalidTopicError:
                log.error("'%s' is not a valid topic name", topic)
            else:
                log.error("Error fetching metadata for topic %s: %s", topic,
                          error_type)

        with self._lock:
            self._brokers = _new_brokers
            self.controller = _new_controller
            self._partitions = _new_partitions
            self._broker_partitions = _new_broker_partitions
            self.unauthorized_topics = _new_unauthorized_topics
            self.internal_topics = _new_internal_topics
            f = None
            if self._future:
                f = self._future
            self._future = None
            self._need_update = False

        now = time.time() * 1000
        self._last_refresh_ms = now
        self._last_successful_refresh_ms = now

        if f:
            f.success(self)
        log.debug("Updated cluster metadata to %s", self)

        for listener in self._listeners:
            listener(self)

        if self.need_all_topic_metadata:
            # the listener may change the interested topics,
            # which could cause another metadata refresh.
            # If we have already fetched all topics, however,
            # another fetch should be unnecessary.
            self._need_update = False
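
The listener loop near the end is the hook for reacting to metadata changes. A sketch of registering one on a ClusterMetadata instance (the callback body is hypothetical):

    from kafka.cluster import ClusterMetadata

    cluster = ClusterMetadata()

    def on_metadata_update(cluster_metadata):
        # Invoked once per successful update_metadata(), after the new
        # state has been swapped in under the lock.
        print('brokers known:', len(cluster_metadata.brokers()))

    cluster.add_listener(on_metadata_update)
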
Example #13
    def send(self,
             topic,
             value=None,
             key=None,
             partition=None,
             timestamp_ms=None):
        """Publish a message to a topic.

        Arguments:
            topic (str): topic where the message will be published
            value (optional): message value. Must be type bytes, or be
                serializable to bytes via configured value_serializer. If value
                is None, key is required and message acts as a 'delete'.
                See kafka compaction documentation for more details:
                http://kafka.apache.org/documentation.html#compaction
                (compaction requires kafka >= 0.8.1)
            partition (int, optional): optionally specify a partition. If not
                set, the partition will be selected using the configured
                'partitioner'.
            key (optional): a key to associate with the message. Can be used to
                determine which partition to send the message to. If partition
                is None (and producer's partitioner config is left as default),
                then messages with the same key will be delivered to the same
                partition (but if key is None, partition is chosen randomly).
                Must be type bytes, or be serializable to bytes via configured
                key_serializer.
            timestamp_ms (int, optional): epoch milliseconds (from Jan 1 1970 UTC)
                to use as the message timestamp. Defaults to current time.

        Returns:
            FutureRecordMetadata: resolves to RecordMetadata

        Raises:
            KafkaTimeoutError: if unable to fetch topic metadata, or unable
                to obtain memory buffer prior to configured max_block_ms
        """
        assert value is not None or self.config['api_version'] >= (0, 8, 1), (
            'Null messages require kafka >= 0.8.1')
        assert not (value is None
                    and key is None), 'Need at least one: key or value'
        key_bytes = value_bytes = None
        try:
            self._wait_on_metadata(topic, self.config['max_block_ms'] / 1000.0)

            key_bytes = self._serialize(self.config['key_serializer'], topic,
                                        key)
            value_bytes = self._serialize(self.config['value_serializer'],
                                          topic, value)
            assert type(key_bytes) in (bytes, bytearray, memoryview,
                                       type(None))
            assert type(value_bytes) in (bytes, bytearray, memoryview,
                                         type(None))

            partition = self._partition(topic, partition, key, value,
                                        key_bytes, value_bytes)

            message_size = self._estimate_size_in_bytes(key_bytes, value_bytes)
            self._ensure_valid_record_size(message_size)

            tp = TopicPartition(topic, partition)
            log.debug("Sending (key=%r value=%r) to %s", key, value, tp)
            result = self._accumulator.append(tp,
                                              timestamp_ms,
                                              key_bytes,
                                              value_bytes,
                                              self.config['max_block_ms'],
                                              estimated_size=message_size)
            future, batch_is_full, new_batch_created = result
            if batch_is_full or new_batch_created:
                log.debug(
                    "Waking up the sender since %s is either full or"
                    " getting a new batch", tp)
                self._sender.wakeup()

            return future
            # Handle exceptions and record the errors:
            # API exceptions are returned in the future,
            # other exceptions are raised directly.
        except errors.BrokerResponseError as e:
            log.debug("Exception occurred during message send: %s", e)
            return FutureRecordMetadata(
                FutureProduceResult(TopicPartition(topic, partition)), -1,
                None, None,
                len(key_bytes) if key_bytes is not None else -1,
                len(value_bytes) if value_bytes is not None else -1).failure(e)
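
Typical usage blocks on the returned future for the RecordMetadata (broker address and payload are hypothetical):

    from kafka import KafkaProducer

    producer = KafkaProducer(bootstrap_servers='localhost:9092')
    future = producer.send('my-topic', value=b'payload', key=b'user-1')
    # FutureRecordMetadata.get() raises on failure (including errors
    # returned via .failure(...) above) and otherwise yields RecordMetadata.
    metadata = future.get(timeout=10)
    print(metadata.topic, metadata.partition, metadata.offset)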