Example 1
# Legacy kafka-python (1.x) import paths
from kafka import SimpleClient
from kafka.structs import OffsetRequestPayload


def kafka_get_topics_offsets(host, topic, port=9092):
    """Return available partitions and their offsets for the given topic.

    Args:
        host (str): Kafka host.
        topic (str): Kafka topic.
        port (int): Kafka port.

    Returns:
        [(int, int, int)]: [(partition, start_offset, end_offset)].
    """
    brokers = ['{}:{}'.format(host, port)]
    client = SimpleClient(brokers)

    offsets = []
    partitions = client.get_partition_ids_for_topic(topic)

    offsets_responses_end = client.send_offset_request([
        OffsetRequestPayload(topic, partition, -1, 1)
        for partition in partitions
    ])
    offsets_responses_start = client.send_offset_request([
        OffsetRequestPayload(topic, partition, -2, 1)
        for partition in partitions
    ])

    for start_offset, end_offset in zip(offsets_responses_start,
                                        offsets_responses_end):
        offsets.append((start_offset.partition, start_offset.offsets[0],
                        end_offset.offsets[0]))

    return offsets
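
The returned tuples make it easy to see how many messages each partition currently retains. A minimal usage sketch; the host and topic name below are placeholders, not values from the example:

# Hypothetical usage; 'localhost' and 'my-topic' are placeholders.
for partition, start, end in kafka_get_topics_offsets('localhost', 'my-topic'):
    print('partition %d: %d messages retained' % (partition, end - start))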
Example 2
    def _get_highwater_offsets(self, kafka_hosts_ports):
        """
        Fetch highwater offsets for each topic/partition from Kafka cluster.

        Query all partitions in the cluster, because even a partition with no
        consumers is worth measuring: we may want to verify that producers are
        successfully producing. There is no need to limit this for performance,
        because fetching broker offsets from Kafka is a relatively inexpensive
        operation.
        """
        kafka_conn = SimpleClient(kafka_hosts_ports,
                                  timeout=self.kafka_timeout)
        try:
            broker_topics_partitions = kafka_conn.topics_to_brokers.keys()
            # batch a bunch of requests into a single network call
            offsets_request = [
                OffsetRequestPayload(topic, partition, -1, 1)
                for topic, partition in broker_topics_partitions
            ]
            offsets_response = kafka_conn.send_offset_request(offsets_request)
            highwater_offsets = {(x.topic, x.partition): x.offsets[0]
                                 for x in offsets_response}
        finally:
            try:
                kafka_conn.close()
            except Exception:
                self.log.exception('Error cleaning up Kafka connection')
        return highwater_offsets
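
Because the result is keyed on (topic, partition), it slots directly into a lag calculation. A hedged sketch of a follow-up step inside the same check class; consumer_offsets is a hypothetical dict of committed offsets keyed the same way:

# Hypothetical follow-up; consumer_offsets is assumed to be a
# {(topic, partition): committed_offset} dict gathered elsewhere.
highwater = self._get_highwater_offsets('localhost:9092')
for (topic, partition), hw in highwater.items():
    lag = hw - consumer_offsets.get((topic, partition), 0)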
Example 3
from kafka.structs import OffsetRequestPayload  # legacy kafka-python (1.x)


def get_logsize(client, topic, partitions=None):
    if partitions is None:
        # Fetch the partition ids for the topic
        partitions = client.get_partition_ids_for_topic(topic)

    data = {}

    # If the topic is not yet in client.topic_partitions, load its metadata
    # client.topic_partitions = {}   # topic -> partition -> leader
    if topic not in client.topic_partitions:
        client.load_metadata_for_topics(topic)

    # Skip partitions whose leader is unknown (-1). Removing items from a
    # list while iterating over it silently skips elements, so build a
    # filtered copy instead of mutating `partitions` in place.
    healthy_partitions = []
    for partition in partitions:
        if client.topic_partitions[topic][partition] == -1:
            data[partition] = None
        else:
            healthy_partitions.append(partition)

    reqs = []
    for partition in healthy_partitions:
        reqs.append(OffsetRequestPayload(topic, partition, -1, 1))

    resps = client.send_offset_request(reqs)

    for resp in resps:
        pending = resp.offsets[0]
        partition = resp.partition
        data[partition] = pending

    return data
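
A value of None in the returned dict marks a partition whose leader was unavailable. A minimal usage sketch; the broker address and topic name are placeholders:

from kafka import SimpleClient  # legacy kafka-python (1.x) import path

# Hypothetical usage; broker address and topic name are placeholders.
client = SimpleClient('localhost:9092')
logsize = get_logsize(client, 'my-topic')      # e.g. {0: 1523, 1: None}
total = sum(v for v in logsize.values() if v is not None)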
Example 4
    def current_offset(self, topic, partition):
        try:
            offsets, = self.client.send_offset_request(
                [OffsetRequestPayload(topic, partition, -1, 1)])
        except Exception:
            # XXX: We've seen some UnknownErrors here and can't debug w/o server logs
            self.zk.child.dump_logs()
            self.server.child.dump_logs()
            raise
        else:
            return offsets.offsets[0]
Example 5
def current_offset(client, topic, partition, kafka_broker=None):
    """Get the current offset of a topic's partition
    """
    try:
        offsets, = client.send_offset_request(
            [OffsetRequestPayload(topic, partition, -1, 1)])
    except Exception:
        # XXX: We've seen some UnknownErrors here and can't debug w/o server logs
        if kafka_broker:
            kafka_broker.dump_logs()
        raise
    else:
        return offsets.offsets[0]
Example 6
    def reset_partition_offset(self, partition):
        """Update offsets using auto_offset_reset policy (smallest|largest)

        Arguments:
            partition (int): the partition for which offsets should be updated

        Returns: Updated offset on success, None on failure
        """
        LATEST = -1
        EARLIEST = -2
        if self.auto_offset_reset == 'largest':
            reqs = [OffsetRequestPayload(self.topic, partition, LATEST, 1)]
        elif self.auto_offset_reset == 'smallest':
            reqs = [OffsetRequestPayload(self.topic, partition, EARLIEST, 1)]
        else:
            # Raise a reasonable exception type if the user calls this
            # outside of an exception context
            if sys.exc_info() == (None, None, None):
                raise OffsetOutOfRangeError(
                    'Cannot reset partition offsets without a '
                    'valid auto_offset_reset setting '
                    '(largest|smallest)')
            # Otherwise we should re-raise the upstream exception
            # b/c it typically includes additional data about
            # the request that triggered it, and we do not want to drop that
            raise  # pylint: disable=E0704

        # send_offset_request
        log.info('Resetting topic-partition offset to %s for %s:%d',
                 self.auto_offset_reset, self.topic, partition)
        try:
            (resp, ) = self.client.send_offset_request(reqs)
        except KafkaError as e:
            log.error('%s sending offset request for %s:%d',
                      e.__class__.__name__, self.topic, partition)
        else:
            self.offsets[partition] = resp.offsets[0]
            self.fetch_offsets[partition] = resp.offsets[0]
            return resp.offsets[0]
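
The bare raise in the final branch only works when the method runs inside an exception handler, which hints at the intended call pattern. A hedged sketch, where consumer is a hypothetical SimpleConsumer-style instance exposing this method:

# Hypothetical call site: invoking reset_partition_offset() from inside
# an except block lets its bare `raise` re-raise the original error.
# OffsetOutOfRangeError is importable from kafka.errors in kafka-python 1.x.
try:
    messages = consumer.get_messages(count=10)
except OffsetOutOfRangeError:
    for partition in list(consumer.offsets):
        consumer.reset_partition_offset(partition)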
Example 7
    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('Integration test requires KAFKA_VERSION')

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:],
                               random_string(10))
            self.topic = topic

        if self.create_client:
            self.client = SimpleClient('%s:%d' %
                                       (self.server.host, self.server.port))
            self.client_async = KafkaClient(
                bootstrap_servers='%s:%d' %
                (self.server.host, self.server.port))

        timeout = time.time() + 30
        while time.time() < timeout:
            try:
                self.client.load_metadata_for_topics(
                    self.topic, ignore_leadernotavailable=False)
                if self.client.has_metadata_for_topic(self.topic):
                    break
            except (LeaderNotAvailableError, InvalidTopicError):
                time.sleep(1)
        else:
            raise KafkaTimeoutError('Timeout loading topic metadata!')

        # Ensure topic partitions have been created on all brokers to avoid UnknownPartitionErrors
        # TODO: It might be a good idea to move this to self.client.ensure_topic_exists
        for partition in self.client.get_partition_ids_for_topic(self.topic):
            while True:
                try:
                    req = OffsetRequestPayload(self.topic, partition, -1, 100)
                    self.client.send_offset_request([req])
                    break
                except (NotLeaderForPartitionError,
                        UnknownTopicOrPartitionError,
                        FailedPayloadsError):
                    if time.time() > timeout:
                        raise KafkaTimeoutError(
                            'Timeout loading topic metadata!')
                    time.sleep(.1)

        self._messages = {}
Example 8
def topic_offsets(kafka_brokers, topic):
    client = SimpleClient(insure_is_array(kafka_brokers))
    topic_partitions = client.topic_partitions
    if topic not in topic_partitions:
        raise KafkaException("topic {} does not exist".format(topic))
    partitions = topic_partitions[topic]
    offset_requests = [
        OffsetRequestPayload(topic, p, -1, 1) for p in partitions.keys()
    ]
    offsets_responses = client.send_offset_request(offset_requests)
    client.close()
    partitions_and_offsets = {}
    for offset in offsets_responses:
        if offset.topic == topic:
            topic_offset = 0
            topic_partition = TopicPartition(topic=offset.topic,
                                             partition=offset.partition)
            if offset.offsets[0]:
                topic_offset = offset.offsets[0]
            partitions_and_offsets[topic_partition] = topic_offset

    return partitions_and_offsets
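
A minimal usage sketch; the broker list and topic name are placeholders. Each key is a TopicPartition and each value the partition's log-end offset:

# Hypothetical usage; broker list and topic name are placeholders.
offsets = topic_offsets(['localhost:9092'], 'my-topic')
for tp, offset in offsets.items():
    print('%s[%d] -> log-end offset %d' % (tp.topic, tp.partition, offset))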
Example 9
    def get_partition_offsets(self, topic, partition, request_time_ms,
                              max_num_offsets):
        """Request available fetch offsets for a single topic/partition

        Keyword Arguments:
            topic (str): topic for offset request
            partition (int): partition for offset request
            request_time_ms (int): Used to ask for all messages before a
                certain time (ms). There are two special values.
                Specify -1 to receive the latest offset (i.e. the offset of the
                next coming message) and -2 to receive the earliest available
                offset. Note that because offsets are pulled in descending
                order, asking for the earliest offset will always return you a
                single element.
            max_num_offsets (int): Maximum offsets to include in the OffsetResponse

        Returns:
            a list of offsets in the OffsetResponse submitted for the provided
            topic / partition. See:
            https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol#AGuideToTheKafkaProtocol-OffsetAPI
        """
        reqs = [
            OffsetRequestPayload(topic, partition, request_time_ms,
                                 max_num_offsets)
        ]

        (resp, ) = self._client.send_offset_request(reqs)

        check_error(resp)

        # Just for sanity..
        # probably unnecessary
        assert resp.topic == topic
        assert resp.partition == partition

        return resp.offsets
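
The two special request_time_ms values line up with the -1/-2 sentinels used throughout these examples. A hedged sketch, where consumer is a hypothetical instance exposing this method and the topic name is a placeholder:

# Hypothetical calls; -1 asks for the next offset to be written,
# -2 for the oldest offset still retained by the broker.
(latest,) = consumer.get_partition_offsets('my-topic', 0, -1, 1)
(earliest,) = consumer.get_partition_offsets('my-topic', 0, -2, 1)
backlog = latest - earliest   # messages currently retained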
Example 10
    def pending(self, partitions=None):
        """
        Gets the pending message count

        Keyword Arguments:
            partitions (list): list of partitions to check for, default is to check all
        """
        if partitions is None:
            partitions = self.offsets.keys()

        total = 0
        reqs = []

        for partition in partitions:
            reqs.append(OffsetRequestPayload(self.topic, partition, -1, 1))

        resps = self.client.send_offset_request(reqs)
        for resp in resps:
            partition = resp.partition
            pending = resp.offsets[0]
            offset = self.offsets[partition]
            total += pending - offset

        return total
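
A short usage sketch, assuming consumer is an instance of the consumer class this method belongs to:

# Hypothetical usage; pending() sums (highwater - current) per partition.
total_lag = consumer.pending()        # across all partitions
p0_lag = consumer.pending([0])        # a single partition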
Example 11
def get_topics_watermarks(kafka_client, topics, raise_on_error=True):
    """ Get current topic watermarks.

    NOTE: This method does not refresh client metadata. It is up to the caller
    to avoid using stale metadata.

    If any partition leader is not available, the request fails for all the
    other topics. This is the tradeoff of sending all topic requests in a
    single batch, which saves both time and Kafka load.

    :param kafka_client: a connected KafkaToolClient
    :param topics: topic list or dict {<topic>: [partitions]}
    :param raise_on_error: if False the method ignores missing topics
      and missing partitions. It still may fail on the request send.
    :returns: a dict topic: partition: Part
    :raises:
      :py:class:`~kafka_utils.util.error.UnknownTopic`: upon missing
      topics and raise_on_error=True

      :py:class:`~kafka_utils.util.error.UnknownPartition`: upon missing
      partitions and raise_on_error=True

      FailedPayloadsError: upon send request error.
    """
    topics = _verify_topics_and_partitions(
        kafka_client,
        topics,
        raise_on_error,
    )
    highmark_offset_reqs = []
    lowmark_offset_reqs = []

    for topic, partitions in six.iteritems(topics):
        # Batch watermark requests
        for partition in partitions:
            # Request the latest offset
            highmark_offset_reqs.append(
                OffsetRequestPayload(topic, partition, -1, max_offsets=1))
            # Request the earliest offset
            lowmark_offset_reqs.append(
                OffsetRequestPayload(topic, partition, -2, max_offsets=1))

    watermark_offsets = {}

    if not (len(highmark_offset_reqs) + len(lowmark_offset_reqs)):
        return watermark_offsets

    # fail_on_error = False does not prevent network errors
    highmark_resps = kafka_client.send_offset_request(
        highmark_offset_reqs,
        fail_on_error=False,
        callback=_check_fetch_response_error,
    )
    lowmark_resps = kafka_client.send_offset_request(
        lowmark_offset_reqs,
        fail_on_error=False,
        callback=_check_fetch_response_error,
    )

    # At this point the highmark and lowmark response lists should have the
    # same length.
    assert len(highmark_resps) == len(lowmark_resps)
    aggregated_offsets = defaultdict(lambda: defaultdict(dict))
    for resp in highmark_resps:
        aggregated_offsets[resp.topic][resp.partition]['highmark'] = \
            resp.offsets[0]
    for resp in lowmark_resps:
        aggregated_offsets[resp.topic][resp.partition]['lowmark'] = \
            resp.offsets[0]

    for topic, partition_watermarks in six.iteritems(aggregated_offsets):
        for partition, watermarks in six.iteritems(partition_watermarks):
            watermark_offsets.setdefault(
                topic,
                {},
            )[partition] = PartitionOffsets(
                topic,
                partition,
                watermarks['highmark'],
                watermarks['lowmark'],
            )
    return watermark_offsets
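
A minimal usage sketch; kafka_client and the topic name are placeholders, and the highmark/lowmark attribute access assumes PartitionOffsets is a namedtuple with the fields built above:

# Hypothetical usage; assumes metadata was refreshed beforehand.
watermarks = get_topics_watermarks(kafka_client, ['my-topic'])
for partition, marks in watermarks['my-topic'].items():
    retained = marks.highmark - marks.lowmark   # messages on the broker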
Example 12
    def seek(self, offset, whence=None, partition=None):
        """
        Alter the current offset in the consumer, similar to fseek

        Arguments:
            offset: how much to modify the offset
            whence: where to modify it from, default is None

                * None is an absolute offset
                * 0    is relative to the earliest available offset (head)
                * 1    is relative to the current offset
                * 2    is relative to the latest known offset (tail)

            partition: modify which partition, default is None.
                If partition is None, would modify all partitions.
        """

        if whence is None: # set an absolute offset
            if partition is None:
                for tmp_partition in self.offsets:
                    self.offsets[tmp_partition] = offset
            else:
                self.offsets[partition] = offset
        elif whence == 1:  # relative to current position
            if partition is None:
                for tmp_partition, _offset in self.offsets.items():
                    self.offsets[tmp_partition] = _offset + offset
            else:
                self.offsets[partition] += offset
        elif whence in (0, 2):  # relative to beginning or end
            reqs = []
            deltas = {}
            if partition is None:
                # divide the requested offset by the number of partitions
                # and distribute the remainder evenly
                (delta, rem) = divmod(offset, len(self.offsets))
                for tmp_partition, r in izip_longest(self.offsets.keys(),
                                                     repeat(1, rem),
                                                     fillvalue=0):
                    deltas[tmp_partition] = delta + r

                for tmp_partition in self.offsets.keys():
                    if whence == 0:
                        reqs.append(OffsetRequestPayload(self.topic, tmp_partition, -2, 1))
                    elif whence == 2:
                        reqs.append(OffsetRequestPayload(self.topic, tmp_partition, -1, 1))
                    else:
                        pass
            else:
                deltas[partition] = offset
                if whence == 0:
                    reqs.append(OffsetRequestPayload(self.topic, partition, -2, 1))
                elif whence == 2:
                    reqs.append(OffsetRequestPayload(self.topic, partition, -1, 1))
                else:
                    pass

            resps = self.client.send_offset_request(reqs)
            for resp in resps:
                self.offsets[resp.partition] = \
                    resp.offsets[0] + deltas[resp.partition]
        else:
            raise ValueError('Unexpected value for `whence`, %d' % whence)

        # Reset queue and fetch offsets since they are invalid
        self.fetch_offsets = self.offsets.copy()
        self.count_since_commit += 1
        if self.auto_commit:
            self.commit()

        self.queue = queue.Queue()
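
A few hedged calls illustrating the whence semantics documented in the docstring above; consumer is a hypothetical instance of this consumer class:

# Hypothetical calls; see the docstring above for whence semantics.
consumer.seek(0, 0)      # rewind every partition to the earliest offset
consumer.seek(0, 2)      # jump every partition to the latest offset
consumer.seek(-10, 2)    # ten messages before the tail, spread across partitions
consumer.seek(100)       # absolute: set every partition's offset to 100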