def test_kafka_consumer_offsets_search_many_partitions(kafka_consumer, kafka_producer, topic):
    """Offset lookups spanning two partitions agree with the produced records.

    Sends one record to partition 0 and one to partition 1 with a shared,
    explicit timestamp, then compares the consumer's ``offsets_for_times``,
    ``beginning_offsets`` and ``end_offsets`` results against the offsets
    reported in the producer acknowledgements.
    """
    partitions = [TopicPartition(topic, index) for index in (0, 1)]

    stamp_ms = int(time.time() * 1000)
    ack_wait = 10

    # Each send is acknowledged before the next one is issued.
    acked = {}
    for index, tp in enumerate(partitions):
        pending = kafka_producer.send(
            topic, partition=index, value=b"XXX", timestamp_ms=stamp_ms)
        acked[tp] = pending.get(ack_wait)

    by_time = kafka_consumer.offsets_for_times({tp: stamp_ms for tp in partitions})
    assert by_time == {
        tp: OffsetAndTimestamp(record.offset, stamp_ms)
        for tp, record in acked.items()
    }

    # The two checks below treat the produced record as both the first and
    # the last record of its partition.
    earliest = kafka_consumer.beginning_offsets(partitions)
    assert earliest == {tp: record.offset for tp, record in acked.items()}

    latest = kafka_consumer.end_offsets(partitions)
    assert latest == {tp: record.offset + 1 for tp, record in acked.items()}
Beispiel #2
0
    def test_kafka_consumer_offsets_search_many_partitions(self):
        """Check timestamp, beginning and end offset lookups across two partitions.

        One record is produced to partition 0 and one to partition 1 of
        ``self.topic`` with the same explicit timestamp.  The consumer's
        ``offsets_for_times``, ``beginning_offsets`` and ``end_offsets``
        results are then compared with the offsets reported for those records.
        """
        tp0 = TopicPartition(self.topic, 0)
        tp1 = TopicPartition(self.topic, 1)

        kafka_producer = self.kafka_producer()
        send_time = int(time.time() * 1000)
        # Bound the wait for each acknowledgement so that an unreachable
        # broker fails the test instead of blocking the run indefinitely.
        timeout = 10
        p0msg = kafka_producer.send(
            self.topic, partition=0, value=b"XXX",
            timestamp_ms=send_time).get(timeout)
        p1msg = kafka_producer.send(
            self.topic, partition=1, value=b"XXX",
            timestamp_ms=send_time).get(timeout)

        consumer = self.kafka_consumer()
        offsets = consumer.offsets_for_times({
            tp0: send_time,
            tp1: send_time,
        })

        self.assertEqual(offsets, {
            tp0: OffsetAndTimestamp(p0msg.offset, send_time),
            tp1: OffsetAndTimestamp(p1msg.offset, send_time),
        })

        # The two checks below treat the produced record as both the first
        # and the last record of its partition.
        offsets = consumer.beginning_offsets([tp0, tp1])
        self.assertEqual(offsets, {
            tp0: p0msg.offset,
            tp1: p1msg.offset,
        })

        offsets = consumer.end_offsets([tp0, tp1])
        self.assertEqual(offsets, {
            tp0: p0msg.offset + 1,
            tp1: p1msg.offset + 1,
        })
Beispiel #3
0
    def _setup_consumer(self):
        """
        prepare offset numbers etc. for reading from Topic
        """
        # <WTF> https://github.com/dpkp/kafka-python/issues/601
        self.available_topics = self.client.topics()
        # </WTF>

        # might as well use it
        assert self.topic in self.available_topics

        if (self.start_params is None) != (self.end_params is None):
            raise ValueError("Both start and end params must be set or both must be None")

        if self.start_params is None:
            # setup partitions to read through
            # TODO not checked with multiple partitions since inheriting from foxglove
            # An offset is assigned to make repeatability (via a locking file) possible later on.
            # and it's easier to terminate the fetch loop this way.
            p_id = self.client.partitions_for_topic(self.topic)
            topic_partitions = [TopicPartition(topic=self.topic, partition=p) for p in list(p_id)]
            starts = self.client.beginning_offsets(topic_partitions)
            ends = self.client.end_offsets(topic_partitions)

            self.start_p_offsets = {
                tp: OffsetAndTimestamp(offset=offset, timestamp=None) for tp, offset in starts.items()
            }
            self.end_p_offsets = {
                tp: OffsetAndTimestamp(offset=offset - 1, timestamp=None) for tp, offset in ends.items()
            }

        else:
            # TODO - this code was inherited from Foxglove and hasn't be checked through
            # setup start and end partitions and offsets
            # self.client.seek_to_beginning()
            # datetime is only start/end implemented
            assert isinstance(self.start_params, datetime) and isinstance(self.end_params, datetime)
            start = int(self.start_params.timestamp() * 1000)
            end = int(self.end_params.timestamp() * 1000)

            partitions = self.client.partitions_for_topic(self.topic)
            tx = {TopicPartition(topic=self.topic, partition=p): start for p in list(partitions)}
            self.start_p_offsets = self.client.offsets_for_times(tx)

            # if you give a timestamp after the last record it returns None
            for tp, offset_details in self.start_p_offsets.items():
                if offset_details is None:
                    raise ValueError("Start date outside of available messages")

            tx = {TopicPartition(topic=self.topic, partition=p): end for p in list(partitions)}
            self.end_p_offsets = self.client.offsets_for_times(tx)

            # as above - out of range, for end offset give something useful
            for tp, offset_details in self.end_p_offsets.items():
                if offset_details is None:
                    # go to last message. I'm not 100% sure this is correct
                    end_offsets = self.client.end_offsets([tp])
                    offset = end_offsets[tp] - 1
                    self.end_p_offsets[tp] = OffsetAndTimestamp(offset=offset, timestamp=None)
Beispiel #4
0
    def test_commit_for_times_atomic(self, mock_kconsumer):
        """With ``atomic=True`` nothing is committed when one partition has no offset."""
        unresolved_tp = TopicPartition("topic1", 0)
        resolved_tp = TopicPartition("topic2", 0)
        offsets_by_partition = {
            unresolved_tp: None,
            resolved_tp: OffsetAndTimestamp(123, 123),
        }

        consumer_commit_for_times(mock_kconsumer, offsets_by_partition, atomic=True)

        commit_calls = mock_kconsumer.commit.call_count
        assert commit_calls == 0
Beispiel #5
0
 def get_offsets_by_times(self, timestamps, timeout_ms):
     """Look up offsets for ``timestamps`` and normalise every requested entry.

     The mapping returned by ``self._retrieve_offsets`` is updated in place
     and returned.  Each key of ``timestamps`` ends up mapped either to an
     ``OffsetAndTimestamp`` built from the retrieved ``(offset, timestamp)``
     pair, or to ``None`` when nothing was retrieved for it.
     """
     found = self._retrieve_offsets(timestamps, timeout_ms)
     for partition in timestamps:
         if partition in found:
             raw_offset, raw_timestamp = found[partition]
             found[partition] = OffsetAndTimestamp(raw_offset, raw_timestamp)
         else:
             # Nothing came back for this partition; record that explicitly.
             found[partition] = None
     return found
Beispiel #6
0
    def test_commit_for_times(self, mock_kconsumer):
        """Every partition's offset is committed in one call, as ``OffsetAndMetadata``."""
        every_partition = [
            TopicPartition(name, index)
            for name in ("topic1", "topic2", "topic3")
            for index in (0, 1)
        ]

        offsets_by_partition = {
            tp: OffsetAndTimestamp(42, 123) for tp in every_partition
        }
        # Same offsets, with no metadata attached.
        wanted_commit = {
            tp: OffsetAndMetadata(42, metadata=None) for tp in every_partition
        }

        consumer_commit_for_times(mock_kconsumer, offsets_by_partition)

        mock_kconsumer.commit.assert_called_once_with(wanted_commit)
Beispiel #7
0
    def connect(self):
        """Create the Kafka consumer and resolve the offset window to read.

        Does nothing when ``self.client`` is already set.  Otherwise decodes
        the engine URL, builds a ``KafkaConsumer``, refreshes
        ``self.available_topics`` and fills ``self.start_p_offsets`` and
        ``self.end_p_offsets`` from ``offsets_for_times`` lookups at the
        start/end datetimes (converted to milliseconds).

        Raises ValueError when the start lookup yields None for any partition.
        """
        if self.client is not None:
            return

        decoded = self._decode_engine_url()
        self.bootstrap_server, self.topic, self.start_params, self.end_params = decoded
        self.client = KafkaConsumer(bootstrap_servers=self.bootstrap_server)

        # <WTF> https://github.com/dpkp/kafka-python/issues/601
        self.available_topics = self.client.topics()
        # </WTF>

        # might as well use it
        assert self.topic in self.available_topics

        # Only datetime start/end params are implemented.
        assert isinstance(self.start_params, datetime) and isinstance(self.end_params, datetime)
        start_ms = int(self.start_params.timestamp() * 1000)
        end_ms = int(self.end_params.timestamp() * 1000)

        partition_ids = self.client.partitions_for_topic(self.topic)

        def offsets_at(moment_ms):
            # Ask for the same timestamp on every partition of the topic.
            request = {
                TopicPartition(topic=self.topic, partition=pid): moment_ms
                for pid in list(partition_ids)
            }
            return self.client.offsets_for_times(request)

        self.start_p_offsets = offsets_at(start_ms)
        # A timestamp after the last record comes back as None.
        if any(found is None for found in self.start_p_offsets.values()):
            raise ValueError("Start date outside of available messages")

        self.end_p_offsets = offsets_at(end_ms)
        # Same out-of-range case for the end: substitute something useful.
        unresolved = [tp for tp, found in self.end_p_offsets.items() if found is None]
        for tp in unresolved:
            # Go to the last message. The original author was not 100% sure
            # this is correct.
            last_offset = self.client.end_offsets([tp])[tp] - 1
            self.end_p_offsets[tp] = OffsetAndTimestamp(offset=last_offset, timestamp=None)
Beispiel #8
0
def test_offset_for_times(mocker):
    """``client.offsets_for_times`` uses the looked-up offset, else the position."""
    tp = kafka.TopicPartition('ut_topic', 0)
    partitions = [tp]
    lookup_answer = {tp: OffsetAndTimestamp(42, -1)}
    current_positions = {tp: 747}

    def position_of(partition):
        return current_positions.get(partition, 0)

    consumer = mocker.Mock()
    # The mock hands back this very dict, so editing it later changes what
    # the next lookup sees.
    consumer.offsets_for_times.return_value = lookup_answer
    consumer.position.side_effect = position_of

    def resolve():
        # Every requested partition must appear exactly once in the result.
        resolved = client.offsets_for_times(consumer, partitions, 987654321)
        assert len(resolved) == len(partitions)
        assert all(partition in resolved for partition in partitions)
        return resolved

    # Uses returned offset for time when provided
    assert resolve()[tp] == 42

    # When offsets_for_times returns None returns position at end
    lookup_answer[tp] = None
    assert resolve()[tp] == 747
Beispiel #9
0
def _answer_offsets_for_times(timestamps):
    return {
        tp: OffsetAndTimestamp(ts - 100, ts)
        for tp, ts in timestamps.items()
    }