Beispiel #1
0
    def test_default_consumes_from_end_offset(self):
        """
        Consuming without an explicit start first requests the end-of-topic
        offset, then fetches starting at the offset the broker reported.
        """
        topic_name = "test.topic"

        def json_message(payload):
            # the canned messages in this test differ only by their value
            return messages.Message(
                magic=0, attributes=0, key=None, value=payload,
            )

        self.add_topic(topic_name, leaders=(1,))

        # broker 1 answers the offset lookup with offset 99 for partition 0
        offset_partition = offset.PartitionResponse(
            partition_id=0,
            error_code=errors.no_error,
            offsets=[99],
        )
        offset_topic = offset.TopicResponse(
            name=topic_name, partitions=[offset_partition],
        )
        self.set_responses(
            broker_id=1, api="offset",
            responses=[offset.OffsetResponse(topics=[offset_topic])],
        )

        # ...and answers the fetch with two JSON messages
        canned_messages = messages.MessageSet(
            messages=[
                (0, json_message('{"cat": "meow"}')),
                (1, json_message('{"dog": "bark"}')),
            ]
        )
        fetch_partition = fetch.PartitionResponse(
            partition_id=0,
            error_code=errors.no_error,
            highwater_mark_offset=2,
            message_set=canned_messages,
        )
        fetch_topic = fetch.TopicResponse(
            name=topic_name, partitions=[fetch_partition],
        )
        self.set_responses(
            broker_id=1, api="fetch",
            responses=[fetch.FetchResponse(topics=[fetch_topic])],
        )

        consumer = single.SingleConsumer(["kafka01"])
        yield consumer.connect()
        received = yield consumer.consume(topic_name)
        yield consumer.close()

        self.assertEqual(received, [{"cat": "meow"}, {"dog": "bark"}])

        expected_offset_request = offset.OffsetRequest(
            replica_id=-1,
            topics=[
                offset.TopicRequest(
                    name=topic_name,
                    partitions=[
                        offset.PartitionRequest(
                            partition_id=0,
                            time=-1,  # alias for 'end of topic'
                            max_offsets=1,
                        ),
                    ],
                ),
            ],
        )
        self.assert_sent(broker_id=1, request=expected_offset_request)

        # the fetch must start at the offset handed back by the lookup
        expected_fetch_request = fetch.FetchRequest(
            replica_id=-1,
            max_wait_time=1000,
            min_bytes=1,
            topics=[
                fetch.TopicRequest(
                    name=topic_name,
                    partitions=[
                        fetch.PartitionRequest(
                            partition_id=0,
                            offset=99,
                            max_bytes=(1024 * 1024),
                        ),
                    ],
                ),
            ],
        )
        self.assert_sent(broker_id=1, request=expected_fetch_request)
Beispiel #2
0
    def consume(self, topic, start=None):
        """
        Fetches from a given topic and returns a list of deserialized values.

        If the given topic is not known to have synced offsets, a call to
        `determine_offsets()` is made first.  If that raises `NoOffsetsError`
        the error is logged and an empty list is returned.

        If a topic is unknown entirely the cluster's ``heal()`` method is
        called and the check retried.  If the topic is still not allocated
        after that, an empty list is returned.

        Since error codes and deserialization are taken care of by
        `handle_fetch_response` this method merely yields to wait on the
        deserialized results and returns a flattened list.

        :param topic: name of the topic to fetch from.
        :param start: passed through unchanged to `determine_offsets()`
            when the topic's offsets have not been synced yet.
        :returns: (via ``gen.Return``) a flat list of the messages from every
            leader's result.  When the consumer is closing the method bails
            out with a bare ``return`` and so yields no list at all.
        """
        if self.closing:
            return

        if topic not in self.synced_offsets:
            try:
                yield self.determine_offsets(topic, start)
            except NoOffsetsError:
                log.error("Unable to determine offsets for topic %s", topic)
                raise gen.Return([])
            self.synced_offsets.add(topic)

        if topic not in self.allocation or not self.allocation[topic]:
            log.debug("Consuming unknown topic %s, reloading metadata", topic)
            yield self.cluster.heal()

        # re-check after healing; still missing means there is nothing to do
        if topic not in self.allocation or not self.allocation[topic]:
            log.error("Consuming unknown topic %s and not auto-created", topic)
            raise gen.Return([])

        # group the allocated partitions by leader, one request per leader
        ordered = collections.defaultdict(list)
        for partition_id in self.allocation[topic]:
            leader = self.cluster.get_leader(topic, partition_id)
            ordered[leader].append(partition_id)

        requests = {}
        for leader, partitions in six.iteritems(ordered):
            # the byte budget is split evenly among the partitions that
            # share a single request
            max_partition_bytes = int(self.max_bytes / len(partitions))
            requests[leader] = fetch.FetchRequest(
                replica_id=CONSUMER_REPLICA_ID,
                max_wait_time=self.max_wait_time,
                min_bytes=self.min_bytes,
                topics=[
                    fetch.TopicRequest(
                        name=topic,
                        partitions=[
                            fetch.PartitionRequest(
                                partition_id=partition_id,
                                offset=self.offsets[topic][partition_id],
                                max_bytes=max_partition_bytes,
                            ) for partition_id in partitions
                        ])
                ])

        results = yield self.send(requests)
        # The emptiness filter has to come *before* the inner loop: placed
        # after it, a missing (None) result would be iterated, and blow up,
        # before the filter ever ran.
        raise gen.Return([
            msg
            for messageset in results.values() if messageset
            for msg in messageset
        ])
Beispiel #3
0
    def test_consume_without_autocommit(self):
        """
        With ``autocommit=False`` the grouped consumer fetches from the
        committed offsets and an explicit ``commit_offsets()`` call sends
        the positions following the consumed messages.
        """
        topic_name = "test.topic"

        def committed_partition(partition_id, position):
            return offset_fetch.PartitionResponse(
                error_code=errors.no_error,
                partition_id=partition_id,
                offset=position,
                metadata="committed, ok!",
            )

        def fetch_reply(partition_id, message_offset):
            # a fetch response carrying a single message at message_offset
            canned = messages.MessageSet([
                (
                    message_offset,
                    messages.Message(
                        magic=0,
                        attributes=0,
                        key=None,
                        value='{"cat": "meow"}',
                    ),
                ),
            ])
            return fetch.FetchResponse(topics=[
                fetch.TopicResponse(
                    name=topic_name,
                    partitions=[
                        fetch.PartitionResponse(
                            partition_id=partition_id,
                            error_code=errors.no_error,
                            highwater_mark_offset=2,
                            message_set=canned,
                        ),
                    ],
                ),
            ])

        def expected_fetch(partition_id, from_offset):
            return fetch.FetchRequest(
                replica_id=-1,
                max_wait_time=1000,
                min_bytes=1,
                topics=[
                    fetch.TopicRequest(
                        name=topic_name,
                        partitions=[
                            fetch.PartitionRequest(
                                partition_id=partition_id,
                                offset=from_offset,
                                max_bytes=(1024 * 1024),
                            ),
                        ],
                    ),
                ],
            )

        self.add_topic(topic_name, leaders=(1, 8))
        self.allocator.allocation = {topic_name: [0, 1]}

        # broker 3 serves both the offset_fetch and offset_commit apis
        self.set_responses(
            broker_id=3,
            api="offset_fetch",
            responses=[
                offset_fetch.OffsetFetchResponse(topics=[
                    offset_fetch.TopicResponse(
                        name=topic_name,
                        partitions=[
                            committed_partition(0, 80),
                            committed_partition(1, 110),
                        ],
                    ),
                ]),
            ],
        )
        self.set_responses(
            broker_id=3,
            api="offset_commit",
            responses=[
                offset_commit.OffsetCommitResponse(topics=[
                    offset_commit.TopicResponse(
                        name=topic_name,
                        partitions=[
                            offset_commit.PartitionResponse(
                                error_code=errors.no_error,
                                partition_id=1,
                            ),
                        ],
                    ),
                ]),
            ],
        )
        self.set_responses(
            broker_id=1, api="fetch", responses=[fetch_reply(0, 80)],
        )
        self.set_responses(
            broker_id=8, api="fetch", responses=[fetch_reply(1, 110)],
        )

        consumer = grouped.GroupedConsumer(
            ["kafka01", "kafka02"],
            "work-group",
            zk_hosts=["zk01", "zk02", "zk03"],
            autocommit=False,
        )

        yield consumer.connect()

        yield consumer.consume(topic_name)

        self.assert_sent(broker_id=1, request=expected_fetch(0, 80))
        self.assert_sent(broker_id=8, request=expected_fetch(1, 110))

        yield consumer.commit_offsets()

        def expected_commit(partition_id, position):
            return offset_commit.PartitionRequest(
                partition_id=partition_id,
                offset=position,
                metadata="committed by %s" % consumer.name,
            )

        # committed positions are one past the offsets that were consumed
        self.assert_sent(
            broker_id=3,
            request=offset_commit.OffsetCommitV0Request(
                group="work-group",
                topics=[
                    offset_commit.TopicRequest(
                        name=topic_name,
                        partitions=[
                            expected_commit(0, 81),
                            expected_commit(1, 111),
                        ],
                    ),
                ],
            ),
        )
Beispiel #4
0
    def test_max_bytes_at_partition_level(self):
        """
        When two partitions share a leader, the consumer's ``max_bytes`` is
        divided evenly between the partition requests sent to that leader.
        """
        topic_name = "test.topic"
        payload_by_partition = [
            (0, '{"foo": "bar"}'),
            (1, '{"bwee": "bwoo"}'),
        ]

        # both partitions are led by broker 3
        self.add_topic(topic_name, leaders=(3, 3))

        partition_replies = [
            fetch.PartitionResponse(
                partition_id=partition_id,
                error_code=errors.no_error,
                highwater_mark_offset=2,
                message_set=messages.MessageSet(
                    messages=[
                        (
                            0,
                            messages.Message(
                                magic=0, attributes=0,
                                key=None,
                                value=payload,
                            ),
                        ),
                    ]
                ),
            )
            for partition_id, payload in payload_by_partition
        ]
        self.set_responses(
            broker_id=3, api="fetch",
            responses=[
                fetch.FetchResponse(
                    topics=[
                        fetch.TopicResponse(
                            name=topic_name,
                            partitions=partition_replies,
                        ),
                    ]
                ),
            ],
        )

        consumer = FakeConsumer(
            ["kafka01", "kafka02"], max_bytes=(1024 * 1024)
        )

        yield consumer.connect()

        received = yield consumer.consume(topic_name)

        self.assertEqual(received, [{"foo": "bar"}, {"bwee": "bwoo"}])

        # 1MB overall, two partitions in the request: 512KB apiece
        expected_partitions = [
            fetch.PartitionRequest(
                partition_id=partition_id,
                offset=0,
                max_bytes=(512 * 1024),
            )
            for partition_id in (0, 1)
        ]
        self.assert_sent(
            broker_id=3,
            request=fetch.FetchRequest(
                replica_id=-1,
                max_wait_time=1000,
                min_bytes=1,
                topics=[
                    fetch.TopicRequest(
                        name=topic_name,
                        partitions=expected_partitions,
                    ),
                ],
            ),
        )
Beispiel #5
0
    def test_consumer_tracks_offsets(self):
        """
        After a first round of fetches the consumer's second round asks each
        partition for the offset following the last message it received.
        """
        topic_name = "test.topic"

        def message_set_of(*payloads):
            # messages are numbered from offset 0 in the order given
            return messages.MessageSet(
                messages=[
                    (
                        index,
                        messages.Message(
                            magic=0, attributes=0,
                            key=None,
                            value=payload,
                        ),
                    )
                    for index, payload in enumerate(payloads)
                ]
            )

        def fetch_reply(partition_id, message_set):
            return fetch.FetchResponse(
                topics=[
                    fetch.TopicResponse(
                        name=topic_name,
                        partitions=[
                            fetch.PartitionResponse(
                                partition_id=partition_id,
                                error_code=errors.no_error,
                                highwater_mark_offset=2,
                                message_set=message_set,
                            ),
                        ],
                    ),
                ]
            )

        def expect_fetch(broker_id, partition_id, from_offset):
            self.assert_sent(
                broker_id=broker_id,
                request=fetch.FetchRequest(
                    replica_id=-1,
                    max_wait_time=1000,
                    min_bytes=1,
                    topics=[
                        fetch.TopicRequest(
                            name=topic_name,
                            partitions=[
                                fetch.PartitionRequest(
                                    partition_id=partition_id,
                                    offset=from_offset,
                                    max_bytes=(1024 * 1024),
                                ),
                            ],
                        ),
                    ],
                ),
            )

        self.add_topic(topic_name, leaders=(3, 8))

        # each broker replies with messages first, then an empty set
        self.set_responses(
            broker_id=3, api="fetch",
            responses=[
                fetch_reply(
                    0, message_set_of('{"foo": "bar"}', '{"bwee": "bwoo"}')
                ),
                fetch_reply(0, messages.MessageSet([])),
            ],
        )
        self.set_responses(
            broker_id=8, api="fetch",
            responses=[
                fetch_reply(1, message_set_of('{"meow": "bark"}')),
                fetch_reply(1, messages.MessageSet([])),
            ],
        )

        consumer = FakeConsumer(["kafka01", "kafka02"])

        yield consumer.connect()

        received = yield consumer.consume(topic_name)

        # either broker's batch may come first in the flattened result
        acceptable_orders = [
            [{"meow": "bark"}, {"foo": "bar"}, {"bwee": "bwoo"}],
            [{"foo": "bar"}, {"bwee": "bwoo"}, {"meow": "bark"}],
        ]

        self.assertTrue(
            any(received == candidate for candidate in acceptable_orders)
        )

        expect_fetch(broker_id=3, partition_id=0, from_offset=0)
        expect_fetch(broker_id=8, partition_id=1, from_offset=0)

        received = yield consumer.consume(topic_name)

        self.assertEqual(received, [])

        # two messages were read from partition 0, one from partition 1
        expect_fetch(broker_id=3, partition_id=0, from_offset=2)
        expect_fetch(broker_id=8, partition_id=1, from_offset=1)
Beispiel #6
0
    def test_offset_out_of_range_error(self):
        """
        An ``offset_out_of_range`` fetch error yields no messages and the
        following fetch is sent with offset 0 instead of the stale offset.
        """
        topic_name = "test.topic"

        def fetch_reply(error_code, message_set):
            return fetch.FetchResponse(
                topics=[
                    fetch.TopicResponse(
                        name=topic_name,
                        partitions=[
                            fetch.PartitionResponse(
                                partition_id=0,
                                error_code=error_code,
                                highwater_mark_offset=2,
                                message_set=message_set,
                            ),
                        ],
                    ),
                ]
            )

        def expect_fetch_from(from_offset):
            self.assert_sent(
                broker_id=3,
                request=fetch.FetchRequest(
                    replica_id=-1,
                    max_wait_time=1000,
                    min_bytes=1,
                    topics=[
                        fetch.TopicRequest(
                            name=topic_name,
                            partitions=[
                                fetch.PartitionRequest(
                                    partition_id=0,
                                    offset=from_offset,
                                    max_bytes=(1024 * 1024),
                                ),
                            ],
                        ),
                    ],
                ),
            )

        self.add_topic(topic_name, leaders=(3,))

        # first reply is the error, second one carries a real message
        good_messages = messages.MessageSet(
            messages=[
                (
                    0,
                    messages.Message(
                        magic=0, attributes=0,
                        key=None,
                        value='{"cat": "dog"}',
                    ),
                ),
            ]
        )
        self.set_responses(
            broker_id=3, api="fetch",
            responses=[
                fetch_reply(
                    errors.offset_out_of_range, messages.MessageSet([])
                ),
                fetch_reply(errors.no_error, good_messages),
            ],
        )

        consumer = FakeConsumer(["kafka01"])

        yield consumer.connect()

        # seed an offset by hand and mark the topic synced, so consume()
        # does not go looking up offsets itself
        consumer.offsets[topic_name][0] = 80
        consumer.synced_offsets.add(topic_name)

        received = yield consumer.consume(topic_name)

        self.assertEqual(received, [])

        expect_fetch_from(80)

        received = yield consumer.consume(topic_name)

        self.assertEqual(received, [{"cat": "dog"}])

        expect_fetch_from(0)
Beispiel #7
0
    def test_custom_deserializer_and_options(self):
        """
        A custom deserializer is applied to fetched values, a message whose
        deserialization raises is left out of the result, and the custom
        ``max_wait_time``/``min_bytes``/``max_bytes`` show up in the request.
        """
        topic_name = "test.topic"

        self.add_topic(topic_name, leaders=(3,))

        raw_messages = messages.MessageSet(
            messages=[
                (
                    index,
                    messages.Message(
                        magic=0, attributes=0,
                        key=None,
                        value=raw_value,
                    ),
                )
                for index, raw_value in enumerate(['cat', 'dog'])
            ]
        )
        partition_reply = fetch.PartitionResponse(
            partition_id=0,
            error_code=errors.no_error,
            highwater_mark_offset=2,
            message_set=raw_messages,
        )
        self.set_responses(
            broker_id=3, api="fetch",
            responses=[
                fetch.FetchResponse(
                    topics=[
                        fetch.TopicResponse(
                            name=topic_name,
                            partitions=[partition_reply],
                        ),
                    ]
                ),
            ],
        )

        # scripted outcomes: the first call blows up, the second succeeds
        outcomes = [Exception(), "bark"]

        def deserializer(val):
            outcome = outcomes.pop(0)
            if isinstance(outcome, Exception):
                raise outcome

            return "%s: %s" % (val, outcome)

        consumer = FakeConsumer(
            ["kafka01", "kafka02"],
            deserializer=deserializer,
            max_wait_time=500,
            min_bytes=1024, max_bytes=1024
        )

        yield consumer.connect()

        received = yield consumer.consume(topic_name)

        self.assertEqual(received, ["dog: bark"])

        expected_request = fetch.FetchRequest(
            replica_id=-1,
            max_wait_time=500,
            min_bytes=1024,
            topics=[
                fetch.TopicRequest(
                    name=topic_name,
                    partitions=[
                        fetch.PartitionRequest(
                            partition_id=0,
                            offset=0,
                            max_bytes=1024,
                        ),
                    ],
                ),
            ],
        )
        self.assert_sent(broker_id=3, request=expected_request)