    def test_roundtrip_large_request(self):
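        """
        A large request can be produced and fetched.
        """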
        log.debug('Timestamp Before ProduceRequest')
        # Single message of just under 5 MiB
        produce = ProduceRequest(self.topic, 0, [
            create_message(self.topic + " message 0: " +
                           ("0123456789" * 10 + '\n') * 51909)
        ])
        log.debug('Timestamp After ProduceRequest')

        produce_resp, = yield self.client.send_produce_request([produce])
        log.debug('Timestamp After Send')
        self.assertEqual(produce_resp.error, 0)
        self.assertEqual(produce_resp.topic, self.topic)
        self.assertEqual(produce_resp.partition, 0)
        self.assertEqual(produce_resp.offset, 0)

        # Fetch request with max size of 6MB
        fetch = FetchRequest(self.topic, 0, 0, 6 * 1048576)
        fetch_resp, = yield self.client.send_fetch_request([fetch],
                                                           max_wait_time=500)
        self.assertEqual(fetch_resp.error, 0)
        self.assertEqual(fetch_resp.topic, self.topic)
        self.assertEqual(fetch_resp.partition, 0)

        messages = list(fetch_resp.messages)
        self.assertEqual(len(messages), 1)
    def test_roundtrip_large_request(self):
        """
        A large request can be produced and fetched.
        """
        log.debug('Timestamp Before ProduceRequest')
        # Single message of a bit less than 1 MiB
        message = create_message(self.topic.encode() + b" message 0: " +
                                 (b"0123456789" * 10 + b'\n') * 10 * 1024)
        produce = ProduceRequest(self.topic, 0, [message])
        log.debug('Timestamp before send')
        [produce_resp] = yield self.retry_while_broker_errors(
            self.client.send_produce_request, [produce])
        log.debug('Timestamp after send')
        self.assertEqual(produce_resp.error, 0)
        self.assertEqual(produce_resp.topic, self.topic)
        self.assertEqual(produce_resp.partition, 0)
        self.assertEqual(produce_resp.offset, 0)

        # Fetch request with max size of 1 MiB
        fetch = FetchRequest(self.topic, 0, 0, 1024**2)
        fetch_resp, = yield self.client.send_fetch_request([fetch],
                                                           max_wait_time=1000)
        self.assertEqual(fetch_resp.error, 0)
        self.assertEqual(fetch_resp.topic, self.topic)
        self.assertEqual(fetch_resp.partition, 0)

        messages = list(fetch_resp.messages)
        self.assertEqual(len(messages), 1)
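The retry_while_broker_errors() helper called above is not included in this listing. A minimal, hypothetical sketch is shown below; it assumes the transient error classes used elsewhere in these examples are importable from afkak.common and that the test case exposes a self.reactor attribute.

from twisted.internet import task
from twisted.internet.defer import inlineCallbacks, returnValue

from afkak.common import (  # assumed import path for these error classes
    KafkaUnavailableError,
    NotLeaderForPartitionError,
    UnknownTopicOrPartitionError,
)


class BrokerErrorRetryMixin(object):
    @inlineCallbacks
    def retry_while_broker_errors(self, f, *args, **kwargs):
        # Hypothetical sketch: keep calling f() until it stops failing with
        # the transient broker errors seen elsewhere in these tests (for
        # example, right after topic creation while a leader is elected).
        while True:
            try:
                result = yield f(*args, **kwargs)
                returnValue(result)
            except (KafkaUnavailableError, NotLeaderForPartitionError,
                    UnknownTopicOrPartitionError):
                # Back off briefly before retrying.
                yield task.deferLater(self.reactor, 0.5, lambda: None)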
    def assert_fetch_offset(self,
                            partition,
                            start_offset,
                            expected_messages,
                            expected_keys=[],
                            max_wait=0.5,
                            fetch_size=1024,
                            topic=None):
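        """
        Fetch from the given partition starting at start_offset and assert
        that the response contains exactly the expected messages and keys.
        """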
        if topic is None:
            topic = self.topic
        # There should only be one response message from the server.
        # This will throw an exception if there's more than one.
        resp, = yield self.client.send_fetch_request(
            [FetchRequest(topic, partition, start_offset, fetch_size)],
            max_wait_time=int(max_wait * 1000))

        self.assertEqual(resp.error, 0)
        self.assertEqual(resp.partition, partition)
        # Convert generator to list
        resp_messages = list(resp.messages)
        messages = [x.message.value for x in resp_messages]
        self.assertEqual(messages, expected_messages)
        if expected_keys:
            keys = [x.message.key for x in resp_messages]
            self.assertEqual(keys, expected_keys)
        self.assertEqual(resp.highwaterMark,
                         start_offset + len(expected_messages))
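A typical call site for this helper might look like the following (illustrative only; the partition, offset, and payloads are assumptions):

        # After producing two messages to partition 0 starting at offset 0:
        yield self.assert_fetch_offset(0, 0, [b"message 1", b"message 2"])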
    def test_consume_none(self):
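        """
        Fetching from a partition with no messages returns an empty result.
        """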
        fetch = FetchRequest(self.topic, 0, 0, 1024)

        fetch_resp, = yield self.client.send_fetch_request([fetch], max_wait_time=1000)
        self.assertEqual(fetch_resp.error, 0)
        self.assertEqual(fetch_resp.topic, self.topic)
        self.assertEqual(fetch_resp.partition, 0)

        messages = list(fetch_resp.messages)
        self.assertEqual(len(messages), 0)
    def _count_messages(self, topic):
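        """
        Count the messages currently on ``topic`` by fetching every
        partition from offset 0 with a dedicated client.
        """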
        messages = []
        hosts = '%s:%d,%s:%d' % (
            self.kafka_brokers[0].host, self.kafka_brokers[0].port,
            self.kafka_brokers[1].host, self.kafka_brokers[1].port)
        client = KafkaClient(hosts, clientId="CountMessages", timeout=500)

        try:
            yield ensure_topic_creation(client,
                                        topic,
                                        fully_replicated=False,
                                        reactor=self.reactor)

            # Need to retry this until we have a leader...
            while True:
                # Ask the client to load the latest metadata. This may avoid a
                # NotLeaderForPartitionError I was seeing upon re-start of the
                # broker.
                yield client.load_metadata_for_topics(topic)
                # if there is no error on the metadata for the topic, we're
                # done; otherwise reload metadata and check again
                if check_error(client.metadata_error_for_topic(topic),
                               False) is None:
                    break
            # Ok, should be safe to get the partitions now...
            partitions = client.topic_partitions[topic]

            requests = [
                FetchRequest(topic, part, 0, 1024 * 1024)
                for part in partitions
            ]
            resps = []
            while not resps:
                try:
                    log.debug("_count_message: Fetching messages")
                    # Prevent log.error() call from causing test failure
                    with patch.object(kclient, 'log'):
                        resps = yield client.send_fetch_request(
                            requests, max_wait_time=400)
                except (NotLeaderForPartitionError,
                        UnknownTopicOrPartitionError,
                        KafkaUnavailableError):  # pragma: no cover
                    log.debug("_count_message: Metadata err, retrying...")
                    yield client.load_metadata_for_topics(topic)
                except FailedPayloadsError as e:  # pragma: no cover
                    if not e.args[1][0][1].check(RequestTimedOutError):
                        raise
                    log.debug("_count_message: Timed out err, retrying...")
        finally:
            yield client.close()
        for fetch_resp in resps:
            messages.extend(list(fetch_resp.messages))

        log.debug("Got %d messages:%r", len(messages), messages)

        returnValue(len(messages))
    def _count_messages(self, topic):
        log.debug("Counting messages on topic %s", topic)
        messages = []
        client = KafkaClient(self.harness.bootstrap_hosts,
                             clientId="CountMessages",
                             timeout=500,
                             reactor=self.reactor)

        try:
            yield ensure_topic_creation(client, topic, fully_replicated=False)

            # Need to retry this until we have a leader...
            while True:
                # Ask the client to load the latest metadata. This may avoid a
                # NotLeaderForPartitionError I was seeing upon re-start of the
                # broker.
                yield client.load_metadata_for_topics(topic)
                # if there is an error on the metadata for the topic, wait
                errno = client.metadata_error_for_topic(topic)
                if errno == 0:
                    break
                else:
                    log.debug("Topic %s in error errno=%d", topic, errno)
                    yield async_delay(1.0)

            # Ok, should be safe to get the partitions now...
            partitions = client.topic_partitions[topic]

            requests = [
                FetchRequest(topic, part, 0, 1024 * 1024)
                for part in partitions
            ]
            resps = []
            while not resps:
                try:
                    log.debug("_count_message: Fetching messages")
                    resps = yield client.send_fetch_request(requests,
                                                            max_wait_time=400)
                except (NotLeaderForPartitionError,
                        UnknownTopicOrPartitionError,
                        KafkaUnavailableError):  # pragma: no cover
                    log.debug("_count_message: Metadata err, retrying...")
                    yield client.load_metadata_for_topics(topic)
                except FailedPayloadsError as e:  # pragma: no cover
                    if not e.args[1][0][1].check(RequestTimedOutError):
                        raise
                    log.debug("_count_message: Timed out err, retrying...")
        finally:
            yield client.close()
        for fetch_resp in resps:
            messages.extend(list(fetch_resp.messages))

        log.debug("Got %d messages: %r", len(messages), messages)

        returnValue(len(messages))
    def test_consume_none(self):
        fetch = FetchRequest(self.topic, 0, 0, 1024)

        [fetch_resp] = yield self.retry_while_broker_errors(
            self.client.send_fetch_request, [fetch], max_wait_time=1000,
        )
        self.assertEqual(fetch_resp.error, 0)
        self.assertEqual(fetch_resp.topic, self.topic)
        self.assertEqual(fetch_resp.partition, 0)

        messages = list(fetch_resp.messages)
        self.assertEqual(len(messages), 0)
    def test_encode_fetch_request(self):
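        """
        A fetch request for two topics encodes to the common header followed
        by one block per topic; the topic blocks may appear in either order.
        """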
        requests = [
            FetchRequest(b"topic1", 0, 10, 1024),
            FetchRequest(b"topic2", 1, 20, 100),
        ]

        header = b"".join([
            struct.pack('>h', 1),  # Msg Header, Message type = Fetch
            struct.pack('>h', 0),  # Msg Header, API version
            struct.pack('>i', 3),  # Msg Header, Correlation ID
            struct.pack('>h7s', 7, b"client1"),  # Msg Header, The client ID
            struct.pack('>i', -1),  # Replica Id
            struct.pack('>i', 2),  # Max wait time
            struct.pack('>i', 100),  # Min bytes
            struct.pack('>i', 2),  # Num requests
        ])

        topic1 = b"".join([
            struct.pack('>h6s', 6, b'topic1'),  # Topic
            struct.pack('>i', 1),  # Num Payloads
            struct.pack('>i', 0),  # Partition 0
            struct.pack('>q', 10),  # Offset
            struct.pack('>i', 1024),  # Max Bytes
        ])

        topic2 = b"".join([
            struct.pack('>h6s', 6, b'topic2'),  # Topic
            struct.pack('>i', 1),  # Num Payloads
            struct.pack('>i', 1),  # Partition 1
            struct.pack('>q', 20),  # Offset
            struct.pack('>i', 100),  # Max Bytes
        ])

        expected1 = b"".join([header, topic1, topic2])
        expected2 = b"".join([header, topic2, topic1])

        encoded = KafkaCodec.encode_fetch_request(b"client1", 3, requests, 2,
                                                  100)
        self.assertIn(encoded, [expected1, expected2])
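As a quick cross-check of the layout assembled above, struct.calcsize confirms that each expected encoding is 89 bytes (a 33-byte header plus two 28-byte topic blocks):

import struct

# Header: api key (h) + api version (h) + correlation id (i) +
# client id length (h) + b"client1" (7s) + replica id (i) +
# max wait (i) + min bytes (i) + topic count (i)
header_len = struct.calcsize('>hhih7siiii')  # 33 bytes
# Per-topic block: name length (h) + 6-byte name (6s) +
# partition count (i) + partition (i) + offset (q) + max bytes (i)
topic_len = struct.calcsize('>h6siiqi')      # 28 bytes
assert header_len + 2 * topic_len == 89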
    def _do_fetch(self):
        """Send a fetch request if there isn't a request outstanding

        Sends a fetch request to the Kafka cluster to get messages at the
        current offset.  When the response comes back, if there are messages,
        it delivers them to the :attr:`processor` callback and initiates
        another fetch request.  If there is a recoverable error, the fetch is
        retried after :attr:`retry_delay`.

        In the case of an unrecoverable error, :func:`errback` is called on the
        :class:`Deferred` returned by :meth:`start()`.
        """
        # Check for outstanding request.
        if self._request_d:
            log.debug("_do_fetch: Outstanding request: %r", self._request_d)
            return

        # Cleanup our _retry_call, if we have one
        if self._retry_call is not None:
            if self._retry_call.active():
                self._retry_call.cancel()
            self._retry_call = None

        # Do we know our offset yet, or do we need to figure it out?
        if (self._fetch_offset == OFFSET_EARLIEST or
                self._fetch_offset == OFFSET_LATEST):
            # We need to fetch the offset for our topic/partition
            offset_request = OffsetRequest(
                self.topic, self.partition, self._fetch_offset, 1)
            self._request_d = self.client.send_offset_request([offset_request])
            self._request_d.addCallbacks(
                self._handle_offset_response, self._handle_offset_error)
        elif self._fetch_offset == OFFSET_COMMITTED:
            # We need to fetch the committed offset for our topic/partition
            # Note we use the same callbacks, as the responses are "close
            # enough" for our needs here
            if not self.consumer_group:
                # consumer_group must be set for OFFSET_COMMITTED
                failure = Failure(
                    InvalidConsumerGroupError("Bad Group_id:{0!r}".format(
                        self.consumer_group)))
                self._start_d.errback(failure)
                return
            request = OffsetFetchRequest(self.topic, self.partition)
            self._request_d = self.client.send_offset_fetch_request(
                self.consumer_group, [request])
            self._request_d.addCallbacks(
                self._handle_offset_response, self._handle_offset_error)
        else:
            # Create fetch request payload for our partition
            request = FetchRequest(
                self.topic, self.partition, self._fetch_offset,
                self.buffer_size)
            # Send request and add handlers for the response
            self._request_d = self.client.send_fetch_request(
                [request], max_wait_time=self.fetch_max_wait_time,
                min_bytes=self.fetch_min_bytes)
            # We need a temp for this because if the response is already
            # available, _handle_fetch_response() will clear self._request_d
            d = self._request_d
            d.addCallback(self._handle_fetch_response)
            d.addErrback(self._handle_fetch_error)
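For context, _do_fetch() is first invoked when the consumer's start() method (mentioned in the docstring above) is called. The sketch below shows one way a consumer might be driven; it assumes afkak's Consumer(client, topic, partition, processor) constructor, a processor(consumer, message_list) callback signature, and the OFFSET_EARLIEST constant from afkak.common, so treat the import paths and signatures as assumptions rather than a definitive API reference.

from twisted.internet.task import react

from afkak import KafkaClient
from afkak.common import OFFSET_EARLIEST
from afkak.consumer import Consumer  # assumed import path


def print_messages(consumer, message_list):
    # Processor callback: receives the consumer and the batch of messages
    # that _do_fetch() delivered for the current fetch.
    for m in message_list:
        print(m.message.value)


def main(reactor):
    client = KafkaClient('localhost:9092', reactor=reactor)
    consumer = Consumer(client, 'my_topic', 0, print_messages)
    # start() kicks off the first _do_fetch(); the Deferred it returns
    # errbacks on unrecoverable errors, as described in the docstring above.
    d = consumer.start(OFFSET_EARLIEST)
    # Stop after 10 seconds so the Deferred fires and react() can exit.
    reactor.callLater(10, consumer.stop)
    d.addCallback(lambda _: client.close())
    return d


react(main)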