def test_roundtrip_large_request(self):
    log.debug('Timestamp Before ProduceRequest')

    # Single message of 5 MBish
    produce = ProduceRequest(self.topic, 0, [
        create_message(self.topic + " message 0: " +
                       ("0123456789" * 10 + '\n') * 51909)
    ])
    log.debug('Timestamp After ProduceRequest')

    produce_resp, = yield self.client.send_produce_request([produce])
    log.debug('Timestamp After Send')
    self.assertEqual(produce_resp.error, 0)
    self.assertEqual(produce_resp.topic, self.topic)
    self.assertEqual(produce_resp.partition, 0)
    self.assertEqual(produce_resp.offset, 0)

    # Fetch request with max size of 6MB
    fetch = FetchRequest(self.topic, 0, 0, 6 * 1048576)
    fetch_resp, = yield self.client.send_fetch_request(
        [fetch], max_wait_time=500)
    self.assertEqual(fetch_resp.error, 0)
    self.assertEqual(fetch_resp.topic, self.topic)
    self.assertEqual(fetch_resp.partition, 0)

    messages = list(fetch_resp.messages)
    self.assertEqual(len(messages), 1)
def test_roundtrip_large_request(self): """ A large request can be produced and fetched. """ log.debug('Timestamp Before ProduceRequest') # Single message of a bit less than 1 MiB message = create_message(self.topic.encode() + b" message 0: " + (b"0123456789" * 10 + b'\n') * 90) produce = ProduceRequest(self.topic, 0, [message]) log.debug('Timestamp before send') [produce_resp] = yield self.retry_while_broker_errors( self.client.send_produce_request, [produce]) log.debug('Timestamp after send') self.assertEqual(produce_resp.error, 0) self.assertEqual(produce_resp.topic, self.topic) self.assertEqual(produce_resp.partition, 0) self.assertEqual(produce_resp.offset, 0) # Fetch request with max size of 1 MiB fetch = FetchRequest(self.topic, 0, 0, 1024**2) fetch_resp, = yield self.client.send_fetch_request([fetch], max_wait_time=1000) self.assertEqual(fetch_resp.error, 0) self.assertEqual(fetch_resp.topic, self.topic) self.assertEqual(fetch_resp.partition, 0) messages = list(fetch_resp.messages) self.assertEqual(len(messages), 1)
def assert_fetch_offset(self, partition, start_offset, expected_messages,
                        expected_keys=[], max_wait=0.5, fetch_size=1024,
                        topic=None):
    # There should only be one response message from the server.
    # This will throw an exception if there's more than one.
    if topic is None:
        topic = self.topic
    resp, = yield self.client.send_fetch_request(
        [FetchRequest(topic, partition, start_offset, fetch_size)],
        max_wait_time=int(max_wait * 1000))

    self.assertEqual(resp.error, 0)
    self.assertEqual(resp.partition, partition)
    # Convert generator to list
    resp_messages = list(resp.messages)
    messages = [x.message.value for x in resp_messages]
    self.assertEqual(messages, expected_messages)
    if expected_keys:
        keys = [x.message.key for x in resp_messages]
        self.assertEqual(keys, expected_keys)
    self.assertEqual(resp.highwaterMark,
                     start_offset + len(expected_messages))
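# A hypothetical call site for the helper above, from an @inlineCallbacks
# test method. The partition, start offsets, and expected message values are
# illustrative only, not taken from the original tests:
#
#     yield self.assert_fetch_offset(0, 0, [b"message 1", b"message 2"])
#     yield self.assert_fetch_offset(0, 2, [], max_wait=1.0)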
def test_consume_none(self):
    fetch = FetchRequest(self.topic, 0, 0, 1024)

    fetch_resp, = yield self.client.send_fetch_request(
        [fetch], max_wait_time=1000)
    self.assertEqual(fetch_resp.error, 0)
    self.assertEqual(fetch_resp.topic, self.topic)
    self.assertEqual(fetch_resp.partition, 0)
    messages = list(fetch_resp.messages)
    self.assertEqual(len(messages), 0)
def _count_messages(self, topic):
    messages = []
    hosts = '%s:%d,%s:%d' % (
        self.kafka_brokers[0].host, self.kafka_brokers[0].port,
        self.kafka_brokers[1].host, self.kafka_brokers[1].port)
    client = KafkaClient(hosts, clientId="CountMessages", timeout=500)

    try:
        yield ensure_topic_creation(client, topic, fully_replicated=False,
                                    reactor=self.reactor)

        # Need to retry this until we have a leader...
        while True:
            # Ask the client to load the latest metadata. This may avoid a
            # NotLeaderForPartitionError I was seeing upon re-start of the
            # broker.
            yield client.load_metadata_for_topics(topic)
            # if there is an error on the metadata for the topic, raise
            if check_error(
                    client.metadata_error_for_topic(topic), False) is None:
                break

        # Ok, should be safe to get the partitions now...
        partitions = client.topic_partitions[topic]

        requests = [FetchRequest(topic, part, 0, 1024 * 1024)
                    for part in partitions]
        resps = []
        while not resps:
            try:
                log.debug("_count_message: Fetching messages")
                # Prevent log.error() call from causing test failure
                with patch.object(kclient, 'log'):
                    resps = yield client.send_fetch_request(
                        requests, max_wait_time=400)
            except (NotLeaderForPartitionError,
                    UnknownTopicOrPartitionError,
                    KafkaUnavailableError):  # pragma: no cover
                log.debug("_count_message: Metadata err, retrying...")
                yield client.load_metadata_for_topics(topic)
            except FailedPayloadsError as e:  # pragma: no cover
                if not e.args[1][0][1].check(RequestTimedOutError):
                    raise
                log.debug("_count_message: Timed out err, retrying...")
    finally:
        yield client.close()

    for fetch_resp in resps:
        messages.extend(list(fetch_resp.messages))

    log.debug("Got %d messages:%r", len(messages), messages)

    returnValue(len(messages))
def _count_messages(self, topic):
    log.debug("Counting messages on topic %s", topic)
    messages = []
    client = KafkaClient(self.harness.bootstrap_hosts,
                         clientId="CountMessages", timeout=500,
                         reactor=self.reactor)

    try:
        yield ensure_topic_creation(client, topic, fully_replicated=False)

        # Need to retry this until we have a leader...
        while True:
            # Ask the client to load the latest metadata. This may avoid a
            # NotLeaderForPartitionError I was seeing upon re-start of the
            # broker.
            yield client.load_metadata_for_topics(topic)
            # if there is an error on the metadata for the topic, wait
            errno = client.metadata_error_for_topic(topic)
            if errno == 0:
                break
            else:
                log.debug("Topic %s in error errno=%d", topic, errno)
                yield async_delay(1.0)

        # Ok, should be safe to get the partitions now...
        partitions = client.topic_partitions[topic]

        requests = [FetchRequest(topic, part, 0, 1024 * 1024)
                    for part in partitions]
        resps = []
        while not resps:
            try:
                log.debug("_count_message: Fetching messages")
                resps = yield client.send_fetch_request(
                    requests, max_wait_time=400)
            except (NotLeaderForPartitionError,
                    UnknownTopicOrPartitionError,
                    KafkaUnavailableError):  # pragma: no cover
                log.debug("_count_message: Metadata err, retrying...")
                yield client.load_metadata_for_topics(topic)
            except FailedPayloadsError as e:  # pragma: no cover
                if not e.args[1][0][1].check(RequestTimedOutError):
                    raise
                log.debug("_count_message: Timed out err, retrying...")
    finally:
        yield client.close()

    for fetch_resp in resps:
        messages.extend(list(fetch_resp.messages))

    log.debug("Got %d messages: %r", len(messages), messages)

    returnValue(len(messages))
def test_consume_none(self):
    fetch = FetchRequest(self.topic, 0, 0, 1024)

    [fetch_resp] = yield self.retry_while_broker_errors(
        self.client.send_fetch_request, [fetch], max_wait_time=1000,
    )
    self.assertEqual(fetch_resp.error, 0)
    self.assertEqual(fetch_resp.topic, self.topic)
    self.assertEqual(fetch_resp.partition, 0)
    messages = list(fetch_resp.messages)
    self.assertEqual(len(messages), 0)
def test_encode_fetch_request(self):
    requests = [
        FetchRequest(b"topic1", 0, 10, 1024),
        FetchRequest(b"topic2", 1, 20, 100),
    ]

    header = b"".join([
        struct.pack('>h', 1),                # Msg Header, Message type = Fetch
        struct.pack('>h', 0),                # Msg Header, API version
        struct.pack('>i', 3),                # Msg Header, Correlation ID
        struct.pack('>h7s', 7, b"client1"),  # Msg Header, The client ID
        struct.pack('>i', -1),               # Replica Id
        struct.pack('>i', 2),                # Max wait time
        struct.pack('>i', 100),              # Min bytes
        struct.pack('>i', 2),                # Num requests
    ])

    topic1 = b"".join([
        struct.pack('>h6s', 6, b'topic1'),   # Topic
        struct.pack('>i', 1),                # Num Payloads
        struct.pack('>i', 0),                # Partition 0
        struct.pack('>q', 10),               # Offset
        struct.pack('>i', 1024),             # Max Bytes
    ])

    topic2 = b"".join([
        struct.pack('>h6s', 6, b'topic2'),   # Topic
        struct.pack('>i', 1),                # Num Payloads
        struct.pack('>i', 1),                # Partition 1
        struct.pack('>q', 20),               # Offset
        struct.pack('>i', 100),              # Max Bytes
    ])

    expected1 = b"".join([header, topic1, topic2])
    expected2 = b"".join([header, topic2, topic1])

    encoded = KafkaCodec.encode_fetch_request(b"client1", 3, requests, 2, 100)

    self.assertIn(encoded, [expected1, expected2])
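# To make the wire layout asserted above explicit, here is a small sketch
# (not part of the original tests) that unpacks the fixed-size request
# header back into its fields using only the standard struct module. The
# function name and return shape are illustrative.
import struct


def parse_fetch_request_header(data):
    # API key, API version, correlation id, and client-id length occupy the
    # first 10 bytes ('>hhih').
    api_key, api_version, correlation_id, client_id_len = \
        struct.unpack_from('>hhih', data, 0)
    offset = 10
    client_id = data[offset:offset + client_id_len]
    offset += client_id_len
    # Replica id, max wait time, min bytes, and topic count follow as four
    # big-endian 32-bit integers.
    replica_id, max_wait_time, min_bytes, num_topics = \
        struct.unpack_from('>iiii', data, offset)
    return (api_key, api_version, correlation_id, client_id,
            replica_id, max_wait_time, min_bytes, num_topics)

# For the `encoded` bytes built in the test above, this would return
# (1, 0, 3, b'client1', -1, 2, 100, 2).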
def _do_fetch(self): """Send a fetch request if there isn't a request outstanding Sends a fetch request to the Kafka cluster to get messages at the current offset. When the response comes back, if there are messages, it delivers them to the :attr:`processor` callback and initiates another fetch request. If there is a recoverable error, the fetch is retried after :attr:`retry_delay`. In the case of an unrecoverable error, :func:`errback` is called on the :class:`Deferred` returned by :meth:`start()`. """ # Check for outstanding request. if self._request_d: log.debug("_do_fetch: Outstanding request: %r", self._request_d) return # Cleanup our _retry_call, if we have one if self._retry_call is not None: if self._retry_call.active(): self._retry_call.cancel() self._retry_call = None # Do we know our offset yet, or do we need to figure it out? if (self._fetch_offset == OFFSET_EARLIEST or self._fetch_offset == OFFSET_LATEST): # We need to fetch the offset for our topic/partition offset_request = OffsetRequest( self.topic, self.partition, self._fetch_offset, 1) self._request_d = self.client.send_offset_request([offset_request]) self._request_d.addCallbacks( self._handle_offset_response, self._handle_offset_error) elif self._fetch_offset == OFFSET_COMMITTED: # We need to fetch the committed offset for our topic/partition # Note we use the same callbacks, as the responses are "close # enough" for our needs here if not self.consumer_group: # consumer_group must be set for OFFSET_COMMITTED failure = Failure( InvalidConsumerGroupError("Bad Group_id:{0!r}".format( self.consumer_group))) self._start_d.errback(failure) request = OffsetFetchRequest(self.topic, self.partition) self._request_d = self.client.send_offset_fetch_request( self.consumer_group, [request]) self._request_d.addCallbacks( self._handle_offset_response, self._handle_offset_error) else: # Create fetch request payload for our partition request = FetchRequest( self.topic, self.partition, self._fetch_offset, self.buffer_size) # Send request and add handlers for the response self._request_d = self.client.send_fetch_request( [request], max_wait_time=self.fetch_max_wait_time, min_bytes=self.fetch_min_bytes) # We need a temp for this because if the response is already # available, _handle_fetch_response() will clear self._request_d d = self._request_d d.addCallback(self._handle_fetch_response) d.addErrback(self._handle_fetch_error)