def test_send_produce_request_maintains_request_response_order(self):
    self.client.ensure_topic_exists('foo')
    self.client.ensure_topic_exists('bar')

    requests = [
        ProduceRequestPayload('foo', 0, [create_message(b'a'), create_message(b'b')]),
        ProduceRequestPayload('bar', 1, [create_message(b'a'), create_message(b'b')]),
        ProduceRequestPayload('foo', 1, [create_message(b'a'), create_message(b'b')]),
        ProduceRequestPayload('bar', 0, [create_message(b'a'), create_message(b'b')]),
    ]

    responses = self.client.send_produce_request(requests)
    while len(responses):
        # Pop from the tail of both lists so each request stays paired
        # with its response; this only checks order, not content.
        request = requests.pop()
        response = responses.pop()
        self.assertEqual(request.topic, response.topic)
        self.assertEqual(request.partition, response.partition)
def test_encode_produce_request(self):
    requests = [
        ProduceRequestPayload("topic1", 0, [
            kafka.protocol.message.Message(b"a"),
            kafka.protocol.message.Message(b"b")
        ]),
        ProduceRequestPayload("topic2", 1, [
            kafka.protocol.message.Message(b"c")
        ])
    ]

    msg_a_binary = KafkaProtocol._encode_message(create_message(b"a"))
    msg_b_binary = KafkaProtocol._encode_message(create_message(b"b"))
    msg_c_binary = KafkaProtocol._encode_message(create_message(b"c"))

    header = b"".join([
        struct.pack('>i', 0x94),            # The length of the message overall
        struct.pack('>h', 0),               # Msg Header, Message type = Produce
        struct.pack('>h', 0),               # Msg Header, API version
        struct.pack('>i', 2),               # Msg Header, Correlation ID
        struct.pack('>h7s', 7, b"client1"), # Msg Header, The client ID
        struct.pack('>h', 2),               # Num acks required
        struct.pack('>i', 100),             # Request Timeout
        struct.pack('>i', 2),               # The number of requests
    ])

    total_len = len(msg_a_binary) + len(msg_b_binary)
    topic1 = b"".join([
        struct.pack('>h6s', 6, b'topic1'),         # The topic1
        struct.pack('>i', 1),                      # One message set
        struct.pack('>i', 0),                      # Partition 0
        struct.pack('>i', total_len + 24),         # Size of the message set
                                                   # (12 bytes of framing per message)
        struct.pack('>q', 0),                      # No offset specified
        struct.pack('>i', len(msg_a_binary)),      # Length of message
        msg_a_binary,                              # Actual message
        struct.pack('>q', 0),                      # No offset specified
        struct.pack('>i', len(msg_b_binary)),      # Length of message
        msg_b_binary,                              # Actual message
    ])

    topic2 = b"".join([
        struct.pack('>h6s', 6, b'topic2'),         # The topic2
        struct.pack('>i', 1),                      # One message set
        struct.pack('>i', 1),                      # Partition 1
        struct.pack('>i', len(msg_c_binary) + 12), # Size of the message set
        struct.pack('>q', 0),                      # No offset specified
        struct.pack('>i', len(msg_c_binary)),      # Length of message
        msg_c_binary,                              # Actual message
    ])

    expected1 = b"".join([header, topic1, topic2])
    expected2 = b"".join([header, topic2, topic1])

    encoded = KafkaProtocol.encode_produce_request(b"client1", 2, requests, 2, 100)
    self.assertIn(encoded, [expected1, expected2])
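# A minimal illustrative helper (not from the original test) showing where the
# "+ 24" and "+ 12" message-set sizes above come from: in the v0 wire format,
# each message in a set is framed by an 8-byte offset and a 4-byte size field,
# i.e. 12 bytes of overhead per message.
def _message_set_size(encoded_messages):
    PER_MESSAGE_FRAMING = 8 + 4  # offset (int64) + message size (int32)
    return sum(PER_MESSAGE_FRAMING + len(m) for m in encoded_messages)

# _message_set_size([msg_a_binary, msg_b_binary]) == total_len + 24
# _message_set_size([msg_c_binary]) == len(msg_c_binary) + 12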
def send_messages(self, partition, messages):
    messages = [create_message(self.msg(str(msg))) for msg in messages]
    produce = ProduceRequestPayload(self.topic, partition, messages=messages)
    resp, = self.client.send_produce_request([produce])
    self.assertEqual(resp.error, 0)
    return [x.value for x in messages]
def test_send_produce_request_raises_when_topic_unknown(self, protocol, conn):
    mock_conn(conn)

    brokers = [
        BrokerMetadata(0, 'broker_1', 4567, None),
        BrokerMetadata(1, 'broker_2', 5678, None)
    ]
    resp0_brokers = list(map(itemgetter(0, 1, 2), brokers))

    topics = [
        (UNKNOWN_TOPIC_OR_PARTITION, 'topic_doesnt_exist', []),
    ]
    protocol.decode_metadata_response.return_value = MetadataResponse[0](
        resp0_brokers, topics)

    client = SimpleClient(hosts=['broker_1:4567'])

    requests = [ProduceRequestPayload(
        "topic_doesnt_exist", 0,
        [create_message(b"a"), create_message(b"b")])]

    with self.assertRaises(FailedPayloadsError):
        client.send_produce_request(requests)
def test_send_produce_request_raises_when_noleader(self, protocol, conn):
    mock_conn(conn)

    brokers = [
        BrokerMetadata(0, 'broker_1', 4567, None),
        BrokerMetadata(1, 'broker_2', 5678, None)
    ]
    resp0_brokers = list(map(itemgetter(0, 1, 2), brokers))

    topics = [
        (NO_ERROR, 'topic_noleader', [
            (NO_LEADER, 0, -1, [], []),
            (NO_LEADER, 1, -1, [], []),
        ]),
    ]
    protocol.decode_metadata_response.return_value = MetadataResponse[0](
        resp0_brokers, topics)

    client = SimpleClient(hosts=['broker_1:4567'])

    requests = [ProduceRequestPayload(
        "topic_noleader", 0,
        [create_message(b"a"), create_message(b"b")])]

    with self.assertRaises(FailedPayloadsError):
        client.send_produce_request(requests)
def send_gzip_message(self, partition, messages):
    message = create_gzip_message(
        [(self.msg(str(msg)), None) for msg in messages])
    produce = ProduceRequestPayload(self.topic, partition, messages=[message])
    resp, = self.client.send_produce_request([produce])
    self.assertEqual(resp.error, 0)
def send_messages(client, topic, partition, messages):
    """Send messages to a topic's partition
    """
    messages = [create_message(msg(str(m))) for m in messages]
    produce = ProduceRequestPayload(topic, partition, messages=messages)
    resp, = client.send_produce_request([produce])
    assert resp.error == 0
    return [x.value for x in messages]
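# Hedged usage sketch for the helper above (the topic name and message values
# are assumed for illustration; `client` is a kafka.SimpleClient and `msg` is
# the test fixture's payload-namespacing helper):
#
#   sent = send_messages(client, 'test-topic', 0, range(100))
#   assert len(sent) == 100  # one encoded payload returned per input message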
def assert_produce_request(self, messages, initial_offset, message_ct,
                           partition=0):
    produce = ProduceRequestPayload(self.topic, partition, messages=messages)

    # There should only be one response message from the server.
    # This will throw an exception if there's more than one.
    resp = self.client.send_produce_request([produce])
    self.assert_produce_response(resp, initial_offset)

    self.assertEqual(self.current_offset(self.topic, partition),
                     initial_offset + message_ct)
def assert_produce_request(client, topic, messages, initial_offset,
                           message_ct, partition=0):
    """Verify the correctness of a produce request
    """
    produce = ProduceRequestPayload(topic, partition, messages=messages)

    # There should only be one response message from the server.
    # This will throw an exception if there's more than one.
    resp = client.send_produce_request([produce])
    assert_produce_response(resp, initial_offset)

    assert current_offset(client, topic, partition) == initial_offset + message_ct
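# Hedged usage sketch combining the fixtures above (topic name assumed):
# produce a batch and verify the partition's log-end offset advanced by
# exactly the batch size.
#
#   start = current_offset(client, 'test-topic', 0)
#   batch = [create_message(('msg %d' % i).encode()) for i in range(10)]
#   assert_produce_request(client, 'test-topic', batch, start, 10)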
def _send_messages(self, topic, partition, *msg, **kwargs):
    key = kwargs.pop('key', None)

    # Guarantee that msg is actually a list or tuple (should always be true)
    if not isinstance(msg, (list, tuple)):
        raise TypeError("msg is not a list or tuple!")

    for m in msg:
        # The protocol allows both key and payload to be null
        # (https://goo.gl/o694yN), but a (null, null) pair makes no sense.
        if m is None:
            if key is None:
                raise TypeError("key and payload can't be null in one")

        # Raise TypeError if any non-null message is not encoded as bytes
        elif not isinstance(m, six.binary_type):
            raise TypeError(
                "all produce message payloads must be null or type bytes")

    # Raise TypeError if the key is not encoded as bytes
    if key is not None and not isinstance(key, six.binary_type):
        raise TypeError("the key must be type bytes")

    if self.async_send:
        for idx, m in enumerate(msg):
            try:
                item = (TopicPartition(topic, partition), m, key)
                if self.async_queue_put_timeout == 0:
                    self.queue.put_nowait(item)
                else:
                    self.queue.put(item, True, self.async_queue_put_timeout)
            except Full:
                raise AsyncProducerQueueFull(
                    msg[idx:],
                    'Producer async queue overfilled. '
                    'Current queue size %d.' % (self.queue.qsize(),))
        resp = []
    else:
        messages = create_message_set([(m, key) for m in msg], self.codec,
                                      key, self.codec_compresslevel)
        req = ProduceRequestPayload(topic, partition, messages)
        try:
            resp = self.client.send_produce_request(
                [req], acks=self.req_acks, timeout=self.ack_timeout,
                fail_on_error=self.sync_fail_on_error)
        except Exception:
            log.exception("Unable to send messages")
            raise
    return resp
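# Hedged usage sketch (constructor arguments assumed, not from this file):
# the same _send_messages() call either blocks on a ProduceRequest or merely
# enqueues, depending on how the producer was built.
#
#   producer = SimpleProducer(client)                   # sync: returns broker responses
#   producer = SimpleProducer(client, async_send=True)  # async: enqueues
#       # ((topic, partition), msg, key) items for the background thread
#       # and returns [] immediately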
def call_back(self, topic, partition):
    payload = ProduceRequestPayload(topic=topic,
                                    partition=partition,
                                    messages=[create_message(b'test call')])
    retries = 5
    reps = []
    while retries and not reps:
        retries -= 1
        try:
            reps = self.client.send_produce_request(payloads=[payload],
                                                    fail_on_error=True)
        except LeaderNotAvailableError:
            # Leader election may still be in progress; refresh
            # metadata and retry after a short pause.
            self.client.load_metadata_for_topics()
            time.sleep(1)
    return reps
def _send_upstream(queue, client, codec, batch_time, batch_size,
                   req_acks, ack_timeout, retry_options, stop_event,
                   log_messages_on_error=ASYNC_LOG_MESSAGES_ON_ERROR,
                   stop_timeout=ASYNC_STOP_TIMEOUT_SECS,
                   codec_compresslevel=None):
    """Private method to manage producing messages asynchronously

    Listens on the queue for a specified number of messages or until
    a specified timeout and then sends messages to the brokers in grouped
    requests (one per broker).

    Messages placed on the queue should be tuples that conform to this
    format: ((topic, partition), message, key)

    Currently does not mark messages with task_done. Do not attempt to
    :meth:`join`!

    Arguments:
        queue (threading.Queue): the queue from which to get messages
        client (kafka.SimpleClient): instance to use for communicating
            with brokers
        codec (kafka.protocol.ALL_CODECS): compression codec to use
        batch_time (int): interval in seconds to send message batches
        batch_size (int): count of messages that will trigger an immediate
            send
        req_acks: required acks to use with ProduceRequests. see server
            protocol
        ack_timeout: timeout to wait for required acks. see server protocol
        retry_options (RetryOptions): settings for retry limits, backoff etc
        stop_event (threading.Event): event to monitor for shutdown signal.
            when this event is 'set', the producer will stop sending messages.
        log_messages_on_error (bool, optional): log stringified
            message-contents on any produce error, otherwise only log a
            hash() of the contents, defaults to True.
        stop_timeout (int or float, optional): number of seconds to continue
            retrying messages after stop_event is set, defaults to 30.
    """
    request_tries = {}

    while not stop_event.is_set():
        try:
            client.reinit()
        except Exception:
            log.warning('Async producer failed to connect to brokers; '
                        'backoff for %s(ms) before retrying',
                        retry_options.backoff_ms)
            time.sleep(float(retry_options.backoff_ms) / 1000)
        else:
            break

    stop_at = None
    while not (stop_event.is_set() and queue.empty() and not request_tries):

        # Handle stop_timeout
        if stop_event.is_set():
            if not stop_at:
                stop_at = stop_timeout + time.time()
            if time.time() > stop_at:
                log.debug('Async producer stopping due to stop_timeout')
                break

        timeout = batch_time
        count = batch_size
        send_at = time.time() + timeout
        msgset = defaultdict(list)

        # Merging messages will require a bit more work to manage correctly
        # for now, don't look for new batches if we have old ones to retry
        if request_tries:
            count = 0
            log.debug('Skipping new batch collection to handle retries')
        else:
            log.debug('Batching size: %s, timeout: %s', count, timeout)

        # Keep fetching till we gather enough messages or a
        # timeout is reached
        while count > 0 and timeout >= 0:
            try:
                topic_partition, msg, key = queue.get(timeout=timeout)
            except Empty:
                break

            # Check if the controller has requested us to stop
            if topic_partition == STOP_ASYNC_PRODUCER:
                stop_event.set()
                break

            # Adjust the timeout to match the remaining period
            count -= 1
            timeout = send_at - time.time()
            msgset[topic_partition].append((msg, key))

        # Send collected requests upstream
        for topic_partition, msg in msgset.items():
            messages = create_message_set(msg, codec, key, codec_compresslevel)
            req = ProduceRequestPayload(
                topic_partition.topic,
                topic_partition.partition,
                tuple(messages))
            request_tries[req] = 0

        if not request_tries:
            continue

        reqs_to_retry, error_cls = [], None
        retry_state = {
            'do_backoff': False,
            'do_refresh': False
        }

        def _handle_error(error_cls, request):
            if issubclass(error_cls, RETRY_ERROR_TYPES) or (
                    retry_options.retry_on_timeouts and
                    issubclass(error_cls, RequestTimedOutError)):
                reqs_to_retry.append(request)
            if issubclass(error_cls, RETRY_BACKOFF_ERROR_TYPES):
                retry_state['do_backoff'] |= True
            if issubclass(error_cls, RETRY_REFRESH_ERROR_TYPES):
                retry_state['do_refresh'] |= True

        requests = list(request_tries.keys())
        log.debug('Sending: %s', requests)
        responses = client.send_produce_request(requests,
                                                acks=req_acks,
                                                timeout=ack_timeout,
                                                fail_on_error=False)
        log.debug('Received: %s', responses)

        for i, response in enumerate(responses):
            error_cls = None
            if isinstance(response, FailedPayloadsError):
                error_cls = response.__class__
                orig_req = response.payload

            elif isinstance(response, ProduceResponsePayload) and response.error:
                error_cls = kafka_errors.get(response.error, UnknownError)
                orig_req = requests[i]

            if error_cls:
                _handle_error(error_cls, orig_req)
                log.error('%s sending ProduceRequestPayload (#%d of %d) '
                          'to %s:%d with msgs %s',
                          error_cls.__name__, (i + 1), len(requests),
                          orig_req.topic, orig_req.partition,
                          orig_req.messages if log_messages_on_error
                          else hash(orig_req.messages))

        if not reqs_to_retry:
            request_tries = {}
            continue

        # doing backoff before next retry
        if retry_state['do_backoff'] and retry_options.backoff_ms:
            log.warning('Async producer backoff for %s(ms) before retrying',
                        retry_options.backoff_ms)
            time.sleep(float(retry_options.backoff_ms) / 1000)

        # refresh topic metadata before next retry
        if retry_state['do_refresh']:
            log.warning('Async producer forcing metadata refresh before retrying')
            try:
                client.load_metadata_for_topics()
            except Exception:
                log.exception("Async producer couldn't reload topic metadata.")

        # Apply retry limit, dropping messages that are over
        request_tries = dict(
            (key, count + 1)
            for (key, count) in request_tries.items()
            if key in reqs_to_retry
            and (retry_options.limit is None
                 or (count < retry_options.limit))
        )

        # Log messages we are going to retry
        for orig_req in request_tries.keys():
            log.info('Retrying ProduceRequestPayload to %s:%d with msgs %s',
                     orig_req.topic, orig_req.partition,
                     orig_req.messages if log_messages_on_error
                     else hash(orig_req.messages))

    if request_tries or not queue.empty():
        log.error('Stopped producer with %d unsent messages',
                  len(request_tries) + queue.qsize())
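# Hedged sketch of the queue contract _send_upstream() consumes (sentinel
# handling is taken from the function above; the topic name is assumed):
# each item is ((topic, partition), message, key), and the controller stops
# the sender thread by enqueueing the STOP_ASYNC_PRODUCER sentinel in the
# topic-partition slot, which makes the loop set stop_event itself.
#
#   queue.put((TopicPartition('my-topic', 0), b'payload', None))
#   queue.put((STOP_ASYNC_PRODUCER, None, None))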