def test_send_without_response(self):
    """Imitate a producer without acknowledgement: the client produces
    messages, Kafka sends no response, and we make sure the futures do
    not get stuck in the queue forever."""
    host, port = self.kafka_host, self.kafka_port
    conn = yield from create_conn(host, port, loop=self.loop)

    # prepare message
    msgs = create_message_set([b'foo'], 0, None)
    req = ProduceRequest(b'bar', 0, msgs)
    encoder = functools.partial(
        KafkaProtocol.encode_produce_request,
        acks=0, timeout=int(10 * 1000))
    request_id = 1
    client_id = b"aiokafka-python"
    request = encoder(client_id=client_id, correlation_id=request_id,
                      payloads=[req])

    # produce messages without acknowledgement
    for _ in range(100):
        conn.send(request, no_ack=True)
    # make sure futures are not stuck in the queue
    self.assertEqual(len(conn._requests), 0)
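# Why the queue stays empty: a minimal, hypothetical sketch of the
# connection's no_ack path (class and attribute names are assumptions for
# illustration, not aiokafka source). With acks=0 the broker sends no
# response, so the future is resolved immediately instead of being parked
# in the pending-request list.
import asyncio
import struct


class _NoAckConnSketch:

    def __init__(self, writer, loop):
        self._writer = writer
        self._loop = loop
        self._requests = []  # futures awaiting broker responses

    def send(self, request, no_ack=False):
        # Kafka wire format: 4-byte big-endian size prefix, then the payload
        self._writer.write(struct.pack('>i', len(request)) + request)
        fut = asyncio.Future(loop=self._loop)
        if no_ack:
            fut.set_result(None)  # nothing to wait for with acks=0
        else:
            self._requests.append(fut)  # completed when a response arrives
        return fut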
def test_create_message_set(self):
    messages = [(1, "k1"), (2, "k2"), (3, "k3")]

    # Default codec is CODEC_NONE. Expect a list of regular messages.
    expect = [sentinel.message] * len(messages)
    with self.mock_create_message_fns():
        message_set = create_message_set(messages)
    self.assertEqual(message_set, expect)

    # CODEC_NONE: expect a list of regular messages.
    expect = [sentinel.message] * len(messages)
    with self.mock_create_message_fns():
        message_set = create_message_set(messages, CODEC_NONE)
    self.assertEqual(message_set, expect)

    # CODEC_GZIP: expect a list of one gzip-encoded message.
    expect = [sentinel.gzip_message]
    with self.mock_create_message_fns():
        message_set = create_message_set(messages, CODEC_GZIP)
    self.assertEqual(message_set, expect)

    # CODEC_SNAPPY: expect a list of one snappy-encoded message.
    expect = [sentinel.snappy_message]
    with self.mock_create_message_fns():
        message_set = create_message_set(messages, CODEC_SNAPPY)
    self.assertEqual(message_set, expect)

    # An unknown codec should raise UnsupportedCodecError.
    with self.assertRaises(UnsupportedCodecError):
        create_message_set(messages, -1)
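# The test above relies on a mock_create_message_fns() context manager.
# A plausible implementation is sketched below (the patch targets are
# assumptions; the real module path may differ): it replaces the three
# message constructors so the assertions can compare against sentinels
# instead of real encoded payloads.
from contextlib import contextmanager
from unittest.mock import patch, sentinel


@contextmanager
def mock_create_message_fns(self):
    # Intended to live on the test case class, hence `self`.
    with patch('kafka.protocol.create_message',
               return_value=sentinel.message), \
         patch('kafka.protocol.create_gzip_message',
               return_value=sentinel.gzip_message), \
         patch('kafka.protocol.create_snappy_message',
               return_value=sentinel.snappy_message):
        yield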
def _send_messages(self, topic, partition, *msg, **kwargs):
    key = kwargs.pop('key', None)

    # Guarantee that msg is actually a list or tuple (should always be true)
    if not isinstance(msg, (list, tuple)):
        raise TypeError("msg is not a list or tuple!")

    for m in msg:
        # The protocol allows both key and payload to be null
        # (https://goo.gl/o694yN), but a (null, null) pair makes no sense.
        if m is None:
            if key is None:
                raise TypeError("key and payload can't both be null")
        # Raise TypeError if any non-null message is not encoded as bytes
        elif not isinstance(m, six.binary_type):
            raise TypeError(
                "all produce message payloads must be null or type bytes")

    # Raise TypeError if the key is not encoded as bytes
    if key is not None and not isinstance(key, six.binary_type):
        raise TypeError("the key must be type bytes")

    if self.async_send:
        for idx, m in enumerate(msg):
            try:
                item = (TopicPartition(topic, partition), m, key)
                if self.async_queue_put_timeout == 0:
                    self.queue.put_nowait(item)
                else:
                    self.queue.put(item, True, self.async_queue_put_timeout)
            except Full:
                raise AsyncProducerQueueFull(
                    msg[idx:],
                    'Producer async queue overfilled. '
                    'Current queue size %d.' % self.queue.qsize())
        resp = []
    else:
        messages = create_message_set(
            [(m, key) for m in msg], self.codec, key,
            self.codec_compresslevel)
        req = ProduceRequestPayload(topic, partition, messages)
        try:
            resp = self.client.send_produce_request(
                [req], acks=self.req_acks, timeout=self.ack_timeout,
                fail_on_error=self.sync_fail_on_error)
        except Exception:
            log.exception("Unable to send messages")
            raise
    return resp
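# Hedged usage sketch: _send_messages() is normally reached through the
# legacy public producers (class names from kafka-python's legacy API;
# the broker address is a placeholder).
from kafka import SimpleClient, SimpleProducer, KeyedProducer

client = SimpleClient('localhost:9092')

producer = SimpleProducer(client)        # picks partitions round-robin
producer.send_messages(b'my-topic', b'payload-1', b'payload-2')

keyed = KeyedProducer(client)            # routes by key via a partitioner
keyed.send_messages(b'my-topic', b'my-key', b'payload')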
def _send(self, topic, partition, *msgs, key=None):
    messages = create_message_set(msgs, self._codec, key)
    req = ProduceRequest(topic, partition, messages)
    resp = yield from self._client.send_produce_request(
        [req], acks=self._req_acks, ack_timeout=self._ack_timeout)
    return resp
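# Hedged sketch: driving the coroutine above directly from another
# coroutine. In practice a public send_messages() wrapper would pick the
# partition first; the topic and payloads here are placeholders.
import asyncio


@asyncio.coroutine
def _produce_example(producer):
    resp = yield from producer._send(b'my-topic', 0, b'msg-1', b'msg-2',
                                     key=None)
    return resp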
def _send_upstream(queue, client, codec, batch_time, batch_size,
                   req_acks, ack_timeout, retry_options, stop_event,
                   log_messages_on_error=ASYNC_LOG_MESSAGES_ON_ERROR,
                   stop_timeout=ASYNC_STOP_TIMEOUT_SECS,
                   codec_compresslevel=None):
    """Private method to manage producing messages asynchronously

    Listens on the queue for a specified number of messages or until a
    specified timeout and then sends messages to the brokers in grouped
    requests (one per broker).

    Messages placed on the queue should be tuples that conform to this
    format:
        ((topic, partition), message, key)

    Currently does not mark messages with task_done. Do not attempt to
    :meth:`join`!

    Arguments:
        queue (threading.Queue): the queue from which to get messages
        client (kafka.SimpleClient): instance to use for communicating
            with brokers
        codec (kafka.protocol.ALL_CODECS): compression codec to use
        batch_time (int): interval in seconds to send message batches
        batch_size (int): count of messages that will trigger an immediate
            send
        req_acks: required acks to use with ProduceRequests. see server
            protocol
        ack_timeout: timeout to wait for required acks. see server protocol
        retry_options (RetryOptions): settings for retry limits, backoff etc
        stop_event (threading.Event): event to monitor for shutdown signal.
            when this event is 'set', the producer will stop sending
            messages.
        log_messages_on_error (bool, optional): log stringified
            message-contents on any produce error, otherwise only log a
            hash() of the contents, defaults to True.
        stop_timeout (int or float, optional): number of seconds to continue
            retrying messages after stop_event is set, defaults to 30.
    """
    request_tries = {}

    while not stop_event.is_set():
        try:
            client.reinit()
        except Exception:
            log.warning(
                'Async producer failed to connect to brokers; backoff for '
                '%s(ms) before retrying', retry_options.backoff_ms)
            time.sleep(float(retry_options.backoff_ms) / 1000)
        else:
            break

    stop_at = None
    while not (stop_event.is_set() and queue.empty() and not request_tries):

        # Handle stop_timeout
        if stop_event.is_set():
            if not stop_at:
                stop_at = stop_timeout + time.time()
            if time.time() > stop_at:
                log.debug('Async producer stopping due to stop_timeout')
                break

        timeout = batch_time
        count = batch_size
        send_at = time.time() + timeout
        msgset = defaultdict(list)

        # Merging messages will require a bit more work to manage correctly;
        # for now, don't look for new batches if we have old ones to retry
        if request_tries:
            count = 0
            log.debug('Skipping new batch collection to handle retries')
        else:
            log.debug('Batching size: %s, timeout: %s', count, timeout)

        # Keep fetching till we gather enough messages or a
        # timeout is reached
        while count > 0 and timeout >= 0:
            try:
                topic_partition, msg, key = queue.get(timeout=timeout)
            except Empty:
                break

            # Check if the controller has requested us to stop
            if topic_partition == STOP_ASYNC_PRODUCER:
                stop_event.set()
                break

            # Adjust the timeout to match the remaining period
            count -= 1
            timeout = send_at - time.time()
            msgset[topic_partition].append((msg, key))

        # Send collected requests upstream
        for topic_partition, msg in msgset.items():
            messages = create_message_set(msg, codec, key,
                                          codec_compresslevel)
            req = ProduceRequestPayload(topic_partition.topic,
                                        topic_partition.partition,
                                        tuple(messages))
            request_tries[req] = 0

        if not request_tries:
            continue

        reqs_to_retry, error_cls = [], None
        retry_state = {'do_backoff': False, 'do_refresh': False}

        def _handle_error(error_cls, request):
            if issubclass(error_cls, RETRY_ERROR_TYPES) or (
                    retry_options.retry_on_timeouts and
                    issubclass(error_cls, RequestTimedOutError)):
                reqs_to_retry.append(request)
            if issubclass(error_cls, RETRY_BACKOFF_ERROR_TYPES):
                retry_state['do_backoff'] |= True
            if issubclass(error_cls, RETRY_REFRESH_ERROR_TYPES):
                retry_state['do_refresh'] |= True

        requests = list(request_tries.keys())
        log.debug('Sending: %s', requests)
        responses = client.send_produce_request(requests,
                                                acks=req_acks,
                                                timeout=ack_timeout,
                                                fail_on_error=False)
        log.debug('Received: %s', responses)

        for i, response in enumerate(responses):
            error_cls = None
            if isinstance(response, FailedPayloadsError):
                error_cls = response.__class__
                orig_req = response.payload
            elif isinstance(response, ProduceResponsePayload) and response.error:
                error_cls = kafka_errors.get(response.error, UnknownError)
                orig_req = requests[i]

            if error_cls:
                _handle_error(error_cls, orig_req)
                log.error(
                    '%s sending ProduceRequestPayload (#%d of %d) '
                    'to %s:%d with msgs %s',
                    error_cls.__name__, (i + 1), len(requests),
                    orig_req.topic, orig_req.partition,
                    orig_req.messages if log_messages_on_error
                    else hash(orig_req.messages))

        if not reqs_to_retry:
            request_tries = {}
            continue

        # do backoff before the next retry
        if retry_state['do_backoff'] and retry_options.backoff_ms:
            log.warning('Async producer backoff for %s(ms) before retrying',
                        retry_options.backoff_ms)
            time.sleep(float(retry_options.backoff_ms) / 1000)

        # refresh topic metadata before the next retry
        if retry_state['do_refresh']:
            log.warning('Async producer forcing metadata refresh before '
                        'retrying')
            try:
                client.load_metadata_for_topics()
            except Exception:
                log.exception("Async producer couldn't reload topic metadata.")

        # Apply retry limit, dropping messages that are over it
        request_tries = dict(
            (key, count + 1)
            for (key, count) in request_tries.items()
            if key in reqs_to_retry and (
                retry_options.limit is None or
                (count < retry_options.limit)))

        # Log messages we are going to retry
        for orig_req in request_tries.keys():
            log.info(
                'Retrying ProduceRequestPayload to %s:%d with msgs %s',
                orig_req.topic, orig_req.partition,
                orig_req.messages if log_messages_on_error
                else hash(orig_req.messages))

    if request_tries or not queue.empty():
        log.error('Stopped producer with %d unsent messages',
                  len(request_tries) + queue.qsize())
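# Hedged sketch of the queue protocol documented in the docstring above:
# items are ((topic, partition), message, key) tuples, and
# STOP_ASYNC_PRODUCER is the module-level sentinel the worker checks in the
# topic-partition slot to shut down (the real producer enqueues a
# TopicPartition namedtuple; a plain tuple is shown for brevity).
try:
    from queue import Queue          # Python 3
except ImportError:
    from Queue import Queue          # Python 2

queue = Queue()
queue.put(((b'my-topic', 0), b'payload', None))   # a normal message, no key
queue.put((STOP_ASYNC_PRODUCER, None, None))      # ask the worker to stop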