Ejemplo n.º 1
0
    def test_send_without_response(self):
        """Check that fire-and-forget sends leave no pending futures.

        Encodes a single produce request with ``acks=0`` and pushes it
        through the connection 100 times with ``no_ack=True``. Afterwards
        the connection's ``_requests`` collection must be empty, i.e. no
        future is left waiting for a response that will never be sent.
        """
        conn = yield from create_conn(
            self.kafka_host, self.kafka_port, loop=self.loop)

        # Build one produce payload carrying a single message.
        message_set = create_message_set([b'foo'], 0, None)
        payload = ProduceRequest(b'bar', 0, message_set)

        # acks=0: the request is encoded as "no acknowledgement required".
        encode = functools.partial(
            KafkaProtocol.encode_produce_request,
            acks=0,
            timeout=int(10 * 1000))
        request = encode(
            client_id=b"aiokafka-python",
            correlation_id=1,
            payloads=[payload])

        # Send the same encoded request repeatedly without expecting replies.
        for _ in range(100):
            conn.send(request, no_ack=True)

        # Nothing may remain queued waiting for a response.
        self.assertEqual(len(conn._requests), 0)
Ejemplo n.º 2
0
    def test_send_without_response(self):
        """Produce with ``acks=0`` and verify no futures stay queued.

        The client sends 100 produce requests flagged ``no_ack=True``; since
        no response is expected for them, ``conn._requests`` has to be empty
        once the sends are done.
        """
        conn = yield from create_conn(
            self.kafka_host, self.kafka_port, loop=self.loop)

        # Single-message payload.
        produce_req = ProduceRequest(
            b'bar', 0, create_message_set([b'foo'], 0, None))

        # Encode once; the very same bytes are re-sent on every iteration.
        encoded = KafkaProtocol.encode_produce_request(
            client_id=b"aiokafka-python",
            correlation_id=1,
            payloads=[produce_req],
            acks=0,
            timeout=int(10*1000))

        attempts = 100
        for _ in range(attempts):
            conn.send(encoded, no_ack=True)

        # No pending request futures are allowed to linger.
        pending = len(conn._requests)
        self.assertEqual(pending, 0)
Ejemplo n.º 3
0
    def test_create_message_set(self):
        """Exercise create_message_set for every codec, including a bad one.

        With the message-creation functions mocked, the default codec and
        CODEC_NONE must yield one regular message per input, GZIP and SNAPPY
        must each yield a single wrapped message, and an unknown codec must
        raise UnsupportedCodecError.
        """
        messages = [1, 2, 3]
        plain = [sentinel.message] * len(messages)

        # (extra positional args, expected message set)
        scenarios = [
            ((), plain),                                   # default codec
            ((CODEC_NONE,), plain),                        # explicit no codec
            ((CODEC_GZIP,), [sentinel.gzip_message]),      # one gzip message
            ((CODEC_SNAPPY,), [sentinel.snappy_message]),  # one snappy message
        ]
        for codec_args, expected in scenarios:
            with self.mock_create_message_fns():
                actual = create_message_set(messages, *codec_args)
            self.assertEqual(actual, expected)

        # Unknown codec should raise UnsupportedCodecError.
        with self.assertRaises(UnsupportedCodecError):
            create_message_set(messages, -1)
Ejemplo n.º 4
0
    def test_create_message_set(self):
        """Verify create_message_set output per codec for (value, key) pairs.

        The message-creation functions are mocked, so only the shape of the
        returned set is checked: one regular message per input for the
        default codec and CODEC_NONE, a single wrapped message for GZIP and
        SNAPPY, and UnsupportedCodecError for an unknown codec.
        """
        messages = [(1, "k1"), (2, "k2"), (3, "k3")]

        def check(expected, *codec):
            # Build the set under the mocks, then compare outside of them.
            with self.mock_create_message_fns():
                produced = create_message_set(messages, *codec)
            self.assertEqual(produced, expected)

        # Default codec is CODEC_NONE: list of regular messages.
        check([sentinel.message] * len(messages))

        # Explicit CODEC_NONE: list of regular messages.
        check([sentinel.message] * len(messages), CODEC_NONE)

        # CODEC_GZIP: list of one gzip-encoded message.
        check([sentinel.gzip_message], CODEC_GZIP)

        # CODEC_SNAPPY: list of one snappy-encoded message.
        check([sentinel.snappy_message], CODEC_SNAPPY)

        # An unknown codec is rejected.
        with self.assertRaises(UnsupportedCodecError):
            create_message_set(messages, -1)
Ejemplo n.º 5
0
    def _send_messages(self, topic, partition, *msg, **kwargs):
        """Validate payloads and send them, either queued or synchronously.

        Arguments:
            topic: topic the messages are produced to
            partition: partition the messages are produced to
            *msg: message payloads; each must be bytes or None
            key (keyword, optional): message key; must be bytes or None

        Returns:
            An empty list when ``self.async_send`` is set (messages are only
            queued); otherwise whatever ``client.send_produce_request``
            returns.

        Raises:
            TypeError: a payload or the key is not bytes, or a payload and
                the key are both None.
            AsyncProducerQueueFull: the async queue could not take a
                message; carries the messages that were not queued.
        """
        key = kwargs.pop('key', None)

        # *msg is always a tuple; this is purely a sanity check.
        if not isinstance(msg, (list, tuple)):
            raise TypeError("msg is not a list or tuple!")

        for payload in msg:
            if payload is not None:
                # Non-null payloads have to be already encoded as bytes.
                if not isinstance(payload, six.binary_type):
                    raise TypeError(
                        "all produce message payloads must be null or type bytes")
            elif key is None:
                # The protocol permits a (null, null) key/payload pair
                # (https://goo.gl/o694yN), but it makes no sense to send one.
                raise TypeError("key and payload can't be null in one")

        # The key, when given, has to be bytes as well.
        if not (key is None or isinstance(key, six.binary_type)):
            raise TypeError("the key must be type bytes")

        if not self.async_send:
            # Synchronous path: one produce request holding all payloads.
            message_set = create_message_set(
                [(payload, key) for payload in msg],
                self.codec, key, self.codec_compresslevel)
            request = ProduceRequestPayload(topic, partition, message_set)
            try:
                return self.client.send_produce_request(
                    [request],
                    acks=self.req_acks,
                    timeout=self.ack_timeout,
                    fail_on_error=self.sync_fail_on_error)
            except Exception:
                log.exception("Unable to send messages")
                raise

        # Asynchronous path: hand each payload to the background queue.
        for position, payload in enumerate(msg):
            try:
                entry = (TopicPartition(topic, partition), payload, key)
                if self.async_queue_put_timeout == 0:
                    self.queue.put_nowait(entry)
                else:
                    self.queue.put(entry, True, self.async_queue_put_timeout)
            except Full:
                # Report every message from the failing one onwards.
                raise AsyncProducerQueueFull(
                    msg[position:],
                    'Producer async queue overfilled. '
                    'Current queue size %d.' % (self.queue.qsize(), ))
        return []
Ejemplo n.º 6
0
    def _send_messages(self, topic, partition, *msg, **kwargs):
        """Check the payloads and key, then enqueue or send the messages.

        Arguments:
            topic: destination topic
            partition: destination partition
            *msg: payloads to produce; bytes or None each
            key (keyword, optional): bytes key used for all messages, or None

        Returns:
            ``[]`` in async mode (messages are placed on ``self.queue``),
            otherwise the result of ``self.client.send_produce_request``.

        Raises:
            TypeError: on a non-bytes payload or key, or when a payload and
                the key are both None.
            AsyncProducerQueueFull: when the async queue is full; the
                exception receives the messages that were not queued.
        """
        key = kwargs.pop('key', None)

        # Sanity check only: a *args parameter is always a tuple.
        if not isinstance(msg, (list, tuple)):
            raise TypeError("msg is not a list or tuple!")

        key_missing = key is None
        for body in msg:
            if body is None:
                # A (null, null) pair is legal on the wire
                # (https://goo.gl/o694yN) but pointless, so refuse it.
                if key_missing:
                    raise TypeError("key and payload can't be null in one")
                continue
            # Every non-null payload must already be bytes.
            if not isinstance(body, six.binary_type):
                raise TypeError("all produce message payloads must be null or type bytes")

        # Same bytes requirement for the key.
        if not key_missing and not isinstance(key, six.binary_type):
            raise TypeError("the key must be type bytes")

        if self.async_send:
            for start, body in enumerate(msg):
                try:
                    queued = (TopicPartition(topic, partition), body, key)
                    if self.async_queue_put_timeout == 0:
                        self.queue.put_nowait(queued)
                    else:
                        self.queue.put(
                            queued, True, self.async_queue_put_timeout)
                except Full:
                    # msg[start:] are the messages that did not get queued.
                    raise AsyncProducerQueueFull(
                        msg[start:],
                        'Producer async queue overfilled. '
                        'Current queue size %d.' % self.queue.qsize())
            result = []
        else:
            keyed = [(body, key) for body in msg]
            batch = create_message_set(
                keyed, self.codec, key, self.codec_compresslevel)
            payload = ProduceRequestPayload(topic, partition, batch)
            try:
                result = self.client.send_produce_request(
                    [payload],
                    acks=self.req_acks,
                    timeout=self.ack_timeout,
                    fail_on_error=self.sync_fail_on_error,
                )
            except Exception:
                log.exception("Unable to send messages")
                raise
        return result
Ejemplo n.º 7
0
 def _send(self, topic, partition, *msgs, key=None):
     """Wrap ``msgs`` in one produce request and send it via the client.

     Coroutine: returns whatever ``self._client.send_produce_request``
     yields for the single request.
     """
     # Encode all payloads with the configured codec and the optional key.
     message_set = create_message_set(msgs, self._codec, key)
     payloads = [ProduceRequest(topic, partition, message_set)]
     return (yield from self._client.send_produce_request(
         payloads,
         acks=self._req_acks,
         ack_timeout=self._ack_timeout))
Ejemplo n.º 8
0
def _send_upstream(queue,
                   client,
                   codec,
                   batch_time,
                   batch_size,
                   req_acks,
                   ack_timeout,
                   retry_options,
                   stop_event,
                   log_messages_on_error=ASYNC_LOG_MESSAGES_ON_ERROR,
                   stop_timeout=ASYNC_STOP_TIMEOUT_SECS,
                   codec_compresslevel=None):
    """Private method to manage producing messages asynchronously

    Listens on the queue for a specified number of messages or until
    a specified timeout and then sends messages to the brokers in grouped
    requests (one per broker).

    Messages placed on the queue should be tuples that conform to this format:
        ((topic, partition), message, key)

    Currently does not mark messages with task_done. Do not attempt to
    :meth:`join`!

    The function runs until ``stop_event`` is set and both the queue and the
    set of requests awaiting retry are empty, or until ``stop_timeout``
    seconds have elapsed after the stop was first observed. Anything still
    unsent at that point is reported through ``log.error``.

    Arguments:
        queue (threading.Queue): the queue from which to get messages
        client (kafka.SimpleClient): instance to use for communicating
            with brokers
        codec (kafka.protocol.ALL_CODECS): compression codec to use
        batch_time (int): interval in seconds to send message batches
        batch_size (int): count of messages that will trigger an immediate send
        req_acks: required acks to use with ProduceRequests. see server protocol
        ack_timeout: timeout to wait for required acks. see server protocol
        retry_options (RetryOptions): settings for retry limits, backoff etc
        stop_event (threading.Event): event to monitor for shutdown signal.
            when this event is 'set', the producer will stop sending messages.
        log_messages_on_error (bool, optional): log stringified message-contents
            on any produce error, otherwise only log a hash() of the contents,
            defaults to True.
        stop_timeout (int or float, optional): number of seconds to continue
            retrying messages after stop_event is set, defaults to 30.
        codec_compresslevel (optional): forwarded unchanged to
            create_message_set; presumably the compression level for the
            chosen codec -- TODO confirm against create_message_set.
    """
    # Maps each outstanding ProduceRequestPayload to its retry count so far.
    request_tries = {}

    # Keep trying to (re)initialise the client until it succeeds or a stop
    # is requested, sleeping for the configured backoff between attempts.
    while not stop_event.is_set():
        try:
            client.reinit()
        except Exception as e:
            log.warning(
                'Async producer failed to connect to brokers; backoff for %s(ms) before retrying',
                retry_options.backoff_ms)
            time.sleep(float(retry_options.backoff_ms) / 1000)
        else:
            break

    # Deadline for giving up after a stop; set lazily on the first pass that
    # observes stop_event.
    stop_at = None
    while not (stop_event.is_set() and queue.empty() and not request_tries):

        # Handle stop_timeout
        if stop_event.is_set():
            if not stop_at:
                stop_at = stop_timeout + time.time()
            if time.time() > stop_at:
                log.debug('Async producer stopping due to stop_timeout')
                break

        timeout = batch_time
        count = batch_size
        send_at = time.time() + timeout
        # Collected (msg, key) pairs, grouped by their topic/partition.
        msgset = defaultdict(list)

        # Merging messages will require a bit more work to manage correctly
        # for now, don't look for new batches if we have old ones to retry
        if request_tries:
            count = 0
            log.debug('Skipping new batch collection to handle retries')
        else:
            log.debug('Batching size: %s, timeout: %s', count, timeout)

        # Keep fetching till we gather enough messages or a
        # timeout is reached
        while count > 0 and timeout >= 0:
            try:
                topic_partition, msg, key = queue.get(timeout=timeout)
            except Empty:
                break

            # Check if the controller has requested us to stop
            if topic_partition == STOP_ASYNC_PRODUCER:
                stop_event.set()
                break

            # Adjust the timeout to match the remaining period
            count -= 1
            timeout = send_at - time.time()
            msgset[topic_partition].append((msg, key))

        # Send collected requests upstream
        # NOTE(review): `key` here is whatever the most recent queue.get()
        # returned, not a per-partition value -- confirm this is intended.
        for topic_partition, msg in msgset.items():
            messages = create_message_set(msg, codec, key, codec_compresslevel)
            # The message list is turned into a tuple before building the
            # payload, which is then used as a dict key below.
            req = ProduceRequestPayload(topic_partition.topic,
                                        topic_partition.partition,
                                        tuple(messages))
            request_tries[req] = 0

        # Nothing new and nothing to retry: go back to waiting.
        if not request_tries:
            continue

        reqs_to_retry, error_cls = [], None
        # A dict (rather than plain booleans) so that the nested function
        # below can update the flags.
        retry_state = {'do_backoff': False, 'do_refresh': False}

        def _handle_error(error_cls, request):
            # Classify the error: schedule the request for retry and/or
            # flag that a backoff or a metadata refresh is needed.
            if issubclass(error_cls, RETRY_ERROR_TYPES) or (
                    retry_options.retry_on_timeouts
                    and issubclass(error_cls, RequestTimedOutError)):
                reqs_to_retry.append(request)
            if issubclass(error_cls, RETRY_BACKOFF_ERROR_TYPES):
                retry_state['do_backoff'] |= True
            if issubclass(error_cls, RETRY_REFRESH_ERROR_TYPES):
                retry_state['do_refresh'] |= True

        requests = list(request_tries.keys())
        log.debug('Sending: %s', requests)
        # fail_on_error=False: failures are inspected in the responses below.
        responses = client.send_produce_request(requests,
                                                acks=req_acks,
                                                timeout=ack_timeout,
                                                fail_on_error=False)

        log.debug('Received: %s', responses)
        for i, response in enumerate(responses):
            error_cls = None
            if isinstance(response, FailedPayloadsError):
                # The failure object carries the payload it belongs to.
                error_cls = response.__class__
                orig_req = response.payload

            elif isinstance(response,
                            ProduceResponsePayload) and response.error:
                # Map the error code to an error class; the originating
                # request is looked up by response position.
                error_cls = kafka_errors.get(response.error, UnknownError)
                orig_req = requests[i]

            if error_cls:
                _handle_error(error_cls, orig_req)
                log.error(
                    '%s sending ProduceRequestPayload (#%d of %d) '
                    'to %s:%d with msgs %s', error_cls.__name__, (i + 1),
                    len(requests), orig_req.topic, orig_req.partition,
                    orig_req.messages
                    if log_messages_on_error else hash(orig_req.messages))

        # No request needs a retry: drop all bookkeeping and start over.
        if not reqs_to_retry:
            request_tries = {}
            continue

        # doing backoff before next retry
        if retry_state['do_backoff'] and retry_options.backoff_ms:
            log.warning('Async producer backoff for %s(ms) before retrying',
                        retry_options.backoff_ms)
            time.sleep(float(retry_options.backoff_ms) / 1000)

        # refresh topic metadata before next retry
        if retry_state['do_refresh']:
            log.warning(
                'Async producer forcing metadata refresh metadata before retrying'
            )
            try:
                client.load_metadata_for_topics()
            except Exception:
                log.exception("Async producer couldn't reload topic metadata.")

        # Apply retry limit, dropping messages that are over
        # (a limit of None means retry without bound)
        request_tries = dict(
            (key, count + 1) for (key, count) in request_tries.items()
            if key in reqs_to_retry and (
                retry_options.limit is None or (count < retry_options.limit)))

        # Log messages we are going to retry
        for orig_req in request_tries.keys():
            log.info(
                'Retrying ProduceRequestPayload to %s:%d with msgs %s',
                orig_req.topic, orig_req.partition, orig_req.messages
                if log_messages_on_error else hash(orig_req.messages))

    # Report whatever could not be delivered before the loop ended.
    if request_tries or not queue.empty():
        log.error('Stopped producer with %d unsent messages',
                  len(request_tries) + queue.qsize())
Ejemplo n.º 9
0
 def _send(self, topic, partition, *msgs, key=None):
     """Send ``msgs`` to ``topic``/``partition`` as a single produce request.

     Coroutine: the messages are encoded with ``self._codec`` (and the
     optional ``key``); the client's response is returned unchanged.
     """
     request = ProduceRequest(
         topic, partition, create_message_set(msgs, self._codec, key))
     response = yield from self._client.send_produce_request(
         [request],
         acks=self._req_acks,
         ack_timeout=self._ack_timeout,
     )
     return response
Ejemplo n.º 10
0
def _send_upstream(queue, client, codec, batch_time, batch_size,
                   req_acks, ack_timeout, retry_options, stop_event,
                   log_messages_on_error=ASYNC_LOG_MESSAGES_ON_ERROR,
                   stop_timeout=ASYNC_STOP_TIMEOUT_SECS,
                   codec_compresslevel=None):
    """Private method to manage producing messages asynchronously

    Listens on the queue for a specified number of messages or until
    a specified timeout and then sends messages to the brokers in grouped
    requests (one per broker).

    Messages placed on the queue should be tuples that conform to this format:
        ((topic, partition), message, key)

    Currently does not mark messages with task_done. Do not attempt to
    :meth:`join`!

    The function runs until ``stop_event`` is set and both the queue and the
    set of requests awaiting retry are empty, or until ``stop_timeout``
    seconds have elapsed after the stop was first observed. Anything still
    unsent at that point is reported through ``log.error``.

    Arguments:
        queue (threading.Queue): the queue from which to get messages
        client (kafka.SimpleClient): instance to use for communicating
            with brokers
        codec (kafka.protocol.ALL_CODECS): compression codec to use
        batch_time (int): interval in seconds to send message batches
        batch_size (int): count of messages that will trigger an immediate send
        req_acks: required acks to use with ProduceRequests. see server protocol
        ack_timeout: timeout to wait for required acks. see server protocol
        retry_options (RetryOptions): settings for retry limits, backoff etc
        stop_event (threading.Event): event to monitor for shutdown signal.
            when this event is 'set', the producer will stop sending messages.
        log_messages_on_error (bool, optional): log stringified message-contents
            on any produce error, otherwise only log a hash() of the contents,
            defaults to True.
        stop_timeout (int or float, optional): number of seconds to continue
            retrying messages after stop_event is set, defaults to 30.
        codec_compresslevel (optional): forwarded unchanged to
            create_message_set; presumably the compression level for the
            chosen codec -- TODO confirm against create_message_set.
    """
    # Maps each outstanding ProduceRequestPayload to its retry count so far.
    request_tries = {}

    # Keep trying to (re)initialise the client until it succeeds or a stop
    # is requested, sleeping for the configured backoff between attempts.
    while not stop_event.is_set():
        try:
            client.reinit()
        except Exception:
            # log.warning replaces the deprecated log.warn alias.
            log.warning(
                'Async producer failed to connect to brokers; '
                'backoff for %s(ms) before retrying',
                retry_options.backoff_ms)
            time.sleep(float(retry_options.backoff_ms) / 1000)
        else:
            break

    # Deadline for giving up after a stop; set lazily on the first pass that
    # observes stop_event.
    stop_at = None
    while not (stop_event.is_set() and queue.empty() and not request_tries):

        # Handle stop_timeout
        if stop_event.is_set():
            if not stop_at:
                stop_at = stop_timeout + time.time()
            if time.time() > stop_at:
                log.debug('Async producer stopping due to stop_timeout')
                break

        timeout = batch_time
        count = batch_size
        send_at = time.time() + timeout
        # Collected (msg, key) pairs, grouped by their topic/partition.
        msgset = defaultdict(list)

        # Merging messages will require a bit more work to manage correctly
        # for now, don't look for new batches if we have old ones to retry
        if request_tries:
            count = 0
            log.debug('Skipping new batch collection to handle retries')
        else:
            log.debug('Batching size: %s, timeout: %s', count, timeout)

        # Keep fetching till we gather enough messages or a
        # timeout is reached
        while count > 0 and timeout >= 0:
            try:
                topic_partition, msg, key = queue.get(timeout=timeout)
            except Empty:
                break

            # Check if the controller has requested us to stop
            if topic_partition == STOP_ASYNC_PRODUCER:
                stop_event.set()
                break

            # Adjust the timeout to match the remaining period
            count -= 1
            timeout = send_at - time.time()
            msgset[topic_partition].append((msg, key))

        # Send collected requests upstream
        # NOTE(review): `key` here is whatever the most recent queue.get()
        # returned, not a per-partition value -- confirm this is intended.
        for topic_partition, msg in msgset.items():
            messages = create_message_set(msg, codec, key, codec_compresslevel)
            # The message list is turned into a tuple before building the
            # payload, which is then used as a dict key below.
            req = ProduceRequestPayload(
                topic_partition.topic,
                topic_partition.partition,
                tuple(messages))
            request_tries[req] = 0

        # Nothing new and nothing to retry: go back to waiting.
        if not request_tries:
            continue

        reqs_to_retry, error_cls = [], None
        # A dict (rather than plain booleans) so that the nested function
        # below can update the flags.
        retry_state = {
            'do_backoff': False,
            'do_refresh': False
        }

        def _handle_error(error_cls, request):
            # Classify the error: schedule the request for retry and/or
            # flag that a backoff or a metadata refresh is needed.
            if issubclass(error_cls, RETRY_ERROR_TYPES) or (
                    retry_options.retry_on_timeouts and
                    issubclass(error_cls, RequestTimedOutError)):
                reqs_to_retry.append(request)
            if issubclass(error_cls, RETRY_BACKOFF_ERROR_TYPES):
                retry_state['do_backoff'] = True
            if issubclass(error_cls, RETRY_REFRESH_ERROR_TYPES):
                retry_state['do_refresh'] = True

        requests = list(request_tries.keys())
        log.debug('Sending: %s', requests)
        # fail_on_error=False: failures are inspected in the responses below.
        responses = client.send_produce_request(requests,
                                                acks=req_acks,
                                                timeout=ack_timeout,
                                                fail_on_error=False)

        log.debug('Received: %s', responses)
        for i, response in enumerate(responses):
            error_cls = None
            if isinstance(response, FailedPayloadsError):
                # The failure object carries the payload it belongs to.
                error_cls = response.__class__
                orig_req = response.payload

            elif isinstance(response, ProduceResponsePayload) and response.error:
                # Map the error code to an error class; the originating
                # request is looked up by response position.
                error_cls = kafka_errors.get(response.error, UnknownError)
                orig_req = requests[i]

            if error_cls:
                _handle_error(error_cls, orig_req)
                log.error('%s sending ProduceRequestPayload (#%d of %d) '
                          'to %s:%d with msgs %s',
                          error_cls.__name__, (i + 1), len(requests),
                          orig_req.topic, orig_req.partition,
                          orig_req.messages if log_messages_on_error
                          else hash(orig_req.messages))

        # No request needs a retry: drop all bookkeeping and start over.
        if not reqs_to_retry:
            request_tries = {}
            continue

        # doing backoff before next retry
        if retry_state['do_backoff'] and retry_options.backoff_ms:
            log.warning('Async producer backoff for %s(ms) before retrying',
                        retry_options.backoff_ms)
            time.sleep(float(retry_options.backoff_ms) / 1000)

        # refresh topic metadata before next retry
        if retry_state['do_refresh']:
            log.warning('Async producer forcing metadata refresh metadata before retrying')
            try:
                client.load_metadata_for_topics()
            except Exception:
                log.exception("Async producer couldn't reload topic metadata.")

        # Apply retry limit, dropping messages that are over
        # (a limit of None means retry without bound)
        request_tries = dict(
            (request, tries + 1)
            for (request, tries) in request_tries.items()
            if request in reqs_to_retry and (
                retry_options.limit is None or
                tries < retry_options.limit)
        )

        # Log messages we are going to retry
        for orig_req in request_tries.keys():
            log.info('Retrying ProduceRequestPayload to %s:%d with msgs %s',
                     orig_req.topic, orig_req.partition,
                     orig_req.messages if log_messages_on_error
                     else hash(orig_req.messages))

    # Report whatever could not be delivered before the loop ended.
    if request_tries or not queue.empty():
        log.error('Stopped producer with %d unsent messages',
                  len(request_tries) + queue.qsize())