Example #1
    def test_send_produce_request_maintains_request_response_order(self):

        self.client.ensure_topic_exists('foo')
        self.client.ensure_topic_exists('bar')

        requests = [
            ProduceRequestPayload('foo', 0,
                                  [create_message(b'a'),
                                   create_message(b'b')]),
            ProduceRequestPayload('bar', 1,
                                  [create_message(b'a'),
                                   create_message(b'b')]),
            ProduceRequestPayload('foo', 1,
                                  [create_message(b'a'),
                                   create_message(b'b')]),
            ProduceRequestPayload('bar', 0,
                                  [create_message(b'a'),
                                   create_message(b'b')]),
        ]

        responses = self.client.send_produce_request(requests)
        while responses:
            request = requests.pop()
            response = responses.pop()
            self.assertEqual(request.topic, response.topic)
            self.assertEqual(request.partition, response.partition)
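Because send_produce_request returns responses in the same order as the submitted payloads, the pairing can also be checked with zip instead of popping from both lists; a minimal sketch under that assumption, reusing the client and requests from the test above:

        # Hypothetical variant of the loop above: zip pairs the i-th response
        # with the i-th request, relying on the preserved ordering.
        for request, response in zip(requests, responses):
            self.assertEqual(request.topic, response.topic)
            self.assertEqual(request.partition, response.partition)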
Example #2
    def test_encode_produce_request(self):
        requests = [
            ProduceRequestPayload("topic1", 0, [
                kafka.protocol.message.Message(b"a"),
                kafka.protocol.message.Message(b"b")
            ]),
            ProduceRequestPayload("topic2", 1,
                                  [kafka.protocol.message.Message(b"c")])
        ]

        msg_a_binary = KafkaProtocol._encode_message(create_message(b"a"))
        msg_b_binary = KafkaProtocol._encode_message(create_message(b"b"))
        msg_c_binary = KafkaProtocol._encode_message(create_message(b"c"))

        header = b"".join([
            struct.pack('>i', 0x94),  # The length of the message overall
            struct.pack('>h', 0),  # Msg Header, Message type = Produce
            struct.pack('>h', 0),  # Msg Header, API version
            struct.pack('>i', 2),  # Msg Header, Correlation ID
            struct.pack('>h7s', 7, b"client1"),  # Msg Header, The client ID
            struct.pack('>h', 2),  # Num acks required
            struct.pack('>i', 100),  # Request Timeout
            struct.pack('>i', 2),  # The number of requests
        ])

        total_len = len(msg_a_binary) + len(msg_b_binary)
        topic1 = b"".join([
            struct.pack('>h6s', 6, b'topic1'),  # The topic1
            struct.pack('>i', 1),  # One message set
            struct.pack('>i', 0),  # Partition 0
            struct.pack('>i',
                        total_len + 24),  # Size of the incoming message set
            struct.pack('>q', 0),  # No offset specified
            struct.pack('>i', len(msg_a_binary)),  # Length of message
            msg_a_binary,  # Actual message
            struct.pack('>q', 0),  # No offset specified
            struct.pack('>i', len(msg_b_binary)),  # Length of message
            msg_b_binary,  # Actual message
        ])

        topic2 = b"".join([
            struct.pack('>h6s', 6, b'topic2'),  # The topic2
            struct.pack('>i', 1),  # One message set
            struct.pack('>i', 1),  # Partition 1
            struct.pack('>i',
                        len(msg_c_binary) +
                        12),  # Size of the incoming message set
            struct.pack('>q', 0),  # No offset specified
            struct.pack('>i', len(msg_c_binary)),  # Length of message
            msg_c_binary,  # Actual message
        ])

        expected1 = b"".join([header, topic1, topic2])
        expected2 = b"".join([header, topic2, topic1])

        encoded = KafkaProtocol.encode_produce_request(b"client1", 2, requests,
                                                       2, 100)
        self.assertIn(encoded, [expected1, expected2])
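The 0x94 size prefix covers everything after its own four bytes; a minimal arithmetic sanity check, reusing the header, topic1 and topic2 buffers built above (a hypothetical addition, not part of the original test):

        # The length prefix excludes the four bytes it occupies, so the rest
        # of the header plus both topic blocks must total 0x94 (148) bytes.
        self.assertEqual((len(header) - 4) + len(topic1) + len(topic2), 0x94)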
Example #3
    def send_messages(self, partition, messages):
        messages = [create_message(self.msg(str(msg))) for msg in messages]
        produce = ProduceRequestPayload(self.topic, partition, messages=messages)
        resp, = self.client.send_produce_request([produce])
        self.assertEqual(resp.error, 0)

        return [x.value for x in messages]
Example #4
    def test_send_produce_request_raises_when_topic_unknown(
            self, protocol, conn):

        mock_conn(conn)

        brokers = [
            BrokerMetadata(0, 'broker_1', 4567, None),
            BrokerMetadata(1, 'broker_2', 5678, None)
        ]
        resp0_brokers = list(map(itemgetter(0, 1, 2), brokers))

        topics = [
            (UNKNOWN_TOPIC_OR_PARTITION, 'topic_doesnt_exist', []),
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse[0](
            resp0_brokers, topics)

        client = SimpleClient(hosts=['broker_1:4567'])

        requests = [
            ProduceRequestPayload(
                "topic_doesnt_exist", 0,
                [create_message("a"), create_message("b")])
        ]

        with self.assertRaises(FailedPayloadsError):
            client.send_produce_request(requests)
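The raised FailedPayloadsError references the payload that could not be routed; a minimal sketch of inspecting it, assuming the same client and requests, and the payload attribute that Example #12 reads off the same error class:

        # Hypothetical follow-up: capture the exception and check which
        # payload failed (Example #12 reads the same 'payload' attribute).
        with self.assertRaises(FailedPayloadsError) as ctx:
            client.send_produce_request(requests)
        self.assertEqual(ctx.exception.payload.topic, 'topic_doesnt_exist')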
Example #5
    def test_send_produce_request_raises_when_noleader(self, protocol, conn):
        mock_conn(conn)

        brokers = [
            BrokerMetadata(0, 'broker_1', 4567, None),
            BrokerMetadata(1, 'broker_2', 5678, None)
        ]
        resp0_brokers = list(map(itemgetter(0, 1, 2), brokers))

        topics = [
            (NO_ERROR, 'topic_noleader', [
                (NO_LEADER, 0, -1, [], []),
                (NO_LEADER, 1, -1, [], []),
            ]),
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse[0](
            resp0_brokers, topics)

        client = SimpleClient(hosts=['broker_1:4567'])

        requests = [
            ProduceRequestPayload(
                "topic_noleader", 0,
                [create_message("a"), create_message("b")])
        ]

        with self.assertRaises(FailedPayloadsError):
            client.send_produce_request(requests)
Example #6
    def send_gzip_message(self, partition, messages):
        message = create_gzip_message([(self.msg(str(msg)), None)
                                       for msg in messages])
        produce = ProduceRequestPayload(self.topic,
                                        partition,
                                        messages=[message])
        resp, = self.client.send_produce_request([produce])
        self.assertEqual(resp.error, 0)
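Compression changes the shape of the payload: the whole batch travels as a single wrapper message. A minimal standalone sketch of the same pattern, with the topic name and payloads assumed, reusing the create_gzip_message and ProduceRequestPayload names from the examples:

# Hypothetical standalone usage: the entire batch is wrapped in one
# gzip-compressed message, so messages=[...] holds a single entry.
batch = [(b'msg-%d' % i, None) for i in range(10)]
wrapper = create_gzip_message(batch)
produce = ProduceRequestPayload('my_topic', 0, messages=[wrapper])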
Example #7
def send_messages(client, topic, partition, messages):
    """Send messages to a topic's partition
    """
    messages = [create_message(msg(str(m))) for m in messages]
    produce = ProduceRequestPayload(topic, partition, messages=messages)
    resp, = client.send_produce_request([produce])
    assert resp.error == 0

    return [x.value for x in messages]
Example #8
    def assert_produce_request(self, messages, initial_offset, message_ct,
                               partition=0):
        produce = ProduceRequestPayload(self.topic, partition, messages=messages)

        # There should only be one response message from the server.
        # This will throw an exception if there's more than one.
        resp = self.client.send_produce_request([produce])
        self.assert_produce_response(resp, initial_offset)

        self.assertEqual(self.current_offset(self.topic, partition), initial_offset + message_ct)
Example #9
def assert_produce_request(client, topic, messages, initial_offset, message_ct,
                           partition=0):
    """Verify the correctness of a produce request
    """
    produce = ProduceRequestPayload(topic, partition, messages=messages)

    # There should only be one response message from the server.
    # This will throw an exception if there's more than one.
    resp = client.send_produce_request([produce])
    assert_produce_response(resp, initial_offset)

    assert current_offset(client, topic, partition) == initial_offset + message_ct
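assert_produce_response and current_offset are helpers defined elsewhere; a minimal sketch of what assert_produce_response could look like, assuming the single-element response list holds a ProduceResponsePayload with error and offset fields (the same fields Example #12 reads):

def assert_produce_response(resp, initial_offset):
    # Hypothetical helper: exactly one response, no error code, and the
    # broker assigned the offset expected for the first message.
    assert len(resp) == 1
    assert resp[0].error == 0
    assert resp[0].offset == initial_offset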
Example #10
    def _send_messages(self, topic, partition, *msg, **kwargs):
        key = kwargs.pop('key', None)

        # Guarantee that msg is actually a list or tuple (should always be true)
        if not isinstance(msg, (list, tuple)):
            raise TypeError("msg is not a list or tuple!")

        for m in msg:
            # The protocol allows both the key and the payload to be null
            # (https://goo.gl/o694yN), but a (null, null) pair is meaningless.
            if m is None:
                if key is None:
                    raise TypeError("key and payload can't both be null")
            # Raise TypeError if any non-null message is not encoded as bytes
            elif not isinstance(m, six.binary_type):
                raise TypeError(
                    "all produce message payloads must be null or type bytes")

        # Raise TypeError if the key is not encoded as bytes
        if key is not None and not isinstance(key, six.binary_type):
            raise TypeError("the key must be type bytes")

        if self.async_send:
            for idx, m in enumerate(msg):
                try:
                    item = (TopicPartition(topic, partition), m, key)
                    if self.async_queue_put_timeout == 0:
                        self.queue.put_nowait(item)
                    else:
                        self.queue.put(item, True,
                                       self.async_queue_put_timeout)
                except Full:
                    raise AsyncProducerQueueFull(
                        msg[idx:], 'Producer async queue overfilled. '
                        'Current queue size %d.' % (self.queue.qsize(), ))
            resp = []
        else:
            messages = create_message_set([(m, key) for m in msg], self.codec,
                                          key, self.codec_compresslevel)
            req = ProduceRequestPayload(topic, partition, messages)
            try:
                resp = self.client.send_produce_request(
                    [req],
                    acks=self.req_acks,
                    timeout=self.ack_timeout,
                    fail_on_error=self.sync_fail_on_error)
            except Exception:
                log.exception("Unable to send messages")
                raise
        return resp
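A minimal call-site sketch for the checks above, with the producer instance, topic and payloads assumed: payloads must already be bytes, and a key may accompany null payloads, but a (null, null) pair is rejected:

# Hypothetical call site: bytes-only payloads, optional bytes key.
resp = producer._send_messages('my_topic', 0,
                               b'payload-1', b'payload-2',
                               key=b'routing-key')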
Example #11
    def call_back(self, topic, partition):
        payload = ProduceRequestPayload(topic=topic,
                                        partition=partition,
                                        messages=[create_message(b'test call')])
        retries = 5
        reps = []
        while retries and not reps:
            retries -= 1
            try:
                reps = self.client.send_produce_request(payloads=[payload],
                                                        fail_on_error=True)
            except LeaderNotAvailableError:
                self.client.load_metadata_for_topics()
                time.sleep(1)
        return reps
Example #12
def _send_upstream(queue,
                   client,
                   codec,
                   batch_time,
                   batch_size,
                   req_acks,
                   ack_timeout,
                   retry_options,
                   stop_event,
                   log_messages_on_error=ASYNC_LOG_MESSAGES_ON_ERROR,
                   stop_timeout=ASYNC_STOP_TIMEOUT_SECS,
                   codec_compresslevel=None):
    """Private method to manage producing messages asynchronously

    Listens on the queue for a specified number of messages or until
    a specified timeout and then sends messages to the brokers in grouped
    requests (one per broker).

    Messages placed on the queue should be tuples that conform to this format:
        ((topic, partition), message, key)

    Currently does not mark messages with task_done. Do not attempt to
    :meth:`join`!

    Arguments:
        queue (threading.Queue): the queue from which to get messages
        client (kafka.SimpleClient): instance to use for communicating
            with brokers
        codec (kafka.protocol.ALL_CODECS): compression codec to use
        batch_time (int): interval in seconds to send message batches
        batch_size (int): count of messages that will trigger an immediate send
        req_acks: required acks to use with ProduceRequests. see server protocol
        ack_timeout: timeout to wait for required acks. see server protocol
        retry_options (RetryOptions): settings for retry limits, backoff etc
        stop_event (threading.Event): event to monitor for shutdown signal.
            when this event is 'set', the producer will stop sending messages.
        log_messages_on_error (bool, optional): log stringified message-contents
            on any produce error, otherwise only log a hash() of the contents,
            defaults to True.
        stop_timeout (int or float, optional): number of seconds to continue
            retrying messages after stop_event is set, defaults to 30.
    """
    request_tries = {}

    while not stop_event.is_set():
        try:
            client.reinit()
        except Exception:
            log.warning(
                'Async producer failed to connect to brokers; backing off for %s ms before retrying',
                retry_options.backoff_ms)
            time.sleep(float(retry_options.backoff_ms) / 1000)
        else:
            break

    stop_at = None
    while not (stop_event.is_set() and queue.empty() and not request_tries):

        # Handle stop_timeout
        if stop_event.is_set():
            if not stop_at:
                stop_at = stop_timeout + time.time()
            if time.time() > stop_at:
                log.debug('Async producer stopping due to stop_timeout')
                break

        timeout = batch_time
        count = batch_size
        send_at = time.time() + timeout
        msgset = defaultdict(list)

        # Merging messages will require a bit more work to manage correctly;
        # for now, don't look for new batches if we have old ones to retry.
        if request_tries:
            count = 0
            log.debug('Skipping new batch collection to handle retries')
        else:
            log.debug('Batching size: %s, timeout: %s', count, timeout)

        # Keep fetching until we gather enough messages or the
        # timeout is reached
        while count > 0 and timeout >= 0:
            try:
                topic_partition, msg, key = queue.get(timeout=timeout)
            except Empty:
                break

            # Check if the controller has requested us to stop
            if topic_partition == STOP_ASYNC_PRODUCER:
                stop_event.set()
                break

            # Adjust the timeout to match the remaining period
            count -= 1
            timeout = send_at - time.time()
            msgset[topic_partition].append((msg, key))

        # Send collected requests upstream
        for topic_partition, msg in msgset.items():
            # Note: 'key' is the last key pulled off the queue above; it only
            # matters as the wrapper-message key when a compression codec is set.
            messages = create_message_set(msg, codec, key, codec_compresslevel)
            req = ProduceRequestPayload(topic_partition.topic,
                                        topic_partition.partition,
                                        tuple(messages))
            request_tries[req] = 0

        if not request_tries:
            continue

        reqs_to_retry, error_cls = [], None
        retry_state = {'do_backoff': False, 'do_refresh': False}

        def _handle_error(error_cls, request):
            if issubclass(error_cls, RETRY_ERROR_TYPES) or (
                    retry_options.retry_on_timeouts
                    and issubclass(error_cls, RequestTimedOutError)):
                reqs_to_retry.append(request)
            if issubclass(error_cls, RETRY_BACKOFF_ERROR_TYPES):
                retry_state['do_backoff'] |= True
            if issubclass(error_cls, RETRY_REFRESH_ERROR_TYPES):
                retry_state['do_refresh'] |= True

        requests = list(request_tries.keys())
        log.debug('Sending: %s', requests)
        responses = client.send_produce_request(requests,
                                                acks=req_acks,
                                                timeout=ack_timeout,
                                                fail_on_error=False)

        log.debug('Received: %s', responses)
        for i, response in enumerate(responses):
            error_cls = None
            if isinstance(response, FailedPayloadsError):
                error_cls = response.__class__
                orig_req = response.payload

            elif isinstance(response,
                            ProduceResponsePayload) and response.error:
                error_cls = kafka_errors.get(response.error, UnknownError)
                orig_req = requests[i]

            if error_cls:
                _handle_error(error_cls, orig_req)
                log.error(
                    '%s sending ProduceRequestPayload (#%d of %d) '
                    'to %s:%d with msgs %s', error_cls.__name__, (i + 1),
                    len(requests), orig_req.topic, orig_req.partition,
                    orig_req.messages
                    if log_messages_on_error else hash(orig_req.messages))

        if not reqs_to_retry:
            request_tries = {}
            continue

        # doing backoff before next retry
        if retry_state['do_backoff'] and retry_options.backoff_ms:
            log.warning('Async producer backing off for %s ms before retrying',
                        retry_options.backoff_ms)
            time.sleep(float(retry_options.backoff_ms) / 1000)

        # refresh topic metadata before next retry
        if retry_state['do_refresh']:
            log.warning(
                'Async producer forcing metadata refresh before retrying')
            try:
                client.load_metadata_for_topics()
            except Exception:
                log.exception("Async producer couldn't reload topic metadata.")

        # Apply retry limit, dropping messages that are over
        request_tries = dict(
            (key, count + 1) for (key, count) in request_tries.items()
            if key in reqs_to_retry and (
                retry_options.limit is None or (count < retry_options.limit)))

        # Log messages we are going to retry
        for orig_req in request_tries.keys():
            log.info(
                'Retrying ProduceRequestPayload to %s:%d with msgs %s',
                orig_req.topic, orig_req.partition, orig_req.messages
                if log_messages_on_error else hash(orig_req.messages))

    if request_tries or not queue.empty():
        log.error('Stopped producer with %d unsent messages',
                  len(request_tries) + queue.qsize())
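Connecting Example #12 back to Example #10: the loop consumes exactly the ((topic, partition), message, key) tuples documented in the docstring, and shutdown can be signalled in-band. A minimal feeder sketch, with the queued payloads assumed, reusing TopicPartition and STOP_ASYNC_PRODUCER from the code above:

# Hypothetical feeder: enqueue work items in the documented format, then
# push the stop sentinel that the fetch loop checks for.
queue.put((TopicPartition('my_topic', 0), b'payload-1', None))
queue.put((TopicPartition('my_topic', 0), b'payload-2', b'some-key'))
queue.put((STOP_ASYNC_PRODUCER, None, None))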