Example 1
def __init__(self, tp, records, buffer):
    self.max_record_size = 0
    now = time.time()
    self.created = now
    self.drained = None
    self.attempts = 0
    self.last_attempt = now
    self.last_append = now
    self.records = records
    self.topic_partition = tp
    self.produce_future = FutureProduceResult(tp)
    self._retry = False
    self._buffer = buffer  # We only save it, we don't write to it
Example 2
    def send(self, topic, value=None, key=None, headers=None, partition=None, timestamp_ms=None):
        """Publish a message to a topic.

        Arguments:
            topic (str): topic where the message will be published
            value (optional): message value. Must be type bytes, or be
                serializable to bytes via configured value_serializer. If value
                is None, key is required and message acts as a 'delete'.
                See kafka compaction documentation for more details:
                https://kafka.apache.org/documentation.html#compaction
                (compaction requires kafka >= 0.8.1)
            partition (int, optional): optionally specify a partition. If not
                set, the partition will be selected using the configured
                'partitioner'.
            key (optional): a key to associate with the message. Can be used to
                determine which partition to send the message to. If partition
                is None (and producer's partitioner config is left as default),
                then messages with the same key will be delivered to the same
                partition (but if key is None, partition is chosen randomly).
                Must be type bytes, or be serializable to bytes via configured
                key_serializer.
            headers (optional): a list of header key value pairs. List items
                are tuples of str key and bytes value.
            timestamp_ms (int, optional): epoch milliseconds (from Jan 1 1970 UTC)
                to use as the message timestamp. Defaults to current time.

        Returns:
            FutureRecordMetadata: resolves to RecordMetadata

        Raises:
            KafkaTimeoutError: if unable to fetch topic metadata, or unable
                to obtain memory buffer prior to configured max_block_ms
        """
        assert value is not None or self.config['api_version'] >= (0, 8, 1), (
            'Null messages require kafka >= 0.8.1')
        assert not (value is None and key is None), 'Need at least one: key or value'
        key_bytes = value_bytes = None
        try:
            self._wait_on_metadata(topic, self.config['max_block_ms'] / 1000.0)

            key_bytes = self._serialize(
                self.config['key_serializer'],
                topic, key)
            value_bytes = self._serialize(
                self.config['value_serializer'],
                topic, value)
            assert type(key_bytes) in (bytes, bytearray, memoryview, type(None))
            assert type(value_bytes) in (bytes, bytearray, memoryview, type(None))

            partition = self._partition(topic, partition, key, value,
                                        key_bytes, value_bytes)

            if headers is None:
                headers = []
            assert type(headers) == list
            assert all(type(item) == tuple and len(item) == 2 and type(item[0]) == str and type(item[1]) == bytes for item in headers)

            message_size = self._estimate_size_in_bytes(key_bytes, value_bytes, headers)
            self._ensure_valid_record_size(message_size)

            tp = TopicPartition(topic, partition)
            log.debug("Sending (key=%r value=%r headers=%r) to %s", key, value, headers, tp)
            result = self._accumulator.append(tp, timestamp_ms,
                                              key_bytes, value_bytes, headers,
                                              self.config['max_block_ms'],
                                              estimated_size=message_size)
            future, batch_is_full, new_batch_created = result
            if batch_is_full or new_batch_created:
                log.debug("Waking up the sender since %s is either full or"
                          " getting a new batch", tp)
                self._sender.wakeup()

            return future
            # handling exceptions and record the errors;
            # for API exceptions return them in the future,
            # for other exceptions raise directly
        except Errors.BrokerResponseError as e:
            log.debug("Exception occurred during message send: %s", e)
            return FutureRecordMetadata(
                FutureProduceResult(TopicPartition(topic, partition)),
                -1, None, None,
                len(key_bytes) if key_bytes is not None else -1,
                len(value_bytes) if value_bytes is not None else -1,
                sum(len(h_key.encode("utf-8")) + len(h_value) for h_key, h_value in headers) if headers else -1,
            ).failure(e)
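
As a usage note for the method above: a minimal sketch of how KafkaProducer.send() is typically driven from application code. The broker address, topic name, serializers, and header contents are illustrative assumptions, not taken from the source.

# Illustrative usage of KafkaProducer.send(); broker address, topic name,
# serializers and header values are assumptions made for this sketch.
from kafka import KafkaProducer

producer = KafkaProducer(
    bootstrap_servers='localhost:9092',
    key_serializer=lambda k: k.encode('utf-8'),    # str -> bytes
    value_serializer=lambda v: v.encode('utf-8'),  # str -> bytes
)

# send() returns a FutureRecordMetadata; get() blocks until the broker
# acknowledges the write or raises (e.g. KafkaTimeoutError) on failure.
future = producer.send(
    'example-topic',
    value='hello',
    key='user-1',
    headers=[('trace-id', b'abc123')],  # list of (str, bytes) tuples
)
record_metadata = future.get(timeout=10)
print(record_metadata.topic, record_metadata.partition, record_metadata.offset)

producer.flush()
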
Example 3
# Imports this snippet depends on (module paths assume the kafka-python
# package layout).
import logging
import time

import kafka.errors as Errors
from kafka.producer.future import FutureProduceResult, FutureRecordMetadata

log = logging.getLogger(__name__)


class ProducerBatch(object):
    def __init__(self, tp, records, buffer):
        self.max_record_size = 0
        now = time.time()
        self.created = now
        self.drained = None
        self.attempts = 0
        self.last_attempt = now
        self.last_append = now
        self.records = records
        self.topic_partition = tp
        self.produce_future = FutureProduceResult(tp)
        self._retry = False
        self._buffer = buffer  # We only save it, we don't write to it

    @property
    def record_count(self):
        return self.records.next_offset()

    def try_append(self, timestamp_ms, key, value):
        metadata = self.records.append(timestamp_ms, key, value)
        if metadata is None:
            return None

        self.max_record_size = max(self.max_record_size, metadata.size)
        self.last_append = time.time()
        future = FutureRecordMetadata(self.produce_future, metadata.offset,
                                      metadata.timestamp, metadata.crc,
                                      len(key) if key is not None else -1,
                                      len(value) if value is not None else -1)
        return future

    def done(self, base_offset=None, timestamp_ms=None, exception=None):
        log.debug("Produced messages to topic-partition %s with base offset"
                  " %s and error %s.", self.topic_partition, base_offset,
                  exception)  # trace
        if self.produce_future.is_done:
            log.warning('Batch is already closed -- ignoring batch.done()')
            return
        elif exception is None:
            self.produce_future.success((base_offset, timestamp_ms))
        else:
            self.produce_future.failure(exception)

    def maybe_expire(self, request_timeout_ms, retry_backoff_ms, linger_ms,
                     is_full):
        """Expire batches if metadata is not available

        A batch whose metadata is not available should be expired if one
        of the following is true:

          * the batch is not in retry AND request timeout has elapsed after
            it is ready (full or linger.ms has elapsed).

          * the batch is in retry AND request timeout has elapsed after the
            backoff period ended.
        """
        now = time.time()
        since_append = now - self.last_append
        since_ready = now - (self.created + linger_ms / 1000.0)
        since_backoff = now - (self.last_attempt + retry_backoff_ms / 1000.0)
        timeout = request_timeout_ms / 1000.0

        error = None
        if not self.in_retry() and is_full and timeout < since_append:
            error = "%d seconds have passed since last append" % since_append
        elif not self.in_retry() and timeout < since_ready:
            error = "%d seconds have passed since batch creation plus linger time" % since_ready
        elif self.in_retry() and timeout < since_backoff:
            error = "%d seconds have passed since last attempt plus backoff time" % since_backoff

        if error:
            self.records.close()
            self.done(
                -1, None,
                Errors.KafkaTimeoutError(
                    "Batch for %s containing %s record(s) expired: %s" %
                    (self.topic_partition, self.records.next_offset(), error)))
            return True
        return False

    def in_retry(self):
        return self._retry

    def set_retry(self):
        self._retry = True

    def buffer(self):
        return self._buffer

    def __str__(self):
        return 'ProducerBatch(topic_partition=%s, record_count=%d)' % (
            self.topic_partition, self.records.next_offset())
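
To make the expiration rules in maybe_expire() concrete, here is a small arithmetic walk-through. The timeout, linger, and clock values are assumptions chosen purely for illustration.

# Assumed configuration values, for illustration only.
request_timeout_ms = 30000
linger_ms = 5000

created = 0.0   # batch creation time, in seconds
now = 36.0      # pretend 36 seconds have passed and the batch is not full

# Mirrors the "not in retry, not full" branch above: the batch counts as
# ready once linger.ms has elapsed, and expires once more than
# request_timeout_ms passes after that point.
since_ready = now - (created + linger_ms / 1000.0)  # 31.0 s
timeout = request_timeout_ms / 1000.0               # 30.0 s
print(timeout < since_ready)  # True -> batch fails with KafkaTimeoutError
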
Example 4
# Imports this snippet depends on (module paths assume the kafka-python
# package layout).
import logging
import time

import kafka.errors as Errors
from kafka.producer.future import FutureProduceResult, FutureRecordMetadata

log = logging.getLogger(__name__)


class ProducerBatch(object):
    def __init__(self, tp, records, buffer):
        self.max_record_size = 0
        now = time.time()
        self.created = now
        self.drained = None
        self.attempts = 0
        self.last_attempt = now
        self.last_append = now
        self.records = records
        self.topic_partition = tp
        self.produce_future = FutureProduceResult(tp)
        self._retry = False
        self._buffer = buffer  # We only save it, we don't write to it

    @property
    def record_count(self):
        return self.records.next_offset()

    def try_append(self, timestamp_ms, key, value, headers):
        metadata = self.records.append(timestamp_ms, key, value, headers)
        if metadata is None:
            return None

        self.max_record_size = max(self.max_record_size, metadata.size)
        self.last_append = time.time()
        future = FutureRecordMetadata(self.produce_future, metadata.offset,
                                      metadata.timestamp, metadata.crc,
                                      len(key) if key is not None else -1,
                                      len(value) if value is not None else -1,
                                      sum(len(h_key.encode("utf-8")) + len(h_val) for h_key, h_val in headers) if headers else -1)
        return future

    def done(self, base_offset=None, timestamp_ms=None, exception=None):
        level = logging.DEBUG if exception is None else logging.WARNING
        log.log(level, "Produced messages to topic-partition %s with base offset"
                " %s and error %s.", self.topic_partition, base_offset,
                exception)  # trace
        if self.produce_future.is_done:
            log.warning('Batch is already closed -- ignoring batch.done()')
            return
        elif exception is None:
            self.produce_future.success((base_offset, timestamp_ms))
        else:
            self.produce_future.failure(exception)

    def maybe_expire(self, request_timeout_ms, retry_backoff_ms, linger_ms, is_full):
        """Expire batches if metadata is not available

        A batch whose metadata is not available should be expired if one
        of the following is true:

          * the batch is not in retry AND request timeout has elapsed after
            it is ready (full or linger.ms has elapsed).

          * the batch is in retry AND request timeout has elapsed after the
            backoff period ended.
        """
        now = time.time()
        since_append = now - self.last_append
        since_ready = now - (self.created + linger_ms / 1000.0)
        since_backoff = now - (self.last_attempt + retry_backoff_ms / 1000.0)
        timeout = request_timeout_ms / 1000.0

        error = None
        if not self.in_retry() and is_full and timeout < since_append:
            error = "%d seconds have passed since last append" % (since_append,)
        elif not self.in_retry() and timeout < since_ready:
            error = "%d seconds have passed since batch creation plus linger time" % (since_ready,)
        elif self.in_retry() and timeout < since_backoff:
            error = "%d seconds have passed since last attempt plus backoff time" % (since_backoff,)

        if error:
            self.records.close()
            self.done(-1, None, Errors.KafkaTimeoutError(
                "Batch for %s containing %s record(s) expired: %s" % (
                self.topic_partition, self.records.next_offset(), error)))
            return True
        return False

    def in_retry(self):
        return self._retry

    def set_retry(self):
        self._retry = True

    def buffer(self):
        return self._buffer

    def __str__(self):
        return 'ProducerBatch(topic_partition=%s, record_count=%d)' % (
            self.topic_partition, self.records.next_offset())
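
The header-size term used in try_append() above (and in send() in Example 2) adds, per header, the UTF-8-encoded length of the key and the raw length of the value. A quick check with made-up header data:

# Made-up headers, only to illustrate the size accounting used above.
headers = [('trace-id', b'abc123'), ('source', b'svc-a')]

header_bytes = sum(len(h_key.encode('utf-8')) + len(h_val)
                   for h_key, h_val in headers)
print(header_bytes)  # 8 + 6 + 6 + 5 = 25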