def send(self, topic, value=None, key=None, headers=None, partition=None, timestamp_ms=None):
    """Publish a message to a topic.

    Arguments:
        topic (str): topic where the message will be published
        value (optional): message value. Must be type bytes, or be
            serializable to bytes via configured value_serializer. If value
            is None, key is required and message acts as a 'delete'.
            See kafka compaction documentation for more details:
            https://kafka.apache.org/documentation.html#compaction
            (compaction requires kafka >= 0.8.1)
        partition (int, optional): optionally specify a partition. If not
            set, the partition will be selected using the configured
            'partitioner'.
        key (optional): a key to associate with the message. Can be used to
            determine which partition to send the message to. If partition
            is None (and producer's partitioner config is left as default),
            then messages with the same key will be delivered to the same
            partition (but if key is None, partition is chosen randomly).
            Must be type bytes, or be serializable to bytes via configured
            key_serializer.
        headers (optional): a list of header key value pairs. List items
            are tuples of str key and bytes value.
        timestamp_ms (int, optional): epoch milliseconds (from Jan 1 1970
            UTC) to use as the message timestamp. Defaults to current time.

    Returns:
        FutureRecordMetadata: resolves to RecordMetadata

    Raises:
        KafkaTimeoutError: if unable to fetch topic metadata, or unable
            to obtain memory buffer prior to configured max_block_ms
    """
    assert value is not None or self.config['api_version'] >= (0, 8, 1), (
        'Null messages require kafka >= 0.8.1')
    assert not (value is None and key is None), 'Need at least one: key or value'
    key_bytes = value_bytes = None
    try:
        self._wait_on_metadata(topic, self.config['max_block_ms'] / 1000.0)

        key_bytes = self._serialize(
            self.config['key_serializer'],
            topic, key)
        value_bytes = self._serialize(
            self.config['value_serializer'],
            topic, value)
        assert type(key_bytes) in (bytes, bytearray, memoryview, type(None))
        assert type(value_bytes) in (bytes, bytearray, memoryview, type(None))

        partition = self._partition(topic, partition, key, value,
                                    key_bytes, value_bytes)

        if headers is None:
            headers = []
        assert type(headers) == list
        assert all(type(item) == tuple and len(item) == 2
                   and type(item[0]) == str and type(item[1]) == bytes
                   for item in headers)

        message_size = self._estimate_size_in_bytes(key_bytes, value_bytes, headers)
        self._ensure_valid_record_size(message_size)

        tp = TopicPartition(topic, partition)
        log.debug("Sending (key=%r value=%r headers=%r) to %s", key, value, headers, tp)
        result = self._accumulator.append(tp, timestamp_ms,
                                          key_bytes, value_bytes, headers,
                                          self.config['max_block_ms'],
                                          estimated_size=message_size)
        future, batch_is_full, new_batch_created = result
        if batch_is_full or new_batch_created:
            log.debug("Waking up the sender since %s is either full or"
                      " getting a new batch", tp)
            self._sender.wakeup()

        return future
        # handle exceptions and record errors:
        # API exceptions are returned in the future,
        # other exceptions are raised directly
    except Errors.BrokerResponseError as e:
        log.debug("Exception occurred during message send: %s", e)
        return FutureRecordMetadata(
            FutureProduceResult(TopicPartition(topic, partition)),
            -1, None, None,
            len(key_bytes) if key_bytes is not None else -1,
            len(value_bytes) if value_bytes is not None else -1,
            sum(len(h_key.encode("utf-8")) + len(h_value)
                for h_key, h_value in headers) if headers else -1,
        ).failure(e)
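# --- Usage sketch (not part of the library source above) -------------------
# A minimal example of calling send() from application code, assuming a
# broker reachable at localhost:9092; the topic name, key, value, and header
# are hypothetical. send() returns immediately with a FutureRecordMetadata;
# the record is buffered by the accumulator and written by the background
# sender thread, so blocking on the future is optional and only needed when
# the caller wants the assigned partition/offset.

from kafka import KafkaProducer
from kafka.errors import KafkaTimeoutError

producer = KafkaProducer(bootstrap_servers='localhost:9092')
try:
    future = producer.send(
        'my-topic',                          # hypothetical topic
        key=b'user-42',
        value=b'{"event": "login"}',
        headers=[('trace-id', b'abc123')])   # str key, bytes value, as asserted above
    metadata = future.get(timeout=10)        # wait up to 10s for the broker ack
    print(metadata.topic, metadata.partition, metadata.offset)
except KafkaTimeoutError:
    pass  # metadata or buffer space unavailable within max_block_ms
finally:
    producer.close()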
class ProducerBatch(object):
    def __init__(self, tp, records, buffer):
        self.max_record_size = 0
        now = time.time()
        self.created = now
        self.drained = None
        self.attempts = 0
        self.last_attempt = now
        self.last_append = now
        self.records = records
        self.topic_partition = tp
        self.produce_future = FutureProduceResult(tp)
        self._retry = False
        self._buffer = buffer  # We only save it, we don't write to it

    @property
    def record_count(self):
        return self.records.next_offset()

    def try_append(self, timestamp_ms, key, value, headers):
        metadata = self.records.append(timestamp_ms, key, value, headers)
        if metadata is None:
            return None

        self.max_record_size = max(self.max_record_size, metadata.size)
        self.last_append = time.time()
        future = FutureRecordMetadata(self.produce_future, metadata.offset,
                                      metadata.timestamp, metadata.crc,
                                      len(key) if key is not None else -1,
                                      len(value) if value is not None else -1,
                                      sum(len(h_key.encode("utf-8")) + len(h_val)
                                          for h_key, h_val in headers) if headers else -1)
        return future

    def done(self, base_offset=None, timestamp_ms=None, exception=None):
        level = logging.DEBUG if exception is None else logging.WARNING
        log.log(level, "Produced messages to topic-partition %s with base offset"
                " %s and error %s.", self.topic_partition, base_offset,
                exception)  # trace
        if self.produce_future.is_done:
            log.warning('Batch is already closed -- ignoring batch.done()')
            return
        elif exception is None:
            self.produce_future.success((base_offset, timestamp_ms))
        else:
            self.produce_future.failure(exception)

    def maybe_expire(self, request_timeout_ms, retry_backoff_ms, linger_ms, is_full):
        """Expire batches if metadata is not available.

        A batch whose metadata is not available should be expired if one
        of the following is true:

        * the batch is not in retry AND request timeout has elapsed after
          it is ready (full or linger.ms has been reached).
        * the batch is in retry AND request timeout has elapsed after the
          backoff period ended.
        """
        now = time.time()
        since_append = now - self.last_append
        since_ready = now - (self.created + linger_ms / 1000.0)
        since_backoff = now - (self.last_attempt + retry_backoff_ms / 1000.0)
        timeout = request_timeout_ms / 1000.0

        error = None
        if not self.in_retry() and is_full and timeout < since_append:
            error = "%d seconds have passed since last append" % (since_append,)
        elif not self.in_retry() and timeout < since_ready:
            error = "%d seconds have passed since batch creation plus linger time" % (since_ready,)
        elif self.in_retry() and timeout < since_backoff:
            error = "%d seconds have passed since last attempt plus backoff time" % (since_backoff,)

        if error:
            self.records.close()
            self.done(-1, None, Errors.KafkaTimeoutError(
                "Batch for %s containing %s record(s) expired: %s" % (
                    self.topic_partition, self.records.next_offset(), error)))
            return True
        return False

    def in_retry(self):
        return self._retry

    def set_retry(self):
        self._retry = True

    def buffer(self):
        return self._buffer

    def __str__(self):
        return 'ProducerBatch(topic_partition=%s, record_count=%d)' % (
            self.topic_partition, self.records.next_offset())
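# --- Lifecycle sketch (not part of the library source above) ---------------
# A self-contained sketch of the batch lifecycle, assuming a kafka-python
# version where try_append accepts headers (>= 1.4.3). _StubRecords, _Meta,
# and the topic name are hypothetical stand-ins: only the three methods
# ProducerBatch actually calls on its records object (append, next_offset,
# close) are stubbed, so this exercises the batch logic without a real
# records builder or broker.

import collections
import time

from kafka.producer.record_accumulator import ProducerBatch
from kafka.structs import TopicPartition

_Meta = collections.namedtuple('_Meta', ['offset', 'timestamp', 'crc', 'size'])

class _StubRecords(object):
    def __init__(self):
        self._offset = 0

    def append(self, timestamp_ms, key, value, headers):
        meta = _Meta(self._offset,
                     timestamp_ms or int(time.time() * 1000),
                     None,
                     len(key or b'') + len(value or b''))
        self._offset += 1
        return meta

    def next_offset(self):
        return self._offset

    def close(self):
        pass

tp = TopicPartition('my-topic', 0)  # hypothetical topic-partition
batch = ProducerBatch(tp, _StubRecords(), buffer=None)

# Each successful append chains a per-record future onto the batch's
# shared produce_future.
future = batch.try_append(None, b'k', b'v', [('h', b'1')])
assert batch.record_count == 1

# Completing the batch resolves every per-record future at once.
batch.done(base_offset=100, timestamp_ms=1234567890000)
assert future.succeeded()

# Expiry: a stale, full batch past the request timeout is closed and its
# future is failed with KafkaTimeoutError by maybe_expire().
stale = ProducerBatch(tp, _StubRecords(), buffer=None)
time.sleep(0.01)
assert stale.maybe_expire(request_timeout_ms=1, retry_backoff_ms=100,
                          linger_ms=0, is_full=True)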