class BaseProducer(abc.ABC):

    _PRODUCER_CLIENT_ID_SEQUENCE = 0

    _COMPRESSORS = {
        'gzip': (has_gzip, LegacyRecordBatchBuilder.CODEC_GZIP),
        'snappy': (has_snappy, LegacyRecordBatchBuilder.CODEC_SNAPPY),
        'lz4': (has_lz4, LegacyRecordBatchBuilder.CODEC_LZ4),
    }

    _closed = None  # Serves as an uninitialized flag for __del__
    _source_traceback = None

    def __init__(self, *, loop, bootstrap_servers='localhost',
                 client_id=None,
                 metadata_max_age_ms=300000, request_timeout_ms=40000,
                 api_version='auto', acks=_missing,
                 key_serializer=None, value_serializer=None,
                 compression_type=None, max_batch_size=16384,
                 partitioner=DefaultPartitioner(), max_request_size=1048576,
                 linger_ms=0, send_backoff_ms=100,
                 retry_backoff_ms=100, security_protocol="PLAINTEXT",
                 ssl_context=None, connections_max_idle_ms=540000,
                 on_irrecoverable_error=None,
                 enable_idempotence=False, transactional_id=None,
                 transaction_timeout_ms=60000,
                 sasl_mechanism="PLAIN", sasl_plain_password=None,
                 sasl_plain_username=None,
                 sasl_kerberos_service_name='kafka',
                 sasl_kerberos_domain_name=None):
        if acks not in (0, 1, -1, 'all', _missing):
            raise ValueError("Invalid ACKS parameter")
        if compression_type not in ('gzip', 'snappy', 'lz4', None):
            raise ValueError("Invalid compression type!")

        if compression_type:
            checker, compression_attrs = self._COMPRESSORS[compression_type]
            if not checker():
                raise RuntimeError("Compression library for {} not found"
                                   .format(compression_type))
        else:
            compression_attrs = 0
        self._compression_attrs = compression_attrs

        # Normalize acks: default to 1, map 'all' to the protocol value -1
        if acks is _missing:
            acks = 1
        elif acks == 'all':
            acks = -1

        AIOKafkaProducer._PRODUCER_CLIENT_ID_SEQUENCE += 1
        if client_id is None:
            client_id = 'aiokafka-producer-%s' % \
                AIOKafkaProducer._PRODUCER_CLIENT_ID_SEQUENCE

        self._bootstrap_servers = bootstrap_servers
        self._client_id = client_id
        self._metadata_max_age_ms = metadata_max_age_ms
        self._request_timeout_ms = request_timeout_ms
        self._api_version = api_version
        self._acks = acks
        self._key_serializer = key_serializer
        self._value_serializer = value_serializer
        self._compression_type = compression_type
        self._max_batch_size = max_batch_size
        self._partitioner = partitioner
        self._max_request_size = max_request_size
        self._linger_ms = linger_ms
        self._send_backoff_ms = send_backoff_ms
        self._retry_backoff_ms = retry_backoff_ms
        self._security_protocol = security_protocol
        self._ssl_context = ssl_context
        self._connections_max_idle_ms = connections_max_idle_ms
        self._transaction_timeout_ms = transaction_timeout_ms
        self._on_irrecoverable_error = on_irrecoverable_error
        self._sasl_mechanism = sasl_mechanism
        self._sasl_plain_username = sasl_plain_username
        self._sasl_plain_password = sasl_plain_password
        self._sasl_kerberos_service_name = sasl_kerberos_service_name
        self._sasl_kerberos_domain_name = sasl_kerberos_domain_name

        self.client = AIOKafkaClient(
            loop=loop, bootstrap_servers=bootstrap_servers,
            client_id=client_id, metadata_max_age_ms=metadata_max_age_ms,
            request_timeout_ms=request_timeout_ms,
            retry_backoff_ms=retry_backoff_ms,
            api_version=api_version, security_protocol=security_protocol,
            ssl_context=ssl_context,
            connections_max_idle_ms=connections_max_idle_ms,
            sasl_mechanism=sasl_mechanism,
            sasl_plain_username=sasl_plain_username,
            sasl_plain_password=sasl_plain_password,
            sasl_kerberos_service_name=sasl_kerberos_service_name,
            sasl_kerberos_domain_name=sasl_kerberos_domain_name)
        self._metadata = self.client.cluster
        self._loop = loop

        if loop.get_debug():
            self._source_traceback = traceback.extract_stack(sys._getframe(1))
        self._closed = False
    # Warn if the producer was not closed properly.
    # We don't attempt to close the producer here, as __del__ is synchronous.
    def __del__(self, _warnings=warnings):
        if self._closed is False:
            if PY_36:
                kwargs = {'source': self}
            else:
                kwargs = {}
            _warnings.warn("Unclosed AIOKafkaProducer {!r}".format(self),
                           ResourceWarning, **kwargs)
            context = {'producer': self,
                       'message': 'Unclosed AIOKafkaProducer'}
            if self._source_traceback is not None:
                context['source_traceback'] = self._source_traceback
            self._loop.call_exception_handler(context)

    @abc.abstractmethod
    def _on_set_api_version(self, api_version):
        ...

    @abc.abstractmethod
    def _message_accumulator_for(self, transactional_id, tp):
        ...

    @abc.abstractmethod
    def _transactional_id_or_default(self, transactional_id):
        ...

    @abc.abstractmethod
    def _verify_txn_started(self, transactional_id):
        ...

    @abc.abstractmethod
    def _wait_for_sender(self):
        ...

    @abc.abstractmethod
    def _ensure_transactional(self):
        ...

    async def __aenter__(self):
        await self.start()
        return self

    async def __aexit__(self, exc_type, exc, tb):
        await self.stop()

    async def start(self):
        """Connect to Kafka cluster and check server version"""
        log.debug("Starting the Kafka producer")  # trace
        await self.client.bootstrap()
        if self._closed:
            return

        api_version = self.client.api_version
        self._verify_api_version(api_version)

        await self._start_sender()

        self._on_set_api_version(api_version)

        self._producer_magic = 0 if api_version < (0, 10) else 1
        log.debug("Kafka producer started")

    def _verify_api_version(self, api_version):
        if self._compression_type == 'lz4':
            assert self.client.api_version >= (0, 8, 2), \
                'LZ4 requires >= Kafka 0.8.2 brokers'

    async def stop(self):
        """Flush all pending data and close all connections to the Kafka
        cluster"""
        if self._closed:
            return
        self._closed = True

        self.client.set_close()
        await self._wait_for_sender()

        await self.client.close()
        log.debug("The Kafka producer has closed.")

    async def partitions_for(self, topic):
        """Returns set of all known partitions for the topic."""
        return (await self.client._wait_on_metadata(topic))

    def _serialize(self, topic, key, value):
        if self._key_serializer:
            serialized_key = self._key_serializer(key)
        else:
            serialized_key = key
        if self._value_serializer:
            serialized_value = self._value_serializer(value)
        else:
            serialized_value = value

        message_size = LegacyRecordBatchBuilder.record_overhead(
            self._producer_magic)
        if serialized_key is not None:
            message_size += len(serialized_key)
        if serialized_value is not None:
            message_size += len(serialized_value)
        if message_size > self._max_request_size:
            raise MessageSizeTooLargeError(
                "The message is %d bytes when serialized which is larger than"
                " the maximum request size you have configured with the"
                " max_request_size configuration" % message_size)

        return serialized_key, serialized_value

    def _partition(self, topic, partition, key, value,
                   serialized_key, serialized_value):
        if partition is not None:
            assert partition >= 0
            assert partition in self._metadata.partitions_for_topic(topic), \
                'Unrecognized partition'
            return partition

        all_partitions = list(self._metadata.partitions_for_topic(topic))
        available = list(self._metadata.available_partitions_for_topic(topic))
        return self._partitioner(
            serialized_key, all_partitions, available)

    async def send(
        self, topic, value=None, key=None, partition=None,
        timestamp_ms=None, headers=None, transactional_id=None
    ):
        """Publish a message to a topic.

        Arguments:
            topic (str): topic where the message will be published
            value (optional): message value.
                Must be type bytes, or be serializable to bytes via the
                configured value_serializer. If value is None, key is
                required and the message acts as a 'delete'.
                See the Kafka compaction documentation for more details:
                http://kafka.apache.org/documentation.html#compaction
                (compaction requires kafka >= 0.8.1)
            partition (int, optional): optionally specify a partition. If not
                set, the partition will be selected using the configured
                'partitioner'.
            key (optional): a key to associate with the message. Can be used
                to determine which partition to send the message to. If
                partition is None (and the producer's partitioner config is
                left as default), then messages with the same key will be
                delivered to the same partition (but if key is None, the
                partition is chosen randomly). Must be type bytes, or be
                serializable to bytes via the configured key_serializer.
            timestamp_ms (int, optional): epoch milliseconds (from Jan 1 1970
                UTC) to use as the message timestamp. Defaults to current
                time.
            headers (optional): message headers; supported only by brokers
                running Kafka 0.11 or newer.

        Returns:
            asyncio.Future: object that will be set when the message is
            processed

        Raises:
            kafka.KafkaTimeoutError: if we can't schedule this record (
                pending buffer is full) in up to `request_timeout_ms`
                milliseconds.

        Note:
            The returned future will wait based on the `request_timeout_ms`
            setting. Cancelling the returned future **will not** stop the
            event from being sent, but cancelling the ``send`` coroutine
            itself **will**.
        """
        assert value is not None or self.client.api_version >= (0, 8, 1), (
            'Null messages require kafka >= 0.8.1')
        assert not (value is None and key is None), \
            'Need at least one: key or value'
        transactional_id = self._transactional_id_or_default(transactional_id)

        # first make sure the metadata for the topic is available
        await self.client._wait_on_metadata(topic)
        # Ensure transaction is started and not committing
        self._verify_txn_started(transactional_id)

        if headers is not None:
            if self.client.api_version < (0, 11):
                raise UnsupportedVersionError(
                    "Headers not supported before Kafka 0.11")
        else:
            # Record parser/builder support only list type, no explicit None
            headers = []

        key_bytes, value_bytes = self._serialize(topic, key, value)
        partition = self._partition(topic, partition, key, value,
                                    key_bytes, value_bytes)

        tp = TopicPartition(topic, partition)
        log.debug("Sending (key=%s value=%s) to %s", key, value, tp)

        message_accumulator = self._message_accumulator_for(
            transactional_id, tp)
        fut = await message_accumulator.add_message(
            tp, key_bytes, value_bytes, self._request_timeout_ms / 1000,
            timestamp_ms=timestamp_ms, headers=headers)
        return fut

    async def send_and_wait(self, topic, value=None, key=None, partition=None,
                            timestamp_ms=None):
        """Publish a message to a topic and wait for the result."""
        future = await self.send(
            topic, value, key, partition, timestamp_ms)
        return (await future)
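
# Usage sketch (illustrative only, not part of the library API): it assumes a
# concrete subclass such as AIOKafkaProducer that implements the abstract
# hooks above and a broker reachable at the placeholder address below.  It
# shows the difference between send(), which returns a delivery future, and
# send_and_wait(), which awaits that future for you.
async def _example_usage(loop):
    producer = AIOKafkaProducer(
        loop=loop, bootstrap_servers='localhost:9092')
    # __aenter__ calls start(); __aexit__ calls stop(), flushing pending data
    async with producer:
        # send() enqueues the record and returns a future that is resolved
        # once the broker acknowledges delivery (per the `acks` setting)
        fut = await producer.send('my-topic', b'payload', key=b'key')
        record_metadata = await fut
        # send_and_wait() is the shorthand that awaits delivery itself
        await producer.send_and_wait('my-topic', b'another payload')
    return record_metadata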