def _verify_txn_started(self, transactional_id):
    """Refuse to continue when a transactional producer has no open transaction.

    The check is a no-op when there is no transaction manager or when the
    manager carries no ``transactional_id``.

    Arguments:
        transactional_id: not read by this implementation.

    Raises:
        IllegalOperation: If a transactional id is configured and the
            manager reports that no transaction is in progress.
    """
    manager = self._txn_manager
    if manager is None or manager.transactional_id is None:
        # Not a transactional producer: nothing to verify.
        return
    if manager.is_in_transaction():
        return
    raise IllegalOperation(
        "Can't send messages while not in transaction")
async def send_batch(self, batch, topic, *, partition):
    """Hand a prepared BatchBuilder to the accumulator for publication.

    Arguments:
        batch (BatchBuilder): batch object to be published.
        topic (str): topic where the batch will be published.
        partition (int): partition where this batch will be published.

    Returns:
        asyncio.Future: object that will be set when the batch is
            delivered.

    Raises:
        IllegalOperation: If a transactional id is configured and no
            transaction is currently in progress.
    """
    # Topic metadata must be available before anything else happens.
    await self.client._wait_on_metadata(topic)

    # No key/value is supplied: this call only validates that the
    # partition is present in the metadata.
    partition = self._partition(topic, partition, None, None, None, None)

    # A transactional producer may only send inside an open transaction.
    manager = self._txn_manager
    if manager is not None and manager.transactional_id is not None:
        if not manager.is_in_transaction():
            raise IllegalOperation(
                "Can't send messages while not in transaction")

    tp = TopicPartition(topic, partition)
    log.debug("Sending batch to %s", tp)
    return await self._message_accumulator.add_batch(
        batch, tp, self._request_timeout_ms / 1000)
def committed(self, partition):
    """ Look up the last committed offset of ``partition``.

    The commit may have been made by this process or by another one. The
    returned offset is the position the consumer would resume from after
    a failure.

    Every call performs a remote fetch through the coordinator; committed
    offsets are not cached by the consumer (a Transactional Producer can
    change them without Consumer knowledge as of Kafka 0.11.0).

    Arguments:
        partition (TopicPartition): the partition to check

    Returns:
        The last committed offset, or None if there was no prior commit.

    Raises:
        IllegalOperation: If used with ``group_id == None``
    """
    if self._group_id is None:
        raise IllegalOperation("Requires group_id")

    fetched = yield from self._coordinator.fetch_committed_offsets(
        [partition])
    if partition not in fetched:
        return None

    offset = fetched[partition].offset
    # -1 is reported as "no commit", i.e. None.
    return None if offset == -1 else offset
def _verify_txn_started(self, transactional_id):
    """Check that the transaction registered under ``transactional_id`` is open.

    An id with no entry in ``self._transactions`` is silently accepted.

    Arguments:
        transactional_id: key into ``self._transactions``.

    Raises:
        AssertionError: If the registered manager carries a different
            ``transactional_id`` than the one it is stored under.
        IllegalOperation: If the manager reports that no transaction is
            in progress.
    """
    try:
        manager = self._transactions[transactional_id]
    except KeyError:
        # Unknown id: nothing to verify.
        return

    assert manager.transactional_id == transactional_id
    if manager.is_in_transaction():
        return
    raise IllegalOperation(
        "Can't send messages while not in transaction")
async def send_offsets_to_transaction(self, offsets, group_id):
    """Add consumer offsets for ``group_id`` to the current transaction.

    Arguments:
        offsets: offsets to add; the structure is checked and normalised
            by ``commit_structure_validate``.
        group_id (str): non-empty consumer group id the offsets belong to.

    Raises:
        IllegalOperation: If the producer is not transactional (raised by
            ``_ensure_transactional``) or no transaction is in progress.
        ValueError: If ``group_id`` is empty or not a ``str``.
    """
    self._ensure_transactional()

    if not self._txn_manager.is_in_transaction():
        raise IllegalOperation("Not in the middle of a transaction")

    if not group_id or not isinstance(group_id, str):
        raise ValueError(group_id)

    # validate `offsets` structure
    formatted_offsets = commit_structure_validate(offsets)

    log.debug(
        "Begin adding offsets %s for consumer group %s to transaction",
        formatted_offsets, group_id)
    fut = self._txn_manager.add_offsets_to_txn(formatted_offsets, group_id)
    # shield() protects `fut` from cancellation if this coroutine is
    # cancelled while waiting. The explicit `loop` argument is omitted on
    # purpose: asyncio.shield() no longer accepts it since Python 3.10.
    # NOTE(review): assumes add_offsets_to_txn returns a Future bound to
    # the producer's loop, in which case the argument was redundant.
    await asyncio.shield(fut)
def send_offsets_to_transaction(self, transactional_id, offsets, group_id):
    """Add consumer offsets for ``group_id`` to a specific transaction.

    Generator-based coroutine: it must be driven with ``yield from``.

    Arguments:
        transactional_id: key of the transaction manager in
            ``self._transactions``.
        offsets: offsets to add; the structure is checked and normalised
            by ``commit_structure_validate``.
        group_id (str): non-empty consumer group id the offsets belong to.

    Raises:
        KeyError: If ``transactional_id`` has no registered manager.
        IllegalOperation: If that manager is not in a transaction.
        ValueError: If ``group_id`` is empty or not a ``str``.
    """
    txn_manager = self._transactions[transactional_id]

    if not txn_manager.is_in_transaction():
        raise IllegalOperation("Not in the middle of a transaction")

    if not group_id or not isinstance(group_id, str):
        raise ValueError(group_id)

    # validate `offsets` structure
    formatted_offsets = commit_structure_validate(offsets)

    log.debug(
        "Begin adding offsets %s for consumer group %s to transaction",
        formatted_offsets, group_id)
    fut = txn_manager.add_offsets_to_txn(formatted_offsets, group_id)

    # Logger arguments are passed lazily so repr(fut) is only computed
    # when debug logging is actually enabled.
    log.debug('+WAIT FOR RESPONSE OR ERROR %r', fut)
    # shield() protects `fut` from cancellation if this coroutine is
    # cancelled while waiting.
    # NOTE(review): the `loop` argument was removed from asyncio.shield()
    # in Python 3.10; drop it if this code has to run there.
    yield from asyncio.shield(fut, loop=self._loop)
    log.debug('-WAIT FOR RESPONSE OR ERROR %r', fut)
def committed(self, partition):
    """ Look up the last committed offset of ``partition``.

    The commit may have been made by this process or by another one. The
    returned offset is the position the consumer would resume from after
    a failure.

    For a partition assigned to this consumer the cached committed
    offset is used, waiting for it first if it has not been initialised
    yet. For any other partition the offset is fetched remotely through
    the coordinator.

    Arguments:
        partition (TopicPartition): the partition to check

    Returns:
        The last committed offset, or None if there was no prior commit.

    Raises:
        IllegalOperation: If used with ``group_id == None``
    """
    if self._group_id is None:
        raise IllegalOperation("Requires group_id")

    if not self._subscription.is_assigned(partition):
        # Not ours: ask the coordinator.
        fetched = yield from self._coordinator.fetch_committed_offsets(
            [partition])
        offset = fetched[partition].offset if partition in fetched else None
    else:
        state = self._subscription.subscription.assignment.state_value(
            partition)
        if state.committed is None:
            # Cache not populated yet: wait until it is.
            yield from state.wait_for_committed()
        offset = state.committed.offset

    # -1 is reported as "no commit", i.e. None.
    return None if offset == -1 else offset
def _ensure_transactional(self):
    """Verify that this producer was configured for transactions.

    Raises:
        IllegalOperation: If there is no transaction manager or the
            manager has no ``transactional_id``.
    """
    manager = self._txn_manager
    if manager is not None and manager.transactional_id is not None:
        return
    raise IllegalOperation(
        "You need to configure transaction_id to use transactions")
async def send(self, topic, value=None, key=None, partition=None,
               timestamp_ms=None, headers=None):
    """Publish a single message to a topic.

    Arguments:
        topic (str): topic where the message will be published
        value (optional): message value. Must be type bytes, or be
            serializable to bytes via configured value_serializer. If
            value is None, key is required and message acts as a
            'delete'. See kafka compaction documentation for more
            details: http://kafka.apache.org/documentation.html#compaction
            (compaction requires kafka >= 0.8.1)
        key (optional): a key to associate with the message. Can be used
            to determine which partition to send the message to. If
            partition is None (and producer's partitioner config is left
            as default), then messages with the same key will be
            delivered to the same partition (but if key is None,
            partition is chosen randomly). Must be type bytes, or be
            serializable to bytes via configured key_serializer.
        partition (int, optional): optionally specify a partition. If not
            set, the partition will be selected using the configured
            'partitioner'.
        timestamp_ms (int, optional): epoch milliseconds (from Jan 1 1970
            UTC) to use as the message timestamp. Defaults to current
            time.
        headers (optional): message headers. Rejected when the client
            API version is below 0.11; ``None`` is replaced by an empty
            list before the record is built.

    Returns:
        asyncio.Future: object that will be set when message is
            processed

    Raises:
        AssertionError: If both ``value`` and ``key`` are None, or if
            ``value`` is None and the client API version is below 0.8.1.
        IllegalOperation: If a transactional id is configured and no
            transaction is currently in progress.
        UnsupportedVersionError: If ``headers`` is given and the client
            API version is below 0.11.
        kafka.KafkaTimeoutError: if we can't schedule this record (
            pending buffer is full) in up to `request_timeout_ms`
            milliseconds.

    Note:
        The returned future will wait based on `request_timeout_ms`
        setting. Cancelling the returned future **will not** stop event
        from being sent, but cancelling the ``send`` coroutine itself
        **will**.
    """
    assert value is not None or self.client.api_version >= (0, 8, 1), (
        'Null messages require kafka >= 0.8.1')
    assert value is not None or key is not None, \
        'Need at least one: key or value'

    # Topic metadata must be available before anything else happens.
    await self.client._wait_on_metadata(topic)

    # A transactional producer may only send inside an open transaction.
    manager = self._txn_manager
    if manager is not None and manager.transactional_id is not None:
        if not manager.is_in_transaction():
            raise IllegalOperation(
                "Can't send messages while not in transaction")

    if headers is None:
        # Record parser/builder support only list type, no explicit None
        headers = []
    elif self.client.api_version < (0, 11):
        raise UnsupportedVersionError(
            "Headers not supported before Kafka 0.11")

    key_bytes, value_bytes = self._serialize(topic, key, value)
    partition = self._partition(
        topic, partition, key, value, key_bytes, value_bytes)

    tp = TopicPartition(topic, partition)
    log.debug("Sending (key=%s value=%s) to %s", key, value, tp)

    return await self._message_accumulator.add_message(
        tp, key_bytes, value_bytes, self._request_timeout_ms / 1000,
        timestamp_ms=timestamp_ms, headers=headers)
def commit(self, offsets=None):
    """ Commit offsets to Kafka.

    Offsets committed through this API are stored in Kafka only. They are
    used on the first fetch after every rebalance and also on startup, so
    this API should not be used if offsets are kept anywhere other than
    Kafka.

    Currently only supports kafka-topic offset storage (not zookeeper)

    When explicitly passing ``offsets`` use either offset of next record,
    or tuple of offset and metadata::

        tp = TopicPartition(msg.topic, msg.partition)
        metadata = "Some utf-8 metadata"
        # Either
        await consumer.commit({tp: msg.offset + 1})
        # Or position directly
        await consumer.commit({tp: (msg.offset + 1, metadata)})

    .. note:: If you want `fire and forget` commit, like
        ``commit_async()`` in *kafka-python*, just run it in a task.
        Something like::

            fut = loop.create_task(consumer.commit())
            fut.add_done_callback(on_commit_done)

    Arguments:
        offsets (dict, optional): {TopicPartition: (offset, metadata)}
            dict to commit with the configured ``group_id``. Defaults to
            current consumed offsets for all subscribed partitions.

    Raises:
        IllegalOperation: If used with ``group_id == None``
        ValueError: If offsets is of wrong format
        KafkaError: If commit failed on broker side. This could be due to
            invalid offset, too long metadata, authorization failure,
            etc.
    """
    if self._group_id is None:
        raise IllegalOperation("Requires group_id")

    if offsets is None:
        to_commit = self._subscription.all_consumed_offsets()
    else:
        # An explicit argument must be a non-empty dict.
        if not (offsets and isinstance(offsets, dict)):
            raise ValueError(offsets)

        to_commit = {}
        for tp, spec in offsets.items():
            if not isinstance(tp, TopicPartition):
                raise ValueError("Key should be TopicPartition instance")

            if isinstance(spec, int):
                # A bare offset carries empty metadata.
                offset, metadata = spec, ""
            else:
                try:
                    offset, metadata = spec
                except Exception:
                    raise ValueError(offsets)
                if not isinstance(metadata, str):
                    raise ValueError("Metadata should be a string")

            to_commit[tp] = OffsetAndMetadata(offset, metadata)

    yield from self._coordinator.commit_offsets(to_commit)
def commit(self, offsets=None):
    """ Commit offsets to Kafka.

    Offsets committed through this API are stored in Kafka only. They are
    used on the first fetch after every rebalance and also on startup, so
    this API should not be used if offsets are kept anywhere other than
    Kafka.

    Currently only supports kafka-topic offset storage (not zookeeper)

    When explicitly passing ``offsets`` use either offset of next record,
    or tuple of offset and metadata::

        tp = TopicPartition(msg.topic, msg.partition)
        metadata = "Some utf-8 metadata"
        # Either
        await consumer.commit({tp: msg.offset + 1})
        # Or position directly
        await consumer.commit({tp: (msg.offset + 1, metadata)})

    .. note:: If you want `fire and forget` commit, like
        ``commit_async()`` in *kafka-python*, just run it in a task.
        Something like::

            fut = loop.create_task(consumer.commit())
            fut.add_done_callback(on_commit_done)

    Arguments:
        offsets (dict, optional): {TopicPartition: (offset, metadata)}
            dict to commit with the configured ``group_id``. Defaults to
            current consumed offsets for all subscribed partitions.

    Raises:
        IllegalOperation: If used with ``group_id == None``.
        IllegalStateError: If partitions not assigned.
        ValueError: If offsets is of wrong format.
        CommitFailedError: If membership already changed on broker.
        KafkaError: If commit failed on broker side. This could be due to
            invalid offset, too long metadata, authorization failure,
            etc.

    .. versionchanged:: 0.4.0

        Changed ``AssertionError`` to ``IllegalStateError`` in case of
        unassigned partition.

    .. versionchanged:: 0.4.0

        Will now raise ``CommitFailedError`` in case membership changed,
        as (possibly) this partition is handled by another consumer.
    """
    if self._group_id is None:
        raise IllegalOperation("Requires group_id")

    subscription = self._subscription.subscription
    if subscription is None:
        raise IllegalStateError("Not subscribed to any topics")

    assignment = subscription.assignment
    if assignment is None:
        raise IllegalStateError("No partitions assigned")

    offsets = (
        assignment.all_consumed_offsets() if offsets is None
        else commit_structure_validate(offsets))

    # Every partition being committed must belong to the current
    # assignment.
    for tp in offsets:
        if tp in assignment.tps:
            continue
        raise IllegalStateError(
            "Partition {} is not assigned".format(tp))

    yield from self._coordinator.commit_offsets(assignment, offsets)