def wait_for_subscription(self):
    """Register and return a waiter for the next subscription change.

    A brand new future is created on every call, so the caller always
    waits for the *next* subscription, never for one already in place.
    """
    waiter = create_future(loop=self._loop)
    self._subscription_waiters.append(waiter)
    return waiter
def wait_for_assignment(self):
    """Register and return a waiter for the next assignment.

    Be careful: a new future is created on every call, so this waits for
    the *next* assignment even when the current one is still active.
    """
    waiter = create_future(loop=self._loop)
    self._assignment_waiters.append(waiter)
    return waiter
def send(self, request, expect_response=True):
    """Frame ``request`` and write it to the broker connection.

    The message is the encoded request header followed by the encoded
    request, prefixed with its length as a big-endian 32-bit integer.

    Returns:
        ``self._writer.drain()`` when ``expect_response`` is false;
        otherwise an ``asyncio.wait_for`` wrapper (bounded by
        ``self._request_timeout``) around a future that is registered in
        ``self._requests`` together with the correlation id and the
        request's ``RESPONSE_TYPE``.

    Raises:
        Errors.ConnectionError: if there is no writer, or if writing
            fails with ``OSError`` (the connection is closed first).
    """
    if self._writer is None:
        raise Errors.ConnectionError(
            "No connection to broker at {0}:{1}"
            .format(self._host, self._port))

    correlation_id = self._next_correlation_id()
    header = RequestHeader(
        request,
        correlation_id=correlation_id,
        client_id=self._client_id)
    payload = header.encode() + request.encode()
    length_prefix = struct.pack(">i", len(payload))
    try:
        self._writer.write(length_prefix + payload)
    except OSError as err:
        self.close(reason=CloseReason.CONNECTION_BROKEN)
        raise Errors.ConnectionError(
            "Connection at {0}:{1} broken: {2}".format(
                self._host, self._port, err))

    self.log.debug('%s Request %d: %s', self, correlation_id, request)

    if expect_response:
        response_fut = create_future(loop=self._loop)
        self._requests.append(
            (correlation_id, request.RESPONSE_TYPE, response_fut))
        return asyncio.wait_for(
            response_fut, self._request_timeout, loop=self._loop)
    # Fire-and-forget: the caller only waits for the buffer to flush.
    return self._writer.drain()
def aborting_transaction(self):
    """Move to the ABORTING_TRANSACTION state and wake the task waiter."""
    self._transition_to(TransactionState.ABORTING_TRANSACTION)

    waiter_already_done = self._transaction_waiter.done()
    if waiter_already_done:
        # An abortable error already completed the waiter, so a new one
        # has to be created.
        self._transaction_waiter = create_future(loop=self._loop)

    self.notify_task_waiter()
def connect(self):
    """Open the broker connection and run the initial handshakes.

    Generator-based coroutine. Creates the stream reader, protocol and
    writer, starts the reader task and (when ``self._max_idle_ms`` is
    set) schedules the idle checker. It then optionally performs the
    API version lookup and the SASL handshake.

    If the version lookup or the SASL handshake fails (or is cancelled),
    the freshly opened connection is closed before the error propagates,
    so no transport or reader task is left behind.

    Returns:
        A ``(reader, writer)`` tuple.
    """
    loop = self._loop
    self._closed_fut = create_future(loop=loop)
    if self._security_protocol in ["PLAINTEXT", "SASL_PLAINTEXT"]:
        ssl = None
    else:
        assert self._security_protocol in ["SSL", "SASL_SSL"]
        assert self._ssl_context is not None
        ssl = self._ssl_context
    # Create streams same as `open_connection`, but using custom protocol
    reader = asyncio.StreamReader(limit=READER_LIMIT, loop=loop)
    protocol = AIOKafkaProtocol(self._closed_fut, reader, loop=loop)
    transport, _ = yield from asyncio.wait_for(
        loop.create_connection(
            lambda: protocol, self.host, self.port, ssl=ssl),
        loop=loop, timeout=self._request_timeout)
    writer = asyncio.StreamWriter(transport, protocol, reader, loop)
    self._reader, self._writer, self._protocol = reader, writer, protocol

    # Start reader task.
    self._read_task = self._create_reader_task()

    # Start idle checker
    if self._max_idle_ms is not None:
        self._idle_handle = self._loop.call_soon(
            self._idle_check, weakref.ref(self))

    try:
        if self._version_hint and self._version_hint >= (0, 10):
            yield from self._do_version_lookup()

        if self._security_protocol in ["SASL_SSL", "SASL_PLAINTEXT"]:
            yield from self._do_sasl_handshake()
    except BaseException:
        # The transport is already open at this point; release it (and
        # the reader task / idle handle) instead of leaking a
        # half-initialised connection.
        self.close()
        raise

    return reader, writer
def __init__(self, *, loop, bootstrap_servers='localhost',
             client_id='aiokafka-' + __version__,
             metadata_max_age_ms=300000,
             request_timeout_ms=40000,
             retry_backoff_ms=100,
             ssl_context=None,
             security_protocol='PLAINTEXT',
             api_version='auto',
             connections_max_idle_ms=540000,
             sasl_mechanism='PLAIN',
             sasl_plain_username=None,
             sasl_plain_password=None,
             sasl_kerberos_service_name='kafka',
             sasl_kerberos_domain_name=None):
    """Validate the connection settings and initialise client state.

    Raises:
        ValueError: if ``security_protocol`` is not one of ``SSL``,
            ``PLAINTEXT``, ``SASL_PLAINTEXT`` or ``SASL_SSL``; if an
            SSL-based protocol is chosen without ``ssl_context``; if a
            SASL protocol is chosen with a ``sasl_mechanism`` other than
            ``PLAIN``/``GSSAPI``; or if ``PLAIN`` is used without both a
            username and a password.
    """
    if security_protocol not in (
            'SSL', 'PLAINTEXT', 'SASL_PLAINTEXT', 'SASL_SSL'):
        # The message keeps its historical prefix but now also names the
        # SASL variants that are accepted above.
        raise ValueError(
            "`security_protocol` should be SSL or PLAINTEXT, "
            "or one of their SASL variants SASL_SSL or SASL_PLAINTEXT")
    if security_protocol in ["SSL", "SASL_SSL"] and ssl_context is None:
        raise ValueError(
            "`ssl_context` is mandatory if security_protocol=='SSL' "
            "or security_protocol=='SASL_SSL'")
    if security_protocol in ["SASL_SSL", "SASL_PLAINTEXT"]:
        if sasl_mechanism not in ("PLAIN", "GSSAPI"):
            raise ValueError(
                "only `PLAIN` and `GSSAPI` sasl_mechanism "
                "are supported at the moment")
        if sasl_mechanism == "PLAIN" and \
           (sasl_plain_username is None or sasl_plain_password is None):
            raise ValueError(
                "sasl_plain_username and sasl_plain_password required for "
                "PLAIN sasl")

    self._bootstrap_servers = bootstrap_servers
    self._client_id = client_id
    self._metadata_max_age_ms = metadata_max_age_ms
    self._request_timeout_ms = request_timeout_ms
    if api_version != "auto":
        api_version = parse_kafka_version(api_version)
    self._api_version = api_version
    self._security_protocol = security_protocol
    self._ssl_context = ssl_context
    # Stored in seconds; the argument is given in milliseconds.
    self._retry_backoff = retry_backoff_ms / 1000
    self._connections_max_idle_ms = connections_max_idle_ms
    self._sasl_mechanism = sasl_mechanism
    self._sasl_plain_username = sasl_plain_username
    self._sasl_plain_password = sasl_plain_password
    self._sasl_kerberos_service_name = sasl_kerberos_service_name
    self._sasl_kerberos_domain_name = sasl_kerberos_domain_name

    self.cluster = ClusterMetadata(metadata_max_age_ms=metadata_max_age_ms)
    self._topics = set()  # empty set will fetch all topic metadata
    self._conns = {}
    self._loop = loop
    self._sync_task = None

    self._md_update_fut = None
    self._md_update_waiter = create_future(loop=self._loop)
    self._get_conn_lock = asyncio.Lock(loop=loop)
def __init__(self, user_assignment: Set[TopicPartition], *, loop):
    """Build a subscription from an explicit set of topic partitions.

    The topic set is derived from the ``topic`` attribute of each entry
    in ``user_assignment``.
    """
    self._topics = frozenset(tp.topic for tp in user_assignment)
    self._assignment = Assignment(user_assignment, loop=loop)
    self._loop = loop
    self.unsubscribe_future = create_future(loop)
def await_reset(self, strategy):
    """Drop the current position and wait for a reset using ``strategy``.

    Called either by the Consumer in ``seek_to_*`` or by the Coordinator
    after setting the initial committed point.
    """
    self._reset_strategy = strategy
    self._position = None

    position_resolved = self._position_fut.done()
    if position_resolved:
        # The old future is already completed, so waiters need a new one.
        self._position_fut = create_future(loop=self._loop)

    self._status = PartitionStatus.AWAITING_RESET
def add_offsets_to_txn(self, offsets, group_id):
    """Queue ``offsets`` of ``group_id`` for the current transaction.

    Appends ``(group_id, offsets, future)`` to the pending list, wakes
    the task waiter and returns that future.
    """
    assert self.is_in_transaction()
    assert self.transactional_id

    pending_fut = create_future(loop=self._loop)
    entry = (group_id, offsets, pending_fut)
    self._pending_txn_offsets.append(entry)
    self.notify_task_waiter()
    return pending_fut
def _create_fetch_waiter(self):
    """Create a fetch waiter that unregisters itself once it is done."""
    # Creating a fetch waiter is usually not that frequent of an
    # operation (get methods will return all data first, before a waiter
    # is created).
    registry = self._fetch_waiters
    waiter = create_future(loop=self._loop)
    registry.add(waiter)

    def _unregister(done_fut):
        registry.remove(done_fut)

    waiter.add_done_callback(_unregister)
    return waiter
def __init__(self, topic_partitions: Set[TopicPartition], *, loop):
    """Create per-partition state for every entry of ``topic_partitions``.

    ``topic_partitions`` must be a list, set or tuple.
    """
    assert isinstance(topic_partitions, (list, set, tuple))

    self._topic_partitions = frozenset(topic_partitions)
    self._tp_state = {}  # type: Dict[TopicPartition, TopicPartitionState]
    self._tp_state.update(
        (tp, TopicPartitionState(self, loop=loop))
        for tp in self._topic_partitions)

    self._loop = loop
    self.unassign_future = create_future(loop)
    self.commit_refresh_needed = Event(loop=loop)
def fatal_error(self, exc):
    """Enter the FATAL_ERROR state and fail everything pending with ``exc``.

    Clears the tracked and pending transaction partitions, resets the
    transaction consumer group, fails every pending offsets future and
    finally sets ``exc`` on the transaction waiter.
    """
    self._transition_to(TransactionState.FATAL_ERROR)
    self._txn_partitions.clear()
    self._txn_consumer_group = None
    self._pending_txn_partitions.clear()
    for _, _, fut in self._pending_txn_offsets:
        # A future that is already done (e.g. cancelled by its caller)
        # would raise InvalidStateError here and abort the cleanup
        # half-way, so only fail the ones still pending.
        if not fut.done():
            fut.set_exception(exc)
    self._pending_txn_offsets.clear()
    # There may be an abortable error. We just override it
    if self._transaction_waiter.done():
        self._transaction_waiter = create_future(loop=self._loop)
    self._transaction_waiter.set_exception(exc)
def add_topic(self, topic):
    """Add a topic to the list of topics tracked via metadata.

    Arguments:
        topic (str): topic to track

    Returns:
        The awaitable from ``force_metadata_update()`` when the topic
        was not tracked yet, otherwise a future already resolved to
        ``True``.
    """
    if topic not in self._topics:
        result = self.force_metadata_update()
    else:
        result = create_future(loop=self._loop)
        result.set_result(True)
    self._topics.add(topic)
    return result
def force_metadata_update(self):
    """Request a cluster metadata update.

    Returns:
        A shielded awaitable over the pending update future
        (True/False - metadata updated or not).
    """
    pending = self._md_update_fut
    if pending is None:
        # Wake up the `_md_synchronizer` task
        wakeup = self._md_update_waiter
        if not wakeup.done():
            wakeup.set_result(None)
        pending = create_future(loop=self._loop)
        self._md_update_fut = pending
    # Metadata will be updated in the background by the synchronizer
    return asyncio.shield(pending, loop=self._loop)
def set_topics(self, topics):
    """Set specific topics to track for metadata.

    Arguments:
        topics (list of str): topics to track

    Returns:
        The awaitable from ``force_metadata_update()`` when ``topics``
        is empty or contains a topic not tracked yet, otherwise a future
        already resolved to ``True``.
    """
    assert not isinstance(topics, str)

    needs_refresh = not topics or set(topics).difference(self._topics)
    if needs_refresh:
        result = self.force_metadata_update()
    else:
        result = create_future(loop=self._loop)
        result.set_result(True)

    self._topics = set(topics)
    return result
def _md_synchronizer(self):
    """Routine (async task) that synchronizes cluster metadata.

    Runs forever: each iteration waits on ``self._md_update_waiter`` for
    at most ``metadata_max_age_ms`` milliseconds, then refreshes metadata
    for the current topic set and publishes the outcome through
    ``self._md_update_fut``.
    """
    while True:
        # Returns either when the waiter is completed or when the
        # timeout elapses.
        yield from asyncio.wait(
            [self._md_update_waiter],
            timeout=self._metadata_max_age_ms / 1000,
            loop=self._loop)

        # Remember which topic set this update was made for.
        topics = self._topics
        if self._md_update_fut is None:
            self._md_update_fut = create_future(loop=self._loop)
        ret = yield from self._metadata_update(self.cluster, topics)
        # If list of topics changed during metadata update we must update
        # it again right away.
        if topics != self._topics:
            continue
        # Earlier this waiter was set before sending metadata_request,
        # but that was to avoid topic list changes being unnoticed, which
        # is handled explicitly now.
        self._md_update_waiter = create_future(loop=self._loop)

        self._md_update_fut.set_result(ret)
        self._md_update_fut = None
def __init__(self, transactional_id, transaction_timeout_ms, *, loop):
    """Initialise transaction bookkeeping in the UNINITIALIZED state."""
    self._loop = loop

    self.transactional_id = transactional_id
    self.transaction_timeout_ms = transaction_timeout_ms
    self.state = TransactionState.UNINITIALIZED

    # Producer id / epoch start out as the "unknown" sentinels.
    self._pid_and_epoch = PidAndEpoch(NO_PRODUCER_ID, NO_PRODUCER_EPOCH)
    self._pid_waiter = create_future(loop)
    self._sequence_numbers = defaultdict(lambda: 0)

    # Waiters are created later on demand.
    self._transaction_waiter = None
    self._task_waiter = None

    self._txn_partitions = set()
    self._pending_txn_partitions = set()
    self._txn_consumer_group = None
    self._pending_txn_offsets = deque()
def test_producer_send_reenque_resets_waiters(self):
    """Check that a reenqueued batch does not break later sends."""
    # See issue #409. If the reenqueue method does not reset the waiter
    # properly, new batches will raise RecursionError.
    producer = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts,
        linger_ms=1000)
    yield from producer.start()
    self.add_cleanup(producer.stop)

    # 1st step is to force an error in the produce sequence and force a
    # reenqueue on 1 batch.
    with mock.patch.object(producer.client, 'send') as mocked:
        send_fut = create_future(self.loop)

        @asyncio.coroutine
        def mocked_func(node_id, request):
            # Signal that the send was attempted, then fail it.
            if not send_fut.done():
                send_fut.set_result(None)
            raise UnknownTopicOrPartitionError()
        mocked.side_effect = mocked_func

        fut = yield from producer.send(
            self.topic, b'Some MSG', partition=0)
        yield from send_fut
        # 100ms backoff time
        yield from asyncio.sleep(0.11, loop=self.loop)
    self.assertFalse(fut.done())
    self.assertTrue(producer._message_accumulator._batches)

    # Then we add another msg right after the reenqueue. As we use
    # linger_ms `_sender_routine` will be locked for some time after we
    # reenqueue batch, so this add will be forced to wait a longer time.
    # If drain_waiter is broken it will end up with a RecursionError.
    fut2 = yield from producer.send(self.topic, b'Some MSG 2', partition=0)

    yield from fut2
    self.assertTrue(fut.done())
    self.assertTrue(fut2.done())
    msg1 = yield from fut
    msg2 = yield from fut2

    # The order should be preserved
    self.assertLess(msg1.offset, msg2.offset)
def __init__(self, assignment, *, loop):
    """Initialise partition state, starting out as AWAITING_RESET."""
    self._loop = loop
    self._assignment = assignment

    # Synchronized values
    self._committed_futs = []
    self.highwater = None  # Last fetched highwater mark
    self.lso = None  # Last fetched stable offset mark
    self._position = None  # The current position of the topic
    self._position_fut = create_future(loop=loop)

    # Will be set by `seek_to_beginning` or `seek_to_end` if called by
    # user or by Fetcher after confirming that current position is no
    # longer reachable.
    self._reset_strategy = None  # type: int
    self._status = PartitionStatus.AWAITING_RESET  # type: PartitionStatus

    # Pause bookkeeping: not paused, hence no resume future yet.
    self._paused = False
    self._resume_fut = None
def _send_sasl_token(self, payload, expect_response=True):
    """Write a length-prefixed SASL ``payload`` to the connection.

    Returns:
        ``self._writer.drain()`` when ``expect_response`` is false;
        otherwise an ``asyncio.wait_for`` wrapper (bounded by
        ``self._request_timeout``) around a future registered in
        ``self._requests`` as ``(None, None, future)``.

    Raises:
        Errors.ConnectionError: if there is no writer, or if writing
            fails with ``OSError`` (the connection is closed first).
    """
    if self._writer is None:
        raise Errors.ConnectionError(
            "No connection to broker at {0}:{1}"
            .format(self._host, self._port))

    length_prefix = struct.pack(">i", len(payload))
    try:
        self._writer.write(length_prefix + payload)
    except OSError as err:
        self.close(reason=CloseReason.CONNECTION_BROKEN)
        raise Errors.ConnectionError(
            "Connection at {0}:{1} broken: {2}".format(
                self._host, self._port, err))

    if expect_response:
        response_fut = create_future(loop=self._loop)
        self._requests.append((None, None, response_fut))
        return asyncio.wait_for(
            response_fut, self._request_timeout, loop=self._loop)
    return self._writer.drain()
def __init__(self, cluster, batch_size, compression_type, batch_ttl, *,
             txn_manager=None, loop):
    """Initialise an empty accumulator with the given batch settings."""
    self._loop = loop
    self._cluster = cluster
    self._txn_manager = txn_manager

    # Batch configuration
    self._batch_size = batch_size
    self._compression_type = compression_type
    self._batch_ttl = batch_ttl
    self._api_version = (0, 9)

    # Batch storage: per-key deques plus the set of pending batches.
    self._batches = collections.defaultdict(collections.deque)
    self._pending_batches = set()
    self._wait_data_future = create_future(loop=loop)

    self._closed = False
    self._exception = None  # Critical exception
def _send_sasl_token(self, payload, expect_response=True):
    """Write a length-prefixed SASL ``payload`` to the connection.

    Returns:
        ``self._writer.drain()`` when ``expect_response`` is false;
        otherwise an ``asyncio.wait_for`` wrapper (bounded by
        ``self._request_timeout``) around a future registered in
        ``self._requests`` as ``(None, None, future)``.

    Raises:
        Errors.KafkaConnectionError: if there is no writer, or if
            writing fails with ``OSError`` (the connection is closed
            first).
    """
    if self._writer is None:
        raise Errors.KafkaConnectionError(
            "No connection to broker at {0}:{1}".format(
                self._host, self._port))

    length_prefix = struct.pack(">i", len(payload))
    try:
        self._writer.write(length_prefix + payload)
    except OSError as err:
        self.close(reason=CloseReason.CONNECTION_BROKEN)
        raise Errors.KafkaConnectionError(
            "Connection at {0}:{1} broken: {2}".format(
                self._host, self._port, err))

    if expect_response:
        response_fut = create_future(loop=self._loop)
        self._requests.append((None, None, response_fut))
        return asyncio.wait_for(
            response_fut, self._request_timeout, loop=self._loop)
    return self._writer.drain()
def __init__(self, *, loop, bootstrap_servers='localhost',
             client_id='aiokafka-' + __version__,
             metadata_max_age_ms=300000,
             request_timeout_ms=40000,
             retry_backoff_ms=100,
             ssl_context=None,
             security_protocol='PLAINTEXT',
             api_version='auto',
             connections_max_idle_ms=540000):
    """Validate the connection settings and initialise client state.

    Raises:
        ValueError: if ``security_protocol`` is neither ``SSL`` nor
            ``PLAINTEXT``, or if ``SSL`` is chosen without
            ``ssl_context``.
    """
    if security_protocol not in ('SSL', 'PLAINTEXT'):
        raise ValueError("`security_protocol` should be SSL or PLAINTEXT")
    ssl_requested = security_protocol == "SSL"
    if ssl_requested and ssl_context is None:
        raise ValueError(
            "`ssl_context` is mandatory if security_protocol=='SSL'")

    # Plain configuration values
    self._bootstrap_servers = bootstrap_servers
    self._client_id = client_id
    self._metadata_max_age_ms = metadata_max_age_ms
    self._request_timeout_ms = request_timeout_ms
    if api_version == "auto":
        self._api_version = api_version
    else:
        self._api_version = parse_kafka_version(api_version)
    self._security_protocol = security_protocol
    self._ssl_context = ssl_context
    # Stored in seconds; the argument is given in milliseconds.
    self._retry_backoff = retry_backoff_ms / 1000
    self._connections_max_idle_ms = connections_max_idle_ms

    # Runtime state
    self.cluster = ClusterMetadata(metadata_max_age_ms=metadata_max_age_ms)
    self._topics = set()  # empty set will fetch all topic metadata
    self._conns = {}
    self._loop = loop
    self._sync_task = None

    self._md_update_fut = None
    self._md_update_waiter = create_future(loop=self._loop)
    self._get_conn_lock = asyncio.Lock(loop=loop)
def drain_by_nodes(self, ignore_nodes):
    """Group batches by the leader node of their partition.

    Arguments:
        ignore_nodes: leaders whose partitions are skipped; a falsy
            value skips nothing.

    Returns:
        A ``(nodes, unknown_leaders_exist)`` tuple where ``nodes`` maps
        leader -> {partition: batch} and the flag tells whether any
        partition had no known leader.
    """
    nodes = collections.defaultdict(dict)
    unknown_leaders_exist = False
    # Iterate over a snapshot of the keys; `_pop_batch` is called inside
    # the loop.
    for tp in list(self._batches.keys()):
        leader = self._cluster.leader_for_partition(tp)
        if leader is None or leader == -1:
            if self._batches[tp][0].expired():
                # The batch for this partition is expired and there is
                # still no leader, so set an exception on the batch and
                # pop it.
                batch = self._pop_batch(tp)
                if leader is None:
                    err = NotLeaderForPartitionError()
                else:
                    err = LeaderNotAvailableError()
                batch.failure(exception=err)
            unknown_leaders_exist = True
            continue
        elif ignore_nodes and leader in ignore_nodes:
            continue

        batch = self._pop_batch(tp)
        # We can get an empty batch here if all `append()` calls failed
        # with validation...
        if not batch.is_empty():
            nodes[leader][tp] = batch
        else:
            # XXX: use something more graceful. We just want to trigger
            # delivery future here, no message futures.
            batch.done_noack()

    # all batches are drained from accumulator
    # so create "wait data" future again for waiting new data in send
    # task
    if not self._wait_data_future.done():
        self._wait_data_future.set_result(None)
    self._wait_data_future = create_future(loop=self._loop)

    return nodes, unknown_leaders_exist
async def connect(self):
    """Open the broker connection and run the initial handshakes.

    Creates the stream reader, protocol and writer, starts the reader
    task and (when ``self._max_idle_ms`` is set) schedules the idle
    checker. Any failure during the version lookup or the SASL handshake
    closes the connection before the error is re-raised.

    Returns:
        A ``(reader, writer)`` tuple.
    """
    loop = self._loop
    self._closed_fut = create_future()

    ssl = None
    if self._security_protocol not in ["PLAINTEXT", "SASL_PLAINTEXT"]:
        assert self._security_protocol in ["SSL", "SASL_SSL"]
        assert self._ssl_context is not None
        ssl = self._ssl_context

    # Create streams same as `open_connection`, but using custom protocol
    reader = asyncio.StreamReader(limit=READER_LIMIT, loop=loop)
    protocol = AIOKafkaProtocol(self._closed_fut, reader, loop=loop)
    async with async_timeout.timeout(self._request_timeout):
        transport, _ = await loop.create_connection(
            lambda: protocol, self.host, self.port, ssl=ssl)
    writer = asyncio.StreamWriter(transport, protocol, reader, loop)
    self._reader = reader
    self._writer = writer
    self._protocol = protocol

    # Start reader task.
    self._read_task = self._create_reader_task()

    # Start idle checker
    if self._max_idle_ms is not None:
        self._idle_handle = loop.call_soon(
            self._idle_check, weakref.ref(self))

    try:
        if self._version_hint and self._version_hint >= (0, 10):
            await self._do_version_lookup()

        if self._security_protocol in ["SASL_SSL", "SASL_PLAINTEXT"]:
            await self._do_sasl_handshake()
    except BaseException:
        # Do not leave a half-initialised connection behind.
        self.close()
        raise

    return reader, writer
def ensure_active_group(self):
    """Ensure that the group is active (i.e. joined and synced).

    Generator-based coroutine. Runs under ``self._rejoin_lock`` and
    keeps rebalancing while ``need_rejoin()`` is true.
    """
    with (yield from self._rejoin_lock):
        if not self.need_rejoin():
            return

        # Run the pre-join hook only once per pending rejoin.
        if self.pending_rejoin_fut is None:
            yield from self._on_join_prepare(self.generation,
                                             self.member_id)
            self.pending_rejoin_fut = create_future(loop=self._loop)

        while self.need_rejoin():
            yield from self.ensure_coordinator_known()

            # Here we create a copy of subscription so we can check if it
            # changed during rebalance.
            subscription = copy(self._subscription.subscription)
            rebalance = CoordinatorGroupRebalance(
                self, self.group_id, self.coordinator_id,
                subscription, self._assignors, self._session_timeout_ms,
                self._retry_backoff_ms, loop=self.loop)
            assignment = yield from rebalance.perform_group_join()

            if (subscription != self._subscription.subscription):
                log.debug(
                    "Subscription changed during rebalance "
                    "from %s to %s. Rejoining group.",
                    subscription, self._subscription.subscription)
                continue

            # A `None` assignment leaves the loop to re-check
            # `need_rejoin()`.
            if assignment is not None:
                protocol, member_assignment_bytes = assignment
                yield from self._on_join_complete(
                    self.generation, self.member_id,
                    protocol, member_assignment_bytes)
                return
def next_record(self, partitions):
    """Return one fetched record.

    This method has a little overhead, as more work is done this way:

        * Notify the prefetch routine for every consumed partition
        * Assure the message is marked for autocommit

    Arguments:
        partitions: when truthy, only these partitions are considered.
    """
    while True:
        # Iterate over a snapshot of the keys, because entries are
        # deleted inside the loop.
        for tp in list(self._records.keys()):
            if partitions and tp not in partitions:
                # Clean up results for unassigned partitions
                if not self._subscriptions.is_assigned(tp):
                    del self._records[tp]
                continue
            res_or_error = self._records[tp]
            if type(res_or_error) == FetchResult:
                message = res_or_error.getone()
                if message is None:
                    # We already processed all messages, request new ones
                    del self._records[tp]
                    self._notify(self._wait_consume_future)
                else:
                    return message
            else:
                # Remove error, so we can fetch on partition again
                del self._records[tp]
                self._notify(self._wait_consume_future)
                res_or_error.check_raise()

        # No messages ready. Wait for some to arrive
        if self._wait_empty_future is None \
                or self._wait_empty_future.done():
            self._wait_empty_future = create_future(loop=self._loop)
        yield from asyncio.shield(self._wait_empty_future, loop=self._loop)
def connect(self):
    """Open the broker connection and start its background helpers.

    Generator-based coroutine. Creates the stream reader, protocol and
    writer, starts the reader task and (when ``self._max_idle_ms`` is
    set) schedules the idle checker.

    Returns:
        A ``(reader, writer)`` tuple.
    """
    loop = self._loop
    self._closed_fut = create_future(loop=loop)

    ssl = None
    if self._secutity_protocol != "PLAINTEXT":
        assert self._secutity_protocol == "SSL"
        assert self._ssl_context is not None
        ssl = self._ssl_context

    # Create streams same as `open_connection`, but using custom protocol
    reader = asyncio.StreamReader(limit=READER_LIMIT, loop=loop)
    protocol = AIOKafkaProtocol(self._closed_fut, reader, loop=loop)
    connecting = loop.create_connection(
        lambda: protocol, self.host, self.port, ssl=ssl)
    transport, _ = yield from asyncio.wait_for(
        connecting, loop=loop, timeout=self._request_timeout)
    writer = asyncio.StreamWriter(transport, protocol, reader, loop)
    self._reader = reader
    self._writer = writer
    self._protocol = protocol

    # Start reader task.
    self._read_task = ensure_future(self._read(), loop=loop)

    # Start idle checker
    if self._max_idle_ms is not None:
        self._idle_handle = self._loop.call_soon(self._idle_check)

    return reader, writer
def make_task_waiter(self):
    """Create a new task waiter, store it on the instance and return it."""
    waiter = create_future(loop=self._loop)
    self._task_waiter = waiter
    return waiter
def force_metadata_update():
    """Return a future that is already resolved to ``True``."""
    resolved = create_future()
    resolved.set_result(True)
    return resolved
def pause(self):
    """Mark as paused and create the resume future; no-op if paused."""
    if self._paused:
        return
    self._paused = True
    # No resume future may be left over from an earlier pause.
    assert self._resume_fut is None
    self._resume_fut = create_future(loop=self._loop)
def pause(self):
    """Mark as paused and create the resume future; no-op if paused."""
    if self._paused:
        return
    self._paused = True
    # No resume future may be left over from an earlier pause.
    assert self._resume_fut is None
    self._resume_fut = create_future(loop=self._loop)
def __init__(self, topics: Set[str], *, loop: ALoop):
    """Create a topic subscription that has no assignment yet."""
    self._loop = loop  # type: ALoop
    self._topics = frozenset(topics)  # type: Set[str]
    # No assignment yet, so a reassignment is considered in progress.
    self._assignment = None  # type: Assignment
    self._reassignment_in_progress = True
    self.unsubscribe_future = create_future(loop)  # type: Future
def _create_fetch_waiter(self):
    """Create a fetch waiter that unregisters itself once it is done."""
    registry = self._fetch_waiters
    waiter = create_future(loop=self._loop)
    registry.add(waiter)

    def _unregister(done_fut):
        registry.remove(done_fut)

    waiter.add_done_callback(_unregister)
    return waiter
def _fetch_requests_routine(self):
    """ Implements a background task to populate the internal fetch queue
    ``self._records`` with prefetched messages. This helps isolate the
    ``getall/getone`` calls from actual calls to the broker. This way we
    don't need to think of what happens if the user calls get in 2 tasks,
    etc.

        The loop is quite complicated due to a large set of events that
    can allow new fetches to be sent. Those include previous fetches,
    offset resets, metadata updates to discover new leaders for
    partitions, and data consumed for a partition.

        Previously the offset reset was performed separately, but it did
    not perform too reliably. In ``kafka-python`` and the Java client the
    reset is performed in ``poll()`` before each fetch, which works well
    for sync systems. But with ``aiokafka`` the user can actually break
    such behaviour quite easily by performing actions from different
    tasks.

    Raises:
        Errors.KafkaError: on any unexpected exception (after logging).
    """
    try:
        assignment = None

        def start_pending_task(coro, node_id, self=self):
            # Schedule `coro` and mark `node_id` as in flight until the
            # task completes.
            task = ensure_future(coro, loop=self._loop)
            self._pending_tasks.add(task)
            self._in_flight.add(node_id)

            def on_done(fut, self=self):
                self._in_flight.discard(node_id)
            task.add_done_callback(on_done)

        while True:
            # If we lose assignment we just cancel all current tasks,
            # wait for new assignment and restart the loop
            if assignment is None or not assignment.active:
                for task in self._pending_tasks:
                    # Those tasks should have proper handling for
                    # cancellation
                    if not task.done():
                        task.cancel()
                        yield from task
                self._pending_tasks.clear()
                self._records.clear()

                subscription = self._subscriptions.subscription
                if subscription is None or \
                        subscription.assignment is None:
                    try:
                        waiter = self._subscriptions.wait_for_assignment()
                        yield from waiter
                    except Errors.KafkaError:
                        # Critical coordination waiters will be passed
                        # to user, but fetcher can just ignore those
                        continue
                assignment = self._subscriptions.subscription.assignment
            assert assignment is not None and assignment.active

            # Reset consuming signal future.
            self._wait_consume_future = create_future(loop=self._loop)
            # Determine what action to take per node
            (fetch_requests, reset_requests, timeout, invalid_metadata,
             resume_futures) = self._get_actions_per_node(assignment)
            # Start fetch tasks
            for node_id, request in fetch_requests:
                start_pending_task(
                    self._proc_fetch_request(assignment, node_id, request),
                    node_id=node_id)
            # Start update position tasks
            for node_id, tps in reset_requests.items():
                start_pending_task(
                    self._update_fetch_positions(assignment, node_id, tps),
                    node_id=node_id)
            # Apart from pending requests we also need to react to other
            # events to send new fetches as soon as possible
            other_futs = [self._wait_consume_future,
                          assignment.unassign_future]
            if invalid_metadata:
                fut = self._client.force_metadata_update()
                other_futs.append(fut)

            done_set, _ = yield from asyncio.wait(
                chain(self._pending_tasks, other_futs, resume_futures),
                loop=self._loop,
                timeout=timeout,
                return_when=asyncio.FIRST_COMPLETED)

            # Process fetch tasks results if any
            done_pending = self._pending_tasks.intersection(done_set)
            if done_pending:
                has_new_data = any(fut.result() for fut in done_pending)
                if has_new_data:
                    for waiter in self._fetch_waiters:
                        # we added some messages to self._records,
                        # wake up waiters
                        self._notify(waiter)
                self._pending_tasks -= done_pending
    except asyncio.CancelledError:
        # Cancellation ends the routine quietly.
        pass
    except Exception:  # pragma: no cover
        log.error("Unexpected error in fetcher routine", exc_info=True)
        raise Errors.KafkaError("Unexpected error during data retrieval")
def __init__(self, topics: Set[str], *, loop: ALoop):
    """Create a topic subscription that has no assignment yet."""
    self._loop = loop  # type: ALoop
    self._topics = frozenset(topics)  # type: Set[str]
    # No assignment yet, so a reassignment is considered in progress.
    self._assignment = None  # type: Assignment
    self._reassignment_in_progress = True
    self.unsubscribe_future = create_future(loop)  # type: Future
def make_task_waiter(self):
    """Create a new task waiter, store it on the instance and return it."""
    waiter = create_future(loop=self._loop)
    self._task_waiter = waiter
    return waiter
def fetch_committed(self):
    """Register a waiter for the committed offset and request a refresh.

    The new future is appended to ``self._committed_futs`` and the
    assignment's ``commit_refresh_needed`` event is set.
    """
    committed_fut = create_future(loop=self._loop)
    self._committed_futs.append(committed_fut)
    self._assignment.commit_refresh_needed.set()
    return committed_fut
def __init__(self, client, subscription, *, loop,
             group_id='aiokafka-default-group',
             session_timeout_ms=30000, heartbeat_interval_ms=3000,
             retry_backoff_ms=100,
             enable_auto_commit=True, auto_commit_interval_ms=5000,
             assignors=(RoundRobinPartitionAssignor, ),
             exclude_internal_topics=True,
             assignment_changed_cb=None):
    """Initialize the coordination manager.

    Besides storing the configuration, this starts the heartbeat task
    and, when ``enable_auto_commit`` is true, the auto-commit task.

    Parameters:
        client (AIOKafkaClient): kafka client
        subscription (SubscriptionState): instance of SubscriptionState
            located in kafka.consumer.subscription_state
        loop: event loop used for the lock, the futures and the
            background tasks created here.
        group_id (str): name of the consumer group to join for dynamic
            partition assignment (if enabled), and to use for fetching
            and committing offsets. Default: 'aiokafka-default-group'
        enable_auto_commit (bool): If true the consumer's offset will be
            periodically committed in the background. Default: True.
        auto_commit_interval_ms (int): milliseconds between automatic
            offset commits, if enable_auto_commit is True. Default: 5000.
        assignors (list): List of objects to use to distribute partition
            ownership amongst consumer instances when group management is
            used. Default: [RoundRobinPartitionAssignor]
        heartbeat_interval_ms (int): The expected time in milliseconds
            between heartbeats to the consumer coordinator when using
            Kafka's group management feature. Heartbeats are used to
            ensure that the consumer's session stays active and to
            facilitate rebalancing when new consumers join or leave the
            group. The value must be set lower than session_timeout_ms,
            but typically should be set no higher than 1/3 of that value.
            It can be adjusted even lower to control the expected time
            for normal rebalances. Default: 3000
        session_timeout_ms (int): The timeout used to detect failures
            when using Kafka's group management facilities.
            Default: 30000
        retry_backoff_ms (int): Milliseconds to backoff when retrying on
            errors. Default: 100.
        exclude_internal_topics: forwarded unchanged to the base class
            initializer. Default: True
        assignment_changed_cb: forwarded unchanged to the base class
            initializer. Default: None
    """
    super().__init__(client, subscription, loop=loop,
                     exclude_internal_topics=exclude_internal_topics,
                     assignment_changed_cb=assignment_changed_cb)

    self._loop = loop
    self._session_timeout_ms = session_timeout_ms
    self._heartbeat_interval_ms = heartbeat_interval_ms
    self._retry_backoff_ms = retry_backoff_ms
    self._assignors = assignors
    self._enable_auto_commit = enable_auto_commit
    self._auto_commit_interval_ms = auto_commit_interval_ms

    self.generation = OffsetCommitRequest.DEFAULT_GENERATION_ID
    self.member_id = JoinGroupRequest.UNKNOWN_MEMBER_ID
    self.group_id = group_id
    self.coordinator_id = None
    self.rejoin_needed = True
    self.pending_rejoin_fut = None
    # `ensure_active_group` can be called from several places
    # (from consumer and from heartbeat task), so we need lock
    self._rejoin_lock = asyncio.Lock(loop=loop)
    self._auto_commit_task = None

    # _closing future used as a signal to heartbeat task for finish ASAP
    self._closing = create_future(loop=loop)

    self.heartbeat_task = ensure_future(self._heartbeat_task_routine(),
                                        loop=loop)

    if self._enable_auto_commit:
        # The routine takes seconds; the setting is in milliseconds.
        interval = self._auto_commit_interval_ms / 1000
        self._auto_commit_task = ensure_future(
            self.auto_commit_routine(interval), loop=loop)
def begin_transaction(self):
    """Move to the IN_TRANSACTION state and install a new waiter."""
    self._transition_to(TransactionState.IN_TRANSACTION)
    waiter = create_future()
    self._transaction_waiter = waiter
def make_task_waiter(self):
    """Create a new task waiter, store it on the instance and return it."""
    waiter = create_future()
    self._task_waiter = waiter
    return waiter
def _fetch_requests_routine(self):
    """ Implements a background task to populate the internal fetch queue
    ``self._records`` with prefetched messages. This helps isolate the
    ``getall/getone`` calls from actual calls to the broker. This way we
    don't need to think of what happens if the user calls get in 2 tasks,
    etc.

        The loop is quite complicated due to a large set of events that
    can allow new fetches to be sent. Those include previous fetches,
    offset resets, metadata updates to discover new leaders for
    partitions, and data consumed for a partition.

        Previously the offset reset was performed separately, but it did
    not perform too reliably. In ``kafka-python`` and the Java client the
    reset is performed in ``poll()`` before each fetch, which works well
    for sync systems. But with ``aiokafka`` the user can actually break
    such behaviour quite easily by performing actions from different
    tasks.
    """
    try:
        assignment = None

        def start_pending_task(coro, node_id, self=self):
            # Schedule `coro` and mark `node_id` as in flight until the
            # task completes.
            task = ensure_future(coro, loop=self._loop)
            self._pending_tasks.add(task)
            self._in_flight.add(node_id)

            def on_done(fut, self=self):
                self._in_flight.discard(node_id)
            task.add_done_callback(on_done)

        while True:
            # If we lose assignment we just cancel all current tasks,
            # wait for new assignment and restart the loop
            if assignment is None or not assignment.active:
                for task in self._pending_tasks:
                    # Those tasks should have proper handling for
                    # cancellation
                    if not task.done():
                        task.cancel()
                        yield from task
                self._pending_tasks.clear()
                self._records.clear()

                subscription = self._subscriptions.subscription
                if subscription is None or \
                        subscription.assignment is None:
                    yield from self._subscriptions.wait_for_assignment()
                assignment = self._subscriptions.subscription.assignment
            assert assignment is not None and assignment.active

            # Reset consuming signal future.
            self._wait_consume_future = create_future(loop=self._loop)
            # Determine what action to take per node
            fetch_requests, reset_requests, timeout, invalid_metadata = \
                self._get_actions_per_node(assignment)
            # Start fetch tasks
            for node_id, request in fetch_requests:
                start_pending_task(self._proc_fetch_request(
                    assignment, node_id, request), node_id=node_id)
            # Start update position tasks
            for node_id, tps in reset_requests.items():
                start_pending_task(self._update_fetch_positions(
                    assignment, node_id, tps), node_id=node_id)
            # Apart from pending requests we also need to react to other
            # events to send new fetches as soon as possible
            other_futs = [
                self._wait_consume_future, assignment.unassign_future
            ]
            if invalid_metadata:
                fut = self._client.force_metadata_update()
                other_futs.append(fut)

            done_set, _ = yield from asyncio.wait(
                chain(self._pending_tasks, other_futs),
                loop=self._loop,
                timeout=timeout,
                return_when=asyncio.FIRST_COMPLETED)

            # Process fetch tasks results if any
            done_pending = self._pending_tasks.intersection(done_set)
            if done_pending:
                has_new_data = any(fut.result() for fut in done_pending)
                if has_new_data:
                    for waiter in self._fetch_waiters:
                        # we added some messages to self._records,
                        # wake up waiters
                        self._notify(waiter)
                self._pending_tasks -= done_pending
    except asyncio.CancelledError:
        # Cancellation ends the routine quietly.
        pass
    except Exception:  # pragma: no cover
        # NOTE(review): the error is only logged here and the routine
        # then ends; confirm callers can detect that fetching stopped.
        log.error("Unexpected error in fetcher routine", exc_info=True)
def __init__(self, *, loop, bootstrap_servers='localhost',
             client_id='aiokafka-' + __version__,
             metadata_max_age_ms=300000,
             request_timeout_ms=40000,
             retry_backoff_ms=100,
             ssl_context=None,
             security_protocol='PLAINTEXT',
             api_version='auto',
             connections_max_idle_ms=540000,
             sasl_mechanism='PLAIN',
             sasl_plain_username=None,
             sasl_plain_password=None,
             sasl_kerberos_service_name='kafka',
             sasl_kerberos_domain_name=None):
    """Validate the connection settings and initialise client state.

    All arguments are keyword-only. Most of them are stored unchanged on
    private attributes of the same name; the exceptions are:

    * ``retry_backoff_ms`` is divided by 1000 and kept as
      ``_retry_backoff``.
    * ``api_version`` is kept as-is when it equals ``"auto"``, otherwise
      it is passed through ``parse_kafka_version`` first.
    * ``metadata_max_age_ms`` is also handed to the ``ClusterMetadata``
      instance exposed as ``self.cluster``.

    Raises:
        ValueError: if ``security_protocol`` is not one of ``SSL``,
            ``PLAINTEXT``, ``SASL_PLAINTEXT`` or ``SASL_SSL``; if an SSL
            based protocol is selected without ``ssl_context``; if a SASL
            based protocol is selected with an unsupported
            ``sasl_mechanism``; or if the ``PLAIN`` mechanism is selected
            without both username and password.
    """
    # --- Argument validation -------------------------------------------
    known_protocols = ('SSL', 'PLAINTEXT', 'SASL_PLAINTEXT', 'SASL_SSL')
    if security_protocol not in known_protocols:
        raise ValueError("`security_protocol` should be SSL or PLAINTEXT")

    if ssl_context is None and security_protocol in ("SSL", "SASL_SSL"):
        raise ValueError(
            "`ssl_context` is mandatory if security_protocol=='SSL'")

    if security_protocol in ("SASL_SSL", "SASL_PLAINTEXT"):
        known_mechanisms = (
            "PLAIN", "GSSAPI", "SCRAM-SHA-256", "SCRAM-SHA-512")
        if sasl_mechanism not in known_mechanisms:
            raise ValueError(
                "only `PLAIN`, `GSSAPI`, `SCRAM-SHA-256` and "
                "`SCRAM-SHA-512` sasl_mechanism are supported "
                "at the moment")
        if sasl_mechanism == "PLAIN":
            credentials_missing = (
                sasl_plain_username is None or
                sasl_plain_password is None)
            if credentials_missing:
                raise ValueError(
                    "sasl_plain_username and sasl_plain_password "
                    "required for PLAIN sasl")

    # --- Plain configuration -------------------------------------------
    self._bootstrap_servers = bootstrap_servers
    self._client_id = client_id
    self._metadata_max_age_ms = metadata_max_age_ms
    self._request_timeout_ms = request_timeout_ms
    # "auto" is kept verbatim; explicit versions are normalised.
    self._api_version = (
        parse_kafka_version(api_version)
        if api_version != "auto" else api_version)
    self._security_protocol = security_protocol
    self._ssl_context = ssl_context
    # Stored in seconds, unlike the millisecond argument.
    self._retry_backoff = retry_backoff_ms / 1000
    self._connections_max_idle_ms = connections_max_idle_ms

    # --- SASL settings -------------------------------------------------
    self._sasl_mechanism = sasl_mechanism
    self._sasl_plain_username = sasl_plain_username
    self._sasl_plain_password = sasl_plain_password
    self._sasl_kerberos_service_name = sasl_kerberos_service_name
    self._sasl_kerberos_domain_name = sasl_kerberos_domain_name

    # --- Runtime state -------------------------------------------------
    self.cluster = ClusterMetadata(metadata_max_age_ms=metadata_max_age_ms)
    # empty set will fetch all topic metadata
    self._topics = set()
    self._conns = {}
    self._loop = loop
    self._sync_task = None
    self._md_update_fut = None
    self._md_update_waiter = create_future(loop=self._loop)
    self._get_conn_lock = asyncio.Lock(loop=loop)
def fetch_committed(self):
    """Queue a waiter for committed offsets and return it.

    A new future is appended to ``self._committed_futs`` and the
    assignment's ``commit_refresh_needed`` flag is set. This method does
    not resolve the future itself.

    Returns:
        The newly created future.
    """
    waiter = create_future(loop=self._loop)
    self._committed_futs.append(waiter)
    # Signal that a commit refresh has been requested (presumably picked
    # up by a background routine -- not visible from here).
    self._assignment.commit_refresh_needed.set()
    return waiter
def _fetch_requests_routine(self):
    """ Background task, that always prefetches next result page.

    The algorithm:
    * Group partitions per node, which is the leader for it.
    * If all partitions for this node need prefetch - do it right away
    * If any partition has some data (in `self._records`) wait up till
      `prefetch_backoff` so application can consume data from it.
    * If data in `self._records` is not consumed up to
      `prefetch_backoff` just request data for other partitions from this
      node.

    We request data in such manner because Kafka blocks the connection if
    we perform a FetchRequest and we don't have enough data. This means we
    must perform a FetchRequest to as many partitions as we can in a node.

    Original Java Kafka client processes data differently, as it only
    prefetches data if all messages were given to application (i.e. if
    `self._records` are empty). We don't use this method, because we allow
    to process partitions separately (by passing `partitions` list to
    `getall()` call of the consumer), which can end up in a long wait
    if some partitions (or topics) are processed slower, than others.

    The routine loops until it is cancelled or fails:
    ``asyncio.CancelledError`` is swallowed, any other exception is logged
    with its traceback and not re-raised.
    """
    try:
        while True:
            # Reset consuming signal future.
            self._wait_consume_future = create_future(loop=self._loop)
            # Create and send fetch requests
            requests, timeout = self._create_fetch_requests()
            for node_id, request in requests:
                node_ready = yield from self._client.ready(node_id)
                if not node_ready:
                    # We will request it on next routine
                    continue
                log.debug("Sending FetchRequest to node %s", node_id)
                task = ensure_future(self._proc_fetch_request(
                    node_id, request), loop=self._loop)
                # Track the task and mark the node as busy.
                self._fetch_tasks.add(task)
                self._in_flight.add(node_id)

            # Wake up on the first finished fetch, on the consume signal,
            # or after ``timeout`` (``done_set`` is then empty).
            done_set, _ = yield from asyncio.wait(
                chain(self._fetch_tasks, [self._wait_consume_future]),
                loop=self._loop,
                timeout=timeout,
                return_when=asyncio.FIRST_COMPLETED)

            # Process fetch tasks results if any
            done_fetches = self._fetch_tasks.intersection(done_set)
            if done_fetches:
                # ``result()`` re-raises a failed task's exception.
                # NOTE: ``any`` stops at the first truthy result, so
                # ``result()`` may not be called on every finished task.
                has_new_data = any(fut.result() for fut in done_fetches)
                if has_new_data:
                    # we added some messages to self._records,
                    # wake up getters
                    self._notify(self._wait_empty_future)
                self._fetch_tasks -= done_fetches
    except asyncio.CancelledError:
        pass
    except Exception:  # pragma: no cover
        log.error("Unexpected error in fetcher routine", exc_info=True)
def reset_drain(self):
    """Reset drain waiter, until we will do another retry.

    Replaces the current ``_drain_waiter`` with a fresh, pending future.
    The previous waiter must already be done; this is checked with an
    ``assert`` as an internal invariant.
    """
    assert self._drain_waiter.done()
    # Pass the loop by keyword, matching every other ``create_future``
    # call in this code base.
    self._drain_waiter = create_future(loop=self._loop)
def begin_transaction(self):
    """Enter the ``IN_TRANSACTION`` state and install a new waiter.

    The state transition happens first; only afterwards is
    ``_transaction_waiter`` replaced with a fresh, pending future.
    """
    self._transition_to(TransactionState.IN_TRANSACTION)
    fresh_waiter = create_future(loop=self._loop)
    self._transaction_waiter = fresh_waiter