def _on_change_subscription(self):
    """This is `group rebalanced` signal handler used to update fetch
    positions of assigned partitions"""
    # fetch positions if we have partitions we're subscribed
    # to that we don't know the offset for
    if not self._subscription.has_all_fetch_positions():
        ensure_future(self._update_fetch_positions(
            self._subscription.missing_fetch_positions()),
            loop=self._loop)


def connect(self):
    loop = self._loop
    self._closed_fut = asyncio.Future(loop=loop)
    if self._secutity_protocol == "PLAINTEXT":
        ssl = None
    else:
        assert self._secutity_protocol == "SSL"
        assert self._ssl_context is not None
        ssl = self._ssl_context
    # Create streams same as `open_connection`, but using custom protocol
    reader = asyncio.StreamReader(limit=READER_LIMIT, loop=loop)
    protocol = AIOKafkaProtocol(self._closed_fut, reader, loop=loop)
    transport, _ = yield from asyncio.wait_for(
        loop.create_connection(
            lambda: protocol, self.host, self.port, ssl=ssl),
        loop=loop, timeout=self._request_timeout)
    writer = asyncio.StreamWriter(transport, protocol, reader, loop)
    self._reader, self._writer, self._protocol = reader, writer, protocol

    # Start reader task.
    self._read_task = ensure_future(self._read(), loop=loop)

    # Start idle checker
    if self._max_idle_ms is not None:
        self._idle_handle = self._loop.call_soon(self._idle_check)

    return reader, writer


def bootstrap(self):
    """Try to bootstrap initial cluster metadata"""
    # using request v0 for bootstrap (because api version is not detected yet)
    metadata_request = MetadataRequest[0]([])
    for host, port, _ in self.hosts:
        log.debug("Attempting to bootstrap via node at %s:%s", host, port)
        try:
            bootstrap_conn = yield from create_conn(
                host, port, loop=self._loop, client_id=self._client_id,
                request_timeout_ms=self._request_timeout_ms,
                ssl_context=self._ssl_context,
                security_protocol=self._security_protocol,
                max_idle_ms=self._connections_max_idle_ms)
        except (OSError, asyncio.TimeoutError) as err:
            log.error('Unable to connect to "%s:%s": %s', host, port, err)
            continue

        try:
            metadata = yield from bootstrap_conn.send(metadata_request)
        except KafkaError as err:
            log.warning('Unable to request metadata from "%s:%s": %s',
                        host, port, err)
            bootstrap_conn.close()
            continue

        self.cluster.update_metadata(metadata)

        # A cluster with no topics can return no broker metadata...
        # In that case, we should keep the bootstrap connection till
        # we get a normal cluster layout.
        if not len(self.cluster.brokers()):
            self._conns['bootstrap'] = bootstrap_conn
        else:
            bootstrap_conn.close()

        log.debug('Received cluster metadata: %s', self.cluster)
        break
    else:
        raise ConnectionError('Unable to bootstrap from {}'.format(
            self.hosts))

    # detect api version if needed
    if self._api_version == 'auto':
        self._api_version = yield from self.check_version()
    if type(self._api_version) is not tuple:
        self._api_version = tuple(map(int, self._api_version.split('.')))

    if self._sync_task is None:
        # starting metadata synchronizer task
        self._sync_task = ensure_future(self._md_synchronizer(),
                                        loop=self._loop)


def start(self):
    """Connect to Kafka cluster and check server version"""
    log.debug("Starting the Kafka producer")  # trace
    yield from self.client.bootstrap()

    if self._compression_type == 'lz4':
        assert self.client.api_version >= (0, 8, 2), \
            'LZ4 Requires >= Kafka 0.8.2 Brokers'

    self._sender_task = ensure_future(self._sender_routine(),
                                      loop=self._loop)
    self._message_accumulator.set_api_version(self.client.api_version)
    log.debug("Kafka producer started")


def bootstrap(self):
    """Try to bootstrap initial cluster metadata"""
    metadata_request = MetadataRequest([])
    for host, port, _ in self.hosts:
        log.debug("Attempting to bootstrap via node at %s:%s", host, port)
        try:
            bootstrap_conn = yield from create_conn(
                host, port, loop=self._loop, client_id=self._client_id,
                request_timeout_ms=self._request_timeout_ms)
        except (OSError, asyncio.TimeoutError) as err:
            log.error('Unable to connect to "%s:%s": %s', host, port, err)
            continue

        try:
            metadata = yield from bootstrap_conn.send(metadata_request)
        except KafkaError as err:
            log.warning('Unable to request metadata from "%s:%s": %s',
                        host, port, err)
            bootstrap_conn.close()
            continue

        self.cluster.update_metadata(metadata)

        # A cluster with no topics can return no broker metadata;
        # in that case, we should keep the bootstrap connection
        if not len(self.cluster.brokers()):
            self._conns['bootstrap'] = bootstrap_conn
        else:
            bootstrap_conn.close()

        log.debug('Received cluster metadata: %s', self.cluster)
        break
    else:
        raise ConnectionError('Unable to bootstrap from {}'.format(
            self.hosts))

    if self._sync_task is None:
        # starting metadata synchronizer task
        self._sync_task = ensure_future(self._md_synchronizer(),
                                        loop=self._loop)


def start(self):
    """Connect to Kafka cluster and check server version"""
    log.debug("Starting the Kafka producer")  # trace
    yield from self.client.bootstrap()

    # Check Broker Version if not set explicitly
    if self._api_version == 'auto':
        self._api_version = yield from self.client.check_version()
    # Convert api_version config to tuple for easy comparisons
    self._api_version = tuple(map(int, self._api_version.split('.')))

    if self._compression_type == 'lz4':
        assert self._api_version >= (0, 8, 2), \
            'LZ4 Requires >= Kafka 0.8.2 Brokers'

    self._sender_task = ensure_future(self._sender_routine(),
                                      loop=self._loop)
    log.debug("Kafka producer started")


def start(self):
    """Connect to Kafka cluster and check server version"""
    log.debug("Starting the Kafka producer")  # trace
    yield from self.client.bootstrap()

    # Check Broker Version if not set explicitly
    if self._api_version == 'auto':
        self._api_version = yield from self.client.check_version()
    # Convert api_version config to tuple for easy comparisons
    self._api_version = tuple(
        map(int, self._api_version.split('.')))

    if self._compression_type == 'lz4':
        assert self._api_version >= (0, 8, 2), \
            'LZ4 Requires >= Kafka 0.8.2 Brokers'

    self._sender_task = ensure_future(
        self._sender_routine(), loop=self._loop)
    log.debug("Kafka producer started")


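# A minimal standalone sketch (not library code) of why the api_version string
# is converted to a tuple above: Python compares tuples element by element, so
# broker version gates such as the LZ4 assert become plain comparisons.
def parse_api_version(version_str):
    """Turn a version string such as '0.8.2' into a comparable tuple."""
    return tuple(map(int, version_str.split('.')))


assert parse_api_version('0.8.2') >= (0, 8, 2)
assert parse_api_version('0.10.1') > (0, 9)
assert not parse_api_version('0.8.1') >= (0, 8, 2)

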
def _sender_routine(self):
    """Background task that sends message batches to Kafka brokers"""
    tasks = set()
    try:
        while True:
            batches, unknown_leaders_exist = \
                self._message_accumulator.drain_by_nodes(
                    ignore_nodes=self._in_flight)

            # create produce task for every batch
            for node_id, batches in batches.items():
                task = ensure_future(
                    self._send_produce_req(node_id, batches),
                    loop=self._loop)
                tasks.add(task)

            if unknown_leaders_exist:
                # we have at least one unknown partition's leader,
                # try to update cluster metadata and wait backoff time
                self.client.force_metadata_update()
                # Just to have at least 1 future in wait() call
                fut = asyncio.sleep(self._retry_backoff, loop=self._loop)
                waiters = tasks.union([fut])
            else:
                fut = self._message_accumulator.data_waiter()
                waiters = tasks.union([fut])

            # wait until:
            # * At least one produce task has finished
            # * Data for a new partition arrived
            # * Metadata was updated for an unknown partition leader
            done, _ = yield from asyncio.wait(
                waiters,
                return_when=asyncio.FIRST_COMPLETED,
                loop=self._loop)
            tasks -= done

    except asyncio.CancelledError:
        pass
    except Exception:  # pragma: no cover
        log.error("Unexpected error in sender routine", exc_info=True)


def bootstrap(self):
    """Try to bootstrap initial cluster metadata"""
    metadata_request = MetadataRequest([])
    for host, port, _ in self.hosts:
        log.debug("Attempting to bootstrap via node at %s:%s", host, port)
        try:
            bootstrap_conn = yield from create_conn(
                host, port, loop=self._loop, client_id=self._client_id,
                request_timeout_ms=self._request_timeout_ms)
        except (OSError, asyncio.TimeoutError) as err:
            log.error('Unable to connect to "%s:%s": %s', host, port, err)
            continue

        try:
            metadata = yield from bootstrap_conn.send(metadata_request)
        except KafkaError as err:
            log.warning('Unable to request metadata from "%s:%s": %s',
                        host, port, err)
            bootstrap_conn.close()
            continue

        self.cluster.update_metadata(metadata)

        # A cluster with no topics can return no broker metadata;
        # in that case, we should keep the bootstrap connection
        if not len(self.cluster.brokers()):
            self._conns['bootstrap'] = bootstrap_conn
        else:
            bootstrap_conn.close()

        log.debug('Received cluster metadata: %s', self.cluster)
        break
    else:
        raise ConnectionError(
            'Unable to bootstrap from {}'.format(self.hosts))

    if self._sync_task is None:
        # starting metadata synchronizer task
        self._sync_task = ensure_future(
            self._md_synchronizer(), loop=self._loop)


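# A stripped-down sketch (illustrative only, not library code) of the
# `for ... else` pattern the bootstrap loop above relies on: the `else` branch
# runs only when the loop finished without a `break`, i.e. when no host could
# be bootstrapped.
def pick_first_reachable(hosts, is_reachable):
    for host in hosts:
        if is_reachable(host):
            chosen = host
            break
    else:
        raise ConnectionError('Unable to bootstrap from {}'.format(hosts))
    return chosen


assert pick_first_reachable(
    ['kafka1', 'kafka2'], lambda h: h == 'kafka2') == 'kafka2'

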
def _sender_routine(self):
    """Background task that sends message batches to Kafka brokers"""
    tasks = set()
    try:
        while True:
            batches, unknown_leaders_exist = \
                self._message_accumulator.drain_by_nodes(
                    ignore_nodes=self._in_flight)

            # create produce task for every batch
            for node_id, batches in batches.items():
                task = ensure_future(
                    self._send_produce_req(node_id, batches),
                    loop=self._loop)
                tasks.add(task)

            if unknown_leaders_exist:
                # we have at least one unknown partition's leader,
                # try to update cluster metadata and wait backoff time
                self.client.force_metadata_update()
                # Just to have at least 1 future in wait() call
                fut = asyncio.sleep(self._retry_backoff, loop=self._loop)
                waiters = tasks.union([fut])
            else:
                fut = self._message_accumulator.data_waiter()
                waiters = tasks.union([fut])

            # wait until:
            # * At least one produce task has finished
            # * Data for a new partition arrived
            done, _ = yield from asyncio.wait(
                waiters,
                return_when=asyncio.FIRST_COMPLETED,
                loop=self._loop)
            tasks -= done

    except asyncio.CancelledError:
        pass
    except Exception:  # noqa
        log.error("Unexpected error in sender routine", exc_info=True)


def _on_change_subscription(self):
    """ This is `group rebalanced` signal handler used to update fetch
        positions of assigned partitions
    """
    if self._closed:  # pragma: no cover
        return
    # fetch positions if we have partitions we're subscribed
    # to that we don't know the offset for
    if not self._subscription.has_all_fetch_positions():
        task = ensure_future(
            self._update_fetch_positions(
                self._subscription.missing_fetch_positions()),
            loop=self._loop
        )
        self._pending_position_fetches.add(task)

        def on_done(fut, tasks=self._pending_position_fetches):
            tasks.discard(fut)
            try:
                fut.result()
            except Exception as err:  # pragma: no cover
                log.error("Failed to update fetch positions: %r", err)

        task.add_done_callback(on_done)


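# A self-contained sketch (assumed names, not aiokafka code) of the pattern
# used above: keep fire-and-forget tasks in a set so they can be awaited or
# cancelled on close, and use a done callback to drop finished tasks from the
# set and to surface exceptions that would otherwise pass silently.
import asyncio
import logging

log = logging.getLogger(__name__)


def spawn_tracked(coro, pending, *, loop):
    task = asyncio.ensure_future(coro, loop=loop)
    pending.add(task)

    def on_done(fut, tasks=pending):
        tasks.discard(fut)
        try:
            fut.result()
        except Exception as err:
            log.error("Background task failed: %r", err)

    task.add_done_callback(on_done)
    return task


loop = asyncio.new_event_loop()
pending = set()
spawn_tracked(asyncio.sleep(0.01), pending, loop=loop)
loop.run_until_complete(asyncio.sleep(0.05))
assert not pending  # the done callback removed the finished task
loop.close()

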
def __init__(self, client, subscriptions, *, loop,
             key_deserializer=None,
             value_deserializer=None,
             fetch_min_bytes=1,
             fetch_max_wait_ms=500,
             max_partition_fetch_bytes=1048576,
             check_crcs=True,
             fetcher_timeout=0.2,
             prefetch_backoff=0.1):
    """Initialize a Kafka Message Fetcher.

    Parameters:
        client (AIOKafkaClient): kafka client
        subscriptions (SubscriptionState): instance of SubscriptionState
            located in kafka.consumer.subscription_state
        key_deserializer (callable): Any callable that takes a
            raw message key and returns a deserialized key.
        value_deserializer (callable, optional): Any callable that takes a
            raw message value and returns a deserialized value.
        fetch_min_bytes (int): Minimum amount of data the server should
            return for a fetch request, otherwise wait up to
            fetch_max_wait_ms for more data to accumulate. Default: 1.
        fetch_max_wait_ms (int): The maximum amount of time in milliseconds
            the server will block before answering the fetch request if
            there isn't sufficient data to immediately satisfy the
            requirement given by fetch_min_bytes. Default: 500.
        max_partition_fetch_bytes (int): The maximum amount of data
            per-partition the server will return. The maximum total memory
            used for a request = #partitions * max_partition_fetch_bytes.
            This size must be at least as large as the maximum message size
            the server allows or else it is possible for the producer to
            send messages larger than the consumer can fetch. If that
            happens, the consumer can get stuck trying to fetch a large
            message on a certain partition. Default: 1048576.
        check_crcs (bool): Automatically check the CRC32 of the records
            consumed. This ensures no on-the-wire or on-disk corruption to
            the messages occurred. This check adds some overhead, so it may
            be disabled in cases seeking extreme performance. Default: True
        fetcher_timeout (float): Maximum polling interval in the background
            fetching routine. Default: 0.2
        prefetch_backoff (float): number of seconds to wait until
            consumption of partition is paused. Paused partitions will not
            request new data from Kafka server (will not be included in
            next poll request).
    """
    self._client = client
    self._loop = loop
    self._key_deserializer = key_deserializer
    self._value_deserializer = value_deserializer
    self._fetch_min_bytes = fetch_min_bytes
    self._fetch_max_wait_ms = fetch_max_wait_ms
    self._max_partition_fetch_bytes = max_partition_fetch_bytes
    self._check_crcs = check_crcs
    self._fetcher_timeout = fetcher_timeout
    self._prefetch_backoff = prefetch_backoff
    self._subscriptions = subscriptions

    self._records = collections.OrderedDict()
    self._in_flight = set()
    self._fetch_tasks = set()

    self._wait_consume_future = None
    self._wait_empty_future = None

    self._fetch_task = ensure_future(
        self._fetch_requests_routine(), loop=loop)


def _fetch_requests_routine(self):
    """ Background task that always prefetches the next result page.

    The algorithm:
    * Group partitions per node, which is the leader for it.
    * If all partitions for this node need prefetch - do it right away.
    * If any partition has some data (in `self._records`) wait up to
      `prefetch_backoff` so the application can consume data from it.
    * If data in `self._records` is not consumed within `prefetch_backoff`
      just request data for the other partitions from this node.

    We request data in this manner because Kafka blocks the connection if
    we perform a FetchRequest and we don't have enough data. This means
    we must perform a FetchRequest for as many partitions as we can on
    a node.

    The original Java Kafka client processes data differently, as it only
    prefetches data if all messages were given to the application (i.e. if
    `self._records` is empty). We don't use this method, because we allow
    partitions to be processed separately (by passing a `partitions` list
    to the consumer's `getall()` call), which can end up in a long wait if
    some partitions (or topics) are processed slower than others.
    """
    try:
        while True:
            # Reset consuming signal future.
            self._wait_consume_future = asyncio.Future(loop=self._loop)
            # Create and send fetch requests
            requests, timeout = self._create_fetch_requests()
            for node_id, request in requests:
                node_ready = yield from self._client.ready(node_id)
                if not node_ready:
                    # We will request it on next routine
                    continue
                log.debug("Sending FetchRequest to node %s", node_id)
                task = ensure_future(
                    self._proc_fetch_request(node_id, request),
                    loop=self._loop)
                self._fetch_tasks.add(task)
                self._in_flight.add(node_id)

            done_set, _ = yield from asyncio.wait(
                chain(self._fetch_tasks, [self._wait_consume_future]),
                loop=self._loop,
                timeout=timeout,
                return_when=asyncio.FIRST_COMPLETED)

            # Process fetch tasks results if any
            done_fetches = self._fetch_tasks.intersection(done_set)
            if done_fetches:
                has_new_data = any(fut.result() for fut in done_fetches)
                if has_new_data:
                    # we added some messages to self._records,
                    # wake up getters
                    self._notify(self._wait_empty_future)
                self._fetch_tasks -= done_fetches
    except asyncio.CancelledError:
        pass
    except Exception:  # noqa
        log.error("Unexpected error in fetcher routine", exc_info=True)


def _sender_routine(self):
    """ Background task that sends pending batches to the leader nodes of
    the batches' partitions. This encapsulates the same logic as Java's
    `Sender` background thread. Because we use asyncio this is more of an
    event-based loop, rather than one counting the timeout till the next
    possible event like in Java.

    The procedure:
    * Group pending batches by partition leaders (write nodes)
    * Ignore nodes that are not ready (disconnected) and nodes that
      already have a pending request.
    * If we have unknown leaders for partitions, request a metadata
      update.
    * Wait for any event that can change the above procedure, like new
      metadata or a pending send finishing so a new one can be done.
    """
    tasks = set()
    try:
        while True:
            batches, unknown_leaders_exist = \
                self._message_accumulator.drain_by_nodes(
                    ignore_nodes=self._in_flight)

            # create produce task for every batch
            for node_id, batches in batches.items():
                task = ensure_future(
                    self._send_produce_req(node_id, batches),
                    loop=self._loop)
                self._in_flight.add(node_id)
                tasks.add(task)

            if unknown_leaders_exist:
                # we have at least one unknown partition's leader,
                # try to update cluster metadata and wait backoff time
                fut = self.client.force_metadata_update()
                waiters = tasks.union([fut])
            else:
                fut = self._message_accumulator.data_waiter()
                waiters = tasks.union([fut])

            # wait until:
            # * At least one produce task has finished
            # * Data for a new partition arrived
            # * Metadata was updated for an unknown partition leader
            done, _ = yield from asyncio.wait(
                waiters,
                return_when=asyncio.FIRST_COMPLETED,
                loop=self._loop)

            # done tasks should never produce errors; if they do, it's a
            # bug
            for task in done:
                task.result()

            tasks -= done
    except asyncio.CancelledError:
        pass
    except Exception:  # pragma: no cover
        log.error("Unexpected error in sender routine", exc_info=True)


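# A self-contained sketch (assumed names, written with modern async/await
# syntax instead of the `yield from` style used above) of the event-based loop
# the docstring describes: wait on every in-flight send task plus one "wakeup"
# future, so the routine reacts to whichever happens first.
import asyncio


async def demo_sender_loop():
    loop = asyncio.get_running_loop()
    tasks = set()                       # in-flight "send" tasks

    async def fake_send(node_id):
        await asyncio.sleep(0.01)
        return node_id

    for _ in range(2):                  # bounded here; the real routine loops forever
        # pretend drain_by_nodes() returned one batch for node 0
        tasks.add(asyncio.ensure_future(fake_send(0)))
        data_waiter = loop.create_future()   # would be set when new data arrives
        done, _ = await asyncio.wait(
            tasks.union([data_waiter]),
            return_when=asyncio.FIRST_COMPLETED)
        for task in done & tasks:
            task.result()               # surface send errors; they indicate a bug
        tasks -= done


asyncio.run(demo_sender_loop())

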
def connect(self):
    future = asyncio.open_connection(self.host, self.port, loop=self._loop)
    self._reader, self._writer = yield from asyncio.wait_for(
        future, self._request_timeout, loop=self._loop)
    self._read_task = ensure_future(self._read(), loop=self._loop)


def __init__(self, client, subscription, *, loop,
             group_id='aiokafka-default-group',
             session_timeout_ms=30000, heartbeat_interval_ms=3000,
             retry_backoff_ms=100,
             enable_auto_commit=True, auto_commit_interval_ms=5000,
             assignors=(RoundRobinPartitionAssignor, )):
    """Initialize the coordination manager.

    Parameters:
        client (AIOKafkaClient): kafka client
        subscription (SubscriptionState): instance of SubscriptionState
            located in kafka.consumer.subscription_state
        group_id (str): name of the consumer group to join for dynamic
            partition assignment (if enabled), and to use for fetching and
            committing offsets. Default: 'aiokafka-default-group'
        enable_auto_commit (bool): If true the consumer's offset will be
            periodically committed in the background. Default: True.
        auto_commit_interval_ms (int): milliseconds between automatic
            offset commits, if enable_auto_commit is True. Default: 5000.
        assignors (list): List of objects to use to distribute partition
            ownership amongst consumer instances when group management is
            used. Default: [RoundRobinPartitionAssignor]
        heartbeat_interval_ms (int): The expected time in milliseconds
            between heartbeats to the consumer coordinator when using
            Kafka's group management feature. Heartbeats are used to
            ensure that the consumer's session stays active and to
            facilitate rebalancing when new consumers join or leave the
            group. The value must be set lower than session_timeout_ms,
            but typically should be set no higher than 1/3 of that value.
            It can be adjusted even lower to control the expected time for
            normal rebalances. Default: 3000
        session_timeout_ms (int): The timeout used to detect failures when
            using Kafka's group management facilities. Default: 30000
        retry_backoff_ms (int): Milliseconds to backoff when retrying on
            errors. Default: 100.
    """
    self._client = client
    self._session_timeout_ms = session_timeout_ms
    self._heartbeat_interval_ms = heartbeat_interval_ms
    self._retry_backoff_ms = retry_backoff_ms
    self.generation = OffsetCommitRequest.DEFAULT_GENERATION_ID
    self.member_id = JoinGroupRequest.UNKNOWN_MEMBER_ID
    self.group_id = group_id
    self.coordinator_id = None
    self.rejoin_needed = True
    self.needs_join_prepare = True
    self.loop = loop
    # rejoin group can be called in parallel
    # (from consumer and from heartbeat task), so we need lock
    self._rejoin_lock = asyncio.Lock(loop=loop)
    self._enable_auto_commit = enable_auto_commit
    self._auto_commit_interval_ms = auto_commit_interval_ms
    self._assignors = assignors
    self._subscription = subscription
    self._partitions_per_topic = {}
    self._cluster = client.cluster
    self._auto_commit_task = None
    # _closing future used as a signal to heartbeat task to finish ASAP
    self._closing = asyncio.Future(loop=loop)

    # update subscribe state using currently known metadata
    self._handle_metadata_update(client.cluster)
    self._cluster.add_listener(self._handle_metadata_update)
    self._group_rebalanced_callback = None

    self.heartbeat_task = ensure_future(self._heartbeat_task_routine(),
                                        loop=loop)

    if self._enable_auto_commit:
        interval = self._auto_commit_interval_ms / 1000
        self._auto_commit_task = ensure_future(
            self.auto_commit_routine(interval), loop=loop)


def _fetch_requests_routine(self):
    """ Background task that always prefetches the next result page.

    The algorithm:
    * Group partitions per node, which is the leader for it.
    * If all partitions for this node need prefetch - do it right away.
    * If any partition has some data (in `self._records`) wait up to
      `prefetch_backoff` so the application can consume data from it.
    * If data in `self._records` is not consumed within `prefetch_backoff`
      just request data for the other partitions from this node.

    We request data in this manner because Kafka blocks the connection if
    we perform a FetchRequest and we don't have enough data. This means
    we must perform a FetchRequest for as many partitions as we can on
    a node.

    The original Java Kafka client processes data differently, as it only
    prefetches data if all messages were given to the application (i.e. if
    `self._records` is empty). We don't use this method, because we allow
    partitions to be processed separately (by passing a `partitions` list
    to the consumer's `getall()` call), which can end up in a long wait if
    some partitions (or topics) are processed slower than others.
    """
    try:
        while True:
            # Reset consuming signal future.
            self._wait_consume_future = asyncio.Future(loop=self._loop)
            # Create and send fetch requests
            requests, timeout = self._create_fetch_requests()
            for node_id, request in requests:
                node_ready = yield from self._client.ready(node_id)
                if not node_ready:
                    # We will request it on next routine
                    continue
                log.debug("Sending FetchRequest to node %s", node_id)
                task = ensure_future(
                    self._proc_fetch_request(node_id, request),
                    loop=self._loop)
                self._fetch_tasks.add(task)
                self._in_flight.add(node_id)

            done_set, _ = yield from asyncio.wait(
                chain(self._fetch_tasks, [self._wait_consume_future]),
                loop=self._loop,
                timeout=timeout,
                return_when=asyncio.FIRST_COMPLETED)

            # Process fetch tasks results if any
            done_fetches = self._fetch_tasks.intersection(done_set)
            if done_fetches:
                has_new_data = any(fut.result() for fut in done_fetches)
                if has_new_data:
                    # we added some messages to self._records,
                    # wake up getters
                    self._notify(self._wait_empty_future)
                self._fetch_tasks -= done_fetches
    except asyncio.CancelledError:
        pass
    except Exception:  # pragma: no cover
        log.error("Unexpected error in fetcher routine", exc_info=True)


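# A small standalone sketch (hypothetical data, not the actual ClusterMetadata
# API) of the first step of the algorithm above: group assigned partitions by
# the node that currently leads them, so one FetchRequest per node can cover
# as many partitions as possible.
from collections import defaultdict


def group_by_leader(partitions, leader_for_partition):
    by_node = defaultdict(list)
    for tp in partitions:
        node_id = leader_for_partition(tp)
        if node_id is None:
            continue  # leader unknown; a metadata update would be requested
        by_node[node_id].append(tp)
    return dict(by_node)


partitions = [('my_topic', 0), ('my_topic', 1), ('my_topic', 2)]
leaders = {('my_topic', 0): 1, ('my_topic', 1): 2, ('my_topic', 2): 1}
assert group_by_leader(partitions, leaders.get) == {
    1: [('my_topic', 0), ('my_topic', 2)],
    2: [('my_topic', 1)],
}

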
def __init__(self, client, subscriptions, *, loop,
             key_deserializer=None,
             value_deserializer=None,
             fetch_min_bytes=1,
             fetch_max_wait_ms=500,
             max_partition_fetch_bytes=1048576,
             check_crcs=True,
             fetcher_timeout=0.2,
             prefetch_backoff=0.1,
             retry_backoff_ms=100):
    """Initialize a Kafka Message Fetcher.

    Parameters:
        client (AIOKafkaClient): kafka client
        subscriptions (SubscriptionState): instance of SubscriptionState
            located in kafka.consumer.subscription_state
        key_deserializer (callable): Any callable that takes a
            raw message key and returns a deserialized key.
        value_deserializer (callable, optional): Any callable that takes a
            raw message value and returns a deserialized value.
        fetch_min_bytes (int): Minimum amount of data the server should
            return for a fetch request, otherwise wait up to
            fetch_max_wait_ms for more data to accumulate. Default: 1.
        fetch_max_wait_ms (int): The maximum amount of time in milliseconds
            the server will block before answering the fetch request if
            there isn't sufficient data to immediately satisfy the
            requirement given by fetch_min_bytes. Default: 500.
        max_partition_fetch_bytes (int): The maximum amount of data
            per-partition the server will return. The maximum total memory
            used for a request = #partitions * max_partition_fetch_bytes.
            This size must be at least as large as the maximum message size
            the server allows or else it is possible for the producer to
            send messages larger than the consumer can fetch. If that
            happens, the consumer can get stuck trying to fetch a large
            message on a certain partition. Default: 1048576.
        check_crcs (bool): Automatically check the CRC32 of the records
            consumed. This ensures no on-the-wire or on-disk corruption to
            the messages occurred. This check adds some overhead, so it may
            be disabled in cases seeking extreme performance. Default: True
        fetcher_timeout (float): Maximum polling interval in the background
            fetching routine. Default: 0.2
        prefetch_backoff (float): number of seconds to wait until
            consumption of partition is paused. Paused partitions will not
            request new data from Kafka server (will not be included in
            next poll request).
        retry_backoff_ms (int): Milliseconds to backoff when retrying on
            errors. Default: 100.
    """
    self._client = client
    self._loop = loop
    self._key_deserializer = key_deserializer
    self._value_deserializer = value_deserializer
    self._fetch_min_bytes = fetch_min_bytes
    self._fetch_max_wait_ms = fetch_max_wait_ms
    self._max_partition_fetch_bytes = max_partition_fetch_bytes
    self._check_crcs = check_crcs
    self._fetcher_timeout = fetcher_timeout
    self._prefetch_backoff = prefetch_backoff
    self._retry_backoff = retry_backoff_ms / 1000
    self._subscriptions = subscriptions

    self._records = collections.OrderedDict()
    self._in_flight = set()
    self._fetch_tasks = set()

    self._wait_consume_future = None
    self._wait_empty_future = None

    req_version = 2 if client.api_version >= (0, 10) else 1
    self._fetch_request_class = FetchRequest[req_version]

    self._fetch_task = ensure_future(self._fetch_requests_routine(),
                                     loop=loop)


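# Worked example (illustrative numbers) of the memory bound described for
# max_partition_fetch_bytes above: the worst-case amount of data returned by a
# single fetch request is #partitions * max_partition_fetch_bytes.
max_partition_fetch_bytes = 1048576              # 1 MiB, the default
assigned_partitions = 100
worst_case_bytes = assigned_partitions * max_partition_fetch_bytes
assert worst_case_bytes == 104857600             # ~100 MiB for one request

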
def __init__(self, client, subscription, *, loop,
             group_id='aiokafka-default-group',
             session_timeout_ms=30000, heartbeat_interval_ms=3000,
             retry_backoff_ms=100,
             enable_auto_commit=True, auto_commit_interval_ms=5000,
             assignors=(RoundRobinPartitionAssignor,)
             ):
    """Initialize the coordination manager.

    Parameters:
        client (AIOKafkaClient): kafka client
        subscription (SubscriptionState): instance of SubscriptionState
            located in kafka.consumer.subscription_state
        group_id (str): name of the consumer group to join for dynamic
            partition assignment (if enabled), and to use for fetching and
            committing offsets. Default: 'aiokafka-default-group'
        enable_auto_commit (bool): If true the consumer's offset will be
            periodically committed in the background. Default: True.
        auto_commit_interval_ms (int): milliseconds between automatic
            offset commits, if enable_auto_commit is True. Default: 5000.
        assignors (list): List of objects to use to distribute partition
            ownership amongst consumer instances when group management is
            used. Default: [RoundRobinPartitionAssignor]
        heartbeat_interval_ms (int): The expected time in milliseconds
            between heartbeats to the consumer coordinator when using
            Kafka's group management feature. Heartbeats are used to
            ensure that the consumer's session stays active and to
            facilitate rebalancing when new consumers join or leave the
            group. The value must be set lower than session_timeout_ms,
            but typically should be set no higher than 1/3 of that value.
            It can be adjusted even lower to control the expected time for
            normal rebalances. Default: 3000
        session_timeout_ms (int): The timeout used to detect failures when
            using Kafka's group management facilities. Default: 30000
        retry_backoff_ms (int): Milliseconds to backoff when retrying on
            errors. Default: 100.
    """
    self._client = client
    self._session_timeout_ms = session_timeout_ms
    self._heartbeat_interval_ms = heartbeat_interval_ms
    self._retry_backoff_ms = retry_backoff_ms
    self.generation = OffsetCommitRequest.DEFAULT_GENERATION_ID
    self.member_id = JoinGroupRequest.UNKNOWN_MEMBER_ID
    self.group_id = group_id
    self.coordinator_id = None
    self.rejoin_needed = True
    self.needs_join_prepare = True
    self.loop = loop
    # rejoin group can be called in parallel
    # (from consumer and from heartbeat task), so we need lock
    self._rejoin_lock = asyncio.Lock(loop=loop)
    self._enable_auto_commit = enable_auto_commit
    self._auto_commit_interval_ms = auto_commit_interval_ms
    self._assignors = assignors
    self._subscription = subscription
    self._partitions_per_topic = {}
    self._cluster = client.cluster
    self._auto_commit_task = None
    # _closing future used as a signal to heartbeat task to finish ASAP
    self._closing = asyncio.Future(loop=loop)

    # update subscribe state using currently known metadata
    self._handle_metadata_update(client.cluster)
    self._cluster.add_listener(self._handle_metadata_update)
    self._group_rebalanced_callback = None

    self.heartbeat_task = ensure_future(
        self._heartbeat_task_routine(), loop=loop)

    if self._enable_auto_commit:
        interval = self._auto_commit_interval_ms / 1000
        self._auto_commit_task = ensure_future(
            self.auto_commit_routine(interval), loop=loop)


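# A tiny illustrative check (not part of aiokafka) of the relationship between
# the two timeouts documented above: heartbeat_interval_ms must stay below
# session_timeout_ms, and the recommendation is to keep it at or below one
# third of that value.
def heartbeat_config_ok(session_timeout_ms=30000, heartbeat_interval_ms=3000):
    if heartbeat_interval_ms >= session_timeout_ms:
        return False
    return heartbeat_interval_ms * 3 <= session_timeout_ms


assert heartbeat_config_ok()                 # the defaults: 3000 <= 30000 / 3
assert not heartbeat_config_ok(9000, 4000)   # above the 1/3 recommendation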