def _send_sync_group_request(self, request):
    """Send a SyncGroup request and return the member assignment.

    Generator-based coroutine: it must be driven with ``yield from``.

    Arguments:
        request: the SyncGroup request passed to ``self._send_req``.

    Returns:
        The ``member_assignment`` attribute of the coordinator response.

    Raises:
        Errors.GroupCoordinatorNotAvailableError: when the coordinator is
            unknown before the request is sent.
        Errors.KafkaError: every Kafka error raised while sending is
            logged and re-raised to the caller. Unknown-member and
            illegal-generation errors additionally reset ``member_id``;
            coordinator errors additionally mark the coordinator dead.
    """
    coordinator_missing = yield from self.coordinator_unknown()
    if coordinator_missing:
        raise Errors.GroupCoordinatorNotAvailableError()

    # Stays None unless the request round-trip produced a response.
    response = None
    try:
        response = yield from self._send_req(self.coordinator_id, request)
        log.debug(
            "Received successful sync group response for group %s: %s",
            self.group_id,
            response,
        )
        return response.member_assignment
    except Errors.RebalanceInProgressError as exc:
        log.info(
            "SyncGroup for group %s failed due to coordinator rebalance,"
            " rejoining the group",
            self.group_id,
        )
        raise exc
    except (Errors.UnknownMemberIdError,
            Errors.IllegalGenerationError) as exc:
        log.info(
            "SyncGroup for group %s failed due to %s, rejoining the group",
            self.group_id,
            exc,
        )
        # Forget our member id so the next join starts from scratch.
        self.member_id = JoinGroupRequest.UNKNOWN_MEMBER_ID
        raise exc
    except (Errors.GroupCoordinatorNotAvailableError,
            Errors.NotCoordinatorForGroupError) as exc:
        log.info(
            "SyncGroup for group %s failed due to %s, will find new"
            " coordinator and rejoin",
            self.group_id,
            exc,
        )
        self.coordinator_dead()
        raise exc
    except Errors.KafkaError as exc:
        log.error("Error from SyncGroup: %s", exc)
        raise exc
    finally:
        # No response means the sync did not complete: force a rejoin.
        if response is None:
            self.rejoin_needed = True
def _perform_group_join(self):
    """Send a JoinGroup request to the coordinator.

    Builds a ``JoinGroupRequest`` from the group id, the configured
    session timeout, the current member id, the protocol type and the
    group protocols (metadata that is not already ``bytes`` is encoded),
    then sends it through ``self._client``.

    Returns:
        Future: already failed with GroupCoordinatorNotAvailableError
        when the coordinator is unknown. Otherwise a fresh Future that is
        handed to ``_handle_join_group_response`` (on success) or
        ``_failed_request`` (on error) for completion; per the original
        contract it resolves to the encoded-bytes assignment returned
        from the group leader.
    """
    if self.coordinator_unknown():
        error = Errors.GroupCoordinatorNotAvailableError(self.coordinator_id)
        return Future().failure(error)

    log.debug("(Re-)joining group %s", self.group_id)

    # Gather the request fields in the order the request expects them.
    group_id = self.group_id
    session_timeout_ms = self.config['session_timeout_ms']
    member_id = self.member_id
    protocol_type = self.protocol_type()

    encoded_protocols = []
    for protocol, metadata in self.group_protocols():
        if not isinstance(metadata, bytes):
            metadata = metadata.encode()
        encoded_protocols.append((protocol, metadata))

    request = JoinGroupRequest(
        group_id,
        session_timeout_ms,
        member_id,
        protocol_type,
        encoded_protocols,
    )

    log.debug("Issuing request (%s) to coordinator %s",
              request, self.coordinator_id)

    # The returned future is completed by the response/error handlers.
    join_future = Future()
    pending = self._client.send(self.coordinator_id, request)
    pending.add_callback(self._handle_join_group_response, join_future)
    pending.add_errback(
        self._failed_request, self.coordinator_id, request, join_future)
    return join_future
def __call__(self):
    """Run one pass of the heartbeat task.

    Depending on the coordinator and heartbeat state this either skips
    the heartbeat, reports a failure, marks the coordinator dead,
    reschedules itself on the client, or sends a heartbeat request and
    attaches the success/failure handlers to the resulting future.
    """
    skip_heartbeat = (self._coordinator.generation < 0
                      or self._coordinator.need_rejoin())
    if skip_heartbeat:
        # Nothing to send: either auto-assignment is not in use or a
        # rebalance is pending.
        log.info("Skipping heartbeat: no auto-assignment"
                 " or waiting on rebalance")
        return

    if self._coordinator.coordinator_unknown():
        log.warning("Coordinator unknown during heartbeat -- will retry")
        failure = Errors.GroupCoordinatorNotAvailableError()
        self._handle_heartbeat_failure(failure)
        return

    if self._heartbeat.session_expired():
        # A whole session interval passed without a successful heartbeat,
        # so the coordinator is treated as dead.
        log.error("Heartbeat session expired - marking coordinator dead")
        self._coordinator.coordinator_dead()
        return

    if not self._heartbeat.should_heartbeat():
        # Too early for the next heartbeat: schedule this task to run
        # again once the remaining time has elapsed.
        ttl = self._heartbeat.ttl()
        log.debug("Heartbeat task unneeded now, retrying in %s", ttl)
        self._client.schedule(self, time.time() + ttl)
        return

    self._heartbeat.sent_heartbeat()
    self._request_in_flight = True
    pending = self._coordinator._send_heartbeat_request()
    pending.add_callback(self._handle_heartbeat_success)
    pending.add_errback(self._handle_heartbeat_failure)
def _send_sync_group_request(self, request):
    """Send a SyncGroup request to the coordinator via ``self._client``.

    Arguments:
        request: the SyncGroup request to send.

    Returns:
        Future: already failed with GroupCoordinatorNotAvailableError
        when the coordinator is unknown. Otherwise a fresh Future that is
        handed to ``_handle_sync_group_response`` (on success) or
        ``_failed_request`` (on error) for completion.
    """
    if self.coordinator_unknown():
        failed = Future()
        return failed.failure(Errors.GroupCoordinatorNotAvailableError())

    sync_future = Future()
    pending = self._client.send(self.coordinator_id, request)
    pending.add_callback(self._handle_sync_group_response, sync_future)
    pending.add_errback(
        self._failed_request, self.coordinator_id, request, sync_future)
    return sync_future
def test_failed_group_join(self):
    """Check coordinator state after failed group-join attempts.

    The coordinator's client is replaced by a MagicMock whose ``send``
    raises a different Kafka error on each attempt. After every attempt
    the coordinator must still need a rejoin; depending on the error the
    exception is either swallowed or propagated, and the member id or
    coordinator id is reset.
    """
    client = AIOKafkaClient(loop=self.loop, bootstrap_servers=self.hosts)
    subscription = SubscriptionState('latest')
    subscription.subscribe(topics=('topic1', ))
    coordinator = GroupCoordinator(
        client, subscription, loop=self.loop, retry_backoff_ms=10)

    @asyncio.coroutine
    def attempt_join():
        # A fresh rebalance object per attempt, built from the
        # coordinator's current state.
        yield from CoordinatorGroupRebalance(
            coordinator,
            coordinator.group_id,
            coordinator.coordinator_id,
            subscription.subscription,
            coordinator._assignors,
            coordinator._session_timeout_ms,
            coordinator._retry_backoff_ms,
            loop=self.loop,
        ).perform_group_join()

    yield from client.bootstrap()
    yield from self.wait_topic(client, 'topic1')

    # From here on every request made by the coordinator hits the mock.
    fake_client = mock.MagicMock()
    coordinator._client = fake_client

    # Group still loading: no exception expected, the join just waits.
    fake_client.send.side_effect = Errors.GroupLoadInProgressError()
    yield from attempt_join()
    self.assertEqual(coordinator.need_rejoin(), True)

    # Invalid group id: the error must reach the caller.
    fake_client.send.side_effect = Errors.InvalidGroupIdError()
    with self.assertRaises(Errors.InvalidGroupIdError):
        yield from attempt_join()
    self.assertEqual(coordinator.need_rejoin(), True)

    # Unknown member id: no exception expected, member_id must be reset.
    coordinator.member_id = 'some_invalid_member_id'
    fake_client.send.side_effect = Errors.UnknownMemberIdError()
    yield from attempt_join()
    self.assertEqual(coordinator.need_rejoin(), True)
    self.assertEqual(
        coordinator.member_id, JoinGroupRequest.UNKNOWN_MEMBER_ID)

    # Coordinator not available: no exception expected, coordinator_id
    # must be reset.
    coordinator.coordinator_id = 'some_id'
    fake_client.send.side_effect = Errors.GroupCoordinatorNotAvailableError()
    yield from attempt_join()
    self.assertEqual(coordinator.need_rejoin(), True)
    self.assertEqual(coordinator.coordinator_id, None)

    yield from client.close()
def commit_offsets(self, offsets):
    """Commit the given offsets to the group coordinator.

    Generator-based coroutine: it must be driven with ``yield from``.
    The subscription is always flagged as needing a fresh fetch of
    committed offsets, even when there is nothing to commit.

    Arguments:
        offsets (dict {TopicPartition: OffsetAndMetadata}): what to commit

    Returns:
        True when ``offsets`` is empty; otherwise nothing is returned
        explicitly once every partition in the response was processed.

    Raises:
        Errors.GroupCoordinatorNotAvailableError: when the coordinator is
            unknown before sending.
        Errors.TopicAuthorizationFailedError: after the whole response
            was processed, if any topic was reported as unauthorized.
        Other error types from ``Errors.for_code``: raised as soon as the
            first partition carrying that error code is encountered.
    """
    self._subscription.needs_fetch_committed_offsets = True
    if not offsets:
        log.debug('No offsets to commit')
        return True

    coordinator_missing = yield from self.coordinator_unknown()
    if coordinator_missing:
        raise Errors.GroupCoordinatorNotAvailableError()
    node_id = self.coordinator_id

    # Group the (partition, offset, metadata) triples by topic name.
    per_topic = collections.defaultdict(list)
    for tp, commit in offsets.items():
        entry = (tp.partition, commit.offset, commit.metadata)
        per_topic[tp.topic].append(entry)

    request = OffsetCommitRequest(
        self.group_id,
        self.generation,
        self.member_id,
        OffsetCommitRequest.DEFAULT_RETENTION_TIME,
        list(per_topic.items()),
    )

    log.debug("Sending offset-commit request with %s to %s",
              offsets, node_id)
    response = yield from self._send_req(node_id, request)

    unauthorized_topics = set()
    for topic, partitions in response.topics:
        for partition, error_code in partitions:
            tp = TopicPartition(topic, partition)
            offset = offsets[tp]
            error_type = Errors.for_code(error_code)

            if error_type is Errors.NoError:
                log.debug("Committed offset %s for partition %s",
                          offset, tp)
                if self._subscription.is_assigned(tp):
                    # Record the committed position on the partition state.
                    state = self._subscription.assignment[tp]
                    state.committed = offset.offset
                continue

            if error_type is Errors.GroupAuthorizationFailedError:
                log.error("OffsetCommit failed for group %s - %s",
                          self.group_id, error_type.__name__)
                raise error_type()

            if error_type is Errors.TopicAuthorizationFailedError:
                # Collected and reported together once the loop is done.
                unauthorized_topics.add(topic)
                continue

            if error_type in (Errors.OffsetMetadataTooLargeError,
                              Errors.InvalidCommitOffsetSizeError):
                # Surface the error to the caller.
                log.info(
                    "OffsetCommit failed for group %s on partition %s"
                    " due to %s, will retry",
                    self.group_id, tp, error_type.__name__)
                raise error_type()

            if error_type is Errors.GroupLoadInProgressError:
                log.info(
                    "OffsetCommit failed for group %s because group is"
                    " initializing (%s), will retry",
                    self.group_id, error_type.__name__)
                raise error_type()

            if error_type in (Errors.GroupCoordinatorNotAvailableError,
                              Errors.NotCoordinatorForGroupError,
                              Errors.RequestTimedOutError):
                log.info(
                    "OffsetCommit failed for group %s due to a"
                    " coordinator error (%s), will find new coordinator"
                    " and retry",
                    self.group_id, error_type.__name__)
                self.coordinator_dead()
                raise error_type()

            if error_type in (Errors.UnknownMemberIdError,
                              Errors.IllegalGenerationError,
                              Errors.RebalanceInProgressError):
                # Group membership is stale: flag for reassignment.
                error = error_type(self.group_id)
                log.error(
                    "OffsetCommit failed for group %s due to group"
                    " error (%s), will rejoin",
                    self.group_id, error)
                self._subscription.mark_for_reassignment()
                raise error

            # Any other error code is raised as-is.
            log.error(
                "OffsetCommit failed for group %s on partition %s"
                " with offset %s: %s",
                self.group_id, tp, offset, error_type.__name__)
            raise error_type()

    if unauthorized_topics:
        log.error("OffsetCommit failed for unauthorized topics %s",
                  unauthorized_topics)
        raise Errors.TopicAuthorizationFailedError(unauthorized_topics)