def test_get_offsets(self):
    """Commit offsets through the coordinator and read them back.

    Sends three messages to 'topic1', commits offset 1 for partition 0 and
    offset 2 for partition 1, then verifies that
    ``refresh_committed_offsets`` stores those values as the ``committed``
    attribute of the corresponding assignment entries.
    """
    topic = 'topic1'

    kafka_client = AIOKafkaClient(
        loop=self.loop, bootstrap_servers=self.hosts)
    yield from kafka_client.bootstrap()

    state = SubscriptionState('earliest')
    state.subscribe(topics=(topic,))
    group = GroupCoordinator(
        kafka_client, state, loop=self.loop,
        group_id='getoffsets-group')
    yield from self.wait_topic(kafka_client, topic)

    # Put one message into partition 0 and two into partition 1.
    sender = AIOKafkaProducer(
        loop=self.loop, bootstrap_servers=self.hosts)
    yield from sender.start()
    outgoing = (
        (b'first msg', 0),
        (b'second msg', 1),
        (b'third msg', 1),
    )
    for payload, partition in outgoing:
        yield from sender.send(topic, payload, partition=partition)
    yield from sender.stop()

    yield from group.ensure_active_group()

    to_commit = {
        TopicPartition(topic, 0): OffsetAndMetadata(1, ''),
        TopicPartition(topic, 1): OffsetAndMetadata(2, ''),
    }
    yield from group.commit_offsets(to_commit)

    # No position has been set yet, so nothing counts as consumed.
    self.assertEqual(state.all_consumed_offsets(), {})

    for partition in (0, 1):
        state.seek((topic, partition), 0)
    yield from group.refresh_committed_offsets()

    for partition, expected_offset in ((0, 1), (1, 2)):
        self.assertEqual(
            state.assignment[(topic, partition)].committed,
            expected_offset)

    yield from group.close()
    yield from kafka_client.close()
def test_refresh_committed_offsets_if_needed(mocker, coordinator):
    """Only assigned partitions receive their committed offset.

    ``fetch_committed_offsets`` is patched to report offsets for two
    partitions of 'foobar', but only partition 0 is assigned. After the
    refresh, partition 0 carries committed offset 123, partition 1 is
    still absent from the assignment, and the "needs fetch" flag is
    cleared.
    """
    assigned_tp = TopicPartition('foobar', 0)
    unassigned_tp = TopicPartition('foobar', 1)
    fetched = {
        assigned_tp: OffsetAndMetadata(123, b''),
        unassigned_tp: OffsetAndMetadata(234, b''),
    }
    mocker.patch.object(
        ConsumerCoordinator, 'fetch_committed_offsets',
        return_value=fetched)

    state = coordinator._subscription
    state.assign_from_user([assigned_tp])
    assert state.needs_fetch_committed_offsets is True

    coordinator.refresh_committed_offsets_if_needed()

    assignment = state.assignment
    assert assignment[assigned_tp].committed == 123
    assert unassigned_tp not in assignment
    assert state.needs_fetch_committed_offsets is False
def auto_commit_routine(self, interval):
    """Commit consumed positions in a loop until ``self._closing`` is done.

    Each iteration:
      * if the coordinator is unknown, sleeps for the retry backoff and
        starts over;
      * otherwise waits up to ``interval`` seconds (the wait also ends
        when ``self._closing`` completes);
      * commits the position of every assigned partition whose position
        differs from its committed offset and is valid.

    ``Errors.KafkaError`` raised by the commit is logged and never
    propagated: at debug level when the error is retriable and the
    coordinator is not closing, at warning level otherwise.

    :param interval: timeout, in seconds, passed to ``asyncio.wait``
        between commits.
    """
    while not self._closing.done():
        coordinator_missing = yield from self.coordinator_unknown()
        if coordinator_missing:
            log.debug(
                "Cannot auto-commit offsets because the coordinator is"
                " unknown, will retry after backoff")
            backoff = self._retry_backoff_ms / 1000
            yield from asyncio.sleep(backoff, loop=self.loop)
            continue

        # Pause for one interval; returns early once closing completes.
        yield from asyncio.wait(
            [self._closing], timeout=interval, loop=self.loop)

        # Pair every assigned partition with its state, then keep only
        # those whose position moved past what is already committed.
        candidates = (
            (partition, self._subscription.assignment[partition])
            for partition in self._subscription.assigned_partitions())
        offsets = {
            partition: OffsetAndMetadata(state.position, '')
            for partition, state in candidates
            if state.position != state.committed
            and state.has_valid_position
        }

        try:
            yield from self.commit_offsets(offsets)
        except Errors.KafkaError as error:
            will_retry = error.retriable and not self._closing.done()
            if will_retry:
                log.debug(
                    "Failed to auto-commit offsets: %s, will retry"
                    " immediately", error)
            else:
                log.warning("Auto offset commit failed: %s", error)
def all_consumed_offsets(self):
    """Returns consumed offsets as {TopicPartition: OffsetAndMetadata}

    Only partitions whose state reports a valid position are included;
    the metadata of every returned entry is the empty string.
    """
    return {
        tp: OffsetAndMetadata(tp_state.position, '')
        for tp, tp_state in six.iteritems(self.assignment)
        if tp_state.has_valid_position
    }
def test_manual_subscribe_pattern(self):
    """Manual commits made under a pattern subscription are honoured later.

    Phase one subscribes by pattern, reads all 20 messages, commits
    offset 9 on partition 0, checks that seeking to the committed offset
    yields message b'9', then commits offset 10 and stops.

    Phase two subscribes to the topic by name with the same group and
    expects the next 10 messages to be exactly those sent to partition 1.
    """
    first_batch = yield from self.send_messages(0, range(0, 10))
    second_batch = yield from self.send_messages(1, range(10, 20))
    everything_sent = first_batch + second_batch

    consumer_options = dict(
        loop=self.loop,
        group_id='test-group',
        bootstrap_servers=self.hosts,
        auto_offset_reset='earliest',
        enable_auto_commit=False)

    # subscribe by pattern
    consumer = AIOKafkaConsumer(**consumer_options)
    consumer.subscribe(pattern="topic-test_manual_subs*")
    yield from consumer.start()
    yield from consumer.seek_to_committed()

    received = []
    for _ in range(20):
        record = yield from consumer.getone()
        received.append(record.value)
    self.assertEqual(set(everything_sent), set(received))

    partition_zero = TopicPartition(self.topic, 0)
    yield from consumer.commit(
        {partition_zero: OffsetAndMetadata(9, '')})
    yield from consumer.seek_to_committed(partition_zero)
    record = yield from consumer.getone(partition_zero)
    self.assertEqual(record.value, b'9')
    yield from consumer.commit(
        {partition_zero: OffsetAndMetadata(10, '')})
    yield from consumer.stop()

    # subscribe by topic
    consumer = AIOKafkaConsumer(**consumer_options)
    consumer.subscribe(topics=(self.topic, ))
    yield from consumer.start()
    yield from consumer.seek_to_committed()

    received = []
    for _ in range(10):
        record = yield from consumer.getone()
        received.append(record.value)
    self.assertEqual(set(second_batch), set(received))
    self.assertEqual(consumer.subscription(), {self.topic})
    yield from consumer.stop()
def test_offsets_failed_scenarios(self):
    """``commit_offsets`` re-raises the error type reported by the broker.

    After one successful commit, ``kafka.common.for_code`` is patched so
    that every response error code maps to a chosen error class. The test
    checks that the matching exception is raised, and additionally that:

      * the subscription needs partition assignment after the
        RebalanceInProgressError step;
      * ``coordinator_id`` is None after NotCoordinatorForGroupError, and
        the following commit raises GroupCoordinatorNotAvailableError.
    """
    topic = 'topic1'

    kafka_client = AIOKafkaClient(
        loop=self.loop, bootstrap_servers=self.hosts)
    yield from kafka_client.bootstrap()
    yield from self.wait_topic(kafka_client, topic)

    state = SubscriptionState('earliest')
    state.subscribe(topics=(topic, ))
    group = GroupCoordinator(
        kafka_client, state, loop=self.loop,
        group_id='test-offsets-group')
    yield from group.ensure_active_group()

    to_commit = {TopicPartition(topic, 0): OffsetAndMetadata(1, '')}
    yield from group.commit_offsets(to_commit)

    with mock.patch('kafka.common.for_code') as for_code:
        # Errors that are simply propagated to the caller, in the order
        # they are exercised; the rebalance error comes last so that the
        # reassignment flag can be checked right after it.
        propagated = (
            Errors.GroupAuthorizationFailedError,
            Errors.TopicAuthorizationFailedError,
            Errors.InvalidCommitOffsetSizeError,
            Errors.GroupLoadInProgressError,
            Errors.RebalanceInProgressError,
        )
        for error_cls in propagated:
            for_code.return_value = error_cls
            with self.assertRaises(error_cls):
                yield from group.commit_offsets(to_commit)
        self.assertEqual(state.needs_partition_assignment, True)

        for_code.return_value = KafkaError
        with self.assertRaises(KafkaError):
            yield from group.commit_offsets(to_commit)

        for_code.return_value = Errors.NotCoordinatorForGroupError
        with self.assertRaises(Errors.NotCoordinatorForGroupError):
            yield from group.commit_offsets(to_commit)
        self.assertEqual(group.coordinator_id, None)

        # With the coordinator forgotten, the next commit cannot be sent.
        with self.assertRaises(Errors.GroupCoordinatorNotAvailableError):
            yield from group.commit_offsets(to_commit)

    yield from group.close()
    yield from kafka_client.close()
def test_consumer_commit_validation(self):
    """``consumer.commit`` rejects malformed offset arguments.

    Each of the following must raise ValueError: an empty dict, a plain
    string, a dict keyed by a topic name instead of a TopicPartition, a
    bare offset as the value, and an (offset, metadata) pair whose
    metadata is an integer.
    """
    consumer = yield from self.consumer_factory()

    tp = TopicPartition(self.topic, 0)
    offset = yield from consumer.position(tp)
    offset_and_metadata = OffsetAndMetadata(offset, "")

    invalid_arguments = (
        {},
        "something",
        {"my_topic": offset_and_metadata},
        {tp: offset},
        {tp: (offset, 1000)},
    )
    for bad_offsets in invalid_arguments:
        with self.assertRaises(ValueError):
            yield from consumer.commit(bad_offsets)
def test_consumer_commit_validation(self):
    """``consumer.commit`` rejects malformed offset arguments.

    An empty dict and a plain string must raise ValueError. For the
    remaining cases the error message is checked as well:

      * a key that is not a TopicPartition ->
        "Key should be TopicPartition instance";
      * metadata that is an int or a bytes object ->
        "Metadata should be a string".
    """
    consumer = yield from self.consumer_factory()

    tp = TopicPartition(self.topic, 0)
    offset = yield from consumer.position(tp)
    offset_and_metadata = OffsetAndMetadata(offset, "")

    with self.assertRaises(ValueError):
        yield from consumer.commit({})
    with self.assertRaises(ValueError):
        yield from consumer.commit("something")

    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
    # which no longer exists on unittest.TestCase from Python 3.12 on.
    with self.assertRaisesRegex(
            ValueError, "Key should be TopicPartition instance"):
        yield from consumer.commit({"my_topic": offset_and_metadata})
    with self.assertRaisesRegex(
            ValueError, "Metadata should be a string"):
        yield from consumer.commit({tp: (offset, 1000)})
    with self.assertRaisesRegex(
            ValueError, "Metadata should be a string"):
        yield from consumer.commit({tp: (offset, b"\x00\x02")})
def test_fetchoffsets_failed_scenarios(self):
    """Exercise ``fetch_committed_offsets`` against injected broker errors.

    ``kafka.common.for_code`` is patched to return chosen error classes:

      * GroupLoadInProgressError followed by NoError: the call is
        expected to complete without raising;
      * UnknownMemberIdError: raised to the caller, and the subscription
        is flagged as needing partition assignment;
      * UnknownTopicOrPartitionError: the call returns an empty dict;
      * KafkaError: raised to the caller;
      * NotCoordinatorForGroupError followed by NoError three times: the
        call is expected to complete without raising.
    """
    topic = 'topic1'

    kafka_client = AIOKafkaClient(
        loop=self.loop, bootstrap_servers=self.hosts)
    yield from kafka_client.bootstrap()
    yield from self.wait_topic(kafka_client, topic)

    state = SubscriptionState('earliest')
    state.subscribe(topics=(topic, ))
    group = GroupCoordinator(
        kafka_client, state, loop=self.loop,
        group_id='fetch-offsets-group')
    yield from group.ensure_active_group()

    partitions = {TopicPartition(topic, 0): OffsetAndMetadata(1, '')}

    with mock.patch('kafka.common.for_code') as for_code:
        for_code.side_effect = MockedKafkaErrCode(
            Errors.GroupLoadInProgressError, Errors.NoError)
        yield from group.fetch_committed_offsets(partitions)

        for_code.side_effect = MockedKafkaErrCode(
            Errors.UnknownMemberIdError, Errors.NoError)
        with self.assertRaises(Errors.UnknownMemberIdError):
            yield from group.fetch_committed_offsets(partitions)
        self.assertEqual(state.needs_partition_assignment, True)

        # Switch from a scripted sequence to a constant return value.
        for_code.side_effect = None
        for_code.return_value = Errors.UnknownTopicOrPartitionError
        fetched = yield from group.fetch_committed_offsets(partitions)
        self.assertEqual(fetched, {})

        for_code.return_value = KafkaError
        with self.assertRaises(KafkaError):
            yield from group.fetch_committed_offsets(partitions)

        for_code.side_effect = MockedKafkaErrCode(
            Errors.NotCoordinatorForGroupError,
            Errors.NoError, Errors.NoError, Errors.NoError)
        yield from group.fetch_committed_offsets(partitions)

    yield from group.close()
    yield from kafka_client.close()
def _proc_offsets_fetch_request(self, node_id, request):
    """Send an offset fetch request and collect the committed offsets.

    :param node_id: node the request is sent to via ``self._send_req``.
    :param request: the request object to send.
    :return: dict mapping TopicPartition to OffsetAndMetadata for every
        partition in the response that has no error and a non-negative
        offset.
    :raises: an instance of the error class that ``Errors.for_code``
        returns for a partition's error code, except for
        UnknownTopicOrPartitionError, which is logged and skipped.
        Before raising, NotCoordinatorForGroupError marks the coordinator
        dead, and UnknownMemberIdError / IllegalGenerationError mark the
        subscription for reassignment.
    """
    response = yield from self._send_req(node_id, request)

    committed = {}
    for topic, partition_entries in response.topics:
        for partition, offset, metadata, error_code in partition_entries:
            tp = TopicPartition(topic, partition)
            error_type = Errors.for_code(error_code)

            if error_type is Errors.NoError:
                if offset >= 0:
                    # record the position with the offset
                    # (-1 indicates no committed offset to fetch)
                    committed[tp] = OffsetAndMetadata(offset, metadata)
                else:
                    log.debug(
                        "No committed offset for partition %s", tp)
                continue

            error = error_type()
            log.debug("Error fetching offset for %s: %s", tp, error)

            if error_type is Errors.GroupLoadInProgressError:
                # just retry
                raise error

            if error_type is Errors.NotCoordinatorForGroupError:
                # re-discover the coordinator and retry
                self.coordinator_dead()
                raise error

            if error_type in (Errors.UnknownMemberIdError,
                              Errors.IllegalGenerationError):
                # need to re-join group
                self._subscription.mark_for_reassignment()
                raise error

            if error_type is Errors.UnknownTopicOrPartitionError:
                log.warning(
                    "OffsetFetchRequest -- unknown topic %s", topic)
                continue

            log.error("Unknown error fetching offsets for %s: %s",
                      tp, error)
            raise error

    return committed
def offsets():
    """Return sample committed offsets for partitions 0 and 1 of 'foobar'.

    Partition 0 maps to offset 123 and partition 1 to offset 234, both
    with empty bytes metadata.
    """
    sample = {}
    sample[TopicPartition('foobar', 0)] = OffsetAndMetadata(123, b'')
    sample[TopicPartition('foobar', 1)] = OffsetAndMetadata(234, b'')
    return sample
def start_consumer(self):
    """
    This method is the workhorse of this class - it starts the Kafka consumer and calls the callback function for
    each valid record

    The consumer is created with auto-commit disabled and subscribed to ``self._topics``; if that fails the
    process exits through ``sys.exit``. Until ``self.asked_to_stop()`` returns true, the method polls the
    consumer and handles each returned topic partition as one batch:

        * records whose decoded key is not in ``self.keys`` (when keys are set) are skipped
        * records that are not valid JSON, or that fail validation when ``self._validate`` is set, are skipped
        * every remaining record is passed to ``self._callback(key, record_object)``

    The consumer position of a topic partition is committed only when the callback succeeded for all processed
    records of the batch. A callback that returns a false value or raises an exception stops processing of the
    batch, and nothing is committed for it.

    Returns:
        None
    """
    logger = self.panoptes_context.logger
    config = self.panoptes_context.config_object
    last_batch_size = 0

    logger.info(
        u'Trying to start Kafka Consumer with brokers: "%s", topics: "%s", group: "%s"' %
        (config.kafka_brokers, self._topics, self.group))

    try:
        consumer = kafka.KafkaConsumer(
            bootstrap_servers=config.kafka_brokers,
            client_id=self.client_id,
            group_id=self.group,
            enable_auto_commit=False,
            session_timeout_ms=self._session_timeout,
            request_timeout_ms=self._request_timeout,
            heartbeat_interval_ms=self._heartbeat_interval,
            max_poll_records=self._max_poll_records,
            max_partition_fetch_bytes=self._max_partition_fetch_bytes)

        consumer.subscribe(topics=self._topics)
        logger.info(u'Consumer subscribed to: %s' % consumer.subscription())
        self._consumer = consumer
    except Exception as e:
        sys.exit(u'Error trying to start Kafka consumer: %s' % str(e))

    while not self.asked_to_stop():
        # Warn when handling the previous (non-empty) batch took longer than the session timeout
        poll_age = (time.time() - self._last_polled) * 1000
        if (poll_age > self._session_timeout) and (last_batch_size > 0):
            logger.warn(
                u'Poll cycle took %.2f ms for %d records, '
                u'which is greater than the session timeout of %d ms' %
                (poll_age, last_batch_size, self._session_timeout))

        try:
            topic_partitions = consumer.poll(timeout_ms=self._poll_timeout)
            self._last_polled = time.time()
            logger.debug(u'Poll returned with %d topic partitions' % len(topic_partitions))
        except Exception as e:
            logger.error(u'Error while polling: %s' % str(e))
            continue

        last_batch_size = 0

        for topic_partition, consumer_records in topic_partitions.items():
            last_batch_size += len(consumer_records)

            logger.debug(
                u'Processing topic partition: %s, consumer records: %d, committed: %s' %
                (str(topic_partition), len(consumer_records), consumer.committed(topic_partition)))
            logger.debug(u'Consumed offsets: %s' % consumer._subscription.all_consumed_offsets())

            callback_succeeded = True
            consumer_records_skipped = 0
            consumer_records_validation_failed = 0

            for consumer_record in consumer_records:
                logger.debug(
                    u'Processing consumer record with key: "%s" and value: "%s"' %
                    (consumer_record.key, consumer_record.value))

                consumer_record_key = consumer_record.key.decode(u'utf-8')

                if self.keys and consumer_record_key not in self.keys:
                    logger.debug(
                        u'Consumer record key "%s" does not match any of the provided keys, skipping' %
                        consumer_record.key)
                    consumer_records_skipped += 1
                    continue

                try:
                    consumer_record_object = json.loads(consumer_record.value)
                except Exception as e:
                    logger.warn(
                        u'Could not convert consumer record "%s" to JSON, skipping: %s' %
                        (consumer_record.value, str(e)))
                    consumer_records_validation_failed += 1
                    continue

                if self._validate:
                    if not PanoptesConsumerRecordValidator.validate(self.consumer_type, consumer_record_object):
                        logger.debug(u'Consumer record failed validation, skipping')
                        consumer_records_validation_failed += 1
                        continue

                try:
                    callback_succeeded = self._callback(consumer_record_key, consumer_record_object)
                    # If the callback fails even for one consumer record, we want to fail (not update the
                    # committed) offset for the entire the batch, so exit
                    if not callback_succeeded:
                        logger.error(u'Callback function returned false')
                        break
                except Exception:
                    logger.exception(u'Error trying to execute callback function')
                    # The assignment above never completed, so the flag still holds the result of the
                    # previous record (or the initial True). Mark the batch as failed explicitly,
                    # otherwise its offset would be committed below.
                    callback_succeeded = False
                    break

            # Update the committed offset if the callback function succeeds for *all* consumer records in this
            # topic partition
            if callback_succeeded:
                try:
                    position = consumer.position(topic_partition)
                except Exception as e:
                    logger.error(
                        u'Error trying to fetch position for topic partition "%s": %s' %
                        (topic_partition, str(e)))
                else:
                    offset = {
                        topic_partition: OffsetAndMetadata(offset=position, metadata='')
                    }
                    logger.debug(u'Going to commit offset %s' % str(offset))
                    try:
                        consumer.commit(offset)
                    except Exception as e:
                        logger.error(u'Error trying to commit offset "%s": %s' % (offset, str(e)))

            if consumer_records_skipped or consumer_records_validation_failed:
                logger.debug(
                    u'Skipped %d consumer records due to non-matching keys and %d consumer records due to '
                    u'validation failures for topic partition: %s' %
                    (consumer_records_skipped, consumer_records_validation_failed, topic_partition))