    def test_get_offsets(self):
        client = AIOKafkaClient(loop=self.loop, bootstrap_servers=self.hosts)
        yield from client.bootstrap()

        subscription = SubscriptionState('earliest')
        subscription.subscribe(topics=('topic1',))
        coordinator = GroupCoordinator(
            client, subscription, loop=self.loop,
            group_id='getoffsets-group')

        yield from self.wait_topic(client, 'topic1')
        producer = AIOKafkaProducer(
            loop=self.loop, bootstrap_servers=self.hosts)
        yield from producer.start()
        yield from producer.send('topic1', b'first msg', partition=0)
        yield from producer.send('topic1', b'second msg', partition=1)
        yield from producer.send('topic1', b'third msg', partition=1)
        yield from producer.stop()

        yield from coordinator.ensure_active_group()

        offsets = {TopicPartition('topic1', 0): OffsetAndMetadata(1, ''),
                   TopicPartition('topic1', 1): OffsetAndMetadata(2, '')}
        yield from coordinator.commit_offsets(offsets)

        self.assertEqual(subscription.all_consumed_offsets(), {})
        subscription.seek(('topic1', 0), 0)
        subscription.seek(('topic1', 1), 0)
        yield from coordinator.refresh_committed_offsets()
        self.assertEqual(subscription.assignment[('topic1', 0)].committed, 1)
        self.assertEqual(subscription.assignment[('topic1', 1)].committed, 2)

        yield from coordinator.close()
        yield from client.close()
def test_refresh_committed_offsets_if_needed(mocker, coordinator):
    mocker.patch.object(ConsumerCoordinator, 'fetch_committed_offsets',
                        return_value={
                            TopicPartition('foobar', 0): OffsetAndMetadata(123, b''),
                            TopicPartition('foobar', 1): OffsetAndMetadata(234, b'')})
    coordinator._subscription.assign_from_user([TopicPartition('foobar', 0)])
    assert coordinator._subscription.needs_fetch_committed_offsets is True
    coordinator.refresh_committed_offsets_if_needed()
    assignment = coordinator._subscription.assignment
    assert assignment[TopicPartition('foobar', 0)].committed == 123
    assert TopicPartition('foobar', 1) not in assignment
    assert coordinator._subscription.needs_fetch_committed_offsets is False
    def auto_commit_routine(self, interval):
        while not self._closing.done():
            if (yield from self.coordinator_unknown()):
                log.debug(
                    "Cannot auto-commit offsets because the coordinator is"
                    " unknown, will retry after backoff")
                yield from asyncio.sleep(self._retry_backoff_ms / 1000,
                                         loop=self.loop)
                continue

            yield from asyncio.wait([self._closing],
                                    timeout=interval,
                                    loop=self.loop)

            # select offsets that should be committed
            offsets = {}
            for partition in self._subscription.assigned_partitions():
                tp = self._subscription.assignment[partition]
                if tp.position != tp.committed and tp.has_valid_position:
                    offsets[partition] = OffsetAndMetadata(tp.position, '')

            try:
                yield from self.commit_offsets(offsets)
            except Errors.KafkaError as error:
                if error.retriable and not self._closing.done():
                    log.debug(
                        "Failed to auto-commit offsets: %s, will retry"
                        " immediately", error)
                else:
                    log.warning("Auto offset commit failed: %s", error)
Example #4
    def all_consumed_offsets(self):
        """Returns consumed offsets as {TopicPartition: OffsetAndMetadata}"""
        all_consumed = {}
        for partition, state in six.iteritems(self.assignment):
            if state.has_valid_position:
                all_consumed[partition] = OffsetAndMetadata(state.position, '')
        return all_consumed
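The mapping built by all_consumed_offsets() has the same shape that kafka-python's commit API expects. A minimal, hedged sketch of passing such a mapping to KafkaConsumer.commit(); the broker address, topic, and group id below are placeholders, not values from the example above:

from kafka import KafkaConsumer
from kafka.structs import TopicPartition, OffsetAndMetadata

# Placeholder connection settings, for illustration only.
consumer = KafkaConsumer(bootstrap_servers='localhost:9092',
                         group_id='example-group',
                         enable_auto_commit=False)
consumer.assign([TopicPartition('topic1', 0)])
consumer.poll(timeout_ms=1000)

# Mirror all_consumed_offsets(): {TopicPartition: OffsetAndMetadata(position, '')}
to_commit = {
    tp: OffsetAndMetadata(consumer.position(tp), '')
    for tp in consumer.assignment()
}
consumer.commit(to_commit)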
Example #5
    def test_manual_subscribe_pattern(self):
        msgs1 = yield from self.send_messages(0, range(0, 10))
        msgs2 = yield from self.send_messages(1, range(10, 20))
        available_msgs = msgs1 + msgs2

        consumer = AIOKafkaConsumer(loop=self.loop,
                                    group_id='test-group',
                                    bootstrap_servers=self.hosts,
                                    auto_offset_reset='earliest',
                                    enable_auto_commit=False)
        consumer.subscribe(pattern="topic-test_manual_subs*")
        yield from consumer.start()
        yield from consumer.seek_to_committed()
        result = []
        for i in range(20):
            msg = yield from consumer.getone()
            result.append(msg.value)
        self.assertEqual(set(available_msgs), set(result))

        yield from consumer.commit(
            {TopicPartition(self.topic, 0): OffsetAndMetadata(9, '')})
        yield from consumer.seek_to_committed(TopicPartition(self.topic, 0))
        msg = yield from consumer.getone(TopicPartition(self.topic, 0))
        self.assertEqual(msg.value, b'9')
        yield from consumer.commit(
            {TopicPartition(self.topic, 0): OffsetAndMetadata(10, '')})
        yield from consumer.stop()

        # subscribe by topic
        consumer = AIOKafkaConsumer(loop=self.loop,
                                    group_id='test-group',
                                    bootstrap_servers=self.hosts,
                                    auto_offset_reset='earliest',
                                    enable_auto_commit=False)
        consumer.subscribe(topics=(self.topic, ))
        yield from consumer.start()
        yield from consumer.seek_to_committed()
        result = []
        for i in range(10):
            msg = yield from consumer.getone()
            result.append(msg.value)
        self.assertEqual(set(msgs2), set(result))
        self.assertEqual(consumer.subscription(), set([self.topic]))
        yield from consumer.stop()
Example #6
    def test_offsets_failed_scenarios(self):
        client = AIOKafkaClient(loop=self.loop, bootstrap_servers=self.hosts)
        yield from client.bootstrap()
        yield from self.wait_topic(client, 'topic1')
        subscription = SubscriptionState('earliest')
        subscription.subscribe(topics=('topic1', ))
        coordinator = GroupCoordinator(client,
                                       subscription,
                                       loop=self.loop,
                                       group_id='test-offsets-group')

        yield from coordinator.ensure_active_group()

        offsets = {TopicPartition('topic1', 0): OffsetAndMetadata(1, '')}
        yield from coordinator.commit_offsets(offsets)
        with mock.patch('kafka.common.for_code') as mocked:
            mocked.return_value = Errors.GroupAuthorizationFailedError
            with self.assertRaises(Errors.GroupAuthorizationFailedError):
                yield from coordinator.commit_offsets(offsets)

            mocked.return_value = Errors.TopicAuthorizationFailedError
            with self.assertRaises(Errors.TopicAuthorizationFailedError):
                yield from coordinator.commit_offsets(offsets)

            mocked.return_value = Errors.InvalidCommitOffsetSizeError
            with self.assertRaises(Errors.InvalidCommitOffsetSizeError):
                yield from coordinator.commit_offsets(offsets)

            mocked.return_value = Errors.GroupLoadInProgressError
            with self.assertRaises(Errors.GroupLoadInProgressError):
                yield from coordinator.commit_offsets(offsets)

            mocked.return_value = Errors.RebalanceInProgressError
            with self.assertRaises(Errors.RebalanceInProgressError):
                yield from coordinator.commit_offsets(offsets)
            self.assertEqual(subscription.needs_partition_assignment, True)

            mocked.return_value = KafkaError
            with self.assertRaises(KafkaError):
                yield from coordinator.commit_offsets(offsets)

            mocked.return_value = Errors.NotCoordinatorForGroupError
            with self.assertRaises(Errors.NotCoordinatorForGroupError):
                yield from coordinator.commit_offsets(offsets)
            self.assertEqual(coordinator.coordinator_id, None)

            with self.assertRaises(Errors.GroupCoordinatorNotAvailableError):
                yield from coordinator.commit_offsets(offsets)

        yield from coordinator.close()
        yield from client.close()
    def test_consumer_commit_validation(self):
        consumer = yield from self.consumer_factory()

        tp = TopicPartition(self.topic, 0)
        offset = yield from consumer.position(tp)
        offset_and_metadata = OffsetAndMetadata(offset, "")

        with self.assertRaises(ValueError):
            yield from consumer.commit({})
        with self.assertRaises(ValueError):
            yield from consumer.commit("something")
        with self.assertRaises(ValueError):
            yield from consumer.commit({"my_topic": offset_and_metadata})
        with self.assertRaises(ValueError):
            yield from consumer.commit({tp: offset})
        with self.assertRaises(ValueError):
            yield from consumer.commit({tp: (offset, 1000)})
Example #8
    def test_consumer_commit_validation(self):
        consumer = yield from self.consumer_factory()

        tp = TopicPartition(self.topic, 0)
        offset = yield from consumer.position(tp)
        offset_and_metadata = OffsetAndMetadata(offset, "")

        with self.assertRaises(ValueError):
            yield from consumer.commit({})
        with self.assertRaises(ValueError):
            yield from consumer.commit("something")
        with self.assertRaisesRegexp(ValueError,
                                     "Key should be TopicPartition instance"):
            yield from consumer.commit({"my_topic": offset_and_metadata})
        with self.assertRaisesRegexp(ValueError,
                                     "Metadata should be a string"):
            yield from consumer.commit({tp: (offset, 1000)})
        with self.assertRaisesRegexp(ValueError,
                                     "Metadata should be a string"):
            yield from consumer.commit({tp: (offset, b"\x00\x02")})
Example #9
    def test_fetchoffsets_failed_scenarios(self):
        client = AIOKafkaClient(loop=self.loop, bootstrap_servers=self.hosts)
        yield from client.bootstrap()
        yield from self.wait_topic(client, 'topic1')
        subscription = SubscriptionState('earliest')
        subscription.subscribe(topics=('topic1', ))
        coordinator = GroupCoordinator(client,
                                       subscription,
                                       loop=self.loop,
                                       group_id='fetch-offsets-group')

        yield from coordinator.ensure_active_group()

        offsets = {TopicPartition('topic1', 0): OffsetAndMetadata(1, '')}
        with mock.patch('kafka.common.for_code') as mocked:
            mocked.side_effect = MockedKafkaErrCode(
                Errors.GroupLoadInProgressError, Errors.NoError)
            yield from coordinator.fetch_committed_offsets(offsets)

            mocked.side_effect = MockedKafkaErrCode(
                Errors.UnknownMemberIdError, Errors.NoError)
            with self.assertRaises(Errors.UnknownMemberIdError):
                yield from coordinator.fetch_committed_offsets(offsets)
            self.assertEqual(subscription.needs_partition_assignment, True)

            mocked.side_effect = None
            mocked.return_value = Errors.UnknownTopicOrPartitionError
            r = yield from coordinator.fetch_committed_offsets(offsets)
            self.assertEqual(r, {})

            mocked.return_value = KafkaError
            with self.assertRaises(KafkaError):
                yield from coordinator.fetch_committed_offsets(offsets)

            mocked.side_effect = MockedKafkaErrCode(
                Errors.NotCoordinatorForGroupError, Errors.NoError,
                Errors.NoError, Errors.NoError)
            yield from coordinator.fetch_committed_offsets(offsets)

        yield from coordinator.close()
        yield from client.close()
Example #10
    def _proc_offsets_fetch_request(self, node_id, request):
        response = yield from self._send_req(node_id, request)
        offsets = {}
        for topic, partitions in response.topics:
            for partition, offset, metadata, error_code in partitions:
                tp = TopicPartition(topic, partition)
                error_type = Errors.for_code(error_code)
                if error_type is not Errors.NoError:
                    error = error_type()
                    log.debug("Error fetching offset for %s: %s", tp, error)
                    if error_type is Errors.GroupLoadInProgressError:
                        # just retry
                        raise error
                    elif error_type is Errors.NotCoordinatorForGroupError:
                        # re-discover the coordinator and retry
                        self.coordinator_dead()
                        raise error
                    elif error_type in (Errors.UnknownMemberIdError,
                                        Errors.IllegalGenerationError):
                        # need to re-join group
                        self._subscription.mark_for_reassignment()
                        raise error
                    elif error_type is Errors.UnknownTopicOrPartitionError:
                        log.warning(
                            "OffsetFetchRequest -- unknown topic %s", topic)
                        continue
                    else:
                        log.error("Unknown error fetching offsets for %s: %s",
                                  tp, error)
                        raise error
                elif offset >= 0:
                    # record the position with the offset
                    # (-1 indicates no committed offset to fetch)
                    offsets[tp] = OffsetAndMetadata(offset, metadata)
                else:
                    log.debug(
                        "No committed offset for partition %s", tp)

        return offsets
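For reference, the error handling above turns the numeric error_code from the response into an exception class via for_code(). A small hedged illustration using kafka-python's kafka.errors module; the specific code 16 is quoted from the Kafka protocol as an example and is an assumption of this sketch:

from kafka import errors as Errors

# for_code() maps a protocol error code to the corresponding exception class.
assert Errors.for_code(0) is Errors.NoError

err_cls = Errors.for_code(16)  # 16: NOT_COORDINATOR in the Kafka protocol (assumption)
if err_cls is not Errors.NoError:
    # Instances are regular exceptions, so they can be raised or logged.
    print('coordinator error:', err_cls())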
def offsets():
    return {
        TopicPartition('foobar', 0): OffsetAndMetadata(123, b''),
        TopicPartition('foobar', 1): OffsetAndMetadata(234, b''),
    }
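This helper looks like a pytest fixture with its decorator stripped by the listing. A hedged sketch of how it would typically be wired up; the decorator and the sample test below are assumptions, not part of the original:

import pytest
from kafka.structs import TopicPartition, OffsetAndMetadata

@pytest.fixture
def offsets():
    # Same mapping as above, exposed as an injectable fixture (assumption).
    return {
        TopicPartition('foobar', 0): OffsetAndMetadata(123, b''),
        TopicPartition('foobar', 1): OffsetAndMetadata(234, b''),
    }

def test_offsets_shape(offsets):
    # Keys are TopicPartition tuples; values carry the offset and its metadata.
    assert offsets[TopicPartition('foobar', 1)].offset == 234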
Example #12
    def start_consumer(self):
        """
        This method is the workhorse of this class - it starts the Kafka consumer and calls the callback function for
        each valid record
        """
        logger = self.panoptes_context.logger
        config = self.panoptes_context.config_object
        last_batch_size = 0
        logger.info(
            u'Trying to start Kafka Consumer with brokers: "%s", topics: "%s", group: "%s"'
            % (config.kafka_brokers, self._topics, self.group))

        try:
            consumer = kafka.KafkaConsumer(
                bootstrap_servers=config.kafka_brokers,
                client_id=self.client_id,
                group_id=self.group,
                enable_auto_commit=False,
                session_timeout_ms=self._session_timeout,
                request_timeout_ms=self._request_timeout,
                heartbeat_interval_ms=self._heartbeat_interval,
                max_poll_records=self._max_poll_records,
                max_partition_fetch_bytes=self._max_partition_fetch_bytes)
            consumer.subscribe(topics=self._topics)
            logger.info(u'Consumer subscribed to: %s' %
                        consumer.subscription())
            self._consumer = consumer
        except Exception as e:
            sys.exit(u'Error trying to start Kafka consumer: %s' % str(e))

        while not self.asked_to_stop():
            poll_age = (time.time() - self._last_polled) * 1000
            if (poll_age > self._session_timeout) and (last_batch_size > 0):
                logger.warn(
                    u'Poll cycle took %.2f ms for %d records, '
                    u'which is greater than the session timeout of %d ms' %
                    (poll_age, last_batch_size, self._session_timeout))

            try:
                topic_partitions = consumer.poll(timeout_ms=self._poll_timeout)
                self._last_polled = time.time()
                logger.debug(u'Poll returned with %d topic partitions' %
                             len(topic_partitions))
            except Exception as e:
                logger.error(u'Error while polling: %s' % str(e))
                continue

            last_batch_size = 0
            for topic_partition in list(topic_partitions.keys()):
                consumer_records = topic_partitions[topic_partition]
                last_batch_size += len(consumer_records)

                logger.debug(
                    u'Processing topic partition: %s, consumer records: %d, committed: %s'
                    % (str(topic_partition), len(consumer_records),
                       consumer.committed(topic_partition)))
                logger.debug(u'Consumed offsets: %s' %
                             consumer._subscription.all_consumed_offsets())

                callback_succeeded = True
                consumer_records_skipped = 0
                consumer_records_validation_failed = 0
                for consumer_record in consumer_records:
                    logger.debug(
                        u'Processing consumer record with key: "%s" and value: "%s"'
                        % (consumer_record.key, consumer_record.value))

                    consumer_record_key = consumer_record.key.decode(u'utf-8')
                    if self.keys and consumer_record_key not in self.keys:
                        logger.debug(
                            u'Consumer record key "%s" does not match any of the provided keys, skipping'
                            % consumer_record.key)
                        consumer_records_skipped += 1
                        continue

                    try:
                        consumer_record_object = json.loads(
                            consumer_record.value)
                    except Exception as e:
                        logger.warn(
                            u'Could not convert consumer record "%s" to JSON, skipping: %s'
                            % (consumer_record.value, str(e)))
                        consumer_records_validation_failed += 1
                        continue

                    if self._validate:
                        if not PanoptesConsumerRecordValidator.validate(
                                self.consumer_type, consumer_record_object):
                            logger.debug(
                                u'Consumer record failed validation, skipping')
                            consumer_records_validation_failed += 1
                            continue
                    try:
                        callback_succeeded = self._callback(
                            consumer_record_key, consumer_record_object)
                        # If the callback fails for even one consumer record, we do not want to
                        # update the committed offset for the entire batch, so exit
                        if not callback_succeeded:
                            logger.error(u'Callback function returned false')
                            break
                    except:
                        logger.exception(
                            u'Error trying to execute callback function')
                        break

                # Update the committed offset if the callback function succeeds for *all* consumer records in this topic
                # partition
                if callback_succeeded:
                    try:
                        position = consumer.position(topic_partition)
                    except Exception as e:
                        logger.error(
                            u'Error trying to fetch position for topic partition "%s": %s'
                            % (topic_partition, str(e)))
                    else:
                        offset = {
                            topic_partition:
                            OffsetAndMetadata(offset=position, metadata='')
                        }
                        logger.debug(u'Going to commit offset %s' %
                                     str(offset))
                        try:
                            consumer.commit(offset)
                        except Exception as e:
                            logger.error(
                                u'Error trying to commit offset "%s": %s' %
                                (offset, str(e)))

                if consumer_records_skipped or consumer_records_validation_failed:
                    logger.debug(
                        u'Skipped %d consumer records due to non-matching keys and %d consumer records due to '
                        u'validation failures for topic partition: %s' %
                        (consumer_records_skipped,
                         consumer_records_validation_failed, topic_partition))
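The consumer loop above reduces to a poll, validate, run-the-callback, then commit-the-position cycle. A stripped-down, hedged sketch of that pattern with kafka-python; the broker address, topic, group id, and handle() callback are placeholders:

import kafka
from kafka.structs import OffsetAndMetadata

def handle(record):
    # Placeholder for the per-record callback; return False to abort the batch.
    return True

consumer = kafka.KafkaConsumer(bootstrap_servers='localhost:9092',
                               group_id='example-group',
                               enable_auto_commit=False)
consumer.subscribe(topics=['example-topic'])

while True:
    batches = consumer.poll(timeout_ms=1000)
    for tp, records in batches.items():
        # Commit the post-batch position only if every record succeeded,
        # mirroring the "all records or nothing" rule in start_consumer().
        if all(handle(record) for record in records):
            consumer.commit({tp: OffsetAndMetadata(consumer.position(tp), '')})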