예제 #1
0
    def start(self):
        """ Connect to Kafka cluster. This will:

            * Load metadata for all cluster nodes and partition allocation
            * Wait for possible topic autocreation
            * Join group if ``group_id`` provided
        """
        yield from self._client.bootstrap()
        yield from self._wait_topics()

        if self._client.api_version < (0, 9):
            raise ValueError("Unsupported Kafka version: {}".format(
                self._client.api_version))

        self._fetcher = Fetcher(
            self._client,
            self._subscription,
            loop=self._loop,
            key_deserializer=self._key_deserializer,
            value_deserializer=self._value_deserializer,
            fetch_min_bytes=self._fetch_min_bytes,
            fetch_max_wait_ms=self._fetch_max_wait_ms,
            max_partition_fetch_bytes=self._max_partition_fetch_bytes,
            check_crcs=self._check_crcs,
            fetcher_timeout=self._consumer_timeout)

        if self._group_id is not None:
            # using group coordinator for automatic partitions assignment
            self._coordinator = GroupCoordinator(
                self._client,
                self._subscription,
                loop=self._loop,
                group_id=self._group_id,
                heartbeat_interval_ms=self._heartbeat_interval_ms,
                retry_backoff_ms=self._retry_backoff_ms,
                enable_auto_commit=self._enable_auto_commit,
                auto_commit_interval_ms=self._auto_commit_interval_ms,
                assignors=self._partition_assignment_strategy,
                exclude_internal_topics=self._exclude_internal_topics,
                assignment_changed_cb=self._on_change_subscription)

            yield from self._coordinator.ensure_active_group()
        else:
            # Using a simple assignment coordinator for reassignment on
            # metadata changes
            self._coordinator = NoGroupCoordinator(
                self._client,
                self._subscription,
                loop=self._loop,
                exclude_internal_topics=self._exclude_internal_topics,
                assignment_changed_cb=self._on_change_subscription)

            # If we passed `topics` to constructor.
            if self._subscription.needs_partition_assignment:
                yield from self._client.force_metadata_update()
                self._coordinator.assign_all_partitions(check_unknown=True)
예제 #2
0
    def start(self):
        yield from self._client.bootstrap()

        # Check Broker Version if not set explicitly
        if self._api_version == 'auto':
            self._api_version = yield from self._client.check_version()
        # Convert api_version config to tuple for easy comparisons
        self._api_version = tuple(map(int, self._api_version.split('.')))

        if self._api_version < (0, 9):
            raise ValueError("Unsupported Kafka version: {}".format(
                self._api_version))

        self._fetcher = Fetcher(
            self._client,
            self._subscription,
            loop=self._loop,
            key_deserializer=self._key_deserializer,
            value_deserializer=self._value_deserializer,
            fetch_min_bytes=self._fetch_min_bytes,
            fetch_max_wait_ms=self._fetch_max_wait_ms,
            max_partition_fetch_bytes=self._max_partition_fetch_bytes,
            check_crcs=self._check_crcs,
            fetcher_timeout=self._consumer_timeout)

        if self._group_id is not None:
            # using group coordinator for automatic partitions assignment
            self._coordinator = GroupCoordinator(
                self._client,
                self._subscription,
                loop=self._loop,
                group_id=self._group_id,
                heartbeat_interval_ms=self._heartbeat_interval_ms,
                retry_backoff_ms=self._retry_backoff_ms,
                enable_auto_commit=self._enable_auto_commit,
                auto_commit_interval_ms=self._auto_commit_interval_ms,
                assignors=self._partition_assignment_strategy)
            self._coordinator.on_group_rebalanced(self._on_change_subscription)

            yield from self._coordinator.ensure_active_group()
        elif self._subscription.needs_partition_assignment:
            # using manual partitions assignment by topic(s)
            yield from self._client.force_metadata_update()
            partitions = []
            for topic in self._subscription.subscription:
                p_ids = self.partitions_for_topic(topic)
                if not p_ids:
                    raise UnknownTopicOrPartitionError()
                for p_id in p_ids:
                    partitions.append(TopicPartition(topic, p_id))
            self._subscription.unsubscribe()
            self._subscription.assign_from_user(partitions)
            yield from self._update_fetch_positions(
                self._subscription.missing_fetch_positions())
예제 #3
0
    def test_update_fetch_positions(self):
        client = AIOKafkaClient(loop=self.loop, bootstrap_servers=[])
        subscriptions = SubscriptionState('latest')
        fetcher = Fetcher(client, subscriptions, loop=self.loop)
        partition = TopicPartition('test', 0)
        # partition is not assigned, should be ignored
        yield from fetcher.update_fetch_positions([partition])

        state = TopicPartitionState()
        state.seek(0)
        subscriptions.assignment[partition] = state
        # partition is fetchable, no need to update position
        yield from fetcher.update_fetch_positions([partition])

        client.ready = mock.MagicMock()
        client.ready.side_effect = asyncio.coroutine(lambda a: True)
        client.force_metadata_update = mock.MagicMock()
        client.force_metadata_update.side_effect = asyncio.coroutine(
            lambda: False)
        client.send = mock.MagicMock()
        client.send.side_effect = asyncio.coroutine(
            lambda n, r: OffsetResponse([('test', [(0, 0, [4])])]))
        state.await_reset(OffsetResetStrategy.LATEST)
        client.cluster.leader_for_partition = mock.MagicMock()
        client.cluster.leader_for_partition.side_effect = [None, -1, 0]
        yield from fetcher.update_fetch_positions([partition])
        self.assertEqual(state.position, 4)

        client.cluster.leader_for_partition = mock.MagicMock()
        client.cluster.leader_for_partition.return_value = 1
        client.send = mock.MagicMock()
        client.send.side_effect = asyncio.coroutine(
            lambda n, r: OffsetResponse([('test', [(0, 3, [])])]))
        state.await_reset(OffsetResetStrategy.LATEST)
        with self.assertRaises(UnknownTopicOrPartitionError):
            yield from fetcher.update_fetch_positions([partition])

        client.send.side_effect = asyncio.coroutine(
            lambda n, r: OffsetResponse([('test', [(0, -1, [])])]))
        with self.assertRaises(UnknownError):
            yield from fetcher.update_fetch_positions([partition])
        yield from fetcher.close()
예제 #4
0
    def _setup_error_after_data(self):
        subscriptions = SubscriptionState('latest')
        client = AIOKafkaClient(loop=self.loop, bootstrap_servers=[])
        fetcher = Fetcher(client, subscriptions, loop=self.loop)
        tp1 = TopicPartition('some_topic', 0)
        tp2 = TopicPartition('some_topic', 1)

        state = TopicPartitionState()
        state.seek(0)
        subscriptions.assignment[tp1] = state
        state = TopicPartitionState()
        state.seek(0)
        subscriptions.assignment[tp2] = state
        subscriptions.needs_partition_assignment = False

        # Add some data
        messages = [
            ConsumerRecord(topic="some_topic",
                           partition=1,
                           offset=0,
                           timestamp=0,
                           timestamp_type=0,
                           key=None,
                           value=b"some",
                           checksum=None,
                           serialized_key_size=0,
                           serialized_value_size=4)
        ]
        fetcher._records[tp2] = FetchResult(tp2,
                                            subscriptions=subscriptions,
                                            loop=self.loop,
                                            messages=deque(messages),
                                            backoff=0)
        # Add some error
        fetcher._records[tp1] = FetchError(loop=self.loop,
                                           error=OffsetOutOfRangeError({}),
                                           backoff=0)
        return fetcher, tp1, tp2, messages
예제 #5
0
    def test_proc_fetch_request(self):
        client = AIOKafkaClient(loop=self.loop, bootstrap_servers=[])
        subscriptions = SubscriptionState('latest')
        fetcher = Fetcher(client, subscriptions, loop=self.loop)

        tp = TopicPartition('test', 0)
        tp_info = (tp.topic, [(tp.partition, 155, 100000)])
        req = FetchRequest(
            -1,  # replica_id
            100,
            100,
            [tp_info])

        client.ready = mock.MagicMock()
        client.ready.side_effect = asyncio.coroutine(lambda a: True)
        client.force_metadata_update = mock.MagicMock()
        client.force_metadata_update.side_effect = asyncio.coroutine(
            lambda: False)
        client.send = mock.MagicMock()
        msg = Message(b"test msg")
        msg._encode_self()
        client.send.side_effect = asyncio.coroutine(lambda n, r: FetchResponse(
            [('test', [(0, 0, 9, [(4, 10, msg)])])]))
        fetcher._in_flight.add(0)
        needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
        self.assertEqual(needs_wake_up, False)

        state = TopicPartitionState()
        state.seek(0)
        subscriptions.assignment[tp] = state
        subscriptions.needs_partition_assignment = False
        fetcher._in_flight.add(0)
        needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
        self.assertEqual(needs_wake_up, True)
        buf = fetcher._records[tp]
        self.assertEqual(buf.getone(), None)  # invalid offset, msg is ignored

        state.seek(4)
        fetcher._in_flight.add(0)
        fetcher._records.clear()
        needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
        self.assertEqual(needs_wake_up, True)
        buf = fetcher._records[tp]
        self.assertEqual(buf.getone().value, b"test msg")

        # error -> no partition found
        client.send.side_effect = asyncio.coroutine(lambda n, r: FetchResponse(
            [('test', [(0, 3, 9, [(4, 10, msg)])])]))
        fetcher._in_flight.add(0)
        fetcher._records.clear()
        needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
        self.assertEqual(needs_wake_up, False)

        # error -> topic auth failed
        client.send.side_effect = asyncio.coroutine(lambda n, r: FetchResponse(
            [('test', [(0, 29, 9, [(4, 10, msg)])])]))
        fetcher._in_flight.add(0)
        fetcher._records.clear()
        needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
        self.assertEqual(needs_wake_up, True)
        with self.assertRaises(TopicAuthorizationFailedError):
            yield from fetcher.next_record([])

        # error -> unknown
        client.send.side_effect = asyncio.coroutine(lambda n, r: FetchResponse(
            [('test', [(0, -1, 9, [(4, 10, msg)])])]))
        fetcher._in_flight.add(0)
        fetcher._records.clear()
        needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
        self.assertEqual(needs_wake_up, False)

        # error -> offset out of range
        client.send.side_effect = asyncio.coroutine(lambda n, r: FetchResponse(
            [('test', [(0, 1, 9, [(4, 10, msg)])])]))
        fetcher._in_flight.add(0)
        fetcher._records.clear()
        needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
        self.assertEqual(needs_wake_up, False)
        self.assertEqual(state.is_fetchable(), False)

        state.seek(4)
        subscriptions._default_offset_reset_strategy = OffsetResetStrategy.NONE
        client.send.side_effect = asyncio.coroutine(lambda n, r: FetchResponse(
            [('test', [(0, 1, 9, [(4, 10, msg)])])]))
        fetcher._in_flight.add(0)
        fetcher._records.clear()
        needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
        self.assertEqual(needs_wake_up, True)
        with self.assertRaises(OffsetOutOfRangeError):
            yield from fetcher.next_record([])

        yield from fetcher.close()
예제 #6
0
    def test_compacted_topic_consumption(self):
        # Compacted topics can have offsets skipped
        client = AIOKafkaClient(loop=self.loop, bootstrap_servers=[])
        client.ready = mock.MagicMock()
        client.ready.side_effect = asyncio.coroutine(lambda a: True)
        client.force_metadata_update = mock.MagicMock()
        client.force_metadata_update.side_effect = asyncio.coroutine(
            lambda: False)
        client.send = mock.MagicMock()

        subscriptions = SubscriptionState('latest')
        fetcher = Fetcher(client, subscriptions, loop=self.loop)

        tp = TopicPartition('test', 0)
        req = FetchRequest(
            -1,  # replica_id
            100,
            100,
            [(tp.topic, [(tp.partition, 155, 100000)])])
        msg1 = Message(b"12345", key=b"1")
        msg1._encode_self()
        msg2 = Message(b"23456", key=b"2")
        msg2._encode_self()
        msg3 = Message(b"34567", key=b"3")
        msg3._encode_self()
        resp = FetchResponse([(
            'test',
            [(
                0,
                0,
                3000,  # partition, error_code, highwater_offset
                [
                    (160, 5, msg1),  # offset, len_bytes, bytes
                    (162, 5, msg2),
                    (167, 5, msg3),
                ])])])

        client.send.side_effect = asyncio.coroutine(lambda n, r: resp)
        state = TopicPartitionState()
        state.seek(155)
        state.drop_pending_message_set = False
        subscriptions.assignment[tp] = state
        subscriptions.needs_partition_assignment = False
        fetcher._in_flight.add(0)

        needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
        self.assertEqual(needs_wake_up, True)
        buf = fetcher._records[tp]
        # Test successful getone
        first = buf.getone()
        self.assertEqual(state.position, 161)
        self.assertEqual((first.value, first.key, first.offset),
                         (msg1.value, msg1.key, 160))

        # Test successful getmany
        second, third = buf.getall()
        self.assertEqual(state.position, 168)
        self.assertEqual((second.value, second.key, second.offset),
                         (msg2.value, msg2.key, 162))
        self.assertEqual((third.value, third.key, third.offset),
                         (msg3.value, msg3.key, 167))
예제 #7
0
    def test_fetcher_offsets_for_times(self):
        client = AIOKafkaClient(loop=self.loop, bootstrap_servers=[])
        client.ready = mock.MagicMock()
        client.ready.side_effect = asyncio.coroutine(lambda a: True)
        client._maybe_wait_metadata = mock.MagicMock()
        client._maybe_wait_metadata.side_effect = asyncio.coroutine(
            lambda: False)
        client.cluster.leader_for_partition = mock.MagicMock()
        client.cluster.leader_for_partition.return_value = 0
        client._api_version = (0, 10, 1)

        subscriptions = SubscriptionState('latest')
        fetcher = Fetcher(client, subscriptions, loop=self.loop)
        tp0 = TopicPartition("topic", 0)
        tp1 = TopicPartition("topic", 1)

        subscriptions = SubscriptionState('latest')
        fetcher = Fetcher(client, subscriptions, loop=self.loop)

        # Timeouting will result in KafkaTimeoutError
        with mock.patch.object(fetcher, "_proc_offset_requests") as mocked:
            mocked.side_effect = asyncio.TimeoutError

            with self.assertRaises(KafkaTimeoutError):
                yield from fetcher.get_offsets_by_times({tp0: 0}, 1000)

        # Broker returns UnsupportedForMessageFormatError
        with mock.patch.object(client, "send") as mocked:

            @asyncio.coroutine
            def mock_send(node_id, request):
                return OffsetResponse[1]([("topic", [(0, 43, -1, -1)]),
                                          ("topic", [(1, 0, 1000, 9999)])])

            mocked.side_effect = mock_send
            offsets = yield from fetcher.get_offsets_by_times({
                tp0: 0,
                tp1: 0
            }, 1000)
            self.assertEqual(offsets, {
                tp0: None,
                tp1: OffsetAndTimestamp(9999, 1000),
            })
        # Brokers returns NotLeaderForPartitionError
        with mock.patch.object(client, "send") as mocked:

            @asyncio.coroutine
            def mock_send(node_id, request):
                return OffsetResponse[1]([
                    ("topic", [(0, 6, -1, -1)]),
                ])

            mocked.side_effect = mock_send
            with self.assertRaises(NotLeaderForPartitionError):
                yield from fetcher._proc_offset_request(
                    0, {"topic": (0, 1000)})

        # Broker returns UnknownTopicOrPartitionError
        with mock.patch.object(client, "send") as mocked:

            @asyncio.coroutine
            def mock_send(node_id, request):
                return OffsetResponse[1]([
                    ("topic", [(0, 3, -1, -1)]),
                ])

            mocked.side_effect = mock_send
            with self.assertLogs("aiokafka.fetcher", "WARN") as cm:
                with self.assertRaises(UnknownTopicOrPartitionError):
                    yield from fetcher._proc_offset_request(
                        0, {"topic": (0, 1000)})
            if cm is not None:
                self.assertIn("Received unknown topic or partition error",
                              cm.output[0])