def start(self):
    """Connect to the Kafka cluster and get the consumer ready to fetch.

    This will:

    * Load metadata for all cluster nodes and partition allocation
    * Wait for possible topic autocreation
    * Join the group if ``group_id`` was provided

    Raises:
        ValueError: if the client reports a broker API version below 0.9.
    """
    client = self._client
    yield from client.bootstrap()
    yield from self._wait_topics()

    if client.api_version < (0, 9):
        raise ValueError(
            "Unsupported Kafka version: {}".format(client.api_version))

    subscription = self._subscription
    self._fetcher = Fetcher(
        client, subscription,
        loop=self._loop,
        key_deserializer=self._key_deserializer,
        value_deserializer=self._value_deserializer,
        fetch_min_bytes=self._fetch_min_bytes,
        fetch_max_wait_ms=self._fetch_max_wait_ms,
        max_partition_fetch_bytes=self._max_partition_fetch_bytes,
        check_crcs=self._check_crcs,
        fetcher_timeout=self._consumer_timeout)

    # Keyword arguments that both coordinator flavours receive.
    shared_kwargs = dict(
        loop=self._loop,
        exclude_internal_topics=self._exclude_internal_topics,
        assignment_changed_cb=self._on_change_subscription)

    if self._group_id is None:
        # No consumer group: a simple assignment coordinator handles
        # reassignment on metadata changes.
        self._coordinator = NoGroupCoordinator(
            client, subscription, **shared_kwargs)

        # Only relevant if `topics` were passed to the constructor.
        if subscription.needs_partition_assignment:
            yield from client.force_metadata_update()
            self._coordinator.assign_all_partitions(check_unknown=True)
    else:
        # A group coordinator performs automatic partition assignment.
        self._coordinator = GroupCoordinator(
            client, subscription,
            group_id=self._group_id,
            heartbeat_interval_ms=self._heartbeat_interval_ms,
            retry_backoff_ms=self._retry_backoff_ms,
            enable_auto_commit=self._enable_auto_commit,
            auto_commit_interval_ms=self._auto_commit_interval_ms,
            assignors=self._partition_assignment_strategy,
            **shared_kwargs)
        yield from self._coordinator.ensure_active_group()
def start(self):
    """Connect to the Kafka cluster and prepare the consumer for fetching.

    Bootstraps the client, resolves the broker API version (probing the
    broker when it is configured as ``'auto'``), creates the fetcher and
    then either joins the consumer group (when ``group_id`` is set) or
    assigns every partition of the subscribed topics manually.

    Raises:
        ValueError: if the resolved broker version is older than 0.9.
        UnknownTopicOrPartitionError: if, during manual assignment, a
            subscribed topic has no known partitions.
    """
    yield from self._client.bootstrap()

    # Check Broker Version if not set explicitly
    if self._api_version == 'auto':
        self._api_version = yield from self._client.check_version()
    # Convert api_version config to tuple for easy comparisons. Only a
    # string needs converting: after a previous ``start()`` call the value
    # is already a tuple, and calling ``.split`` on it would raise
    # AttributeError instead of letting the consumer start (or fail) again.
    if isinstance(self._api_version, str):
        self._api_version = tuple(map(int, self._api_version.split('.')))

    if self._api_version < (0, 9):
        raise ValueError("Unsupported Kafka version: {}".format(
            self._api_version))

    self._fetcher = Fetcher(
        self._client, self._subscription, loop=self._loop,
        key_deserializer=self._key_deserializer,
        value_deserializer=self._value_deserializer,
        fetch_min_bytes=self._fetch_min_bytes,
        fetch_max_wait_ms=self._fetch_max_wait_ms,
        max_partition_fetch_bytes=self._max_partition_fetch_bytes,
        check_crcs=self._check_crcs,
        fetcher_timeout=self._consumer_timeout)

    if self._group_id is not None:
        # using group coordinator for automatic partitions assignment
        self._coordinator = GroupCoordinator(
            self._client, self._subscription, loop=self._loop,
            group_id=self._group_id,
            heartbeat_interval_ms=self._heartbeat_interval_ms,
            retry_backoff_ms=self._retry_backoff_ms,
            enable_auto_commit=self._enable_auto_commit,
            auto_commit_interval_ms=self._auto_commit_interval_ms,
            assignors=self._partition_assignment_strategy)
        self._coordinator.on_group_rebalanced(
            self._on_change_subscription)

        yield from self._coordinator.ensure_active_group()
    elif self._subscription.needs_partition_assignment:
        # using manual partitions assignment by topic(s)
        yield from self._client.force_metadata_update()
        partitions = []
        for topic in self._subscription.subscription:
            p_ids = self.partitions_for_topic(topic)
            if not p_ids:
                raise UnknownTopicOrPartitionError()
            partitions.extend(
                TopicPartition(topic, p_id) for p_id in p_ids)
        # Replace the topic subscription with an explicit assignment.
        self._subscription.unsubscribe()
        self._subscription.assign_from_user(partitions)
        yield from self._update_fetch_positions(
            self._subscription.missing_fetch_positions())
def test_update_fetch_positions(self):
    """Drive ``Fetcher.update_fetch_positions`` through its main paths.

    Covers an unassigned partition, an already-positioned partition, a
    successful offset reset (position becomes 4) and two error replies
    that must surface as ``UnknownTopicOrPartitionError`` and
    ``UnknownError``.
    """
    def offset_reply(error_code, offsets):
        # Coroutine standing in for ``client.send``; it builds a fresh
        # response for partition 0 of topic 'test' on every call.
        return asyncio.coroutine(
            lambda n, r: OffsetResponse(
                [('test', [(0, error_code, list(offsets))])]))

    client = AIOKafkaClient(loop=self.loop, bootstrap_servers=[])
    subscriptions = SubscriptionState('latest')
    fetcher = Fetcher(client, subscriptions, loop=self.loop)
    partition = TopicPartition('test', 0)

    # The partition is not assigned yet, so it should be ignored.
    yield from fetcher.update_fetch_positions([partition])

    tp_state = TopicPartitionState()
    tp_state.seek(0)
    subscriptions.assignment[partition] = tp_state
    # The partition is fetchable, so no position update is needed.
    yield from fetcher.update_fetch_positions([partition])

    client.ready = mock.MagicMock(
        side_effect=asyncio.coroutine(lambda a: True))
    client.force_metadata_update = mock.MagicMock(
        side_effect=asyncio.coroutine(lambda: False))
    client.send = mock.MagicMock(side_effect=offset_reply(0, [4]))
    tp_state.await_reset(OffsetResetStrategy.LATEST)
    # The leader lookup yields None, then -1, then node 0.
    client.cluster.leader_for_partition = mock.MagicMock(
        side_effect=[None, -1, 0])
    yield from fetcher.update_fetch_positions([partition])
    self.assertEqual(tp_state.position, 4)

    client.cluster.leader_for_partition = mock.MagicMock(return_value=1)
    client.send = mock.MagicMock(side_effect=offset_reply(3, []))
    tp_state.await_reset(OffsetResetStrategy.LATEST)
    with self.assertRaises(UnknownTopicOrPartitionError):
        yield from fetcher.update_fetch_positions([partition])

    client.send.side_effect = offset_reply(-1, [])
    with self.assertRaises(UnknownError):
        yield from fetcher.update_fetch_positions([partition])

    yield from fetcher.close()
def _setup_error_after_data(self):
    """Build a fetcher holding one buffered record and one buffered error.

    Partition 1 of ``some_topic`` gets a single record, and partition 0
    gets an ``OffsetOutOfRangeError``; the record is inserted into the
    buffer first.

    Returns:
        A ``(fetcher, tp1, tp2, messages)`` tuple, where ``tp1`` is the
        partition carrying the error and ``tp2`` the one carrying
        ``messages``.
    """
    subscriptions = SubscriptionState('latest')
    client = AIOKafkaClient(loop=self.loop, bootstrap_servers=[])
    fetcher = Fetcher(client, subscriptions, loop=self.loop)

    tp1 = TopicPartition('some_topic', 0)
    tp2 = TopicPartition('some_topic', 1)
    # Both partitions are assigned and positioned at offset 0.
    for topic_partition in (tp1, tp2):
        tp_state = TopicPartitionState()
        tp_state.seek(0)
        subscriptions.assignment[topic_partition] = tp_state
    subscriptions.needs_partition_assignment = False

    # Add some data
    record = ConsumerRecord(
        topic="some_topic", partition=1, offset=0, timestamp=0,
        timestamp_type=0, key=None, value=b"some", checksum=None,
        serialized_key_size=0, serialized_value_size=4)
    messages = [record]
    fetcher._records[tp2] = FetchResult(
        tp2, subscriptions=subscriptions, loop=self.loop,
        messages=deque(messages), backoff=0)

    # Add some error
    out_of_range = OffsetOutOfRangeError({})
    fetcher._records[tp1] = FetchError(
        loop=self.loop, error=out_of_range, backoff=0)

    return fetcher, tp1, tp2, messages
def test_proc_fetch_request(self):
    """Feed ``Fetcher._proc_fetch_request`` successful and failing replies.

    Each round marks node 0 as in flight, processes the same fetch request
    against a canned ``FetchResponse`` and checks the returned wake-up
    flag, the buffered records and the errors raised by ``next_record``.
    """
    client = AIOKafkaClient(loop=self.loop, bootstrap_servers=[])
    subscriptions = SubscriptionState('latest')
    fetcher = Fetcher(client, subscriptions, loop=self.loop)

    tp = TopicPartition('test', 0)
    req = FetchRequest(
        -1,  # replica_id
        100, 100, [(tp.topic, [(tp.partition, 155, 100000)])])

    client.ready = mock.MagicMock(
        side_effect=asyncio.coroutine(lambda a: True))
    client.force_metadata_update = mock.MagicMock(
        side_effect=asyncio.coroutine(lambda: False))
    client.send = mock.MagicMock()
    msg = Message(b"test msg")
    msg._encode_self()

    def reply_with(error_code):
        # Make ``client.send`` answer with one message at offset 4 and the
        # given error code for partition 0 of topic 'test'.
        client.send.side_effect = asyncio.coroutine(
            lambda n, r: FetchResponse(
                [('test', [(0, error_code, 9, [(4, 10, msg)])])]))

    def process(reset_buffer=True):
        # One fetch round; delegated to with ``yield from``.
        fetcher._in_flight.add(0)
        if reset_buffer:
            fetcher._records.clear()
        return (yield from fetcher._proc_fetch_request(0, req))

    reply_with(0)

    # Partition is not assigned yet.
    woke = yield from process(reset_buffer=False)
    self.assertEqual(woke, False)

    tp_state = TopicPartitionState()
    tp_state.seek(0)
    subscriptions.assignment[tp] = tp_state
    subscriptions.needs_partition_assignment = False
    woke = yield from process(reset_buffer=False)
    self.assertEqual(woke, True)
    buffered = fetcher._records[tp]
    # invalid offset, msg is ignored
    self.assertEqual(buffered.getone(), None)

    tp_state.seek(4)
    woke = yield from process()
    self.assertEqual(woke, True)
    buffered = fetcher._records[tp]
    self.assertEqual(buffered.getone().value, b"test msg")

    # error -> no partition found
    reply_with(3)
    woke = yield from process()
    self.assertEqual(woke, False)

    # error -> topic auth failed
    reply_with(29)
    woke = yield from process()
    self.assertEqual(woke, True)
    with self.assertRaises(TopicAuthorizationFailedError):
        yield from fetcher.next_record([])

    # error -> unknown
    reply_with(-1)
    woke = yield from process()
    self.assertEqual(woke, False)

    # error -> offset out of range
    reply_with(1)
    woke = yield from process()
    self.assertEqual(woke, False)
    self.assertEqual(tp_state.is_fetchable(), False)

    # Same error again, this time with the reset strategy set to NONE.
    tp_state.seek(4)
    subscriptions._default_offset_reset_strategy = OffsetResetStrategy.NONE
    reply_with(1)
    woke = yield from process()
    self.assertEqual(woke, True)
    with self.assertRaises(OffsetOutOfRangeError):
        yield from fetcher.next_record([])

    yield from fetcher.close()
def test_compacted_topic_consumption(self):
    """Check that gaps between message offsets are handled by the buffer.

    Compacted topics can have offsets skipped, so the canned response
    carries messages at offsets 160, 162 and 167 while the partition
    position starts at 155. After each read the position is expected to
    be one past the last returned offset.
    """
    client = AIOKafkaClient(loop=self.loop, bootstrap_servers=[])
    client.ready = mock.MagicMock()
    client.ready.side_effect = asyncio.coroutine(lambda a: True)
    client.force_metadata_update = mock.MagicMock()
    client.force_metadata_update.side_effect = asyncio.coroutine(
        lambda: False)
    client.send = mock.MagicMock()

    subscriptions = SubscriptionState('latest')
    fetcher = Fetcher(client, subscriptions, loop=self.loop)
    tp = TopicPartition('test', 0)
    req = FetchRequest(
        -1,  # replica_id
        100, 100, [(tp.topic, [(tp.partition, 155, 100000)])])

    msg1 = Message(b"12345", key=b"1")
    msg1._encode_self()
    msg2 = Message(b"23456", key=b"2")
    msg2._encode_self()
    msg3 = Message(b"34567", key=b"3")
    msg3._encode_self()
    resp = FetchResponse(
        [('test', [(
            0, 0, 3000,  # partition, error_code, highwater_offset
            [
                (160, 5, msg1),  # offset, len_bytes, bytes
                (162, 5, msg2),
                (167, 5, msg3),
            ])])])

    client.send.side_effect = asyncio.coroutine(lambda n, r: resp)
    state = TopicPartitionState()
    state.seek(155)
    state.drop_pending_message_set = False
    subscriptions.assignment[tp] = state
    subscriptions.needs_partition_assignment = False
    fetcher._in_flight.add(0)

    needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
    self.assertEqual(needs_wake_up, True)
    buf = fetcher._records[tp]

    # Test successful getone
    first = buf.getone()
    self.assertEqual(state.position, 161)
    self.assertEqual(
        (first.value, first.key, first.offset),
        (msg1.value, msg1.key, 160))

    # Test successful getall
    second, third = buf.getall()
    self.assertEqual(state.position, 168)
    self.assertEqual(
        (second.value, second.key, second.offset),
        (msg2.value, msg2.key, 162))
    self.assertEqual(
        (third.value, third.key, third.offset),
        (msg3.value, msg3.key, 167))

    # Close the fetcher, as the sibling fetcher tests do, so it is not
    # left open after the test.
    yield from fetcher.close()
def test_fetcher_offsets_for_times(self):
    """Check timeout and error handling of the offsets-by-time lookups.

    ``get_offsets_by_times`` must turn a timeout into
    ``KafkaTimeoutError`` and map error code 43 to a ``None`` offset, while
    ``_proc_offset_request`` must raise for error codes 6
    (``NotLeaderForPartitionError``) and 3
    (``UnknownTopicOrPartitionError``, with a warning logged).
    """
    client = AIOKafkaClient(loop=self.loop, bootstrap_servers=[])
    client.ready = mock.MagicMock()
    client.ready.side_effect = asyncio.coroutine(lambda a: True)
    client._maybe_wait_metadata = mock.MagicMock()
    client._maybe_wait_metadata.side_effect = asyncio.coroutine(
        lambda: False)
    client.cluster.leader_for_partition = mock.MagicMock()
    client.cluster.leader_for_partition.return_value = 0
    client._api_version = (0, 10, 1)

    # Build the subscription state and fetcher exactly once; a second,
    # identical construction would discard the first fetcher without
    # ever closing it.
    subscriptions = SubscriptionState('latest')
    fetcher = Fetcher(client, subscriptions, loop=self.loop)
    tp0 = TopicPartition("topic", 0)
    tp1 = TopicPartition("topic", 1)

    # Timing out will result in KafkaTimeoutError
    with mock.patch.object(fetcher, "_proc_offset_requests") as mocked:
        mocked.side_effect = asyncio.TimeoutError

        with self.assertRaises(KafkaTimeoutError):
            yield from fetcher.get_offsets_by_times({tp0: 0}, 1000)

    # Broker returns UnsupportedForMessageFormatError
    with mock.patch.object(client, "send") as mocked:
        @asyncio.coroutine
        def mock_send(node_id, request):
            return OffsetResponse[1]([
                ("topic", [(0, 43, -1, -1)]),
                ("topic", [(1, 0, 1000, 9999)])
            ])
        mocked.side_effect = mock_send
        offsets = yield from fetcher.get_offsets_by_times(
            {tp0: 0, tp1: 0}, 1000)
        self.assertEqual(offsets, {
            tp0: None,
            tp1: OffsetAndTimestamp(9999, 1000),
        })

    # Broker returns NotLeaderForPartitionError
    with mock.patch.object(client, "send") as mocked:
        @asyncio.coroutine
        def mock_send(node_id, request):
            return OffsetResponse[1]([
                ("topic", [(0, 6, -1, -1)]),
            ])
        mocked.side_effect = mock_send
        with self.assertRaises(NotLeaderForPartitionError):
            yield from fetcher._proc_offset_request(
                0, {"topic": (0, 1000)})

    # Broker returns UnknownTopicOrPartitionError
    with mock.patch.object(client, "send") as mocked:
        @asyncio.coroutine
        def mock_send(node_id, request):
            return OffsetResponse[1]([
                ("topic", [(0, 3, -1, -1)]),
            ])
        mocked.side_effect = mock_send
        with self.assertLogs("aiokafka.fetcher", "WARN") as cm:
            with self.assertRaises(UnknownTopicOrPartitionError):
                yield from fetcher._proc_offset_request(
                    0, {"topic": (0, 1000)})
        # ``cm`` is checked for None before its output is inspected,
        # which suggests ``assertLogs`` may not always return a capture
        # object here -- TODO confirm against the test base class.
        if cm is not None:
            self.assertIn("Received unknown topic or partition error",
                          cm.output[0])

    # Close the fetcher, as the sibling fetcher tests do, so it is not
    # left open after the test.
    yield from fetcher.close()