def test_init_fetches(fetcher, mocker):
    """Check when init_fetches() prepares and sends fetch requests.

    While records are buffered, or while an iterator is set, it must return
    an empty list without calling _create_fetch_requests(). Otherwise every
    prepared request is sent to its node.
    """
    settings = fetcher.config
    wait_ms = settings['fetch_max_wait_ms']
    min_bytes = settings['fetch_min_bytes']
    max_bytes = settings['max_partition_fetch_bytes']

    def request_for(*partitions):
        # One request on topic 'foobar'; every partition starts at offset 0.
        return FetchRequest(
            -1, wait_ms, min_bytes,
            [('foobar', [(number, 0, max_bytes) for number in partitions])])

    expected = [request_for(0, 1), request_for(2)]

    # The patched factory hands the requests back keyed by node id (0, 1).
    mocker.patch.object(
        fetcher, '_create_fetch_requests',
        return_value=dict(enumerate(expected)))

    # Case 1: records are still buffered.
    fetcher._records.append('foobar')
    result = fetcher.init_fetches()
    assert fetcher._create_fetch_requests.call_count == 0
    assert result == []
    fetcher._records.clear()

    # Case 2: an iterator is set.
    fetcher._iterator = 'foo'
    result = fetcher.init_fetches()
    assert fetcher._create_fetch_requests.call_count == 0
    assert result == []
    fetcher._iterator = None

    # Case 3: nothing is pending, so each request goes to its node.
    result = fetcher.init_fetches()
    for node_id, fetch_request in enumerate(expected):
        fetcher._client.send.assert_any_call(node_id, fetch_request)
    assert len(result) == len(expected)
def _create_fetch_requests(self):
    """Create fetch requests for all fetchable partitions, grouped by node.

    A partition is left out of this round when:

    * it has no known leader (leader is None or -1); a cluster metadata
      update is requested instead
    * its leader node already has requests in flight

    NOTE(review): this function does not itself check whether previously
    fetched records were handed to the consumer; presumably
    fetchable_partitions() or the caller takes care of that -- confirm.

    Returns:
        dict: {node_id: FetchRequest, ...} with exactly one request per
            leader node, covering every partition selected for that node.
    """
    # Fetch info shaped as {node_id: {topic: [partition_info, ...]}} so that
    # each inner dict can be passed to FetchRequest() via .items()
    fetchable = collections.defaultdict(
        lambda: collections.defaultdict(list))

    for partition in self._subscriptions.fetchable_partitions():
        node_id = self._client.cluster.leader_for_partition(partition)
        if node_id is None or node_id == -1:
            log.debug(
                "No leader found for partition %s."
                " Requesting metadata update", partition)
            self._client.cluster.request_update()
        elif self._client.in_flight_request_count(node_id) == 0:
            # There is a leader and it has no in-flight requests:
            # fetch from the partition's current position
            position = self._subscriptions.assignment[partition].position
            partition_info = (
                partition.partition,
                position,
                self.config['max_partition_fetch_bytes'],
            )
            fetchable[node_id][partition.topic].append(partition_info)
            log.debug("Adding fetch request for partition %s at offset %d",
                      partition, position)

    # Plain .items() behaves the same on Python 2 and 3, so no
    # compatibility shim is needed to walk the grouping.
    return {
        node_id: FetchRequest(
            -1,  # replica_id
            self.config['fetch_max_wait_ms'],
            self.config['fetch_min_bytes'],
            partition_data.items())
        for node_id, partition_data in fetchable.items()
    }
def test_proc_fetch_request(self):
    """Run Fetcher._proc_fetch_request against successful and failing replies.

    Each scenario re-adds node 0 to the in-flight set, issues the same
    request and checks the returned wake-up flag.
    """
    client = AIOKafkaClient(loop=self.loop, bootstrap_servers=[])
    subscriptions = SubscriptionState('latest')
    fetcher = Fetcher(client, subscriptions, loop=self.loop)

    tp = TopicPartition('test', 0)
    req = FetchRequest(
        -1,  # replica_id
        100, 100,
        [(tp.topic, [(tp.partition, 155, 100000)])])

    # Replace the client's ready/force_metadata_update/send with mocks.
    client.ready = mock.MagicMock(
        side_effect=asyncio.coroutine(lambda a: True))
    client.force_metadata_update = mock.MagicMock(
        side_effect=asyncio.coroutine(lambda: False))
    client.send = mock.MagicMock()

    msg = Message(b"test msg")
    msg._encode_self()

    def reply_with(error_code):
        # From now on send() answers for topic 'test', partition 0, with the
        # given error code and a single message at offset 4.
        client.send.side_effect = asyncio.coroutine(
            lambda n, r: FetchResponse(
                [('test', [(0, error_code, 9, [(4, 10, msg)])])]))

    # Nothing is assigned yet
    reply_with(0)
    fetcher._in_flight.add(0)
    woke = yield from fetcher._proc_fetch_request(0, req)
    self.assertEqual(woke, False)

    # Assigned, but positioned at offset 0
    state = TopicPartitionState()
    state.seek(0)
    subscriptions.assignment[tp] = state
    subscriptions.needs_partition_assignment = False
    fetcher._in_flight.add(0)
    woke = yield from fetcher._proc_fetch_request(0, req)
    self.assertEqual(woke, True)
    buf = fetcher._records[tp]
    self.assertEqual(buf.getone(), None)  # invalid offset, msg is ignored

    # Positioned at offset 4, where the message sits
    state.seek(4)
    fetcher._in_flight.add(0)
    fetcher._records.clear()
    woke = yield from fetcher._proc_fetch_request(0, req)
    self.assertEqual(woke, True)
    buf = fetcher._records[tp]
    self.assertEqual(buf.getone().value, b"test msg")

    # error -> no partition found
    reply_with(3)
    fetcher._in_flight.add(0)
    fetcher._records.clear()
    woke = yield from fetcher._proc_fetch_request(0, req)
    self.assertEqual(woke, False)

    # error -> topic auth failed
    reply_with(29)
    fetcher._in_flight.add(0)
    fetcher._records.clear()
    woke = yield from fetcher._proc_fetch_request(0, req)
    self.assertEqual(woke, True)
    with self.assertRaises(TopicAuthorizationFailedError):
        yield from fetcher.next_record([])

    # error -> unknown
    reply_with(-1)
    fetcher._in_flight.add(0)
    fetcher._records.clear()
    woke = yield from fetcher._proc_fetch_request(0, req)
    self.assertEqual(woke, False)

    # error -> offset out of range
    reply_with(1)
    fetcher._in_flight.add(0)
    fetcher._records.clear()
    woke = yield from fetcher._proc_fetch_request(0, req)
    self.assertEqual(woke, False)
    self.assertEqual(state.is_fetchable(), False)

    # error -> offset out of range, reset strategy switched to NONE
    state.seek(4)
    subscriptions._default_offset_reset_strategy = OffsetResetStrategy.NONE
    reply_with(1)
    fetcher._in_flight.add(0)
    fetcher._records.clear()
    woke = yield from fetcher._proc_fetch_request(0, req)
    self.assertEqual(woke, True)
    with self.assertRaises(OffsetOutOfRangeError):
        yield from fetcher.next_record([])

    yield from fetcher.close()
def _create_fetch_requests(self):
    """Create fetch requests for all assigned partitions, grouped by node.

    FetchRequests skipped if:

    * no leader, or node has already fetches in flight
    * we have data for this partition
    * we have data for other partitions on this node

    Returns:
        tuple: ``(requests, backoff)`` where

        * ``requests`` is a list of ``(node_id, FetchRequest)`` pairs; it
          is empty while a partition assignment is still needed
        * ``backoff`` is the smallest, across nodes, of each node's largest
          pending backoff, or ``self._fetcher_timeout`` when no partition
          reported a backoff
    """
    if self._subscriptions.needs_partition_assignment:
        # Same (list, timeout) shape as the regular return below, so callers
        # always receive a list as the first element.
        return [], self._fetcher_timeout

    # create the fetch info as a dict of lists of partition info tuples
    # which can be passed to FetchRequest() via .items()
    fetchable = collections.defaultdict(
        lambda: collections.defaultdict(list))
    backoff_by_nodes = collections.defaultdict(list)

    for tp in self._subscriptions.fetchable_partitions():
        node_id = self._client.cluster.leader_for_partition(tp)
        if tp in self._records:
            # Calculate backoff for this node if data is only recently
            # fetched. If data is consumed before backoff we will
            # include this partition in this fetch request
            backoff = self._records[tp].calculate_backoff()
            if backoff:
                backoff_by_nodes[node_id].append(backoff)
            # We have some prefetched data for this partition already
            continue
        if node_id in self._in_flight:
            # We have in-flight fetches to this node
            continue
        if node_id is None or node_id == -1:
            log.debug(
                "No leader found for partition %s."
                " Waiting metadata update", tp)
            continue

        # fetch if there is a leader and no in-flight requests
        position = self._subscriptions.assignment[tp].position
        partition_info = (
            tp.partition, position, self._max_partition_fetch_bytes)
        fetchable[node_id][tp.topic].append(partition_info)
        log.debug(
            "Adding fetch request for partition %s at offset %d",
            tp, position)

    # A node is skipped entirely while at least one of its partitions is
    # still waiting to be consumed (i.e. it reported a backoff).
    requests = [
        (node_id, FetchRequest(
            -1,  # replica_id
            self._fetch_max_wait_ms,
            self._fetch_min_bytes,
            partition_data.items()))
        for node_id, partition_data in fetchable.items()
        if node_id not in backoff_by_nodes
    ]

    if backoff_by_nodes:
        # Return min time until any node will be ready to send a request
        # (a node is ready after the max of its backoffs)
        backoff = min(map(max, backoff_by_nodes.values()))
    else:
        backoff = self._fetcher_timeout
    return requests, backoff
def test_compacted_topic_consumption(self):
    """Compacted topics can have offsets skipped.

    The response carries messages at offsets 160, 162 and 167 while the
    partition is positioned at 155; the buffer must hand them out in order
    and move the position just past each consumed offset.
    """
    client = AIOKafkaClient(loop=self.loop, bootstrap_servers=[])
    client.ready = mock.MagicMock(
        side_effect=asyncio.coroutine(lambda a: True))
    client.force_metadata_update = mock.MagicMock(
        side_effect=asyncio.coroutine(lambda: False))
    client.send = mock.MagicMock()

    subscriptions = SubscriptionState('latest')
    fetcher = Fetcher(client, subscriptions, loop=self.loop)
    tp = TopicPartition('test', 0)
    req = FetchRequest(
        -1,  # replica_id
        100, 100,
        [(tp.topic, [(tp.partition, 155, 100000)])])

    # (offset, encoded message) pairs; note the gaps between offsets
    stored = []
    for offset, value, key in (
            (160, b"12345", b"1"),
            (162, b"23456", b"2"),
            (167, b"34567", b"3")):
        message = Message(value, key=key)
        message._encode_self()
        stored.append((offset, message))

    resp = FetchResponse([(
        'test', [(
            0, 0, 3000,  # partition, error_code, highwater_offset
            # offset, len_bytes, bytes
            [(offset, 5, message) for offset, message in stored],
        )])])
    client.send.side_effect = asyncio.coroutine(lambda n, r: resp)

    state = TopicPartitionState()
    state.seek(155)
    state.drop_pending_message_set = False
    subscriptions.assignment[tp] = state
    subscriptions.needs_partition_assignment = False

    fetcher._in_flight.add(0)
    needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
    self.assertEqual(needs_wake_up, True)
    buf = fetcher._records[tp]

    def seen(record):
        return (record.value, record.key, record.offset)

    def sent(index):
        offset, message = stored[index]
        return (message.value, message.key, offset)

    # Test successful getone
    first = buf.getone()
    self.assertEqual(state.position, 161)
    self.assertEqual(seen(first), sent(0))

    # Test successful getmany
    second, third = buf.getall()
    self.assertEqual(state.position, 168)
    self.assertEqual(seen(second), sent(1))
    self.assertEqual(seen(third), sent(2))
async def test_compacted_topic_consumption(self):
    """Compacted topics can have offsets skipped.

    A batch with records at offsets 160, 162 and 167 is returned for a
    fetch at offset 155; the buffer must yield them in order and advance
    the partition position just past each consumed offset.
    """
    client = AIOKafkaClient(bootstrap_servers=[])

    async def fake_ready(conn):
        return True

    def fake_force_metadata_update():
        # Already-resolved future carrying True
        done = create_future()
        done.set_result(True)
        return done

    client.ready = mock.MagicMock(side_effect=fake_ready)
    client.force_metadata_update = mock.MagicMock(
        side_effect=fake_force_metadata_update)
    client.send = mock.MagicMock()

    subscriptions = SubscriptionState()
    fetcher = Fetcher(client, subscriptions)
    tp = TopicPartition('test', 0)
    req = FetchRequest(
        -1,  # replica_id
        100, 100,
        [(tp.topic, [(tp.partition, 155, 100000)])])

    # (offset, value, key) for each record; note the gaps between offsets
    records = (
        (160, b"12345", b"1"),
        (162, b"23456", b"2"),
        (167, b"34567", b"3"),
    )
    builder = LegacyRecordBatchBuilder(
        magic=1, compression_type=0, batch_size=99999999)
    for offset, value, key in records:
        builder.append(offset, value=value, key=key, timestamp=None)
    batch = bytes(builder.build())

    resp = FetchResponse([(
        'test', [(
            0, 0, 3000,  # partition, error_code, highwater_offset
            batch  # Batch raw bytes
        )])])

    async def fake_send(node, ready):
        return resp

    subscriptions.assign_from_user({tp})
    assignment = subscriptions.subscription.assignment
    tp_state = assignment.state_value(tp)
    client.send.side_effect = fake_send

    tp_state.seek(155)
    fetcher._in_flight.add(0)
    needs_wake_up = await fetcher._proc_fetch_request(assignment, 0, req)
    self.assertEqual(needs_wake_up, True)
    buf = fetcher._records[tp]

    def seen(record):
        return (record.value, record.key, record.offset)

    def sent(index):
        offset, value, key = records[index]
        return (value, key, offset)

    # Test successful getone, the closest in batch offset=160
    first = buf.getone()
    self.assertEqual(tp_state.position, 161)
    self.assertEqual(seen(first), sent(0))

    # Test successful getmany
    second, third = buf.getall()
    self.assertEqual(tp_state.position, 168)
    self.assertEqual(seen(second), sent(1))
    self.assertEqual(seen(third), sent(2))
async def test_proc_fetch_request(self):
    """Run Fetcher._proc_fetch_request against successful and failing replies.

    The same request (fetch offset 4) is issued in every scenario; only
    the partition position, the assignment and the stubbed response change.
    """
    client = AIOKafkaClient(bootstrap_servers=[])
    subscriptions = SubscriptionState()
    fetcher = Fetcher(client, subscriptions, auto_offset_reset="latest")

    tp = TopicPartition('test', 0)
    req = FetchRequest(
        -1,  # replica_id
        100, 100,
        [(tp.topic, [(tp.partition, 4, 100000)])])

    async def fake_ready(conn):
        return True

    def fake_force_metadata_update():
        # Already-resolved future carrying False
        done = create_future()
        done.set_result(False)
        return done

    client.ready = mock.MagicMock(side_effect=fake_ready)
    client.force_metadata_update = mock.MagicMock(
        side_effect=fake_force_metadata_update)
    client.send = mock.MagicMock()

    builder = LegacyRecordBatchBuilder(
        magic=1, compression_type=0, batch_size=99999999)
    builder.append(offset=4, value=b"test msg", key=None, timestamp=None)
    raw_batch = bytes(builder.build())

    # Holder for the response the stubbed send() currently returns
    current = {}

    def reply_with(error_code):
        current['response'] = FetchResponse(
            [('test', [(0, error_code, 9, raw_batch)])])

    async def fake_send(node, request):
        return current['response']

    async def run_fetch():
        # ``assignment`` is looked up at call time, so the re-assignment
        # further down is picked up without changing this helper.
        return await fetcher._proc_fetch_request(assignment, 0, req)

    reply_with(0)
    client.send.side_effect = fake_send
    subscriptions.assign_from_user({tp})
    assignment = subscriptions.subscription.assignment
    tp_state = assignment.state_value(tp)

    # The partition has no active position, so will ignore result
    needs_wake_up = await run_fetch()
    self.assertEqual(needs_wake_up, False)
    self.assertEqual(fetcher._records, {})

    # The partition's position does not match request's fetch offset
    subscriptions.seek(tp, 0)
    needs_wake_up = await run_fetch()
    self.assertEqual(needs_wake_up, False)
    self.assertEqual(fetcher._records, {})

    # Position matches the fetch offset: the record becomes available
    subscriptions.seek(tp, 4)
    needs_wake_up = await run_fetch()
    self.assertEqual(needs_wake_up, True)
    buf = fetcher._records[tp]
    self.assertEqual(buf.getone().value, b"test msg")

    # If position changed after fetch request passed
    subscriptions.seek(tp, 4)
    needs_wake_up = await run_fetch()
    subscriptions.seek(tp, 10)
    self.assertIsNone(buf.getone())

    # If assignment is lost after fetch request passed
    subscriptions.seek(tp, 4)
    needs_wake_up = await run_fetch()
    subscriptions.unsubscribe()
    self.assertIsNone(buf.getone())

    # Assign again and pick up the fresh assignment and partition state
    subscriptions.assign_from_user({tp})
    assignment = subscriptions.subscription.assignment
    tp_state = assignment.state_value(tp)

    # error -> no partition found (UnknownTopicOrPartitionError)
    subscriptions.seek(tp, 4)
    fetcher._records.clear()
    reply_with(3)
    updates_before = client.force_metadata_update.call_count
    needs_wake_up = await run_fetch()
    self.assertEqual(needs_wake_up, False)
    self.assertEqual(
        client.force_metadata_update.call_count, updates_before + 1)

    # error -> topic auth failed (TopicAuthorizationFailedError)
    reply_with(29)
    needs_wake_up = await run_fetch()
    self.assertEqual(needs_wake_up, True)
    with self.assertRaises(TopicAuthorizationFailedError):
        await fetcher.next_record([])

    # error -> unknown
    reply_with(-1)
    needs_wake_up = await run_fetch()
    self.assertEqual(needs_wake_up, False)

    # error -> offset out of range with offset strategy
    reply_with(1)
    needs_wake_up = await run_fetch()
    self.assertEqual(needs_wake_up, False)
    self.assertEqual(tp_state.has_valid_position, False)
    self.assertEqual(tp_state.awaiting_reset, True)
    self.assertEqual(tp_state.reset_strategy, OffsetResetStrategy.LATEST)

    # error -> offset out of range without offset strategy
    subscriptions.seek(tp, 4)
    fetcher._default_reset_strategy = OffsetResetStrategy.NONE
    needs_wake_up = await run_fetch()
    self.assertEqual(needs_wake_up, True)
    with self.assertRaises(OffsetOutOfRangeError):
        await fetcher.next_record([])

    await fetcher.close()