def encode_list_offset_request(cls, payloads=()):
    return kafka.protocol.offset.OffsetRequest[1](
        replica_id=-1,
        topics=[(
            topic,
            [(
                partition,
                payload.time)
                for partition, payload in six.iteritems(topic_payloads)])
            for topic, topic_payloads in six.iteritems(
                group_by_topic_and_partition(payloads))])
def _send_offset_requests(self, timestamps):
    """Fetch offsets for each partition in timestamps dict. This may send
    request to multiple nodes, based on who is Leader for partition.

    Arguments:
        timestamps (dict): {TopicPartition: int} mapping of fetching timestamps.

    Returns:
        Future: resolves to a mapping of retrieved offsets
    """
    timestamps_by_node = collections.defaultdict(dict)
    for partition, timestamp in six.iteritems(timestamps):
        node_id = self._client.cluster.leader_for_partition(partition)
        if node_id is None:
            self._client.add_topic(partition.topic)
            log.debug("Partition %s is unknown for fetching offset,"
                      " wait for metadata refresh", partition)
            return Future().failure(Errors.StaleMetadata(partition))
        elif node_id == -1:
            log.debug("Leader for partition %s unavailable for fetching "
                      "offset, wait for metadata refresh", partition)
            return Future().failure(
                Errors.LeaderNotAvailableError(partition))
        else:
            timestamps_by_node[node_id][partition] = timestamp

    # Aggregate results until we have all
    list_offsets_future = Future()
    responses = []
    node_count = len(timestamps_by_node)

    def on_success(value):
        responses.append(value)
        if len(responses) == node_count:
            offsets = {}
            for r in responses:
                offsets.update(r)
            list_offsets_future.success(offsets)

    def on_fail(err):
        if not list_offsets_future.is_done:
            list_offsets_future.failure(err)

    for node_id, timestamps in six.iteritems(timestamps_by_node):
        _f = self._send_offset_request(node_id, timestamps)
        _f.add_callback(on_success)
        _f.add_errback(on_fail)
    return list_offsets_future
def _perform_assignment(self, leader_id, assignment_strategy, members):
    assignor = self._lookup_assignor(assignment_strategy)
    assert assignor, 'Invalid assignment protocol: %s' % assignment_strategy
    member_metadata = {}
    all_subscribed_topics = set()
    for member_id, metadata_bytes in members:
        metadata = ConsumerProtocol.METADATA.decode(metadata_bytes)
        member_metadata[member_id] = metadata
        all_subscribed_topics.update(metadata.subscription)  # pylint: disable-msg=no-member

    # the leader will begin watching for changes to any of the topics
    # the group is interested in, which ensures that all metadata changes
    # will eventually be seen
    # Because assignment typically happens within response callbacks,
    # we cannot block on metadata updates here (no recursion into poll())
    self._subscription.group_subscribe(all_subscribed_topics)
    self._client.set_topics(self._subscription.group_subscription())

    # keep track of the metadata used for assignment so that we can check
    # after rebalance completion whether anything has changed
    self._cluster.request_update()
    self._assignment_snapshot = self._metadata_snapshot

    log.debug("Performing assignment for group %s using strategy %s"
              " with subscriptions %s", self.group_id, assignor.name,
              member_metadata)

    assignments = assignor.assign(self._cluster, member_metadata)

    log.debug("Finished assignment for group %s: %s", self.group_id, assignments)

    group_assignment = {}
    for member_id, assignment in six.iteritems(assignments):
        group_assignment[member_id] = assignment
    return group_assignment
def _raise_if_offset_out_of_range(self):
    """Check FetchResponses for offset out of range.

    Raises:
        OffsetOutOfRangeError: if any partition from previous FetchResponse
            contains OffsetOutOfRangeError and the default_reset_policy is None
    """
    if not self._offset_out_of_range_partitions:
        return

    current_out_of_range_partitions = {}

    # filter only the fetchable partitions
    for partition, offset in six.iteritems(self._offset_out_of_range_partitions):
        if not self._subscriptions.is_fetchable(partition):
            log.debug("Ignoring fetched records for %s since it is no"
                      " longer fetchable", partition)
            continue
        position = self._subscriptions.assignment[partition].position
        # ignore partition if the current position != offset in FetchResponse
        # e.g. after seek()
        if position is not None and offset == position:
            current_out_of_range_partitions[partition] = position

    self._offset_out_of_range_partitions.clear()
    if current_out_of_range_partitions:
        raise Errors.OffsetOutOfRangeError(current_out_of_range_partitions)
def fetchable_partitions(self):
    """Return set of TopicPartitions that should be Fetched."""
    fetchable = set()
    for partition, state in six.iteritems(self.assignment):
        if state.is_fetchable():
            fetchable.add(partition)
    return fetchable
def all_consumed_offsets(self):
    """Returns consumed offsets as {TopicPartition: OffsetAndMetadata}"""
    all_consumed = {}
    for partition, state in six.iteritems(self.assignment):
        if state.has_valid_position:
            all_consumed[partition] = OffsetAndMetadata(state.position, '')
    return all_consumed
def _produce_request(self, node_id, acks, timeout, batches):
    """Create a produce request from the given record batches.

    Returns:
        ProduceRequest (version depends on api_version)
    """
    produce_records_by_partition = collections.defaultdict(dict)
    for batch in batches:
        topic = batch.topic_partition.topic
        partition = batch.topic_partition.partition

        # TODO: bytearray / memoryview
        buf = batch.records.buffer()
        produce_records_by_partition[topic][partition] = buf

    if self.config['api_version'] >= (0, 10):
        version = 2
    elif self.config['api_version'] == (0, 9):
        version = 1
    else:
        version = 0
    return ProduceRequest[version](
        required_acks=acks,
        timeout=timeout,
        topics=[(topic, list(partition_info.items()))
                for topic, partition_info
                in six.iteritems(produce_records_by_partition)]
    )
def _on_join_leader(self, response):
    """
    Perform leader synchronization and send back the assignment
    for the group via SyncGroupRequest

    Arguments:
        response (JoinResponse): broker response to parse

    Returns:
        Future: resolves to member assignment encoded-bytes
    """
    try:
        group_assignment = self._perform_assignment(response.leader_id,
                                                    response.group_protocol,
                                                    response.members)
    except Exception as e:
        return Future().failure(e)

    request = SyncGroupRequest[0](
        self.group_id,
        self.generation,
        self.member_id,
        [(member_id,
          assignment if isinstance(assignment, bytes) else assignment.encode())
         for member_id, assignment in six.iteritems(group_assignment)])
    log.debug("Sending leader SyncGroup for group %s to coordinator %s: %s",
              self.group_id, self.coordinator_id, request)
    return self._send_sync_group_request(request)
def refresh_committed_offsets_if_needed(self):
    """Fetch committed offsets for assigned partitions."""
    if self._subscription.needs_fetch_committed_offsets:
        offsets = self.fetch_committed_offsets(self._subscription.assigned_partitions())
        for partition, offset in six.iteritems(offsets):
            # verify assignment is still active
            if self._subscription.is_assigned(partition):
                self._subscription.assignment[partition].committed = offset.offset
        self._subscription.needs_fetch_committed_offsets = False
def encode_offset_commit_request(cls, group, payloads):
    """
    Encode an OffsetCommitRequest struct

    Arguments:
        group: string, the consumer group you are committing offsets for
        payloads: list of OffsetCommitRequestPayload
    """
    return kafka.protocol.commit.OffsetCommitRequest[0](
        consumer_group=group,
        topics=[(
            topic,
            [(
                partition,
                payload.offset,
                payload.metadata)
                for partition, payload in six.iteritems(topic_payloads)])
            for topic, topic_payloads in six.iteritems(
                group_by_topic_and_partition(payloads))])
def encode_offset_commit_request_kafka(cls, group, payloads):
    """
    Encode an OffsetCommitRequest struct

    Arguments:
        group: string, the consumer group you are committing offsets for
        payloads: list of OffsetCommitRequestPayload
    """
    return kafka.protocol.commit.OffsetCommitRequest[2](
        consumer_group=group,
        consumer_group_generation_id=kafka.protocol.commit.OffsetCommitRequest[2].DEFAULT_GENERATION_ID,
        consumer_id='',
        retention_time=kafka.protocol.commit.OffsetCommitRequest[2].DEFAULT_RETENTION_TIME,
        topics=[(
            topic,
            [(
                partition,
                payload.offset,
                payload.metadata)
                for partition, payload in six.iteritems(topic_payloads)])
            for topic, topic_payloads in six.iteritems(
                group_by_topic_and_partition(payloads))])
def _create_fetch_requests(self):
    """Create fetch requests for all assigned partitions, grouped by node.

    FetchRequests skipped if no leader, or node has requests in flight

    Returns:
        dict: {node_id: FetchRequest, ...} (version depends on api_version)
    """
    # create the fetch info as a dict of lists of partition info tuples
    # which can be passed to FetchRequest() via .items()
    fetchable = collections.defaultdict(lambda: collections.defaultdict(list))

    # avoid re-fetching pending offsets
    pending = set()
    for fetch_offset, tp, _ in self._records:
        pending.add((tp, fetch_offset))

    for partition in self._subscriptions.fetchable_partitions():
        node_id = self._client.cluster.leader_for_partition(partition)
        position = self._subscriptions.assignment[partition].position

        # fetch if there is a leader, no in-flight requests, and no _records
        if node_id is None or node_id == -1:
            log.debug("No leader found for partition %s."
                      " Requesting metadata update", partition)
            self._client.cluster.request_update()

        elif ((partition, position) not in pending
              and self._client.in_flight_request_count(node_id) == 0):
            partition_info = (
                partition.partition,
                position,
                self.config['max_partition_fetch_bytes']
            )
            fetchable[node_id][partition.topic].append(partition_info)
            log.debug("Adding fetch request for partition %s at offset %d",
                      partition, position)

    if self.config['api_version'] >= (0, 10):
        version = 2
    elif self.config['api_version'] == (0, 9):
        version = 1
    else:
        version = 0
    requests = {}
    for node_id, partition_data in six.iteritems(fetchable):
        requests[node_id] = FetchRequest[version](
            -1,  # replica_id
            self.config['fetch_max_wait_ms'],
            self.config['fetch_min_bytes'],
            partition_data.items())
    return requests
def _init_fetches(self):
    futures = []
    for node_id, request in six.iteritems(self._create_fetch_requests()):
        if self._client.ready(node_id):
            log.debug("Sending FetchRequest to node %s", node_id)
            future = self._client.send(node_id, request)
            future.add_callback(self._handle_fetch_response, request, time.time())
            future.add_errback(log.error, 'Fetch to node %s failed: %s', node_id)
            futures.append(future)
    self._fetch_futures.extend(futures)
    self._clean_done_fetch_futures()
    return futures
def available_partitions_for_topic(self, topic):
    """Return set of partitions with known leaders

    Arguments:
        topic (str): topic to check for partitions

    Returns:
        set: {partition (int), ...}
    """
    if topic not in self._partitions:
        return None
    return set([partition for partition, metadata
                in six.iteritems(self._partitions[topic])
                if metadata.leader != -1])
def _send_offset_request(self, node_id, timestamps):
    by_topic = collections.defaultdict(list)
    for tp, timestamp in six.iteritems(timestamps):
        if self.config['api_version'] >= (0, 10, 1):
            data = (tp.partition, timestamp)
        else:
            data = (tp.partition, timestamp, 1)
        by_topic[tp.topic].append(data)

    if self.config['api_version'] >= (0, 10, 1):
        request = OffsetRequest[1](-1, list(six.iteritems(by_topic)))
    else:
        request = OffsetRequest[0](-1, list(six.iteritems(by_topic)))

    # Client returns a future that only fails on network issues
    # so create a separate future and attach a callback to update it
    # based on response error codes
    future = Future()

    _f = self._client.send(node_id, request)
    _f.add_callback(self._handle_offset_response, future)
    _f.add_errback(lambda e: future.failure(e))
    return future
def assign(cls, cluster, member_metadata):
    consumers_per_topic = collections.defaultdict(list)
    for member, metadata in six.iteritems(member_metadata):
        for topic in metadata.subscription:
            consumers_per_topic[topic].append(member)

    # construct {member_id: {topic: [partition, ...]}}
    assignment = collections.defaultdict(dict)

    for topic, consumers_for_topic in six.iteritems(consumers_per_topic):
        partitions = cluster.partitions_for_topic(topic)
        if partitions is None:
            log.warning('No partition metadata for topic %s', topic)
            continue
        partitions = sorted(list(partitions))
        partitions_for_topic = len(partitions)
        consumers_for_topic.sort()

        partitions_per_consumer = len(partitions) // len(consumers_for_topic)
        consumers_with_extra = len(partitions) % len(consumers_for_topic)

        for i in range(len(consumers_for_topic)):
            start = partitions_per_consumer * i
            start += min(i, consumers_with_extra)
            length = partitions_per_consumer
            if not i + 1 > consumers_with_extra:
                length += 1
            member = consumers_for_topic[i]
            assignment[member][topic] = partitions[start:start+length]

    protocol_assignment = {}
    for member_id in member_metadata:
        protocol_assignment[member_id] = ConsumerProtocolMemberAssignment(
            cls.version,
            sorted(assignment[member_id].items()),
            b'')
    return protocol_assignment
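# --- Illustrative sketch (not part of the library) ---------------------------
# The start/length arithmetic in assign() above hands out contiguous ranges of
# partitions, giving the first `consumers_with_extra` consumers one extra
# partition each. A minimal standalone rendering of just that slicing step,
# assuming 7 partitions and 3 consumers (values chosen purely for illustration):
def _range_slices_sketch():
    partitions = list(range(7))           # e.g. partitions [0..6]
    consumers = ['C0', 'C1', 'C2']        # sorted member ids
    per_consumer = len(partitions) // len(consumers)   # 2
    with_extra = len(partitions) % len(consumers)      # 1
    ranges = {}
    for i, member in enumerate(consumers):
        start = per_consumer * i + min(i, with_extra)
        length = per_consumer + (1 if i + 1 <= with_extra else 0)
        ranges[member] = partitions[start:start + length]
    return ranges  # {'C0': [0, 1, 2], 'C1': [3, 4], 'C2': [5, 6]}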
def test_conflicting_previous_assignments(mocker):
    cluster = create_cluster(mocker, topics={'t'}, topics_partitions={0, 1})

    subscriptions = {
        'C1': {'t'},
        'C2': {'t'},
    }
    member_metadata = {}
    for member, topics in six.iteritems(subscriptions):
        # assume both C1 and C2 have partition 1 assigned to them in generation 1
        member_metadata[member] = StickyPartitionAssignor._metadata(
            topics, [TopicPartition('t', 0), TopicPartition('t', 0)], 1)

    assignment = StickyPartitionAssignor.assign(cluster, member_metadata)
    verify_validity_and_balance(subscriptions, assignment)
def _create_fetch_requests(self):
    """Create fetch requests for all assigned partitions, grouped by node.

    FetchRequests skipped if no leader, or node has requests in flight

    Returns:
        dict: {node_id: FetchRequest, ...} (version depends on api_version)
    """
    # create the fetch info as a dict of lists of partition info tuples
    # which can be passed to FetchRequest() via .items()
    fetchable = collections.defaultdict(lambda: collections.defaultdict(list))

    for partition in self._fetchable_partitions():
        node_id = self._client.cluster.leader_for_partition(partition)
        position = self._subscriptions.assignment[partition].position

        # fetch if there is a leader and no in-flight requests
        if node_id is None or node_id == -1:
            log.debug("No leader found for partition %s."
                      " Requesting metadata update", partition)
            self._client.cluster.request_update()

        elif self._client.in_flight_request_count(node_id) == 0:
            partition_info = (
                partition.partition,
                position,
                self.config['max_partition_fetch_bytes']
            )
            fetchable[node_id][partition.topic].append(partition_info)
            log.debug("Adding fetch request for partition %s at offset %d",
                      partition, position)

    if self.config['api_version'] >= (0, 10):
        version = 2
    elif self.config['api_version'] == (0, 9):
        version = 1
    else:
        version = 0
    requests = {}
    for node_id, partition_data in six.iteritems(fetchable):
        requests[node_id] = FetchRequest[version](
            -1,  # replica_id
            self.config['fetch_max_wait_ms'],
            self.config['fetch_min_bytes'],
            partition_data.items())
    return requests
def send_fetches(self):
    """Send FetchRequests for all assigned partitions that do not already have
    an in-flight fetch or pending fetch data.

    Returns:
        List of Futures: each future resolves to a FetchResponse
    """
    futures = []
    for node_id, request in six.iteritems(self._create_fetch_requests()):
        if self._client.ready(node_id):
            log.debug("Sending FetchRequest to node %s", node_id)
            future = self._client.send(node_id, request)
            future.add_callback(self._handle_fetch_response, request, time.time())
            future.add_errback(log.error, 'Fetch to node %s failed: %s', node_id)
            futures.append(future)
    self._fetch_futures.extend(futures)
    self._clean_done_fetch_futures()
    return futures
def _create_produce_requests(self, collated):
    """
    Transfer the record batches into a list of produce requests on a
    per-node basis.

    Arguments:
        collated: {node_id: [RecordBatch]}

    Returns:
        dict: {node_id: ProduceRequest} (version depends on api_version)
    """
    requests = {}
    for node_id, batches in six.iteritems(collated):
        requests[node_id] = self._produce_request(
            node_id, self.config['acks'],
            self.config['request_timeout_ms'], batches)
    return requests
def metadata(cls, topics):
    if cls.member_assignment is None:
        log.debug("No member assignment available")
        user_data = b""
    else:
        log.debug(
            "Member assignment is available, generating the metadata: generation {}"
            .format(cls.generation))
        partitions_by_topic = defaultdict(list)
        for topic_partition in cls.member_assignment:  # pylint: disable=not-an-iterable
            partitions_by_topic[topic_partition.topic].append(topic_partition.partition)
        data = StickyAssignorUserDataV1(six.iteritems(partitions_by_topic), cls.generation)
        user_data = data.encode()
    return ConsumerProtocolMemberMetadata(cls.version, list(topics), user_data)
def encode_offset_fetch_request(cls, group, payloads, from_kafka=False):
    """
    Encode an OffsetFetchRequest struct. The request is encoded using
    version 0 if from_kafka is false, indicating a request for Zookeeper
    offsets. It is encoded using version 1 otherwise, indicating a request
    for Kafka offsets.

    Arguments:
        group: string, the consumer group you are fetching offsets for
        payloads: list of OffsetFetchRequestPayload
        from_kafka: bool, default False, set True for Kafka-committed offsets
    """
    version = 1 if from_kafka else 0
    return kafka.protocol.commit.OffsetFetchRequest[version](
        consumer_group=group,
        topics=[(
            topic,
            list(topic_payloads.keys()))
            for topic, topic_payloads in six.iteritems(
                group_by_topic_and_partition(payloads))])
def send_fetches(self):
    """Send FetchRequests asynchronously for all assigned partitions.

    Note: noop if there are unconsumed records internal to the fetcher

    Returns:
        List of Futures: each future resolves to a FetchResponse
    """
    futures = []
    for node_id, request in six.iteritems(self._create_fetch_requests()):
        if self._client.ready(node_id):
            log.debug("Sending FetchRequest to node %s", node_id)
            future = self._client.send(node_id, request)
            future.add_callback(self._handle_fetch_response, request, time.time())
            future.add_errback(log.error, 'Fetch to node %s failed: %s', node_id)
            futures.append(future)
    self._fetch_futures.extend(futures)
    self._clean_done_fetch_futures()
    return futures
def test_kafka_consumer_max_bytes_simple(self):
    self.send_messages(0, range(100, 200))
    self.send_messages(1, range(200, 300))

    # Start a consumer
    consumer = self.kafka_consumer(
        auto_offset_reset='earliest', fetch_max_bytes=300)
    seen_partitions = set([])
    for i in range(10):
        poll_res = consumer.poll(timeout_ms=100)
        for partition, msgs in six.iteritems(poll_res):
            for msg in msgs:
                seen_partitions.add(partition)

    # Check that we fetched at least 1 message from both partitions
    self.assertEqual(
        seen_partitions, set([
            TopicPartition(self.topic, 0), TopicPartition(self.topic, 1)]))
    consumer.close()
def _perform_assignment(self, leader_id, assignment_strategy, members):
    assignor = self._lookup_assignor(assignment_strategy)
    assert assignor, "Invalid assignment protocol: %s" % (assignment_strategy,)
    member_metadata = {}
    all_subscribed_topics = set()
    for member_id, metadata_bytes in members:
        metadata = ConsumerProtocol.METADATA.decode(metadata_bytes)
        member_metadata[member_id] = metadata
        all_subscribed_topics.update(metadata.subscription)  # pylint: disable-msg=no-member

    # the leader will begin watching for changes to any of the topics
    # the group is interested in, which ensures that all metadata changes
    # will eventually be seen
    # Because assignment typically happens within response callbacks,
    # we cannot block on metadata updates here (no recursion into poll())
    self._subscription.group_subscribe(all_subscribed_topics)
    self._client.set_topics(self._subscription.group_subscription())

    # keep track of the metadata used for assignment so that we can check
    # after rebalance completion whether anything has changed
    self._cluster.request_update()
    self._is_leader = True
    self._assignment_snapshot = self._metadata_snapshot

    log.debug(
        "Performing assignment for group %s using strategy %s"
        " with subscriptions %s",
        self.group_id,
        assignor.name,
        member_metadata,
    )

    assignments = assignor.assign(self._cluster, member_metadata)

    log.debug("Finished assignment for group %s: %s", self.group_id, assignments)

    group_assignment = {}
    for member_id, assignment in six.iteritems(assignments):
        group_assignment[member_id] = assignment
    return group_assignment
def metrics(self, raw=False):
    """Get metrics on producer performance.

    This is ported from the Java Producer, for details see:
    https://kafka.apache.org/documentation/#producer_monitoring

    Warning:
        This is an unstable interface. It may change in future
        releases without warning.
    """
    if raw:
        return self._metrics.metrics.copy()

    metrics = {}
    for k, v in six.iteritems(self._metrics.metrics.copy()):
        if k.group not in metrics:
            metrics[k.group] = {}
        if k.name not in metrics[k.group]:
            metrics[k.group][k.name] = {}
        metrics[k.group][k.name] = v.value()
    return metrics
def _is_balanced(self):
    """Determines if the current assignment is a balanced one"""
    if (len(self.current_assignment[self._get_consumer_with_least_subscriptions()])
            >= len(self.current_assignment[self._get_consumer_with_most_subscriptions()]) - 1):
        # if minimum and maximum numbers of partitions assigned to consumers differ by at most one, return true
        return True

    # create a mapping from partitions to the consumer assigned to them
    all_assigned_partitions = {}
    for consumer_id, consumer_partitions in six.iteritems(self.current_assignment):
        for partition in consumer_partitions:
            if partition in all_assigned_partitions:
                log.error("{} is assigned to more than one consumer.".format(partition))
            all_assigned_partitions[partition] = consumer_id

    # for each consumer that does not have all the topic partitions it can get,
    # make sure that no topic partition it could get but did not get can be moved to it
    # (because that would break the balance)
    for consumer, _ in self.sorted_current_subscriptions:
        consumer_partition_count = len(self.current_assignment[consumer])

        # skip if this consumer already has all the topic partitions it can get
        if consumer_partition_count == len(self.consumer_to_all_potential_partitions[consumer]):
            continue

        # otherwise make sure it cannot get any more
        for partition in self.consumer_to_all_potential_partitions[consumer]:
            if partition not in self.current_assignment[consumer]:
                other_consumer = all_assigned_partitions[partition]
                other_consumer_partition_count = len(self.current_assignment[other_consumer])
                if consumer_partition_count < other_consumer_partition_count:
                    return False
    return True
def _produce_request(self, node_id, acks, timeout, batches):
    """Create a produce request from the given record batches.

    Returns:
        ProduceRequest (version depends on api_version)
    """
    produce_records_by_partition = collections.defaultdict(dict)
    for batch in batches:
        topic = batch.topic_partition.topic
        partition = batch.topic_partition.partition

        buf = batch.records.buffer()
        produce_records_by_partition[topic][partition] = buf

    kwargs = {}
    if self.config['api_version'] >= (2, 1):
        version = 7
    elif self.config['api_version'] >= (2, 0):
        version = 6
    elif self.config['api_version'] >= (1, 1):
        version = 5
    elif self.config['api_version'] >= (1, 0):
        version = 4
    elif self.config['api_version'] >= (0, 11):
        version = 3
        kwargs = dict(transactional_id=None)
    elif self.config['api_version'] >= (0, 10):
        version = 2
    elif self.config['api_version'] == (0, 9):
        version = 1
    else:
        version = 0
    return ProduceRequest[version](
        required_acks=acks,
        timeout=timeout,
        topics=[(topic, list(partition_info.items()))
                for topic, partition_info
                in six.iteritems(produce_records_by_partition)],
        **kwargs)
def test_send_broker_unaware_request_fail(self, load_metadata, conn):
    mocked_conns = {
        ('kafka01', 9092): MagicMock(),
        ('kafka02', 9092): MagicMock()
    }
    for val in mocked_conns.values():
        mock_conn(val, success=False)

    def mock_get_conn(host, port, afi):
        return mocked_conns[(host, port)]
    conn.side_effect = mock_get_conn

    client = SimpleClient(hosts=['kafka01:9092', 'kafka02:9092'])

    req = KafkaProtocol.encode_metadata_request()
    with self.assertRaises(KafkaUnavailableError):
        client._send_broker_unaware_request(
            payloads=['fake request'],
            encoder_fn=MagicMock(return_value='fake encoded message'),
            decoder_fn=lambda x: x)

    for key, conn in six.iteritems(mocked_conns):
        conn.send.assert_called_with('fake encoded message')
def test_new_subscription(mocker):
    cluster = create_cluster(mocker, topics={'t1', 't2', 't3', 't4'}, topics_partitions={0})

    subscriptions = defaultdict(set)
    for i in range(3):
        for j in range(i, 3 * i - 2 + 1):
            subscriptions['C{}'.format(i)].add('t{}'.format(j))

    member_metadata = make_member_metadata(subscriptions)

    assignment = StickyPartitionAssignor.assign(cluster, member_metadata)
    verify_validity_and_balance(subscriptions, assignment)

    subscriptions['C0'].add('t1')
    member_metadata = {}
    for member, topics in six.iteritems(subscriptions):
        member_metadata[member] = StickyPartitionAssignor._metadata(topics, [])

    assignment = StickyPartitionAssignor.assign(cluster, member_metadata)
    verify_validity_and_balance(subscriptions, assignment)
    assert StickyPartitionAssignor._latest_partition_movements.are_sticky()
def test_sticky_reassignment_after_one_consumer_added(mocker):
    cluster = create_cluster(mocker, topics={'t'}, topics_partitions=set(range(20)))

    subscriptions = defaultdict(set)
    for i in range(1, 10):
        subscriptions['C{}'.format(i)] = {'t'}

    member_metadata = make_member_metadata(subscriptions)

    assignment = StickyPartitionAssignor.assign(cluster, member_metadata)
    verify_validity_and_balance(subscriptions, assignment)

    subscriptions['C10'] = {'t'}
    member_metadata = {}
    for member, topics in six.iteritems(subscriptions):
        member_metadata[member] = StickyPartitionAssignor._metadata(
            topics, assignment[member].partitions() if member in assignment else [])

    assignment = StickyPartitionAssignor.assign(cluster, member_metadata)
    verify_validity_and_balance(subscriptions, assignment)
    assert StickyPartitionAssignor._latest_partition_movements.are_sticky()
def test_sticky_large_assignment_with_multiple_consumers_leaving(mocker):
    n_topics = 40
    n_consumers = 200

    all_topics = set(['t{}'.format(i) for i in range(1, n_topics + 1)])
    partitions = dict([(t, set(range(1, randint(0, 10) + 1))) for t in all_topics])
    cluster = create_cluster(mocker, topics=all_topics, topic_partitions_lambda=lambda t: partitions[t])

    subscriptions = defaultdict(set)
    for i in range(1, n_consumers + 1):
        for j in range(0, randint(1, 20)):
            subscriptions['C{}'.format(i)].add('t{}'.format(randint(1, n_topics)))

    member_metadata = make_member_metadata(subscriptions)

    assignment = StickyPartitionAssignor.assign(cluster, member_metadata)
    verify_validity_and_balance(subscriptions, assignment)

    member_metadata = {}
    for member, topics in six.iteritems(subscriptions):
        member_metadata[member] = StickyPartitionAssignor._metadata(
            topics, assignment[member].partitions())

    for i in range(50):
        member = 'C{}'.format(randint(1, n_consumers))
        if member in subscriptions:
            del subscriptions[member]
            del member_metadata[member]

    assignment = StickyPartitionAssignor.assign(cluster, member_metadata)
    verify_validity_and_balance(subscriptions, assignment)
    assert StickyPartitionAssignor._latest_partition_movements.are_sticky()
def _on_join_leader(self, response):
    """
    Perform leader synchronization and send back the assignment
    for the group via SyncGroupRequest

    Arguments:
        response (JoinResponse): broker response to parse

    Returns:
        Future: resolves to member assignment encoded-bytes
    """
    try:
        group_assignment = self._perform_assignment(
            response.leader_id, response.group_protocol, response.members)
    except Exception as e:
        return Future().failure(e)

    version = 0 if self.config["api_version"] < (0, 11, 0) else 1
    request = SyncGroupRequest[version](
        self.group_id,
        self._generation.generation_id,
        self._generation.member_id,
        [(
            member_id,
            assignment if isinstance(assignment, bytes) else assignment.encode(),
        ) for member_id, assignment in six.iteritems(group_assignment)],
    )
    log.debug(
        "Sending leader SyncGroup for group %s to coordinator %s: %s",
        self.group_id,
        self.coordinator_id,
        request,
    )
    return self._send_sync_group_request(request)
def test_sticky_add_remove_topic_two_consumers(mocker):
    cluster = create_cluster(mocker, topics={'t1', 't2'}, topics_partitions={0, 1, 2})

    subscriptions = {
        'C1': {'t1'},
        'C2': {'t1'},
    }
    member_metadata = make_member_metadata(subscriptions)

    sticky_assignment = StickyPartitionAssignor.assign(cluster, member_metadata)
    expected_assignment = {
        'C1': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t1', [0, 2])], b''),
        'C2': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t1', [1])], b''),
    }
    assert_assignment(sticky_assignment, expected_assignment)

    subscriptions = {
        'C1': {'t1', 't2'},
        'C2': {'t1', 't2'},
    }
    member_metadata = {}
    for member, topics in six.iteritems(subscriptions):
        member_metadata[member] = StickyPartitionAssignor._metadata(
            topics, sticky_assignment[member].partitions())

    sticky_assignment = StickyPartitionAssignor.assign(cluster, member_metadata)
    expected_assignment = {
        'C1': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t1', [0, 2]), ('t2', [1])], b''),
        'C2': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t1', [1]), ('t2', [0, 2])], b''),
    }
    assert_assignment(sticky_assignment, expected_assignment)

    subscriptions = {
        'C1': {'t2'},
        'C2': {'t2'},
    }
    member_metadata = {}
    for member, topics in six.iteritems(subscriptions):
        member_metadata[member] = StickyPartitionAssignor._metadata(
            topics, sticky_assignment[member].partitions())

    sticky_assignment = StickyPartitionAssignor.assign(cluster, member_metadata)
    expected_assignment = {
        'C1': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t2', [1])], b''),
        'C2': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t2', [0, 2])], b''),
    }
    assert_assignment(sticky_assignment, expected_assignment)
def test_sticky_assignor2(mocker):
    """
    Given: there are three consumers C0, C1, C2, and three topics t0, t1, t2,
        with 1, 2, and 3 partitions respectively.
        Therefore, the partitions are t0p0, t1p0, t1p1, t2p0, t2p1, t2p2.
        C0 is subscribed to t0; C1 is subscribed to t0, t1;
        and C2 is subscribed to t0, t1, t2.
    Then: perform the assignment
    Expected: the assignment is
        - C0 [t0p0]
        - C1 [t1p0, t1p1]
        - C2 [t2p0, t2p1, t2p2]
    Then: remove C0 and perform the assignment
    Expected: the assignment is
        - C1 [t0p0, t1p0, t1p1]
        - C2 [t2p0, t2p1, t2p2]
    """
    partitions = {'t0': {0}, 't1': {0, 1}, 't2': {0, 1, 2}}
    cluster = create_cluster(mocker, topics={'t0', 't1', 't2'}, topic_partitions_lambda=lambda t: partitions[t])

    subscriptions = {
        'C0': {'t0'},
        'C1': {'t0', 't1'},
        'C2': {'t0', 't1', 't2'},
    }
    member_metadata = {}
    for member, topics in six.iteritems(subscriptions):
        member_metadata[member] = StickyPartitionAssignor._metadata(topics, [])

    sticky_assignment = StickyPartitionAssignor.assign(cluster, member_metadata)
    expected_assignment = {
        'C0': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t0', [0])], b''),
        'C1': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t1', [0, 1])], b''),
        'C2': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t2', [0, 1, 2])], b''),
    }
    assert_assignment(sticky_assignment, expected_assignment)

    del subscriptions['C0']
    member_metadata = {}
    for member, topics in six.iteritems(subscriptions):
        member_metadata[member] = StickyPartitionAssignor._metadata(
            topics, sticky_assignment[member].partitions())

    sticky_assignment = StickyPartitionAssignor.assign(cluster, member_metadata)
    expected_assignment = {
        'C1': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t0', [0]), ('t1', [0, 1])], b''),
        'C2': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t2', [0, 1, 2])], b''),
    }
    assert_assignment(sticky_assignment, expected_assignment)
def make_member_metadata(subscriptions):
    member_metadata = {}
    for member, topics in six.iteritems(subscriptions):
        member_metadata[member] = StickyPartitionAssignor._metadata(topics, [])
    return member_metadata
def _send_offset_commit_request(self, offsets):
    """Commit offsets for the specified list of topics and partitions.

    This is a non-blocking call which returns a request future that can be
    polled in the case of a synchronous commit or ignored in the
    asynchronous case.

    Arguments:
        offsets (dict of {TopicPartition: OffsetAndMetadata}): what should
            be committed

    Returns:
        Future: indicating whether the commit was successful or not
    """
    assert self.config['api_version'] >= (0, 8, 1), 'Unsupported Broker API'
    assert all(map(lambda k: isinstance(k, TopicPartition), offsets))
    assert all(map(lambda v: isinstance(v, OffsetAndMetadata),
                   offsets.values()))
    if not offsets:
        log.debug('No offsets to commit')
        return Future().success(True)
    elif self.coordinator_unknown():
        return Future().failure(Errors.GroupCoordinatorNotAvailableError)

    node_id = self.coordinator_id

    # create the offset commit request
    offset_data = collections.defaultdict(dict)
    for tp, offset in six.iteritems(offsets):
        offset_data[tp.topic][tp.partition] = offset

    if self.config['api_version'] >= (0, 9):
        request = OffsetCommitRequest[2](
            self.group_id,
            self.generation,
            self.member_id,
            OffsetCommitRequest[2].DEFAULT_RETENTION_TIME,
            [(
                topic, [(
                    partition,
                    offset.offset,
                    offset.metadata
                ) for partition, offset in six.iteritems(partitions)]
            ) for topic, partitions in six.iteritems(offset_data)]
        )
    elif self.config['api_version'] >= (0, 8, 2):
        request = OffsetCommitRequest[1](
            self.group_id, -1, '',
            [(
                topic, [(
                    partition,
                    offset.offset,
                    -1,
                    offset.metadata
                ) for partition, offset in six.iteritems(partitions)]
            ) for topic, partitions in six.iteritems(offset_data)]
        )
    elif self.config['api_version'] >= (0, 8, 1):
        request = OffsetCommitRequest[0](
            self.group_id,
            [(
                topic, [(
                    partition,
                    offset.offset,
                    offset.metadata
                ) for partition, offset in six.iteritems(partitions)]
            ) for topic, partitions in six.iteritems(offset_data)]
        )

    log.debug("Sending offset-commit request with %s for group %s to %s",
              offsets, self.group_id, node_id)

    future = Future()
    _f = self._client.send(node_id, request)
    _f.add_callback(self._handle_offset_commit_response, offsets, future, time.time())
    _f.add_errback(self._failed_request, node_id, request, future)
    return future
def _send_broker_aware_request(self, payloads, encoder_fn, decoder_fn):
    """
    Group a list of request payloads by topic+partition and send them to
    the leader broker for that partition using the supplied encode/decode
    functions

    Arguments:
        payloads: list of object-like entities with a topic (str) and
            partition (int) attribute; payloads with duplicate
            topic-partitions are not supported.
        encoder_fn: a method to encode the list of payloads to a request
            body, must accept client_id, correlation_id, and payloads as
            keyword arguments
        decoder_fn: a method to decode a response body into response
            objects. The response objects must be object-like and have
            topic and partition attributes

    Returns:
        List of response objects in the same order as the supplied payloads
    """
    # encoders / decoders do not maintain ordering currently
    # so we need to keep this so we can rebuild order before returning
    original_ordering = [(p.topic, p.partition) for p in payloads]

    # Connection errors generally mean stale metadata
    # although sometimes it means incorrect api request
    # Unfortunately there is no good way to tell the difference
    # so we'll just reset metadata on all errors to be safe
    refresh_metadata = False

    # For each broker, send the list of request payloads
    # and collect the responses and errors
    payloads_by_broker = self._payloads_by_broker(payloads)
    responses = {}

    def failed_payloads(payloads):
        for payload in payloads:
            topic_partition = (str(payload.topic), payload.partition)
            responses[topic_partition] = FailedPayloadsError(payload)

    # For each BrokerConnection keep the real socket so that we can use
    # a select to perform unblocking I/O
    connections_by_future = {}
    for broker, broker_payloads in six.iteritems(payloads_by_broker):
        if broker is None:
            failed_payloads(broker_payloads)
            continue

        host, port, afi = get_ip_port_afi(broker.host)
        try:
            conn = self._get_conn(host, broker.port, afi)
        except ConnectionError:
            refresh_metadata = True
            failed_payloads(broker_payloads)
            continue

        request = encoder_fn(payloads=broker_payloads)
        future = conn.send(request)

        if future.failed():
            refresh_metadata = True
            failed_payloads(broker_payloads)
            continue

        if not request.expect_response():
            for payload in broker_payloads:
                topic_partition = (str(payload.topic), payload.partition)
                responses[topic_partition] = None
            continue

        connections_by_future[future] = (conn, broker)

    conn = None
    while connections_by_future:
        futures = list(connections_by_future.keys())

        # block until a socket is ready to be read
        sockets = [
            conn._sock
            for future, (conn, _) in six.iteritems(connections_by_future)
            if not future.is_done and conn._sock is not None]
        if sockets:
            read_socks, _, _ = select.select(sockets, [], [])

        for future in futures:

            if not future.is_done:
                conn, _ = connections_by_future[future]
                conn.recv()
                continue

            _, broker = connections_by_future.pop(future)
            if future.failed():
                refresh_metadata = True
                failed_payloads(payloads_by_broker[broker])
            else:
                for payload_response in decoder_fn(future.value):
                    topic_partition = (str(payload_response.topic),
                                       payload_response.partition)
                    responses[topic_partition] = payload_response

    if refresh_metadata:
        self.reset_all_metadata()

    # Return responses in the same order as provided
    return [responses[tp] for tp in original_ordering]
def _fetch(self):
    # Create fetch request payloads for all the partitions
    partitions = dict((p, self.buffer_size)
                      for p in self.fetch_offsets.keys())
    while partitions:
        requests = []
        for partition, buffer_size in six.iteritems(partitions):
            requests.append(FetchRequestPayload(self.topic, partition,
                                                self.fetch_offsets[partition],
                                                buffer_size))
        # Send request
        responses = self.client.send_fetch_request(
            requests,
            max_wait_time=int(self.fetch_max_wait_time),
            min_bytes=self.fetch_min_bytes,
            fail_on_error=False
        )

        retry_partitions = {}
        for resp in responses:

            try:
                check_error(resp)
            except UnknownTopicOrPartitionError:
                log.error('UnknownTopicOrPartitionError for %s:%d',
                          resp.topic, resp.partition)
                self.client.reset_topic_metadata(resp.topic)
                raise
            except NotLeaderForPartitionError:
                log.error('NotLeaderForPartitionError for %s:%d',
                          resp.topic, resp.partition)
                self.client.reset_topic_metadata(resp.topic)
                continue
            except OffsetOutOfRangeError:
                log.warning('OffsetOutOfRangeError for %s:%d. '
                            'Resetting partition offset...',
                            resp.topic, resp.partition)
                self.reset_partition_offset(resp.partition)
                # Retry this partition
                retry_partitions[resp.partition] = partitions[resp.partition]
                continue
            except FailedPayloadsError as e:
                log.warning('FailedPayloadsError for %s:%d',
                            e.payload.topic, e.payload.partition)
                # Retry this partition
                retry_partitions[e.payload.partition] = partitions[e.payload.partition]
                continue

            partition = resp.partition
            buffer_size = partitions[partition]

            # Check for partial message
            if resp.messages and isinstance(resp.messages[-1].message, PartialMessage):

                # If buffer is at max and all we got was a partial message
                # raise ConsumerFetchSizeTooSmall
                if (self.max_buffer_size is not None and
                        buffer_size == self.max_buffer_size and
                        len(resp.messages) == 1):

                    log.error('Max fetch size %d too small', self.max_buffer_size)
                    raise ConsumerFetchSizeTooSmall()

                if self.max_buffer_size is None:
                    buffer_size *= 2
                else:
                    buffer_size = min(buffer_size * 2, self.max_buffer_size)

                log.warning('Fetch size too small, increase to %d (2x) '
                            'and retry', buffer_size)
                retry_partitions[partition] = buffer_size
                resp.messages.pop()

            for message in resp.messages:
                if message.offset < self.fetch_offsets[partition]:
                    log.debug('Skipping message %s because its offset is less than the consumer offset',
                              message)
                    continue
                # Put the message in our queue
                self.queue.put((partition, message))
                self.fetch_offsets[partition] = message.offset + 1
        partitions = retry_partitions
def list_consumer_group_offsets(self, group_id, group_coordinator_id=None,
                                partitions=None):
    """Fetch Consumer Group Offsets.

    Note:
    This does not verify that the group_id or partitions actually exist
    in the cluster.

    As soon as any error is encountered, it is immediately raised.

    :param group_id: The consumer group id name for which to fetch offsets.
    :param group_coordinator_id: The node_id of the group's coordinator
        broker. If set to None, will query the cluster to find the group
        coordinator. Explicitly specifying this can be useful to prevent
        that extra network round trip if you already know the group
        coordinator. Default: None.
    :param partitions: A list of TopicPartitions for which to fetch
        offsets. On brokers >= 0.10.2, this can be set to None to fetch all
        known offsets for the consumer group. Default: None.
    :return dictionary: A dictionary with TopicPartition keys and
        OffsetAndMetadata values. Partitions that are not specified and for
        which the group_id does not have a recorded offset are omitted. An
        offset value of `-1` indicates the group_id has no offset for that
        TopicPartition. A `-1` can only happen for partitions that are
        explicitly specified.
    """
    group_offsets_listing = {}
    if group_coordinator_id is None:
        group_coordinator_id = self._find_group_coordinator_id(group_id)
    version = self._matching_api_version(OffsetFetchRequest)
    if version <= 3:
        if partitions is None:
            if version <= 1:
                raise ValueError(
                    """OffsetFetchRequest_v{} requires specifying the
                    partitions for which to fetch offsets. Omitting the
                    partitions is only supported on brokers >= 0.10.2.
                    For details, see KIP-88.""".format(version))
            topics_partitions = None
        else:
            # transform from [TopicPartition("t1", 1), TopicPartition("t1", 2)] to [("t1", [1, 2])]
            topics_partitions_dict = defaultdict(set)
            for topic, partition in partitions:
                topics_partitions_dict[topic].add(partition)
            topics_partitions = list(six.iteritems(topics_partitions_dict))
        request = OffsetFetchRequest[version](group_id, topics_partitions)
        response = self._send_request_to_node(group_coordinator_id, request)
        if version > 1:  # OffsetFetchResponse_v1 lacks a top-level error_code
            error_type = Errors.for_code(response.error_code)
            if error_type is not Errors.NoError:
                # optionally we could retry if error_type.retriable
                raise error_type(
                    "Request '{}' failed with response '{}'."
                    .format(request, response))
        # transform response into a dictionary with TopicPartition keys and
        # OffsetAndMetadata values--this is what the Java AdminClient returns
        for topic, partitions in response.topics:
            for partition, offset, metadata, error_code in partitions:
                error_type = Errors.for_code(error_code)
                if error_type is not Errors.NoError:
                    raise error_type(
                        "Unable to fetch offsets for group_id {}, topic {}, partition {}"
                        .format(group_id, topic, partition))
                group_offsets_listing[TopicPartition(topic, partition)] = OffsetAndMetadata(offset, metadata)
    else:
        raise NotImplementedError(
            "Support for OffsetFetch v{} has not yet been added to KafkaAdminClient."
            .format(version))
    return group_offsets_listing
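# --- Illustrative usage sketch (not part of the library) ---------------------
# Minimal example of calling list_consumer_group_offsets() above through
# KafkaAdminClient. The broker address, group id, and topic name are
# assumptions made up purely for illustration; adjust them for your cluster.
from kafka.admin import KafkaAdminClient
from kafka.structs import TopicPartition

def _list_group_offsets_sketch():
    admin = KafkaAdminClient(bootstrap_servers='localhost:9092')

    # On brokers >= 0.10.2, partitions=None fetches all known offsets for the group.
    offsets = admin.list_consumer_group_offsets('my-group')
    for tp, om in offsets.items():
        print(tp.topic, tp.partition, om.offset, om.metadata)

    # Or restrict to explicit partitions; an offset of -1 means no committed offset.
    return admin.list_consumer_group_offsets(
        'my-group', partitions=[TopicPartition('my-topic', 0)])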
def run_once(self):
    """Run a single iteration of sending."""
    while self._topics_to_add:
        self._client.add_topic(self._topics_to_add.pop())

    # get the list of partitions with data ready to send
    result = self._accumulator.ready(self._metadata)
    ready_nodes, next_ready_check_delay, unknown_leaders_exist = result

    # if there are any partitions whose leaders are not known yet, force
    # metadata update
    if unknown_leaders_exist:
        log.debug('Unknown leaders exist, requesting metadata update')
        self._metadata.request_update()

    # remove any nodes we aren't ready to send to
    not_ready_timeout = 999999999
    for node in list(ready_nodes):
        if not self._client.ready(node):
            log.debug('Node %s not ready; delaying produce of accumulated batch', node)
            ready_nodes.remove(node)
            not_ready_timeout = min(not_ready_timeout,
                                    self._client.connection_delay(node))

    # create produce requests
    batches_by_node = self._accumulator.drain(
        self._metadata, ready_nodes, self.config['max_request_size'])

    if self.config['guarantee_message_order']:
        # Mute all the partitions drained
        for batch_list in six.itervalues(batches_by_node):
            for batch in batch_list:
                self._accumulator.muted.add(batch.topic_partition)

    expired_batches = self._accumulator.abort_expired_batches(
        self.config['request_timeout_ms'], self._metadata)
    for expired_batch in expired_batches:
        self._sensors.record_errors(expired_batch.topic_partition.topic, expired_batch.record_count)

    self._sensors.update_produce_request_metrics(batches_by_node)
    requests = self._create_produce_requests(batches_by_node)

    # If we have any nodes that are ready to send + have sendable data,
    # poll with 0 timeout so this can immediately loop and try sending more
    # data. Otherwise, the timeout is determined by nodes that have
    # partitions with data that isn't yet sendable (e.g. lingering, backing
    # off). Note that this specifically does not include nodes with
    # sendable data that aren't ready to send since they would cause busy
    # looping.
    poll_timeout_ms = min(next_ready_check_delay * 1000, not_ready_timeout)
    if ready_nodes:
        log.debug("Nodes with data ready to send: %s", ready_nodes)  # trace
        log.debug("Created %d produce requests: %s", len(requests), requests)  # trace
        poll_timeout_ms = 0

    for node_id, request in six.iteritems(requests):
        batches = batches_by_node[node_id]
        log.debug('Sending Produce Request: %r', request)
        (self._client.send(node_id, request)
             .add_callback(
                 self._handle_produce_response, node_id, time.time(), batches)
             .add_errback(
                 self._failed_produce, batches, node_id))

    # if some partitions are already ready to be sent, the select time
    # would be 0; otherwise if some partition already has some data
    # accumulated but not ready yet, the select time will be the time
    # difference between now and its linger expiry time; otherwise the
    # select time will be the time difference between now and the
    # metadata expiry time
    self._client.poll(poll_timeout_ms, sleep=True)
def test_sticky_assignor1(mocker):
    """
    Given: there are three consumers C0, C1, C2,
        four topics t0, t1, t2, t3, and each topic has 2 partitions,
        resulting in partitions t0p0, t0p1, t1p0, t1p1, t2p0, t2p1, t3p0, t3p1.
        Each consumer is subscribed to all four topics.
    Then: perform fresh assignment
    Expected: the assignment is
        - C0: [t0p0, t1p1, t3p0]
        - C1: [t0p1, t2p0, t3p1]
        - C2: [t1p0, t2p1]
    Then: remove C1 consumer and perform the reassignment
    Expected: the new assignment is
        - C0 [t0p0, t1p1, t2p0, t3p0]
        - C2 [t0p1, t1p0, t2p1, t3p1]
    """
    cluster = create_cluster(mocker, topics={'t0', 't1', 't2', 't3'}, topics_partitions={0, 1})

    subscriptions = {
        'C0': {'t0', 't1', 't2', 't3'},
        'C1': {'t0', 't1', 't2', 't3'},
        'C2': {'t0', 't1', 't2', 't3'},
    }
    member_metadata = make_member_metadata(subscriptions)

    sticky_assignment = StickyPartitionAssignor.assign(cluster, member_metadata)
    expected_assignment = {
        'C0': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t0', [0]), ('t1', [1]), ('t3', [0])], b''),
        'C1': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t0', [1]), ('t2', [0]), ('t3', [1])], b''),
        'C2': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t1', [0]), ('t2', [1])], b''),
    }
    assert_assignment(sticky_assignment, expected_assignment)

    del subscriptions['C1']
    member_metadata = {}
    for member, topics in six.iteritems(subscriptions):
        member_metadata[member] = StickyPartitionAssignor._metadata(
            topics, sticky_assignment[member].partitions())

    sticky_assignment = StickyPartitionAssignor.assign(cluster, member_metadata)
    expected_assignment = {
        'C0': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t0', [0]), ('t1', [1]), ('t2', [0]), ('t3', [0])], b''),
        'C2': ConsumerProtocolMemberAssignment(StickyPartitionAssignor.version, [('t0', [1]), ('t1', [0]), ('t2', [1]), ('t3', [1])], b''),
    }
    assert_assignment(sticky_assignment, expected_assignment)
def _create_fetch_requests(self):
    """Create fetch requests for all assigned partitions, grouped by node.

    FetchRequests skipped if no leader, or node has requests in flight

    Returns:
        dict: {node_id: FetchRequest, ...} (version depends on api_version)
    """
    # create the fetch info as a dict of lists of partition info tuples
    # which can be passed to FetchRequest() via .items()
    fetchable = collections.defaultdict(lambda: collections.defaultdict(list))

    for partition in self._fetchable_partitions():
        node_id = self._client.cluster.leader_for_partition(partition)

        # advance position for any deleted compacted messages if required
        if self._subscriptions.assignment[partition].last_offset_from_message_batch:
            next_offset_from_batch_header = self._subscriptions.assignment[partition].last_offset_from_message_batch + 1
            if next_offset_from_batch_header > self._subscriptions.assignment[partition].position:
                log.debug(
                    "Advance position for partition %s from %s to %s (last message batch location plus one)"
                    " to correct for deleted compacted messages",
                    partition,
                    self._subscriptions.assignment[partition].position,
                    next_offset_from_batch_header)
                self._subscriptions.assignment[partition].position = next_offset_from_batch_header

        position = self._subscriptions.assignment[partition].position

        # fetch if there is a leader and no in-flight requests
        if node_id is None or node_id == -1:
            log.debug("No leader found for partition %s."
                      " Requesting metadata update", partition)
            self._client.cluster.request_update()

        elif self._client.in_flight_request_count(node_id) == 0:
            partition_info = (
                partition.partition,
                position,
                self.config['max_partition_fetch_bytes']
            )
            fetchable[node_id][partition.topic].append(partition_info)
            log.debug("Adding fetch request for partition %s at offset %d",
                      partition, position)
        else:
            log.log(0, "Skipping fetch for partition %s because there is an inflight request to node %s",
                    partition, node_id)

    if self.config['api_version'] >= (0, 11, 0):
        version = 4
    elif self.config['api_version'] >= (0, 10, 1):
        version = 3
    elif self.config['api_version'] >= (0, 10):
        version = 2
    elif self.config['api_version'] == (0, 9):
        version = 1
    else:
        version = 0
    requests = {}
    for node_id, partition_data in six.iteritems(fetchable):
        if version < 3:
            requests[node_id] = FetchRequest[version](
                -1,  # replica_id
                self.config['fetch_max_wait_ms'],
                self.config['fetch_min_bytes'],
                partition_data.items())
        else:
            # As of version == 3 partitions will be returned in order as
            # they are requested, so to avoid starvation with
            # `fetch_max_bytes` option we need this shuffle
            # NOTE: we do have partition_data in random order due to usage
            #       of unordered structures like dicts, but that does not
            #       guarantee equal distribution, and starting in Python3.6
            #       dicts retain insert order.
            partition_data = list(partition_data.items())
            random.shuffle(partition_data)
            if version == 3:
                requests[node_id] = FetchRequest[version](
                    -1,  # replica_id
                    self.config['fetch_max_wait_ms'],
                    self.config['fetch_min_bytes'],
                    self.config['fetch_max_bytes'],
                    partition_data)
            else:
                requests[node_id] = FetchRequest[version](
                    -1,  # replica_id
                    self.config['fetch_max_wait_ms'],
                    self.config['fetch_min_bytes'],
                    self.config['fetch_max_bytes'],
                    self._isolation_level,
                    partition_data)
    return requests
def _create_fetch_requests(self):
    """Create fetch requests for all assigned partitions, grouped by node.

    FetchRequests skipped if no leader, or node has requests in flight

    Returns:
        dict: {node_id: FetchRequest, ...} (version depends on api_version)
    """
    # create the fetch info as a dict of lists of partition info tuples
    # which can be passed to FetchRequest() via .items()
    fetchable = collections.defaultdict(lambda: collections.defaultdict(list))

    for partition in self._fetchable_partitions():
        node_id = self._client.cluster.leader_for_partition(partition)
        position = self._subscriptions.assignment[partition].position

        # fetch if there is a leader and no in-flight requests
        if node_id is None or node_id == -1:
            log.debug("No leader found for partition %s."
                      " Requesting metadata update", partition)
            self._client.cluster.request_update()

        elif self._client.in_flight_request_count(node_id) == 0:
            partition_info = (
                partition.partition,
                position,
                self.config['max_partition_fetch_bytes']
            )
            fetchable[node_id][partition.topic].append(partition_info)
            log.debug("Adding fetch request for partition %s at offset %d",
                      partition, position)

    if self.config['api_version'] >= (0, 10, 1):
        version = 3
    elif self.config['api_version'] >= (0, 10):
        version = 2
    elif self.config['api_version'] == (0, 9):
        version = 1
    else:
        version = 0
    requests = {}
    for node_id, partition_data in six.iteritems(fetchable):
        if version < 3:
            requests[node_id] = FetchRequest[version](
                -1,  # replica_id
                self.config['fetch_max_wait_ms'],
                self.config['fetch_min_bytes'],
                partition_data.items())
        else:
            # As of version == 3 partitions will be returned in order as
            # they are requested, so to avoid starvation with
            # `fetch_max_bytes` option we need this shuffle
            # NOTE: we do have partition_data in random order due to usage
            #       of unordered structures like dicts, but that does not
            #       guarantee equal distribution, and starting in Python3.6
            #       dicts retain insert order.
            partition_data = list(partition_data.items())
            random.shuffle(partition_data)
            requests[node_id] = FetchRequest[version](
                -1,  # replica_id
                self.config['fetch_max_wait_ms'],
                self.config['fetch_min_bytes'],
                self.config['fetch_max_bytes'],
                partition_data)
    return requests
def missing_fetch_positions(self):
    missing = set()
    for partition, state in six.iteritems(self.assignment):
        if not state.has_valid_position:
            missing.add(partition)
    return missing
def _send_offset_commit_request(self, offsets):
    """Commit offsets for the specified list of topics and partitions.

    This is a non-blocking call which returns a request future that can be
    polled in the case of a synchronous commit or ignored in the
    asynchronous case.

    Arguments:
        offsets (dict of {TopicPartition: OffsetAndMetadata}): what should
            be committed

    Returns:
        Future: indicating whether the commit was successful or not
    """
    assert self.config["api_version"] >= (0, 8, 1), "Unsupported Broker API"
    assert all(map(lambda k: isinstance(k, TopicPartition), offsets))
    assert all(
        map(lambda v: isinstance(v, OffsetAndMetadata), offsets.values()))
    if not offsets:
        log.debug("No offsets to commit")
        return Future().success(None)

    node_id = self.coordinator()
    if node_id is None:
        return Future().failure(Errors.GroupCoordinatorNotAvailableError)

    # create the offset commit request
    offset_data = collections.defaultdict(dict)
    for tp, offset in six.iteritems(offsets):
        offset_data[tp.topic][tp.partition] = offset

    if self._subscription.partitions_auto_assigned():
        generation = self.generation()
    else:
        generation = Generation.NO_GENERATION

    # if the generation is None, we are not part of an active group
    # (and we expect to be). The only thing we can do is fail the commit
    # and let the user rejoin the group in poll()
    if self.config["api_version"] >= (0, 9) and generation is None:
        return Future().failure(Errors.CommitFailedError())

    if self.config["api_version"] >= (0, 9):
        request = OffsetCommitRequest[2](
            self.group_id,
            generation.generation_id,
            generation.member_id,
            OffsetCommitRequest[2].DEFAULT_RETENTION_TIME,
            [(
                topic,
                [(partition, offset.offset, offset.metadata)
                 for partition, offset in six.iteritems(partitions)],
            ) for topic, partitions in six.iteritems(offset_data)],
        )
    elif self.config["api_version"] >= (0, 8, 2):
        request = OffsetCommitRequest[1](
            self.group_id,
            -1,
            "",
            [(
                topic,
                [(partition, offset.offset, -1, offset.metadata)
                 for partition, offset in six.iteritems(partitions)],
            ) for topic, partitions in six.iteritems(offset_data)],
        )
    elif self.config["api_version"] >= (0, 8, 1):
        request = OffsetCommitRequest[0](
            self.group_id,
            [(
                topic,
                [(partition, offset.offset, offset.metadata)
                 for partition, offset in six.iteritems(partitions)],
            ) for topic, partitions in six.iteritems(offset_data)],
        )

    log.debug(
        "Sending offset-commit request with %s for group %s to %s",
        offsets,
        self.group_id,
        node_id,
    )

    future = Future()
    _f = self._client.send(node_id, request)
    _f.add_callback(self._handle_offset_commit_response, offsets, future,
                    time.time())
    _f.add_errback(self._failed_request, node_id, request, future)
    return future