Beispiel #1
0
    def balance(self):
        """Rebalance ``self.current_assignment`` in place.

        The work is done in this order:

        1. hand every unassigned partition that has at least one potential consumer
           to ``_assign_partition``;
        2. take partitions and consumers that cannot participate in reassignment out
           of the working state;
        3. snapshot the working state and run ``_perform_reassignments``;
        4. restore the snapshot if the balance score did not strictly decrease;
        5. put the consumers set aside in step 2 back.
        """
        self._initialize_current_subscriptions()
        # "initializing" means the consumer reported by
        # _get_consumer_with_most_subscriptions() currently owns nothing
        top_consumer_partitions = self.current_assignment[
            self._get_consumer_with_most_subscriptions()]
        initializing = len(top_consumer_partitions) == 0

        # place every unassigned partition that has somewhere to go
        for tp in self.unassigned_partitions:
            if self.partition_to_all_potential_consumers[tp]:
                self._assign_partition(tp)

        # partitions that cannot take part in reassignment are dropped from both work lists
        immovable_partitions = {
            tp
            for tp in six.iterkeys(self.partition_to_all_potential_consumers)
            if not self._can_partition_participate_in_reassignment(tp)
        }
        for tp in immovable_partitions:
            remove_if_present(self.sorted_partitions, tp)
            remove_if_present(self.unassigned_partitions, tp)

        # consumers that cannot take part in reassignment are parked, together with
        # their partitions, until the end of the method
        parked_assignments = {}
        for member in six.iterkeys(self.consumer_to_all_potential_partitions):
            if self._can_consumer_participate_in_reassignment(member):
                continue
            self._remove_consumer_from_current_subscriptions_and_maintain_order(
                member)
            parked_assignments[member] = self.current_assignment[member]
            del self.current_assignment[member]

        # deep copies of the working state, used for the rollback below
        snapshot_assignment = deepcopy(self.current_assignment)
        snapshot_partition_consumers = deepcopy(
            self.current_partition_consumer)

        # unless a revocation is already required, first try moving only the
        # partitions in self.unassigned_partitions; the full pass always follows
        if not self.revocation_required:
            self._perform_reassignments(self.unassigned_partitions)
        moved_something = self._perform_reassignments(self.sorted_partitions)

        # roll back when something moved during a non-initial run and the balance
        # score is not strictly lower than it was before the reassignment passes
        if not initializing and moved_something:
            score_after = self._get_balance_score(self.current_assignment)
            score_before = self._get_balance_score(snapshot_assignment)
            if score_after >= score_before:
                self.current_assignment = snapshot_assignment
                self.current_partition_consumer.clear()
                self.current_partition_consumer.update(
                    snapshot_partition_consumers)

        # bring the parked consumers back with the partitions they had
        for member, partitions in six.iteritems(parked_assignments):
            self.current_assignment[member] = partitions
            self._add_consumer_to_current_subscriptions_and_maintain_order(
                member)
def test_assignment_with_conflicting_previous_generations(
        mocker, execution_number):
    """Members report overlapping previous assignments from different generations.

    C1 and C2 (generation 1) both claim partition 0, while C3 (generation 2)
    claims partitions 3 and 4, which C2 and C1 also claim. The resulting
    assignment must be valid and balanced, and the recorded partition
    movements must be sticky.
    """
    cluster = create_cluster(mocker,
                             topics={'t'},
                             topics_partitions={0, 1, 2, 3, 4, 5})

    # member -> (previously owned partition ids, generation)
    previous_state = {
        'C1': ({0, 1, 4}, 1),
        'C2': ({0, 2, 3}, 1),
        'C3': ({3, 4, 5}, 2),
    }
    member_metadata = {
        member: StickyPartitionAssignor._metadata(
            {'t'},
            [TopicPartition('t', p) for p in partition_ids],
            generation)
        for member, (partition_ids, generation) in six.iteritems(
            previous_state)
    }

    assignment = StickyPartitionAssignor.assign(cluster, member_metadata)

    expected_subscriptions = {member: {'t'} for member in previous_state}
    verify_validity_and_balance(expected_subscriptions, assignment)
    assert StickyPartitionAssignor._latest_partition_movements.are_sticky()
def verify_validity_and_balance(subscriptions, assignment):
    """
    Assert that ``assignment`` is valid and balanced for ``subscriptions``.

    Valid means:
    - every partition given to a consumer belongs to a topic in that consumer's subscription, and
    - no partition is given to two different consumers.

    Balanced means that for every pair of consumers either:
    - their partition counts differ by at most one, or
    - none of the topics held by the consumer with more partitions is also held by the other one.

    Consumers are visited in sorted order; the first violated requirement raises an AssertionError.

    :param subscriptions: topic subscriptions of each consumer
    :param assignment: given assignment for balance check
    """
    assert six.viewkeys(subscriptions) == six.viewkeys(assignment)

    ordered_consumers = sorted(six.viewkeys(assignment))
    for position, consumer in enumerate(ordered_consumers):
        partitions = assignment[consumer].partitions()

        # validity, part 1: only partitions of subscribed topics
        for partition in partitions:
            assert partition.topic in subscriptions[consumer], (
                'Error: Partition {} is assigned to consumer {}, '
                'but it is not subscribed to topic {}\n'
                'Subscriptions: {}\n'
                'Assignments: {}'.format(partition, consumer,
                                         partition.topic, subscriptions,
                                         assignment))

        # compare against every consumer that sorts after this one
        for other_consumer in ordered_consumers[position + 1:]:
            other_partitions = assignment[other_consumer].partitions()

            # validity, part 2: no partition owned twice
            shared_partitions = set(partitions) & set(other_partitions)
            assert not shared_partitions, (
                'Error: Consumers {} and {} have common partitions '
                'assigned to them: {}\n'
                'Subscriptions: {}\n'
                'Assignments: {}'.format(consumer, other_consumer,
                                         shared_partitions,
                                         subscriptions, assignment))

            # counts within one of each other: this pair is balanced
            if abs(len(partitions) - len(other_partitions)) <= 1:
                continue

            by_topic = group_partitions_by_topic(partitions)
            other_by_topic = group_partitions_by_topic(other_partitions)

            # the counts differ by two or more here, so exactly one side is heavier
            if len(partitions) > len(other_partitions):
                heavy_name, heavy_partitions, heavy_topics = (
                    consumer, partitions, by_topic)
                light_name, light_partitions, light_topics = (
                    other_consumer, other_partitions, other_by_topic)
            else:
                heavy_name, heavy_partitions, heavy_topics = (
                    other_consumer, other_partitions, other_by_topic)
                light_name, light_partitions, light_topics = (
                    consumer, partitions, by_topic)

            # a topic held by both sides means a partition could still be moved
            for topic in six.iterkeys(heavy_topics):
                assert topic not in light_topics, (
                    'Error: Some partitions can be moved from {} ({} partitions) '
                    'to {} ({} partitions) '
                    'to achieve a better balance\n'
                    'Subscriptions: {}\n'
                    'Assignments: {}'.format(heavy_name,
                                             len(heavy_partitions),
                                             light_name,
                                             len(light_partitions),
                                             subscriptions, assignment))
Beispiel #4
0
    def _populate_sorted_partitions(self):
        """Rebuild ``self.sorted_partitions``, the ordered list of reassignment candidates.

        When this is a fresh assignment, or the subscriptions are not identical,
        the list is simply every known partition in the order produced by
        ``partitions_comparator_key``.

        Otherwise (a reassignment with identical subscriptions) partitions are
        listed round-robin, repeatedly taking one partition from the consumer
        that currently sorts last in ``sorted_consumers`` and preferring
        partitions that appear in ``self.previous_assignment``. Any partition not
        reached that way is appended afterwards in comparator order.
        """
        # set of topic partitions with their respective potential consumers
        all_partitions = set((tp, tuple(consumers))
                             for tp, consumers in six.iteritems(
                                 self.partition_to_all_potential_consumers))
        partitions_sorted_by_num_of_potential_consumers = sorted(
            all_partitions, key=partitions_comparator_key)

        self.sorted_partitions = []
        if self.is_fresh_assignment or not self._are_subscriptions_identical():
            # plain case: comparator order, walked once instead of draining
            # the list from the front with pop(0)
            self.sorted_partitions.extend(
                tp
                for tp, _ in partitions_sorted_by_num_of_potential_consumers)
            return

        # this is a reassignment and the subscriptions are identical (all consumers can consume from
        # all topics), so partitions are listed in a round robin fashion (from consumers with most
        # assigned partitions to those with least)
        assignments = deepcopy(self.current_assignment)
        for partitions in six.itervalues(assignments):
            # drop partitions that are no longer known to the assignor
            stale_partitions = [
                tp for tp in partitions
                if tp not in self.partition_to_all_potential_consumers
            ]
            for tp in stale_partitions:
                partitions.remove(tp)

        sorted_consumers = SortedSet(
            iterable=[
                (consumer, tuple(partitions))
                for consumer, partitions in six.iteritems(assignments)
            ],
            key=subscriptions_comparator_key,
        )

        # previous_assignment is not modified below, so its key set is built once
        # rather than on every pass through the loop
        previously_assigned = set(six.iterkeys(self.previous_assignment))

        # sorted_consumers is ordered by subscriptions_comparator_key (presumably by how many
        # valid partitions each consumer holds); the last element is processed first
        while sorted_consumers:
            consumer, _ = sorted_consumers.pop_last()
            # currently assigned partitions to this consumer
            remaining_partitions = assignments[consumer]
            # of the partitions recorded in previous_assignment,
            # keep only those that are assigned to this consumer now
            previous_partitions = previously_assigned.intersection(
                set(remaining_partitions))
            if previous_partitions:
                # a partition with a previous-assignment record is the preferred candidate
                partition = previous_partitions.pop()
                remaining_partitions.remove(partition)
            elif remaining_partitions:
                # otherwise any of the consumer's current partitions will do
                partition = remaining_partitions.pop()
            else:
                # nothing left for this consumer: it is not put back into the set
                continue
            self.sorted_partitions.append(partition)
            sorted_consumers.add((consumer, tuple(remaining_partitions)))

        # append every partition the round robin did not reach, in comparator order;
        # a set keeps the "already listed" check O(1) instead of scanning the list
        already_listed = set(self.sorted_partitions)
        for partition, _ in partitions_sorted_by_num_of_potential_consumers:
            if partition not in already_listed:
                already_listed.add(partition)
                self.sorted_partitions.append(partition)