Esempio n. 1
0
    def run_command(self, cluster_topology, cluster_balancer):
        """Revoke leadership from ``self.args.broker_ids`` and process the plan.

        The assignment read before ``revoke_leadership`` is used as the
        baseline. The assignment read afterwards is validated against it;
        on validation failure the process exits with status 1. Otherwise the
        change set is reduced (zero partition movements, at most
        ``self.args.max_leader_changes`` leader changes) and handed to
        ``self.process_assignment``.
        """
        # Baseline, read before the balancer is asked to revoke leadership.
        original = cluster_topology.assignment

        cluster_balancer.revoke_leadership(self.args.broker_ids)

        proposed_plan = assignment_to_plan(cluster_topology.assignment)
        original_plan = assignment_to_plan(original)
        if not validate_plan(proposed_plan, original_plan):
            self.log.error(
                'Invalid assignment %s.',
                cluster_topology.assignment,
            )
            invalid_text = 'Invalid assignment: {0}'.format(
                cluster_topology.assignment,
            )
            print(invalid_text, file=sys.stderr)
            sys.exit(1)

        # Only leader changes are allowed here: partition movements are 0.
        partition_movements = 0
        reduced = self.get_reduced_assignment(
            original,
            cluster_topology,
            partition_movements,
            self.args.max_leader_changes,
        )
        if not reduced:
            note = (
                "Cluster already balanced. No more partitions as leaders in "
                "revoked-leadership brokers."
            )
            self.log.info(note)
            print(note)
            return
        self.process_assignment(reduced)
Esempio n. 2
0
def test_validate_plan_invalid_format():
    """validate_plan returns False when a partition id is given as a string."""
    # The first proposed entry carries the string '0' as its partition id.
    proposed_partitions = [
        {"partition": '0', "topic": u't1', "replicas": [2, 1, 0]},
        {"partition": 0, "topic": u't2', "replicas": [0, 3]},
    ]
    current_partitions = [
        {"partition": 0, "topic": u't1', "replicas": [0, 2, 3]},
        {"partition": 1, "topic": u't1', "replicas": [0, 1, 2]},
        {"partition": 0, "topic": u't2', "replicas": [0, 1]},
    ]
    proposed = {"version": 1, "partitions": proposed_partitions}
    current = {"version": 1, "partitions": current_partitions}

    # Validation is expected to fail.
    outcome = validate_plan(proposed, current)
    assert outcome is False
Esempio n. 3
0
    def run_command(self, cluster_topology, cluster_balancer):
        """Revoke leadership from ``self.args.broker_ids`` and process the plan.

        The assignment read before ``revoke_leadership`` serves as the
        baseline; the assignment read afterwards is validated against it and
        the process exits with status 1 if validation fails. The difference
        is then reduced (zero partition movements, at most
        ``self.args.max_leader_changes`` leader changes) and passed to
        ``self.process_assignment``.
        """
        # Baseline, read before the balancer is asked to revoke leadership.
        original = cluster_topology.assignment

        cluster_balancer.revoke_leadership(self.args.broker_ids)

        proposed_plan = assignment_to_plan(cluster_topology.assignment)
        original_plan = assignment_to_plan(original)
        if not validate_plan(proposed_plan, original_plan):
            self.log.error(
                'Invalid assignment %s.',
                cluster_topology.assignment,
            )
            invalid_text = 'Invalid assignment: {0}'.format(
                cluster_topology.assignment,
            )
            print(invalid_text, file=sys.stderr)
            sys.exit(1)

        # Only leader changes are allowed here: partition movements are 0.
        partition_movements = 0
        reduced = self.get_reduced_assignment(
            original,
            cluster_topology.assignment,
            partition_movements,
            self.args.max_leader_changes,
        )
        if not reduced:
            note = (
                "Cluster already balanced. No more partitions as leaders in "
                "revoked-leadership brokers."
            )
            self.log.info(note)
            print(note)
            return
        self.process_assignment(reduced)
Esempio n. 4
0
def test_validate_plan_3():
    """validate_plan returns True for a plan covering part of the base plan."""
    # The proposed plan omits t1-1, which is present in the base plan.
    proposed_partitions = [
        {"partition": 0, "topic": u't1', "replicas": [2, 1, 0]},
        {"partition": 0, "topic": u't2', "replicas": [0, 3]},
    ]
    current_partitions = [
        {"partition": 0, "topic": u't1', "replicas": [0, 2, 3]},
        {"partition": 1, "topic": u't1', "replicas": [0, 1, 2]},
        {"partition": 0, "topic": u't2', "replicas": [0, 1]},
    ]
    proposed = {"version": 1, "partitions": proposed_partitions}
    current = {"version": 1, "partitions": current_partitions}

    # Validation is expected to succeed.
    outcome = validate_plan(proposed, current)
    assert outcome is True
Esempio n. 5
0
def test_validate_plan_incomplete_partition_subset_2():
    """validate_plan returns False when the plan has a partition the base lacks.

    The proposed plan contains every base partition plus t3-0, and the check
    runs with ``is_partition_subset=False``.
    """
    proposed_partitions = [
        {"partition": 0, "topic": u't1', "replicas": [2, 1, 0]},
        {"partition": 1, "topic": u't1', "replicas": [0, 1, 2]},
        {"partition": 0, "topic": u't2', "replicas": [0, 1]},
        # Not present in the base assignment below.
        {"partition": 0, "topic": u't3', "replicas": [0, 1]},
    ]
    current_partitions = [
        {"partition": 0, "topic": u't1', "replicas": [0, 2, 3]},
        {"partition": 1, "topic": u't1', "replicas": [0, 1, 2]},
        {"partition": 0, "topic": u't2', "replicas": [0, 1]},
    ]
    proposed = {"version": 1, "partitions": proposed_partitions}
    current = {"version": 1, "partitions": current_partitions}

    # Validation is expected to fail.
    outcome = validate_plan(proposed, current, is_partition_subset=False)
    assert outcome is False
Esempio n. 6
0
    def run_command(self, cluster_topology, cluster_balancer):
        """Decommission ``self.args.broker_ids`` and process the resulting plan.

        The assignment read before ``decommission_brokers`` is the baseline.
        The assignment read afterwards is validated against it (exit status 1
        on failure), reduced according to ``self.args.max_partition_movements``
        and ``self.args.max_leader_changes``, and passed to
        ``self.process_assignment``.
        """
        # Baseline, read before the balancer is asked to decommission.
        original = cluster_topology.assignment

        cluster_balancer.decommission_brokers(self.args.broker_ids)

        proposed_plan = assignment_to_plan(cluster_topology.assignment)
        original_plan = assignment_to_plan(original)
        if not validate_plan(proposed_plan, original_plan):
            self.log.error(
                'Invalid assignment %s.',
                cluster_topology.assignment,
            )
            invalid_text = 'Invalid assignment: {0}'.format(
                cluster_topology.assignment,
            )
            print(invalid_text, file=sys.stderr)
            sys.exit(1)

        # Cap the change set by the configured movement / leader-change limits.
        reduced = self.get_reduced_assignment(
            original,
            cluster_topology.assignment,
            self.args.max_partition_movements,
            self.args.max_leader_changes,
        )
        if not reduced:
            note = (
                "Cluster already balanced. No more replicas in "
                "decommissioned brokers."
            )
            self.log.info(note)
            print(note)
            return
        self.process_assignment(reduced)
Esempio n. 7
0
    def run_command(self, cluster_topology):
        """Replace ``source_broker`` with ``dest_broker`` and process the plan.

        Exits with status 1 when source and destination are the same broker,
        or when the assignment read after ``replace_broker`` fails
        ``validate_plan`` against the assignment read before it. Otherwise
        the change set is reduced according to
        ``self.args.max_partition_movements`` and
        ``self.args.max_leader_changes`` and passed to
        ``self.process_assignment``.
        """
        if self.args.source_broker == self.args.dest_broker:
            # This is an error, so report it on stderr and exit non-zero,
            # the same way the invalid-assignment path below does.
            print(
                "Error: Destination broker is same as source broker.",
                file=sys.stderr,
            )
            sys.exit(1)

        # Baseline, read before the broker replacement is applied.
        base_assignment = cluster_topology.assignment
        cluster_topology.replace_broker(self.args.source_broker, self.args.dest_broker)

        if not validate_plan(
            assignment_to_plan(cluster_topology.assignment),
            assignment_to_plan(base_assignment),
        ):
            self.log.error('Invalid assignment %s.', cluster_topology.assignment)
            print(
                'Invalid assignment: {0}'.format(cluster_topology.assignment),
                file=sys.stderr,
            )
            sys.exit(1)

        # Reduce the proposed assignment based on max_partition_movements
        # and max_leader_changes
        reduced_assignment = self.get_reduced_assignment(
            base_assignment,
            cluster_topology.assignment,
            self.args.max_partition_movements,
            self.args.max_leader_changes,
        )
        if reduced_assignment:
            self.process_assignment(reduced_assignment)
        else:
            msg = "Broker already replaced. No more replicas in source broker."
            self.log.info(msg)
            print(msg)
Esempio n. 8
0
    def run_command(self, cluster_topology):
        """Decommission ``self.args.broker_ids`` and process the resulting plan.

        The assignment read before ``decommission_brokers`` is the baseline.
        The assignment read afterwards is validated against it (exit status 1
        on failure), reduced according to ``self.args.max_partition_movements``
        and ``self.args.max_leader_changes``, and passed to
        ``self.process_assignment``.
        """
        # Baseline, read before the topology is asked to decommission.
        original = cluster_topology.assignment
        cluster_topology.decommission_brokers(self.args.broker_ids)

        proposed_plan = assignment_to_plan(cluster_topology.assignment)
        original_plan = assignment_to_plan(original)
        if not validate_plan(proposed_plan, original_plan):
            self.log.error(
                'Invalid assignment %s.',
                cluster_topology.assignment,
            )
            invalid_text = 'Invalid assignment: {0}'.format(
                cluster_topology.assignment,
            )
            print(invalid_text, file=sys.stderr)
            sys.exit(1)

        # Cap the change set by the configured movement / leader-change limits.
        reduced = self.get_reduced_assignment(
            original,
            cluster_topology.assignment,
            self.args.max_partition_movements,
            self.args.max_leader_changes,
        )
        if not reduced:
            note = (
                "Cluster already balanced. No more replicas in "
                "decommissioned brokers."
            )
            self.log.info(note)
            print(note)
            return
        self.process_assignment(reduced)
Esempio n. 9
0
def test_validate_plan_1():
    """validate_plan returns True when called with a plan and no base plan."""
    proposed_partitions = [
        {"partition": 0, "topic": u't1', "replicas": [1, 4]},
        {"partition": 0, "topic": u't2', "replicas": [0, 1]},
    ]
    proposed = {"version": 1, "partitions": proposed_partitions}

    # Validation is expected to succeed.
    outcome = validate_plan(proposed)
    assert outcome is True
Esempio n. 10
0
def test_validate_plan_duplicate_partition():
    """validate_plan returns False when the same partition is listed twice."""
    # Both entries refer to partition 0 of topic t1.
    proposed_partitions = [
        {"partition": 0, "topic": u't1', "replicas": [2, 1, 0]},
        {"partition": 0, "topic": u't1', "replicas": [0, 3]},
    ]
    proposed = {"version": 1, "partitions": proposed_partitions}

    # Validation is expected to fail.
    outcome = validate_plan(proposed)
    assert outcome is False
Esempio n. 11
0
    def run_command(self, cluster_topology, cluster_balancer):
        """Replace ``source_broker`` with ``dest_broker`` and process the plan.

        Exits with status 1 when source and destination are the same broker,
        or when the assignment read after ``replace_broker`` fails
        ``validate_plan`` against the assignment read before it. When
        ``self.args.topic_partition_filter`` is set, the baseline assignment
        is restricted to the keys returned by ``self.get_topic_filter()``
        before the change set is reduced and passed to
        ``self.process_assignment``.

        ``cluster_balancer`` is accepted but not used by this method.
        """
        if self.args.source_broker == self.args.dest_broker:
            # This is an error, so report it on stderr and exit non-zero,
            # the same way the invalid-assignment path below does.
            print(
                "Error: Destination broker is same as source broker.",
                file=sys.stderr,
            )
            sys.exit(1)
        if self.args.dest_broker is None:
            self.log.warning('This will shrink the replica set of topics.')

        # Baseline, read before the broker replacement is applied.
        base_assignment = cluster_topology.assignment
        cluster_topology.replace_broker(self.args.source_broker,
                                        self.args.dest_broker)

        if not validate_plan(
                assignment_to_plan(cluster_topology.assignment),
                assignment_to_plan(base_assignment),
                allow_rf_change=self.args.rf_change,
                allow_rf_mismatch=self.args.rf_mismatch,
        ):
            self.log.error('Invalid assignment %s.',
                           cluster_topology.assignment)
            print(
                'Invalid assignment: {0}'.format(cluster_topology.assignment),
                file=sys.stderr,
            )
            sys.exit(1)

        # Restrict the baseline assignment to the topic-partitions named by
        # the topic_partition_filter, if provided
        if self.args.topic_partition_filter:
            self.log.info("Using provided filter list")
            filter_set = self.get_topic_filter()
            base_assignment = {
                t_p: replica
                for t_p, replica in six.iteritems(base_assignment)
                if t_p in filter_set
            }

        # Reduce the proposed assignment based on max_partition_movements
        # and max_leader_changes
        reduced_assignment = self.get_reduced_assignment(
            base_assignment,
            cluster_topology,
            self.args.max_partition_movements,
            self.args.max_leader_changes,
        )
        if reduced_assignment:
            self.process_assignment(reduced_assignment,
                                    allow_rf_change=self.args.rf_change,
                                    allow_rf_mismatch=self.args.rf_mismatch)
        else:
            msg = "Broker already replaced. No more replicas in source broker."
            self.log.info(msg)
            print(msg)
Esempio n. 12
0
 def execute_plan(self, plan, allow_rf_change=False, allow_rf_mismatch=False):
     """Validate *plan* and create the reassignment node with it.

     The plan is checked with ``validate_plan`` against the cluster plan
     fetched for the topics it names. Returns True when ``self.create``
     completes without raising; returns False when validation fails, when
     the node already exists (``NodeExistsError``), or when any other
     exception is raised while creating it.
     """
     zk_node = '{admin}/{reassignment_node}'.format(
         admin=ADMIN_PATH,
         reassignment_node=REASSIGNMENT_NODE,
     )
     serialized_plan = dump_json(plan)

     # Only the topics named in the proposed plan are fetched for comparison.
     proposed_topics = {entry['topic'] for entry in plan['partitions']}
     current_plan = self.get_cluster_plan(topic_names=list(proposed_topics))

     plan_ok = validate_plan(
         plan,
         current_plan,
         allow_rf_change=allow_rf_change,
         allow_rf_mismatch=allow_rf_mismatch,
     )
     if not plan_ok:
         invalid_text = (
             'Given plan is invalid. Aborting new reassignment plan ... {plan}'
             .format(plan=plan)
         )
         _log.error(invalid_text)
         return False

     # Send proposed-plan to zookeeper
     try:
         _log.info('Sending plan to Zookeeper...')
         self.create(zk_node, serialized_plan, makepath=True)
         _log.info(
             'Re-assign partitions node in Zookeeper updated successfully '
             'with {plan}'.format(plan=plan),
         )
     except NodeExistsError:
         # The node is already there: report what it contains and give up.
         _log.warning('Previous plan in progress. Exiting..')
         _log.warning(
             'Aborting new reassignment plan... {plan}'.format(plan=plan),
         )
         running_plan = load_json(self.get(zk_node)[0])
         running = []
         for item in running_plan['partitions']:
             running.append(
                 '{topic}-{p_id}'.format(
                     topic=item['topic'],
                     p_id=str(item['partition']),
                 )
             )
         _log.warning(
             '{count} partition(s) reassignment currently in progress:-'
             .format(count=len(running)),
         )
         _log.warning(
             '{partitions}. In Progress reassignment plan...'
             .format(partitions=', '.join(running)),
         )
         return False
     except Exception as e:
         _log.error(
             'Could not re-assign partitions {plan}. Error: {e}'
             .format(plan=plan, e=e),
         )
         return False
     else:
         return True
Esempio n. 13
0
 def execute_plan(self, plan, allow_rf_change=False):
     """Validate *plan* and create the reassignment node with it.

     The plan is checked with ``validate_plan`` against
     ``self.get_cluster_plan()``. Returns True when ``self.create``
     completes without raising; returns False when validation fails, when
     the node already exists (``NodeExistsError``), or when any other
     exception is raised while creating it.
     """
     zk_node = '{admin}/{reassignment_node}'.format(
         admin=ADMIN_PATH,
         reassignment_node=REASSIGNMENT_NODE,
     )
     serialized_plan = dump_json(plan)
     current_plan = self.get_cluster_plan()

     plan_ok = validate_plan(
         plan,
         current_plan,
         allow_rf_change=allow_rf_change,
     )
     if not plan_ok:
         invalid_text = (
             'Given plan is invalid. Aborting new reassignment plan ... {plan}'
             .format(plan=plan)
         )
         _log.error(invalid_text)
         return False

     # Send proposed-plan to zookeeper
     try:
         _log.info('Sending plan to Zookeeper...')
         self.create(zk_node, serialized_plan, makepath=True)
         _log.info(
             'Re-assign partitions node in Zookeeper updated successfully '
             'with {plan}'.format(plan=plan),
         )
     except NodeExistsError:
         # The node is already there: report what it contains and give up.
         _log.warning('Previous plan in progress. Exiting..')
         _log.warning(
             'Aborting new reassignment plan... {plan}'.format(plan=plan),
         )
         running_plan = load_json(self.get(zk_node)[0])
         running = []
         for item in running_plan['partitions']:
             running.append(
                 '{topic}-{p_id}'.format(
                     topic=item['topic'],
                     p_id=str(item['partition']),
                 )
             )
         _log.warning(
             '{count} partition(s) reassignment currently in progress:-'
             .format(count=len(running)),
         )
         _log.warning(
             '{partitions}. In Progress reassignment plan...'
             .format(partitions=', '.join(running)),
         )
         return False
     except Exception as e:
         _log.error(
             'Could not re-assign partitions {plan}. Error: {e}'
             .format(plan=plan, e=e),
         )
         return False
     else:
         return True
Esempio n. 14
0
def test_validate_plan_3():
    """validate_plan returns True for a plan covering part of the base plan."""
    def entry(topic, partition, replicas):
        # Build one partition record of a plan.
        return {"partition": partition, "topic": topic, "replicas": replicas}

    # The proposed plan omits t1-1, which is present in the base plan.
    proposed = {
        "version": 1,
        "partitions": [
            entry(u't1', 0, [2, 1, 0]),
            entry(u't2', 0, [0, 3]),
        ],
    }
    current = {
        "version": 1,
        "partitions": [
            entry(u't1', 0, [0, 2, 3]),
            entry(u't1', 1, [0, 1, 2]),
            entry(u't2', 0, [0, 1]),
        ],
    }

    # Validation is expected to succeed.
    outcome = validate_plan(proposed, current)
    assert outcome is True
Esempio n. 15
0
def test_validate_plan_invalid_format():
    """validate_plan returns False when a partition id is given as a string."""
    def entry(topic, partition, replicas):
        # Build one partition record of a plan.
        return {"partition": partition, "topic": topic, "replicas": replicas}

    proposed = {
        "version": 1,
        "partitions": [
            # Partition id is the string '0' rather than an integer.
            entry(u't1', '0', [2, 1, 0]),
            entry(u't2', 0, [0, 3]),
        ],
    }
    current = {
        "version": 1,
        "partitions": [
            entry(u't1', 0, [0, 2, 3]),
            entry(u't1', 1, [0, 1, 2]),
            entry(u't2', 0, [0, 1]),
        ],
    }

    # Validation is expected to fail.
    outcome = validate_plan(proposed, current)
    assert outcome is False
Esempio n. 16
0
def test_validate_plan_2():
    """validate_plan returns True when the plan lists every base partition.

    The check runs with ``is_partition_subset=False``.
    """
    def entry(topic, partition, replicas):
        # Build one partition record of a plan.
        return {"partition": partition, "topic": topic, "replicas": replicas}

    proposed = {
        "version": 1,
        "partitions": [
            entry(u't1', 0, [2, 1, 0]),
            entry(u't1', 1, [0, 1, 2]),
            entry(u't2', 0, [0, 3]),
        ],
    }
    current = {
        "version": 1,
        "partitions": [
            entry(u't1', 0, [0, 2, 3]),
            entry(u't1', 1, [0, 1, 2]),
            entry(u't2', 0, [0, 1]),
        ],
    }

    # Validation is expected to succeed.
    outcome = validate_plan(proposed, current, is_partition_subset=False)
    assert outcome is True
Esempio n. 17
0
    def run_command(self, ct):
        """Build a balanced assignment for *ct* and process the reduced plan.

        The assignment returned by ``self.build_balanced_assignment`` is
        validated against ``ct.assignment`` as read beforehand (exit status 1
        on failure), reduced according to ``self.args.max_partition_movements``
        and ``self.args.max_leader_changes``, and passed to
        ``self.process_assignment``.
        """
        # Baseline, read before the balanced assignment is built.
        original = ct.assignment
        balanced = self.build_balanced_assignment(ct)

        proposed_plan = assignment_to_plan(balanced)
        original_plan = assignment_to_plan(original)
        if not validate_plan(proposed_plan, original_plan):
            self.log.error('Invalid latest-cluster assignment. Exiting.')
            sys.exit(1)

        # Cap the change set by the configured movement / leader-change limits.
        reduced = self.get_reduced_assignment(
            original,
            balanced,
            self.args.max_partition_movements,
            self.args.max_leader_changes,
        )
        if not reduced:
            self.log.info("Cluster already balanced. No actions to perform.")
            return
        self.process_assignment(reduced)
Esempio n. 18
0
    def run_command(self, ct):
        """Build a balanced assignment for *ct* and process the reduced plan.

        The assignment returned by ``self.build_balanced_assignment`` is
        validated against ``ct.assignment`` as read beforehand (exit status 1
        on failure), reduced according to ``self.args.max_partition_movements``
        and ``self.args.max_leader_changes``, and passed to
        ``self.process_assignment``.
        """
        # Baseline, read before the balanced assignment is built.
        original = ct.assignment
        balanced = self.build_balanced_assignment(ct)

        proposed_plan = assignment_to_plan(balanced)
        original_plan = assignment_to_plan(original)
        if not validate_plan(proposed_plan, original_plan):
            self.log.error('Invalid latest-cluster assignment. Exiting.')
            sys.exit(1)

        # Cap the change set by the configured movement / leader-change limits.
        reduced = self.get_reduced_assignment(
            original,
            balanced,
            self.args.max_partition_movements,
            self.args.max_leader_changes,
        )
        if not reduced:
            self.log.info("Cluster already balanced. No actions to perform.")
            return
        self.process_assignment(reduced)
Esempio n. 19
0
    def run_command(self, cluster_topology, cluster_balancer):
        """Replace ``source_broker`` with ``dest_broker`` and process the plan.

        Exits with status 1 when source and destination are the same broker,
        or when the assignment read after ``replace_broker`` fails
        ``validate_plan`` against the assignment read before it. Otherwise
        the change set is reduced according to
        ``self.args.max_partition_movements`` and
        ``self.args.max_leader_changes`` and passed to
        ``self.process_assignment``.

        ``cluster_balancer`` is accepted but not used by this method.
        """
        if self.args.source_broker == self.args.dest_broker:
            # This is an error, so report it on stderr and exit non-zero,
            # the same way the invalid-assignment path below does.
            print(
                "Error: Destination broker is same as source broker.",
                file=sys.stderr,
            )
            sys.exit(1)

        # Baseline, read before the broker replacement is applied.
        base_assignment = cluster_topology.assignment
        cluster_topology.replace_broker(self.args.source_broker,
                                        self.args.dest_broker)

        if not validate_plan(
                assignment_to_plan(cluster_topology.assignment),
                assignment_to_plan(base_assignment),
        ):
            self.log.error('Invalid assignment %s.',
                           cluster_topology.assignment)
            print(
                'Invalid assignment: {0}'.format(cluster_topology.assignment),
                file=sys.stderr,
            )
            sys.exit(1)

        # Reduce the proposed assignment based on max_partition_movements
        # and max_leader_changes
        reduced_assignment = self.get_reduced_assignment(
            base_assignment,
            cluster_topology.assignment,
            self.args.max_partition_movements,
            self.args.max_leader_changes,
        )
        if reduced_assignment:
            self.process_assignment(reduced_assignment)
        else:
            msg = "Broker already replaced. No more replicas in source broker."
            self.log.info(msg)
            print(msg)
Esempio n. 20
0
    def run_command(self, cluster_topology, cluster_balancer):
        """Decommission ``self.args.broker_ids`` and process the resulting plan.

        Before decommissioning, the sizes of the partitions hosted on the
        listed brokers are compared with ``self.args.max_movement_size``:

        * ``--force-progress`` without a max movement size exits with
          status 1.
        * With ``auto_max_movement_size`` set, the max movement size is set
          to the largest of those partition sizes.
        * A max movement size smaller than the largest partition exits with
          status 1, unless ``force_progress`` is set, in which case only a
          warning is logged.

        The size checks are skipped when the listed brokers host no
        partitions. The assignment read after ``decommission_brokers`` is
        then validated against the one read before it (exit status 1 on
        failure), reduced, and passed to ``self.process_assignment``.
        """
        if self.args.force_progress and self.args.max_movement_size is None:
            self.log.error(
                '--force-progress must be used with --max-movement-size',
            )
            sys.exit(1)

        # Collect the partitions hosted on the brokers to decommission.
        partitions_to_move = set()
        for broker in self.args.broker_ids:
            partitions_to_move.update(cluster_topology.brokers[broker].partitions)

        # max()/min() raise ValueError on an empty sequence. With nothing
        # hosted on these brokers there is no size to check, so skip the
        # checks and let the "no more replicas" path below be reached.
        partition_sizes = [partition.size for partition in partitions_to_move]
        if partition_sizes:
            largest_size = max(partition_sizes)
            smallest_size = min(partition_sizes)

            if self.args.auto_max_movement_size:
                self.args.max_movement_size = largest_size
                self.log.info(
                    'Auto-max-movement-size: using {max_movement_size} as'
                    ' max-movement-size.'.format(
                        max_movement_size=self.args.max_movement_size,
                    )
                )

            if self.args.max_movement_size and self.args.max_movement_size < largest_size:
                if not self.args.force_progress:
                    self.log.error(
                        'Max partition movement size is only {max_movement_size},'
                        ' but remaining partitions to move range from {smallest_size} to'
                        ' {largest_size}. The decommission will not make progress'.format(
                            max_movement_size=self.args.max_movement_size,
                            smallest_size=smallest_size,
                            largest_size=largest_size,
                        )
                    )
                    sys.exit(1)
                else:
                    self.log.warning(
                        'Max partition movement size is only {max_movement_size},'
                        ' but remaining partitions to move range from {smallest_size} to'
                        ' {largest_size}. The decommission may be slower than expected'.format(
                            max_movement_size=self.args.max_movement_size,
                            smallest_size=smallest_size,
                            largest_size=largest_size,
                        )
                    )

        # Baseline, read before the balancer is asked to decommission.
        base_assignment = cluster_topology.assignment

        cluster_balancer.decommission_brokers(self.args.broker_ids)

        if not validate_plan(
            assignment_to_plan(cluster_topology.assignment),
            assignment_to_plan(base_assignment),
        ):
            self.log.error('Invalid assignment %s.', cluster_topology.assignment)
            print(
                'Invalid assignment: {0}'.format(cluster_topology.assignment),
                file=sys.stderr,
            )
            sys.exit(1)

        # Reduce the proposed assignment based on max_partition_movements
        # and max_leader_changes
        reduced_assignment = self.get_reduced_assignment(
            base_assignment,
            cluster_topology,
            self.args.max_partition_movements,
            self.args.max_leader_changes,
            max_movement_size=self.args.max_movement_size,
            force_progress=self.args.force_progress,
        )
        if reduced_assignment:
            self.process_assignment(reduced_assignment)
        else:
            msg_str = "Cluster already balanced. No more replicas in decommissioned brokers."
            self.log.info(msg_str)
            print(msg_str)
Esempio n. 21
0
    def run_command(self, cluster_topology, cluster_balancer):
        """Get executable proposed plan(if any) for display or execution.

        Steps, in order:

        1. If ``self.args.max_movement_size`` is set, check that it does not
           leave a single broker with too much weight in partitions larger
           than that size; exit with status 1 if it does. Otherwise, if
           ``self.args.auto_max_movement_size`` is set, use the largest
           partition size as the max movement size.
        2. Record the assignment, the balancer score and the replication
           group imbalance, call ``cluster_balancer.rebalance()``, and
           record them again.
        3. Optionally display stats, validate the new assignment against
           the old one (exit status 1 on failure), and apply the
           ``--score-improvement-threshold`` check, returning early when the
           assignment should not be applied.
        4. Reduce the change set and pass it to ``self.process_assignment``.
        """

        # The ideal weight of each broker is total_weight / broker_count.
        # It should be possible to remove partitions from each broker until
        # the weight of the broker is less than this ideal value, otherwise it
        # is impossible to balance the cluster. If --max-movement-size is too
        # small, exit with an error.
        if self.args.max_movement_size:
            total_weight = sum(
                partition.weight
                for partition in six.itervalues(cluster_topology.partitions)
            )
            broker_count = len(cluster_topology.brokers)
            optimal_weight = total_weight / broker_count

            # Broker holding the most weight in partitions too large to move.
            broker, max_unmovable_on_one_broker = max((
                (broker, sum(
                    partition.weight
                    for partition in broker.partitions
                    if partition.size > self.args.max_movement_size
                ))
                for broker in cluster_topology.brokers.values()),
                key=lambda t: t[1],
            )

            if max_unmovable_on_one_broker >= optimal_weight:
                # Oversized partitions on that broker, largest first.
                sorted_partitions = sorted(
                    [
                        partition
                        for partition in broker.partitions
                        if partition.size > self.args.max_movement_size
                    ],
                    reverse=True,
                    key=lambda partition: partition.size,
                )

                # With no oversized partition at all (only possible when the
                # comparison above is 0 >= optimal_weight), the max movement
                # size is not what blocks balancing: do not report an error.
                if sorted_partitions:
                    # The largest oversized size makes every partition on
                    # this broker movable, so it is a safe starting value;
                    # the loop below replaces it with the size at which the
                    # remaining unmovable weight drops to the optimal weight.
                    required_max_movement_size = sorted_partitions[0].size
                    for partition in sorted_partitions:
                        max_unmovable_on_one_broker -= partition.weight
                        if max_unmovable_on_one_broker <= optimal_weight:
                            required_max_movement_size = partition.size
                            break

                    self.log.error(
                        'Max movement size {max_movement_size} is too small, it is'
                        ' not be possible to balance the cluster. A max movement'
                        ' size of {required} or higher is required.'.format(
                            max_movement_size=self.args.max_movement_size,
                            required=required_max_movement_size,
                        )
                    )
                    sys.exit(1)
        elif self.args.auto_max_movement_size:
            self.args.max_movement_size = max(
                partition.size
                for partition in six.itervalues(cluster_topology.partitions)
            )
            self.log.info(
                'Auto-max-movement-size: using {max_movement_size} as'
                ' max-movement-size.'.format(
                    max_movement_size=self.args.max_movement_size,
                )
            )

        # State before rebalancing.
        base_assignment = cluster_topology.assignment
        base_score = cluster_balancer.score()
        rg_imbalance, _ = get_replication_group_imbalance_stats(
            list(cluster_topology.rgs.values()),
            list(cluster_topology.partitions.values())
        )

        cluster_balancer.rebalance()

        # State after rebalancing.
        assignment = cluster_topology.assignment
        score = cluster_balancer.score()
        new_rg_imbalance, _ = get_replication_group_imbalance_stats(
            list(cluster_topology.rgs.values()),
            list(cluster_topology.partitions.values())
        )

        if self.args.show_stats:
            display_cluster_topology_stats(cluster_topology, base_assignment)
            # Scores are only printed when both were available.
            if base_score is not None and score is not None:
                print('\nScore before: %f' % base_score)
                print('Score after:  %f' % score)
                print('Score improvement: %f' % (score - base_score))

        if not validate_plan(
            assignment_to_plan(assignment),
            assignment_to_plan(base_assignment),
        ):
            self.log.error('Invalid latest-cluster assignment. Exiting.')
            sys.exit(1)

        if self.args.score_improvement_threshold:
            if base_score is None or score is None:
                self.log.error(
                    '%s cannot assign scores so --score-improvement-threshold'
                    ' cannot be used.',
                    cluster_balancer.__class__.__name__,
                )
                return
            else:
                score_improvement = score - base_score
                if score_improvement >= self.args.score_improvement_threshold:
                    self.log.info(
                        'Score improvement %f is greater than the threshold %f.'
                        ' Continuing to apply the assignment.',
                        score_improvement,
                        self.args.score_improvement_threshold,
                    )
                elif new_rg_imbalance < rg_imbalance:
                    # Below the threshold, but the replication-group
                    # imbalance went down, so the assignment is still applied.
                    self.log.info(
                        'Score improvement %f is less than the threshold %f,'
                        ' but replica balance has improved. Continuing to'
                        ' apply the assignment.',
                        score_improvement,
                        self.args.score_improvement_threshold,
                    )
                else:
                    self.log.info(
                        'Score improvement %f is less than the threshold %f.'
                        ' Assignment will not be applied.',
                        score_improvement,
                        self.args.score_improvement_threshold,
                    )
                    return

        # Reduce the proposed assignment based on max_partition_movements
        # and max_leader_changes
        reduced_assignment = self.get_reduced_assignment(
            base_assignment,
            cluster_topology,
            self.args.max_partition_movements,
            self.args.max_leader_changes,
        )
        if reduced_assignment:
            self.process_assignment(reduced_assignment)
        else:
            self.log.info("Cluster already balanced. No actions to perform.")
Esempio n. 22
0
    def run_command(self, cluster_topology, cluster_balancer):
        """Decommission the brokers listed in ``self.args.broker_ids``.

        Checks that the movement-size options allow the decommission to make
        progress, asks the balancer to decommission the brokers, validates
        the resulting assignment against the one read beforehand and then
        processes the reduced assignment (if any).

        :param cluster_topology: object exposing ``brokers`` (indexed by
            broker id, each with a ``partitions`` collection) and
            ``assignment``.
        :param cluster_balancer: object providing ``decommission_brokers``.

        Exits the process with status 1 when ``--force-progress`` is used
        without an explicit max movement size, when the max movement size is
        smaller than the largest partition to move and ``--force-progress``
        is not set, or when the resulting assignment fails validation.
        """
        # If the max_movement_size is still default, then the user did not
        # input a value for it
        if self.args.force_progress and self.args.max_movement_size == DEFAULT_MAX_MOVEMENT_SIZE:
            self.log.error(
                '--force-progress must be used with --max-movement-size', )
            sys.exit(1)

        # Collect every partition hosted on the brokers being decommissioned
        partitions_to_move = set()
        for broker in self.args.broker_ids:
            partitions_to_move.update(
                cluster_topology.brokers[broker].partitions)

        # max()/min() raise ValueError on an empty sequence. When the brokers
        # host no partitions there is nothing to size-check, so skip straight
        # to the decommission, which then reports that no replicas remain.
        if partitions_to_move:
            sizes = [partition.size for partition in partitions_to_move]
            largest_size = max(sizes)
            smallest_size = min(sizes)

            if self.args.auto_max_movement_size:
                self.args.max_movement_size = largest_size
                self.log.info(
                    'Auto-max-movement-size: using {max_movement_size} as'
                    ' max-movement-size.'.format(
                        max_movement_size=self.args.max_movement_size, ))

            if self.args.max_movement_size and self.args.max_movement_size < largest_size:
                if not self.args.force_progress:
                    self.log.error(
                        'Max partition movement size is only {max_movement_size},'
                        ' but remaining partitions to move range from {smallest_size} to'
                        ' {largest_size}. The decommission will not make progress'.
                        format(
                            max_movement_size=self.args.max_movement_size,
                            smallest_size=smallest_size,
                            largest_size=largest_size,
                        ))
                    sys.exit(1)
                else:
                    self.log.warning(
                        'Max partition movement size is only {max_movement_size},'
                        ' but remaining partitions to move range from {smallest_size} to'
                        ' {largest_size}. The decommission may be slower than expected'
                        .format(
                            max_movement_size=self.args.max_movement_size,
                            smallest_size=smallest_size,
                            largest_size=largest_size,
                        ))

        # Read the assignment before the balancer runs so the proposed
        # assignment (read again below) can be validated against it.
        base_assignment = cluster_topology.assignment

        cluster_balancer.decommission_brokers(self.args.broker_ids)

        if not validate_plan(
                assignment_to_plan(cluster_topology.assignment),
                assignment_to_plan(base_assignment),
        ):
            self.log.error('Invalid assignment %s.',
                           cluster_topology.assignment)
            print(
                'Invalid assignment: {0}'.format(cluster_topology.assignment),
                file=sys.stderr,
            )
            sys.exit(1)

        # Reduce the proposed assignment based on max_partition_movements
        # and max_leader_changes
        reduced_assignment = self.get_reduced_assignment(
            base_assignment,
            cluster_topology,
            self.args.max_partition_movements,
            self.args.max_leader_changes,
            max_movement_size=self.args.max_movement_size,
            force_progress=self.args.force_progress,
        )
        if reduced_assignment:
            self.process_assignment(reduced_assignment)
        else:
            msg_str = "Cluster already balanced. No more replicas in decommissioned brokers."
            self.log.info(msg_str)
            print(msg_str)
Esempio n. 23
0
    def run_command(self, cluster_topology, cluster_balancer):
        """Get executable proposed plan(if any) for display or execution.

        Verifies that ``--max-movement-size`` (when given) still allows the
        cluster to be balanced, runs the balancer, optionally prints
        statistics, validates the new assignment against the original one,
        applies the score-improvement threshold and finally processes the
        reduced assignment (if any).

        :param cluster_topology: object exposing ``partitions``, ``brokers``
            and ``rgs`` mappings and an ``assignment`` attribute.
        :param cluster_balancer: object providing ``rebalance`` and
            ``score``.

        Exits the process with status 1 when the max movement size is too
        small to balance the cluster or when the new assignment fails
        validation. Returns without applying anything when the score
        threshold is set but scores are unavailable, or when neither the
        score nor the replication-group imbalance improved enough.
        """
        # The ideal weight of each broker is total_weight / broker_count.
        # It should be possible to remove partitions from each broker until
        # the weight of the broker is less than this ideal value, otherwise it
        # is impossible to balance the cluster. If --max-movement-size is too
        # small, exit with an error.
        if self.args.max_movement_size:
            # .values() (not the Python-2-only .itervalues()) keeps this in
            # line with the other mapping accesses in this method.
            total_weight = sum(
                partition.weight
                for partition in cluster_topology.partitions.values()
            )
            broker_count = len(cluster_topology.brokers)
            optimal_weight = total_weight / broker_count

            # Broker carrying the most weight in partitions too large to move
            broker, max_unmovable_on_one_broker = max((
                (broker, sum(
                    partition.weight
                    for partition in broker.partitions
                    if partition.size > self.args.max_movement_size
                ))
                for broker in cluster_topology.brokers.values()),
                key=lambda t: t[1],
            )

            # Unmovable partitions on that broker, largest first
            sorted_partitions = sorted(
                [
                    partition
                    for partition in broker.partitions
                    if partition.size > self.args.max_movement_size
                ],
                reverse=True,
                key=lambda partition: partition.size,
            )

            # Only report an error when some partition is actually unmovable;
            # with none, the max movement size cannot be what prevents
            # balancing (e.g. when every weight is zero).
            if sorted_partitions and max_unmovable_on_one_broker >= optimal_weight:
                # Fallback so the name is always bound: allowing the largest
                # partition to move makes every partition movable.
                required_max_movement_size = sorted_partitions[0].size

                for partition in sorted_partitions:
                    max_unmovable_on_one_broker -= partition.weight
                    if max_unmovable_on_one_broker <= optimal_weight:
                        required_max_movement_size = partition.size
                        break

                self.log.error(
                    'Max movement size {max_movement_size} is too small, it is'
                    ' not be possible to balance the cluster. A max movement'
                    ' size of {required} or higher is required.'.format(
                        max_movement_size=self.args.max_movement_size,
                        required=required_max_movement_size,
                    )
                )
                sys.exit(1)
        elif self.args.auto_max_movement_size:
            self.args.max_movement_size = max(
                partition.size
                for partition in cluster_topology.partitions.values()
            )
            self.log.info(
                'Auto-max-movement-size: using {max_movement_size} as'
                ' max-movement-size.'.format(
                    max_movement_size=self.args.max_movement_size,
                )
            )

        # Snapshot assignment, score and replication-group imbalance before
        # rebalancing so they can be compared with the values read afterwards.
        base_assignment = cluster_topology.assignment
        base_score = cluster_balancer.score()
        rg_imbalance, _ = get_replication_group_imbalance_stats(
            cluster_topology.rgs.values(),
            cluster_topology.partitions.values()
        )

        cluster_balancer.rebalance()

        assignment = cluster_topology.assignment
        score = cluster_balancer.score()
        new_rg_imbalance, _ = get_replication_group_imbalance_stats(
            cluster_topology.rgs.values(),
            cluster_topology.partitions.values()
        )

        if self.args.show_stats:
            display_cluster_topology_stats(cluster_topology, base_assignment)
            if base_score is not None and score is not None:
                print('\nScore before: %f' % base_score)
                print('Score after:  %f' % score)
                print('Score improvement: %f' % (score - base_score))

        if not validate_plan(
            assignment_to_plan(assignment),
            assignment_to_plan(base_assignment),
        ):
            self.log.error('Invalid latest-cluster assignment. Exiting.')
            sys.exit(1)

        if self.args.score_improvement_threshold:
            if base_score is None or score is None:
                self.log.error(
                    '%s cannot assign scores so --score-improvement-threshold'
                    ' cannot be used.',
                    cluster_balancer.__class__.__name__,
                )
                return
            else:
                score_improvement = score - base_score
                if score_improvement >= self.args.score_improvement_threshold:
                    self.log.info(
                        'Score improvement %f is greater than the threshold %f.'
                        ' Continuing to apply the assignment.',
                        score_improvement,
                        self.args.score_improvement_threshold,
                    )
                elif new_rg_imbalance < rg_imbalance:
                    # A below-threshold score is still accepted when the
                    # replication-group imbalance went down.
                    self.log.info(
                        'Score improvement %f is less than the threshold %f,'
                        ' but replica balance has improved. Continuing to'
                        ' apply the assignment.',
                        score_improvement,
                        self.args.score_improvement_threshold,
                    )
                else:
                    self.log.info(
                        'Score improvement %f is less than the threshold %f.'
                        ' Assignment will not be applied.',
                        score_improvement,
                        self.args.score_improvement_threshold,
                    )
                    return

        # Reduce the proposed assignment based on max_partition_movements
        # and max_leader_changes
        reduced_assignment = self.get_reduced_assignment(
            base_assignment,
            assignment,
            self.args.max_partition_movements,
            self.args.max_leader_changes,
        )
        if reduced_assignment:
            self.process_assignment(reduced_assignment)
        else:
            self.log.info("Cluster already balanced. No actions to perform.")