def run_command(self, cluster_topology, cluster_balancer):
    """Revoke leadership from ``self.args.broker_ids`` and process the result.

    The assignment is captured before ``cluster_balancer.revoke_leadership``
    is called; the assignment read afterwards is validated against that
    snapshot. An invalid result is logged, echoed on stderr and the process
    exits with status 1. Otherwise the reduced assignment (if any) is handed
    to ``process_assignment``.
    """
    # Snapshot of the assignment before the balancer is invoked.
    base_assignment = cluster_topology.assignment
    cluster_balancer.revoke_leadership(self.args.broker_ids)

    proposed_plan = assignment_to_plan(cluster_topology.assignment)
    current_plan = assignment_to_plan(base_assignment)
    if not validate_plan(proposed_plan, current_plan):
        self.log.error('Invalid assignment %s.', cluster_topology.assignment)
        print(
            'Invalid assignment: {0}'.format(cluster_topology.assignment),
            file=sys.stderr,
        )
        sys.exit(1)

    # Reduce the proposal based on max_leader_changes; the number of
    # partition movements is fixed at 0 for this command.
    partition_movements = 0
    reduced_assignment = self.get_reduced_assignment(
        base_assignment,
        cluster_topology,
        partition_movements,
        self.args.max_leader_changes,
    )
    if not reduced_assignment:
        msg = (
            "Cluster already balanced. No more partitions as leaders in "
            "revoked-leadership brokers."
        )
        self.log.info(msg)
        print(msg)
        return
    self.process_assignment(reduced_assignment)
def test_validate_plan_invalid_format():
    """A plan whose partition id is the string '0' is expected to fail."""
    # The first entry carries the malformed (string) partition id.
    malformed_entry = {"partition": '0', "topic": u't1', "replicas": [2, 1, 0]}
    proposed_plan = {
        "version": 1,
        "partitions": [
            malformed_entry,
            {"partition": 0, "topic": u't2', "replicas": [0, 3]},
        ],
    }
    current_plan = {
        "version": 1,
        "partitions": [
            {"partition": 0, "topic": u't1', "replicas": [0, 2, 3]},
            {"partition": 1, "topic": u't1', "replicas": [0, 1, 2]},
            {"partition": 0, "topic": u't2', "replicas": [0, 1]},
        ],
    }

    # Verify validation failed
    outcome = validate_plan(proposed_plan, current_plan)
    assert outcome is False
def run_command(self, cluster_topology, cluster_balancer):
    """Revoke leadership from ``self.args.broker_ids`` and process the result.

    Captures the assignment, calls ``cluster_balancer.revoke_leadership``,
    then validates the assignment read afterwards against the captured one.
    On validation failure the error is logged, printed to stderr and the
    process exits with status 1. Otherwise any non-empty reduced assignment
    is passed to ``process_assignment``.
    """
    # Snapshot of the assignment before the balancer is invoked.
    base_assignment = cluster_topology.assignment
    cluster_balancer.revoke_leadership(self.args.broker_ids)

    proposed_plan = assignment_to_plan(cluster_topology.assignment)
    current_plan = assignment_to_plan(base_assignment)
    plan_is_valid = validate_plan(proposed_plan, current_plan)
    if not plan_is_valid:
        self.log.error('Invalid assignment %s.', cluster_topology.assignment)
        print(
            'Invalid assignment: {0}'.format(cluster_topology.assignment),
            file=sys.stderr,
        )
        sys.exit(1)

    # Reduce the proposal based on max_leader_changes; the number of
    # partition movements is fixed at 0 for this command.
    partition_movements = 0
    reduced_assignment = self.get_reduced_assignment(
        base_assignment,
        cluster_topology.assignment,
        partition_movements,
        self.args.max_leader_changes,
    )
    if not reduced_assignment:
        msg = (
            "Cluster already balanced. No more partitions as leaders in "
            "revoked-leadership brokers."
        )
        self.log.info(msg)
        print(msg)
        return
    self.process_assignment(reduced_assignment)
def test_validate_plan_3():
    """A plan listing only some of the base partitions is expected to pass."""
    # Proposed plan: t1-0 and t2-0 only (t1-1 is left out).
    proposed_partitions = [
        {"partition": 0, "topic": u't1', "replicas": [2, 1, 0]},
        {"partition": 0, "topic": u't2', "replicas": [0, 3]},
    ]
    current_partitions = [
        {"partition": 0, "topic": u't1', "replicas": [0, 2, 3]},
        {"partition": 1, "topic": u't1', "replicas": [0, 1, 2]},
        {"partition": 0, "topic": u't2', "replicas": [0, 1]},
    ]
    assignment = {"version": 1, "partitions": proposed_partitions}
    base_assignment = {"version": 1, "partitions": current_partitions}

    # Verify valid plan
    outcome = validate_plan(assignment, base_assignment)
    assert outcome is True
def test_validate_plan_incomplete_partition_subset_2():
    """A full plan naming a partition absent from the base is expected to fail.

    The proposed plan contains every base partition plus t3-0, which the
    base assignment does not have; with ``is_partition_subset=False`` the
    test expects ``validate_plan`` to return False.
    """
    proposed_partitions = [
        {"partition": 0, "topic": u't1', "replicas": [2, 1, 0]},
        {"partition": 1, "topic": u't1', "replicas": [0, 1, 2]},
        {"partition": 0, "topic": u't2', "replicas": [0, 1]},
        # Not present in the base assignment below.
        {"partition": 0, "topic": u't3', "replicas": [0, 1]},
    ]
    current_partitions = [
        {"partition": 0, "topic": u't1', "replicas": [0, 2, 3]},
        {"partition": 1, "topic": u't1', "replicas": [0, 1, 2]},
        {"partition": 0, "topic": u't2', "replicas": [0, 1]},
    ]
    complete_assignment = {"version": 1, "partitions": proposed_partitions}
    base_assignment = {"version": 1, "partitions": current_partitions}

    # Verify validation failed
    outcome = validate_plan(
        complete_assignment,
        base_assignment,
        is_partition_subset=False,
    )
    assert outcome is False
def run_command(self, cluster_topology, cluster_balancer):
    """Decommission ``self.args.broker_ids`` and process the resulting plan.

    The assignment is captured before ``cluster_balancer.decommission_brokers``
    runs and the assignment read afterwards is validated against it. An
    invalid result is logged, printed to stderr and the process exits with
    status 1. A non-empty reduced assignment goes to ``process_assignment``;
    otherwise an "already balanced" message is logged and printed.
    """
    # Snapshot of the assignment before the balancer is invoked.
    base_assignment = cluster_topology.assignment
    cluster_balancer.decommission_brokers(self.args.broker_ids)

    proposed_plan = assignment_to_plan(cluster_topology.assignment)
    current_plan = assignment_to_plan(base_assignment)
    if not validate_plan(proposed_plan, current_plan):
        self.log.error('Invalid assignment %s.', cluster_topology.assignment)
        print(
            'Invalid assignment: {0}'.format(cluster_topology.assignment),
            file=sys.stderr,
        )
        sys.exit(1)

    # Reduce the proposal based on max_partition_movements and
    # max_leader_changes.
    reduced_assignment = self.get_reduced_assignment(
        base_assignment,
        cluster_topology.assignment,
        self.args.max_partition_movements,
        self.args.max_leader_changes,
    )
    if not reduced_assignment:
        nothing_to_do = (
            "Cluster already balanced. No more replicas in "
            "decommissioned brokers."
        )
        self.log.info(nothing_to_do)
        print(nothing_to_do)
        return
    self.process_assignment(reduced_assignment)
def run_command(self, cluster_topology):
    """Replace ``self.args.source_broker`` with ``self.args.dest_broker``.

    Exits with status 1 when source and destination are the same broker,
    or when the assignment read after ``replace_broker`` fails validation
    against the assignment captured beforehand. Otherwise a non-empty
    reduced assignment is passed to ``process_assignment``; an empty one
    results in an informational message being logged and printed.
    """
    if self.args.source_broker == self.args.dest_broker:
        # This is an error, so report it on stderr and exit non-zero, the
        # same way the invalid-assignment error below is reported.
        print(
            "Error: Destination broker is same as source broker.",
            file=sys.stderr,
        )
        sys.exit(1)

    # Snapshot of the assignment before the topology is modified.
    base_assignment = cluster_topology.assignment
    cluster_topology.replace_broker(self.args.source_broker, self.args.dest_broker)

    if not validate_plan(
        assignment_to_plan(cluster_topology.assignment),
        assignment_to_plan(base_assignment),
    ):
        self.log.error('Invalid assignment %s.', cluster_topology.assignment)
        print(
            'Invalid assignment: {0}'.format(cluster_topology.assignment),
            file=sys.stderr,
        )
        sys.exit(1)

    # Reduce the proposed assignment based on max_partition_movements
    # and max_leader_changes
    reduced_assignment = self.get_reduced_assignment(
        base_assignment,
        cluster_topology.assignment,
        self.args.max_partition_movements,
        self.args.max_leader_changes,
    )
    if reduced_assignment:
        self.process_assignment(reduced_assignment)
    else:
        msg = "Broker already replaced. No more replicas in source broker."
        self.log.info(msg)
        print(msg)
def run_command(self, cluster_topology):
    """Decommission ``self.args.broker_ids`` and process the resulting plan.

    The assignment is captured before ``cluster_topology.decommission_brokers``
    runs and the assignment read afterwards is validated against it. An
    invalid result is logged, printed to stderr and the process exits with
    status 1. A non-empty reduced assignment goes to ``process_assignment``;
    otherwise an "already balanced" message is logged and printed.
    """
    # Snapshot of the assignment before the topology is modified.
    base_assignment = cluster_topology.assignment
    cluster_topology.decommission_brokers(self.args.broker_ids)

    proposed_plan = assignment_to_plan(cluster_topology.assignment)
    current_plan = assignment_to_plan(base_assignment)
    if not validate_plan(proposed_plan, current_plan):
        self.log.error('Invalid assignment %s.', cluster_topology.assignment)
        print(
            'Invalid assignment: {0}'.format(cluster_topology.assignment),
            file=sys.stderr,
        )
        sys.exit(1)

    # Reduce the proposal based on max_partition_movements and
    # max_leader_changes.
    reduced_assignment = self.get_reduced_assignment(
        base_assignment,
        cluster_topology.assignment,
        self.args.max_partition_movements,
        self.args.max_leader_changes,
    )
    if not reduced_assignment:
        nothing_to_do = (
            "Cluster already balanced. No more replicas in "
            "decommissioned brokers."
        )
        self.log.info(nothing_to_do)
        print(nothing_to_do)
        return
    self.process_assignment(reduced_assignment)
def test_validate_plan_1():
    """A plan given without any base assignment is expected to pass."""
    # Only the proposed plan is supplied; no base assignment is passed.
    proposed_partitions = [
        {"partition": 0, "topic": u't1', "replicas": [1, 4]},
        {"partition": 0, "topic": u't2', "replicas": [0, 1]},
    ]
    assignment = {"version": 1, "partitions": proposed_partitions}

    # Verify valid plan
    outcome = validate_plan(assignment)
    assert outcome is True
def test_validate_plan_duplicate_partition():
    """A plan listing the same topic-partition twice is expected to fail."""
    # Both entries describe t1-0, with different replica lists.
    first_entry = {"partition": 0, "topic": u't1', "replicas": [2, 1, 0]}
    duplicate_entry = {"partition": 0, "topic": u't1', "replicas": [0, 3]}
    assignment = {
        "version": 1,
        "partitions": [first_entry, duplicate_entry],
    }

    # Verify validation failed
    outcome = validate_plan(assignment)
    assert outcome is False
def run_command(self, cluster_topology, cluster_balancer):
    """Replace ``self.args.source_broker`` with ``self.args.dest_broker``.

    Exits with status 1 when source and destination are the same broker,
    or when the assignment read after ``replace_broker`` fails validation
    against the assignment captured beforehand. A warning is logged when
    no destination broker is given. When ``self.args.topic_partition_filter``
    is set, the base assignment is narrowed to the entries contained in
    ``self.get_topic_filter()`` before the proposal is reduced.

    ``cluster_balancer`` is accepted but not used by this command.
    """
    if self.args.source_broker == self.args.dest_broker:
        # This is an error, so report it on stderr and exit non-zero, the
        # same way the invalid-assignment error below is reported.
        print(
            "Error: Destination broker is same as source broker.",
            file=sys.stderr,
        )
        sys.exit(1)
    if self.args.dest_broker is None:
        self.log.warning('This will shrink the replica set of topics.')

    # Snapshot of the assignment before the topology is modified.
    base_assignment = cluster_topology.assignment
    cluster_topology.replace_broker(self.args.source_broker, self.args.dest_broker)

    if not validate_plan(
        assignment_to_plan(cluster_topology.assignment),
        assignment_to_plan(base_assignment),
        allow_rf_change=self.args.rf_change,
        allow_rf_mismatch=self.args.rf_mismatch,
    ):
        self.log.error('Invalid assignment %s.', cluster_topology.assignment)
        print(
            'Invalid assignment: {0}'.format(cluster_topology.assignment),
            file=sys.stderr,
        )
        sys.exit(1)

    # Reduce the proposed assignment based on the topic_partition_filter, if provided
    if self.args.topic_partition_filter:
        self.log.info("Using provided filter list")
        filter_set = self.get_topic_filter()
        base_assignment = {
            t_p: replica
            for t_p, replica in six.iteritems(base_assignment)
            if t_p in filter_set
        }

    # Reduce the proposed assignment based on max_partition_movements
    # and max_leader_changes
    reduced_assignment = self.get_reduced_assignment(
        base_assignment,
        cluster_topology,
        self.args.max_partition_movements,
        self.args.max_leader_changes,
    )
    if reduced_assignment:
        self.process_assignment(
            reduced_assignment,
            allow_rf_change=self.args.rf_change,
            allow_rf_mismatch=self.args.rf_mismatch,
        )
    else:
        msg = "Broker already replaced. No more replicas in source broker."
        self.log.info(msg)
        print(msg)
def execute_plan(self, plan, allow_rf_change=False, allow_rf_mismatch=False):
    """Validate ``plan`` and submit it to Zookeeper for execution.

    The plan is checked with ``validate_plan`` against the cluster plan
    fetched for the topics it names; an invalid plan is logged and never
    sent. ``allow_rf_change`` and ``allow_rf_mismatch`` are forwarded to
    ``validate_plan`` unchanged.

    Returns True when the reassignment node was created. Returns False when
    the plan is invalid, when the node already exists (the in-progress
    partitions are logged), or when any other exception is raised while
    creating it.
    """
    reassignment_path = '{admin}/{reassignment_node}'.format(
        admin=ADMIN_PATH,
        reassignment_node=REASSIGNMENT_NODE,
    )
    plan_json = dump_json(plan)

    # Only the topics named by the proposed plan are fetched for comparison.
    topic_names_from_proposed_plan = {
        entry['topic'] for entry in plan['partitions']
    }
    base_plan = self.get_cluster_plan(
        topic_names=list(topic_names_from_proposed_plan),
    )
    plan_is_valid = validate_plan(
        plan,
        base_plan,
        allow_rf_change=allow_rf_change,
        allow_rf_mismatch=allow_rf_mismatch,
    )
    if not plan_is_valid:
        _log.error(
            'Given plan is invalid. Aborting new reassignment plan ... {plan}'
            .format(plan=plan)
        )
        return False

    # Send proposed-plan to zookeeper
    try:
        _log.info('Sending plan to Zookeeper...')
        self.create(reassignment_path, plan_json, makepath=True)
        _log.info(
            'Re-assign partitions node in Zookeeper updated successfully '
            'with {plan}'.format(plan=plan),
        )
        return True
    except NodeExistsError:
        _log.warning('Previous plan in progress. Exiting..')
        _log.warning(
            'Aborting new reassignment plan... {plan}'.format(plan=plan)
        )
        raw_in_progress = self.get(reassignment_path)[0]
        in_progress_plan = load_json(raw_in_progress)
        in_progress_partitions = []
        for p_data in in_progress_plan['partitions']:
            in_progress_partitions.append(
                '{topic}-{p_id}'.format(
                    topic=p_data['topic'],
                    p_id=str(p_data['partition']),
                )
            )
        _log.warning(
            '{count} partition(s) reassignment currently in progress:-'
            .format(count=len(in_progress_partitions)),
        )
        _log.warning(
            '{partitions}. In Progress reassignment plan...'.format(
                partitions=', '.join(in_progress_partitions),
            ),
        )
        return False
    except Exception as e:
        _log.error(
            'Could not re-assign partitions {plan}. Error: {e}'.format(
                plan=plan,
                e=e,
            ),
        )
        return False
def execute_plan(self, plan, allow_rf_change=False):
    """Validate ``plan`` and submit it to Zookeeper for execution.

    The plan is checked with ``validate_plan`` against the plan returned by
    ``get_cluster_plan``; an invalid plan is logged and never sent.
    ``allow_rf_change`` is forwarded to ``validate_plan`` unchanged.

    Returns True when the reassignment node was created. Returns False when
    the plan is invalid, when the node already exists (the in-progress
    partitions are logged), or when any other exception is raised while
    creating it.
    """
    reassignment_path = '{admin}/{reassignment_node}'.format(
        admin=ADMIN_PATH,
        reassignment_node=REASSIGNMENT_NODE,
    )
    plan_json = dump_json(plan)
    base_plan = self.get_cluster_plan()
    plan_is_valid = validate_plan(
        plan,
        base_plan,
        allow_rf_change=allow_rf_change,
    )
    if not plan_is_valid:
        _log.error(
            'Given plan is invalid. Aborting new reassignment plan ... {plan}'
            .format(plan=plan)
        )
        return False

    # Send proposed-plan to zookeeper
    try:
        _log.info('Sending plan to Zookeeper...')
        self.create(reassignment_path, plan_json, makepath=True)
        _log.info(
            'Re-assign partitions node in Zookeeper updated successfully '
            'with {plan}'.format(plan=plan),
        )
        return True
    except NodeExistsError:
        _log.warning('Previous plan in progress. Exiting..')
        _log.warning(
            'Aborting new reassignment plan... {plan}'.format(plan=plan)
        )
        raw_in_progress = self.get(reassignment_path)[0]
        in_progress_plan = load_json(raw_in_progress)
        in_progress_partitions = []
        for p_data in in_progress_plan['partitions']:
            in_progress_partitions.append(
                '{topic}-{p_id}'.format(
                    topic=p_data['topic'],
                    p_id=str(p_data['partition']),
                )
            )
        _log.warning(
            '{count} partition(s) reassignment currently in progress:-'
            .format(count=len(in_progress_partitions)),
        )
        _log.warning(
            '{partitions}. In Progress reassignment plan...'.format(
                partitions=', '.join(in_progress_partitions),
            ),
        )
        return False
    except Exception as e:
        _log.error(
            'Could not re-assign partitions {plan}. Error: {e}'.format(
                plan=plan,
                e=e,
            ),
        )
        return False
def test_validate_plan_3():
    """A plan listing only some of the base partitions is expected to pass."""
    # Proposed plan covers t1-0 and t2-0; the base also contains t1-1.
    assignment = {
        "version": 1,
        "partitions": [
            {"partition": 0, "topic": u't1', "replicas": [2, 1, 0]},
            {"partition": 0, "topic": u't2', "replicas": [0, 3]},
        ],
    }
    base_assignment = {
        "version": 1,
        "partitions": [
            {"partition": 0, "topic": u't1', "replicas": [0, 2, 3]},
            {"partition": 1, "topic": u't1', "replicas": [0, 1, 2]},
            {"partition": 0, "topic": u't2', "replicas": [0, 1]},
        ],
    }

    # Verify valid plan
    plan_accepted = validate_plan(assignment, base_assignment)
    assert plan_accepted is True
def test_validate_plan_invalid_format():
    """A plan whose partition id is the string '0' is expected to fail."""
    proposed_partitions = [
        # Invalid format: partition-id string
        {"partition": '0', "topic": u't1', "replicas": [2, 1, 0]},
        {"partition": 0, "topic": u't2', "replicas": [0, 3]},
    ]
    current_partitions = [
        {"partition": 0, "topic": u't1', "replicas": [0, 2, 3]},
        {"partition": 1, "topic": u't1', "replicas": [0, 1, 2]},
        {"partition": 0, "topic": u't2', "replicas": [0, 1]},
    ]
    assignment = {"version": 1, "partitions": proposed_partitions}
    base_assignment = {"version": 1, "partitions": current_partitions}

    # Verify validation failed
    plan_accepted = validate_plan(assignment, base_assignment)
    assert plan_accepted is False
def test_validate_plan_2():
    """A plan naming exactly the base partitions is expected to pass.

    ``is_partition_subset=False`` is passed and the proposed plan lists the
    same three topic-partitions as the base assignment.
    """
    proposed_partitions = [
        {"partition": 0, "topic": u't1', "replicas": [2, 1, 0]},
        {"partition": 1, "topic": u't1', "replicas": [0, 1, 2]},
        {"partition": 0, "topic": u't2', "replicas": [0, 3]},
    ]
    current_partitions = [
        {"partition": 0, "topic": u't1', "replicas": [0, 2, 3]},
        {"partition": 1, "topic": u't1', "replicas": [0, 1, 2]},
        {"partition": 0, "topic": u't2', "replicas": [0, 1]},
    ]
    assignment = {"version": 1, "partitions": proposed_partitions}
    base_assignment = {"version": 1, "partitions": current_partitions}

    # Verify valid plan
    plan_accepted = validate_plan(
        assignment,
        base_assignment,
        is_partition_subset=False,
    )
    assert plan_accepted is True
def run_command(self, ct):
    """Build a balanced assignment for ``ct`` and process it if non-empty.

    The assignment returned by ``build_balanced_assignment`` is validated
    against the assignment captured beforehand; on failure an error is
    logged and the process exits with status 1. The proposal is then reduced
    using ``max_partition_movements`` and ``max_leader_changes`` and, when
    anything remains, passed to ``process_assignment``.
    """
    # Snapshot of the assignment before the balanced one is built.
    base_assignment = ct.assignment
    assignment = self.build_balanced_assignment(ct)

    proposed_plan = assignment_to_plan(assignment)
    current_plan = assignment_to_plan(base_assignment)
    if not validate_plan(proposed_plan, current_plan):
        self.log.error('Invalid latest-cluster assignment. Exiting.')
        sys.exit(1)

    reduced_assignment = self.get_reduced_assignment(
        base_assignment,
        assignment,
        self.args.max_partition_movements,
        self.args.max_leader_changes,
    )
    if not reduced_assignment:
        self.log.info("Cluster already balanced. No actions to perform.")
        return
    self.process_assignment(reduced_assignment)
def run_command(self, ct):
    """Build a balanced assignment for ``ct`` and process it if non-empty.

    Steps: capture the current assignment, build the balanced one, validate
    it against the captured one (logging an error and exiting with status 1
    on failure), reduce it using ``max_partition_movements`` and
    ``max_leader_changes``, then pass any remaining changes to
    ``process_assignment``.
    """
    # Snapshot of the assignment before the balanced one is built.
    base_assignment = ct.assignment
    assignment = self.build_balanced_assignment(ct)

    plan_is_valid = validate_plan(
        assignment_to_plan(assignment),
        assignment_to_plan(base_assignment),
    )
    if not plan_is_valid:
        self.log.error('Invalid latest-cluster assignment. Exiting.')
        sys.exit(1)

    max_movements = self.args.max_partition_movements
    max_leader_changes = self.args.max_leader_changes
    reduced_assignment = self.get_reduced_assignment(
        base_assignment,
        assignment,
        max_movements,
        max_leader_changes,
    )
    if not reduced_assignment:
        self.log.info("Cluster already balanced. No actions to perform.")
        return
    self.process_assignment(reduced_assignment)
def run_command(self, cluster_topology, cluster_balancer):
    """Replace ``self.args.source_broker`` with ``self.args.dest_broker``.

    Exits with status 1 when source and destination are the same broker,
    or when the assignment read after ``replace_broker`` fails validation
    against the assignment captured beforehand. Otherwise a non-empty
    reduced assignment is passed to ``process_assignment``; an empty one
    results in an informational message being logged and printed.

    ``cluster_balancer`` is accepted but not used by this command.
    """
    if self.args.source_broker == self.args.dest_broker:
        # This is an error, so report it on stderr and exit non-zero, the
        # same way the invalid-assignment error below is reported.
        print(
            "Error: Destination broker is same as source broker.",
            file=sys.stderr,
        )
        sys.exit(1)

    # Snapshot of the assignment before the topology is modified.
    base_assignment = cluster_topology.assignment
    cluster_topology.replace_broker(self.args.source_broker, self.args.dest_broker)

    if not validate_plan(
        assignment_to_plan(cluster_topology.assignment),
        assignment_to_plan(base_assignment),
    ):
        self.log.error('Invalid assignment %s.', cluster_topology.assignment)
        print(
            'Invalid assignment: {0}'.format(cluster_topology.assignment),
            file=sys.stderr,
        )
        sys.exit(1)

    # Reduce the proposed assignment based on max_partition_movements
    # and max_leader_changes
    reduced_assignment = self.get_reduced_assignment(
        base_assignment,
        cluster_topology.assignment,
        self.args.max_partition_movements,
        self.args.max_leader_changes,
    )
    if reduced_assignment:
        self.process_assignment(reduced_assignment)
    else:
        msg = "Broker already replaced. No more replicas in source broker."
        self.log.info(msg)
        print(msg)
def run_command(self, cluster_topology, cluster_balancer):
    """Decommission ``self.args.broker_ids`` and process the resulting plan.

    Before decommissioning, the sizes of the partitions hosted on the
    listed brokers are compared with ``--max-movement-size``:

    * ``--force-progress`` without ``--max-movement-size`` is an error
      (exit status 1).
    * With ``--auto-max-movement-size`` the largest partition size becomes
      the max movement size.
    * A max movement size smaller than the largest partition is an error
      (exit status 1) unless ``--force-progress`` is set, in which case a
      warning is logged instead.

    When the listed brokers host no partitions, the size checks are skipped
    and ``self.args.max_movement_size`` is left untouched.

    The assignment read after ``decommission_brokers`` is validated against
    the one captured beforehand (exit status 1 on failure), reduced, and
    passed to ``process_assignment`` when non-empty.
    """
    if self.args.force_progress and self.args.max_movement_size is None:
        self.log.error(
            '--force-progress must be used with --max-movement-size',
        )
        sys.exit(1)

    # Collect the set of partitions hosted on the brokers being removed.
    partitions_to_move = set()
    for broker in self.args.broker_ids:
        partitions_to_move.update(cluster_topology.brokers[broker].partitions)

    # max()/min() raise ValueError on an empty sequence, so the size checks
    # only run when there is at least one partition to move.
    sizes = [partition.size for partition in partitions_to_move]
    if sizes:
        largest_size = max(sizes)
        smallest_size = min(sizes)

        if self.args.auto_max_movement_size:
            self.args.max_movement_size = largest_size
            self.log.info(
                'Auto-max-movement-size: using {max_movement_size} as'
                ' max-movement-size.'.format(
                    max_movement_size=self.args.max_movement_size,
                )
            )

        if self.args.max_movement_size and self.args.max_movement_size < largest_size:
            if not self.args.force_progress:
                self.log.error(
                    'Max partition movement size is only {max_movement_size},'
                    ' but remaining partitions to move range from {smallest_size} to'
                    ' {largest_size}. The decommission will not make progress'.format(
                        max_movement_size=self.args.max_movement_size,
                        smallest_size=smallest_size,
                        largest_size=largest_size,
                    )
                )
                sys.exit(1)
            else:
                self.log.warning(
                    'Max partition movement size is only {max_movement_size},'
                    ' but remaining partitions to move range from {smallest_size} to'
                    ' {largest_size}. The decommission may be slower than expected'.format(
                        max_movement_size=self.args.max_movement_size,
                        smallest_size=smallest_size,
                        largest_size=largest_size,
                    )
                )

    # Snapshot of the assignment before the balancer is invoked.
    base_assignment = cluster_topology.assignment
    cluster_balancer.decommission_brokers(self.args.broker_ids)

    if not validate_plan(
        assignment_to_plan(cluster_topology.assignment),
        assignment_to_plan(base_assignment),
    ):
        self.log.error('Invalid assignment %s.', cluster_topology.assignment)
        print(
            'Invalid assignment: {0}'.format(cluster_topology.assignment),
            file=sys.stderr,
        )
        sys.exit(1)

    # Reduce the proposed assignment based on max_partition_movements
    # and max_leader_changes
    reduced_assignment = self.get_reduced_assignment(
        base_assignment,
        cluster_topology,
        self.args.max_partition_movements,
        self.args.max_leader_changes,
        max_movement_size=self.args.max_movement_size,
        force_progress=self.args.force_progress,
    )
    if reduced_assignment:
        self.process_assignment(reduced_assignment)
    else:
        msg_str = "Cluster already balanced. No more replicas in decommissioned brokers."
        self.log.info(msg_str)
        print(msg_str)
def run_command(self, cluster_topology, cluster_balancer):
    """Rebalance the cluster and process the resulting plan, if any.

    When ``--max-movement-size`` is set, the largest per-broker total weight
    of partitions bigger than that size is compared with the ideal broker
    weight (total weight / broker count); if it is not below the ideal, an
    error naming a sufficient size is logged and the process exits with
    status 1. With ``--auto-max-movement-size`` (and no explicit size) the
    largest partition size is used.

    After ``cluster_balancer.rebalance()`` the new assignment is validated
    against the one captured beforehand (exit status 1 on failure). With
    ``--score-improvement-threshold`` the assignment is applied only when
    the score gain reaches the threshold or the replication-group imbalance
    decreased.
    """
    def oversized_partitions(candidate):
        # Partitions too large to be moved under the configured limit.
        return [
            partition for partition in candidate.partitions
            if partition.size > self.args.max_movement_size
        ]

    if self.args.max_movement_size:
        total_weight = sum(
            partition.weight
            for partition in six.itervalues(cluster_topology.partitions)
        )
        broker_count = len(cluster_topology.brokers)
        # The ideal weight of each broker is total_weight / broker_count.
        optimal_weight = total_weight / broker_count

        # Broker carrying the most weight in partitions it cannot shed.
        broker, max_unmovable_on_one_broker = max(
            (
                (candidate, sum(p.weight for p in oversized_partitions(candidate)))
                for candidate in cluster_topology.brokers.values()
            ),
            key=lambda pair: pair[1],
        )
        if max_unmovable_on_one_broker >= optimal_weight:
            # Walk the oversized partitions from largest to smallest until
            # the remaining unmovable weight fits under the ideal weight.
            sorted_partitions = sorted(
                oversized_partitions(broker),
                key=lambda partition: partition.size,
                reverse=True,
            )
            for partition in sorted_partitions:
                max_unmovable_on_one_broker -= partition.weight
                if max_unmovable_on_one_broker <= optimal_weight:
                    required_max_movement_size = partition.size
                    break
            self.log.error(
                'Max movement size {max_movement_size} is too small, it is'
                ' not be possible to balance the cluster. A max movement'
                ' size of {required} or higher is required.'.format(
                    max_movement_size=self.args.max_movement_size,
                    required=required_max_movement_size,
                )
            )
            sys.exit(1)
    elif self.args.auto_max_movement_size:
        self.args.max_movement_size = max(
            partition.size
            for partition in six.itervalues(cluster_topology.partitions)
        )
        self.log.info(
            'Auto-max-movement-size: using {max_movement_size} as'
            ' max-movement-size.'.format(
                max_movement_size=self.args.max_movement_size,
            )
        )

    # State before rebalancing.
    base_assignment = cluster_topology.assignment
    base_score = cluster_balancer.score()
    rg_imbalance, _ = get_replication_group_imbalance_stats(
        list(cluster_topology.rgs.values()),
        list(cluster_topology.partitions.values()),
    )

    cluster_balancer.rebalance()

    # State after rebalancing.
    assignment = cluster_topology.assignment
    score = cluster_balancer.score()
    new_rg_imbalance, _ = get_replication_group_imbalance_stats(
        list(cluster_topology.rgs.values()),
        list(cluster_topology.partitions.values()),
    )

    if self.args.show_stats:
        display_cluster_topology_stats(cluster_topology, base_assignment)
        if base_score is not None and score is not None:
            print('\nScore before: %f' % base_score)
            print('Score after:  %f' % score)
            print('Score improvement: %f' % (score - base_score))

    proposed_plan = assignment_to_plan(assignment)
    current_plan = assignment_to_plan(base_assignment)
    if not validate_plan(proposed_plan, current_plan):
        self.log.error('Invalid latest-cluster assignment. Exiting.')
        sys.exit(1)

    if self.args.score_improvement_threshold:
        if base_score is None or score is None:
            self.log.error(
                '%s cannot assign scores so --score-improvement-threshold'
                ' cannot be used.',
                cluster_balancer.__class__.__name__,
            )
            return

        score_improvement = score - base_score
        if score_improvement >= self.args.score_improvement_threshold:
            self.log.info(
                'Score improvement %f is greater than the threshold %f.'
                ' Continuing to apply the assignment.',
                score_improvement,
                self.args.score_improvement_threshold,
            )
        elif new_rg_imbalance < rg_imbalance:
            self.log.info(
                'Score improvement %f is less than the threshold %f,'
                ' but replica balance has improved. Continuing to'
                ' apply the assignment.',
                score_improvement,
                self.args.score_improvement_threshold,
            )
        else:
            self.log.info(
                'Score improvement %f is less than the threshold %f.'
                ' Assignment will not be applied.',
                score_improvement,
                self.args.score_improvement_threshold,
            )
            return

    # Reduce the proposed assignment based on max_partition_movements
    # and max_leader_changes
    reduced_assignment = self.get_reduced_assignment(
        base_assignment,
        cluster_topology,
        self.args.max_partition_movements,
        self.args.max_leader_changes,
    )
    if not reduced_assignment:
        self.log.info("Cluster already balanced. No actions to perform.")
        return
    self.process_assignment(reduced_assignment)
def run_command(self, cluster_topology, cluster_balancer):
    """Decommission ``self.args.broker_ids`` and process the resulting plan.

    Before decommissioning, the sizes of the partitions hosted on the
    listed brokers are compared with ``--max-movement-size``:

    * ``--force-progress`` with the max movement size still at
      ``DEFAULT_MAX_MOVEMENT_SIZE`` is an error (exit status 1).
    * With ``--auto-max-movement-size`` the largest partition size becomes
      the max movement size.
    * A max movement size smaller than the largest partition is an error
      (exit status 1) unless ``--force-progress`` is set, in which case a
      warning is logged instead.

    When the listed brokers host no partitions, the size checks are skipped
    and ``self.args.max_movement_size`` is left untouched.

    The assignment read after ``decommission_brokers`` is validated against
    the one captured beforehand (exit status 1 on failure), reduced, and
    passed to ``process_assignment`` when non-empty.
    """
    # If the max_movement_size is still default, then the user did not input a value for it
    if self.args.force_progress and self.args.max_movement_size == DEFAULT_MAX_MOVEMENT_SIZE:
        self.log.error(
            '--force-progress must be used with --max-movement-size',
        )
        sys.exit(1)

    # Collect the set of partitions hosted on the brokers being removed.
    partitions_to_move = set()
    for broker in self.args.broker_ids:
        partitions_to_move.update(cluster_topology.brokers[broker].partitions)

    # max()/min() raise ValueError on an empty sequence, so the size checks
    # only run when there is at least one partition to move.
    sizes = [partition.size for partition in partitions_to_move]
    if sizes:
        largest_size = max(sizes)
        smallest_size = min(sizes)

        if self.args.auto_max_movement_size:
            self.args.max_movement_size = largest_size
            self.log.info(
                'Auto-max-movement-size: using {max_movement_size} as'
                ' max-movement-size.'.format(
                    max_movement_size=self.args.max_movement_size,
                )
            )

        if self.args.max_movement_size and self.args.max_movement_size < largest_size:
            if not self.args.force_progress:
                self.log.error(
                    'Max partition movement size is only {max_movement_size},'
                    ' but remaining partitions to move range from {smallest_size} to'
                    ' {largest_size}. The decommission will not make progress'.format(
                        max_movement_size=self.args.max_movement_size,
                        smallest_size=smallest_size,
                        largest_size=largest_size,
                    )
                )
                sys.exit(1)
            else:
                self.log.warning(
                    'Max partition movement size is only {max_movement_size},'
                    ' but remaining partitions to move range from {smallest_size} to'
                    ' {largest_size}. The decommission may be slower than expected'.format(
                        max_movement_size=self.args.max_movement_size,
                        smallest_size=smallest_size,
                        largest_size=largest_size,
                    )
                )

    # Snapshot of the assignment before the balancer is invoked.
    base_assignment = cluster_topology.assignment
    cluster_balancer.decommission_brokers(self.args.broker_ids)

    if not validate_plan(
        assignment_to_plan(cluster_topology.assignment),
        assignment_to_plan(base_assignment),
    ):
        self.log.error('Invalid assignment %s.', cluster_topology.assignment)
        print(
            'Invalid assignment: {0}'.format(cluster_topology.assignment),
            file=sys.stderr,
        )
        sys.exit(1)

    # Reduce the proposed assignment based on max_partition_movements
    # and max_leader_changes
    reduced_assignment = self.get_reduced_assignment(
        base_assignment,
        cluster_topology,
        self.args.max_partition_movements,
        self.args.max_leader_changes,
        max_movement_size=self.args.max_movement_size,
        force_progress=self.args.force_progress,
    )
    if reduced_assignment:
        self.process_assignment(reduced_assignment)
    else:
        msg_str = "Cluster already balanced. No more replicas in decommissioned brokers."
        self.log.info(msg_str)
        print(msg_str)
def run_command(self, cluster_topology, cluster_balancer):
    """Rebalance the cluster and process the resulting plan, if any.

    When ``--max-movement-size`` is set, the largest per-broker total weight
    of partitions bigger than that size is compared with the ideal broker
    weight (total weight / broker count); if it is not below the ideal, an
    error naming a sufficient size is logged and the process exits with
    status 1. With ``--auto-max-movement-size`` (and no explicit size) the
    largest partition size is used.

    After ``cluster_balancer.rebalance()`` the new assignment is validated
    against the one captured beforehand (exit status 1 on failure). With
    ``--score-improvement-threshold`` the assignment is applied only when
    the score gain reaches the threshold or the replication-group imbalance
    decreased.
    """
    # The ideal weight of each broker is total_weight / broker_count.
    # It should be possible to remove partitions from each broker until
    # the weight of the broker is less than this ideal value, otherwise it
    # is impossible to balance the cluster. If --max-movement-size is too
    # small, exit with an error.
    if self.args.max_movement_size:
        # .values() rather than the Python-2-only .itervalues(), so the
        # block runs on both Python 2 and Python 3.
        total_weight = sum(
            partition.weight
            for partition in cluster_topology.partitions.values()
        )
        broker_count = len(cluster_topology.brokers)
        optimal_weight = total_weight / broker_count

        # Broker carrying the most weight in partitions larger than the
        # allowed movement size.
        broker, max_unmovable_on_one_broker = max((
            (broker, sum(
                partition.weight
                for partition in broker.partitions
                if partition.size > self.args.max_movement_size
            ))
            for broker in cluster_topology.brokers.values()),
            key=lambda t: t[1],
        )

        if max_unmovable_on_one_broker >= optimal_weight:
            # Walk the oversized partitions from largest to smallest until
            # the remaining unmovable weight fits under the ideal weight;
            # the size reached at that point is reported as required.
            sorted_partitions = sorted(
                [
                    partition
                    for partition in broker.partitions
                    if partition.size > self.args.max_movement_size
                ],
                reverse=True,
                key=lambda partition: partition.size,
            )
            for partition in sorted_partitions:
                max_unmovable_on_one_broker -= partition.weight
                if max_unmovable_on_one_broker <= optimal_weight:
                    required_max_movement_size = partition.size
                    break
            self.log.error(
                'Max movement size {max_movement_size} is too small, it is'
                ' not be possible to balance the cluster. A max movement'
                ' size of {required} or higher is required.'.format(
                    max_movement_size=self.args.max_movement_size,
                    required=required_max_movement_size,
                )
            )
            sys.exit(1)
    elif self.args.auto_max_movement_size:
        self.args.max_movement_size = max(
            partition.size
            for partition in cluster_topology.partitions.values()
        )
        self.log.info(
            'Auto-max-movement-size: using {max_movement_size} as'
            ' max-movement-size.'.format(
                max_movement_size=self.args.max_movement_size,
            )
        )

    # State before rebalancing. The values are materialised as lists so the
    # helper receives real sequences on Python 3 as well.
    base_assignment = cluster_topology.assignment
    base_score = cluster_balancer.score()
    rg_imbalance, _ = get_replication_group_imbalance_stats(
        list(cluster_topology.rgs.values()),
        list(cluster_topology.partitions.values())
    )

    cluster_balancer.rebalance()

    # State after rebalancing.
    assignment = cluster_topology.assignment
    score = cluster_balancer.score()
    new_rg_imbalance, _ = get_replication_group_imbalance_stats(
        list(cluster_topology.rgs.values()),
        list(cluster_topology.partitions.values())
    )

    if self.args.show_stats:
        display_cluster_topology_stats(cluster_topology, base_assignment)
        if base_score is not None and score is not None:
            print('\nScore before: %f' % base_score)
            print('Score after:  %f' % score)
            print('Score improvement: %f' % (score - base_score))

    if not validate_plan(
        assignment_to_plan(assignment),
        assignment_to_plan(base_assignment),
    ):
        self.log.error('Invalid latest-cluster assignment. Exiting.')
        sys.exit(1)

    if self.args.score_improvement_threshold:
        if base_score is None or score is None:
            self.log.error(
                '%s cannot assign scores so --score-improvement-threshold'
                ' cannot be used.',
                cluster_balancer.__class__.__name__,
            )
            return
        else:
            score_improvement = score - base_score
            if score_improvement >= self.args.score_improvement_threshold:
                self.log.info(
                    'Score improvement %f is greater than the threshold %f.'
                    ' Continuing to apply the assignment.',
                    score_improvement,
                    self.args.score_improvement_threshold,
                )
            elif new_rg_imbalance < rg_imbalance:
                self.log.info(
                    'Score improvement %f is less than the threshold %f,'
                    ' but replica balance has improved. Continuing to'
                    ' apply the assignment.',
                    score_improvement,
                    self.args.score_improvement_threshold,
                )
            else:
                self.log.info(
                    'Score improvement %f is less than the threshold %f.'
                    ' Assignment will not be applied.',
                    score_improvement,
                    self.args.score_improvement_threshold,
                )
                return

    # Reduce the proposed assignment based on max_partition_movements
    # and max_leader_changes
    reduced_assignment = self.get_reduced_assignment(
        base_assignment,
        assignment,
        self.args.max_partition_movements,
        self.args.max_leader_changes,
    )
    if reduced_assignment:
        self.process_assignment(reduced_assignment)
    else:
        self.log.info("Cluster already balanced. No actions to perform.")