def get_partition_sizes(self):
    """Record the on-disk size of every known partition on every broker.

    For each broker in ``self.cluster.brokers`` this connects with
    ``self._client``, runs ``du -sk <datadir>/*`` and parses each output
    line of the form ``<size> <path>/<topic>-<partition>``.  The size is
    stored on the matching partition via ``set_size``.  Directories that
    name a topic or partition number the cluster does not know about are
    logged and skipped; lines that do not match the pattern are ignored.

    Raises:
        UnknownBrokerException: if a broker has no hostname, since there
            is nothing to connect to.
    """
    # Raw string: "\s" in a plain literal is an invalid escape sequence.
    size_re = re.compile(r"^([0-9]+)\s+.*?\/([a-z0-9_-]+)-([0-9]+)\s*$", re.I)

    for broker_id, broker in self.cluster.brokers.items():
        if broker.hostname is None:
            raise UnknownBrokerException("Cannot get sizes for broker ID {0} which has no hostname. "
                                         "Remove the broker from the cluster before balance".format(broker_id))

        log.info("Getting partition sizes via SSH for {0}".format(broker.hostname))
        self._client.connect(broker.hostname, allow_agent=True)
        try:
            _, stdout, _ = self._client.exec_command('du -sk {0}/*'.format(self.args.datadir))
            for ln in stdout.readlines():
                m = size_re.match(ln)
                if not m:
                    continue

                size = int(m.group(1))
                topic = m.group(2)
                pnum = int(m.group(3))

                if topic not in self.cluster.topics:
                    log.warning("Unknown topic found on disk on broker {0}: {1}".format(broker_id, topic))
                elif pnum >= len(self.cluster.topics[topic].partitions):
                    log.warning("Unknown partition found on disk on broker {0}: {1}:{2}".format(broker_id, topic, pnum))
                else:
                    self.cluster.topics[topic].partitions[pnum].set_size(size)
        finally:
            # Always release the connection, even if the remote command or
            # the parsing above fails part-way through.
            self._client.close()
def check_topic_ok(self, topic):
    """Report whether ``topic`` is eligible for processing.

    A topic is rejected (with a logged warning) when its partition count
    is not a multiple of the number of brokers in the cluster, or when any
    partition has a different number of replicas than the first partition.

    Returns:
        bool: True if the topic passes both checks, False otherwise.
    """
    partition_count = len(topic.partitions)
    broker_count = len(self.cluster.brokers)

    if partition_count % broker_count != 0:
        log.warn("Skipping topic {0} as it has {1} partitions, which is not a multiple of the number of brokers ({2})".format(
            topic.name, partition_count, broker_count))
        return False

    for partition in topic.partitions:
        # Compare against the first partition's replica count; evaluated
        # here so that a topic without partitions never indexes [0].
        if len(partition.replicas) != len(topic.partitions[0].replicas):
            log.warn("Skipping topic {0} as not all partitions have the same replication factor".format(topic.name))
            return False

    return True
def process_df_match(self, match_obj, broker_id):
    """Apply one parsed size line to the cluster's partition data.

    Args:
        match_obj: a match whose groups 1, 2 and 3 hold the size, the
            topic name and the partition number; a falsy value means the
            line did not match and nothing is done.
        broker_id: identifier of the broker the line came from, used only
            in warning messages.

    Unknown topics and out-of-range partition numbers are logged and
    otherwise ignored.
    """
    if not match_obj:
        return

    disk_size = int(match_obj.group(1))
    topic_name = match_obj.group(2)
    partition_num = int(match_obj.group(3))

    known_topics = self.cluster.topics
    if topic_name not in known_topics:
        log.warn("Unknown topic found on disk on broker {0}: {1}".format(broker_id, topic_name))
        return

    partitions = known_topics[topic_name].partitions
    if partition_num >= len(partitions):
        log.warn("Unknown partition found on disk on broker {0}: {1}:{2}".format(broker_id, topic_name, partition_num))
        return

    partitions[partition_num].set_size(disk_size)
def process_cluster(self):
    """Place the target broker on every partition hosted by a source broker.

    Partitions with no replica on any broker in ``self.sources`` are left
    untouched.  For the others:

    * if the target broker is already a replica, a warning is logged and
      it is moved to the first (leader) position;
    * otherwise it is added at position 0 when the current leader is a
      source broker, or at position 1 when it is not.
    """
    source_set = set(self.sources)
    target = self.to_broker

    for partition in self.cluster.partitions():
        overlap = source_set.intersection(replica.id for replica in partition.replicas)
        if not overlap:
            continue

        if target in partition.replicas:
            log.warn("Target broker (ID {0}) is already in the replica list for {1}:{2}".format(target.id, partition.topic.name, partition.num))
            # A target that is already a replica always ends up as leader.
            leader = partition.replicas[0]
            if target != leader:
                partition.swap_replica_positions(target, leader)
            continue

        # Take over leadership only when a source broker currently leads;
        # otherwise slot in directly behind the existing leader.
        position = 0 if partition.replicas[0].id in self.sources else 1
        partition.add_replica(target, position)
def process_cluster(self):
    """Even out replica placement, per topic and per replica position.

    For every topic the goal is that each broker holds the same number of
    replicas at each position of the replica list (position 0, position 1,
    ...).  That per-broker goal is ``partitions // brokers``.

    Topics are skipped when they have no partitions, when the partition
    count is not a multiple of the broker count (logged), or when their
    partitions do not all share one replication factor (logged once).

    For each over-subscribed (position, broker) pair a broker that is
    under the goal at that position is chosen.  If that broker already
    holds another replica of the partition the two positions are swapped;
    otherwise it replaces the current replica outright.
    """
    brokers = self.cluster.brokers
    broker_count = len(brokers)

    for topic_name in self.cluster.topics:
        partitions = self.cluster.topics[topic_name].partitions

        # An empty topic has nothing to balance, and partitions[0] below
        # would raise IndexError for it.
        if len(partitions) == 0:
            continue

        if len(partitions) % broker_count != 0:
            log.warning("Skipping topic {0} as it has {1} partitions, which is not a multiple of the number of brokers ({2})".format(
                topic_name, len(partitions), broker_count))
            continue

        rf = len(partitions[0].replicas)
        if any(len(partition.replicas) != rf for partition in partitions):
            log.warning("Skipping topic {0} as not all partitions have the same replication factor".format(topic_name))
            continue

        # Exact because of the divisibility check above; floor division
        # keeps the goal an integer on Python 3 as well.
        target = len(partitions) // broker_count

        # pmap[pos][broker_id] = number of partitions of this topic whose
        # replica at position `pos` lives on that broker.
        pmap = [dict.fromkeys(brokers.keys(), 0) for _ in range(rf)]
        for partition in partitions:
            for i, replica in enumerate(partition.replicas):
                pmap[i][replica.id] += 1

        for partition in partitions:
            for pos in range(rf):
                source = partition.replicas[pos]

                # Current placement is fine. Leave the replica where it is
                if pmap[pos][source.id] <= target:
                    continue

                # Find a new replica for the partition at this position
                for bid in pmap[pos]:
                    if pmap[pos][bid] >= target:
                        continue

                    broker = brokers[bid]
                    if broker in partition.replicas:
                        # The chosen broker already serves this partition
                        # at another position: trade places and account
                        # for the change at that other position too.
                        other_pos = partition.replicas.index(broker)
                        partition.swap_replica_positions(source, broker)
                        pmap[other_pos][broker.id] -= 1
                        pmap[other_pos][source.id] += 1
                    else:
                        partition.swap_replicas(source, broker)

                    pmap[pos][broker.id] += 1
                    pmap[pos][source.id] -= 1
                    break
def process_cluster(self):
    """Add the target broker to partitions that live on a source broker.

    Iterates the partitions returned by ``self.cluster.partitions`` for
    ``self.args.exclude_topics`` and ignores any partition without a
    replica on a broker listed in ``self.sources``.  Where the target is
    not yet a replica it is inserted at position 0 if a source broker is
    the current leader, else at position 1.  Where it already is a
    replica, a warning is logged and it is swapped into the leader slot.
    """
    sources = set(self.sources)

    for partition in self.cluster.partitions(self.args.exclude_topics):
        replicas = partition.replicas
        shared = sources & set(replica.id for replica in replicas)
        if len(shared) == 0:
            continue

        if self.to_broker not in replicas:
            # If one of the source brokers is currently the leader, the
            # target broker is the leader. Otherwise it goes in second place.
            if replicas[0].id in self.sources:
                insert_at = 0
            else:
                insert_at = 1
            partition.add_replica(self.to_broker, insert_at)
            continue

        log.warn(
            "Target broker (ID {0}) is already in the replica list for {1}:{2}"
            .format(self.to_broker.id, partition.topic.name, partition.num))

        # If the broker is already in the replica list, it ALWAYS becomes the leader
        if self.to_broker != replicas[0]:
            partition.swap_replica_positions(self.to_broker, replicas[0])