コード例 #1
0
ファイル: ssh.py プロジェクト: gitter-badger/kafka-tools
    def get_partition_sizes(self):
        """Collect on-disk partition sizes from every broker over SSH.

        For each broker in ``self.cluster.brokers`` this connects with
        ``self._client``, runs ``du -sk <datadir>/*`` and parses every output
        line shaped like ``<size> <path>/<topic>-<partition>``. The size of a
        known partition is stored through ``set_size``; a directory naming an
        unknown topic or an out-of-range partition number only produces a
        warning. Lines that do not match the pattern are ignored.

        The SSH client is closed when the method finishes, including when it
        finishes by raising.

        Raises:
            UnknownBrokerException: if a broker in the cluster has no hostname.
        """
        # Raw string: \s and \/ are regex escapes, not (invalid) string escapes
        size_re = re.compile(r"^([0-9]+)\s+.*?\/([a-z0-9_-]+)-([0-9]+)\s*$", re.I)
        try:
            for broker_id, broker in self.cluster.brokers.items():
                if broker.hostname is None:
                    raise UnknownBrokerException("Cannot get sizes for broker ID {0} which has no hostname. "
                                                 "Remove the broker from the cluster before balance".format(broker_id))

                log.info("Getting partition sizes via SSH for {0}".format(broker.hostname))
                self._client.connect(broker.hostname, allow_agent=True)
                # Only stdout carries the du report; the other two streams are unused
                _stdin, stdout, _stderr = self._client.exec_command('du -sk {0}/*'.format(self.args.datadir))
                for ln in stdout.readlines():
                    m = size_re.match(ln)
                    if not m:
                        continue

                    size = int(m.group(1))
                    topic = m.group(2)
                    pnum = int(m.group(3))

                    if topic not in self.cluster.topics:
                        log.warn("Unknown topic found on disk on broker {0}: {1}".format(broker, topic))
                    elif pnum >= len(self.cluster.topics[topic].partitions):
                        log.warn("Unknown partition found on disk on broker {0}: {1}:{2}".format(broker, topic, pnum))
                    else:
                        self.cluster.topics[topic].partitions[pnum].set_size(size)
        finally:
            # Release the SSH client even if a broker fails part-way through
            self._client.close()
コード例 #2
0
 def check_topic_ok(self, topic):
     """Tell whether ``topic`` is eligible for processing.

     A topic is rejected (with a warning) when its partition count is not a
     multiple of the number of brokers in the cluster, or when any partition
     has a different number of replicas than the first partition.

     Returns:
         True if the topic passes both checks, False otherwise.
     """
     partition_count = len(topic.partitions)
     broker_count = len(self.cluster.brokers)
     if partition_count % broker_count != 0:
         log.warn("Skipping topic {0} as it has {1} partitions, which is not a multiple of the number of brokers ({2})".format(
             topic.name, partition_count, broker_count))
         return False

     # Every partition must match the replica count of the first partition
     for partition in topic.partitions:
         if len(partition.replicas) != len(topic.partitions[0].replicas):
             log.warn("Skipping topic {0} as not all partitions have the same replication factor".format(topic.name))
             return False

     return True
コード例 #3
0
ファイル: ssh.py プロジェクト: prezi/kafka-tools
    def process_df_match(self, match_obj, broker_id):
        """Store the partition size captured by one matched line of disk-usage output.

        ``match_obj`` carries three groups: size, topic name and partition
        number. A falsy ``match_obj`` (a line that did not match) is ignored.
        When the topic is not in the cluster, or the partition number is not
        below the topic's partition count, a warning naming ``broker_id`` is
        logged and nothing is stored.
        """
        if not match_obj:
            return

        reported_size = int(match_obj.group(1))
        topic_name = match_obj.group(2)
        partition_num = int(match_obj.group(3))

        topics = self.cluster.topics
        if topic_name not in topics:
            log.warn("Unknown topic found on disk on broker {0}: {1}".format(broker_id, topic_name))
            return

        partitions = topics[topic_name].partitions
        if partition_num >= len(partitions):
            log.warn("Unknown partition found on disk on broker {0}: {1}:{2}".format(broker_id, topic_name, partition_num))
            return

        partitions[partition_num].set_size(reported_size)
コード例 #4
0
ファイル: ssh.py プロジェクト: toddpalino/kafka-tools
    def process_df_match(self, match_obj, broker_id):
        """Apply one parsed disk-usage line to the cluster model.

        Does nothing when ``match_obj`` is falsy. Otherwise groups 1-3 of the
        match are read as size, topic name and partition number, and the size
        is stored on the matching partition with ``set_size``. An unknown
        topic, or a partition number at or beyond the topic's partition count,
        is reported with a warning that names ``broker_id``.
        """
        if not match_obj:
            return

        disk_size = int(match_obj.group(1))
        name = match_obj.group(2)
        index = int(match_obj.group(3))

        known_topics = self.cluster.topics
        if name in known_topics and index < len(known_topics[name].partitions):
            known_topics[name].partitions[index].set_size(disk_size)
        elif name in known_topics:
            log.warn("Unknown partition found on disk on broker {0}: {1}:{2}".format(broker_id, name, index))
        else:
            log.warn("Unknown topic found on disk on broker {0}: {1}".format(broker_id, name))
コード例 #5
0
ファイル: clone.py プロジェクト: bingyuac/kafka-tools
    def process_cluster(self):
        """Add the target broker to every partition that has a replica on a source broker.

        Partitions with no replica on any broker ID in ``self.sources`` are
        left alone. If ``self.to_broker`` is already a replica of the partition
        it is moved to the first position (a warning is logged). Otherwise it
        is added at position 0 when the current first replica is a source
        broker, and at position 1 when it is not.
        """
        source_ids = set(self.sources)
        for partition in self.cluster.partitions():
            replica_ids = set([replica.id for replica in partition.replicas])
            if len(source_ids & replica_ids) == 0:
                continue

            current_leader = partition.replicas[0]
            if self.to_broker in partition.replicas:
                log.warn("Target broker (ID {0}) is already in the replica list for {1}:{2}".format(self.to_broker.id, partition.topic.name, partition.num))

                # A target that is already a replica is always promoted to the leader slot
                if self.to_broker != current_leader:
                    partition.swap_replica_positions(self.to_broker, current_leader)
            else:
                # Take over leadership only when a source broker currently leads; otherwise go second
                position = 0 if current_leader.id in self.sources else 1
                partition.add_replica(self.to_broker, position)
コード例 #6
0
ファイル: even.py プロジェクト: gitter-badger/kafka-tools
    def process_cluster(self):
        """Even out, per topic, how many replicas each broker holds at each replica position.

        A topic is skipped with a warning when its partition count is not a
        multiple of the broker count, or when its partitions do not all have
        the same number of replicas. A topic with no partitions is skipped
        silently. For every other topic, a replica whose broker holds more
        than ``partitions / brokers`` replicas at that position is handed to a
        broker holding fewer: through ``swap_replica_positions`` when that
        broker is already a replica of the partition, and through
        ``swap_replicas`` otherwise.
        """
        for topic in self.cluster.topics:
            partitions = self.cluster.topics[topic].partitions
            broker_count = len(self.cluster.brokers)

            # Nothing to balance, and partitions[0] below would raise IndexError
            if not partitions:
                continue

            if len(partitions) % broker_count != 0:
                log.warn("Skipping topic {0} as it has {1} partitions, which is not a multiple of the number of brokers ({2})".format(
                    topic, len(partitions), broker_count))
                continue

            rf = len(partitions[0].replicas)
            # Exact thanks to the multiple-of check above; floor division keeps the target an int
            target = len(partitions) // broker_count

            # Warn once per topic rather than once per mismatching partition
            if any(len(partition.replicas) != rf for partition in partitions):
                log.warn("Skipping topic {0} as not all partitions have the same replication factor".format(topic))
                continue

            # pmap[pos][broker_id] counts this topic's partitions whose replica at
            # position pos lives on broker_id
            pmap = [dict.fromkeys(self.cluster.brokers.keys(), 0) for pos in range(rf)]
            for partition in partitions:
                for i, replica in enumerate(partition.replicas):
                    pmap[i][replica.id] += 1

            for partition in partitions:
                for pos in range(rf):
                    # Current placement is fine. Leave the replica where it is
                    if pmap[pos][partition.replicas[pos].id] <= target:
                        continue

                    # Find a new replica for the partition at this position
                    for bid in pmap[pos]:
                        if pmap[pos][bid] >= target:
                            continue
                        broker = self.cluster.brokers[bid]
                        source = partition.replicas[pos]

                        if broker in partition.replicas:
                            # The two brokers trade positions, so the counts at the
                            # other position change as well
                            other_pos = partition.replicas.index(broker)
                            partition.swap_replica_positions(source, broker)
                            pmap[other_pos][broker.id] -= 1
                            pmap[other_pos][source.id] += 1
                        else:
                            partition.swap_replicas(source, broker)

                        pmap[pos][broker.id] += 1
                        pmap[pos][source.id] -= 1
                        break
コード例 #7
0
ファイル: clone.py プロジェクト: simplesteph/kafka-tools
    def process_cluster(self):
        """Clone source-broker replicas onto the target broker.

        Walks the cluster's partitions (passing ``self.args.exclude_topics``
        to ``cluster.partitions``) and acts on each partition that has at
        least one replica whose broker ID is in ``self.sources``. A target
        broker that is not yet a replica is added at position 0 if the first
        replica is a source broker, else at position 1. A target broker that
        is already a replica is logged and swapped into the first position.
        """
        source_ids = set(self.sources)
        target = self.to_broker

        for partition in self.cluster.partitions(self.args.exclude_topics):
            if not any(replica.id in source_ids
                       for replica in partition.replicas):
                continue

            if target not in partition.replicas:
                # The target leads only if a source broker leads now; otherwise it goes second
                leader_is_source = partition.replicas[0].id in self.sources
                partition.add_replica(target, 0 if leader_is_source else 1)
                continue

            log.warn(
                "Target broker (ID {0}) is already in the replica list for {1}:{2}"
                .format(target.id, partition.topic.name, partition.num))

            # An existing replica on the target ALWAYS becomes the leader
            leader = partition.replicas[0]
            if target != leader:
                partition.swap_replica_positions(target, leader)