Example #1
    def get_partition_sizes(self):
        # Get broker partition sizes
        FNULL = open(os.devnull, 'w')

        for broker_id, broker in self.cluster.brokers.items():
            if broker.hostname is None:
                raise UnknownBrokerException("Cannot get sizes for broker ID {0} which has no hostname. "
                                             "Remove the broker from the cluster before balance".format(broker_id))

            if 'sshuser' in self.properties:
                connection_endpoint = self.properties['sshuser']+'@'+broker.hostname
            else:
                connection_endpoint = broker.hostname

            if 'sshkey' in self.properties:
                key = self.properties['sshkey']
            else:
                key = None

            if key is None:
                log.info("Getting partition sizes via SSH for {0}".format(broker.hostname))
                proc = subprocess.Popen(['ssh', connection_endpoint, 'du -sk {0}/*'.format(self.properties['datadir'])],
                                        stdout=subprocess.PIPE, stderr=FNULL)
            else:
                log.info("Getting partition sizes via SSH using key: {0} for {1}".format(key, broker.hostname))
                proc = subprocess.Popen(['ssh', '-i', key, connection_endpoint,
                                         'du -sk {0}/*'.format(self.properties['datadir'])],
                                        stdout=subprocess.PIPE, stderr=FNULL)

            for line in proc.stdout:
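                # Each line of 'du -sk' output is the size in kilobytes followed by the directory path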
                self.process_df_match(self.size_re.match(line.decode()), broker_id)
Example #2
    def log_broker_summary(self):
        for broker_id in sorted(self.brokers.keys()):
            broker = self.brokers[broker_id]
            log.info(
                "Broker {0}: partitions={1}/{2} ({3:.2f}%), size={4}".format(
                    broker_id, broker.num_leaders(), broker.num_partitions(),
                    broker.percent_leaders(), broker.total_size()))
Example #3
    def _execute(self, num, total, zookeeper, tools_path):
        with NamedTemporaryFile(mode='w') as assignfile:
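            # Write the reassignment JSON to a named temp file so it can be passed to the CLI tool by path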
            json.dump(self.dict_for_reassignment(), assignfile)
            assignfile.flush()
            FNULL = open(os.devnull, 'w')
            proc = subprocess.Popen([
                '{0}/kafka-reassign-partitions.sh'.format(tools_path),
                '--execute', '--zookeeper', zookeeper,
                '--reassignment-json-file', assignfile.name
            ],
                                    stdout=FNULL,
                                    stderr=FNULL)
            proc.wait()

            # Wait until finished
            while True:
                remaining_partitions = self.check_completion(
                    zookeeper, tools_path, assignfile.name)
                if remaining_partitions == 0:
                    break

                log.info(
                    'Partition reassignment {0}/{1} in progress [ {2}/{3} partitions remain ]. Sleeping {4} seconds'
                    .format(num, total, remaining_partitions,
                            len(self.partitions), self.pause_time))
                time.sleep(self.pause_time)
Example #4
    def get_partition_sizes(self):
        self._validate_properties()
        for broker_id, broker in self.cluster.brokers.items():
            if broker.hostname is None:
                raise UnknownBrokerException("Cannot get sizes for broker ID {0} which has no hostname. "
                                             "Remove the broker from the cluster before balance".format(broker_id))

            log.info("Getting partition sizes via Prometheus exporter for {0}".format(broker.hostname))
            self._query_prometheus(broker.hostname)
Example #5
def run_preferred_replica_elections(batches, args, tools_path, plugins, dry_run):
    for i, batch in enumerate(batches):
        # Sleep between PLEs
        if i > 0 and not dry_run:
            log.info("Waiting {0} seconds for replica election to complete".format(args.ple_wait))
            time.sleep(args.ple_wait)

        log.info("Executing preferred replica election {0}/{1}".format(i + 1, len(batches)))
        batch.execute(i + 1, len(batches), args.zookeeper, tools_path, plugins, dry_run)
Example #6
    def close(self):
        log.info("Disconnecting from {0}".format(self.hostname))

        # Shutdown throws an error if the socket is not connected, but that's OK
        try:
            self._sock.shutdown(socket.SHUT_RDWR)
        except OSError:
            pass

        self._sock.close()
Example #7
def check_and_get_sizes(action_cls, args, cluster, sizer_map):
    if action_cls.needs_sizes:
        sizer_to_run = sizer_map[args.sizer](args, cluster)
        sizer_to_run.get_partition_sizes()

        if args.size:
            log.info("Partition Sizes:")
            for topic in cluster.topics:
                for partition in cluster.topics[topic].partitions:
                    log.info("{0} {1}:{2}".format(partition.size, topic, partition.num))
Example #8
    def connect(self):
        protocol = 'SSL' if self._configuration.ssl_context is not None else 'PLAINTEXT'
        endpoint = self.get_endpoint(protocol)

        log.info("Connecting to {0} on port {1} using {2}".format(self.hostname, self.port, protocol))
        try:
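            # Create the socket lazily on first use and reuse any existing one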
            self._sock = self._sock or self._get_socket(self._configuration.ssl_context)
            self._sock.connect((endpoint.hostname, endpoint.port))
        except socket.error as e:
            log.error("Cannot connect to broker {0}:{1}: {2}".format(endpoint.hostname, endpoint.port, e))
            raise ConnectionError("Cannot connect to broker {0}:{1}: {2}".format(endpoint.hostname, endpoint.port, e))
Example #9
    def close(self):
        log.info("Disconnecting from {0}".format(self.hostname))

        # Shutdown throws an error if the socket is not connected, but that's OK
        try:
            self._sock.shutdown(socket.SHUT_RDWR)
        except OSError:
            pass

        self._sock.close()
        self._sock = None
Example #10
def check_and_get_sizes(action_cls, args, cluster, sizer_map):
    if action_cls.needs_sizes:
        sizer_to_run = sizer_map[args.sizer](args, cluster)
        sizer_to_run.get_partition_sizes()

        if args.size:
            log.info("Partition Sizes:")
            for topic in cluster.topics:
                for partition in cluster.topics[topic].partitions:
                    log.info("{0} {1}:{2}".format(partition.size, topic,
                                                  partition.num))
Example #11
    def get_partition_sizes(self):
        self._validate_properties()
        for broker_id, broker in self.cluster.brokers.items():
            if broker.hostname is None:
                raise UnknownBrokerException(
                    "Cannot get sizes for broker ID {0} which has no hostname. "
                    "Remove the broker from the cluster before balance".format(
                        broker_id))

            log.info("Getting partition sizes via Prometheus exporter for {0}".
                     format(broker.hostname))
            self._query_prometheus(broker.hostname)
Example #12
    def get_partition_sizes(self):
        # Get broker partition sizes
        FNULL = open(os.devnull, 'w')

        for broker_id, broker in self.cluster.brokers.items():
            if broker.hostname is None:
                raise UnknownBrokerException("Cannot get sizes for broker ID {0} which has no hostname. "
                                             "Remove the broker from the cluster before balance".format(broker_id))

            log.info("Getting partition sizes via SSH for {0}".format(broker.hostname))
            proc = subprocess.Popen(['ssh', broker.hostname, 'du -sk {0}/*'.format(self.properties['datadir'])],
                                    stdout=subprocess.PIPE, stderr=FNULL)
            for line in proc.stdout:
                self.process_df_match(self.size_re.match(line.decode()), broker_id)
Example #13
def run_preferred_replica_elections(batches, args, tools_path, plugins,
                                    dry_run):
    for i, batch in enumerate(batches):
        # Sleep between PLEs
        if i > 0 and not dry_run:
            log.info(
                "Waiting {0} seconds for replica election to complete".format(
                    args.ple_wait))
            time.sleep(args.ple_wait)

        log.info("Executing preferred replica election {0}/{1}".format(
            i + 1, len(batches)))
        batch.execute(i + 1, len(batches), args.zookeeper, tools_path, plugins,
                      dry_run)
Example #14
    def process_cluster(self):
        log.info("Starting partition balance by rack")

        # Check if rack information is set for the cluster
        broker_racks = [broker.rack for broker in self.cluster.brokers.values()]
        if len(set(broker_racks)) == 1:
            raise BalanceException("Cannot balance cluster by rack as it has no rack information")

        # Figure out the max RF for the cluster
        max_rf = self.cluster.max_replication_factor()

        # Balance partitions at each position separately
        for pos in range(max_rf):
            self._process_partitions_at_pos(pos)
Example #15
def main():
    # Start by loading all the modules
    action_map = get_module_map(kafka.tools.assigner.actions, kafka.tools.assigner.actions.ActionModule)
    sizer_map = get_module_map(kafka.tools.assigner.sizers, kafka.tools.assigner.sizers.SizerModule)
    plugins = get_all_plugins()

    # Set up and parse all CLI arguments
    args = set_up_arguments(action_map, sizer_map, plugins)
    run_plugins_at_step(plugins, 'set_arguments', args)

    tools_path = get_tools_path(args.tools_path)
    check_java_home()

    cluster = Cluster.create_from_zookeeper(args.zookeeper, getattr(args, 'default_retention', 1))
    run_plugins_at_step(plugins, 'set_cluster', cluster)

    # If the module needs the partition sizes, call a size module to get the information
    check_and_get_sizes(action_map[args.action], args, cluster, sizer_map)
    run_plugins_at_step(plugins, 'after_sizes')
    print_leadership("before", cluster, args.leadership)

    # Clone the cluster, and run the action to generate a new cluster state
    newcluster = cluster.clone()
    action_to_run = action_map[args.action](args, newcluster)
    action_to_run.process_cluster()
    run_plugins_at_step(plugins, 'set_new_cluster', action_to_run.cluster)
    print_leadership("after", newcluster, args.leadership)

    move_partitions = cluster.changed_partitions(action_to_run.cluster)
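    # Split the moves into fixed-size batches so the reassignments are executed incrementally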
    batches = split_partitions_into_batches(move_partitions, batch_size=args.moves, use_class=Reassignment)
    run_plugins_at_step(plugins, 'set_batches', batches)

    log.info("Partition moves required: {0}".format(len(move_partitions)))
    log.info("Number of batches: {0}".format(len(batches)))
    dry_run = is_dry_run(args)

    for i, batch in enumerate(batches):
        log.info("Executing partition reassignment {0}/{1}: {2}".format(i + 1, len(batches), repr(batch)))
        batch.execute(i + 1, len(batches), args.zookeeper, tools_path, plugins, dry_run)

    run_plugins_at_step(plugins, 'before_ple')

    if not args.skip_ple:
        all_cluster_partitions = [p for p in action_to_run.cluster.partitions(args.exclude_topics)]
        batches = split_partitions_into_batches(all_cluster_partitions, batch_size=args.ple_size, use_class=ReplicaElection)
        log.info("Number of replica elections: {0}".format(len(batches)))
        run_preferred_replica_elections(batches, args, tools_path, plugins, dry_run)

    run_plugins_at_step(plugins, 'finished')

    if args.output_json:
        data = {
            'before': cluster.to_dict(),
            'after': action_to_run.cluster.to_dict()
        }
        sys.stdout.write(json.dumps(data, indent=4, sort_keys=True))

    return os.EX_OK
Example #16
    def get_partition_sizes(self):
        # Get broker partition sizes
        for broker_id, broker in self.cluster.brokers.items():
            _validate_broker(broker)

            log.info("Getting partition sizes via JMX for {0}".format(broker.hostname))
            jmxurl = self._java_provider.javax.management.remote.JMXServiceURL(
                "service:jmx:rmi:///jndi/rmi://{0}:{1}/jmxrmi".format(broker.hostname, broker.jmx_port))
            jmxsoc = self._java_provider.javax.management.remote.JMXConnectorFactory.connect(jmxurl, self._envhash)

            connection = jmxsoc.getMBeanServerConnection()
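            # Query every 'kafka.log:name=Size' MBean; each one reports the on-disk log size of a single topic partition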
            beans = connection.queryNames(self._java_provider.javax.management.ObjectName("kafka.log:name=Size,*"), None)
            for bean in beans:
                self._fetch_bean(connection, bean)

            jmxsoc.close()
Example #17
    def process_cluster(self):
        log.info("Starting partition balance by rack")

        # Check if rack information is set for the cluster
        broker_racks = [
            broker.rack for broker in self.cluster.brokers.values()
        ]
        if len(set(broker_racks)) == 1:
            raise BalanceException(
                "Cannot balance cluster by rack as it has no rack information")

        # Figure out the max RF for the cluster
        max_rf = self.cluster.max_replication_factor()

        # Balance partitions at each position separately
        for pos in range(max_rf):
            self._process_partitions_at_pos(pos)
Example #18
    def get_partition_sizes(self):
        # Get broker partition sizes
        for broker_id, broker in self.cluster.brokers.items():
            _validate_broker(broker)

            log.info("Getting partition sizes via JMX for {0}".format(
                broker.hostname))
            jmxurl = self._java_provider.javax.management.remote.JMXServiceURL(
                "service:jmx:rmi:///jndi/rmi://{0}:{1}/jmxrmi".format(
                    broker.hostname, broker.jmx_port))
            jmxsoc = self._java_provider.javax.management.remote.JMXConnectorFactory.connect(
                jmxurl, self._envhash)

            connection = jmxsoc.getMBeanServerConnection()
            beans = connection.queryNames(
                self._java_provider.javax.management.ObjectName(
                    "kafka.log:name=Size,*"), None)
            for bean in beans:
                self._fetch_bean(connection, bean)

            jmxsoc.close()
Example #19
    def create_from_zookeeper(cls, zkconnect, default_retention=1, fetch_topics=True):
        log.info("Connecting to zookeeper {0}".format(zkconnect))
        try:
            zk = KazooClient(zkconnect)
            zk.start()
        except Exception as e:
            raise ZookeeperException("Cannot connect to Zookeeper: {0}".format(e))

        # Get broker list
        cluster = cls(retention=default_retention)
        add_brokers_from_zk(cluster, zk)

        # Get current partition state
        if fetch_topics:
            log.info("Getting partition list from Zookeeper")
            for topic in zk.get_children("/brokers/topics"):
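                # Each topic znode stores a JSON map of partition numbers to replica lists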
                zdata, zstat = zk.get("/brokers/topics/{0}".format(topic))
                add_topic_with_replicas(cluster, topic, json_loads(zdata))
                set_topic_retention(cluster.topics[topic], zk)

            if cluster.num_topics() == 0:
                raise ZookeeperException("The cluster specified does not have any topics")

        log.info("Closing connection to zookeeper")
        zk.stop()
        zk.close()

        return cluster
Example #20
    def _execute(self, num, total, zookeeper, tools_path):
        with NamedTemporaryFile(mode='w') as assignfile:
            json.dump(self.dict_for_reassignment(), assignfile)
            assignfile.flush()
            FNULL = open(os.devnull, 'w')
            proc = subprocess.Popen(['{0}/kafka-reassign-partitions.sh'.format(tools_path), '--execute',
                                     '--zookeeper', zookeeper,
                                     '--reassignment-json-file', assignfile.name],
                                    stdout=FNULL, stderr=FNULL)
            proc.wait()

            # Wait until finished
            while True:
                remaining_partitions = self.check_completion(zookeeper, tools_path, assignfile.name)
                if remaining_partitions == 0:
                    break

                log.info('Partition reassignment {0}/{1} in progress [ {2}/{3} partitions remain ]. Sleeping {4} seconds'.format(num,
                                                                                                                                 total,
                                                                                                                                 remaining_partitions,
                                                                                                                                 len(self.partitions),
                                                                                                                                 self.pause_time))
                time.sleep(self.pause_time)
Example #21
def main():
    # Start by loading all the modules
    action_map = get_module_map(kafka.tools.assigner.actions,
                                kafka.tools.assigner.actions.ActionModule)
    sizer_map = get_module_map(kafka.tools.assigner.sizers,
                               kafka.tools.assigner.sizers.SizerModule)
    plugins = get_all_plugins()

    # Set up and parse all CLI arguments
    args = set_up_arguments(action_map, sizer_map, plugins)
    run_plugins_at_step(plugins, 'set_arguments', args)

    tools_path = get_tools_path(args.tools_path)
    check_java_home()

    cluster = Cluster.create_from_zookeeper(
        args.zookeeper, getattr(args, 'default_retention', 1))
    run_plugins_at_step(plugins, 'set_cluster', cluster)

    # If the module needs the partition sizes, call a size module to get the information
    check_and_get_sizes(action_map[args.action], args, cluster, sizer_map)
    run_plugins_at_step(plugins, 'after_sizes')
    print_leadership("before", cluster, args.leadership)

    # Clone the cluster, and run the action to generate a new cluster state
    newcluster = cluster.clone()
    action_to_run = action_map[args.action](args, newcluster)
    action_to_run.process_cluster()
    run_plugins_at_step(plugins, 'set_new_cluster', action_to_run.cluster)
    print_leadership("after", newcluster, args.leadership)

    move_partitions = cluster.changed_partitions(action_to_run.cluster)
    batches = split_partitions_into_batches(move_partitions,
                                            batch_size=args.moves,
                                            use_class=Reassignment)
    run_plugins_at_step(plugins, 'set_batches', batches)

    log.info("Partition moves required: {0}".format(len(move_partitions)))
    log.info("Number of batches: {0}".format(len(batches)))
    dry_run = is_dry_run(args)

    for i, batch in enumerate(batches):
        log.info("Executing partition reassignment {0}/{1}: {2}".format(
            i + 1, len(batches), repr(batch)))
        batch.execute(i + 1, len(batches), args.zookeeper, tools_path, plugins,
                      dry_run)

    run_plugins_at_step(plugins, 'before_ple')

    if not args.skip_ple:
        all_cluster_partitions = [
            p for p in action_to_run.cluster.partitions(args.exclude_topics)
        ]
        batches = split_partitions_into_batches(all_cluster_partitions,
                                                batch_size=args.ple_size,
                                                use_class=ReplicaElection)
        log.info("Number of replica elections: {0}".format(len(batches)))
        run_preferred_replica_elections(batches, args, tools_path, plugins,
                                        dry_run)

    run_plugins_at_step(plugins, 'finished')

    return os.EX_OK
Example #22
    def connect(self):
        log.info("Connecting to {0} on port {1} using PLAINTEXT".format(
            self.hostname, self.port))
        self._sock.connect((self.hostname, self.port))
Example #23
def is_dry_run(args):
    if args.generate or not args.execute:
        log.info("--execute flag NOT specified. DRY RUN ONLY")
        return True
    return False
Example #24
def print_leadership(type_str, cluster, dont_skip):
    if dont_skip:
        log.info("Cluster Leadership Balance ({0}):".format(type_str))
        cluster.log_broker_summary()
Example #25
    def process_cluster(self):
        log.info("Starting partition balance by {0}".format(self._size_attr))

        # Figure out the max RF for the cluster
        max_rf = self.cluster.max_replication_factor()

        # Calculate cluster information and sorted partition lists first
        partitions = {}
        sizes = {}
        targets = {}
        margins = {}
        for pos in range(max_rf):
            sizes[pos] = {}
            targets[pos] = {}
            margins[pos] = {}

            # Create a sorted list of partitions to use at this position (descending size)
            # Throw out partitions that are 4K or less in size, as they are effectively empty
            partitions[pos] = [p for p in self.cluster.partitions(self.args.exclude_topics) if (len(p.replicas) > pos) and (getattr(p, self._size_attr) > 4)]
            if len(partitions[pos]) == 0:
                continue
            partitions[pos].sort(key=attrgetter(self._size_attr), reverse=True)

            # Calculate broker size at this position
            for broker in self.cluster.brokers:
                if pos in self.cluster.brokers[broker].partitions:
                    sizes[pos][broker] = sum([getattr(p, self._size_attr) for p in self.cluster.brokers[broker].partitions[pos]], 0)
                else:
                    sizes[pos][broker] = 0

            # Calculate the median size of partitions (margin is median/2) and the average size per broker to target
            # Yes, I know the median calculation is slightly broken (it keeps integers). This is OK
            targets[pos] = sum([getattr(p, self._size_attr) for p in partitions[pos]], 0) // len(self.cluster.brokers)
            sizelen = len(partitions[pos])
            if not sizelen % 2:
                margins[pos] = (getattr(partitions[pos][sizelen // 2], self._size_attr) + getattr(partitions[pos][sizelen // 2 - 1], self._size_attr)) // 4
            else:
                margins[pos] = getattr(partitions[pos][sizelen // 2], self._size_attr) // 2

        # Balance partitions for each replica position separately
        for pos in range(max_rf):
            if len(sizes[pos]) == 0:
                continue

            log.info("Calculating ideal state for replica position {0}".format(pos))
            log.debug("Target average size per-broker is {0} kibibytes (+/- {1})".format(targets[pos], margins[pos]))

            for broker_id in self.cluster.brokers:
                broker = self.cluster.brokers[broker_id]

                # Skip brokers that are larger than our minimum target size
                min_move = targets[pos] - margins[pos] - sizes[pos][broker_id]
                max_move = min_move + (margins[pos] * 2)
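                # e.g. with target=1000 KiB and margin=100 KiB, a broker currently at 700 KiB needs between 200 and 400 KiB moved to it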
                if min_move <= 0:
                    continue
                log.debug("Moving between {0} and {1} kibibytes to broker {2}".format(min_move, max_move, broker_id))

                # Find partitions to move to this broker
                for partition in partitions[pos]:
                    partition_size = getattr(partition, self._size_attr)

                    # We can use this partition if all of the following are true: the partition has a replica at this position,
                    # its size is less than or equal to the max move size, the broker at this replica position would not go out
                    # of range, and it doesn't already exist on this broker at this position
                    if ((len(partition.replicas) <= pos) or (partition_size > max_move) or
                       ((sizes[pos][partition.replicas[pos].id] - partition_size) < (targets[pos] - margins[pos])) or
                       (partition.replicas[pos] == broker)):
                        continue

                    # We can only use a partition that this replica exists on if swapping positions wouldn't hurt balance of the other position or broker
                    source = partition.replicas[pos]
                    if broker in partition.replicas:
                        other_pos = partition.replicas.index(broker)
                        if ((sizes[other_pos][broker_id] - partition_size < targets[other_pos] - margins[other_pos]) or
                           (sizes[other_pos][source.id] + partition_size > targets[pos] + margins[pos]) or
                           (sizes[pos][broker_id] + partition_size > targets[pos] + margins[pos]) or
                           (sizes[pos][source.id] - partition_size < targets[pos] - margins[pos])):
                            continue

                        partition.swap_replica_positions(source, broker)
                        sizes[other_pos][broker_id] -= partition_size
                        sizes[other_pos][source.id] += partition_size
                    else:
                        # Move the partition and adjust sizes
                        partition.swap_replicas(source, broker)
                    sizes[pos][broker_id] += partition_size
                    sizes[pos][source.id] -= partition_size
                    min_move -= partition_size
                    max_move -= partition_size

                    # If we have moved enough partitions, stop for this broker
                    if min_move <= 0:
                        break
Example #26
    def process_cluster(self):
        log.info("Starting partition balance by count")

        # Figure out the max RF for the cluster and sort all partition lists by size (ascending)
        max_pos = self.cluster.max_replication_factor()
        for broker in self.cluster.brokers:
            for pos in self.cluster.brokers[broker].partitions:
                self.cluster.brokers[broker].partitions[pos].sort(key=attrgetter('size'))

        # Calculate partition counts for each position first
        max_count = {}
        for pos in range(max_pos):
            # Calculate the maximum number of partitions each broker should have (floor(average))
            # We'll also track a remainder and make sure they only go 1 per broker
            pcount = 0
            for broker in self.cluster.brokers:
                if pos in self.cluster.brokers[broker].partitions:
                    pcount += self.cluster.brokers[broker].num_partitions_at_position(pos)
            max_count[pos] = [pcount // len(self.cluster.brokers), pcount % len(self.cluster.brokers)]
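            # e.g. 10 partitions across 4 brokers -> max_count[pos] = [2, 2]: every broker gets 2, and 2 of them get one extra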
            log.info("Calculating ideal state for replica position {0} - max {1} partitions".format(pos, max_count[pos][0] + 1))

        # Balance partition counts for each replica position separately
        for pos in range(max_pos):
            for broker_id in self.cluster.brokers:
                broker = self.cluster.brokers[broker_id]
                # Figure out how many more partitions this broker needs
                diff = max_count[pos][0]
                if max_count[pos][1]:
                    diff += 1
                    max_count[pos][1] -= 1
                if pos in broker.partitions:
                    diff -= broker.num_partitions_at_position(pos)

                if diff > 0:
                    log.debug("Moving {0} partitions to broker {1}".format(diff, broker_id))

                    # Iterate through the largest brokers to find diff partitions to move to this broker
                    for source_id in self.cluster.brokers:
                        source = self.cluster.brokers[source_id]
                        if diff == 0:
                            break
                        if pos not in source.partitions:
                            continue

                        iterlist = list(source.partitions[pos])
                        for partition in iterlist:
                            # If we have moved enough partitions from this broker, exit out of the inner loop
                            if (source.num_partitions_at_position(pos) < max_count[pos][0]) or (diff == 0):
                                break

                            # Skip topics that are being excluded
                            if partition.topic.name in self.args.exclude_topics:
                                continue

                            # If the partition is already on the target, swap positions only if it makes the balance better
                            if broker in partition.replicas:
                                other_pos = partition.replicas.index(broker)
                                if (other_pos in source.partitions) and (source.num_partitions_at_position(other_pos) < max_count[other_pos][0]):
                                    partition.swap_replica_positions(source, broker)
                            else:
                                partition.swap_replicas(source, broker)
                                diff -= 1

                    log.debug("Finish broker {0} with {1} partitions".format(broker_id, broker.num_partitions_at_position(pos)))
                elif diff < 0:
                    log.debug("Moving {0} partitions off broker {1}".format(-diff, broker_id))

                    # Iterate through the smallest brokers to find diff partitions to move off this broker
                    for target_id in self.cluster.brokers:
                        target = self.cluster.brokers[target_id]
                        if diff == 0:
                            break
                        if (pos in target.partitions) and (target.num_partitions_at_position(pos) > (max_count[pos][0] + 1)):
                            continue

                        iterlist = list(broker.partitions[pos])
                        for partition in iterlist:
                            # If we have moved enough partitions to this broker, exit out of the inner loop
                            if ((pos in target.partitions) and (target.num_partitions_at_position(pos) >= max_count[pos][0])) or (diff == 0):
                                break
                            # Skip partitions that are already on the target broker or are being excluded
                            if (target in partition.replicas) or (partition.topic.name in self.args.exclude_topics):
                                continue

                            partition.swap_replicas(broker, target)
                            diff += 1

                    log.debug("Finish broker {0} with {1} partitions".format(broker, broker.num_partitions_at_position(pos)))
                else:
                    log.debug("Skipping broker {0} which has {1} partitions".format(broker, broker.num_partitions_at_position(pos)))
                    continue
Example #27
    def process_cluster(self):
        log.info("Starting even partition balance")

        # Initialize broker deques for each position for remainder assignment
        ordered_brokers = sorted(self.cluster.brokers.keys())
        max_rf = self.cluster.max_replication_factor()
        remainder_brokers = [deque(ordered_brokers) for pos in range(max_rf)]
        for pos in range(max_rf):
            # Advance the deque by max_rf places so that we don't collide replicas
            remainder_brokers[pos].rotate(-pos)

        for topic_name in sorted(self.cluster.topics):
            topic = self.cluster.topics[topic_name]
            if not self.check_topic_ok(topic):
                continue

            # How many partitions per broker, and what's the last one that can be evenly balanced
            target = len(topic.partitions) // len(self.cluster.brokers)
            last_even_partition = len(topic.partitions) - (len(topic.partitions) % len(self.cluster.brokers)) - 1
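            # e.g. 10 partitions on 4 brokers: target=2 per broker, partitions 0-7 are balanced evenly, 8 and 9 are the remainder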

            # Initialize broker map for this topic.
            pmap = [dict.fromkeys(self.cluster.brokers.keys(), 0) for pos in range(len(topic.partitions[0].replicas))]
            for pnum in range(0, last_even_partition + 1):
                partition = topic.partitions[pnum]
                for i, replica in enumerate(partition.replicas):
                    pmap[i][replica.id] += 1

            # Balance all but the last remainder partitions
            while not pmap_matches_target(pmap, target):
                for pnum in range(0, last_even_partition + 1):
                    partition = topic.partitions[pnum]

                    for pos in range(len(partition.replicas)):
                        # Current placement is fine (or low). Leave the replica where it is
                        if pmap[pos][partition.replicas[pos].id] <= target:
                            continue

                        # Find a new replica for the partition at this position
                        for bid in pmap[pos]:
                            if pmap[pos][bid] >= target:
                                continue
                            broker = self.cluster.brokers[bid]
                            source = partition.replicas[pos]

                            if broker in partition.replicas:
                                other_pos = partition.replicas.index(broker)
                                partition.swap_replica_positions(source, broker)
                                pmap[other_pos][broker.id] -= 1
                                pmap[other_pos][source.id] += 1
                            else:
                                partition.swap_replicas(source, broker)

                            pmap[pos][broker.id] += 1
                            pmap[pos][source.id] -= 1
                            break

            # Distribute the remainder partitions evenly among the brokers
            # This is a pretty dumb round robin distribution, but it will be stable
            for pnum in range(last_even_partition + 1, len(topic.partitions)):
                partition = topic.partitions[pnum]

                for pos in range(len(partition.replicas)):
                    # Find a new replica for this partition
                    proposed = remainder_brokers[pos].popleft()
                    remainder_brokers[pos].append(proposed)

                    partition.swap_replicas(partition.replicas[pos], self.cluster.brokers[proposed])
Example #28
def is_dry_run(args):
    if args.generate or not args.execute:
        log.info("--execute flag NOT specified. DRY RUN ONLY")
        return True
    return False
Example #29
def print_leadership(type_str, cluster, dont_skip):
    if dont_skip:
        log.info("Cluster Leadership Balance ({0}):".format(type_str))
        cluster.log_broker_summary()
Example #30
    def process_cluster(self):
        log.info("Starting partition balance by count")

        # Figure out the max RF for the cluster and sort all partition lists by size (ascending)
        max_pos = self.cluster.max_replication_factor()
        for broker in self.cluster.brokers:
            for pos in self.cluster.brokers[broker].partitions:
                self.cluster.brokers[broker].partitions[pos].sort(
                    key=attrgetter('size'))

        # Calculate partition counts for each position first
        max_count = {}
        for pos in range(max_pos):
            # Calculate the maximum number of partitions each broker should have (floor(average))
            # We'll also track a remainder and make sure they only go 1 per broker
            pcount = 0
            for broker in self.cluster.brokers:
                if pos in self.cluster.brokers[broker].partitions:
                    pcount += self.cluster.brokers[
                        broker].num_partitions_at_position(pos)
            max_count[pos] = [
                pcount // len(self.cluster.brokers),
                pcount % len(self.cluster.brokers)
            ]
            log.info(
                "Calculating ideal state for replica position {0} - max {1} partitions"
                .format(pos, max_count[pos][0] + 1))

        # Balance partition counts for each replica position separately
        for pos in range(max_pos):
            for broker_id in self.cluster.brokers:
                broker = self.cluster.brokers[broker_id]
                # Figure out how many more partitions this broker needs
                diff = max_count[pos][0]
                if max_count[pos][1]:
                    diff += 1
                    max_count[pos][1] -= 1
                if pos in broker.partitions:
                    diff -= broker.num_partitions_at_position(pos)

                if diff > 0:
                    log.debug("Moving {0} partitions to broker {1}".format(
                        diff, broker_id))

                    # Iterate through the largest brokers to find diff partitions to move to this broker
                    for source_id in self.cluster.brokers:
                        source = self.cluster.brokers[source_id]
                        if diff == 0:
                            break
                        if pos not in source.partitions:
                            continue

                        iterlist = list(source.partitions[pos])
                        for partition in iterlist:
                            # If we have moved enough partitions from this broker, exit out of the inner loop
                            if (source.num_partitions_at_position(pos) <
                                    max_count[pos][0]) or (diff == 0):
                                break

                            # Skip topics that are being excluded
                            if partition.topic.name in self.args.exclude_topics:
                                continue

                            # If the partition is already on the target, swap positions only if it makes the balance better
                            if broker in partition.replicas:
                                other_pos = partition.replicas.index(broker)
                                if (other_pos in source.partitions
                                    ) and (source.num_partitions_at_position(
                                        other_pos) < max_count[other_pos][0]):
                                    partition.swap_replica_positions(
                                        source, broker)
                            else:
                                partition.swap_replicas(source, broker)
                                diff -= 1

                    log.debug("Finish broker {0} with {1} partitions".format(
                        broker_id, broker.num_partitions_at_position(pos)))
                elif diff < 0:
                    log.debug("Moving {0} partitions off broker {1}".format(
                        -diff, broker_id))

                    # Iterate through the smallest brokers to find diff partitions to move off this broker
                    for target_id in self.cluster.brokers:
                        target = self.cluster.brokers[target_id]
                        if diff == 0:
                            break
                        if (pos in target.partitions) and (
                                target.num_partitions_at_position(pos) >
                            (max_count[pos][0] + 1)):
                            continue

                        iterlist = list(broker.partitions[pos])
                        for partition in iterlist:
                            # If we have moved enough partitions to this broker, exit out of the inner loop
                            if ((pos in target.partitions) and
                                (target.num_partitions_at_position(pos) >=
                                 max_count[pos][0])) or (diff == 0):
                                break
                            # Skip partitions that are already on the target broker or are being excluded
                            if (target in partition.replicas) or (
                                    partition.topic.name
                                    in self.args.exclude_topics):
                                continue

                            partition.swap_replicas(broker, target)
                            diff += 1

                    log.debug("Finish broker {0} with {1} partitions".format(
                        broker, broker.num_partitions_at_position(pos)))
                else:
                    log.debug(
                        "Skipping broker {0} which has {1} partitions".format(
                            broker, broker.num_partitions_at_position(pos)))
                    continue
Example #31
def print_leadership(type_str, cluster, dont_skip):
    if dont_skip:
        log.info("Cluster Leadership Balance (before):")
        cluster.log_broker_summary()
Example #32
    def process_cluster(self):
        log.info("Starting partition balance by {0}".format(self._size_attr))

        # Figure out the max RF for the cluster
        max_rf = self.cluster.max_replication_factor()

        # Calculate cluster information and sorted partition lists first
        partitions = {}
        sizes = {}
        targets = {}
        margins = {}
        for pos in range(max_rf):
            sizes[pos] = {}
            targets[pos] = {}
            margins[pos] = {}

            # Create a sorted list of partitions to use at this position (descending size)
            # Throw out partitions that are 4K or less in size, as they are effectively empty
            partitions[pos] = [
                p for p in self.cluster.partitions(self.args.exclude_topics)
                if (len(p.replicas) > pos) and (
                    getattr(p, self._size_attr) > 4)
            ]
            if len(partitions[pos]) == 0:
                continue
            partitions[pos].sort(key=attrgetter(self._size_attr), reverse=True)

            # Calculate broker size at this position
            for broker in self.cluster.brokers:
                if pos in self.cluster.brokers[broker].partitions:
                    sizes[pos][broker] = sum([
                        getattr(p, self._size_attr)
                        for p in self.cluster.brokers[broker].partitions[pos]
                    ], 0)
                else:
                    sizes[pos][broker] = 0

            # Calculate the median size of partitions (margin is median/2) and the average size per broker to target
            # Yes, I know the median calculation is slightly broken (it keeps integers). This is OK
            targets[pos] = sum(
                [getattr(p, self._size_attr)
                 for p in partitions[pos]], 0) // len(self.cluster.brokers)
            sizelen = len(partitions[pos])
            if not sizelen % 2:
                margins[pos] = (
                    getattr(partitions[pos][sizelen // 2], self._size_attr) +
                    getattr(partitions[pos][sizelen // 2 - 1],
                            self._size_attr)) // 4
            else:
                margins[pos] = getattr(partitions[pos][sizelen // 2],
                                       self._size_attr) // 2

        # Balance partitions for each replica position separately
        for pos in range(max_rf):
            if len(sizes[pos]) == 0:
                continue

            log.info(
                "Calculating ideal state for replica position {0}".format(pos))
            log.debug(
                "Target average size per-broker is {0} kibibytes (+/- {1})".
                format(targets[pos], margins[pos]))

            for broker_id in self.cluster.brokers:
                broker = self.cluster.brokers[broker_id]

                # Skip brokers that are larger than our minimum target size
                min_move = targets[pos] - margins[pos] - sizes[pos][broker_id]
                max_move = min_move + (margins[pos] * 2)
                if min_move <= 0:
                    continue
                log.debug("Moving between {0} and {1} kibibytes to broker {2}".
                          format(min_move, max_move, broker_id))

                # Find partitions to move to this broker
                for partition in partitions[pos]:
                    partition_size = getattr(partition, self._size_attr)

                    # We can use this partition if all of the following are true: the partition has a replica at this position,
                    # its size is less than or equal to the max move size, the broker at this replica position would not go out
                    # of range, and it doesn't already exist on this broker at this position
                    if ((len(partition.replicas) <= pos)
                            or (partition_size > max_move)
                            or ((sizes[pos][partition.replicas[pos].id] -
                                 partition_size) <
                                (targets[pos] - margins[pos]))
                            or (partition.replicas[pos] == broker)):
                        continue

                    # We can only use a partition that this replica exists on if swapping positions wouldn't hurt balance of the other position or broker
                    source = partition.replicas[pos]
                    if broker in partition.replicas:
                        other_pos = partition.replicas.index(broker)
                        if ((sizes[other_pos][broker_id] - partition_size <
                             targets[other_pos] - margins[other_pos]) or
                            (sizes[other_pos][source.id] + partition_size >
                             targets[pos] + margins[pos])
                                or (sizes[pos][broker_id] + partition_size >
                                    targets[pos] + margins[pos])
                                or (sizes[pos][source.id] - partition_size <
                                    targets[pos] - margins[pos])):
                            continue

                        partition.swap_replica_positions(source, broker)
                        sizes[other_pos][broker_id] -= partition_size
                        sizes[other_pos][source.id] += partition_size
                    else:
                        # Move the partition and adjust sizes
                        partition.swap_replicas(source, broker)
                    sizes[pos][broker_id] += partition_size
                    sizes[pos][source.id] -= partition_size
                    min_move -= partition_size
                    max_move -= partition_size

                    # If we have moved enough partitions, stop for this broker
                    if min_move <= 0:
                        break