def register_rolling_restart(zk: BukuExhibitor, broker_id: str, image: str, instance_type: str, scalyr_key: str,
                             scalyr_region: str, kms_key_id: str, cool_down: int):
    """
    Register a ``rolling_restart`` action unless a rolling restart is already in progress.

    Every broker id returned by ``zk.get_broker_ids()`` is paired with the broker that follows it
    in that list (the last one is paired with the first), and the resulting mapping is stored in
    the action under ``restart_assignment``.

    :param zk: exhibitor wrapper used to query brokers and to register the action
    :param broker_id: broker id passed to ``zk.register_action`` as ``broker_id``
    :param image: stored in the action as ``image``
    :param instance_type: stored in the action as ``instance_type``
    :param scalyr_key: stored in the action as ``scalyr_key``
    :param scalyr_region: stored in the action as ``scalyr_region``
    :param kms_key_id: stored in the action as ``kms_key_id``
    :param cool_down: stored in the action as ``cool_down``
    """
    if zk.is_rolling_restart_in_progress():
        _LOG.warning('Rolling restart in progress, skipping')
        return

    brokers = zk.get_broker_ids()
    # Rotate the list by one position so that zip() pairs each broker with its successor,
    # wrapping around from the last broker to the first one.
    restart_assignment = dict(zip(brokers, brokers[1:] + brokers[:1]))
    _LOG.info('Rolling restart assignment\n {}'.format(restart_assignment))

    zk.register_action(
        {
            'name': 'rolling_restart',
            'restart_assignment': restart_assignment,
            'image': image,
            'instance_type': instance_type,
            'scalyr_key': scalyr_key,
            'scalyr_region': scalyr_region,
            'kms_key_id': kms_key_id,
            'cool_down': cool_down,
        },
        broker_id=broker_id)
def _load_disk_stats(zk: BukuExhibitor, api_port: int):
    """
    Collect per-broker size statistics, enriched with disk data fetched from each broker.

    :param zk: exhibitor wrapper providing ``get_disk_stats`` and ``get_broker_address``
    :param api_port: port used for ``/api/disk_stats`` requests; ``-1`` skips the HTTP call
    :return: ``None`` when fewer than two brokers have size stats, otherwise a dict keyed by
        broker id. Brokers whose response lacks ``free_kb`` or ``used_kb`` are left out, and so
        are brokers for which loading raised an exception (the error is logged).
    """
    size_stats = zk.get_disk_stats()
    if len(size_stats) < 2:
        _LOG.info("No size stats available, imbalance check cancelled")
        return None

    query_brokers = api_port != -1  # For unit tests only
    required_keys = ('free_kb', 'used_kb')
    result = {}
    for broker_id, value in size_stats.items():
        try:
            if query_brokers:
                host = zk.get_broker_address(broker_id)
                url = 'http://{}:{}/api/disk_stats'.format(host, api_port)
                disk = requests.get(url, timeout=5).json()
                if not all(key in disk for key in required_keys):
                    # Incomplete answer from the broker: leave it out of the result.
                    continue
                value['disk'] = disk
                value['host'] = host
            result[broker_id] = value
        except Exception as e:
            _LOG.error('Failed to load disk stats for broker {}. Skipping it'.format(broker_id), exc_info=e)
    return result
def register_rebalance(zk: BukuExhibitor, broker_id: str, empty_brokers: list, exclude_topics: list):
    """
    Register a ``rebalance`` action while holding ``zk.lock()``.

    :param zk: exhibitor wrapper used for locking and action registration
    :param broker_id: when truthy, passed to ``zk.register_action`` as ``broker_id``;
        otherwise the action is registered without a broker id
    :param empty_brokers: stored in the action as ``empty_brokers``
    :param exclude_topics: stored in the action as ``exclude_topics``
    """
    action = dict(name='rebalance', empty_brokers=empty_brokers, exclude_topics=exclude_topics)
    # Only forward broker_id when one was actually given.
    target = {'broker_id': broker_id} if broker_id else {}
    with zk.lock():
        zk.register_action(action, **target)
def register_fatboy_slim(zk: BukuExhibitor, threshold_kb: int):
    """
    Register a ``fatboyslim`` action while holding ``zk.lock()``.

    A warning is logged first when ``zk.is_rebalancing()`` reports a rebalance in progress;
    the action is registered in either case.

    :param zk: exhibitor wrapper used for the rebalance check, locking and action registration
    :param threshold_kb: stored in the action as ``threshold_kb``
    """
    if zk.is_rebalancing():
        # Logger.warning is used because Logger.warn is a deprecated alias.
        _LOG.warning(
            'Rebalance is already in progress, may be it will take time for this command to start processing'
        )
    with zk.lock():
        zk.register_action({
            'name': 'fatboyslim',
            'threshold_kb': threshold_kb
        })
def register_rebalance(zk: BukuExhibitor, broker_id: str, empty_brokers: list, exclude_topics: list,
                       parallelism: int, bin_packing: bool):
    """
    Register a ``rebalance`` action while holding ``zk.lock()``.

    :param zk: exhibitor wrapper used for locking and action registration
    :param broker_id: when truthy, passed to ``zk.register_action`` as ``broker_id``;
        otherwise the action is registered without a broker id
    :param empty_brokers: stored in the action as ``empty_brokers``
    :param exclude_topics: stored in the action as ``exclude_topics``
    :param parallelism: must be greater than 0; stored in the action converted with ``int``
    :param bin_packing: stored in the action converted with ``bool``
    :raises Exception: when ``parallelism`` is not greater than 0
    """
    if parallelism <= 0:
        raise Exception('Parallelism for rebalance should be greater than 0')

    action = dict(
        name='rebalance',
        empty_brokers=empty_brokers,
        exclude_topics=exclude_topics,
        parallelism=int(parallelism),
        bin_packing=bool(bin_packing),
    )
    # Only forward broker_id when one was actually given.
    target = {'broker_id': broker_id} if broker_id else {}
    with zk.lock():
        zk.register_action(action, **target)
def test_get_broker_ids():
    """Broker ids listed under ``/brokers/ids`` are returned by ``get_broker_ids`` in sorted order."""

    def _get_children(path):
        # Only the broker id listing is expected to be requested.
        if path != '/brokers/ids':
            raise NotImplementedError()
        return ['3', '1', '2']

    exhibitor_mock = MagicMock()
    exhibitor_mock.get_children = _get_children
    buku = BukuExhibitor(exhibitor_mock)

    # ensure that return list is sorted
    assert ['1', '2', '3'] == buku.get_broker_ids()
def register_rebalance(zk: BukuExhibitor, broker_id: str, empty_brokers: list, exclude_topics: list,
                       parallelism: int, bin_packing: bool, throttle: int):
    """
    Register a ``rebalance`` action while holding ``zk.lock()``.

    :param zk: exhibitor wrapper used for locking and action registration
    :param broker_id: when truthy, passed to ``zk.register_action`` as ``broker_id``;
        otherwise the action is registered without a broker id
    :param empty_brokers: stored in the action as ``empty_brokers``
    :param exclude_topics: stored in the action as ``exclude_topics``
    :param parallelism: must be greater than 0; stored in the action converted with ``int``
    :param bin_packing: stored in the action converted with ``bool``
    :param throttle: stored in the action converted with ``int``
    :raises Exception: when ``parallelism`` is not greater than 0
    """
    if parallelism <= 0:
        raise Exception('Parallelism for rebalance should be greater than 0')

    action = dict(
        name='rebalance',
        empty_brokers=empty_brokers,
        exclude_topics=exclude_topics,
        parallelism=int(parallelism),
        bin_packing=bool(bin_packing),
        throttle=int(throttle),
    )
    # Only forward broker_id when one was actually given.
    target = {'broker_id': broker_id} if broker_id else {}
    with zk.lock():
        zk.register_action(action, **target)
def __check_all_broker_ids_exist(broker_ids: list, zk: BukuExhibitor):
    """
    Verify that every given broker id is among the ids returned by ``zk.get_broker_ids()``.

    :param broker_ids: broker ids to check
    :param zk: exhibitor wrapper providing the registered broker ids
    :raises Exception: when at least one id is unknown; the message names the offending id(s)
    """
    registered_brokers = zk.get_broker_ids()
    unknown_brokers = list(filter(lambda candidate: candidate not in registered_brokers, broker_ids))

    unknown_count = len(unknown_brokers)
    if unknown_count == 0:
        return
    if unknown_count == 1:
        # Singular wording for exactly one unknown id.
        raise Exception('1 broker id is not valid: {}'.format(unknown_brokers[0]))
    raise Exception('{} broker ids are not valid: {}'.format(unknown_count, ",".join(unknown_brokers)))
def test_reallocate_partition():
    """
    ``reallocate_partition`` writes a single-partition reassignment with integer replica ids and
    returns a falsy value once creating the reassignment node raises ``NodeExistsError``.
    """
    accepted = []  # one entry per reassignment node creation that was let through

    def _create(path, value=None, **kwargs):
        if path in ('/bubuku/changes', '/bubuku/actions/global'):
            return
        if path != '/admin/reassign_partitions':
            raise NotImplementedError(
                'Not implemented for path {}'.format(path))
        # The first five creations succeed; afterwards the node is reported as existing.
        if len(accepted) >= 5:
            raise NodeExistsError()
        accepted.append(path)
        j = json.loads(value.decode('utf-8'))
        assert j['version'] == '1'
        assert len(j['partitions']) == 1
        p = j['partitions'][0]
        assert p['topic'] == 't01'
        assert p['partition'] == 0
        assert p['replicas'] == [1, 2, 3]

    exhibitor_mock = MagicMock()
    exhibitor_mock.create = _create
    buku = BukuExhibitor(exhibitor_mock)

    replica_variants = (
        ['1', '2', '3'],
        ['1', '2', 3],
        [1, 2, 3],
        [1, 2, 3],
        [1, 2, 3],
    )
    for replicas in replica_variants:
        assert buku.reallocate_partition('t01', 0, replicas)

    # Node exists
    assert not buku.reallocate_partition('t01', 0, [1, 2, 3])
def get_opt_broker_id(broker_id: str, config: Config, zk: BukuExhibitor, env_provider: EnvProvider) -> str:
    """
    Return the broker id to operate on, detecting it when none was supplied.

    When ``broker_id`` is falsy, a broker id manager is created through
    ``env_provider.create_broker_id_manager`` and asked for the id via ``get_broker_id()``.
    The resulting id must be among the ids returned by ``zk.get_broker_ids()``.

    :param broker_id: explicitly requested broker id, or a falsy value to detect it
    :param config: provides ``kafka_settings_template`` (read only when the id is detected)
    :param zk: exhibitor wrapper providing the registered broker ids
    :param env_provider: factory for the broker id manager (used only when the id is detected)
    :return: the supplied or detected broker id
    :raises Exception: when the broker id is not among the registered brokers
    """
    if not broker_id:
        # The properties path is a plain literal: it contains no replacement fields, so
        # calling str.format() on it (as was done before) could never change it.
        kafka_properties = KafkaProperties(config.kafka_settings_template, '/tmp/tmp.props')
        broker_id_manager = env_provider.create_broker_id_manager(zk, kafka_properties)
        broker_id = broker_id_manager.get_broker_id()
        _LOG.info('Will use broker_id {}'.format(broker_id))
    running_brokers = zk.get_broker_ids()
    if broker_id not in running_brokers:
        raise Exception('Broker id {} is not registered ({})'.format(broker_id, running_brokers))
    return broker_id
def __get_opt_broker_id(broker_id: str, config: Config, zk: BukuExhibitor, env_provider: EnvProvider) -> str:
    """
    Return the broker id to operate on, detecting it when none was supplied.

    When ``broker_id`` is falsy, a broker id manager is created through
    ``env_provider.create_broker_id_manager`` and asked for the id via ``detect_broker_id()``.
    The resulting id must be among the ids returned by ``zk.get_broker_ids()``.

    :param broker_id: explicitly requested broker id, or a falsy value to detect it
    :param config: provides ``kafka_settings_template`` (read only when the id is detected)
    :param zk: exhibitor wrapper providing the registered broker ids
    :param env_provider: factory for the broker id manager (used only when the id is detected)
    :return: the supplied or detected broker id
    :raises Exception: when the broker id is not among the registered brokers
    """
    if not broker_id:
        # The properties path is a plain literal: it contains no replacement fields, so
        # calling str.format() on it (as was done before) could never change it.
        kafka_properties = KafkaProperties(config.kafka_settings_template, '/tmp/tmp.props')
        broker_id_manager = env_provider.create_broker_id_manager(zk, kafka_properties)
        broker_id = broker_id_manager.detect_broker_id()
        _LOG.info('Will use broker_id {}'.format(broker_id))
    running_brokers = zk.get_broker_ids()
    if broker_id not in running_brokers:
        raise Exception('Broker id {} is not registered ({}), can not restart'.format(broker_id, running_brokers))
    return broker_id
def __init__(self, zk: BukuExhibitor, broker_ids: list, empty_brokers: list, exclude_topics: list,
             parallelism: int = 1):
    """
    Set up the change in its initial ``_LOAD_STATE``.

    :param zk: exhibitor wrapper; also queried for broker racks via ``get_broker_racks()``
    :param broker_ids: all broker ids; each one is converted with ``int`` and kept sorted
    :param empty_brokers: ids from ``broker_ids`` found here are left out of ``self.broker_ids``
    :param exclude_topics: stored as ``self.exclude_topics``
    :param parallelism: stored as ``self.parallelism``
    """
    self.zk = zk
    self.all_broker_ids = sorted(map(int, broker_ids))
    # Membership in empty_brokers is tested on the raw id, before the int conversion.
    target_ids = [int(raw_id) for raw_id in broker_ids if raw_id not in empty_brokers]
    target_ids.sort()
    self.broker_ids = target_ids
    self.broker_racks = zk.get_broker_racks()
    self.exclude_topics = exclude_topics
    self.broker_distribution = None
    self.source_distribution = None
    self.action_queue = []
    self.state = OptimizedRebalanceChange._LOAD_STATE
    self.parallelism = parallelism
def register_migration(zk: BukuExhibitor, brokers_from: list, brokers_to: list, shrink: bool, broker_id: str,
                       parallelism: int):
    """
    Validate a broker-to-broker migration request and register it as a ``migrate`` action.

    :param zk: exhibitor wrapper used to list active brokers, lock and register the action
    :param brokers_from: source broker ids; must be unique, active and as many as ``brokers_to``
    :param brokers_to: target broker ids; must be unique, active and disjoint from ``brokers_from``
    :param shrink: stored in the action converted with ``bool``
    :param broker_id: optional broker to run the change on; when truthy its ``str`` form must be
        an active broker id and is passed to ``zk.register_action``
    :param parallelism: must be greater than 0; stored in the action converted with ``int``
    :raises Exception: when any of the validations above fails
    """
    if len(brokers_from) != len(brokers_to):
        raise Exception('Brokers list {} and {} must have the same size'.format(brokers_from, brokers_to))
    # Overlap is symmetric, so checking it in one direction is sufficient.
    if any(b in brokers_from for b in brokers_to):
        raise Exception('Broker lists can not hold same broker ids')
    if len(set(brokers_from)) != len(brokers_from):
        raise Exception('Can not use same broker ids for source_list {}'.format(brokers_from))
    if len(set(brokers_to)) != len(brokers_to):
        # Report the list that actually contains the duplicates.
        raise Exception('Can not use same broker ids for target_list {}'.format(brokers_to))

    active_ids = zk.get_broker_ids()
    if any(b not in active_ids for b in brokers_from) or any(b not in active_ids for b in brokers_to):
        raise Exception('Brokers dead from: {} to: {} alive:{}'.format(brokers_from, brokers_to, active_ids))

    if broker_id and str(broker_id) not in active_ids:
        raise Exception('Broker id to run change on ({}) is not in active list {}'.format(
            broker_id, active_ids))
    if parallelism <= 0:
        raise Exception('Parallelism for migration should be greater than 0')

    with zk.lock():
        action = {'name': 'migrate', 'from': brokers_from, 'to': brokers_to, 'shrink': bool(shrink),
                  'parallelism': int(parallelism)}
        if broker_id:
            zk.register_action(action, str(broker_id))
        else:
            zk.register_action(action)
def register_migration(zk: BukuExhibitor, brokers_from: list, brokers_to: list, shrink: bool, broker_id: str,
                       throttle: int, parallelism: int):
    """
    Validate a broker-to-broker migration request and register it as a ``migrate`` action.

    :param zk: exhibitor wrapper used to list active brokers, lock and register the action
    :param brokers_from: source broker ids; must be unique, active and as many as ``brokers_to``
    :param brokers_to: target broker ids; must be unique, active and disjoint from ``brokers_from``
    :param shrink: stored in the action converted with ``bool``
    :param broker_id: optional broker to run the change on; when truthy its ``str`` form must be
        an active broker id and is passed to ``zk.register_action``
    :param throttle: stored in the action converted with ``int``
    :param parallelism: must be greater than 0; stored in the action converted with ``int``
    :raises Exception: when any of the validations above fails
    """
    if len(brokers_from) != len(brokers_to):
        raise Exception('Brokers list {} and {} must have the same size'.format(brokers_from, brokers_to))
    # Overlap is symmetric, so checking it in one direction is sufficient.
    if any(b in brokers_from for b in brokers_to):
        raise Exception('Broker lists can not hold same broker ids')
    if len(set(brokers_from)) != len(brokers_from):
        raise Exception('Can not use same broker ids for source_list {}'.format(brokers_from))
    if len(set(brokers_to)) != len(brokers_to):
        # Report the list that actually contains the duplicates.
        raise Exception('Can not use same broker ids for target_list {}'.format(brokers_to))

    active_ids = zk.get_broker_ids()
    if any(b not in active_ids for b in brokers_from) or any(b not in active_ids for b in brokers_to):
        raise Exception('Brokers dead from: {} to: {} alive:{}'.format(brokers_from, brokers_to, active_ids))

    if broker_id and str(broker_id) not in active_ids:
        raise Exception('Broker id to run change on ({}) is not in active list {}'.format(
            broker_id, active_ids))
    if parallelism <= 0:
        raise Exception('Parallelism for migration should be greater than 0')

    with zk.lock():
        action = {'name': 'migrate', 'from': brokers_from, 'to': brokers_to, 'shrink': bool(shrink),
                  'parallelism': int(parallelism), 'throttle': int(throttle)}
        if broker_id:
            zk.register_action(action, str(broker_id))
        else:
            zk.register_action(action)
def test_reallocate_partition():
    """
    ``reallocate_partition`` writes a single-partition reassignment with integer replica ids and
    returns a falsy value once creating the reassignment node raises ``NodeExistsError``.
    """
    successful_creations = 0

    def _create(path, value=None, **kwargs):
        nonlocal successful_creations
        if path in ('/bubuku/changes', '/bubuku/actions/global'):
            return
        if path != '/admin/reassign_partitions':
            raise NotImplementedError('Not implemented for path {}'.format(path))
        # Five creations are accepted; every later one reports an existing node.
        if successful_creations >= 5:
            raise NodeExistsError()
        successful_creations += 1
        payload = json.loads(value.decode('utf-8'))
        assert payload['version'] == '1'
        assert len(payload['partitions']) == 1
        partition = payload['partitions'][0]
        assert partition['topic'] == 't01'
        assert partition['partition'] == 0
        assert partition['replicas'] == [1, 2, 3]

    exhibitor_mock = MagicMock()
    exhibitor_mock.create = _create
    buku = BukuExhibitor(exhibitor_mock)

    # String, mixed and integer replica ids must all be accepted.
    for replicas in (['1', '2', '3'], ['1', '2', 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]):
        assert buku.reallocate_partition('t01', 0, replicas)

    # Node exists
    assert not buku.reallocate_partition('t01', 0, [1, 2, 3])
def _load_disk_stats(zk: BukuExhibitor, api_port: int):
    """
    Collect per-broker size statistics, enriched with disk data fetched from each broker.

    :param zk: exhibitor wrapper providing ``get_disk_stats`` and ``get_broker_address``
    :param api_port: port used for ``/api/disk_stats`` requests; ``-1`` skips the HTTP call
    :return: ``None`` when fewer than two brokers have size stats, otherwise a dict keyed by
        broker id. Brokers whose response lacks ``free_kb`` or ``used_kb`` are left out, and so
        are brokers for which loading raised an exception (the error is logged).
    """
    size_stats = zk.get_disk_stats()
    if len(size_stats) < 2:
        _LOG.info("No size stats available, imbalance check cancelled")
        return None

    result = {}
    for broker_id, value in size_stats.items():
        try:
            if api_port != -1:  # For unit tests only
                host = zk.get_broker_address(broker_id)
                response = requests.get('http://{}:{}/api/disk_stats'.format(host, api_port), timeout=5)
                disk_info = response.json()
                has_all_fields = 'free_kb' in disk_info and 'used_kb' in disk_info
                if not has_all_fields:
                    # Incomplete answer from the broker: leave it out of the result.
                    continue
                value.update(disk=disk_info, host=host) if isinstance(value, dict) else None
                if not isinstance(value, dict):
                    value['disk'] = disk_info
                    value['host'] = host
            result[broker_id] = value
        except Exception as e:
            message = 'Failed to load disk stats for broker {}. Skipping it'.format(broker_id)
            _LOG.error(message, exc_info=e)
    return result
def __init__(self, zk: BukuExhibitor, broker_ids: list, empty_brokers: list, exclude_topics: list,
             throttle: int = 100000000, parallelism: int = 1):
    """
    Set up the change in its initial ``_LOAD_STATE``.

    :param zk: exhibitor wrapper; also queried for broker racks via ``get_broker_racks()``
    :param broker_ids: all broker ids; each one is converted with ``int`` and kept sorted
    :param empty_brokers: ids from ``broker_ids`` found here are left out of ``self.broker_ids``
    :param exclude_topics: stored as ``self.exclude_topics``
    :param throttle: passed to ``RebalanceThrottleManager`` together with ``zk``
    :param parallelism: stored as ``self.parallelism``
    """
    self.zk = zk
    self.all_broker_ids = sorted(map(int, broker_ids))
    # Membership in empty_brokers is tested on the raw id, before the int conversion.
    target_ids = [int(raw_id) for raw_id in broker_ids if raw_id not in empty_brokers]
    target_ids.sort()
    self.broker_ids = target_ids
    self.broker_racks = zk.get_broker_racks()
    self.exclude_topics = exclude_topics
    self.broker_distribution = None
    self.source_distribution = None
    self.action_queue = []
    self.state = OptimizedRebalanceChange._LOAD_STATE
    self.parallelism = parallelism
    self.throttle_manager = RebalanceThrottleManager(self.zk, throttle)
def select_fat_slim_brokers(zk: BukuExhibitor, sorted_stats: list):
    """
    Pick a pair of entries from ``sorted_stats`` whose brokers share a rack.

    When any rack returned by ``zk.get_broker_racks()`` is ``None``, the first and the last entry
    are returned as is. Otherwise entries are tried from the front, each one against the entries
    after it taken from the back, and the first pair with equal racks wins.

    :param zk: exhibitor wrapper providing the rack of every broker, keyed by integer broker id
    :param sorted_stats: sequence of entries whose first item is the broker id
    :return: tuple ``(fat_broker, slim_broker)``, or ``(None, None)`` when no pair shares a rack
    """
    racks = zk.get_broker_racks()
    if any(rack is None for rack in racks.values()):
        return sorted_stats[0], sorted_stats[-1]

    total = len(sorted_stats)
    for fat_idx in range(total - 1):
        fat_broker = sorted_stats[fat_idx]
        fat_rack = racks[int(fat_broker[0])]
        # Candidates for the other side are walked from the end back towards fat_idx.
        for slim_idx in reversed(range(fat_idx + 1, total)):
            slim_broker = sorted_stats[slim_idx]
            if racks[int(slim_broker[0])] == fat_rack:
                return fat_broker, slim_broker
    return None, None
def select_fat_slim_brokers(zk: BukuExhibitor, sorted_stats: list):
    """
    Pick a pair of entries from ``sorted_stats`` whose brokers share a rack.

    When any rack returned by ``zk.get_broker_racks()`` is ``None``, the first and the last entry
    are returned as is. Otherwise entries are tried from the front, each one against the entries
    after it taken from the back, and the first pair with equal racks wins.

    :param zk: exhibitor wrapper providing the rack of every broker, keyed by integer broker id
    :param sorted_stats: list of entries whose first item is the broker id
    :return: tuple ``(fat_broker, slim_broker)``, or ``(None, None)`` when no pair shares a rack
    """
    racks = zk.get_broker_racks()
    rack_missing = any(rack is None for rack in racks.values())
    if rack_missing:
        return sorted_stats[0], sorted_stats[-1]

    # The last entry has nothing after it, so it is never tried as the first element of a pair.
    for position, fat_broker in enumerate(sorted_stats[:-1]):
        fat_rack = racks[int(fat_broker[0])]
        for slim_broker in reversed(sorted_stats[position + 1:]):
            slim_rack = racks[int(slim_broker[0])]
            if slim_rack == fat_rack:
                return fat_broker, slim_broker
    return None, None
def test_is_broker_registered():
    """
    ``is_broker_registered`` accepts string and integer ids and reports a broker as registered
    only when its node holds data; an empty node or a missing node both count as unregistered.
    """

    def _get(path):
        if path == '/brokers/ids/123':
            return '123', object()
        if path == '/brokers/ids/321':
            # Node is present but carries no data.
            return None, None
        raise NoNodeError()

    exhibitor_mock = MagicMock()
    exhibitor_mock.get = _get
    buku = BukuExhibitor(exhibitor_mock)

    for registered_id in ('123', 123):
        assert buku.is_broker_registered(registered_id)
    for unregistered_id in ('321', 321, 333, '333'):
        assert not buku.is_broker_registered(unregistered_id)
def register_rebalance(zk: BukuExhibitor, broker_id: str):
    """
    Register a ``rebalance`` action while holding ``zk.lock()``.

    :param zk: exhibitor wrapper used for locking and action registration
    :param broker_id: when truthy, passed to ``zk.register_action`` as ``broker_id``;
        otherwise the action is registered without a broker id
    """
    # Only forward broker_id when one was actually given.
    target = {'broker_id': broker_id} if broker_id else {}
    with zk.lock():
        zk.register_action(dict(name='rebalance'), **target)
def register_restart(zk: BukuExhibitor, broker_id: str):
    """
    Register a ``restart`` action for the given broker while holding ``zk.lock()``.

    :param zk: exhibitor wrapper used for locking and action registration
    :param broker_id: passed to ``zk.register_action`` as ``broker_id``
    """
    restart_action = {'name': 'restart'}
    with zk.lock():
        zk.register_action(restart_action, broker_id=broker_id)
def register_fatboy_slim(zk: BukuExhibitor, threshold_kb: int):
    """
    Register a ``fatboyslim`` action while holding ``zk.lock()``.

    A warning is logged first when ``zk.is_rebalancing()`` reports a rebalance in progress;
    the action is registered in either case.

    :param zk: exhibitor wrapper used for the rebalance check, locking and action registration
    :param threshold_kb: stored in the action as ``threshold_kb``
    """
    rebalance_running = zk.is_rebalancing()
    if rebalance_running:
        _LOG.warning('Rebalance is already in progress, may be it will take time for this command to start '
                     'processing')

    action = dict(name='fatboyslim', threshold_kb=threshold_kb)
    with zk.lock():
        zk.register_action(action)
def register_start(zk: BukuExhibitor, broker_id: str):
    """
    Register a ``start`` action for the given broker.

    :param zk: exhibitor wrapper used for action registration
    :param broker_id: passed to ``zk.register_action`` as ``broker_id``
    """
    start_action = dict(name='start')
    zk.register_action(start_action, broker_id=broker_id)
def register_restart(zk: BukuExhibitor, broker_id: str):
    """
    Register a ``restart`` action for the given broker while holding ``zk.lock()``.

    :param zk: exhibitor wrapper used for locking and action registration
    :param broker_id: passed to ``zk.register_action`` as ``broker_id``
    """
    with zk.lock():
        restart_action = dict(name='restart')
        zk.register_action(restart_action, broker_id=broker_id)
raise NotImplementedError() def _get_async(path): def _get_iresult(block): assert block return _get(path) mock = MagicMock() mock.get = _get_iresult return mock exhibitor_mock.get = _get exhibitor_mock.get_async = _get_async exhibitor_mock.get_children = _get_children buku_ex = BukuExhibitor(exhibitor_mock, async) expected_result = [ ('t01', 0, [1, 2, 3]), ('t01', 1, [3, 2, 1]), ('t02', 0, [4, 5, 6]), ('t02', 1, [5, 1, 2]), ] result = [r for r in buku_ex.load_partition_assignment()] assert len(expected_result) == len(result) for e in expected_result: assert e in result def test_load_partition_assignment_sync(): _test_load_partition_assignment(False)