def test_restart_atomicity(self):
    """
    Restart must be atomic with respect to the action lock: every phase of the
    restart (stop, wait for leader election, start) runs under the same lock,
    which guarantees that cluster instances are not destroyed at the same time
    during a zk update.
    """
    zk = MagicMock()
    broker = MagicMock()
    zk.get_conn_str = lambda: 'xxx'
    broker.is_running_and_registered = lambda: True

    change = RestartBrokerChange(zk, broker, None)

    # Phase 1: the first run() stops the kafka process and keeps the change alive.
    stop_calls = []
    broker.stop_kafka_process = lambda: stop_calls.append(True)
    assert change.run([])
    assert stop_calls and stop_calls[0]

    # Phase 2: while leader election is in progress the change keeps asking
    # to be re-run (still under the same lock).
    broker.start_kafka_process = Mock(side_effect=LeaderElectionInProgress())
    for _ in range(1, 50):
        assert change.run([])

    # Phase 3: once election is over the broker is started with the zk
    # connection string and the change completes (run() returns falsy).
    start_calls = []
    broker.start_kafka_process = lambda conn_str: start_calls.append(conn_str)
    assert not change.run([])
    assert start_calls and 'xxx' == start_calls[0]
def check(self) -> Change:
    """Take the next queued remote action for this broker from zookeeper and
    convert it into the matching Change instance.

    Returns None when there is no pending action, when the action payload has
    no 'name', when the name is unknown, or when building the change raises.
    """
    # take_action is performed under the zk lock so that the action is
    # consumed exactly once (presumably a cluster-wide lock — verify in zk
    # client implementation).
    with self.zk.lock():
        data = self.zk.take_action(
            self.broker_manager.id_manager.detect_broker_id())
    if not data:
        return None
    if 'name' not in data:
        _LOG.error(
            'Action name can not be restored from {}, skipping'.format(
                data))
        return None
    try:
        # Map the action name to the concrete Change implementation.
        if data['name'] == 'restart':
            return RestartBrokerChange(self.zk, self.broker_manager,
                                       lambda: False)
        elif data['name'] == 'rebalance':
            return OptimizedRebalanceChange(self.zk,
                                            self.zk.get_broker_ids())
        elif data['name'] == 'migrate':
            return MigrationChange(self.zk, data['from'], data['to'],
                                   data['shrink'])
        elif data['name'] == 'fatboyslim':
            # Swap loader is deferred: size data is fetched lazily when the
            # change actually runs.
            return SwapPartitionsChange(
                self.zk,
                lambda x: load_swap_data(
                    x, self.api_port, int(data['threshold_kb'])))
        else:
            _LOG.error('Action {} not supported'.format(data))
    except Exception as e:
        # A malformed payload must not kill the check loop — log and skip.
        _LOG.error('Failed to create action from {}'.format(data),
                   exc_info=e)
    return None
def check(self) -> Change:
    """Trigger a one-shot broker restart when the broker process is not
    running and registered; returns None when no restart is needed."""
    # Nothing to do if the check was already consumed or the broker is healthy.
    if not self.need_check or self.broker.is_running_and_registered():
        return None

    _LOG.info('Oops! Broker is dead, triggering restart')
    self.need_check = False

    def _cancel_if():
        # Abort the restart if the broker came back on its own in the meantime.
        return self.broker.is_running_and_registered()

    return RestartBrokerChange(self.zk, self.broker, _cancel_if,
                               self.on_check_removed)
def run_daemon_loop(config: Config, process_holder: KafkaProcess, cmd_helper: CmdHelper, restart_on_init: bool):
    """Wire together all supervisor components and run the controller loop.

    Builds kafka properties and environment providers, connects to zookeeper
    through the exhibitor proxy, registers the standard checks and configured
    features on the controller, and then blocks in the controller loop
    (optionally seeding it with an initial restart change).
    """
    _LOG.info("Using configuration: {}".format(config))

    props = KafkaProperties(
        config.kafka_settings_template,
        '{}/config/server.properties'.format(config.kafka_dir))

    env = EnvProvider.create_env_provider(config)
    addresses = env.get_address_provider()

    # Propagate the rack id into the broker config only when one is defined.
    rack = env.get_rack()
    if rack:
        props.set_property('broker.rack', rack)

    timeout = StartupTimeout.build(config.timeout)

    _LOG.info("Loading exhibitor configuration")
    with load_exhibitor_proxy(addresses, config.zk_prefix) as zookeeper:
        _LOG.info("Loading broker_id policy")
        id_manager = env.create_broker_id_manager(zookeeper, props)

        _LOG.info("Building broker manager")
        broker = BrokerManager(process_holder, zookeeper, id_manager, props, timeout)

        _LOG.info("Creating controller")
        controller = Controller(broker, zookeeper, env)
        controller.add_check(CheckBrokerStopped(broker, zookeeper))
        controller.add_check(RemoteCommandExecutorCheck(zookeeper, broker, config.health_port))
        controller.add_check(GenerateDataSizeStatistics(
            zookeeper, broker, cmd_helper,
            props.get_property("log.dirs").split(",")))
        apply_features(config.health_port, config.features, controller, zookeeper, broker, props, env)

        _LOG.info('Starting main controller loop')
        # Optionally start the loop with a broker restart already queued.
        initial_change = RestartBrokerChange(zookeeper, broker, lambda: False) if restart_on_init else None
        controller.loop(initial_change)