class Heartbeat(Thread):
    """Background thread that periodically polls the status of all clients.

    Every ``HEARTBEAT_FREQUENCY_SEC`` seconds the thread fetches the client
    statuses through its ``NodeClient``. The loop ends, and the outcome is
    stored in ``self.success``, when either

    * at least one client is no longer alive-and-busy while
      ``kill_clients_on_disconnect`` is set (``success = False``), or
    * every client that is still alive-and-busy reports ``finished``
      (``success = True``).

    If the stop event becomes set first, the loop ends and ``self.success``
    keeps its initial value ``None``.
    """

    def __init__(self, event, kill_clients_on_disconnect):
        """Set up the heartbeat thread.

        Args:
            event: Object whose ``wait(timeout)`` paces the polling loop and
                whose truthy return value ends it (presumably a
                ``threading.Event`` -- confirm against the caller).
            kill_clients_on_disconnect: When truthy, a client dropping out
                makes the heartbeat stop the running experiments and finish
                with ``success = False``.
        """
        Thread.__init__(self)
        self.kill_clients_on_disconnect = kill_clients_on_disconnect
        self.stopped = event
        # None until the loop reaches a verdict: True = finished, False = client lost.
        self.success = None
        self.node_client = NodeClient(None)

    def run(self):
        """Poll the clients until one is lost, all have finished, or a stop is requested."""
        while not self.stopped.wait(HEARTBEAT_FREQUENCY_SEC):
            client_statuses = self.node_client.get_client_statuses()

            # A client only counts as alive while it is both reachable and busy;
            # everything else is treated as dead.
            dead_clients = [c for c in client_statuses
                            if not (c['alive'] and c['busy'])]
            alive_clients = [c for c in client_statuses
                             if c['alive'] and c['busy']]

            if dead_clients and self.kill_clients_on_disconnect:
                # Separate addresses with ', ' -- joining with '.' made dotted
                # addresses run together into one unreadable string.
                printable_names = ', '.join([c['address'] for c in dead_clients])
                _logger.critical('Heartbeat: One or more clients ({}) are not alive anymore; '
                                 'exiting others as well.'.format(printable_names))
                # NOTE(review): presumably the argument tells NodeClient which
                # clients are already gone -- verify against its implementation.
                self.node_client.stop_running_experiments(dead_clients)
                self.success = False
                return
            elif all(c['finished'] for c in alive_clients):
                # NOTE(review): all() is True for an empty list, so this branch
                # also fires when no client is alive and busy -- confirm that
                # reporting success in that case is intended.
                _logger.info('Heartbeat: All clients finished their experiments.')
                self.success = True
                return
def _terminate(self, stop_clients=True, return_code=-1):
    """Shut everything down, mark the experiment as finished and exit the process.

    Stops the heartbeat thread (if there is one), optionally stops the
    experiments running on the clients, closes the experiment record through
    ``DbLogger`` when it is enabled and an experiment id is known, and finally
    exits the interpreter.

    Args:
        stop_clients: When truthy, ask a fresh ``NodeClient`` to stop the
            running experiments.
        return_code: Exit status handed to ``sys.exit``.

    Raises:
        SystemExit: Always, carrying ``return_code`` -- unless finishing the
            experiment record itself raises, in which case that error
            propagates instead.
    """
    # Function-local import so this method does not depend on the file's
    # top-level import block.
    import sys

    try:
        if self.heartbeat_thread:
            self._logger.info('Stopping heartbeat...')
            # Wake the heartbeat loop so it leaves its wait, then wait for the thread.
            self.heartbeat_thread.stopped.set()
            self.heartbeat_thread.join()
        if stop_clients:
            self._logger.info('Stopping clients...')
            node_client = NodeClient(None)
            node_client.stop_running_experiments()
    except Exception:
        # The SystemExit raised below would otherwise replace this error
        # without a trace; record it and carry on with the shutdown.
        self._logger.exception('Error while stopping heartbeat/clients; continuing shutdown.')
    finally:
        db_logger = DbLogger()
        if db_logger.is_enabled and self.experiment_id is not None:
            db_logger.finish_experiment(self.experiment_id)
        # sys.exit instead of the site-provided exit(): the latter is meant for
        # the interactive shell and is missing under `python -S` / frozen builds.
        sys.exit(return_code)