def _run_lipizzaner(config):
    """Run one Lipizzaner experiment with the given configuration.

    Installs ``config`` as the active settings on the ConfigurationContainer,
    points the output directory at the experiment-specific location, optionally
    configures logging, runs the trainer and then blocks until either the
    finish event or the stop event of ``ClientAPI`` is set.

    Args:
        config: Settings mapping; must contain ``['general']`` and
            ``['trainer']['n_iterations']``. ``['general']['logging']`` is
            optional and, when present, is read for ``enabled`` and
            ``log_level``.

    Raises:
        Exception: Any exception raised while running the experiment is logged
            and re-raised unchanged.
    """
    LogHelper.log_only_flask_warnings()

    cc = ConfigurationContainer.instance()
    cc.settings = config

    # Remember the base directory so it can be restored once the run ends.
    output_base_dir = cc.output_dir
    ClientAPI._set_output_dir(cc)

    if 'logging' in cc.settings['general'] and cc.settings['general']['logging']['enabled']:
        LogHelper.setup(cc.settings['general']['logging']['log_level'], cc.output_dir)

    ClientAPI._logger.info('Distributed training recognized, set log directory to {}'.format(cc.output_dir))

    try:
        lipizzaner = Lipizzaner()
        lipizzaner.run(cc.settings['trainer']['n_iterations'], ClientAPI._stop_event)
        ClientAPI.is_finished = True

        # Wait until master finishes experiment, i.e. collects results, or experiment is terminated
        or_event(ClientAPI._finish_event, ClientAPI._stop_event).wait()
    except Exception as ex:
        ClientAPI.is_finished = True
        ClientAPI._logger.critical('An unhandled error occured while running Lipizzaner: {}'.format(ex))

        # Flask 1.0.2 does not print the stack trace of exceptions anymore, so
        # log it explicitly. format_exc() returns the traceback as a string;
        # print_stack() would return None and print the wrong (current) stack.
        ClientAPI._logger.critical(
            'An unhandled error occured while running Lipizzaner: {}'.format(traceback.format_exc()))
        traceback.print_exc()

        # Bare raise re-raises the active exception with its original traceback.
        raise
    finally:
        ClientAPI.is_busy = False
        ClientAPI._logger.info('Finished experiment, waiting for new requests.')
        # Restore the base output directory saved before the run.
        cc.output_dir = output_base_dir
        ConcurrentPopulations.instance().lock()
def _run_lipizzaner(config):
    """Execute a Lipizzaner experiment for the MPI client.

    The given ``config`` becomes the active settings of the
    ConfigurationContainer, the grid is loaded, and the output directory is
    switched for the duration of the run. After training, the call blocks
    until the finish event or the stop event of ``LipizzanerMpiClient`` fires.

    Args:
        config: Settings mapping; ``['general']`` and
            ``['trainer']['n_iterations']`` are read, and
            ``['general']['logging']`` (``enabled``, ``log_level``) is used
            when present.

    Raises:
        Exception: Errors raised during the run are logged, printed and
            re-raised.
    """
    container = ConfigurationContainer.instance()
    container.settings = config

    Grid.instance().load_grid()

    # Keep the base directory around; it is put back in the finally clause.
    base_output_dir = container.output_dir
    LipizzanerMpiClient._set_output_dir(container)

    general_settings = container.settings['general']
    if 'logging' in general_settings:
        logging_settings = general_settings['logging']
        if logging_settings['enabled']:
            LogHelper.setup(logging_settings['log_level'], container.output_dir)

    LipizzanerMpiClient._logger.info(
        'Distributed training recognized, set log directory to {}'.format(
            container.output_dir))

    try:
        n_iterations = container.settings['trainer']['n_iterations']
        Lipizzaner().run(n_iterations, LipizzanerMpiClient._stop_event)
        LipizzanerMpiClient.is_finished = True

        # Block until the master has finished the experiment (i.e. collected
        # the results) or the experiment has been terminated.
        finished_or_stopped = or_event(LipizzanerMpiClient._finish_event,
                                       LipizzanerMpiClient._stop_event)
        finished_or_stopped.wait()
    except Exception as ex:
        LipizzanerMpiClient.is_finished = True
        error_text = 'An unhandled error occured while running Lipizzaner: {}'.format(ex)
        LipizzanerMpiClient._logger.critical(error_text)
        traceback.print_exc()
        raise ex
    finally:
        LipizzanerMpiClient.is_busy = False
        LipizzanerMpiClient._logger.info(
            'Finished experiment, waiting for new requests.')
        # Put the base output directory back.
        container.output_dir = base_output_dir
        ConcurrentPopulations.instance().lock()