def start_daemon() -> None:
    """Start a daemon runner for the currently configured profile.

    Daemon logging is configured first, then a daemon runner is created through the manager and registered
    as the manager's runner. Handlers for ``SIGTERM`` and ``SIGINT`` are installed on the runner's loop; each
    one schedules ``shutdown_runner(runner)`` as a task. Finally ``runner.start()`` is called.

    :raises Exception: whatever is raised while creating or registering the runner is logged and re-raised.
    """
    daemon_client = get_daemon_client()
    configure_logging(daemon=True, daemon_log_file=daemon_client.daemon_log_file)

    try:
        manager = get_manager()
        runner = manager.create_daemon_runner()
        manager.set_runner(runner)
    except Exception:
        LOGGER.exception('daemon runner failed to start')
        raise

    # The loop invokes signal callbacks without arguments, so the handler does not need the signal number.
    for signum in (signal.SIGTERM, signal.SIGINT):
        runner.loop.add_signal_handler(signum, lambda: asyncio.create_task(shutdown_runner(runner)))

    try:
        LOGGER.info('Starting a daemon runner')
        runner.start()
    except SystemError as exception:
        LOGGER.info('Received a SystemError: %s', exception)
        runner.close()

    # This point is only reached once `runner.start()` has returned or the runner was closed after a
    # `SystemError`, so the runner is no longer starting: report that it stopped.
    LOGGER.info('Daemon runner stopped')
def start_daemon():
    """Launch a daemon runner for the profile that is currently configured.

    Sets up daemon logging, builds the runner through the manager and registers it there. Process-level
    handlers for ``SIGINT`` and ``SIGTERM`` close the runner, after which ``runner.start()`` is invoked.

    :raises Exception: any failure while creating or registering the runner is logged and re-raised.
    """
    client = get_daemon_client()
    configure_logging(daemon=True, daemon_log_file=client.daemon_log_file)

    try:
        manager = get_manager()
        runner = manager.create_daemon_runner()
        manager.set_runner(runner)
    except Exception:
        LOGGER.exception('daemon runner failed to start')
        raise

    def shutdown_daemon(_num, _frame):
        """Close the runner in response to a termination signal."""
        LOGGER.info('Received signal to shut down the daemon runner')
        runner.close()

    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, shutdown_daemon)

    LOGGER.info('Starting a daemon runner')

    try:
        runner.start()
    except SystemError as error:
        LOGGER.info('Received a SystemError: %s', error)
        runner.close()

    LOGGER.info('Daemon runner stopped')
def restart(ctx, reset, no_wait):
    """Restart the daemon.

    By default will only reset the workers of the running daemon. After the restart the same amount of workers will be
    running. If the `--reset` flag is passed, however, the full daemon will be stopped and restarted with the default
    number of workers that is started when calling `verdi daemon start` manually.
    """
    from aiida.engine.daemon.client import get_daemon_client

    daemon_client = get_daemon_client()
    wait = not no_wait

    if reset:
        # Full reset: stop the daemon entirely, then start it again from scratch.
        ctx.invoke(stop)

        # The two lines below can be reduced to `ctx.invoke(start)` once issue #950 in `click` is resolved.
        # Because of that bug the `callback` of the `number` argument of the `start` command is not called, and it
        # is that callback which sets the default value. Without it `None` is passed, which triggers an exception.
        # As a temporary workaround the default is looked up manually here and passed in explicitly.
        default_workers = ctx.obj.config.get_option('daemon.default_workers', ctx.obj.profile.name)
        ctx.invoke(start, number=default_workers)
        return

    # Soft restart: only the workers of the running daemon are restarted.
    if wait:
        echo.echo('Restarting the daemon... ', nl=False)
    else:
        echo.echo('Restarting the daemon')

    response = daemon_client.restart_daemon(wait)

    # The response status is only reported when waiting for the restart.
    if wait:
        print_client_response_status(response)
def decr(number):
    """Remove NUMBER [default=1] workers from the running daemon."""
    from aiida.engine.daemon.client import get_daemon_client

    # Ask the daemon client to drop the workers and report the status of its response.
    print_client_response_status(get_daemon_client().decrease_workers(number))
def start(foreground, number):
    """Start the daemon with NUMBER workers.

    If the NUMBER of desired workers is not specified, the default is used, which is determined by the configuration
    option `daemon.default_workers`, which if not explicitly changed defaults to 1.
    """
    from aiida.engine.daemon.client import get_daemon_client

    client = get_daemon_client()

    echo.echo('Starting the daemon... ', nl=False)

    # Build the `verdi` invocation once; the optional flag goes right before the worker count.
    command = ['verdi', '-p', client.profile.name, 'daemon', _START_CIRCUS_COMMAND]
    if foreground:
        command.append('--foreground')
    command.append(str(number))

    try:
        environment = get_env_with_venv_bin()
        subprocess.check_output(command, env=environment, stderr=subprocess.STDOUT)  # pylint: disable=unexpected-keyword-arg
    except subprocess.CalledProcessError as error:
        click.secho('FAILED', fg='red', bold=True)
        echo.echo_critical(str(error))

    # A short pause gives the pid-file a chance to be created before the status is queried.
    with spinner():
        time.sleep(1)
        response = client.get_status()

    print_client_response_status(response)
def status(all_profiles):
    """Print the status of the current daemon or all daemons.

    Returns exit code 0 if all requested daemons are running, else exit code 3.
    """
    from aiida.engine.daemon.client import get_daemon_client

    config = get_config()

    # Either every non-test profile or only the current profile is inspected.
    profiles = (
        [candidate for candidate in config.profiles if not candidate.is_test_profile]
        if all_profiles is True else [config.current_profile]
    )

    found_stopped_daemon = False

    for profile in profiles:
        client = get_daemon_client(profile.name)
        delete_stale_pid_file(client)

        click.secho('Profile: ', fg='red', bold=True, nl=False)
        click.secho('{}'.format(profile.name), bold=True)
        echo.echo(get_daemon_status(client))

        if not client.is_daemon_running:
            found_stopped_daemon = True

    # Signal through the exit code that at least one requested daemon is not running.
    if found_stopped_daemon:
        sys.exit(3)
def print_last_process_state_change(process_type=None):
    """
    Report when a process of the given type last changed its state.

    A warning is printed in addition when the daemon is not running.

    :param process_type: optional process type for which to get the latest state change timestamp.
        Valid process types are either 'calculation' or 'work'.
    """
    from aiida.cmdline.utils.echo import echo_info, echo_warning
    from aiida.common import timezone
    from aiida.common.utils import str_timedelta
    from aiida.engine.daemon.client import get_daemon_client
    from aiida.engine.utils import get_process_state_change_timestamp

    daemon_client = get_daemon_client()
    last_change = get_process_state_change_timestamp(process_type)

    if last_change is not None:
        # Show the elapsed time as well as the absolute local time of the last change.
        elapsed = timezone.delta(last_change, timezone.now())
        absolute = format_local_time(last_change, format_str='at %H:%M:%S on %Y-%m-%d')
        relative = str_timedelta(elapsed, negative_to_zero=True, max_num_fields=1)
        echo_info('last time an entry changed state: {} ({})'.format(relative, absolute))
    else:
        echo_info('last time an entry changed state: never')

    if not daemon_client.is_daemon_running:
        echo_warning('the daemon is not running', bold=True)
def restart(ctx, reset, no_wait):
    """Restart the daemon.

    By default will only reset the workers of the running daemon. After the restart the same amount of workers will be
    running. If the `--reset` flag is passed, however, the full circus daemon will be stopped and restarted with just
    a single worker.
    """
    from aiida.engine.daemon.client import get_daemon_client

    daemon_client = get_daemon_client()
    wait = not no_wait

    if reset:
        # Full reset: delegate to the `stop` and `start` commands.
        ctx.invoke(stop)
        ctx.invoke(start)
        return

    # Soft restart: only the workers of the running daemon are restarted.
    if wait:
        echo.echo('Restarting the daemon... ', nl=False)
    else:
        echo.echo('Restarting the daemon')

    response = daemon_client.restart_daemon(wait)

    # The response status is only reported when waiting for the restart.
    if wait:
        print_client_response_status(response)
def wrapper(wrapped, _, args, kwargs):
    """Echo the message when the daemon client reports no pid, then call the decorated function.

    :param wrapped: the decorated callable.
    :param _: unused second positional argument.
    :param args: positional arguments forwarded to ``wrapped``.
    :param kwargs: keyword arguments forwarded to ``wrapped``.
    :return: whatever ``wrapped`` returns.
    """
    from aiida.engine.daemon.client import get_daemon_client

    daemon_pid = get_daemon_client().get_daemon_pid()

    if not daemon_pid:
        echo_function(message)

    # The decorated function is called regardless of whether the message was echoed.
    return wrapped(*args, **kwargs)
def database_migrate(force):
    """Migrate the database to the latest schema version."""
    from aiida.manage.manager import get_manager
    from aiida.engine.daemon.client import get_daemon_client

    daemon_client = get_daemon_client()

    if daemon_client.is_daemon_running:
        echo.echo_critical('Migration aborted, the daemon for the profile is still running.')

    manager = get_manager()
    profile = manager.get_profile()
    backend = manager._load_backend(schema_check=False)  # pylint: disable=protected-access

    if force:
        # Forced mode: migrate straight away, without warnings or confirmation prompt.
        try:
            backend.migrate()
        except exceptions.ConfigurationError as error:
            echo.echo_critical(str(error))
        return

    checklist = (
        'Migrating your database might take a while and is not reversible.',
        'Before continuing, make sure you have completed the following steps:',
        '',
        ' 1. Make sure you have no active calculations and workflows.',
        ' 2. If you do, revert the code to the previous version and finish running them first.',
        ' 3. Stop the daemon using `verdi daemon stop`',
        ' 4. Make a backup of your database and repository',
        '',
    )
    for line in checklist:
        echo.echo_warning(line)
    echo.echo_warning('', nl=False)

    expected_answer = 'MIGRATE NOW'
    confirm_message = 'If you have completed the steps above and want to migrate profile "{}", type {}'.format(
        profile.name, expected_answer)

    try:
        # Keep prompting until the exact confirmation phrase has been typed.
        while click.prompt(confirm_message) != expected_answer:
            pass
    except click.Abort:
        echo.echo('\n')
        echo.echo_critical('Migration aborted, the data has not been affected.')
    else:
        try:
            backend.migrate()
        except exceptions.ConfigurationError as error:
            echo.echo_critical(str(error))
        else:
            echo.echo_success('migration completed')
# Compares the text returned by `get_daemon_status(client)` with the expected report below through
# `compare_string_literals`. In the expected table the worker row shows `-` for MEM %, CPU % and the start time.
# NOTE(review): the line breaks and column padding of the expected literal appear to have been lost on this
# line - confirm against the original multi-line literal before reformatting this test.
def test_daemon_worker_timeout(): """Test `get_daemon_status` output if a daemon worker cannot be reached by the circus daemon.""" client = get_daemon_client() literal = """\ Daemon is running as PID 111015 since 2019-12-17 11:42:18 Active workers [1]: PID MEM % CPU % started ----- ------- ------- --------- 4990 - - - Use verdi daemon [incr | decr] [num] to increase / decrease the amount of workers""" compare_string_literals(get_daemon_status(client), literal)
# Asserts that `get_daemon_status(client)` equals the expected report below exactly, and that the client
# reports the daemon as running.
# NOTE(review): the line breaks and column padding of the expected literal appear to have been lost on this
# line - confirm against the original multi-line literal, since the comparison is an exact `==`.
def test_daemon_working(): """Test `get_daemon_status` output if everything is working normally with a single worker.""" client = get_daemon_client() literal = """\ Daemon is running as PID 111015 since 2019-12-17 11:42:18 Active workers [1]: PID MEM % CPU % started ----- ------- ------- ------------------- 4990 0.231 0 2019-12-17 12:27:38 Use verdi daemon [incr | decr] [num] to increase / decrease the amount of workers""" assert get_daemon_status(client) == literal assert client.is_daemon_running
def logshow():
    """Show the log of the daemon, press CTRL+C to quit."""
    from aiida.engine.daemon.client import get_daemon_client

    client = get_daemon_client()

    # Bound up front so the interrupt handler can tell whether `tail` was launched at all. Without this, a
    # CTRL+C arriving before `Popen` returned made the handler fail on an unbound `process`.
    process = None

    try:
        currenv = get_env_with_venv_bin()
        process = subprocess.Popen(['tail', '-f', client.daemon_log_file], env=currenv)
        process.wait()
    except KeyboardInterrupt:
        if process is not None:
            process.kill()
            # Reap the killed child so it does not linger as a zombie process.
            process.wait()
def decr(number):
    """Remove NUMBER [default=1] workers from the running daemon.

    Returns exit code 0 if the daemon is OK, non-zero if there was an error.
    """
    from aiida.engine.daemon.client import get_daemon_client

    daemon_client = get_daemon_client()
    exit_code = print_client_response_status(daemon_client.decrease_workers(number))

    # Only a truthy return code terminates the process; otherwise the command returns normally.
    if exit_code:
        sys.exit(exit_code)
def print_daemon_log():
    """Print the content of the daemon log file, as produced by ``cat``, to stdout.

    If the ``cat`` command exits with a non-zero status, a note with the error is printed instead of raising.
    """
    daemon_client = get_daemon_client()
    daemon_log = daemon_client.daemon_log_file

    print("Output of 'cat {}':".format(daemon_log))
    try:
        output = subprocess.check_output(
            ['cat', '{}'.format(daemon_log)],
            stderr=subprocess.STDOUT,
        )
    except subprocess.CalledProcessError as e:
        print('Note: the command failed, message: {}'.format(e))
    else:
        # `check_output` returns bytes: decode them so the log is printed as readable text and not as a
        # `b'...'` repr with escaped newlines. Undecodable bytes are replaced rather than raising.
        print(output.decode('utf-8', errors='replace'))
def print_daemon_log():
    """Print daemon log.

    The content of the daemon log file is obtained through ``cat`` and written to stdout. If the command exits
    with a non-zero status, a note with the error is printed instead of raising.
    """
    daemon_client = get_daemon_client()
    daemon_log = daemon_client.daemon_log_file

    print(f"Output of 'cat {daemon_log}':")
    try:
        output = subprocess.check_output(
            ['cat', f'{daemon_log}'],
            stderr=subprocess.STDOUT,
        )
    except subprocess.CalledProcessError as exception:
        print(f'Note: the command failed, message: {exception}')
    else:
        # `check_output` returns bytes: decode them so the log is printed as readable text and not as a
        # `b'...'` repr with escaped newlines. Undecodable bytes are replaced rather than raising.
        print(output.decode('utf-8', errors='replace'))
def stop(no_wait, all_profiles):
    """Stop the daemon.

    Returns exit code 0 if the daemon was shut down successfully (or was not running), non-zero if there was an error.
    """
    from aiida.engine.daemon.client import get_daemon_client

    config = get_config()
    wait = not no_wait

    # Either every non-test profile or only the current profile is handled.
    profiles = (
        [candidate for candidate in config.profiles if not candidate.is_test_profile]
        if all_profiles is True else [config.current_profile]
    )

    for profile in profiles:
        client = get_daemon_client(profile.name)

        click.secho('Profile: ', fg='red', bold=True, nl=False)
        click.secho(f'{profile.name}', bold=True)

        if not client.is_daemon_running:
            echo.echo('Daemon was not running')
            continue

        delete_stale_pid_file(client)

        if wait:
            echo.echo('Waiting for the daemon to shut down... ', nl=False)
        else:
            echo.echo('Shutting the daemon down')

        response = client.stop_daemon(wait)

        # Without waiting the response is not inspected any further.
        if not wait:
            continue

        if response['status'] == client.DAEMON_ERROR_NOT_RUNNING:
            click.echo('The daemon was not running.')
            continue

        exit_code = print_client_response_status(response)
        if exit_code:
            sys.exit(exit_code)
def status(all_profiles):
    """Print the status of the current daemon or all daemons."""
    from aiida.engine.daemon.client import get_daemon_client

    config = get_config()

    # Either every non-test profile or only the current profile is inspected.
    profiles = (
        [candidate for candidate in config.profiles if not candidate.is_test_profile]
        if all_profiles is True else [config.current_profile]
    )

    for profile in profiles:
        client = get_daemon_client(profile.name)
        delete_stale_pid_file(client)

        click.secho('Profile: ', fg='red', bold=True, nl=False)
        click.secho('{}'.format(profile.name), bold=True)
        echo.echo(get_daemon_status(client))
def test_ipc_socket_file_length_limit(self):
    """
    Check that the Circus client endpoints respect the socket path length limit.

    Operating systems often cap the length of socket file paths: 103 bytes on MacOS versus 107 bytes on Unix.
    The limit is exposed by the Zmq library, which is used by the Circus library that daemonizes the daemon
    runners. The controller, pubsub and stats endpoints of the client must each stay within that limit.

    See issue #1317 and pull request #1403 for the discussion
    """
    # pylint: disable=no-member
    client = get_daemon_client()

    # Fetch all three endpoints before running any assertion.
    endpoints = (
        client.get_controller_endpoint(),
        client.get_pubsub_endpoint(),
        client.get_stats_endpoint(),
    )

    for endpoint in endpoints:
        self.assertTrue(len(endpoint) <= zmq.IPC_PATH_MAX_LEN)
def test_circus_timeout():
    """Check the `get_daemon_status` report for the case where the circus daemon process is unreachable."""
    daemon_client = get_daemon_client()
    report = get_daemon_status(daemon_client)

    assert 'Call to the circus controller timed out' in report
def test_daemon_not_running():
    """Check the `get_daemon_status` report and the client state for a daemon that is not running."""
    daemon_client = get_daemon_client()
    report = get_daemon_status(daemon_client)

    assert 'The daemon is not running' in report
    assert not daemon_client.is_daemon_running
def start_circus(foreground, number):
    """This will actually launch the circus daemon, either daemonized in the background or in the foreground.

    If run in the foreground all logs are redirected to stdout.

    .. note:: this should not be called directly from the commandline!
    """
    # Parameters: `foreground` selects foreground operation (no daemonizing, log output set to '-'); `number`
    # is passed as `numprocesses` of the single watcher. A `click.ClickException` is raised when more than one
    # worker is requested in the foreground.
    from circus import get_arbiter
    from circus import logger as circus_logger
    from circus.circusd import daemonize
    from circus.pidfile import Pidfile
    from circus.util import check_future_exception_and_log, configure_logger

    from aiida.engine.daemon.client import get_daemon_client

    if foreground and number > 1:
        raise click.ClickException('can only run a single worker when running in the foreground')

    client = get_daemon_client()

    loglevel = client.loglevel
    # '-' is the log output used in the foreground; in the background the circus log file is used instead
    logoutput = '-'

    if not foreground:
        logoutput = client.circus_log_file

    # Configuration of the arbiter: the client's endpoints and pid file plus a single watcher that runs
    # `client.cmd_string` and streams both stdout and stderr of the workers to the daemon log file
    arbiter_config = {
        'controller': client.get_controller_endpoint(),
        'pubsub_endpoint': client.get_pubsub_endpoint(),
        'stats_endpoint': client.get_stats_endpoint(),
        'logoutput': logoutput,
        'loglevel': loglevel,
        'debug': False,
        'statsd': True,
        'pidfile': client.circus_pid_file,
        'watchers': [{
            'cmd': client.cmd_string,
            'name': client.daemon_name,
            'numprocesses': number,
            'virtualenv': client.virtualenv,
            'copy_env': True,
            'stdout_stream': {
                'class': 'FileStream',
                'filename': client.daemon_log_file,
            },
            'stderr_stream': {
                'class': 'FileStream',
                'filename': client.daemon_log_file,
            },
            'env': get_env_with_venv_bin(),
        }]
    }  # yapf: disable

    # Daemonize before the arbiter and pid file are created, so that `os.getpid()` below is evaluated
    # after `daemonize()` has run
    if not foreground:
        daemonize()

    arbiter = get_arbiter(**arbiter_config)
    pidfile = Pidfile(arbiter.pidfile)

    try:
        pidfile.create(os.getpid())
    except RuntimeError as exception:
        echo.echo_critical(str(exception))

    # Configure the logger
    # NOTE(review): the first assignment is redundant, `loggerconfig` always ends up as
    # `arbiter.loggerconfig or None`
    loggerconfig = None
    loggerconfig = loggerconfig or arbiter.loggerconfig or None
    configure_logger(circus_logger, loglevel, logoutput, loggerconfig)

    # Main loop
    should_restart = True

    while should_restart:
        try:
            future = arbiter.start()
            should_restart = False
            # Only when the future check returns `None` is the arbiter's restart flag consulted
            if check_future_exception_and_log(future) is None:
                should_restart = arbiter._restarting  # pylint: disable=protected-access
        except Exception as exception:
            # Emergency stop
            arbiter.loop.run_sync(arbiter._emergency_stop)  # pylint: disable=protected-access
            raise exception
        except KeyboardInterrupt:
            pass
        finally:
            # NOTE(review): `arbiter` is reset to `None` and the pid file is unlinked on every pass, but
            # neither is recreated inside the loop. If `should_restart` is still true afterwards, the next
            # iteration calls `.start()` on `None` - confirm whether a restart is meant to be supported here.
            arbiter = None
            if pidfile is not None:
                pidfile.unlink()