def test_server_multiproc_threading(set_timeout, restore_signal):
    """Run three threaded workers and check each one starts and terminates.

    The workers share plain Python objects with the test because they run
    as threads; a lock guards the shared counters.
    """
    counts = {'started': 0, 'terminated': 0}
    seen_indices = [0, 0, 0]
    counter_lock = threading.Lock()

    def interrupt():
        # Deliver SIGINT to the whole process group to stop the server.
        os.kill(0, signal.SIGINT)

    @aiotools.actxmgr
    async def myserver(loop, proc_idx, args):
        # Start-up phase: record that this worker came up.
        await asyncio.sleep(0)
        with counter_lock:
            counts['started'] += 1
            seen_indices[proc_idx] = proc_idx

        yield

        # Shutdown phase: record that this worker went down.
        await asyncio.sleep(0)
        with counter_lock:
            counts['terminated'] += 1

    set_timeout(0.2, interrupt)
    aiotools.start_server(myserver, num_workers=3, use_threading=True)

    assert counts['started'] == 3
    assert counts['terminated'] == 3
    assert seen_indices == [0, 1, 2]
def test_server_user_main_threading(set_timeout, restore_signal):
    """Check that a user main context wraps threaded workers.

    The value yielded by the user main must arrive as the first worker
    argument, followed by the ``args`` given to ``start_server``.
    """
    entered = False
    exited = False

    def interrupt():
        os.kill(0, signal.SIGINT)

    @contextlib.contextmanager
    def mymain():
        nonlocal entered, exited
        entered = True
        yield 987
        exited = True

    @aiotools.actxmgr
    async def myworker(loop, proc_idx, args):
        from_main = args[0]
        assert from_main == 987  # first arg from user main
        from_start_server = args[1]
        assert from_start_server == 123  # second arg from start_server args
        yield

    set_timeout(0.2, interrupt)
    aiotools.start_server(
        myworker,
        mymain,
        num_workers=3,
        use_threading=True,
        args=(123, ),
    )

    assert entered
    assert exited
def test_server_multiproc(set_timeout, restore_signal):
    """Run three worker processes and verify start/stop bookkeeping.

    Counters live in multiprocessing shared memory and are handed to the
    workers through ``args``.
    """
    n_started = mp.Value('i', 0)
    n_terminated = mp.Value('i', 0)
    seen_indices = mp.Array('i', 3)

    def interrupt():
        os.kill(0, signal.SIGINT)

    @aiotools.actxmgr
    async def myserver(loop, proc_idx, args):
        up_counter, down_counter, indices = args

        await asyncio.sleep(0)
        with up_counter.get_lock():
            up_counter.value += 1
        indices[proc_idx] = proc_idx

        yield

        await asyncio.sleep(0)
        with down_counter.get_lock():
            down_counter.value += 1

    set_timeout(0.2, interrupt)
    shared = (n_started, n_terminated, seen_indices)
    aiotools.start_server(myserver, num_workers=3, args=shared)

    assert n_started.value == 3
    assert n_terminated.value == 3
    assert list(seen_indices) == [0, 1, 2]
    # No worker process may outlive the server.
    assert len(mp.active_children()) == 0
def main():
    """Start the gateway: load config, set up logging, then run the workers.

    One worker is started per CPU. A ``SyncManager`` provides the shared
    state (lock, barrier, last-seen map) passed to every worker, and is
    shut down when the server exits.
    """
    config = load_config(extra_args_func=gw_args)
    init_logger(config)

    log.info(f'Backend.AI Gateway {__version__}')
    log.info(f'runtime: {env_info()}')

    logging.getLogger('ai.backend.gateway.config').debug('debug mode enabled.')

    # aiohttp's own loggers follow the debug flag.
    aiohttp_level = 'DEBUG' if config.debug else 'WARNING'
    aiohttp.log.server_logger.setLevel(aiohttp_level)
    aiohttp.log.access_logger.setLevel(aiohttp_level)

    asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

    worker_count = os.cpu_count()

    # The manager process ignores SIGINT so that it is not interrupted
    # together with the workers.
    manager = SyncManager()
    manager.start(lambda: signal.signal(signal.SIGINT, signal.SIG_IGN))

    shared_states = manager.Namespace()
    shared_states.lock = manager.Lock()
    shared_states.barrier = manager.Barrier(worker_count)
    shared_states.agent_last_seen = manager.dict()

    try:
        aiotools.start_server(
            server_main,
            num_workers=worker_count,
            extra_procs=[event_router],
            args=(config, shared_states),
        )
    finally:
        manager.shutdown()
        log.info('terminated.')
def test_server_extra_proc_custom_stop_signal(set_timeout, restore_signal):
    """Check that extra processes are interrupted by a custom stop signal.

    Each extra process stores the signal number carried by
    ``InterruptedBySignal`` into a shared array slot.
    """
    received = mp.Array('i', [0, 0])

    def interrupt():
        os.kill(os.getpid(), signal.SIGUSR1)

    def extra_proc(key, _, pidx, args):
        slots = args[0]
        try:
            # Busy-wait until the stop signal interrupts the sleep.
            while True:
                time.sleep(0.1)
        except aiotools.InterruptedBySignal as exc:
            slots[key] = exc.args[0]

    @aiotools.server
    async def myworker(loop, pidx, args):
        yield

    set_timeout(0.3, interrupt)
    aiotools.start_server(
        myworker,
        extra_procs=[functools.partial(extra_proc, key) for key in (0, 1)],
        stop_signals={signal.SIGUSR1},
        args=(received, ),
        num_workers=3,
    )

    for slot in (0, 1):
        assert received[slot] == signal.SIGUSR1
def test_server_user_main_custom_stop_signals(set_timeout, restore_signal):
    """Check that a custom stop signal reaches the user main and all workers.

    Both the user main and each worker receive the stop signal number as
    the value of their ``yield`` expression; workers record it in a shared
    array indexed by ``proc_idx``.
    """
    main_enter = False
    main_exit = False
    main_signal = None
    worker_signals = mp.Array('i', 3)

    @aiotools.main
    def mymain():
        nonlocal main_enter, main_exit, main_signal
        main_enter = True
        # The value sent into the generator is the signal that stopped us.
        main_signal = yield
        main_exit = True

    @aiotools.server
    async def myworker(loop, proc_idx, args):
        worker_signals = args[0]
        worker_signals[proc_idx] = yield

    def interrupt():
        os.kill(os.getpid(), signal.SIGUSR1)

    set_timeout(0.2, interrupt)
    aiotools.start_server(myworker, mymain, num_workers=3,
                          stop_signals={signal.SIGUSR1},
                          args=(worker_signals, ))

    assert main_enter
    assert main_exit
    assert main_signal == signal.SIGUSR1
    assert list(worker_signals) == [signal.SIGUSR1] * 3
def test_server_user_main_tuple(set_timeout, restore_signal):
    """Check that a tuple yielded by the user main is spread into worker args.

    The two yielded values must come first, followed by the ``args`` given
    to ``start_server``.
    """
    entered = False
    exited = False

    def interrupt():
        os.kill(os.getpid(), signal.SIGINT)

    @aiotools.main
    def mymain():
        nonlocal entered, exited
        entered = True
        yield 987, 654
        exited = True

    @aiotools.server
    async def myworker(loop, proc_idx, args):
        first, second, third = args[0], args[1], args[2]
        assert first == 987  # first arg from user main
        assert second == 654  # second arg from user main
        assert third == 123  # third arg from start_server args
        yield

    set_timeout(0.2, interrupt)
    aiotools.start_server(
        myworker,
        mymain,
        num_workers=3,
        args=(123, ),
    )

    assert entered
    assert exited
def test_server_user_main(mocker, set_timeout, restore_signal, start_method):
    """Check that the user main wraps the workers and feeds them its value.

    The multiprocessing module used by ``aiotools.server`` is replaced with
    the context for the requested ``start_method``.
    """
    mocker.patch('aiotools.server.mp', mp.get_context(start_method))

    main_enter = False
    main_exit = False

    # FIXME: This should work with start_method = "spawn", but to test with it
    #        we need to allow passing arguments to user-provided main functions.

    @aiotools.main
    def mymain_user_main():
        nonlocal main_enter, main_exit
        main_enter = True
        yield 987
        main_exit = True

    @aiotools.server  # type: ignore
    async def myworker_user_main(loop, proc_idx, args):
        from_main = args[0]
        assert from_main == 987  # first arg from user main
        from_start_server = args[1]
        assert from_start_server == 123  # second arg from start_server args
        yield

    set_timeout(0.2, interrupt)
    aiotools.start_server(
        myworker_user_main,
        mymain_user_main,
        num_workers=3,
        args=(123, ),
    )

    assert main_enter
    assert main_exit
def main(ctx, config_path, debug):
    """Run the manager server unless Click is dispatching a subcommand.

    Writes the PID file before start-up and removes it afterwards, sets the
    process title, optionally installs uvloop, and then blocks in
    ``aiotools.start_server``.
    """
    cfg = load_config(config_path, debug)

    if ctx.invoked_subcommand is not None:
        # Click is going to invoke a subcommand.
        return

    cfg['manager']['pid-file'].write_text(str(os.getpid()))
    try:
        with Logger(cfg['logging']):
            namespace = cfg['etcd']['namespace']
            setproctitle(f"backend.ai: manager {namespace}")
            log.info('Backend.AI Gateway {0}', __version__)
            log.info('runtime: {0}', env_info())
            logging.getLogger('ai.backend.gateway.config').debug(
                'debug mode enabled.')

            use_uvloop = cfg['manager']['event-loop'] == 'uvloop'
            if use_uvloop:
                uvloop.install()
                log.info('Using uvloop as the event loop backend')

            try:
                aiotools.start_server(
                    server_main,
                    num_workers=cfg['manager']['num-proc'],
                    extra_procs=[event_router],
                    args=(cfg, ),
                )
            finally:
                log.info('terminated.')
    finally:
        # check is_file() to prevent deleting /dev/null!
        if cfg['manager']['pid-file'].is_file():
            cfg['manager']['pid-file'].unlink()
def main():
    """Start the gateway with package-scoped logging enabled.

    Loads the configuration, registers the logged packages, adjusts the
    aiohttp log levels to the debug flag, installs the uvloop policy and
    runs ``config.num_proc`` workers plus the event router.
    """
    config = load_config(extra_args_funcs=(gw_args, Logger.update_log_args))

    logger = Logger(config)
    for pkg in ('aiotools', 'aiopg', 'ai.backend'):
        logger.add_pkg(pkg)

    with logger:
        log.info(f'Backend.AI Gateway {__version__}')
        log.info(f'runtime: {env_info()}')

        logging.getLogger('ai.backend.gateway.config').debug(
            'debug mode enabled.')

        # aiohttp's own loggers follow the debug flag.
        aiohttp_level = 'DEBUG' if config.debug else 'WARNING'
        aiohttp.log.server_logger.setLevel(aiohttp_level)
        aiohttp.log.access_logger.setLevel(aiohttp_level)

        asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

        try:
            aiotools.start_server(
                server_main,
                num_workers=config.num_proc,
                extra_procs=[event_router],
                args=(config, ),
            )
        finally:
            log.info('terminated.')
def test_server_singleproc_threading(restore_signal):
    """Run a single threaded worker and check it starts and stops once.

    The worker schedules its own interruption on its event loop after it
    has started.
    """
    n_started = 0
    n_terminated = 0
    counter_lock = threading.Lock()

    def interrupt():
        os.kill(0, signal.SIGINT)

    @aiotools.actxmgr
    async def myserver(loop, proc_idx, args):
        nonlocal n_started, n_terminated
        # With the default worker count the only index is 0 and no extra
        # arguments are passed.
        assert proc_idx == 0
        assert len(args) == 0

        await asyncio.sleep(0)
        with counter_lock:
            n_started += 1
        loop.call_later(0.2, interrupt)

        yield

        await asyncio.sleep(0)
        with counter_lock:
            n_terminated += 1

    aiotools.start_server(myserver, use_threading=True)

    assert n_started == 1
    assert n_terminated == 1
def test_server_singleproc(restore_signal):
    """Run a single worker and check it starts and terminates exactly once.

    Counters are multiprocessing shared values captured by the worker's
    closure; the worker schedules its own interruption after start-up.
    """
    n_started = mp.Value('i', 0)
    n_terminated = mp.Value('i', 0)

    def interrupt():
        os.kill(0, signal.SIGINT)

    @aiotools.actxmgr
    async def myserver(loop, proc_idx, args):
        nonlocal n_started, n_terminated
        assert proc_idx == 0
        assert len(args) == 0

        await asyncio.sleep(0)
        with n_started.get_lock():
            n_started.value += 1
        loop.call_later(0.2, interrupt)

        yield

        await asyncio.sleep(0)
        with n_terminated.get_lock():
            n_terminated.value += 1

    aiotools.start_server(myserver)

    assert n_started.value == 1
    assert n_terminated.value == 1
def test_server_singleproc(mocker, set_timeout, restore_signal, start_method):
    """Run one worker under the given start method and count start/stop.

    The shared counters are created from the same multiprocessing context
    that is patched into ``aiotools.server``.
    """
    mpctx = mp.get_context(start_method)
    mocker.patch('aiotools.server.mp', mpctx)

    n_started = mpctx.Value('i', 0)
    n_terminated = mpctx.Value('i', 0)
    shared = (n_started, n_terminated)

    set_timeout(0.2, interrupt)
    aiotools.start_server(myserver_singleproc, args=shared)

    assert n_started.value == 1
    assert n_terminated.value == 1
def test_server_multiproc(mocker, set_timeout, restore_signal, start_method):
    """Run three workers under the given start method and verify bookkeeping.

    Shared counters and the index array are created from the multiprocessing
    context patched into ``aiotools.server`` and passed to the workers via
    ``args``.
    """
    mpctx = mp.get_context(start_method)
    mocker.patch('aiotools.server.mp', mpctx)

    started = mpctx.Value('i', 0)
    terminated = mpctx.Value('i', 0)
    proc_idxs = mpctx.Array('i', 3)

    set_timeout(0.2, interrupt)
    aiotools.start_server(myserver_multiproc, num_workers=3,
                          args=(started, terminated, proc_idxs))

    assert started.value == 3
    assert terminated.value == 3
    assert list(proc_idxs) == [0, 1, 2]
    # Query the same context the server was started with, as the other
    # start_method-parametrized tests do.
    assert len(mpctx.active_children()) == 0
def test_server_extra_proc_threading(set_timeout, restore_signal):
    """Check that threaded extra procs run and are stopped via an event."""

    # When using extra_procs with threading, you need to provide a way to
    # explicitly interrupt your synchronous loop.
    # Here, we use a threading.Event object to signal interruption.

    markers = [0, 0]
    marker_lock = threading.Lock()

    def interrupt():
        os.kill(0, signal.SIGINT)

    def extra_proc(key, intr_event, pidx, args):
        assert isinstance(intr_event, threading.Event)
        # 98x marks "running", 99x marks "finished".
        with marker_lock:
            markers[key] = 980 + key
        try:
            while not intr_event.is_set():
                time.sleep(0.1)
        except Exception as exc:
            print(f'extra[{key}] exception', exc)
        finally:
            with marker_lock:
                markers[key] = 990 + key

    @aiotools.actxmgr
    async def myworker(loop, pidx, args):
        yield

    set_timeout(0.2, interrupt)
    aiotools.start_server(
        myworker,
        extra_procs=[functools.partial(extra_proc, key) for key in (0, 1)],
        use_threading=True,
        num_workers=3,
        args=(123, ),
    )

    assert markers[0] == 990
    assert markers[1] == 991
def main(ctx: click.Context, config_path: Path, debug: bool) -> None:
    """Run the manager server unless Click is dispatching a subcommand.

    Writes the PID file, prepares a per-process IPC logging endpoint, runs
    the workers under the master logger, and removes the PID file on exit.
    """
    cfg = load_config(config_path, debug)

    if ctx.invoked_subcommand is not None:
        # Click is going to invoke a subcommand.
        return

    cfg['manager']['pid-file'].write_text(str(os.getpid()))

    # The logger socket path is unique per manager process.
    log_sockpath = Path(
        f'/tmp/backend.ai/ipc/manager-logger-{os.getpid()}.sock')
    log_sockpath.parent.mkdir(parents=True, exist_ok=True)
    log_endpoint = f'ipc://{log_sockpath}'

    try:
        with Logger(cfg['logging'], is_master=True, log_endpoint=log_endpoint):
            namespace = cfg['etcd']['namespace']
            setproctitle(f"backend.ai: manager {namespace}")
            log.info('Backend.AI Gateway {0}', __version__)
            log.info('runtime: {0}', env_info())
            logging.getLogger('ai.backend.gateway.config').debug(
                'debug mode enabled.')

            if cfg['manager']['event-loop'] == 'uvloop':
                # Imported lazily so uvloop is only needed when selected.
                import uvloop
                uvloop.install()
                log.info('Using uvloop as the event loop backend')

            try:
                aiotools.start_server(
                    server_main_logwrapper,
                    num_workers=cfg['manager']['num-proc'],
                    args=(cfg, log_endpoint),
                )
            finally:
                log.info('terminated.')
    finally:
        # check is_file() to prevent deleting /dev/null!
        if cfg['manager']['pid-file'].is_file():
            cfg['manager']['pid-file'].unlink()
def test_server_multiproc_custom_stop_signals(mocker, set_timeout, restore_signal, start_method):
    """Run two workers stopped by SIGUSR1 and verify what each one recorded."""
    mpctx = mp.get_context(start_method)
    mocker.patch('aiotools.server.mp', mpctx)

    n_started = mpctx.Value('i', 0)
    n_terminated = mpctx.Value('i', 0)
    signals_seen = mpctx.Array('i', 2)
    indices_seen = mpctx.Array('i', 2)
    shared = (n_started, n_terminated, signals_seen, indices_seen)

    set_timeout(0.2, interrupt_usr1)
    aiotools.start_server(
        myserver_multiproc_custom_stop_signals,
        num_workers=2,
        stop_signals={signal.SIGUSR1},
        args=shared,
    )

    assert n_started.value == 2
    assert n_terminated.value == 2
    assert list(signals_seen) == [signal.SIGUSR1, signal.SIGUSR1]
    assert list(indices_seen) == [0, 1]
    # No worker process may outlive the server.
    assert len(mpctx.active_children()) == 0
def test_server_worker_init_error(mocker, restore_signal, use_threading, start_method):
    """Check that a failing worker initialization shuts the server down.

    Three workers are started; the assertions expect all three to have
    begun start-up, two to have terminated normally, and an ERROR record
    mentioning "initialization" to have been placed on the log queue.
    """
    mpctx = mp.get_context(start_method)
    mocker.patch('aiotools.server.mp', mpctx)

    started = mpctx.Value('i', 0)
    terminated = mpctx.Value('i', 0)
    log_queue = mpctx.Queue()

    aiotools.start_server(myserver_worker_init_error,
                          num_workers=3,
                          use_threading=use_threading,
                          args=(started, terminated, log_queue))
    # it should automatically shut down!

    # reset logging
    logging.shutdown()

    assert started.value == 3
    # workers who did not raise errors have already started,
    # and they should have terminated normally
    # when the erroneous worker interrupted the main loop.
    assert terminated.value == 2
    # Query the same context the server was started with, as the other
    # start_method-parametrized tests do.
    assert len(mpctx.active_children()) == 0
    assert not log_queue.empty()
    has_error_log = False
    while not log_queue.empty():
        rec = log_queue.get()
        if rec.levelname == 'ERROR':
            has_error_log = True
            assert 'initialization' in rec.message
            # exception info is logged to the console,
            # but we cannot access it here because exceptions
            # are not picklable.
            assert rec.exc_info is None
    assert has_error_log
def test_server_worker_init_error_multi(mocker, restore_signal, use_threading, start_method):
    """Check server shutdown when worker initialization fails.

    Only lower bounds on the start/terminate counters are asserted, plus
    the presence of an ERROR log record mentioning "initialization".
    """
    mpctx = mp.get_context(start_method)
    mocker.patch('aiotools.server.mp', mpctx)

    n_started = mpctx.Value('i', 0)
    n_terminated = mpctx.Value('i', 0)
    log_queue = mpctx.Queue()

    aiotools.start_server(
        myserver_worker_init_error_multi,
        num_workers=3,
        use_threading=use_threading,
        args=(n_started, n_terminated, log_queue),
    )
    # it should automatically shut down!

    # reset logging
    logging.shutdown()

    assert n_started.value >= 1
    # non-errored workers should have been terminated normally.
    assert n_terminated.value >= 1
    # there is one worker remaining -- which is "cancelled"!
    # just ensure that all workers have terminated now.
    assert len(mpctx.active_children()) == 0

    assert not log_queue.empty()
    error_seen = False
    while not log_queue.empty():
        record = log_queue.get()
        if record.levelname != 'ERROR':
            continue
        error_seen = True
        assert 'initialization' in record.message
        # exception info is logged to the console,
        # but we cannot access it here because exceptions
        # are not picklable.
        assert record.exc_info is None
    assert error_seen
def test_server_extra_proc(set_timeout, restore_signal):
    """Check that extra processes run alongside workers and finish on SIGINT.

    Each extra process writes 98x into a shared array while running and
    99x from its ``finally`` clause.
    """
    markers = mp.Array('i', [0, 0])

    def interrupt():
        os.kill(0, signal.SIGINT)

    def extra_proc(key, _, pidx, args):
        # In multiprocessing mode no interruption event object is passed.
        assert _ is None
        markers[key] = 980 + key
        try:
            while True:
                time.sleep(0.1)
        except KeyboardInterrupt:
            print(f'extra[{key}] interrupted', file=sys.stderr)
        except Exception as exc:
            print(f'extra[{key}] exception', exc, file=sys.stderr)
        finally:
            print(f'extra[{key}] finish', file=sys.stderr)
            markers[key] = 990 + key

    @aiotools.actxmgr
    async def myworker(loop, pidx, args):
        yield

    set_timeout(0.2, interrupt)
    aiotools.start_server(
        myworker,
        extra_procs=[functools.partial(extra_proc, key) for key in (0, 1)],
        num_workers=3,
        args=(123, ),
    )

    assert markers[0] == 990
    assert markers[1] == 991
def main(cli_ctx, config_path, debug):
    """Validate the storage agent configuration and run the server.

    Reads the configuration file, applies environment-variable and
    ``--debug`` overrides, validates the result, rejects unusable RPC
    listen addresses and non-root execution, and - when no Click subcommand
    is being invoked - runs a single threaded worker.

    Raises:
        click.Abort: on configuration validation failure, an unspecified or
            link-local RPC host, or when not running as root.

    Returns:
        0 after the server has exited (or when a subcommand takes over).
    """
    volume_config_iv = t.Dict({
        t.Key('etcd'): t.Dict({
            t.Key('namespace'): t.String,
            t.Key('addr'): tx.HostPortPair(allow_blank_host=False)
        }).allow_extra('*'),
        t.Key('logging'): t.Any,  # checked in ai.backend.common.logging
        t.Key('agent'): t.Dict({
            t.Key('mode'): t.Enum('scratch', 'vfolder'),
            t.Key('rpc-listen-addr'): tx.HostPortPair(allow_blank_host=True),
            t.Key('user-uid'): t.Int,
            t.Key('user-gid'): t.Int
        }),
        t.Key('storage'): t.Dict({
            t.Key('mode'): t.Enum('xfs', 'btrfs'),
            t.Key('path'): t.String
        })
    }).allow_extra('*')

    # Determine where to read configuration.
    raw_cfg, cfg_src_path = config.read_from_file(config_path, 'agent')

    config.override_with_env(raw_cfg, ('etcd', 'namespace'), 'BACKEND_NAMESPACE')
    config.override_with_env(raw_cfg, ('etcd', 'addr'), 'BACKEND_ETCD_ADDR')
    config.override_with_env(raw_cfg, ('etcd', 'user'), 'BACKEND_ETCD_USER')
    config.override_with_env(raw_cfg, ('etcd', 'password'), 'BACKEND_ETCD_PASSWORD')
    config.override_with_env(raw_cfg, ('agent', 'rpc-listen-addr', 'host'),
                             'BACKEND_AGENT_HOST_OVERRIDE')
    config.override_with_env(raw_cfg, ('agent', 'rpc-listen-addr', 'port'),
                             'BACKEND_AGENT_PORT')

    if debug:
        config.override_key(raw_cfg, ('debug', 'enabled'), True)
        config.override_key(raw_cfg, ('logging', 'level'), 'DEBUG')
        config.override_key(raw_cfg, ('logging', 'pkg-ns', 'ai.backend'), 'DEBUG')

    try:
        cfg = config.check(raw_cfg, volume_config_iv)
        cfg['_src'] = cfg_src_path
    except config.ConfigurationError as e:
        print(
            'ConfigurationError: Validation of agent configuration has failed:',
            file=sys.stderr)
        print(pformat(e.invalid_data), file=sys.stderr)
        raise click.Abort()

    rpc_host = cfg['agent']['rpc-listen-addr'].host
    if (isinstance(rpc_host, BaseIPAddress) and
            (rpc_host.is_unspecified or rpc_host.is_link_local)):
        print(
            'ConfigurationError: '
            'Cannot use link-local or unspecified IP address as the RPC listening host.',
            file=sys.stderr)
        raise click.Abort()

    if os.getuid() != 0:
        print('Storage agent can only be run as root', file=sys.stderr)
        raise click.Abort()

    if cli_ctx.invoked_subcommand is None:
        setproctitle('Backend.AI: Storage Agent')
        logger = Logger(cfg['logging'])
        with logger:
            # The message needs a placeholder for VERSION to be rendered;
            # this matches the brace-style log calls used by the sibling
            # entry points in this file.
            log.info('Backend.AI Storage Agent {0}', VERSION)

            log_config = logging.getLogger('ai.backend.agent.config')
            if debug:
                log_config.debug('debug mode enabled.')

            if 'debug' in cfg and cfg['debug']['enabled']:
                print('== Agent configuration ==')
                pprint(cfg)

            aiotools.start_server(server_main, num_workers=1,
                                  use_threading=True, args=(cfg, ))
            log.info('exit.')
    return 0
def main(cli_ctx, config_path, debug):
    """Validate the storage proxy configuration and run the server.

    Reads the configuration file, applies environment-variable and
    ``--debug`` overrides, validates the result and forces the ``spawn``
    multiprocessing start method. When no Click subcommand is being
    invoked, it writes the PID file, sets up the IPC logging endpoint, runs
    the workers, and removes the PID file afterwards.

    Raises:
        click.Abort: when configuration validation fails.

    Returns:
        0 after the server has exited (or when a subcommand takes over).
    """
    # Determine where to read configuration.
    raw_cfg, cfg_src_path = config.read_from_file(config_path, 'storage-proxy')

    config.override_with_env(raw_cfg, ('etcd', 'namespace'), 'BACKEND_NAMESPACE')
    config.override_with_env(raw_cfg, ('etcd', 'addr'), 'BACKEND_ETCD_ADDR')
    config.override_with_env(raw_cfg, ('etcd', 'user'), 'BACKEND_ETCD_USER')
    config.override_with_env(raw_cfg, ('etcd', 'password'), 'BACKEND_ETCD_PASSWORD')
    if debug:
        config.override_key(raw_cfg, ('debug', 'enabled'), True)

    try:
        local_config = config.check(raw_cfg, local_config_iv)
        local_config['_src'] = cfg_src_path
    except config.ConfigurationError as e:
        print(
            'ConfigurationError: Validation of agent configuration has failed:',
            file=sys.stderr)
        print(pformat(e.invalid_data), file=sys.stderr)
        raise click.Abort()

    if local_config['debug']['enabled']:
        config.override_key(local_config, ('logging', 'level'), 'DEBUG')
        config.override_key(local_config, ('logging', 'pkg-ns', 'ai.backend'), 'DEBUG')

    # if os.getuid() != 0:
    #     print('Storage agent can only be run as root', file=sys.stderr)
    #     raise click.Abort()

    multiprocessing.set_start_method('spawn')

    if cli_ctx.invoked_subcommand is None:
        local_config['storage-proxy']['pid-file'].write_text(str(os.getpid()))
        # The logger socket path is unique per storage-proxy process.
        log_sockpath = Path(
            f'/tmp/backend.ai/ipc/storage-proxy-logger-{os.getpid()}.sock')
        log_sockpath.parent.mkdir(parents=True, exist_ok=True)
        log_endpoint = f'ipc://{log_sockpath}'
        local_config['logging']['endpoint'] = log_endpoint
        try:
            logger = Logger(local_config['logging'], is_master=True,
                            log_endpoint=log_endpoint)
            with logger:
                setproctitle('backend.ai: storage-proxy')
                # The message needs a placeholder for VERSION to be
                # rendered, like the brace-style calls right below.
                log.info('Backend.AI Storage Proxy {0}', VERSION)
                log.info('Runtime: {0}', env_info())
                log.info('Node ID: {0}', local_config['storage-proxy']['node-id'])
                log_config = logging.getLogger('ai.backend.agent.config')
                if local_config['debug']['enabled']:
                    log_config.debug('debug mode enabled.')
                if 'debug' in local_config and local_config['debug']['enabled']:
                    print('== Storage proxy configuration ==')
                    pprint(local_config)
                if local_config['storage-proxy']['event-loop'] == 'uvloop':
                    # Imported lazily so uvloop is only needed when selected.
                    import uvloop
                    uvloop.install()
                    log.info('Using uvloop as the event loop backend')
                aiotools.start_server(
                    server_main_logwrapper,
                    use_threading=False,
                    num_workers=local_config['storage-proxy']['num-proc'],
                    args=(local_config, log_endpoint),
                )
                log.info('exit.')
        finally:
            if local_config['storage-proxy']['pid-file'].is_file():
                # check is_file() to prevent deleting /dev/null!
                local_config['storage-proxy']['pid-file'].unlink()
    return 0
router.connect('ipc://example-events') async def process_incoming(router): while True: data = await router.recv() if not data: return log.info(data) task = loop.create_task(process_incoming(router)) log.info('started') try: yield finally: await task router.close() zctx.term() log.info('terminated') if __name__ == '__main__': # This example must be run with multiprocessing. server = aiotools.start_server( worker_main, use_threading=False, num_workers=num_workers, extra_procs=[router_main], start_method='spawn', )
while True: log.value += 1 await asyncio.sleep(1) @aiotools.actxmgr async def worker_main(loop, pidx, args): app = web.Application() loop = asyncio.get_event_loop() future = loop.create_task(display_log(pidx)) app.add_routes(routes) web_handler = app.make_handler() server = await loop.create_server(web_handler, host='0.0.0.0', port=8888, reuse_port=True) try: yield finally: server.close() await server.wait_closed() await app.shutdown() await web_handler.finish_connections(60.0) await app.cleanup() if __name__ == '__main__': # Run the above server using 4 worker processes. aiotools.start_server(worker_main, num_workers=4)
def main(cli_ctx, config_path, debug):
    """Validate the storage proxy configuration and run the server.

    Reads the configuration file, applies environment-variable and
    ``--debug`` overrides, validates the result and forces the ``spawn``
    multiprocessing start method. When no Click subcommand is being
    invoked, it writes the PID file, sets up the IPC logging endpoint, runs
    the workers, and removes the PID file afterwards.

    Raises:
        click.Abort: when configuration validation fails.

    Returns:
        0 after the server has exited (or when a subcommand takes over).
    """
    # Determine where to read configuration.
    raw_cfg, cfg_src_path = config.read_from_file(config_path, "storage-proxy")

    config.override_with_env(raw_cfg, ("etcd", "namespace"), "BACKEND_NAMESPACE")
    config.override_with_env(raw_cfg, ("etcd", "addr"), "BACKEND_ETCD_ADDR")
    config.override_with_env(raw_cfg, ("etcd", "user"), "BACKEND_ETCD_USER")
    config.override_with_env(raw_cfg, ("etcd", "password"), "BACKEND_ETCD_PASSWORD")
    if debug:
        config.override_key(raw_cfg, ("debug", "enabled"), True)

    try:
        local_config = config.check(raw_cfg, local_config_iv)
        local_config["_src"] = cfg_src_path
    except config.ConfigurationError as e:
        print(
            "ConfigurationError: Validation of agent configuration has failed:",
            file=sys.stderr,
        )
        print(pformat(e.invalid_data), file=sys.stderr)
        raise click.Abort()

    if local_config["debug"]["enabled"]:
        config.override_key(local_config, ("logging", "level"), "DEBUG")
        config.override_key(local_config, ("logging", "pkg-ns", "ai.backend"), "DEBUG")

    # if os.getuid() != 0:
    #     print('Storage agent can only be run as root', file=sys.stderr)
    #     raise click.Abort()

    multiprocessing.set_start_method("spawn")

    if cli_ctx.invoked_subcommand is None:
        local_config["storage-proxy"]["pid-file"].write_text(str(os.getpid()))
        # The logger socket path is unique per storage-proxy process.
        log_sockpath = Path(
            f"/tmp/backend.ai/ipc/storage-proxy-logger-{os.getpid()}.sock",
        )
        log_sockpath.parent.mkdir(parents=True, exist_ok=True)
        log_endpoint = f"ipc://{log_sockpath}"
        local_config["logging"]["endpoint"] = log_endpoint
        try:
            logger = Logger(
                local_config["logging"],
                is_master=True,
                log_endpoint=log_endpoint,
            )
            with logger:
                setproctitle("backend.ai: storage-proxy")
                # The message needs a placeholder for VERSION to be
                # rendered, like the brace-style calls right below.
                log.info("Backend.AI Storage Proxy {0}", VERSION)
                log.info("Runtime: {0}", env_info())
                log.info("Node ID: {0}", local_config["storage-proxy"]["node-id"])
                log_config = logging.getLogger("ai.backend.agent.config")
                if local_config["debug"]["enabled"]:
                    log_config.debug("debug mode enabled.")
                if "debug" in local_config and local_config["debug"]["enabled"]:
                    print("== Storage proxy configuration ==")
                    pprint(local_config)
                if local_config["storage-proxy"]["event-loop"] == "uvloop":
                    # Imported lazily so uvloop is only needed when selected.
                    import uvloop

                    uvloop.install()
                    log.info("Using uvloop as the event loop backend")
                aiotools.start_server(
                    server_main_logwrapper,
                    num_workers=local_config["storage-proxy"]["num-proc"],
                    args=(local_config, log_endpoint),
                )
                log.info("exit.")
        finally:
            if local_config["storage-proxy"]["pid-file"].is_file():
                # check is_file() to prevent deleting /dev/null!
                local_config["storage-proxy"]["pid-file"].unlink()
    return 0
def test_server_worker_init_error(restore_signal, use_threading):
    """Check that a worker failing during start-up shuts the server down.

    Worker 0 raises before reaching its ``yield``; the remaining two are
    expected to start and terminate normally. Log records from the
    ``aiotools`` logger are captured through a queue handler.
    """
    n_started = mp.Value('i', 0)
    n_terminated = mp.Value('i', 0)
    log_queue = mp.Queue()

    @aiotools.actxmgr
    async def myserver(loop, proc_idx, args):
        up_counter, down_counter = args
        await asyncio.sleep(0)
        with up_counter.get_lock():
            up_counter.value += 1
        if proc_idx == 0:
            raise ZeroDivisionError('oops')

        yield

        # should not be reached if errored.
        await asyncio.sleep(0)
        with down_counter.get_lock():
            down_counter.value += 1

    queue_handler = {
        'class': 'logging.handlers.QueueHandler',
        'queue': log_queue,
        'level': 'DEBUG',
    }
    console_handler = {
        'class': 'logging.StreamHandler',
        'stream': 'ext://sys.stderr',
        'level': 'DEBUG',
    }
    logging.config.dictConfig({
        'version': 1,
        'handlers': {
            'q': queue_handler,
            'console': console_handler,
        },
        'loggers': {
            'aiotools': {
                'handlers': ['q', 'console'],
                'level': 'DEBUG',
            },
        },
    })

    aiotools.start_server(
        myserver,
        num_workers=3,
        use_threading=use_threading,
        args=(n_started, n_terminated),
    )
    # it should automatically shut down!

    # reset logging
    logging.shutdown()

    assert n_started.value == 3
    # non-errored workers should have been terminated normally.
    assert n_terminated.value == 2
    assert len(mp.active_children()) == 0

    assert not log_queue.empty()
    while not log_queue.empty():
        record = log_queue.get()
        assert record.levelname == 'ERROR'
        assert 'worker initialization' in record.message
        # exception info is logged to the console,
        # but we cannot access it here because exceptions
        # are not picklable.
        assert record.exc_info is None
def main(
    cli_ctx: click.Context,
    config_path: Path,
    debug: bool,
) -> int:
    """Validate the agent configuration and run the agent server.

    Reads the configuration file, applies legacy environment-variable and
    ``--debug`` overrides, validates it against the backend-specific
    schemas, and performs sanity checks on the RPC listen address, the
    statistics mode and (optionally) the coredump settings. When no Click
    subcommand is being invoked, it writes the PID file, sets up the IPC
    logging endpoint, runs a single worker, and removes the PID file
    afterwards.

    Raises:
        click.Abort: on any configuration validation or sanity-check failure.

    Returns:
        0 after the server has exited (or when a subcommand takes over).
    """

    # Determine where to read configuration.
    raw_cfg, cfg_src_path = config.read_from_file(config_path, 'agent')

    # Override the read config with environment variables (for legacy).
    config.override_with_env(raw_cfg, ('etcd', 'namespace'), 'BACKEND_NAMESPACE')
    config.override_with_env(raw_cfg, ('etcd', 'addr'), 'BACKEND_ETCD_ADDR')
    config.override_with_env(raw_cfg, ('etcd', 'user'), 'BACKEND_ETCD_USER')
    config.override_with_env(raw_cfg, ('etcd', 'password'), 'BACKEND_ETCD_PASSWORD')
    config.override_with_env(raw_cfg, ('agent', 'rpc-listen-addr', 'host'),
                             'BACKEND_AGENT_HOST_OVERRIDE')
    config.override_with_env(raw_cfg, ('agent', 'rpc-listen-addr', 'port'),
                             'BACKEND_AGENT_PORT')
    config.override_with_env(raw_cfg, ('agent', 'pid-file'), 'BACKEND_PID_FILE')
    config.override_with_env(raw_cfg, ('container', 'port-range'),
                             'BACKEND_CONTAINER_PORT_RANGE')
    config.override_with_env(raw_cfg, ('container', 'kernel-host'),
                             'BACKEND_KERNEL_HOST_OVERRIDE')
    config.override_with_env(raw_cfg, ('container', 'sandbox-type'), 'BACKEND_SANDBOX_TYPE')
    config.override_with_env(raw_cfg, ('container', 'scratch-root'), 'BACKEND_SCRATCH_ROOT')
    if debug:
        config.override_key(raw_cfg, ('debug', 'enabled'), True)
        config.override_key(raw_cfg, ('logging', 'level'), 'DEBUG')
        config.override_key(raw_cfg, ('logging', 'pkg-ns', 'ai.backend'), 'DEBUG')

    # Validate and fill configurations
    # (allow_extra will make configs to be forward-compatible)
    try:
        cfg = config.check(raw_cfg, agent_local_config_iv)
        # NOTE(review): the registry handling is assumed to belong to the
        # Kubernetes branch; the nesting was reconstructed from flattened
        # source - confirm against the upstream file.
        if cfg['agent']['backend'] == AgentBackend.KUBERNETES:
            cfg = config.check(raw_cfg, k8s_extra_config_iv)
            if cfg['registry']['type'] == 'local':
                registry_target_config_iv = registry_local_config_iv
            elif cfg['registry']['type'] == 'ecr':
                registry_target_config_iv = registry_ecr_config_iv
            else:
                print('Validation of agent configuration has failed: registry type {} not supported'
                    .format(cfg['registry']['type']), file=sys.stderr)
                raise click.Abort()

            registry_cfg = config.check(cfg['registry'], registry_target_config_iv)
            cfg['registry'] = registry_cfg
        if cfg['agent']['backend'] == AgentBackend.DOCKER:
            # Validation only: the checked result is discarded here.
            config.check(raw_cfg, docker_extra_config_iv)
        if 'debug' in cfg and cfg['debug']['enabled']:
            print('== Agent configuration ==')
            pprint(cfg)
        cfg['_src'] = cfg_src_path
    except config.ConfigurationError as e:
        print('ConfigurationError: Validation of agent configuration has failed:', file=sys.stderr)
        print(pformat(e.invalid_data), file=sys.stderr)
        raise click.Abort()

    rpc_host = cfg['agent']['rpc-listen-addr'].host
    if (isinstance(rpc_host, BaseIPAddress) and
        (rpc_host.is_unspecified or rpc_host.is_link_local)):
        print('ConfigurationError: '
              'Cannot use link-local or unspecified IP address as the RPC listening host.',
              file=sys.stderr)
        raise click.Abort()

    if os.getuid() != 0 and cfg['container']['stats-type'] == 'cgroup':
        print('Cannot use cgroup statistics collection mode unless the agent runs as root.',
              file=sys.stderr)
        raise click.Abort()

    if cli_ctx.invoked_subcommand is None:

        if cfg['debug']['coredump']['enabled']:
            if not sys.platform.startswith('linux'):
                print('ConfigurationError: '
                      'Storing container coredumps is only supported in Linux.',
                      file=sys.stderr)
                raise click.Abort()
            core_pattern = Path('/proc/sys/kernel/core_pattern').read_text().strip()
            # Patterns starting with '|' and relative patterns are rejected;
            # an absolute path is required.
            if core_pattern.startswith('|') or not core_pattern.startswith('/'):
                print('ConfigurationError: '
                      '/proc/sys/kernel/core_pattern must be an absolute path '
                      'to enable container coredumps.',
                      file=sys.stderr)
                raise click.Abort()
            cfg['debug']['coredump']['core_path'] = Path(core_pattern).parent

        cfg['agent']['pid-file'].write_text(str(os.getpid()))
        # The logger socket path is unique per agent process.
        log_sockpath = Path(f'/tmp/backend.ai/ipc/agent-logger-{os.getpid()}.sock')
        log_sockpath.parent.mkdir(parents=True, exist_ok=True)
        log_endpoint = f'ipc://{log_sockpath}'
        cfg['logging']['endpoint'] = log_endpoint
        try:
            logger = Logger(cfg['logging'], is_master=True, log_endpoint=log_endpoint)
            with logger:
                ns = cfg['etcd']['namespace']
                setproctitle(f"backend.ai: agent {ns}")
                log.info('Backend.AI Agent {0}', VERSION)
                log.info('runtime: {0}', utils.env_info())

                log_config = logging.getLogger('ai.backend.agent.config')
                if debug:
                    log_config.debug('debug mode enabled.')

                if cfg['agent']['event-loop'] == 'uvloop':
                    # Imported lazily so uvloop is only needed when selected.
                    import uvloop
                    uvloop.install()
                    log.info('Using uvloop as the event loop backend')
                aiotools.start_server(
                    server_main_logwrapper,
                    num_workers=1,
                    args=(cfg, log_endpoint),
                )
                log.info('exit.')
        finally:
            if cfg['agent']['pid-file'].is_file():
                # check is_file() to prevent deleting /dev/null!
                cfg['agent']['pid-file'].unlink()
    else:
        # Click is going to invoke a subcommand.
        pass
    return 0
def main(cli_ctx, config_path, debug):
    """Validate the watcher configuration and run the watcher server.

    Builds the watcher config schema, reads the ``agent`` configuration
    file, applies environment-variable and ``--debug`` overrides, validates
    the result, prepares the IPC logging endpoint and runs a single worker
    that stops on SIGINT, SIGTERM or SIGALRM.

    Raises:
        click.Abort: when configuration validation fails.

    Returns:
        0 after the server has exited.
    """

    # Schema of the watcher configuration, merged with the shared etcd schema.
    watcher_config_iv = t.Dict({
        t.Key('watcher'): t.Dict({
            t.Key('service-addr', default=('0.0.0.0', 6009)): tx.HostPortPair,
            t.Key('ssl-enabled', default=False): t.Bool,
            t.Key('ssl-cert', default=None): t.Null | tx.Path(type='file'),
            t.Key('ssl-key', default=None): t.Null | tx.Path(type='file'),
            t.Key('target-service', default='backendai-agent.service'): t.String,
            t.Key('soft-reset-available', default=False): t.Bool,
        }).allow_extra('*'),
        t.Key('logging'): t.Any,  # checked in ai.backend.common.logging
        t.Key('debug'): t.Dict({
            t.Key('enabled', default=False): t.Bool,
        }).allow_extra('*'),
    }).merge(config.etcd_config_iv).allow_extra('*')

    raw_cfg, cfg_src_path = config.read_from_file(config_path, 'agent')

    config.override_with_env(raw_cfg, ('etcd', 'namespace'), 'BACKEND_NAMESPACE')
    config.override_with_env(raw_cfg, ('etcd', 'addr'), 'BACKEND_ETCD_ADDR')
    config.override_with_env(raw_cfg, ('etcd', 'user'), 'BACKEND_ETCD_USER')
    config.override_with_env(raw_cfg, ('etcd', 'password'), 'BACKEND_ETCD_PASSWORD')
    config.override_with_env(raw_cfg, ('watcher', 'service-addr', 'host'),
                             'BACKEND_WATCHER_SERVICE_IP')
    config.override_with_env(raw_cfg, ('watcher', 'service-addr', 'port'),
                             'BACKEND_WATCHER_SERVICE_PORT')
    if debug:
        config.override_key(raw_cfg, ('debug', 'enabled'), True)

    try:
        cfg = config.check(raw_cfg, watcher_config_iv)
        if 'debug' in cfg and cfg['debug']['enabled']:
            print('== Watcher configuration ==')
            pprint(cfg)
        cfg['_src'] = cfg_src_path
    except config.ConfigurationError as e:
        print('Validation of watcher configuration has failed:', file=sys.stderr)
        print(pformat(e.invalid_data), file=sys.stderr)
        raise click.Abort()

    # Change the filename from the logging config's file section.
    log_sockpath = Path(f'/tmp/backend.ai/ipc/watcher-logger-{os.getpid()}.sock')
    log_sockpath.parent.mkdir(parents=True, exist_ok=True)
    log_endpoint = f'ipc://{log_sockpath}'
    cfg['logging']['endpoint'] = log_endpoint
    logger = Logger(cfg['logging'], is_master=True, log_endpoint=log_endpoint)
    # NOTE(review): the filename is rewritten after Logger has been
    # constructed; whether Logger observes it depends on whether it keeps a
    # reference to cfg['logging'] - confirm. Also note only stem and suffix
    # are kept, so any directory part of the configured filename is dropped.
    if 'file' in cfg['logging']['drivers']:
        fn = Path(cfg['logging']['file']['filename'])
        cfg['logging']['file']['filename'] = f"{fn.stem}-watcher{fn.suffix}"

    setproctitle(f"backend.ai: watcher {cfg['etcd']['namespace']}")
    with logger:
        log.info('Backend.AI Agent Watcher {0}', VERSION)
        log.info('runtime: {0}', utils.env_info())

        log_config = logging.getLogger('ai.backend.agent.config')
        log_config.debug('debug mode enabled.')

        aiotools.start_server(
            watcher_server,
            num_workers=1,
            args=(cfg, ),
            stop_signals={signal.SIGINT, signal.SIGTERM, signal.SIGALRM},
        )
        log.info('exit.')
    return 0
@aiotools.actxmgr
async def worker_main(loop, pidx, args):
    """Worker context: pull messages from the event socket and log them.

    Connects a ZeroMQ PULL stream to ``ipc://example-events`` and runs a
    background task that logs every received message until the stream is
    closed. Cleanup runs in a ``finally`` clause so that the stream is
    closed and the reader task awaited even when an exception is thrown
    into the context at the ``yield`` point.
    """
    log = get_logger('examples.zmqserver.worker', pidx)
    router = await aiozmq.create_zmq_stream(zmq.PULL, connect='ipc://example-events')

    async def process_incoming(router):
        while True:
            try:
                data = await router.read()
            except aiozmq.ZmqStreamClosed:
                # Closing the stream is the shutdown signal for this task.
                break
            log.info(data)

    task = loop.create_task(process_incoming(router))
    log.info('started')

    try:
        yield
    finally:
        # Close first so the pending read() raises ZmqStreamClosed and the
        # reader task can finish before we await it.
        router.close()
        await task
        log.info('terminated')


if __name__ == '__main__':
    server = aiotools.start_server(
        worker_main,
        num_workers=4,
        extra_procs=[router_main],
    )
def run(self, *args, workers: int = 1, **kwargs) -> None:
    """Start the server with ``self.run_worker`` as the worker context.

    ``workers`` is the number of workers to launch. Extra positional and
    keyword arguments are accepted but not forwarded.
    """
    worker_entry = self.run_worker
    aiotools.start_server(worker_entry, num_workers=workers)