Esempio n. 1
0
def load(config_path: Path = None, debug: bool = False):
    """Read, override, and validate the manager configuration.

    The raw configuration is obtained from ``config.read_from_file`` for the
    ``'manager'`` daemon. A fixed list of keys is then handed to
    ``config.override_with_env`` together with their legacy ``BACKEND_*``
    environment variable names, and the result is checked against
    ``manager_config_iv``.

    :param config_path: Passed through to ``config.read_from_file``.
    :param debug: When true, the debug flag is switched on and the logging
        levels are forced to ``'DEBUG'`` before validation.
    :returns: The validated configuration. ``'_src'`` holds the source path
        reported by ``config.read_from_file`` and ``['manager']['secret']``
        is replaced by ``secrets.token_urlsafe(16)`` when it was ``None``.
    :raises click.Abort: When ``config.ConfigurationError`` is raised while
        validating or post-processing; the invalid data is printed to stderr
        first.
    """
    raw_cfg, cfg_src_path = config.read_from_file(config_path, 'manager')

    # (config key path, legacy environment variable), applied in this order.
    legacy_env_overrides = (
        (('etcd', 'namespace'), 'BACKEND_NAMESPACE'),
        (('etcd', 'addr'), 'BACKEND_ETCD_ADDR'),
        (('etcd', 'user'), 'BACKEND_ETCD_USER'),
        (('etcd', 'password'), 'BACKEND_ETCD_PASSWORD'),
        (('db', 'addr'), 'BACKEND_DB_ADDR'),
        (('db', 'name'), 'BACKEND_DB_NAME'),
        (('db', 'user'), 'BACKEND_DB_USER'),
        (('db', 'password'), 'BACKEND_DB_PASSWORD'),
        (('manager', 'num-proc'), 'BACKEND_GATEWAY_NPROC'),
        (('manager', 'ssl-cert'), 'BACKEND_SSL_CERT'),
        (('manager', 'ssl-privkey'), 'BACKEND_SSL_KEY'),
        (('manager', 'pid-file'), 'BACKEND_PID_FILE'),
        (('manager', 'api-listen-addr', 'host'), 'BACKEND_SERVICE_IP'),
        (('manager', 'api-listen-addr', 'port'), 'BACKEND_SERVICE_PORT'),
        (('manager', 'event-listen-addr', 'host'),
         'BACKEND_ADVERTISED_MANAGER_HOST'),
        (('manager', 'event-listen-addr', 'port'), 'BACKEND_EVENTS_PORT'),
        (('docker-registry', 'ssl-verify'),
         'BACKEND_SKIP_SSLCERT_VALIDATION'),
    )
    for key_path, env_name in legacy_env_overrides:
        config.override_with_env(raw_cfg, key_path, env_name)

    if debug:
        # The command-line debug switch wins over whatever the file says.
        forced_values = (
            (('debug', 'enabled'), True),
            (('logging', 'level'), 'DEBUG'),
            (('logging', 'pkg-ns', 'ai.backend'), 'DEBUG'),
            (('logging', 'pkg-ns', 'aiohttp'), 'DEBUG'),
        )
        for key_path, forced in forced_values:
            config.override_key(raw_cfg, key_path, forced)

    # Validate and fill configurations
    # (allow_extra will make configs to be forward-compatible)
    try:
        cfg = config.check(raw_cfg, manager_config_iv)
        show_dump = 'debug' in cfg and cfg['debug']['enabled']
        if show_dump:
            # Dumped before '_src' and the generated secret are added.
            print('== Manager configuration ==', file=sys.stderr)
            print(pformat(cfg), file=sys.stderr)
        cfg['_src'] = cfg_src_path
        manager_section = cfg['manager']
        if manager_section['secret'] is None:
            manager_section['secret'] = secrets.token_urlsafe(16)
    except config.ConfigurationError as e:
        print('Validation of manager configuration has failed:',
              file=sys.stderr)
        print(pformat(e.invalid_data), file=sys.stderr)
        raise click.Abort()
    return cfg
Esempio n. 2
0
def main(cli_ctx, config_path, debug):
    """Load the storage agent configuration and, if no subcommand is given,
    run the storage agent server.

    :param cli_ctx: CLI context object; only ``invoked_subcommand`` is read.
    :param config_path: Passed through to ``config.read_from_file``.
    :param debug: When true, the debug flag is switched on and the logging
        levels are forced to ``'DEBUG'`` before validation.
    :returns: ``0``.
    :raises click.Abort: When configuration validation fails, when the RPC
        listening host is an unspecified or link-local IP address, or when
        the process is not running as uid 0.
    """
    # Validation schema for the configuration read below.
    volume_config_iv = t.Dict({
        t.Key('etcd'):
        t.Dict({
            t.Key('namespace'): t.String,
            t.Key('addr'): tx.HostPortPair(allow_blank_host=False)
        }).allow_extra('*'),
        t.Key('logging'):
        t.Any,  # checked in ai.backend.common.logging
        t.Key('agent'):
        t.Dict({
            t.Key('mode'): t.Enum('scratch', 'vfolder'),
            t.Key('rpc-listen-addr'): tx.HostPortPair(allow_blank_host=True),
            t.Key('user-uid'): t.Int,
            t.Key('user-gid'): t.Int
        }),
        t.Key('storage'):
        t.Dict({
            t.Key('mode'): t.Enum('xfs', 'btrfs'),
            t.Key('path'): t.String
        })
    }).allow_extra('*')

    # Determine where to read configuration.
    raw_cfg, cfg_src_path = config.read_from_file(config_path, 'agent')

    config.override_with_env(raw_cfg, ('etcd', 'namespace'),
                             'BACKEND_NAMESPACE')
    config.override_with_env(raw_cfg, ('etcd', 'addr'), 'BACKEND_ETCD_ADDR')
    config.override_with_env(raw_cfg, ('etcd', 'user'), 'BACKEND_ETCD_USER')
    config.override_with_env(raw_cfg, ('etcd', 'password'),
                             'BACKEND_ETCD_PASSWORD')
    config.override_with_env(raw_cfg, ('agent', 'rpc-listen-addr', 'host'),
                             'BACKEND_AGENT_HOST_OVERRIDE')
    config.override_with_env(raw_cfg, ('agent', 'rpc-listen-addr', 'port'),
                             'BACKEND_AGENT_PORT')

    if debug:
        config.override_key(raw_cfg, ('debug', 'enabled'), True)
        config.override_key(raw_cfg, ('logging', 'level'), 'DEBUG')
        config.override_key(raw_cfg, ('logging', 'pkg-ns', 'ai.backend'),
                            'DEBUG')

    try:
        cfg = config.check(raw_cfg, volume_config_iv)
        cfg['_src'] = cfg_src_path
    except config.ConfigurationError as e:
        print(
            'ConfigurationError: Validation of agent configuration has failed:',
            file=sys.stderr)
        print(pformat(e.invalid_data), file=sys.stderr)
        raise click.Abort()

    # Hostnames pass through; only parsed IP addresses are range-checked.
    rpc_host = cfg['agent']['rpc-listen-addr'].host
    if (isinstance(rpc_host, BaseIPAddress)
            and (rpc_host.is_unspecified or rpc_host.is_link_local)):
        print(
            'ConfigurationError: '
            'Cannot use link-local or unspecified IP address as the RPC listening host.',
            file=sys.stderr)
        raise click.Abort()

    if os.getuid() != 0:
        print('Storage agent can only be run as root', file=sys.stderr)
        raise click.Abort()

    if cli_ctx.invoked_subcommand is None:
        setproctitle('Backend.AI: Storage Agent')
        logger = Logger(cfg['logging'])
        with logger:
            # VERSION needs a placeholder; without one it was passed as a
            # formatting argument that the message never referenced.
            log.info('Backend.AI Storage Agent {0}', VERSION)

            log_config = logging.getLogger('ai.backend.agent.config')
            if debug:
                log_config.debug('debug mode enabled.')

            if 'debug' in cfg and cfg['debug']['enabled']:
                print('== Agent configuration ==')
                pprint(cfg)

            aiotools.start_server(server_main,
                                  num_workers=1,
                                  use_threading=True,
                                  args=(cfg, ))
            log.info('exit.')
    return 0
def main(cli_ctx, config_path, debug):
    """Load the storage proxy configuration and, if no subcommand is given,
    run the storage proxy server.

    :param cli_ctx: CLI context object; only ``invoked_subcommand`` is read.
    :param config_path: Passed through to ``config.read_from_file``.
    :param debug: When true, the debug flag is switched on before validation.
    :returns: ``0``.
    :raises click.Abort: When configuration validation fails.
    """
    # Determine where to read configuration.
    raw_cfg, cfg_src_path = config.read_from_file(config_path, 'storage-proxy')

    config.override_with_env(raw_cfg, ('etcd', 'namespace'),
                             'BACKEND_NAMESPACE')
    config.override_with_env(raw_cfg, ('etcd', 'addr'), 'BACKEND_ETCD_ADDR')
    config.override_with_env(raw_cfg, ('etcd', 'user'), 'BACKEND_ETCD_USER')
    config.override_with_env(raw_cfg, ('etcd', 'password'),
                             'BACKEND_ETCD_PASSWORD')
    if debug:
        config.override_key(raw_cfg, ('debug', 'enabled'), True)

    try:
        local_config = config.check(raw_cfg, local_config_iv)
        local_config['_src'] = cfg_src_path
    except config.ConfigurationError as e:
        # This entry point loads the 'storage-proxy' configuration, so the
        # message names it instead of the agent.
        print(
            'ConfigurationError: '
            'Validation of storage-proxy configuration has failed:',
            file=sys.stderr)
        print(pformat(e.invalid_data), file=sys.stderr)
        raise click.Abort()

    # Applied after validation so that a debug flag coming from the config
    # file (not only from the CLI) also raises the logging levels.
    if local_config['debug']['enabled']:
        config.override_key(local_config, ('logging', 'level'), 'DEBUG')
        config.override_key(local_config, ('logging', 'pkg-ns', 'ai.backend'),
                            'DEBUG')

    # if os.getuid() != 0:
    #     print('Storage agent can only be run as root', file=sys.stderr)
    #     raise click.Abort()

    multiprocessing.set_start_method('spawn')

    if cli_ctx.invoked_subcommand is None:
        pid_file = local_config['storage-proxy']['pid-file']
        pid_file.write_text(str(os.getpid()))
        # Everything after the PID file is written runs under the finally
        # clause, so a failure while preparing the logging socket no longer
        # leaves a stale PID file behind.
        try:
            log_sockpath = Path(
                f'/tmp/backend.ai/ipc/storage-proxy-logger-{os.getpid()}.sock')
            log_sockpath.parent.mkdir(parents=True, exist_ok=True)
            log_endpoint = f'ipc://{log_sockpath}'
            local_config['logging']['endpoint'] = log_endpoint
            logger = Logger(local_config['logging'],
                            is_master=True,
                            log_endpoint=log_endpoint)
            with logger:
                setproctitle('backend.ai: storage-proxy')
                # VERSION needs a placeholder like the other messages here;
                # without one it was never rendered.
                log.info('Backend.AI Storage Proxy {0}', VERSION)
                log.info('Runtime: {0}', env_info())
                log.info('Node ID: {0}',
                         local_config['storage-proxy']['node-id'])
                log_config = logging.getLogger('ai.backend.agent.config')
                if local_config['debug']['enabled']:
                    log_config.debug('debug mode enabled.')
                    print('== Storage proxy configuration ==')
                    pprint(local_config)
                if local_config['storage-proxy']['event-loop'] == 'uvloop':
                    # Imported lazily so uvloop is needed only when selected.
                    import uvloop
                    uvloop.install()
                    log.info('Using uvloop as the event loop backend')
                aiotools.start_server(
                    server_main_logwrapper,
                    use_threading=False,
                    num_workers=local_config['storage-proxy']['num-proc'],
                    args=(local_config, log_endpoint),
                )
                log.info('exit.')
        finally:
            if pid_file.is_file():
                # check is_file() to prevent deleting /dev/null!
                pid_file.unlink()
    return 0
Esempio n. 4
0
def main(cli_ctx, config_path, debug):
    """Load the storage proxy configuration and, if no subcommand is given,
    run the storage proxy server.

    :param cli_ctx: CLI context object; only ``invoked_subcommand`` is read.
    :param config_path: Passed through to ``config.read_from_file``.
    :param debug: When true, the debug flag is switched on before validation.
    :returns: ``0``.
    :raises click.Abort: When configuration validation fails.
    """
    # Determine where to read configuration.
    raw_cfg, cfg_src_path = config.read_from_file(config_path, "storage-proxy")

    config.override_with_env(raw_cfg, ("etcd", "namespace"),
                             "BACKEND_NAMESPACE")
    config.override_with_env(raw_cfg, ("etcd", "addr"), "BACKEND_ETCD_ADDR")
    config.override_with_env(raw_cfg, ("etcd", "user"), "BACKEND_ETCD_USER")
    config.override_with_env(raw_cfg, ("etcd", "password"),
                             "BACKEND_ETCD_PASSWORD")
    if debug:
        config.override_key(raw_cfg, ("debug", "enabled"), True)

    try:
        local_config = config.check(raw_cfg, local_config_iv)
        local_config["_src"] = cfg_src_path
    except config.ConfigurationError as e:
        # This entry point loads the "storage-proxy" configuration, so the
        # message names it instead of the agent.
        print(
            "ConfigurationError: "
            "Validation of storage-proxy configuration has failed:",
            file=sys.stderr,
        )
        print(pformat(e.invalid_data), file=sys.stderr)
        raise click.Abort()

    # Applied after validation so that a debug flag coming from the config
    # file (not only from the CLI) also raises the logging levels.
    if local_config["debug"]["enabled"]:
        config.override_key(local_config, ("logging", "level"), "DEBUG")
        config.override_key(local_config, ("logging", "pkg-ns", "ai.backend"),
                            "DEBUG")

    # if os.getuid() != 0:
    #     print('Storage agent can only be run as root', file=sys.stderr)
    #     raise click.Abort()

    multiprocessing.set_start_method("spawn")

    if cli_ctx.invoked_subcommand is None:
        pid_file = local_config["storage-proxy"]["pid-file"]
        pid_file.write_text(str(os.getpid()))
        # Everything after the PID file is written runs under the finally
        # clause, so a failure while preparing the logging socket no longer
        # leaves a stale PID file behind.
        try:
            log_sockpath = Path(
                f"/tmp/backend.ai/ipc/storage-proxy-logger-{os.getpid()}.sock", )
            log_sockpath.parent.mkdir(parents=True, exist_ok=True)
            log_endpoint = f"ipc://{log_sockpath}"
            local_config["logging"]["endpoint"] = log_endpoint
            logger = Logger(
                local_config["logging"],
                is_master=True,
                log_endpoint=log_endpoint,
            )
            with logger:
                setproctitle("backend.ai: storage-proxy")
                # VERSION needs a placeholder like the other messages here;
                # without one it was never rendered.
                log.info("Backend.AI Storage Proxy {0}", VERSION)
                log.info("Runtime: {0}", env_info())
                log.info("Node ID: {0}",
                         local_config["storage-proxy"]["node-id"])
                log_config = logging.getLogger("ai.backend.agent.config")
                if local_config["debug"]["enabled"]:
                    log_config.debug("debug mode enabled.")
                    print("== Storage proxy configuration ==")
                    pprint(local_config)
                if local_config["storage-proxy"]["event-loop"] == "uvloop":
                    # Imported lazily so uvloop is needed only when selected.
                    import uvloop

                    uvloop.install()
                    log.info("Using uvloop as the event loop backend")
                aiotools.start_server(
                    server_main_logwrapper,
                    num_workers=local_config["storage-proxy"]["num-proc"],
                    args=(local_config, log_endpoint),
                )
                log.info("exit.")
        finally:
            if pid_file.is_file():
                # check is_file() to prevent deleting /dev/null!
                pid_file.unlink()
    return 0
Esempio n. 5
0
def main(cli_ctx, config_path, debug):
    """Load the watcher configuration and run the agent watcher server.

    :param cli_ctx: CLI context object; not used by this function.
    :param config_path: Passed through to ``config.read_from_file``.
    :param debug: When true, the debug flag is switched on before validation.
    :returns: ``0``.
    :raises click.Abort: When configuration validation fails.
    """
    # Validation schema for the watcher; merged with the shared etcd schema.
    watcher_config_iv = t.Dict({
        t.Key('watcher'): t.Dict({
            t.Key('service-addr', default=('0.0.0.0', 6009)): tx.HostPortPair,
            t.Key('ssl-enabled', default=False): t.Bool,
            t.Key('ssl-cert', default=None): t.Null | tx.Path(type='file'),
            t.Key('ssl-key', default=None): t.Null | tx.Path(type='file'),
            t.Key('target-service', default='backendai-agent.service'): t.String,
            t.Key('soft-reset-available', default=False): t.Bool,
        }).allow_extra('*'),
        t.Key('logging'): t.Any,  # checked in ai.backend.common.logging
        t.Key('debug'): t.Dict({
            t.Key('enabled', default=False): t.Bool,
        }).allow_extra('*'),
    }).merge(config.etcd_config_iv).allow_extra('*')

    # The watcher reads the same 'agent' configuration source as the agent.
    raw_cfg, cfg_src_path = config.read_from_file(config_path, 'agent')

    config.override_with_env(raw_cfg, ('etcd', 'namespace'), 'BACKEND_NAMESPACE')
    config.override_with_env(raw_cfg, ('etcd', 'addr'), 'BACKEND_ETCD_ADDR')
    config.override_with_env(raw_cfg, ('etcd', 'user'), 'BACKEND_ETCD_USER')
    config.override_with_env(raw_cfg, ('etcd', 'password'), 'BACKEND_ETCD_PASSWORD')
    config.override_with_env(raw_cfg, ('watcher', 'service-addr', 'host'),
                             'BACKEND_WATCHER_SERVICE_IP')
    config.override_with_env(raw_cfg, ('watcher', 'service-addr', 'port'),
                             'BACKEND_WATCHER_SERVICE_PORT')
    if debug:
        config.override_key(raw_cfg, ('debug', 'enabled'), True)

    try:
        cfg = config.check(raw_cfg, watcher_config_iv)
        if 'debug' in cfg and cfg['debug']['enabled']:
            print('== Watcher configuration ==')
            pprint(cfg)
        cfg['_src'] = cfg_src_path
    except config.ConfigurationError as e:
        print('Validation of watcher configuration has failed:', file=sys.stderr)
        print(pformat(e.invalid_data), file=sys.stderr)
        raise click.Abort()

    # Change the filename from the logging config's file section.
    log_sockpath = Path(f'/tmp/backend.ai/ipc/watcher-logger-{os.getpid()}.sock')
    log_sockpath.parent.mkdir(parents=True, exist_ok=True)
    log_endpoint = f'ipc://{log_sockpath}'
    cfg['logging']['endpoint'] = log_endpoint
    logger = Logger(cfg['logging'], is_master=True, log_endpoint=log_endpoint)
    # NOTE(review): the filename is rewritten after Logger has been
    # constructed; this only takes effect if Logger keeps a reference to
    # cfg['logging'] rather than a copy -- confirm against Logger.
    if 'file' in cfg['logging']['drivers']:
        fn = Path(cfg['logging']['file']['filename'])
        # Only stem and suffix are kept, so any directory part is dropped.
        cfg['logging']['file']['filename'] = f"{fn.stem}-watcher{fn.suffix}"

    setproctitle(f"backend.ai: watcher {cfg['etcd']['namespace']}")
    with logger:
        log.info('Backend.AI Agent Watcher {0}', VERSION)
        log.info('runtime: {0}', utils.env_info())

        log_config = logging.getLogger('ai.backend.agent.config')
        # Announce debug mode only when it is actually enabled; the message
        # used to be emitted regardless of the flag.
        if 'debug' in cfg and cfg['debug']['enabled']:
            log_config.debug('debug mode enabled.')

        aiotools.start_server(
            watcher_server,
            num_workers=1,
            args=(cfg, ),
            stop_signals={signal.SIGINT, signal.SIGTERM, signal.SIGALRM},
        )
        log.info('exit.')
    return 0