Example #1
0
 async def _check_image(self, reference: str) -> ImageRef:
     """Resolve *reference* into an ``ImageRef`` and verify it is known.

     Raises ``UnknownImageReference`` when etcd holds no digest for the
     resolved tag path.
     """
     registries = await get_known_registries(self.etcd)
     image_ref = ImageRef(reference, registries)
     stored_digest = await self.etcd.get(image_ref.tag_path)
     if stored_digest is None:
         raise UnknownImageReference(reference)
     return image_ref
Example #2
0
async def _list_existing_sessions(
    db_conn: SAConnection,
    sgroup: str,
) -> List[ExistingSession]:
    """
    Enumerate the kernels currently occupying resources in *sgroup*.

    Each kernel row is joined with its keypair's resource policy and
    converted into an :class:`ExistingSession`, in creation order.
    """
    join_clause = sa.join(
        kernels, keypairs,
        keypairs.c.access_key == kernels.c.access_key,
    )
    selected_cols = [
        kernels.c.id,
        kernels.c.status,
        kernels.c.image,
        kernels.c.registry,
        kernels.c.sess_type,
        kernels.c.sess_id,
        kernels.c.access_key,
        kernels.c.domain_name,
        kernels.c.group_id,
        kernels.c.scaling_group,
        kernels.c.occupied_slots,
        kernels.c.resource_opts,
        kernels.c.environ,
        kernels.c.mounts,
        kernels.c.mount_map,
        kernels.c.startup_command,
        kernels.c.internal_data,
        keypairs.c.resource_policy,
    ]
    condition = (
        (kernels.c.status.in_(AGENT_RESOURCE_OCCUPYING_KERNEL_STATUSES)) &
        (kernels.c.scaling_group == sgroup)
    )
    query = (
        sa.select(selected_cols)
        .select_from(join_clause)
        .where(condition)
        .order_by(kernels.c.created_at)
    )
    sessions: List[ExistingSession] = []
    async for row in db_conn.execute(query):
        sessions.append(ExistingSession(
            kernel_id=row['id'],
            access_key=row['access_key'],
            session_type=row['sess_type'],
            session_name=row['sess_id'],
            domain_name=row['domain_name'],
            group_id=row['group_id'],
            scaling_group=row['scaling_group'],
            image_ref=ImageRef(row['image'], [row['registry']]),
            occupying_slots=row['occupied_slots'],
        ))
    return sessions
Example #3
0
 async def _impl():
     """List images from the config server, printing either a short
     tabular view (canonical ref + digest) or the full item dicts."""
     async with config_ctx(cli_ctx) as config_server:
         rows = []
         try:
             for item in await config_server.list_images():
                 # Optionally restrict the listing to installed images.
                 if installed and not item['installed']:
                     continue
                 if not short:
                     pprint(item)
                     continue
                 ref = ImageRef(f"{item['name']}:{item['tag']}",
                                item['registry'])
                 rows.append((ref.canonical, item['digest']))
             if short:
                 print(tabulate(rows, tablefmt='plain'))
         except Exception:
             log.exception('An error occurred.')
Example #4
0
 async def list_images(self):
     """Collect metadata for every image recorded under the etcd
     ``images`` prefix, parsing them concurrently.

     Entries under the reserved ``_aliases`` key and empty image/tag
     keys are skipped.
     """
     known_registries = await get_known_registries(self.etcd)
     reverse_aliases = await self._scan_reverse_aliases()
     data = await self.etcd.get_prefix('images')
     parse_tasks = []
     for registry, images in data.items():
         if registry == '_aliases':
             continue
         for image, tags in images.items():
             if image == '':
                 continue
             for tag, image_info in tags.items():
                 if tag == '':
                     continue
                 raw_ref = '{}/{}:{}'.format(
                     etcd_unquote(registry), etcd_unquote(image), tag)
                 ref = ImageRef(raw_ref, known_registries)
                 parse_tasks.append(
                     self._parse_image(ref, image_info, reverse_aliases))
     return await asyncio.gather(*parse_tasks)
async def _list_existing_sessions(
    db_conn: SAConnection,
    sgroup: str,
) -> List[ExistingSession]:
    """
    List the sessions whose kernels currently occupy resources in the
    scaling group *sgroup*, aggregating the per-kernel rows by session.

    :param db_conn: An open database connection.
    :param sgroup: Name of the scaling group to inspect.
    :return: The matched sessions, each carrying its kernel infos and
             the sum of their occupied slots.
    """
    query = (
        sa.select([
            kernels.c.id,
            kernels.c.status,
            kernels.c.image,
            kernels.c.cluster_mode,
            kernels.c.cluster_size,
            kernels.c.cluster_role,
            kernels.c.cluster_idx,
            kernels.c.cluster_hostname,
            kernels.c.registry,
            kernels.c.session_id,
            kernels.c.session_type,
            kernels.c.session_name,
            kernels.c.access_key,
            kernels.c.domain_name,
            kernels.c.group_id,
            kernels.c.scaling_group,
            kernels.c.occupied_slots,
            kernels.c.resource_opts,
            kernels.c.environ,
            kernels.c.mounts,
            kernels.c.mount_map,
            kernels.c.startup_command,
            kernels.c.internal_data,
            keypairs.c.resource_policy,
        ])
        .select_from(sa.join(
            kernels, keypairs,
            keypairs.c.access_key == kernels.c.access_key
        ))
        .where(
            (kernels.c.status.in_(AGENT_RESOURCE_OCCUPYING_KERNEL_STATUSES)) &
            (kernels.c.scaling_group == sgroup)
        )
        .order_by(kernels.c.created_at)
    )
    items: MutableMapping[str, ExistingSession] = {}
    async for row in db_conn.execute(query):
        # Group kernel rows by session ID, creating the session record
        # on first sight.
        if _session := items.get(row['session_id']):
            session = _session
        else:
            session = ExistingSession(
                kernels=[],
                access_key=row['access_key'],
                session_id=row['session_id'],
                session_type=row['session_type'],
                session_name=row['session_name'],
                cluster_mode=row['cluster_mode'],
                cluster_size=row['cluster_size'],
                domain_name=row['domain_name'],
                group_id=row['group_id'],
                scaling_group=row['scaling_group'],
                occupying_slots=ResourceSlot(),
            )
            items[row['session_id']] = session
        # TODO: support multi-container sessions
        session.kernels.append(KernelInfo(  # type: ignore
            kernel_id=row['id'],
            session_id=row['session_id'],
            access_key=row['access_key'],
            cluster_role=row['cluster_role'],
            cluster_idx=row['cluster_idx'],
            cluster_hostname=row['cluster_hostname'],
            image_ref=ImageRef(row['image'], [row['registry']]),
            bootstrap_script=None,
            startup_command=None,
            resource_opts=row['resource_opts'],
            requested_slots=row['occupied_slots'],
        ))
        session.occupying_slots += row['occupied_slots']  # type: ignore
    # BUG FIX: the function previously fell off the end and implicitly
    # returned None despite its List[ExistingSession] annotation.
    return list(items.values())
async def _list_pending_sessions(
    db_conn: SAConnection,
    sgroup_name: str,
) -> List[PendingSession]:
    """
    List the sessions in PENDING state that may be scheduled into the
    scaling group *sgroup_name*, aggregating the per-kernel rows by
    session.  Kernels with no scaling group assigned yet are included
    as well.

    :param db_conn: An open database connection.
    :param sgroup_name: Name of the target scaling group.
    :return: The matched pending sessions.
    """
    query = (
        sa.select([
            kernels.c.id,
            kernels.c.status,
            kernels.c.image,
            kernels.c.cluster_mode,
            kernels.c.cluster_size,
            kernels.c.cluster_role,
            kernels.c.cluster_idx,
            kernels.c.cluster_hostname,
            kernels.c.registry,
            kernels.c.session_id,
            kernels.c.session_type,
            kernels.c.session_name,
            kernels.c.access_key,
            kernels.c.domain_name,
            kernels.c.group_id,
            kernels.c.scaling_group,
            kernels.c.occupied_slots,
            kernels.c.resource_opts,
            kernels.c.environ,
            kernels.c.mounts,
            kernels.c.mount_map,
            kernels.c.bootstrap_script,
            kernels.c.startup_command,
            kernels.c.internal_data,
            kernels.c.preopen_ports,
            keypairs.c.resource_policy,
        ])
        .select_from(sa.join(
            kernels, keypairs,
            keypairs.c.access_key == kernels.c.access_key
        ))
        .where(
            (kernels.c.status == KernelStatus.PENDING) &
            (
                (kernels.c.scaling_group == sgroup_name) |
                (kernels.c.scaling_group.is_(None))
            )
        )
        .order_by(kernels.c.created_at)
    )
    # TODO: extend for multi-container sessions
    items: MutableMapping[str, PendingSession] = {}
    async for row in db_conn.execute(query):
        # Group kernel rows by session ID, creating the session record
        # on first sight.
        if _session := items.get(row['session_id']):
            session = _session
        else:
            session = PendingSession(
                kernels=[],
                access_key=row['access_key'],
                session_id=row['session_id'],
                session_type=row['session_type'],
                session_name=row['session_name'],
                cluster_mode=row['cluster_mode'],
                cluster_size=row['cluster_size'],
                domain_name=row['domain_name'],
                group_id=row['group_id'],
                scaling_group=row['scaling_group'],
                resource_policy=row['resource_policy'],
                resource_opts={},
                requested_slots=ResourceSlot(),
                internal_data=row['internal_data'],
                target_sgroup_names=[],
                # Stored environ entries are "KEY=VALUE" strings.
                environ={
                    k: v for k, v
                    in map(lambda s: s.split('=', maxsplit=1), row['environ'])
                },
                mounts=row['mounts'],
                mount_map=row['mount_map'],
                bootstrap_script=row['bootstrap_script'],
                startup_command=row['startup_command'],
                preopen_ports=row['preopen_ports'],
            )
            items[row['session_id']] = session
        session.kernels.append(KernelInfo(
            kernel_id=row['id'],
            session_id=row['session_id'],
            access_key=row['access_key'],
            cluster_role=row['cluster_role'],
            cluster_idx=row['cluster_idx'],
            cluster_hostname=row['cluster_hostname'],
            image_ref=ImageRef(row['image'], [row['registry']]),
            bootstrap_script=row['bootstrap_script'],
            startup_command=row['startup_command'],
            resource_opts=row['resource_opts'],
            requested_slots=row['occupied_slots'],
        ))
        session.requested_slots += row['occupied_slots']  # type: ignore
        merge_resource(session.resource_opts, row['resource_opts'])  # type: ignore
    # BUG FIX: the function previously fell off the end and implicitly
    # returned None despite its List[PendingSession] annotation.
    return list(items.values())
Example #7
0
async def import_image(request: web.Request, params: Any) -> web.Response:
    '''
    Import a docker image and convert it to a Backend.AI-compatible one,
    by automatically installing a few packages and adding image labels.

    Currently we only support auto-conversion of Python-based kernels (e.g.,
    NGC images) which has its own Python version installed.

    Internally, it launches a temporary kernel in an arbitrary agent within
    the client's domain, the "default" group, and the "default" scaling group.
    (The client may change the group and scaling group using *launchOptions.*
    If the client is a super-admin, it uses the "default" domain.)

    This temporary kernel occupies only 1 CPU core and 1 GiB memory.
    The kernel concurrency limit is not applied here, but we choose an agent
    based on their resource availability.
    The owner of this kernel is always the client that makes the API request.

    This API returns immediately after launching the temporary kernel.
    The client may check the progress of the import task using session logs.
    '''

    # Dockerfile template rendered below; the importer kernel executes the
    # resulting build script.
    tpl = jinja2.Template(DOCKERFILE_TEMPLATE)

    # Look up which docker registries the requester's domain may use.
    async with request.app['dbpool'].acquire() as conn, conn.begin():
        query = (
            sa.select([domains.c.allowed_docker_registries])
            .select_from(domains)
            .where(domains.c.name == request['user']['domain_name'])
        )
        result = await conn.execute(query)
        allowed_docker_registries = await result.scalar()

    # Resolve both ends of the conversion against the allowed registries.
    source_image = ImageRef(params['src'], allowed_docker_registries)
    target_image = ImageRef(params['target'], allowed_docker_registries)

    # TODO: validate and convert arguments to template variables
    dockerfile_content = tpl.render({
        'base_distro': params['baseDistro'],
        'cpucount_envvars': ['NPROC', 'OMP_NUM_THREADS', 'OPENBLAS_NUM_THREADS'],
        'runtime_type': params['runtimeType'],
        'runtime_path': params['runtimePath'],
        'service_ports': params['servicePorts'],
        'min_cpu': params['minCPU'],
        'min_mem': params['minMemory'],
        'pref_shmem': params['preferredSharedMemory'],
        'accelerators': params['supportedAccelerators'],
        'src': params['src'],
        'brand': params['brand'],
        'has_ipykernel': True,  # TODO: in the future, we may allow import of service-port only kernels.
    })

    # Random suffix keeps concurrent import sessions from colliding.
    sess_id = f'image-import-{secrets.token_urlsafe(8)}'
    access_key = request['keypair']['access_key']
    registry = request.app['registry']
    resource_policy = request['keypair']['resource_policy']

    # Resolve the requested group within the user's domain and verify that
    # the requester actually belongs to it.
    async with request.app['dbpool'].acquire() as conn, conn.begin():
        query = (
            sa.select([groups.c.id])
            .select_from(
                sa.join(groups, domains,
                        groups.c.domain_name == domains.c.name)
            )
            .where(
                (domains.c.name == request['user']['domain_name']) &
                (groups.c.name == params['launchOptions']['group']) &
                (domains.c.is_active) &
                (groups.c.is_active)
            )
        )
        result = await conn.execute(query)
        group_id = await result.scalar()
        if group_id is None:
            raise InvalidAPIParameters("Invalid domain or group.")

        # Membership check via the association table.
        query = (
            sa.select([agus])
            .select_from(agus)
            .where(
                (agus.c.user_id == request['user']['uuid']) &
                (agus.c.group_id == group_id)
            )
        )
        result = await conn.execute(query)
        row = await result.fetchone()
        if row is None:
            raise InvalidAPIParameters("You do not belong to the given group.")

    # The helper image that performs the actual build inside the kernel.
    importer_image = ImageRef(
        request.app['config']['manager']['importer-image'],
        allowed_docker_registries,
    )

    # Fetch registry credentials from etcd for both images; they are handed
    # to the kernel via internal_data below.
    docker_creds = {}
    for img_ref in (source_image, target_image):
        registry_info = await request.app['config_server'].etcd.get_prefix_dict(
            f'config/docker/registry/{etcd_quote(img_ref.registry)}')
        docker_creds[img_ref.registry] = {
            'username': registry_info.get('username'),
            'password': registry_info.get('password'),
        }

    # Launch the importer as a BATCH session owned by the requester.
    # NOTE(review): the docstring above says "1 GiB memory" but 'mem' is
    # requested as '2g' here — confirm which is intended.
    kernel_id = await registry.enqueue_session(
        sess_id, access_key,
        importer_image,
        SessionTypes.BATCH,
        {
            'resources': {'cpu': '1', 'mem': '2g'},
            'scaling_group': params['launchOptions']['scalingGroup'],
            'environ': {
                'SRC_IMAGE': source_image.canonical,
                'TARGET_IMAGE': target_image.canonical,
                'RUNTIME_PATH': params['runtimePath'],
                # The build script is delivered base64-encoded.
                'BUILD_SCRIPT': (base64.b64encode(dockerfile_content.encode('utf8'))
                                 .decode('ascii')),
            }
        },
        resource_policy,
        domain_name=request['user']['domain_name'],
        group_id=group_id,
        user_uuid=request['user']['uuid'],
        user_role=request['user']['role'],
        startup_command='/root/build-image.sh',
        internal_data={
            'domain_socket_proxies': ['/var/run/docker.sock'],
            'docker_credentials': docker_creds,
            'prevent_vfolder_mounts': True,
            'block_service_ports': True,
        }
    )
    # Respond immediately; progress is tracked via the session's task logs.
    return web.json_response({
        'importTask': {
            'sessionId': sess_id,
            'taskId': str(kernel_id),
        },
    }, status=200)
    SessionKernelIdPair(
        session_id=UUID('251907d9-1290-4126-bc6c-100000000100'),
        kernel_ids=[
            KernelId(UUID('251907d9-1290-4126-bc6c-100000000100')),
            KernelId(UUID('251907d9-1290-4126-bc6c-100000000101')),
        ]),
    SessionKernelIdPair(
        session_id=UUID('251907d9-1290-4126-bc6c-100000000200'),
        kernel_ids=[KernelId(UUID('251907d9-1290-4126-bc6c-100000000200'))]),
    SessionKernelIdPair(
        # single-node mode multi-container session
        session_id=UUID('251907d9-1290-4126-bc6c-100000000300'),
        kernel_ids=[KernelId(UUID('251907d9-1290-4126-bc6c-100000000300'))]),
]

# BUG FIX: a stray trailing comma previously made this a 1-tuple wrapping
# the ImageRef, so attribute access (e.g. ``common_image_ref.canonical``)
# would fail with AttributeError.
common_image_ref = ImageRef('lablup/python:3.6-ubunt18.04')

# Baseline keyword arguments shared when constructing pending-session
# fixtures; individual tests override fields as needed.
_common_dummy_for_pending_session: Mapping[str, Any] = {
    'domain_name': 'default',
    'group_id': example_group_id,
    'resource_policy': {},
    'resource_opts': {},
    'mounts': [],
    'mount_map': {},
    'environ': {},
    'bootstrap_script': None,
    'startup_command': None,
    'internal_data': None,
    'preopen_ports': [],
}
Example #9
0
async def _list_pending_sessions(
    db_conn: SAConnection,
    sgroup_name: str,
) -> List[PendingSession]:
    """
    Enumerate PENDING kernels that may be scheduled into *sgroup_name*.

    Kernels without a scaling group assigned yet are also included;
    rows are joined with their keypair's resource policy and returned
    as :class:`PendingSession` records in creation order.
    """
    join_clause = sa.join(
        kernels, keypairs,
        keypairs.c.access_key == kernels.c.access_key,
    )
    selected_cols = [
        kernels.c.id,
        kernels.c.status,
        kernels.c.image,
        kernels.c.registry,
        kernels.c.sess_type,
        kernels.c.sess_id,
        kernels.c.access_key,
        kernels.c.domain_name,
        kernels.c.group_id,
        kernels.c.scaling_group,
        kernels.c.occupied_slots,
        kernels.c.resource_opts,
        kernels.c.environ,
        kernels.c.mounts,
        kernels.c.mount_map,
        kernels.c.bootstrap_script,
        kernels.c.startup_command,
        kernels.c.internal_data,
        kernels.c.preopen_ports,
        keypairs.c.resource_policy,
    ]
    condition = (
        (kernels.c.status == KernelStatus.PENDING) &
        (
            (kernels.c.scaling_group == sgroup_name) |
            (kernels.c.scaling_group.is_(None))
        )
    )
    query = (
        sa.select(selected_cols)
        .select_from(join_clause)
        .where(condition)
        .order_by(kernels.c.created_at)
    )
    pending: List[PendingSession] = []
    async for row in db_conn.execute(query):
        # Stored environ entries are "KEY=VALUE" strings.
        environ = dict(s.split('=', maxsplit=1) for s in row['environ'])
        pending.append(PendingSession(
            kernel_id=row['id'],
            access_key=row['access_key'],
            session_type=row['sess_type'],
            session_name=row['sess_id'],
            domain_name=row['domain_name'],
            group_id=row['group_id'],
            scaling_group=row['scaling_group'],
            image_ref=ImageRef(row['image'], [row['registry']]),
            resource_policy=row['resource_policy'],
            resource_opts=row['resource_opts'],
            requested_slots=row['occupied_slots'],
            internal_data=row['internal_data'],
            target_sgroup_names=[],
            environ=environ,
            mounts=row['mounts'],
            mount_map=row['mount_map'],
            bootstrap_script=row['bootstrap_script'],
            startup_command=row['startup_command'],
            preopen_ports=row['preopen_ports'],
        ))
    return pending
        stats_monitor=None,
        error_monitor=None,
        skip_initial_scan=True,
    )  # for faster test iteration
    try:
        yield agent
    finally:
        await agent.shutdown(signal.SIGTERM)


@pytest.mark.asyncio
async def test_init(agent, mocker):
    # Smoke test: the ``agent`` fixture must construct and yield without
    # raising; printing it exercises its string representation.
    print(agent)


# Image reference used by the auto-pull tests below, resolved against
# the index.docker.io registry.
imgref = ImageRef('index.docker.io/lablup/lua:5.3-alpine3.8',
                  ['index.docker.io'])
# The digest the caller queries/expects for ``imgref``.
query_digest = "sha256:b000000000000000000000000000000000000000000000000000000000000001"
# Fake local image metadata whose Id equals ``query_digest``
# (presumably mimicking ``docker inspect`` output — verify against the
# mocked docker client in the tests).
digest_matching_image_info = {
    "Id":
    "sha256:b000000000000000000000000000000000000000000000000000000000000001",
    "RepoTags": ["lablup/lua:5.3-alpine3.8"],
}
# Same shape, but the Id differs from ``query_digest``.
digest_mismatching_image_info = {
    "Id":
    "sha256:a000000000000000000000000000000000000000000000000000000000000002",
    "RepoTags": ["lablup/lua:5.3-alpine3.8"],
}


@pytest.mark.asyncio
async def test_auto_pull_digest_when_digest_matching(agent, mocker):