Example 1
def rm_component_descriptor(
    component: gci.componentmodel.Component,
    recursive=True,
    oci_client: oc.Client = None,
):
    if not oci_client:
        oci_client = ccc.oci.oci_client()

    target_ref = _target_oci_ref(
        component=component,
        component_ref=component,
    )

    if recursive:
        for component_ref in component.componentReferences:
            component_descriptor = _resolve_dependency(
                component,
                component_ref,
                repository_ctx_base_url=None,
            )
            rm_component_descriptor(
                component=component_descriptor.component,
                recursive=recursive,
                oci_client=oci_client,
            )

    oci_client.delete_manifest(image_reference=target_ref)
Example 2
def v1_manifest_to_v2(
    manifest: om.OciImageManifestV1,
    oci_client: oc.Client,
    tgt_image_ref: str,
) -> typing.Tuple[om.OciImageManifest, bytes]:
    docker_cfg = v2_cfg_from_v1_manifest(manifest=manifest)
    docker_cfg = dataclasses.asdict(docker_cfg)
    docker_cfg = json.dumps(docker_cfg).encode('utf-8')

    cfg_digest = f'sha256:{hashlib.sha256(docker_cfg).hexdigest()}'
    cfg_leng = len(docker_cfg)

    oci_client.put_blob(
        image_reference=tgt_image_ref,
        digest=cfg_digest,
        octets_count=cfg_leng,
        data=docker_cfg,
    )

    manifest_v2 = om.OciImageManifest(
        config=om.OciBlobRef(
            digest=cfg_digest,
            mediaType='application/vnd.docker.container.image.v1+json',
            size=cfg_leng,
        ),
        layers=manifest.layers,
    )

    return manifest_v2, docker_cfg
Example 3
def iter_platforms(
    image_reference: typing.Union[str, om.OciImageReference],
    oci_client: oc.Client=None,
) -> typing.Generator[tuple[om.OciImageReference, om.OciPlatform], None, None]:
    image_reference = om.OciImageReference.to_image_ref(image_reference)

    manifest = oci_client.manifest(
        image_reference=image_reference,
        accept=om.MimeTypes.prefer_multiarch,
    )

    if isinstance(manifest, om.OciImageManifest):
        platform = from_single_image(
            image_reference=image_reference,
            oci_client=oci_client,
        )
        yield (image_reference, platform)
        return
    elif isinstance(manifest, om.OciImageManifestList):
        manifest: om.OciImageManifestList
    else:
        raise NotImplementedError(type(manifest))

    prefix = image_reference.ref_without_tag

    for sub_manifest in manifest.manifests:
        platform_dict = dataclasses.asdict(sub_manifest)

        sub_manifest = oci_client.manifest(
            image_reference=(sub_img_ref := f'{prefix}@{sub_manifest.digest}'),
        )

        # plausible completion (assumption - the remainder of this loop was not shown):
        # refine the platform declared in the manifest-list entry with the platform
        # resolved from the sub-manifest's cfg-blob, then yield it
        platform_dict = platform_dict.get('platform') or {}
        platform_dict |= dataclasses.asdict(
            from_manifest(
                image_reference=sub_img_ref,
                manifest=sub_manifest,
                oci_client=oci_client,
            )
        )

        yield (
            om.OciImageReference.to_image_ref(sub_img_ref),
            dacite.from_dict(data_class=om.OciPlatform, data=platform_dict),
        )
Example 4
def single_platform_manifest(
    image_reference: om.OciImageReference | str,
    oci_client: oc.Client,
    platform: om.OciPlatform=None,
):
    '''
    returns a single-platform OCI Image Manifest for the given image_reference.
    Lookup and validation depend on the presence of the platform argument.

    If the given image-ref points to a single-arch manifest, the returned result will be identical
    to invoking `oci_client.manifest`. If the platform argument is passed and the discovered
    platform does not match, a `ValueError` will be raised.

    If the given image-ref points to a multi-arch manifest, content-negotiation depends on the
    presence of the platform argument. If absent, no preference will be stated (i.e. the
    accept-header will not be set); some OCI-image-registries will then return a single-arch
    manifest (thus saving a roundtrip). If platform is passed, preference for multi-arch will be
    stated via the accept-header; the specified platform will be looked up and returned. If it is
    not found, a `ValueError` will be raised.
    '''
    image_reference = om.OciImageReference.to_image_ref(image_reference)

    if platform:
        accept = om.MimeTypes.prefer_multiarch
    else:
        accept = None

    manifest = oci_client.manifest(
        image_reference=image_reference,
        accept=accept,
    )

    if isinstance(manifest, om.OciImageManifest):
        if not platform:
            return manifest

        actual_platform = from_manifest(
            image_reference=image_reference,
            manifest=manifest,
            oci_client=oci_client,
        )

        if actual_platform != platform:
            raise ValueError(f'{image_reference=} does not match {platform=}: {actual_platform=}')

        return manifest
    elif isinstance(manifest, om.OciImageManifestList):
        pass
    else:
        raise NotImplementedError(manifest)

    for manifest in manifest.manifests:
        manifest: om.OciImageManifestListEntry
        if manifest.platform == platform:
            break
    else:
        raise ValueError(f'{image_reference=} does not contain {platform=}')

    manifest_ref = f'{image_reference.ref_without_tag}@{manifest.digest}'
    return oci_client.manifest(image_reference=manifest_ref)
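A short usage sketch for single_platform_manifest, illustrating both lookup modes described in the docstring; the image reference is a hypothetical placeholder, the module aliases (ccc.oci, om) are assumed to be imported as in the examples, and the om.OciPlatform keyword construction is an assumption rather than something shown above.

import ccc.oci

oci_client = ccc.oci.oci_client()
image_ref = 'example.org/my-project/my-image:1.0.0'  # hypothetical reference

# without a platform: return whatever (single-arch) manifest the registry serves
manifest = single_platform_manifest(
    image_reference=image_ref,
    oci_client=oci_client,
)

# with a platform: resolve the matching entry from a multi-arch manifest, or raise
# ValueError if the requested platform is not present
# (om.OciPlatform field names assumed per the OCI image-spec platform object)
manifest = single_platform_manifest(
    image_reference=image_ref,
    oci_client=oci_client,
    platform=om.OciPlatform(os='linux', architecture='amd64'),
)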
Example 5
def iter_image_files(
    container_image_reference: str,
    oci_client: oc.Client=None,
) -> typing.Iterable[typing.Tuple[typing.IO, str]]:
    '''
    returns a generator yielding the regular files contained in the specified oci-image
    as a sequence of two-tuples (filelike-obj, <layer-digest:relpath>).

    The image's layer-blobs are retrieved in the order they are defined in the image-manifest.
    cfg-blobs are ignored. All layer-blobs are assumed to be tarfiles (which is not necessarily
    a valid assumption for non-docker-compatible oci-artifacts).
    '''
    if not oci_client:
        oci_client = ccc.oci.oci_client()

    manifest = oci_client.manifest(image_reference=container_image_reference)

    # we ignore cfg-blob (which would be included in manifest.blobs())
    for layer_blob in manifest.layers:
        blob_resp = oci_client.blob(
            image_reference=container_image_reference,
            digest=layer_blob.digest,
            stream=True,
        )

        fileobj = _FilelikeProxy(
            generator=blob_resp.iter_content(
                chunk_size=tarfile.RECORDSIZE,
                decode_unicode=False,
            ),
            size=layer_blob.size,
        )
        with tarfile.open(
            fileobj=fileobj,
            mode='r|*',
        ) as layer_tarfile:
            for tar_info in layer_tarfile:
                if not tar_info.isfile():
                    continue
                yield (
                    layer_tarfile.extractfile(tar_info),
                    f'{layer_blob.digest}:{tar_info.name}',
                )
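A short usage sketch for iter_image_files; the image reference is a hypothetical placeholder (the function creates its own client when none is passed).

for fileobj, name in iter_image_files(
    container_image_reference='example.org/my-project/my-image:1.0.0',  # hypothetical
):
    # name has the form <layer-digest>:<path-within-layer>
    print(f'{name}: {len(fileobj.read())} octets')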
Example 6
def image_layers_as_tarfile_generator(
    image_reference: str,
    oci_client: oc.Client,
    chunk_size=tarfile.RECORDSIZE,
    include_config_blob=True,
) -> typing.Generator[bytes, None, None]:
    '''
    returns a generator yielding a tar-archive with the passed oci-image's blobs as
    members. This is somewhat similar to the result of a `docker save`.
    This function is useful e.g. to upload the file system contents of an oci-container-image to
    some scanning-tool (provided it supports the extraction of tar-archives).
    If include_config_blob is set to False, the config blob is omitted and only the layer-blobs
    are included.
    '''
    manifest = oci_client.manifest(image_reference=image_reference)
    offset = 0

    if not include_config_blob:
        logger.debug('skipping config blob')

    for blob in manifest.blobs() if include_config_blob else manifest.layers:
        logger.debug(f'getting blob {blob.digest}')
        tarinfo = tarfile.TarInfo(name=blob.digest + '.tar') # note: may be gzipped
        tarinfo.size = blob.size
        tarinfo.offset = offset
        tarinfo.offset_data = offset + tarfile.BLOCKSIZE

        offset += blob.size + tarfile.BLOCKSIZE

        tarinfo_bytes = tarinfo.tobuf()
        yield tarinfo_bytes

        uploaded_bytes = len(tarinfo_bytes)
        for chunk in oci_client.blob(
            image_reference=image_reference,
            digest=blob.digest,
            stream=True,
            ).iter_content(chunk_size=chunk_size):
            uploaded_bytes += len(chunk)
            yield chunk

        # need to pad full blocks w/ NUL-bytes
        if (missing := tarfile.BLOCKSIZE - (uploaded_bytes % tarfile.BLOCKSIZE)):
            offset += missing
            yield tarfile.NUL * missing
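A short usage sketch streaming the generated tar-archive to a local file; the output path and image reference are hypothetical placeholders.

import ccc.oci

oci_client = ccc.oci.oci_client()

with open('/tmp/image-layers.tar', 'wb') as f:  # hypothetical output path
    for chunk in image_layers_as_tarfile_generator(
        image_reference='example.org/my-project/my-image:1.0.0',  # hypothetical
        oci_client=oci_client,
        include_config_blob=False,
    ):
        f.write(chunk)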
Example 7
def publish_container_image_from_kaniko_tarfile(
    image_tarfile_path: str,
    oci_client: oc.Client,
    image_reference: str,
    additional_tags: typing.List[str] = (),
    manifest_mimetype: str = om.OCI_MANIFEST_SCHEMA_V2_MIME,
):
    image_reference = ou.normalise_image_reference(
        image_reference=image_reference)
    image_name = image_reference.rsplit(':', 1)[0]
    image_references = (image_reference, ) + tuple(
        [f'{image_name}:{tag}' for tag in additional_tags])

    with ok.read_kaniko_image_tar(tar_path=image_tarfile_path) as image:
        chunk_size = 1024 * 1024
        for kaniko_blob in image.blobs():
            oci_client.put_blob(
                image_reference=image_reference,
                digest=kaniko_blob.digest_str(),
                octets_count=kaniko_blob.size,
                data=kaniko_blob,
                max_chunk=chunk_size,
            )

        # optionally patch manifest's mimetype (e.g. required for docker-hub)
        manifest_dict = dataclasses.asdict(image.oci_manifest())
        manifest_dict['mediaType'] = manifest_mimetype

        manifest_bytes = json.dumps(manifest_dict).encode('utf-8')

        for tgt_ref in image_references:
            logger.info(f'publishing manifest {tgt_ref=}')
            oci_client.put_manifest(
                image_reference=tgt_ref,
                manifest=manifest_bytes,
            )
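A short usage sketch for publish_container_image_from_kaniko_tarfile; the tarfile path, image reference, and tags are hypothetical placeholders.

import ccc.oci

publish_container_image_from_kaniko_tarfile(
    image_tarfile_path='/tmp/kaniko-image.tar',  # hypothetical path
    oci_client=ccc.oci.oci_client(),
    image_reference='example.org/my-project/my-image:1.0.0',  # hypothetical
    additional_tags=['latest'],
)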
Example 8
def from_manifest(
    image_reference: om.OciImageReference,
    manifest: om.OciImageManifest,
    oci_client: oc.Client=None,
    base_platform: om.OciPlatform=None,
) -> om.OciPlatform:
    if base_platform:
        cfg = base_platform.as_dict()
    else:
        cfg = {}

    cfg |= oci_client.blob(
        image_reference=image_reference,
        digest=manifest.config.digest,
        stream=False, # we will need to json.load the (small) result anyhow
    ).json()

    return dacite.from_dict(
        data_class=om.OciPlatform,
        data=cfg,
    )
Example 9
def from_single_image(
    image_reference: typing.Union[str, om.OciImageReference],
    oci_client: oc.Client=None,
    base_platform: om.OciPlatform=None,
) -> om.OciPlatform:
    '''
    determines the platform from a "single oci image" (i.e. an oci image which is _not_
    a multiarch image).
    '''
    image_reference = om.OciImageReference.to_image_ref(image_reference)

    manifest = oci_client.manifest(image_reference=image_reference)

    if not isinstance(manifest, om.OciImageManifest):
        raise ValueError(f'{image_reference=} did not yield OciImageManifest: {type(manifest)=}')

    return from_manifest(
        image_reference=image_reference,
        manifest=manifest,
        oci_client=oci_client,
        base_platform=base_platform,
    )
Example 10
def sanitise_image(
    image_ref: typing.Union[str, om.OciImageReference],
    oci_client: oc.Client,
):
    manifest = oci_client.manifest(image_reference=image_ref)
    cfg_blob = oci_client.blob(image_reference=image_ref,
                               digest=manifest.config.digest).content

    if is_cfg_blob_sane(manifest=manifest, cfg_blob=cfg_blob):
        return image_ref

    sanitised_cfg_blob = sanitise_cfg_blob(manifest=manifest,
                                           cfg_blob=cfg_blob)
    cfg_blob_digest = 'sha256:' + hashlib.sha256(
        sanitised_cfg_blob).hexdigest()

    oci_client.put_blob(
        image_reference=image_ref,
        digest=cfg_blob_digest,
        octets_count=len(sanitised_cfg_blob),
        data=sanitised_cfg_blob,
    )

    manifest = dataclasses.replace(
        manifest,
        config=dataclasses.replace(
            manifest.config,
            digest=cfg_blob_digest,
            size=len(sanitised_cfg_blob),
        ),
    )

    manifest_bytes = json.dumps(dataclasses.asdict(manifest)).encode('utf-8')

    oci_client.put_manifest(image_reference=image_ref, manifest=manifest_bytes)

    manifest_dig = 'sha256:' + hashlib.sha256(manifest_bytes).hexdigest()
    img_ref: om.OciImageReference = om.OciImageReference.to_image_ref(
        image_ref)

    patched_img_ref = f'{img_ref.ref_without_tag}@{manifest_dig}'

    return patched_img_ref
Example 11
def publish_container_image_from_kaniko_tarfile(
        image_tarfile_path: str,
        oci_client: oc.Client,
        image_reference: str,
        additional_tags: typing.List[str] = (),
):
    image_reference = ou.normalise_image_reference(
        image_reference=image_reference)
    image_name = image_reference.rsplit(':', 1)[0]
    image_references = (image_reference, ) + tuple(
        [f'{image_name}:{tag}' for tag in additional_tags])

    with ok.read_kaniko_image_tar(tar_path=image_tarfile_path) as image:
        chunk_size = 1024 * 1024
        for kaniko_blob in image.blobs():
            oci_client.put_blob(
                image_reference=image_reference,
                digest=kaniko_blob.digest_str(),
                octets_count=kaniko_blob.size,
                data=kaniko_blob,
                max_chunk=chunk_size,
            )

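            # read back the just-uploaded blob (result is discarded); absent_ok=True
            # tolerates the blob not (yet) being present instead of raising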
            oci_client.blob(
                image_reference=image_reference,
                digest=kaniko_blob.digest_str(),
                absent_ok=True,
            )

        manifest_bytes = json.dumps(dataclasses.asdict(
            image.oci_manifest())).encode('utf-8')

        for tgt_ref in image_references:
            logger.info(f'publishing manifest {tgt_ref=}')
            oci_client.put_manifest(
                image_reference=tgt_ref,
                manifest=manifest_bytes,
            )
Example 12
def filter_image(
        source_ref: str,
        target_ref: str,
        remove_files: typing.Sequence[str] = (),
        oci_client: oc.Client = None,
):
    if not oci_client:
        oci_client = ccc.oci.oci_client()

    # shortcut in case there are no filtering-rules
    if not remove_files:
        return oci.replicate_artifact(
            src_image_reference=source_ref,
            tgt_image_reference=target_ref,
            oci_client=oci_client,
        )

    manifest = oci_client.manifest(image_reference=source_ref)

    if not isinstance(manifest, om.OciImageManifest):
        raise NotImplementedError(manifest)

    # allow / ignore leading '/'
    remove_files = [p.lstrip('/') for p in remove_files]

    def tarmember_filter(tar_info: tarfile.TarInfo):
        stripped_name = tar_info.name.lstrip('./')
        if stripped_name in remove_files:
            logger.debug(f'rm: {tar_info.name=}')
            return False  # rm member
        return True  # keep member

    # prepare copy of layers to avoid modification while iterating
    layers_copy = manifest.layers.copy()

    for layer in manifest.layers:
        layer_hash = hashlib.sha256()
        leng = 0

        # unfortunately, GCR (our most important oci-registry) does not support chunked uploads,
        # so we have to resort to writing the streaming result into a local tempfile to be able
        # to calculate digest-hash prior to upload to tgt; XXX: we might use streaming
        # when interacting w/ oci-registries that support chunked-uploads
        with tempfile.TemporaryFile() as f:
            src_tar_stream = oci_client.blob(
                image_reference=source_ref,
                digest=layer.digest,
                stream=True,
            ).iter_content(chunk_size=tarfile.BLOCKSIZE)
            src_tar_fobj = tarutil._FilelikeProxy(generator=src_tar_stream)
            filtered_stream = tarutil.filtered_tarfile_generator(
                src_tf=tarfile.open(fileobj=src_tar_fobj, mode='r|*'),
                filter_func=tarmember_filter,
            )

            for chunk in filtered_stream:
                layer_hash.update(chunk)
                leng += len(chunk)
                f.write(chunk)

            f.seek(0)

            oci_client.put_blob(
                image_reference=target_ref,
                digest=(layer_digest := 'sha256:' + layer_hash.hexdigest()),
                octets_count=leng,
                data=f,
            )

            # update copy of layers-list with new layer
            new_layer = dataclasses.replace(layer,
                                            digest=layer_digest,
                                            size=leng)
            layers_copy[layers_copy.index(layer)] = new_layer

    # switch layers in manifest to announce changes w/ manifest-upload
    manifest.layers = layers_copy

    # need to patch cfg-object, in case layer-digests changed
    cfg_blob = oci_client.blob(
        image_reference=source_ref,
        digest=manifest.config.digest,
        stream=False,
    ).json()  # cfg-blobs are small - no point in streaming
    if 'rootfs' not in cfg_blob:
        raise ValueError('expected attr `rootfs` not present on cfg-blob')

    cfg_blob['rootfs'] = {
        'diff_ids': [layer.digest for layer in manifest.layers],
        'type': 'layers',
    }
    cfg_blob = json.dumps(cfg_blob).encode('utf-8')
    cfg_digest = f'sha256:{hashlib.sha256(cfg_blob).hexdigest()}'
    cfg_leng = len(cfg_blob)
    oci_client.put_blob(
        image_reference=target_ref,
        digest=cfg_digest,
        octets_count=cfg_leng,
        data=cfg_blob,
    )

    manifest.config = dataclasses.replace(manifest.config,
                                          digest=cfg_digest,
                                          size=cfg_leng)

    manifest_raw = json.dumps(dataclasses.asdict(manifest)).encode('utf-8')

    oci_client.put_manifest(image_reference=target_ref, manifest=manifest_raw)
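A short usage sketch for filter_image; the references and the removed path are hypothetical placeholders (the client defaults to ccc.oci.oci_client()).

filter_image(
    source_ref='example.org/my-project/my-image:1.0.0',           # hypothetical
    target_ref='example.org/my-project/my-image:1.0.0-filtered',  # hypothetical
    remove_files=('etc/ssl/private/server.key',),
)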
Example 13
def replicate_artifact(
    src_image_reference: typing.Union[str, om.OciImageReference],
    tgt_image_reference: typing.Union[str, om.OciImageReference],
    credentials_lookup: oa.credentials_lookup=None,
    routes: oc.OciRoutes=oc.OciRoutes(),
    oci_client: oc.Client=None,
    mode: ReplicationMode=ReplicationMode.REGISTRY_DEFAULTS,
    platform_filter: typing.Callable[[om.OciPlatform], bool]=None,
) -> typing.Tuple[requests.Response, str, bytes]:
    '''
    replicate the given OCI Artifact from src_image_reference to tgt_image_reference.

    try to be verbatim, if possible (i.e. target should reflect source as close as
    possible). Whether or not a fully identical replication is possible depends on
    the source artifact and chosen replication `mode`:

    If source artifact is a "legacy / v1" "docker image" (as it used to be
    created from older versions of docker) verbatim replication is not
    possible, because modern (v2) OCI Registries (such as GCR) will not accept
    those manifests. Therefore, conversion to "v2" is required (done
    transparently by this function).

    If the source artifact is a "multiarch" image (oci.model.OciImageManifestList), OCI
    registries show different behaviour if ReplicationMode.REGISTRY_DEFAULTS is used.
    Some registries will then return a single-image manifest instead of the
    multiarch-manifest (the replication result will then only be a single image).

    Use ReplicationMode.PREFER_MULTIARCH or ReplicationMode.NORMALISE_TO_MULTIARCH to
    prevent this.

    If platform_filter is specified (only applied for multi-arch images), the replication
    result will obviously also deviate from src, depending on the filter semantics.

    pass either `credentials_lookup`, `routes`, OR `oci_client`
    '''
    if not (bool(credentials_lookup) ^ bool(oci_client)):
        raise ValueError('either credentials-lookup + routes, xor client must be passed')

    src_image_reference = om.OciImageReference.to_image_ref(src_image_reference)
    tgt_image_reference = om.OciImageReference.to_image_ref(tgt_image_reference)

    if not oci_client:
        client = oc.Client(
            credentials_lookup=credentials_lookup,
            routes=routes,
        )
    else:
        client = oci_client

    if mode is ReplicationMode.REGISTRY_DEFAULTS:
        accept = None
    elif mode is ReplicationMode.PREFER_MULTIARCH:
        accept = om.MimeTypes.prefer_multiarch
    elif mode is ReplicationMode.NORMALISE_TO_MULTIARCH:
        accept = om.MimeTypes.prefer_multiarch
    else:
        raise NotImplementedError(mode)

    # we need the unaltered manifest for verbatim replication
    raw_manifest = client.manifest_raw(
        image_reference=src_image_reference,
        accept=accept,
    ).text
    manifest = json.loads(raw_manifest)
    schema_version = int(manifest['schemaVersion'])
    need_to_synthesise_cfg_blob = False

    if schema_version == 1:
        need_to_synthesise_cfg_blob = True
        manifest = client.manifest(image_reference=src_image_reference)

        logger.warning(
            f'''
            manifest {src_image_reference=} is in legacy-format
            (schemaVersion==1). Cannot replicate it verbatim.
            '''
        )

        manifest, _ = oconv.v1_manifest_to_v2(
            manifest=manifest,
            oci_client=client,
            tgt_image_ref=str(tgt_image_reference),
        )

        # we must determine the uncompressed layer-digests to synthesise a valid
        # cfg-blob docker will accept (this means in particular we must download
        # all layers, even if we do not need to upload them)
        need_uncompressed_layer_digests = True
        uncompressed_layer_digests = []
    elif schema_version == 2:
        manifest = json.loads(raw_manifest)
        media_type = manifest.get('mediaType', om.DOCKER_MANIFEST_SCHEMA_V2_MIME)

        if media_type == om.DOCKER_MANIFEST_LIST_MIME:
            # multi-arch
            manifest = dacite.from_dict(
                data_class=om.OciImageManifestList,
                data=manifest,
            )

            src_ref = om.OciImageReference(image_reference=src_image_reference)
            src_name = src_ref.ref_without_tag
            tgt_ref = om.OciImageReference(image_reference=tgt_image_reference)
            tgt_name = tgt_ref.ref_without_tag

            # try to avoid modifications (from x-serialisation) - unless we have to
            manifest_dirty = False

            # cp manifests to tuple, because we _might_ modify if there is a platform_filter
            for sub_manifest in tuple(manifest.manifests):
                src_reference = f'{src_name}@{sub_manifest.digest}'
                tgt_reference = f'{tgt_name}@{sub_manifest.digest}'

                if platform_filter:
                    platform = op.from_single_image(
                        image_reference=src_reference,
                        oci_client=client,
                        base_platform=sub_manifest.platform,
                    )
                    if not platform_filter(platform):
                        logger.info(f'skipping {platform=} for {src_image_reference=}')
                        manifest_dirty = True
                        manifest.manifests.remove(sub_manifest)
                        continue

                logger.info(f'replicating to {tgt_reference=}')

                replicate_artifact(
                    src_image_reference=src_reference,
                    tgt_image_reference=tgt_reference,
                    oci_client=client,
                )

            if manifest_dirty:
                raw_manifest = json.dumps(manifest.as_dict())

            res = client.put_manifest(
                image_reference=tgt_image_reference,
                manifest=raw_manifest,
            )

            return res, tgt_image_reference, raw_manifest.encode('utf-8')

        elif media_type in (
            om.OCI_MANIFEST_SCHEMA_V2_MIME,
            om.DOCKER_MANIFEST_SCHEMA_V2_MIME,
        ):
            if mode is ReplicationMode.NORMALISE_TO_MULTIARCH:
                if not src_image_reference.has_digest_tag:
                    src_image_reference = om.OciImageReference.to_image_ref(
                        client.to_digest_hash(
                            image_reference=src_image_reference,
                        )
                    )
                platform = op.from_single_image(
                    image_reference=src_image_reference,
                    oci_client=client,
                )
                # force usage of digest-tag (the symbolic tag is reserved for the manifest-list)
                tgt_image_ref = \
                    f'{tgt_image_reference.ref_without_tag}@{src_image_reference.tag}'

                res, ref, manifest_bytes = replicate_artifact(
                    src_image_reference=src_image_reference,
                    tgt_image_reference=tgt_image_ref,
                    oci_client=client,
                )

                manifest_list = om.OciImageManifestList(
                    manifests=[
                        om.OciImageManifestListEntry(
                            digest=f'sha256:{hashlib.sha256(manifest_bytes).hexdigest()}',
                            mediaType=media_type,
                            size=len(manifest_bytes),
                            platform=platform,
                        ),
                    ]
                )

                manifest_list_bytes = json.dumps(
                    manifest_list.as_dict(),
                ).encode('utf-8')

                res = client.put_manifest(
                    image_reference=tgt_image_reference,
                    manifest=manifest_list_bytes,
                )

                return res, tgt_image_reference, manifest_list_bytes

            manifest = dacite.from_dict(
                data_class=om.OciImageManifest,
                data=json.loads(raw_manifest)
            )
            need_uncompressed_layer_digests = False
            uncompressed_layer_digests = None
        else:
            raise NotImplementedError(f'{media_type=}')
    else:
        raise NotImplementedError(schema_version)

    for idx, layer in enumerate(manifest.blobs()):
        # need to specially handle cfg-blob (may be absent for legacy (v1) images)

        is_cfg_blob = idx == 0
        if is_cfg_blob and need_to_synthesise_cfg_blob:
            # if we need(ed) to synthesise cfg-blob (because source-image contained a v1-manifest)
            # then there will never be a cfg-blob in src.
            # -> silently skip to avoid emitting a confusing, but unhelpful warning
            logger.debug(f'{src_image_reference=} - synthesised cfg-blob - skipping replication')
            continue

        head_res = client.head_blob(
            image_reference=tgt_image_reference,
            digest=layer.digest,
        )
        if head_res.ok:
            if not need_uncompressed_layer_digests:
                logger.info(f'skipping blob download {layer.digest=} - already exists in tgt')
                continue # no need to download if blob already exists in tgt
            elif not is_cfg_blob:
                # we will not need to re-upload, however we do need the uncompressed digest
                blob_res = client.blob(
                    image_reference=src_image_reference,
                    digest=layer.digest,
                    absent_ok=is_cfg_blob,
                )

                layer_hash = hashlib.sha256()
                decompressor = zlib.decompressobj(wbits=zlib.MAX_WBITS | 16)

                for chunk in blob_res.iter_content(chunk_size=4096):
                    layer_hash.update(decompressor.decompress(chunk))

                uncompressed_layer_digests.append(f'sha256:{layer_hash.hexdigest()}')
                continue # we may still skip the upload, of course

        # todo: consider silencing warning if we do v1->v2-conversion (cfg-blob will never exist
        #       in this case)
        blob_res = client.blob(
            image_reference=src_image_reference,
            digest=layer.digest,
            absent_ok=is_cfg_blob,
        )
        if not blob_res and is_cfg_blob:
            # fallback to non-verbatim replication; synthesise cfg
            logger.warning(
                'falling back to non-verbatim replication '
                f'{src_image_reference=} {tgt_image_reference=}'
            )
            need_to_synthesise_cfg_blob = True
            continue

        if need_uncompressed_layer_digests:
            uncompressed_layer_hash = hashlib.sha256()
            decompressor = zlib.decompressobj(wbits=zlib.MAX_WBITS | 16)

            def intercept_chunks(blob_res):
                for chunk in blob_res.iter_content(chunk_size=4096):
                    uncompressed_layer_hash.update(decompressor.decompress(chunk))
                    yield chunk

                uncompressed_layer_digests.append(f'sha256:{uncompressed_layer_hash.hexdigest()}')

            blob_res = intercept_chunks(blob_res=blob_res)

        client.put_blob(
            image_reference=tgt_image_reference,
            digest=layer.digest,
            octets_count=layer.size,
            data=blob_res,
        )

    if need_to_synthesise_cfg_blob:
        fake_cfg_dict = json.loads(json.loads(raw_manifest)['history'][0]['v1Compatibility'])

        # patch-in uncompressed layer-digests
        fake_cfg_dict['rootfs'] = {
            'diff_ids': uncompressed_layer_digests,
            'type': 'layers',
        }

        fake_cfg_raw = json.dumps(fake_cfg_dict).encode('utf-8')

        client.put_blob(
            image_reference=tgt_image_reference,
            digest=(cfg_digest := f'sha256:{hashlib.sha256(fake_cfg_raw).hexdigest()}'),
            octets_count=len(fake_cfg_raw),
            data=fake_cfg_raw,
        )

        manifest_dict = dataclasses.asdict(manifest)
        # patch-on altered cfg-digest
        manifest_dict['config']['digest'] = cfg_digest
        manifest_dict['config']['size'] = len(fake_cfg_raw)
        raw_manifest = json.dumps(manifest_dict)

    res = client.put_manifest(
        image_reference=tgt_image_reference,
        manifest=raw_manifest,
    )

    return res, tgt_image_reference, raw_manifest.encode('utf-8')
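A short usage sketch for replicate_artifact; the references are hypothetical placeholders, and the platform_filter keeps only linux/amd64 sub-images (the os/architecture attribute names are assumed per the OCI platform schema).

import ccc.oci

oci_client = ccc.oci.oci_client()

res, tgt_ref, manifest_bytes = replicate_artifact(
    src_image_reference='example.org/my-project/my-image:1.0.0',          # hypothetical
    tgt_image_reference='mirror.example.org/my-project/my-image:1.0.0',   # hypothetical
    oci_client=oci_client,
    mode=ReplicationMode.NORMALISE_TO_MULTIARCH,
    platform_filter=lambda platform: (
        platform.os == 'linux' and platform.architecture == 'amd64'
    ),
)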
Example 14
def filter_image(
    source_ref: typing.Union[str, om.OciImageReference],
    target_ref: typing.Union[str, om.OciImageReference],
    remove_files: typing.Sequence[str] = (),
    oci_client: oc.Client = None,
    mode: oci.ReplicationMode = oci.ReplicationMode.REGISTRY_DEFAULTS,
    platform_filter: typing.Callable[[om.OciPlatform], bool] = None,
) -> typing.Tuple[requests.Response, str, bytes]:  # response, tgt-ref, manifest_bytes
    if not oci_client:
        oci_client = ccc.oci.oci_client()

    source_ref = om.OciImageReference.to_image_ref(source_ref)
    target_ref = om.OciImageReference.to_image_ref(target_ref)

    # shortcut in case there are no filtering-rules
    if not remove_files:
        return oci.replicate_artifact(
            src_image_reference=source_ref,
            tgt_image_reference=target_ref,
            oci_client=oci_client,
            mode=mode,
            platform_filter=platform_filter,
        )

    if mode is oci.ReplicationMode.REGISTRY_DEFAULTS:
        accept = None
    elif mode is oci.ReplicationMode.PREFER_MULTIARCH:
        accept = om.MimeTypes.prefer_multiarch
    elif mode is oci.ReplicationMode.NORMALISE_TO_MULTIARCH:
        accept = om.MimeTypes.prefer_multiarch
    else:
        raise NotImplementedError(mode)

    manifest = oci_client.manifest(
        image_reference=str(source_ref),
        accept=accept,
    )

    if isinstance(manifest, om.OciImageManifestList):
        # recurse into sub-images

        src_name = source_ref.ref_without_tag
        tgt_name = target_ref.ref_without_tag

        for idx, sub_manifest in enumerate(tuple(manifest.manifests)):
            source_ref = f'{src_name}@{sub_manifest.digest}'

            if platform_filter:
                platform = oci.platform.from_single_image(
                    image_reference=source_ref,
                    oci_client=oci_client,
                    base_platform=sub_manifest.platform,
                )
                if not platform_filter(platform):
                    logger.info(f'skipping {platform=} for {source_ref=}')
                    manifest.manifests.remove(sub_manifest)
                    continue

            logger.info(f'filtering to {tgt_name=}')

            res, tgt_ref, manifest_bytes = filter_image(
                source_ref=source_ref,
                target_ref=tgt_name,
                remove_files=remove_files,
                oci_client=oci_client,
            )

            # patch (potentially) modified manifest-digest
            patched_manifest = dataclasses.replace(
                sub_manifest,
                digest=f'sha256:{hashlib.sha256(manifest_bytes).hexdigest()}',
                size=len(manifest_bytes),
            )
            manifest.manifests[idx] = patched_manifest

        manifest_dict = manifest.as_dict()
        manifest_raw = json.dumps(manifest_dict).encode('utf-8')
        res = oci_client.put_manifest(
            image_reference=str(target_ref),
            manifest=manifest_raw,
        )

        return res, str(target_ref), manifest_raw

    # normalise single-image to multi-arch (w/ one entry)
    if mode is oci.ReplicationMode.NORMALISE_TO_MULTIARCH:
        if not source_ref.has_digest_tag:
            source_ref = om.OciImageReference.to_image_ref(
                oci_client.to_digest_hash(image_reference=source_ref),
            )

        platform = oci.platform.from_single_image(
            image_reference=source_ref,
            oci_client=oci_client,
        )

        res, ref, manifest_bytes = filter_image(
            source_ref=source_ref,
            target_ref=target_ref.ref_without_tag,
            remove_files=remove_files,
            oci_client=oci_client,
        )

        manifest_list = om.OciImageManifestList(
            manifests=[
                om.OciImageManifestListEntry(
                    digest=f'sha256:{hashlib.sha256(manifest_bytes).hexdigest()}',
                    mediaType=manifest.mediaType,
                    size=len(manifest_bytes),
                    platform=platform,
                ),
            ],
        )

        manifest_list_bytes = json.dumps(manifest_list.as_dict()).encode('utf-8')

        res = oci_client.put_manifest(
            image_reference=target_ref,
            manifest=manifest_list_bytes,
        )

        return res, target_ref, manifest_list_bytes

    cp_cfg_blob = True
    if isinstance(manifest, om.OciImageManifestV1):
        logger.info(f'converting v1-manifest -> v2 {source_ref=} {target_ref=}')
        manifest, cfg_blob = oconv.v1_manifest_to_v2(
            manifest=manifest,
            oci_client=oci_client,
            tgt_image_ref=str(target_ref),
        )
        cp_cfg_blob = False  # we synthesise new cfg - thus we cannot cp from src
    elif not isinstance(manifest, om.OciImageManifest):
        raise NotImplementedError(manifest)

    # allow / ignore leading '/'
    remove_files = [p.lstrip('/') for p in remove_files]

    def tarmember_filter(tar_info: tarfile.TarInfo):
        stripped_name = tar_info.name.lstrip('./')
        if stripped_name in remove_files:
            logger.debug(f'rm: {tar_info.name=}')
            return False  # rm member
        return True  # keep member

    # prepare copy of layers to avoid modification while iterating
    layers_copy = manifest.layers.copy()

    non_gzipped_layer_digests = {}  # {gzipped-digest: sha256:<non-gzipped digest>}

    for layer in manifest.layers:
        layer_hash = hashlib.sha256()
        cfg_hash = hashlib.sha256()  # we need to write the "non-gzipped" hash to the cfg-blob
        leng = 0
        src_leng = 0  # required for calculating the length for the gzip-footer
        crc = 0  # required for calculating the crc32-checksum for the gzip-footer

        # unfortunately, GCR (our most important oci-registry) does not support chunked uploads,
        # so we have to resort to writing the streaming result into a local tempfile to be able
        # to calculate digest-hash prior to upload to tgt; XXX: we might use streaming
        # when interacting w/ oci-registries that support chunked-uploads
        with tempfile.TemporaryFile() as f:
            src_tar_stream = oci_client.blob(
                image_reference=str(source_ref),
                digest=layer.digest,
                stream=True,
            ).iter_content(chunk_size=tarfile.BLOCKSIZE * 64)
            src_tar_fobj = tarutil._FilelikeProxy(generator=src_tar_stream)
            filtered_stream = tarutil.filtered_tarfile_generator(
                src_tf=tarfile.open(fileobj=src_tar_fobj, mode='r|*'),
                filter_func=tarmember_filter,
                chunk_size=tarfile.BLOCKSIZE * 64,
            )

            f.write((gzip_header := gziputil.gzip_header(fname=b'layer.tar')))
            layer_hash.update(gzip_header)
            leng += len(gzip_header)

            compressor = gziputil.zlib_compressobj()

            for chunk in filtered_stream:
                cfg_hash.update(chunk)  # need to hash before compressing for the cfg-blob
                crc = zlib.crc32(chunk, crc)
                src_leng += len(chunk)

                chunk = compressor.compress(chunk)
                layer_hash.update(chunk)
                leng += len(chunk)
                f.write(chunk)

            f.write((remainder := compressor.flush()))