Esempio n. 1
0
def publish_container_image_from_kaniko_tarfile(
        image_tarfile_path: str,
        oci_client: oc.Client,
        image_reference: str,
        additional_tags: typing.List[str] = (),
):
    '''
    uploads the image contained in the given kaniko image-tarfile using the passed oci-client.

    All blobs reported by the kaniko-image are uploaded to the (normalised) `image_reference`.
    Afterwards, the image's manifest is published once for `image_reference` itself, and once
    for each of `additional_tags` (same image-name, different tag).

    @param image_tarfile_path: path to the tarfile, as understood by `ok.read_kaniko_image_tar`
    @param oci_client:         client used for all blob- and manifest-requests
    @param image_reference:    target image-reference; the part after the last `:` is
                               treated as the tag
    @param additional_tags:    further tags under which the same manifest is published
    '''
    image_reference = ou.normalise_image_reference(image_reference=image_reference)

    # everything left of the last colon is re-used as prefix for the additional tags
    repository = image_reference.rsplit(':', 1)[0]
    image_references = (
        image_reference,
        *(f'{repository}:{tag}' for tag in additional_tags),
    )

    upload_chunk_size = 1024 * 1024

    with ok.read_kaniko_image_tar(tar_path=image_tarfile_path) as kaniko_image:
        for blob in kaniko_image.blobs():
            oci_client.put_blob(
                image_reference=image_reference,
                digest=blob.digest_str(),
                octets_count=blob.size,
                data=blob,
                max_chunk=upload_chunk_size,
            )

            # NOTE(review): the result is discarded - presumably this is a post-upload
            # lookup of the blob that tolerates absence; confirm whether it is still needed
            oci_client.blob(
                image_reference=image_reference,
                digest=blob.digest_str(),
                absent_ok=True,
            )

        manifest_dict = dataclasses.asdict(kaniko_image.oci_manifest())
        manifest_bytes = json.dumps(manifest_dict).encode('utf-8')

        # the same manifest-bytes are published for every target reference
        for tgt_ref in image_references:
            logger.info(f'publishing manifest {tgt_ref=}')
            oci_client.put_manifest(
                image_reference=tgt_ref,
                manifest=manifest_bytes,
            )
Esempio n. 2
0
def iter_image_files(
    container_image_reference: str,
    oci_client: oc.Client=None,
) -> typing.Iterable[typing.Tuple[typing.IO, str]]:
    '''
    returns a generator yielding the regular files contained in the specified oci-image
    as a sequence of two-tuples (filelike-obj, <layer-digest:relpath>).

    The image's layer-blobs are retrieved in the order they are defined in the image-manifest.
    The cfg-blob is ignored. All layer-blobs are assumed to be tarfiles (which is not
    necessarily a valid assumption for non-docker-compatible oci-artifacts).

    If no oci_client is passed, one is created using `ccc.oci.oci_client()`.
    '''
    client = oci_client or ccc.oci.oci_client()
    image_manifest = client.manifest(image_reference=container_image_reference)

    # only iterate over `layers` - the cfg-blob (part of manifest.blobs()) is left out
    for layer in image_manifest.layers:
        response = client.blob(
            image_reference=container_image_reference,
            digest=layer.digest,
            stream=True,
        )
        chunks = response.iter_content(
            chunk_size=tarfile.RECORDSIZE,
            decode_unicode=False,
        )
        layer_fileobj = _FilelikeProxy(generator=chunks, size=layer.size)

        # `r|*`: read as non-seekable stream w/ transparent compression-detection
        with tarfile.open(fileobj=layer_fileobj, mode='r|*') as tf:
            for member in tf:
                if member.isfile():
                    yield (
                        tf.extractfile(member),
                        f'{layer.digest}:{member.name}',
                    )
Esempio n. 3
0
def sanitise_image(
    image_ref: typing.Union[str, om.OciImageReference],
    oci_client: oc.Client,
):
    '''
    checks the cfg-blob of the given image and, if needed, publishes a sanitised variant.

    If `is_cfg_blob_sane` accepts the image's cfg-blob, `image_ref` is returned unchanged and
    nothing is uploaded. Otherwise, the cfg-blob returned by `sanitise_cfg_blob` is uploaded, the
    manifest is patched to reference it (digest and size), and the patched manifest is published
    under `image_ref`. In that case, a digest-reference (`<ref-without-tag>@sha256:<hexdigest>`)
    pointing to the patched manifest is returned as str.
    '''
    def sha256_digest(octets: bytes) -> str:
        return 'sha256:' + hashlib.sha256(octets).hexdigest()

    original_manifest = oci_client.manifest(image_reference=image_ref)
    original_cfg = oci_client.blob(
        image_reference=image_ref,
        digest=original_manifest.config.digest,
    ).content

    if is_cfg_blob_sane(manifest=original_manifest, cfg_blob=original_cfg):
        return image_ref

    fixed_cfg = sanitise_cfg_blob(manifest=original_manifest, cfg_blob=original_cfg)
    fixed_cfg_digest = sha256_digest(fixed_cfg)
    fixed_cfg_size = len(fixed_cfg)

    oci_client.put_blob(
        image_ref,
        digest=fixed_cfg_digest,
        octets_count=fixed_cfg_size,
        data=fixed_cfg,
    )

    # only the cfg-reference changes; all other manifest-attributes are kept
    patched_cfg_ref = dataclasses.replace(
        original_manifest.config,
        digest=fixed_cfg_digest,
        size=fixed_cfg_size,
    )
    patched_manifest = dataclasses.replace(original_manifest, config=patched_cfg_ref)

    manifest_bytes = json.dumps(dataclasses.asdict(patched_manifest)).encode('utf-8')
    oci_client.put_manifest(image_reference=image_ref, manifest=manifest_bytes)

    # the digest is calculated from exactly those bytes that were uploaded
    parsed_ref: om.OciImageReference = om.OciImageReference.to_image_ref(image_ref)
    return f'{parsed_ref.ref_without_tag}@{sha256_digest(manifest_bytes)}'
Esempio n. 4
0
def image_layers_as_tarfile_generator(
    image_reference: str,
    oci_client: oc.Client,
    chunk_size=tarfile.RECORDSIZE,
    include_config_blob=True,
) -> typing.Generator[bytes, None, None]:
    '''
    returns a generator yielding a tar-archive with the passed oci-image's blobs as members.
    Each member is named `<blob-digest>.tar` and contains the blob's octets as received from the
    registry (so members may be gzipped). This is somewhat similar to the result of a
    `docker save`.

    This function is useful to e.g. upload file system contents of an oci-container-image to some
    scanning-tool (provided it supports the extraction of tar-archives)

    By default, all blobs from `manifest.blobs()` are included (i.e. also the cfg-blob). If
    include_config_blob is set to False, only the blobs from `manifest.layers` are emitted.

    @param image_reference:     the image whose blobs are to be retrieved
    @param oci_client:          client used for retrieving manifest and blobs
    @param chunk_size:          chunk-size passed to `iter_content` when streaming blobs
    @param include_config_blob: whether or not to include the cfg-blob

    Note: the tar-header's size is taken from the manifest (`blob.size`); no end-of-archive
    marker (two NUL-blocks) is emitted after the last member.
    '''
    manifest = oci_client.manifest(image_reference=image_reference)

    if include_config_blob:
        blobs = manifest.blobs()
    else:
        # log once (rather than once per layer)
        logger.debug('skipping config blob')
        blobs = manifest.layers

    for blob in blobs:
        logger.debug(f'getting blob {blob.digest}')

        tarinfo = tarfile.TarInfo(name=blob.digest + '.tar')  # note: may be gzipped
        tarinfo.size = blob.size

        tarinfo_bytes = tarinfo.tobuf()
        yield tarinfo_bytes

        emitted_bytes = len(tarinfo_bytes)
        for chunk in oci_client.blob(
            image_reference=image_reference,
            digest=blob.digest,
            stream=True,
        ).iter_content(chunk_size=chunk_size):
            emitted_bytes += len(chunk)
            yield chunk

        # members must be padded to full blocks w/ NUL-bytes. `-n % BLOCKSIZE` evaluates to 0
        # if the member already ends on a block boundary. An additional all-NUL block must not
        # be emitted in this case, as tar-readers interpret it as end-of-archive (thus
        # ignoring all subsequent members).
        if (missing := -emitted_bytes % tarfile.BLOCKSIZE):
            yield tarfile.NUL * missing
Esempio n. 5
0
def from_manifest(
    image_reference: om.OciImageReference,
    manifest: om.OciImageManifest,
    oci_client: oc.Client=None,
    base_platform: om.OciPlatform=None,
) -> om.OciPlatform:
    '''
    determines the platform of a single oci-image from its cfg-blob.

    The cfg-blob referenced by the passed manifest is retrieved and parsed as JSON. If
    `base_platform` is given, its attributes (`as_dict()`) serve as defaults, which are
    overwritten by same-named attributes from the cfg-blob.

    @param image_reference: reference of the image the manifest belongs to
    @param manifest:        the image's manifest (used for looking up the cfg-blob's digest)
    @param oci_client:      client for retrieving the cfg-blob; if not passed, a client is
                            created using `ccc.oci.oci_client()`
    @param base_platform:   optional platform supplying default values

    @returns the platform, created from the merged attributes
    '''
    if not oci_client:
        # previously, the declared default (None) led to an AttributeError
        import ccc.oci  # imported lazily; only needed if no client was passed
        oci_client = ccc.oci.oci_client()

    cfg = base_platform.as_dict() if base_platform else {}

    # values from cfg-blob take precedence over those from base_platform
    cfg |= oci_client.blob(
        image_reference=image_reference,
        digest=manifest.config.digest,
        stream=False, # we will need to json.load the (small) result anyhow
    ).json()

    return dacite.from_dict(
        data_class=om.OciPlatform,
        data=cfg,
    )
Esempio n. 6
0
def filter_image(
        source_ref: str,
        target_ref: str,
        remove_files: typing.Sequence[str] = (),
        oci_client: oc.Client = None,
):
    '''
    replicates the image from source_ref to target_ref, removing the specified files from
    all of its layers.

    If no files are to be removed, the image is replicated as-is using
    `oci.replicate_artifact` (whose result is returned). Otherwise, each layer is streamed
    through a filter discarding the matching tar-members, and uploaded to the target. The
    cfg-blob (`rootfs`) and the manifest are patched to reference the new layers and uploaded
    as well. Nothing is returned in this case.

    @param source_ref:   image-reference to read from
    @param target_ref:   image-reference to publish the filtered image to
    @param remove_files: paths of the files to remove. Leading `/` and `./` are ignored, both
                         for the passed paths and for the names of the tar-members.
    @param oci_client:   client to use; if not passed, a client is created using
                         `ccc.oci.oci_client()`

    @raises NotImplementedError if the source's manifest is not an `om.OciImageManifest`
    @raises ValueError if the source's cfg-blob has no `rootfs` attribute
    '''
    if not oci_client:
        oci_client = ccc.oci.oci_client()

    # shortcut in case there are no filtering-rules
    if not remove_files:
        return oci.replicate_artifact(
            src_image_reference=source_ref,
            tgt_image_reference=target_ref,
            oci_client=oci_client,
        )

    manifest = oci_client.manifest(image_reference=source_ref)

    if not isinstance(manifest, om.OciImageManifest):
        raise NotImplementedError(manifest)

    def normalise_path(path: str) -> str:
        # drop leading `/` and `./` path-prefixes. str.lstrip('./') must not be used for this:
        # it strips a set of characters (rather than a prefix), and would thus also remove the
        # leading dot of hidden files (`./.dockerenv` -> `dockerenv`)
        while True:
            if path.startswith('./'):
                path = path[2:]
            elif path.startswith('/'):
                path = path[1:]
            else:
                return path

    # allow / ignore leading '/'; use a set, as there is one lookup for each tar-member
    paths_to_remove = frozenset(normalise_path(p) for p in remove_files)

    def tarmember_filter(tar_info: tarfile.TarInfo):
        if normalise_path(tar_info.name) in paths_to_remove:
            logger.debug(f'rm: {tar_info.name=}')
            return False  # rm member
        return True  # keep member

    # filtered layers are collected in manifest-order (rather than being looked up by equality)
    filtered_layers = []

    for layer in manifest.layers:
        layer_hash = hashlib.sha256()
        leng = 0

        # unfortunately, GCR (our most important oci-registry) does not support chunked uploads,
        # so we have to resort to writing the streaming result into a local tempfile to be able
        # to calculate digest-hash prior to upload to tgt; XXX: we might use streaming
        # when interacting w/ oci-registries that support chunked-uploads
        with tempfile.TemporaryFile() as f:
            src_tar_stream = oci_client.blob(
                image_reference=source_ref,
                digest=layer.digest,
                stream=True,
            ).iter_content(chunk_size=tarfile.BLOCKSIZE)
            src_tar_fobj = tarutil._FilelikeProxy(generator=src_tar_stream)

            # make sure the source-tarfile is closed, even if filtering fails
            with tarfile.open(fileobj=src_tar_fobj, mode='r|*') as src_tf:
                filtered_stream = tarutil.filtered_tarfile_generator(
                    src_tf=src_tf,
                    filter_func=tarmember_filter,
                )

                for chunk in filtered_stream:
                    layer_hash.update(chunk)
                    leng += len(chunk)
                    f.write(chunk)

            f.seek(0)

            layer_digest = 'sha256:' + layer_hash.hexdigest()
            oci_client.put_blob(
                image_reference=target_ref,
                digest=layer_digest,
                octets_count=leng,
                data=f,
            )

        # NOTE(review): only digest and size are replaced - the mediaType is kept from the
        # source-layer; confirm it still matches what filtered_tarfile_generator emits
        filtered_layers.append(
            dataclasses.replace(layer, digest=layer_digest, size=leng)
        )

    # switch layers in manifest to announce changes w/ manifest-upload
    manifest.layers = filtered_layers

    # need to patch cfg-object, in case layer-digests changed
    cfg_blob = oci_client.blob(
        image_reference=source_ref,
        digest=manifest.config.digest,
        stream=False,
    ).json()  # cfg-blobs are small - no point in streaming
    if 'rootfs' not in cfg_blob:
        raise ValueError('expected attr `rootfs` not present on cfg-blob')

    cfg_blob['rootfs'] = {
        'diff_ids': [layer.digest for layer in manifest.layers],
        'type': 'layers',
    }
    cfg_blob = json.dumps(cfg_blob).encode('utf-8')
    cfg_digest = f'sha256:{hashlib.sha256(cfg_blob).hexdigest()}'
    cfg_leng = len(cfg_blob)
    oci_client.put_blob(
        image_reference=target_ref,
        digest=cfg_digest,
        octets_count=cfg_leng,
        data=cfg_blob,
    )

    manifest.config = dataclasses.replace(manifest.config,
                                          digest=cfg_digest,
                                          size=cfg_leng)

    manifest_raw = json.dumps(dataclasses.asdict(manifest)).encode('utf-8')

    oci_client.put_manifest(image_reference=target_ref, manifest=manifest_raw)
Esempio n. 7
0
def filter_image(
    source_ref: typing.Union[str, om.OciImageReference],
    target_ref: typing.Union[str, om.OciImageReference],
    remove_files: typing.Sequence[str] = (),
    oci_client: oc.Client = None,
    mode: oci.ReplicationMode = oci.ReplicationMode.REGISTRY_DEFAULTS,
    platform_filter: typing.Callable[[om.OciPlatform], bool] = None,
) -> typing.Tuple[requests.Response, str,
                  bytes]:  # response, tgt-ref, manifest_bytes
    if not oci_client:
        oci_client = ccc.oci.oci_client()

    source_ref = om.OciImageReference.to_image_ref(source_ref)
    target_ref = om.OciImageReference.to_image_ref(target_ref)

    # shortcut in case there are no filtering-rules
    if not remove_files:
        return oci.replicate_artifact(
            src_image_reference=source_ref,
            tgt_image_reference=target_ref,
            oci_client=oci_client,
            mode=mode,
            platform_filter=platform_filter,
        )

    if mode is oci.ReplicationMode.REGISTRY_DEFAULTS:
        accept = None
    elif mode is oci.ReplicationMode.PREFER_MULTIARCH:
        accept = om.MimeTypes.prefer_multiarch
    elif mode is oci.ReplicationMode.NORMALISE_TO_MULTIARCH:
        accept = om.MimeTypes.prefer_multiarch
    else:
        raise NotImplementedError(mode)

    manifest = oci_client.manifest(
        image_reference=str(source_ref),
        accept=accept,
    )

    if isinstance(manifest, om.OciImageManifestList):
        # recurse into sub-images

        src_name = source_ref.ref_without_tag
        tgt_name = target_ref.ref_without_tag

        for idx, sub_manifest in enumerate(tuple(manifest.manifests)):
            source_ref = f'{src_name}@{sub_manifest.digest}'

            if platform_filter:
                platform = oci.platform.from_single_image(
                    image_reference=source_ref,
                    oci_client=oci_client,
                    base_platform=sub_manifest.platform,
                )
                if not platform_filter(platform):
                    logger.info(f'skipping {platform=} for {source_ref=}')
                    manifest.manifests.remove(sub_manifest)
                    continue

            logger.info(f'filtering to {tgt_name=}')

            res, tgt_ref, manifest_bytes = filter_image(
                source_ref=source_ref,
                target_ref=tgt_name,
                remove_files=remove_files,
                oci_client=oci_client,
            )

            # patch (potentially) modified manifest-digest
            patched_manifest = dataclasses.replace(
                sub_manifest,
                digest=f'sha256:{hashlib.sha256(manifest_bytes).hexdigest()}',
                size=len(manifest_bytes),
            )
            manifest.manifests[idx] = patched_manifest

        manifest_dict = manifest.as_dict()
        manifest_raw = json.dumps(manifest_dict).encode('utf-8')
        res = oci_client.put_manifest(
            image_reference=str(target_ref),
            manifest=manifest_raw,
        )

        return res, str(target_ref), manifest_raw

    # normalise single-image to multi-arch (w/ one entry)
    if mode is oci.ReplicationMode.NORMALISE_TO_MULTIARCH:
        if not source_ref.has_digest_tag:
            source_ref = om.OciImageReference.to_image_ref(
                oci_client.to_digest_hash(image_reference=source_ref, ))

        platform = oci.platform.from_single_image(
            image_reference=source_ref,
            oci_client=oci_client,
        )

        res, ref, manifest_bytes = filter_image(
            source_ref=source_ref,
            target_ref=target_ref.ref_without_tag,
            remove_files=remove_files,
            oci_client=oci_client,
        )

        manifest_list = om.OciImageManifestList(manifests=[
            om.OciImageManifestListEntry(
                digest=f'sha256:{hashlib.sha256(manifest_bytes).hexdigest()}',
                mediaType=manifest.mediaType,
                size=len(manifest_bytes),
                platform=platform,
            )
        ], )

        manifest_list_bytes = json.dumps(
            manifest_list.as_dict(), ).encode('utf-8')

        res = oci_client.put_manifest(
            image_reference=target_ref,
            manifest=manifest_list_bytes,
        )

        return res, target_ref, manifest_list_bytes

    cp_cfg_blob = True
    if isinstance(manifest, om.OciImageManifestV1):
        logger.info(
            f'converting v1-manifest -> v2 {source_ref=} {target_ref=}')
        manifest, cfg_blob = oconv.v1_manifest_to_v2(
            manifest=manifest,
            oci_client=oci_client,
            tgt_image_ref=str(target_ref),
        )
        cp_cfg_blob = False  # we synthesise new cfg - thus we cannot cp from src
    elif not isinstance(manifest, om.OciImageManifest):
        raise NotImplementedError(manifest)

    # allow / ignore leading '/'
    remove_files = [p.lstrip('/') for p in remove_files]

    def tarmember_filter(tar_info: tarfile.TarInfo):
        stripped_name = tar_info.name.lstrip('./')
        if stripped_name in remove_files:
            logger.debug(f'rm: {tar_info.name=}')
            return False  # rm member
        return True  # keep member

    # prepare copy of layers to avoid modification while iterating
    layers_copy = manifest.layers.copy()

    non_gzipped_layer_digests = {
    }  # {gzipped-digest: sha256:non-gzipped-digest}

    for layer in manifest.layers:
        layer_hash = hashlib.sha256()
        cfg_hash = hashlib.sha256(
        )  # we need to write "non-gzipped" hash to cfg-blob
        leng = 0
        src_leng = 0  # required for calculating leng for gzip-footer
        crc = 0  # requried for calculcating crc32-checksum for gzip-footer

        # unfortunately, GCR (our most important oci-registry) does not support chunked uploads,
        # so we have to resort to writing the streaming result into a local tempfile to be able
        # to calculate digest-hash prior to upload to tgt; XXX: we might use streaming
        # when interacting w/ oci-registries that support chunked-uploads
        with tempfile.TemporaryFile() as f:
            src_tar_stream = oci_client.blob(
                image_reference=str(source_ref),
                digest=layer.digest,
                stream=True,
            ).iter_content(chunk_size=tarfile.BLOCKSIZE * 64)
            src_tar_fobj = tarutil._FilelikeProxy(generator=src_tar_stream)
            filtered_stream = tarutil.filtered_tarfile_generator(
                src_tf=tarfile.open(fileobj=src_tar_fobj, mode='r|*'),
                filter_func=tarmember_filter,
                chunk_size=tarfile.BLOCKSIZE * 64,
            )

            f.write((gzip_header := gziputil.gzip_header(fname=b'layer.tar')))
            layer_hash.update(gzip_header)
            leng += len(gzip_header)

            compressor = gziputil.zlib_compressobj()

            for chunk in filtered_stream:
                cfg_hash.update(
                    chunk)  # need to hash before compressing for cfg-blob
                crc = zlib.crc32(chunk, crc)
                src_leng += len(chunk)

                chunk = compressor.compress(chunk)
                layer_hash.update(chunk)
                leng += len(chunk)
                f.write(chunk)

            f.write((remainder := compressor.flush()))