def iter_platforms(
    image_reference: typing.Union[str, om.OciImageReference],
    oci_client: oc.Client=None,
) -> typing.Generator[tuple[om.OciImageReference, om.OciPlatform], None, None]:
    image_reference = om.OciImageReference.to_image_ref(image_reference)

    manifest = oci_client.manifest(
        image_reference=image_reference,
        accept=om.MimeTypes.prefer_multiarch,
    )

    if isinstance(manifest, om.OciImageManifest):
        platform = from_single_image(
            image_reference=image_reference,
            oci_client=oci_client,
        )
        yield (image_reference, platform)
        return
    elif isinstance(manifest, om.OciImageManifestList):
        manifest: om.OciImageManifestList
    else:
        raise NotImplementedError(type(manifest))

    prefix = image_reference.ref_without_tag

    for sub_manifest in manifest.manifests:
        platform_dict = dataclasses.asdict(sub_manifest)

        sub_manifest = oci_client.manifest(
            image_reference=(sub_img_ref := f'{prefix}@{sub_manifest.digest}'),
        )
def single_platform_manifest(
    image_reference: om.OciImageReference | str,
    oci_client: oc.Client,
    platform: om.OciPlatform=None,
):
    '''
    returns a single-platform OCI Image Manifest for the given image_reference.
    lookup and validation depend on the presence of the platform argument.

    if the given image-ref points to a single-arch manifest, the returned result will be
    identical to invoking `oci_client.manifest`. If the platform argument is passed, and the
    discovered platform does not match, a `ValueError` will be raised.

    if the given image-ref points to a multi-arch manifest, content-negotiation depends on the
    presence of the platform argument. If absent, no preference will be stated (i.e. the
    accept-header will not be set). Some OCI image registries will return a single-arch manifest
    (thus saving a roundtrip). If platform is passed, preference for multi-arch will be stated
    via the accept-header; the specified platform will be looked up and returned. If not found,
    a `ValueError` will be raised.
    '''
    image_reference = om.OciImageReference.to_image_ref(image_reference)

    if platform:
        accept = om.MimeTypes.prefer_multiarch
    else:
        accept = None

    manifest = oci_client.manifest(
        image_reference=image_reference,
        accept=accept,
    )

    if isinstance(manifest, om.OciImageManifest):
        if not platform:
            return manifest

        actual_platform = from_manifest(
            manifest=manifest,
            oci_client=oci_client,
        )
        if not actual_platform == platform:
            raise ValueError(f'{image_reference=} does not match {platform=}: {actual_platform=}')
        return manifest
    elif isinstance(manifest, om.OciImageManifestList):
        pass
    else:
        raise NotImplementedError(manifest)

    for manifest in manifest.manifests:
        manifest: om.OciImageManifestListEntry
        if manifest.platform == platform:
            break
    else:
        raise ValueError(f'{image_reference=} does not contain {platform=}')

    manifest_ref = f'{image_reference.ref_without_tag}@{manifest.digest}'
    return oci_client.manifest(image_reference=manifest_ref)
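
# Illustrative usage sketch for `single_platform_manifest` (not part of the original module):
# resolve the linux/arm64 entry from a (potentially) multi-arch image. The image-reference is a
# placeholder, and `om.OciPlatform(os=..., architecture=...)` is an assumed constructor signature.
def _example_single_platform_manifest():
    oci_client = ccc.oci.oci_client()
    manifest = single_platform_manifest(
        image_reference='example.org/repo/image:1.0.0', # hypothetical reference
        oci_client=oci_client,
        platform=om.OciPlatform(os='linux', architecture='arm64'), # assumed ctor signature
    )
    return manifest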
def iter_image_files(
    image_reference: str,
    oci_client: oc.Client = None,
) -> typing.Iterable[typing.Tuple[typing.IO, str]]:
    '''
    returns a generator yielding the regular files contained in the specified oci-image as a
    sequence of two-tuples (filelike-obj, <layer-digest:relpath>).
    The image's layer-blobs are retrieved in the order they are defined in the image-manifest.
    cfg-blobs are ignored. All layer-blobs are assumed to be tarfiles (which is not necessarily
    a valid assumption for non-docker-compatible oci-artifacts).
    '''
    if not oci_client:
        oci_client = ccc.oci.oci_client()

    manifest = oci_client.manifest(image_reference=image_reference)

    executor = concurrent.futures.ThreadPoolExecutor(max_workers=len(manifest.layers))

    def _iter_layer_blob_files(layer_blob: om.OciBlobRef):
        blob_resp = oci_client.blob(
            image_reference=image_reference,
            digest=layer_blob.digest,
            stream=True,
        )

        fileobj = tarutil._FilelikeProxy(
            generator=blob_resp.iter_content(
                chunk_size=tarfile.RECORDSIZE,
                decode_unicode=False,
            ),
        )

        with tarfile.open(
            fileobj=fileobj,
            mode='r|*',
        ) as layer_tarfile:
            for tar_info in layer_tarfile:
                if not tar_info.isfile():
                    continue
                yield (
                    layer_tarfile.extractfile(tar_info),
                    f'{layer_blob.digest}:{tar_info.name}',
                )

    # we ignore the cfg-blob (which would be included in manifest.blobs())
    for layer_blob_files in executor.map(_iter_layer_blob_files, manifest.layers):
        yield from layer_blob_files
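
# Illustrative usage sketch for `iter_image_files` (not part of the original module): scan all
# regular files of an image for a given filename. The image-reference is a placeholder.
def _example_iter_image_files():
    for fileobj, path in iter_image_files(
        image_reference='example.org/repo/image:1.0.0', # hypothetical reference
    ):
        # path has the form <layer-digest>:<relpath>
        if path.endswith('/passwd'):
            print(path, len(fileobj.read()))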
def sanitise_image(
    image_ref: typing.Union[str, om.OciImageReference],
    oci_client: oc.Client,
):
    '''
    checks the given image's cfg-blob; if it is already sane, the passed image-ref is returned
    unchanged. Otherwise, a sanitised cfg-blob and a patched manifest are uploaded, and a
    digest-reference to the patched image is returned.
    '''
    manifest = oci_client.manifest(image_reference=image_ref)
    cfg_blob = oci_client.blob(image_reference=image_ref, digest=manifest.config.digest).content

    if is_cfg_blob_sane(manifest=manifest, cfg_blob=cfg_blob):
        return image_ref

    sanitised_cfg_blob = sanitise_cfg_blob(manifest=manifest, cfg_blob=cfg_blob)
    cfg_blob_digest = 'sha256:' + hashlib.sha256(sanitised_cfg_blob).hexdigest()

    oci_client.put_blob(
        image_ref,
        digest=cfg_blob_digest,
        octets_count=len(sanitised_cfg_blob),
        data=sanitised_cfg_blob,
    )

    manifest = dataclasses.replace(
        manifest,
        config=dataclasses.replace(
            manifest.config,
            digest=cfg_blob_digest,
            size=len(sanitised_cfg_blob),
        ),
    )

    manifest_bytes = json.dumps(dataclasses.asdict(manifest)).encode('utf-8')
    oci_client.put_manifest(image_reference=image_ref, manifest=manifest_bytes)
    manifest_dig = 'sha256:' + hashlib.sha256(manifest_bytes).hexdigest()

    img_ref: om.OciImageReference = om.OciImageReference.to_image_ref(image_ref)
    patched_img_ref = f'{img_ref.ref_without_tag}@{manifest_dig}'

    return patched_img_ref
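
# Illustrative usage sketch for `sanitise_image` (not part of the original module): the returned
# value is either the original reference (cfg-blob already sane) or a digest-reference to the
# patched image. The image-reference is a placeholder.
def _example_sanitise_image():
    oci_client = ccc.oci.oci_client()
    patched_ref = sanitise_image(
        image_ref='example.org/repo/image:1.0.0', # hypothetical reference
        oci_client=oci_client,
    )
    return patched_ref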
def image_layers_as_tarfile_generator(
    image_reference: str,
    oci_client: oc.Client,
    chunk_size=tarfile.RECORDSIZE,
    include_config_blob=True,
) -> typing.Generator[bytes, None, None]:
    '''
    returns a generator yielding a tar-archive with the passed oci-image's layer-blobs as
    members. This is somewhat similar to the result of a `docker save`, with the notable
    difference that the cfg-blob is discarded if include_config_blob is set to False.
    This function is useful to e.g. upload the file system contents of an oci-container-image
    to some scanning-tool (provided it supports the extraction of tar-archives).
    '''
    manifest = oci_client.manifest(image_reference=image_reference)
    offset = 0

    if not include_config_blob:
        logger.debug('skipping config blob')

    for blob in manifest.blobs() if include_config_blob else manifest.layers:
        logger.debug(f'getting blob {blob.digest}')

        tarinfo = tarfile.TarInfo(name=blob.digest + '.tar') # note: may be gzipped
        tarinfo.size = blob.size
        tarinfo.offset = offset
        tarinfo.offset_data = offset + tarfile.BLOCKSIZE

        offset += blob.size + tarfile.BLOCKSIZE

        tarinfo_bytes = tarinfo.tobuf()
        yield tarinfo_bytes

        uploaded_bytes = len(tarinfo_bytes)
        for chunk in oci_client.blob(
            image_reference=image_reference,
            digest=blob.digest,
            stream=True,
        ).iter_content(chunk_size=chunk_size):
            uploaded_bytes += len(chunk)
            yield chunk

        # need to pad full blocks w/ NUL-bytes
        if (missing := tarfile.BLOCKSIZE - (uploaded_bytes % tarfile.BLOCKSIZE)):
            offset += missing
            yield tarfile.NUL * missing
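
# Illustrative usage sketch for `image_layers_as_tarfile_generator` (not part of the original
# module): stream the synthesised tar-archive into a local file. The image-reference and the
# output-path are placeholders.
def _example_image_layers_as_tarfile():
    oci_client = ccc.oci.oci_client()
    with open('/tmp/layers.tar', 'wb') as f: # hypothetical output path
        for chunk in image_layers_as_tarfile_generator(
            image_reference='example.org/repo/image:1.0.0', # hypothetical reference
            oci_client=oci_client,
            include_config_blob=False,
        ):
            f.write(chunk)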
def from_single_image(
    image_reference: typing.Union[str, om.OciImageReference],
    oci_client: oc.Client=None,
    base_platform: om.OciPlatform=None,
) -> om.OciPlatform:
    '''
    determines the platform from a "single oci image" (i.e. an oci image which is _not_ a
    multiarch image).
    '''
    image_reference = om.OciImageReference.to_image_ref(image_reference)

    manifest = oci_client.manifest(image_reference=image_reference)

    if not isinstance(manifest, om.OciImageManifest):
        raise ValueError(f'{image_reference=} did not yield OciImageManifest: {type(manifest)=}')

    return from_manifest(
        manifest=manifest,
        oci_client=oci_client,
        base_platform=base_platform,
    )
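
# Illustrative usage sketch for `from_single_image` (not part of the original module): determine
# the platform of a single-arch image. The image-reference is a placeholder; the `OciPlatform`
# attribute names (`os`, `architecture`) are assumptions.
def _example_from_single_image():
    oci_client = ccc.oci.oci_client()
    platform = from_single_image(
        image_reference='example.org/repo/image:1.0.0', # hypothetical reference
        oci_client=oci_client,
    )
    print(platform.os, platform.architecture) # assumed attribute names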
def filter_image(
    source_ref: str,
    target_ref: str,
    remove_files: typing.Sequence[str] = (),
    oci_client: oc.Client = None,
):
    if not oci_client:
        oci_client = ccc.oci.oci_client()

    # shortcut in case there are no filtering-rules
    if not remove_files:
        return oci.replicate_artifact(
            src_image_reference=source_ref,
            tgt_image_reference=target_ref,
            oci_client=oci_client,
        )

    manifest = oci_client.manifest(image_reference=source_ref)
    if not isinstance(manifest, om.OciImageManifest):
        raise NotImplementedError(manifest)

    # allow / ignore leading '/'
    remove_files = [p.lstrip('/') for p in remove_files]

    def tarmember_filter(tar_info: tarfile.TarInfo):
        stripped_name = tar_info.name.lstrip('./')
        if stripped_name in remove_files:
            logger.debug(f'rm: {tar_info.name=}')
            return False # rm member
        return True # keep member

    # prepare copy of layers to avoid modification while iterating
    layers_copy = manifest.layers.copy()

    for layer in manifest.layers:
        layer_hash = hashlib.sha256()
        leng = 0
        # unfortunately, GCR (our most important oci-registry) does not support chunked uploads,
        # so we have to resort to writing the streaming result into a local tempfile to be able
        # to calculate digest-hash prior to upload to tgt; XXX: we might use streaming
        # when interacting w/ oci-registries that support chunked-uploads
        with tempfile.TemporaryFile() as f:
            src_tar_stream = oci_client.blob(
                image_reference=source_ref,
                digest=layer.digest,
                stream=True,
            ).iter_content(chunk_size=tarfile.BLOCKSIZE)
            src_tar_fobj = tarutil._FilelikeProxy(generator=src_tar_stream)
            filtered_stream = tarutil.filtered_tarfile_generator(
                src_tf=tarfile.open(fileobj=src_tar_fobj, mode='r|*'),
                filter_func=tarmember_filter,
            )

            for chunk in filtered_stream:
                layer_hash.update(chunk)
                leng += len(chunk)
                f.write(chunk)

            f.seek(0)

            oci_client.put_blob(
                image_reference=target_ref,
                digest=(layer_digest := 'sha256:' + layer_hash.hexdigest()),
                octets_count=leng,
                data=f,
            )

        # update copy of layers-list with new layer
        new_layer = dataclasses.replace(layer, digest=layer_digest, size=leng)
        layers_copy[layers_copy.index(layer)] = new_layer

    # switch layers in manifest to announce changes w/ manifest-upload
    manifest.layers = layers_copy

    # need to patch cfg-object, in case layer-digests changed
    cfg_blob = oci_client.blob(
        image_reference=source_ref,
        digest=manifest.config.digest,
        stream=False,
    ).json() # cfg-blobs are small - no point in streaming

    if 'rootfs' not in cfg_blob:
        raise ValueError('expected attr `rootfs` not present on cfg-blob')

    cfg_blob['rootfs'] = {
        'diff_ids': [layer.digest for layer in manifest.layers],
        'type': 'layers',
    }
    cfg_blob = json.dumps(cfg_blob).encode('utf-8')
    cfg_digest = f'sha256:{hashlib.sha256(cfg_blob).hexdigest()}'
    cfg_leng = len(cfg_blob)

    oci_client.put_blob(
        image_reference=target_ref,
        digest=cfg_digest,
        octets_count=cfg_leng,
        data=cfg_blob,
    )

    manifest.config = dataclasses.replace(manifest.config, digest=cfg_digest, size=cfg_leng)
    manifest_raw = json.dumps(dataclasses.asdict(manifest)).encode('utf-8')

    oci_client.put_manifest(image_reference=target_ref, manifest=manifest_raw)
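
# Illustrative usage sketch for `filter_image` (not part of the original module): replicate an
# image while removing selected files from all layer-blobs. Both references and the removed
# path are placeholders; paths are matched without a leading '/'.
def _example_filter_image():
    filter_image(
        source_ref='example.org/repo/image:1.0.0', # hypothetical reference
        target_ref='example.org/repo/image:1.0.0-filtered', # hypothetical reference
        remove_files=('etc/secret.conf',), # hypothetical path to remove
    )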
def filter_image(
    source_ref: typing.Union[str, om.OciImageReference],
    target_ref: typing.Union[str, om.OciImageReference],
    remove_files: typing.Sequence[str] = (),
    oci_client: oc.Client = None,
    mode: oci.ReplicationMode = oci.ReplicationMode.REGISTRY_DEFAULTS,
    platform_filter: typing.Callable[[om.OciPlatform], bool] = None,
) -> typing.Tuple[requests.Response, str, bytes]: # response, tgt-ref, manifest_bytes
    if not oci_client:
        oci_client = ccc.oci.oci_client()

    source_ref = om.OciImageReference.to_image_ref(source_ref)
    target_ref = om.OciImageReference.to_image_ref(target_ref)

    # shortcut in case there are no filtering-rules
    if not remove_files:
        return oci.replicate_artifact(
            src_image_reference=source_ref,
            tgt_image_reference=target_ref,
            oci_client=oci_client,
            mode=mode,
            platform_filter=platform_filter,
        )

    if mode is oci.ReplicationMode.REGISTRY_DEFAULTS:
        accept = None
    elif mode is oci.ReplicationMode.PREFER_MULTIARCH:
        accept = om.MimeTypes.prefer_multiarch
    elif mode is oci.ReplicationMode.NORMALISE_TO_MULTIARCH:
        accept = om.MimeTypes.prefer_multiarch
    else:
        raise NotImplementedError(mode)

    manifest = oci_client.manifest(
        image_reference=str(source_ref),
        accept=accept,
    )

    if isinstance(manifest, om.OciImageManifestList):
        # recurse into sub-images
        src_name = source_ref.ref_without_tag
        tgt_name = target_ref.ref_without_tag

        for idx, sub_manifest in enumerate(tuple(manifest.manifests)):
            source_ref = f'{src_name}@{sub_manifest.digest}'

            if platform_filter:
                platform = oci.platform.from_single_image(
                    image_reference=source_ref,
                    oci_client=oci_client,
                    base_platform=sub_manifest.platform,
                )
                if not platform_filter(platform):
                    logger.info(f'skipping {platform=} for {source_ref=}')
                    manifest.manifests.remove(sub_manifest)
                    continue

            logger.info(f'filtering to {tgt_name=}')
            res, tgt_ref, manifest_bytes = filter_image(
                source_ref=source_ref,
                target_ref=tgt_name,
                remove_files=remove_files,
                oci_client=oci_client,
            )

            # patch (potentially) modified manifest-digest
            patched_manifest = dataclasses.replace(
                sub_manifest,
                digest=f'sha256:{hashlib.sha256(manifest_bytes).hexdigest()}',
                size=len(manifest_bytes),
            )
            manifest.manifests[idx] = patched_manifest

        manifest_dict = manifest.as_dict()
        manifest_raw = json.dumps(manifest_dict).encode('utf-8')

        res = oci_client.put_manifest(
            image_reference=str(target_ref),
            manifest=manifest_raw,
        )

        return res, str(target_ref), manifest_raw

    # normalise single-image to multi-arch (w/ one entry)
    if mode is oci.ReplicationMode.NORMALISE_TO_MULTIARCH:
        if not source_ref.has_digest_tag:
            source_ref = om.OciImageReference.to_image_ref(
                oci_client.to_digest_hash(
                    image_reference=source_ref,
                )
            )

        platform = oci.platform.from_single_image(
            image_reference=source_ref,
            oci_client=oci_client,
        )

        res, ref, manifest_bytes = filter_image(
            source_ref=source_ref,
            target_ref=target_ref.ref_without_tag,
            remove_files=remove_files,
            oci_client=oci_client,
        )

        manifest_list = om.OciImageManifestList(
            manifests=[
                om.OciImageManifestListEntry(
                    digest=f'sha256:{hashlib.sha256(manifest_bytes).hexdigest()}',
                    mediaType=manifest.mediaType,
                    size=len(manifest_bytes),
                    platform=platform,
                )
            ],
        )

        manifest_list_bytes = json.dumps(
            manifest_list.as_dict(),
        ).encode('utf-8')

        res = oci_client.put_manifest(
            image_reference=target_ref,
            manifest=manifest_list_bytes,
        )

        return res, target_ref, manifest_list_bytes

    cp_cfg_blob = True
    if isinstance(manifest, om.OciImageManifestV1):
        logger.info(f'converting v1-manifest -> v2 {source_ref=} {target_ref=}')
        manifest, cfg_blob = oconv.v1_manifest_to_v2(
            manifest=manifest,
            oci_client=oci_client,
            tgt_image_ref=str(target_ref),
        )
        cp_cfg_blob = False # we synthesise a new cfg - thus we cannot cp from src
    elif not isinstance(manifest, om.OciImageManifest):
        raise NotImplementedError(manifest)

    # allow / ignore leading '/'
    remove_files = [p.lstrip('/') for p in remove_files]

    def tarmember_filter(tar_info: tarfile.TarInfo):
        stripped_name = tar_info.name.lstrip('./')
        if stripped_name in remove_files:
            logger.debug(f'rm: {tar_info.name=}')
            return False # rm member
        return True # keep member

    # prepare copy of layers to avoid modification while iterating
    layers_copy = manifest.layers.copy()
    non_gzipped_layer_digests = {} # {gzipped-digest: sha256:non-gzipped-digest}

    for layer in manifest.layers:
        layer_hash = hashlib.sha256()
        cfg_hash = hashlib.sha256() # we need to write the "non-gzipped" hash to the cfg-blob
        leng = 0
        src_leng = 0 # required for calculating leng for the gzip-footer
        crc = 0 # required for calculating the crc32-checksum for the gzip-footer
        # unfortunately, GCR (our most important oci-registry) does not support chunked uploads,
        # so we have to resort to writing the streaming result into a local tempfile to be able
        # to calculate digest-hash prior to upload to tgt; XXX: we might use streaming
        # when interacting w/ oci-registries that support chunked-uploads
        with tempfile.TemporaryFile() as f:
            src_tar_stream = oci_client.blob(
                image_reference=str(source_ref),
                digest=layer.digest,
                stream=True,
            ).iter_content(chunk_size=tarfile.BLOCKSIZE * 64)
            src_tar_fobj = tarutil._FilelikeProxy(generator=src_tar_stream)
            filtered_stream = tarutil.filtered_tarfile_generator(
                src_tf=tarfile.open(fileobj=src_tar_fobj, mode='r|*'),
                filter_func=tarmember_filter,
                chunk_size=tarfile.BLOCKSIZE * 64,
            )

            f.write((gzip_header := gziputil.gzip_header(fname=b'layer.tar')))
            layer_hash.update(gzip_header)
            leng += len(gzip_header)

            compressor = gziputil.zlib_compressobj()

            for chunk in filtered_stream:
                cfg_hash.update(chunk) # need to hash before compressing for the cfg-blob
                crc = zlib.crc32(chunk, crc)
                src_leng += len(chunk)

                chunk = compressor.compress(chunk)
                layer_hash.update(chunk)
                leng += len(chunk)
                f.write(chunk)

            f.write((remainder := compressor.flush()))