def rm_component_descriptor(
    component: gci.componentmodel.Component,
    recursive=True,
    oci_client: oc.Client = None,
):
    '''
    deletes the OCI manifest addressed by the target-ref calculated (via `_target_oci_ref`)
    for the given component.

    If `recursive` is set, each entry of `component.componentReferences` is resolved (using
    `_resolve_dependency`) and removed first (depth-first); the manifest for the passed
    component is deleted last.

    If no `oci_client` is passed, a client is created using `ccc.oci.oci_client()`. The same
    client instance is handed down to all recursive invocations.
    '''
    if not oci_client:
        oci_client = ccc.oci.oci_client()

    target_ref = _target_oci_ref(
        component=component,
        component_ref=component,
    )

    if recursive:
        for component_ref in component.componentReferences:
            component_descriptor = _resolve_dependency(
                component,
                component_ref,
                repository_ctx_base_url=None,
            )
            rm_component_descriptor(
                component=component_descriptor.component,
                recursive=recursive,
                # reuse the client instead of creating a new one per recursion level
                oci_client=oci_client,
            )

    oci_client.delete_manifest(image_reference=target_ref)
def v1_manifest_to_v2(
    manifest: om.OciImageManifestV1,
    oci_client: oc.Client,
    tgt_image_ref: str,
) -> tuple[om.OciImageManifest, bytes]:
    '''
    converts the given v1-manifest into a v2-manifest.

    A cfg-object is derived from the passed manifest (using `v2_cfg_from_v1_manifest`),
    serialised to JSON and uploaded as a blob to `tgt_image_ref`. The returned manifest
    references this blob as its config and reuses the layers of the passed manifest as-is.

    returns a two-tuple: (v2-manifest, serialised cfg-blob as uploaded)
    '''
    cfg = v2_cfg_from_v1_manifest(manifest=manifest)
    cfg_bytes = json.dumps(dataclasses.asdict(cfg)).encode('utf-8')

    cfg_digest = f'sha256:{hashlib.sha256(cfg_bytes).hexdigest()}'
    cfg_leng = len(cfg_bytes)

    # the cfg-blob must be present in target before a manifest referencing it can be published
    oci_client.put_blob(
        image_reference=tgt_image_ref,
        digest=cfg_digest,
        octets_count=cfg_leng,
        data=cfg_bytes,
    )

    manifest_v2 = om.OciImageManifest(
        config=om.OciBlobRef(
            digest=cfg_digest,
            mediaType='application/vnd.docker.container.image.v1+json',
            size=cfg_leng,
        ),
        layers=manifest.layers,
    )

    return manifest_v2, cfg_bytes
def iter_platforms(
    image_reference: typing.Union[str, om.OciImageReference],
    oci_client: oc.Client=None,
) -> typing.Generator[tuple[om.OciImageReference, om.OciPlatform], None, None]:
    '''
    generator intended to yield two-tuples of (image-reference, platform) for the given
    image-reference.

    The manifest is requested with a stated preference for multi-arch. If a single-image
    manifest is returned, exactly one tuple is yielded (platform is determined using
    `from_single_image`). For any manifest type other than single-image or manifest-list,
    `NotImplementedError` is raised.

    Note: `oci_client` defaults to None, but is used unconditionally.
    '''
    image_reference = om.OciImageReference.to_image_ref(image_reference)

    manifest = oci_client.manifest(
        image_reference=image_reference,
        accept=om.MimeTypes.prefer_multiarch,
    )

    if isinstance(manifest, om.OciImageManifest):
        # single-arch image: there is exactly one platform to report
        platform = from_single_image(
            image_reference=image_reference,
            oci_client=oci_client,
        )
        yield (image_reference, platform)
        return
    elif isinstance(manifest, om.OciImageManifestList):
        manifest: om.OciImageManifestList  # type-hint only
    else:
        raise NotImplementedError(type(manifest))

    # sub-images are addressed by digest (tag is dropped)
    prefix = image_reference.ref_without_tag

    # NOTE(review): as visible here, the loop retrieves each sub-manifest, but neither uses
    # `platform_dict` / `sub_img_ref` nor yields anything - the remainder of the loop body
    # may be missing; confirm against the complete source
    for sub_manifest in manifest.manifests:
        platform_dict = dataclasses.asdict(sub_manifest)
        sub_manifest = oci_client.manifest(
            image_reference=(sub_img_ref := f'{prefix}@{sub_manifest.digest}'),
        )
def single_platform_manifest(
    image_reference: om.OciImageReference | str,
    oci_client: oc.Client,
    platform: om.OciPlatform=None,
):
    '''
    returns a single-platform OCI Image Manifest for the given image_reference. lookup and
    validation depend on presence of platform argument.

    if given image-ref points to a single-arch manifest, the returned result will be identical
    to invoking `oci_client.manifest`. If platform argument is passed, and the discovered
    platform does not match, a `ValueError` will be raised.

    if given image-ref points to a multi-arch manifest, content-negotiation depends on presence
    of platform-argument. If absent, no preference will be stated (i.e. accept-header will not
    be set). Some Oci-Image-registries will return a single-arch manifest (thus saving a
    roundtrip).

    If platform is passed, preference for multi-arch will be stated via accept-header; the
    specified platform will be looked-up and returned. If not found, `ValueError` will be
    raised.

    raises `NotImplementedError` if the retrieved manifest is neither a single-image manifest
    nor a manifest-list.
    '''
    image_reference = om.OciImageReference.to_image_ref(image_reference)

    # only state a preference for multi-arch if a specific platform is to be looked up
    accept = om.MimeTypes.prefer_multiarch if platform else None

    manifest = oci_client.manifest(
        image_reference=image_reference,
        accept=accept,
    )

    if isinstance(manifest, om.OciImageManifest):
        if not platform:
            return manifest

        # `from_manifest` needs the image-reference to retrieve the cfg-blob
        actual_platform = from_manifest(
            image_reference=image_reference,
            manifest=manifest,
            oci_client=oci_client,
        )
        if not actual_platform == platform:
            raise ValueError(f'{image_reference=} does not match {platform=}: {actual_platform=}')
        return manifest
    elif isinstance(manifest, om.OciImageManifestList):
        pass
    else:
        raise NotImplementedError(manifest)

    for entry in manifest.manifests:
        entry: om.OciImageManifestListEntry
        if entry.platform == platform:
            break
    else:
        # no entry matched (also the case for empty manifest-lists)
        raise ValueError(f'{image_reference=} does not contain {platform=}')

    manifest_ref = f'{image_reference.ref_without_tag}@{entry.digest}'

    return oci_client.manifest(image_reference=manifest_ref)
def iter_image_files(
    container_image_reference: str,
    oci_client: oc.Client=None,
) -> typing.Iterable[typing.Tuple[typing.IO, str]]:
    '''
    generator yielding the regular files contained in the specified oci-image as two-tuples:
    (filelike-obj, '{layer-digest}:{member-name}').

    Layer-blobs are retrieved (streamed) in the order they are listed in the image-manifest.
    The cfg-blob is not processed. Every layer-blob is assumed to be a tarfile (which is not
    necessarily true for non-docker-compatible oci-artifacts).

    If no oci_client is passed, one is created using `ccc.oci.oci_client()`.
    '''
    oci_client = oci_client or ccc.oci.oci_client()

    image_manifest = oci_client.manifest(image_reference=container_image_reference)

    # only iterate `layers`: the cfg-blob (contained in manifest.blobs()) is left out on purpose
    for layer in image_manifest.layers:
        response = oci_client.blob(
            image_reference=container_image_reference,
            digest=layer.digest,
            stream=True,
        )
        chunks = response.iter_content(
            chunk_size=tarfile.RECORDSIZE,
            decode_unicode=False,
        )
        layer_fobj = _FilelikeProxy(
            generator=chunks,
            size=layer.size,
        )

        # stream-mode w/ transparent compression detection
        with tarfile.open(fileobj=layer_fobj, mode='r|*') as tf:
            for member in tf:
                if member.isfile():
                    yield (
                        tf.extractfile(member),
                        f'{layer.digest}:{member.name}',
                    )
def image_layers_as_tarfile_generator(
    image_reference: str,
    oci_client: oc.Client,
    chunk_size=tarfile.RECORDSIZE,
    include_config_blob=True,
) -> typing.Generator[bytes, None, None]:
    '''
    returns a generator yielding a tar-archive with the passed oci-image's blobs as members.
    Each member is named `{blob-digest}.tar` (member contents are passed-through unaltered,
    i.e. they may be gzipped).

    By default, all blobs returned by `manifest.blobs()` are included. If include_config_blob
    is set to False, only the layer-blobs (`manifest.layers`) are included.

    This function is useful to e.g. upload file system contents of an oci-container-image to
    some scanning-tool (provided it supports the extraction of tar-archives)

    The archive is emitted incrementally: per blob one tar-header, followed by the streamed
    blob-contents (retrieved in chunks of `chunk_size`), padded to a multiple of the
    tar-blocksize. The stream is terminated w/ two NUL-blocks (end-of-archive marker).
    '''
    manifest = oci_client.manifest(image_reference=image_reference)

    if include_config_blob:
        blobs = manifest.blobs()
    else:
        logger.debug('skipping config blob')
        blobs = manifest.layers

    offset = 0
    for blob in blobs:
        logger.debug(f'getting blob {blob.digest}')

        tarinfo = tarfile.TarInfo(name=blob.digest + '.tar')  # note: may be gzipped
        tarinfo.size = blob.size
        tarinfo.offset = offset
        tarinfo.offset_data = offset + tarfile.BLOCKSIZE
        offset += blob.size + tarfile.BLOCKSIZE

        tarinfo_bytes = tarinfo.tobuf()
        yield tarinfo_bytes

        uploaded_bytes = len(tarinfo_bytes)
        for chunk in oci_client.blob(
            image_reference=image_reference,
            digest=blob.digest,
            stream=True,
        ).iter_content(chunk_size=chunk_size):
            uploaded_bytes += len(chunk)
            yield chunk

        # need to pad full blocks w/ NUL-bytes; must not emit anything if we are already
        # block-aligned (an additional NUL-block would be read as end-of-archive marker)
        if (missing := -uploaded_bytes % tarfile.BLOCKSIZE):
            offset += missing
            yield tarfile.NUL * missing

    # tar-archives are terminated w/ two NUL-blocks
    yield tarfile.NUL * (2 * tarfile.BLOCKSIZE)
def publish_container_image_from_kaniko_tarfile(
    image_tarfile_path: str,
    oci_client: oc.Client,
    image_reference: str,
    additional_tags: typing.List[str] = (),
    manifest_mimetype: str = om.OCI_MANIFEST_SCHEMA_V2_MIME,
):
    '''
    publishes the image contained in the kaniko-image-tarfile at `image_tarfile_path`.

    All blobs are uploaded to (normalised) `image_reference`. Afterwards, the image's manifest
    is published for `image_reference` and, in addition, once for each of `additional_tags`.
    The manifest's `mediaType` is overwritten w/ `manifest_mimetype`.
    '''
    image_reference = ou.normalise_image_reference(image_reference=image_reference)

    # everything before the last `:` is reused as prefix for the additional tags
    image_name = image_reference.rsplit(':', 1)[0]
    image_references = (
        image_reference,
        *(f'{image_name}:{tag}' for tag in additional_tags),
    )

    upload_chunk_size = 1024 * 1024

    with ok.read_kaniko_image_tar(tar_path=image_tarfile_path) as image:
        for blob in image.blobs():
            oci_client.put_blob(
                image_reference=image_reference,
                digest=blob.digest_str(),
                octets_count=blob.size,
                data=blob,
                max_chunk=upload_chunk_size,
            )

        # optionally patch manifest's mimetype (e.g. required for docker-hub)
        manifest_dict = dataclasses.asdict(image.oci_manifest())
        manifest_dict['mediaType'] = manifest_mimetype
        manifest_bytes = json.dumps(manifest_dict).encode('utf-8')

        # identical manifest is published for each of the target-references
        for tgt_ref in image_references:
            logger.info(f'publishing manifest {tgt_ref=}')
            oci_client.put_manifest(
                image_reference=tgt_ref,
                manifest=manifest_bytes,
            )
def from_manifest(
    image_reference: om.OciImageReference,
    manifest: om.OciImageManifest,
    oci_client: oc.Client=None,
    base_platform: om.OciPlatform=None,
) -> om.OciPlatform:
    '''
    determines the platform for the given manifest from its cfg-blob.

    The cfg-blob (referenced by `manifest.config.digest`) is retrieved from `image_reference`
    and parsed as JSON. If `base_platform` is passed, its attributes serve as defaults, which
    are overwritten by the values from the cfg-blob.

    Note: `oci_client` defaults to None, but is used unconditionally.
    '''
    platform_attrs = base_platform.as_dict() if base_platform else {}

    cfg_blob_response = oci_client.blob(
        image_reference=image_reference,
        digest=manifest.config.digest,
        stream=False,  # result is small and needs to be parsed as a whole anyhow
    )

    # values from cfg-blob take precedence over those from base_platform
    platform_attrs |= cfg_blob_response.json()

    return dacite.from_dict(
        data_class=om.OciPlatform,
        data=platform_attrs,
    )
def from_single_image(
    image_reference: typing.Union[str, om.OciImageReference],
    oci_client: oc.Client=None,
    base_platform: om.OciPlatform=None,
) -> om.OciPlatform:
    '''
    determines the platform from a "single oci image" (i.e. an oci image which is _not_ a
    multiarch image).

    The manifest is retrieved for the given image_reference; platform is then determined using
    `from_manifest` (`base_platform` is passed-through).

    raises `ValueError` if the retrieved manifest is not an `OciImageManifest`.

    Note: `oci_client` defaults to None, but is used unconditionally.
    '''
    image_reference = om.OciImageReference.to_image_ref(image_reference)

    manifest = oci_client.manifest(image_reference=image_reference)

    if not isinstance(manifest, om.OciImageManifest):
        raise ValueError(f'{image_reference=} did not yield OciImageManifest: {type(manifest)=}')

    return from_manifest(
        # required by `from_manifest` for retrieving the cfg-blob
        image_reference=image_reference,
        manifest=manifest,
        oci_client=oci_client,
        base_platform=base_platform,
    )
def sanitise_image(
    image_ref: typing.Union[str, om.OciImageReference],
    oci_client: oc.Client,
):
    '''
    checks the cfg-blob of the given image (using `is_cfg_blob_sane`) and patches the image
    if the check fails.

    If the cfg-blob is sane, `image_ref` is returned unchanged (nothing is uploaded).
    Otherwise, a sanitised cfg-blob (see `sanitise_cfg_blob`) is uploaded, the manifest is
    changed to reference it (digest + size), and published to `image_ref`. In this case, a
    digest-based reference ('{ref-without-tag}@sha256:..') to the patched manifest is returned.
    '''
    manifest = oci_client.manifest(image_reference=image_ref)
    cfg_blob_response = oci_client.blob(
        image_reference=image_ref,
        digest=manifest.config.digest,
    )
    cfg_blob = cfg_blob_response.content

    if is_cfg_blob_sane(manifest=manifest, cfg_blob=cfg_blob):
        return image_ref

    new_cfg = sanitise_cfg_blob(manifest=manifest, cfg_blob=cfg_blob)
    new_cfg_digest = f'sha256:{hashlib.sha256(new_cfg).hexdigest()}'

    oci_client.put_blob(
        image_ref,
        digest=new_cfg_digest,
        octets_count=len(new_cfg),
        data=new_cfg,
    )

    # let manifest point to the sanitised cfg-blob
    patched_cfg_ref = dataclasses.replace(
        manifest.config,
        digest=new_cfg_digest,
        size=len(new_cfg),
    )
    manifest = dataclasses.replace(manifest, config=patched_cfg_ref)

    manifest_bytes = json.dumps(dataclasses.asdict(manifest)).encode('utf-8')
    oci_client.put_manifest(image_reference=image_ref, manifest=manifest_bytes)

    # return reference by digest, so callers address exactly the manifest we just published
    manifest_digest = f'sha256:{hashlib.sha256(manifest_bytes).hexdigest()}'
    parsed_ref = om.OciImageReference.to_image_ref(image_ref)

    return f'{parsed_ref.ref_without_tag}@{manifest_digest}'
def publish_container_image_from_kaniko_tarfile(
    image_tarfile_path: str,
    oci_client: oc.Client,
    image_reference: str,
    additional_tags: typing.List[str] = (),
):
    '''
    publishes the image contained in the kaniko-image-tarfile at `image_tarfile_path`.

    Each blob is uploaded to (normalised) `image_reference`, and requested again from the
    registry right after its upload. Finally, the image's manifest is published for
    `image_reference` and for each of `additional_tags`.
    '''
    image_reference = ou.normalise_image_reference(image_reference=image_reference)

    # additional tags are appended to whatever precedes the last `:`
    image_name = image_reference.rsplit(':', 1)[0]
    tagged_references = tuple(f'{image_name}:{tag}' for tag in additional_tags)
    image_references = (image_reference,) + tagged_references

    with ok.read_kaniko_image_tar(tar_path=image_tarfile_path) as image:
        max_chunk = 1024 * 1024

        for blob in image.blobs():
            blob_digest = blob.digest_str()
            oci_client.put_blob(
                image_reference=image_reference,
                digest=blob_digest,
                octets_count=blob.size,
                data=blob,
                max_chunk=max_chunk,
            )
            # NOTE(review): result is discarded - presumably a probe that the uploaded blob
            # can be retrieved; confirm whether this request is still needed
            oci_client.blob(
                image_reference=image_reference,
                digest=blob.digest_str(),
                absent_ok=True,
            )

        manifest_dict = dataclasses.asdict(image.oci_manifest())
        manifest_bytes = json.dumps(manifest_dict).encode('utf-8')

        for tgt_ref in image_references:
            logger.info(f'publishing manifest {tgt_ref=}')
            oci_client.put_manifest(
                image_reference=tgt_ref,
                manifest=manifest_bytes,
            )
def filter_image(
    source_ref: str,
    target_ref: str,
    remove_files: typing.Sequence[str] = (),
    oci_client: oc.Client = None,
):
    '''
    replicates the image from `source_ref` to `target_ref`, removing the files specified
    by `remove_files` from all layers.

    `remove_files` contains paths of files to remove. Leading `/` and `./` are ignored, both
    for the passed paths and for the names of the layers' tar-members (so `/etc/passwd`,
    `./etc/passwd` and `etc/passwd` are equivalent). Comparison is done on full paths.

    If `remove_files` is empty, the image is replicated using `oci.replicate_artifact` (whose
    result is returned). Otherwise, each layer is downloaded, filtered, and uploaded to
    `target_ref`; the cfg-blob's `rootfs` attribute and the manifest are patched to reference
    the filtered layers, and then uploaded as well (nothing is returned in this case).

    raises `NotImplementedError` if source's manifest is not an `OciImageManifest`;
    `ValueError` if the cfg-blob has no `rootfs` attribute.

    If no oci_client is passed, one is created using `ccc.oci.oci_client()`.
    '''
    if not oci_client:
        oci_client = ccc.oci.oci_client()

    # shortcut in case there are no filtering-rules
    if not remove_files:
        return oci.replicate_artifact(
            src_image_reference=source_ref,
            tgt_image_reference=target_ref,
            oci_client=oci_client,
        )

    manifest = oci_client.manifest(image_reference=source_ref)

    if not isinstance(manifest, om.OciImageManifest):
        raise NotImplementedError(manifest)

    def normalise_path(path: str) -> str:
        # strip leading `/` and `./` prefixes only. caveat: str.lstrip('./') would strip
        # _any_ leading dots and slashes, thus e.g. turning `./.bashrc` into `bashrc`
        path = path.lstrip('/')
        while path.startswith('./'):
            path = path[2:].lstrip('/')
        return path

    # allow / ignore leading '/'
    paths_to_remove = {normalise_path(p) for p in remove_files}

    def tarmember_filter(tar_info: tarfile.TarInfo):
        if normalise_path(tar_info.name) in paths_to_remove:
            logger.debug(f'rm: {tar_info.name=}')
            return False  # rm member

        return True  # keep member

    # collect patched layers in a copy to avoid modification while iterating
    layers_copy = list(manifest.layers)

    for idx, layer in enumerate(manifest.layers):
        layer_hash = hashlib.sha256()
        leng = 0

        # unfortunately, GCR (our most important oci-registry) does not support chunked uploads,
        # so we have to resort to writing the streaming result into a local tempfile to be able
        # to calculate digest-hash prior to upload to tgt; XXX: we might use streaming
        # when interacting w/ oci-registries that support chunked-uploads
        with tempfile.TemporaryFile() as f:
            src_tar_stream = oci_client.blob(
                image_reference=source_ref,
                digest=layer.digest,
                stream=True,
            ).iter_content(chunk_size=tarfile.BLOCKSIZE)
            src_tar_fobj = tarutil._FilelikeProxy(generator=src_tar_stream)

            with tarfile.open(fileobj=src_tar_fobj, mode='r|*') as src_tf:
                filtered_stream = tarutil.filtered_tarfile_generator(
                    src_tf=src_tf,
                    filter_func=tarmember_filter,
                )

                for chunk in filtered_stream:
                    layer_hash.update(chunk)
                    leng += len(chunk)
                    f.write(chunk)

            f.seek(0)  # rewind, so upload starts reading at the beginning

            oci_client.put_blob(
                image_reference=target_ref,
                digest=(layer_digest := 'sha256:' + layer_hash.hexdigest()),
                octets_count=leng,
                data=f,
            )

        # update copy of layers-list with new layer
        layers_copy[idx] = dataclasses.replace(layer, digest=layer_digest, size=leng)

    # switch layers in manifest to announce changes w/ manifest-upload
    manifest.layers = layers_copy

    # need to patch cfg-object, in case layer-digests changed
    cfg_blob = oci_client.blob(
        image_reference=source_ref,
        digest=manifest.config.digest,
        stream=False,
    ).json()  # cfg-blobs are small - no point in streaming

    if 'rootfs' not in cfg_blob:
        raise ValueError('expected attr `rootfs` not present on cfg-blob')

    # NOTE(review): uses digests of layers as uploaded - presumes the filtered layers are
    # uploaded uncompressed (so that layer-digest equals diff-id); confirm against tarutil
    cfg_blob['rootfs'] = {
        'diff_ids': [layer.digest for layer in manifest.layers],
        'type': 'layers',
    }

    cfg_blob = json.dumps(cfg_blob).encode('utf-8')
    cfg_digest = f'sha256:{hashlib.sha256(cfg_blob).hexdigest()}'
    cfg_leng = len(cfg_blob)

    oci_client.put_blob(
        image_reference=target_ref,
        digest=cfg_digest,
        octets_count=cfg_leng,
        data=cfg_blob,
    )

    manifest.config = dataclasses.replace(manifest.config, digest=cfg_digest, size=cfg_leng)

    manifest_raw = json.dumps(dataclasses.asdict(manifest)).encode('utf-8')
    oci_client.put_manifest(image_reference=target_ref, manifest=manifest_raw)
def replicate_artifact(
    src_image_reference: typing.Union[str, om.OciImageReference],
    tgt_image_reference: typing.Union[str, om.OciImageReference],
    credentials_lookup: oa.credentials_lookup=None,
    routes: oc.OciRoutes=oc.OciRoutes(),
    oci_client: oc.Client=None,
    mode: ReplicationMode=ReplicationMode.REGISTRY_DEFAULTS,
    platform_filter: typing.Callable[[om.OciPlatform], bool]=None,
) -> typing.Tuple[requests.Response, str, bytes]:
    '''
    replicate the given OCI Artifact from src_image_reference to tgt_image_reference.

    try to be verbatim, if possible (i.e. target should reflect source as close as possible).
    Whether or not a fully identical replication is possible depends on the source artifact
    and chosen replication `mode`:

    If source artifact is a "legacy / v1" "docker image" (as it used to be created from older
    versions of docker) verbatim replication is not possible, because modern (v2) OCI
    Registries (such as GCR) will not accept those manifests. Therefore, conversion to "v2" is
    required (done transparently by this function).

    If source artifact is a "multiarch" image (oci.model.OciImageManifestList), OCI registries
    show different behaviour if ReplicationMode.REGISTRY_DEFAULTS is used. Some registries will
    in this case return a single-image manifest, instead of the multiarch-manifest (in this
    case, the replication result will only be a single-image). Use
    ReplicationMode.PREFER_MULTIARCH or ReplicationMode.NORMALISE_TO_MULTIARCH to prevent this.

    If platform_filter is specified (only applied for multi-arch images), the replication
    result will obviously also deviate from src, depending on the filter semantics.

    pass either `credentials_lookup` (optionally along w/ `routes`), OR `oci_client`; a
    `ValueError` is raised if both or neither are passed. If no `oci_client` is passed, a
    client is created from `credentials_lookup` and `routes`, and used for all requests
    (including recursive replication of sub-images).

    returns a three-tuple: (response of final manifest-upload, target-image-reference,
    manifest-bytes as uploaded to target)

    raises `NotImplementedError` for unknown modes, manifest schema-versions, or mediaTypes.
    '''
    if not (bool(credentials_lookup) ^ bool(oci_client)):
        raise ValueError('either credentials-lookup + routes, xor client must be passed')

    src_image_reference = om.OciImageReference.to_image_ref(src_image_reference)
    tgt_image_reference = om.OciImageReference.to_image_ref(tgt_image_reference)

    # caveat: from here on, always use `client` (`oci_client` may be None)
    if not oci_client:
        client = oc.Client(
            credentials_lookup=credentials_lookup,
            routes=routes,
        )
    else:
        client = oci_client

    if mode is ReplicationMode.REGISTRY_DEFAULTS:
        accept = None
    elif mode is ReplicationMode.PREFER_MULTIARCH:
        accept = om.MimeTypes.prefer_multiarch
    elif mode is ReplicationMode.NORMALISE_TO_MULTIARCH:
        accept = om.MimeTypes.prefer_multiarch
    else:
        raise NotImplementedError(mode)

    # we need the unaltered - manifest for verbatim replication
    raw_manifest = client.manifest_raw(
        image_reference=src_image_reference,
        accept=accept,
    ).text
    manifest = json.loads(raw_manifest)
    schema_version = int(manifest['schemaVersion'])
    need_to_synthesise_cfg_blob = False

    if schema_version == 1:
        need_to_synthesise_cfg_blob = True
        manifest = client.manifest(image_reference=src_image_reference)

        logger.warning(
            f'''
            manifest {src_image_reference=} is in legacy-format (schemaVersion==1).
            Cannot verbatimly replicate
            '''
        )

        manifest, _ = oconv.v1_manifest_to_v2(
            manifest=manifest,
            oci_client=client,
            tgt_image_ref=str(tgt_image_reference),
        )

        # we must determine the uncompressed layer-digests to synthesise a valid
        # cfg-blob docker will accept (this means in particular we must download
        # all layers, even if we do not need to upload them)
        need_uncompressed_layer_digests = True
        uncompressed_layer_digests = []
    elif schema_version == 2:
        media_type = manifest.get('mediaType', om.DOCKER_MANIFEST_SCHEMA_V2_MIME)

        if media_type == om.DOCKER_MANIFEST_LIST_MIME:
            # multi-arch
            manifest = dacite.from_dict(
                data_class=om.OciImageManifestList,
                data=manifest,
            )

            src_ref = om.OciImageReference(image_reference=src_image_reference)
            src_name = src_ref.ref_without_tag

            tgt_ref = om.OciImageReference(image_reference=tgt_image_reference)
            tgt_name = tgt_ref.ref_without_tag

            # try to avoid modifications (from x-serialisation) - unless we have to
            manifest_dirty = False

            # cp manifests to tuple, because we _might_ modify if there is a platform_filter
            for sub_manifest in tuple(manifest.manifests):
                src_reference = f'{src_name}@{sub_manifest.digest}'
                tgt_reference = f'{tgt_name}@{sub_manifest.digest}'

                if platform_filter:
                    platform = op.from_single_image(
                        image_reference=src_reference,
                        oci_client=client,
                        base_platform=sub_manifest.platform,
                    )
                    if not platform_filter(platform):
                        logger.info(f'skipping {platform=} for {src_image_reference=}')
                        manifest_dirty = True
                        manifest.manifests.remove(sub_manifest)
                        continue

                logger.info(f'replicating to {tgt_reference=}')

                replicate_artifact(
                    src_image_reference=src_reference,
                    tgt_image_reference=tgt_reference,
                    oci_client=client,
                )

            # only re-serialise if sub-manifests were removed (otherwise: upload verbatim)
            if manifest_dirty:
                raw_manifest = json.dumps(manifest.as_dict())

            res = client.put_manifest(
                image_reference=tgt_image_reference,
                manifest=raw_manifest,
            )

            return res, tgt_image_reference, raw_manifest.encode('utf-8')
        elif media_type in (
            om.OCI_MANIFEST_SCHEMA_V2_MIME,
            om.DOCKER_MANIFEST_SCHEMA_V2_MIME,
        ):
            if mode is ReplicationMode.NORMALISE_TO_MULTIARCH:
                if not src_image_reference.has_digest_tag:
                    src_image_reference = om.OciImageReference.to_image_ref(
                        client.to_digest_hash(
                            image_reference=src_image_reference,
                        )
                    )

                platform = op.from_single_image(
                    image_reference=src_image_reference,
                    oci_client=client,
                )

                # force usage of digest-tag (symbolic tag required for manifest-list
                tgt_image_ref = f'{tgt_image_reference.ref_without_tag}@{src_image_reference.tag}'

                res, ref, manifest_bytes = replicate_artifact(
                    src_image_reference=src_image_reference,
                    tgt_image_reference=tgt_image_ref,
                    oci_client=client,
                )

                # wrap the replicated single-image into a manifest-list w/ a single entry
                manifest_list = om.OciImageManifestList(
                    manifests=[
                        om.OciImageManifestListEntry(
                            digest=f'sha256:{hashlib.sha256(manifest_bytes).hexdigest()}',
                            mediaType=media_type,
                            size=len(manifest_bytes),
                            platform=platform,
                        ),
                    ]
                )

                manifest_list_bytes = json.dumps(
                    manifest_list.as_dict(),
                ).encode('utf-8')

                res = client.put_manifest(
                    image_reference=tgt_image_reference,
                    manifest=manifest_list_bytes,
                )

                return res, tgt_image_reference, manifest_list_bytes

            manifest = dacite.from_dict(
                data_class=om.OciImageManifest,
                data=json.loads(raw_manifest)
            )
            need_uncompressed_layer_digests = False
            uncompressed_layer_digests = None
        else:
            raise NotImplementedError(f'{media_type=}')
    else:
        raise NotImplementedError(schema_version)

    for idx, layer in enumerate(manifest.blobs()):
        # need to specially handle cfg-blob (may be absent for v2 / legacy images)
        is_cfg_blob = idx == 0
        if is_cfg_blob and need_to_synthesise_cfg_blob:
            # if we need(ed) to synthesise cfg-blob (because source-image contained a v1-manifest)
            # then there will never be a cfg-blob in src.
            # -> silently skip to avoid emitting a confusing, but unhelpful warning
            logger.debug(f'{src_image_reference=} - synthesised cfg-blob - skipping replicatation')
            continue

        head_res = client.head_blob(
            image_reference=tgt_image_reference,
            digest=layer.digest,
        )
        if head_res.ok:
            if not need_uncompressed_layer_digests:
                logger.info(f'skipping blob download {layer.digest=} - already exists in tgt')
                continue  # no need to download if blob already exists in tgt
            elif not is_cfg_blob:
                # we will not need to re-upload, however we do need the uncompressed digest
                blob_res = client.blob(
                    image_reference=src_image_reference,
                    digest=layer.digest,
                    absent_ok=is_cfg_blob,
                )

                layer_hash = hashlib.sha256()
                # wbits: MAX_WBITS | 16 -> expect gzip-format
                decompressor = zlib.decompressobj(wbits=zlib.MAX_WBITS | 16)
                for chunk in blob_res.iter_content(chunk_size=4096):
                    layer_hash.update(decompressor.decompress(chunk))

                uncompressed_layer_digests.append(f'sha256:{layer_hash.hexdigest()}')
                continue  # we may still skip the upload, of course

        # todo: consider silencing warning if we do v1->v2-conversion (cfg-blob will never exist
        # in this case
        blob_res = client.blob(
            image_reference=src_image_reference,
            digest=layer.digest,
            absent_ok=is_cfg_blob,
        )
        if not blob_res and is_cfg_blob:
            # fallback to non-verbatim replication; synthesise cfg
            # NOTE(review): if this is reached for a schemaVersion-2 manifest,
            # `uncompressed_layer_digests` is None and cfg-synthesis below reads `history`
            # from the raw manifest - presumably only present for v1-manifests; confirm
            logger.warning(
                'falling back to non-verbatim replication '
                f'{src_image_reference=} {tgt_image_reference=}'
            )

            need_to_synthesise_cfg_blob = True
            continue

        if need_uncompressed_layer_digests:
            uncompressed_layer_hash = hashlib.sha256()
            decompressor = zlib.decompressobj(wbits=zlib.MAX_WBITS | 16)

            # calculate digest of uncompressed layer on-the-fly while blob is being uploaded;
            # the digest is appended once the blob was consumed entirely
            def intercept_chunks(blob_res):
                for chunk in blob_res.iter_content(chunk_size=4096):
                    uncompressed_layer_hash.update(decompressor.decompress(chunk))
                    yield chunk
                uncompressed_layer_digests.append(f'sha256:{uncompressed_layer_hash.hexdigest()}')

            blob_res = intercept_chunks(blob_res=blob_res)

        client.put_blob(
            image_reference=tgt_image_reference,
            digest=layer.digest,
            octets_count=layer.size,
            data=blob_res,
        )

    if need_to_synthesise_cfg_blob:
        fake_cfg_dict = json.loads(json.loads(raw_manifest)['history'][0]['v1Compatibility'])

        # patch-in uncompressed layer-digests
        fake_cfg_dict['rootfs'] = {
            'diff_ids': uncompressed_layer_digests,
            'type': 'layers',
        }

        fake_cfg_raw = json.dumps(fake_cfg_dict).encode('utf-8')

        client.put_blob(
            image_reference=tgt_image_reference,
            digest=(cfg_digest := f'sha256:{hashlib.sha256(fake_cfg_raw).hexdigest()}'),
            octets_count=len(fake_cfg_raw),
            data=fake_cfg_raw,
        )

        manifest_dict = dataclasses.asdict(manifest)
        # patch-on altered cfg-digest
        manifest_dict['config']['digest'] = cfg_digest
        manifest_dict['config']['size'] = len(fake_cfg_raw)
        raw_manifest = json.dumps(manifest_dict)

    res = client.put_manifest(
        image_reference=tgt_image_reference,
        manifest=raw_manifest,
    )

    return res, tgt_image_reference, raw_manifest.encode('utf-8')
def filter_image( source_ref: typing.Union[str, om.OciImageReference], target_ref: typing.Union[str, om.OciImageReference], remove_files: typing.Sequence[str] = (), oci_client: oc.Client = None, mode: oci.ReplicationMode = oci.ReplicationMode.REGISTRY_DEFAULTS, platform_filter: typing.Callable[[om.OciPlatform], bool] = None, ) -> typing.Tuple[requests.Response, str, bytes]: # response, tgt-ref, manifest_bytes if not oci_client: oci_client = ccc.oci.oci_client() source_ref = om.OciImageReference.to_image_ref(source_ref) target_ref = om.OciImageReference.to_image_ref(target_ref) # shortcut in case there are no filtering-rules if not remove_files: return oci.replicate_artifact( src_image_reference=source_ref, tgt_image_reference=target_ref, oci_client=oci_client, mode=mode, platform_filter=platform_filter, ) if mode is oci.ReplicationMode.REGISTRY_DEFAULTS: accept = None elif mode is oci.ReplicationMode.PREFER_MULTIARCH: accept = om.MimeTypes.prefer_multiarch elif mode is oci.ReplicationMode.NORMALISE_TO_MULTIARCH: accept = om.MimeTypes.prefer_multiarch else: raise NotImplementedError(mode) manifest = oci_client.manifest( image_reference=str(source_ref), accept=accept, ) if isinstance(manifest, om.OciImageManifestList): # recurse into sub-images src_name = source_ref.ref_without_tag tgt_name = target_ref.ref_without_tag for idx, sub_manifest in enumerate(tuple(manifest.manifests)): source_ref = f'{src_name}@{sub_manifest.digest}' if platform_filter: platform = oci.platform.from_single_image( image_reference=source_ref, oci_client=oci_client, base_platform=sub_manifest.platform, ) if not platform_filter(platform): logger.info(f'skipping {platform=} for {source_ref=}') manifest.manifests.remove(sub_manifest) continue logger.info(f'filtering to {tgt_name=}') res, tgt_ref, manifest_bytes = filter_image( source_ref=source_ref, target_ref=tgt_name, remove_files=remove_files, oci_client=oci_client, ) # patch (potentially) modified manifest-digest patched_manifest = 
dataclasses.replace( sub_manifest, digest=f'sha256:{hashlib.sha256(manifest_bytes).hexdigest()}', size=len(manifest_bytes), ) manifest.manifests[idx] = patched_manifest manifest_dict = manifest.as_dict() manifest_raw = json.dumps(manifest_dict).encode('utf-8') res = oci_client.put_manifest( image_reference=str(target_ref), manifest=manifest_raw, ) return res, str(target_ref), manifest_raw # normalise single-image to multi-arch (w/ one entry) if mode is oci.ReplicationMode.NORMALISE_TO_MULTIARCH: if not source_ref.has_digest_tag: source_ref = om.OciImageReference.to_image_ref( oci_client.to_digest_hash(image_reference=source_ref, )) platform = oci.platform.from_single_image( image_reference=source_ref, oci_client=oci_client, ) res, ref, manifest_bytes = filter_image( source_ref=source_ref, target_ref=target_ref.ref_without_tag, remove_files=remove_files, oci_client=oci_client, ) manifest_list = om.OciImageManifestList(manifests=[ om.OciImageManifestListEntry( digest=f'sha256:{hashlib.sha256(manifest_bytes).hexdigest()}', mediaType=manifest.mediaType, size=len(manifest_bytes), platform=platform, ) ], ) manifest_list_bytes = json.dumps( manifest_list.as_dict(), ).encode('utf-8') res = oci_client.put_manifest( image_reference=target_ref, manifest=manifest_list_bytes, ) return res, target_ref, manifest_list_bytes cp_cfg_blob = True if isinstance(manifest, om.OciImageManifestV1): logger.info( f'converting v1-manifest -> v2 {source_ref=} {target_ref=}') manifest, cfg_blob = oconv.v1_manifest_to_v2( manifest=manifest, oci_client=oci_client, tgt_image_ref=str(target_ref), ) cp_cfg_blob = False # we synthesise new cfg - thus we cannot cp from src elif not isinstance(manifest, om.OciImageManifest): raise NotImplementedError(manifest) # allow / ignore leading '/' remove_files = [p.lstrip('/') for p in remove_files] def tarmember_filter(tar_info: tarfile.TarInfo): stripped_name = tar_info.name.lstrip('./') if stripped_name in remove_files: logger.debug(f'rm: {tar_info.name=}') 
return False # rm member return True # keep member # prepare copy of layers to avoid modification while iterating layers_copy = manifest.layers.copy() non_gzipped_layer_digests = { } # {gzipped-digest: sha256:non-gzipped-digest} for layer in manifest.layers: layer_hash = hashlib.sha256() cfg_hash = hashlib.sha256( ) # we need to write "non-gzipped" hash to cfg-blob leng = 0 src_leng = 0 # required for calculating leng for gzip-footer crc = 0 # requried for calculcating crc32-checksum for gzip-footer # unfortunately, GCR (our most important oci-registry) does not support chunked uploads, # so we have to resort to writing the streaming result into a local tempfile to be able # to calculate digest-hash prior to upload to tgt; XXX: we might use streaming # when interacting w/ oci-registries that support chunked-uploads with tempfile.TemporaryFile() as f: src_tar_stream = oci_client.blob( image_reference=str(source_ref), digest=layer.digest, stream=True, ).iter_content(chunk_size=tarfile.BLOCKSIZE * 64) src_tar_fobj = tarutil._FilelikeProxy(generator=src_tar_stream) filtered_stream = tarutil.filtered_tarfile_generator( src_tf=tarfile.open(fileobj=src_tar_fobj, mode='r|*'), filter_func=tarmember_filter, chunk_size=tarfile.BLOCKSIZE * 64, ) f.write((gzip_header := gziputil.gzip_header(fname=b'layer.tar'))) layer_hash.update(gzip_header) leng += len(gzip_header) compressor = gziputil.zlib_compressobj() for chunk in filtered_stream: cfg_hash.update( chunk) # need to hash before compressing for cfg-blob crc = zlib.crc32(chunk, crc) src_leng += len(chunk) chunk = compressor.compress(chunk) layer_hash.update(chunk) leng += len(chunk) f.write(chunk) f.write((remainder := compressor.flush()))