def publish_container_image_from_kaniko_tarfile(
    image_tarfile_path: str,
    oci_client: oc.Client,
    image_reference: str,
    additional_tags: typing.List[str] = (),
):
    '''
    uploads the image contained in the kaniko image-tarfile at `image_tarfile_path` using the
    passed oci_client.

    All blobs are pushed to the (normalised) `image_reference`. Afterwards, the manifest is
    published for `image_reference` itself and, in addition, once for each entry of
    `additional_tags` (the part following the last `:` of the normalised reference is replaced
    by the respective tag).
    '''
    image_reference = ou.normalise_image_reference(image_reference=image_reference)

    # everything in front of the last `:` serves as prefix for the additional tags
    repository = image_reference.rsplit(':', 1)[0]
    target_refs = (
        image_reference,
        *(f'{repository}:{tag}' for tag in additional_tags),
    )

    max_chunk_octets = 1024 * 1024

    with ok.read_kaniko_image_tar(tar_path=image_tarfile_path) as kaniko_image:
        for blob in kaniko_image.blobs():
            oci_client.put_blob(
                image_reference=image_reference,
                digest=blob.digest_str(),
                octets_count=blob.size,
                data=blob,
                max_chunk=max_chunk_octets,
            )
            # the result is discarded
            # NOTE(review): presumably a probe that the blob arrived at the registry - confirm
            oci_client.blob(
                image_reference=image_reference,
                digest=blob.digest_str(),
                absent_ok=True,
            )

        manifest_dict = dataclasses.asdict(kaniko_image.oci_manifest())
        manifest_bytes = json.dumps(manifest_dict).encode('utf-8')

        # the same manifest is published for the main reference and for each additional tag
        for tgt_ref in target_refs:
            logger.info(f'publishing manifest {tgt_ref=}')
            oci_client.put_manifest(
                image_reference=tgt_ref,
                manifest=manifest_bytes,
            )
def iter_image_files(
    container_image_reference: str,
    oci_client: oc.Client=None,
) -> typing.Iterable[typing.Tuple[typing.IO, str]]:
    '''
    returns a generator yielding the regular files contained in the specified oci-image as
    two-tuples (filelike-obj, <layer-digest>:<relpath>).

    The image's layer-blobs are retrieved in the order in which they are defined in the
    image-manifest. The cfg-blob is ignored. All layer-blobs are assumed to be tarfiles (which
    is not necessarily a valid assumption for non-docker-compatible oci-artifacts).

    If no oci_client is passed, one is created using `ccc.oci.oci_client()`.
    '''
    oci_client = oci_client or ccc.oci.oci_client()

    manifest = oci_client.manifest(image_reference=container_image_reference)

    # only the layers are processed; the cfg-blob (which manifest.blobs() would include) is not
    for layer in manifest.layers:
        response = oci_client.blob(
            image_reference=container_image_reference,
            digest=layer.digest,
            stream=True,
        )
        chunks = response.iter_content(
            chunk_size=tarfile.RECORDSIZE,
            decode_unicode=False,
        )
        layer_stream = _FilelikeProxy(generator=chunks, size=layer.size)

        # `r|*`: read as (non-seekable) stream w/ transparent compression
        with tarfile.open(fileobj=layer_stream, mode='r|*') as archive:
            for member in archive:
                if member.isfile():
                    yield (
                        archive.extractfile(member),
                        f'{layer.digest}:{member.name}',
                    )
def sanitise_image(
    image_ref: typing.Union[str, om.OciImageReference],
    oci_client: oc.Client,
):
    '''
    checks the cfg-blob of the given image using `is_cfg_blob_sane`. If the check passes,
    `image_ref` is returned unchanged.

    Otherwise, a cfg-blob patched by `sanitise_cfg_blob` is uploaded, the manifest is
    re-published (to `image_ref`) referencing the patched cfg-blob, and a digest-reference
    (`<ref-without-tag>@sha256:<manifest-digest>`) to the re-published manifest is returned
    as str.
    '''
    manifest = oci_client.manifest(image_reference=image_ref)
    cfg_response = oci_client.blob(
        image_reference=image_ref,
        digest=manifest.config.digest,
    )
    cfg_blob = cfg_response.content

    # nothing to do: hand back the reference as it was passed-in
    if is_cfg_blob_sane(manifest=manifest, cfg_blob=cfg_blob):
        return image_ref

    patched_cfg = sanitise_cfg_blob(manifest=manifest, cfg_blob=cfg_blob)
    patched_cfg_size = len(patched_cfg)
    patched_cfg_digest = f'sha256:{hashlib.sha256(patched_cfg).hexdigest()}'

    oci_client.put_blob(
        image_ref,
        digest=patched_cfg_digest,
        octets_count=patched_cfg_size,
        data=patched_cfg,
    )

    # let manifest point to the patched cfg-blob (digest and size changed)
    patched_config = dataclasses.replace(
        manifest.config,
        digest=patched_cfg_digest,
        size=patched_cfg_size,
    )
    patched_manifest = dataclasses.replace(manifest, config=patched_config)

    manifest_bytes = json.dumps(dataclasses.asdict(patched_manifest)).encode('utf-8')
    oci_client.put_manifest(image_reference=image_ref, manifest=manifest_bytes)

    manifest_digest = f'sha256:{hashlib.sha256(manifest_bytes).hexdigest()}'
    parsed_ref: om.OciImageReference = om.OciImageReference.to_image_ref(image_ref)

    return f'{parsed_ref.ref_without_tag}@{manifest_digest}'
def image_layers_as_tarfile_generator(
    image_reference: str,
    oci_client: oc.Client,
    chunk_size=tarfile.RECORDSIZE,
    include_config_blob=True,
) -> typing.Generator[bytes, None, None]:
    '''
    returns a generator yielding a tar-archive with the passed oci-image's blobs as members.
    Each member is named `<blob-digest>.tar` (note that the blobs may be gzipped).

    This is somewhat similar to the result of a `docker save`. This function is useful to e.g.
    upload file system contents of an oci-container-image to some scanning-tool (provided it
    supports the extraction of tar-archives).

    By default, all blobs returned by `manifest.blobs()` are included. If include_config_blob
    is set to False, only the manifest's `layers` are included (i.e. the config blob is
    omitted).

    Blobs are streamed from the registry in chunks of `chunk_size` octets. Each member is
    padded w/ NUL-bytes up to the next tar-block boundary (no padding is emitted if the member
    already ends on a block boundary). The archive is terminated w/ two NUL-blocks.
    '''
    manifest = oci_client.manifest(image_reference=image_reference)

    if include_config_blob:
        blobs = manifest.blobs()
    else:
        logger.debug('skipping config blob')
        blobs = manifest.layers

    offset = 0
    for blob in blobs:
        logger.debug(f'getting blob {blob.digest}')

        tarinfo = tarfile.TarInfo(name=blob.digest + '.tar')  # note: may be gzipped
        tarinfo.size = blob.size
        tarinfo.offset = offset
        tarinfo.offset_data = offset + tarfile.BLOCKSIZE
        offset += blob.size + tarfile.BLOCKSIZE

        tarinfo_bytes = tarinfo.tobuf()
        yield tarinfo_bytes

        uploaded_bytes = len(tarinfo_bytes)
        for chunk in oci_client.blob(
            image_reference=image_reference,
            digest=blob.digest,
            stream=True,
        ).iter_content(chunk_size=chunk_size):
            uploaded_bytes += len(chunk)
            yield chunk

        # need to pad incomplete blocks w/ NUL-bytes. Caveat: we must not emit a full NUL-block
        # if the member is already block-aligned, as tar-readers interpret an all-NUL block at
        # header position as end-of-archive (thus hiding any subsequent members).
        if (missing := (-uploaded_bytes) % tarfile.BLOCKSIZE):
            offset += missing
            yield tarfile.NUL * missing

    # tar-archives are terminated w/ two NUL-filled blocks (end-of-archive marker)
    yield tarfile.NUL * (tarfile.BLOCKSIZE * 2)
def from_manifest(
    image_reference: om.OciImageReference,
    manifest: om.OciImageManifest,
    oci_client: oc.Client=None,
    base_platform: om.OciPlatform=None,
) -> om.OciPlatform:
    '''
    creates an `om.OciPlatform` from the cfg-blob of the image described by `manifest`.

    The cfg-blob referenced by `manifest.config.digest` is retrieved (using `image_reference`)
    and parsed as JSON. If `base_platform` is passed, its attributes (`as_dict()`) are used as
    defaults; attributes from the cfg-blob take precedence.

    If no oci_client is passed, one is created using `ccc.oci.oci_client()`.
    '''
    # oci_client defaults to None - fall back to default-client rather than failing w/
    # AttributeError upon first use
    if not oci_client:
        oci_client = ccc.oci.oci_client()

    cfg = base_platform.as_dict() if base_platform else {}

    # in-place merge: values from cfg-blob overwrite those from base_platform
    cfg |= oci_client.blob(
        image_reference=image_reference,
        digest=manifest.config.digest,
        stream=False,  # we will need to json.load the (small) result anyhow
    ).json()

    return dacite.from_dict(
        data_class=om.OciPlatform,
        data=cfg,
    )
def filter_image(
    source_ref: str,
    target_ref: str,
    remove_files: typing.Sequence[str] = (),
    oci_client: oc.Client = None,
):
    '''
    replicates the image from `source_ref` to `target_ref`, removing the files specified in
    `remove_files` from each of the image's layers.

    `remove_files` contains paths of tar-members to remove; leading `/` are ignored. If it is
    empty, the image is replicated unmodified using `oci.replicate_artifact` (whose result is
    returned). Otherwise, each layer is filtered and uploaded to `target_ref`, the cfg-blob's
    `rootfs` is patched to reference the new layer-digests, and cfg-blob and manifest are
    uploaded to `target_ref` (nothing is returned in this case).

    If no oci_client is passed, one is created using `ccc.oci.oci_client()`.

    raises NotImplementedError if the source-manifest is not an `om.OciImageManifest`.
    raises ValueError if the source's cfg-blob has no `rootfs` attribute.
    '''
    if not oci_client:
        oci_client = ccc.oci.oci_client()

    # shortcut in case there are no filtering-rules
    if not remove_files:
        return oci.replicate_artifact(
            src_image_reference=source_ref,
            tgt_image_reference=target_ref,
            oci_client=oci_client,
        )

    manifest = oci_client.manifest(image_reference=source_ref)

    if not isinstance(manifest, om.OciImageManifest):
        raise NotImplementedError(manifest)

    # allow / ignore leading '/' (frozenset: only used for membership-checks)
    remove_files = frozenset(p.lstrip('/') for p in remove_files)

    def tarmember_filter(tar_info: tarfile.TarInfo):
        # NOTE(review): lstrip('./') strips any leading run of `.` and `/` characters (not
        # just a `./` prefix), so e.g. `.dockerenv` is compared as `dockerenv` - confirm this
        # is intended
        stripped_name = tar_info.name.lstrip('./')
        if stripped_name in remove_files:
            logger.debug(f'rm: {tar_info.name=}')
            return False  # rm member

        return True  # keep member

    # collect patched layers by position (rather than looking up each layer by equality in a
    # copy of the layers-list, which is ambiguous if layers compare equal)
    patched_layers = []

    for layer in manifest.layers:
        layer_hash = hashlib.sha256()
        leng = 0

        # unfortunately, GCR (our most important oci-registry) does not support chunked uploads,
        # so we have to resort to writing the streaming result into a local tempfile to be able
        # to calculate digest-hash prior to upload to tgt; XXX: we might use streaming
        # when interacting w/ oci-registries that support chunked-uploads
        with tempfile.TemporaryFile() as f:
            src_tar_stream = oci_client.blob(
                image_reference=source_ref,
                digest=layer.digest,
                stream=True,
            ).iter_content(chunk_size=tarfile.BLOCKSIZE)
            src_tar_fobj = tarutil._FilelikeProxy(generator=src_tar_stream)

            # close source-tarfile once the filtered stream was consumed
            with tarfile.open(fileobj=src_tar_fobj, mode='r|*') as src_tf:
                filtered_stream = tarutil.filtered_tarfile_generator(
                    src_tf=src_tf,
                    filter_func=tarmember_filter,
                )

                for chunk in filtered_stream:
                    layer_hash.update(chunk)
                    leng += len(chunk)
                    f.write(chunk)

            # rewind, so the upload reads the tempfile from its beginning
            f.seek(0)
            layer_digest = 'sha256:' + layer_hash.hexdigest()

            oci_client.put_blob(
                image_reference=target_ref,
                digest=layer_digest,
                octets_count=leng,
                data=f,
            )

        patched_layers.append(
            dataclasses.replace(layer, digest=layer_digest, size=leng)
        )

    # switch layers in manifest to announce changes w/ manifest-upload
    manifest.layers = patched_layers

    # need to patch cfg-object, in case layer-digests changed
    cfg_blob = oci_client.blob(
        image_reference=source_ref,
        digest=manifest.config.digest,
        stream=False,  # cfg-blobs are small - no point in streaming
    ).json()

    if 'rootfs' not in cfg_blob:
        raise ValueError('expected attr `rootfs` not present on cfg-blob')

    # NOTE(review): diff_ids are set to the digests of the uploaded layer-blobs; this
    # presumably relies on the filtered stream being an uncompressed tar - confirm (also that
    # the layers' mediaType still matches the uploaded content)
    cfg_blob['rootfs'] = {
        'diff_ids': [layer.digest for layer in manifest.layers],
        'type': 'layers',
    }

    cfg_blob = json.dumps(cfg_blob).encode('utf-8')
    cfg_digest = f'sha256:{hashlib.sha256(cfg_blob).hexdigest()}'
    cfg_leng = len(cfg_blob)

    oci_client.put_blob(
        image_reference=target_ref,
        digest=cfg_digest,
        octets_count=cfg_leng,
        data=cfg_blob,
    )

    # manifest must reference the patched cfg-blob
    manifest.config = dataclasses.replace(manifest.config, digest=cfg_digest, size=cfg_leng)

    manifest_raw = json.dumps(dataclasses.asdict(manifest)).encode('utf-8')
    oci_client.put_manifest(image_reference=target_ref, manifest=manifest_raw)
def filter_image( source_ref: typing.Union[str, om.OciImageReference], target_ref: typing.Union[str, om.OciImageReference], remove_files: typing.Sequence[str] = (), oci_client: oc.Client = None, mode: oci.ReplicationMode = oci.ReplicationMode.REGISTRY_DEFAULTS, platform_filter: typing.Callable[[om.OciPlatform], bool] = None, ) -> typing.Tuple[requests.Response, str, bytes]: # response, tgt-ref, manifest_bytes if not oci_client: oci_client = ccc.oci.oci_client() source_ref = om.OciImageReference.to_image_ref(source_ref) target_ref = om.OciImageReference.to_image_ref(target_ref) # shortcut in case there are no filtering-rules if not remove_files: return oci.replicate_artifact( src_image_reference=source_ref, tgt_image_reference=target_ref, oci_client=oci_client, mode=mode, platform_filter=platform_filter, ) if mode is oci.ReplicationMode.REGISTRY_DEFAULTS: accept = None elif mode is oci.ReplicationMode.PREFER_MULTIARCH: accept = om.MimeTypes.prefer_multiarch elif mode is oci.ReplicationMode.NORMALISE_TO_MULTIARCH: accept = om.MimeTypes.prefer_multiarch else: raise NotImplementedError(mode) manifest = oci_client.manifest( image_reference=str(source_ref), accept=accept, ) if isinstance(manifest, om.OciImageManifestList): # recurse into sub-images src_name = source_ref.ref_without_tag tgt_name = target_ref.ref_without_tag for idx, sub_manifest in enumerate(tuple(manifest.manifests)): source_ref = f'{src_name}@{sub_manifest.digest}' if platform_filter: platform = oci.platform.from_single_image( image_reference=source_ref, oci_client=oci_client, base_platform=sub_manifest.platform, ) if not platform_filter(platform): logger.info(f'skipping {platform=} for {source_ref=}') manifest.manifests.remove(sub_manifest) continue logger.info(f'filtering to {tgt_name=}') res, tgt_ref, manifest_bytes = filter_image( source_ref=source_ref, target_ref=tgt_name, remove_files=remove_files, oci_client=oci_client, ) # patch (potentially) modified manifest-digest patched_manifest = 
dataclasses.replace( sub_manifest, digest=f'sha256:{hashlib.sha256(manifest_bytes).hexdigest()}', size=len(manifest_bytes), ) manifest.manifests[idx] = patched_manifest manifest_dict = manifest.as_dict() manifest_raw = json.dumps(manifest_dict).encode('utf-8') res = oci_client.put_manifest( image_reference=str(target_ref), manifest=manifest_raw, ) return res, str(target_ref), manifest_raw # normalise single-image to multi-arch (w/ one entry) if mode is oci.ReplicationMode.NORMALISE_TO_MULTIARCH: if not source_ref.has_digest_tag: source_ref = om.OciImageReference.to_image_ref( oci_client.to_digest_hash(image_reference=source_ref, )) platform = oci.platform.from_single_image( image_reference=source_ref, oci_client=oci_client, ) res, ref, manifest_bytes = filter_image( source_ref=source_ref, target_ref=target_ref.ref_without_tag, remove_files=remove_files, oci_client=oci_client, ) manifest_list = om.OciImageManifestList(manifests=[ om.OciImageManifestListEntry( digest=f'sha256:{hashlib.sha256(manifest_bytes).hexdigest()}', mediaType=manifest.mediaType, size=len(manifest_bytes), platform=platform, ) ], ) manifest_list_bytes = json.dumps( manifest_list.as_dict(), ).encode('utf-8') res = oci_client.put_manifest( image_reference=target_ref, manifest=manifest_list_bytes, ) return res, target_ref, manifest_list_bytes cp_cfg_blob = True if isinstance(manifest, om.OciImageManifestV1): logger.info( f'converting v1-manifest -> v2 {source_ref=} {target_ref=}') manifest, cfg_blob = oconv.v1_manifest_to_v2( manifest=manifest, oci_client=oci_client, tgt_image_ref=str(target_ref), ) cp_cfg_blob = False # we synthesise new cfg - thus we cannot cp from src elif not isinstance(manifest, om.OciImageManifest): raise NotImplementedError(manifest) # allow / ignore leading '/' remove_files = [p.lstrip('/') for p in remove_files] def tarmember_filter(tar_info: tarfile.TarInfo): stripped_name = tar_info.name.lstrip('./') if stripped_name in remove_files: logger.debug(f'rm: {tar_info.name=}') 
return False # rm member return True # keep member # prepare copy of layers to avoid modification while iterating layers_copy = manifest.layers.copy() non_gzipped_layer_digests = { } # {gzipped-digest: sha256:non-gzipped-digest} for layer in manifest.layers: layer_hash = hashlib.sha256() cfg_hash = hashlib.sha256( ) # we need to write "non-gzipped" hash to cfg-blob leng = 0 src_leng = 0 # required for calculating leng for gzip-footer crc = 0 # requried for calculcating crc32-checksum for gzip-footer # unfortunately, GCR (our most important oci-registry) does not support chunked uploads, # so we have to resort to writing the streaming result into a local tempfile to be able # to calculate digest-hash prior to upload to tgt; XXX: we might use streaming # when interacting w/ oci-registries that support chunked-uploads with tempfile.TemporaryFile() as f: src_tar_stream = oci_client.blob( image_reference=str(source_ref), digest=layer.digest, stream=True, ).iter_content(chunk_size=tarfile.BLOCKSIZE * 64) src_tar_fobj = tarutil._FilelikeProxy(generator=src_tar_stream) filtered_stream = tarutil.filtered_tarfile_generator( src_tf=tarfile.open(fileobj=src_tar_fobj, mode='r|*'), filter_func=tarmember_filter, chunk_size=tarfile.BLOCKSIZE * 64, ) f.write((gzip_header := gziputil.gzip_header(fname=b'layer.tar'))) layer_hash.update(gzip_header) leng += len(gzip_header) compressor = gziputil.zlib_compressobj() for chunk in filtered_stream: cfg_hash.update( chunk) # need to hash before compressing for cfg-blob crc = zlib.crc32(chunk, crc) src_leng += len(chunk) chunk = compressor.compress(chunk) layer_hash.update(chunk) leng += len(chunk) f.write(chunk) f.write((remainder := compressor.flush()))