def test_original_image_reference():
    ref = om.OciImageReference('alpine:3')
    assert ref.original_image_reference == 'alpine:3'

    # without tag
    ref = om.OciImageReference('eu.gcr.io/example/foo')
    assert ref.original_image_reference == 'eu.gcr.io/example/foo'
def test_tag():
    ref = om.OciImageReference('alpine:3')
    assert ref.tag == '3'

    ref = om.OciImageReference(f'example.org/path@sha256:{example_digest}')
    assert ref.tag == f'sha256:{example_digest}'

    ref = om.OciImageReference(f'example.org:1234/path@sha256:{example_digest}')
    assert ref.tag == f'sha256:{example_digest}'
def test_normalised_image_reference():
    ref = om.OciImageReference('alpine:3')
    assert ref.normalised_image_reference == 'registry-1.docker.io/library/alpine:3'

    ref = om.OciImageReference('eu.gcr.io/project/foo:bar')
    assert ref.normalised_image_reference == 'eu.gcr.io/project/foo:bar'

    # no tag
    ref = om.OciImageReference('eu.gcr.io/project/foo')
    assert ref.normalised_image_reference == 'eu.gcr.io/project/foo'
def test_eq():
    ref1 = om.OciImageReference('alpine:3')
    ref2 = om.OciImageReference('registry-1.docker.io/library/alpine:3')

    assert ref1 == ref2
    assert ref1 == ref1
    assert ref2 == ref2

    ref3 = om.OciImageReference('example.org/path:tag1')
    assert ref1 != ref3
def test_name():
    ref = om.OciImageReference('example.org/path:tag')
    assert ref.name == 'path'

    ref = om.OciImageReference(f'example.org/path@sha256:{example_digest}')
    assert ref.name == 'path'

    ref = om.OciImageReference('example.org:1234/path:tag')
    assert ref.name == 'path'

    # special handling to mimic docker-cli
    ref = om.OciImageReference('alpine:3')
    assert ref.name == 'library/alpine'
def test_ref_without_tag():
    ref = om.OciImageReference('example.org/path:tag')
    assert ref.ref_without_tag == 'example.org/path'

    ref = om.OciImageReference(f'example.org/path@sha256:{example_digest}')
    assert ref.ref_without_tag == 'example.org/path'

    ref = om.OciImageReference('example.org:1234/path:tag')
    assert ref.ref_without_tag == 'example.org:1234/path'

    # special handling to mimic docker-cli
    ref = om.OciImageReference('alpine:3')
    assert ref.ref_without_tag == 'registry-1.docker.io/library/alpine'
def test_parsed_digest_tag():
    with pytest.raises(ValueError):
        om.OciImageReference('alpine:3').parsed_digest_tag

    ref = om.OciImageReference(f'example.org/path@sha256:{example_digest}')
    alg, dig = ref.parsed_digest_tag
    assert alg == 'sha256'
    assert dig == example_digest.split(':')[-1]

    ref = om.OciImageReference(f'alpine@sha256:{example_digest}')
    alg, dig = ref.parsed_digest_tag
    assert alg == 'sha256'
    assert dig == example_digest.split(':')[-1]
def head_blob(
    self,
    image_reference: typing.Union[str, om.OciImageReference],
    digest: str,
    absent_ok=True,
):
    image_reference = om.OciImageReference(image_reference)
    scope = _scope(image_reference=image_reference, action='pull')

    res = self._request(
        url=self.routes.blob_url(
            image_reference=image_reference,
            digest=digest,
        ),
        method='HEAD',
        scope=scope,
        image_reference=image_reference,
        raise_for_status=False,
        warn_if_not_ok=not absent_ok,
    )

    if absent_ok and res.status_code == 404:
        return res

    res.raise_for_status()

    return res
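# Usage sketch (illustrative reference/digest values; `client` is assumed to be an
# already-constructed instance of this class): with absent_ok=True (the default),
# a 404 is returned to the caller rather than raised, so existence can be checked
# via the status-code:
#
#   res = client.head_blob(
#       image_reference='example.org/path:tag',
#       digest=f'sha256:{example_digest}',
#   )
#   if res.status_code == 404:
#       ...  # blob absent - upload required
#   else:
#       ...  # blob already present in registry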
def blob(
    self,
    image_reference: typing.Union[str, om.OciImageReference],
    digest: str,
    stream=True,
    absent_ok=False,
) -> requests.models.Response:
    image_reference = om.OciImageReference(image_reference)
    scope = _scope(image_reference=image_reference, action='pull')

    res = self._request(
        url=self.routes.blob_url(image_reference=image_reference, digest=digest),
        image_reference=image_reference,
        scope=scope,
        method='GET',
        stream=stream,
        timeout=None,
        raise_for_status=False,
    )

    if absent_ok and res.status_code == requests.codes.NOT_FOUND:
        return None
    res.raise_for_status()

    return res
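# Usage sketch (illustrative values): with stream=True (the default), the returned
# requests-response can be consumed incrementally, e.g. to write a layer-blob to
# disk without buffering it in memory:
#
#   res = client.blob(
#       image_reference='example.org/path:tag',
#       digest=f'sha256:{example_digest}',
#   )
#   with open('layer.tar.gz', 'wb') as f:
#       for chunk in res.iter_content(chunk_size=4096):
#           f.write(chunk)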
def test_netloc():
    # simple case w/ symbolic tag
    ref = om.OciImageReference('example.org/path:tag')
    assert ref.netloc == 'example.org'

    ref = om.OciImageReference(f'example.org/path@sha256:{example_digest}')
    assert ref.netloc == 'example.org'

    ref = om.OciImageReference('example.org:1234/path:tag')
    assert ref.netloc == 'example.org:1234'

    ref = om.OciImageReference(f'example.org:1234/path@sha256:{example_digest}')
    assert ref.netloc == 'example.org:1234'

    # special handling to mimic docker-cli
    ref = om.OciImageReference('alpine:3')
    assert ref.netloc == 'registry-1.docker.io'
def _put_blob_single_post(
    self,
    image_reference: typing.Union[str, om.OciImageReference],
    digest: str,
    octets_count: int,
    data: bytes,
):
    logger.debug(f'single-post {image_reference=} {octets_count=}')
    image_reference = om.OciImageReference(image_reference)
    scope = _scope(image_reference=image_reference, action='push,pull')

    # XXX according to distribution-spec, single-POST should also work - however
    # this seems not to be true for registry-1.docker.io. To keep the code simple(r),
    # always do a two-step upload; we might add a cfg-option (or maybe even discovery) for
    # using single-post uploads for registries that support it (such as GCR or artifactory)
    res = self._request(
        url=self.routes.uploads_url(
            image_reference=image_reference,
        ),
        image_reference=image_reference,
        scope=scope,
        method='POST',
    )

    upload_url = res.headers.get('Location')

    # returned url _may_ be relative
    if upload_url.startswith('/'):
        parsed_url = urllib.parse.urlparse(res.url)
        upload_url = f'{parsed_url.scheme}://{parsed_url.netloc}{upload_url}'

    if '?' in upload_url:
        prefix = '&'
    else:
        prefix = '?'

    upload_url += prefix + urllib.parse.urlencode({'digest': digest})

    res = self._request(
        url=upload_url,
        image_reference=image_reference,
        scope=scope,
        method='PUT',
        headers={
            'content-type': 'application/octet-stream',
            'content-length': str(octets_count),
        },
        data=data,
        raise_for_status=False,
    )

    if not res.status_code == 201:  # spec says it MUST be 201
        # also, 202 indicates the upload actually did not succeed e.g. for "docker-hub"
        logger.warning(f'{image_reference=} {res.status_code=} {digest=} - PUT may have failed')
        res.raise_for_status()
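# The flow above is the distribution-spec's two-step ("POST, then PUT") upload;
# a rough sketch of the resulting HTTP exchange (URLs and status-codes as per
# the spec - individual registries may deviate):
#
#   POST /v2/<name>/blobs/uploads/         -> 202 Accepted, Location: <upload-url>
#   PUT  <upload-url>?digest=sha256:<hex>  -> 201 Created
#
# the Location-header _may_ be relative, hence the scheme/netloc reconstruction
# from the POST-response's url above.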
def test_str():
    ref = om.OciImageReference('alpine:3')
    assert str(ref) == ref.normalised_image_reference
def test_tag_type():
    ref = om.OciImageReference('example.org/path:symbolic-tag')
    assert ref.tag_type is om.OciTagType.SYMBOLIC

    ref = om.OciImageReference(f'example.org/path@sha256:{example_digest}')
    assert ref.tag_type is om.OciTagType.DIGEST
def _put_blob_chunked(
    self,
    image_reference: typing.Union[str, om.OciImageReference],
    digest: str,
    octets_count: int,
    data_iterator: typing.Iterator[bytes],
    chunk_size: int=1024 * 1024 * 16,  # 16 MiB
):
    image_reference = om.OciImageReference(image_reference)
    scope = _scope(image_reference=image_reference, action='push,pull')
    logger.debug(f'chunked-put {chunk_size=}')

    # start uploading session
    res = self._request(
        url=self.routes.uploads_url(image_reference=image_reference),
        image_reference=image_reference,
        scope=scope,
        method='POST',
        headers={
            'content-length': '0',
        }
    )
    res.raise_for_status()

    upload_url = res.headers['location']

    octets_left = octets_count
    octets_sent = 0
    offset = 0
    sha256 = hashlib.sha256()

    while octets_left > 0:
        octets_to_send = min(octets_left, chunk_size)
        octets_left -= octets_to_send
        data = next(data_iterator)
        sha256.update(data)

        if not len(data) == octets_to_send:
            # sanity check to detect programming errors
            raise ValueError(f'{len(data)=} vs {octets_to_send=}')

        logger.debug(f'{octets_to_send=} {octets_left=} {len(data)=}')
        logger.debug(f'{octets_sent + offset}-{octets_sent + octets_to_send + offset}')

        crange_from = octets_sent
        crange_to = crange_from + len(data) - 1

        res = self._request(
            url=upload_url,
            image_reference=image_reference,
            scope=scope,
            method='PATCH',
            data=data,
            headers={
                'Content-Length': str(octets_to_send),
                'Content-Type': 'application/octet-stream',
                'Content-Range': f'{crange_from}-{crange_to}',
                'Range': f'{crange_from}-{crange_to}',
            }
        )
        res.raise_for_status()

        upload_url = res.headers['location']
        octets_sent += len(data)

    sha256_digest = f'sha256:{sha256.hexdigest()}'

    # close uploading session
    query = urllib.parse.urlencode({'digest': sha256_digest})
    upload_url = res.headers['location'] + '?' + query
    res = self._request(
        url=upload_url,
        image_reference=image_reference,
        scope=scope,
        method='PUT',
        headers={
            'Content-Length': '0',
        },
    )
    return res
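# Content-Range arithmetic used above, as a worked example with illustrative
# numbers: for a 40 MiB blob and the default 16 MiB chunk-size, the three PATCH
# requests carry the (inclusive) byte-ranges
#
#   chunk 1: Content-Range: 0-16777215          (16 MiB)
#   chunk 2: Content-Range: 16777216-33554431   (16 MiB)
#   chunk 3: Content-Range: 33554432-41943039   ( 8 MiB)
#
# followed by a zero-length PUT with ?digest=sha256:<hex> to close the session.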
def put_blob(
    self,
    image_reference: typing.Union[str, om.OciImageReference],
    digest: str,
    octets_count: int,
    data: typing.Union[bytes, typing.Generator, typing.IO, requests.models.Response],
    max_chunk=1024 * 1024 * 1,  # 1 MiB
    mimetype: str='application/data',
):
    image_reference = om.OciImageReference(image_reference)
    head_res = self.head_blob(
        image_reference=image_reference,
        digest=digest,
    )
    if head_res.ok:
        logger.info(f'skipping blob upload {digest=} - already exists')
        return

    data_is_requests_resp = isinstance(data, requests.models.Response)
    data_is_generator = isinstance(data, typing.Generator)
    data_is_filelike = hasattr(data, 'read')
    data_is_bytes = isinstance(data, bytes)

    if octets_count < max_chunk or data_is_filelike or data_is_requests_resp or data_is_bytes:
        if data_is_requests_resp:
            data = data.content
        elif data_is_generator:
            # at least GCR does not like chunked-uploads; if small enough, work around this
            # and create one (not-that-big) bytes-obj
            _data = bytes()
            for chunk in data:
                _data += chunk
            data = _data
        elif data_is_filelike:
            pass  # if filelike, http.client will handle streaming for us

        return self._put_blob_single_post(
            image_reference=image_reference,
            digest=digest,
            octets_count=octets_count,
            data=data,
        )
    elif octets_count >= max_chunk and data_is_generator:
        # workaround: write into temporary file, as at least GCR does not implement
        # chunked-upload, and requests will not properly work w/ all generators
        # (in particular, it will not work w/ our "fake" one)
        with tempfile.TemporaryFile() as tf:
            for chunk in data:
                tf.write(chunk)
            tf.seek(0)

            return self._put_blob_single_post(
                image_reference=image_reference,
                digest=digest,
                octets_count=octets_count,
                data=tf,
            )
    else:
        if data_is_requests_resp:
            with data:
                return self._put_blob_chunked(
                    image_reference=image_reference,
                    digest=digest,
                    octets_count=octets_count,
                    data_iterator=data.iter_content(chunk_size=max_chunk),
                    chunk_size=max_chunk,
                )
        else:
            raise NotImplementedError
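# Usage sketch (illustrative values): uploading a small in-memory blob takes the
# single-post path, since the payload is bytes and octets_count is below max_chunk:
#
#   data = b'example-blob-content'
#   client.put_blob(
#       image_reference='example.org/path:tag',
#       digest=f'sha256:{hashlib.sha256(data).hexdigest()}',
#       octets_count=len(data),
#       data=data,
#   )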
def replicate_artifact(
    src_image_reference: typing.Union[str, om.OciImageReference],
    tgt_image_reference: typing.Union[str, om.OciImageReference],
    credentials_lookup: oa.credentials_lookup=None,
    routes: oc.OciRoutes=oc.OciRoutes(),
    oci_client: oc.Client=None,
    mode: ReplicationMode=ReplicationMode.REGISTRY_DEFAULTS,
    platform_filter: typing.Callable[[om.OciPlatform], bool]=None,
) -> typing.Tuple[requests.Response, str, bytes]:
    '''
    replicate the given OCI Artifact from src_image_reference to tgt_image_reference.

    try to be verbatim, if possible (i.e. the target should reflect the source as closely as
    possible). Whether or not a fully identical replication is possible depends on the source
    artifact and the chosen replication `mode`:

    If the source artifact is a "legacy / v1" "docker image" (as used to be created by older
    versions of docker), verbatim replication is not possible, because modern (v2) OCI
    registries (such as GCR) will not accept those manifests. Therefore, conversion to "v2"
    is required (done transparently by this function).

    If the source artifact is a "multiarch" image (oci.model.OciImageManifestList), OCI
    registries show different behaviour if ReplicationMode.REGISTRY_DEFAULTS is used. Some
    registries will in this case return a single-image manifest instead of the
    multiarch-manifest (in which case the replication result will only be a single image).
    Use ReplicationMode.PREFER_MULTIARCH or ReplicationMode.NORMALISE_TO_MULTIARCH to
    prevent this.

    If platform_filter is specified (only applied for multi-arch images), the replication
    result will obviously also deviate from src, depending on the filter's semantics.

    pass either `credentials_lookup` + `routes`, OR `oci_client`
    '''
    if not (bool(credentials_lookup) ^ bool(oci_client)):
        raise ValueError('either credentials-lookup + routes, xor client must be passed')

    src_image_reference = om.OciImageReference.to_image_ref(src_image_reference)
    tgt_image_reference = om.OciImageReference.to_image_ref(tgt_image_reference)

    if not oci_client:
        client = oc.Client(
            credentials_lookup=credentials_lookup,
            routes=routes,
        )
    else:
        client = oci_client

    if mode is ReplicationMode.REGISTRY_DEFAULTS:
        accept = None
    elif mode is ReplicationMode.PREFER_MULTIARCH:
        accept = om.MimeTypes.prefer_multiarch
    elif mode is ReplicationMode.NORMALISE_TO_MULTIARCH:
        accept = om.MimeTypes.prefer_multiarch
    else:
        raise NotImplementedError(mode)

    # we need the unaltered manifest for verbatim replication
    raw_manifest = client.manifest_raw(
        image_reference=src_image_reference,
        accept=accept,
    ).text
    manifest = json.loads(raw_manifest)
    schema_version = int(manifest['schemaVersion'])
    need_to_synthesise_cfg_blob = False

    if schema_version == 1:
        need_to_synthesise_cfg_blob = True
        manifest = client.manifest(image_reference=src_image_reference)

        logger.warning(
            f'manifest {src_image_reference=} is in legacy-format (schemaVersion==1) - '
            'cannot replicate verbatim'
        )

        manifest, _ = oconv.v1_manifest_to_v2(
            manifest=manifest,
            oci_client=client,
            tgt_image_ref=str(tgt_image_reference),
        )

        # we must determine the uncompressed layer-digests to synthesise a valid
        # cfg-blob docker will accept (this means in particular we must download
        # all layers, even if we do not need to upload them)
        need_uncompressed_layer_digests = True
        uncompressed_layer_digests = []
    elif schema_version == 2:
        manifest = json.loads(raw_manifest)
        media_type = manifest.get('mediaType', om.DOCKER_MANIFEST_SCHEMA_V2_MIME)

        if media_type == om.DOCKER_MANIFEST_LIST_MIME:
            # multi-arch
            manifest = dacite.from_dict(
                data_class=om.OciImageManifestList,
                data=manifest,
            )

            src_ref = om.OciImageReference(image_reference=src_image_reference)
            src_name = src_ref.ref_without_tag
            tgt_ref = om.OciImageReference(image_reference=tgt_image_reference)
            tgt_name = tgt_ref.ref_without_tag

            # try to avoid modifications (from x-serialisation) - unless we have to
            manifest_dirty = False

            # cp manifests to tuple, because we _might_ modify if there is a platform_filter
            for sub_manifest in tuple(manifest.manifests):
                src_reference = f'{src_name}@{sub_manifest.digest}'
                tgt_reference = f'{tgt_name}@{sub_manifest.digest}'

                if platform_filter:
                    platform = op.from_single_image(
                        image_reference=src_reference,
                        oci_client=client,
                        base_platform=sub_manifest.platform,
                    )

                    if not platform_filter(platform):
                        logger.info(f'skipping {platform=} for {src_image_reference=}')
                        manifest_dirty = True
                        manifest.manifests.remove(sub_manifest)
                        continue

                logger.info(f'replicating to {tgt_reference=}')

                replicate_artifact(
                    src_image_reference=src_reference,
                    tgt_image_reference=tgt_reference,
                    oci_client=client,
                )

            if manifest_dirty:
                raw_manifest = json.dumps(manifest.as_dict())

            res = client.put_manifest(
                image_reference=tgt_image_reference,
                manifest=raw_manifest,
            )

            return res, tgt_image_reference, raw_manifest.encode('utf-8')
        elif media_type in (
            om.OCI_MANIFEST_SCHEMA_V2_MIME,
            om.DOCKER_MANIFEST_SCHEMA_V2_MIME,
        ):
            if mode is ReplicationMode.NORMALISE_TO_MULTIARCH:
                if not src_image_reference.has_digest_tag:
                    src_image_reference = om.OciImageReference.to_image_ref(
                        client.to_digest_hash(
                            image_reference=src_image_reference,
                        )
                    )

                platform = op.from_single_image(
                    image_reference=src_image_reference,
                    oci_client=client,
                )

                # force usage of digest-tag (the symbolic tag is reserved for the manifest-list)
                tgt_image_ref = \
                    f'{tgt_image_reference.ref_without_tag}@{src_image_reference.tag}'

                res, ref, manifest_bytes = replicate_artifact(
                    src_image_reference=src_image_reference,
                    tgt_image_reference=tgt_image_ref,
                    oci_client=client,
                )

                manifest_list = om.OciImageManifestList(
                    manifests=[
                        om.OciImageManifestListEntry(
                            digest=f'sha256:{hashlib.sha256(manifest_bytes).hexdigest()}',
                            mediaType=media_type,
                            size=len(manifest_bytes),
                            platform=platform,
                        ),
                    ]
                )

                manifest_list_bytes = json.dumps(
                    manifest_list.as_dict(),
                ).encode('utf-8')

                res = client.put_manifest(
                    image_reference=tgt_image_reference,
                    manifest=manifest_list_bytes,
                )

                return res, tgt_image_reference, manifest_list_bytes

            manifest = dacite.from_dict(
                data_class=om.OciImageManifest,
                data=json.loads(raw_manifest)
            )
            need_uncompressed_layer_digests = False
            uncompressed_layer_digests = None
        else:
            raise NotImplementedError(f'{media_type=}')
    else:
        raise NotImplementedError(schema_version)

    for idx, layer in enumerate(manifest.blobs()):
        # need to specially handle cfg-blob (may be absent for v2 / legacy images)
        is_cfg_blob = idx == 0

        if is_cfg_blob and need_to_synthesise_cfg_blob:
            # if we need(ed) to synthesise the cfg-blob (because the source-image contained a
            # v1-manifest), there will never be a cfg-blob in src.
            # -> silently skip to avoid emitting a confusing, but unhelpful warning
            logger.debug(f'{src_image_reference=} - synthesised cfg-blob - skipping replication')
            continue

        head_res = client.head_blob(
            image_reference=tgt_image_reference,
            digest=layer.digest,
        )
        if head_res.ok:
            if not need_uncompressed_layer_digests:
                logger.info(f'skipping blob download {layer.digest=} - already exists in tgt')
                continue  # no need to download if blob already exists in tgt
            elif not is_cfg_blob:
                # we will not need to re-upload, however we do need the uncompressed digest
                blob_res = client.blob(
                    image_reference=src_image_reference,
                    digest=layer.digest,
                    absent_ok=is_cfg_blob,
                )

                layer_hash = hashlib.sha256()
                decompressor = zlib.decompressobj(wbits=zlib.MAX_WBITS | 16)

                for chunk in blob_res.iter_content(chunk_size=4096):
                    layer_hash.update(decompressor.decompress(chunk))

                uncompressed_layer_digests.append(f'sha256:{layer_hash.hexdigest()}')
                continue  # we may still skip the upload, of course

        # todo: consider silencing the warning if we do v1->v2-conversion (cfg-blob will
        # never exist in this case)
        blob_res = client.blob(
            image_reference=src_image_reference,
            digest=layer.digest,
            absent_ok=is_cfg_blob,
        )
        if not blob_res and is_cfg_blob:
            # fall back to non-verbatim replication; synthesise cfg
            logger.warning(
                'falling back to non-verbatim replication '
                f'{src_image_reference=} {tgt_image_reference=}'
            )
            need_to_synthesise_cfg_blob = True
            continue

        if need_uncompressed_layer_digests:
            uncompressed_layer_hash = hashlib.sha256()
            decompressor = zlib.decompressobj(wbits=zlib.MAX_WBITS | 16)

            def intercept_chunks(blob_res):
                for chunk in blob_res.iter_content(chunk_size=4096):
                    uncompressed_layer_hash.update(decompressor.decompress(chunk))
                    yield chunk

                uncompressed_layer_digests.append(
                    f'sha256:{uncompressed_layer_hash.hexdigest()}'
                )

            blob_res = intercept_chunks(blob_res=blob_res)

        client.put_blob(
            image_reference=tgt_image_reference,
            digest=layer.digest,
            octets_count=layer.size,
            data=blob_res,
        )

    if need_to_synthesise_cfg_blob:
        fake_cfg_dict = json.loads(json.loads(raw_manifest)['history'][0]['v1Compatibility'])
        # patch-in uncompressed layer-digests
        fake_cfg_dict['rootfs'] = {
            'diff_ids': uncompressed_layer_digests,
            'type': 'layers',
        }

        fake_cfg_raw = json.dumps(fake_cfg_dict).encode('utf-8')

        client.put_blob(
            image_reference=tgt_image_reference,
            digest=(cfg_digest := f'sha256:{hashlib.sha256(fake_cfg_raw).hexdigest()}'),
            octets_count=len(fake_cfg_raw),
            data=fake_cfg_raw,
        )

        manifest_dict = dataclasses.asdict(manifest)
        # patch-in the altered cfg-digest
        manifest_dict['config']['digest'] = cfg_digest
        manifest_dict['config']['size'] = len(fake_cfg_raw)
        raw_manifest = json.dumps(manifest_dict)

    res = client.put_manifest(
        image_reference=tgt_image_reference,
        manifest=raw_manifest,
    )

    return res, tgt_image_reference, raw_manifest.encode('utf-8')
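# Usage sketch (illustrative references; `client` is assumed to be a constructed
# oc.Client): replicate an artifact, preferring multiarch-manifests where the
# source registry offers them:
#
#   res, tgt_ref, manifest_bytes = replicate_artifact(
#       src_image_reference='example.org/src/image:1.0',
#       tgt_image_reference='example.org/tgt/image:1.0',
#       oci_client=client,
#       mode=ReplicationMode.PREFER_MULTIARCH,
#   )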