Example #1
def test_original_image_reference():
    ref = om.OciImageReference('alpine:3')
    assert ref.original_image_reference == 'alpine:3'

    # without tag
    ref = om.OciImageReference('eu.gcr.io/example/foo')
    assert ref.original_image_reference == 'eu.gcr.io/example/foo'
Example #2
def test_tag():
    ref = om.OciImageReference('alpine:3')
    assert ref.tag == '3'

    ref = om.OciImageReference(f'example.org/path@sha256:{example_digest}')
    assert ref.tag == f'sha256:{example_digest}'

    ref = om.OciImageReference(f'example.org:1234/path@sha256:{example_digest}')
    assert ref.tag == f'sha256:{example_digest}'
Example #3
def test_normalised_image_reference():
    ref = om.OciImageReference('alpine:3')
    assert ref.normalised_image_reference == 'registry-1.docker.io/library/alpine:3'

    ref = om.OciImageReference('eu.gcr.io/project/foo:bar')
    assert ref.normalised_image_reference == 'eu.gcr.io/project/foo:bar'

    # no tag
    ref = om.OciImageReference('eu.gcr.io/project/foo')
    assert ref.normalised_image_reference == 'eu.gcr.io/project/foo'
Example #4
def test_eq():
    ref1 = om.OciImageReference('alpine:3')
    ref2 = om.OciImageReference('registry-1.docker.io/library/alpine:3')

    assert ref1 == ref2
    assert ref1 == ref1
    assert ref2 == ref2

    ref3 = om.OciImageReference('example.org/path:tag1')

    assert ref1 != ref3
Example #5
def test_name():
    ref = om.OciImageReference('example.org/path:tag')
    assert ref.name == 'path'

    ref = om.OciImageReference(f'example.org/path@sha256:{example_digest}')
    assert ref.name == 'path'

    ref = om.OciImageReference('example.org:1234/path:tag')
    assert ref.name == 'path'

    # special handling to mimic docker-cli
    ref = om.OciImageReference('alpine:3')
    assert ref.name == 'library/alpine'
Example #6
def test_ref_without_tag():
    ref = om.OciImageReference('example.org/path:tag')
    assert ref.ref_without_tag == 'example.org/path'

    ref = om.OciImageReference(f'example.org/path@sha256:{example_digest}')
    assert ref.ref_without_tag == 'example.org/path'

    ref = om.OciImageReference('example.org:1234/path:tag')
    assert ref.ref_without_tag == 'example.org:1234/path'

    # special handling to mimic docker-cli
    ref = om.OciImageReference('alpine:3')
    assert ref.ref_without_tag == 'registry-1.docker.io/library/alpine'
Example #7
def test_parsed_digest_tag():
    with pytest.raises(ValueError):
        om.OciImageReference('alpine:3').parsed_digest_tag

    ref = om.OciImageReference(f'example.org/path@sha256:{example_digest}')
    alg, dig = ref.parsed_digest_tag

    assert alg == 'sha256'
    assert dig == example_digest.split(':')[-1]

    ref = om.OciImageReference(f'alpine@sha256:{example_digest}')
    alg, dig = ref.parsed_digest_tag

    assert alg == 'sha256'
    assert dig == example_digest.split(':')[-1]
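
As the test shows, parsed_digest_tag raises ValueError for symbolic tags, so callers that may receive either tag kind can guard on tag_type first (see Example #13). A minimal sketch of such a guard; the import path for the `om` alias is an assumption:

import oci.model as om  # assumed import path for the `om` alias used in these examples


def digest_or_none(image_reference: str):
    # return (algorithm, hex-digest) for digest-tagged references, None for symbolic tags
    ref = om.OciImageReference(image_reference)
    if ref.tag_type is not om.OciTagType.DIGEST:
        return None
    return ref.parsed_digest_tag


digest_or_none('example.org/path@sha256:' + 'a' * 64)  # e.g. ('sha256', 'aaaa...')
digest_or_none('alpine:3')                              # None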
Example #8
    def head_blob(
        self,
        image_reference: typing.Union[str, om.OciImageReference],
        digest: str,
        absent_ok=True,
    ):
        image_reference = om.OciImageReference(image_reference)
        scope = _scope(image_reference=image_reference, action='pull')

        res = self._request(
            url=self.routes.blob_url(
                image_reference=image_reference,
                digest=digest,
            ),
            method='HEAD',
            scope=scope,
            image_reference=image_reference,
            raise_for_status=False,
            warn_if_not_ok=not absent_ok,
        )

        if absent_ok and res.status_code == 404:
            return res

        res.raise_for_status()

        return res
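
head_blob works well as a cheap existence probe: with absent_ok=True (the default) a 404 is returned as a regular response rather than raised, so res.ok tells the caller whether the blob is already present. A minimal sketch of such a probe; the import paths for the `oc` / `om` aliases are assumptions, and the client instance is expected to be constructed elsewhere:

import oci.client as oc  # assumed import path for the `oc` alias used in Example #16
import oci.model as om   # assumed import path for the `om` alias


def blob_exists(client: oc.Client, image_reference: str, digest: str) -> bool:
    # HEAD the blob; with absent_ok=True a missing blob yields a 404 response, not an exception
    res = client.head_blob(
        image_reference=om.OciImageReference(image_reference),
        digest=digest,
    )
    return res.ok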
Example #9
    def blob(
        self,
        image_reference: typing.Union[str, om.OciImageReference],
        digest: str,
        stream=True,
        absent_ok=False,
    ) -> requests.models.Response:
        image_reference = om.OciImageReference(image_reference)

        scope = _scope(image_reference=image_reference, action='pull')

        res = self._request(
            url=self.routes.blob_url(image_reference=image_reference, digest=digest),
            image_reference=image_reference,
            scope=scope,
            method='GET',
            stream=stream,
            timeout=None,
            raise_for_status=False,
        )

        if absent_ok and res.status_code == requests.codes.NOT_FOUND:
            return None
        res.raise_for_status()

        return res
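
Since blob() returns the (by default streaming) requests.Response, large layers can be written to disk without being held in memory. A minimal sketch, assuming an already-constructed client as in the previous example:

def download_blob_to_file(client, image_reference: str, digest: str, out_path: str):
    # stream the blob in 4 KiB chunks instead of buffering the whole layer
    res = client.blob(image_reference=image_reference, digest=digest, stream=True)
    with open(out_path, 'wb') as f:
        for chunk in res.iter_content(chunk_size=4096):
            f.write(chunk)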
Example #10
def test_netloc():
    # simple case w/ symbolic tag
    ref = om.OciImageReference('example.org/path:tag')
    assert ref.netloc == 'example.org'

    ref = om.OciImageReference(f'example.org/path@sha256:{example_digest}')
    assert ref.netloc == 'example.org'

    ref = om.OciImageReference('example.org:1234/path:tag')
    assert ref.netloc == 'example.org:1234'

    ref = om.OciImageReference(f'example.org:1234/path@sha256:{example_digest}')
    assert ref.netloc == 'example.org:1234'

    # special handling to mimic docker-cli
    ref = om.OciImageReference('alpine:3')
    assert ref.netloc == 'registry-1.docker.io'
Example #11
    def _put_blob_single_post(
        self,
        image_reference: typing.Union[str, om.OciImageReference],
        digest: str,
        octets_count: int,
        data: bytes,
    ):
        logger.debug(f'single-post {image_reference=} {octets_count=}')
        image_reference = om.OciImageReference(image_reference)
        scope = _scope(image_reference=image_reference, action='push,pull')

        # XXX according to distribution-spec, single-POST should also work - however
        # this seems not to be true for registry-1.docker.io. To keep the code simple(r),
        # always do a two-step upload; we might add a cfg-option (or maybe even discovery) for
        # using single-post uploads for registries that support it (such as GCR or artifactory)
        res = self._request(
            url=self.routes.uploads_url(
                image_reference=image_reference,
            ),
            image_reference=image_reference,
            scope=scope,
            method='POST',
        )

        upload_url = res.headers.get('Location')

        # returned url _may_ be relative
        if upload_url.startswith('/'):
            parsed_url = urllib.parse.urlparse(res.url)
            upload_url = f'{parsed_url.scheme}://{parsed_url.netloc}{upload_url}'

        if '?' in upload_url:
            prefix = '&'
        else:
            prefix = '?'

        upload_url += prefix + urllib.parse.urlencode({'digest': digest})

        res = self._request(
            url=upload_url,
            image_reference=image_reference,
            scope=scope,
            method='PUT',
            headers={
                'content-type': 'application/octet-stream',
                'content-length': str(octets_count),
            },
            data=data,
            raise_for_status=False,
        )

        if res.status_code != 201: # spec says it MUST be 201
            # note: a 202 indicates the upload did not actually succeed, e.g. for "docker-hub"
            logger.warning(f'{image_reference=} {res.status_code=} {digest=} - PUT may have failed')

        res.raise_for_status()
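
The method above opens an upload session with a POST, then PUTs the full body to the returned Location with the expected digest passed as a query parameter. The same exchange can be sketched with plain requests against the OCI distribution API; the local, unauthenticated registry URL below is purely illustrative (real registries additionally require the auth scopes handled by _scope above):

import hashlib
import urllib.parse

import requests

registry = 'http://localhost:5000'   # hypothetical unauthenticated registry
name = 'example/path'
data = b'example-blob-contents'
digest = f'sha256:{hashlib.sha256(data).hexdigest()}'

# step 1: open an upload session; the registry answers with a Location header
res = requests.post(f'{registry}/v2/{name}/blobs/uploads/')
upload_url = res.headers['Location']
if upload_url.startswith('/'):  # the returned url may be relative
    upload_url = registry + upload_url

# step 2: upload the whole blob, passing the expected digest as a query parameter
sep = '&' if '?' in upload_url else '?'
upload_url += sep + urllib.parse.urlencode({'digest': digest})
res = requests.put(
    upload_url,
    data=data,
    headers={'content-type': 'application/octet-stream'},
)
res.raise_for_status()  # the spec mandates 201 Created on success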
Example #12
def test_str():
    ref = om.OciImageReference('alpine:3')
    assert str(ref) == ref.normalised_image_reference
Example #13
def test_tag_type():
    ref = om.OciImageReference('example.org/path:symbolic-tag')
    assert ref.tag_type is om.OciTagType.SYMBOLIC

    ref = om.OciImageReference(f'example.org/path@sha256:{example_digest}')
    assert ref.tag_type is om.OciTagType.DIGEST
Example #14
    def _put_blob_chunked(
        self,
        image_reference: typing.Union[str, om.OciImageReference],
        digest: str,
        octets_count: int,
        data_iterator: typing.Iterator[bytes],
        chunk_size: int=1024 * 1024 * 16, # 16 MiB
    ):
        image_reference = om.OciImageReference(image_reference)
        scope = _scope(image_reference=image_reference, action='push,pull')
        logger.debug(f'chunked-put {chunk_size=}')

        # start uploading session
        res = self._request(
            url=self.routes.uploads_url(image_reference=image_reference),
            image_reference=image_reference,
            scope=scope,
            method='POST',
            headers={
                'content-length': '0',
            }
        )
        res.raise_for_status()

        upload_url = res.headers['location']

        octets_left = octets_count
        octets_sent = 0
        offset = 0
        sha256 = hashlib.sha256()

        while octets_left > 0:
            octets_to_send = min(octets_left, chunk_size)
            octets_left -= octets_to_send

            data = next(data_iterator)
            sha256.update(data)

            if len(data) != octets_to_send:
                # sanity check to detect programming errors
                raise ValueError(f'{len(data)=} vs {octets_to_send=}')

            logger.debug(f'{octets_to_send=} {octets_left=} {len(data)=}')
            logger.debug(f'{octets_sent + offset}-{octets_sent + octets_to_send + offset}')

            crange_from = octets_sent
            crange_to = crange_from + len(data) - 1

            res = self._request(
                url=upload_url,
                image_reference=image_reference,
                scope=scope,
                method='PATCH',
                data=data,
                headers={
                    'Content-Length': str(octets_to_send),
                    'Content-Type': 'application/octet-stream',
                    'Content-Range': f'{crange_from}-{crange_to}',
                    'Range': f'{crange_from}-{crange_to}',
                }
            )
            res.raise_for_status()

            upload_url = res.headers['location']

            octets_sent += len(data)

        sha256_digest = f'sha256:{sha256.hexdigest()}'

        # close uploading session
        query = urllib.parse.urlencode({'digest': sha256_digest})
        upload_url = res.headers['location'] + '?' + query
        res = self._request(
            url=upload_url,
            image_reference=image_reference,
            scope=scope,
            method='PUT',
            headers={
                'Content-Length': '0',
            },
        )
        return res
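
_put_blob_chunked expects data_iterator to yield exactly min(octets_left, chunk_size) bytes per call; that is what the sanity check above enforces. A minimal helper that produces such an iterator from an in-memory bytes object; the helper name is hypothetical:

import typing


def iter_chunks(data: bytes, chunk_size: int = 1024 * 1024 * 16) -> typing.Iterator[bytes]:
    # yield chunks of exactly chunk_size bytes; only the final chunk may be shorter
    for offset in range(0, len(data), chunk_size):
        yield data[offset:offset + chunk_size]


# usage sketch, assuming a constructed client plus pre-computed `digest` and `data`
# client._put_blob_chunked(
#     image_reference='example.org/path:tag',
#     digest=digest,
#     octets_count=len(data),
#     data_iterator=iter_chunks(data),
# )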
Example #15
    def put_blob(
        self,
        image_reference: typing.Union[str, om.OciImageReference],
        digest: str,
        octets_count: int,
        data: requests.models.Response,
        max_chunk=1024 * 1024 * 1, # 1 MiB
        mimetype: str='application/data',
    ):
        image_reference = om.OciImageReference(image_reference)
        head_res = self.head_blob(
            image_reference=image_reference,
            digest=digest,
        )
        if head_res.ok:
            logger.info(f'skipping blob upload {digest=} - already exists')
            return

        data_is_requests_resp = isinstance(data, requests.models.Response)
        data_is_generator = isinstance(data, typing.Generator)
        data_is_filelike = hasattr(data, 'read')
        data_is_bytes = isinstance(data, bytes)

        if octets_count < max_chunk or data_is_filelike or data_is_requests_resp or data_is_bytes:
            if data_is_requests_resp:
                data = data.content
            elif data_is_generator:
                # at least GCR does not like chunked uploads; if small enough, work around this
                # and create one (not-that-big) bytes-obj
                _data = bytes()
                for chunk in data:
                    _data += chunk
                data = _data
            elif data_is_filelike:
                pass # if filelike, http.client will handle streaming for us

            return self._put_blob_single_post(
                image_reference=image_reference,
                digest=digest,
                octets_count=octets_count,
                data=data,
            )
        elif octets_count >= max_chunk and data_is_generator:
            # workaround: write into temporary file, as at least GCR does not implement
            # chunked-upload, and requests will not properly work w/ all generators
            # (in particular, it will not work w/ our "fake" on)
            with tempfile.TemporaryFile() as tf:
                for chunk in data:
                    tf.write(chunk)
                tf.seek(0)

                return self._put_blob_single_post(
                    image_reference=image_reference,
                    digest=digest,
                    octets_count=octets_count,
                    data=tf,
                )
        else:
            if data_is_requests_resp:
                with data:
                    return self._put_blob_chunked(
                        image_reference=image_reference,
                        digest=digest,
                        octets_count=octets_count,
                        data_iterator=data.iter_content(chunk_size=max_chunk),
                        chunk_size=max_chunk,
                    )
            else:
                raise NotImplementedError
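
put_blob accepts bytes, file-like objects, generators, or a requests.Response and picks the upload strategy accordingly. A minimal usage sketch for an in-memory payload, assuming an already-constructed client named oci_client; the payload and target reference are placeholders:

import hashlib

data = b'example-layer-contents'  # hypothetical payload
digest = f'sha256:{hashlib.sha256(data).hexdigest()}'

oci_client.put_blob(
    image_reference='example.org/path:tag',  # hypothetical target reference
    digest=digest,
    octets_count=len(data),
    data=data,
)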
Example #16
def replicate_artifact(
    src_image_reference: typing.Union[str, om.OciImageReference],
    tgt_image_reference: typing.Union[str, om.OciImageReference],
    credentials_lookup: oa.credentials_lookup=None,
    routes: oc.OciRoutes=oc.OciRoutes(),
    oci_client: oc.Client=None,
    mode: ReplicationMode=ReplicationMode.REGISTRY_DEFAULTS,
    platform_filter: typing.Callable[[om.OciPlatform], bool]=None,
) -> typing.Tuple[requests.Response, str, bytes]:
    '''
    Replicate the given OCI artifact from src_image_reference to tgt_image_reference.

    Replication tries to be verbatim where possible (i.e. the target should reflect the
    source as closely as possible). Whether a fully identical replication is possible
    depends on the source artifact and on the chosen replication `mode`:

    If the source artifact is a "legacy / v1" docker image (as created by older versions
    of docker), verbatim replication is not possible, because modern (v2) OCI registries
    (such as GCR) will not accept such manifests. Conversion to "v2" is therefore
    required, and is done transparently by this function.

    If the source artifact is a "multiarch" image (oci.model.OciImageManifestList),
    registries behave differently under ReplicationMode.REGISTRY_DEFAULTS: some will
    return a single-image manifest instead of the multiarch manifest, in which case the
    replication result is only a single image. Use ReplicationMode.PREFER_MULTIARCH or
    ReplicationMode.NORMALISE_TO_MULTIARCH to prevent this.

    If platform_filter is specified (it is only applied to multi-arch images), the
    replication result will also deviate from the source, depending on the filter
    semantics.

    Pass either `credentials_lookup` (plus `routes`) or `oci_client`, but not both.
    '''
    if not (bool(credentials_lookup) ^ bool(oci_client)):
        raise ValueError('either credentials-lookup + routes, xor client must be passed')

    src_image_reference = om.OciImageReference.to_image_ref(src_image_reference)
    tgt_image_reference = om.OciImageReference.to_image_ref(tgt_image_reference)

    if not oci_client:
        client = oc.Client(
            credentials_lookup=credentials_lookup,
            routes=routes,
        )
    else:
        client = oci_client

    if mode is ReplicationMode.REGISTRY_DEFAULTS:
        accept = None
    elif mode is ReplicationMode.PREFER_MULTIARCH:
        accept = om.MimeTypes.prefer_multiarch
    elif mode is ReplicationMode.NORMALISE_TO_MULTIARCH:
        accept = om.MimeTypes.prefer_multiarch
    else:
        raise NotImplementedError(mode)

    # we need the unaltered manifest for verbatim replication
    raw_manifest = client.manifest_raw(
        image_reference=src_image_reference,
        accept=accept,
    ).text
    manifest = json.loads(raw_manifest)
    schema_version = int(manifest['schemaVersion'])
    need_to_synthesise_cfg_blob = False

    if schema_version == 1:
        need_to_synthesise_cfg_blob = True
        manifest = client.manifest(image_reference=src_image_reference)

        logger.warning(
            f'''
            manifest {src_image_reference=} is in legacy-format
            (schemaVersion==1); cannot replicate verbatim
            '''
        )

        manifest, _ = oconv.v1_manifest_to_v2(
            manifest=manifest,
            oci_client=client,
            tgt_image_ref=str(tgt_image_reference),
        )

        # we must determine the uncompressed layer-digests to synthesise a valid
        # cfg-blob docker will accept (this means in particular we must download
        # all layers, even if we do not need to upload them)
        need_uncompressed_layer_digests = True
        uncompressed_layer_digests = []
    elif schema_version == 2:
        manifest = json.loads(raw_manifest)
        media_type = manifest.get('mediaType', om.DOCKER_MANIFEST_SCHEMA_V2_MIME)

        if media_type == om.DOCKER_MANIFEST_LIST_MIME:
            # multi-arch
            manifest = dacite.from_dict(
                data_class=om.OciImageManifestList,
                data=manifest,
            )

            src_ref = om.OciImageReference(image_reference=src_image_reference)
            src_name = src_ref.ref_without_tag
            tgt_ref = om.OciImageReference(image_reference=tgt_image_reference)
            tgt_name = tgt_ref.ref_without_tag

            # try to avoid modifications (from x-serialisation) - unless we have to
            manifest_dirty = False

            # copy manifests into a tuple, because we _might_ modify the list if there is a platform_filter
            for sub_manifest in tuple(manifest.manifests):
                src_reference = f'{src_name}@{sub_manifest.digest}'
                tgt_reference = f'{tgt_name}@{sub_manifest.digest}'

                if platform_filter:
                    platform = op.from_single_image(
                        image_reference=src_reference,
                        oci_client=client,
                        base_platform=sub_manifest.platform,
                    )
                    if not platform_filter(platform):
                        logger.info(f'skipping {platform=} for {src_image_reference=}')
                        manifest_dirty = True
                        manifest.manifests.remove(sub_manifest)
                        continue

                logger.info(f'replicating to {tgt_reference=}')

                replicate_artifact(
                    src_image_reference=src_reference,
                    tgt_image_reference=tgt_reference,
                    oci_client=client,
                )

            if manifest_dirty:
                raw_manifest = json.dumps(manifest.as_dict())

            res = client.put_manifest(
                image_reference=tgt_image_reference,
                manifest=raw_manifest,
            )

            return res, tgt_image_reference, raw_manifest.encode('utf-8')

        elif media_type in (
            om.OCI_MANIFEST_SCHEMA_V2_MIME,
            om.DOCKER_MANIFEST_SCHEMA_V2_MIME,
        ):
            if mode is ReplicationMode.NORMALISE_TO_MULTIARCH:
                if not src_image_reference.has_digest_tag:
                    src_image_reference = om.OciImageReference.to_image_ref(
                        client.to_digest_hash(
                            image_reference=src_image_reference,
                        )
                    )
                platform = op.from_single_image(
                    image_reference=src_image_reference,
                    oci_client=client,
                )
                # force usage of digest-tag (the symbolic tag is reserved for the manifest-list)
                tgt_image_ref = \
                    f'{tgt_image_reference.ref_without_tag}@{src_image_reference.tag}'

                res, ref, manifest_bytes = replicate_artifact(
                    src_image_reference=src_image_reference,
                    tgt_image_reference=tgt_image_ref,
                    oci_client=client,
                )

                manifest_list = om.OciImageManifestList(
                    manifests=[
                        om.OciImageManifestListEntry(
                            digest=f'sha256:{hashlib.sha256(manifest_bytes).hexdigest()}',
                            mediaType=media_type,
                            size=len(manifest_bytes),
                            platform=platform,
                        ),
                    ]
                )

                manifest_list_bytes = json.dumps(
                    manifest_list.as_dict(),
                ).encode('utf-8')

                res = client.put_manifest(
                    image_reference=tgt_image_reference,
                    manifest=manifest_list_bytes,
                )

                return res, tgt_image_reference, manifest_list_bytes

            manifest = dacite.from_dict(
                data_class=om.OciImageManifest,
                data=json.loads(raw_manifest)
            )
            need_uncompressed_layer_digests = False
            uncompressed_layer_digests = None
        else:
            raise NotImplementedError(f'{media_type=}')
    else:
        raise NotImplementedError(schema_version)

    for idx, layer in enumerate(manifest.blobs()):
        # need to specially handle cfg-blob (may be absent for v1 / legacy images)

        is_cfg_blob = idx == 0
        if is_cfg_blob and need_to_synthesise_cfg_blob:
            # if we need(ed) to synthesise cfg-blob (because source-image contained a v1-manifest)
            # then there will never be a cfg-blob in src.
            # -> silently skip to avoid emitting a confusing, but unhelpful warning
            logger.debug(f'{src_image_reference=} - synthesised cfg-blob - skipping replication')
            continue

        head_res = client.head_blob(
            image_reference=tgt_image_reference,
            digest=layer.digest,
        )
        if head_res.ok:
            if not need_uncompressed_layer_digests:
                logger.info(f'skipping blob download {layer.digest=} - already exists in tgt')
                continue # no need to download if blob already exists in tgt
            elif not is_cfg_blob:
                # we will not need to re-upload, however we do need the uncompressed digest
                blob_res = client.blob(
                    image_reference=src_image_reference,
                    digest=layer.digest,
                    absent_ok=is_cfg_blob,
                )

                layer_hash = hashlib.sha256()
                decompressor = zlib.decompressobj(wbits=zlib.MAX_WBITS | 16)

                for chunk in blob_res.iter_content(chunk_size=4096):
                    layer_hash.update(decompressor.decompress(chunk))

                uncompressed_layer_digests.append(f'sha256:{layer_hash.hexdigest()}')
                continue # we may still skip the upload, of course

        # todo: consider silencing warning if we do v1->v2-conversion (cfg-blob will never exist
        #       in this case)
        blob_res = client.blob(
            image_reference=src_image_reference,
            digest=layer.digest,
            absent_ok=is_cfg_blob,
        )
        if not blob_res and is_cfg_blob:
            # fallback to non-verbatim replication; synthesise cfg
            logger.warning(
                'falling back to non-verbatim replication '
                f'{src_image_reference=} {tgt_image_reference=}'
            )
            need_to_synthesise_cfg_blob = True
            continue

        if need_uncompressed_layer_digests:
            uncompressed_layer_hash = hashlib.sha256()
            decompressor = zlib.decompressobj(wbits=zlib.MAX_WBITS | 16)

            def intercept_chunks(blob_res):
                for chunk in blob_res.iter_content(chunk_size=4096):
                    uncompressed_layer_hash.update(decompressor.decompress(chunk))
                    yield chunk

                uncompressed_layer_digests.append(f'sha256:{uncompressed_layer_hash.hexdigest()}')

            blob_res = intercept_chunks(blob_res=blob_res)

        client.put_blob(
            image_reference=tgt_image_reference,
            digest=layer.digest,
            octets_count=layer.size,
            data=blob_res,
        )

    if need_to_synthesise_cfg_blob:
        fake_cfg_dict = json.loads(json.loads(raw_manifest)['history'][0]['v1Compatibility'])

        # patch-in uncompressed layer-digests
        fake_cfg_dict['rootfs'] = {
            'diff_ids': uncompressed_layer_digests,
            'type': 'layers',
        }

        fake_cfg_raw = json.dumps(fake_cfg_dict).encode('utf-8')

        client.put_blob(
            image_reference=tgt_image_reference,
            digest=(cfg_digest := f'sha256:{hashlib.sha256(fake_cfg_raw).hexdigest()}'),
            octets_count=len(fake_cfg_raw),
            data=fake_cfg_raw,
        )

        manifest_dict = dataclasses.asdict(manifest)
        # patch in the altered cfg-digest
        manifest_dict['config']['digest'] = cfg_digest
        manifest_dict['config']['size'] = len(fake_cfg_raw)
        raw_manifest = json.dumps(manifest_dict)

    res = client.put_manifest(
        image_reference=tgt_image_reference,
        manifest=raw_manifest,
    )

    return res, tgt_image_reference, raw_manifest.encode('utf-8')
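
A minimal invocation sketch, assuming an already-constructed oc.Client named oci_client and hypothetical source/target references; PREFER_MULTIARCH keeps multi-arch images intact where the registry supports it:

res, tgt_ref, manifest_bytes = replicate_artifact(
    src_image_reference='example.org/src/image:1.0.0',  # hypothetical references
    tgt_image_reference='example.org/tgt/image:1.0.0',
    oci_client=oci_client,                               # pre-constructed oc.Client (assumption)
    mode=ReplicationMode.PREFER_MULTIARCH,
)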