예제 #1
0
    def _get_shared_storage(self, blob_digest):
        """ Looks up the globally shared ImageStorage row for the given blob digest, if any.

        Only the special empty layer blob is handled as globally shared here; for any other
        digest, None is returned.
        """
        is_empty_layer = blob_digest == EMPTY_LAYER_BLOB_DIGEST
        if not is_empty_layer:
            return None

        # The empty layer is defined as a globally shared layer, so it is fetched directly
        # rather than via a repository-bound lookup. With the many duplicate copies currently
        # in the database, the repository-bound lookup can be incredibly slow and is extra
        # work we don't need to do.
        return get_shared_blob(EMPTY_LAYER_BLOB_DIGEST)
예제 #2
0
def _build_blob_map(
    repository_id,
    manifest_interface_instance,
    retriever,
    storage,
    raise_on_error=False,
    require_empty_layer=True,
):
    """Maps the digest of every blob referenced by the manifest to its storage row.

    Each locally referenced blob digest is looked up under the given repository, so a
    successful result also means every blob was found for that repository. On any failure,
    None is returned, or CreateManifestException is raised if raise_on_error is set.

    When require_empty_layer is True, the manifest is additionally asked whether it needs the
    special shared empty layer blob; if it does, that blob is fetched (or created) and added
    to the map. When it is False, the empty layer blob only appears in the map if the manifest
    *explicitly* lists its digest. This matters because Docker V2_2/OCI manifests can
    implicitly reference an empty blob layer for image layers that only change metadata.
    """
    pending_digests = set(manifest_interface_instance.local_blob_digests)
    blob_map = {}

    # The special empty layer is predefined, so load it directly instead of going through the
    # (much slower, and unnecessary) per-repository lookup.
    if EMPTY_LAYER_BLOB_DIGEST in pending_digests:
        pending_digests.remove(EMPTY_LAYER_BLOB_DIGEST)
        empty_blob = get_shared_blob(EMPTY_LAYER_BLOB_DIGEST)
        blob_map[EMPTY_LAYER_BLOB_DIGEST] = empty_blob
        if not empty_blob:
            if raise_on_error:
                raise CreateManifestException(
                    "Unable to retrieve specialized empty blob")

            logger.warning("Could not find the special empty blob in storage")
            return None

    # Resolve all remaining digests under the repository and make sure none are missing.
    if pending_digests:
        storage_rows = lookup_repo_storages_by_content_checksum(
            repository_id, pending_digests, with_uploads=True)
        for storage_row in storage_rows:
            blob_map[storage_row.content_checksum] = storage_row

        for digest_str in pending_digests:
            if digest_str in blob_map:
                continue

            logger.warning(
                "Unknown blob `%s` under manifest `%s` for repository `%s`",
                digest_str,
                manifest_interface_instance.digest,
                repository_id,
            )
            if raise_on_error:
                raise CreateManifestException("Unknown blob `%s`" % digest_str)

            return None

    # Nothing more to do if the implicit empty layer check was not requested, or if the empty
    # layer was explicitly referenced and is therefore already present in the map.
    if not require_empty_layer or EMPTY_LAYER_BLOB_DIGEST in blob_map:
        return blob_map

    # Special check: Docker decided to elide sending of the special empty layer in schema
    # version 2, but it needs to be referenced for schema version 1, so add it when the
    # manifest reports that it is required.
    try:
        needs_empty_layer = manifest_interface_instance.get_requires_empty_layer_blob(
            retriever)
    except ManifestException as ex:
        if raise_on_error:
            raise CreateManifestException(str(ex))

        return None

    if needs_empty_layer is None:
        if raise_on_error:
            raise CreateManifestException("Could not load configuration blob")

        return None

    if needs_empty_layer:
        shared_blob = get_or_create_shared_blob(EMPTY_LAYER_BLOB_DIGEST,
                                                EMPTY_LAYER_BYTES, storage)
        assert not shared_blob.uploading
        assert shared_blob.content_checksum == EMPTY_LAYER_BLOB_DIGEST
        blob_map[EMPTY_LAYER_BLOB_DIGEST] = shared_blob

    return blob_map
예제 #3
0
def _create_manifest(
    repository_id,
    manifest_interface_instance,
    storage,
    temp_tag_expiration_sec=TEMP_TAG_EXPIRATION_SEC,
    for_tagging=False,
    raise_on_error=False,
    retriever=None,
):
    """Validates the given manifest and creates its Manifest row (plus related rows).

    The steps performed are, in order: validate the manifest, get/create every child manifest
    (for manifest lists), verify that all referenced blobs exist under the repository, add the
    shared empty layer blob if the manifest requires it, populate the legacy image (if the
    manifest has one), and finally create the Manifest, ManifestBlob, ManifestLegacyImage and
    ManifestChild rows under a single transaction. Manifest labels are created afterwards,
    outside of that transaction.

    Args:
        repository_id: ID of the repository under which the manifest is created.
        manifest_interface_instance: The parsed manifest to create.
        storage: Storage engine; used to build the default retriever and to get/create the
            shared empty layer blob.
        temp_tag_expiration_sec: Expiration passed to the temporary tag created when
            `for_tagging` is False.
        for_tagging: If True, no temporary tag is created for the new manifest.
        raise_on_error: If True, validation/lookup failures raise CreateManifestException
            instead of returning None.
        retriever: Optional content retriever; defaults to a RepositoryContentRetriever for
            the repository.

    Returns:
        A CreatedManifest. If the manifest already existed, `newly_created` is False and
        `labels_to_apply` is None. Returns None on failure when `raise_on_error` is False.
        NOTE: the legacy image failure paths below return None directly in this function
        even when `raise_on_error` is True.

    Raises:
        CreateManifestException: On failure, when `raise_on_error` is True.
    """
    # Validate the manifest.
    retriever = retriever or RepositoryContentRetriever.for_repository(
        repository_id, storage)
    try:
        manifest_interface_instance.validate(retriever)
    except (ManifestException, MalformedSchema2ManifestList, BlobDoesNotExist,
            IOError) as ex:
        logger.exception("Could not validate manifest `%s`",
                         manifest_interface_instance.digest)
        if raise_on_error:
            raise CreateManifestException(str(ex))

        return None

    # Load, parse and get/create the child manifests, if any.
    child_manifest_refs = manifest_interface_instance.child_manifests(
        retriever)
    child_manifest_rows = {}
    child_manifest_label_dicts = []

    if child_manifest_refs is not None:
        for child_manifest_ref in child_manifest_refs:
            # Load and parse the child manifest.
            try:
                child_manifest = child_manifest_ref.manifest_obj
            except (
                    ManifestException,
                    MalformedSchema2ManifestList,
                    BlobDoesNotExist,
                    IOError,
            ) as ex:
                logger.exception(
                    "Could not load manifest list for manifest `%s`",
                    manifest_interface_instance.digest,
                )
                if raise_on_error:
                    raise CreateManifestException(str(ex))

                return None

            # Retrieve its labels.
            child_labels = child_manifest.get_manifest_labels(retriever)
            if child_labels is None:
                if raise_on_error:
                    raise CreateManifestException(
                        "Unable to retrieve manifest labels")

                # NOTE: No exception is active here, so this must be `error` rather than
                # `exception` (which would attach an empty traceback to the log record).
                logger.error(
                    "Could not load manifest labels for child manifest")
                return None

            # Get/create the child manifest in the database.
            child_manifest_info = get_or_create_manifest(
                repository_id,
                child_manifest,
                storage,
                raise_on_error=raise_on_error)
            if child_manifest_info is None:
                if raise_on_error:
                    raise CreateManifestException(
                        "Unable to retrieve child manifest")

                logger.error("Could not get/create child manifest")
                return None

            child_manifest_rows[child_manifest_info.manifest.
                                digest] = child_manifest_info.manifest
            child_manifest_label_dicts.append(child_labels)

    # Ensure all the blobs in the manifest exist.
    digests = set(manifest_interface_instance.local_blob_digests)
    blob_map = {}

    # If the special empty layer is required, simply load it directly. This is much faster
    # than trying to load it on a per repository basis, and that is unnecessary anyway since
    # this layer is predefined.
    if EMPTY_LAYER_BLOB_DIGEST in digests:
        digests.remove(EMPTY_LAYER_BLOB_DIGEST)
        blob_map[EMPTY_LAYER_BLOB_DIGEST] = get_shared_blob(
            EMPTY_LAYER_BLOB_DIGEST)
        if not blob_map[EMPTY_LAYER_BLOB_DIGEST]:
            if raise_on_error:
                raise CreateManifestException(
                    "Unable to retrieve specialized empty blob")

            logger.warning("Could not find the special empty blob in storage")
            return None

    if digests:
        query = lookup_repo_storages_by_content_checksum(
            repository_id, digests)
        blob_map.update({s.content_checksum: s for s in query})
        for digest_str in digests:
            if digest_str not in blob_map:
                logger.warning(
                    "Unknown blob `%s` under manifest `%s` for repository `%s`",
                    digest_str,
                    manifest_interface_instance.digest,
                    repository_id,
                )

                if raise_on_error:
                    raise CreateManifestException("Unknown blob `%s`" %
                                                  digest_str)

                return None

    # Special check: If the empty layer blob is needed for this manifest, add it to the
    # blob map. This is necessary because Docker decided to elide sending of this special
    # empty layer in schema version 2, but we need to have it referenced for GC and schema version 1.
    if EMPTY_LAYER_BLOB_DIGEST not in blob_map:
        try:
            requires_empty_layer = manifest_interface_instance.get_requires_empty_layer_blob(
                retriever)
        except ManifestException as ex:
            if raise_on_error:
                raise CreateManifestException(str(ex))

            return None

        if requires_empty_layer is None:
            if raise_on_error:
                raise CreateManifestException(
                    "Could not load configuration blob")

            return None

        if requires_empty_layer:
            shared_blob = get_or_create_shared_blob(EMPTY_LAYER_BLOB_DIGEST,
                                                    EMPTY_LAYER_BYTES, storage)
            assert not shared_blob.uploading
            assert shared_blob.content_checksum == EMPTY_LAYER_BLOB_DIGEST
            blob_map[EMPTY_LAYER_BLOB_DIGEST] = shared_blob

    # Determine and populate the legacy image if necessary. Manifest lists will not have a legacy
    # image.
    legacy_image = None
    if manifest_interface_instance.has_legacy_image:
        legacy_image_id = _populate_legacy_image(repository_id,
                                                 manifest_interface_instance,
                                                 blob_map, retriever,
                                                 raise_on_error)
        if legacy_image_id is None:
            return None

        legacy_image = get_image(repository_id, legacy_image_id)
        if legacy_image is None:
            return None

    # Create the manifest and its blobs.
    media_type = Manifest.media_type.get_id(
        manifest_interface_instance.media_type)
    # De-duplicate the storage rows: several digests may map to the same storage row.
    storage_ids = {blob.id for blob in blob_map.values()}

    with db_transaction():
        # Check for the manifest. This is necessary because Postgres doesn't handle IntegrityErrors
        # well under transactions.
        try:
            manifest = Manifest.get(repository=repository_id,
                                    digest=manifest_interface_instance.digest)
            return CreatedManifest(manifest=manifest,
                                   newly_created=False,
                                   labels_to_apply=None)
        except Manifest.DoesNotExist:
            pass

        # Create the manifest.
        try:
            manifest = Manifest.create(
                repository=repository_id,
                digest=manifest_interface_instance.digest,
                media_type=media_type,
                manifest_bytes=manifest_interface_instance.bytes.
                as_encoded_str(),
            )
        except IntegrityError as ie:
            # The create can still fail after the existence check above; retry the lookup
            # before treating the integrity error as a real failure.
            try:
                manifest = Manifest.get(
                    repository=repository_id,
                    digest=manifest_interface_instance.digest)
            except Manifest.DoesNotExist:
                logger.error(
                    "Got integrity error when trying to create manifest: %s",
                    ie)
                if raise_on_error:
                    raise CreateManifestException(
                        "Attempt to create an invalid manifest. Please report this issue."
                    )

                return None

            return CreatedManifest(manifest=manifest,
                                   newly_created=False,
                                   labels_to_apply=None)

        # Insert the blobs.
        blobs_to_insert = [
            dict(manifest=manifest, repository=repository_id, blob=storage_id)
            for storage_id in storage_ids
        ]
        if blobs_to_insert:
            ManifestBlob.insert_many(blobs_to_insert).execute()

        # Set the legacy image (if applicable).
        if legacy_image is not None:
            ManifestLegacyImage.create(repository=repository_id,
                                       image=legacy_image,
                                       manifest=manifest)

        # Insert the manifest child rows (if applicable).
        if child_manifest_rows:
            children_to_insert = [
                dict(manifest=manifest,
                     child_manifest=child_manifest,
                     repository=repository_id)
                for child_manifest in child_manifest_rows.values()
            ]
            ManifestChild.insert_many(children_to_insert).execute()

        # If this manifest is being created not for immediate tagging, add a temporary tag to the
        # manifest to ensure it isn't being GCed. If the manifest *is* for tagging, then since we're
        # creating a new one here, it cannot be GCed (since it isn't referenced by anything yet), so
        # its safe to elide the temp tag operation. If we ever change GC code to collect *all* manifests
        # in a repository for GC, then we will have to reevaluate this optimization at that time.
        if not for_tagging:
            create_temporary_tag_if_necessary(manifest,
                                              temp_tag_expiration_sec)

    # Define the labels for the manifest (if any).
    # TODO: Once the old data model is gone, turn this into a batch operation and make the label
    # application to the manifest occur under the transaction.
    labels = manifest_interface_instance.get_manifest_labels(retriever)
    if labels:
        # NOTE: `items()` (rather than the Python 2-only `iteritems()`) keeps this loop working
        # on both Python 2 and Python 3.
        for key, value in labels.items():
            # NOTE: There can technically be empty label keys via Dockerfile's. We ignore any
            # such `labels`, as they don't really mean anything.
            if not key:
                continue

            label_media_type = "application/json" if is_json(
                value) else "text/plain"
            create_manifest_label(manifest, key, value, "manifest",
                                  label_media_type)

    # Return the dictionary of labels to apply (i.e. those labels that cause an action to be taken
    # on the manifest or its resulting tags). We only return those labels either defined on
    # the manifest or shared amongst all the child manifests. We intersect amongst all child manifests
    # to ensure that any action performed is defined in all manifests.
    labels_to_apply = labels or {}
    if child_manifest_label_dicts:
        first_child_labels = child_manifest_label_dicts[0]
        other_child_labels = child_manifest_label_dicts[1:]

        # Keep only the key+value pairs that appear, with the exact same value, in every
        # child manifest's labels.
        labels_to_apply = {
            key: value
            for key, value in first_child_labels.items()
            if all(key in other and other[key] == value
                   for other in other_child_labels)
        }

    return CreatedManifest(manifest=manifest,
                           newly_created=True,
                           labels_to_apply=labels_to_apply)