Example #1
0
def test_create_manifest_label(key, value, source_type, expected_error,
                               initialized_db):
    """Exercise `create_manifest_label` for one (key, value, source_type) case.

    When `expected_error` is truthy, creation must raise a
    `DataModelException` whose message matches it. Otherwise the new label
    must be linked to the manifest and be retrievable through
    `list_manifest_labels` (with and without a key prefix) and
    `get_manifest_label`.

    NOTE(review): the arguments are presumably supplied by a
    `pytest.mark.parametrize` decorator and an `initialized_db` fixture that
    are not visible in this excerpt -- confirm.
    """
    target = Manifest.get()

    if not expected_error:
        created = create_manifest_label(target, key, value, source_type)

        # The join table must reference the freshly created label.
        linked_ids = []
        for row in ManifestLabel.select().where(
                ManifestLabel.manifest == target):
            linked_ids.append(row.label_id)
        assert created.id in linked_ids

        # Listing all labels of the manifest must cost exactly one query.
        with assert_query_count(1):
            assert created in list_manifest_labels(target)

        # Prefix filtering: an unrelated prefix excludes the label, while the
        # first two characters of its own key include it.
        assert created not in list_manifest_labels(target, 'someprefix')
        assert created in list_manifest_labels(target, key[:2])

        # Lookup by UUID must also cost exactly one query.
        with assert_query_count(1):
            assert get_manifest_label(created.uuid, target) == created
        return

    # Failure case: the call itself has to raise with a matching message.
    with pytest.raises(DataModelException) as raised:
        create_manifest_label(target, key, value, source_type)

    assert raised.match(expected_error)
Example #2
0
def test_list_manifest_labels(initialized_db):
    """Labels created on one manifest are listed for it and for no other.

    Three labels with different source types are attached to the first
    manifest; each must show up in `list_manifest_labels` for that manifest
    and must be absent from the listing of a different manifest.
    """
    manifest = Manifest.get()

    # (key, value, source_type) for each label to create, in creation order.
    label_specs = (
        ('foo', '1', 'manifest'),
        ('bar', '2', 'api'),
        ('baz', '3', 'internal'),
    )
    created_labels = [
        create_manifest_label(manifest, label_key, label_value, source)
        for label_key, label_value, source in label_specs
    ]

    for created in created_labels:
        assert created in list_manifest_labels(manifest)

    # Any manifest other than the one the labels were attached to.
    other_manifest = Manifest.select().where(Manifest.id != manifest.id).get()
    for created in created_labels:
        assert created not in list_manifest_labels(other_manifest)
Example #3
0
def _create_manifest(
    repository_id,
    manifest_interface_instance,
    storage,
    temp_tag_expiration_sec=TEMP_TAG_EXPIRATION_SEC,
    for_tagging=False,
    raise_on_error=False,
    retriever=None,
):
    """Validate a parsed manifest and persist it with its blob and child rows.

    Steps, in order: validate the manifest, get/create every child manifest,
    build the blob map, insert the `Manifest`, `ManifestBlob` and
    `ManifestChild` rows in one transaction, then create the manifest's
    labels outside of that transaction.

    Args:
        repository_id: Repository the manifest rows are created under.
        manifest_interface_instance: Parsed manifest; supplies the digest,
            media type, bytes, child manifests and labels.
        storage: Passed to the content retriever, to `get_or_create_manifest`
            for child manifests and to `_build_blob_map`.
        temp_tag_expiration_sec: Forwarded to
            `create_temporary_tag_if_necessary` when `for_tagging` is false.
        for_tagging: When false, a temporary tag is created inside the
            transaction; when true, that step is skipped.
        raise_on_error: When true, failures handled in this function raise
            `CreateManifestException` instead of returning `None`.
        retriever: Optional content retriever; when falsy, one is built via
            `RepositoryContentRetriever.for_repository`.

    Returns:
        A `CreatedManifest`, or `None` on failure when `raise_on_error` is
        false. If the manifest row already exists (or is created
        concurrently), `newly_created` is False and `labels_to_apply` is
        None.

    Raises:
        CreateManifestException: On failure when `raise_on_error` is true.
    """
    # Validate the manifest.
    retriever = retriever or RepositoryContentRetriever.for_repository(
        repository_id, storage)
    try:
        manifest_interface_instance.validate(retriever)
    except (ManifestException, MalformedSchema2ManifestList, BlobDoesNotExist,
            IOError) as ex:
        logger.exception("Could not validate manifest `%s`",
                         manifest_interface_instance.digest)
        if raise_on_error:
            raise CreateManifestException(str(ex)) from ex

        return None

    # Load, parse and get/create the child manifests, if any.
    child_manifest_refs = manifest_interface_instance.child_manifests(
        retriever)
    child_manifest_rows = {}
    child_manifest_label_dicts = []

    if child_manifest_refs is not None:
        for child_manifest_ref in child_manifest_refs:
            # Load and parse the child manifest.
            try:
                child_manifest = child_manifest_ref.manifest_obj
            except (
                    ManifestException,
                    MalformedSchema2ManifestList,
                    BlobDoesNotExist,
                    IOError,
            ) as ex:
                logger.exception(
                    "Could not load manifest list for manifest `%s`",
                    manifest_interface_instance.digest,
                )
                if raise_on_error:
                    raise CreateManifestException(str(ex)) from ex

                return None

            # Retrieve its labels.
            child_labels = child_manifest.get_manifest_labels(retriever)
            if child_labels is None:
                if raise_on_error:
                    raise CreateManifestException(
                        "Unable to retrieve manifest labels")

                # No exception is active here, so log with `error` rather
                # than `exception` (which would append an empty traceback).
                logger.error(
                    "Could not load manifest labels for child manifest")
                return None

            # Get/create the child manifest in the database.
            child_manifest_info = get_or_create_manifest(
                repository_id,
                child_manifest,
                storage,
                raise_on_error=raise_on_error)
            if child_manifest_info is None:
                if raise_on_error:
                    raise CreateManifestException(
                        "Unable to retrieve child manifest")

                logger.error("Could not get/create child manifest")
                return None

            # Keyed by digest, so a child referenced twice is linked once.
            child_manifest_rows[child_manifest_info.manifest.
                                digest] = child_manifest_info.manifest
            child_manifest_label_dicts.append(child_labels)

    # Build the map from required blob digests to the blob objects.
    blob_map = _build_blob_map(
        repository_id,
        manifest_interface_instance,
        retriever,
        storage,
        raise_on_error,
        require_empty_layer=False,
    )
    if blob_map is None:
        # NOTE(review): presumably `_build_blob_map` raises on its own when
        # `raise_on_error` is set -- confirm against its implementation.
        return None

    # Create the manifest and its blobs.
    media_type = Manifest.media_type.get_id(
        manifest_interface_instance.media_type)
    storage_ids = {blob.id for blob in blob_map.values()}

    # Check for the manifest, in case it was created since we checked earlier.
    try:
        manifest = Manifest.get(repository=repository_id,
                                digest=manifest_interface_instance.digest)
        return CreatedManifest(manifest=manifest,
                               newly_created=False,
                               labels_to_apply=None)
    except Manifest.DoesNotExist:
        pass

    try:
        with db_transaction():
            # Create the manifest.
            try:
                manifest = Manifest.create(
                    repository=repository_id,
                    digest=manifest_interface_instance.digest,
                    media_type=media_type,
                    manifest_bytes=manifest_interface_instance.bytes.
                    as_encoded_str(),
                    config_media_type=manifest_interface_instance.
                    config_media_type,
                    layers_compressed_size=manifest_interface_instance.
                    layers_compressed_size,
                )
            except IntegrityError as ie:
                # NOTE: An IntegrityError means (barring a bug) that the manifest was created by
                # another caller while we were attempting to create it. Since we need to return
                # the manifest, we raise a specialized exception here to break out of the
                # transaction so we can retrieve it.
                raise _ManifestAlreadyExists(ie) from ie

            # Insert the blobs.
            blobs_to_insert = [
                dict(manifest=manifest,
                     repository=repository_id,
                     blob=storage_id) for storage_id in storage_ids
            ]
            if blobs_to_insert:
                try:
                    ManifestBlob.insert_many(blobs_to_insert).execute()
                except IntegrityError as ie:
                    raise _ManifestAlreadyExists(ie) from ie

            # Insert the manifest child rows (if applicable).
            if child_manifest_rows:
                children_to_insert = [
                    dict(manifest=manifest,
                         child_manifest=child_manifest,
                         repository=repository_id)
                    for child_manifest in child_manifest_rows.values()
                ]
                try:
                    ManifestChild.insert_many(children_to_insert).execute()
                except IntegrityError as ie:
                    raise _ManifestAlreadyExists(ie) from ie

            # If this manifest is being created not for immediate tagging, add a temporary tag to the
            # manifest to ensure it isn't being GCed. If the manifest *is* for tagging, then since we're
            # creating a new one here, it cannot be GCed (since it isn't referenced by anything yet), so
            # its safe to elide the temp tag operation. If we ever change GC code to collect *all* manifests
            # in a repository for GC, then we will have to reevaluate this optimization at that time.
            if not for_tagging:
                create_temporary_tag_if_necessary(manifest,
                                                  temp_tag_expiration_sec)

        # Define the labels for the manifest (if any).
        # TODO: Once the old data model is gone, turn this into a batch operation and make the label
        # application to the manifest occur under the transaction.
        labels = manifest_interface_instance.get_manifest_labels(retriever)
        if labels:
            for key, value in labels.items():
                # NOTE: There can technically be empty label keys via Dockerfile's. We ignore any
                # such `labels`, as they don't really mean anything.
                if not key:
                    continue

                # Kept separate from `media_type` above, which holds the
                # manifest's own media type id.
                label_media_type = ("application/json"
                                    if is_json(value) else "text/plain")
                create_manifest_label(manifest, key, value, "manifest",
                                      label_media_type)

        # Return the dictionary of labels to apply (i.e. those labels that cause an action to be taken
        # on the manifest or its resulting tags). We only return those labels either defined on
        # the manifest or shared amongst all the child manifests. We intersect amongst all child manifests
        # to ensure that any action performed is defined in all manifests.
        labels_to_apply = labels or {}
        if child_manifest_label_dicts:
            labels_to_apply = child_manifest_label_dicts[0].items()
            for child_manifest_label_dict in child_manifest_label_dicts[1:]:
                # Intersect the key+values of the labels to ensure we get the exact same result
                # for all the child manifests.
                labels_to_apply = (labels_to_apply
                                   & child_manifest_label_dict.items())

            labels_to_apply = dict(labels_to_apply)

        return CreatedManifest(manifest=manifest,
                               newly_created=True,
                               labels_to_apply=labels_to_apply)
    except _ManifestAlreadyExists as mae:
        try:
            manifest = Manifest.get(repository=repository_id,
                                    digest=manifest_interface_instance.digest)
        except Manifest.DoesNotExist:
            # NOTE: If we've reached this point, then somehow we had an IntegrityError without it
            # being due to a duplicate manifest. We therefore log the error.
            logger.error(
                "Got integrity error when trying to create manifest: %s",
                mae.internal_exception)
            if raise_on_error:
                raise CreateManifestException(
                    "Attempt to create an invalid manifest. Please report this issue."
                )

            return None

        return CreatedManifest(manifest=manifest,
                               newly_created=False,
                               labels_to_apply=None)
Example #4
0
def _create_manifest(
    repository_id,
    manifest_interface_instance,
    storage,
    temp_tag_expiration_sec=TEMP_TAG_EXPIRATION_SEC,
    for_tagging=False,
    raise_on_error=False,
    retriever=None,
):
    """Validate a parsed manifest and persist it with its related rows.

    Steps, in order: validate the manifest, get/create every child manifest,
    resolve all referenced blobs (including the special empty layer),
    populate the legacy image if the manifest has one, insert the
    `Manifest`, `ManifestBlob`, `ManifestLegacyImage` and `ManifestChild`
    rows in one transaction, then create the manifest's labels outside of
    that transaction.

    Args:
        repository_id: Repository the manifest rows are created under.
        manifest_interface_instance: Parsed manifest; supplies the digest,
            media type, bytes, blob digests, child manifests and labels.
        storage: Passed to the content retriever, to `get_or_create_manifest`
            for child manifests and to `get_or_create_shared_blob`.
        temp_tag_expiration_sec: Forwarded to
            `create_temporary_tag_if_necessary` when `for_tagging` is false.
        for_tagging: When false, a temporary tag is created inside the
            transaction; when true, that step is skipped.
        raise_on_error: When true, most failures raise
            `CreateManifestException` instead of returning `None` (the
            legacy-image lookups below return `None` regardless).
        retriever: Optional content retriever; when falsy, one is built via
            `RepositoryContentRetriever.for_repository`.

    Returns:
        A `CreatedManifest`, or `None` on failure. If the manifest row
        already exists, `newly_created` is False and `labels_to_apply` is
        None.

    Raises:
        CreateManifestException: On failure when `raise_on_error` is true.
    """
    # Validate the manifest.
    retriever = retriever or RepositoryContentRetriever.for_repository(
        repository_id, storage)
    try:
        manifest_interface_instance.validate(retriever)
    except (ManifestException, MalformedSchema2ManifestList, BlobDoesNotExist,
            IOError) as ex:
        logger.exception("Could not validate manifest `%s`",
                         manifest_interface_instance.digest)
        if raise_on_error:
            raise CreateManifestException(str(ex))

        return None

    # Load, parse and get/create the child manifests, if any.
    child_manifest_refs = manifest_interface_instance.child_manifests(
        retriever)
    child_manifest_rows = {}
    child_manifest_label_dicts = []

    if child_manifest_refs is not None:
        for child_manifest_ref in child_manifest_refs:
            # Load and parse the child manifest.
            try:
                child_manifest = child_manifest_ref.manifest_obj
            except (
                    ManifestException,
                    MalformedSchema2ManifestList,
                    BlobDoesNotExist,
                    IOError,
            ) as ex:
                logger.exception(
                    "Could not load manifest list for manifest `%s`",
                    manifest_interface_instance.digest,
                )
                if raise_on_error:
                    raise CreateManifestException(str(ex))

                return None

            # Retrieve its labels.
            child_labels = child_manifest.get_manifest_labels(retriever)
            if child_labels is None:
                if raise_on_error:
                    raise CreateManifestException(
                        "Unable to retrieve manifest labels")

                # No exception is active here, so log with `error` rather
                # than `exception` (which would append an empty traceback).
                logger.error(
                    "Could not load manifest labels for child manifest")
                return None

            # Get/create the child manifest in the database.
            child_manifest_info = get_or_create_manifest(
                repository_id,
                child_manifest,
                storage,
                raise_on_error=raise_on_error)
            if child_manifest_info is None:
                if raise_on_error:
                    raise CreateManifestException(
                        "Unable to retrieve child manifest")

                logger.error("Could not get/create child manifest")
                return None

            # Keyed by digest, so a child referenced twice is linked once.
            child_manifest_rows[child_manifest_info.manifest.
                                digest] = child_manifest_info.manifest
            child_manifest_label_dicts.append(child_labels)

    # Ensure all the blobs in the manifest exist.
    digests = set(manifest_interface_instance.local_blob_digests)
    blob_map = {}

    # If the special empty layer is required, simply load it directly. This is much faster
    # than trying to load it on a per repository basis, and that is unnecessary anyway since
    # this layer is predefined.
    if EMPTY_LAYER_BLOB_DIGEST in digests:
        digests.remove(EMPTY_LAYER_BLOB_DIGEST)
        blob_map[EMPTY_LAYER_BLOB_DIGEST] = get_shared_blob(
            EMPTY_LAYER_BLOB_DIGEST)
        if not blob_map[EMPTY_LAYER_BLOB_DIGEST]:
            if raise_on_error:
                raise CreateManifestException(
                    "Unable to retrieve specialized empty blob")

            logger.warning("Could not find the special empty blob in storage")
            return None

    if digests:
        query = lookup_repo_storages_by_content_checksum(
            repository_id, digests)
        blob_map.update({s.content_checksum: s for s in query})
        for digest_str in digests:
            if digest_str not in blob_map:
                logger.warning(
                    "Unknown blob `%s` under manifest `%s` for repository `%s`",
                    digest_str,
                    manifest_interface_instance.digest,
                    repository_id,
                )

                if raise_on_error:
                    raise CreateManifestException("Unknown blob `%s`" %
                                                  digest_str)

                return None

    # Special check: If the empty layer blob is needed for this manifest, add it to the
    # blob map. This is necessary because Docker decided to elide sending of this special
    # empty layer in schema version 2, but we need to have it referenced for GC and schema version 1.
    if EMPTY_LAYER_BLOB_DIGEST not in blob_map:
        try:
            requires_empty_layer = manifest_interface_instance.get_requires_empty_layer_blob(
                retriever)
        except ManifestException as ex:
            if raise_on_error:
                raise CreateManifestException(str(ex))

            return None

        if requires_empty_layer is None:
            if raise_on_error:
                raise CreateManifestException(
                    "Could not load configuration blob")

            return None

        if requires_empty_layer:
            shared_blob = get_or_create_shared_blob(EMPTY_LAYER_BLOB_DIGEST,
                                                    EMPTY_LAYER_BYTES, storage)
            assert not shared_blob.uploading
            assert shared_blob.content_checksum == EMPTY_LAYER_BLOB_DIGEST
            blob_map[EMPTY_LAYER_BLOB_DIGEST] = shared_blob

    # Determine and populate the legacy image if necessary. Manifest lists will not have a legacy
    # image.
    legacy_image = None
    if manifest_interface_instance.has_legacy_image:
        legacy_image_id = _populate_legacy_image(repository_id,
                                                 manifest_interface_instance,
                                                 blob_map, retriever,
                                                 raise_on_error)
        if legacy_image_id is None:
            return None

        legacy_image = get_image(repository_id, legacy_image_id)
        if legacy_image is None:
            return None

    # Create the manifest and its blobs.
    media_type = Manifest.media_type.get_id(
        manifest_interface_instance.media_type)
    storage_ids = {blob.id for blob in blob_map.values()}

    with db_transaction():
        # Check for the manifest. This is necessary because Postgres doesn't handle IntegrityErrors
        # well under transactions.
        try:
            manifest = Manifest.get(repository=repository_id,
                                    digest=manifest_interface_instance.digest)
            return CreatedManifest(manifest=manifest,
                                   newly_created=False,
                                   labels_to_apply=None)
        except Manifest.DoesNotExist:
            pass

        # Create the manifest.
        try:
            manifest = Manifest.create(
                repository=repository_id,
                digest=manifest_interface_instance.digest,
                media_type=media_type,
                manifest_bytes=manifest_interface_instance.bytes.
                as_encoded_str(),
            )
        except IntegrityError as ie:
            # Most likely another caller created the same manifest in the
            # meantime; try to return that row instead.
            try:
                manifest = Manifest.get(
                    repository=repository_id,
                    digest=manifest_interface_instance.digest)
            except Manifest.DoesNotExist:
                logger.error(
                    "Got integrity error when trying to create manifest: %s",
                    ie)
                if raise_on_error:
                    raise CreateManifestException(
                        "Attempt to create an invalid manifest. Please report this issue."
                    )

                return None

            return CreatedManifest(manifest=manifest,
                                   newly_created=False,
                                   labels_to_apply=None)

        # Insert the blobs.
        blobs_to_insert = [
            dict(manifest=manifest, repository=repository_id, blob=storage_id)
            for storage_id in storage_ids
        ]
        if blobs_to_insert:
            ManifestBlob.insert_many(blobs_to_insert).execute()

        # Set the legacy image (if applicable).
        if legacy_image is not None:
            ManifestLegacyImage.create(repository=repository_id,
                                       image=legacy_image,
                                       manifest=manifest)

        # Insert the manifest child rows (if applicable).
        if child_manifest_rows:
            children_to_insert = [
                dict(manifest=manifest,
                     child_manifest=child_manifest,
                     repository=repository_id)
                for child_manifest in child_manifest_rows.values()
            ]
            ManifestChild.insert_many(children_to_insert).execute()

        # If this manifest is being created not for immediate tagging, add a temporary tag to the
        # manifest to ensure it isn't being GCed. If the manifest *is* for tagging, then since we're
        # creating a new one here, it cannot be GCed (since it isn't referenced by anything yet), so
        # its safe to elide the temp tag operation. If we ever change GC code to collect *all* manifests
        # in a repository for GC, then we will have to reevaluate this optimization at that time.
        if not for_tagging:
            create_temporary_tag_if_necessary(manifest,
                                              temp_tag_expiration_sec)

    # Define the labels for the manifest (if any).
    # TODO: Once the old data model is gone, turn this into a batch operation and make the label
    # application to the manifest occur under the transaction.
    labels = manifest_interface_instance.get_manifest_labels(retriever)
    if labels:
        # `.items()` exists on both Python 2 and Python 3 dictionaries.
        for key, value in labels.items():
            # NOTE: There can technically be empty label keys via Dockerfile's. We ignore any
            # such `labels`, as they don't really mean anything.
            if not key:
                continue

            # Kept separate from `media_type` above, which holds the
            # manifest's own media type id.
            label_media_type = ("application/json"
                                if is_json(value) else "text/plain")
            create_manifest_label(manifest, key, value, "manifest",
                                  label_media_type)

    # Return the dictionary of labels to apply (i.e. those labels that cause an action to be taken
    # on the manifest or its resulting tags). We only return those labels either defined on
    # the manifest or shared amongst all the child manifests. We intersect amongst all child manifests
    # to ensure that any action performed is defined in all manifests.
    labels_to_apply = labels or {}
    if child_manifest_label_dicts:
        labels_to_apply = dict(child_manifest_label_dicts[0])
        for child_manifest_label_dict in child_manifest_label_dicts[1:]:
            # Intersect the key+values of the labels to ensure we get the exact same result
            # for all the child manifests. Written as an explicit filter so
            # it does not depend on Python 2-only dictionary view methods.
            labels_to_apply = {
                label_key: label_value
                for label_key, label_value in labels_to_apply.items()
                if label_key in child_manifest_label_dict
                and child_manifest_label_dict[label_key] == label_value
            }

    return CreatedManifest(manifest=manifest,
                           newly_created=True,
                           labels_to_apply=labels_to_apply)