Beispiel #1
0
Datei: tag.py Projekt: zhill/quay
def populate_manifest(repository, manifest, legacy_image, storage_ids):
    """
    Create the database rows for a manifest: the manifest itself, its legacy
    image link and its blob links.

    If a manifest with the same digest already exists under the repository,
    that row is returned and nothing is written. The same row lookup is used
    as the fallback when creating the manifest raises an IntegrityError.

    Returns the existing or newly created Manifest row.
    """
    media_type_id = Manifest.media_type.get_id(manifest.media_type)

    # Short-circuit when the manifest is already present.
    try:
        existing_row = Manifest.get(repository=repository, digest=manifest.digest)
    except Manifest.DoesNotExist:
        pass
    else:
        return existing_row

    with db_transaction():
        try:
            created_row = Manifest.create(
                digest=manifest.digest,
                repository=repository,
                manifest_bytes=manifest.bytes.as_encoded_str(),
                media_type=media_type_id,
            )
        except IntegrityError as ie:
            logger.debug(
                "Got integrity error when trying to write manifest: %s", ie)
            return Manifest.get(repository=repository, digest=manifest.digest)

        # Link the manifest to its legacy image.
        ManifestLegacyImage.create(
            manifest=created_row,
            repository=repository,
            image=legacy_image,
        )

        # Link the manifest to each referenced blob; skip the insert entirely
        # when there is nothing to link.
        blob_rows = [
            {"manifest": created_row, "repository": repository, "blob": blob_id}
            for blob_id in storage_ids
        ]
        if blob_rows:
            ManifestBlob.insert_many(blob_rows).execute()

        return created_row
def test_load_security_information_unknown_manifest(initialized_db):
    """
    Loading security information for a manifest whose row was deleted must
    report UNSUPPORTED_FOR_INDEXING.
    """
    repo_ref = registry_model.lookup_repository("devtable", "simple")
    latest_tag = registry_model.get_repo_tag(repo_ref, "latest")
    manifest = registry_model.get_manifest_for_tag(latest_tag)

    registry_model.populate_legacy_images_for_testing(manifest, storage)

    # Remove the manifest row (and its dependents) out from under the scanner.
    manifest_row = Manifest.get(id=manifest._db_id)
    manifest_row.delete_instance(recursive=True)

    scanner = V2SecurityScanner(app, instance_keys, storage)
    lookup_result = scanner.load_security_information(manifest)
    assert lookup_result.status == ScanLookupStatus.UNSUPPORTED_FOR_INDEXING
Beispiel #3
0
def test_create_manifest_label(key, value, source_type, expected_error,
                               initialized_db):
    """
    Create a label on a manifest and verify it can be listed and fetched,
    or verify the expected DataModelException when `expected_error` is set.
    """
    target_manifest = Manifest.get()

    # Error case: creation must fail with a message matching `expected_error`.
    if expected_error:
        with pytest.raises(DataModelException) as ex:
            create_manifest_label(target_manifest, key, value, source_type)

        assert ex.match(expected_error)
        return

    created = create_manifest_label(target_manifest, key, value, source_type)

    # The label must be linked to the manifest.
    linked_label_ids = [
        row.label_id
        for row in ManifestLabel.select().where(
            ManifestLabel.manifest == target_manifest)
    ]
    assert created.id in linked_label_ids

    # Listing must take exactly one query.
    with assert_query_count(1):
        assert created in list_manifest_labels(target_manifest)

    # Prefix filtering: a non-matching prefix hides it, a matching one shows it.
    assert created not in list_manifest_labels(target_manifest, 'someprefix')
    assert created in list_manifest_labels(target_manifest, key[:2])

    # Fetching by UUID must take exactly one query.
    with assert_query_count(1):
        assert get_manifest_label(created.uuid, target_manifest) == created
Beispiel #4
0
def test_change_tag_expiration(expiration_offset, expected_offset, initialized_db):
  """
  Change a tag's expiration and verify the resulting end timestamps on both the
  tag and its OCI tag.
  """
  repo = create_repository('devtable', 'somenewrepo', None)
  image = find_create_or_link_image('foobarimage1', repo, None, {}, 'local_us')

  manifest = Manifest.get()
  tag = create_or_update_tag_for_repo(repo, 'foo', image.docker_image_id,
                                      oci_manifest=manifest)

  # No offset means "clear the expiration".
  if expiration_offset is None:
    new_expiration = None
  else:
    new_expiration = datetime.utcnow() + convert_to_timedelta(expiration_offset)

  assert change_tag_expiration(tag, new_expiration)

  # Reload the tag and its OCI counterpart.
  reloaded = get_active_tag('devtable', 'somenewrepo', 'foo')
  oci_tag = _get_oci_tag(reloaded)

  if expected_offset is None:
    assert reloaded.lifetime_end_ts is None
    assert oci_tag.lifetime_end_ms is None
    return

  started = datetime.utcfromtimestamp(reloaded.lifetime_start_ts)
  ended = datetime.utcfromtimestamp(reloaded.lifetime_end_ts)
  expected_end = started + convert_to_timedelta(expected_offset)
  assert (expected_end - ended).total_seconds() < 5 # variance in test

  assert oci_tag.lifetime_end_ms == (reloaded.lifetime_end_ts * 1000)
Beispiel #5
0
    def _namespace_from_kwargs(self, args_dict):
        if "namespace_name" in args_dict:
            return args_dict["namespace_name"]

        if "repository_ref" in args_dict:
            return args_dict["repository_ref"].namespace_name

        if "tag" in args_dict:
            return args_dict["tag"].repository.namespace_name

        if "manifest" in args_dict:
            manifest = args_dict["manifest"]
            if manifest._is_tag_manifest:
                return TagManifest.get(
                    id=manifest._db_id).tag.repository.namespace_user.username
            else:
                return Manifest.get(
                    id=manifest._db_id).repository.namespace_user.username

        if "manifest_or_legacy_image" in args_dict:
            manifest_or_legacy_image = args_dict["manifest_or_legacy_image"]
            if isinstance(manifest_or_legacy_image, LegacyImage):
                return Image.get(id=manifest_or_legacy_image._db_id
                                 ).repository.namespace_user.username
            else:
                manifest = manifest_or_legacy_image
                if manifest._is_tag_manifest:
                    return TagManifest.get(
                        id=manifest._db_id
                    ).tag.repository.namespace_user.username
                else:
                    return Manifest.get(
                        id=manifest._db_id).repository.namespace_user.username

        if "derived_image" in args_dict:
            return DerivedStorageForImage.get(
                id=args_dict["derived_image"]._db_id
            ).source_image.repository.namespace_user.username

        if "blob" in args_dict:
            return ""  # Blob functions are shared, so no need to do anything.

        if "blob_upload" in args_dict:
            return ""  # Blob functions are shared, so no need to do anything.

        raise Exception("Unknown namespace for dict `%s`" % args_dict)
Beispiel #6
0
  def _namespace_from_kwargs(self, args_dict):
    if 'namespace_name' in args_dict:
      return args_dict['namespace_name']

    if 'repository_ref' in args_dict:
      return args_dict['repository_ref'].namespace_name

    if 'tag' in args_dict:
      return args_dict['tag'].repository.namespace_name

    if 'manifest' in args_dict:
      manifest = args_dict['manifest']
      if manifest._is_tag_manifest:
        return TagManifest.get(id=manifest._db_id).tag.repository.namespace_user.username
      else:
        return Manifest.get(id=manifest._db_id).repository.namespace_user.username

    if 'manifest_or_legacy_image' in args_dict:
      manifest_or_legacy_image = args_dict['manifest_or_legacy_image']
      if isinstance(manifest_or_legacy_image, LegacyImage):
        return Image.get(id=manifest_or_legacy_image._db_id).repository.namespace_user.username
      else:
        manifest = manifest_or_legacy_image
        if manifest._is_tag_manifest:
          return TagManifest.get(id=manifest._db_id).tag.repository.namespace_user.username
        else:
          return Manifest.get(id=manifest._db_id).repository.namespace_user.username

    if 'derived_image' in args_dict:
      return (DerivedStorageForImage
              .get(id=args_dict['derived_image']._db_id)
              .source_image
              .repository
              .namespace_user
              .username)

    if 'blob' in args_dict:
      return '' # Blob functions are shared, so no need to do anything.

    if 'blob_upload' in args_dict:
      return '' # Blob functions are shared, so no need to do anything.

    raise Exception('Unknown namespace for dict `%s`' % args_dict)
Beispiel #7
0
    def load_security_information(self, manifest_or_legacy_image,
                                  include_vulnerabilities):
        """
        Load security information, routing to the V4 model for whitelisted
        namespaces.

        When the argument is a ManifestDataType, the namespace of the
        manifest's repository is looked up; if it is in
        `self._v4_namespace_whitelist`, the call is delegated to
        `self._v4_model`. In every other case the call is delegated to
        `self._model`.

        Returns whatever the selected model's `load_security_information`
        returns.
        """
        if isinstance(manifest_or_legacy_image, ManifestDataType):
            # Use an explicit primary-key filter, as the other manifest
            # lookups do, rather than a bare positional value whose meaning
            # depends on peewee's integer shortcut.
            manifest_row = Manifest.get(id=manifest_or_legacy_image._db_id)
            namespace = manifest_row.repository.namespace_user.username

            if namespace in self._v4_namespace_whitelist:
                return self._v4_model.load_security_information(
                    manifest_or_legacy_image, include_vulnerabilities)

        return self._model.load_security_information(manifest_or_legacy_image,
                                                     include_vulnerabilities)
Beispiel #8
0
def test_create_reversion_tag(initialized_db):
  """
  A tag created with reversion=True must carry the flag, and its OCI tag must
  mirror the name and reversion flag and not be hidden.
  """
  repo = create_repository('devtable', 'somenewrepo', None)
  manifest = Manifest.get()
  image = find_create_or_link_image('foobarimage1', repo, None, {}, 'local_us')

  reverted_tag = create_or_update_tag_for_repo(repo, 'foo', image.docker_image_id,
                                               oci_manifest=manifest, reversion=True)
  assert reverted_tag.reversion

  # The OCI-side tag must agree with the legacy tag.
  oci_tag = _get_oci_tag(reverted_tag)
  assert oci_tag.name == reverted_tag.name
  assert not oci_tag.hidden
  assert oci_tag.reversion == reverted_tag.reversion
Beispiel #9
0
def compute_layer_id(layer):
    """
    Returns the ID for the layer in the security scanner.

    The ID has the form "<docker_image_id>.<storage uuid>", taken from the
    legacy image linked to the manifest. Returns None when the manifest has
    no legacy image row.
    """
    assert isinstance(layer, ManifestDataType)

    manifest_row = Manifest.get(id=layer._db_id)
    try:
        legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image
    except ManifestLegacyImage.DoesNotExist:
        return None

    # Both halves of the ID must be present.
    image_id = legacy_image.docker_image_id
    assert image_id
    storage_uuid = legacy_image.storage.uuid
    assert storage_uuid
    return "%s.%s" % (image_id, storage_uuid)
Beispiel #10
0
def test_list_manifest_labels(initialized_db):
    """
    Labels created on one manifest are listed for that manifest and not for
    a different one.
    """
    manifest = Manifest.get()

    created_labels = [
        create_manifest_label(manifest, 'foo', '1', 'manifest'),
        create_manifest_label(manifest, 'bar', '2', 'api'),
        create_manifest_label(manifest, 'baz', '3', 'internal'),
    ]

    # Every label is visible on the manifest it was created for.
    for created in created_labels:
        assert created in list_manifest_labels(manifest)

    # None of them is visible on any other manifest.
    other_manifest = Manifest.select().where(Manifest.id != manifest.id).get()
    for created in created_labels:
        assert created not in list_manifest_labels(other_manifest)
Beispiel #11
0
def compute_layer_id(layer):
  """
  Returns the ID for the layer in the security scanner.

  Manifest and legacy-image data types are first resolved to an image row; any
  other object is used as-is. Returns None when a (non-tag) manifest has no
  legacy image row.
  """
  # NOTE: this is temporary until we switch to Clair V3.
  image_row = layer
  if isinstance(layer, ManifestDataType):
    if layer._is_tag_manifest:
      image_row = TagManifest.get(id=layer._db_id).tag.image
    else:
      manifest_row = Manifest.get(id=layer._db_id)
      try:
        image_row = ManifestLegacyImage.get(manifest=manifest_row).image
      except ManifestLegacyImage.DoesNotExist:
        return None
  elif isinstance(layer, LegacyImage):
    image_row = Image.get(id=layer._db_id)

  # Both halves of the ID must be present.
  image_id = image_row.docker_image_id
  assert image_id
  storage_uuid = image_row.storage.uuid
  assert storage_uuid
  return '%s.%s' % (image_id, storage_uuid)
def test_remove_obsolete_tags(initialized_db):
    """
    As part of the mirror, the set of tags on the remote repository is compared to the local
    existing tags.

    Those not present on the remote are removed locally.
    """

    mirror, repository = create_mirror_repo_robot(["updated", "created"], repo_name="removed")
    manifest = Manifest.get()
    image = find_create_or_link_image("removed", repository, None, {}, "local_us")
    tag = create_or_update_tag_for_repo(
        repository, "oldtag", image.docker_image_id, oci_manifest=manifest, reversion=True
    )

    # "oldtag" is not among the incoming tags, so it is expected to be deleted.
    incoming_tags = ["one", "two"]
    deleted_tags = delete_obsolete_tags(mirror, incoming_tags)

    # Use a distinct loop variable: reusing `tag` would shadow the tag created
    # above, and where list-comprehension variables leak into the enclosing
    # scope (Python 2) the assertion would compare the deleted tag with itself.
    assert [deleted.name for deleted in deleted_tags] == [tag.name]
Beispiel #13
0
def restore_tag_to_manifest(repo_obj, tag_name, manifest_digest):
    """
    Restores a tag to a specific manifest digest.

    The digest must already exist under the repository for the given tag name;
    otherwise a DataModelException is raised. Returns the image the tag
    pointed to before the restore, or None if that lookup raised a
    DataModelException.
    """
    with db_transaction():
        # The digest must already have existed under this repository and tag.
        digest_lookup = (
            TagManifest.select(TagManifest, RepositoryTag, Image)
            .join(RepositoryTag)
            .join(Image)
            .where(RepositoryTag.repository == repo_obj)
            .where(RepositoryTag.name == tag_name)
            .where(TagManifest.digest == manifest_digest)
        )
        try:
            tag_manifest = digest_lookup.get()
        except TagManifest.DoesNotExist:
            raise DataModelException(
                "Cannot restore to unknown or invalid digest")

        # Remember what the tag currently points at, if anything.
        try:
            previous_image = get_repo_tag_image(repo_obj, tag_name)
        except DataModelException:
            previous_image = None

        docker_image_id = tag_manifest.tag.image.docker_image_id

        # The new-style manifest row is optional.
        try:
            oci_manifest = Manifest.get(repository=repo_obj,
                                        digest=manifest_digest)
        except Manifest.DoesNotExist:
            oci_manifest = None

        # Re-point the tag, then attach the tag manifest to the new tag row.
        restored_tag = create_or_update_tag_for_repo(
            repo_obj,
            tag_name,
            docker_image_id,
            reversion=True,
            oci_manifest=oci_manifest,
        )
        tag_manifest.tag = restored_tag
        tag_manifest.save()
        return previous_image
Beispiel #14
0
def _create_manifest(
    repository_id,
    manifest_interface_instance,
    storage,
    temp_tag_expiration_sec=TEMP_TAG_EXPIRATION_SEC,
    for_tagging=False,
    raise_on_error=False,
    retriever=None,
):
    """
    Validate a parsed manifest and create its rows in the database.

    Steps, in order: validate the manifest; get/create any child manifests;
    resolve every referenced blob to a storage row; populate the legacy image
    (if the manifest has one); create the manifest row together with its blob,
    legacy image and child rows inside a transaction; finally create the
    manifest's labels.

    Args:
        repository_id: repository under which the manifest is created.
        manifest_interface_instance: the parsed manifest to store.
        storage: storage engine, passed to the retriever and blob helpers.
        temp_tag_expiration_sec: expiration passed to the temporary tag helper
            when `for_tagging` is false.
        for_tagging: when true, no temporary tag is created for the manifest.
        raise_on_error: when true, most failures raise CreateManifestException
            instead of returning None.
        retriever: content retriever; one is built for the repository when
            not supplied.

    Returns:
        A CreatedManifest, or None on failure. When the manifest already
        exists, `newly_created` is False and `labels_to_apply` is None.

    Raises:
        CreateManifestException: on failure when `raise_on_error` is true.
    """
    # Validate the manifest.
    retriever = retriever or RepositoryContentRetriever.for_repository(
        repository_id, storage)
    try:
        manifest_interface_instance.validate(retriever)
    except (ManifestException, MalformedSchema2ManifestList, BlobDoesNotExist,
            IOError) as ex:
        logger.exception("Could not validate manifest `%s`",
                         manifest_interface_instance.digest)
        if raise_on_error:
            raise CreateManifestException(str(ex))

        return None

    # Load, parse and get/create the child manifests, if any.
    child_manifest_refs = manifest_interface_instance.child_manifests(
        retriever)
    child_manifest_rows = {}
    child_manifest_label_dicts = []

    if child_manifest_refs is not None:
        for child_manifest_ref in child_manifest_refs:
            # Load and parse the child manifest.
            try:
                child_manifest = child_manifest_ref.manifest_obj
            except (
                    ManifestException,
                    MalformedSchema2ManifestList,
                    BlobDoesNotExist,
                    IOError,
            ) as ex:
                logger.exception(
                    "Could not load manifest list for manifest `%s`",
                    manifest_interface_instance.digest,
                )
                if raise_on_error:
                    raise CreateManifestException(str(ex))

                return None

            # Retrieve its labels.
            labels = child_manifest.get_manifest_labels(retriever)
            if labels is None:
                if raise_on_error:
                    raise CreateManifestException(
                        "Unable to retrieve manifest labels")

                # No exception is being handled here, so log a plain error
                # rather than using `logger.exception`.
                logger.error(
                    "Could not load manifest labels for child manifest")
                return None

            # Get/create the child manifest in the database.
            child_manifest_info = get_or_create_manifest(
                repository_id,
                child_manifest,
                storage,
                raise_on_error=raise_on_error)
            if child_manifest_info is None:
                if raise_on_error:
                    raise CreateManifestException(
                        "Unable to retrieve child manifest")

                logger.error("Could not get/create child manifest")
                return None

            child_manifest_rows[child_manifest_info.manifest.
                                digest] = child_manifest_info.manifest
            child_manifest_label_dicts.append(labels)

    # Ensure all the blobs in the manifest exist.
    digests = set(manifest_interface_instance.local_blob_digests)
    blob_map = {}

    # If the special empty layer is required, simply load it directly. This is much faster
    # than trying to load it on a per repository basis, and that is unnecessary anyway since
    # this layer is predefined.
    if EMPTY_LAYER_BLOB_DIGEST in digests:
        digests.remove(EMPTY_LAYER_BLOB_DIGEST)
        blob_map[EMPTY_LAYER_BLOB_DIGEST] = get_shared_blob(
            EMPTY_LAYER_BLOB_DIGEST)
        if not blob_map[EMPTY_LAYER_BLOB_DIGEST]:
            if raise_on_error:
                raise CreateManifestException(
                    "Unable to retrieve specialized empty blob")

            logger.warning("Could not find the special empty blob in storage")
            return None

    if digests:
        query = lookup_repo_storages_by_content_checksum(
            repository_id, digests)
        blob_map.update({s.content_checksum: s for s in query})
        for digest_str in digests:
            if digest_str not in blob_map:
                logger.warning(
                    "Unknown blob `%s` under manifest `%s` for repository `%s`",
                    digest_str,
                    manifest_interface_instance.digest,
                    repository_id,
                )

                if raise_on_error:
                    raise CreateManifestException("Unknown blob `%s`" %
                                                  digest_str)

                return None

    # Special check: If the empty layer blob is needed for this manifest, add it to the
    # blob map. This is necessary because Docker decided to elide sending of this special
    # empty layer in schema version 2, but we need to have it referenced for GC and schema version 1.
    if EMPTY_LAYER_BLOB_DIGEST not in blob_map:
        try:
            requires_empty_layer = manifest_interface_instance.get_requires_empty_layer_blob(
                retriever)
        except ManifestException as ex:
            if raise_on_error:
                raise CreateManifestException(str(ex))

            return None

        if requires_empty_layer is None:
            if raise_on_error:
                raise CreateManifestException(
                    "Could not load configuration blob")

            return None

        if requires_empty_layer:
            shared_blob = get_or_create_shared_blob(EMPTY_LAYER_BLOB_DIGEST,
                                                    EMPTY_LAYER_BYTES, storage)
            assert not shared_blob.uploading
            assert shared_blob.content_checksum == EMPTY_LAYER_BLOB_DIGEST
            blob_map[EMPTY_LAYER_BLOB_DIGEST] = shared_blob

    # Determine and populate the legacy image if necessary. Manifest lists will not have a legacy
    # image.
    legacy_image = None
    if manifest_interface_instance.has_legacy_image:
        legacy_image_id = _populate_legacy_image(repository_id,
                                                 manifest_interface_instance,
                                                 blob_map, retriever,
                                                 raise_on_error)
        # NOTE(review): these two failure paths return None without checking
        # `raise_on_error` here; presumably `_populate_legacy_image` raises
        # itself when asked to - confirm.
        if legacy_image_id is None:
            return None

        legacy_image = get_image(repository_id, legacy_image_id)
        if legacy_image is None:
            return None

    # Create the manifest and its blobs.
    media_type = Manifest.media_type.get_id(
        manifest_interface_instance.media_type)
    storage_ids = {storage.id for storage in blob_map.values()}

    with db_transaction():
        # Check for the manifest. This is necessary because Postgres doesn't handle IntegrityErrors
        # well under transactions.
        try:
            manifest = Manifest.get(repository=repository_id,
                                    digest=manifest_interface_instance.digest)
            return CreatedManifest(manifest=manifest,
                                   newly_created=False,
                                   labels_to_apply=None)
        except Manifest.DoesNotExist:
            pass

        # Create the manifest.
        try:
            manifest = Manifest.create(
                repository=repository_id,
                digest=manifest_interface_instance.digest,
                media_type=media_type,
                manifest_bytes=manifest_interface_instance.bytes.
                as_encoded_str(),
            )
        except IntegrityError as ie:
            # The create failed; if the row is now present, return it as a
            # pre-existing manifest, otherwise report the failure.
            try:
                manifest = Manifest.get(
                    repository=repository_id,
                    digest=manifest_interface_instance.digest)
            except Manifest.DoesNotExist:
                logger.error(
                    "Got integrity error when trying to create manifest: %s",
                    ie)
                if raise_on_error:
                    raise CreateManifestException(
                        "Attempt to create an invalid manifest. Please report this issue."
                    )

                return None

            return CreatedManifest(manifest=manifest,
                                   newly_created=False,
                                   labels_to_apply=None)

        # Insert the blobs.
        blobs_to_insert = [
            dict(manifest=manifest, repository=repository_id, blob=storage_id)
            for storage_id in storage_ids
        ]
        if blobs_to_insert:
            ManifestBlob.insert_many(blobs_to_insert).execute()

        # Set the legacy image (if applicable).
        if legacy_image is not None:
            ManifestLegacyImage.create(repository=repository_id,
                                       image=legacy_image,
                                       manifest=manifest)

        # Insert the manifest child rows (if applicable).
        if child_manifest_rows:
            children_to_insert = [
                dict(manifest=manifest,
                     child_manifest=child_manifest,
                     repository=repository_id)
                for child_manifest in child_manifest_rows.values()
            ]
            ManifestChild.insert_many(children_to_insert).execute()

        # If this manifest is being created not for immediate tagging, add a temporary tag to the
        # manifest to ensure it isn't being GCed. If the manifest *is* for tagging, then since we're
        # creating a new one here, it cannot be GCed (since it isn't referenced by anything yet), so
        # its safe to elide the temp tag operation. If we ever change GC code to collect *all* manifests
        # in a repository for GC, then we will have to reevaluate this optimization at that time.
        if not for_tagging:
            create_temporary_tag_if_necessary(manifest,
                                              temp_tag_expiration_sec)

    # Define the labels for the manifest (if any).
    # TODO: Once the old data model is gone, turn this into a batch operation and make the label
    # application to the manifest occur under the transaction.
    # NOTE: `iteritems` (here) and `viewitems` (below) are Python 2 dict methods.
    labels = manifest_interface_instance.get_manifest_labels(retriever)
    if labels:
        for key, value in labels.iteritems():
            # NOTE: There can technically be empty label keys via Dockerfile's. We ignore any
            # such `labels`, as they don't really mean anything.
            if not key:
                continue

            media_type = "application/json" if is_json(value) else "text/plain"
            create_manifest_label(manifest, key, value, "manifest", media_type)

    # Return the dictionary of labels to apply (i.e. those labels that cause an action to be taken
    # on the manifest or its resulting tags). We only return those labels either defined on
    # the manifest or shared amongst all the child manifests. We intersect amongst all child manifests
    # to ensure that any action performed is defined in all manifests.
    labels_to_apply = labels or {}
    if child_manifest_label_dicts:
        labels_to_apply = child_manifest_label_dicts[0].viewitems()
        for child_manifest_label_dict in child_manifest_label_dicts[1:]:
            # Intersect the key+values of the labels to ensure we get the exact same result
            # for all the child manifests.
            labels_to_apply = labels_to_apply & child_manifest_label_dict.viewitems(
            )

        labels_to_apply = dict(labels_to_apply)

    return CreatedManifest(manifest=manifest,
                           newly_created=True,
                           labels_to_apply=labels_to_apply)
Beispiel #15
0
def test_list_active_tags(initialized_db):
  """
  Walk a repository's tags through creation, expiration changes, deletion,
  re-creation and a move, checking the active tag list after each step.
  """
  # Start from a brand new repository.
  repo = create_repository('devtable', 'somenewrepo', None)
  manifest = Manifest.get()

  # Two images to point tags at.
  first_image = find_create_or_link_image('foobarimage1', repo, None, {}, 'local_us')
  second_image = find_create_or_link_image('foobarimage2', repo, None, {}, 'local_us')

  # Nothing is tagged yet.
  assert_tags(repo)

  # Tag the first image twice.
  foo_tag = create_or_update_tag_for_repo(repo, 'foo', first_image.docker_image_id,
                                          oci_manifest=manifest)
  bar_tag = create_or_update_tag_for_repo(repo, 'bar', first_image.docker_image_id,
                                          oci_manifest=manifest)

  # Since timestamps are stored on a second-granularity, we need to make the tags "start"
  # before "now", so when we recreate them below, they don't conflict.
  foo_tag.lifetime_start_ts -= 5
  foo_tag.save()

  bar_tag.lifetime_start_ts -= 5
  bar_tag.save()

  foo_oci = _get_oci_tag(foo_tag)
  foo_oci.lifetime_start_ms -= 5000
  foo_oci.save()

  bar_oci = _get_oci_tag(bar_tag)
  bar_oci.lifetime_start_ms -= 5000
  bar_oci.save()

  # Both tags are active.
  assert_tags(repo, 'foo', 'bar')

  # An explicitly empty expiration keeps them active.
  set_tag_end_ts(foo_tag, None)
  set_tag_end_ts(bar_tag, None)

  assert_tags(repo, 'foo', 'bar')

  # An expiration in the far future keeps the tag active as well.
  set_tag_end_ts(foo_tag, foo_tag.lifetime_start_ts + 10000000)

  assert_tags(repo, 'foo', 'bar')

  # A deleted tag drops out of the list.
  foo_tag = delete_tag('devtable', 'somenewrepo', 'foo')
  set_tag_end_ts(foo_tag, foo_tag.lifetime_end_ts - 4)

  assert_tags(repo, 'bar')

  # Re-create foo and back-date its start on both the tag and the OCI tag.
  foo_tag = create_or_update_tag_for_repo(repo, 'foo', first_image.docker_image_id,
                                          oci_manifest=manifest)
  foo_tag.lifetime_start_ts -= 3
  foo_tag.save()

  foo_oci = _get_oci_tag(foo_tag)
  foo_oci.lifetime_start_ms -= 3000
  foo_oci.save()

  assert_tags(repo, 'foo', 'bar')

  # Again, a far-future expiration keeps the tag active.
  set_tag_end_ts(foo_tag, foo_tag.lifetime_start_ts + 10000000)

  assert_tags(repo, 'foo', 'bar')

  # "Move" foo by updating it and make sure we don't get duplicates.
  create_or_update_tag_for_repo(repo, 'foo', second_image.docker_image_id, oci_manifest=manifest)
  assert_tags(repo, 'foo', 'bar')
Beispiel #16
0
def _backfill_manifest(tag_manifest):
    """
    Backfill the new-style manifest rows for a legacy tag manifest.

    Parses the tag manifest, resolves the storage IDs of its blobs, then (in
    a transaction) finds or creates the Manifest row and creates the
    TagManifestToManifest mapping row. Labels are backfilled afterwards.
    Parsing and lookup failures do not abort the backfill; they mark the
    mapping row as broken instead.

    Returns False when a mapping row already exists or an IntegrityError
    shows another writer got there first. Returns True when the backfill
    completes, and also when the tag manifest no longer exists.
    """
    logger.info('Backfilling manifest for tag manifest %s', tag_manifest.id)

    # Ensure that a mapping row doesn't already exist. If it does, we've been preempted.
    if lookup_manifest_map_row(tag_manifest):
        return False

    # Parse the manifest. If we cannot parse, then we treat the manifest as broken and just emit it
    # without additional rows or data, as it will eventually not be useful.
    is_broken = False
    try:
        manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(
            tag_manifest.json_data),
                                         validate=False)
    except ManifestException:
        logger.exception('Exception when trying to parse manifest %s',
                         tag_manifest.id)
        manifest = BrokenManifest(tag_manifest.digest, tag_manifest.json_data)
        is_broken = True

    # Lookup the storages for the digests.
    root_image = tag_manifest.tag.image
    repository = tag_manifest.tag.repository

    # Maps blob content checksum -> storage ID, seeded with the root image.
    image_storage_id_map = {
        root_image.storage.content_checksum: root_image.storage.id
    }

    try:
        parent_images = get_parent_images(repository.namespace_user.username,
                                          repository.name, root_image)
    except DataModelException:
        logger.exception(
            'Exception when trying to load parent images for manifest `%s`',
            tag_manifest.id)
        # Continue with no parents, but record the manifest as broken.
        parent_images = {}
        is_broken = True

    for parent_image in parent_images:
        image_storage_id_map[
            parent_image.storage.content_checksum] = parent_image.storage.id

    # Ensure that all the expected blobs have been found. If not, we lookup the blob under the repo
    # and add its storage ID. If the blob is not found, we mark the manifest as broken.
    storage_ids = set()
    try:
        for blob_digest in manifest.get_blob_digests_for_translation():
            if blob_digest in image_storage_id_map:
                storage_ids.add(image_storage_id_map[blob_digest])
            else:
                logger.debug(
                    'Blob `%s` not found in images for manifest `%s`; checking repo',
                    blob_digest, tag_manifest.id)
                try:
                    blob_storage = get_repo_blob_by_digest(
                        repository.namespace_user.username, repository.name,
                        blob_digest)
                    storage_ids.add(blob_storage.id)
                except BlobDoesNotExist:
                    logger.debug(
                        'Blob `%s` not found in repo for manifest `%s`',
                        blob_digest, tag_manifest.id)
                    is_broken = True
    except MalformedSchema1Manifest:
        logger.warning(
            'Found malformed schema 1 manifest during blob backfill')
        is_broken = True

    with db_transaction():
        # Re-retrieve the tag manifest to ensure it still exists and we're pointing at the correct tag.
        try:
            tag_manifest = TagManifest.get(id=tag_manifest.id)
        except TagManifest.DoesNotExist:
            # The tag manifest is gone, so there is nothing left to backfill.
            return True

        # Ensure it wasn't already created.
        if lookup_manifest_map_row(tag_manifest):
            return False

        # Check for a pre-existing manifest matching the digest in the repository. This can happen
        # if we've already created the manifest row (typically for tag reversion).
        try:
            manifest_row = Manifest.get(digest=manifest.digest,
                                        repository=tag_manifest.tag.repository)
        except Manifest.DoesNotExist:
            # Create the new-style rows for the manifest.
            try:
                manifest_row = populate_manifest(tag_manifest.tag.repository,
                                                 manifest,
                                                 tag_manifest.tag.image,
                                                 storage_ids)
            except IntegrityError:
                # Pre-empted.
                return False

        # Create the mapping row. If we find another was created for this tag manifest in the
        # meantime, then we've been preempted.
        try:
            TagManifestToManifest.create(tag_manifest=tag_manifest,
                                         manifest=manifest_row,
                                         broken=is_broken)
        except IntegrityError:
            return False

    # Backfill any labels on the manifest.
    _backfill_labels(tag_manifest, manifest_row, repository)
    return True
Beispiel #17
0
def _create_manifest(
    repository_id,
    manifest_interface_instance,
    storage,
    temp_tag_expiration_sec=TEMP_TAG_EXPIRATION_SEC,
    for_tagging=False,
    raise_on_error=False,
    retriever=None,
):
    """Validate a manifest and create its database rows in the given repository.

    The manifest is validated, each of its child manifests (if any) is loaded and
    gotten/created via `get_or_create_manifest`, and then the `Manifest` row, its
    `ManifestBlob` rows and its `ManifestChild` rows are written under a single
    transaction. Labels defined on the manifest are attached after the transaction.

    Args:
        repository_id: Value passed as `repository` to the model lookups and inserts.
        manifest_interface_instance: The parsed manifest. This function uses its `validate`,
            `child_manifests` and `get_manifest_labels` methods and its `digest`,
            `media_type`, `bytes`, `config_media_type` and `layers_compressed_size`
            attributes.
        storage: Storage handle forwarded to the default content retriever, to
            `_build_blob_map` and to `get_or_create_manifest` for child manifests.
        temp_tag_expiration_sec: Expiration forwarded to
            `create_temporary_tag_if_necessary` when `for_tagging` is false.
        for_tagging: If true, no temporary tag is created for the new manifest.
        raise_on_error: If true, failures detected here raise `CreateManifestException`
            instead of returning None.
        retriever: Optional content retriever. When falsy, one is built with
            `RepositoryContentRetriever.for_repository(repository_id, storage)`.

    Returns:
        A `CreatedManifest`. `newly_created` is True (with `labels_to_apply` populated)
        only when this call inserted the manifest row; when the row already existed,
        `newly_created` is False and `labels_to_apply` is None. Returns None on failure
        when `raise_on_error` is false.

    Raises:
        CreateManifestException: On validation, child-manifest, label, or unexpected
            integrity failures, when `raise_on_error` is true.
    """
    # Validate the manifest.
    retriever = retriever or RepositoryContentRetriever.for_repository(
        repository_id, storage)
    try:
        manifest_interface_instance.validate(retriever)
    except (ManifestException, MalformedSchema2ManifestList, BlobDoesNotExist,
            IOError) as ex:
        logger.exception("Could not validate manifest `%s`",
                         manifest_interface_instance.digest)
        if raise_on_error:
            raise CreateManifestException(str(ex)) from ex

        return None

    # Load, parse and get/create the child manifests, if any.
    child_manifest_refs = manifest_interface_instance.child_manifests(
        retriever)
    child_manifest_rows = {}
    child_manifest_label_dicts = []

    if child_manifest_refs is not None:
        for child_manifest_ref in child_manifest_refs:
            # Load and parse the child manifest.
            try:
                child_manifest = child_manifest_ref.manifest_obj
            except (
                    ManifestException,
                    MalformedSchema2ManifestList,
                    BlobDoesNotExist,
                    IOError,
            ) as ex:
                logger.exception(
                    "Could not load manifest list for manifest `%s`",
                    manifest_interface_instance.digest,
                )
                if raise_on_error:
                    raise CreateManifestException(str(ex)) from ex

                return None

            # Retrieve its labels.
            child_labels = child_manifest.get_manifest_labels(retriever)
            if child_labels is None:
                if raise_on_error:
                    raise CreateManifestException(
                        "Unable to retrieve manifest labels")

                # NOTE: No exception is active here, so this must be `error` rather than
                # `exception` (which would append a meaningless `NoneType: None` traceback).
                logger.error(
                    "Could not load manifest labels for child manifest")
                return None

            # Get/create the child manifest in the database.
            child_manifest_info = get_or_create_manifest(
                repository_id,
                child_manifest,
                storage,
                raise_on_error=raise_on_error)
            if child_manifest_info is None:
                if raise_on_error:
                    raise CreateManifestException(
                        "Unable to retrieve child manifest")

                logger.error("Could not get/create child manifest")
                return None

            child_row = child_manifest_info.manifest
            child_manifest_rows[child_row.digest] = child_row
            child_manifest_label_dicts.append(child_labels)

    # Build the map from required blob digests to the blob objects. `raise_on_error` is
    # handed to the builder, so a None result is simply propagated to the caller.
    blob_map = _build_blob_map(
        repository_id,
        manifest_interface_instance,
        retriever,
        storage,
        raise_on_error,
        require_empty_layer=False,
    )
    if blob_map is None:
        return None

    # Create the manifest and its blobs.
    media_type = Manifest.media_type.get_id(
        manifest_interface_instance.media_type)
    # De-duplicate: several digests may map to blob objects sharing the same id.
    storage_ids = {blob.id for blob in blob_map.values()}

    # Check for the manifest, in case it was created since we checked earlier.
    try:
        manifest = Manifest.get(repository=repository_id,
                                digest=manifest_interface_instance.digest)
        return CreatedManifest(manifest=manifest,
                               newly_created=False,
                               labels_to_apply=None)
    except Manifest.DoesNotExist:
        pass

    try:
        with db_transaction():
            # Create the manifest.
            try:
                manifest = Manifest.create(
                    repository=repository_id,
                    digest=manifest_interface_instance.digest,
                    media_type=media_type,
                    manifest_bytes=manifest_interface_instance.bytes.
                    as_encoded_str(),
                    config_media_type=manifest_interface_instance.
                    config_media_type,
                    layers_compressed_size=manifest_interface_instance.
                    layers_compressed_size,
                )
            except IntegrityError as ie:
                # NOTE: An IntegrityError means (barring a bug) that the manifest was created by
                # another caller while we were attempting to create it. Since we need to return
                # the manifest, we raise a specialized exception here to break out of the
                # transaction so we can retrieve it.
                raise _ManifestAlreadyExists(ie) from ie

            # Insert the blobs.
            blobs_to_insert = [
                dict(manifest=manifest,
                     repository=repository_id,
                     blob=storage_id) for storage_id in storage_ids
            ]
            if blobs_to_insert:
                try:
                    ManifestBlob.insert_many(blobs_to_insert).execute()
                except IntegrityError as ie:
                    raise _ManifestAlreadyExists(ie) from ie

            # Insert the manifest child rows (if applicable).
            if child_manifest_rows:
                children_to_insert = [
                    dict(manifest=manifest,
                         child_manifest=child_manifest,
                         repository=repository_id)
                    for child_manifest in child_manifest_rows.values()
                ]
                try:
                    ManifestChild.insert_many(children_to_insert).execute()
                except IntegrityError as ie:
                    raise _ManifestAlreadyExists(ie) from ie

            # If this manifest is being created not for immediate tagging, add a temporary tag to the
            # manifest to ensure it isn't being GCed. If the manifest *is* for tagging, then since we're
            # creating a new one here, it cannot be GCed (since it isn't referenced by anything yet), so
            # its safe to elide the temp tag operation. If we ever change GC code to collect *all* manifests
            # in a repository for GC, then we will have to reevaluate this optimization at that time.
            if not for_tagging:
                create_temporary_tag_if_necessary(manifest,
                                                  temp_tag_expiration_sec)

        # Define the labels for the manifest (if any).
        # TODO: Once the old data model is gone, turn this into a batch operation and make the label
        # application to the manifest occur under the transaction.
        labels = manifest_interface_instance.get_manifest_labels(retriever)
        if labels:
            for key, value in labels.items():
                # NOTE: There can technically be empty label keys via Dockerfile's. We ignore any
                # such `labels`, as they don't really mean anything.
                if not key:
                    continue

                # Kept separate from `media_type` above, which is the manifest's media type id.
                label_media_type = "application/json" if is_json(
                    value) else "text/plain"
                create_manifest_label(manifest, key, value, "manifest",
                                      label_media_type)

        # Return the dictionary of labels to apply (i.e. those labels that cause an action to be taken
        # on the manifest or its resulting tags). We only return those labels either defined on
        # the manifest or shared amongst all the child manifests. We intersect amongst all child manifests
        # to ensure that any action performed is defined in all manifests.
        labels_to_apply = labels or {}
        if child_manifest_label_dicts:
            # Intersect the key+value pairs of the labels to ensure we get the exact same result
            # for all the child manifests.
            shared_items = child_manifest_label_dicts[0].items()
            for child_manifest_label_dict in child_manifest_label_dicts[1:]:
                shared_items = shared_items & child_manifest_label_dict.items()

            labels_to_apply = dict(shared_items)

        return CreatedManifest(manifest=manifest,
                               newly_created=True,
                               labels_to_apply=labels_to_apply)
    except _ManifestAlreadyExists as mae:
        try:
            manifest = Manifest.get(repository=repository_id,
                                    digest=manifest_interface_instance.digest)
        except Manifest.DoesNotExist:
            # NOTE: If we've reached this point, then somehow we had an IntegrityError without it
            # being due to a duplicate manifest. We therefore log the error.
            logger.error(
                "Got integrity error when trying to create manifest: %s",
                mae.internal_exception)
            if raise_on_error:
                raise CreateManifestException(
                    "Attempt to create an invalid manifest. Please report this issue."
                )

            return None

        return CreatedManifest(manifest=manifest,
                               newly_created=False,
                               labels_to_apply=None)