Ejemplo n.º 1
0
def _check_image_used(legacy_image_id):
  """ Returns True if the legacy image with the given ID is still referenced, False otherwise.

      An image is considered referenced when a ManifestLegacyImage row or a RepositoryTag row
      points at it, or when another Image row names it as its parent. All lookups are issued
      inside a single database transaction.
  """
  assert legacy_image_id is not None

  # (model, referencing column) pairs, probed in order: manifest, tag, child image.
  reference_checks = (
    (ManifestLegacyImage, ManifestLegacyImage.image),
    (RepositoryTag, RepositoryTag.image),
    (Image, Image.parent),
  )

  with db_transaction():
    for model, referencing_field in reference_checks:
      try:
        model.select().where(referencing_field == legacy_image_id).get()
      except model.DoesNotExist:
        # Nothing of this kind points at the image; try the next kind of reference.
        continue

      return True

  return False
Ejemplo n.º 2
0
Archivo: gc.py Proyecto: kleesc/quay
def _garbage_collect_legacy_image(legacy_image_id, context):
    """ Deletes the legacy image with the given ID if nothing references it any longer.

    Before deleting, the image's own storage, any derived storage and its parent image (if
    any) are registered on ``context`` so they can be considered for collection as well.

    Returns True if the image row was deleted. Returns False if the image is still in use
    (as reported by ``_check_image_used``) or if it no longer exists.
    """
    assert legacy_image_id is not None

    # Check if the image is referenced.
    if _check_image_used(legacy_image_id):
        return False

    # We have an unreferenced image. We can now delete it.
    # Grab any derived storage for the image.
    for derived in DerivedStorageForImage.select().where(
            DerivedStorageForImage.source_image == legacy_image_id):
        context.add_blob_id(derived.derivative_id)

    try:
        image = Image.select().where(Image.id == legacy_image_id).get()
    except Image.DoesNotExist:
        return False

    assert image.repository_id == context.repository.id

    # Add the image's blob to be GCed.
    context.add_blob_id(image.storage_id)

    # If the image has a parent ID, add the parent for GC.
    if image.parent_id is not None:
        context.add_legacy_image_id(image.parent_id)

    # Delete the image.
    with db_transaction():
        # Re-run the usage check and re-load the row inside the transaction: the checks
        # above ran outside of it, so the image may have been referenced or removed since.
        if _check_image_used(legacy_image_id):
            return False

        try:
            image = Image.select().where(Image.id == legacy_image_id).get()
        except Image.DoesNotExist:
            return False

        assert image.id == legacy_image_id
        assert image.repository_id == context.repository.id

        # Delete any derived storage for the image.
        deleted_derived_storage = (DerivedStorageForImage.delete().where(
            DerivedStorageForImage.source_image == legacy_image_id).execute())

        # Delete the image itself.
        image.delete_instance()

    context.mark_legacy_image_removed(image)

    # Record how many rows were removed from each table.
    gc_table_rows_deleted.labels(table="Image").inc()
    gc_table_rows_deleted.labels(
        table="DerivedStorageForImage").inc(deleted_derived_storage)

    # Invoke each configured cleanup callback with the deleted image.
    if config.image_cleanup_callbacks:
        for callback in config.image_cleanup_callbacks:
            callback([image])

    return True
Ejemplo n.º 3
0
Archivo: tag.py Proyecto: zhill/quay
def _get_repo_tag_image(tag_name, include_storage, modifier):
    """ Returns the first image joined to a RepositoryTag named ``tag_name``.

    ``modifier`` is a callable that receives the query and returns a (possibly further
    restricted) query; its result is passed through ``_tag_alive``. When ``include_storage``
    is truthy, the ImageStorage row is selected together with the image.

    Raises DataModelException if no image is found.
    """
    if include_storage:
        base_query = (Image.select(Image, ImageStorage)
                      .join(ImageStorage)
                      .switch(Image)
                      .join(RepositoryTag))
    else:
        base_query = Image.select().join(RepositoryTag)

    named = base_query.where(RepositoryTag.name == tag_name)
    found = _tag_alive(modifier(named))
    if found:
        return found[0]

    raise DataModelException("Unable to find image for tag.")
Ejemplo n.º 4
0
def get_image(repo, docker_image_id):
    """ Returns the image (with its storage row selected) that has the given docker image ID
    in the given repository, or None if there is no such image.
    """
    lookup = (Image.select(Image, ImageStorage)
              .join(ImageStorage)
              .where(Image.docker_image_id == docker_image_id,
                     Image.repository == repo))
    try:
        return lookup.get()
    except Image.DoesNotExist:
        return None
Ejemplo n.º 5
0
def __translate_ancestry(old_ancestry, translations, repo_obj, username,
                         preferred_location):
    """ Rewrites an ancestry string ("/id1/id2/.../") so that it refers to images in
    ``repo_obj``.

    Each ancestor ID is mapped through ``translations`` (old image ID -> new image ID); IDs
    not yet present are resolved with ``find_create_or_link_image`` and stored in
    ``translations``. The root ancestry "/" is returned unchanged.
    """
    if old_ancestry == "/":
        return "/"

    # Select all the ancestor Docker IDs in a single query.
    ancestor_ids = [int(piece) for piece in old_ancestry.split("/")[1:-1]]
    rows = Image.select(Image.id,
                        Image.docker_image_id).where(Image.id << ancestor_ids)
    docker_id_by_image_id = {row.id: row.docker_image_id for row in rows}

    # Translate the old images into new ones, preserving the ancestry order.
    translated = []
    for ancestor_id in ancestor_ids:
        ancestor_docker_id = docker_id_by_image_id[ancestor_id]
        logger.debug("Translating id: %s", ancestor_id)
        if ancestor_id not in translations:
            linked_image = find_create_or_link_image(ancestor_docker_id,
                                                     repo_obj, username,
                                                     translations,
                                                     preferred_location)
            translations[ancestor_id] = linked_image.id
        translated.append(str(translations[ancestor_id]))

    return "/%s/" % "/".join(translated)
Ejemplo n.º 6
0
def backfill_replication():
    """ Walks every image and queues its storage for replication when the storage is not yet
    placed in every required location.

    The required locations are the owning namespace's region locations plus the storage
    default locations. A storage that has been reported as missing locations is remembered by
    UUID so that it is only handled once.
    """
    seen_storage_uuids = set()
    images_with_owner = (
        Image.select(Image, ImageStorage, Repository, User)
        .join(ImageStorage)
        .switch(Image)
        .join(Repository)
        .join(User)
    )

    for image in images_with_owner:
        storage_uuid = image.storage.uuid
        if storage_uuid in seen_storage_uuids:
            continue

        owner = image.repository.namespace_user
        required = model.user.get_region_locations(owner) | set(storage.default_locations)

        placements = (
            ImageStoragePlacement.select(ImageStoragePlacement, ImageStorageLocation)
            .where(ImageStoragePlacement.storage == image.storage)
            .join(ImageStorageLocation)
        )
        present = {placement.location.name for placement in placements}

        if not (required - present):
            # Already placed everywhere it needs to be.
            continue

        print("Enqueueing image storage %s to be replicated" % (storage_uuid))
        seen_storage_uuids.add(storage_uuid)

        # Only queue the storage when the replication queue does not report it as alive.
        if not image_replication_queue.alive([storage_uuid]):
            queue_storage_replication(owner.username, image.storage)
Ejemplo n.º 7
0
def test_get_matching_tag_ids_images_filtered(initialized_db):
    """ Checks that get_matching_tags_for_images, with both filters restricting results to the
    repository named "simple", returns exactly the tags that ``_tag_alive`` yields for the
    same filter.
    """

    def only_simple_repo(query):
        return query.join(Repository).where(Repository.name == "simple")

    image_query = only_simple_repo(
        Image.select(Image, ImageStorage)
        .join(RepositoryTag)
        .switch(Image)
        .join(ImageStorage)
        .switch(Image)
    )
    alive_tags_query = _tag_alive(only_simple_repo(RepositoryTag.select()))

    pairs = [(image.docker_image_id, image.storage.uuid) for image in image_query]

    matching_tags = get_matching_tags_for_images(
        pairs, filter_images=only_simple_repo, filter_tags=only_simple_repo
    )

    expected_tag_ids = {tag.id for tag in alive_tags_query}
    matching_tag_ids = {tag.id for tag in matching_tags}

    # Ensure every alive tag was found.
    assert matching_tag_ids == expected_tag_ids
Ejemplo n.º 8
0
def set_image_storage_metadata(docker_image_id, namespace_name,
                               repository_name, image_size, uncompressed_size):
    """ Sets metadata that is specific to the binary storage of the data, irrespective of how
    it is used in the layer tree.

    Looks up the image by docker image ID within the given namespace/repository, recomputes
    and saves its aggregate size, then stores ``image_size`` and ``uncompressed_size`` on the
    image's storage row.

    Returns the updated storage row.

    Raises DataModelException if ``image_size`` is None, and InvalidImageException if no
    matching image exists.
    """
    if image_size is None:
        raise DataModelException('Empty image size field')

    try:
        image = (Image.select(Image, ImageStorage).join(Repository).join(
            Namespace, on=(Repository.namespace_user == Namespace.id
                           )).switch(Image).join(ImageStorage).where(
                               Repository.name == repository_name,
                               Namespace.username == namespace_name,
                               Image.docker_image_id == docker_image_id).get())
    except (Image.DoesNotExist, ImageStorage.DoesNotExist):
        # The query is rooted at Image, so a miss is reported as Image.DoesNotExist.
        # ImageStorage.DoesNotExist is still accepted for backward compatibility.
        raise InvalidImageException(
            'No image with specified id and repository')

    # We MUST do this here, it can't be done in the corresponding image call because the storage
    # has not yet been pushed
    image.aggregate_size = _basequery.calculate_image_aggregate_size(
        image.ancestors, image_size, image.parent)
    image.save()

    image.storage.image_size = image_size
    image.storage.uncompressed_size = uncompressed_size
    image.storage.save()
    return image.storage
Ejemplo n.º 9
0
def _get_repository_images(namespace_name, repository_name, query_modifier):
    """ Builds the query for all images of the named repository in the named namespace and
    returns it after passing it through ``query_modifier``.
    """
    joined = Image.select().join(Repository).join(
        Namespace, on=(Repository.namespace_user == Namespace.id))
    scoped = joined.where(Repository.name == repository_name,
                          Namespace.username == namespace_name)
    return query_modifier(scoped)
Ejemplo n.º 10
0
def _get_dangling_storage_count():
    """ Returns the number of ImageStorage rows that are referenced by no Image, no
    ManifestBlob and no DerivedStorageForImage row.
    """
    all_storage_ids = {row.id for row in ImageStorage.select()}
    image_refs = {row.storage_id for row in Image.select()}
    manifest_refs = {row.blob_id for row in ManifestBlob.select()}
    derived_refs = {row.derivative_id for row in DerivedStorageForImage.select()}

    referenced = image_refs | derived_refs | manifest_refs
    return len(all_storage_ids - referenced)
Ejemplo n.º 11
0
def get_image_with_storage(docker_image_id, storage_uuid):
    """ Looks up the image that has both the given docker image ID and a storage row with the
    given UUID. Returns the image (storage selected alongside it), or None when not found.
    """
    with_storage = Image.select(Image, ImageStorage).join(ImageStorage)
    lookup = with_storage.where(Image.docker_image_id == docker_image_id,
                                ImageStorage.uuid == storage_uuid)
    try:
        return lookup.get()
    except Image.DoesNotExist:
        return None
Ejemplo n.º 12
0
def _purge_repository_contents(repo):
  """ Purges all the contents of a repository, removing all of its tags,
      manifests and images.

      Works in three phases, each repeated until a full pass finds nothing left:
      Tag rows, then RepositoryTag rows, then any remaining Image rows. After each chunk
      of tags (and after each pass over the images) garbage collection is run on the
      context that was filled during that step.
  """
  logger.debug('Purging repository %s', repo)

  # Purge via all the tags.
  while True:
    found = False
    for tags in _chunk_iterate_for_deletion(Tag.select().where(Tag.repository == repo)):
      logger.debug('Found %s tags to GC under repository %s', len(tags), repo)
      found = True
      # A fresh context per chunk; GC runs once the whole chunk has been purged.
      context = _GarbageCollectorContext(repo)
      for tag in tags:
        logger.debug('Deleting tag %s under repository %s', tag, repo)
        assert tag.repository_id == repo.id
        _purge_oci_tag(tag, context, allow_non_expired=True)

      _run_garbage_collection(context)

    # Stop once a pass yields no chunks at all.
    if not found:
      break

  # TODO: remove this once we're fully on the OCI data model.
  # Same procedure as above, for the RepositoryTag table.
  while True:
    found = False
    repo_tag_query = RepositoryTag.select().where(RepositoryTag.repository == repo)
    for tags in _chunk_iterate_for_deletion(repo_tag_query):
      logger.debug('Found %s tags to GC under repository %s', len(tags), repo)
      found = True
      context = _GarbageCollectorContext(repo)

      for tag in tags:
        logger.debug('Deleting tag %s under repository %s', tag, repo)
        assert tag.repository_id == repo.id
        _purge_pre_oci_tag(tag, context, allow_non_expired=True)

      _run_garbage_collection(context)

    if not found:
      break

  # Add all remaining images to a new context. We do this here to minimize the number of images
  # we need to load.
  while True:
    found_image = False
    image_context = _GarbageCollectorContext(repo)
    for image in Image.select().where(Image.repository == repo):
      found_image = True
      logger.debug('Deleting image %s under repository %s', image, repo)
      assert image.repository_id == repo.id
      image_context.add_legacy_image_id(image.id)

    # Note: GC is invoked on every pass, including the final one where the context is empty.
    _run_garbage_collection(image_context)

    if not found_image:
      break
Ejemplo n.º 13
0
def purge_repository(repo, force=False):
    """
    Remove the repository and everything that refers to it.

    The repository must be in the MARKED_FOR_DELETION state unless ``force`` is set.

    Returns True when the repository row was deleted and False when the repository row no
    longer exists by the time it is re-fetched. Garbage collection is incremental and
    repeatable, so this return value does not need to be checked or responded to.
    """
    assert repo.state == RepositoryState.MARKED_FOR_DELETION or force

    if repo.kind.name != "application":
        # GC to remove the images and storage.
        _purge_repository_contents(repo)
    else:
        # Delete the repository of all Appr-referenced entries.
        # Note that new-model Tag's must be deleted in *two* passes, as they can reference
        # parent tags, and MySQL is... particular... about such relationships when deleting.
        linked_tags = ApprTag.delete().where(ApprTag.repository == repo,
                                             ~(ApprTag.linked_tag >> None))
        linked_tags.execute()
        ApprTag.delete().where(ApprTag.repository == repo).execute()

    # Ensure there are no additional tags, manifests, images or blobs in the repository.
    for content_model in (ApprTag, Tag, RepositoryTag, Manifest, ManifestBlob, Image):
        assert content_model.select().where(content_model.repository == repo).count() == 0

    # Delete any repository build triggers, builds, and any other large-ish reference tables
    # for the repository.
    reference_models = (
        RepositoryPermission,
        RepositoryBuild,
        RepositoryBuildTrigger,
        RepositoryActionCount,
        Star,
        AccessToken,
        RepositoryNotification,
        BlobUpload,
        RepoMirrorConfig,
        RepositoryAuthorizedEmail,
    )
    for reference_model in reference_models:
        _chunk_delete_all(repo, reference_model, force=force)

    # Delete any marker rows for the repository.
    DeletedRepository.delete().where(
        DeletedRepository.repository == repo).execute()

    # Delete the rest of the repository metadata, provided the repository still exists.
    try:
        current = Repository.get(id=repo.id)
    except Repository.DoesNotExist:
        return False

    current.delete_instance(recursive=True, delete_nullable=False, force=force)
    return True
Ejemplo n.º 14
0
Archivo: image.py Proyecto: quay/quay
def find_create_or_link_image(
    docker_image_id, repo_obj, username, translations, preferred_location
):
    """
    Return the image with the given docker image ID in ``repo_obj``, creating it if needed.

    Resolution order:
      1. The image already exists in the repository: return it.
      2. An image with the same docker ID exists in a repository selected by
         ``filter_to_repos_for_user`` for ``username``: link to it via ``_find_or_link_image``.
      3. Otherwise create a new storage in ``preferred_location`` and a new root image
         (ancestors "/") pointing at it.
    """
    namespace_name = repo_obj.namespace_user.username

    # First check for the image existing in the repository. If found, we simply return it.
    already_present = get_repo_image(namespace_name, repo_obj.name, docker_image_id)
    if already_present:
        return already_present

    # We next check to see if there is an existing storage the new image can link to.
    candidates = (
        Image.select(Image, ImageStorage)
        .distinct()
        .join(ImageStorage)
        .switch(Image)
        .join(Repository)
        .join(RepositoryPermission, JOIN.LEFT_OUTER)
        .switch(Repository)
        .join(Namespace, on=(Repository.namespace_user == Namespace.id))
        .where(Image.docker_image_id == docker_image_id)
    )
    candidates = _basequery.filter_to_repos_for_user(
        candidates, _namespace_id_for_username(username)
    )

    # If there is an existing image, we try to translate its ancestry and copy its storage.
    try:
        logger.debug("Looking up existing image for ID: %s", docker_image_id)
        source_image = candidates.get()

        logger.debug("Existing image %s found for ID: %s", source_image.id, docker_image_id)
        linked_image = _find_or_link_image(
            source_image, repo_obj, username, translations, preferred_location
        )
        if linked_image:
            return linked_image
    except Image.DoesNotExist:
        logger.debug("No existing image found for ID: %s", docker_image_id)

    # Otherwise, create a new storage directly.
    with db_transaction():
        # Final check for an existing image, under the transaction.
        rechecked = get_repo_image(namespace_name, repo_obj.name, docker_image_id)
        if rechecked:
            return rechecked

        logger.debug("Creating new storage for docker id: %s", docker_image_id)
        fresh_storage = storage.create_v1_storage(preferred_location)

        return Image.create(
            docker_image_id=docker_image_id,
            repository=repo_obj,
            storage=fresh_storage,
            ancestors="/",
        )
Ejemplo n.º 15
0
Archivo: image.py Proyecto: quay/quay
def get_repository_images_without_placements(repo_obj, with_ancestor=None):
    """
    Build the query for the images (with storage rows) of ``repo_obj``.

    When ``with_ancestor`` is given, the query is narrowed to that image itself plus every
    image whose ancestry string starts with the ancestor's own ancestry followed by its ID.
    """
    images = Image.select(Image, ImageStorage).join(ImageStorage).where(Image.repository == repo_obj)
    if not with_ancestor:
        return images

    descendant_prefix = "%s%s/" % (with_ancestor.ancestors, with_ancestor.id)
    is_descendant = Image.ancestors ** (descendant_prefix + "%")
    is_the_ancestor = Image.id == with_ancestor.id
    return images.where(is_descendant | is_the_ancestor)
Ejemplo n.º 16
0
def test_purge_repository_storage_blob(default_tag_policy, initialized_db):
    """
    Purge every repository and verify the blobs expected to be removed leave storage.

    For each repository, an uploaded blob is expected to be removed from storage when no
    other repository references the same blob through a ManifestBlob, an Image or an
    UploadedBlob row at the time the repository is purged.
    """
    with populate_storage_for_gc():
        expected_blobs_removed_from_storage = set()
        preferred = storage.preferred_locations[0]

        # Check that every existing uploaded blob has an object in storage.
        for repo in database.Repository.select().order_by(database.Repository.id):
            for uploadedblob in UploadedBlob.select().where(UploadedBlob.repository == repo):
                assert storage.exists(
                    {preferred}, storage.blob_path(uploadedblob.blob.content_checksum)
                )

        # Remove everything.
        for repo in database.Repository.select():  # .order_by(database.Repository.id):
            for uploadedblob in UploadedBlob.select().where(UploadedBlob.repository == repo):
                # Check if only this repository is referencing the uploaded blob.
                # If so, the blob should be removed from storage.
                has_dependent_manifestblob = (
                    ManifestBlob.select()
                    .where(
                        ManifestBlob.blob == uploadedblob.blob,
                        ManifestBlob.repository != repo,
                    )
                    .count()
                )
                has_dependent_image = (
                    Image.select()
                    .where(
                        Image.storage == uploadedblob.blob,
                        Image.repository != repo,
                    )
                    .count()
                )
                # Compare on the blob column. Comparing the model class to a row instance
                # (`UploadedBlob == uploadedblob`) is a plain Python comparison that is
                # always False, which made this check count zero rows unconditionally.
                has_dependent_uploadedblobs = (
                    UploadedBlob.select()
                    .where(
                        UploadedBlob.blob == uploadedblob.blob,
                        UploadedBlob.repository != repo,
                    )
                    .count()
                )

                if (
                    not has_dependent_manifestblob
                    and not has_dependent_image
                    and not has_dependent_uploadedblobs
                ):
                    expected_blobs_removed_from_storage.add(uploadedblob.blob)

            assert model.gc.purge_repository(repo, force=True)

        for removed_blob_from_storage in expected_blobs_removed_from_storage:
            assert not storage.exists(
                {preferred}, storage.blob_path(removed_blob_from_storage.content_checksum)
            )
Ejemplo n.º 17
0
Archivo: image.py Proyecto: quay/quay
def get_image_with_storage_and_parent_base():
    """
    Build the base query selecting each image with its storage, its parent image and the
    parent's storage.

    The parent image and the parent's storage are attached with LEFT OUTER joins through
    aliases of Image and ImageStorage.
    """
    ParentImage = Image.alias()
    ParentStorage = ImageStorage.alias()

    with_storage = Image.select(Image, ImageStorage, ParentImage, ParentStorage).join(ImageStorage)
    with_parent = with_storage.switch(Image).join(
        ParentImage, JOIN.LEFT_OUTER, on=(Image.parent == ParentImage.id)
    )
    return with_parent.join(
        ParentStorage, JOIN.LEFT_OUTER, on=(ParentStorage.id == ParentImage.storage)
    )
Ejemplo n.º 18
0
def test_get_matching_tag_ids_for_all_images(max_subqueries, max_image_lookup_count, initialized_db):
  """ With the tag lookup limits patched to the given values, looking up tags for every
      (docker image ID, storage UUID) pair must return exactly the tags from _tag_alive.
  """
  with patch('data.model.tag._MAX_SUB_QUERIES', max_subqueries), \
       patch('data.model.tag._MAX_IMAGE_LOOKUP_COUNT', max_image_lookup_count):
    all_images = Image.select(Image, ImageStorage).join(ImageStorage)
    pairs = [(image.docker_image_id, image.storage.uuid) for image in all_images]

    expected_tags_ids = {tag.id for tag in _tag_alive(RepositoryTag.select())}
    matching_tags_ids = {tag.id for tag in get_matching_tags_for_images(pairs)}

    # Ensure every alive tag was found.
    assert matching_tags_ids == expected_tags_ids
Ejemplo n.º 19
0
Archivo: image.py Proyecto: quay/quay
def set_image_metadata(
    docker_image_id,
    namespace_name,
    repository_name,
    created_date_str,
    comment,
    command,
    v1_json_metadata,
    parent=None,
):
    """
    Sets metadata that is specific to how a binary piece of storage fits into the layer tree.

    Looks up the image by docker image ID within the given namespace/repository and, inside
    a transaction, updates its creation date, comment, command and v1 JSON metadata, and
    clears its v1 checksum. When ``parent`` is given, the image's parent and ancestry string
    are set from it.

    ``created_date_str`` is parsed with dateutil; if it is None or cannot be parsed, the
    current time is used instead.

    Returns the saved image. Raises DataModelException if no matching image exists.
    """
    with db_transaction():
        try:
            fetched = (
                Image.select(Image, ImageStorage)
                .join(Repository)
                .join(Namespace, on=(Repository.namespace_user == Namespace.id))
                .switch(Image)
                .join(ImageStorage)
                .where(
                    Repository.name == repository_name,
                    Namespace.username == namespace_name,
                    Image.docker_image_id == docker_image_id,
                )
                .get()
            )
        except Image.DoesNotExist:
            raise DataModelException("No image with specified id and repository")

        # Default to "now"; overridden below when a parseable date string was supplied.
        fetched.created = datetime.now()
        if created_date_str is not None:
            try:
                fetched.created = dateutil.parser.parse(created_date_str).replace(tzinfo=None)
            except Exception:
                # parse raises different exceptions, so we cannot use a specific kind of
                # handler here. Catch Exception rather than everything, so that
                # KeyboardInterrupt and SystemExit are not swallowed.
                pass

        # We cleanup any old checksum in case it's a retry after a fail
        fetched.v1_checksum = None
        fetched.comment = comment
        fetched.command = command
        fetched.v1_json_metadata = v1_json_metadata

        if parent:
            fetched.ancestors = "%s%s/" % (parent.ancestors, parent.id)
            fetched.parent = parent

        fetched.save()
        return fetched
Ejemplo n.º 20
0
def _find_or_link_image(existing_image, repo_obj, username, translations,
                        preferred_location):
    """ Creates an image in ``repo_obj`` that shares the storage of ``existing_image``.

    Runs inside a transaction. If the repository already has an image with the same docker
    image ID, that image is returned. Otherwise the existing image is re-read, its ancestry
    is translated into the target repository, and a new image row is created that copies the
    existing image's metadata and points at the same storage. The mapping from the existing
    image ID to the new image ID is recorded in ``translations``.

    Returns the found or created image, or None if Image.DoesNotExist is raised while
    re-reading or linking the existing image.
    """
    with db_transaction():
        # Check for an existing image, under the transaction, to make sure it doesn't already exist.
        already_linked = get_repo_image(repo_obj.namespace_user.username,
                                        repo_obj.name,
                                        existing_image.docker_image_id)
        if already_linked:
            return already_linked

        # Make sure the existing base image still exists.
        try:
            source = (Image.select()
                      .join(ImageStorage)
                      .where(Image.id == existing_image.id)
                      .get())

            logger.debug(
                "Linking image to existing storage with docker id: %s and uuid: %s",
                existing_image.docker_image_id, source.storage.uuid)

            ancestry = __translate_ancestry(source.ancestors, translations,
                                            repo_obj, username,
                                            preferred_location)
            shared_storage = source.storage

            # The direct parent is the last entry of the translated ancestry, if any.
            if ancestry != "/":
                parent_id = int(ancestry.split("/")[-2])
            else:
                parent_id = None

            linked = Image.create(
                docker_image_id=existing_image.docker_image_id,
                repository=repo_obj,
                storage=shared_storage,
                ancestors=ancestry,
                command=existing_image.command,
                created=existing_image.created,
                comment=existing_image.comment,
                v1_json_metadata=existing_image.v1_json_metadata,
                aggregate_size=existing_image.aggregate_size,
                parent=parent_id,
                v1_checksum=existing_image.v1_checksum,
            )

            logger.debug("Storing translation %s -> %s", existing_image.id,
                         linked.id)
            translations[existing_image.id] = linked.id
            return linked
        except Image.DoesNotExist:
            return None
Ejemplo n.º 21
0
def test_get_matching_tags(max_subqueries, max_image_lookup_count, initialized_db):
  """ With the tag lookup limits patched to the given values, get_matching_tags must return
      the expected tags for every image, and each expected tag must map to one OCI Tag row.
  """
  with patch('data.model.tag._MAX_SUB_QUERIES', max_subqueries), \
       patch('data.model.tag._MAX_IMAGE_LOOKUP_COUNT', max_image_lookup_count):
    # Test for every image in the test database.
    for image in Image.select(Image, ImageStorage).join(ImageStorage):
      found = get_matching_tags(image.docker_image_id, image.storage.uuid)
      found_ids = {tag.id for tag in found}
      expected_tags = _get_expected_tags(image)
      assert found_ids == expected_tags, "mismatch for image %s" % image.id

      oci_query = (Tag
                   .select()
                   .join(TagToRepositoryTag)
                   .where(TagToRepositoryTag.repository_tag << expected_tags))
      assert len(list(oci_query)) == len(expected_tags)
Ejemplo n.º 22
0
def test_get_matching_tag_ids_for_images(max_subqueries, max_image_lookup_count, initialized_db):
  """ With the tag lookup limits patched to the given values, looking up tags for the first
      5, 10 and 15 images must return exactly the expected tags of those images.
  """
  with patch('data.model.tag._MAX_SUB_QUERIES', max_subqueries), \
       patch('data.model.tag._MAX_IMAGE_LOOKUP_COUNT', max_image_lookup_count):
    # Try for various sets of the first N images.
    for limit in (5, 10, 15):
      pairs = []
      expected_tags_ids = set()
      images = Image.select(Image, ImageStorage).join(ImageStorage)
      for position, image in enumerate(images):
        # One pair is collected per iteration, so the position equals the pair count so far.
        if position >= limit:
          break

        pairs.append((image.docker_image_id, image.storage.uuid))
        expected_tags_ids.update(_get_expected_tags(image))

      matching_tags_ids = {tag.id for tag in get_matching_tags_for_images(pairs)}
      assert matching_tags_ids == expected_tags_ids
Ejemplo n.º 23
0
def purge_repository(namespace_name, repository_name):
  """ Removes the named repository and everything that refers to it, then runs the configured
      repository cleanup callbacks.

      Returns True when the repository row was deleted, and False when the repository cannot
      be found (either up front or when re-fetched after its contents were purged). Garbage
      collection is incremental and repeatable, so this return value does not need to be
      checked or responded to.
  """
  try:
    repo = _basequery.get_existing_repository(namespace_name, repository_name)
  except Repository.DoesNotExist:
    return False

  assert repo.name == repository_name

  if repo.kind.name != 'application':
    # GC to remove the images and storage.
    _purge_repository_contents(repo)
  else:
    # Delete the repository of all Appr-referenced entries.
    # Note that new-model Tag's must be deleted in *two* passes, as they can reference parent
    # tags, and MySQL is... particular... about such relationships when deleting.
    linked_tags = ApprTag.delete().where(ApprTag.repository == repo,
                                         ~(ApprTag.linked_tag >> None))
    linked_tags.execute()
    ApprTag.delete().where(ApprTag.repository == repo).execute()

  # Ensure there are no additional tags, manifests, images or blobs in the repository.
  for content_model in (ApprTag, Tag, RepositoryTag, Manifest, ManifestBlob, Image):
    assert content_model.select().where(content_model.repository == repo).count() == 0

  # Delete the rest of the repository metadata, provided the repository still exists.
  try:
    current = _basequery.get_existing_repository(namespace_name, repository_name)
  except Repository.DoesNotExist:
    return False

  current.delete_instance(recursive=True, delete_nullable=False)

  # Run callbacks
  for cleanup in config.repo_cleanup_callbacks:
    cleanup(namespace_name, repository_name)

  return True
Ejemplo n.º 24
0
Archivo: tag.py Proyecto: xzwupeng/quay
def lookup_notifiable_tags_for_legacy_image(docker_image_id, storage_uuid,
                                            event_name):
    """ Generator over the tags returned by ``tags_containing_legacy_image`` for every image
    that has the given docker image ID and storage UUID and whose repository has a
    notification registered for the event named ``event_name``.
    """
    event = ExternalNotificationEvent.get(name=event_name)
    candidates = Image.select().join(ImageStorage).where(
        Image.docker_image_id == docker_image_id,
        ImageStorage.uuid == storage_uuid)

    for image in list(candidates):
        # Ensure the image is under a repository that supports the event.
        try:
            RepositoryNotification.get(repository=image.repository_id,
                                       event=event)
        except RepositoryNotification.DoesNotExist:
            pass
        else:
            # Found in a repository with the valid event: yield the tag(s) that contain
            # the image.
            for tag in tags_containing_legacy_image(image):
                yield tag
Ejemplo n.º 25
0
def test_retriever(initialized_db):
    """ Writes a schema 2 manifest with a config blob and two layer blobs into a new
    repository, removes Image rows that are only reachable through hidden tags, and checks
    that RepositoryContentRetriever still serves the manifest and each of its blobs.
    """
    repository = create_repository("devtable", "newrepo", None)

    history_entry = {
        "created": "2018-04-03T18:37:09.284840891Z",
        "created_by": "do something",
    }
    layer_json = json.dumps({
        "config": {},
        "rootfs": {
            "type": "layers",
            "diff_ids": [],
        },
        "history": [history_entry, history_entry],
    })

    # Add a blob containing the config.
    _, config_digest = _populate_blob(layer_json)

    # Add a blob of random data.
    random_data = "hello world"
    _, random_digest = _populate_blob(random_data)

    # Add another blob of random data.
    other_random_data = "hi place"
    _, other_random_digest = _populate_blob(other_random_data)

    remote_digest = sha256_digest(b"something")

    def utf8_size(text):
        return len(text.encode("utf-8"))

    builder = DockerSchema2ManifestBuilder()
    builder.set_config_digest(config_digest, utf8_size(layer_json))
    builder.add_layer(other_random_digest, utf8_size(other_random_data))
    builder.add_layer(random_digest, utf8_size(random_data))
    manifest = builder.build()

    populated_digests = (config_digest, random_digest, other_random_digest)
    for digest in populated_digests:
        assert digest in manifest.blob_digests

    for digest in populated_digests:
        assert digest in manifest.local_blob_digests

    # Write the manifest.
    created_tuple = get_or_create_manifest(repository, manifest, storage)
    assert created_tuple is not None

    created_manifest = created_tuple.manifest
    assert created_manifest
    assert created_manifest.media_type.name == manifest.media_type
    assert created_manifest.digest == manifest.digest

    # Verify the linked blobs.
    linked_blobs = ManifestBlob.select().where(
        ManifestBlob.manifest == created_manifest)
    blob_digests = {link.blob.content_checksum for link in linked_blobs}

    assert random_digest in blob_digests
    assert other_random_digest in blob_digests
    assert config_digest in blob_digests

    # Delete any Image rows linking to the blobs from temp tags.
    for blob_digest in blob_digests:
        storage_row = ImageStorage.get(content_checksum=blob_digest)
        images_on_blob = list(Image.select().where(Image.storage == storage_row))
        for image in images_on_blob:
            tags_on_image = RepositoryTag.select().where(
                RepositoryTag.image == image)
            only_hidden_tags = all([tag.hidden for tag in tags_on_image])
            if not only_hidden_tags:
                continue

            RepositoryTag.delete().where(
                RepositoryTag.image == image).execute()
            image.delete_instance(recursive=True)

    # Verify the blobs in the retriever.
    retriever = RepositoryContentRetriever(repository, storage)
    served_manifest = retriever.get_manifest_bytes_with_digest(
        created_manifest.digest)
    assert served_manifest == manifest.bytes.as_encoded_str()

    for blob_digest in blob_digests:
        assert retriever.get_blob_bytes_with_digest(blob_digest) is not None
0
def delete_broken_layers():
    """
    Interactively deletes every tag (and, through GC, every image) that references a broken
    storage.

    The operator is prompted twice: once to confirm the registry is stopped ("GO") and once
    to confirm the permanent deletion ("DELETENOW"). Any other answer aborts without changes.
    The storages returned by `find_broken_storages()` are looked up in batches, the tags
    matching the resulting images are collected, their `lifetime_end_ts` is set to 0, and
    garbage collection is run on the repository of each collected tag.

    Returns nothing; progress and results are printed to stdout.
    """
    # NOTE: single-argument print(...) calls produce the same output on Python 2 (where this
    # script's `raw_input` lives) and also parse on Python 3.
    result = raw_input('Please make sure your registry is not running and enter "GO" to continue: ')
    if result != "GO":
        print("Declined to run")
        return

    broken_storages = find_broken_storages()
    if not broken_storages:
        print("No broken layers found")
        return

    # Find all the images referencing the broken layers.
    print("Finding broken images...")
    IMAGE_BATCH_SIZE = 100

    all_images = []
    # Step through the storages by batch size. Iterating over `len / batch size` batches
    # would floor the count and silently drop the final, partial batch (and *every* storage
    # when there are fewer than IMAGE_BATCH_SIZE of them).
    for start in tqdm(range(0, len(broken_storages), IMAGE_BATCH_SIZE)):
        end = start + IMAGE_BATCH_SIZE

        images = (
            Image.select().join(ImageStorage).where(Image.storage << broken_storages[start:end])
        )
        all_images.extend(images)

    if not all_images:
        print("No broken layers found")
        return

    # Find all the tags containing the images. Keyed by tag ID so that a tag reachable
    # through several broken images is only listed (and processed) once.
    print("Finding associated tags for %s images..." % len(all_images))
    all_tags = {}
    for image in tqdm(all_images):
        query = model.tag.get_matching_tags(
            image.docker_image_id, image.storage.uuid, RepositoryTag
        )
        for tag in query:
            all_tags[tag.id] = tag

    # Ask to delete them.
    print("")
    print("The following tags were found to reference invalid images:")
    for tag in all_tags.values():
        print("%s/%s: %s" % (tag.repository.namespace_user.username, tag.repository.name, tag.name))

    if not all_tags:
        print("(Tags in time machine)")

    print("")
    result = raw_input(
        'Enter "DELETENOW" to delete these tags and ALL associated images (THIS IS PERMANENT): '
    )
    if result != "DELETENOW":
        print("Declined to delete")
        return

    print("")
    print("Marking tags to be GCed...")
    for tag in tqdm(all_tags.values()):
        # An end timestamp of 0 is written so the GC pass below picks the tag up.
        tag.lifetime_end_ts = 0
        tag.save()

    print("GCing all repositories...")
    for tag in tqdm(all_tags.values()):
        model.repository.garbage_collect_repo(tag.repository)

    print("All done! You may now restart your registry.")
Ejemplo n.º 27
0
def assert_gc_integrity(expect_storage_removed=True):
    """
    Checks that a garbage collection run leaves the database and blob storage consistent.

    This is a generator. Everything before the `yield` snapshots state; the caller performs
    the GC work at the `yield`; everything after it asserts that:

      * the dangling storage, label and manifest counts are unchanged,
      * no image reported to the cleanup callback still exists with the same Docker image ID,
      * the storage of a removed image is gone unless an Image, ManifestBlob or UploadedBlob
        row still points at it,
      * content for CAS storages and Appr blobs that did not exist beforehand can be fetched
        from the preferred storage location,
      * the blobs linked to each tagged manifest equal the manifest's local blob digests.

    `expect_storage_removed` is accepted but not read by this function's body.
    """
    # Every image handed to the cleanup callback while the caller runs is collected here.
    removed_image_storages = []
    remove_callback = model.config.register_image_cleanup_callback(
        removed_image_storages.extend)

    # Digests that exist up front are excluded from the storage-engine check at the end,
    # since they were likely created as test data.
    existing_digests = {
        row.content_checksum for row in ImageStorage.select() if row.cas_path
    }
    existing_digests.update(blob.digest for blob in ApprBlob.select())

    # Baseline dangling-object counts.
    storage_count_before = _get_dangling_storage_count()
    label_count_before = _get_dangling_label_count()
    manifest_count_before = _get_dangling_manifest_count()

    # Hand control to the GC test; always unregister the callback afterwards.
    with check_transitive_modifications():
        try:
            yield
        finally:
            remove_callback()

    # The dangling counts must match the baseline.
    storage_count_after = _get_dangling_storage_count()
    assert storage_count_after == storage_count_before

    label_count_after = _get_dangling_label_count()
    assert label_count_after == label_count_before, _get_dangling_labels()

    manifest_count_after = _get_dangling_manifest_count()
    assert manifest_count_after == manifest_count_before

    # Each image passed to the cleanup callback, and its storage, must be gone.
    for removed in removed_image_storages:
        assert isinstance(removed, Image)

        # NOTE: SQLite can and will reuse AUTOINCREMENT IDs occasionally, so a row with the
        # same ID is tolerated as long as it carries a different Docker Image ID.
        # See: https://www.sqlite.org/autoinc.html
        try:
            same_id_row = Image.get(id=removed.id)
        except Image.DoesNotExist:
            same_id_row = None

        if same_id_row is not None:
            assert same_id_row.docker_image_id != removed.docker_image_id, (
                "Found unexpected removed image %s under repo %s" % (
                    same_id_row.id,
                    same_id_row.repository,
                ))

        # A storage may only survive if something still references it. The checks are
        # evaluated lazily, in order: images, then manifest blobs, then uploaded blobs.
        storage_id = removed.storage_id
        still_referenced = (
            Image.select().where(Image.storage == storage_id).count() != 0
            or ManifestBlob.select().where(
                ManifestBlob.blob == storage_id).count() != 0
            or UploadedBlob.select().where(
                UploadedBlob.blob == storage_id).count() != 0
        )
        if still_referenced:
            continue

        with pytest.raises(ImageStorage.DoesNotExist):
            ImageStorage.get(id=storage_id)

        with pytest.raises(ImageStorage.DoesNotExist):
            ImageStorage.get(uuid=removed.storage.uuid)

    # Fetch the content of every CAS storage that was not present before the test.
    preferred = storage.preferred_locations[0]
    for row in ImageStorage.select():
        if row.content_checksum not in existing_digests and row.cas_path:
            blob_location = storage.blob_path(row.content_checksum)
            storage.get_content({preferred}, blob_location)

    for blob in ApprBlob.select():
        if blob.digest not in existing_digests:
            storage.get_content({preferred}, storage.blob_path(blob.digest))

    # Every tagged manifest must link exactly the blobs it declares as local.
    tagged_manifests = {tag.manifest for tag in Tag.select()}
    for manifest in tagged_manifests:
        linked_rows = ManifestBlob.select().where(
            ManifestBlob.manifest == manifest)
        found_blobs = set(link.blob.content_checksum for link in linked_rows)

        manifest_bytes = Bytes.for_string_or_unicode(manifest.manifest_bytes)
        parsed = parse_manifest_from_bytes(manifest_bytes,
                                           manifest.media_type.name)
        assert set(parsed.local_blob_digests) == found_blobs
Ejemplo n.º 28
0
def assert_gc_integrity(expect_storage_removed=True, check_oci_tags=True):
    """ Generator that verifies a GC run left the database and blob storage consistent.

      Before the `yield` it registers an image cleanup callback and records the dangling
      storage, label and manifest counts. After the caller's work at the `yield` it asserts
      that those counts are unchanged, that every image passed to the callback is gone, that
      the storage of a removed image is gone unless an Image or ManifestBlob row still points
      at it, fetches the content of all CAS storages and Appr blobs, optionally (when
      `check_oci_tags` is true) asserts every Tag row is referenced by a TagToRepositoryTag
      row, and asserts each tagged manifest links exactly its local blob digests.

      `expect_storage_removed` is accepted but not read by this function's body.
  """
    # Collect the images reported through the cleanup callback.
    removed_image_storages = []
    model.config.register_image_cleanup_callback(removed_image_storages.extend)

    # Baseline counts of dangling objects, taken before the caller runs.
    storage_count_before = _get_dangling_storage_count()
    label_count_before = _get_dangling_label_count()
    manifest_count_before = _get_dangling_manifest_count()
    yield

    # None of the dangling counts may have moved.
    storage_count_after = _get_dangling_storage_count()
    assert storage_count_after == storage_count_before

    label_count_after = _get_dangling_label_count()
    assert label_count_after == label_count_before, _get_dangling_labels()

    manifest_count_after = _get_dangling_manifest_count()
    assert manifest_count_after == manifest_count_before

    # Each image reported as removed must no longer exist, and neither may its storage
    # unless the storage is still referenced elsewhere.
    for removed in removed_image_storages:
        with pytest.raises(Image.DoesNotExist):
            Image.get(id=removed.id)

        # Evaluated lazily, in order: other images first, then manifest blobs.
        storage_id = removed.storage_id
        still_referenced = (
            Image.select().where(Image.storage == storage_id).count() != 0
            or ManifestBlob.select().where(
                ManifestBlob.blob == storage_id).count() != 0
        )
        if still_referenced:
            continue

        with pytest.raises(ImageStorage.DoesNotExist):
            ImageStorage.get(id=storage_id)

        with pytest.raises(ImageStorage.DoesNotExist):
            ImageStorage.get(uuid=removed.storage.uuid)

    # Fetch the content of every CAS storage and Appr blob from the preferred location.
    preferred = storage.preferred_locations[0]
    for row in ImageStorage.select():
        if not row.cas_path:
            continue

        blob_location = storage.blob_path(row.content_checksum)
        storage.get_content({preferred}, blob_location)

    for blob in ApprBlob.select():
        storage.get_content({preferred}, storage.blob_path(blob.digest))

    # Every OCI tag must be referenced by a TagToRepositoryTag row.
    if check_oci_tags:
        oci_tags = {tag.id for tag in Tag.select()}
        referenced_oci_tags = {
            link.tag_id for link in TagToRepositoryTag.select()
        }
        assert oci_tags.issubset(referenced_oci_tags)

    # Every tagged manifest must link exactly the blobs it declares as local.
    tagged_manifests = {tag.manifest for tag in Tag.select()}
    for manifest in tagged_manifests:
        linked_rows = ManifestBlob.select().where(
            ManifestBlob.manifest == manifest)
        found_blobs = set(link.blob.content_checksum for link in linked_rows)

        manifest_bytes = Bytes.for_string_or_unicode(manifest.manifest_bytes)
        parsed = parse_manifest_from_bytes(manifest_bytes,
                                           manifest.media_type.name)
        assert set(parsed.local_blob_digests) == found_blobs
Ejemplo n.º 29
0
Archivo: gc.py Proyecto: kleesc/quay
def _purge_repository_contents(repo):
    """
    Removes every tag, uploaded blob, manifest and image that belongs to the given repository.

    Tag rows, UploadedBlob rows and RepositoryTag rows are purged (in that order) in chunks,
    running garbage collection after each chunk, until no rows remain. The function then
    asserts the repository has no Tag, RepositoryTag, Manifest, ManifestBlob or UploadedBlob
    rows left and repeatedly garbage collects the remaining images.

    Raises:
        Exception: if a garbage collection pass over the remaining images does not reduce
            the number of images in the repository.
    """
    logger.debug("Purging repository %s", repo)

    def purge_until_empty(build_query, found_message, deleting_message,
                          purge_row):
        # Runs chunked purge passes over a freshly built query until a pass yields no chunk.
        while True:
            saw_chunk = False
            for chunk in _chunk_iterate_for_deletion(build_query()):
                logger.debug(found_message, len(chunk), repo)
                saw_chunk = True
                gc_context = _GarbageCollectorContext(repo)
                for row in chunk:
                    logger.debug(deleting_message, row, repo)
                    assert row.repository_id == repo.id
                    purge_row(row, gc_context, allow_non_expired=True)

                _run_garbage_collection(gc_context)

            if not saw_chunk:
                return

    # Purge via all the tags.
    purge_until_empty(
        lambda: Tag.select().where(Tag.repository == repo),
        "Found %s tags to GC under repository %s",
        "Deleting tag %s under repository %s",
        _purge_oci_tag,
    )

    # Purge the repository's uploaded blobs (allow_non_expired=True is passed, so they are
    # not required to have expired).
    purge_until_empty(
        lambda: UploadedBlob.select().where(UploadedBlob.repository == repo),
        "Found %s uploaded blobs to GC under repository %s",
        "Deleting uploaded blob %s under repository %s",
        _purge_uploaded_blob,
    )

    # TODO: remove this once we've removed the foreign key constraints from RepositoryTag
    # and Image.
    purge_until_empty(
        lambda: RepositoryTag.select().where(RepositoryTag.repository == repo),
        "Found %s tags to GC under repository %s",
        "Deleting tag %s under repository %s",
        _purge_pre_oci_tag,
    )

    # Nothing but images may be left at this point.
    assert Tag.select().where(Tag.repository == repo).count() == 0
    assert RepositoryTag.select().where(
        RepositoryTag.repository == repo).count() == 0
    assert Manifest.select().where(Manifest.repository == repo).count() == 0
    assert ManifestBlob.select().where(
        ManifestBlob.repository == repo).count() == 0
    assert UploadedBlob.select().where(
        UploadedBlob.repository == repo).count() == 0

    def remaining_image_count():
        # Number of Image rows still attached to the repository.
        return Image.select().where(Image.repository == repo).count()

    # Feed all remaining images into a fresh context per pass. This is done last to minimize
    # the number of images that need to be loaded.
    while True:
        image_context = _GarbageCollectorContext(repo)

        count_before = remaining_image_count()
        if not count_before:
            break

        for image in Image.select().where(Image.repository == repo):
            logger.debug("Trying to delete image %s under repository %s",
                         image, repo)
            assert image.repository_id == repo.id
            image_context.add_legacy_image_id(image.id)

        _run_garbage_collection(image_context)

        # Each pass must make progress, otherwise this loop would never terminate.
        if remaining_image_count() >= count_before:
            raise Exception("GC purge bug! Please report this to support!")
Ejemplo n.º 30
0
Archivo: tag.py Proyecto: zhill/quay
def get_matching_tags_for_images(image_pairs,
                                 filter_images=None,
                                 filter_tags=None,
                                 selections=None):
    """ Returns all tags that contain the images with the given docker_image_id and storage_uuid,
      as specified as an iterable of pairs.

      image_pairs: iterable of hashable (docker_image_id, storage_uuid) pairs. Any iterable is
        accepted (list, tuple, set, generator, ...); it is materialized into a list once.
      filter_images: optional callable that receives each image lookup query and returns the
        query to execute in its place.
      filter_tags: optional callable that receives each tag query and returns the query to
        execute in its place.
      selections: optional sequence of selections passed to `RepositoryTag.select()`.

      Returns a list of the matching non-hidden RepositoryTag rows (as further restricted by
      `_tag_alive`), de-duplicated by tag ID. The list is empty when no pairs are given or
      nothing matches.
  """
    if not image_pairs:
        return []

    # Materialize the pairs: the batching below needs slicing, and a generator would
    # otherwise be consumed by the set() call (and is always truthy, even when empty).
    image_pairs = list(image_pairs)
    if not image_pairs:
        return []

    image_pairs_set = set(image_pairs)

    # Find all possible matching image+storages, looking up a bounded number of pairs per
    # query.
    images = []

    for start in range(0, len(image_pairs), _MAX_IMAGE_LOOKUP_COUNT):
        image_pairs_slice = image_pairs[start:start + _MAX_IMAGE_LOOKUP_COUNT]

        ids = [pair[0] for pair in image_pairs_slice]
        uuids = [pair[1] for pair in image_pairs_slice]

        images_query = (Image.select(
            Image.id, Image.docker_image_id, Image.ancestors,
            ImageStorage.uuid).join(ImageStorage).where(
                Image.docker_image_id << ids,
                ImageStorage.uuid << uuids).switch(Image))

        if filter_images is not None:
            images_query = filter_images(images_query)

        images.extend(list(images_query))

    # Filter down to those images actually in the pairs set and build the set of queries to run.
    individual_image_queries = []

    for img in images:
        # Make sure the image found is in the set of those requested, and that we haven't already
        # processed it. We need this check because the query above matches any image whose ID is
        # among the requested IDs and whose storage UUID is among the requested UUIDs, so it can
        # return an ID+UUID combination that was never requested as a pair. We do this for
        # efficiency reasons, and it is highly unlikely we'll find an image with a mismatch, but
        # we need this check to be absolutely sure.
        pair = (img.docker_image_id, img.storage.uuid)
        if pair not in image_pairs_set:
            continue

        # Remove the pair so we don't try it again.
        image_pairs_set.remove(pair)

        # Match the image itself plus every image whose ancestry path passes through it.
        ancestors_str = "%s%s/%%" % (img.ancestors, img.id)
        query = Image.select(
            Image.id).where((Image.id == img.id)
                            | (Image.ancestors**ancestors_str))

        individual_image_queries.append(query)

    if not individual_image_queries:
        return []

    # Shard based on the max subquery count. This is used to prevent going over the DB's max query
    # size, as well as to prevent the DB from locking up on a massive query.
    sharded_queries = [
        _basequery.reduce_as_tree(
            individual_image_queries[start:start + _MAX_SUB_QUERIES])
        for start in range(0, len(individual_image_queries), _MAX_SUB_QUERIES)
    ]

    # Collect the tags found for each query, keyed by ID so that a tag matched by several
    # shards is only returned once.
    tags = {}
    for query in sharded_queries:
        ImageAlias = Image.alias()
        # `== False` is intentional: it builds the SQL comparison expression.
        tag_query = _tag_alive(
            RepositoryTag.select(
                *(selections or [])).distinct().join(ImageAlias).where(
                    RepositoryTag.hidden == False).where(  # noqa: E712
                        ImageAlias.id << query).switch(RepositoryTag))

        if filter_tags is not None:
            tag_query = filter_tags(tag_query)

        for tag in tag_query:
            tags[tag.id] = tag

    # Always hand back a list, matching the early-return paths above.
    return list(tags.values())