Example #1
0
def _lookup_repo_storages_by_content_checksum(repo, checksums, model_class):
    """
    Returns a query yielding one ImageStorage row (without placements) per unique
    checksum in `checksums`, restricted to storages referenced by `model_class`
    rows in the given repository.

    `model_class` must expose a `repository` field and be joinable from
    ImageStorage (e.g. Image or ManifestBlob).
    """
    assert checksums

    # There may be many duplicates of the checksums, so for performance reasons we are going
    # to use a union to select just one storage with each checksum. Deduplicate up front so
    # repeated checksums don't each emit a redundant union branch.
    queries = []

    for counter, checksum in enumerate(set(checksums)):
        query_alias = "q{0}".format(counter)

        candidate_subq = (
            ImageStorage.select(
                ImageStorage.id,
                ImageStorage.content_checksum,
                ImageStorage.image_size,
                ImageStorage.uuid,
                ImageStorage.cas_path,
                ImageStorage.uncompressed_size,
                ImageStorage.uploading,
            )
            .join(model_class)
            .where(model_class.repository == repo, ImageStorage.content_checksum == checksum)
            .limit(1)
            .alias(query_alias)
        )

        queries.append(ImageStorage.select(SQL("*")).from_(candidate_subq))

    assert queries
    return _basequery.reduce_as_tree(queries)
Example #2
0
def _orphaned_storage_query(candidate_ids):
    """ Returns the subset of the candidate ImageStorage IDs representing storages that are no
      longer referenced by images or manifest blobs.

      NOTE(review): `candidate_ids` must be a set (the final difference below relies on
      set subtraction) — confirm against callers.
  """
    # Issue a union query to find all storages that are still referenced by a candidate storage. This
    # is much faster than the group_by and having call we used to use here.
    nonorphaned_queries = []
    for counter, candidate_id in enumerate(candidate_ids):
        # Two subqueries are generated per candidate; give each its own alias so no
        # two union branches share one.
        image_alias = 'qi{0}'.format(counter)
        manifest_alias = 'qm{0}'.format(counter)

        # TODO: remove the join with Image once fully on the OCI data model.
        storage_subq = (ImageStorage.select(ImageStorage.id).join(Image).where(
            ImageStorage.id == candidate_id).limit(1).alias(image_alias))

        nonorphaned_queries.append(
            ImageStorage.select(SQL('*')).from_(storage_subq))

        manifest_storage_subq = (ImageStorage.select(
            ImageStorage.id).join(ManifestBlob).where(
                ImageStorage.id == candidate_id).limit(1).alias(manifest_alias))

        nonorphaned_queries.append(
            ImageStorage.select(SQL('*')).from_(manifest_storage_subq))

    # Build the set of storages that are still referenced; any candidate not in this
    # set is orphaned.
    nonorphaned_storage_ids = {
        storage.id
        for storage in _basequery.reduce_as_tree(nonorphaned_queries)
    }
    return list(candidate_ids - nonorphaned_storage_ids)
Example #3
0
def lookup_repo_storages_by_content_checksum(repo,
                                             checksums,
                                             by_manifest=False):
    """
    Looks up repository storages (without placements) matching the given repository and checksum.

    Returns a query yielding at most one ImageStorage per unique checksum, or [] when
    no checksums are given.
    """
    if not checksums:
        return []

    # TODO: Remove the Image-based path once we have a new-style model for tracking temp
    # uploaded blobs and all legacy tables have been removed.
    # Both branches build the identical subquery; only the joined model (and thus the
    # repository field checked) differs, so select the model once instead of duplicating
    # the query construction.
    model_class = ManifestBlob if by_manifest else Image

    # There may be many duplicates of the checksums, so for performance reasons we are going
    # to use a union to select just one storage with each checksum
    queries = []

    for counter, checksum in enumerate(set(checksums)):
        query_alias = "q{0}".format(counter)

        candidate_subq = (ImageStorage.select(
            ImageStorage.id,
            ImageStorage.content_checksum,
            ImageStorage.image_size,
            ImageStorage.uuid,
            ImageStorage.cas_path,
            ImageStorage.uncompressed_size,
            ImageStorage.uploading,
        ).join(model_class).where(
            model_class.repository == repo,
            ImageStorage.content_checksum == checksum).limit(1).alias(query_alias))

        queries.append(ImageStorage.select(SQL("*")).from_(candidate_subq))

    return _basequery.reduce_as_tree(queries)
Example #4
0
File: tag.py  Project: zhill/quay
def get_matching_tags_for_images(image_pairs,
                                 filter_images=None,
                                 filter_tags=None,
                                 selections=None):
    """ Returns all tags that contain the images with the given docker_image_id and storage_uuid,
      as specified as an iterable of pairs.

      image_pairs: iterable of (docker_image_id, storage_uuid) pairs.
      filter_images: optional callable applied to the image lookup query before execution.
      filter_tags: optional callable applied to each tag query before execution.
      selections: optional iterable of extra selections for the RepositoryTag query.

      Returns the matched tags as a view of de-duplicated RepositoryTag rows
      (a dict-values view keyed by tag id).
  """
    if not image_pairs:
        return []

    # Keep the requested pairs as a set so matches found below can be verified and
    # de-duplicated in O(1).
    image_pairs_set = set(image_pairs)

    # Find all possible matching image+storages.
    images = []

    # Look up candidate images in slices of _MAX_IMAGE_LOOKUP_COUNT pairs to keep each
    # query under the DB's size limits.
    while image_pairs:
        image_pairs_slice = image_pairs[:_MAX_IMAGE_LOOKUP_COUNT]

        ids = [pair[0] for pair in image_pairs_slice]
        uuids = [pair[1] for pair in image_pairs_slice]

        # NOTE: this matches any image whose docker_image_id is in `ids` AND whose
        # storage uuid is in `uuids` — not necessarily the exact requested pairing;
        # the pair check below filters out any cross-product mismatches.
        images_query = (Image.select(
            Image.id, Image.docker_image_id, Image.ancestors,
            ImageStorage.uuid).join(ImageStorage).where(
                Image.docker_image_id << ids,
                ImageStorage.uuid << uuids).switch(Image))

        if filter_images is not None:
            images_query = filter_images(images_query)

        images.extend(list(images_query))
        image_pairs = image_pairs[_MAX_IMAGE_LOOKUP_COUNT:]

    # Filter down to those images actually in the pairs set and build the set of queries to run.
    individual_image_queries = []

    for img in images:
        # Make sure the image found is in the set of those requested, and that we haven't already
        # processed it. We need this check because the query above checks for images with matching
        # IDs OR storage UUIDs, rather than the expected ID+UUID pair. We do this for efficiency
        # reasons, and it is highly unlikely we'll find an image with a mismatch, but we need this
        # check to be absolutely sure.
        pair = (img.docker_image_id, img.storage.uuid)
        if pair not in image_pairs_set:
            continue

        # Remove the pair so we don't try it again.
        image_pairs_set.remove(pair)

        # Select the image itself plus every image that has it as an ancestor; ancestors
        # appear to be stored as a '/'-delimited id path, matched here with a LIKE prefix
        # (the `**` operator) — TODO confirm the ancestors encoding.
        ancestors_str = "%s%s/%%" % (img.ancestors, img.id)
        query = Image.select(
            Image.id).where((Image.id == img.id)
                            | (Image.ancestors**ancestors_str))

        individual_image_queries.append(query)

    if not individual_image_queries:
        return []

    # Shard based on the max subquery count. This is used to prevent going over the DB's max query
    # size, as well as to prevent the DB from locking up on a massive query.
    sharded_queries = []
    while individual_image_queries:
        shard = individual_image_queries[:_MAX_SUB_QUERIES]
        sharded_queries.append(_basequery.reduce_as_tree(shard))
        individual_image_queries = individual_image_queries[_MAX_SUB_QUERIES:]

    # Collect IDs of the tags found for each query. Tags are keyed by id so a tag found
    # by multiple shards is only returned once.
    tags = {}
    for query in sharded_queries:
        ImageAlias = Image.alias()
        # _tag_alive presumably restricts to non-expired tags — defined elsewhere in tag.py.
        tag_query = _tag_alive(
            RepositoryTag.select(
                *(selections or [])).distinct().join(ImageAlias).where(
                    RepositoryTag.hidden == False).where(
                        ImageAlias.id << query).switch(RepositoryTag))

        if filter_tags is not None:
            tag_query = filter_tags(tag_query)

        for tag in tag_query:
            tags[tag.id] = tag

    return tags.values()