Exemple #1
0
def test_manifest_v2_shared_config_and_blobs(app, default_tag_policy):
    """
    Garbage collecting a tag whose V2 manifest shares its config blob and some layer blobs with
    another, still-tagged manifest must leave the shared blobs untouched.
    """
    repo = model.repository.create_repository("devtable", "newrepo", None)

    # Two manifests that only differ by the differentiation field.
    kept_manifest, kept_built = create_manifest_for_testing(
        repo, differentiation_field="1", include_shared_blob=True)
    dropped_manifest, dropped_built = create_manifest_for_testing(
        repo, differentiation_field="2", include_shared_blob=True)

    # Precondition: the manifests overlap in at least one local blob and use
    # the very same config blob.
    overlapping_digests = set(kept_built.local_blob_digests).intersection(
        dropped_built.local_blob_digests)
    assert overlapping_digests
    assert kept_built.config.digest == dropped_built.config.digest

    # Point one tag at each manifest.
    model.oci.tag.retarget_tag("tag1", kept_manifest)
    model.oci.tag.retarget_tag("tag2", dropped_manifest)

    # Remove tag2 and collect garbage while the integrity checks are active.
    with assert_gc_integrity(expect_storage_removed=True):
        model.oci.tag.delete_tag(repo, "tag2")
        assert gc_now(repo)

    # Every blob referenced by the surviving manifest must still have a row
    # and must still be readable from the preferred storage location.
    preferred = storage.preferred_locations[0]
    for digest in kept_built.local_blob_digests:
        blob_row = ImageStorage.get(content_checksum=digest)
        assert blob_row.cas_path

        blob_path = storage.blob_path(blob_row.content_checksum)
        storage.get_content({preferred}, blob_path)
Exemple #2
0
def test_manifest_backfill_broken_tag(clear_rows, initialized_db):
    """
    Backfilling must succeed for a tag whose image references missing parent images.
    """
    # Clear out the existing tag manifests (labels first) so the tag can be reused.
    TagManifestLabel.delete().execute()
    TagManifest.delete().execute()

    # Build an image whose ancestor chain points at an ID that is not created
    # here, and tag it.
    repo = model.repository.get_repository("devtable", "gargantuan")
    any_storage = ImageStorage.get()
    orphaned_image = Image.create(
        docker_image_id="foo",
        repository=repo,
        ancestors="/348723847234/",
        storage=any_storage,
    )
    orphaned_tag = RepositoryTag.create(
        repository=repo,
        image=orphaned_image,
        name="broken",
    )

    # Run the backfill on the broken tag.
    assert backfill_tag(orphaned_tag)

    # The backfill must have produced a tag manifest and a mapped manifest row
    # carrying the same bytes, despite the broken image chain.
    legacy_manifest = TagManifest.get(tag=orphaned_tag)
    mapping = TagManifestToManifest.get(tag_manifest=legacy_manifest)
    backfilled_manifest = mapping.manifest
    assert backfilled_manifest.manifest_bytes == legacy_manifest.json_data

    # The new-style tag must exist and point at the backfilled manifest.
    new_tag = TagToRepositoryTag.get(repository_tag=orphaned_tag).tag
    assert new_tag.name == "broken"
    assert new_tag.manifest == backfilled_manifest
Exemple #3
0
def get_or_create_shared_blob(digest, byte_data, storage):
    """
    Fetch the ImageStorage row for `digest`, creating it (and uploading `byte_data` to the
    storage engine's first preferred location) when no such row exists yet.

    Only intended for globally accessible shared blobs, such as the special empty gzipped tar
    layer that Docker no longer pushes to us.

    Returns the existing or newly created ImageStorage row. If writing the content or creating
    the placement fails, the freshly created row is deleted and the error is re-raised.
    """
    assert digest
    assert byte_data is not None and isinstance(byte_data, bytes)
    assert storage

    try:
        return ImageStorage.get(content_checksum=digest)
    except ImageStorage.DoesNotExist:
        target_location = storage.preferred_locations[0]
        location_row = ImageStorageLocation.get(name=target_location)

        blob_row = ImageStorage.create(
            image_size=len(byte_data),
            content_checksum=digest,
        )

        try:
            storage.put_content(
                [target_location],
                storage_model.get_layer_path(blob_row),
                byte_data,
            )
            ImageStoragePlacement.create(storage=blob_row,
                                         location=location_row)
        except BaseException:
            # Catch everything (including interrupts) so that a row without
            # backing content is never left behind; the error is re-raised.
            logger.exception("Exception when trying to write special layer %s",
                             digest)
            blob_row.delete_instance()
            raise

        return blob_row
Exemple #4
0
def get_shared_blob(digest):
    """
    Look up the fully uploaded ImageStorage row with the given digest.

    Only intended for globally accessible shared blobs, such as the special empty gzipped tar
    layer that Docker no longer pushes to us.

    Returns the matching row, or None when no row with `uploading=False` has that digest.
    """
    assert digest

    try:
        shared_blob = ImageStorage.get(content_checksum=digest,
                                       uploading=False)
    except ImageStorage.DoesNotExist:
        shared_blob = None

    return shared_blob
Exemple #5
0
def temp_link_blob(repository_id, blob_digest, link_expiration_s):
    """
    Temporarily link the blob with the given digest to the given repository.

    Returns the blob's ImageStorage row, or None (without linking anything) when no row with
    that digest exists.
    """
    assert blob_digest

    with db_transaction():
        try:
            blob_row = ImageStorage.get(content_checksum=blob_digest)
        except ImageStorage.DoesNotExist:
            blob_row = None

        # Only link when the lookup actually found a row.
        if blob_row is not None:
            _temp_link_blob(repository_id, blob_row, link_expiration_s)

        return blob_row
Exemple #6
0
def store_blob_record_and_temp_link_in_repo(
    repository_id,
    blob_digest,
    location_obj,
    byte_count,
    link_expiration_s,
    uncompressed_byte_count=None,
):
    """
    Ensure a blob row and its placement exist, then temporarily link the blob to the repository.

    When a row with `blob_digest` already exists, its missing size fields are filled in from the
    arguments and a placement at `location_obj` is created if absent. Otherwise a new row and
    placement are created. Everything runs inside a single database transaction.

    Returns the ImageStorage row for the blob.
    """
    assert blob_digest
    assert byte_count is not None

    with db_transaction():
        try:
            storage = ImageStorage.get(content_checksum=blob_digest)
        except ImageStorage.DoesNotExist:
            # Unknown blob: create the row together with its placement.
            storage = ImageStorage.create(
                content_checksum=blob_digest,
                uploading=False,
                image_size=byte_count,
                uncompressed_size=uncompressed_byte_count,
            )
            ImageStoragePlacement.create(storage=storage,
                                         location=location_obj)
        else:
            # Known blob: backfill size fields that were never recorded.
            needs_save = False

            if storage.image_size is None:
                storage.image_size = byte_count
                needs_save = True

            size_is_missing = storage.uncompressed_size is None
            if size_is_missing and uncompressed_byte_count is not None:
                storage.uncompressed_size = uncompressed_byte_count
                needs_save = True

            if needs_save:
                storage.save()

            # Make sure the blob is recorded as placed at this location.
            try:
                ImageStoragePlacement.get(storage=storage,
                                          location=location_obj)
            except ImageStoragePlacement.DoesNotExist:
                ImageStoragePlacement.create(storage=storage,
                                             location=location_obj)

        _temp_link_blob(repository_id, storage, link_expiration_s)
        return storage
 def _create_blob(self, digest: str, size: int, manifest_id: int,
                  repo_id: int):
     """
     Return the ImageStorage row for `digest`, creating it if needed, and make sure it is
     linked to the given manifest and repository through a ManifestBlob row.
     """
     try:
         blob = ImageStorage.get(content_checksum=digest)
     except ImageStorage.DoesNotExist:
         # TODO: which size should we really be setting here?
         blob = ImageStorage.create(
             content_checksum=digest,
             image_size=size,
             compressed_size=size,
         )

     # The same fields identify the link for both the lookup and the insert.
     link_fields = dict(manifest_id=manifest_id,
                        blob=blob,
                        repository_id=repo_id)
     try:
         ManifestBlob.get(**link_fields)
     except ManifestBlob.DoesNotExist:
         ManifestBlob.create(**link_fields)

     return blob
Exemple #8
0
    def done(self):
        """
        Mark the manifest builder as complete and dispose of its state.

        Calling this is optional; manifest builders are expected to eventually time out if
        unused for an extended period of time.
        """
        for pending_id in self._builder_state.temp_storages:
            try:
                candidate = ImageStorage.get(id=pending_id)

                # Leave finished uploads alone.
                if not candidate.uploading:
                    continue

                # Never remove the empty layer blob.
                if candidate.content_checksum == EMPTY_LAYER_BLOB_DIGEST:
                    continue

                # Placements reference the storage row, so drop them first.
                placements_of_candidate = ImageStoragePlacement.delete().where(
                    ImageStoragePlacement.storage == candidate)
                placements_of_candidate.execute()

                candidate.delete_instance()
            except ImageStorage.DoesNotExist:
                pass

        session.pop(_SESSION_KEY, None)
Exemple #9
0
def assert_gc_integrity(expect_storage_removed=True):
    """
    Specialized assertion for ensuring that GC cleans up all dangling storages and labels, invokes
    the callback for images removed and doesn't invoke the callback for images *not* removed.

    This is a generator with a single ``yield``: database and storage state is snapshotted before
    the yield, the code under test runs at the yield, and the integrity assertions run afterwards.
    It is presumably wrapped by ``contextlib.contextmanager`` outside of this view (callers use it
    in a ``with`` statement) -- TODO confirm.

    NOTE(review): ``expect_storage_removed`` is accepted but never read in this body.
    """

    # Add a callback for when images are removed. Everything passed to the callback is
    # accumulated in `removed_image_storages`; the return value is later called to undo the
    # registration.
    removed_image_storages = []
    remove_callback = model.config.register_image_cleanup_callback(
        removed_image_storages.extend)

    # Store existing storages. We won't verify these for existence because they
    # were likely created as test data.
    existing_digests = set()
    for storage_row in ImageStorage.select():
        if storage_row.cas_path:
            existing_digests.add(storage_row.content_checksum)

    for blob_row in ApprBlob.select():
        existing_digests.add(blob_row.digest)

    # Store the number of dangling objects.
    existing_storage_count = _get_dangling_storage_count()
    existing_label_count = _get_dangling_label_count()
    existing_manifest_count = _get_dangling_manifest_count()

    # Yield to the GC test. The callback is unregistered even if the test body raises.
    with check_transitive_modifications():
        try:
            yield
        finally:
            remove_callback()

    # Ensure the number of dangling storages, manifests and labels has not changed.
    updated_storage_count = _get_dangling_storage_count()
    assert updated_storage_count == existing_storage_count

    updated_label_count = _get_dangling_label_count()
    assert updated_label_count == existing_label_count, _get_dangling_labels()

    updated_manifest_count = _get_dangling_manifest_count()
    assert updated_manifest_count == existing_manifest_count

    # Ensure that for each call to the image+storage cleanup callback, the image and its
    # storage is not found *anywhere* in the database.
    for removed_image_and_storage in removed_image_storages:
        assert isinstance(removed_image_and_storage, Image)

        try:
            # NOTE: SQLite can and will reuse AUTOINCREMENT IDs occasionally, so if we find a row
            # with the same ID, make sure it does not have the same Docker Image ID.
            # See: https://www.sqlite.org/autoinc.html
            found_image = Image.get(id=removed_image_and_storage.id)
            assert (found_image.docker_image_id !=
                    removed_image_and_storage.docker_image_id
                    ), "Found unexpected removed image %s under repo %s" % (
                        found_image.id,
                        found_image.repository,
                    )
        except Image.DoesNotExist:
            # The expected case: the removed image row is gone.
            pass

        # Ensure that image storages are only removed if not shared. A storage counts as shared
        # when any Image, ManifestBlob or UploadedBlob row still references it; each lookup is
        # only performed if the previous one found nothing.
        shared = Image.select().where(
            Image.storage == removed_image_and_storage.storage_id).count()
        if shared == 0:
            shared = (ManifestBlob.select().where(
                ManifestBlob.blob ==
                removed_image_and_storage.storage_id).count())

        if shared == 0:
            shared = (UploadedBlob.select().where(
                UploadedBlob.blob ==
                removed_image_and_storage.storage_id).count())

        if shared == 0:
            # Unreferenced storage must be gone, whether looked up by ID or by UUID.
            with pytest.raises(ImageStorage.DoesNotExist):
                ImageStorage.get(id=removed_image_and_storage.storage_id)

            with pytest.raises(ImageStorage.DoesNotExist):
                ImageStorage.get(uuid=removed_image_and_storage.storage.uuid)

    # Ensure all CAS storage is in the storage engine. Digests recorded before the yield are
    # skipped; for the rest, the content is read from the first preferred location.
    preferred = storage.preferred_locations[0]
    for storage_row in ImageStorage.select():
        if storage_row.content_checksum in existing_digests:
            continue

        if storage_row.cas_path:
            storage.get_content({preferred},
                                storage.blob_path(
                                    storage_row.content_checksum))

    for blob_row in ApprBlob.select():
        if blob_row.digest in existing_digests:
            continue

        storage.get_content({preferred}, storage.blob_path(blob_row.digest))

    # Ensure all tags have valid manifests.
    for manifest in {t.manifest for t in Tag.select()}:
        # Ensure that the manifest's blobs all exist: the digests linked through ManifestBlob
        # rows must equal the local blob digests of the parsed manifest.
        found_blobs = {
            b.blob.content_checksum
            for b in ManifestBlob.select().where(
                ManifestBlob.manifest == manifest)
        }

        parsed = parse_manifest_from_bytes(
            Bytes.for_string_or_unicode(manifest.manifest_bytes),
            manifest.media_type.name)
        assert set(parsed.local_blob_digests) == found_blobs
Exemple #10
0
def assert_gc_integrity(expect_storage_removed=True, check_oci_tags=True):
    """
    Specialized assertion for ensuring that GC cleans up all dangling storages and labels,
    invokes the callback for images removed and doesn't invoke the callback for images *not*
    removed.

    This is a generator with a single ``yield``: dangling-object counts are recorded before the
    yield, the code under test runs at the yield, and the integrity assertions run afterwards.
    It is presumably wrapped by ``contextlib.contextmanager`` outside of this view -- TODO
    confirm.

    When ``check_oci_tags`` is true, every Tag row must also be referenced by a
    TagToRepositoryTag row.

    NOTE(review): ``expect_storage_removed`` is accepted but never read in this body.
    """
    # Add a callback for when images are removed. Everything passed to the callback is
    # accumulated in `removed_image_storages`.
    # NOTE(review): the callback is never unregistered here, so it presumably stays registered
    # after this helper finishes -- confirm whether that is intended.
    removed_image_storages = []
    model.config.register_image_cleanup_callback(removed_image_storages.extend)

    # Store the number of dangling storages and labels.
    existing_storage_count = _get_dangling_storage_count()
    existing_label_count = _get_dangling_label_count()
    existing_manifest_count = _get_dangling_manifest_count()
    # Hand control to the GC test. There is no try/finally around the yield, so none of the
    # checks below run if the test body raises.
    yield

    # Ensure the number of dangling storages, manifests and labels has not changed.
    updated_storage_count = _get_dangling_storage_count()
    assert updated_storage_count == existing_storage_count

    updated_label_count = _get_dangling_label_count()
    assert updated_label_count == existing_label_count, _get_dangling_labels()

    updated_manifest_count = _get_dangling_manifest_count()
    assert updated_manifest_count == existing_manifest_count

    # Ensure that for each call to the image+storage cleanup callback, the image and its
    # storage is not found *anywhere* in the database.
    for removed_image_and_storage in removed_image_storages:
        with pytest.raises(Image.DoesNotExist):
            Image.get(id=removed_image_and_storage.id)

        # Ensure that image storages are only removed if not shared. A storage counts as shared
        # when any Image or ManifestBlob row still references it.
        shared = Image.select().where(
            Image.storage == removed_image_and_storage.storage_id).count()
        if shared == 0:
            shared = (ManifestBlob.select().where(
                ManifestBlob.blob ==
                removed_image_and_storage.storage_id).count())

        if shared == 0:
            # Unreferenced storage must be gone, whether looked up by ID or by UUID.
            with pytest.raises(ImageStorage.DoesNotExist):
                ImageStorage.get(id=removed_image_and_storage.storage_id)

            with pytest.raises(ImageStorage.DoesNotExist):
                ImageStorage.get(uuid=removed_image_and_storage.storage.uuid)

    # Ensure all CAS storage is in the storage engine: the content of every row with a CAS path
    # is read from the first preferred location.
    preferred = storage.preferred_locations[0]
    for storage_row in ImageStorage.select():
        if storage_row.cas_path:
            storage.get_content({preferred},
                                storage.blob_path(
                                    storage_row.content_checksum))

    for blob_row in ApprBlob.select():
        storage.get_content({preferred}, storage.blob_path(blob_row.digest))

    # Ensure there are no dangling OCI tags, i.e. Tag rows that no TagToRepositoryTag row
    # references.
    if check_oci_tags:
        oci_tags = {t.id for t in Tag.select()}
        referenced_oci_tags = {t.tag_id for t in TagToRepositoryTag.select()}
        assert not oci_tags - referenced_oci_tags

    # Ensure all tags have valid manifests.
    for manifest in {t.manifest for t in Tag.select()}:
        # Ensure that the manifest's blobs all exist: the digests linked through ManifestBlob
        # rows must equal the local blob digests of the parsed manifest.
        found_blobs = {
            b.blob.content_checksum
            for b in ManifestBlob.select().where(
                ManifestBlob.manifest == manifest)
        }

        parsed = parse_manifest_from_bytes(
            Bytes.for_string_or_unicode(manifest.manifest_bytes),
            manifest.media_type.name)
        assert set(parsed.local_blob_digests) == found_blobs
Exemple #11
0
def test_retriever(initialized_db):
    """
    Check that RepositoryContentRetriever serves a stored manifest and all of its blobs.

    A schema 2 manifest with a config blob and two layer blobs is written to a new repository.
    Image rows that link to those blobs only through hidden tags are deleted, after which the
    retriever must still return the manifest bytes and every linked blob.
    """
    repository = create_repository("devtable", "newrepo", None)

    layer_json = json.dumps({
        "config": {},
        "rootfs": {
            "type": "layers",
            "diff_ids": []
        },
        "history": [
            {
                "created": "2018-04-03T18:37:09.284840891Z",
                "created_by": "do something",
            },
            {
                "created": "2018-04-03T18:37:09.284840891Z",
                "created_by": "do something",
            },
        ],
    })

    # Add a blob containing the config.
    _, config_digest = _populate_blob(layer_json)

    # Add a blob of random data.
    random_data = "hello world"
    _, random_digest = _populate_blob(random_data)

    # Add another blob of random data.
    other_random_data = "hi place"
    _, other_random_digest = _populate_blob(other_random_data)

    builder = DockerSchema2ManifestBuilder()
    builder.set_config_digest(config_digest, len(layer_json.encode("utf-8")))
    builder.add_layer(other_random_digest,
                      len(other_random_data.encode("utf-8")))
    builder.add_layer(random_digest, len(random_data.encode("utf-8")))
    manifest = builder.build()

    assert config_digest in manifest.blob_digests
    assert random_digest in manifest.blob_digests
    assert other_random_digest in manifest.blob_digests

    assert config_digest in manifest.local_blob_digests
    assert random_digest in manifest.local_blob_digests
    assert other_random_digest in manifest.local_blob_digests

    # Write the manifest.
    created_tuple = get_or_create_manifest(repository, manifest, storage)
    assert created_tuple is not None

    created_manifest = created_tuple.manifest
    assert created_manifest
    assert created_manifest.media_type.name == manifest.media_type
    assert created_manifest.digest == manifest.digest

    # Verify the linked blobs.
    blob_digests = {
        mb.blob.content_checksum
        for mb in ManifestBlob.select().where(
            ManifestBlob.manifest == created_manifest)
    }

    assert random_digest in blob_digests
    assert other_random_digest in blob_digests
    assert config_digest in blob_digests

    # Delete any Image rows linking to the blobs from temp tags.
    for blob_digest in blob_digests:
        storage_row = ImageStorage.get(content_checksum=blob_digest)

        # Materialize the query first: rows are deleted while looping.
        for image in list(Image.select().where(Image.storage == storage_row)):
            # Only remove images whose tags are all hidden (an image with no
            # tags at all also qualifies, since all() of nothing is True).
            all_temp = all(
                rt.hidden for rt in RepositoryTag.select().where(
                    RepositoryTag.image == image))
            if all_temp:
                RepositoryTag.delete().where(
                    RepositoryTag.image == image).execute()
                image.delete_instance(recursive=True)

    # Verify the blobs in the retriever.
    retriever = RepositoryContentRetriever(repository, storage)
    assert (retriever.get_manifest_bytes_with_digest(
        created_manifest.digest) == manifest.bytes.as_encoded_str())

    for blob_digest in blob_digests:
        assert retriever.get_blob_bytes_with_digest(blob_digest) is not None