Exemple #1
0
def test_manifest_backfill_broken_tag(clear_rows, initialized_db):
    """
    Checks that a tag can be backfilled when its image lists ancestors that cannot be resolved.
    """
    # Remove the existing tag manifests (labels first) so the tag can be reused.
    TagManifestLabel.delete().execute()
    TagManifest.delete().execute()

    # Build an image whose ancestor path refers to missing parent images, then tag it.
    repository = model.repository.get_repository("devtable", "gargantuan")
    image_fields = dict(
        docker_image_id="foo",
        repository=repository,
        ancestors="/348723847234/",
        storage=ImageStorage.get(),
    )
    orphaned_image = Image.create(**image_fields)
    broken_tag = RepositoryTag.create(
        repository=repository, image=orphaned_image, name="broken")

    # The backfill call must return a truthy value for the new tag.
    assert backfill_tag(broken_tag)

    # The legacy manifest row and its new-style counterpart must carry the same bytes,
    # even though the manifest itself is broken.
    legacy_manifest = TagManifest.get(tag=broken_tag)
    mapping = TagManifestToManifest.get(tag_manifest=legacy_manifest)
    new_manifest = mapping.manifest
    assert new_manifest.manifest_bytes == legacy_manifest.json_data

    # The new-style tag must exist and point at that manifest.
    new_tag = TagToRepositoryTag.get(repository_tag=broken_tag).tag
    assert new_tag.name == "broken"
    assert new_tag.manifest == new_manifest
Exemple #2
0
def store_tag_manifest_for_repo(repository_id,
                                tag_name,
                                manifest,
                                leaf_layer_id,
                                storage_id_map,
                                reversion=False):
    """ Stores a tag manifest for a specific tag name in the database.

    Returns a `(tag_manifest, created)` tuple: the TagManifest object and a boolean
    that is True only when the TagManifest had to be created by this call.
    """
    # First build the new-style OCI manifest row together with its blobs.
    oci_manifest = _populate_manifest_and_blobs(
        repository_id, manifest, storage_id_map, leaf_layer_id=leaf_layer_id)

    # Then create (or move) the tag so that it references the OCI manifest.
    tag = create_or_update_tag_for_repo(
        repository_id,
        tag_name,
        leaf_layer_id,
        reversion=reversion,
        oci_manifest=oci_manifest,
    )

    # Finally make sure a TagManifest with this digest points at the tag: reuse the
    # existing row when there is one, otherwise associate a new one.
    try:
        existing_row = TagManifest.get(digest=manifest.digest)
        existing_row.tag = tag
        existing_row.save()
        return existing_row, False
    except TagManifest.DoesNotExist:
        new_row = _associate_manifest(tag, oci_manifest)
        return new_row, True
Exemple #3
0
    def _namespace_from_kwargs(self, args_dict):
        if "namespace_name" in args_dict:
            return args_dict["namespace_name"]

        if "repository_ref" in args_dict:
            return args_dict["repository_ref"].namespace_name

        if "tag" in args_dict:
            return args_dict["tag"].repository.namespace_name

        if "manifest" in args_dict:
            manifest = args_dict["manifest"]
            if manifest._is_tag_manifest:
                return TagManifest.get(
                    id=manifest._db_id).tag.repository.namespace_user.username
            else:
                return Manifest.get(
                    id=manifest._db_id).repository.namespace_user.username

        if "manifest_or_legacy_image" in args_dict:
            manifest_or_legacy_image = args_dict["manifest_or_legacy_image"]
            if isinstance(manifest_or_legacy_image, LegacyImage):
                return Image.get(id=manifest_or_legacy_image._db_id
                                 ).repository.namespace_user.username
            else:
                manifest = manifest_or_legacy_image
                if manifest._is_tag_manifest:
                    return TagManifest.get(
                        id=manifest._db_id
                    ).tag.repository.namespace_user.username
                else:
                    return Manifest.get(
                        id=manifest._db_id).repository.namespace_user.username

        if "derived_image" in args_dict:
            return DerivedStorageForImage.get(
                id=args_dict["derived_image"]._db_id
            ).source_image.repository.namespace_user.username

        if "blob" in args_dict:
            return ""  # Blob functions are shared, so no need to do anything.

        if "blob_upload" in args_dict:
            return ""  # Blob functions are shared, so no need to do anything.

        raise Exception("Unknown namespace for dict `%s`" % args_dict)
Exemple #4
0
  def _namespace_from_kwargs(self, args_dict):
    if 'namespace_name' in args_dict:
      return args_dict['namespace_name']

    if 'repository_ref' in args_dict:
      return args_dict['repository_ref'].namespace_name

    if 'tag' in args_dict:
      return args_dict['tag'].repository.namespace_name

    if 'manifest' in args_dict:
      manifest = args_dict['manifest']
      if manifest._is_tag_manifest:
        return TagManifest.get(id=manifest._db_id).tag.repository.namespace_user.username
      else:
        return Manifest.get(id=manifest._db_id).repository.namespace_user.username

    if 'manifest_or_legacy_image' in args_dict:
      manifest_or_legacy_image = args_dict['manifest_or_legacy_image']
      if isinstance(manifest_or_legacy_image, LegacyImage):
        return Image.get(id=manifest_or_legacy_image._db_id).repository.namespace_user.username
      else:
        manifest = manifest_or_legacy_image
        if manifest._is_tag_manifest:
          return TagManifest.get(id=manifest._db_id).tag.repository.namespace_user.username
        else:
          return Manifest.get(id=manifest._db_id).repository.namespace_user.username

    if 'derived_image' in args_dict:
      return (DerivedStorageForImage
              .get(id=args_dict['derived_image']._db_id)
              .source_image
              .repository
              .namespace_user
              .username)

    if 'blob' in args_dict:
      return '' # Blob functions are shared, so no need to do anything.

    if 'blob_upload' in args_dict:
      return '' # Blob functions are shared, so no need to do anything.

    raise Exception('Unknown namespace for dict `%s`' % args_dict)
Exemple #5
0
def _get_manifest_id(repositorytag):
    """
    Returns the ID of the new-style Manifest row for the given RepositoryTag, or None.

    The legacy TagManifest is loaded first, asking the pre-OCI model to backfill it if
    necessary; when nothing can be loaded, a placeholder TagManifest with a
    `BROKEN-<tag id>` digest and `{}` contents is created instead. The mapped Manifest
    is then looked up and, when no mapping exists yet, `_backfill_manifest` is invoked
    before one final lookup.
    """
    tag_datatype = TagDataType.for_repository_tag(repositorytag)

    # Load (or create) the legacy TagManifest row inside a single transaction.
    with db_transaction():
        loaded = None

        try:
            loaded = pre_oci_model.get_manifest_for_tag(
                tag_datatype, backfill_if_necessary=True)
        except MalformedSchema1Manifest:
            logger.exception('Error backfilling manifest for tag `%s`',
                             repositorytag.id)

        if loaded is not None:
            # Resolve the datatype back to its database row.
            try:
                legacy_row = TagManifest.get(id=loaded._db_id)
            except TagManifest.DoesNotExist:
                logger.exception('Could not find tag manifest')
                return None
        else:
            logger.error('Could not load or backfill manifest for tag `%s`',
                         repositorytag.id)

            # Nothing usable was found: record a broken manifest for the tag.
            broken_digest = 'BROKEN-%s' % repositorytag.id
            legacy_row = TagManifest.create(tag=repositorytag,
                                            digest=broken_digest,
                                            json_data='{}')

    try:
        mapped = TagManifestToManifest.get(tag_manifest=legacy_row).manifest

        # Extra safety check that unicode manifests were copied correctly: when the
        # new-style bytes differ from the legacy JSON, overwrite them before returning.
        legacy_bytes = legacy_row.json_data
        if mapped.manifest_bytes != legacy_bytes:
            logger.warning('Fixing manifest `%s`', mapped.id)
            mapped.manifest_bytes = legacy_row.json_data
            mapped.save()

        return mapped.id
    except TagManifestToManifest.DoesNotExist:
        # No new-style manifest yet, so backfill it now.
        _backfill_manifest(legacy_row)

    # The backfill may have created the mapping; look it up one more time.
    try:
        manifest_id = TagManifestToManifest.get(
            tag_manifest=legacy_row).manifest_id
    except TagManifestToManifest.DoesNotExist:
        return None

    return manifest_id
Exemple #6
0
def verify_backfill(namespace_name):
  """ Verifies that every non-hidden RepositoryTag under the namespace was backfilled.

      For each tag, the mapped new-style Tag row is compared with the RepositoryTag
      (name, repository, reversion flag and lifetime bounds). When a TagManifest
      exists for the RepositoryTag, the digest and bytes of the tag's manifest are
      compared against it. Finally the manifest's legacy image mapping is compared
      with the RepositoryTag's image.

      Mismatches raise AssertionError; a missing mapping row raises the corresponding
      model's DoesNotExist from the `.get()` call.
  """
  logger.info('Checking namespace %s', namespace_name)
  namespace_user = model.user.get_namespace_user(namespace_name)
  assert namespace_user

  repo_tags = (RepositoryTag
               .select()
               .join(Repository)
               .where(Repository.namespace_user == namespace_user)
               .where(RepositoryTag.hidden == False))

  # Materialize the query once so the total is available for progress logging.
  repo_tags = list(repo_tags)
  logger.info('Found %s tags', len(repo_tags))

  for index, repo_tag in enumerate(repo_tags):
    logger.info('Checking tag %s under repository %s (%s/%s)', repo_tag.name,
                repo_tag.repository.name, index + 1, len(repo_tags))

    tag = TagToRepositoryTag.get(repository_tag=repo_tag).tag
    assert not tag.hidden
    assert tag.name == repo_tag.name, _vs(tag.name, repo_tag.name)
    # The repository is checked exactly once, with a diagnostic message. A bare
    # duplicate of this assertion placed earlier would fail first and hide the message.
    assert tag.repository == repo_tag.repository, _vs(tag.repository_id, repo_tag.repository_id)
    assert tag.reversion == repo_tag.reversion, _vs(tag.reversion, repo_tag.reversion)

    # New-style lifetimes are in milliseconds, legacy ones in seconds.
    start_check = int(tag.lifetime_start_ms / 1000) == repo_tag.lifetime_start_ts
    assert start_check, _vs(tag.lifetime_start_ms, repo_tag.lifetime_start_ts)
    if repo_tag.lifetime_end_ts is not None:
      end_check = int(tag.lifetime_end_ms / 1000) == repo_tag.lifetime_end_ts
      assert end_check, _vs(tag.lifetime_end_ms, repo_tag.lifetime_end_ts)
    else:
      assert tag.lifetime_end_ms is None

    try:
      # `tag_manifest` is bound before the lookup that may raise, so it stays
      # available for the legacy image check below even without a TagManifest.
      tag_manifest = tag.manifest
      repo_tag_manifest = TagManifest.get(tag=repo_tag)

      digest_check = tag_manifest.digest == repo_tag_manifest.digest
      assert digest_check, _vs(tag_manifest.digest, repo_tag_manifest.digest)

      bytes_check = tag_manifest.manifest_bytes == repo_tag_manifest.json_data
      assert bytes_check, _vs(tag_manifest.manifest_bytes, repo_tag_manifest.json_data)
    except TagManifest.DoesNotExist:
      logger.info('No tag manifest found for repository tag %s', repo_tag.id)

    mli = ManifestLegacyImage.get(manifest=tag_manifest)
    assert mli.repository == repo_tag.repository

    manifest_legacy_image = mli.image
    assert manifest_legacy_image == repo_tag.image, _vs(manifest_legacy_image.id, repo_tag.image_id)
Exemple #7
0
def compute_layer_id(layer):
  """ Returns the security scanner ID for the layer, formatted as
      `<docker_image_id>.<storage uuid>`.

      Manifest and legacy image datatypes are first resolved to their image row.
      Returns None for a non-tag manifest that has no legacy image mapping.
  """
  # NOTE: this is temporary until we switch to Clair V3.
  if isinstance(layer, ManifestDataType):
    if not layer._is_tag_manifest:
      manifest_row = Manifest.get(id=layer._db_id)
      try:
        layer = ManifestLegacyImage.get(manifest=manifest_row).image
      except ManifestLegacyImage.DoesNotExist:
        return None
    else:
      # Tag manifests reach their image through the tag.
      layer = TagManifest.get(id=layer._db_id).tag.image
  elif isinstance(layer, LegacyImage):
    layer = Image.get(id=layer._db_id)

  image_id = layer.docker_image_id
  assert image_id

  storage_uuid = layer.storage.uuid
  assert storage_uuid

  return '%s.%s' % (image_id, storage_uuid)
Exemple #8
0
def test_retarget_tag(initialized_db):
    """
    Retargets `latest` as a reversion and checks both the tag history and the
    old-style rows that mirror the new tag.
    """
    repo = get_repository("devtable", "history")

    def _latest_history():
        # First page, up to 100 entries, restricted to the `latest` tag.
        entries, _ = list_repository_tag_history(
            repo, 1, 100, specific_tag_name="latest")
        return entries

    history = _latest_history()
    assert len(history) == 2

    live_entry, ended_entry = history[0], history[1]
    assert live_entry.lifetime_end_ms is None
    assert ended_entry.lifetime_end_ms is not None

    # Revert back to the original manifest.
    revert_time_ms = ended_entry.lifetime_end_ms + 10000
    created = retarget_tag("latest",
                           live_entry.manifest,
                           is_reversion=True,
                           now_ms=revert_time_ms)
    assert created.lifetime_end_ms is None
    assert created.reversion
    assert created.name == "latest"
    assert created.manifest == live_entry.manifest

    # The history now has one more entry, and only the first is still alive.
    history = _latest_history()
    assert len(history) == 3
    assert history[0].lifetime_end_ms is None
    assert history[1].lifetime_end_ms is not None
    assert history[2].lifetime_end_ms is not None

    assert history[0] == created

    # The old-style tables must mirror the new tag.
    legacy_tag = TagToRepositoryTag.get(tag=created).repository_tag
    expected_start_ts = int(created.lifetime_start_ms / 1000)
    assert legacy_tag.lifetime_start_ts == expected_start_ts

    legacy_manifest = TagManifest.get(tag=legacy_tag)
    mapping = TagManifestToManifest.get(tag_manifest=legacy_manifest)
    assert mapping.manifest == created.manifest
Exemple #9
0
def get_tag_manifest(tag):
    """Returns the TagManifest row for the given tag, or None if there is none."""
    try:
        found = TagManifest.get(tag=tag)
    except TagManifest.DoesNotExist:
        found = None

    return found
Exemple #10
0
def test_tagbackfillworker(clear_all_rows, initialized_db):
    """
    Backfills every RepositoryTag and compares the resulting new-style rows
    (Tag, Manifest, ManifestBlob, ManifestLabel, ManifestLegacyImage) with the
    legacy rows they were derived from.

    When `clear_all_rows` is truthy, the new-style manifest tables are emptied as
    well before backfilling. NOTE(review): `clear_all_rows` and `initialized_db` are
    presumably pytest fixtures defined elsewhere -- confirm.
    """
    # Remove the new-style rows so we can backfill.
    TagToRepositoryTag.delete().execute()
    Tag.delete().execute()

    if clear_all_rows:
        # Delete the manifest-related tables, ending with Manifest itself.
        TagManifestLabelMap.delete().execute()
        ManifestLabel.delete().execute()
        ManifestBlob.delete().execute()
        ManifestLegacyImage.delete().execute()
        TagManifestToManifest.delete().execute()
        Manifest.delete().execute()

    # Set once a tag with an end timestamp has been seen; asserted at the very end.
    found_dead_tag = False

    for repository_tag in list(RepositoryTag.select()):
        # Backfill the tag.
        assert backfill_tag(repository_tag)

        # Ensure if we try again, the backfill is skipped.
        assert not backfill_tag(repository_tag)

        # Ensure that we now have the expected tag rows.
        tag_to_repo_tag = TagToRepositoryTag.get(repository_tag=repository_tag)
        tag = tag_to_repo_tag.tag
        assert tag.name == repository_tag.name
        assert tag.repository == repository_tag.repository
        assert not tag.hidden
        assert tag.reversion == repository_tag.reversion

        # Legacy lifetimes are in seconds; new-style lifetimes are in milliseconds.
        if repository_tag.lifetime_start_ts is None:
            assert tag.lifetime_start_ms is None
        else:
            assert tag.lifetime_start_ms == (repository_tag.lifetime_start_ts *
                                             1000)

        if repository_tag.lifetime_end_ts is None:
            assert tag.lifetime_end_ms is None
        else:
            assert tag.lifetime_end_ms == (repository_tag.lifetime_end_ts *
                                           1000)
            found_dead_tag = True

        assert tag.manifest

        # Ensure that we now have the expected manifest rows.
        try:
            tag_manifest = TagManifest.get(tag=repository_tag)
        except TagManifest.DoesNotExist:
            # No legacy manifest to compare against; skip the manifest checks.
            continue

        map_row = TagManifestToManifest.get(tag_manifest=tag_manifest)
        assert not map_row.broken

        manifest_row = map_row.manifest
        assert manifest_row.manifest_bytes == tag_manifest.json_data
        assert manifest_row.digest == tag_manifest.digest
        assert manifest_row.repository == tag_manifest.tag.repository

        assert tag.manifest == map_row.manifest

        legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image
        assert tag_manifest.tag.image == legacy_image

        # The manifest's blobs must be exactly the storages of the tag's image and
        # of all of that image's ancestors.
        expected_storages = {tag_manifest.tag.image.storage.id}
        for parent_image_id in tag_manifest.tag.image.ancestor_id_list():
            expected_storages.add(Image.get(id=parent_image_id).storage_id)

        found_storages = {
            manifest_blob.blob_id
            for manifest_blob in ManifestBlob.select().where(
                ManifestBlob.manifest == manifest_row)
        }
        assert expected_storages == found_storages

        # Ensure the labels were copied over.
        tmls = list(TagManifestLabel.select().where(
            TagManifestLabel.annotated == tag_manifest))
        expected_labels = {tml.label_id for tml in tmls}
        found_labels = {
            m.label_id
            for m in ManifestLabel.select().where(
                ManifestLabel.manifest == manifest_row)
        }
        assert found_labels == expected_labels

    # Verify at the repository level.
    for repository in list(Repository.select()):
        tags = RepositoryTag.select().where(
            RepositoryTag.repository == repository,
            RepositoryTag.hidden == False)
        oci_tags = Tag.select().where(Tag.repository == repository)
        assert len(tags) == len(oci_tags)
        assert {t.name for t in tags} == {t.name for t in oci_tags}

        for tag in tags:
            # Unlike the loop above, a missing TagManifest here raises and fails the test.
            tag_manifest = TagManifest.get(tag=tag)
            ttr = TagToRepositoryTag.get(repository_tag=tag)
            manifest = ttr.tag.manifest

            assert tag_manifest.json_data == manifest.manifest_bytes
            assert tag_manifest.digest == manifest.digest
            assert tag.image == ManifestLegacyImage.get(
                manifest=manifest).image
            assert tag.lifetime_start_ts == (ttr.tag.lifetime_start_ms / 1000)

            if tag.lifetime_end_ts:
                assert tag.lifetime_end_ts == (ttr.tag.lifetime_end_ms / 1000)
            else:
                assert ttr.tag.lifetime_end_ms is None

    # At least one tag with an end timestamp must have been exercised above.
    assert found_dead_tag
Exemple #11
0
def _backfill_manifest(tag_manifest):
    """
    Creates the new-style manifest rows and the TagManifestToManifest mapping row
    for a legacy TagManifest, then backfills its labels.

    Returns False when a mapping row already exists (checked both before and inside
    the transaction) or when an IntegrityError is raised while creating the manifest
    or mapping rows. Returns True once the rows and labels have been written, and
    also when the TagManifest row can no longer be found inside the transaction.

    The mapping row is flagged `broken` when the manifest cannot be parsed, when
    parent images cannot be loaded, when a referenced blob is not found, or when the
    manifest turns out to be malformed while enumerating its blobs.
    """
    logger.info('Backfilling manifest for tag manifest %s', tag_manifest.id)

    # Ensure that a mapping row doesn't already exist. If it does, we've been preempted.
    if lookup_manifest_map_row(tag_manifest):
        return False

    # Parse the manifest. If we cannot parse, then we treat the manifest as broken and just emit it
    # without additional rows or data, as it will eventually not be useful.
    is_broken = False
    try:
        manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(
            tag_manifest.json_data),
                                         validate=False)
    except ManifestException:
        logger.exception('Exception when trying to parse manifest %s',
                         tag_manifest.id)
        manifest = BrokenManifest(tag_manifest.digest, tag_manifest.json_data)
        is_broken = True

    # Lookup the storages for the digests.
    root_image = tag_manifest.tag.image
    repository = tag_manifest.tag.repository

    # Maps content checksum -> storage ID, seeded with the tag's own image.
    image_storage_id_map = {
        root_image.storage.content_checksum: root_image.storage.id
    }

    try:
        parent_images = get_parent_images(repository.namespace_user.username,
                                          repository.name, root_image)
    except DataModelException:
        logger.exception(
            'Exception when trying to load parent images for manifest `%s`',
            tag_manifest.id)
        # Continue with no parents; the loop below then adds nothing to the map.
        parent_images = {}
        is_broken = True

    for parent_image in parent_images:
        image_storage_id_map[
            parent_image.storage.content_checksum] = parent_image.storage.id

    # Ensure that all the expected blobs have been found. If not, we lookup the blob under the repo
    # and add its storage ID. If the blob is not found, we mark the manifest as broken.
    storage_ids = set()
    try:
        for blob_digest in manifest.get_blob_digests_for_translation():
            if blob_digest in image_storage_id_map:
                storage_ids.add(image_storage_id_map[blob_digest])
            else:
                logger.debug(
                    'Blob `%s` not found in images for manifest `%s`; checking repo',
                    blob_digest, tag_manifest.id)
                try:
                    blob_storage = get_repo_blob_by_digest(
                        repository.namespace_user.username, repository.name,
                        blob_digest)
                    storage_ids.add(blob_storage.id)
                except BlobDoesNotExist:
                    logger.debug(
                        'Blob `%s` not found in repo for manifest `%s`',
                        blob_digest, tag_manifest.id)
                    is_broken = True
    except MalformedSchema1Manifest:
        logger.warning(
            'Found malformed schema 1 manifest during blob backfill')
        is_broken = True

    with db_transaction():
        # Re-retrieve the tag manifest to ensure it still exists and we're pointing at the correct tag.
        try:
            tag_manifest = TagManifest.get(id=tag_manifest.id)
        except TagManifest.DoesNotExist:
            # The row is gone, so nothing is written; True is still returned here.
            return True

        # Ensure it wasn't already created.
        if lookup_manifest_map_row(tag_manifest):
            return False

        # Check for a pre-existing manifest matching the digest in the repository. This can happen
        # if we've already created the manifest row (typically for tag reversion).
        try:
            manifest_row = Manifest.get(digest=manifest.digest,
                                        repository=tag_manifest.tag.repository)
        except Manifest.DoesNotExist:
            # Create the new-style rows for the manifest.
            try:
                manifest_row = populate_manifest(tag_manifest.tag.repository,
                                                 manifest,
                                                 tag_manifest.tag.image,
                                                 storage_ids)
            except IntegrityError:
                # Pre-empted.
                return False

        # Create the mapping row. If we find another was created for this tag manifest in the
        # meantime, then we've been preempted.
        try:
            TagManifestToManifest.create(tag_manifest=tag_manifest,
                                         manifest=manifest_row,
                                         broken=is_broken)
        except IntegrityError:
            return False

    # Backfill any labels on the manifest.
    # This runs after the transaction block above has exited.
    _backfill_labels(tag_manifest, manifest_row, repository)
    return True