def get_legacy_image(self, repository_ref, docker_image_id, storage, include_blob=False):
    """
    Looks up the LegacyImage identified by `docker_image_id` under the given repository.

    Returns None if the ID does not resolve to a manifest, or if the manifest has no legacy
    image at the resolved layer index. When `include_blob` is truthy, the returned legacy
    image is augmented (via `with_blob`) with the repository blob for its digest, looked up
    with placements included.
    """
    content_retriever = RepositoryContentRetriever(repository_ref._db_id, storage)

    # The synthetic image ID is resolved into a manifest plus a layer index.
    manifest, layer_index = self._resolve_legacy_image_id(docker_image_id)
    if manifest is None:
        return None

    image = manifest.lookup_legacy_image(layer_index, content_retriever)
    if image is not None and include_blob:
        # A blob was requested: load it and attach it to the legacy image.
        blob = self.get_repo_blob_by_digest(
            repository_ref, image.blob_digest, include_placements=True
        )
        return image.with_blob(blob)

    return image
def get_schema1_parsed_manifest(
    self, manifest, namespace_name, repo_name, tag_name, storage, raise_on_error=False
):
    """
    Returns the schema 1 manifest for this manifest, or None if none.

    When `raise_on_error` is falsy, every failure is reported by returning None. When it is
    truthy:
      - a ManifestException raised while parsing the manifest is re-raised as-is,
      - ManifestDoesNotExist is raised if the manifest row cannot be found in the database,
      - ManifestException is raised if the conversion to schema 1 yields None.
    """
    try:
        parsed = manifest.get_parsed_manifest()
    except ManifestException:
        if raise_on_error:
            # Re-raise the original exception rather than a fresh, message-less one, so the
            # caller keeps the parse error's message and traceback.
            raise
        return None

    try:
        manifest_row = database.Manifest.get(id=manifest._db_id)
    except database.Manifest.DoesNotExist:
        if raise_on_error:
            raise ManifestDoesNotExist
        return None

    retriever = RepositoryContentRetriever(manifest_row.repository_id, storage)
    schema1 = parsed.get_schema1_manifest(namespace_name, repo_name, tag_name, retriever)
    if schema1 is None and raise_on_error:
        raise ManifestException

    return schema1
def _list_manifest_layers(self, repo_id, parsed, storage, include_placements=False, by_manifest=False):
    """
    Builds the *ordered list* of ManifestLayer entries for the parsed manifest, in the order
    returned by `parsed.get_layers`. Each non-remote layer carries its Blob (with placements
    when `include_placements` is set); remote layers carry None in place of a blob.

    Returns None if the layers cannot be loaded or if a local layer's blob is not found among
    the repository's storages. Must not be called with a manifest list.
    """
    assert not parsed.is_manifest_list

    retriever = RepositoryContentRetriever(repo_id, storage)
    requires_empty_blob = parsed.get_requires_empty_layer_blob(retriever)

    # Collect every digest we need storage rows for, including the empty layer if required.
    wanted_digests = list(parsed.local_blob_digests)
    if requires_empty_blob:
        wanted_digests.append(EMPTY_LAYER_BLOB_DIGEST)

    storages_by_digest = {}
    if wanted_digests:
        found_storages = self._lookup_repo_storages_by_content_checksum(
            repo_id, wanted_digests, by_manifest=by_manifest
        )
        storages_by_digest = {row.content_checksum: row for row in found_storages}

    layers = parsed.get_layers(retriever)
    if layers is None:
        logger.error("Could not load layers for manifest `%s`", parsed.digest)
        return None

    result = []
    for layer in layers:
        if layer.is_remote:
            # Remote layers have no locally stored blob.
            result.append(ManifestLayer(layer, None))
            continue

        digest_key = str(layer.blob_digest)
        try:
            image_storage = storages_by_digest[digest_key]
        except KeyError:
            logger.error(
                "Missing digest `%s` for manifest `%s`", layer.blob_digest, parsed.digest
            )
            return None

        assert image_storage.cas_path is not None
        assert image_storage.image_size is not None

        placements = (
            list(model.storage.get_storage_locations(image_storage.uuid))
            if include_placements
            else None
        )
        layer_path = model.storage.get_layer_path(image_storage)
        blob = Blob.for_image_storage(
            image_storage, storage_path=layer_path, placements=placements
        )
        result.append(ManifestLayer(layer, blob))

    return result
def get_schema1_parsed_manifest(self, manifest, namespace_name, repo_name, tag_name, storage):
    """
    Converts the given manifest into its schema 1 form.

    Returns None if the manifest cannot be parsed (ManifestException), if its database row
    no longer exists, or if the conversion itself yields None.
    """
    try:
        parsed_manifest = manifest.get_parsed_manifest()
    except ManifestException:
        return None

    try:
        row = database.Manifest.get(id=manifest._db_id)
    except database.Manifest.DoesNotExist:
        return None

    content_retriever = RepositoryContentRetriever(row.repository_id, storage)
    return parsed_manifest.get_schema1_manifest(
        namespace_name, repo_name, tag_name, content_retriever
    )
def get_tag_legacy_image_id(self, repository_ref, tag_name, storage):
    """
    Looks up the tag named `tag_name` in the repository and returns the docker image ID of
    the legacy image found at index 0 of the tag's manifest.

    Returns None if the tag does not exist or the manifest has no such legacy image.
    """
    tag = self.get_repo_tag(repository_ref, tag_name)
    if tag is None:
        return None

    content_retriever = RepositoryContentRetriever(repository_ref.id, storage)
    image = tag.manifest.lookup_legacy_image(0, content_retriever)
    return None if image is None else image.docker_image_id
def convert_manifest(self, manifest, namespace_name, repo_name, tag_name, allowed_mediatypes, storage):
    """
    Converts the given manifest by delegating to the parsed manifest's `convert_manifest`
    with `allowed_mediatypes`, and returns whatever that call returns.

    Returns None if the manifest cannot be parsed (ManifestException) or if its database row
    no longer exists.
    """
    try:
        parsed_manifest = manifest.get_parsed_manifest()
    except ManifestException:
        return None

    try:
        row = database.Manifest.get(id=manifest._db_id)
    except database.Manifest.DoesNotExist:
        return None

    content_retriever = RepositoryContentRetriever(row.repository_id, storage)
    return parsed_manifest.convert_manifest(
        allowed_mediatypes, namespace_name, repo_name, tag_name, content_retriever
    )
def populate_legacy_images_for_testing(manifest, manifest_interface_instance, storage):
    """
    Populates the legacy image rows for the given manifest.

    Always returns None. Raises CreateManifestException if populating the legacy image fails
    with a ManifestException.
    """
    # NOTE: This method is only kept around for use by legacy tests that still require
    # legacy images. As a result, we make sure we're in testing mode before we run.
    assert os.getenv("TEST") == "true"

    repository_id = manifest.repository_id
    retriever = RepositoryContentRetriever.for_repository(repository_id, storage)

    blob_map = _build_blob_map(
        repository_id, manifest_interface_instance, storage, True, require_empty_layer=True
    )
    if blob_map is None:
        return None

    # Manifest lists will not have a legacy image, so there is nothing to populate for them.
    if not manifest_interface_instance.has_legacy_image:
        return None

    try:
        image_id = _populate_legacy_image(
            repository_id, manifest_interface_instance, blob_map, retriever, True
        )
    except ManifestException as me:
        raise CreateManifestException(
            "Attempt to create an invalid manifest: %s. Please report this issue." % me
        )

    if image_id is None:
        return None

    image_row = get_image(repository_id, image_id)
    if image_row is None:
        return None

    # Link the legacy image to the manifest.
    ManifestLegacyImage.create(repository=repository_id, image=image_row, manifest=manifest)
def test_repository_tags(repo_namespace, repo_name, registry_model):
    """
    Checks that every active tag can be fetched by name, and that the legacy image reachable
    from the tag's manifest agrees with the legacy tags map.
    """
    repo_ref = registry_model.lookup_repository(repo_namespace, repo_name)
    active_tags = registry_model.list_all_active_repository_tags(repo_ref)
    assert len(active_tags)

    legacy_ids_by_tag = registry_model.get_legacy_tags_map(repo_ref, storage)

    for tag in active_tags:
        looked_up = registry_model.get_repo_tag(repo_ref, tag.name)
        assert looked_up == tag

        content_retriever = RepositoryContentRetriever(repo_ref.id, storage)
        expected_image = tag.manifest.lookup_legacy_image(0, content_retriever)
        actual_image = registry_model.get_legacy_image(
            repo_ref, looked_up.manifest.legacy_image_root_id, storage
        )

        if actual_image is None:
            continue

        assert actual_image.docker_image_id == expected_image.docker_image_id
        assert legacy_ids_by_tag[tag.name] == actual_image.docker_image_id
def _create_manifest(
    repository_id,
    manifest_interface_instance,
    storage,
    temp_tag_expiration_sec=TEMP_TAG_EXPIRATION_SEC,
    for_tagging=False,
    raise_on_error=False,
    retriever=None,
):
    """
    Validates the given manifest and creates its database rows (manifest, blob links and
    child-manifest links), along with its labels.

    Returns a CreatedManifest. `newly_created` is False (and `labels_to_apply` is None) when
    the manifest was found to already exist. On failure, returns None, or raises
    CreateManifestException when `raise_on_error` is truthy.

    If `retriever` is not given, one is built for the repository from `storage`. Unless
    `for_tagging` is set, a temporary tag with `temp_tag_expiration_sec` is added to the
    newly created manifest.
    """
    # Validate the manifest.
    retriever = retriever or RepositoryContentRetriever.for_repository(repository_id, storage)
    try:
        manifest_interface_instance.validate(retriever)
    except (ManifestException, MalformedSchema2ManifestList, BlobDoesNotExist, IOError) as ex:
        logger.exception("Could not validate manifest `%s`", manifest_interface_instance.digest)
        if raise_on_error:
            raise CreateManifestException(str(ex))

        return None

    # Load, parse and get/create the child manifests, if any.
    child_manifest_refs = manifest_interface_instance.child_manifests(retriever)
    child_manifest_rows = {}
    child_manifest_label_dicts = []

    if child_manifest_refs is not None:
        for child_manifest_ref in child_manifest_refs:
            # Load and parse the child manifest.
            try:
                child_manifest = child_manifest_ref.manifest_obj
            except (
                ManifestException,
                MalformedSchema2ManifestList,
                BlobDoesNotExist,
                IOError,
            ) as ex:
                logger.exception(
                    "Could not load manifest list for manifest `%s`",
                    manifest_interface_instance.digest,
                )
                if raise_on_error:
                    raise CreateManifestException(str(ex))

                return None

            # Retrieve its labels.
            labels = child_manifest.get_manifest_labels(retriever)
            if labels is None:
                if raise_on_error:
                    raise CreateManifestException("Unable to retrieve manifest labels")

                # NOTE: No exception is being handled here, so log with `error` rather than
                # `exception` (which would attach an empty traceback).
                logger.error("Could not load manifest labels for child manifest")
                return None

            # Get/create the child manifest in the database.
            child_manifest_info = get_or_create_manifest(
                repository_id, child_manifest, storage, raise_on_error=raise_on_error
            )
            if child_manifest_info is None:
                if raise_on_error:
                    raise CreateManifestException("Unable to retrieve child manifest")

                logger.error("Could not get/create child manifest")
                return None

            child_manifest_rows[child_manifest_info.manifest.digest] = child_manifest_info.manifest
            child_manifest_label_dicts.append(labels)

    # Build the map from required blob digests to the blob objects.
    blob_map = _build_blob_map(
        repository_id,
        manifest_interface_instance,
        retriever,
        storage,
        raise_on_error,
        require_empty_layer=False,
    )
    if blob_map is None:
        return None

    # Create the manifest and its blobs.
    media_type = Manifest.media_type.get_id(manifest_interface_instance.media_type)
    storage_ids = {blob_storage.id for blob_storage in blob_map.values()}

    # Check for the manifest, in case it was created since we checked earlier.
    try:
        manifest = Manifest.get(
            repository=repository_id, digest=manifest_interface_instance.digest
        )
        return CreatedManifest(manifest=manifest, newly_created=False, labels_to_apply=None)
    except Manifest.DoesNotExist:
        pass

    try:
        with db_transaction():
            # Create the manifest.
            try:
                manifest = Manifest.create(
                    repository=repository_id,
                    digest=manifest_interface_instance.digest,
                    media_type=media_type,
                    manifest_bytes=manifest_interface_instance.bytes.as_encoded_str(),
                    config_media_type=manifest_interface_instance.config_media_type,
                    layers_compressed_size=manifest_interface_instance.layers_compressed_size,
                )
            except IntegrityError as ie:
                # NOTE: An IntegrityError means (barring a bug) that the manifest was created by
                # another caller while we were attempting to create it. Since we need to return
                # the manifest, we raise a specialized exception here to break out of the
                # transaction so we can retrieve it.
                raise _ManifestAlreadyExists(ie)

            # Insert the blobs.
            blobs_to_insert = [
                dict(manifest=manifest, repository=repository_id, blob=storage_id)
                for storage_id in storage_ids
            ]
            if blobs_to_insert:
                try:
                    ManifestBlob.insert_many(blobs_to_insert).execute()
                except IntegrityError as ie:
                    raise _ManifestAlreadyExists(ie)

            # Insert the manifest child rows (if applicable).
            if child_manifest_rows:
                children_to_insert = [
                    dict(manifest=manifest, child_manifest=child_manifest, repository=repository_id)
                    for child_manifest in child_manifest_rows.values()
                ]
                try:
                    ManifestChild.insert_many(children_to_insert).execute()
                except IntegrityError as ie:
                    raise _ManifestAlreadyExists(ie)

            # If this manifest is being created not for immediate tagging, add a temporary tag to the
            # manifest to ensure it isn't being GCed. If the manifest *is* for tagging, then since we're
            # creating a new one here, it cannot be GCed (since it isn't referenced by anything yet), so
            # its safe to elide the temp tag operation. If we ever change GC code to collect *all* manifests
            # in a repository for GC, then we will have to reevaluate this optimization at that time.
            if not for_tagging:
                create_temporary_tag_if_necessary(manifest, temp_tag_expiration_sec)

        # Define the labels for the manifest (if any).
        # TODO: Once the old data model is gone, turn this into a batch operation and make the label
        # application to the manifest occur under the transaction.
        labels = manifest_interface_instance.get_manifest_labels(retriever)
        if labels:
            for key, value in labels.items():
                # NOTE: There can technically be empty label keys via Dockerfile's. We ignore any
                # such `labels`, as they don't really mean anything.
                if not key:
                    continue

                label_media_type = "application/json" if is_json(value) else "text/plain"
                create_manifest_label(manifest, key, value, "manifest", label_media_type)

        # Return the dictionary of labels to apply (i.e. those labels that cause an action to be taken
        # on the manifest or its resulting tags). We only return those labels either defined on
        # the manifest or shared amongst all the child manifests. We intersect amongst all child manifests
        # to ensure that any action performed is defined in all manifests.
        labels_to_apply = labels or {}
        if child_manifest_label_dicts:
            labels_to_apply = child_manifest_label_dicts[0].items()
            for child_manifest_label_dict in child_manifest_label_dicts[1:]:
                # Intersect the key+values of the labels to ensure we get the exact same result
                # for all the child manifests.
                labels_to_apply = labels_to_apply & child_manifest_label_dict.items()

            labels_to_apply = dict(labels_to_apply)

        return CreatedManifest(
            manifest=manifest, newly_created=True, labels_to_apply=labels_to_apply
        )
    except _ManifestAlreadyExists as mae:
        try:
            manifest = Manifest.get(
                repository=repository_id, digest=manifest_interface_instance.digest
            )
        except Manifest.DoesNotExist:
            # NOTE: If we've reached this point, then somehow we had an IntegrityError without it
            # being due to a duplicate manifest. We therefore log the error.
            logger.error(
                "Got integrity error when trying to create manifest: %s", mae.internal_exception
            )
            if raise_on_error:
                raise CreateManifestException(
                    "Attempt to create an invalid manifest. Please report this issue."
                )

            return None

        return CreatedManifest(manifest=manifest, newly_created=False, labels_to_apply=None)
def _create_manifest(
    repository_id,
    manifest_interface_instance,
    storage,
    temp_tag_expiration_sec=TEMP_TAG_EXPIRATION_SEC,
    for_tagging=False,
    raise_on_error=False,
    retriever=None,
):
    """
    Validates the given manifest and creates its database rows (manifest, blob links, legacy
    image link and child-manifest links), along with its labels.

    Returns a CreatedManifest. `newly_created` is False (and `labels_to_apply` is None) when
    the manifest was found to already exist. On failure, returns None, or raises
    CreateManifestException when `raise_on_error` is truthy.

    If `retriever` is not given, one is built for the repository from `storage`. Unless
    `for_tagging` is set, a temporary tag with `temp_tag_expiration_sec` is added to the
    newly created manifest.
    """
    # Validate the manifest.
    retriever = retriever or RepositoryContentRetriever.for_repository(repository_id, storage)
    try:
        manifest_interface_instance.validate(retriever)
    except (ManifestException, MalformedSchema2ManifestList, BlobDoesNotExist, IOError) as ex:
        logger.exception("Could not validate manifest `%s`", manifest_interface_instance.digest)
        if raise_on_error:
            raise CreateManifestException(str(ex))

        return None

    # Load, parse and get/create the child manifests, if any.
    child_manifest_refs = manifest_interface_instance.child_manifests(retriever)
    child_manifest_rows = {}
    child_manifest_label_dicts = []

    if child_manifest_refs is not None:
        for child_manifest_ref in child_manifest_refs:
            # Load and parse the child manifest.
            try:
                child_manifest = child_manifest_ref.manifest_obj
            except (
                ManifestException,
                MalformedSchema2ManifestList,
                BlobDoesNotExist,
                IOError,
            ) as ex:
                logger.exception(
                    "Could not load manifest list for manifest `%s`",
                    manifest_interface_instance.digest,
                )
                if raise_on_error:
                    raise CreateManifestException(str(ex))

                return None

            # Retrieve its labels.
            labels = child_manifest.get_manifest_labels(retriever)
            if labels is None:
                if raise_on_error:
                    raise CreateManifestException("Unable to retrieve manifest labels")

                # NOTE: No exception is being handled here, so log with `error` rather than
                # `exception` (which would attach an empty traceback).
                logger.error("Could not load manifest labels for child manifest")
                return None

            # Get/create the child manifest in the database.
            child_manifest_info = get_or_create_manifest(
                repository_id, child_manifest, storage, raise_on_error=raise_on_error
            )
            if child_manifest_info is None:
                if raise_on_error:
                    raise CreateManifestException("Unable to retrieve child manifest")

                logger.error("Could not get/create child manifest")
                return None

            child_manifest_rows[child_manifest_info.manifest.digest] = child_manifest_info.manifest
            child_manifest_label_dicts.append(labels)

    # Ensure all the blobs in the manifest exist.
    digests = set(manifest_interface_instance.local_blob_digests)
    blob_map = {}

    # If the special empty layer is required, simply load it directly. This is much faster
    # than trying to load it on a per repository basis, and that is unnecessary anyway since
    # this layer is predefined.
    if EMPTY_LAYER_BLOB_DIGEST in digests:
        digests.remove(EMPTY_LAYER_BLOB_DIGEST)
        blob_map[EMPTY_LAYER_BLOB_DIGEST] = get_shared_blob(EMPTY_LAYER_BLOB_DIGEST)
        if not blob_map[EMPTY_LAYER_BLOB_DIGEST]:
            if raise_on_error:
                raise CreateManifestException("Unable to retrieve specialized empty blob")

            logger.warning("Could not find the special empty blob in storage")
            return None

    if digests:
        query = lookup_repo_storages_by_content_checksum(repository_id, digests)
        blob_map.update({s.content_checksum: s for s in query})
        for digest_str in digests:
            if digest_str not in blob_map:
                logger.warning(
                    "Unknown blob `%s` under manifest `%s` for repository `%s`",
                    digest_str,
                    manifest_interface_instance.digest,
                    repository_id,
                )

                if raise_on_error:
                    raise CreateManifestException("Unknown blob `%s`" % digest_str)

                return None

    # Special check: If the empty layer blob is needed for this manifest, add it to the
    # blob map. This is necessary because Docker decided to elide sending of this special
    # empty layer in schema version 2, but we need to have it referenced for GC and schema version 1.
    if EMPTY_LAYER_BLOB_DIGEST not in blob_map:
        try:
            requires_empty_layer = manifest_interface_instance.get_requires_empty_layer_blob(
                retriever
            )
        except ManifestException as ex:
            if raise_on_error:
                raise CreateManifestException(str(ex))

            return None

        if requires_empty_layer is None:
            if raise_on_error:
                raise CreateManifestException("Could not load configuration blob")

            return None

        if requires_empty_layer:
            shared_blob = get_or_create_shared_blob(
                EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES, storage
            )
            assert not shared_blob.uploading
            assert shared_blob.content_checksum == EMPTY_LAYER_BLOB_DIGEST
            blob_map[EMPTY_LAYER_BLOB_DIGEST] = shared_blob

    # Determine and populate the legacy image if necessary. Manifest lists will not have a legacy
    # image.
    legacy_image = None
    if manifest_interface_instance.has_legacy_image:
        legacy_image_id = _populate_legacy_image(
            repository_id, manifest_interface_instance, blob_map, retriever, raise_on_error
        )
        if legacy_image_id is None:
            return None

        legacy_image = get_image(repository_id, legacy_image_id)
        if legacy_image is None:
            return None

    # Create the manifest and its blobs.
    media_type = Manifest.media_type.get_id(manifest_interface_instance.media_type)
    storage_ids = {blob_storage.id for blob_storage in blob_map.values()}

    with db_transaction():
        # Check for the manifest. This is necessary because Postgres doesn't handle IntegrityErrors
        # well under transactions.
        try:
            manifest = Manifest.get(
                repository=repository_id, digest=manifest_interface_instance.digest
            )
            return CreatedManifest(manifest=manifest, newly_created=False, labels_to_apply=None)
        except Manifest.DoesNotExist:
            pass

        # Create the manifest.
        try:
            manifest = Manifest.create(
                repository=repository_id,
                digest=manifest_interface_instance.digest,
                media_type=media_type,
                manifest_bytes=manifest_interface_instance.bytes.as_encoded_str(),
            )
        except IntegrityError as ie:
            try:
                manifest = Manifest.get(
                    repository=repository_id, digest=manifest_interface_instance.digest
                )
            except Manifest.DoesNotExist:
                logger.error("Got integrity error when trying to create manifest: %s", ie)
                if raise_on_error:
                    raise CreateManifestException(
                        "Attempt to create an invalid manifest. Please report this issue."
                    )

                return None

            return CreatedManifest(manifest=manifest, newly_created=False, labels_to_apply=None)

        # Insert the blobs.
        blobs_to_insert = [
            dict(manifest=manifest, repository=repository_id, blob=storage_id)
            for storage_id in storage_ids
        ]
        if blobs_to_insert:
            ManifestBlob.insert_many(blobs_to_insert).execute()

        # Set the legacy image (if applicable).
        if legacy_image is not None:
            ManifestLegacyImage.create(
                repository=repository_id, image=legacy_image, manifest=manifest
            )

        # Insert the manifest child rows (if applicable).
        if child_manifest_rows:
            children_to_insert = [
                dict(manifest=manifest, child_manifest=child_manifest, repository=repository_id)
                for child_manifest in child_manifest_rows.values()
            ]
            ManifestChild.insert_many(children_to_insert).execute()

        # If this manifest is being created not for immediate tagging, add a temporary tag to the
        # manifest to ensure it isn't being GCed. If the manifest *is* for tagging, then since we're
        # creating a new one here, it cannot be GCed (since it isn't referenced by anything yet), so
        # its safe to elide the temp tag operation. If we ever change GC code to collect *all* manifests
        # in a repository for GC, then we will have to reevaluate this optimization at that time.
        if not for_tagging:
            create_temporary_tag_if_necessary(manifest, temp_tag_expiration_sec)

    # Define the labels for the manifest (if any).
    # TODO: Once the old data model is gone, turn this into a batch operation and make the label
    # application to the manifest occur under the transaction.
    labels = manifest_interface_instance.get_manifest_labels(retriever)
    if labels:
        # NOTE: `items()` (rather than the Python-2-only `iteritems()`) works on both Python 2
        # and Python 3.
        for key, value in labels.items():
            # NOTE: There can technically be empty label keys via Dockerfile's. We ignore any
            # such `labels`, as they don't really mean anything.
            if not key:
                continue

            label_media_type = "application/json" if is_json(value) else "text/plain"
            create_manifest_label(manifest, key, value, "manifest", label_media_type)

    # Return the dictionary of labels to apply (i.e. those labels that cause an action to be taken
    # on the manifest or its resulting tags). We only return those labels either defined on
    # the manifest or shared amongst all the child manifests. We intersect amongst all child manifests
    # to ensure that any action performed is defined in all manifests.
    labels_to_apply = labels or {}
    if child_manifest_label_dicts:
        shared_items = child_manifest_label_dicts[0].items()
        for child_manifest_label_dict in child_manifest_label_dicts[1:]:
            # Intersect the key+values of the labels to ensure we get the exact same result
            # for all the child manifests. Explicit sets are used (instead of the Python-2-only
            # `viewitems()`) so the intersection works on both Python 2 and Python 3.
            shared_items = set(shared_items) & set(child_manifest_label_dict.items())

        labels_to_apply = dict(shared_items)

    return CreatedManifest(manifest=manifest, newly_created=True, labels_to_apply=labels_to_apply)
def test_retriever(initialized_db):
    """
    Creates a schema 2 manifest referencing three populated blobs, writes it to a new
    repository, and checks that the manifest bytes and every linked blob can be read back
    through a RepositoryContentRetriever.
    """
    repository = create_repository("devtable", "newrepo", None)

    history = [
        {
            "created": "2018-04-03T18:37:09.284840891Z",
            "created_by": "do something",
        }
        for _ in range(2)
    ]
    layer_json = json.dumps(
        {
            "config": {},
            "rootfs": {"type": "layers", "diff_ids": []},
            "history": history,
        }
    )

    # Add a blob containing the config.
    _, config_digest = _populate_blob(layer_json)

    # Add a blob of random data.
    random_data = "hello world"
    _, random_digest = _populate_blob(random_data)

    # Add another blob of random data.
    other_random_data = "hi place"
    _, other_random_digest = _populate_blob(other_random_data)

    remote_digest = sha256_digest(b"something")

    builder = DockerSchema2ManifestBuilder()
    builder.set_config_digest(config_digest, len(layer_json.encode("utf-8")))
    builder.add_layer(other_random_digest, len(other_random_data.encode("utf-8")))
    builder.add_layer(random_digest, len(random_data.encode("utf-8")))
    manifest = builder.build()

    expected_digests = (config_digest, random_digest, other_random_digest)
    for expected in expected_digests:
        assert expected in manifest.blob_digests
    for expected in expected_digests:
        assert expected in manifest.local_blob_digests

    # Write the manifest.
    created_tuple = get_or_create_manifest(repository, manifest, storage)
    assert created_tuple is not None

    created_manifest = created_tuple.manifest
    assert created_manifest
    assert created_manifest.media_type.name == manifest.media_type
    assert created_manifest.digest == manifest.digest

    # Verify the linked blobs.
    linked_rows = ManifestBlob.select().where(ManifestBlob.manifest == created_manifest)
    blob_digests = {row.blob.content_checksum for row in linked_rows}

    assert random_digest in blob_digests
    assert other_random_digest in blob_digests
    assert config_digest in blob_digests

    # Delete any Image rows linking to the blobs from temp tags.
    for blob_digest in blob_digests:
        storage_row = ImageStorage.get(content_checksum=blob_digest)
        for image in list(Image.select().where(Image.storage == storage_row)):
            hidden_flags = [
                rt.hidden for rt in RepositoryTag.select().where(RepositoryTag.image == image)
            ]
            if all(hidden_flags):
                RepositoryTag.delete().where(RepositoryTag.image == image).execute()
                image.delete_instance(recursive=True)

    # Verify the blobs in the retriever.
    retriever = RepositoryContentRetriever(repository, storage)
    stored_bytes = retriever.get_manifest_bytes_with_digest(created_manifest.digest)
    assert stored_bytes == manifest.bytes.as_encoded_str()

    for blob_digest in blob_digests:
        assert retriever.get_blob_bytes_with_digest(blob_digest) is not None