def _read_bundle_manifest(cls, replica: Replica, fqid: BundleFQID) -> dict:
    handle = Config.get_blobstore_handle(replica)
    bucket_name = replica.bucket
    manifest_string = handle.get(bucket_name, fqid.to_key()).decode("utf-8")
    logger.debug("Read bundle manifest from bucket %s with bundle key %s: %s",
                 bucket_name, fqid.to_key(), manifest_string)
    # json.loads no longer accepts an `encoding` argument (removed in Python 3.9);
    # the string has already been decoded above.
    manifest = json.loads(manifest_string)
    return manifest

def _get_bundle_manifest(uuid: str,
                         replica: Replica,
                         version: typing.Optional[str],
                         *,
                         bucket: typing.Optional[str] = None) -> typing.Optional[dict]:
    """
    Return the contents of the bundle manifest file from cloud storage, subject to the rules of tombstoning.
    If version is None, return the latest version, once again subject to the rules of tombstoning.

    If the bundle cannot be found, return None.
    """
    uuid = uuid.lower()

    handle = Config.get_blobstore_handle(replica)
    default_bucket = replica.bucket

    # need the ability to use fixture bucket for testing
    bucket = default_bucket if bucket is None else bucket

    def tombstone_exists(uuid: str, version: typing.Optional[str]):
        return test_object_exists(handle, bucket, BundleTombstoneID(uuid=uuid, version=version).to_key())

    # handle the following deletion cases
    # 1. the whole bundle is deleted
    # 2. the specific version of the bundle is deleted
    if tombstone_exists(uuid, None) or (version and tombstone_exists(uuid, version)):
        return None

    # handle the following deletion case
    # 3. no version is specified, we want the latest _non-deleted_ version
    if version is None:
        # list the files and find the one that is the most recent.
        prefix = f"bundles/{uuid}."
        object_names = handle.list(bucket, prefix)
        version = _latest_version_from_object_names(object_names)

    if version is None:
        # no matches!
        return None

    bundle_fqid = BundleFQID(uuid=uuid, version=version)

    # retrieve the bundle metadata.
    try:
        bundle_manifest_blob = handle.get(bucket, bundle_fqid.to_key()).decode("utf-8")
        return json.loads(bundle_manifest_blob)
    except BlobNotFoundError:
        return None

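# Illustrative sketch (not part of the original module): one way _get_bundle_manifest
# might be used to fetch the newest non-tombstoned manifest for a bundle. Replica.aws
# is an assumed example value; the helper name below is hypothetical.
def _example_lookup_latest_manifest(bundle_uuid: str) -> typing.Optional[dict]:
    # version=None asks for the latest version that is not tombstoned; the call
    # returns None if the bundle is missing or has been deleted.
    return _get_bundle_manifest(bundle_uuid, Replica.aws, version=None)
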
def get_bundle_from_bucket(uuid: str,
                           replica: Replica,
                           version: typing.Optional[str],
                           bucket: typing.Optional[str],
                           directurls: bool = False):
    uuid = uuid.lower()

    handle = Config.get_blobstore_handle(replica)
    default_bucket = replica.bucket

    # need the ability to use fixture bucket for testing
    bucket = default_bucket if bucket is None else bucket

    def tombstone_exists(uuid: str, version: typing.Optional[str]):
        return test_object_exists(handle, bucket, TombstoneID(uuid=uuid, version=version).to_key())

    # handle the following deletion cases
    # 1. the whole bundle is deleted
    # 2. the specific version of the bundle is deleted
    if tombstone_exists(uuid, None) or (version and tombstone_exists(uuid, version)):
        raise DSSException(404, "not_found", "EMPTY Cannot find file!")

    # handle the following deletion case
    # 3. no version is specified, we want the latest _non-deleted_ version
    if version is None:
        # list the files and find the one that is the most recent.
        prefix = f"bundles/{uuid}."
        object_names = handle.list(bucket, prefix)
        version = _latest_version_from_object_names(object_names)

    if version is None:
        # no matches!
        raise DSSException(404, "not_found", "Cannot find file!")

    bundle_fqid = BundleFQID(uuid=uuid, version=version)

    # retrieve the bundle metadata.
    try:
        bundle_metadata = json.loads(
            handle.get(
                bucket,
                bundle_fqid.to_key(),
            ).decode("utf-8"))
    except BlobNotFoundError:
        raise DSSException(404, "not_found", "Cannot find file!")

    filesresponse = []  # type: typing.List[dict]

    for file in bundle_metadata[BundleMetadata.FILES]:
        file_version = {
            'name': file[BundleFileMetadata.NAME],
            'content-type': file[BundleFileMetadata.CONTENT_TYPE],
            'size': file[BundleFileMetadata.SIZE],
            'uuid': file[BundleFileMetadata.UUID],
            'version': file[BundleFileMetadata.VERSION],
            'crc32c': file[BundleFileMetadata.CRC32C],
            's3_etag': file[BundleFileMetadata.S3_ETAG],
            'sha1': file[BundleFileMetadata.SHA1],
            'sha256': file[BundleFileMetadata.SHA256],
            'indexed': file[BundleFileMetadata.INDEXED],
        }
        if directurls:
            file_version['url'] = str(UrlBuilder().set(
                scheme=replica.storage_schema,
                netloc=bucket,
                path="blobs/{}.{}.{}.{}".format(
                    file[BundleFileMetadata.SHA256],
                    file[BundleFileMetadata.SHA1],
                    file[BundleFileMetadata.S3_ETAG],
                    file[BundleFileMetadata.CRC32C],
                ),
            ))
        filesresponse.append(file_version)

    return dict(
        bundle=dict(
            uuid=uuid,
            version=version,
            files=filesresponse,
            creator_uid=bundle_metadata[BundleMetadata.CREATOR_UID],
        )
    )
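

# Illustrative sketch (not part of the original module): a thin wrapper showing how
# get_bundle_from_bucket might be called against the replica's default bucket with
# direct blob URLs included. Replica.aws and the helper name are assumed example
# values; a DSSException(404, ...) propagates if the bundle is missing or tombstoned,
# as raised above.
def _example_get_bundle_with_urls(bundle_uuid: str,
                                  bundle_version: typing.Optional[str] = None) -> dict:
    # bucket=None selects the replica's default bucket; directurls=True adds a
    # "blobs/<sha256>.<sha1>.<s3_etag>.<crc32c>" storage URL to each file entry.
    return get_bundle_from_bucket(bundle_uuid,
                                  Replica.aws,
                                  bundle_version,
                                  bucket=None,
                                  directurls=True)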