def test_from_key(self):
    """
    Test that ObjectIdentifier.from_key returns the right identifier type for
    each storage-key shape: prefix selects the FQID class, and a trailing
    ".dead" suffix selects the tombstone variant.
    """
    uuid = "ca11ab1e-0000-4a6b-8f0d-a7d2105c23be"
    version = "2017-12-05T235728.441373Z"
    # NOTE: assertEquals is a deprecated alias removed in Python 3.12;
    # use assertEqual instead.
    self.assertEqual(
        BundleFQID(uuid, version),
        ObjectIdentifier.from_key(f"{BUNDLE_PREFIX}/{uuid}.{version}"),
    )
    self.assertEqual(
        FileFQID(uuid, version),
        ObjectIdentifier.from_key(f"{FILE_PREFIX}/{uuid}.{version}"),
    )
    self.assertEqual(
        CollectionFQID(uuid, version),
        ObjectIdentifier.from_key(f"{COLLECTION_PREFIX}/{uuid}.{version}"),
    )
    self.assertEqual(
        CollectionTombstoneID(uuid, version),
        ObjectIdentifier.from_key(f"{COLLECTION_PREFIX}/{uuid}.{version}.dead"),
    )
    self.assertEqual(
        BundleTombstoneID(uuid, version),
        ObjectIdentifier.from_key(f"{BUNDLE_PREFIX}/{uuid}.{version}.dead"),
    )
    # Malformed name under a known prefix must be rejected.
    self.assertRaises(
        ValueError,
        lambda: ObjectIdentifier.from_key(f"{BUNDLE_PREFIX}/trash"),
    )
    # Unknown prefix must be rejected even with a well-formed name.
    self.assertRaises(
        ValueError,
        lambda: ObjectIdentifier.from_key(f"trash/{uuid}.{version}.dead"),
    )
def test_to_str(self):
    """
    Test that str() of each identifier renders "<uuid>.<version>", with a
    ".dead" suffix for tombstones and the version segment omitted entirely
    for an unversioned tombstone.
    """
    uuid = "0ddba11-0000-4a6b-8f0d-a7d2105c23be"
    version = "2017-12-05T235728.441373Z"
    # NOTE: assertEquals is a deprecated alias removed in Python 3.12;
    # use assertEqual instead.
    self.assertEqual(str(BundleFQID(uuid=uuid, version=version)), f"{uuid}.{version}")
    self.assertEqual(str(FileFQID(uuid=uuid, version=version)), f"{uuid}.{version}")
    self.assertEqual(str(TombstoneID(uuid=uuid, version=version)), f"{uuid}.{version}.dead")
    # A tombstone with version=None covers all versions of the uuid.
    self.assertEqual(str(TombstoneID(uuid=uuid, version=None)), f"{uuid}.dead")
def test_to_key(self):
    """
    Test that to_key() prepends the correct storage prefix for each
    identifier type; TombstoneID keys live under the bundle prefix.
    """
    uuid = "0ddba11-0000-4a6b-8f0d-a7d2105c23be"
    version = "2017-12-05T235728.441373Z"
    # NOTE: assertEquals is a deprecated alias removed in Python 3.12;
    # use assertEqual instead.
    self.assertEqual(
        BundleFQID(uuid=uuid, version=version).to_key(),
        f"{BUNDLE_PREFIX}/{uuid}.{version}")
    self.assertEqual(
        FileFQID(uuid=uuid, version=version).to_key(),
        f"{FILE_PREFIX}/{uuid}.{version}")
    self.assertEqual(
        TombstoneID(uuid=uuid, version=version).to_key(),
        f"{BUNDLE_PREFIX}/{uuid}.{version}.dead")
    # Unversioned tombstone: no version segment in the key.
    self.assertEqual(
        TombstoneID(uuid=uuid, version=None).to_key(),
        f"{BUNDLE_PREFIX}/{uuid}.dead")
def _get_file_metadata(_file):
    """
    Poll the blobstore for one file's metadata document.

    Retries once per second while the remaining request budget (time_left)
    exceeds PUT_TIME_ALLOWANCE_SECONDS; gives up and returns None otherwise.
    Returns the parsed JSON document on success.

    NOTE(review): relies on `handle`, `replica`, `time_left`,
    `PUT_TIME_ALLOWANCE_SECONDS` from the enclosing scope — presumably a
    closure inside the bundle PUT handler.
    """
    supplied = _file['user_supplied_metadata']
    lookup_key = FileFQID(
        uuid=supplied['uuid'],
        version=supplied['version'],
    ).to_key()
    while True:
        try:
            raw = handle.get(replica.bucket, lookup_key)
        except BlobNotFoundError:
            # Not there yet — keep polling only while enough budget remains
            # to still complete the PUT afterwards.
            if time_left() <= PUT_TIME_ALLOWANCE_SECONDS:
                return None
            time.sleep(1)
        else:
            return json.loads(raw)
def get_file_fqid() -> FileFQID:
    """Mint a FileFQID with a fresh random UUID and a newly generated version."""
    fresh_uuid = str(uuid.uuid4())
    return FileFQID(uuid=fresh_uuid, version=get_version())
def dependencies_exist(source_replica: Replica, dest_replica: Replica, key: str):
    """
    Given a source replica and manifest key, checks if all dependencies of the
    corresponding DSS object are present in dest_replica:
        - Given a file manifest key, checks if blobs exist in dest_replica.
        - Given a bundle manifest key, checks if file manifests exist in dest_replica.
        - Given a collection key, checks if all collection contents exist in dest_replica.
    Returns true if all dependencies exist in dest_replica, false otherwise.
    """
    source_handle = Config.get_blobstore_handle(source_replica)
    dest_handle = Config.get_blobstore_handle(dest_replica)
    if key.endswith(TOMBSTONE_SUFFIX):
        # Tombstones carry no dependencies.
        return True
    elif key.startswith(FILE_PREFIX):
        # File manifest: its single dependency is the content blob.
        file_id = FileFQID.from_key(key)
        file_manifest = get_json_metadata(
            entity_type="file",
            uuid=file_id.uuid,
            version=file_id.version,
            replica=source_replica,
            blobstore_handle=source_handle,
            max_metadata_size=max_syncable_metadata_size)
        blob_path = compose_blob_key(file_manifest)
        if exists(dest_replica, blob_path):
            return True
    elif key.startswith(BUNDLE_PREFIX):
        # head all file manifests
        bundle_id = BundleFQID.from_key(key)
        bundle_manifest = get_json_metadata(
            entity_type="bundle",
            uuid=bundle_id.uuid,
            version=bundle_id.version,
            replica=source_replica,
            blobstore_handle=source_handle,
            max_metadata_size=max_syncable_metadata_size)
        try:
            with ThreadPoolExecutor(max_workers=20) as e:
                futures = list()
                for file in bundle_manifest[BundleMetadata.FILES]:
                    file_uuid = file[BundleFileMetadata.UUID]
                    file_version = file[BundleFileMetadata.VERSION]
                    futures.append(
                        e.submit(get_json_metadata,
                                 entity_type="file",
                                 uuid=file_uuid,
                                 version=file_version,
                                 replica=dest_replica,
                                 # Bug fix: was source_handle. The lookup targets
                                 # dest_replica, so it must use dest_replica's
                                 # handle, matching the file/collection branches.
                                 blobstore_handle=dest_handle,
                                 max_metadata_size=max_syncable_metadata_size))
                for future in as_completed(futures):
                    # Raises if any file manifest is missing in dest_replica.
                    future.result()
            return True
        except Exception:
            # Best-effort semantics: any failure means "dependencies missing".
            pass
    elif key.startswith(COLLECTION_PREFIX):
        collection_id = CollectionFQID.from_key(key)
        collection_manifest = get_json_metadata(
            entity_type="collection",
            uuid=collection_id.uuid,
            version=collection_id.version,
            replica=source_replica,
            blobstore_handle=source_handle,
            max_metadata_size=max_syncable_metadata_size)
        try:
            verify_collection(contents=collection_manifest["contents"],
                              replica=dest_replica,
                              blobstore_handle=dest_handle)
            return True
        except Exception:
            # Best-effort semantics: any failure means "dependencies missing".
            pass
    else:
        raise NotImplementedError("Unknown prefix for key {}".format(key))
    return False
def put(uuid: str, replica: str, json_request_body: dict, version: str = None):
    """
    Create a bundle manifest from the files listed in the request body.

    Waits (within the request's time budget) for every referenced file's
    metadata to appear in the blobstore, then idempotently writes the bundle
    manifest under the bundle key.

    :param uuid: bundle UUID; lowercased before use.
    :param replica: name of a Replica enum member; selects bucket and handle.
    :param json_request_body: must contain 'files' (list of file references
        with 'uuid', 'version', 'name', 'indexed') and 'creator_uid'.
    :param version: optional ISO-8601 timestamp; generated from utcnow() when
        omitted.
    :raises DSSException: 409 "incorrect_file_bundle_uuid" if a referenced
        file belongs to a different bundle; 409 "file_missing" if a file's
        metadata never appears before the time budget runs out; 409
        "bundle_already_exists" if a different manifest already exists at
        this uuid/version.
    :return: (flask JSON response with the version, HTTP status) —
        201 when newly created, 200 when an identical manifest already existed.
    """
    uuid = uuid.lower()
    if version is not None:
        # convert it to date-time so we can format exactly as the system requires (with microsecond precision)
        timestamp = iso8601.parse_date(version)
    else:
        timestamp = datetime.datetime.utcnow()
    version = datetime_to_version_format(timestamp)
    handle = Config.get_blobstore_handle(Replica[replica])
    bucket = Replica[replica].bucket
    # what's the target object name for the bundle manifest?
    bundle_manifest_key = BundleFQID(uuid=uuid, version=version).to_key()
    # decode the list of files.
    files = [{'user_supplied_metadata': file} for file in json_request_body['files']]
    time_left = nestedcontext.inject("time_left")
    while True:  # each time through the outer while-loop, we try to gather up all the file metadata.
        for file in files:
            user_supplied_metadata = file['user_supplied_metadata']
            metadata_key = FileFQID(
                uuid=user_supplied_metadata['uuid'],
                version=user_supplied_metadata['version'],
            ).to_key()
            if 'file_metadata' not in file:
                # Metadata not fetched yet for this file; try now. A missing
                # blob is tolerated here — it may still be mid-upload.
                try:
                    file_metadata = handle.get(bucket, metadata_key)
                except BlobNotFoundError:
                    continue
                file['file_metadata'] = json.loads(file_metadata)
                # Each file must have been uploaded against this bundle.
                if uuid != file['file_metadata']['bundle_uuid']:
                    raise DSSException(
                        requests.codes.conflict,
                        "incorrect_file_bundle_uuid",
                        f"File bundle_uuid {file['file_metadata']['bundle_uuid']} does not equal bundle uuid {uuid}"
                    )
        # check to see if any file metadata is still not yet loaded.
        for file in files:
            if 'file_metadata' not in file:
                missing_file_user_metadata = file['user_supplied_metadata']
                break
        else:
            # All file metadata gathered — leave the retry loop.
            break

        # if we're out of time, give up.
        if time_left() > PUT_TIME_ALLOWANCE_SECONDS:
            # Budget remains: sleep briefly, then retry the gather pass.
            time.sleep(1)
            continue

        raise DSSException(
            requests.codes.conflict,
            "file_missing",
            f"Could not find file {missing_file_user_metadata['uuid']}/{missing_file_user_metadata['version']}."
        )

    # build a manifest consisting of all the files.
    bundle_metadata = {
        BundleMetadata.FORMAT: BundleMetadata.FILE_FORMAT_VERSION,
        BundleMetadata.VERSION: version,
        BundleMetadata.FILES: [
            {
                BundleFileMetadata.NAME: file['user_supplied_metadata']['name'],
                BundleFileMetadata.UUID: file['user_supplied_metadata']['uuid'],
                BundleFileMetadata.VERSION: file['user_supplied_metadata']['version'],
                BundleFileMetadata.CONTENT_TYPE: file['file_metadata'][FileMetadata.CONTENT_TYPE],
                BundleFileMetadata.SIZE: file['file_metadata'][FileMetadata.SIZE],
                BundleFileMetadata.INDEXED: file['user_supplied_metadata']['indexed'],
                BundleFileMetadata.CRC32C: file['file_metadata'][FileMetadata.CRC32C],
                BundleFileMetadata.S3_ETAG: file['file_metadata'][FileMetadata.S3_ETAG],
                BundleFileMetadata.SHA1: file['file_metadata'][FileMetadata.SHA1],
                BundleFileMetadata.SHA256: file['file_metadata'][FileMetadata.SHA256],
            }
            for file in files
        ],
        BundleMetadata.CREATOR_UID: json_request_body['creator_uid'],
    }

    # Write-once semantics: `created` is False when an identical manifest was
    # already stored; `idempotent` is False when a *different* one exists.
    created, idempotent = _idempotent_save(
        handle,
        bucket,
        bundle_manifest_key,
        bundle_metadata,
    )

    if not idempotent:
        raise DSSException(
            requests.codes.conflict,
            "bundle_already_exists",
            f"bundle with UUID {uuid} and version {version} already exists"
        )
    status_code = requests.codes.created if created else requests.codes.ok

    return jsonify(dict(version=version)), status_code
def test_dependencies_exist(self):
    """
    Exercise sync.dependencies_exist across the dependency chain
    collection -> bundle -> file -> blob: each object's dependencies are
    reported missing until the objects below it in the chain are uploaded.
    """
    file_uuid, file_version = str(uuid.uuid4()), get_version()
    bundle_uuid, bundle_version = str(uuid.uuid4()), get_version()
    # A collection referencing both the bundle and the file.
    collection_data = {
        "contents": [{
            "type": "bundle",
            "uuid": bundle_uuid,
            "version": bundle_version
        }, {
            "type": "file",
            "uuid": file_uuid,
            "version": file_version
        }]
    }
    # A bundle manifest referencing the file.
    bundle_data = {
        BundleMetadata.FILES: [{
            BundleFileMetadata.UUID: file_uuid,
            BundleFileMetadata.VERSION: file_version
        }]
    }
    # File manifest whose checksums determine the blob key; the random CRC32C
    # makes the derived blob key unique per test run.
    file_data = {
        FileMetadata.SHA256: "sync_test",
        FileMetadata.SHA1: "sync_test",
        FileMetadata.S3_ETAG: "sync_test",
        FileMetadata.CRC32C: str(uuid.uuid4())
    }
    with self.subTest("collection without deps"):
        collection_key = "{}/{}".format(COLLECTION_PREFIX, get_collection_fqid())
        collection_blob = self.s3_bucket.Object(collection_key)
        collection_blob.put(Body=json.dumps(collection_data).encode())
        # Neither the bundle nor the file exists yet.
        self.assertFalse(
            sync.dependencies_exist(Replica.aws, Replica.aws, collection_key))
    with self.subTest("bundle without deps"):
        bundle_key = "{}/{}".format(
            BUNDLE_PREFIX,
            BundleFQID(uuid=bundle_uuid, version=bundle_version))
        bundle_blob = self.s3_bucket.Object(bundle_key)
        bundle_blob.put(Body=json.dumps(bundle_data).encode())
        # The bundle now exists, but its file (and hence the collection's
        # dependencies) are still missing.
        self.assertFalse(
            sync.dependencies_exist(Replica.aws, Replica.aws, collection_key))
        self.assertFalse(
            sync.dependencies_exist(Replica.aws, Replica.aws, bundle_key))
    with self.subTest("file without deps"):
        file_key = "{}/{}".format(
            FILE_PREFIX,
            FileFQID(uuid=file_uuid, version=file_version))
        file_blob = self.s3_bucket.Object(file_key)
        file_blob.put(Body=json.dumps(file_data).encode())

        # Retried with @eventually to absorb S3's eventual consistency.
        @eventually(timeout=8, interval=1, errors={Exception})
        def check_file_revdeps():
            # Collection and bundle are satisfied now that the file manifest
            # exists; the file itself still lacks its content blob.
            self.assertTrue(
                sync.dependencies_exist(Replica.aws, Replica.aws, collection_key))
            self.assertTrue(
                sync.dependencies_exist(Replica.aws, Replica.aws, bundle_key))
            self.assertFalse(
                sync.dependencies_exist(Replica.aws, Replica.aws, file_key))
        check_file_revdeps()
    with self.subTest(
            "blob presence causes all dependencies to be resolved"):
        blob_key = compose_blob_key(file_data)
        blob_blob = self.s3_bucket.Object(blob_key)
        blob_blob.put(Body=b"sync_test")

        # With the blob uploaded, every level of the chain is satisfied.
        @eventually(timeout=8, interval=1, errors={Exception})
        def check_blob_revdeps():
            self.assertTrue(
                sync.dependencies_exist(Replica.aws, Replica.aws, collection_key))
            self.assertTrue(
                sync.dependencies_exist(Replica.aws, Replica.aws, bundle_key))
            self.assertTrue(
                sync.dependencies_exist(Replica.aws, Replica.aws, file_key))
        check_blob_revdeps()