def idempotent_save(blobstore: BlobStore, bucket: str, key: str, data: bytes) -> typing.Tuple[bool, bool]:
    """
    Save ``data`` under ``key`` unless an object with that key is already present.

    If the key already exists, nothing is written; the stored bytes are fetched and compared
    with ``data`` so the caller can tell whether the request was a harmless repeat. Existing
    content is _never_ overwritten, even when it differs from ``data``.

    If the key does not exist, ``data`` is uploaded. For an ``S3BlobStore`` and a payload larger
    than 16 MiB the upload goes through ``multipart_parallel_upload``; otherwise it is a single
    ``upload_file_handle`` call.

    :param blobstore: the blobstore to save the data to
    :param bucket: the bucket in the blobstore to save the data to
    :param key: the key of the object to save
    :param data: the raw bytes to save
    :return: a tuple of booleans (was the data saved?, was the save idempotent?)
    """
    if test_object_exists(blobstore, bucket, key):
        # Already stored: report whether the stored bytes equal what we were asked to write.
        stored_bytes = blobstore.get(bucket, key)
        is_same_content = stored_bytes == data
        return False, is_same_content

    # Nothing stored yet, so write the payload.
    multipart_threshold = 16 * 1024 * 1024
    wants_multipart = isinstance(blobstore, S3BlobStore) and len(data) > multipart_threshold
    if not wants_multipart:
        blobstore.upload_file_handle(bucket, key, io.BytesIO(data))
    else:
        with io.BytesIO(data) as stream:
            multipart_parallel_upload(
                blobstore.s3_client,
                bucket,
                key,
                stream,
                part_size=multipart_threshold,
                parallelization_factor=20,
            )
    return True, True
def test_object_exists(blobstore: BlobStore, bucket: str, match: str,
                       test_type: ObjectTest = ObjectTest.EXACT) -> bool:
    """
    Test if an object exists in the BlobStore

    :param blobstore: the blobstore to check
    :param bucket: the bucket to check in the blobstore
    :param match: the string to match against; this is _not_ a regex pattern, strings must match exactly
    :param test_type: the type of test to conduct, prefix matches test if the object name starts with the match
        string, exact matches must match the full string
    :return: True if a matching object was found, False otherwise
    :raises ValueError: if ``test_type`` is neither ``ObjectTest.PREFIX`` nor ``ObjectTest.EXACT``
    """
    if test_type == ObjectTest.PREFIX:
        # Only the first listing entry is needed to know whether anything matches the prefix.
        try:
            next(iter(blobstore.list(bucket, prefix=match)))
        except StopIteration:
            return False
        return True

    if test_type == ObjectTest.EXACT:
        # A metadata lookup is used purely as an existence probe; its result is discarded.
        try:
            blobstore.get_user_metadata(bucket, match)
        except BlobNotFoundError:
            return False
        return True

    # Fall back to the value itself when it has no ``name`` (e.g. a plain string was passed), so the
    # caller gets the intended ValueError rather than an AttributeError from building the message.
    type_label = getattr(test_type, "name", test_type)
    raise ValueError(f"Not a valid storage object test type: {type_label}")
def _idempotent_save(blobstore: BlobStore, bucket: str, key: str, data: dict) -> typing.Tuple[bool, bool]:
    """
    Save ``data`` as a JSON document under ``key`` unless the key is already present.

    If the key does not exist, ``data`` is serialized with ``json.dumps``, UTF-8 encoded and
    uploaded. If the key already exists, nothing is written; the stored document is parsed and
    compared with ``data``. Existing content is _never_ overwritten, even when it differs.

    Note that the comparison is between the parsed JSON and ``data`` as passed in, so values that
    do not survive a JSON round trip unchanged (e.g. tuples or non-string keys) compare unequal.

    :param blobstore: the blobstore to save the data to
    :param bucket: the bucket in the blobstore to save the data to
    :param key: the key of the object to save
    :param data: the JSON-serializable dictionary to save
    :return: a tuple of booleans (was the data saved?, was the save idempotent?)
    """
    if not test_object_exists(blobstore, bucket, key):
        # First write for this key: persist the serialized document.
        payload = json.dumps(data).encode("utf-8")
        blobstore.upload_file_handle(bucket, key, io.BytesIO(payload))
        return True, True

    # Key already taken: load what is stored and report whether it matches the request.
    stored_raw = blobstore.get(bucket, key)
    stored_document = json.loads(stored_raw.decode("utf-8"))
    return False, stored_document == data
def write_file_metadata(handle: BlobStore, dst_bucket: str, file_uuid: str, file_version: str, document: str):
    """
    Write a file metadata document to ``files/{file_uuid}.{file_version}`` in ``dst_bucket``.

    The target key is probed first with ``get_user_metadata``; the document is only uploaded
    (UTF-8 encoded) when that probe reports the object as missing. The probe and the upload are
    two separate calls on ``handle``.

    :param handle: the blobstore to write to
    :param dst_bucket: the bucket that receives the metadata object
    :param file_uuid: the UUID part of the metadata key
    :param file_version: the version part of the metadata key
    :param document: the metadata document text to store
    :raises BlobAlreadyExistsError: if an object already exists at the metadata key
    """
    metadata_key = f"files/{file_uuid}.{file_version}"

    # An existing object at the target key is treated as a failure, never overwritten.
    try:
        handle.get_user_metadata(dst_bucket, metadata_key)
        already_present = True
    except BlobNotFoundError:
        already_present = False

    if already_present:
        raise BlobAlreadyExistsError()

    encoded_document = document.encode("utf-8")
    handle.upload_file_handle(dst_bucket, metadata_key, io.BytesIO(encoded_document))
def map_bucket_results(func: typing.Callable, handle: BlobStore, bucket: str, base_pfx: str, parallelization=10):
    """
    Fan ``func`` out over sixteen listings of ``bucket``, one per hexadecimal digit.

    For each character in ``0123456789abcdef`` the listing ``handle.list(bucket, prefix=base_pfx + char)``
    is created in the calling thread and handed to ``func`` on a worker thread. This is a generator:
    each return value of ``func`` is yielded as its task finishes, so results arrive in completion
    order rather than prefix order. A task that raises is logged and skipped; it yields nothing.

    ``func`` is expected to be thread safe.

    :param func: callable invoked with one listing per prefix
    :param handle: the blobstore to list
    :param bucket: the bucket to list
    :param base_pfx: the key prefix that each hexadecimal digit is appended to
    :param parallelization: maximum number of worker threads
    :return: generator over the return values of the successful ``func`` calls
    """
    hex_digits = "0123456789abcdef"
    with ThreadPoolExecutor(max_workers=parallelization) as pool:
        pending = [
            pool.submit(func, handle.list(bucket, prefix=f"{base_pfx}{digit}"))
            for digit in hex_digits
        ]
        for finished in as_completed(pending):
            # Best effort: one failing prefix must not stop the remaining ones.
            try:
                yield finished.result()
            except Exception:
                logger.error(traceback.format_exc())