def get_impl(uuid: str, replica: str, version: str = None):
    uuid = uuid.lower()
    bucket = Replica[replica].bucket
    handle = Config.get_blobstore_handle(Replica[replica])
    # A tombstone with version=None marks every version of the collection as deleted.
    tombstone_key = CollectionTombstoneID(uuid, version=None).to_key()
    if test_object_exists(handle, bucket, tombstone_key):
        raise DSSException(
            404, "not_found", "Could not find collection for UUID {}".format(uuid))
    if version is None:
        # List the collection's keys and pick the most recent version.
        # Version strings are timestamps, so lexicographic order is chronological order.
        prefix = CollectionFQID(uuid, version=None).to_key_prefix()
        for matching_key in handle.list(bucket, prefix):
            matching_key = matching_key[len(prefix):]
            if version is None or matching_key > version:
                version = matching_key
    try:
        collection_blob = handle.get(bucket, CollectionFQID(uuid, version).to_key())
    except BlobNotFoundError:
        raise DSSException(
            404, "not_found", "Could not find collection for UUID {}".format(uuid))
    return json.loads(collection_blob)
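# A minimal, self-contained sketch of the latest-version scan in get_impl above.
# It assumes only that version strings ("YYYY-MM-DDTHHMMSS.ffffffZ", as in the
# test fixture below) sort lexicographically in timestamp order; the key names
# here are invented for illustration.
def _latest_version_sketch(keys, prefix):
    version = None
    for key in keys:
        suffix = key[len(prefix):]
        if version is None or suffix > version:
            version = suffix
    return version

_prefix = "collections/ca11ab1e-0000-4a6b-8f0d-a7d2105c23be."
assert _latest_version_sketch(
    [_prefix + "2017-12-05T235728.441373Z", _prefix + "2018-01-01T000000.000000Z"],
    _prefix,
) == "2018-01-01T000000.000000Z"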
def test_from_key(self):
    """Test that ObjectIdentifier.from_key returns the correct identifier type for each kind of key."""
    uuid = "ca11ab1e-0000-4a6b-8f0d-a7d2105c23be"
    version = "2017-12-05T235728.441373Z"
    self.assertEqual(
        BundleFQID(uuid, version),
        ObjectIdentifier.from_key(f"{BUNDLE_PREFIX}/{uuid}.{version}"),
    )
    self.assertEqual(
        FileFQID(uuid, version),
        ObjectIdentifier.from_key(f"{FILE_PREFIX}/{uuid}.{version}"),
    )
    self.assertEqual(
        CollectionFQID(uuid, version),
        ObjectIdentifier.from_key(f"{COLLECTION_PREFIX}/{uuid}.{version}"),
    )
    self.assertEqual(
        CollectionTombstoneID(uuid, version),
        ObjectIdentifier.from_key(f"{COLLECTION_PREFIX}/{uuid}.{version}.dead"),
    )
    self.assertEqual(
        BundleTombstoneID(uuid, version),
        ObjectIdentifier.from_key(f"{BUNDLE_PREFIX}/{uuid}.{version}.dead"),
    )
    self.assertRaises(
        ValueError,
        lambda: ObjectIdentifier.from_key(f"{BUNDLE_PREFIX}/trash"),
    )
    self.assertRaises(
        ValueError,
        lambda: ObjectIdentifier.from_key(f"trash/{uuid}.{version}.dead"),
    )
def _collections_in_database_but_not_in_bucket(self):
    """
    Finds collection items in the table that:
    1. No longer exist in the bucket.
    2. Are tombstoned in the bucket.
    3. Have an owner that doesn't match the owner found in the bucket's collection file.

    Yields (owner, collection_fqid) string pairs representing the item's key pair.
    The yielded keys can then be removed from the collections DynamoDB table.
    """
    for owner, collection_fqid in owner_lookup.get_all_collection_keys():
        self.total_database_collection_items += 1
        collection = CollectionFQID.from_key(f'{COLLECTION_PREFIX}/{collection_fqid}')
        try:
            collection_owner = json.loads(self.handle.get(self.bucket, collection.to_key()))['owner']
            assert not self._is_uuid_tombstoned(collection.uuid)
            assert collection_owner == owner
        except (BlobNotFoundError, AssertionError):
            # Missing blob, tombstoned uuid, or owner mismatch: flag for removal.
            yield owner, collection_fqid
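# Self-contained sketch of the check-and-yield pattern used above: a missing
# blob (modeled here as a KeyError) and a failed consistency assertion both
# route the (owner, fqid) pair to the caller for deletion. All names and data
# are invented for illustration.
def _stale_items_sketch(db_items, bucket_owners):
    for owner, fqid in db_items:
        try:
            assert bucket_owners[fqid] == owner  # KeyError if the blob is gone
        except (KeyError, AssertionError):
            yield owner, fqid

assert list(_stale_items_sketch(
    [("a@example.org", "u1.v1"), ("b@example.org", "u2.v1")],
    {"u1.v1": "a@example.org"},
)) == [("b@example.org", "u2.v1")]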
def patch(uuid: str, json_request_body: dict, replica: str, version: str):
    authenticated_user_email = security.get_token_email(request.token_info)
    uuid = uuid.lower()
    owner = get_impl(uuid=uuid, replica=replica)["owner"]
    if owner != authenticated_user_email:
        raise DSSException(requests.codes.forbidden, "forbidden", "Collection access denied")
    handle = Config.get_blobstore_handle(Replica[replica])
    try:
        cur_collection_blob = handle.get(
            Replica[replica].bucket,
            CollectionFQID(uuid, version).to_key())
    except BlobNotFoundError:
        raise DSSException(
            404, "not_found", "Could not find collection for UUID {}".format(uuid))
    collection = json.loads(cur_collection_blob)
    # Apply simple field updates.
    for field in "name", "description", "details":
        if field in json_request_body:
            collection[field] = json_request_body[field]
    # Remove the requested contents, then verify and append the additions.
    remove_contents_set = set(
        map(hashabledict, json_request_body.get("remove_contents", [])))
    collection["contents"] = [
        i for i in collection["contents"] if hashabledict(i) not in remove_contents_set
    ]
    verify_collection(json_request_body.get("add_contents", []), Replica[replica], handle)
    collection["contents"].extend(json_request_body.get("add_contents", []))
    collection["contents"] = _dedpuplicate_contents(collection["contents"])
    # Write the patched collection under a fresh version.
    timestamp = datetime.datetime.utcnow()
    new_collection_version = datetime_to_version_format(timestamp)
    handle.upload_file_handle(
        Replica[replica].bucket,
        CollectionFQID(uuid, new_collection_version).to_key(),
        io.BytesIO(json.dumps(collection).encode("utf-8")))
    return jsonify(dict(uuid=uuid, version=new_collection_version)), requests.codes.ok
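# Self-contained sketch of the remove-then-add content update in patch above.
# `_hashabledict_sketch` is a locally defined stand-in (the real hashabledict
# helper lives elsewhere in this repo); plain dicts are unhashable, so set
# membership tests need a hashable wrapper.
class _hashabledict_sketch(dict):
    def __hash__(self):
        return hash(frozenset(self.items()))

_contents = [{"type": "file", "uuid": "u1"}, {"type": "file", "uuid": "u2"}]
_remove = {_hashabledict_sketch(i) for i in [{"type": "file", "uuid": "u1"}]}
_contents = [i for i in _contents if _hashabledict_sketch(i) not in _remove]
_contents.extend([{"type": "file", "uuid": "u3"}])
assert _contents == [{"type": "file", "uuid": "u2"}, {"type": "file", "uuid": "u3"}]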
def put(json_request_body: dict, replica: str, uuid: str, version: str):
    authenticated_user_email = security.get_token_email(request.token_info)
    collection_body = dict(json_request_body, owner=authenticated_user_email)
    uuid = uuid.lower()
    handle = Config.get_blobstore_handle(Replica[replica])
    collection_body["contents"] = _dedpuplicate_contents(collection_body["contents"])
    verify_collection(collection_body["contents"], Replica[replica], handle)
    collection_uuid = uuid if uuid else str(uuid4())
    collection_version = version
    # Record the (owner, collection) association in DynamoDB to speed up lookups;
    # this is a no-op if the owner is already associated with the UUID.
    owner_lookup.put_collection(
        owner=authenticated_user_email,
        collection_fqid=str(CollectionFQID(collection_uuid, collection_version)))
    # Add the collection file to the bucket.
    handle.upload_file_handle(
        Replica[replica].bucket,
        CollectionFQID(collection_uuid, collection_version).to_key(),
        io.BytesIO(json.dumps(collection_body).encode("utf-8")))
    return jsonify(dict(uuid=collection_uuid, version=collection_version)), requests.codes.created
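# Sketch of the version-stamp format used in collection keys, assuming the
# "YYYY-MM-DDTHHMMSS.ffffffZ" convention seen in the test fixture above. The
# helper name is local to this sketch, not the repo's datetime_to_version_format.
import datetime

def _to_version_sketch(ts: datetime.datetime) -> str:
    return ts.strftime("%Y-%m-%dT%H%M%S.%fZ")

assert _to_version_sketch(
    datetime.datetime(2017, 12, 5, 23, 57, 28, 441373)
) == "2017-12-05T235728.441373Z"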
def list_collections(per_page: int, start_at: int = 0):
    """
    Return a list of a user's collections.

    Collection UUIDs are indexed by the user's email in a DynamoDB table.

    :param int per_page: number of collections returned per paged response.
    :param int start_at: index at which the next chunk of the paged response starts.
    :return: a dictionary containing a list of dictionaries of the form:
             {'collections': [{'uuid': uuid, 'version': version}, ...]}
    """
    # TODO: Replica is unused, so this does not use replica. Appropriate?
    owner = security.get_token_email(request.token_info)
    collections = []
    for collection in owner_lookup.get_collection_fqids_for_owner(owner):
        fqid = CollectionFQID.from_key(f'{COLLECTION_PREFIX}/{collection}')
        collections.append({'uuid': fqid.uuid, 'version': fqid.version})
    if len(collections) - start_at > per_page:
        # Paged response with a link to the next chunk.
        next_url = UrlBuilder(request.url)
        next_url.replace_query("start_at", str(start_at + per_page))
        collection_page = collections[start_at:start_at + per_page]
        response = make_response(jsonify({'collections': collection_page}), requests.codes.partial)
        response.headers['Link'] = f"<{next_url}>; rel='next'"
        response.headers['X-OpenAPI-Pagination'] = 'true'
    else:
        # Single response returning all collections (or those remaining).
        collection_page = collections[start_at:]
        response = make_response(jsonify({'collections': collection_page}), requests.codes.ok)
        response.headers['X-OpenAPI-Pagination'] = 'false'
        response.headers['X-OpenAPI-Paginated-Content-Key'] = 'collections'
    return response
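# Self-contained sketch of the pagination arithmetic in list_collections above:
# a page is a plain list slice, and a "next" link is warranted only while more
# items remain past the current page.
def _page_sketch(items, start_at, per_page):
    has_next = len(items) - start_at > per_page
    chunk = items[start_at:start_at + per_page] if has_next else items[start_at:]
    return chunk, has_next

assert _page_sketch(list(range(5)), 0, 2) == ([0, 1], True)
assert _page_sketch(list(range(5)), 4, 2) == ([4], False)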
def _collections_in_bucket_but_not_in_database(self):
    """
    Finds (owner, collection_fqid) pairs present in the bucket but not in the collections table.

    Yields (owner, collection_fqid) string pairs representing the item's key pair.
    The yielded keys can then be added to the collections DynamoDB table.
    """
    for collection_key in self.handle.list(self.bucket, prefix=f'{COLLECTION_PREFIX}/'):
        self.total_bucket_collection_items += 1
        collection_fqid = CollectionFQID.from_key(collection_key)
        if not self._is_uuid_tombstoned(collection_fqid.uuid):
            try:
                collection = json.loads(self.handle.get(self.bucket, collection_key))
                try:
                    owner_lookup.get_collection(owner=collection['owner'],
                                                collection_fqid=str(collection_fqid))
                except DynamoDBItemNotFound:
                    yield collection['owner'], str(collection_fqid)
            except BlobNotFoundError:
                pass  # deleted from the bucket while being listed
            except KeyError:
                pass  # collection file has no 'owner' field
        else:
            self.total_tombstoned_bucket_collection_items += 1
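# Mirror sketch of the bucket-to-database direction above: entries found in
# the bucket but absent from the table are yielded for (re-)insertion. All
# names and data are invented for illustration.
def _missing_items_sketch(bucket_owners, db_keys):
    for fqid, owner in bucket_owners.items():
        if (owner, fqid) not in db_keys:
            yield owner, fqid

assert list(_missing_items_sketch({"u1.v1": "a@example.org"}, set())) \
    == [("a@example.org", "u1.v1")]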
def dependencies_exist(source_replica: Replica, dest_replica: Replica, key: str):
    """
    Given a source replica and manifest key, checks if all dependencies of the corresponding
    DSS object are present in dest_replica:
    - Given a file manifest key, checks if the blob exists in dest_replica.
    - Given a bundle manifest key, checks if file manifests exist in dest_replica.
    - Given a collection key, checks if all collection contents exist in dest_replica.
    Returns True if all dependencies exist in dest_replica, False otherwise.
    """
    source_handle = Config.get_blobstore_handle(source_replica)
    dest_handle = Config.get_blobstore_handle(dest_replica)
    if key.endswith(TOMBSTONE_SUFFIX):
        return True
    elif key.startswith(FILE_PREFIX):
        file_id = FileFQID.from_key(key)
        file_manifest = get_json_metadata(
            entity_type="file",
            uuid=file_id.uuid,
            version=file_id.version,
            replica=source_replica,
            blobstore_handle=source_handle,
            max_metadata_size=max_syncable_metadata_size)
        blob_path = compose_blob_key(file_manifest)
        if exists(dest_replica, blob_path):
            return True
    elif key.startswith(BUNDLE_PREFIX):
        # Head all file manifests in parallel.
        bundle_id = BundleFQID.from_key(key)
        bundle_manifest = get_json_metadata(
            entity_type="bundle",
            uuid=bundle_id.uuid,
            version=bundle_id.version,
            replica=source_replica,
            blobstore_handle=source_handle,
            max_metadata_size=max_syncable_metadata_size)
        try:
            with ThreadPoolExecutor(max_workers=20) as e:
                futures = list()
                for file in bundle_manifest[BundleMetadata.FILES]:
                    file_uuid = file[BundleFileMetadata.UUID]
                    file_version = file[BundleFileMetadata.VERSION]
                    futures.append(
                        e.submit(get_json_metadata,
                                 entity_type="file",
                                 uuid=file_uuid,
                                 version=file_version,
                                 replica=dest_replica,
                                 blobstore_handle=dest_handle,  # probe the destination, not the source
                                 max_metadata_size=max_syncable_metadata_size))
                for future in as_completed(futures):
                    future.result()  # raises if any file manifest is missing
            return True
        except Exception:
            pass
    elif key.startswith(COLLECTION_PREFIX):
        collection_id = CollectionFQID.from_key(key)
        collection_manifest = get_json_metadata(
            entity_type="collection",
            uuid=collection_id.uuid,
            version=collection_id.version,
            replica=source_replica,
            blobstore_handle=source_handle,
            max_metadata_size=max_syncable_metadata_size)
        try:
            verify_collection(
                contents=collection_manifest["contents"],
                replica=dest_replica,
                blobstore_handle=dest_handle)
            return True
        except Exception:
            pass
    else:
        raise NotImplementedError("Unknown prefix for key {}".format(key))
    return False
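# Self-contained sketch of the fan-out existence check in dependencies_exist
# above: one lookup is submitted per dependency, and any exception raised out
# of future.result() means at least one dependency is missing.
from concurrent.futures import ThreadPoolExecutor, as_completed

def _all_exist_sketch(lookups):
    try:
        with ThreadPoolExecutor(max_workers=4) as executor:
            for future in as_completed([executor.submit(fn) for fn in lookups]):
                future.result()  # re-raises the lookup's exception, if any
        return True
    except Exception:
        return False

assert _all_exist_sketch([lambda: "found", lambda: "found"])
assert not _all_exist_sketch([lambda: 1 / 0])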
def get_collection_fqid() -> CollectionFQID:
    return CollectionFQID(uuid=str(uuid.uuid4()), version=get_version())