def get_deleted_bundle_metadata_document(replica: Replica, key: str) -> dict:
    """
    Build the bundle metadata document associated with a non-existent key.

    The bundle uuid and version are parsed from `key`; nothing is read from storage, and `replica` is not
    consulted by this function. The returned document has the event type "DELETE" and a `bundle_info`
    mapping holding the parsed uuid and version.
    """
    bundle_fqid = BundleFQID.from_key(key)
    bundle_info = {"uuid": bundle_fqid.uuid, "version": bundle_fqid.version}
    return {"event_type": "DELETE", "bundle_info": bundle_info}
Exemple #2
0
    def __iter__(self):
        """
        Walk the keys produced by `self._keys()` and yield bundle FQIDs one bundle uuid at a time.

        State for the bundle currently being scanned is accumulated in `self.bundle_info`. Whenever a key
        with a different uuid shows up, everything `self._living_fqids_in_bundle_info()` produces for the
        accumulated bundle is yielded and the state is re-initialized from the new key.
        NOTE(review): this presumably relies on keys for the same uuid arriving consecutively - confirm
        against `self._keys()`.
        """
        for key in self._keys():
            current = BundleFQID.from_key(key)
            if current.uuid != self.bundle_info['uuid']:
                # A different bundle starts here: flush what was collected, then start over.
                yield from self._living_fqids_in_bundle_info()
                self._init_bundle_info(current)
            elif current.is_fully_qualified():
                # Remember, per versioned id, whether it is a tombstone id.
                is_tombstone = isinstance(current, BundleTombstoneID)
                self.bundle_info['fqids'][current] = is_tombstone
            else:
                # An id that is not fully qualified is recorded as an unversioned tombstone.
                self.bundle_info['contains_unversioned_tombstone'] = True

        # Flush the last bundle once the keys are exhausted.
        yield from self._living_fqids_in_bundle_info()
def build_bundle_metadata_document(replica: Replica, key: str) -> dict:
    """
    This returns a JSON document with bundle manifest and metadata files suitable for JMESPath filters.

    The object stored under `key` in the replica's bucket is fetched and parsed as UTF-8 JSON.

    - If `key` ends with TOMBSTONE_SUFFIX, the result has event type "TOMBSTONE", the bundle uuid/version
      parsed from `key`, and the top-level entries of the stored JSON merged in.
    - Otherwise the result has event type "CREATE", the bundle uuid/version, the manifest itself, and a
      `files` mapping. For every manifest entry whose content type starts with "application/json", the
      corresponding blob is fetched and parsed; parsed documents are grouped in lists under the entry's
      name as transformed by `_dot_to_underscore_and_strip_numeric_suffix`.

    Building `files` is best effort: a blob that is not valid JSON is logged at info level and skipped,
    and any other failure while reading a single file is logged as a warning and that file is omitted.
    """
    handle = Config.get_blobstore_handle(replica)
    manifest = json.loads(handle.get(replica.bucket, key).decode("utf-8"))
    fqid = BundleFQID.from_key(key)
    bundle_info = dict(uuid=fqid.uuid, version=fqid.version)
    if key.endswith(TOMBSTONE_SUFFIX):
        return dict(event_type="TOMBSTONE",
                    bundle_info=bundle_info,
                    **manifest)

    # `files` is filled in from worker threads, so every update happens under `lock`.
    lock = threading.Lock()
    files: dict = defaultdict(list)

    def _read_file(file_metadata):
        blob_key = "blobs/{}.{}.{}.{}".format(
            file_metadata['sha256'],
            file_metadata['sha1'],
            file_metadata['s3-etag'],
            file_metadata['crc32c'],
        )
        contents = handle.get(replica.bucket, blob_key).decode("utf-8")
        try:
            file_info = json.loads(contents)
        except json.decoder.JSONDecodeError:
            logging.info(f"{file_metadata['name']} not json decodable")
        else:
            # Modify name to avoid confusion with JMESPath syntax
            name = _dot_to_underscore_and_strip_numeric_suffix(
                file_metadata['name'])
            with lock:
                files[name].append(file_info)

    json_files = [
        file_metadata for file_metadata in manifest['files']
        if file_metadata['content-type'].startswith("application/json")
    ]

    # TODO: Consider scaling parallelization with Lambda size
    with ThreadPoolExecutor(max_workers=4) as e:
        pending = [(file_metadata, e.submit(_read_file, file_metadata))
                   for file_metadata in json_files]
        for file_metadata, future in pending:
            # Executor.map() only re-raises worker errors when its results are iterated; discarding
            # them hid every read failure. Inspect each future so failures are at least logged.
            error = future.exception()
            if error is not None:
                logging.warning("Unable to read %s while building bundle metadata document",
                                file_metadata.get('name'),
                                exc_info=error)

    return dict(event_type="CREATE",
                bundle_info=bundle_info,
                manifest=manifest,
                files=dict(files))
Exemple #4
0
def dependencies_exist(source_replica: Replica, dest_replica: Replica,
                       key: str):
    """
    Given a source replica and manifest key, checks if all dependencies of the corresponding DSS object are present in
    dest_replica:
     - Given a file manifest key, checks if blobs exist in dest_replica.
     - Given a bundle manifest key, checks if file manifests exist in dest_replica.
     - Given a collection key, checks if all collection contents exist in dest_replica.
    Returns true if all dependencies exist in dest_replica, false otherwise.

    Keys ending in TOMBSTONE_SUFFIX always yield true. The manifest named by `key` is always read from
    source_replica, and errors raised while reading it propagate to the caller. For bundles and collections,
    any exception raised while checking the dependencies in dest_replica is treated as "dependency missing"
    and yields false.

    Raises NotImplementedError if `key` does not start with a known prefix.
    """
    source_handle = Config.get_blobstore_handle(source_replica)
    dest_handle = Config.get_blobstore_handle(dest_replica)
    if key.endswith(TOMBSTONE_SUFFIX):
        return True
    elif key.startswith(FILE_PREFIX):
        file_id = FileFQID.from_key(key)
        file_manifest = get_json_metadata(
            entity_type="file",
            uuid=file_id.uuid,
            version=file_id.version,
            replica=source_replica,
            blobstore_handle=source_handle,
            max_metadata_size=max_syncable_metadata_size)
        blob_path = compose_blob_key(file_manifest)
        if exists(dest_replica, blob_path):
            return True
    elif key.startswith(BUNDLE_PREFIX):
        # head all file manifests
        bundle_id = BundleFQID.from_key(key)
        bundle_manifest = get_json_metadata(
            entity_type="bundle",
            uuid=bundle_id.uuid,
            version=bundle_id.version,
            replica=source_replica,
            blobstore_handle=source_handle,
            max_metadata_size=max_syncable_metadata_size)
        try:
            with ThreadPoolExecutor(max_workers=20) as e:
                # The file manifests are looked up in dest_replica, so they must be read through the
                # destination's blobstore handle (not the source's).
                futures = [
                    e.submit(get_json_metadata,
                             entity_type="file",
                             uuid=file[BundleFileMetadata.UUID],
                             version=file[BundleFileMetadata.VERSION],
                             replica=dest_replica,
                             blobstore_handle=dest_handle,
                             max_metadata_size=max_syncable_metadata_size)
                    for file in bundle_manifest[BundleMetadata.FILES]
                ]
                for future in as_completed(futures):
                    # result() re-raises whatever the lookup raised in the worker thread
                    future.result()
            return True
        except Exception:
            # Deliberately broad: any failed lookup means the dependency is not (yet) available.
            pass
    elif key.startswith(COLLECTION_PREFIX):
        collection_id = CollectionFQID.from_key(key)
        collection_manifest = get_json_metadata(
            entity_type="collection",
            uuid=collection_id.uuid,
            version=collection_id.version,
            replica=source_replica,
            blobstore_handle=source_handle,
            max_metadata_size=max_syncable_metadata_size)
        try:
            verify_collection(contents=collection_manifest["contents"],
                              replica=dest_replica,
                              blobstore_handle=dest_handle)
            return True
        except Exception:
            # Deliberately broad: any verification failure means the contents are not all present.
            pass
    else:
        raise NotImplementedError("Unknown prefix for key {}".format(key))
    return False