Example #1
0
 def test_from_key(self):
     """
     Test that the from_key method correctly returns the right types of
     identifiers for each key prefix (bundles, files, collections, and
     their ".dead" tombstone variants), and raises ValueError for
     malformed or unknown keys.
     """
     uuid = "ca11ab1e-0000-4a6b-8f0d-a7d2105c23be"
     version = "2017-12-05T235728.441373Z"
     # assertEqual replaces the deprecated assertEquals alias (removed in
     # Python 3.12).
     self.assertEqual(
         BundleFQID(uuid, version),
         ObjectIdentifier.from_key(f"{BUNDLE_PREFIX}/{uuid}.{version}"),
     )
     self.assertEqual(
         FileFQID(uuid, version),
         ObjectIdentifier.from_key(f"{FILE_PREFIX}/{uuid}.{version}"),
     )
     self.assertEqual(
         CollectionFQID(uuid, version),
         ObjectIdentifier.from_key(f"{COLLECTION_PREFIX}/{uuid}.{version}"),
     )
     self.assertEqual(
         CollectionTombstoneID(uuid, version),
         ObjectIdentifier.from_key(f"{COLLECTION_PREFIX}/{uuid}.{version}.dead"),
     )
     self.assertEqual(
         BundleTombstoneID(uuid, version),
         ObjectIdentifier.from_key(f"{BUNDLE_PREFIX}/{uuid}.{version}.dead"),
     )
     # A recognized prefix with a malformed FQID must be rejected.
     self.assertRaises(
         ValueError,
         lambda: ObjectIdentifier.from_key(f"{BUNDLE_PREFIX}/trash"),
     )
     # An unrecognized prefix must be rejected even with a valid FQID.
     self.assertRaises(
         ValueError,
         lambda: ObjectIdentifier.from_key(f"trash/{uuid}.{version}.dead"),
     )
 def test_to_str(self):
     """
     Test that str() renders identifiers as "uuid.version", appending
     ".dead" for tombstones and omitting the version component when a
     tombstone's version is None.
     """
     uuid = "0ddba11-0000-4a6b-8f0d-a7d2105c23be"
     version = "2017-12-05T235728.441373Z"
     # assertEqual replaces the deprecated assertEquals alias (removed in
     # Python 3.12).
     self.assertEqual(str(BundleFQID(uuid=uuid, version=version)),
                      f"{uuid}.{version}")
     self.assertEqual(str(FileFQID(uuid=uuid, version=version)),
                      f"{uuid}.{version}")
     self.assertEqual(str(TombstoneID(uuid=uuid, version=version)),
                      f"{uuid}.{version}.dead")
     self.assertEqual(str(TombstoneID(uuid=uuid, version=None)),
                      f"{uuid}.dead")
 def test_to_key(self):
     """
     Test that to_key() prepends the correct storage prefix to the
     string form of each identifier (tombstones map to the bundle
     prefix, with or without a version).
     """
     uuid = "0ddba11-0000-4a6b-8f0d-a7d2105c23be"
     version = "2017-12-05T235728.441373Z"
     # assertEqual replaces the deprecated assertEquals alias (removed in
     # Python 3.12).
     self.assertEqual(
         BundleFQID(uuid=uuid, version=version).to_key(),
         f"{BUNDLE_PREFIX}/{uuid}.{version}")
     self.assertEqual(
         FileFQID(uuid=uuid, version=version).to_key(),
         f"{FILE_PREFIX}/{uuid}.{version}")
     self.assertEqual(
         TombstoneID(uuid=uuid, version=version).to_key(),
         f"{BUNDLE_PREFIX}/{uuid}.{version}.dead")
     self.assertEqual(
         TombstoneID(uuid=uuid, version=None).to_key(),
         f"{BUNDLE_PREFIX}/{uuid}.dead")
Example #4
0
 def _get_file_metadata(_file):
     """
     Fetch and parse the file manifest named by _file's user-supplied
     uuid/version. Retries once per second while the blob is missing and
     enough request time remains; returns None when time runs out.
     Relies on the enclosing scope for `handle`, `replica`, and
     `time_left`.
     """
     metadata_key = FileFQID(
         uuid=_file['user_supplied_metadata']['uuid'],
         version=_file['user_supplied_metadata']['version'],
     ).to_key()
     while True:
         try:
             raw_manifest = handle.get(replica.bucket, metadata_key)
         except BlobNotFoundError:
             # Stop waiting once the remaining request budget drops to the
             # allowance needed to finish the PUT itself.
             if time_left() <= PUT_TIME_ALLOWANCE_SECONDS:
                 return None
             time.sleep(1)
         else:
             return json.loads(raw_manifest)
Example #5
0
def get_file_fqid() -> FileFQID:
    """Return a FileFQID built from a freshly generated UUID4 and a new version."""
    fresh_uuid = str(uuid.uuid4())
    return FileFQID(uuid=fresh_uuid, version=get_version())
Example #6
0
def dependencies_exist(source_replica: Replica, dest_replica: Replica,
                       key: str):
    """
    Given a source replica and manifest key, checks if all dependencies of the corresponding DSS object are present in
    dest_replica:
     - Given a file manifest key, checks if blobs exist in dest_replica.
     - Given a bundle manifest key, checks if file manifests exist in dest_replica.
     - Given a collection key, checks if all collection contents exist in dest_replica.
    Returns true if all dependencies exist in dest_replica, false otherwise.

    :param source_replica: replica holding the object named by `key`
    :param dest_replica: replica in which the dependencies are looked up
    :param key: manifest key of the object whose dependencies are checked
    :raises NotImplementedError: if `key` has an unrecognized prefix
    """
    source_handle = Config.get_blobstore_handle(source_replica)
    dest_handle = Config.get_blobstore_handle(dest_replica)
    # Tombstones carry no dependencies of their own.
    if key.endswith(TOMBSTONE_SUFFIX):
        return True
    elif key.startswith(FILE_PREFIX):
        # A file's sole dependency is its blob: read the manifest from the
        # source, then look for the blob in the destination.
        file_id = FileFQID.from_key(key)
        file_manifest = get_json_metadata(
            entity_type="file",
            uuid=file_id.uuid,
            version=file_id.version,
            replica=source_replica,
            blobstore_handle=source_handle,
            max_metadata_size=max_syncable_metadata_size)
        blob_path = compose_blob_key(file_manifest)
        if exists(dest_replica, blob_path):
            return True
    elif key.startswith(BUNDLE_PREFIX):
        # head all file manifests
        bundle_id = BundleFQID.from_key(key)
        bundle_manifest = get_json_metadata(
            entity_type="bundle",
            uuid=bundle_id.uuid,
            version=bundle_id.version,
            replica=source_replica,
            blobstore_handle=source_handle,
            max_metadata_size=max_syncable_metadata_size)
        try:
            with ThreadPoolExecutor(max_workers=20) as e:
                futures = list()
                for file in bundle_manifest[BundleMetadata.FILES]:
                    file_uuid = file[BundleFileMetadata.UUID]
                    file_version = file[BundleFileMetadata.VERSION]
                    futures.append(
                        e.submit(get_json_metadata,
                                 entity_type="file",
                                 uuid=file_uuid,
                                 version=file_version,
                                 replica=dest_replica,
                                 # Fix: read via the destination's handle to
                                 # match replica=dest_replica (the original
                                 # passed source_handle, which cannot read the
                                 # dest bucket when the replicas differ; the
                                 # collection branch already uses dest_handle).
                                 blobstore_handle=dest_handle,
                                 max_metadata_size=max_syncable_metadata_size))
                for future in as_completed(futures):
                    future.result()  # re-raises if any file manifest is absent
            return True
        except Exception:
            # Best-effort check: any missing file manifest means the
            # dependencies do not (yet) exist; fall through to return False.
            pass
    elif key.startswith(COLLECTION_PREFIX):
        collection_id = CollectionFQID.from_key(key)
        collection_manifest = get_json_metadata(
            entity_type="collection",
            uuid=collection_id.uuid,
            version=collection_id.version,
            replica=source_replica,
            blobstore_handle=source_handle,
            max_metadata_size=max_syncable_metadata_size)
        try:
            verify_collection(contents=collection_manifest["contents"],
                              replica=dest_replica,
                              blobstore_handle=dest_handle)
            return True
        except Exception:
            # Best-effort check: verification failure means some collection
            # content is missing from dest_replica.
            pass
    else:
        raise NotImplementedError("Unknown prefix for key {}".format(key))
    return False
Example #7
0
def put(uuid: str, replica: str, json_request_body: dict, version: str = None):
    """
    Create a bundle manifest from the files listed in json_request_body and
    save it idempotently under the bundle's key in the replica's bucket.

    :param uuid: bundle UUID (lowercased before use)
    :param replica: name of a Replica enum member identifying the target store
    :param json_request_body: request payload; must contain 'files' (each with
        'uuid', 'version', 'name', 'indexed') and 'creator_uid'
    :param version: optional ISO-8601 version; when omitted, the current UTC
        time is used
    :raises DSSException: on a file/bundle UUID mismatch, on a file manifest
        that never appears in time, or on a conflicting existing bundle
    :return: (JSON response with the resolved version, HTTP status code) —
        201 when newly created, 200 when an identical bundle already existed
    """
    uuid = uuid.lower()
    if version is not None:
        # convert it to date-time so we can format exactly as the system requires (with microsecond precision)
        timestamp = iso8601.parse_date(version)
    else:
        timestamp = datetime.datetime.utcnow()
    version = datetime_to_version_format(timestamp)

    handle = Config.get_blobstore_handle(Replica[replica])
    bucket = Replica[replica].bucket

    # what's the target object name for the bundle manifest?
    bundle_manifest_key = BundleFQID(uuid=uuid, version=version).to_key()

    # decode the list of files.
    files = [{'user_supplied_metadata': file} for file in json_request_body['files']]

    # time_left() reports the remaining request budget in seconds; used below
    # to decide whether to keep polling for file manifests.
    time_left = nestedcontext.inject("time_left")

    while True:  # each time through the outer while-loop, we try to gather up all the file metadata.
        for file in files:
            user_supplied_metadata = file['user_supplied_metadata']
            metadata_key = FileFQID(
                uuid=user_supplied_metadata['uuid'],
                version=user_supplied_metadata['version'],
            ).to_key()
            # only fetch manifests we haven't already loaded on a prior pass
            if 'file_metadata' not in file:
                try:
                    file_metadata = handle.get(bucket, metadata_key)
                except BlobNotFoundError:
                    # manifest not there yet — retry on the next pass
                    continue
                file['file_metadata'] = json.loads(file_metadata)
                # each file manifest records the bundle it belongs to; reject
                # files that were uploaded for a different bundle
                if uuid != file['file_metadata']['bundle_uuid']:
                    raise DSSException(
                        requests.codes.conflict,
                        "incorrect_file_bundle_uuid",
                        f"File bundle_uuid {file['file_metadata']['bundle_uuid']} does not equal bundle uuid {uuid}"
                    )

        # check to see if any file metadata is still not yet loaded.
        for file in files:
            if 'file_metadata' not in file:
                missing_file_user_metadata = file['user_supplied_metadata']
                break
        else:
            # for-else: no missing manifests — all metadata gathered, proceed
            break

        # if we're out of time, give up.
        # (while time remains beyond the PUT allowance, sleep and poll again)
        if time_left() > PUT_TIME_ALLOWANCE_SECONDS:
            time.sleep(1)
            continue

        raise DSSException(
            requests.codes.conflict,
            "file_missing",
            f"Could not find file {missing_file_user_metadata['uuid']}/{missing_file_user_metadata['version']}."
        )

    # build a manifest consisting of all the files.
    bundle_metadata = {
        BundleMetadata.FORMAT: BundleMetadata.FILE_FORMAT_VERSION,
        BundleMetadata.VERSION: version,
        BundleMetadata.FILES: [
            {
                BundleFileMetadata.NAME: file['user_supplied_metadata']['name'],
                BundleFileMetadata.UUID: file['user_supplied_metadata']['uuid'],
                BundleFileMetadata.VERSION: file['user_supplied_metadata']['version'],
                BundleFileMetadata.CONTENT_TYPE: file['file_metadata'][FileMetadata.CONTENT_TYPE],
                BundleFileMetadata.SIZE: file['file_metadata'][FileMetadata.SIZE],
                BundleFileMetadata.INDEXED: file['user_supplied_metadata']['indexed'],
                BundleFileMetadata.CRC32C: file['file_metadata'][FileMetadata.CRC32C],
                BundleFileMetadata.S3_ETAG: file['file_metadata'][FileMetadata.S3_ETAG],
                BundleFileMetadata.SHA1: file['file_metadata'][FileMetadata.SHA1],
                BundleFileMetadata.SHA256: file['file_metadata'][FileMetadata.SHA256],
            }
            for file in files
        ],
        BundleMetadata.CREATOR_UID: json_request_body['creator_uid'],
    }

    # idempotent: re-PUTting an identical manifest is accepted, a different
    # manifest under the same key is a conflict
    created, idempotent = _idempotent_save(
        handle,
        bucket,
        bundle_manifest_key,
        bundle_metadata,
    )

    if not idempotent:
        raise DSSException(
            requests.codes.conflict,
            "bundle_already_exists",
            f"bundle with UUID {uuid} and version {version} already exists"
        )
    status_code = requests.codes.created if created else requests.codes.ok

    return jsonify(dict(version=version)), status_code
Example #8
0
    def test_dependencies_exist(self):
        """
        Exercise sync.dependencies_exist for collection, bundle, and file
        keys: upload each object's dependencies in turn and verify that
        dependency resolution flips from False to True as they appear.
        """
        file_uuid, file_version = str(uuid.uuid4()), get_version()
        bundle_uuid, bundle_version = str(uuid.uuid4()), get_version()
        # A collection referencing one bundle and one file.
        collection_data = {
            "contents": [{
                "type": "bundle",
                "uuid": bundle_uuid,
                "version": bundle_version
            }, {
                "type": "file",
                "uuid": file_uuid,
                "version": file_version
            }]
        }
        # A bundle manifest referencing the same file.
        bundle_data = {
            BundleMetadata.FILES: [{
                BundleFileMetadata.UUID: file_uuid,
                BundleFileMetadata.VERSION: file_version
            }]
        }
        # A file manifest whose checksums determine the blob key.
        file_data = {
            FileMetadata.SHA256: "sync_test",
            FileMetadata.SHA1: "sync_test",
            FileMetadata.S3_ETAG: "sync_test",
            FileMetadata.CRC32C: str(uuid.uuid4())
        }

        with self.subTest("collection without deps"):
            collection_key = f"{COLLECTION_PREFIX}/{get_collection_fqid()}"
            self.s3_bucket.Object(collection_key).put(
                Body=json.dumps(collection_data).encode())
            self.assertFalse(
                sync.dependencies_exist(Replica.aws, Replica.aws,
                                        collection_key))

        with self.subTest("bundle without deps"):
            bundle_fqid = BundleFQID(uuid=bundle_uuid, version=bundle_version)
            bundle_key = f"{BUNDLE_PREFIX}/{bundle_fqid}"
            self.s3_bucket.Object(bundle_key).put(
                Body=json.dumps(bundle_data).encode())

            # The collection remains unresolved (its file is still missing),
            # and the bundle's file manifest is missing as well.
            self.assertFalse(
                sync.dependencies_exist(Replica.aws, Replica.aws,
                                        collection_key))
            self.assertFalse(
                sync.dependencies_exist(Replica.aws, Replica.aws, bundle_key))

        with self.subTest("file without deps"):
            file_fqid = FileFQID(uuid=file_uuid, version=file_version)
            file_key = f"{FILE_PREFIX}/{file_fqid}"
            self.s3_bucket.Object(file_key).put(
                Body=json.dumps(file_data).encode())

            @eventually(timeout=8, interval=1, errors={Exception})
            def check_file_revdeps():
                # Collection and bundle now resolve; the file still lacks
                # its blob.
                self.assertTrue(
                    sync.dependencies_exist(Replica.aws, Replica.aws,
                                            collection_key))
                self.assertTrue(
                    sync.dependencies_exist(Replica.aws, Replica.aws,
                                            bundle_key))
                self.assertFalse(
                    sync.dependencies_exist(Replica.aws, Replica.aws,
                                            file_key))

            check_file_revdeps()

        with self.subTest(
                "blob presence causes all dependencies to be resolved"):
            blob_key = compose_blob_key(file_data)
            self.s3_bucket.Object(blob_key).put(Body=b"sync_test")

            @eventually(timeout=8, interval=1, errors={Exception})
            def check_blob_revdeps():
                # With the blob in place, every object's dependencies exist.
                self.assertTrue(
                    sync.dependencies_exist(Replica.aws, Replica.aws,
                                            collection_key))
                self.assertTrue(
                    sync.dependencies_exist(Replica.aws, Replica.aws,
                                            bundle_key))
                self.assertTrue(
                    sync.dependencies_exist(Replica.aws, Replica.aws,
                                            file_key))

            check_blob_revdeps()