Code example #1
    def test_exists(self):
        with self.subTest("gs"):
            test_key = self.test_blob_prefix['exists_gs']
            test_blob = self.gs_bucket.blob(test_key)
            self.assertFalse(sync.exists(replica=Replica.gcp, key=test_key))
            test_blob.upload_from_string(b"1")
            self.assertTrue(sync.exists(replica=Replica.gcp, key=test_key))

        with self.subTest("s3"):
            test_key = self.test_blob_prefix['exists_s3']
            test_blob = self.s3_bucket.Object(test_key)
            self.assertFalse(sync.exists(replica=Replica.aws, key=test_key))
            test_blob.put(Body=b"2")
            self.assertTrue(sync.exists(replica=Replica.aws, key=test_key))
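
For context, here is a minimal sketch of what the exists helper exercised by this test might look like. This is an assumption inferred from the call sites in these examples (a Replica plus a key, returning a bool) and from the Config.get_blobstore_handle / BlobNotFoundError pattern in code example #3; the project's actual implementation may differ.
# Hedged sketch, not the project's actual code. Imports of Replica, Config and
# BlobNotFoundError from the data-store package are omitted because their exact
# module paths are not shown in these examples.
def exists(replica, key: str) -> bool:
    handle = Config.get_blobstore_handle(replica)
    try:
        # get_size raises BlobNotFoundError when the key is absent (see code example #3)
        handle.get_size(replica.bucket, key)
        return True
    except BlobNotFoundError:
        return False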
Code example #2
File: app.py Project: HumanCellAtlas/data-store
def launch_from_forwarded_event(event, context):
    executions = {}
    for event_record in event["Records"]:
        message = json.loads(json.loads(event_record["body"])["Message"])
        if message['resourceState'] == "not_exists":
            logger.info("Ignoring object deletion event")
        elif message["selfLink"].startswith(
                "https://www.googleapis.com/storage"):
            source_replica = Replica.gcp
            source_key = message["name"]
            bucket = source_replica.bucket
            if source_key.startswith(
                    BLOB_PREFIX) and not BLOB_KEY_REGEX.match(source_key):
                logger.info(
                    "Key %s does not match blob key format, skipping sync",
                    source_key)
                continue
            for dest_replica in Config.get_replication_destinations(
                    source_replica):
                if exists(dest_replica, source_key):
                    logger.info("Key %s already exists in %s, skipping sync",
                                source_key, dest_replica)
                    continue
                exec_name = bucket + "/" + message[
                    "name"] + ":" + source_replica.name + ":" + dest_replica.name
                exec_input = dict(source_replica=source_replica.name,
                                  dest_replica=dest_replica.name,
                                  source_key=message["name"],
                                  source_obj_metadata=message)
                executions[exec_name] = app.state_machine.start_execution(
                    **exec_input)["executionArn"]
        else:
            raise NotImplementedError()
    return executions
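
Each SQS record body handled above is an SNS envelope whose Message field is itself a JSON-encoded Google Cloud Storage object-change notification. The sketch below is illustrative only, showing just the fields the handler reads; bucket and object names are placeholders.
# Illustrative only: the message shape launch_from_forwarded_event expects.
import json

forwarded_record_body = json.dumps({
    "Message": json.dumps({
        "resourceState": "exists",  # "not_exists" records are treated as deletions and ignored
        "selfLink": "https://www.googleapis.com/storage/v1/b/example-bucket/o/blobs%2Fexample",
        "name": "blobs/example",    # becomes source_key
    })
})
# event["Records"][i]["body"] would contain forwarded_record_body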
Code example #3
File: app.py Project: HumanCellAtlas/data-store
def launch_from_operator_queue(event, context):
    executions = {}
    for event_record in event['Records']:
        message = json.loads(event_record['body'])
        try:
            source_replica = Replica[message['source_replica']]
            dest_replica = Replica[message['dest_replica']]
            key = message['key']
            assert source_replica != dest_replica
        except (KeyError, AssertionError):
            logger.error("Inoperable operation sync message %s", message)
            continue
        bucket = source_replica.bucket
        if exists(dest_replica, key):
            logger.info("Key %s already exists in %s, skipping sync", key,
                        dest_replica)
            continue
        try:
            size = Config.get_blobstore_handle(source_replica).get_size(
                bucket, key)
        except BlobNotFoundError:
            logger.error("Key %s does not exist on source replica %s", key,
                         source_replica)
            continue
        exec_name = bucket + "/" + key + ":" + source_replica.name + ":" + dest_replica.name
        exec_input = dict(source_replica=source_replica.name,
                          dest_replica=dest_replica.name,
                          source_key=key,
                          source_obj_metadata=dict(size=size))
        executions[exec_name] = app.state_machine.start_execution(
            **exec_input)["executionArn"]
    return executions
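
The operator queue, by contrast, carries a flat JSON message naming the source replica, destination replica, and key directly. A placeholder example of a body that would pass the validation above:
# Illustrative only: a message body accepted by launch_from_operator_queue.
import json

operator_message_body = json.dumps({
    "source_replica": "aws",  # must be a Replica member name
    "dest_replica": "gcp",    # must differ from source_replica
    "key": "blobs/example",   # placeholder key; must exist on the source replica
})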
Code example #4
File: app.py Project: HumanCellAtlas/data-store
def launch_from_notification_queue(event, context):
    for event_record in event['Records']:
        message = json.loads(event_record['body'])
        replica = Replica[message['replica']]
        owner = message['owner']
        uuid = message['uuid']
        key = message['key']
        event_type = message['event_type']
        subscription = get_subscription(replica, owner, uuid)
        if subscription is not None:
            if "DELETE" == event_type:
                metadata_document = get_deleted_bundle_metadata_document(
                    replica, key)
            else:
                if not exists(replica, key):
                    logger.warning(
                        f"Key %s not found in replica %s, unable to notify %s",
                        key, replica.name, uuid)
                    return
                metadata_document = get_bundle_metadata_document(replica, key)
            if not notify(subscription, metadata_document, key):
                # Erroring causes the message to remain in the queue
                raise DSSFailedNotificationDelivery()
        else:
            logger.warning(
                f"Recieved queue message with no matching subscription:{message}"
            )
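
The notification queue message is also flat JSON; the handler looks up the subscription by replica, owner and uuid, then notifies it about the given key. A placeholder example (the owner, uuid and key values are made up):
# Illustrative only: a message body consumed by launch_from_notification_queue.
import json

notification_message_body = json.dumps({
    "replica": "aws",                                # Replica member name
    "owner": "someone@example.com",                  # subscription owner
    "uuid": "00000000-0000-0000-0000-000000000000",  # subscription uuid
    "key": "bundles/example-bundle-key",             # placeholder object key
    "event_type": "CREATE",  # any value other than "DELETE" takes the normal (non-deletion) path
})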
Code example #5
def process_new_indexable_object(self, key: str) -> None:
    try:
        if exists(self.replica, key):
            try:
                self.index_object(key)
            except ObjectIdentifierError:
                # This is expected with events about blobs as they don't have a valid object identifier
                logger.debug(
                    f"Not processing {self.replica.name} key: {key}")
        else:
            logger.error("Key %s not found in replica %s", key,
                         self.replica.name)
    except Exception:
        logger.error("Exception occurred while processing %s event: %s",
                     self.replica,
                     key,
                     exc_info=True)
        raise
Code example #6
File: app.py Project: HumanCellAtlas/data-store
def _handle_bucket_event(replica: Replica, key: str, is_delete_event: bool):
    if is_delete_event:
        metadata_document = get_deleted_bundle_metadata_document(replica, key)
    else:
        if exists(replica, key):
            if key.endswith(TOMBSTONE_SUFFIX):
                for zombie_key in get_tombstoned_bundles(replica, key):
                    delete_event_for_bundle(replica, zombie_key)
                metadata_document = build_bundle_metadata_document(
                    replica, key)
            else:
                metadata_document = record_event_for_bundle(replica, key)
        else:
            logger.error(
                json.dumps(dict(message="Key not found",
                                replica=replica.name,
                                key=key),
                           indent=4))
            return

    _deliver_notifications(replica, metadata_document, key)
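
Hypothetical direct invocations, just to illustrate the two branches above (the keys are placeholders; real tombstone keys end with TOMBSTONE_SUFFIX, whose value is not shown in these examples):
# Hypothetical calls; keys are placeholders.
_handle_bucket_event(Replica.aws, "bundles/example-bundle-key", is_delete_event=False)
_handle_bucket_event(Replica.aws, "bundles/example-bundle-key", is_delete_event=True)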
Code example #7
File: app.py Project: HumanCellAtlas/data-store
def launch_from_s3_event(event, context):
    source_replica = Replica.aws
    executions = {}
    if event.get("Event") == "s3:TestEvent":
        logger.info("S3 test event received and processed successfully")
    else:
        for event_record in event["Records"]:
            bucket = resources.s3.Bucket(event_record["s3"]["bucket"]["name"])
            obj = bucket.Object(unquote(event_record["s3"]["object"]["key"]))
            if obj.key.startswith(BLOB_PREFIX) and not BLOB_KEY_REGEX.match(
                    obj.key):
                logger.info(
                    "Key %s does not match blob key format, skipping sync",
                    obj.key)
                continue
            if obj.key.startswith("cache"):
                logger.info("Ignoring cache object")
                continue
            if bucket.name != source_replica.bucket:
                logger.error(
                    "Received S3 event for bucket %s with no configured replica",
                    bucket.name)
                continue
            for dest_replica in Config.get_replication_destinations(
                    source_replica):
                if exists(dest_replica, obj.key):
                    # Logging error here causes daemons/invoke_lambda.sh to report failure, for some reason
                    # - Brian Hannafious, 2019-01-31
                    logger.info("Key %s already exists in %s, skipping sync",
                                obj.key, dest_replica)
                    continue
                exec_name = bucket.name + "/" + obj.key + ":" + source_replica.name + ":" + dest_replica.name
                exec_input = dict(
                    source_replica=source_replica.name,
                    dest_replica=dest_replica.name,
                    source_key=obj.key,
                    source_obj_metadata=event_record["s3"]["object"])
                executions[exec_name] = app.state_machine.start_execution(
                    **exec_input)["executionArn"]
    return executions
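
This last handler consumes the standard S3 event notification structure; aside from the s3:TestEvent short-circuit, only the bucket name and the URL-encoded object key (plus the object metadata dict passed through as source_obj_metadata) are used. A trimmed, illustrative event with placeholder values:
# Trimmed, illustrative S3 event as read by launch_from_s3_event; values are placeholders.
example_s3_event = {
    "Records": [
        {
            "s3": {
                "bucket": {"name": "example-source-bucket"},
                "object": {
                    "key": "blobs/example",  # URL-encoded in real events; unquote() decodes it
                    "size": 1024,            # forwarded as source_obj_metadata
                },
            }
        }
    ]
}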