def test_exists(self):
    with self.subTest("gs"):
        test_key = self.test_blob_prefix['exists_gs']
        test_blob = self.gs_bucket.blob(test_key)
        self.assertFalse(sync.exists(replica=Replica.gcp, key=test_key))
        test_blob.upload_from_string(b"1")
        self.assertTrue(sync.exists(replica=Replica.gcp, key=test_key))
    with self.subTest("s3"):
        test_key = self.test_blob_prefix['exists_s3']
        test_blob = self.s3_bucket.Object(test_key)
        self.assertFalse(sync.exists(replica=Replica.aws, key=test_key))
        test_blob.put(Body=b"2")
        self.assertTrue(sync.exists(replica=Replica.aws, key=test_key))
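
# sync.exists, exercised by the test above, is not shown in this section. A
# minimal sketch of such a helper, assuming (as launch_from_operator_queue
# below does) that Config.get_blobstore_handle returns a client whose
# get_size raises BlobNotFoundError for missing keys:
def exists(replica: Replica, key: str) -> bool:
    try:
        Config.get_blobstore_handle(replica).get_size(replica.bucket, key)
        return True
    except BlobNotFoundError:
        return False
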
def launch_from_forwarded_event(event, context):
    executions = {}
    for event_record in event["Records"]:
        # The SQS record body is an SNS envelope; its "Message" field holds
        # the JSON-encoded GCS object-change notification.
        message = json.loads(json.loads(event_record["body"])["Message"])
        if message['resourceState'] == "not_exists":
            logger.info("Ignoring object deletion event")
        elif message["selfLink"].startswith("https://www.googleapis.com/storage"):
            source_replica = Replica.gcp
            source_key = message["name"]
            bucket = source_replica.bucket
            if source_key.startswith(BLOB_PREFIX) and not BLOB_KEY_REGEX.match(source_key):
                logger.info("Key %s does not match blob key format, skipping sync", source_key)
                continue
            for dest_replica in Config.get_replication_destinations(source_replica):
                if exists(dest_replica, source_key):
                    logger.info("Key %s already exists in %s, skipping sync", source_key, dest_replica)
                    continue
                exec_name = bucket + "/" + source_key + ":" + source_replica.name + ":" + dest_replica.name
                exec_input = dict(source_replica=source_replica.name,
                                  dest_replica=dest_replica.name,
                                  source_key=source_key,
                                  source_obj_metadata=message)
                executions[exec_name] = app.state_machine.start_execution(**exec_input)["executionArn"]
        else:
            raise NotImplementedError()
    return executions
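
# launch_from_forwarded_event unwraps two layers of JSON: each SQS record
# body is an SNS envelope whose "Message" field is itself a JSON-encoded GCS
# object-change notification. A hypothetical record showing the nesting
# (field values are illustrative only):
example_forwarded_record = {
    "body": json.dumps({
        "Message": json.dumps({
            "resourceState": "exists",
            "selfLink": "https://www.googleapis.com/storage/v1/b/example-bucket/o/blobs%2Fexample",
            "name": "blobs/example",
        })
    })
}
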
def launch_from_operator_queue(event, context):
    executions = {}
    for event_record in event['Records']:
        message = json.loads(event_record['body'])
        try:
            source_replica = Replica[message['source_replica']]
            dest_replica = Replica[message['dest_replica']]
            key = message['key']
            assert source_replica != dest_replica
        except (KeyError, AssertionError):
            logger.error("Unprocessable operator sync message %s", message)
            continue
        bucket = source_replica.bucket
        if exists(dest_replica, key):
            logger.info("Key %s already exists in %s, skipping sync", key, dest_replica)
            continue
        try:
            size = Config.get_blobstore_handle(source_replica).get_size(bucket, key)
        except BlobNotFoundError:
            logger.error("Key %s does not exist on source replica %s", key, source_replica)
            continue
        exec_name = bucket + "/" + key + ":" + source_replica.name + ":" + dest_replica.name
        exec_input = dict(source_replica=source_replica.name,
                          dest_replica=dest_replica.name,
                          source_key=key,
                          source_obj_metadata=dict(size=size))
        executions[exec_name] = app.state_machine.start_execution(**exec_input)["executionArn"]
    return executions
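
# A hypothetical producer for the operator queue: the body fields are exactly
# the ones launch_from_operator_queue validates; boto3's send_message is
# standard, but the function name and queue URL are assumptions for
# illustration.
import boto3

def enqueue_operator_sync(queue_url: str, source_replica: str, dest_replica: str, key: str) -> None:
    # queue_url would point at the SQS queue wired to launch_from_operator_queue
    boto3.client("sqs").send_message(
        QueueUrl=queue_url,
        MessageBody=json.dumps({
            "source_replica": source_replica,
            "dest_replica": dest_replica,
            "key": key,
        }),
    )
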
def launch_from_notification_queue(event, context):
    for event_record in event['Records']:
        message = json.loads(event_record['body'])
        replica = Replica[message['replica']]
        owner = message['owner']
        uuid = message['uuid']
        key = message['key']
        event_type = message['event_type']
        subscription = get_subscription(replica, owner, uuid)
        if subscription is not None:
            if "DELETE" == event_type:
                metadata_document = get_deleted_bundle_metadata_document(replica, key)
            else:
                if not exists(replica, key):
                    logger.warning("Key %s not found in replica %s, unable to notify %s",
                                   key, replica.name, uuid)
                    return
                metadata_document = get_bundle_metadata_document(replica, key)
            if not notify(subscription, metadata_document, key):
                # Erroring causes the message to remain in the queue
                raise DSSFailedNotificationDelivery()
        else:
            logger.warning("Received queue message with no matching subscription: %s", message)
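
# The notification queue message consumed above carries exactly five fields.
# A hypothetical example (all values illustrative):
example_notification_message = {
    "replica": "aws",
    "owner": "user@example.com",
    "uuid": "aaaabbbb-cccc-dddd-eeee-ffff00001111",  # subscription UUID passed to get_subscription
    "key": "bundles/11112222-3333-4444-5555-666677778888.2019-01-01T000000.000000Z",
    "event_type": "CREATE",
}
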
def process_new_indexable_object(self, key: str) -> None:
    try:
        if exists(self.replica, key):
            try:
                self.index_object(key)
            except ObjectIdentifierError:
                # This is expected with events about blobs as they don't have a valid object identifier
                logger.debug(f"Not processing {self.replica.name} key: {key}")
        else:
            logger.error("Key %s not found in replica %s", key, self.replica.name)
    except Exception:
        logger.error("Exception occurred while processing %s event: %s",
                     self.replica, key, exc_info=True)
        raise
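
# process_new_indexable_object is a method on an indexer class that carries
# self.replica and self.index_object. A hypothetical queue-driven caller,
# mirroring the other handlers in this section (the Indexer name and message
# shape are assumptions for illustration):
def launch_from_index_queue(event, context):
    for event_record in event["Records"]:
        message = json.loads(event_record["body"])
        indexer = Indexer(Replica[message["replica"]])  # hypothetical class
        indexer.process_new_indexable_object(message["key"])
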
def _handle_bucket_event(replica: Replica, key: str, is_delete_event: bool):
    if is_delete_event:
        metadata_document = get_deleted_bundle_metadata_document(replica, key)
    else:
        if exists(replica, key):
            if key.endswith(TOMBSTONE_SUFFIX):
                # A new tombstone shadows one or more bundle versions; emit a
                # delete event for each shadowed bundle before building the
                # tombstone's own metadata document.
                for zombie_key in get_tombstoned_bundles(replica, key):
                    delete_event_for_bundle(replica, zombie_key)
                metadata_document = build_bundle_metadata_document(replica, key)
            else:
                metadata_document = record_event_for_bundle(replica, key)
        else:
            logger.error(json.dumps(dict(message="Key not found", replica=replica.name, key=key), indent=4))
            return
    _deliver_notifications(replica, metadata_document, key)
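
# Tombstones mark deletions in place: a bundle key with TOMBSTONE_SUFFIX
# appended shadows the live key, and get_tombstoned_bundles yields the
# shadowed ("zombie") keys. Hypothetical key shapes, assuming a "dead"
# suffix (an assumption, not confirmed by this section):
live_key = "bundles/11112222-3333-4444-5555-666677778888.2019-01-01T000000.000000Z"
tombstone_key = live_key + ".dead"  # would satisfy key.endswith(TOMBSTONE_SUFFIX)
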
def launch_from_s3_event(event, context):
    source_replica = Replica.aws
    executions = {}
    if event.get("Event") == "s3:TestEvent":
        logger.info("S3 test event received and processed successfully")
    else:
        for event_record in event["Records"]:
            bucket = resources.s3.Bucket(event_record["s3"]["bucket"]["name"])
            obj = bucket.Object(unquote(event_record["s3"]["object"]["key"]))
            if obj.key.startswith(BLOB_PREFIX) and not BLOB_KEY_REGEX.match(obj.key):
                logger.info("Key %s does not match blob key format, skipping sync", obj.key)
                continue
            if obj.key.startswith("cache"):
                logger.info("Ignoring cache object")
                continue
            if bucket.name != source_replica.bucket:
                logger.error("Received S3 event for bucket %s with no configured replica", bucket.name)
                continue
            for dest_replica in Config.get_replication_destinations(source_replica):
                if exists(dest_replica, obj.key):
                    # Logging error here causes daemons/invoke_lambda.sh to report failure, for some reason
                    # - Brian Hannafious, 2019-01-31
                    logger.info("Key %s already exists in %s, skipping sync", obj.key, dest_replica)
                    continue
                exec_name = bucket.name + "/" + obj.key + ":" + source_replica.name + ":" + dest_replica.name
                exec_input = dict(source_replica=source_replica.name,
                                  dest_replica=dest_replica.name,
                                  source_key=obj.key,
                                  source_obj_metadata=event_record["s3"]["object"])
                executions[exec_name] = app.state_machine.start_execution(**exec_input)["executionArn"]
    return executions
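
# launch_from_s3_event reads three paths from the standard S3 event
# notification structure, and unquotes the object key because S3 URL-encodes
# keys in event payloads. A trimmed example event (names illustrative):
example_s3_event = {
    "Records": [{
        "s3": {
            "bucket": {"name": "example-source-bucket"},
            "object": {"key": "blobs/abc123", "size": 1024},
        }
    }]
}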