コード例 #1
0
ファイル: app.py プロジェクト: HumanCellAtlas/data-store
def dispatch_gs_indexer_event(event, context):
    """
    This handler receives GS events via the Google Cloud Function deployed from daemons/dss-gs-event-relay.

    :param event: SQS event payload; each record's "body" holds an SNS envelope
        whose "Message" field is the JSON-encoded GCS object notification.
    :param context: Lambda context object, passed through to the event handler.
    :raises AssertionError: if a notification references a bucket other than
        the configured GS bucket.
    """
    for event_record in event["Records"]:
        # Two layers of JSON: the SQS body wraps an SNS envelope, whose
        # "Message" field carries the actual GCS notification.
        message = json.loads(json.loads(event_record["body"])["Message"])
        if message['resourceState'] == "not_exists":
            logger.info("Ignoring object deletion event")
        else:
            key = message['name']
            # Explicit check instead of `assert`: asserts are stripped under
            # `python -O`, which would silently disable this safety guard.
            # AssertionError is kept so callers see the same exception type.
            expected_bucket = Config.get_gs_bucket()
            if message['bucket'] != expected_bucket:
                raise AssertionError(
                    f"Unexpected bucket {message['bucket']!r}, "
                    f"expected {expected_bucket!r}")
            _handle_event(Replica.gcp, key, context)
コード例 #2
0
ファイル: sync.py プロジェクト: hannes-ucsc/data-store
def sync_blob(source_platform, source_key, dest_platform, context):
    """Replicate the object at *source_key* between cloud platforms.

    Supported directions are "s3" -> "gs" and "gs" -> "s3"; any other pair
    raises NotImplementedError.  A transfer is skipped when the destination
    already holds the key.  Small objects are copied in one shot; larger
    ones are handed off to a multipart sync.
    """
    gcs_client = Config.get_native_handle(Replica.gcp)
    logger.info(
        f"Begin transfer of {source_key} from {source_platform} to {dest_platform}"
    )
    bucket_gs = gcs_client.bucket(Config.get_gs_bucket())
    bucket_s3 = resources.s3.Bucket(Config.get_s3_bucket())

    direction = (source_platform, dest_platform)
    if direction == ("s3", "gs"):
        source = BlobLocation(platform=source_platform,
                              bucket=bucket_s3,
                              blob=bucket_s3.Object(source_key))
        dest = BlobLocation(platform=dest_platform,
                            bucket=bucket_gs,
                            blob=bucket_gs.blob(source_key))
    elif direction == ("gs", "s3"):
        source = BlobLocation(platform=source_platform,
                              bucket=bucket_gs,
                              blob=bucket_gs.blob(source_key))
        dest = BlobLocation(platform=dest_platform,
                            bucket=bucket_s3,
                            blob=bucket_s3.Object(source_key))
    else:
        raise NotImplementedError()

    if direction == ("s3", "gs") and use_gsts:
        # Offload to the Google Storage Transfer Service when it is enabled.
        sync_s3_to_gcsts(gcs_client.project, source.bucket.name,
                         dest.bucket.name, source_key)
    elif direction == ("s3", "gs"):
        if dest.blob.exists():
            logger.info(f"Key {source_key} already exists in GS")
            return
        elif source.blob.content_length < part_size["s3"]:
            sync_s3_to_gs_oneshot(source, dest)
        else:
            dispatch_multipart_sync(source, dest, context)
    elif direction == ("gs", "s3"):
        # EAFP: load() raises ClientError when the destination object is
        # absent; anything other than "Not Found" is re-raised.
        try:
            dest.blob.load()
            logger.info(f"Key {source_key} already exists in S3")
            return
        except clients.s3.exceptions.ClientError as e:
            if e.response["Error"].get("Message") != "Not Found":
                raise
        source.blob.reload()
        if source.blob.size < part_size["s3"]:
            sync_gs_to_s3_oneshot(source, dest)
        else:
            dispatch_multipart_sync(source, dest, context)
    logger.info(
        f"Completed transfer of {source_key} from {source.bucket} to {dest.bucket}"
    )
コード例 #3
0
ファイル: sync.py プロジェクト: HumanCellAtlas/data-store
def exists(replica: Replica, key: str):
    """Return whether *key* is present in the given replica's bucket."""
    if replica == Replica.aws:
        # EAFP: load() performs a HEAD request and raises ClientError
        # when the object is absent.
        try:
            resources.s3.Bucket(replica.bucket).Object(key).load()  # type: ignore
        except clients.s3.exceptions.ClientError:  # type: ignore
            return False
        return True
    if replica == Replica.gcp:
        handle = Config.get_native_handle(Replica.gcp)
        return handle.bucket(Config.get_gs_bucket()).blob(key).exists()
    raise NotImplementedError()
コード例 #4
0
ファイル: indexer.py プロジェクト: hannes-ucsc/data-store
 def _parse_event(self, event):
     """Return the object key named by a GCS notification *event*.

     :param event: GCS object-change notification dict with at least
         the 'name' and 'bucket' fields.
     :raises AssertionError: if the event's bucket is not the configured
         GS bucket.
     """
     key = event['name']
     # Explicit check instead of `assert`: asserts are stripped under
     # `python -O`, which would silently disable this safety guard.
     # AssertionError is kept so callers see the same exception type.
     expected_bucket = Config.get_gs_bucket()
     if event['bucket'] != expected_bucket:
         raise AssertionError(
             f"Unexpected bucket {event['bucket']!r}, "
             f"expected {expected_bucket!r}")
     return key