Example 1
def sync_blob(source_platform, source_key, dest_platform, context):
    gs = Config.get_native_handle(Replica.gcp)
    logger.info(
        f"Begin transfer of {source_key} from {source_platform} to {dest_platform}"
    )
    gs_bucket = gs.bucket(Config.get_gs_bucket())
    s3_bucket = resources.s3.Bucket(Config.get_s3_bucket())
    if source_platform == "s3" and dest_platform == "gs":
        source = BlobLocation(platform=source_platform,
                              bucket=s3_bucket,
                              blob=s3_bucket.Object(source_key))
        dest = BlobLocation(platform=dest_platform,
                            bucket=gs_bucket,
                            blob=gs_bucket.blob(source_key))
    elif source_platform == "gs" and dest_platform == "s3":
        source = BlobLocation(platform=source_platform,
                              bucket=gs_bucket,
                              blob=gs_bucket.blob(source_key))
        dest = BlobLocation(platform=dest_platform,
                            bucket=s3_bucket,
                            blob=s3_bucket.Object(source_key))
    else:
        raise NotImplementedError(f"Sync from {source_platform} to {dest_platform} is not supported")

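    # Transfer dispatch: with the module-level use_gsts flag (not shown here) enabled,
    # S3 -> GS copies are handed off to sync_s3_to_gcsts; otherwise objects smaller
    # than part_size["s3"] are copied in one shot and larger ones are split into a
    # multipart sync.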
    if source_platform == "s3" and dest_platform == "gs" and use_gsts:
        sync_s3_to_gcsts(gs.project, source.bucket.name, dest.bucket.name,
                         source_key)
    elif source_platform == "s3" and dest_platform == "gs":
        if dest.blob.exists():
            logger.info(f"Key {source_key} already exists in GS")
            return
        elif source.blob.content_length < part_size["s3"]:
            sync_s3_to_gs_oneshot(source, dest)
        else:
            dispatch_multipart_sync(source, dest, context)
    elif source_platform == "gs" and dest_platform == "s3":
        try:
            dest.blob.load()
            logger.info(f"Key {source_key} already exists in S3")
            return
        except clients.s3.exceptions.ClientError as e:
            if e.response["Error"].get("Message") != "Not Found":
                raise
        source.blob.reload()
        if source.blob.size < part_size["s3"]:
            sync_gs_to_s3_oneshot(source, dest)
        else:
            dispatch_multipart_sync(source, dest, context)
    logger.info(
        f"Completed transfer of {source_key} from {source.bucket} to {dest.bucket}"
    )
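The `BlobLocation` container used above is not defined in this excerpt; as a minimal sketch, with the field names inferred from the keyword arguments passed in `sync_blob`, it can be a plain named tuple:

from collections import namedtuple

# Minimal stand-in for the BlobLocation used above; the fields mirror the
# keyword arguments passed in sync_blob (platform, bucket, blob).
BlobLocation = namedtuple("BlobLocation", ["platform", "bucket", "blob"])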
Example 2
import logging
import os
from urllib.parse import unquote

import domovoi

import dss
from dss import Config, Replica
from dss.index.backend import CompositeIndexBackend
from dss.index.indexer import Indexer
from dss.logging import configure_lambda_logging
from dss.util import tracing
from dss.util.time import RemainingLambdaContextTime, AdjustedRemainingTime


app = domovoi.Domovoi(configure_logs=False)


configure_lambda_logging()
logger = logging.getLogger(__name__)
dss.Config.set_config(dss.BucketConfig.NORMAL)


@app.s3_event_handler(bucket=Config.get_s3_bucket(), events=["s3:ObjectCreated:*"], use_sqs=True)
def dispatch_s3_indexer_event(event, context) -> None:
    if event.get("Event") == "s3:TestEvent":
        logger.info("DSS index daemon received S3 test event")
    else:
        for event_record in event["Records"]:
            key = unquote(event_record['s3']['object']['key'])
            _handle_event(Replica.aws, key, context)


@app.sqs_queue_subscriber("dss-index-" + os.environ["DSS_DEPLOYMENT_STAGE"])
def dispatch_gs_indexer_event(event, context):
    """
    This handler receives GS events via the Google Cloud Function deployed from daemons/dss-gs-event-relay.
    """
    for event_record in event["Records"]:
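        # Each SQS record carries a GS object event forwarded by that relay function.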
Example 3
                        build_bundle_metadata_document)
from dss.events.handlers.notify_v2 import should_notify, notify_or_queue, notify

from dss.events.handlers.sync import exists
from dss.subscriptions_v2 import get_subscription, get_subscriptions_for_replica

configure_lambda_logging()
logger = logging.getLogger(__name__)
dss.Config.set_config(dss.BucketConfig.NORMAL)

app = domovoi.Domovoi()


# This entry point is for S3 native events forwarded through SQS.
@app.s3_event_handler(
    bucket=Config.get_s3_bucket(),
    events=["s3:ObjectCreated:*", "s3:ObjectRemoved:Delete"],
    use_sqs=True,
    sqs_queue_attributes=dict(
        VisibilityTimeout="920"),  # Lambda timeout + 20 seconds
)
def launch_from_s3_event(event, context):
    replica = Replica.aws
    if event.get("Event") == "s3:TestEvent":
        logger.info("S3 test event received and processed successfully")
    else:
        for event_record in event['Records']:
            bucket = event_record['s3']['bucket']['name']
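            # Events for any bucket other than the replica's configured bucket are reported as errors.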
            if bucket != replica.bucket:
                logger.error(
                    "Received S3 event for bucket %s with no configured replica",
Example 4
from dss.events.handlers.sync import (
    compose_upload, initiate_multipart_upload, complete_multipart_upload,
    copy_part, exists, get_part_size, get_sync_work_state, parts_per_worker,
    dependencies_exist, do_oneshot_copy, sync_sfn_dep_wait_sleep_seconds,
    sync_sfn_num_threads)
from dss.storage.identifiers import BLOB_KEY_REGEX, BLOB_PREFIX

configure_lambda_logging()
logger = logging.getLogger(__name__)
dss.Config.set_config(dss.BucketConfig.NORMAL)

app = domovoi.Domovoi()


# This entry point is for S3 native events forwarded through SQS.
@app.s3_event_handler(bucket=Config.get_s3_bucket(),
                      events=["s3:ObjectCreated:*"],
                      use_sqs=True,
                      sqs_queue_attributes=dict(VisibilityTimeout="920"))
def launch_from_s3_event(event, context):
    source_replica = Replica.aws
    executions = {}
    if event.get("Event") == "s3:TestEvent":
        logger.info("S3 test event received and processed successfully")
    else:
        for event_record in event["Records"]:
            bucket = resources.s3.Bucket(event_record["s3"]["bucket"]["name"])
            obj = bucket.Object(unquote(event_record["s3"]["object"]["key"]))
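            # Blob keys under BLOB_PREFIX are expected to match BLOB_KEY_REGEX; mismatches are logged below.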
            if obj.key.startswith(BLOB_PREFIX) and not BLOB_KEY_REGEX.match(
                    obj.key):
                logger.info(
Example 5
def _parse_event(self, event):
    assert event['Records'][0]['s3']['bucket']['name'] == Config.get_s3_bucket()
    key = unquote(event['Records'][0]['s3']['object']['key'])
    return key
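For illustration, a hypothetical event of the shape `_parse_event` expects; the bucket name and key below are invented, and in practice the bucket must equal `Config.get_s3_bucket()`:

# Hypothetical sample input; the bucket name and key are made up for illustration.
sample_event = {
    "Records": [{
        "s3": {
            "bucket": {"name": "example-dss-bucket"},
            "object": {"key": "blobs/abc123%3Asha256"},  # URL-encoded S3 key
        }
    }]
}
# unquote() decodes the percent-escapes, so the returned key would be "blobs/abc123:sha256".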