Esempio n. 1
0
    load_schema,
)

# AWS handles are created once, at module import time.
dynamodb_resource = boto3.resource("dynamodb")
# The DynamoDB table name is read from the "DataMapperTable" environment
# variable; os.getenv returns None when the variable is unset.
table = dynamodb_resource.Table(os.getenv("DataMapperTable"))
glue_client = boto3.client("glue")

# Fully-qualified SerDe library class names: one Parquet and two JSON SerDes.
PARQUET_HIVE_SERDE = "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"
JSON_HIVE_SERDE = "org.apache.hive.hcatalog.data.JsonSerDe"
JSON_OPENX_SERDE = "org.openx.data.jsonserde.JsonSerDe"
# The SerDe libraries considered supported.
# NOTE(review): where this list is enforced is not visible in this chunk.
SUPPORTED_SERDE_LIBS = [PARQUET_HIVE_SERDE, JSON_HIVE_SERDE, JSON_OPENX_SERDE]


@with_logging
@add_cors_headers
@request_validator(load_schema("get_data_mapper"))
@catch_errors
def get_data_mapper_handler(event, context):
    """Fetch a single data mapper by the id given in the request path.

    Reads ``data_mapper_id`` from ``event["pathParameters"]`` and looks it up
    in ``table`` under the ``DataMapperId`` key.

    Returns:
        A dict with ``statusCode`` 404 when the lookup yields no (or an
        empty) item; otherwise ``statusCode`` 200 and a ``body`` holding the
        item serialized to JSON with ``DecimalEncoder``.
    """
    lookup_key = {"DataMapperId": event["pathParameters"]["data_mapper_id"]}
    response = table.get_item(Key=lookup_key)
    mapper = response.get("Item")
    if mapper:
        serialized = json.dumps(mapper, cls=DecimalEncoder)
        return {"statusCode": 200, "body": serialized}

    return {"statusCode": 404}


@with_logging
@add_cors_headers
@request_validator(load_schema("list_data_mappers"))
@catch_errors
def get_data_mappers_handler(event, context):
Esempio n. 2
0
@catch_errors
def enqueue_batch_handler(event, context):
    """Validate and enqueue the batch of matches carried in the request body.

    ``event["body"]`` is indexed by key, so it must already be a mapping
    (presumably parsed upstream -- confirm against the decorators applied to
    this handler). Its ``Matches`` entry is validated with
    ``validate_queue_items`` and then passed to ``enqueue_items`` together
    with the user info derived from ``event``.

    Returns:
        A dict with ``statusCode`` 201 and a JSON ``body`` of the form
        ``{"Matches": [...]}`` containing whatever ``enqueue_items`` returned,
        serialized with ``DecimalEncoder``.
    """
    matches = event["body"]["Matches"]
    validate_queue_items(matches)
    queued = enqueue_items(matches, get_user_info(event))
    payload = json.dumps({"Matches": queued}, cls=DecimalEncoder)
    return {"statusCode": 201, "body": payload}


@with_logging
@add_cors_headers
@request_validator(load_schema("list_queue_items"))
@catch_errors
def get_handler(event, context):
    defaults = {"Type": "Simple"}
    qs = event.get("queryStringParameters")
    if not qs:
        qs = {}
    page_size = int(qs.get("page_size", 10))
    scan_params = {"Limit": page_size}
    start_at = qs.get("start_at")
    if start_at:
        scan_params["ExclusiveStartKey"] = {"DeletionQueueItemId": start_at}
    items = deletion_queue_table.scan(**scan_params).get("Items", [])
    if len(items) < page_size:
        next_start = None
    else:
Esempio n. 3
0
    "JobFinishTime",
    "JobStartTime",
    "TotalObjectRollbackFailedCount",
    "TotalObjectUpdatedCount",
    "TotalObjectUpdateFailedCount",
    "TotalQueryCount",
    "TotalQueryFailedCount",
    "TotalQueryScannedInBytes",
    "TotalQuerySucceededCount",
    "TotalQueryTimeInMillis",
]


@with_logging
@add_cors_headers
@request_validator(load_schema("get_job"))
@catch_errors
def get_job_handler(event, context):
    """Look up a job by the ``job_id`` path parameter.

    The job record is read from ``table`` using ``job_id`` for both the
    ``Id`` and ``Sk`` key attributes. A missing record produces
    ``{"statusCode": 404}``.

    When the stored record has no ``DeletionQueueItems`` attribute, the list
    is loaded from the S3 object named by the record's
    ``DeletionQueueBucket`` / ``DeletionQueueKey`` attributes: the object is
    decoded as UTF-8 JSON and each entry of its ``DeletionQueueItems`` array
    is reduced to its ``MatchId``.
    """
    job_id = event["pathParameters"]["job_id"]
    resp = table.get_item(Key={"Id": job_id, "Sk": job_id})
    item = resp.get("Item")
    # Guard first: when the job does not exist there is no "Item" in the
    # response, and the membership test / key lookups below would fail on
    # None instead of producing a 404.
    if not item:
        return {"statusCode": 404}
    if "DeletionQueueItems" not in item:
        deletion_queue_bucket = item["DeletionQueueBucket"]
        deletion_queue_key = item["DeletionQueueKey"]
        obj = s3.Object(deletion_queue_bucket, deletion_queue_key)
        raw_data = obj.get()["Body"].read().decode("utf-8")
        match_id_items = json.loads(raw_data)
        item["DeletionQueueItems"] = [
            entry["MatchId"] for entry in match_id_items["DeletionQueueItems"]
        ]