load_schema, ) dynamodb_resource = boto3.resource("dynamodb") table = dynamodb_resource.Table(os.getenv("DataMapperTable")) glue_client = boto3.client("glue") PARQUET_HIVE_SERDE = "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe" JSON_HIVE_SERDE = "org.apache.hive.hcatalog.data.JsonSerDe" JSON_OPENX_SERDE = "org.openx.data.jsonserde.JsonSerDe" SUPPORTED_SERDE_LIBS = [PARQUET_HIVE_SERDE, JSON_HIVE_SERDE, JSON_OPENX_SERDE] @with_logging @add_cors_headers @request_validator(load_schema("get_data_mapper")) @catch_errors def get_data_mapper_handler(event, context): data_mapper_id = event["pathParameters"]["data_mapper_id"] item = table.get_item(Key={"DataMapperId": data_mapper_id}).get("Item") if not item: return {"statusCode": 404} return {"statusCode": 200, "body": json.dumps(item, cls=DecimalEncoder)} @with_logging @add_cors_headers @request_validator(load_schema("list_data_mappers")) @catch_errors def get_data_mappers_handler(event, context):
@catch_errors def enqueue_batch_handler(event, context): body = event["body"] matches = body["Matches"] validate_queue_items(matches) user_info = get_user_info(event) items = enqueue_items(matches, user_info) return { "statusCode": 201, "body": json.dumps({"Matches": items}, cls=DecimalEncoder), } @with_logging @add_cors_headers @request_validator(load_schema("list_queue_items")) @catch_errors def get_handler(event, context): defaults = {"Type": "Simple"} qs = event.get("queryStringParameters") if not qs: qs = {} page_size = int(qs.get("page_size", 10)) scan_params = {"Limit": page_size} start_at = qs.get("start_at") if start_at: scan_params["ExclusiveStartKey"] = {"DeletionQueueItemId": start_at} items = deletion_queue_table.scan(**scan_params).get("Items", []) if len(items) < page_size: next_start = None else:
"JobFinishTime", "JobStartTime", "TotalObjectRollbackFailedCount", "TotalObjectUpdatedCount", "TotalObjectUpdateFailedCount", "TotalQueryCount", "TotalQueryFailedCount", "TotalQueryScannedInBytes", "TotalQuerySucceededCount", "TotalQueryTimeInMillis", ] @with_logging @add_cors_headers @request_validator(load_schema("get_job")) @catch_errors def get_job_handler(event, context): job_id = event["pathParameters"]["job_id"] resp = table.get_item(Key={"Id": job_id, "Sk": job_id,}) item = resp.get("Item") if "DeletionQueueItems" not in item.keys(): deletion_queue_bucket = item["DeletionQueueBucket"] deletion_queue_key = item["DeletionQueueKey"] obj = s3.Object(deletion_queue_bucket, deletion_queue_key) raw_data = obj.get()['Body'].read().decode('utf-8') match_id_items = json.loads(raw_data) item["DeletionQueueItems"] = list(map(lambda x: x["MatchId"], match_id_items["DeletionQueueItems"])) if not item: return {"statusCode": 404}