async def assert_item_not_present(self, table_name=None, key=None):
    async with self.new_resource() as resource:
        table = await resource.Table(table_name)
        response = await table.get_item(Key=key)
        if "Item" in response:
            item = response["Item"]
            raise AssertionError(
                "Item with key={} exists in {}: {}.".format(
                    json.dumps(key), table_name, json.dumps(item)))

def write_pending_task(batch_id, index, request):
    object_key = "{}/pending/{}.json".format(batch_id, index)
    with trace("Write pending task {}/{} to s3", WORK_BUCKET, object_key):
        s3_resource.Object(WORK_BUCKET, object_key).put(
            ACL='private',
            Body=json.dumps({
                "batchId": batch_id,
                "index": index,
                "request": request
            }))

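# The trace helper used throughout this section (both `with trace(...)` and
# `async with trace(...)`) is not shown. A minimal sketch, assuming a single
# class that logs the formatted message on entry and the elapsed time on
# exit; the body below is an assumption, not the source's implementation:

import logging
import time

logger = logging.getLogger(__name__)


class trace:

    def __init__(self, message, *args):
        self._message = message.format(*args)

    def __enter__(self):
        logger.info(self._message)
        self._start = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        elapsed_ms = (time.perf_counter() - self._start) * 1000
        logger.info("%s finished after %.0f ms", self._message, elapsed_ms)

    async def __aenter__(self):
        return self.__enter__()

    async def __aexit__(self, exc_type, exc_value, traceback):
        self.__exit__(exc_type, exc_value, traceback)
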
async def __write_tasks_and_send_messages(batch_id, records, s3_resource,
                                          sqs_client):
    async with trace("Writing/sending {} tasks for batch {}", len(records),
                     batch_id):
        # Two passes: first persist every pending task to S3, then enqueue
        # one message per task, so no task is announced before it is stored.
        async with S3BatchWriter(s3_resource=s3_resource,
                                 flush_amount=100) as batch_writer:
            for index, record in enumerate(records):
                pending_task = {
                    "batchId": batch_id,
                    "index": index,
                    "request": record
                }
                validate_pending_task(pending_task)
                await work_bucket.write_pending_task(batch_id, index,
                                                     pending_task,
                                                     batch_writer)
        async with process_queue.new_batch_sender(sqs_client) as batch_sender:
            for index, record in enumerate(records):
                pending_task = {
                    "batchId": batch_id,
                    "index": index,
                    "request": record
                }
                validate_pending_task(pending_task)
                await batch_sender.send_message(
                    message={
                        "Id": str(uuid4()),
                        "MessageBody": json.dumps(pending_task)
                    })

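# S3BatchWriter is not a boto3/aioboto3 API but a project-local helper. A
# plausible sketch, assuming it buffers put requests and flushes them
# concurrently in groups of flush_amount, matching the
# batch_writer.put(Bucket=..., Key=..., ...) usage in the async
# write_pending_task below (the class body is an assumption):

import asyncio


class S3BatchWriter:

    def __init__(self, s3_resource, flush_amount=100):
        self._s3_resource = s3_resource
        self._flush_amount = flush_amount
        self._pending = []

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        if exc_type is None:
            # flush whatever is still buffered on a clean exit
            await self._flush()

    async def put(self, **put_request):
        self._pending.append(put_request)
        if len(self._pending) >= self._flush_amount:
            await self._flush()

    async def _flush(self):
        requests, self._pending = self._pending, []
        await asyncio.gather(*[self._put_object(r) for r in requests])

    async def _put_object(self, put_request):
        s3_object = await self._s3_resource.Object(put_request.pop("Bucket"),
                                                   put_request.pop("Key"))
        await s3_object.put(**put_request)
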
async def __write_chunks_and_send_messages(batch_id, records,
                                           dynamodb_resource, sqs_client):
    async with trace("Writing/sending chunks for batch {}", batch_id):
        async with await batch_tasks_table.new_batch_writer(
                dynamodb_resource) as batch_writer:
            for index, record in enumerate(records):
                pending_task = {
                    "batchId": batch_id,
                    "index": index,
                    "request": record
                }
                validate_pending_task(pending_task)
                await batch_tasks_table.put_pending_batch_task(
                    pending_task, batch_writer)
        async with process_queue.new_batch_sender(sqs_client) as batch_sender:
            for chunk_index, chunk in enumchunks(records, CHUNK_SIZE):
                async with trace("Sending chunk {} of tasks for batch_id={}",
                                 chunk_index, batch_id):
                    # The message carries only the global record indices; the
                    # request payloads were already written to the tasks table.
                    message_body = {
                        "batchId": batch_id,
                        "index": chunk_index,
                        "records": [{
                            "index": chunk_index * CHUNK_SIZE + record_index
                        } for record_index, _ in enumerate(chunk)]
                    }
                    await batch_sender.send_message(
                        message={
                            "Id": str(uuid4()),
                            "MessageBody": json.dumps(message_body)
                        })

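# enumchunks is a small utility referenced above and in the S3 chunk variant
# below; a minimal sketch, assuming it yields (chunk_index, chunk) pairs of
# at most chunk_size records each:


def enumchunks(items, chunk_size):
    for chunk_index, start in enumerate(range(0, len(items), chunk_size)):
        yield chunk_index, items[start:start + chunk_size]
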
async def handle_event(event, lambda_context):
    logger.info("Event: {}".format(json.dumps(event, indent=2)))
    async with aioaws.resource("s3") as s3_resource:
        records = [json.loads(record["body"]) for record in event["Records"]]
        await asyncio.gather(
            *[__gather(record, s3_resource) for record in records])

async def __process(record, s3_resource, batch_writer):
    async with trace("Processing {}", json.dumps(record)):
        validate_pending_task(record)
        index = record["index"]
        batch_id = record["batchId"]
        request = record["request"]
        item_no = request["itemNo"]
        await items_table.put_item(
            {
                "itemNo": str(item_no),
                "updateTimestamp": now_epoch_millis()
            }, batch_writer)
        processed_task = {
            "batchId": batch_id,
            "index": index,
            "request": request,
            "response": {
                "success": True,
                "message": "Ok"
            }
        }
        validate_processed_task(processed_task)
        await work_bucket.write_task_result(batch_id, index, processed_task,
                                            s3_resource)
        await work_bucket.delete_pending_task(batch_id, index, s3_resource)

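# The validate_* helpers are not shown. A minimal sketch using jsonschema,
# with a schema inferred from the pending-task shape built above (both the
# schema and the function are assumptions, not the source's definitions):

from jsonschema import validate

PENDING_TASK_SCHEMA = {
    "type": "object",
    "required": ["batchId", "index", "request"],
    "properties": {
        "batchId": {"type": "string"},
        "index": {"type": "integer", "minimum": 0},
        "request": {"type": "object"}
    }
}


def validate_pending_task(pending_task):
    # raises jsonschema.exceptions.ValidationError on malformed input
    validate(instance=pending_task, schema=PENDING_TASK_SCHEMA)
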
def send_batch_complete_message(batch_id):
    with trace("Sending complete message for {}", batch_id):
        queue_url = sqs_client.get_queue_url(
            QueueName=GATHER_QUEUE)["QueueUrl"]
        # MessageGroupId/MessageDeduplicationId require a FIFO queue;
        # deduplicating on batch_id drops duplicate complete messages sent
        # within SQS's deduplication window.
        sqs_client.send_message(QueueUrl=queue_url,
                                MessageGroupId=batch_id,
                                MessageDeduplicationId=batch_id,
                                MessageBody=json.dumps({"batchId": batch_id}))

async def assert_contains_item(self, table_name=None, key=None):
    async with self.new_resource() as resource:
        table = await resource.Table(table_name)
        response = await table.get_item(Key=key)
        if "Item" not in response:
            raise AssertionError(
                "Item with key={} not found in {}.".format(
                    json.dumps(key), table_name))

async def write_pending_task(batch_id, index, pending_task, batch_writer):
    object_key = "{}/pending/{}.json".format(batch_id, index)
    async with trace("Write pending task {}/{} to s3", WORK_BUCKET,
                     object_key):
        await batch_writer.put(Bucket=WORK_BUCKET,
                               Key=object_key,
                               ACL='private',
                               Body=json.dumps(pending_task))

async def send_batch_complete_message(batch_id, sqs_client):
    async with trace("Sending complete message for {}", batch_id):
        response = await sqs_client.get_queue_url(QueueName=GATHER_QUEUE)
        queue_url = response["QueueUrl"]
        # GATHER_QUEUE must be a FIFO queue: deduplicating on batch_id drops
        # duplicate complete messages sent within the deduplication window.
        await sqs_client.send_message(
            QueueUrl=queue_url,
            MessageGroupId=batch_id,
            MessageDeduplicationId=batch_id,
            MessageBody=json.dumps({"batchId": batch_id}))

def write_batch_status(batch_id, record_count):
    with trace("Writing status for {}", batch_id):
        object_key = "{}/status.json".format(batch_id)
        s3_resource.Object(WORK_BUCKET, object_key).put(
            ACL='private',
            Body=json.dumps({
                "variant": "s3-sqs-lambda-sync",
                "batchId": batch_id,
                "taskCount": record_count,
                "startTime": now()
            }))

async def assert_item_equal(self, table_name=None, key=None,
                            expected_item=None):
    async with self.new_resource() as resource:
        table = await resource.Table(table_name)
        response = await table.get_item(Key=key)
        if "Item" not in response:
            raise AssertionError(
                "Item with key={} not found in {}.".format(
                    json.dumps(key), table_name))
        item = response["Item"]
        # comparing canonical (key-sorted) JSON makes the equality check
        # independent of attribute order
        if json.dumps(item, sort_keys=True) != json.dumps(expected_item,
                                                          sort_keys=True):
            raise AssertionError(
                "Item {} from {} is not equal to: {}.".format(
                    json.dumps(item), table_name, json.dumps(expected_item)))

async def write_batch_status(batch_id, record_count, chunk_size, s3_resource):
    async with trace("Writing status for {}", batch_id):
        object_key = "{}/status.json".format(batch_id)
        s3_object = await s3_resource.Object(WORK_BUCKET, object_key)
        await s3_object.put(ACL='private',
                            Body=json.dumps({
                                "variant": "s3-notification-sqs-lambda",
                                "batchId": batch_id,
                                "chunkSize": chunk_size,
                                "taskCount": record_count,
                                "startTime": now()
                            }))

async def handle_event(event, lambda_context):
    logger.info("Event: {}".format(json.dumps(event, indent=2)))
    s3_objects = __get_s3_objects_from(event)
    batch_ids = {__extract_batch_id(key) for _, key in s3_objects}
    async with aioaws.client("sqs") as sqs_client, \
            aioaws.resource("s3") as s3_resource:
        await asyncio.gather(*[
            __check_if_complete(batch_id, s3_resource, sqs_client)
            for batch_id in batch_ids
        ])

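# __check_if_complete is referenced above but not shown. A sketch of one way
# to implement it, assuming a batch counts as complete once no objects remain
# under its pending/ prefix in the work bucket; the body, including the
# gather_queue call, is an assumption based on the surrounding code:


async def __check_if_complete(batch_id, s3_resource, sqs_client):
    bucket = await s3_resource.Bucket(WORK_BUCKET)
    prefix = "{}/pending/".format(batch_id)
    async for _ in bucket.objects.filter(Prefix=prefix):
        return  # at least one task is still pending
    # FIFO deduplication on batch_id (see send_batch_complete_message)
    # keeps concurrent invocations from fanning out duplicate messages.
    await gather_queue.send_batch_complete_message(batch_id, sqs_client)
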
async def handle_event(event, lambda_context):
    logger.info("Event: {}".format(json.dumps(event, indent=2)))
    s3_objects = __get_s3_objects_from(event)
    async with aioaws.resource("s3") as s3_resource, \
            aioaws.resource("dynamodb") as dynamodb_resource, \
            await items_table.new_batch_writer(
                dynamodb_resource) as batch_writer:
        await asyncio.gather(*[
            __process(s3_object, s3_resource, batch_writer)
            for s3_object in s3_objects
        ])

async def handle_event(event, lambda_context):
    logger.info("Event: {}".format(json.dumps(event, indent=2)))
    async with aioaws.client("sqs") as sqs_client, \
            aioaws.resource("s3") as s3_resource, \
            aioaws.resource("dynamodb") as dynamodb_resource, \
            await items_table.new_batch_writer(
                dynamodb_resource) as batch_writer:
        chunks = [json.loads(record["body"]) for record in event["Records"]]
        await asyncio.gather(
            *[__process(chunk, s3_resource, batch_writer) for chunk in chunks])
        batch_ids = {chunk["batchId"] for chunk in chunks}
        await asyncio.gather(*[
            __check_if_complete(batch_id, s3_resource, sqs_client)
            for batch_id in batch_ids
        ])

async def assert_item_contains_values(self, table_name=None, key=None,
                                      expected_item=None):
    async with self.new_resource() as resource:
        table = await resource.Table(table_name)
        response = await table.get_item(Key=key)
        if "Item" not in response:
            raise AssertionError(
                "Item with key={} not found in {}.".format(
                    json.dumps(key), table_name))
        item = response["Item"]
        for item_key, item_value in expected_item.items():
            if item_key not in item:
                raise AssertionError(
                    "Item {} is missing the attribute \"{}\".".format(
                        json.dumps(item), item_key))
            if json.dumps(item[item_key], sort_keys=True) != json.dumps(
                    item_value, sort_keys=True):
                raise AssertionError(
                    "Attribute \"{}\" does not have the expected value {} in {}."
                    .format(item_key, item_value, json.dumps(item)))

def handle_event(event, lambda_context):
    logger.info("Event: {}".format(json.dumps(event, indent=2)))
    records = event["Records"]
    with items_table.new_batch_writer() as batch_writer:
        for record in records:
            record = json.loads(record["body"])
            with trace("Processing {}", json.dumps(record)):
                index = record["index"]
                batch_id = record["batchId"]
                request = record["request"]
                item_no = request["itemNo"]
                items_table.put_item(
                    {
                        "itemNo": str(item_no),
                        "updateTimestamp": now_epoch_millis()
                    }, batch_writer)
                work_bucket.write_task_result(batch_id, index, request, {
                    "success": True,
                    "message": "Ok"
                })
                work_bucket.delete_pending_task(batch_id, index)
                if not work_bucket.exists_pending_task(batch_id):
                    gather_queue.send_batch_complete_message(batch_id)

async def __process(message, s3_resource, batch_writer):
    async with trace("Processing {}", json.dumps(message)):
        batch_id = message["batchId"]
        index = message["index"]
        chunk = await work_bucket.read_pending_chunk(batch_id, index,
                                                     s3_resource)
        for record in chunk["records"]:
            request = record["request"]
            item_no = request["itemNo"]
            record["response"] = {"success": True, "message": "Ok"}
            await items_table.put_item(
                {
                    "itemNo": str(item_no),
                    "updateTimestamp": now_epoch_millis()
                }, batch_writer)
        await work_bucket.write_chunk_result(batch_id, index, chunk,
                                             s3_resource)
        await work_bucket.delete_pending_chunk(batch_id, index, s3_resource)

def handle_event(event, lambda_context):
    logger.info("Event: {}".format(json.dumps(event, indent=2)))
    s3_object = __get_s3_object_from(event)
    if s3_object is None:
        return
    batch_id = __extract_batch_id(s3_object[1])
    record_batch_started(batch_id)
    with trace("Scattering {}", batch_id):
        batch_doc = input_bucket.read_batch_input(s3_object[0], s3_object[1])
        validate_input(batch_doc)
        records = batch_doc.get("records", [])
        work_bucket.write_batch_status(batch_id, len(records))
        __write_tasks_and_send_messages(batch_id, records)
        input_bucket.delete_batch_input(s3_object[0], s3_object[1])
        record_scatter_finished(batch_id, len(records))

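# __get_s3_object_from and __extract_batch_id are referenced but not shown.
# A sketch under these assumptions: events follow the standard S3
# notification shape, the handler processes a single record (returning None
# for s3:TestEvent), and the batch id is the object key's first path segment
# with any .json suffix stripped (key layouts seen in this section are
# "{batch_id}.json" and "{batch_id}/pending/{index}.json"):

from urllib.parse import unquote_plus


def __get_s3_object_from(event):
    records = event.get("Records", [])
    if not records or "s3" not in records[0]:
        return None  # e.g. the s3:TestEvent sent when notifications are set up
    s3 = records[0]["s3"]
    return s3["bucket"]["name"], unquote_plus(s3["object"]["key"])


def __extract_batch_id(object_key):
    first_segment = object_key.split("/", 1)[0]
    if first_segment.endswith(".json"):
        first_segment = first_segment[:-len(".json")]
    return first_segment
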
def __write_tasks_and_send_messages(batch_id, records):
    with trace("Writing/sending {} tasks for batch {}", len(records),
               batch_id):
        with process_queue.new_batch_sender() as batch_sender:
            for index, record in enumerate(records):
                work_bucket.write_pending_task(batch_id, index, record)
                batch_sender.send_message(
                    message={
                        "Id": str(uuid4()),
                        "MessageBody": json.dumps({
                            "batchId": batch_id,
                            "index": index,
                            "request": record
                        })
                    })

async def __write_chunks_and_send_messages(batch_id, records, s3_resource,
                                           sqs_client):
    async with trace("Writing/sending chunks for batch {}", batch_id):
        async with S3BatchWriter(s3_resource=s3_resource,
                                 flush_amount=CHUNK_SIZE) as batch_writer:
            for chunk_index, chunk in enumchunks(records, CHUNK_SIZE):
                pending_chunk = {
                    "batchId": batch_id,
                    "index": chunk_index,
                    "records": [{
                        "request": record,
                        # record indices are global across the whole batch
                        "index": chunk_index * CHUNK_SIZE + record_index
                    } for record_index, record in enumerate(chunk)]
                }
                validate_pending_chunk_of_tasks(pending_chunk)
                await work_bucket.write_pending_chunk(batch_id, chunk_index,
                                                      pending_chunk,
                                                      batch_writer)
        async with process_queue.new_batch_sender(sqs_client) as batch_sender:
            # The processors read each pending chunk back from S3, so the
            # message itself only needs the batch id and the chunk index.
            for chunk_index, _ in enumchunks(records, CHUNK_SIZE):
                await batch_sender.send_message(
                    message={
                        "Id": str(uuid4()),
                        "MessageBody": json.dumps({
                            "batchId": batch_id,
                            "index": chunk_index
                        })
                    })

async def handle_event(event, lambda_context):
    logger.info("Event: {}".format(json.dumps(event, indent=2)))
    s3_object = __get_s3_object_from(event)
    if s3_object is None:
        logger.info("Is s3 test event. Skipping.")
        return
    batch_id = __extract_batch_id(s3_object[1])
    async with trace("Scattering {}", batch_id):
        async with aioaws.resource("s3") as s3_resource, \
                aioaws.client("sqs") as sqs_client:
            batch_doc = await input_bucket.read_batch_input(
                s3_object[0], s3_object[1], s3_resource)
            validate_input(batch_doc)
            records = batch_doc.get("records", [])
            record_batch_started(batch_id)
            await work_bucket.write_batch_status(batch_id, len(records),
                                                 CHUNK_SIZE, s3_resource)
            await __write_chunks_and_send_messages(batch_id, records,
                                                   s3_resource, sqs_client)
            await input_bucket.delete_batch_input(s3_object[0], s3_object[1],
                                                  s3_resource)
            record_scatter_finished(batch_id, len(records))

async def __process(message, dynamodb_resource, batch_writer):
    async with trace("Processing {}", json.dumps(message)):
        batch_id = message["batchId"]
        records = message["records"]
        tasks = await asyncio.gather(*[
            batch_tasks_table.get_batch_task(batch_id, record["index"],
                                             dynamodb_resource)
            for record in records
        ])
        for task in tasks:
            index = task["index"]
            request = task["request"]
            item_no = request["itemNo"]
            price = request["price"]
            response = {"success": True, "message": "Ok"}
            await items_table.put_item(
                {
                    "itemNo": str(item_no),
                    "price": price,
                    "updateTimestamp": now_epoch_millis()
                }, batch_writer)
            await batch_tasks_table.put_processed_batch_task(
                batch_id, index, request, response, dynamodb_resource)

async def handle_event(event, lambda_context):
    logger.info("Event: {}".format(json.dumps(event, indent=2)))
    s3_object = __get_s3_object_from(event)
    if s3_object is None:
        return
    batch_id = __extract_batch_id(s3_object[1])
    async with trace("Scattering {}", batch_id):
        async with aioaws.resource("s3") as s3_resource, \
                aioaws.client("sqs") as sqs_client, \
                aioaws.resource("dynamodb") as dynamodb_resource:
            batch_doc = await input_bucket.read_batch_input(
                s3_object[0], s3_object[1], s3_resource)
            validate_input(batch_doc)
            records = batch_doc.get("records", [])
            record_batch_started(batch_id)
            await batch_status_table.put_batch_status(batch_id, len(records),
                                                      dynamodb_resource)
            await __write_chunks_and_send_messages(batch_id, records,
                                                   dynamodb_resource,
                                                   sqs_client)
            await input_bucket.delete_batch_input(s3_object[0], s3_object[1],
                                                  s3_resource)
            record_scatter_finished(batch_id, len(records))

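# For reference, the batch input document read by the scatter handlers is
# assumed (from the fields the processors access, request["itemNo"] and
# request["price"]) to look roughly like:
#
#     {
#         "records": [
#             {"itemNo": "10001", "price": 42.5},
#             {"itemNo": "10002", "price": 13.0}
#         ]
#     }
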
async def write_batch_output(batch_id, output, s3_resource):
    object_key = "{}.json".format(batch_id)
    async with trace("Writing batch output {}/{} to s3", OUTPUT_BUCKET,
                     object_key):
        s3_object = await s3_resource.Object(OUTPUT_BUCKET, object_key)
        await s3_object.put(ACL='private', Body=json.dumps(output))

async def write_task_result(batch_id, index, processed_task, s3_resource):
    object_key = "{}/done/{}.json".format(batch_id, index)
    async with trace("Writing task result {}/{} to s3", WORK_BUCKET,
                     object_key):
        s3_object = await s3_resource.Object(WORK_BUCKET, object_key)
        await s3_object.put(ACL='private', Body=json.dumps(processed_task))

def write_batch_output(batch_id, output):
    object_key = "{}.json".format(batch_id)
    with trace("Writing output to {}/{}", OUTPUT_BUCKET, object_key):
        s3_resource.Object(OUTPUT_BUCKET, object_key) \
            .put(ACL='private', Body=json.dumps(output))

def write_task_result(batch_id, index, request, result):
    object_key = "{}/done/{}.json".format(batch_id, index)
    with trace("Writing task result {}/{} to s3", WORK_BUCKET, object_key):
        s3_resource.Object(WORK_BUCKET, object_key).put(
            ACL='private',
            Body=json.dumps({
                "batchId": batch_id,
                "index": index,
                "request": request,
                "response": result
            }))

async def write_batch_input(batch_id, input, s3_resource):
    object_key = "{}.json".format(batch_id)
    async with trace("Writing {}/{}", INPUT_BUCKET, object_key):
        s3_object = await s3_resource.Object(INPUT_BUCKET, object_key)
        await s3_object.put(ACL='private', Body=json.dumps(input))