def __add_to_sqs(self, files):
    prefix = util.get_stack_name_from_arn(os.environ[c.ENV_DEPLOYMENT_STACK_ARN])
    sqs = Sqs(self.context, "{0}{1}".format(prefix, c.KEY_SQS_AMOEBA_SUFFIX))
    sqs.set_queue_url(lowest_load_queue=False)
    sqs.send_generic_message(json.dumps({"paths": files}))
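# The body queued above is the same JSON document that launch() below pulls back out of
# each Amoeba SQS message. A minimal sketch of that round trip; the S3 object paths are
# purely illustrative placeholders, not the real partition layout.
def _amoeba_queue_roundtrip_sketch():
    # Producer side: what __add_to_sqs serializes onto the Amoeba FIFO queue.
    producer_body = json.dumps({"paths": [
        "table=example/p_date=20180101/part_a.parquet",  # hypothetical path
        "table=example/p_date=20180101/part_b.parquet"   # hypothetical path
    ]})
    # Consumer side: launch() reads message["Body"] and recovers the path list.
    consumer_paths = json.loads(producer_body)["paths"]
    return consumer_paths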
def main(event, lambdacontext):
    global context
    global timestamp
    global aws_sqs
    start = time.time()
    ok_response = {
        'StatusCode': 200,
    }
    # Rebuild the cached context and SQS client if they are missing or more than a minute old.
    refreshtime = datetime.datetime.utcnow() - datetime.timedelta(minutes=1)
    if context is None or aws_sqs is None or refreshtime > timestamp:
        context = dict({})
        stack_id = os.environ[c.ENV_DEPLOYMENT_STACK_ARN]
        context[c.KEY_REQUEST_ID] = lambdacontext.aws_request_id if hasattr(lambdacontext, 'aws_request_id') else None
        # Constructed for its initialization side effects; the handle itself is not used here.
        db = DynamoDb(context)
        prefix = util.get_stack_name_from_arn(stack_id)
        aws_sqs = Sqs(context, queue_prefix="{}_".format(prefix))
        aws_sqs.set_queue_url(True)
        timestamp = datetime.datetime.utcnow()
    else:
        context[c.KEY_SQS_QUEUE_URL] = aws_sqs.queue_url

    data = event.get(c.API_PARAM_PAYLOAD, {})[c.API_PARAM_DATA]
    source_IP = event.get(c.API_PARAM_SOURCE_IP, None)
    sensitivity_type = event.get(c.SQS_PARAM_SENSITIVITY_TYPE, sensitivity.SENSITIVITY_TYPE.NONE)
    compression_mode = event.get(c.SQS_PARAM_COMPRESSION_TYPE, compression.COMPRESSION_MODE.NONE)
    payload_type = event.get(c.SQS_PARAM_PAYLOAD_TYPE, payload.PAYLOAD_TYPE.CSV)

    # Resolve the concrete compression, sensitivity, and payload handlers.
    compression_mode = CompressionClassFactory.instance(compression_mode)
    sensitivity_type = SensitivityClassFactory.instance(sensitivity_type)
    payload_type = PayloadClassFactory.instance(context, payload_type, compression_mode, sensitivity_type, source_IP)

    print "[{}] Using SQS queue URL '{}'".format(context[c.KEY_REQUEST_ID], aws_sqs.queue_url)
    if os.environ[c.ENV_VERBOSE] == "True":
        print "The POST request contains a payload of\n{}".format(data)
    if data is None:
        print "Terminating, there is no data."
        return ok_response

    total_metrics = "all"
    try:
        # Account for the per-message envelope overhead, then split the payload into
        # SQS-sized message chunks and send each one.
        data_size = len(data) + aws_sqs.message_overhead_size(sensitivity_type, compression_mode, payload_type)
        message_chunks, total_metrics = payload_type.chunk(data)
        for message in message_chunks:
            print "Sending an SQS message with {} bytes".format(len(message))
            aws_sqs.send_message(message, sensitivity_type, compression_mode, payload_type)
    except Exception as e:
        traceback.print_exc()
        raise errors.ClientError(e.message)

    print "The job sent {} metric(s) to the FIFO queue '{}'".format(total_metrics, aws_sqs.queue_url)
    print "The job took {} seconds.".format(time.time() - start)
    return ok_response
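# A rough local harness for main(), shown only as a sketch: _FakeLambdaContext, the example
# ARN, and the CSV placeholder below are assumptions for illustration and are not part of
# this module. It also assumes the module-level globals (context, timestamp, aws_sqs) start
# out as None and that AWS credentials are available for the DynamoDb and Sqs wrappers.
class _FakeLambdaContext(object):
    aws_request_id = "local-test-request"
    function_name = "local-test"


def _local_main_sketch():
    os.environ.setdefault(c.ENV_DEPLOYMENT_STACK_ARN,
                          "arn:aws:cloudformation:us-east-1:123456789012:stack/ExampleStack/guid")  # hypothetical
    os.environ.setdefault(c.ENV_VERBOSE, "False")
    event = {
        c.API_PARAM_SOURCE_IP: "127.0.0.1",
        c.API_PARAM_PAYLOAD: {
            c.API_PARAM_DATA: "<csv rows produced by the game client>"  # hypothetical payload
        }
    }
    return main(event, _FakeLambdaContext())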
def launch(event, lambdacontext):
    util.debug_print("Start Amoeba Launcher")
    context = dict({})
    context[c.KEY_START_TIME] = time.time()
    context[c.KEY_LAMBDA_FUNCTION] = lambdacontext.function_name if hasattr(lambdacontext, 'function_name') else None
    context[c.KEY_REQUEST_ID] = lambdacontext.aws_request_id if hasattr(lambdacontext, 'aws_request_id') else None
    prefix = util.get_stack_name_from_arn(os.environ[c.ENV_DEPLOYMENT_STACK_ARN])
    prefix = "{0}{1}".format(prefix, c.KEY_SQS_AMOEBA_SUFFIX)
    db = DynamoDb(context)
    sqs = Sqs(context, prefix, "sqs")
    sqs.set_queue_url(lowest_load_queue=False)
    if sqs.is_all_under_load:
        sqs.add_fifo_queue(prefix)

    elapsed = util.elapsed(context)
    timeout = context[c.KEY_MAX_LAMBDA_TIME] * c.RATIO_OF_MAX_LAMBDA_TIME
    map = {}
    queues_checked = 0
    number_of_queues = sqs.number_of_queues
    sqs_delete_tokens = {}
    while elapsed < timeout and queues_checked < number_of_queues:
        messages = sqs.read_queue()
        length = len(messages)
        if sqs.queue_url not in sqs_delete_tokens:
            sqs_delete_tokens[sqs.queue_url] = []
        if length > 0:
            for message in messages:
                body = json.loads(message["Body"])
                paths = body["paths"]
                msg_token = "{}{}{}".format(message['MessageId'], context[c.KEY_SEPERATOR_CSV], message['ReceiptHandle'])
                sqs_delete_tokens[sqs.queue_url].append(msg_token)
                for path in paths:
                    parts = path.split(context[c.KEY_SEPERATOR_PARTITION])
                    # Drop the filename; the remaining parts form the directory (S3 leaf node).
                    filename = parts.pop()
                    directory = context[c.KEY_SEPERATOR_PARTITION].join(parts)
                    if directory not in map:
                        map[directory] = {"paths": [], "size": 0}
                    # The Lambda payload limit for the 'Event' (asynchronous) invocation type is 131072 bytes.
                    sizeof = len(path) + map[directory]["size"]
                    is_invoked = map[directory].get("invoked", False)
                    if sizeof >= c.MAXIMUM_ASYNC_PAYLOAD_SIZE and not is_invoked:
                        invoke_lambda(context, directory, map[directory]["paths"])
                        map[directory] = {"paths": [], "size": 0, "invoked": True}
                    else:
                        map[directory]["paths"].append(path)
                        map[directory]["size"] = sizeof
        else:
            queues_checked += 1
            sqs.set_queue_url(lowest_load_queue=False)
        elapsed = util.elapsed(context)

    # Invoke an amoeba generator for each S3 leaf node.
    for directory, settings in iteritems(map):
        is_invoked = settings.get("invoked", False)
        # Amoebas are not designed to have multiple instances working against one directory.
        # If an amoeba has already been invoked for this directory because of payload size,
        # requeue the remaining paths instead of invoking a second one.
        if is_invoked:
            sqs.send_generic_message(json.dumps({"paths": settings["paths"]}))
        else:
            invoke_lambda(context, directory, settings["paths"])

    context[c.KEY_THREAD_POOL] = ThreadPool(context, 8)
    # Delete the SQS messages that have been processed.
    for key, value in iteritems(sqs_delete_tokens):
        sqs.delete_message_batch(value, key)
    return custom_resource_response.success_response({"StatusCode": 200}, "*")
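# launch() hands work off through invoke_lambda(), which is defined elsewhere in this module.
# As a rough illustration only, an asynchronous "Event" invocation via boto3 looks like the
# sketch below; the environment-variable lookup and payload keys are assumptions, not the
# project's actual contract. The source comment above notes the 131072-byte payload cap for
# Event invocations, which is what drives the c.MAXIMUM_ASYNC_PAYLOAD_SIZE flush in launch().
def _invoke_lambda_sketch(context, directory, paths):
    import boto3  # available in the AWS Lambda runtime
    client = boto3.client('lambda')
    client.invoke(
        FunctionName=os.environ.get('AMOEBA_GENERATOR_FUNCTION', ''),  # hypothetical lookup
        InvocationType='Event',  # asynchronous invocation
        Payload=json.dumps({"directory": directory, "paths": paths})
    )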