Example #1
def __add_to_sqs(self, files):
    # Send the given S3 file paths to the amoeba FIFO queue as a single message.
    prefix = util.get_stack_name_from_arn(os.environ[c.ENV_DEPLOYMENT_STACK_ARN])
    sqs = Sqs(self.context, "{0}{1}".format(prefix, c.KEY_SQS_AMOEBA_SUFFIX))
    sqs.set_queue_url(lowest_load_queue=False)
    sqs.send_generic_message(json.dumps({"paths": files}))
def main(event, lambdacontext):
    global context
    global timestamp
    global aws_sqs
    start = time.time()
    ok_response = {
        'StatusCode': 200,
    }
    refreshtime = datetime.datetime.utcnow() - datetime.timedelta(minutes=1)
    # Reuse the cached context and SQS client across warm invocations;
    # rebuild them when the cached state is more than a minute old.
    if context is None or aws_sqs is None or refreshtime > timestamp:
        context = {}
        stack_id = os.environ[c.ENV_DEPLOYMENT_STACK_ARN]
        context[c.KEY_REQUEST_ID] = lambdacontext.aws_request_id if hasattr(lambdacontext, 'aws_request_id') else None
        db = DynamoDb(context)
        prefix = util.get_stack_name_from_arn(stack_id)
        aws_sqs = Sqs(context, queue_prefix="{}_".format(prefix))
        aws_sqs.set_queue_url(lowest_load_queue=True)
        timestamp = datetime.datetime.utcnow()
    else:
        context[c.KEY_SQS_QUEUE_URL] = aws_sqs.queue_url

    data = event.get(c.API_PARAM_PAYLOAD, {}).get(c.API_PARAM_DATA)
    source_IP = event.get(c.API_PARAM_SOURCE_IP, None)
    sensitivity_type = event.get(c.SQS_PARAM_SENSITIVITY_TYPE, sensitivity.SENSITIVITY_TYPE.NONE)
    compression_mode = event.get(c.SQS_PARAM_COMPRESSION_TYPE, compression.COMPRESSION_MODE.NONE)
    payload_type = event.get(c.SQS_PARAM_PAYLOAD_TYPE, payload.PAYLOAD_TYPE.CSV)
    # Resolve the compression, sensitivity and payload handlers from their factories.
    compression_mode = CompressionClassFactory.instance(compression_mode)
    sensitivity_type = SensitivityClassFactory.instance(sensitivity_type)
    payload_type = PayloadClassFactory.instance(context, payload_type, compression_mode, sensitivity_type, source_IP)

    print "[{}]Using SQS queue URL '{}'".format(context[c.KEY_REQUEST_ID],aws_sqs.queue_url) 
    if os.environ[c.ENV_VERBOSE]== "True":
        print "The post request contains a paylod of\n{}".format(data)
    if data is None:   
        print "Terminating, there is no data."
        return ok_response
        
    total_metrics = "all"    
    try:
        data_size = len(data) + sqs.message_overhead_size(sensitivity_type, compression_mode, payload_type)      
        message_chunks, total_metrics = payload_type.chunk(data)   
    
        for message in message_chunks:                    
            print "Sending a sqs message with {} bytes".format(len(message))            
            aws_sqs.send_message(message, sensitivity_type, compression_mode, payload_type)    
    except Exception as e:        
        traceback.print_exc()                
        raise errors.ClientError(e.message)     

    print "The job sent {} metric(s) to the FIFO queue '{}'".format(total_metrics, aws_sqs.queue_url)    
    print "The job took {} seconds.".format(time.time() -start)
    return ok_response
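
A minimal local smoke test of main() could look like the sketch below. The FakeLambdaContext class and the sample metric string are hypothetical, and it assumes the c constants module, the deployment environment variables, and the module globals used above are already in place.

# Hypothetical local invocation sketch; FakeLambdaContext and the sample data
# are made up for illustration and are not part of the original module.
class FakeLambdaContext(object):
    function_name = "local-test"
    aws_request_id = "local-request-id"

event = {
    c.API_PARAM_SOURCE_IP: "127.0.0.1",
    c.API_PARAM_PAYLOAD: {c.API_PARAM_DATA: "metric_a,1\nmetric_b,2"}
}
print main(event, FakeLambdaContext())   # expect {'StatusCode': 200}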
Example #3
def launch(event, lambdacontext):
    util.debug_print("Start Amoeba Launcher")
    context = {}
    context[c.KEY_START_TIME] = time.time()
    context[c.KEY_LAMBDA_FUNCTION] = lambdacontext.function_name if hasattr(
        lambdacontext, 'function_name') else None
    context[c.KEY_REQUEST_ID] = lambdacontext.aws_request_id if hasattr(
        lambdacontext, 'aws_request_id') else None
    prefix = util.get_stack_name_from_arn(
        os.environ[c.ENV_DEPLOYMENT_STACK_ARN])
    prefix = "{0}{1}".format(prefix, c.KEY_SQS_AMOEBA_SUFFIX)
    db = DynamoDb(context)
    sqs = Sqs(context, prefix, "sqs")
    sqs.set_queue_url(lowest_load_queue=False)

    # If every existing queue is under load, scale out by adding another FIFO queue.
    if sqs.is_all_under_load:
        sqs.add_fifo_queue(prefix)

    elapsed = util.elapsed(context)
    timeout = context[c.KEY_MAX_LAMBDA_TIME] * c.RATIO_OF_MAX_LAMBDA_TIME
    map = {}
    queues_checked = 0
    number_of_queues = sqs.number_of_queues
    sqs_delete_tokens = {}
    # Drain messages until the Lambda time budget is nearly spent or every queue has been checked.
    while elapsed < timeout and queues_checked < number_of_queues:
        messages = sqs.read_queue()
        length = len(messages)
        if sqs.queue_url not in sqs_delete_tokens:
            sqs_delete_tokens[sqs.queue_url] = []

        if length > 0:
            for x in range(0, length):
                message = messages[x]
                body = json.loads(message["Body"])
                paths = body["paths"]
                msg_token = "{}{}{}".format(message['MessageId'],
                                            context[c.KEY_SEPERATOR_CSV],
                                            message['ReceiptHandle'])
                sqs_delete_tokens[sqs.queue_url].append(msg_token)
                for i in range(0, len(paths)):
                    path = paths[i]
                    parts = path.split(context[c.KEY_SEPERATOR_PARTITION])
                    filename = parts.pop()
                    directory = context[c.KEY_SEPERATOR_PARTITION].join(parts)
                    if directory not in map:
                        map[directory] = {"paths": [], "size": 0}
                    # The Lambda 'Event' (asynchronous) invocation type caps the payload at 131072 bytes.
                    sizeof = len(path) + map[directory]["size"]
                    is_invoked = map[directory].get("invoked", False)
                    if sizeof >= c.MAXIMUM_ASYNC_PAYLOAD_SIZE and not is_invoked:
                        invoke_lambda(context, directory,
                                      map[directory]["paths"])
                        map[directory] = {
                            "paths": [],
                            "size": 0,
                            "invoked": True
                        }
                    else:
                        map[directory]["paths"].append(path)
                        map[directory]["size"] = sizeof

        else:
            queues_checked += 1
            sqs.set_queue_url(lowest_load_queue=False)

        elapsed = util.elapsed(context)

    # Invoke an amoeba generator for each S3 leaf node.
    for directory, settings in iteritems(map):
        is_invoked = settings.get("invoked", False)
        # Amoebas are not designed to have multiple instances working against one directory.
        # If the amoeba was already invoked because of the payload size limit, re-queue the remaining paths.
        if is_invoked:
            sqs.send_generic_message(json.dumps({"paths": settings["paths"]}))
        else:
            invoke_lambda(context, directory, settings["paths"])

    context[c.KEY_THREAD_POOL] = ThreadPool(context, 8)
    # Delete the SQS messages that have been processed.
    for key, value in iteritems(sqs_delete_tokens):
        sqs.delete_message_batch(value, key)

    return custom_resource_response.success_response({"StatusCode": 200}, "*")
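
The example above relies on an invoke_lambda helper that is not shown. A minimal sketch of what such a helper could look like, assuming a boto3 Lambda client and a hypothetical AMOEBA_GENERATOR_FUNCTION environment variable naming the generator function (neither is confirmed by the original source):

import json
import os

import boto3

def invoke_lambda(context, directory, paths):
    # Hypothetical helper: asynchronously fan out one amoeba generator per S3 leaf directory.
    # AMOEBA_GENERATOR_FUNCTION is an assumed environment variable, not part of the original code.
    client = boto3.client('lambda')
    payload = json.dumps({"directory": directory, "paths": paths})
    # 'Event' requests an asynchronous invocation, which is why the caller keeps each
    # batch of paths under c.MAXIMUM_ASYNC_PAYLOAD_SIZE before invoking.
    client.invoke(FunctionName=os.environ["AMOEBA_GENERATOR_FUNCTION"],
                  InvocationType='Event',
                  Payload=payload)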