Example #1
def list(request):
    db = DynamoDb()
    print "Calling list heatmaps.  This should list out the existing heatmaps"
    result = db.get_key("heatmaps")
    heatmaps = []
    if result and len(result['Items']) > 0:
        heatmaps = result['Items']
    return heatmaps
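The handlers in these examples read and write through a shared DynamoDb wrapper that is not shown. A minimal in-memory stand-in, sketched below under the assumption that get_key returns the {'Items': [{'value': [...]}]} shape the code above indexes into, makes the expected data layout explicit (FakeDynamoDb and its contents are hypothetical):

class FakeDynamoDb:
    def __init__(self):
        # One "heatmaps" item whose 'value' holds the list of heatmap records.
        self.store = {"heatmaps": [{"id": "hm-1", "heatmap": {"id": "hm-1"}}]}

    def get_key(self, key):
        # Mirrors the {'Items': [{'value': [...]}]} shape the handlers index into.
        if key in self.store:
            return {"Items": [{"value": self.store[key]}]}
        return {"Items": []}

    def set(self, key, value):
        self.store[key] = value
        return True

print(FakeDynamoDb().get_key("heatmaps")["Items"][0]["value"])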
Example #2
def get(request, id):
    db = DynamoDb()
    result = db.get_key("heatmaps")
    if result and len(result['Items']) > 0:
        for item in result['Items']:
            for heatmap in item['value']:
                if heatmap['id'] == id:
                    return heatmap
    return None
Example #3
def post(request, meta):
    db = DynamoDb()

    heatmapArr = []
    existingHeatmaps = db.get_key("heatmaps")
    if existingHeatmaps and len(existingHeatmaps['Items']) > 0:
        heatmapArr = existingHeatmaps['Items'][0]['value']

    # TODO: Remove [0] if testing fails
    heatmapArr.append({"id": meta['id'], "heatmap": meta})
    return db.set("heatmaps", heatmapArr)
Example #4
def get(request, filter=None):
    db = DynamoDb()
    # Avoid a mutable default argument; an empty filter matches everything.
    filter = filter or []
    result = db.get(db.context_table)
    response = {}
    if result and len(result['Items']) > 0:
        for pair in result['Items']:
            if not filter or pair[c.KEY_PRIMARY] in filter:
                response[pair[c.KEY_PRIMARY]] = pair[c.KEY_SECONDARY]

    return response
Example #5
def delete(request, id):
    db = DynamoDb()

    existingHeatmaps = db.get_key("heatmaps")
    heatmapArr = existingHeatmaps['Items'][0]['value']
    print(heatmapArr)

    # Rebuild the list instead of calling remove() while iterating,
    # which skips the element that follows each removal.
    heatmapArr = [heatmap for heatmap in heatmapArr if heatmap['id'] != id]
    print(heatmapArr)
    return db.set("heatmaps", heatmapArr)
Example #6
def put(request, id, meta):
    db = DynamoDb()

    existingHeatmaps = db.get_key("heatmaps")
    heatmapArr = existingHeatmaps['Items'][0]['value']

    newHeatmap = {"id": meta['id'], "heatmap": meta}
    heatmapArr = [
        newHeatmap if heatmap['id'] == id else heatmap
        for heatmap in heatmapArr
    ]
    return db.set("heatmaps", heatmapArr)
Example #7
def context(event=None):
    '''Returns an initialized context.'''
    if not hasattr(context, 'client'):
        debug_print("Initializing the context for the first time.")
        # Guard against event=None so the attribute lookup cannot fail.
        context.client = (event or {}).get("context", {})
        DynamoDb(context.client)
    return context.client
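context() caches its result as an attribute on the function object itself, so the initialization runs only on the first call. A minimal self-contained sketch of the same pattern (get_config and its keys are illustrative, not part of the real module):

def get_config(event=None):
    if not hasattr(get_config, "cached"):
        print("Initializing for the first time.")
        get_config.cached = (event or {}).get("config", {})
    return get_config.cached

print(get_config({"config": {"region": "us-east-1"}}))  # initializes the cache
print(get_config())                                      # served from the cache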
Example #8
def create_hour_batch(user_id):
    """
    Run periodically to create hour batch.
    :param user_id:
    :return:
    """
    db = DynamoDb()
    utc_time = datetime.utcnow()
    hash_key = 'user_id'
    range_key = 'timestamp'

    records = db.query_range(
        TABLE_NAME, hash_key, user_id, range_key,
        utc_time - timedelta(seconds=HOURLY_BUCKET_SIZE_IN_SECONDS),
        utc_time - timedelta(seconds=1)).get('Items')
    request_count = len(records)
    for record in records:
        pk_dict = {
            'user_id': record.get('user_id'),
            'timestamp': record.get('timestamp')
        }
        db.delete(TABLE_NAME, **pk_dict)
    db.add(TABLE_NAME,
           hash_key,
           user_id,
           range_key,
           datetime.strftime(utc_time, HOURLY_TIMESTAMP_FORMAT),
           hourly_count=request_count,
           monthly_count=0)
Example #9
def update_context(request, data=None):
    db = DynamoDb()
    for key, value in (data or {}).items():
        print("Updating '{}' with value '{}'".format(key, value))
        params = {}
        params["UpdateExpression"] = "SET #val = :val"
        params["ExpressionAttributeNames"] = {'#val': c.KEY_SECONDARY}
        params["ExpressionAttributeValues"] = {':val': value}
        params["Key"] = {c.KEY_PRIMARY: key}
        try:
            db.update(db.context_table.update_item, params)
        except Exception as e:
            raise ClientError(
                "Error updating the context parameter '{}' with value '{}'.\nError: {}"
                .format(key, value, e))

    return data
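Each iteration above builds a standard DynamoDB UpdateItem parameter set. A self-contained sketch of the resulting structure for one key/value pair, with placeholder attribute names standing in for c.KEY_PRIMARY and c.KEY_SECONDARY:

KEY_PRIMARY, KEY_SECONDARY = "key", "value"   # assumed attribute names

params = {
    "Key": {KEY_PRIMARY: "theme"},
    "UpdateExpression": "SET #val = :val",
    "ExpressionAttributeNames": {"#val": KEY_SECONDARY},
    "ExpressionAttributeValues": {":val": "dark"},
}
print(params)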
Example #10
def main(event, lambdacontext):
    global context
    global timestamp
    global aws_sqs
    start = time.time()
    ok_response = {
        'StatusCode': 200,
    }
    refreshtime = datetime.datetime.utcnow() - datetime.timedelta(minutes=1)
    if context is None or aws_sqs is None or refreshtime > timestamp:
        context = {}
        stack_id = os.environ[c.ENV_DEPLOYMENT_STACK_ARN]
        context[c.KEY_REQUEST_ID] = lambdacontext.aws_request_id if hasattr(lambdacontext, 'aws_request_id') else None
        db = DynamoDb(context)
        prefix = util.get_stack_name_from_arn(stack_id)
        aws_sqs = Sqs(context, queue_prefix="{}_".format(prefix))
        aws_sqs.set_queue_url(True)
        timestamp = datetime.datetime.utcnow()
    else:
        context[c.KEY_SQS_QUEUE_URL] = aws_sqs.queue_url

    data = event.get(c.API_PARAM_PAYLOAD, {})[c.API_PARAM_DATA]
    source_IP = event.get(c.API_PARAM_SOURCE_IP, None)
    sensitivity_type = event.get(c.SQS_PARAM_SENSITIVITY_TYPE, sensitivity.SENSITIVITY_TYPE.NONE)
    compression_mode = event.get(c.SQS_PARAM_COMPRESSION_TYPE, compression.COMPRESSION_MODE.NONE)
    payload_type = event.get(c.SQS_PARAM_PAYLOAD_TYPE, payload.PAYLOAD_TYPE.CSV)
    compression_mode = CompressionClassFactory.instance(compression_mode)
    sensitivity_type = SensitivityClassFactory.instance(sensitivity_type)
    payload_type = PayloadClassFactory.instance(context, payload_type, compression_mode, sensitivity_type, source_IP)

    print("[{}]Using SQS queue URL '{}'".format(context[c.KEY_REQUEST_ID], aws_sqs.queue_url))
    if os.environ[c.ENV_VERBOSE] == "True":
        print("The post request contains a payload of\n{}".format(data))
    if data is None:
        print("Terminating, there is no data.")
        return ok_response

    total_metrics = "all"
    try:
        data_size = len(data) + sqs.message_overhead_size(sensitivity_type, compression_mode, payload_type)
        message_chunks, total_metrics = payload_type.chunk(data)

        for message in message_chunks:
            print("Sending an SQS message with {} bytes".format(len(message)))
            aws_sqs.send_message(message, sensitivity_type, compression_mode, payload_type)
    except Exception as e:
        traceback.print_exc()
        raise errors.ClientError(str(e))

    print("The job sent {} metric(s) to the FIFO queue '{}'".format(total_metrics, aws_sqs.queue_url))
    print("The job took {} seconds.".format(time.time() - start))
    return ok_response
Example #11
def check_rate_limit(user_id):
    db = DynamoDb()
    utc_time = datetime.utcnow()
    hash_key = 'user_id'
    range_key = 'timestamp'
    seconds_records = db.query_range(TABLE_NAME, hash_key, user_id, range_key,
                                     utc_time - timedelta(seconds=1),
                                     utc_time).get('Items')

    # seconds_records = list(filter(lambda x: x.get('hourly_count', 0) == 0 and
    #                                         x.get('yearly_count', 0) == 0, seconds_records))
    if len(seconds_records) > SECONDS_RATE_LIMIT:
        return False

    hourly_records = db.query_range(
        TABLE_NAME,
        hash_key,
        user_id,
        range_key,
        utc_time - timedelta(seconds=3600),
        utc_time,
    ).get('Items')
    total_hourly_count = 0
    for hr in hourly_records:
        total_hourly_count += hr.get('hourly_count', 0)
    if total_hourly_count + len(seconds_records) > HOURLY_RATE_LIMIT:
        return False

    # Note: this window spans 24 hours, not a calendar month.
    monthly_records = db.query_range(
        TABLE_NAME,
        hash_key,
        user_id,
        range_key,
        utc_time - timedelta(seconds=3600 * 24),
        utc_time,
    ).get('Items')
    total_monthly_count = 0
    for mr in monthly_records:
        total_monthly_count += mr.get('monthly_count', 0)

    if total_monthly_count + total_hourly_count + len(
            seconds_records) > MONTHLY_RATE_LIMIT:
        return False
    db.add(TABLE_NAME,
           hash_key,
           user_id,
           range_key,
           datetime.strftime(utc_time, SECONDS_TIMESTAMP_FORMAT),
           hourly_count=0,
           monthly_count=0)
    return True
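The check layers three windows: raw per-second records are counted directly, while older records carry pre-aggregated hourly_count and monthly_count fields that are summed. A self-contained sketch of the counting logic with fake records and assumed limits (the real ones come from the module's constants):

SECONDS_RATE_LIMIT, HOURLY_RATE_LIMIT = 5, 100   # assumed values

seconds_records = [{}, {}]                       # two hits in the last second
hourly_records = [{"hourly_count": 40}, {"hourly_count": 30}]

total_hourly = sum(r.get("hourly_count", 0) for r in hourly_records)
allowed = (len(seconds_records) <= SECONDS_RATE_LIMIT
           and total_hourly + len(seconds_records) <= HOURLY_RATE_LIMIT)
print(allowed)  # True: 2 <= 5 and 72 <= 100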
Example #12
def generate_threads(functionid, threads_count, iterations_per_thread, events_per_iteration, sleep_duration, use_lambda, event_type, sensitivity_type, compression_mode):
    start = time.time()
    context = {}
    threadpool = ThreadPool(context, threads_count)
    db = DynamoDb(context)
    print("Sleep durations: ", sleep_duration)
    print("Number of threads: ", threads_count)
    print("Number of iterations per thread: ", iterations_per_thread)
    print("Number of events per iteration: ", events_per_iteration)
    print("Using event type: ", event_type)
    print("Using sensitivity type: ", sensitivity_type)
    print("Using compression mode: ", compression_mode)
    for i in range(0, threads_count):
        threadpool.add(thread_job, functionid, iterations_per_thread, events_per_iteration, use_lambda, context, sleep_duration, event_type, sensitivity_type, compression_mode)
    threadpool.wait()
    print("A total of {} metrics have been sent to the FIFO queues.".format((iterations_per_thread * events_per_iteration) * threads_count))
    print("The overall process took {} seconds.".format(time.time() - start))
Example #13
def main(event, request):
    context = dict({})
    context[c.KEY_LAMBDA_FUNCTION] = request.function_name if hasattr(
        request, 'function_name') else None
    context[c.KEY_REQUEST_ID] = request.aws_request_id if hasattr(
        request, 'aws_request_id') else None
    stackid = os.environ[c.ENV_DEPLOYMENT_STACK_ARN]

    context[c.KEY_DB] = DynamoDb(context)
    context[c.KEY_ATHENA_QUERY] = Query(stackid)
    context[c.KEY_GLUE_CRAWLER] = Glue()
    thread_pool = ThreadPool(size=3)
    crawler_name = context[c.KEY_GLUE_CRAWLER].get_crawler_name(stackid)
    crawler = Crawler(context, os.environ[c.ENV_S3_STORAGE])
    glue = Glue()
    events = glue.get_events()

    start = datetime.datetime.utcnow() - datetime.timedelta(hours=2)
    now = datetime.datetime.utcnow()

    found = False
    for event_type in events:
        dt = start
        while dt <= now:
            prefix = metric_schema.s3_key_format().format(
                context[c.KEY_SEPERATOR_PARTITION], dt.year, dt.month, dt.day,
                dt.hour, event_type, dt.strftime(util.partition_date_format()))
            found = crawler.exists(prefix)
            if found:
                print("FOUND new events=>", prefix)
                break
            dt += timedelta(hours=1)
        if found:
            break

    if found:
        thread_pool.add(crawl, context, crawler_name,
                        context[c.KEY_ATHENA_QUERY].execute_with_format)
        thread_pool.wait()

    return custom_resource_response.success_response({}, "*")
Example #14
def launch(event, lambdacontext):
    print "Start"
    hours_delta = 36
    context = dict({})
    context[c.KEY_LAMBDA_FUNCTION] = lambdacontext.function_name if hasattr(
        lambdacontext, 'function_name') else None
    context[c.KEY_REQUEST_ID] = lambdacontext.aws_request_id if hasattr(
        lambdacontext, 'aws_request_id') else None
    global threadpool
    global is_lambda
    threadpool = ThreadPool(context, 8)
    is_lambda = context[c.KEY_REQUEST_ID] is not None
    available_amoeba_lambdas = [
        c.ENV_AMOEBA_1,
        c.ENV_AMOEBA_2,
        c.ENV_AMOEBA_3,
        c.ENV_AMOEBA_4,
        c.ENV_AMOEBA_5,
    ]
    db = DynamoDb(context)
    crawler = Crawler(context, os.environ[c.ENV_S3_STORAGE])
    glue = Glue()

    events = glue.get_events()
    # TODO: Adjust the amoeba tree depth so that all available amoebas are fully utilized; len(available_amoeba_lambdas) * 1000.
    # Since the number of leaf nodes for the metric partitions can quickly get very large, a five-lambda pool is used to ensure the 1000-invocation limit is not hit.

    start = datetime.datetime.utcnow() - datetime.timedelta(hours=hours_delta)
    now = datetime.datetime.utcnow()

    for event_type in events:
        dt = start
        while dt <= now:
            prefix = metric_schema.s3_key_format().format(
                context[c.KEY_SEPERATOR_PARTITION], dt.year, dt.month, dt.day,
                dt.hour, event_type, dt.strftime(util.partition_date_format()))
            threadpool.add(crawler.crawl, prefix, available_amoeba_lambdas,
                           invoke_lambda)
            dt += timedelta(hours=1)

    threadpool.wait()
    return custom_resource_response.success_response({"StatusCode": 200}, "*")
Example #15
def main(event, lambdacontext):
    context = dict({})
    stack_id = os.environ[c.ENV_DEPLOYMENT_STACK_ARN]
    context[c.KEY_LAMBDA_FUNCTION] = lambdacontext.function_name if hasattr(
        lambdacontext, 'function_name') else None
    context[c.KEY_REQUEST_ID] = lambdacontext.aws_request_id if hasattr(
        lambdacontext, 'aws_request_id') else None
    is_lambda = context[c.KEY_REQUEST_ID] is not None
    db = DynamoDb(context)
    if not is_lambda:
        import lambda_fifo_message_consumer as consumer

    prefix = util.get_stack_name_from_arn(stack_id)
    sqs = Sqs(context, "{0}_".format(prefix))
    awslambda = Lambda(context)

    if sqs.is_all_under_load:
        sqs.add_fifo_queue(prefix)

    queues = sqs.get_queues()
    for queue_url in queues:
        payload = {c.KEY_SQS_QUEUE_URL: queue_url, "context": context}
        print "Starting {} with queue url '{}'".format(
            "lambda" if is_lambda else "thread", queue_url)
        if is_lambda:
            invoke(context, awslambda, payload)
        else:
            payload[c.ENV_STACK_ID] = event['StackId']
            consumer.main(
                payload,
                type('obj', (object, ),
                     {'function_name': context[c.KEY_LAMBDA_FUNCTION]}))

    print "{} {} lambdas have started".format(len(queues),
                                              context[c.KEY_LAMBDA_FUNCTION])
    return custom_resource_response.success_response({}, "*")
Example #16
def post(request, facetid, meta):
    db = DynamoDb()
    return db.set(facetid, meta)
Example #17
def get(request, facetid):
    db = DynamoDb()
    result = db.get_key(facetid)
    result = json.dumps(result, cls=util.DynamoDbDecoder)
    return result
Example #18
def launch(event, lambdacontext):
    util.debug_print("Start Amoeba Launcher")
    context = dict({})
    context[c.KEY_START_TIME] = time.time()
    context[c.KEY_LAMBDA_FUNCTION] = lambdacontext.function_name if hasattr(
        lambdacontext, 'function_name') else None
    context[c.KEY_REQUEST_ID] = lambdacontext.aws_request_id if hasattr(
        lambdacontext, 'aws_request_id') else None
    prefix = util.get_stack_name_from_arn(
        os.environ[c.ENV_DEPLOYMENT_STACK_ARN])
    prefix = "{0}{1}".format(prefix, c.KEY_SQS_AMOEBA_SUFFIX)
    db = DynamoDb(context)
    sqs = Sqs(context, prefix, "sqs")
    sqs.set_queue_url(lowest_load_queue=False)

    if sqs.is_all_under_load:
        sqs.add_fifo_queue(prefix)

    elapsed = util.elapsed(context)
    timeout = context[c.KEY_MAX_LAMBDA_TIME] * c.RATIO_OF_MAX_LAMBDA_TIME
    # Maps each S3 directory to its accumulated paths and payload size.
    directory_map = {}
    queues_checked = 0
    number_of_queues = sqs.number_of_queues
    sqs_delete_tokens = {}
    while elapsed < timeout and queues_checked < number_of_queues:
        messages = sqs.read_queue()
        length = len(messages)
        if sqs.queue_url not in sqs_delete_tokens:
            sqs_delete_tokens[sqs.queue_url] = []

        if length > 0:
            for message in messages:
                body = json.loads(message["Body"])
                paths = body["paths"]
                msg_token = "{}{}{}".format(message['MessageId'],
                                            context[c.KEY_SEPERATOR_CSV],
                                            message['ReceiptHandle'])
                sqs_delete_tokens[sqs.queue_url].append(msg_token)
                for path in paths:
                    parts = path.split(context[c.KEY_SEPERATOR_PARTITION])
                    filename = parts.pop()
                    directory = context[c.KEY_SEPERATOR_PARTITION].join(parts)
                    if directory not in directory_map:
                        directory_map[directory] = {"paths": [], "size": 0}
                    # The Lambda payload limit for the Event invocation type is 131072 bytes.
                    sizeof = len(path) + directory_map[directory]["size"]
                    is_invoked = directory_map[directory].get("invoked", False)
                    if sizeof >= c.MAXIMUM_ASYNC_PAYLOAD_SIZE and not is_invoked:
                        invoke_lambda(context, directory,
                                      directory_map[directory]["paths"])
                        directory_map[directory] = {
                            "paths": [],
                            "size": 0,
                            "invoked": True
                        }
                    else:
                        directory_map[directory]["paths"].append(path)
                        directory_map[directory]["size"] = sizeof

        else:
            queues_checked += 1
            sqs.set_queue_url(lowest_load_queue=False)

        elapsed = util.elapsed(context)

    # Invoke an amoeba generator for each S3 leaf node.
    for directory, settings in iteritems(directory_map):
        is_invoked = settings.get("invoked", False)
        # Amoebas are not designed to have multiple instances working against one directory.
        # If the amoeba was already invoked due to payload size, requeue the remaining paths.
        if is_invoked:
            sqs.send_generic_message(json.dumps({"paths": settings["paths"]}))
        else:
            invoke_lambda(context, directory, settings["paths"])

    context[c.KEY_THREAD_POOL] = ThreadPool(context, 8)
    # Delete the SQS messages that have been processed.
    for key, value in iteritems(sqs_delete_tokens):
        sqs.delete_message_batch(value, key)

    return custom_resource_response.success_response({"StatusCode": 200}, "*")
Example #19
def get(request, heatmapid):
    db = DynamoDb()