def list(request):
    db = DynamoDb()
    print "Calling list heatmaps. This should list out the existing heatmaps"
    result = db.get_key("heatmaps")
    heatmaps = []
    if result and len(result['Items']) > 0:
        heatmaps = result['Items']
    return heatmaps
def get(request, id):
    db = DynamoDb()
    result = db.get_key("heatmaps")
    if result and len(result['Items']) > 0:
        for item in result['Items']:
            for heatmap in item['value']:
                if heatmap['id'] == id:
                    return heatmap
    return None
def post(request, meta):
    db = DynamoDb()
    heatmapArr = []
    existingHeatmaps = db.get_key("heatmaps")
    heatmapArr = existingHeatmaps['Items'][0]['value']  # TODO: Remove [0] if testing fails
    heatmapArr.append({"id": meta['id'], "heatmap": meta})
    return db.set("heatmaps", heatmapArr)
def get(request, filter=[]):
    db = DynamoDb()
    result = db.get(db.context_table)
    response = dict({})
    if result and len(result['Items']) > 0:
        for pair in result['Items']:
            if pair[c.KEY_PRIMARY] in filter or len(filter) == 0:
                response[pair[c.KEY_PRIMARY]] = pair[c.KEY_SECONDARY]
    return response
def delete(request, id):
    db = DynamoDb()
    existingHeatmaps = db.get_key("heatmaps")
    heatmapArr = existingHeatmaps['Items'][0]['value']
    print heatmapArr
    # Build a filtered list instead of calling remove() while iterating,
    # which skips elements and can miss duplicates.
    heatmapArr = [heatmap for heatmap in heatmapArr if heatmap['id'] != id]
    print heatmapArr
    return db.set("heatmaps", heatmapArr)
def put(request, id, meta):
    db = DynamoDb()
    existingHeatmaps = db.get_key("heatmaps")
    heatmapArr = existingHeatmaps['Items'][0]['value']
    newHeatmap = {"id": meta['id'], "heatmap": meta}
    heatmapArr = [newHeatmap if heatmap['id'] == id else heatmap for heatmap in heatmapArr]
    return db.set("heatmaps", heatmapArr)
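# A minimal usage sketch for the heatmap handlers above (post, get, put, delete),
# assuming DynamoDb() can be constructed from the environment and that the
# handlers are called directly as module functions. The sample id, metadata
# fields, and the _heatmap_crud_example name are hypothetical.
def _heatmap_crud_example():
    meta = {"id": "spawn-points", "worldname": "level_01"}  # hypothetical payload
    post(None, meta)                    # append a new entry to the stored list
    print get(None, "spawn-points")     # read the entry back by id
    meta["worldname"] = "level_02"
    put(None, "spawn-points", meta)     # replace the entry in place
    delete(None, "spawn-points")        # filter it out of the stored list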
def context(event=None):
    '''Returns an initialized context.'''
    if not hasattr(context, 'client'):
        debug_print("Initializing the context for the first time.")
        context.client = event.get("context", {})
        DynamoDb(context.client)
    return context.client
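# A usage sketch for the memoized context() helper above, assuming a
# Lambda-style event dict and that DynamoDb() can be built from the
# environment; the "context" payload and _context_example name are hypothetical.
def _context_example(event):
    first = context(event)   # first call initializes context.client from the event
    second = context()       # later calls return the cached client without touching the event
    assert first is second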
def create_hour_batch(user_id):
    """
    Run periodically to roll a user's per-request records up into a single hourly batch record.

    :param user_id:
    :return:
    """
    db = DynamoDb()
    utc_time = datetime.utcnow()
    hash_key = 'user_id'
    range_key = 'timestamp'
    records = db.query_range(
        TABLE_NAME, hash_key, user_id, range_key,
        utc_time - timedelta(seconds=HOURLY_BUCKET_SIZE_IN_SECONDS),
        utc_time - timedelta(seconds=1)).get('Items')
    request_count = len(records)
    for record in records:
        pk_dict = {
            'user_id': record.get('user_id'),
            'timestamp': record.get('timestamp')
        }
        db.delete(TABLE_NAME, **pk_dict)
    db.add(TABLE_NAME, hash_key, user_id, range_key,
           datetime.strftime(utc_time, HOURLY_TIMESTAMP_FORMAT),
           hourly_count=request_count, monthly_count=0)
def update_context(request, data=None):
    db = DynamoDb()
    for item in data:
        key = item
        value = data[item]
        print("Updating '{}' with value '{}'".format(key, value))
        params = dict({})
        params["UpdateExpression"] = "SET #val = :val"
        params["ExpressionAttributeNames"] = {'#val': c.KEY_SECONDARY}
        params["ExpressionAttributeValues"] = {':val': value}
        params["Key"] = {c.KEY_PRIMARY: key}
        try:
            db.update(db.context_table.update_item, params)
        except Exception as e:
            raise ClientError(
                "Error updating the context parameter '{}' with value '{}'.\nError: {}"
                .format(key, value, e))
    return data
def main(event, lambdacontext):
    global context
    global timestamp
    global aws_sqs
    start = time.time()
    ok_response = {
        'StatusCode': 200,
    }
    refreshtime = datetime.datetime.utcnow() - datetime.timedelta(minutes=1)
    if context is None or aws_sqs is None or refreshtime > timestamp:
        context = dict({})
        stack_id = os.environ[c.ENV_DEPLOYMENT_STACK_ARN]
        context[c.KEY_REQUEST_ID] = lambdacontext.aws_request_id if hasattr(lambdacontext, 'aws_request_id') else None
        db = DynamoDb(context)
        prefix = util.get_stack_name_from_arn(stack_id)
        aws_sqs = Sqs(context, queue_prefix="{}_".format(prefix))
        aws_sqs.set_queue_url(True)
        timestamp = datetime.datetime.utcnow()
    else:
        context[c.KEY_SQS_QUEUE_URL] = aws_sqs.queue_url
    data = event.get(c.API_PARAM_PAYLOAD, {})[c.API_PARAM_DATA]
    source_IP = event.get(c.API_PARAM_SOURCE_IP, None)
    sensitivity_type = event.get(c.SQS_PARAM_SENSITIVITY_TYPE, sensitivity.SENSITIVITY_TYPE.NONE)
    compression_mode = event.get(c.SQS_PARAM_COMPRESSION_TYPE, compression.COMPRESSION_MODE.NONE)
    payload_type = event.get(c.SQS_PARAM_PAYLOAD_TYPE, payload.PAYLOAD_TYPE.CSV)
    compression_mode = CompressionClassFactory.instance(compression_mode)
    sensitivity_type = SensitivityClassFactory.instance(sensitivity_type)
    payload_type = PayloadClassFactory.instance(context, payload_type, compression_mode, sensitivity_type, source_IP)
    print "[{}] Using SQS queue URL '{}'".format(context[c.KEY_REQUEST_ID], aws_sqs.queue_url)
    if os.environ[c.ENV_VERBOSE] == "True":
        print "The post request contains a payload of\n{}".format(data)
    if data is None:
        print "Terminating, there is no data."
        return ok_response
    total_metrics = "all"
    try:
        data_size = len(data) + sqs.message_overhead_size(sensitivity_type, compression_mode, payload_type)
        message_chunks, total_metrics = payload_type.chunk(data)
        for message in message_chunks:
            print "Sending an SQS message with {} bytes".format(len(message))
            aws_sqs.send_message(message, sensitivity_type, compression_mode, payload_type)
    except Exception as e:
        traceback.print_exc()
        raise errors.ClientError(e.message)
    print "The job sent {} metric(s) to the FIFO queue '{}'".format(total_metrics, aws_sqs.queue_url)
    print "The job took {} seconds.".format(time.time() - start)
    return ok_response
def check_rate_limit(user_id):
    db = DynamoDb()
    utc_time = datetime.utcnow()
    hash_key = 'user_id'
    range_key = 'timestamp'
    seconds_records = db.query_range(TABLE_NAME, hash_key, user_id, range_key,
                                     utc_time - timedelta(seconds=1),
                                     utc_time).get('Items')
    # seconds_records = list(filter(lambda x: x.get('hourly_count', 0) == 0 and
    #                               x.get('yearly_count', 0) == 0, seconds_records))
    if len(seconds_records) > SECONDS_RATE_LIMIT:
        return False
    hourly_records = db.query_range(
        TABLE_NAME, hash_key, user_id, range_key,
        utc_time - timedelta(seconds=3600),
        utc_time,
    ).get('Items')
    total_hourly_count = 0
    for hr in hourly_records:
        total_hourly_count += hr.get('hourly_count', 0)
    if total_hourly_count + len(seconds_records) > HOURLY_RATE_LIMIT:
        return False
    monthly_records = db.query_range(
        TABLE_NAME, hash_key, user_id, range_key,
        utc_time - timedelta(seconds=3600 * 24),
        utc_time,
    ).get('Items')
    total_monthly_count = 0
    for mr in monthly_records:
        total_monthly_count += mr.get('monthly_count', 0)
    if total_monthly_count + total_hourly_count + len(seconds_records) > MONTHLY_RATE_LIMIT:
        return False
    db.add(TABLE_NAME, hash_key, user_id, range_key,
           datetime.strftime(utc_time, SECONDS_TIMESTAMP_FORMAT),
           hourly_count=0, monthly_count=0)
    return True
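# A minimal sketch of how create_hour_batch and check_rate_limit might be wired
# together, assuming check_rate_limit guards each incoming request and
# create_hour_batch runs on a schedule (for example an hourly CloudWatch rule).
# The handle_request / hourly_rollup names, the process() call, and the 429
# response shape are hypothetical.
def handle_request(user_id, request):
    if not check_rate_limit(user_id):
        return {'StatusCode': 429, 'Body': 'rate limit exceeded'}
    return process(request)  # hypothetical downstream handler

def hourly_rollup(user_ids):
    # Invoked periodically; collapses the per-second records written by
    # check_rate_limit into one hourly record per user.
    for user_id in user_ids:
        create_hour_batch(user_id)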
def generate_threads(functionid, threads_count, iterations_per_thread,
                     events_per_iteration, sleep_duration, use_lambda,
                     event_type, sensitivity_type, compression_mode):
    start = time.time()
    context = {}
    threadpool = ThreadPool(context, threads_count)
    context = dict({})
    db = DynamoDb(context)
    print "Sleep durations: ", sleep_duration
    print "Number of threads: ", threads_count
    print "Number of iterations per thread: ", iterations_per_thread
    print "Number of events per iteration: ", events_per_iteration
    print "Using event type: ", event_type
    print "Using sensitivity type: ", sensitivity_type
    print "Using compression mode: ", compression_mode
    for i in range(0, threads_count):
        threadpool.add(thread_job, functionid, iterations_per_thread,
                       events_per_iteration, use_lambda, context,
                       sleep_duration, event_type, sensitivity_type,
                       compression_mode)
    threadpool.wait()
    print "A total of {} metrics have been sent to the FIFO queues.".format((iterations_per_thread * events_per_iteration) * threads_count)
    print "The overall process took {} seconds.".format(time.time() - start)
def main(event, request):
    context = dict({})
    context[c.KEY_LAMBDA_FUNCTION] = request.function_name if hasattr(request, 'function_name') else None
    context[c.KEY_REQUEST_ID] = request.aws_request_id if hasattr(request, 'aws_request_id') else None
    stackid = os.environ[c.ENV_DEPLOYMENT_STACK_ARN]
    context[c.KEY_DB] = DynamoDb(context)
    context[c.KEY_ATHENA_QUERY] = Query(stackid)
    context[c.KEY_GLUE_CRAWLER] = Glue()
    thread_pool = ThreadPool(size=3)
    crawler_name = context[c.KEY_GLUE_CRAWLER].get_crawler_name(stackid)
    crawler = Crawler(context, os.environ[c.ENV_S3_STORAGE])
    glue = Glue()
    events = glue.get_events()
    start = datetime.datetime.utcnow() - datetime.timedelta(hours=2)
    now = datetime.datetime.utcnow()
    found = False
    for type in events:
        dt = start
        while dt <= now:
            prefix = metric_schema.s3_key_format().format(
                context[c.KEY_SEPERATOR_PARTITION], dt.year, dt.month, dt.day,
                dt.hour, type, dt.strftime(util.partition_date_format()))
            found = crawler.exists(prefix)
            if found:
                print "FOUND new events=>", prefix
                break
            dt += timedelta(hours=1)
        if found:
            break
    if found:
        thread_pool.add(crawl, context, crawler_name,
                        context[c.KEY_ATHENA_QUERY].execute_with_format)
        thread_pool.wait()
    return custom_resource_response.success_response({}, "*")
def launch(event, lambdacontext):
    print "Start"
    hours_delta = 36
    context = dict({})
    context[c.KEY_LAMBDA_FUNCTION] = lambdacontext.function_name if hasattr(lambdacontext, 'function_name') else None
    context[c.KEY_REQUEST_ID] = lambdacontext.aws_request_id if hasattr(lambdacontext, 'aws_request_id') else None
    global threadpool
    global is_lambda
    threadpool = ThreadPool(context, 8)
    is_lambda = context[c.KEY_REQUEST_ID] is not None
    available_amoeba_lambdas = []
    available_amoeba_lambdas.append(c.ENV_AMOEBA_1)
    available_amoeba_lambdas.append(c.ENV_AMOEBA_2)
    available_amoeba_lambdas.append(c.ENV_AMOEBA_3)
    available_amoeba_lambdas.append(c.ENV_AMOEBA_4)
    available_amoeba_lambdas.append(c.ENV_AMOEBA_5)
    db = DynamoDb(context)
    crawler = Crawler(context, os.environ[c.ENV_S3_STORAGE])
    glue = Glue()
    events = glue.get_events()
    # TODO: adjust the amoeba tree depth so that we have fully utilized all available amoebas; len(available_amoeba_lambdas) * 1000
    # Since the number of leaf nodes for the metric partitions can quickly get very large we use a 5 lambda pool to ensure we don't hit the 1000 invocation limit.
    start = datetime.datetime.utcnow() - datetime.timedelta(hours=hours_delta)
    now = datetime.datetime.utcnow()
    for type in events:
        dt = start
        while dt <= now:
            prefix = metric_schema.s3_key_format().format(
                context[c.KEY_SEPERATOR_PARTITION], dt.year, dt.month, dt.day,
                dt.hour, type, dt.strftime(util.partition_date_format()))
            threadpool.add(crawler.crawl, prefix, available_amoeba_lambdas, invoke_lambda)
            dt += timedelta(hours=1)
    threadpool.wait()
    return custom_resource_response.success_response({"StatusCode": 200}, "*")
def main(event, lambdacontext):
    context = dict({})
    stack_id = os.environ[c.ENV_DEPLOYMENT_STACK_ARN]
    context[c.KEY_LAMBDA_FUNCTION] = lambdacontext.function_name if hasattr(lambdacontext, 'function_name') else None
    context[c.KEY_REQUEST_ID] = lambdacontext.aws_request_id if hasattr(lambdacontext, 'aws_request_id') else None
    is_lambda = context[c.KEY_REQUEST_ID] is not None
    db = DynamoDb(context)
    if not is_lambda:
        import lambda_fifo_message_consumer as consumer
    prefix = util.get_stack_name_from_arn(stack_id)
    sqs = Sqs(context, "{0}_".format(prefix))
    awslambda = Lambda(context)
    if sqs.is_all_under_load:
        sqs.add_fifo_queue(prefix)
    queues = sqs.get_queues()
    for queue_url in queues:
        payload = {c.KEY_SQS_QUEUE_URL: queue_url, "context": context}
        print "Starting {} with queue url '{}'".format("lambda" if is_lambda else "thread", queue_url)
        if is_lambda:
            invoke(context, awslambda, payload)
        else:
            payload[c.ENV_STACK_ID] = event['StackId']
            consumer.main(payload, type('obj', (object,), {'function_name': context[c.KEY_LAMBDA_FUNCTION]}))
    print "{} {} lambdas have started".format(len(queues), context[c.KEY_LAMBDA_FUNCTION])
    return custom_resource_response.success_response({}, "*")
def post(request, facetid, meta):
    db = DynamoDb()
    return db.set(facetid, meta)
def get(request, facetid):
    db = DynamoDb()
    result = db.get_key(facetid)
    result = json.dumps(result, cls=util.DynamoDbDecoder)
    return result
def launch(event, lambdacontext):
    util.debug_print("Start Amoeba Launcher")
    context = dict({})
    context[c.KEY_START_TIME] = time.time()
    context[c.KEY_LAMBDA_FUNCTION] = lambdacontext.function_name if hasattr(lambdacontext, 'function_name') else None
    context[c.KEY_REQUEST_ID] = lambdacontext.aws_request_id if hasattr(lambdacontext, 'aws_request_id') else None
    prefix = util.get_stack_name_from_arn(os.environ[c.ENV_DEPLOYMENT_STACK_ARN])
    prefix = "{0}{1}".format(prefix, c.KEY_SQS_AMOEBA_SUFFIX)
    db = DynamoDb(context)
    sqs = Sqs(context, prefix, "sqs")
    sqs.set_queue_url(lowest_load_queue=False)
    if sqs.is_all_under_load:
        sqs.add_fifo_queue(prefix)
    elapsed = util.elapsed(context)
    timeout = context[c.KEY_MAX_LAMBDA_TIME] * c.RATIO_OF_MAX_LAMBDA_TIME
    map = {}
    queues_checked = 0
    number_of_queues = sqs.number_of_queues
    sqs_delete_tokens = {}
    while elapsed < timeout and queues_checked < number_of_queues:
        messages = sqs.read_queue()
        length = len(messages)
        if sqs.queue_url not in sqs_delete_tokens:
            sqs_delete_tokens[sqs.queue_url] = []
        if length > 0:
            for x in range(0, length):
                message = messages[x]
                body = json.loads(message["Body"])
                paths = body["paths"]
                msg_token = "{}{}{}".format(message['MessageId'], context[c.KEY_SEPERATOR_CSV], message['ReceiptHandle'])
                sqs_delete_tokens[sqs.queue_url].append(msg_token)
                for i in range(0, len(paths)):
                    path = paths[i]
                    parts = path.split(context[c.KEY_SEPERATOR_PARTITION])
                    filename = parts.pop()
                    directory = context[c.KEY_SEPERATOR_PARTITION].join(parts)
                    if directory not in map:
                        map[directory] = {"paths": [], "size": 0}
                    # Lambda payload limit for the Event invocation type is 131072 bytes.
                    sizeof = len(path) + map[directory]["size"]
                    is_invoked = map[directory].get("invoked", False)
                    if sizeof >= c.MAXIMUM_ASYNC_PAYLOAD_SIZE and not is_invoked:
                        invoke_lambda(context, directory, map[directory]["paths"])
                        map[directory] = {
                            "paths": [],
                            "size": 0,
                            "invoked": True
                        }
                    else:
                        map[directory]["paths"].append(path)
                        map[directory]["size"] = sizeof
        else:
            queues_checked += 1
            sqs.set_queue_url(lowest_load_queue=False)
        elapsed = util.elapsed(context)
    # Invoke an amoeba generator for each S3 leaf node.
    for directory, settings in iteritems(map):
        is_invoked = settings.get("invoked", False)
        # Amoebas are not designed to have multiple amoebas working against one directory.
        # If the amoeba has already been invoked due to payload size then we requeue the remaining paths.
        if is_invoked:
            sqs.send_generic_message(json.dumps({"paths": settings["paths"]}))
        else:
            invoke_lambda(context, directory, settings["paths"])
    context[c.KEY_THREAD_POOL] = ThreadPool(context, 8)
    # Delete SQS messages that have been processed.
    for key, value in iteritems(sqs_delete_tokens):
        sqs.delete_message_batch(value, key)
    return custom_resource_response.success_response({"StatusCode": 200}, "*")
def get(request, heatmapid):
    db = DynamoDb()
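# A minimal sketch of how a heatmapid lookup could be completed, assuming it
# mirrors the id-based heatmap get handler earlier in this listing (scan the
# stored "heatmaps" list and return the matching entry, or None). The
# get_heatmap_by_id name is hypothetical and used here to avoid shadowing the
# stub above.
def get_heatmap_by_id(request, heatmapid):
    db = DynamoDb()
    result = db.get_key("heatmaps")
    if result and len(result['Items']) > 0:
        for item in result['Items']:
            for heatmap in item['value']:
                if heatmap['id'] == heatmapid:
                    return heatmap
    return None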