def test_it_applies_time_delta(dt):
    dt.now.return_value = datetime.datetime(
        2020, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc
    )
    assert 1580428800 == utc_timestamp(days=30)
    assert 1577836800 == utc_timestamp()
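# A minimal sketch of the utc_timestamp helper the test above exercises. This is an
# assumption inferred from the assertions (epoch seconds for "now" in UTC, shifted by
# an optional timedelta), not necessarily the project's actual implementation.
import datetime


def utc_timestamp(**delta):
    """Return the current UTC time as epoch seconds, offset by an optional timedelta."""
    now = datetime.datetime.now(datetime.timezone.utc)
    return round((now + datetime.timedelta(**delta)).timestamp())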
def process_handler(event, context):
    if running_job_exists():
        raise ValueError("There is already a job in progress")

    job_id = str(uuid.uuid4())
    config = get_config()
    deletion_queue_key = "jobs/{}/deletion_queue/data.json".format(job_id)
    item = {
        "Id": job_id,
        "Sk": job_id,
        "Type": "Job",
        "JobStatus": "QUEUED",
        "GSIBucket": str(random.randint(0, bucket_count - 1)),
        "CreatedAt": utc_timestamp(),
        "DeletionQueueBucket": deletion_queue_bucket,
        "DeletionQueueKey": deletion_queue_key,
        "DeletionQueueItemsSkipped": False,
        "CreatedBy": get_user_info(event),
        **{k: v for k, v in config.items() if k not in ["JobDetailsRetentionDays"]},
    }
    if int(config.get("JobDetailsRetentionDays", 0)) > 0:
        item["Expires"] = utc_timestamp(days=config["JobDetailsRetentionDays"])

    deletion_queue_items = {"DeletionQueueItems": []}
    for extended_deletion_queue_item in get_deletion_queue():
        deletion_item = {
            "DeletionQueueItemId": extended_deletion_queue_item["DeletionQueueItemId"],
            "MatchId": extended_deletion_queue_item["MatchId"],
            "DataMappers": extended_deletion_queue_item["DataMappers"],
        }
        deletion_queue_items["DeletionQueueItems"].append(deletion_item)

    obj = s3.Object(deletion_queue_bucket, deletion_queue_key)
    obj.put(Body=json.dumps(deletion_queue_items))

    jobs_table.put_item(Item=item)

    # after sending the data to dynamo add the deletion_queue to the response
    item["DeletionQueueItems"] = list(
        map(lambda x: x["MatchId"], deletion_queue_items["DeletionQueueItems"])
    )

    return {"statusCode": 202, "body": json.dumps(item, cls=DecimalEncoder)}
def list_jobs_handler(event, context):
    qs = event.get("queryStringParameters")
    if not qs:
        qs = {}
    page_size = int(qs.get("page_size", 10))
    start_at = int(qs.get("start_at", utc_timestamp()))

    items = []
    for gsi_bucket in range(0, bucket_count):
        response = table.query(
            IndexName=index,
            KeyConditionExpression=Key("GSIBucket").eq(str(gsi_bucket))
            & Key("CreatedAt").lt(start_at),
            ScanIndexForward=False,
            Limit=page_size,
            ProjectionExpression=", ".join(job_summary_attributes),
        )
        items += response.get("Items", [])
    items = sorted(items, key=lambda i: i["CreatedAt"], reverse=True)[:page_size]
    if len(items) < page_size:
        next_start = None
    else:
        next_start = min([item["CreatedAt"] for item in items])

    return {
        "statusCode": 200,
        "body": json.dumps(
            {"Jobs": items, "NextStart": next_start}, cls=DecimalEncoder
        ),
    }
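# Illustrative only: how a paginated call to list_jobs_handler might look. The event
# shape follows API Gateway's Lambda proxy format; all values here are invented.
example_list_jobs_event = {
    "queryStringParameters": {"page_size": "5", "start_at": "1577836800"}
}
# resp = list_jobs_handler(example_list_jobs_event, None)
# body = json.loads(resp["body"])
# Passing body["NextStart"] as the next call's start_at fetches the following page;
# NextStart is None once the final page has been returned.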
def handler(event, context):
    records = event["Records"]
    new_jobs = [
        deserialize_item(r["dynamodb"]["NewImage"])
        for r in records
        if is_record_type(r, "Job") and is_operation(r, "INSERT")
    ]
    events = [
        deserialize_item(r["dynamodb"]["NewImage"])
        for r in records
        if is_record_type(r, "JobEvent") and is_operation(r, "INSERT")
    ]
    grouped_events = groupby(
        sorted(events, key=itemgetter("Id")), key=itemgetter("Id")
    )
    for job in new_jobs:
        process_job(job)
    for job_id, group in grouped_events:
        group = [i for i in group]
        update_stats(job_id, group)
        updated_job = update_status(job_id, group)
        # Perform cleanup if required
        if (
            updated_job
            and updated_job.get("JobStatus") == "FORGET_COMPLETED_CLEANUP_IN_PROGRESS"
        ):
            try:
                clear_deletion_queue(updated_job)
                emit_event(
                    job_id, "CleanupSucceeded", utc_timestamp(), "StreamProcessor"
                )
            except Exception as e:
                emit_event(
                    job_id,
                    "CleanupFailed",
                    {"Error": "Unable to clear deletion queue: {}".format(str(e))},
                    "StreamProcessor",
                )
        elif updated_job and updated_job.get("JobStatus") in skip_cleanup_states:
            emit_event(job_id, "CleanupSkipped", utc_timestamp(), "StreamProcessor")
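# A possible implementation of the deserialize_item helper used above, assuming it
# converts a DynamoDB Streams image (DynamoDB-typed JSON) into plain Python values.
# The real helper may differ; boto3's TypeDeserializer is one straightforward option.
from boto3.dynamodb.types import TypeDeserializer

_deserializer = TypeDeserializer()


def deserialize_item(image):
    """Convert a DynamoDB-typed image, e.g. {"Id": {"S": "123"}}, to {"Id": "123"}."""
    return {k: _deserializer.deserialize(v) for k, v in image.items()}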
def process_handler(event, context):
    if running_job_exists():
        raise ValueError("There is already a job in progress")

    job_id = str(uuid.uuid4())
    config = get_config()
    item = {
        "Id": job_id,
        "Sk": job_id,
        "Type": "Job",
        "JobStatus": "QUEUED",
        "GSIBucket": str(random.randint(0, bucket_count - 1)),
        "CreatedAt": utc_timestamp(),
        "DeletionQueueItems": [],
        "DeletionQueueItemsSkipped": False,
        "CreatedBy": get_user_info(event),
        **{k: v for k, v in config.items() if k not in ["JobDetailsRetentionDays"]},
    }
    if int(config.get("JobDetailsRetentionDays", 0)) > 0:
        item["Expires"] = utc_timestamp(days=config["JobDetailsRetentionDays"])

    item_size_bytes = calculate_ddb_item_bytes(item)

    for deletion_queue_item in get_deletion_queue():
        current_size_bytes = calculate_ddb_item_bytes(deletion_queue_item)
        if item_size_bytes + current_size_bytes < max_size_bytes:
            item["DeletionQueueItems"].append(deletion_queue_item)
            item_size_bytes += current_size_bytes
        else:
            item["DeletionQueueItemsSkipped"] = True
            break

    jobs_table.put_item(Item=item)

    return {"statusCode": 202, "body": json.dumps(item, cls=DecimalEncoder)}
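# Context for the size check above: a single DynamoDB item is limited to 400 KB, so
# the handler stops embedding queue items once the job item would approach that cap
# and flags DeletionQueueItemsSkipped instead. A very rough, assumed approximation of
# calculate_ddb_item_bytes is sketched below; the real helper likely follows
# DynamoDB's documented sizing rules more precisely.
import json


def calculate_ddb_item_bytes(item):
    # Rough estimate: UTF-8 length of each attribute name plus its JSON-encoded value.
    return sum(
        len(k.encode("utf-8")) + len(json.dumps(v, default=str).encode("utf-8"))
        for k, v in item.items()
    )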
def handler(event, context):
    records = event["Records"]
    new_jobs = get_records(records, "Job", "INSERT")
    deleted_jobs = get_records(records, "Job", "REMOVE", new_image=False)
    events = get_records(records, "JobEvent", "INSERT")
    grouped_events = groupby(
        sorted(events, key=itemgetter("Id")), key=itemgetter("Id")
    )
    for job in new_jobs:
        process_job(job)
    for job in deleted_jobs:
        cleanup_manifests(job)
    for job_id, group in grouped_events:
        group = [i for i in group]
        update_stats(job_id, group)
        updated_job = update_status(job_id, group)
        # Perform cleanup if required
        if (
            updated_job
            and updated_job.get("JobStatus") == "FORGET_COMPLETED_CLEANUP_IN_PROGRESS"
        ):
            try:
                clear_deletion_queue(updated_job)
                emit_event(
                    job_id, "CleanupSucceeded", utc_timestamp(), "StreamProcessor"
                )
            except Exception as e:
                emit_event(
                    job_id,
                    "CleanupFailed",
                    {"Error": "Unable to clear deletion queue: {}".format(str(e))},
                    "StreamProcessor",
                )
        elif updated_job and updated_job.get("JobStatus") in skip_cleanup_states:
            emit_event(job_id, "CleanupSkipped", utc_timestamp(), "StreamProcessor")
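# A sketch of the get_records helper introduced above, assuming it generalises the
# inline list comprehensions from the earlier handler: filter stream records by item
# type and operation, then deserialize either the new or the old image. The exact
# signature in the codebase may differ.
def get_records(records, record_type, operation, new_image=True):
    image_key = "NewImage" if new_image else "OldImage"
    return [
        deserialize_item(r["dynamodb"][image_key])
        for r in records
        if is_record_type(r, record_type) and is_operation(r, operation)
    ]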
def process_handler(event, context):
    if running_job_exists():
        raise ValueError("There is already a job in progress")

    job_id = str(uuid.uuid4())
    config = get_config()
    item = {
        "Id": job_id,
        "Sk": job_id,
        "Type": "Job",
        "JobStatus": "QUEUED",
        "GSIBucket": str(random.randint(0, bucket_count - 1)),
        "CreatedAt": utc_timestamp(),
        "CreatedBy": get_user_info(event),
        **{k: v for k, v in config.items() if k not in ["JobDetailsRetentionDays"]},
    }
    if int(config.get("JobDetailsRetentionDays", 0)) > 0:
        item["Expires"] = utc_timestamp(days=config["JobDetailsRetentionDays"])
    jobs_table.put_item(Item=item)

    return {"statusCode": 202, "body": json.dumps(item, cls=DecimalEncoder)}
def enqueue_handler(event, context):
    body = event["body"]
    match_id = body["MatchId"]
    data_mappers = body.get("DataMappers", [])
    item = {
        "DeletionQueueItemId": str(uuid.uuid4()),
        "MatchId": match_id,
        "CreatedAt": utc_timestamp(),
        "DataMappers": data_mappers,
        "CreatedBy": get_user_info(event),
    }
    deletion_queue_table.put_item(Item=item)

    return {"statusCode": 201, "body": json.dumps(item, cls=DecimalEncoder)}
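# Illustrative request only: the handler above expects a parsed body containing a
# MatchId and an optional DataMappers list. Values are invented, and get_user_info
# may require additional request context not shown here.
example_enqueue_event = {
    "body": {"MatchId": "jane.doe@example.com", "DataMappers": ["my-data-mapper"]}
}
# enqueue_handler(example_enqueue_event, None) returns a 201 response whose body
# echoes the queued item, including its generated DeletionQueueItemId.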
def enqueue_items(matches, user_info):
    items = []
    with deletion_queue_table.batch_writer() as batch:
        for match in matches:
            match_id = match["MatchId"]
            data_mappers = match.get("DataMappers", [])
            item = {
                "DeletionQueueItemId": str(uuid.uuid4()),
                "Type": match.get("Type", "Simple"),
                "MatchId": match_id,
                "CreatedAt": utc_timestamp(),
                "DataMappers": data_mappers,
                "CreatedBy": user_info,
            }
            batch.put_item(Item=item)
            items.append(item)
    return items
def list_job_events_handler(event, context):
    # Input parsing
    job_id = event["pathParameters"]["job_id"]
    qs = event.get("queryStringParameters")
    mvqs = event.get("multiValueQueryStringParameters")
    if not qs:
        qs = {}
        mvqs = {}
    page_size = int(qs.get("page_size", 20))
    start_at = qs.get("start_at", "0")

    # Check the job exists
    job = table.get_item(Key={"Id": job_id, "Sk": job_id}).get("Item")
    if not job:
        return {"statusCode": 404}

    watermark_boundary_mu = (job.get("JobFinishTime", utc_timestamp()) + 1) * 1000

    # Check the watermark is not "future"
    if int(start_at.split("#")[0]) > watermark_boundary_mu:
        raise ValueError("Watermark {} is out of bounds for this job".format(start_at))

    # Apply filters
    filter_expression = Attr("Type").eq("JobEvent")
    user_filters = mvqs.get("filter", [])
    for f in user_filters:
        k, v = f.split("=")
        filter_expression = filter_expression & Attr(k).begins_with(v)

    # Because the result may contain both JobEvent and Job items, we request up to
    # page_size + 1 items and apply the type filter as a FilterExpression. We then
    # limit the list to the requested page size in case the number of items after
    # filtering is still page_size + 1, i.e. the Job item wasn't on the page.
    items = []
    query_start_key = str(start_at)
    last_evaluated = None
    last_query_size = 0
    while len(items) < page_size:
        resp = table.query(
            KeyConditionExpression=Key("Id").eq(job_id),
            ScanIndexForward=True,
            FilterExpression=filter_expression,
            Limit=100 if len(user_filters) else page_size + 1,
            ExclusiveStartKey={"Id": job_id, "Sk": query_start_key},
        )
        results = resp.get("Items", [])
        last_query_size = len(results)
        items.extend(results[: page_size - len(items)])
        query_start_key = resp.get("LastEvaluatedKey", {}).get("Sk")
        if not query_start_key:
            break
        last_evaluated = query_start_key

    next_start = _get_watermark(
        items, start_at, page_size, job["JobStatus"], last_evaluated, last_query_size
    )

    resp = {
        k: v
        for k, v in {"JobEvents": items, "NextStart": next_start}.items()
        if v is not None
    }
    return {"statusCode": 200, "body": json.dumps(resp, cls=DecimalEncoder)}
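# Illustrative only: user filters arrive as repeated "filter" query string parameters
# in "Attribute=Prefix" form, and each one is translated into a begins_with condition
# above. The job_id and filter value here are invented.
example_list_events_event = {
    "pathParameters": {"job_id": "job123"},
    "queryStringParameters": {"page_size": "20"},
    "multiValueQueryStringParameters": {"filter": ["EventName=Find"]},
}
# list_job_events_handler(example_list_events_event, None) would return only the
# JobEvent items whose EventName begins with "Find".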