Example #1
def test_it_applies_time_delta(dt):
    dt.now.return_value = datetime.datetime(
        2020, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc)
    assert 1580428800 == utc_timestamp(days=30)
    assert 1577836800 == utc_timestamp()
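
A hypothetical sketch of the utc_timestamp helper under test, consistent with the mocked clock and the asserted epoch values above (1577836800 is 2020-01-01T00:00:00Z; 1580428800 is 30 days later); the implementation is an assumption, not the project's actual code:

import datetime

def utc_timestamp(**delta_kwargs):
    # "Now" in UTC plus an optional offset, returned as an integer epoch timestamp.
    now = datetime.datetime.now(datetime.timezone.utc)
    return round((now + datetime.timedelta(**delta_kwargs)).timestamp())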
Example #2
def process_handler(event, context):
    if running_job_exists():
        raise ValueError("There is already a job in progress")

    job_id = str(uuid.uuid4())
    config = get_config()
    deletion_queue_key = 'jobs/{}/deletion_queue/data.json'.format(job_id)
    item = {
        "Id": job_id,
        "Sk": job_id,
        "Type": "Job",
        "JobStatus": "QUEUED",
        "GSIBucket": str(random.randint(0, bucket_count - 1)),
        "CreatedAt": utc_timestamp(),
        "DeletionQueueBucket": deletion_queue_bucket,
        "DeletionQueueKey": deletion_queue_key,
        "DeletionQueueItemsSkipped": False,
        "CreatedBy": get_user_info(event),
        **{
            k: v
            for k, v in config.items() if k not in ["JobDetailsRetentionDays"]
        }
    }

    if int(config.get("JobDetailsRetentionDays", 0)) > 0:
        item["Expires"] = utc_timestamp(days=config["JobDetailsRetentionDays"])

    deletion_queue_items = {"DeletionQueueItems": []}
    for extended_deletion_queue_item in get_deletion_queue():
        deletion_item = {
            "DeletionQueueItemId": extended_deletion_queue_item["DeletionQueueItemId"],
            "MatchId": extended_deletion_queue_item["MatchId"],
            "DataMappers": extended_deletion_queue_item["DataMappers"],
        }
        deletion_queue_items["DeletionQueueItems"].append(deletion_item)

    obj = s3.Object(deletion_queue_bucket, deletion_queue_key)
    obj.put(Body=json.dumps(deletion_queue_items))
    jobs_table.put_item(Item=item)

    # After writing to DynamoDB, add the deletion queue match IDs to the response
    item["DeletionQueueItems"] = [
        i["MatchId"] for i in deletion_queue_items["DeletionQueueItems"]
    ]

    return {"statusCode": 202, "body": json.dumps(item, cls=DecimalEncoder)}
Example #3
def list_jobs_handler(event, context):
    qs = event.get("queryStringParameters")
    if not qs:
        qs = {}
    page_size = int(qs.get("page_size", 10))
    start_at = int(qs.get("start_at", utc_timestamp()))

    items = []
    for gsi_bucket in range(0, bucket_count):
        response = table.query(
            IndexName=index,
            KeyConditionExpression=Key("GSIBucket").eq(str(gsi_bucket))
            & Key("CreatedAt").lt(start_at),
            ScanIndexForward=False,
            Limit=page_size,
            ProjectionExpression=", ".join(job_summary_attributes),
        )
        items += response.get("Items", [])
    items = sorted(items, key=lambda i: i["CreatedAt"], reverse=True)[:page_size]
    if len(items) < page_size:
        next_start = None
    else:
        next_start = min([item["CreatedAt"] for item in items])

    return {
        "statusCode": 200,
        "body": json.dumps(
            {"Jobs": items, "NextStart": next_start,}, cls=DecimalEncoder
        ),
    }
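
As a usage sketch, a client can page through jobs by feeding NextStart back in as start_at until it comes back as None. The event shape below mirrors an API Gateway proxy event, and fetch_all_jobs is an illustrative helper, not part of the source:

import json

def fetch_all_jobs(page_size=10):
    # Illustrative paging loop; stops when the handler reports no further page.
    jobs, start_at = [], None
    while True:
        qs = {"page_size": str(page_size)}
        if start_at is not None:
            qs["start_at"] = str(start_at)
        resp = list_jobs_handler({"queryStringParameters": qs}, None)
        body = json.loads(resp["body"])
        jobs.extend(body["Jobs"])
        if body["NextStart"] is None:
            return jobs
        start_at = body["NextStart"]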
Example #4
def handler(event, context):
    records = event["Records"]
    new_jobs = [
        deserialize_item(r["dynamodb"]["NewImage"])
        for r in records
        if is_record_type(r, "Job") and is_operation(r, "INSERT")
    ]
    events = [
        deserialize_item(r["dynamodb"]["NewImage"])
        for r in records
        if is_record_type(r, "JobEvent") and is_operation(r, "INSERT")
    ]
    grouped_events = groupby(sorted(events, key=itemgetter("Id")), key=itemgetter("Id"))

    for job in new_jobs:
        process_job(job)

    for job_id, group in grouped_events:
        group = [i for i in group]
        update_stats(job_id, group)
        updated_job = update_status(job_id, group)
        # Perform cleanup if required
        if (
            updated_job
            and updated_job.get("JobStatus") == "FORGET_COMPLETED_CLEANUP_IN_PROGRESS"
        ):
            try:
                clear_deletion_queue(updated_job)
                emit_event(
                    job_id, "CleanupSucceeded", utc_timestamp(), "StreamProcessor"
                )
            except Exception as e:
                emit_event(
                    job_id,
                    "CleanupFailed",
                    {"Error": "Unable to clear deletion queue: {}".format(str(e))},
                    "StreamProcessor",
                )
        elif updated_job and updated_job.get("JobStatus") in skip_cleanup_states:
            emit_event(job_id, "CleanupSkipped", utc_timestamp(), "StreamProcessor")
Example #5
def process_handler(event, context):
    if running_job_exists():
        raise ValueError("There is already a job in progress")

    job_id = str(uuid.uuid4())
    config = get_config()
    item = {
        "Id": job_id,
        "Sk": job_id,
        "Type": "Job",
        "JobStatus": "QUEUED",
        "GSIBucket": str(random.randint(0, bucket_count - 1)),
        "CreatedAt": utc_timestamp(),
        "DeletionQueueItems": [],
        "DeletionQueueItemsSkipped": False,
        "CreatedBy": get_user_info(event),
        **{
            k: v
            for k, v in config.items() if k not in ["JobDetailsRetentionDays"]
        }
    }

    if int(config.get("JobDetailsRetentionDays", 0)) > 0:
        item["Expires"] = utc_timestamp(days=config["JobDetailsRetentionDays"])

    item_size_bytes = calculate_ddb_item_bytes(item)

    for deletion_queue_item in get_deletion_queue():
        current_size_bytes = calculate_ddb_item_bytes(deletion_queue_item)
        if item_size_bytes + current_size_bytes < max_size_bytes:
            item["DeletionQueueItems"].append(deletion_queue_item)
            item_size_bytes += current_size_bytes
        else:
            item["DeletionQueueItemsSkipped"] = True
            break

    jobs_table.put_item(Item=item)

    return {"statusCode": 202, "body": json.dumps(item, cls=DecimalEncoder)}
Example #6
def handler(event, context):
    records = event["Records"]
    new_jobs = get_records(records, "Job", "INSERT")
    deleted_jobs = get_records(records, "Job", "REMOVE", new_image=False)
    events = get_records(records, "JobEvent", "INSERT")
    grouped_events = groupby(sorted(events, key=itemgetter("Id")),
                             key=itemgetter("Id"))
    for job in new_jobs:
        process_job(job)

    for job in deleted_jobs:
        cleanup_manifests(job)

    for job_id, group in grouped_events:
        group = [i for i in group]
        update_stats(job_id, group)
        updated_job = update_status(job_id, group)
        # Perform cleanup if required
        if (
            updated_job
            and updated_job.get("JobStatus") == "FORGET_COMPLETED_CLEANUP_IN_PROGRESS"
        ):
            try:
                clear_deletion_queue(updated_job)
                emit_event(
                    job_id, "CleanupSucceeded", utc_timestamp(), "StreamProcessor"
                )
            except Exception as e:
                emit_event(
                    job_id,
                    "CleanupFailed",
                    {"Error": "Unable to clear deletion queue: {}".format(str(e))},
                    "StreamProcessor",
                )
        elif updated_job and updated_job.get("JobStatus") in skip_cleanup_states:
            emit_event(job_id, "CleanupSkipped", utc_timestamp(), "StreamProcessor")
Example #7
def process_handler(event, context):
    if running_job_exists():
        raise ValueError("There is already a job in progress")

    job_id = str(uuid.uuid4())
    config = get_config()
    item = {
        "Id": job_id,
        "Sk": job_id,
        "Type": "Job",
        "JobStatus": "QUEUED",
        "GSIBucket": str(random.randint(0, bucket_count - 1)),
        "CreatedAt": utc_timestamp(),
        "CreatedBy": get_user_info(event),
        **{
            k: v
            for k, v in config.items() if k not in ["JobDetailsRetentionDays"]
        },
    }
    if int(config.get("JobDetailsRetentionDays", 0)) > 0:
        item["Expires"] = utc_timestamp(days=config["JobDetailsRetentionDays"])
    jobs_table.put_item(Item=item)
    return {"statusCode": 202, "body": json.dumps(item, cls=DecimalEncoder)}
Example #8
def enqueue_handler(event, context):
    body = event["body"]
    match_id = body["MatchId"]
    data_mappers = body.get("DataMappers", [])
    item = {
        "DeletionQueueItemId": str(uuid.uuid4()),
        "MatchId": match_id,
        "CreatedAt": utc_timestamp(),
        "DataMappers": data_mappers,
        "CreatedBy": get_user_info(event)
    }
    deletion_queue_table.put_item(Item=item)

    return {"statusCode": 201, "body": json.dumps(item, cls=DecimalEncoder)}
Example #9
def enqueue_items(matches, user_info):
    items = []
    with deletion_queue_table.batch_writer() as batch:
        for match in matches:
            match_id = match["MatchId"]
            data_mappers = match.get("DataMappers", [])
            item = {
                "DeletionQueueItemId": str(uuid.uuid4()),
                "Type": match.get("Type", "Simple"),
                "MatchId": match_id,
                "CreatedAt": utc_timestamp(),
                "DataMappers": data_mappers,
                "CreatedBy": user_info,
            }
            batch.put_item(Item=item)
            items.append(item)
    return items
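
A hedged usage sketch; the user_info shape simply mirrors the CreatedBy value used elsewhere and is an assumption:

queued = enqueue_items(
    [
        {"MatchId": "jane@example.com"},
        {"MatchId": "12345", "Type": "Simple", "DataMappers": ["my-data-mapper"]},
    ],
    user_info={"Username": "admin"},
)
# Each returned item now carries its generated DeletionQueueItemId and CreatedAt.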
Example #10
def list_job_events_handler(event, context):
    # Input parsing
    job_id = event["pathParameters"]["job_id"]
    qs = event.get("queryStringParameters")
    mvqs = event.get("multiValueQueryStringParameters")
    if not qs:
        qs = {}
        mvqs = {}
    page_size = int(qs.get("page_size", 20))
    start_at = qs.get("start_at", "0")
    # Check the job exists
    job = table.get_item(Key={"Id": job_id, "Sk": job_id}).get("Item")
    if not job:
        return {"statusCode": 404}

    watermark_boundary_mu = (job.get("JobFinishTime", utc_timestamp()) + 1) * 1000

    # Check the watermark is not "future"
    if int(start_at.split("#")[0]) > watermark_boundary_mu:
        raise ValueError("Watermark {} is out of bounds for this job".format(start_at))

    # Apply filters
    filter_expression = Attr("Type").eq("JobEvent")
    user_filters = mvqs.get("filter", [])
    for f in user_filters:
        k, v = f.split("=")
        filter_expression = filter_expression & Attr(k).begins_with(v)

    # Because the result may contain both JobEvent and Job items, we request up to page_size+1 items and apply the
    # type filter as a FilterExpression. We then cap the list at the requested page size in case the number of
    # items after filtering is still page_size+1, i.e. the Job item wasn't on the page.
    items = []
    query_start_key = str(start_at)
    last_evaluated = None
    last_query_size = 0
    while len(items) < page_size:
        resp = table.query(
            KeyConditionExpression=Key("Id").eq(job_id),
            ScanIndexForward=True,
            FilterExpression=filter_expression,
            Limit=100 if len(user_filters) else page_size + 1,
            ExclusiveStartKey={"Id": job_id, "Sk": query_start_key},
        )
        results = resp.get("Items", [])
        last_query_size = len(results)
        items.extend(results[: page_size - len(items)])
        query_start_key = resp.get("LastEvaluatedKey", {}).get("Sk")
        if not query_start_key:
            break
        last_evaluated = query_start_key

    next_start = _get_watermark(
        items, start_at, page_size, job["JobStatus"], last_evaluated, last_query_size
    )

    resp = {
        k: v
        for k, v in {"JobEvents": items, "NextStart": next_start}.items()
        if v is not None
    }

    return {"statusCode": 200, "body": json.dumps(resp, cls=DecimalEncoder)}