Example #1
def lambda_handler(event, context):
    """Lambda function that copies any worker logs to s3 and publishes batch finish to SNS.

    Parameters
    ----------
    event: dict, required
    context: object, required Lambda Context runtime methods and attributes
    Context doc: https://docs.aws.amazon.com/lambda/latest/dg/python-context-object.html

    Returns
    -------
    Lambda Output Format: dict
    """
    log_request_and_context(event, context)

    try:
        request_input = parse_input(event)
    except KeyError as err:
        logger.error("Input event missing required args: %s: %s", event, err)
        raise Exception("Failed to parse input lambda handler") from err

    batch_id = request_input["batch_id"]
    # Mark the batch as completed.
    try:
        db.update_batch_status(batch_id, BatchStatus.COMPLETE)
    except botocore.exceptions.ClientError as err:
        raise Exception(f"failed to mark batch {batch_id} complete") from err

    batch_metadata = db.get_batch_metadata(batch_id)
    batch_info = input_batch_to_human_readable(batch_metadata)

    message = {
        "batchId": batch_id,
        "message": "Batch processing has completed successfully.",
        "batchInfo": batch_info,
        "token": request_input["execution_id"],
        "status": "SUCCESS",
    }

    output_sns_arn = os.getenv("DEFAULT_STATUS_SNS_ARN")
    if request_input["output_sns_arn"]:
        output_sns_arn = request_input["output_sns_arn"]

    topic = sns.Topic(output_sns_arn)
    try:
        topic.publish(Message=json.dumps(message, indent=4, default=str))
    except botocore.exceptions.ClientError as err:
        raise Exception(
            f"Service error publishing SNS response for batch id: {batch_id}"
        ) from err

    return {
        "published_sns": message,
        "output_sns_arn": output_sns_arn,
    }
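
For reference, a subscriber on the status topic receives the JSON-serialized message built above; an illustrative payload (the batchInfo shape depends on input_batch_to_human_readable, which is not shown here):

{
    "batchId": "batch-2021-01-01",
    "message": "Batch processing has completed successfully.",
    "batchInfo": {"...": "..."},
    "token": "<execution_id from the input event>",
    "status": "SUCCESS"
}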
Example #2
def lambda_handler(event, context):
    """Lambda function that ...

    Reads the S3 Input manifest, and sends the batch of the data to the SMGT Job.

    Parameters
    ----------
    event: dict, required
    context: object, required Lambda Context runtime methods and attributes
    Context doc: https://docs.aws.amazon.com/lambda/latest/dg/python-context-object.html

    Returns
    -------
    Lambda Output Format: dict
    """
    log.log_request_and_context(event, context)

    parent_batch_id = event["parent_batch_id"]
    job_level = event["job_level"]

    parent_batch = db.get_batch_metadata(parent_batch_id)
    if parent_batch is None:
        raise Exception(f"Invalid parent batch id: {parent_batch_id}")

    if job_level == 1:
        meta_data_type = BatchMetadataType.FIRST_LEVEL
    elif job_level == 2:
        meta_data_type = BatchMetadataType.SECOND_LEVEL
    elif job_level == 3:
        meta_data_type = BatchMetadataType.THIRD_LEVEL
    else:
        raise Exception(f"Unsupported job level: {job_level}")

    # Filter jobs by job level
    labeling_jobs = parent_batch[BatchMetadataTableAttributes.LABELING_JOBS]
    current_jobs = [job for job in labeling_jobs if job["jobLevel"] == job_level]
    log.logger.info("Kicking off %d jobs for level %d", len(current_jobs), job_level)

    batch_id = f"{parent_batch_id}-{meta_data_type.lower()}"
    for job in current_jobs:
        trigger_labeling_job(parent_batch_id, batch_id, job)

    try:
        db.insert_perform_labeling_job_metadata(
            parent_batch_id=parent_batch_id,
            batch_id=batch_id,
            batch_status=BatchStatus.IN_PROGRESS,
            batch_metadata_type=meta_data_type,
            num_children_batches=len(current_jobs),
        )
    except botocore.exceptions.ClientError as err:
        raise Exception(f"failed to put batch id {batch_id}") from err

    return {
        "batch_id": batch_id,
    }
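
The event for this handler comes from the step function; a hypothetical invocation payload, using the two keys the handler reads (values illustrative):

event = {
    "parent_batch_id": "batch-2021-01-01",
    "job_level": 1,
}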
Example #3
def lambda_handler(event, context):
    """Lambda function that stores the current step function state token into dynamo.

    Parameters
    ----------
    event: dict, required
    context: object, required Lambda Context runtime methods and attributes
    Context doc: https://docs.aws.amazon.com/lambda/latest/dg/python-context-object.html

    Returns
    -------
    Lambda Output Format: dict
    """
    step_input = extract_input(event)
    batch_id, step_token = step_input["batch_id"], step_input["step_token"]

    batch_metadata = db.get_batch_metadata(batch_id)
    if batch_metadata is None:
        raise Exception(
            f"Failed to find batch corresponding to id: {batch_id}")

    num_child = batch_metadata.get(BatchMetadataTableAttributes.NUM_CHILD_BATCHES)
    if num_child == 0:
        log.logger.info(
            "No children in batch, skipping wait for batch completion.")
        # Mark the db entry as complete and send task success to unblock the step function.
        sfn_client = get_boto_client("stepfunctions",
                                     context.invoked_function_arn)

        db.update_batch_status(batch_id, BatchStatus.COMPLETE)

        # Send status token to step function.
        response = sfn_client.send_task_success(taskToken=step_token,
                                                output=json.dumps(
                                                    {"batch": batch_id}))
        log.logger.info("Response for Step function token %s: %s", step_token,
                        response)
    else:
        # Not skipping wait for batch completion, the listener is responsible for marking
        # the batch as complete now.
        db.update_batch_step_token(batch_id, step_token)

    return {
        "batch_id": batch_id,
        "step_token": step_token,
    }
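
The step_token is a Step Functions callback task token; a minimal sketch of the calling state, assuming the standard .waitForTaskToken Lambda integration (state and function names here are hypothetical):

{
  "WaitForBatchCompletion": {
    "Type": "Task",
    "Resource": "arn:aws:states:::lambda:invoke.waitForTaskToken",
    "Parameters": {
      "FunctionName": "store-step-token",
      "Payload": {
        "batch_id.$": "$.batch_id",
        "step_token.$": "$$.Task.Token"
      }
    },
    "End": true
  }
}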
Example #4
def validate_common_body(body):
    """Verify main batch fields exist"""
    batch_id = body.get("batchId")

    if batch_id is None:
        error_message = "batchId must be provided"
        return error_message

    if not validate_regex(batch_id):
        return "batchId can only contain lowercase alphanumeric characters and '-'"

    batch_metadata = db.get_batch_metadata(batch_id)
    if batch_metadata is not None:
        error_message = f"Provided batchId already exists : {batch_id} : {batch_metadata}"
        return error_message

    return None
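
validate_regex is not shown in this example; given the error message above, a minimal sketch of it might look like:

import re

def validate_regex(batch_id):
    """Return True if batch_id is only lowercase alphanumerics and '-'."""
    return bool(re.fullmatch(r"[a-z0-9-]+", batch_id))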
Example #5
def mark_job_batch_complete(job_level_batch):
    """Mark the job batch as complete, trigger sideeffects if parent is complete"""
    log.logger.info(
        f"Signaling batch_meta to resume execution {job_level_batch}")

    batch_id = job_level_batch[BatchMetadataTableAttributes.BATCH_ID]
    if (job_level_batch[BatchMetadataTableAttributes.BATCH_STATUS]
            != BatchStatus.WAIT_FOR_SMGT_RESPONSE):
        log.logger.error("Invalid batch status, ignoring request")
        return
    db.update_batch_status(batch_id, BatchStatus.COMPLETE)

    # Copy worker metrics from the ground truth bucket to the raw_worker_metrics
    # folder in the glue bucket.
    job_output_location = job_level_batch["JobOutputLocation"]
    # Splitting "s3://bucket/key/..." on "/" puts the bucket name at index 2.
    bucket_name = job_output_location.split("/")[2]
    groundtruth_bucket = s3.Bucket(bucket_name)

    # The first three key segments of the output location form the listing prefix.
    prefix = "/".join(job_output_location.split("/")[3:6])
    for obj in groundtruth_bucket.objects.filter(Prefix=prefix):
        if (obj.key.endswith(".json") and "worker-response" in obj.key
                and not obj.key.endswith(".jpg.json")):
            new_key = f"raw_worker_metrics/{'/'.join(obj.key.split('/')[1:])}"
            s3_client.copy_object(Bucket=glue_bucket_name,
                                  CopySource=f"{bucket_name}/{obj.key}",
                                  Key=new_key)

    parent_batch_id = job_level_batch[
        BatchMetadataTableAttributes.PARENT_BATCH_ID]
    if not db.update_batch_child_count(parent_batch_id, 1):
        # Parent batch not yet complete; nothing more to do.
        return

    parent_batch = db.get_batch_metadata(parent_batch_id)
    try:
        task_token = parent_batch[BatchMetadataTableAttributes.STATE_TOKEN]
    except KeyError as err:
        raise Exception(
            f"missing state token on batch: {parent_batch_id}") from err

    # Send status token to step functions.
    response = sfn_client.send_task_success(
        taskToken=task_token, output=json.dumps({"batch_id": parent_batch_id})
    )
    log.logger.info(f"Response for Step function token {task_token}: {response}")
Example #6
def get_batch_description(batch_id):
    """
    Looks up a batch using the given batch id and validates that the batch
    is of appropriate type, then returns a human readable representation.

    :param batch_id: Id of batch to convert to human readable description
    :returns: json serializable description of a given batch
    """
    batch_metadata = db.get_batch_metadata(batch_id)
    if batch_metadata is None:
        logger.error("No batch metadata found for batch id: %s", batch_id)
        return None

    # User should only be querying for parent batches of type "INPUT", not frame
    # level batches.
    if batch_metadata["BatchMetadataType"] != BatchMetadataType.INPUT:
        logger.error(
            "User requested existing batch, but it is of the wrong type (not INPUT): %s",
            batch_id)
        return None

    # Convert batch metadata to something user presentable.
    return input_batch_to_human_readable(batch_metadata)
Example #7
def lambda_handler(event, context):
    """Lambda function that executes batch creation API

    Parameters
    ----------
    event: dict, required API gateway request with an input SQS arn, output SQS arn
    context: object, required Lambda Context runtime methods and attributes
    Context doc: https://docs.aws.amazon.com/lambda/latest/dg/python-context-object.html

    Returns
    -------
    Lambda Output Format: dict
    Return doc:
    https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-lambda-proxy-integrations.html
    """
    sfn_client = get_boto_client("stepfunctions", context.invoked_function_arn)

    log.log_request_and_context(event, context)

    body = json.loads(event.get("body"))

    batch_id = body.get("batchId")
    down_sampling_rate = body.get("downSamplingRate")

    input_metadata_batch_id = f"{batch_id}-{BatchMetadataType.HUMAN_INPUT_METADATA.lower()}"
    input_metadata_batch = db.get_batch_metadata(input_metadata_batch_id)

    if input_metadata_batch:
        return construct_validation_error(
            "The system indicates the you have already input the down sampling rate "
            + f'{input_metadata_batch.get("DownSamplingRate")}')

    if batch_id is None:
        return construct_validation_error("BatchId is required.")
    if down_sampling_rate is None:
        return construct_validation_error("DownSampling rate is required.")

    batch_metadata = db.get_batch_metadata(batch_id)

    if not batch_metadata:
        return construct_validation_error(
            f"BatchMetadata not found for the batchId: {batch_id}")

    if down_sampling_rate < 0 or down_sampling_rate > 100:
        return construct_validation_error(
            "Expected down sampling rate between 0 and 100.")

    first_level_batch = db.get_child_batch_metadata(
        batch_id, BatchMetadataType.FIRST_LEVEL)
    job_output_location = first_level_batch[
        BatchMetadataTableAttributes.JOB_OUTPUT_LOCATION]

    state_token = batch_metadata.get(BatchMetadataTableAttributes.STATE_TOKEN)

    if not state_token:
        return construct_validation_error(
            f"The system indicates the batch execution is not currently at the wait step {batch_metadata}"
        )

    sfn_client.send_task_success(
        taskToken=batch_metadata[BatchMetadataTableAttributes.STATE_TOKEN],
        output=json.dumps({
            "batch_id": batch_metadata[
                BatchMetadataTableAttributes.FIRST_LEVEL_BATCH_METADATA_ID],
            "s3_output_path": job_output_location,
            "down_sampling_rate": down_sampling_rate,
            "token_sent_source_arn": context.invoked_function_arn,
        }),
    )

    db.insert_batch_metadata_input(
        batch_id=input_metadata_batch_id,
        parent_batch_id=batch_id,
        down_sampling_rate=down_sampling_rate,
        input_manifest=job_output_location,
        batch_status=BatchStatus.COMPLETE,
    )

    response = {
        "statusCode": 200,
        "body": "Successfully input metadata to resume batch execution: " +
                f"batchId: {batch_id}, downSamplingRate: {down_sampling_rate}",
        "isBase64Encoded": False,
    }
    return response
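
A hypothetical request body for this endpoint, using the two fields the handler reads (values illustrative):

{
    "batchId": "batch-2021-01-01",
    "downSamplingRate": 50
}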
Example #8
def lambda_handler(event, context):
    """Lambda function that ...
    Down sampling of the input manifest to send to the next step

    Parameters
    ----------
    event: dict, required
    context: object, required Lambda Context runtime methods and attributes
    Context doc: https://docs.aws.amazon.com/lambda/latest/dg/python-context-object.html

    Returns
    -------
    Lambda Output Format: dict
    """

    log.log_request_and_context(event, context)
    batch_id = event["batch_id"]
    batch_metadata = db.get_batch_metadata(batch_id)
    current_metadata_type = batch_metadata[BatchMetadataTableAttributes.BATCH_METADATA_TYPE]

    if current_metadata_type == BatchMetadataType.FIRST_LEVEL:
        processing_output_job_level = 1
    elif current_metadata_type == BatchMetadataType.SECOND_LEVEL:
        processing_output_job_level = 2
    else:
        processing_output_job_level = 3

    parent_batch_id = batch_metadata[BatchMetadataTableAttributes.PARENT_BATCH_ID]
    parent_batch_metadata = db.get_batch_metadata(parent_batch_id)

    current_level_completed_labeling_jobs = []
    future_level_labeling_jobs = []

    labeling_jobs = parent_batch_metadata["LabelingJobs"]

    for labeling_job in labeling_jobs:
        if labeling_job["jobLevel"] == processing_output_job_level:
            current_level_completed_labeling_jobs.append(labeling_job)
        elif labeling_job["jobLevel"] > processing_output_job_level:
            future_level_labeling_jobs.append(labeling_job)

    for completed_labeling_job in current_level_completed_labeling_jobs:
        completed_labeling_job_name = completed_labeling_job["jobName"]

        for future_level_labeling_job in future_level_labeling_jobs:
            if completed_labeling_job_name == future_level_labeling_job["inputConfig"]["chainFromJobName"] and \
                    future_level_labeling_job["inputConfig"].get("downSamplingRate"):
                future_level_labeling_job_name = future_level_labeling_job["jobName"]

                job_level_batch_metadata = db.get_batch_metadata_by_labeling_job_name(completed_labeling_job_name,
                                                                                      BatchMetadataType.JOB_LEVEL)[0]

                completed_job_output_location = \
                    job_level_batch_metadata[BatchMetadataTableAttributes.JOB_OUTPUT_LOCATION]

                s3_object = s3_accessor.fetch_s3(completed_job_output_location)

                content = s3_object.decode('utf-8')
                items = content.splitlines()

                down_sample_rate = future_level_labeling_job["inputConfig"]["downSamplingRate"]
                down_sampled_data = down_sample_to_proportion(items, down_sample_rate)

                future_level_labeling_input_location = f"s3://{batch_processing_bucket_name}/batch_manifests/" \
                                           f"{future_level_labeling_job_name}/processed/data.manifest"

                s3_accessor.put_s3(future_level_labeling_input_location, "\n".join(down_sampled_data))

                batch_id = f'{parent_batch_id}-{future_level_labeling_job_name}-' \
                           f'{BatchMetadataType.PROCESS_LEVEL.lower()}'

                db.insert_processed_input_batch_metadata(parent_batch_id,
                                                         batch_id,
                                                         future_level_labeling_job_name,
                                                         future_level_labeling_input_location)
    return None
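
down_sample_to_proportion is not shown in this example; given that downSamplingRate is validated elsewhere as a 0-100 percentage, a minimal sketch of it might be:

import random

def down_sample_to_proportion(items, rate):
    """Randomly keep roughly `rate` percent of the manifest lines."""
    return random.sample(items, round(len(items) * rate / 100))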