Example #1
from collections import namedtuple


def input_config_to_job_input(input_batch_id, job_name, job_level, input_config):
    """Finds input data information from a static manifest or from a previous job"""
    JobInput = namedtuple(
        "JobInput",
        ["input_manifest_s3_uri", "label_attribute_name", "label_category_s3_uri"],
    )

    input_manifest_s3_uri = input_config.get("inputManifestS3Uri")
    if input_manifest_s3_uri is not None:
        return JobInput(
            input_manifest_s3_uri=input_manifest_s3_uri,
            label_attribute_name=None,
            label_category_s3_uri=None,
        )

    chain_to_job_name = job_name
    chain_from_job_name = input_config["chainFromJobName"]

    # Only support jobs within the current batch for now.
    if job_level == 1:
        raise Exception("can't chain in job_level 1")

    batches = chainable_batches(input_batch_id, job_level)
    if len(batches) == 0:
        raise Exception("no chainable batches found")

    processed_job_level_batch = next(
        iter(db.get_batch_metadata_by_labeling_job_name(
            chain_to_job_name, BatchMetadataType.PROCESS_LEVEL)),
        None,
    )

    prev_level_jobs = []
    for batch in batches:
        prev_level_jobs += db.get_child_batch_metadata(
            batch["BatchId"], BatchMetadataType.JOB_LEVEL
        )

    for job in prev_level_jobs:
        if job[BatchMetadataTableAttributes.LABELING_JOB_NAME] == chain_from_job_name:
            # If available, use the downsampled manifest file as input to the new job
            if processed_job_level_batch:
                processed_data_location = processed_job_level_batch[
                    BatchMetadataTableAttributes.JOB_INPUT_LOCATION
                ]
            else:
                processed_data_location = None

            batch_output_location = (
                processed_data_location
                or job[BatchMetadataTableAttributes.JOB_OUTPUT_LOCATION]
            )

            return JobInput(
                input_manifest_s3_uri=batch_output_location,
                label_attribute_name=job[
                    BatchMetadataTableAttributes.LABEL_ATTRIBUTE_NAME
                ],
                label_category_s3_uri=job[
                    BatchMetadataTableAttributes.LABEL_CATEGORY_CONFIG
                ],
            )

    raise Exception(f"chain job {chain_from_job_name} not found")
Example #2
def chainable_batches(parent_batch_id, job_level):
    """Returns all batches that have completed and we could possibly chain from"""

    if job_level == 1:
        raise Exception("can't chain in job_level 1")

    if job_level == 2:
        return db.get_child_batch_metadata(parent_batch_id, BatchMetadataType.FIRST_LEVEL)

    if job_level == 3:
        first_level_batches = db.get_child_batch_metadata(
            parent_batch_id, BatchMetadataType.FIRST_LEVEL
        )
        second_level_batches = db.get_child_batch_metadata(
            parent_batch_id, BatchMetadataType.SECOND_LEVEL
        )

        return first_level_batches + second_level_batches

    raise Exception("unsupported job level")
Example #3
def first_or_second_level_to_human_readable(batch):
    """Converts a first or second level batch to human readable"""
    job_level_batches = db.get_child_batch_metadata(
        batch[Attributes.BATCH_ID], BatchMetadataType.JOB_LEVEL)
    job_responses = [
        job_level_to_human_readable(job_level_batch)
        for job_level_batch in job_level_batches
    ]

    return {
        "status": batch[Attributes.BATCH_STATUS],
        "numChildBatches": batch[Attributes.NUM_CHILD_BATCHES],
        "numChildBatchesComplete":
        batch[Attributes.NUM_CHILD_BATCHES_COMPLETE],
        "jobLevels": job_responses,
    }
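For reference, a sketch of the shape this helper returns; the values are illustrative, not from a real batch:

example = {
    "status": "COMPLETE",
    "numChildBatches": 3,
    "numChildBatchesComplete": 3,
    # One job_level_to_human_readable() entry per JOB_LEVEL child batch.
    "jobLevels": [],
}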
Example #4
def input_batch_to_human_readable(batch):
    """
    Generates a human friendly version of an INPUT batch metadata with presigned urls

    :param batch: Batch metadata dictionary
    :returns: json serializable dictionary of batch info
    """

    # User should only be querying for parent batches of type "INPUT", not frame
    # level batches.
    if batch[Attributes.BATCH_METADATA_TYPE] != BatchMetadataType.INPUT:
        logger.error(
            "User requested existing batch, but it is of the wrong input type: %s",
            batch[Attributes.BATCH_ID],
        )
        return None

    response = {
        "batchId": batch[Attributes.BATCH_ID],
        "status": batch[Attributes.BATCH_STATUS],
        # Straight copy of request labeling jobs to acknowledge the request.
        "inputLabelingJobs": batch[Attributes.LABELING_JOBS],
    }

    stage_attributes = [
        ("firstLevel", BatchMetadataType.FIRST_LEVEL),
        ("secondLevel", BatchMetadataType.SECOND_LEVEL),
        ("thirdLevel", BatchMetadataType.THIRD_LEVEL),
    ]

    for field_name, attribute in stage_attributes:
        stage_batches = db.get_child_batch_metadata(
            batch[Attributes.BATCH_ID], attribute)
        # If multiple batches exist for a stage, the last one wins.
        for stage_batch in stage_batches:
            response[field_name] = first_or_second_level_to_human_readable(
                stage_batch)

    return response
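A sketch of the full response for an INPUT batch, assuming one first-level and one second-level child exist (all values hypothetical; thirdLevel appears only when THIRD_LEVEL children exist):

example_response = {
    "batchId": "batch-1",
    "status": "IN_PROGRESS",
    "inputLabelingJobs": [{"jobName": "first-level-job"}],
    "firstLevel": {
        "status": "COMPLETE",
        "numChildBatches": 1,
        "numChildBatchesComplete": 1,
        "jobLevels": [],
    },
    "secondLevel": {
        "status": "IN_PROGRESS",
        "numChildBatches": 1,
        "numChildBatchesComplete": 0,
        "jobLevels": [],
    },
}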
Example #5
def lambda_handler(event, context):
    """Lambda function that resumes batch execution with a down sampling rate

    Parameters
    ----------
    event: dict, required
        API Gateway request whose JSON body contains batchId and downSamplingRate
    context: object, required
        Lambda Context runtime methods and attributes
        Context doc: https://docs.aws.amazon.com/lambda/latest/dg/python-context-object.html

    Returns
    -------
    Lambda Output Format: dict
    Return doc:
    https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-lambda-proxy-integrations.html
    """
    sfn_client = get_boto_client("stepfunctions", context.invoked_function_arn)

    log.log_request_and_context(event, context)

    body = json.loads(event.get("body"))

    batch_id = body.get("batchId")
    down_sampling_rate = body.get("downSamplingRate")

    # Validate required fields before they are used to build lookup keys.
    if batch_id is None:
        return construct_validation_error("BatchId is required.")
    if down_sampling_rate is None:
        return construct_validation_error("DownSampling rate is required.")

    input_metadata_batch_id = f"{batch_id}-{BatchMetadataType.HUMAN_INPUT_METADATA.lower()}"
    input_metadata_batch = db.get_batch_metadata(input_metadata_batch_id)

    if input_metadata_batch:
        return construct_validation_error(
            "The system indicates that you have already input the down sampling rate "
            + f'{input_metadata_batch.get("DownSamplingRate")}')

    batch_metadata = db.get_batch_metadata(batch_id)

    if not batch_metadata:
        return construct_validation_error(
            f"BatchMetadata not found for the batchId: {batch_id}")

    if down_sampling_rate < 0 or down_sampling_rate > 100:
        return construct_validation_error(
            "Expected down sampling rate to be between 0 and 100.")

    # get_child_batch_metadata returns a list of child batches; use the
    # first FIRST_LEVEL child as the source of the job output location.
    first_level_batch = db.get_child_batch_metadata(
        batch_id, BatchMetadataType.FIRST_LEVEL)[0]
    job_output_location = first_level_batch[
        BatchMetadataTableAttributes.JOB_OUTPUT_LOCATION]

    state_token = batch_metadata.get(BatchMetadataTableAttributes.STATE_TOKEN)

    if not state_token:
        return construct_validation_error(
            f"The system indicates that the batch execution is not currently at the wait step: {batch_metadata}"
        )

    sfn_client.send_task_success(
        taskToken=state_token,
        output=json.dumps({
            "batch_id": batch_metadata[
                BatchMetadataTableAttributes.FIRST_LEVEL_BATCH_METADATA_ID],
            "s3_output_path": job_output_location,
            "down_sampling_rate": down_sampling_rate,
            "token_sent_source_arn": context.invoked_function_arn,
        }),
    )

    db.insert_batch_metadata_input(
        batch_id=input_metadata_batch_id,
        parent_batch_id=batch_id,
        down_sampling_rate=down_sampling_rate,
        input_manifest=job_output_location,
        batch_status=BatchStatus.COMPLETE,
    )

    response = {
        "statusCode": 200,
        "body": "Successfully input metadata to resume batch execution: "
                + f"batchId: {batch_id}, downSamplingRate: {down_sampling_rate}",
        "isBase64Encoded": False,
    }
    return response
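A hedged sketch of invoking the handler with an API Gateway proxy event; the payload values and the context stub are assumptions, and the module's db and Step Functions dependencies are taken as given:

import json
from types import SimpleNamespace

event = {
    "body": json.dumps({"batchId": "batch-1", "downSamplingRate": 50}),
}
# Minimal stand-in for the Lambda context: only the attribute the
# handler reads is provided.
context = SimpleNamespace(
    invoked_function_arn="arn:aws:lambda:us-east-1:123456789012:function:resume-batch",
)
response = lambda_handler(event, context)
assert response["statusCode"] == 200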