from collections import namedtuple


def input_config_to_job_input(input_batch_id, job_name, job_level, input_config):
    """Finds input data information from a static manifest or from a previous job."""
    JobInput = namedtuple(
        "JobInput",
        ["input_manifest_s3_uri", "label_attribute_name", "label_category_s3_uri"],
    )

    input_manifest_s3_uri = input_config.get("inputManifestS3Uri")
    if input_manifest_s3_uri is not None:
        return JobInput(
            input_manifest_s3_uri=input_manifest_s3_uri,
            label_attribute_name=None,
            label_category_s3_uri=None,
        )

    chain_to_job_name = job_name
    chain_from_job_name = input_config["chainFromJobName"]

    # Only support chaining from jobs within the current batch for now.
    if job_level == 1:
        raise Exception("can't chain in job_level 1")

    batches = chainable_batches(input_batch_id, job_level)
    if len(batches) == 0:
        raise Exception("no chainable batches found")

    processed_job_level_batch = next(
        iter(
            db.get_batch_metadata_by_labeling_job_name(
                chain_to_job_name, BatchMetadataType.PROCESS_LEVEL
            )
        ),
        None,
    )

    prev_level_jobs = []
    for batch in batches:
        prev_level_jobs += db.get_child_batch_metadata(
            batch["BatchId"], BatchMetadataType.JOB_LEVEL
        )

    for job in prev_level_jobs:
        if job[BatchMetadataTableAttributes.LABELING_JOB_NAME] == chain_from_job_name:
            # If available, use the downsampled manifest file as input to the new job.
            if processed_job_level_batch:
                processed_data_location = processed_job_level_batch[
                    BatchMetadataTableAttributes.JOB_INPUT_LOCATION
                ]
            else:
                processed_data_location = None

            batch_output_location = (
                processed_data_location
                or job[BatchMetadataTableAttributes.JOB_OUTPUT_LOCATION]
            )

            return JobInput(
                input_manifest_s3_uri=batch_output_location,
                label_attribute_name=job[
                    BatchMetadataTableAttributes.LABEL_ATTRIBUTE_NAME
                ],
                label_category_s3_uri=job[
                    BatchMetadataTableAttributes.LABEL_CATEGORY_CONFIG
                ],
            )

    raise Exception(f"chain job {chain_from_job_name} not found")
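
# Illustrative sketch (not part of the original module): how a caller might
# build a JobInput for both supported inputConfig shapes. A static manifest
# short-circuits chaining; "chainFromJobName" reuses a prior job's output
# manifest and label metadata. The batch id, job names, and S3 URI below are
# hypothetical placeholders.
def _example_input_config_usage():
    # Static manifest: chaining is skipped entirely.
    static_input = input_config_to_job_input(
        input_batch_id="batch-001",
        job_name="second-level-audit",
        job_level=2,
        input_config={"inputManifestS3Uri": "s3://example-bucket/input.manifest"},
    )
    # Chained input: reuse the output of a previously completed job in the
    # same batch as this job's input manifest.
    chained_input = input_config_to_job_input(
        input_batch_id="batch-001",
        job_name="second-level-audit",
        job_level=2,
        input_config={"chainFromJobName": "first-level-labeling"},
    )
    return static_input, chained_input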

def chainable_batches(parent_batch_id, job_level):
    """Returns all completed batches that we could possibly chain from."""
    if job_level == 1:
        raise Exception("can't chain in job_level 1")

    if job_level == 2:
        return db.get_child_batch_metadata(
            parent_batch_id, BatchMetadataType.FIRST_LEVEL
        )

    if job_level == 3:
        first_level_batches = db.get_child_batch_metadata(
            parent_batch_id, BatchMetadataType.FIRST_LEVEL
        )
        second_level_batches = db.get_child_batch_metadata(
            parent_batch_id, BatchMetadataType.SECOND_LEVEL
        )
        return first_level_batches + second_level_batches

    raise Exception("unsupported job level")
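
# Sketch (hypothetical batch id) of how the job level constrains the
# chainable set: a second-level job may chain only from FIRST_LEVEL batches,
# while a third-level job may chain from FIRST_LEVEL or SECOND_LEVEL batches.
def _example_chainable_batches():
    second_level_candidates = chainable_batches("batch-001", job_level=2)
    third_level_candidates = chainable_batches("batch-001", job_level=3)
    return second_level_candidates, third_level_candidates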

def first_or_second_level_to_human_readable(batch):
    """Converts a first or second level batch to a human readable response."""
    job_level_batches = db.get_child_batch_metadata(
        batch[Attributes.BATCH_ID], BatchMetadataType.JOB_LEVEL
    )
    job_responses = [
        job_level_to_human_readable(job_level_batch)
        for job_level_batch in job_level_batches
    ]
    return {
        "status": batch[Attributes.BATCH_STATUS],
        "numChildBatches": batch[Attributes.NUM_CHILD_BATCHES],
        "numChildBatchesComplete": batch[Attributes.NUM_CHILD_BATCHES_COMPLETE],
        "jobLevels": job_responses,
    }
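
# Illustrative example (hypothetical values) of the dictionary produced by
# first_or_second_level_to_human_readable:
_EXAMPLE_STAGE_RESPONSE = {
    "status": "COMPLETE",
    "numChildBatches": 2,
    "numChildBatchesComplete": 2,
    "jobLevels": [],  # one job_level_to_human_readable(...) entry per child job
}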

def input_batch_to_human_readable(batch):
    """
    Generates a human friendly version of an INPUT batch metadata with presigned urls

    :param batch: Batch metadata dictionary
    :returns: json serializable dictionary of batch info
    """
    # User should only be querying for parent batches of type "INPUT", not frame
    # level batches.
    if batch[Attributes.BATCH_METADATA_TYPE] != BatchMetadataType.INPUT:
        logger.error(
            "User requested existing batch, but it is of the wrong input type: %s",
            batch[Attributes.BATCH_ID],
        )
        return None

    response = {
        "batchId": batch[Attributes.BATCH_ID],
        "status": batch[Attributes.BATCH_STATUS],
        # Straight copy of the requested labeling jobs to acknowledge the request.
        "inputLabelingJobs": batch[Attributes.LABELING_JOBS],
    }

    stage_attributes = [
        ("firstLevel", BatchMetadataType.FIRST_LEVEL),
        ("secondLevel", BatchMetadataType.SECOND_LEVEL),
        ("thirdLevel", BatchMetadataType.THIRD_LEVEL),
    ]

    for field_name, attribute in stage_attributes:
        stage_batches = db.get_child_batch_metadata(
            batch[Attributes.BATCH_ID], attribute
        )
        # Each stage is expected to have at most one child batch; if more
        # exist, the last one wins.
        for stage_batch in stage_batches:
            response[field_name] = first_or_second_level_to_human_readable(stage_batch)

    return response
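
# Illustrative example (hypothetical values) of a full INPUT batch response.
# The "firstLevel"/"secondLevel"/"thirdLevel" keys are present only when the
# corresponding stage batch exists; the labeling job entry shape is assumed
# here, since it is a straight copy of the original request.
_EXAMPLE_INPUT_BATCH_RESPONSE = {
    "batchId": "batch-001",
    "status": "IN_PROGRESS",
    "inputLabelingJobs": [{"jobName": "first-level-labeling", "jobLevel": 1}],
    "firstLevel": _EXAMPLE_STAGE_RESPONSE,
}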

import json


def lambda_handler(event, context):
    """Lambda function that executes batch creation API

    Parameters
    ----------
    event: dict, required
        API gateway request with an input SQS arn, output SQS arn

    context: object, required
        Lambda Context runtime methods and attributes
        Context doc: https://docs.aws.amazon.com/lambda/latest/dg/python-context-object.html

    Returns
    -------
    Lambda Output Format: dict
        Return doc: https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-lambda-proxy-integrations.html
    """
    sfn_client = get_boto_client("stepfunctions", context.invoked_function_arn)
    log.log_request_and_context(event, context)

    body = json.loads(event.get("body"))
    batch_id = body.get("batchId")
    down_sampling_rate = body.get("downSamplingRate")

    # Validate required parameters before deriving ids from them.
    if batch_id is None:
        return construct_validation_error("BatchId is required.")
    if down_sampling_rate is None:
        return construct_validation_error("DownSampling rate is required.")

    input_metadata_batch_id = (
        f"{batch_id}-{BatchMetadataType.HUMAN_INPUT_METADATA.lower()}"
    )
    input_metadata_batch = db.get_batch_metadata(input_metadata_batch_id)
    if input_metadata_batch:
        return construct_validation_error(
            "The system indicates that you have already input the down sampling rate "
            + f'{input_metadata_batch.get("DownSamplingRate")}'
        )

    batch_metadata = db.get_batch_metadata(batch_id)
    if not batch_metadata:
        return construct_validation_error(
            f"BatchMetadata not found for the batchId: {batch_id}"
        )

    if down_sampling_rate < 0 or down_sampling_rate > 100:
        return construct_validation_error(
            "Expected down sampling rate to be between 0 and 100."
        )

    # get_child_batch_metadata returns a list; the first (and only) FIRST_LEVEL
    # child batch holds the job output location to down sample from.
    first_level_batch = db.get_child_batch_metadata(
        batch_id, BatchMetadataType.FIRST_LEVEL
    )[0]
    job_output_location = first_level_batch[
        BatchMetadataTableAttributes.JOB_OUTPUT_LOCATION
    ]

    state_token = batch_metadata.get(BatchMetadataTableAttributes.STATE_TOKEN)
    if not state_token:
        return construct_validation_error(
            f"The system indicates the batch execution is not currently at the wait step {batch_metadata}"
        )

    # Resume the Step Functions execution that is waiting on this batch.
    sfn_client.send_task_success(
        taskToken=state_token,
        output=json.dumps(
            {
                "batch_id": batch_metadata[
                    BatchMetadataTableAttributes.FIRST_LEVEL_BATCH_METADATA_ID
                ],
                "s3_output_path": job_output_location,
                "down_sampling_rate": down_sampling_rate,
                "token_sent_source_arn": context.invoked_function_arn,
            }
        ),
    )

    db.insert_batch_metadata_input(
        batch_id=input_metadata_batch_id,
        parent_batch_id=batch_id,
        down_sampling_rate=down_sampling_rate,
        input_manifest=job_output_location,
        batch_status=BatchStatus.COMPLETE,
    )

    return {
        "statusCode": 200,
        "body": "Successfully input metadata to resume batch execution : "
        + f"batchId : {batch_id}, downSamplingRate: {down_sampling_rate}",
        "isBase64Encoded": False,
    }
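
# Illustrative request (hypothetical values): API Gateway delivers the JSON
# body as a string, which the handler parses for batchId and downSamplingRate.
_EXAMPLE_EVENT = {
    "body": json.dumps({"batchId": "batch-001", "downSamplingRate": 50})
}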