def lambda_handler(event, context): """Lambda function that copies any worker logs to s3 and publishes batch finish to SNS. Parameters ---------- event: dict, required context: object, required Lambda Context runtime methods and attributes Context doc: https://docs.aws.amazon.com/lambda/latest/dg/python-context-object.html Returns ------ Lambda Output Format: dict """ log_request_and_context(event, context) try: request_input = parse_input(event) except KeyError as err: logger.error("Input event missing required args: %s: %s", event, err) raise Exception("Failed to parse input lambda handler") from err batch_id = request_input["batch_id"] # Mark the batch as completed. try: db.update_batch_status(batch_id, BatchStatus.COMPLETE) except botocore.exceptions.ClientError as err: raise Exception(f"failed to mark batch {batch_id} complete") from err batch_metadata = db.get_batch_metadata(batch_id) batch_info = input_batch_to_human_readable(batch_metadata) message = { "batchId": batch_id, "message": "Batch processing has completed successfully.", "batchInfo": batch_info, "token": request_input["execution_id"], "status": "SUCCESS", } output_sns_arn = os.getenv("DEFAULT_STATUS_SNS_ARN") if request_input["output_sns_arn"]: output_sns_arn = request_input["output_sns_arn"] topic = sns.Topic(output_sns_arn) try: topic.publish(Message=json.dumps(message, indent=4, default=str), ) except botocore.exceptions.ClientError as err: raise Exception( f"Service error publishing SNS response for batch id: {batch_id}" ) from err return { "published_sns": message, "output_sns_arn": output_sns_arn, }
def lambda_handler(event, context): """Lambda function that ... Reads the S3 Input manifest, and sends the batch of the data to the SMGT Job. Parameters ---------- event: dict, required context: object, required Lambda Context runtime methods and attributes Context doc: https://docs.aws.amazon.com/lambda/latest/dg/python-context-object.html Returns ------ Lambda Output Format: dict """ log.log_request_and_context(event, context) parent_batch_id = event["parent_batch_id"] job_level = event["job_level"] parent_batch = db.get_batch_metadata(parent_batch_id) if parent_batch is None: raise Exception(f"Invalid parent batch id: {parent_batch_id}") if job_level == 1: meta_data_type = BatchMetadataType.FIRST_LEVEL elif job_level == 2: meta_data_type = BatchMetadataType.SECOND_LEVEL elif job_level == 3: meta_data_type = BatchMetadataType.THIRD_LEVEL # Filter jobs by job level labeling_jobs = parent_batch[BatchMetadataTableAttributes.LABELING_JOBS] current_jobs = [job for job in labeling_jobs if job["jobLevel"] == job_level] log.logging.info("Kicking off %d jobs for level %d", len(current_jobs), job_level) batch_id = f"{parent_batch_id}-{meta_data_type.lower()}" for job in current_jobs: trigger_labeling_job(parent_batch_id, batch_id, job) try: db.insert_perform_labeling_job_metadata( parent_batch_id=parent_batch_id, batch_id=batch_id, batch_status=BatchStatus.IN_PROGRESS, batch_metadata_type=meta_data_type, num_children_batches=len(current_jobs), ) except botocore.exceptions.ClientError as err: raise Exception(f"failed to put batch id {batch_id}") from err return { "batch_id": batch_id, }
def lambda_handler(event, context): """Lambda function that stores the current step function state token into dynamo. Parameters ---------- event: dict, required context: object, required Lambda Context runtime methods and attributes Context doc: https://docs.aws.amazon.com/lambda/latest/dg/python-context-object.html Returns ------ Lambda Output Format: dict """ step_input = extract_input(event) batch_id, step_token = step_input["batch_id"], step_input["step_token"] batch_metadata = db.get_batch_metadata(batch_id) if batch_metadata is None: raise Exception( f"Failed to find batch corresponding to id: {batch_id}") num_child = batch_metadata.get( BatchMetadataTableAttributes.NUM_CHILD_BATCHES) if num_child is not None and num_child == 0: log.logger.info( "No children in batch, skipping wait for batch completion.") # Mark the db entry as complete and send task success to unblock the step function. sfn_client = get_boto_client("stepfunctions", context.invoked_function_arn) db.update_batch_status(batch_id, BatchStatus.COMPLETE) # Send status token to step function. response = sfn_client.send_task_success(taskToken=step_token, output=json.dumps( {"batch": batch_id})) log.logger.info("Response for Step function token %s: %s", step_token, response) else: # Not skipping wait for batch completion, the listener is responsible for marking # the batch as complete now. db.update_batch_step_token(batch_id, step_token) return { "batch_id": batch_id, "step_token": step_token, }
def validate_common_body(body):
    """Verify main batch fields exist; return an error message, or None if valid."""
    batch_id = body.get("batchId")
    if batch_id is None:
        return "batchId must be provided"

    if not validate_regex(batch_id):
        return "batchId can only contain lowercase alphanumeric characters and '-'"

    batch_metadata = db.get_batch_metadata(batch_id)
    if batch_metadata is not None:
        return f"Provided batchId already exists : {batch_id} : {batch_metadata}"

    return None
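
# Hypothetical usage of validate_common_body inside an API handler, after
# json.loads(event["body"]); callers branch on the None success value. The
# batch id is a placeholder.
body = {"batchId": "batch-2021-05-01"}
error_message = validate_common_body(body)
if error_message is not None:
    # e.g. surface the failure as a 400 response to API Gateway
    print({"statusCode": 400, "body": error_message})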
def mark_job_batch_complete(job_level_batch):
    """Mark the job batch as complete, trigger side effects if parent is complete"""
    log.logger.info(f"Signaling batch_meta to resume execution {job_level_batch}")
    batch_id = job_level_batch[BatchMetadataTableAttributes.BATCH_ID]

    if job_level_batch[BatchMetadataTableAttributes.BATCH_STATUS] != BatchStatus.WAIT_FOR_SMGT_RESPONSE:
        log.logger.error("Invalid batch status, ignoring request")
        return

    db.update_batch_status(batch_id, BatchStatus.COMPLETE)

    # Copy worker metrics from the groundtruth bucket to the raw_worker_metrics
    # folder in the glue bucket.
    job_output_location = job_level_batch["JobOutputLocation"]
    bucket_name = job_output_location.split("/")[2]
    groundtruth_bucket = s3.Bucket(bucket_name)
    prefix = "/".join(job_output_location.split("/")[3:6])
    for obj in groundtruth_bucket.objects.filter(Prefix=prefix):
        # Only copy worker-response JSON files, skipping per-image artifacts.
        if (
            obj.key.endswith(".json")
            and "worker-response" in obj.key
            and not obj.key.endswith(".jpg.json")
        ):
            new_key = f"raw_worker_metrics/{'/'.join(obj.key.split('/')[1:])}"
            s3_client.copy_object(
                Bucket=glue_bucket_name,
                CopySource=f"{bucket_name}/{obj.key}",
                Key=new_key,
            )

    parent_batch_id = job_level_batch[BatchMetadataTableAttributes.PARENT_BATCH_ID]
    if not db.update_batch_child_count(parent_batch_id, 1):
        # Parent still has incomplete children; nothing more to do yet.
        return

    parent_batch = db.get_batch_metadata(parent_batch_id)
    try:
        task_token = parent_batch[BatchMetadataTableAttributes.STATE_TOKEN]
    except KeyError as err:
        raise Exception(f"missing state token on batch: {parent_batch_id}") from err

    # Send status token to step functions.
    response = sfn_client.send_task_success(
        taskToken=task_token, output=json.dumps({"batch_id": parent_batch_id})
    )
    log.logger.info(f"Response for Step function token {task_token}: {response}")
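
# A hypothetical shape for the job_level_batch record consumed above. The key
# strings are guesses at the BatchMetadataTableAttributes constants' values,
# and the ids and S3 URI are placeholders, not values from this project.
SAMPLE_JOB_LEVEL_BATCH = {
    "BatchId": "batch-2021-05-01-first_level-job1",
    "BatchStatus": "WAIT_FOR_SMGT_RESPONSE",  # any other status is ignored above
    "ParentBatchId": "batch-2021-05-01-first_level",
    "JobOutputLocation": "s3://example-gt-bucket/runs/job1/output.manifest",
}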
def get_batch_description(batch_id):
    """
    Looks up a batch using the given batch id and validates that the batch is of
    appropriate type, then returns a human readable representation.

    :param batch_id: Id of batch to convert to human readable description
    :returns: json serializable description of a given batch
    """
    batch_metadata = db.get_batch_metadata(batch_id)

    # User should only be querying for parent batches of type "INPUT", not frame
    # level batches.
    if batch_metadata["BatchMetadataType"] != BatchMetadataType.INPUT:
        logger.error(
            "User requested existing batch, but it is of the wrong type (not INPUT): %s",
            batch_id,
        )
        return None

    # Convert batch metadata to something user presentable.
    return input_batch_to_human_readable(batch_metadata)
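
# Hypothetical usage of get_batch_description; the batch id is a placeholder.
# A None result means the batch exists but is not a top-level INPUT batch.
description = get_batch_description("batch-2021-05-01")
if description is None:
    print({"statusCode": 404, "body": "batch not found"})
else:
    print({"statusCode": 200, "body": json.dumps(description, default=str)})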
def lambda_handler(event, context): """Lambda function that executes batch creation API Parameters ---------- event: dict, required API gateway request with an input SQS arn, output SQS arn context: object, required Lambda Context runtime methods and attributes Context doc: https://docs.aws.amazon.com/lambda/latest/dg/python-context-object.html Returns ------ Lambda Output Format: dict Return doc: https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-lambda-proxy-integrations.html """ sfn_client = get_boto_client("stepfunctions", context.invoked_function_arn) log.log_request_and_context(event, context) body = json.loads(event.get("body")) batch_id = body.get("batchId") down_sampling_rate = body.get("downSamplingRate") input_metadata_batch_id = f"{batch_id}-{BatchMetadataType.HUMAN_INPUT_METADATA.lower()}" input_metadata_batch = db.get_batch_metadata(input_metadata_batch_id) if input_metadata_batch: return construct_validation_error( "The system indicates the you have already input the down sampling rate " + f'{input_metadata_batch.get("DownSamplingRate")}') if batch_id is None: return construct_validation_error("BatchId is required.") if down_sampling_rate is None: return construct_validation_error("DownSampling rate is required.") batch_metadata = db.get_batch_metadata(batch_id) if not batch_metadata: return construct_validation_error( f"BatchMetadata not found for the batchId: {batch_id}") else: if down_sampling_rate < 0 or down_sampling_rate > 100: return construct_validation_error( "Expected down sampling range in between 0 to 100.") first_level_batch = db.get_child_batch_metadata( batch_id, BatchMetadataType.FIRST_LEVEL) job_output_location = first_level_batch[ BatchMetadataTableAttributes.JOB_OUTPUT_LOCATION] state_token = batch_metadata.get(BatchMetadataTableAttributes.STATE_TOKEN) if not state_token: return construct_validation_error( f"The system indicates the batch exeuction is not currently at the wait step {batch_metadata}" ) sfn_client.send_task_success( taskToken=batch_metadata[BatchMetadataTableAttributes.STATE_TOKEN], output=json.dumps({ "batch_id": batch_metadata[ BatchMetadataTableAttributes.FIRST_LEVEL_BATCH_METADATA_ID], "s3_output_path": job_output_location, "down_sampling_rate": down_sampling_rate, "token_sent_source_arn": context.invoked_function_arn, }), ) db.insert_batch_metadata_input( batch_id=input_metadata_batch_id, parent_batch_id=batch_id, down_sampling_rate=down_sampling_rate, input_manifest=job_output_location, batch_status=BatchStatus.COMPLETE, ) response = { "statusCode": 200, "body": "Successfully input metadata to resume batch execution : " + f"batchId : {batch_id}, downSamplingRate: {down_sampling_rate}", "isBase64Encoded": False, } return response
def lambda_handler(event, context): """Lambda function that ... Down sampling of the input manifest to send to the next step Parameters ---------- event: dict, required context: object, required Lambda Context runtime methods and attributes Context doc: https://docs.aws.amazon.com/lambda/latest/dg/python-context-object.html Returns ------ Lambda Output Format: dict """ log.log_request_and_context(event, context) batch_id = event["batch_id"] batch_metadata = db.get_batch_metadata(batch_id) current_metadata_type = batch_metadata[BatchMetadataTableAttributes.BATCH_METADATA_TYPE] if current_metadata_type == BatchMetadataType.FIRST_LEVEL: processing_output_job_level = 1 elif current_metadata_type == BatchMetadataType.SECOND_LEVEL : processing_output_job_level = 2 else : processing_output_job_level = 3 parent_batch_id = batch_metadata[BatchMetadataTableAttributes.PARENT_BATCH_ID] parent_batch_metadata = db.get_batch_metadata(parent_batch_id) current_level_completed_labeling_jobs = [] future_level_labeling_jobs = [] labeling_jobs = parent_batch_metadata["LabelingJobs"] for labeling_job in labeling_jobs: if labeling_job["jobLevel"] == processing_output_job_level: current_level_completed_labeling_jobs.append(labeling_job) elif labeling_job["jobLevel"] > processing_output_job_level: future_level_labeling_jobs.append(labeling_job) for completed_labeling_job in current_level_completed_labeling_jobs: completed_labeling_job_name = completed_labeling_job["jobName"] for future_level_labeling_job in future_level_labeling_jobs: if completed_labeling_job_name == future_level_labeling_job["inputConfig"]["chainFromJobName"] and \ future_level_labeling_job["inputConfig"].get("downSamplingRate"): future_level_labeling_job_name = future_level_labeling_job["jobName"] job_level_batch_metadata = db.get_batch_metadata_by_labeling_job_name(completed_labeling_job_name, BatchMetadataType.JOB_LEVEL)[0] completed_job_output_location = \ job_level_batch_metadata[BatchMetadataTableAttributes.JOB_OUTPUT_LOCATION] s3_object = s3_accessor.fetch_s3(completed_job_output_location) content = s3_object.decode('utf-8') items = content.splitlines() down_sample_rate = future_level_labeling_job["inputConfig"]["downSamplingRate"] down_sampled_data = down_sample_to_proportion(items, down_sample_rate) future_level_labeling_input_location = f"s3://{batch_processing_bucket_name}/batch_manifests/" \ f"{future_level_labeling_job_name}/processed/data.manifest" s3_accessor.put_s3(future_level_labeling_input_location, "\n".join(down_sampled_data)) batch_id = f'{parent_batch_id}-{future_level_labeling_job_name}-' \ f'{BatchMetadataType.PROCESS_LEVEL.lower()}' db.insert_processed_input_batch_metadata(parent_batch_id, batch_id, future_level_labeling_job_name, future_level_labeling_input_location) return None