def collect_inference_inputs(s3_input_uri):
    """
    Gather everything that describes the input side of an inference run.

    The object referenced by ``s3_input_uri`` is downloaded and passed to
    ``get_sources``; the number of sources found is logged.

    Args:
        s3_input_uri: S3 URI of the inference input.

    Returns:
        A 3-tuple ``(s3_ref, downloaded_input, sources)``: the reference
        built by ``S3Ref.from_uri``, the object returned by ``download``
        and the result of ``get_sources`` on that object.
    """
    input_ref = S3Ref.from_uri(s3_input_uri)
    downloaded_input = download(input_ref)
    sources = get_sources(downloaded_input)

    source_count = len(sources)
    logger.info("Collected {} inference inputs.".format(source_count))

    return input_ref, downloaded_input, sources
def collect_inference_outputs(inference_output_uri):
    """
    Collect the predictions written by an inference run.

    The output file ``unlabeled.manifest.out`` is looked up directly under
    ``inference_output_uri``, downloaded and passed to ``get_predictions``;
    the number of predictions found is logged.

    Args:
        inference_output_uri: S3 URI prefix under which the inference
            output was written. A trailing "/" is optional.

    Returns:
        The result of ``get_predictions`` on the downloaded output file.
    """
    sagemaker_output_file = "unlabeled.manifest.out"

    # Plain concatenation only yields the intended key when the prefix
    # already ends with "/"; add the separator when the caller omitted it.
    # Existing trailing slashes are left untouched so that prefixes which
    # already worked keep resolving to exactly the same key.
    output_prefix = inference_output_uri
    if not output_prefix.endswith("/"):
        output_prefix += "/"
    prediction_output_uri = output_prefix + sagemaker_output_file

    prediction_output_s3 = S3Ref.from_uri(prediction_output_uri)
    prediction_output = download(prediction_output_s3)
    predictions = get_predictions(prediction_output)
    logger.info("Collected {} inference outputs.".format(len(predictions)))
    return predictions
def lambda_handler(event, context):
    """
    Merge a partial output manifest into the full input manifest.

    Both manifests are downloaded from S3, combined with
    ``merge_manifests`` and the result is written, one JSON document per
    line, back to the location of the input manifest.

    Args:
        event: Mapping that must contain ``"ManifestS3Uri"`` (the full
            input manifest) and ``"OutputS3Uri"`` (the partial output).
        context: Lambda context object; not used.
    """
    manifest_uri = event["ManifestS3Uri"]
    manifest_ref = S3Ref.from_uri(manifest_uri)
    full_manifest = download(manifest_ref)

    partial_uri = event["OutputS3Uri"]
    partial_ref = S3Ref.from_uri(partial_uri)
    partial_manifest = download(partial_ref)

    download_message = "Downloaded input and output manifests {}, {}".format(
        manifest_uri, partial_uri)
    logger.info(download_message)

    merged_records = merge_manifests(full_manifest, partial_manifest)

    # Serialize the merged records as JSON lines and overwrite the input
    # manifest with them.
    buffer = StringIO()
    buffer.writelines(
        json.dumps(record) + "\n" for record in merged_records.values()
    )
    upload(buffer, manifest_ref)

    logger.info("Uploaded merged file to {}".format(manifest_ref.get_uri()))
def lambda_handler(event, context):
    """
    Add a sequential ``id`` field to each record in the input manifest.

    The manifest referenced by ``event["ManifestS3Uri"]`` is downloaded,
    every non-blank line is parsed as JSON, given an ``id`` (0, 1, 2, ...
    in file order, replacing any existing ``id`` key) and the rewritten
    manifest is uploaded to the same S3 location.

    Args:
        event: Mapping that must contain ``"ManifestS3Uri"``.
        context: Lambda context object; not used.

    Returns:
        The ``event`` object that was passed in.

    Raises:
        KeyError: If ``event`` has no ``"ManifestS3Uri"`` entry.
        ValueError: If a non-blank line is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    s3_input_uri = event["ManifestS3Uri"]
    s3_input = S3Ref.from_uri(s3_input_uri)
    inp_file = download(s3_input)
    logger.info("Downloaded file from {} to {}".format(s3_input_uri, inp_file))

    out_file = StringIO()
    total = 0
    for line in inp_file:
        # A blank line (e.g. stray empty line at the end of the manifest)
        # is not a record; json.loads would raise on it and abort the run.
        if not line.strip():
            continue
        data = json.loads(line)
        # The running record count doubles as the id, so ids stay gap-free
        # even when blank lines were skipped.
        data["id"] = total
        out_file.write(json.dumps(data) + "\n")
        total += 1
    logger.info("Added id field to {} records".format(total))

    # Uploading back to the same location where we downloaded the file from.
    upload(out_file, s3_input)
    logger.info("Uploaded updated file from {} to {}".format(out_file, s3_input_uri))
    return event