예제 #1
0
def collect_inference_inputs(s3_input_uri):
    """
    Download the inference input stored at ``s3_input_uri`` and extract its sources.

    :param s3_input_uri: S3 URI of the inference input; parsed with
        ``S3Ref.from_uri``.
    :return: a 3-tuple ``(s3_ref, inference_input, sources)`` holding the parsed
        S3 reference, whatever ``download`` returned for it, and the result of
        ``get_sources`` on that download.
    """
    input_ref = S3Ref.from_uri(s3_input_uri)
    downloaded_input = download(input_ref)
    sources = get_sources(downloaded_input)

    source_count = len(sources)
    logger.info("Collected {} inference inputs.".format(source_count))
    return input_ref, downloaded_input, sources
예제 #2
0
def collect_inference_outputs(inference_output_uri):
    """
    Download the inference output file and extract the predictions from it.

    The file name ``unlabeled.manifest.out`` is appended to
    ``inference_output_uri``, which is treated as an S3 prefix. A "/" separator
    is inserted when the prefix does not already end with one, so both
    ``s3://bucket/out`` and ``s3://bucket/out/`` resolve to
    ``s3://bucket/out/unlabeled.manifest.out``.

    :param inference_output_uri: S3 URI prefix under which the inference output
        file is stored (with or without a trailing "/").
    :return: the predictions produced by ``get_predictions`` from the
        downloaded output file.
    """
    # NOTE(review): presumably the batch transform output written for an
    # "unlabeled.manifest" input -- confirm against the job configuration.
    sagemaker_output_file = "unlabeled.manifest.out"

    # Plain concatenation silently yields ".../prefixunlabeled.manifest.out"
    # when the caller omits the trailing slash; add the separator only when it
    # is missing so URIs that already end in "/" behave exactly as before.
    if inference_output_uri.endswith("/"):
        output_prefix = inference_output_uri
    else:
        output_prefix = inference_output_uri + "/"
    prediction_output_uri = output_prefix + sagemaker_output_file

    prediction_output_s3 = S3Ref.from_uri(prediction_output_uri)
    prediction_output = download(prediction_output_s3)
    predictions = get_predictions(prediction_output)
    logger.info("Collected {} inference outputs.".format(len(predictions)))
    return predictions
def lambda_handler(event, context):
    """
    Merge a partial output manifest into the full input manifest.

    Both manifests are downloaded from the S3 URIs found in
    ``event["ManifestS3Uri"]`` (full input) and ``event["OutputS3Uri"]``
    (partial output) and combined with ``merge_manifests``. Each value of the
    merged result is serialized as one JSON document per line and uploaded to
    the location the full input manifest was downloaded from.

    :param event: mapping that provides the ``ManifestS3Uri`` and
        ``OutputS3Uri`` keys.
    :param context: unused.
    :return: None.
    """
    manifest_uri = event["ManifestS3Uri"]
    manifest_ref = S3Ref.from_uri(manifest_uri)
    full_manifest = download(manifest_ref)

    partial_uri = event["OutputS3Uri"]
    partial_ref = S3Ref.from_uri(partial_uri)
    partial_manifest = download(partial_ref)

    download_message = "Downloaded input and output manifests {}, {}".format(
        manifest_uri, partial_uri
    )
    logger.info(download_message)

    merged_records = merge_manifests(full_manifest, partial_manifest)

    # Serialize the merged manifest as JSON lines and send it back to the
    # location the full input manifest came from.
    buffer = StringIO()
    buffer.writelines(
        json.dumps(record) + "\n" for record in merged_records.values()
    )
    upload(buffer, manifest_ref)

    logger.info("Uploaded merged file to {}".format(manifest_ref.get_uri()))
def lambda_handler(event, context):
    """
    Add a sequential ``id`` field to every record of the input manifest.

    The manifest at ``event["ManifestS3Uri"]`` is downloaded and iterated line
    by line; each line is parsed as JSON, given an ``id`` equal to its
    zero-based position, and re-serialized as one JSON document per line. The
    result is uploaded to the same S3 location it was downloaded from.

    :param event: mapping that provides the ``ManifestS3Uri`` key.
    :param context: unused.
    :return: the ``event`` that was passed in.
    """
    manifest_uri = event["ManifestS3Uri"]
    manifest_ref = S3Ref.from_uri(manifest_uri)

    manifest_lines = download(manifest_ref)
    logger.info("Downloaded file from {} to {}".format(manifest_uri, manifest_lines))

    numbered = StringIO()
    # Doubles as the id for the next record and the running record count.
    next_id = 0
    for raw_line in manifest_lines:
        record = json.loads(raw_line)
        record["id"] = next_id
        numbered.write(json.dumps(record) + "\n")
        next_id += 1
    logger.info("Added id field to {} records".format(next_id))

    # The updated manifest goes back to the location it was read from.
    upload(numbered, manifest_ref)
    logger.info("Uploaded updated file from {} to {}".format(numbered, manifest_uri))
    return event