コード例 #1
0
def write_auto_annotations(simple_al, sources, predictions,
                           inference_input_s3_ref):
    """
    Serialize the auto annotations as JSON lines and upload them to s3.

    The annotations come from ``simple_al.autoannotate(predictions, sources)``.
    They are written one JSON document per line and uploaded to the
    reference that ``create_ref_at_parent_key`` builds from
    ``inference_input_s3_ref`` and the name "autoannotated.manifest".

    Returns a tuple ``(uri, auto_annotations)``: the uri of the uploaded
    manifest and the object returned by ``autoannotate``, unchanged.
    """
    logger.info("Generating auto annotations where confidence is high.")
    auto_annotations = simple_al.autoannotate(predictions, sources)

    # Buffer the manifest in memory, one JSON record per line.
    manifest_buffer = StringIO()
    manifest_buffer.writelines(
        json.dumps(annotation) + "\n" for annotation in auto_annotations)

    auto_dest = create_ref_at_parent_key(inference_input_s3_ref,
                                         "autoannotated.manifest")
    upload(manifest_buffer, auto_dest)
    logger.info("Uploaded autoannotations to {}.".format(auto_dest.get_uri()))
    return auto_dest.get_uri(), auto_annotations
コード例 #2
0
def write_selector_file(simple_al, sources, predictions,
                        inference_input_s3_ref, inference_input,
                        auto_annotations):
    """
    Write the selection manifest to s3.

    The manifest lists the records that should be labeled by humans next.
    ``simple_al.select_for_labeling(predictions, auto_annotations)`` supplies
    the selected ids; every JSON line of ``inference_input`` whose "id" is
    among them is copied to the manifest, which is uploaded as
    "selection.manifest" next to ``inference_input_s3_ref``.

    ``inference_input`` is rewound to its start after it has been read.
    ``sources`` is accepted but not used here.

    Returns a tuple ``(uri, selections)``: the uri of the uploaded manifest
    and the object returned by ``select_for_labeling``.
    """
    logger.info("Selecting input for next manual annotation")
    selections = simple_al.select_for_labeling(predictions, auto_annotations)
    chosen_ids = set(selections)

    selection_data = StringIO()
    # Lazily parse each input line and keep only the selected records.
    records = (json.loads(raw_line) for raw_line in inference_input)
    selection_data.writelines(
        json.dumps(record) + "\n"
        for record in records
        if record["id"] in chosen_ids)

    # Leave the input stream positioned at the start again.
    inference_input.seek(0)

    selection_dest = create_ref_at_parent_key(inference_input_s3_ref,
                                              "selection.manifest")
    upload(selection_data, selection_dest)
    logger.info("Uploaded selections to {}.".format(selection_dest.get_uri()))
    return selection_dest.get_uri(), selections
コード例 #3
0
def lambda_handler(event, context):
    """
    Merge a partial output manifest into the full input manifest.

    Reads the manifest at ``event["ManifestS3Uri"]`` and the partial output
    at ``event["OutputS3Uri"]``, combines them with ``merge_manifests`` and
    uploads the merged records, one JSON document per line, back to the
    ``ManifestS3Uri`` location. Nothing is returned.
    """
    manifest_uri = event["ManifestS3Uri"]
    manifest_ref = S3Ref.from_uri(manifest_uri)
    full_manifest = download(manifest_ref)

    output_uri = event["OutputS3Uri"]
    partial_manifest = download(S3Ref.from_uri(output_uri))

    logger.info("Downloaded input and output manifests {}, {}".format(
        manifest_uri, output_uri))

    merged_records = merge_manifests(full_manifest, partial_manifest)

    # Serialize the merged records and overwrite the original manifest.
    merged_buffer = StringIO()
    merged_buffer.writelines(
        json.dumps(record) + "\n" for record in merged_records.values())
    upload(merged_buffer, manifest_ref)
    logger.info("Uploaded merged file to {}".format(manifest_ref.get_uri()))
コード例 #4
0
def lambda_handler(event, context):
    """
    Add a sequential "id" field to every record of the input manifest.

    The manifest at ``event["ManifestS3Uri"]`` is downloaded and each JSON
    line receives an "id" equal to its zero-based position in the file.
    The result is uploaded back to the same location.

    Returns ``event`` unchanged.
    """
    s3_input_uri = event["ManifestS3Uri"]
    s3_input = S3Ref.from_uri(s3_input_uri)

    inp_file = download(s3_input)
    logger.info("Downloaded file from {} to {}".format(s3_input_uri, inp_file))

    out_file = StringIO()
    # A single counter serves as both the next id and the running total.
    record_count = 0
    for raw_line in inp_file:
        record = json.loads(raw_line)
        record["id"] = record_count
        out_file.write(json.dumps(record) + "\n")
        record_count += 1
    logger.info("Added id field to {} records".format(record_count))

    # The updated manifest overwrites the object it was downloaded from.
    upload(out_file, s3_input)
    logger.info("Uploaded updated file from {} to {}".format(out_file, s3_input_uri))
    return event