def write_auto_annotations(simple_al, sources, predictions, inference_input_s3_ref):
    """
    Serialize the auto annotations as JSON lines and upload them to s3.

    The annotations come from ``simple_al.autoannotate(predictions, sources)``.
    Each one is dumped as a single JSON line, and the resulting manifest is
    uploaded to the reference that ``create_ref_at_parent_key`` builds from
    ``inference_input_s3_ref`` and the name "autoannotated.manifest".

    Returns a tuple ``(uri, auto_annotations)``: the uri of the uploaded
    manifest and the object returned by ``simple_al.autoannotate``.
    """
    logger.info("Generating auto annotations where confidence is high.")
    auto_annotations = simple_al.autoannotate(predictions, sources)

    # One JSON document per line.
    manifest_buffer = StringIO()
    manifest_buffer.writelines(
        json.dumps(annotation) + "\n" for annotation in auto_annotations
    )

    manifest_ref = create_ref_at_parent_key(
        inference_input_s3_ref, "autoannotated.manifest"
    )
    upload(manifest_buffer, manifest_ref)
    logger.info("Uploaded autoannotations to {}.".format(manifest_ref.get_uri()))
    return manifest_ref.get_uri(), auto_annotations
def write_selector_file(
    simple_al,
    sources,
    predictions,
    inference_input_s3_ref,
    inference_input,
    auto_annotations,
):
    """
    Write the selector file to s3.

    This file is used to decide which records should be labeled by humans next.
    The selection comes from
    ``simple_al.select_for_labeling(predictions, auto_annotations)``; every
    JSON line of ``inference_input`` whose "id" is part of that selection is
    copied into "selection.manifest", which is uploaded to the reference that
    ``create_ref_at_parent_key`` builds from ``inference_input_s3_ref``.

    ``inference_input`` is rewound to position 0 once it has been read.
    ``sources`` is not used by this function.

    Returns a tuple ``(uri, selections)``: the uri of the uploaded manifest and
    the object returned by ``simple_al.select_for_labeling``.
    """
    logger.info("Selecting input for next manual annotation")
    selections = simple_al.select_for_labeling(predictions, auto_annotations)
    # A set keeps the per-record membership test cheap.
    chosen_ids = set(selections)

    selection_buffer = StringIO()
    for raw_record in inference_input:
        record = json.loads(raw_record)
        if record["id"] not in chosen_ids:
            continue
        selection_buffer.write(json.dumps(record) + "\n")
    # Rewind so the input can be read again from the start.
    inference_input.seek(0)

    selection_ref = create_ref_at_parent_key(
        inference_input_s3_ref, "selection.manifest"
    )
    upload(selection_buffer, selection_ref)
    logger.info("Uploaded selections to {}.".format(selection_ref.get_uri()))
    return selection_ref.get_uri(), selections
def lambda_handler(event, context):
    """
    Merge partial outputs into the manifest and upload the result to s3.

    Reads the manifest at ``event["ManifestS3Uri"]`` and the partial output at
    ``event["OutputS3Uri"]``, combines them with ``merge_manifests`` and writes
    the merged records, one JSON document per line, back to the location given
    by ``event["ManifestS3Uri"]``.

    ``context`` is not used. Nothing is returned.
    """
    s3_input_uri = event["ManifestS3Uri"]
    source = S3Ref.from_uri(s3_input_uri)
    full_input = download(source)

    s3_output_uri = event["OutputS3Uri"]
    partial_output = download(S3Ref.from_uri(s3_output_uri))
    logger.info(
        "Downloaded input and output manifests {}, {}".format(
            s3_input_uri, s3_output_uri
        )
    )

    complete_manifest = merge_manifests(full_input, partial_output)

    # Serialize the merged records and overwrite the input manifest with them.
    merged_buffer = StringIO()
    merged_buffer.writelines(
        json.dumps(record) + "\n" for record in complete_manifest.values()
    )
    upload(merged_buffer, source)
    logger.info("Uploaded merged file to {}".format(source.get_uri()))
def lambda_handler(event, context):
    """
    Add a sequential id to each record in the input manifest.

    The manifest at ``event["ManifestS3Uri"]`` is downloaded and read line by
    line as JSON. Each record gets an "id" field equal to its zero-based
    position in the file, and the rewritten manifest is uploaded back to the
    same location.

    ``context`` is not used. Returns ``event`` unchanged.
    """
    s3_input_uri = event["ManifestS3Uri"]
    s3_input = S3Ref.from_uri(s3_input_uri)
    inp_file = download(s3_input)
    logger.info("Downloaded file from {} to {}".format(s3_input_uri, inp_file))

    out_file = StringIO()
    record_count = 0
    for record_id, raw_line in enumerate(inp_file):
        record = json.loads(raw_line)
        record["id"] = record_id
        out_file.write(json.dumps(record) + "\n")
        # Ids are zero-based, so the number of records seen is one more.
        record_count = record_id + 1
    logger.info("Added id field to {} records".format(record_count))

    # Uploading back to the same location where we downloaded the file from.
    upload(out_file, s3_input)
    logger.info(
        "Uploaded updated file from {} to {}".format(out_file, s3_input_uri)
    )
    return event