def get_match_score_ml(fields_with_candidates, bbox_of_all, text_to_score):
    """Match field names to candidate values with the siamese field-match model.

    NOTE(review): in the original source the entire matching body sat inside a
    triple-quoted string (dead code), followed by a second, UNTERMINATED string
    literal (``''' Hundarian`` -- presumably "Hungarian" matching), which is a
    syntax error. This rewrite terminates the dead code properly; the function
    is currently a stub that returns an empty result list, with the legacy body
    preserved as comments so it can be resurrected deliberately.

    Args:
        fields_with_candidates: detected fields with their candidate value
            strings (exact schema unclear from this file -- the legacy body
            both iterates it like a list of dicts and indexes it like a
            mapping; TODO confirm against callers).
        bbox_of_all: dict mapping text -> bounding box; the legacy body added
            a sentinel entry for the empty string.
        text_to_score: dict mapping text -> detection confidence.

    Returns:
        list: matched results (currently always empty).
    """
    ml_field_matching = MXNetPredictor("field-match-ml-2019-01-20-1")
    matched_results = []
    # --- legacy NN matching body (dead code, kept for reference) -----------
    # for field in fields_with_candidates:
    #     input_to_matching = {
    #         "field_names": [field["string"]],
    #         "field_values": fields_with_candidates[field["string"]]['candidates']}
    #     if (len(nearest) != 0):  # BUG: `nearest` was never defined here
    #         results = ml_field_matching.predict(input_to_matching)  # siamese string field match
    #     else:
    #         results = [{"field": field["string"], "value": '', "score": 0}]
    #         text_to_score[''] = ''  # BUG: other variants use 0 (numeric)
    #         bbox_of_all[''] = {'width': -1, 'top': -1, 'height': -1, 'left': -1}
    #     for result in sorted(results, key=lambda item: -item["score"]):
    #         matched_results.append({
    #             "field": result["field"], "value": result["value"],
    #             "score": result["score"],
    #             "field_detection_score": text_to_score[result["field"]],
    #             "value_detection_score": text_to_score[result["value"]],
    #             "value_bbox": bbox_of_all[result["value"]],
    #             "field_bbox": bbox_of_all[result["field"]]})
    # return results  # BUG: should have been matched_results
    # --- legacy "Hungarian" matching variant was truncated in the source ----
    return matched_results
def mxnet():
    """Flask-style handler: classify a digit image passed as an `image` query arg.

    Reads a string-serialized array from the `image` query parameter, sends it
    to a fixed SageMaker MXNet endpoint, and reports the most likely class.

    Returns:
        str: "Most likely answer: (label, probability)" for the top class.
    """
    mynumber = request.args.getlist('image')
    predictor = MXNetPredictor('sagemaker-mxnet-2018-04-12-21-02-24-757')
    # The query arg arrives as the string repr of a nested list; literal_eval
    # safely turns it back into Python data (no arbitrary code execution).
    mynumberarray = ast.literal_eval(mynumber[0])
    response = predictor.predict(mynumberarray)
    # Pair digit labels 0-9 with their probabilities and rank best-first.
    # (Original sorted on 1.0 - prob ascending; reverse=True is equivalent.)
    labeled_predictions = list(zip(range(10), response[0]))
    labeled_predictions.sort(key=lambda label_and_prob: label_and_prob[1],
                             reverse=True)
    answer = "Most likely answer: " + str(labeled_predictions[0])
    return answer
def transform_fn_inner(loaded_model, data, input_content_type, output_content_type):
    """End-to-end field/value extraction for one document image.

    Pipeline: parse the request -> build field records from the supplied
    field-name geometry -> call the localizer endpoint -> OCR the localized
    crops with the handwriting (HW) and hand-printed (HP) endpoints -> pair
    each field with its nearest OCR'd values via the siamese field-match
    endpoint -> keep only confident matches.

    Args:
        loaded_model: unused in this variant (remote endpoints do the work).
        data: JSON string with bucket/s3_image_file/field_names plus the
            loc/hw/hp endpoint names, optional model variants and credentials.
        input_content_type: unused.
        output_content_type: returned unchanged as the second tuple element.

    Returns:
        (matches_only, output_content_type) where matches_only is a list of
        dicts with field/value text, match score, detection score and bboxes.
        NOTE(review): unlike the other transform_fn variants this returns the
        raw list, not json.dumps(...) -- confirm the caller expects that.
    """
    print('Global EP')
    input_json = json.loads(data)
    bucket = input_json['bucket']
    image_file_name_s3 = input_json['s3_image_file']
    fields_names = input_json['field_names']
    loc_endpoint = input_json['loc_endpoint']
    hw_endpoint = input_json['hw_endpoint']
    hp_endpoint = input_json['hp_endpoint']
    hw_endpoint_model = input_json.get("hw_endpoint_model", "new")
    hp_endpoint_model = input_json.get("hp_endpoint_model", "new")
    # Optional explicit credentials; None falls back to the default chain.
    aws_access_key_id = input_json.get("aws_access_key_id", None)
    aws_secret_access_key = input_json.get("aws_secret_access_key", None)

    fields = []
    values = []
    bbox_of_all = {}    # text -> bbox, for both field names and values
    text_to_score = {}  # text -> detection confidence

    session = boto3.Session(region_name='us-west-2',
                            aws_access_key_id=aws_access_key_id,
                            aws_secret_access_key=aws_secret_access_key)
    sagemaker_session = sagemaker.Session(boto_session=session)

    print('Get fields.....')
    # Build one field record per field name. Each entry's geometry is a list
    # of word boxes (assumed [top, left, height, width] -- TODO confirm);
    # the first box anchors the field bbox and later boxes only extend width.
    field_id = 0
    for att, val in fields_names.items():
        bbox = {'top': -1, 'left': -1, 'width': -1, 'height': -1}
        j = 0
        for item in val:
            if isinstance(item[0], list):
                for bbx in item:
                    if j == 0:
                        bbox['top'] = bbx[0]
                        bbox['left'] = bbx[1]
                        bbox['height'] = bbx[2]
                        bbox['width'] = bbx[3]
                    else:
                        bbox['width'] = bbox['width'] + bbx[3]
                    j += 1
        fields.append({
            "id": field_id,
            "string": att,
            "bbox": bbox,
            "center": get_center(bbox)
        })
        bbox_of_all[att] = bbox
        field_id += 1

    # Get the values
    print('Get the values.....')
    print('Call the localizer.....')
    loc_predictor = MXNetPredictor(loc_endpoint, sagemaker_session)
    loc_out = loc_predictor.predict(
        {"url": "s3://{}/{}".format(bucket, image_file_name_s3)})
    print("localized")
    loc_out = loc_out['result']

    print('Call the HW.....')
    hw_predictor = JSONPredictor(hw_endpoint, sagemaker_session)
    hw_data = {
        "bucket": loc_out["bucket_name"],
        "file_name": loc_out["hw_key"],
        "model": hw_endpoint_model
    }
    json_predictions = hw_predictor.predict(hw_data)
    hw_predictions = json_predictions["result"]

    print('Call the HP.....')
    # FIX: the original constructed an MXNetPredictor here and immediately
    # overwrote it with this JSONPredictor; the dead construction is removed.
    hp_predictor = JSONPredictor(hp_endpoint, sagemaker_session)
    hp_data = {
        "bucket": loc_out["bucket_name"],
        "file_name": loc_out["hp_key"],
        "model": hp_endpoint_model
    }
    json_predictions = hp_predictor.predict(hp_data)
    hp_predictions = json_predictions["result"]

    # Fill in the values. HW results carry one bbox per region with several
    # text lines; HP results are flat records with x/y/w/h.
    # (FIX: removed a redundant second `values = []` re-initialization.)
    for value in hw_predictions:
        bbox = value['bbox']
        for line in value['lines']:
            bbox_of_all[line['text']] = bbox
            text_to_score[line['text']] = line["score"]
            values.append({
                "string": line['text'],
                "bbox": bbox,
                "center": get_center(bbox)
            })
    for value in hp_predictions:
        bbox_of_all[value['text']] = {
            'top': value['y'],
            'height': value['h'],
            'width': value['w'],
            'left': value['x']
        }
        text_to_score[value['text']] = value["score"]
        values.append({
            "string": value['text'],
            "bbox": bbox_of_all[value['text']],
            "center": get_center(bbox_of_all[value['text']])
        })

    print('Calling ML fields_match....')
    ml_field_matching = MXNetPredictor("field-match-ml-2019-01-20-1")
    dist_thresh = 300   # max pixel distance for a value to be a candidate
    score_thresh = 0.7  # minimum siamese score to accept a pairing
    ml_matched_results = []
    print('Query Siamese with NN ....')
    for field in fields:
        # Candidate values: non-empty strings within dist_thresh of the field.
        candidates = []
        for value in values:
            if value['string'] != '':
                l2_dist = l2_distance(field, value)
                if l2_dist < dist_thresh:
                    candidates.append((value, l2_dist))
        # Keep only the five nearest candidates.
        nearest = list(
            map(lambda item: item[0]["string"],
                sorted(candidates, key=lambda item: item[1])[:5]))
        input_to_matching = {
            "field_names": [field["string"]],
            "field_values": nearest
        }
        if len(nearest) != 0:
            # Siamese string field match.
            results = ml_field_matching.predict(input_to_matching)
        else:
            # No candidates: emit an explicit empty match; register sentinel
            # score/bbox for '' so the lookups below succeed.
            results = [{"field": field["string"], "value": '', "score": 0}]
            text_to_score[''] = 0
            bbox_of_all[''] = {
                'width': -1,
                'top': -1,
                'height': -1,
                'left': -1
            }
        for result in sorted(results, key=lambda item: -item["score"]):
            if result["score"] > score_thresh:
                ml_matched_results.append({
                    "field": result["field"],
                    "value": result["value"],
                    "score": result["score"],
                    "value_detection_score": text_to_score[result["value"]],
                    "value_bbox": bbox_of_all[result["value"]],
                    "field_bbox": bbox_of_all[result["field"]]
                })
            else:
                # Below threshold: blank the value/score but keep the rejected
                # value's detection score and bbox.
                # NOTE(review): the record then says value '' while value_bbox
                # points at the rejected text -- verify this is intentional.
                ml_matched_results.append({
                    "field": result["field"],
                    "value": '',
                    "score": 0,
                    "value_detection_score": text_to_score[result["value"]],
                    "value_bbox": bbox_of_all[result["value"]],
                    "field_bbox": bbox_of_all[result["field"]]
                })

    print('Filter out non matched fields....')
    # Drop the zero-score placeholders created above.
    matches_only = [r for r in ml_matched_results if r['score'] != 0]
    print('Finished')
    return matches_only, output_content_type
def transform_fn(loaded_model, data, input_content_type, output_content_type):
    """Re-pair detected field names with detected values via the siamese
    field-match endpoint.

    Input is the JSON output of the upstream field-match stage
    (`field_match_output`: one record per field with bbox/confidence and a
    tentative value). For every field, the five nearest detected values
    (within 100px of the field center) are re-scored by the
    "field-match-ml-2019-01-20-1" endpoint and the ranked pairs are returned.

    NOTE(review): this module binds `transform_fn` several times; only the
    last definition survives import -- this variant is shadowed.

    Returns:
        (json_string, output_content_type): JSON list of match records with
        field/value text, siamese score, detection scores and bboxes.
    """
    print('Global EP')
    initial_matching = json.loads(data)
    # Debug table of the upstream matching.
    # FIX: header order now matches the add_row() order below (the original
    # header listed "values" before "field score").
    original_match = prettytable.PrettyTable(
        ["field", "field score", "values", "value score"])
    fields = []
    values = []
    bbox_of_all = {}    # text -> bbox for fields and values alike
    text_to_score = {}  # text -> detection confidence
    for pair in initial_matching['field_match_output']:
        fields.append({
            "string": pair['field_name'],
            "bbox": pair['bbox'],
            "center": get_center(pair['bbox'])
        })
        bbox_of_all[pair['field_name']] = pair['bbox']
        text_to_score[pair['field_name']] = pair["confidence"]
        # An all -1 bbox marks "no value detected"; skip those values.
        if pair["value"]['bbox'] != {
                'top': -1,
                'height': -1,
                'width': -1,
                'left': -1
        }:
            values.append({
                "string": pair["value"]['field_value'],
                "bbox": pair["value"]['bbox'],
                "center": get_center(pair["value"]['bbox'])
            })
            text_to_score[pair["value"]
                          ['field_value']] = pair["value"]['confidence']
            bbox_of_all[pair["value"]['field_value']] = pair["value"]['bbox']
        original_match.add_row([
            pair['field_name'], pair["confidence"],
            pair["value"]['field_value'], pair["value"]['confidence']
        ])
    print('Calling ML fields_match')
    ml_field_matching = MXNetPredictor("field-match-ml-2019-01-20-1")
    predictions_act = prettytable.PrettyTable(
        ["field", "field score", "values", "value score", "score"])
    dist_thresh = 100  # max pixel distance for a value to be a candidate
    matched_results = []
    for field in fields:
        candidates = []
        for value in values:
            l2_dist = l2_distance(field, value)
            if l2_dist < dist_thresh:
                candidates.append((value, l2_dist))
        # Five nearest candidate strings for this field.
        nearest = list(
            map(lambda item: item[0]["string"],
                sorted(candidates, key=lambda item: item[1])[:5]))
        input_to_matching = {
            "field_names": [field["string"]],
            "field_values": nearest
        }
        if len(nearest) != 0:
            # Siamese string field match.
            results = ml_field_matching.predict(input_to_matching)
        else:
            # No candidates: emit an empty match with sentinel score/bbox.
            results = [{"field": field["string"], "value": '', "score": 0}]
            # FIX: was text_to_score[''] = '' (a string); every other variant
            # of this function uses 0, and downstream consumers treat
            # value_detection_score as numeric.
            text_to_score[''] = 0
            bbox_of_all[''] = {
                'width': -1,
                'top': -1,
                'height': -1,
                'left': -1
            }
        # Best score first.
        for result in sorted(results, key=lambda item: -item["score"]):
            predictions_act.add_row([
                result["field"],
                text_to_score[result["field"]],
                result["value"],
                text_to_score[result["value"]],
                result["score"],
            ])
            matched_results.append({
                "field": result["field"],
                "value": result["value"],
                "score": result["score"],
                "field_detection_score": text_to_score[result["field"]],
                "value_detection_score": text_to_score[result["value"]],
                "value_bbox": bbox_of_all[result["value"]],
                "field_bbox": bbox_of_all[result["field"]]
            })
    print('finished')
    return json.dumps(matched_results), output_content_type
def transform_fn(loaded_model, data, input_content_type, output_content_type):
    """SageMaker entry point: localize a document, get an initial field/value
    matching from the remote field-match endpoint, then re-pair each field
    with its nearest values using the local siamese model (local_ml_pairing).

    NOTE(review): this file binds `transform_fn` several times; only the last
    definition is live after import -- this variant is shadowed.

    Args:
        loaded_model: model handle from the serving container; forwarded to
            local_ml_pairing.
        data: JSON request body (bucket/file_name, optional endpoint
            overrides, credentials). Rebinding `data` below shadows this
            parameter.
        input_content_type: unused.
        output_content_type: echoed back alongside the JSON payload.

    Returns:
        (json_string, output_content_type) tuple.
    """
    parsed = json.loads(data)
    # Per-request endpoint overrides; defaults are pinned endpoint builds.
    loc_endpoint = parsed.get("loc_endpoint", "localization-model-2019-01-29")
    fm_endpoint = parsed.get("fm_endpoint",
                             'field-match-2019-01-24-12-39-05-522')
    hw_endpoint = parsed.get(
        "hw_endpoint", "pytorch-handwriting-ocr-2019-01-29-02-06-44-538")
    hp_endpoint = parsed.get("hp_endpoint", "hand-printed-model-2019-01-29-1")
    sp_endpoint = parsed.get("sp_endpoint", "hand-printed-model-2019-01-29-1")
    # access keys
    # NOTE(review): parsed but never used in this variant -- the
    # `sagemaker_session` referenced below must come from module scope.
    aws_access_key_id = parsed.get("aws_access_key_id", None)
    aws_secret_access_key = parsed.get("aws_secret_access_key", None)
    bucket = parsed.get("bucket")
    file_name = parsed.get("file_name")
    loc_predictor = MXNetPredictor(loc_endpoint, sagemaker_session)
    field_matching = JSONPredictor(fm_endpoint, sagemaker_session)
    try:
        loc_out = loc_predictor.predict(
            {"url": "s3://{}/{}".format(bucket, file_name)})
    except Exception as ex:
        # NOTE(review): the exception is only printed; if predict() failed,
        # `loc_out` stays unbound and the lookup below raises NameError.
        print(ex)
        tb = traceback.format_exc()
        # return error here
    print("localized")
    loc_out = loc_out["result"]
    print(loc_out)
    # Request payload for the field-match endpoint; note this rebinding
    # shadows the `data` parameter.
    data = {
        "hw_endpoint": hw_endpoint,
        "hp_endpoint": hp_endpoint,  # ''
        # "sp_endpoint": sp_endpoint,
        "field_names": [{
            "bucket": "ahmedb-test",
            "filename": "field_name_list.txt"
        }, {
            "bucket": "unum-files",
            "filename": "unum_field_names.txt"
        }],
        "field_names_ignore": [{
            "bucket": "ahmedb-test",
            "filename": "must_ignore.txt"
        }, {
            "bucket": "unum-files",
            "filename": "unum_must_ignore_field_names.txt"
        }],
        "hw_pickle": {
            "bucket": loc_out['bucket_name'],
            "filename": loc_out['hw_key']
        },
        "hp_pickle": {
            "bucket": loc_out['bucket_name'],
            "filename": loc_out['hp_key']
        },
        "page_image": {
            "bucket": bucket,
            "filename": file_name
        },
    }
    fields = []
    values = []
    text_to_score = {}  # text -> detection confidence
    bbox_of_all = {}    # text -> bounding box
    try:
        initial_matching = field_matching.predict(data)
    except Exception as ex:
        # NOTE(review): same swallow-and-continue pattern; a failure here
        # leaves `initial_matching` unbound and the loop below NameErrors.
        print(ex)
        tb = traceback.format_exc()
    for pair in initial_matching['field_match_output']:
        fields.append({
            "string": pair['field_name'],
            "bbox": pair['bbox'],
            "center": get_center(pair['bbox'])
        })
        bbox_of_all[pair['field_name']] = pair['bbox']
        text_to_score[pair['field_name']] = pair["confidence"]
        # An all -1 bbox marks "no value detected" for this field; skip it.
        if pair["value"]['bbox'] != {
                'top': -1,
                'height': -1,
                'width': -1,
                'left': -1
        }:
            values.append({
                "string": pair["value"]['field_value'],
                "bbox": pair["value"]['bbox'],
                "center": get_center(pair["value"]['bbox'])
            })
            text_to_score[pair["value"]
                          ['field_value']] = pair["value"]['confidence']
            bbox_of_all[pair["value"]['field_value']] = pair["value"]['bbox']
    print('Calling ML fields_match')
    #ml_field_matching = MXNetPredictor("field-match-ml-2019-01-20")
    predictions_act = prettytable.PrettyTable(
        ["field", "field score", "values", "value score", "score"])
    dist_thresh = 100  # max pixel distance for a value to be a candidate
    matched_results = []
    for field in fields:
        print(field["string"])
        candidates = []
        for value in values:
            print(value["string"])
            l2_dist = l2_distance(field, value)
            if (l2_dist < dist_thresh):
                candidates.append((value, l2_dist))
            print(str(l2_dist))
        # Five nearest candidate strings for this field.
        nearest = list(
            map(lambda item: item[0]["string"],
                sorted(candidates, key=lambda item: item[1])[:5]))
        input_to_matching = {
            "field_names": [field["string"]],
            "field_values": nearest
        }
        if (len(nearest) != 0):
            results = local_ml_pairing(
                input_to_matching, loaded_model
            )  #ml_field_matching.predict(input_to_matching) # siamese string field match
        else:
            # No candidate values: emit an empty match; register sentinel
            # entries so the dict lookups below succeed.
            # NOTE(review): the other variants set text_to_score[''] = 0;
            # the empty string here puts a str where scores are numeric.
            results = [{"field": field["string"], "value": '', "score": 0}]
            text_to_score[''] = ''
            bbox_of_all[''] = {
                'width': -1,
                'top': -1,
                'height': -1,
                'left': -1
            }
        # Best score first.
        for result in sorted(results, key=lambda item: -item["score"]):
            predictions_act.add_row([
                result["field"],
                text_to_score[result["field"]],
                result["value"],
                text_to_score[result["value"]],
                result["score"],
            ])
            matched_results.append({
                "field": result["field"],
                "value": result["value"],
                "score": result["score"],
                "field_detection_score": text_to_score[result["field"]],
                "value_detection_score": text_to_score[result["value"]],
                'value_bbox': bbox_of_all[result["value"]],
                'field_bbox': bbox_of_all[result["field"]]
            })
    print(predictions_act)
    #return json.dumps(results), output_content_type
    return json.dumps(matched_results), output_content_type
# Ad-hoc latency probe for a deployed SageMaker MXNet endpoint: send one
# random ImageNet-shaped tensor and print the round-trip time in seconds.
import mxnet as mx
import numpy as np
import time
import json
from sagemaker.session import Session
from sagemaker.mxnet import MXNetPredictor

#predictor = MXNetPredictor('mxnet-inference-2019-09-11-23-33-38-737', Session())
predictor = MXNetPredictor('mxnet-inference-2019-09-13-18-42-06-926', Session())

# Single NCHW sample (batch=1, 3 channels, 224x224); random pixels are fine
# for a pure timing measurement.
data = np.random.rand(1, 3, 224, 224)
# NOTE(review): the original also built {"instances": data} but never sent
# it -- the raw array is what actually goes to the endpoint. The dead local
# has been removed; confirm the endpoint indeed expects the bare array.
start = time.time()
scores = predictor.predict(data)
end = time.time()
print(end - start)
def transform_fn(loaded_model, data, input_content_type, output_content_type):
    """Fuse the upstream (visual/layout) field-value matching with a fresh
    textual matching from the siamese endpoint, keeping whichever pairing has
    the higher value-detection score per field.

    NOTE(review): this file binds `transform_fn` several times; only the last
    definition is live after import -- this variant is shadowed.

    Args:
        loaded_model: unused in this variant (matching goes through the
            remote "field-match-ml-2019-01-20-1" endpoint).
        data: JSON string with 'field_match_output' from the upstream stage.
        input_content_type: unused.
        output_content_type: echoed back with the JSON payload.

    Returns:
        (json_string, output_content_type): JSON list of the fused matches.
    """
    print('Global EP')
    initial_matching = json.loads(data)
    #parsed = json.loads(data)
    #initial_matching = parsed['fields_detected']
    #doc_img = parsed['doc']
    # Debug table of the upstream matching.
    original_match = prettytable.PrettyTable(
        ["field", "field score", "values", "value score"])
    fields = []
    values = []
    bbox_of_all = {}           # text -> bbox for both fields and values
    text_to_score = {}         # text -> detection confidence
    matched_results_dict = {}  # field_id -> upstream (visual) match record
    field_id = 0
    for pair in initial_matching['field_match_output']:
        fields.append({
            "id": field_id,
            "string": pair['field_name'],
            "bbox": pair['bbox'],
            "center": get_center(pair['bbox'])
        })
        bbox_of_all[pair['field_name']] = pair['bbox']
        text_to_score[pair['field_name']] = pair["confidence"]
        # An all -1 bbox marks "no value detected"; such values are skipped.
        if pair["value"]['bbox'] != {
                'top': -1,
                'height': -1,
                'width': -1,
                'left': -1
        }:
            values.append({
                "string": pair["value"]['field_value'],
                "bbox": pair["value"]['bbox'],
                "center": get_center(pair["value"]['bbox'])
            })
            text_to_score[pair["value"]
                          ['field_value']] = pair["value"]['confidence']
            bbox_of_all[pair["value"]['field_value']] = pair["value"]['bbox']
        # print({"strings": {"field": , "value": pair["value"]['field_value']},
        #        "bboxs": {"field": pair['bbox'], "value": pair["value"]['bbox']}})
        original_match.add_row([
            pair['field_name'], pair["confidence"],
            pair["value"]['field_value'], pair["value"]['confidence']
        ])
        # Record the upstream pairing keyed by field id for the fusion below.
        # NOTE(review): "score" reuses the field confidence -- verify intended.
        matched_results_dict[field_id] = {
            'field': pair['field_name'],
            "value": pair["value"]['field_value'],
            "score": pair["confidence"],
            "field_detection_score": pair["confidence"],
            "value_detection_score": pair["value"]['confidence'],
            "value_bbox": pair["value"]['bbox'],
            "field_bbox": pair['bbox']
        }
        field_id += 1
    print('Calling ML fields_match')
    ml_field_matching = MXNetPredictor("field-match-ml-2019-01-20-1")
    '''
    fields_strings = list(map(lambda item: item["string"], fields))
    values_strings = list(map(lambda item: item["string"], values))
    print(len(fields_strings))
    print(len(values_strings))
    data = {'field_names': fields_strings, 'field_values':values_strings}
    results = ml_field_matching.predict(data)
    for result in results:
        print(result)
    '''
    predictions_act = prettytable.PrettyTable(
        ["field", "field score", "values", "value score", "score"])
    dist_thresh = 100  # max pixel distance for a value to be a candidate
    ml_matched_results = []
    ml_matched_results_dict = {}  # field_id -> textual (siamese) match record
    # n_colors = 10
    #colors = {0: 'black', 1: 'red', 2: 'blue',..., n_colors:'magenta'}
    for field in fields:
        #print(field["string"])
        candidates = []
        for value in values:
            #print(value["string"])
            l2_dist = l2_distance(field, value)
            if (l2_dist < dist_thresh):
                candidates.append((value, l2_dist))
            #print(str(l2_dist))
        # Five nearest candidate strings for this field.
        nearest = list(
            map(lambda item: item[0]["string"],
                sorted(candidates, key=lambda item: item[1])[:5]))
        input_to_matching = {
            "field_names": [field["string"]],
            "field_values": nearest
        }
        #visualize_candidates(doc_img=doc_img, field=field, candidates=input_to_matching, color=colors[np.randint(0,n_colors)])
        if (len(nearest) != 0):
            results = ml_field_matching.predict(
                input_to_matching)  # siamese string field match
        else:
            # No candidates: emit an empty match with sentinel score/bbox.
            results = [{"field": field["string"], "value": '', "score": 0}]
            text_to_score[''] = 0
            bbox_of_all[''] = {
                'width': -1,
                'top': -1,
                'height': -1,
                'left': -1
            }
        # Best score first. NOTE(review): the dict entry below is rewritten
        # on every iteration, so it ends up holding the LAST (lowest-score)
        # result for the field -- confirm that is intended.
        for result in sorted(results, key=lambda item: -item["score"]):
            predictions_act.add_row([
                result["field"],
                text_to_score[result["field"]],
                result["value"],
                text_to_score[result["value"]],
                result["score"],
            ])
            ml_matched_results.append({
                "field": result["field"],
                "value": result["value"],
                "score": result["score"],
                "field_detection_score": text_to_score[result["field"]],
                "value_detection_score": text_to_score[result["value"]],
                "value_bbox": bbox_of_all[result["value"]],
                "field_bbox": bbox_of_all[result["field"]]
            })
            ml_matched_results_dict[field["id"]] = {
                "field": result["field"],
                "value": result["value"],
                "score": result["score"],
                "field_detection_score": text_to_score[result["field"]],
                "value_detection_score": text_to_score[result["value"]],
                "value_bbox": bbox_of_all[result["value"]],
                "field_bbox": bbox_of_all[result["field"]]
            }
    # Fuse textual with visual clues
    final_matched_results = []
    for field_id in ml_matched_results_dict:
        # NOTE(review): a zero visual detection score is promoted to 1 here,
        # which biases the comparison below toward the visual pairing --
        # confirm this is intentional.
        if (matched_results_dict[field_id]['value_detection_score'] == 0):
            matched_results_dict[field_id]['value_detection_score'] = 1
        if (matched_results_dict[field_id]['value_detection_score'] >
                ml_matched_results_dict[field_id]['value_detection_score']):
            final_matched_results.append(matched_results_dict[field_id])
        else:
            final_matched_results.append(ml_matched_results_dict[field_id])
    #visualize_matches(doc_img, mached_pairs_bbox)
    #print(predictions_act)
    print('finished')
    #return json.dumps(results), output_content_type
    return json.dumps(final_matched_results), output_content_type
def transform_fn(loaded_model, data, input_content_type, output_content_type):
    """Cluster-based field/value pairing: localize the page, fetch the initial
    field-match output, spatially cluster field names and values together
    (hierarchical clustering on their centers), merge clusters until every
    cluster has at least as many field names as values, then pair within each
    cluster using the local siamese model (local_ml_pairing).

    NOTE(review): this file binds `transform_fn` several times; being last,
    this is the definition that is live after import.

    Args:
        loaded_model: local siamese model handle, used by local_ml_pairing.
        data: JSON request (bucket/file_name, optional endpoint overrides and
            credentials). Rebinding `data` below shadows this parameter.
        input_content_type: unused.
        output_content_type: echoed back with the JSON payload.

    Returns:
        (json_string, output_content_type): JSON list of match records.
    """
    parsed = json.loads(data)
    # Per-request endpoint overrides; defaults are pinned endpoint builds.
    loc_endpoint = parsed.get("loc_endpoint", "localization-model-2019-01-29")
    fm_endpoint = parsed.get("fm_endpoint",
                             'field-match-2019-01-24-12-39-05-522')
    hw_endpoint = parsed.get("hw_endpoint",
                             "pytorch-handwriting-ocr-2019-01-29-02-06-44-538")
    hp_endpoint = parsed.get("hp_endpoint", "hand-printed-model-2019-01-29-1")
    sp_endpoint = parsed.get("sp_endpoint", "hand-printed-model-2019-01-29-1")
    # access keys
    # NOTE(review): read but unused here; `sagemaker_session` below must come
    # from module scope.
    aws_access_key_id = parsed.get("aws_access_key_id", None)
    aws_secret_access_key = parsed.get("aws_secret_access_key", None)
    bucket = parsed.get("bucket")
    file_name = parsed.get("file_name")
    loc_predictor = MXNetPredictor(loc_endpoint, sagemaker_session)
    field_matching = JSONPredictor(fm_endpoint, sagemaker_session)
    try:
        loc_out = loc_predictor.predict({"url": "s3://{}/{}".format(bucket, file_name)})
    except Exception as ex:
        # NOTE(review): swallowed; on failure `loc_out` is unbound and the
        # lookup below raises NameError.
        print(ex)
        tb = traceback.format_exc()
        # return error here
    print("localized")
    loc_out = loc_out["result"]
    print(loc_out)
    # Payload for the field-match endpoint (shadows the `data` parameter).
    data = {
        "hw_endpoint": hw_endpoint,
        "hp_endpoint": hp_endpoint,  # ''
        # "sp_endpoint": sp_endpoint,
        "field_names": [{"bucket": "ahmedb-test", "filename": "field_name_list.txt"},
                        {"bucket": "unum-files", "filename": "unum_field_names.txt"}],
        "field_names_ignore": [
            {"bucket": "ahmedb-test", "filename": "must_ignore.txt"},
            {"bucket": "unum-files", "filename": "unum_must_ignore_field_names.txt"}
        ],
        "hw_pickle": {"bucket": loc_out['bucket_name'], "filename": loc_out['hw_key']},
        "hp_pickle": {"bucket": loc_out['bucket_name'], "filename": loc_out['hp_key']},
        "page_image": {"bucket": bucket, "filename": file_name},
    }
    fields = []
    values = []
    text_to_score = {}  # text -> detection confidence
    bbox_of_all = {}    # text -> bounding box
    try:
        initial_matching = field_matching.predict(data)
    except Exception as ex:
        # NOTE(review): swallowed; on failure `initial_matching` is unbound
        # and the loop below raises NameError.
        print(ex)
        tb = traceback.format_exc()
    for pair in initial_matching['field_match_output']:
        fields.append({"string": pair['field_name'], "bbox": pair['bbox'], "center":
                       get_center(pair['bbox'])})
        bbox_of_all[pair['field_name']] = pair['bbox']
        text_to_score[pair['field_name']] = pair["confidence"]
        # An all -1 bbox marks "no value detected"; skip those values.
        if pair["value"]['bbox'] != {'top': -1, 'height': -1, 'width': -1, 'left': -1}:
            values.append({"string": pair["value"]['field_value'],
                           "bbox": pair["value"]['bbox'],
                           "center": get_center(pair["value"]['bbox'])})
            text_to_score[pair["value"]['field_value']] = pair["value"]['confidence']
            bbox_of_all[pair["value"]['field_value']] = pair["value"]['bbox']
    # Stack all field centers followed by all value centers; the index order
    # is relied on below to tell fields apart from values.
    points_2d = []
    for field in fields:
        points_2d.append(field["center"])
    for value in values:
        points_2d.append(value["center"])
    points_2d = np.array(points_2d)
    # clustering
    thresh = 250  # distance cut for hierarchical clustering (pixel units)
    clusters = hcluster.fclusterdata(points_2d, thresh, criterion="distance")
    groupings = defaultdict(lambda: {'field_names': [], 'field_values': []})
    for index, class_ in enumerate(clusters):
        # Indices >= len(fields) belong to the values block of points_2d.
        if index >= len(fields):
            groupings[class_]["field_values"].append(
                {"string": values[index - len(fields)]["string"],
                 "center": values[index - len(fields)]["center"]})
        else:
            groupings[class_]["field_names"].append(
                {"string": fields[index]["string"],
                 "center": fields[index]["center"]})
    # Reduce each cluster to plain string lists plus a mean center.
    for value in groupings.values():
        # NOTE(review): despite the name, this includes value centers too.
        field_names_centers = list(map(lambda item: item["center"], value['field_names'])) + \
            list(map(lambda item: item["center"], value['field_values']))
        if field_names_centers:
            center = np.mean(field_names_centers, axis=0)
        else:
            # Empty-cluster sentinel so distance sorting pushes it last.
            center = np.array([np.inf, np.inf])
        value['field_names'] = list(map(lambda item: item["string"], value['field_names']))
        value['field_values'] = list(map(lambda item: item["string"], value['field_values']))
        value['center'] = center
    while True:
        # merging things: a cluster with more values than field names cannot
        # be matched 1:1, so fold it into a nearby cluster and re-check.
        not_ready_for_matching_list = []
        for key in groupings:
            value = groupings[key]
            if len(value["field_values"]) > len(value["field_names"]):
                not_ready_for_matching_list.append((key, value))
        if len(not_ready_for_matching_list) == 0:
            break  # enough merging
        for key, not_ready_for_matching in not_ready_for_matching_list:
            del groupings[key]
            distances = sorted(
                list(map(lambda item: (item, np.linalg.norm(groupings[item]["center"] - not_ready_for_matching["center"])), groupings)),
                key=lambda item: item[1])
            # NOTE(review): the current key was already deleted above, so
            # distances[0] is the nearest remaining cluster; indexing [1]
            # merges into the SECOND-nearest. This looks like an off-by-one
            # carried over from a version that kept the current cluster in
            # the list -- confirm before changing.
            groupings[distances[1][0]] = {
                "field_values": groupings[distances[1][0]]["field_values"] + not_ready_for_matching["field_values"],
                "field_names": groupings[distances[1][0]]["field_names"] + not_ready_for_matching["field_names"],
                "center": np.mean([groupings[distances[1][0]]["center"], not_ready_for_matching["center"]], axis=0)
            }
    final_output_json = []
    # list of those {'value_detection_score': '', 'value': '', 'field_detection_score': 0.9559999999999998, 'score': 0, 'field': 'ATTENDING PHYSICIAN STATEMENT '}
    for cluster in [grouping for grouping in groupings.values()]:
        # De-duplicate strings within the cluster before pairing.
        cluster = {"field_names": list(set(cluster["field_names"])),
                   "field_values": list(set(cluster["field_values"]))}
        if cluster["field_names"] and cluster["field_values"]:
            results = local_ml_pairing(cluster, loaded_model)
            # Best score first.
            for result in sorted(results, key=lambda item: -item["score"]):
                final_output_json.append(
                    {'value_detection_score': text_to_score[result["value"]],
                     'value': result["value"],
                     'field_detection_score': text_to_score[result["field"]],
                     'score': result["score"],
                     'field': result["field"],
                     'value_bbox': bbox_of_all[result["value"]],
                     'field_bbox': bbox_of_all[result["field"]]
                     }
                )
        else:
            # Fields with no candidate values get explicit empty matches
            # with sentinel bboxes.
            for field_name in cluster["field_names"]:
                final_output_json.append(
                    {'value_detection_score': 0,
                     'value': '',
                     'field_detection_score': text_to_score[field_name],
                     'score': 0,
                     'field': field_name,
                     'value_bbox': {'width': -1, 'top': -1, 'height': -1, 'left': -1},
                     'field_bbox': bbox_of_all[field_name]
                     }
                )
    return json.dumps(final_output_json), output_content_type