Example #1
0
def get_match_score_ml(fields_with_candidates, bbox_of_all, text_to_score):
    ml_field_matching = MXNetPredictor("field-match-ml-2019-01-20-1")
    matched_results = []
    ''' NN
    for field in fields_with_candidates:
        
        input_to_matching = {"field_names": [field["string"]], "field_values": fields_with_candidates[field["string"]]['candidates']}
        if(len(nearest) != 0):
            results = ml_field_matching.predict(input_to_matching)  # siamese string field match
        else:
            results = [{"field": field["string"], "value": '', "score": 0}]
            text_to_score[''] = ''
            bbox_of_all[''] = {'width': -1, 'top': -1, 'height': -1, 'left': -1}
            
        for result in sorted(results, key=lambda item: -item["score"]):
            matched_results.append({"field": result["field"], 
                                    "value": result["value"], 
                                    "score": result["score"], 
                                    "field_detection_score": text_to_score[result["field"]], 
                                    "value_detection_score": text_to_score[result["value"]], 
                                    "value_bbox": bbox_of_all[result["value"]], 
                                    "field_bbox": bbox_of_all[result["field"]] })
    return results
    '''
    ''' Hundarian
Example #2
0
def mxnet():
    """Classify the array passed in the ``image`` query parameter.

    The first ``image`` value of the current request is parsed with
    ``ast.literal_eval`` and sent to the
    ``sagemaker-mxnet-2018-04-12-21-02-24-757`` endpoint.  The first row of
    the response is paired with the labels 0-9 and the pair with the highest
    value is reported.

    Returns:
        str: ``"Most likely answer: "`` followed by the ``(label, value)``
        tuple with the largest value.
    """
    raw_images = request.args.getlist('image')

    endpoint = MXNetPredictor('sagemaker-mxnet-2018-04-12-21-02-24-757')

    pixels = ast.literal_eval(raw_images[0])
    scores = endpoint.predict(pixels)

    # Ascending sort on (1 - value) puts the largest value first; the sort is
    # stable, so ties keep label order.
    ranked = sorted(zip(range(10), scores[0]),
                    key=lambda pair: 1.0 - pair[1])
    best = ranked[0]

    return "Most likely answer: " + str(best)
Example #3
0
def transform_fn_inner(loaded_model, data, input_content_type,
                       output_content_type):
    """Localize a page, read its text and pair field names with values.

    ``data`` is a JSON string.  Required keys: ``bucket``,
    ``s3_image_file``, ``field_names``, ``loc_endpoint``, ``hw_endpoint``
    and ``hp_endpoint``.  Optional keys: ``hw_endpoint_model`` and
    ``hp_endpoint_model`` (default ``"new"``), ``aws_access_key_id`` and
    ``aws_secret_access_key`` (default ``None``).

    ``field_names`` maps each field name to a list of items; every item
    whose first element is a list is read as a list of
    ``[top, left, height, width]`` boxes.  The first box seen for a field
    gives its bounding box and the widths of the following boxes are added
    to it.

    Pipeline: the localizer endpoint is called on the S3 image, its
    ``hw_key`` / ``hp_key`` outputs are passed to the HW and HP endpoints,
    and every recognised string becomes a candidate value.  For each field
    the (at most) five non-empty values closer than 300 according to
    ``l2_distance`` are scored by the ``field-match-ml-2019-01-20-1``
    endpoint.

    Args:
        loaded_model: Unused here.
        data: JSON request body described above.
        input_content_type: Unused here.
        output_content_type: Returned unchanged.

    Returns:
        tuple: ``(matches, output_content_type)`` where ``matches`` is a
        list (not JSON-encoded) of dicts with the keys ``field``, ``value``,
        ``score``, ``value_detection_score``, ``value_bbox`` and
        ``field_bbox``, one per model result whose score exceeds 0.7.
    """
    print('Global EP')
    input_json = json.loads(data)
    bucket = input_json['bucket']
    image_file_name_s3 = input_json['s3_image_file']

    fields_names = input_json['field_names']
    loc_endpoint = input_json['loc_endpoint']
    hw_endpoint = input_json['hw_endpoint']
    hp_endpoint = input_json['hp_endpoint']
    hw_endpoint_model = input_json.get("hw_endpoint_model", "new")
    hp_endpoint_model = input_json.get("hp_endpoint_model", "new")

    # access keys
    aws_access_key_id = input_json.get("aws_access_key_id", None)
    aws_secret_access_key = input_json.get("aws_secret_access_key", None)

    bbox_of_all = {}
    text_to_score = {}
    session = boto3.Session(region_name='us-west-2',
                            aws_access_key_id=aws_access_key_id,
                            aws_secret_access_key=aws_secret_access_key)
    sagemaker_session = sagemaker.Session(boto_session=session)

    print('Get fields.....')
    fields = []
    for field_id, (att, val) in enumerate(fields_names.items()):
        bbox = {'top': -1, 'left': -1, 'width': -1, 'height': -1}
        seen_box = False
        for item in val:
            # Only items that are lists of boxes contribute; empty items are
            # skipped instead of raising IndexError.
            if not (item and isinstance(item[0], list)):
                continue
            for bbx in item:
                if not seen_box:
                    bbox['top'] = bbx[0]
                    bbox['left'] = bbx[1]
                    bbox['height'] = bbx[2]
                    bbox['width'] = bbx[3]
                    seen_box = True
                else:
                    # Later boxes only widen the first one.
                    bbox['width'] = bbox['width'] + bbx[3]

        fields.append({
            "id": field_id,
            "string": att,
            "bbox": bbox,
            "center": get_center(bbox)
        })
        bbox_of_all[att] = bbox

    # Get the values
    print('Get the values.....')

    # Call the localizer
    print('Call the localizer.....')
    loc_predictor = MXNetPredictor(loc_endpoint, sagemaker_session)
    loc_out = loc_predictor.predict(
        {"url": "s3://{}/{}".format(bucket, image_file_name_s3)})
    print("localized")
    loc_out = loc_out['result']

    # Call the HW
    print('Call the HW.....')
    hw_predictor = JSONPredictor(hw_endpoint, sagemaker_session)
    hw_predictions = hw_predictor.predict({
        "bucket": loc_out["bucket_name"],
        "file_name": loc_out["hw_key"],
        "model": hw_endpoint_model
    })["result"]

    # Call the HP
    print('Call the HP.....')
    # A throw-away MXNetPredictor used to be built here and was overwritten
    # on the next line; only the JSON predictor is ever used.
    hp_predictor = JSONPredictor(hp_endpoint, sagemaker_session)
    hp_predictions = hp_predictor.predict({
        "bucket": loc_out["bucket_name"],
        "file_name": loc_out["hp_key"],
        "model": hp_endpoint_model
    })["result"]

    # Fill in the values
    values = []

    # HW: every line of a prediction shares the prediction's bbox.
    for value in hw_predictions:
        bbox = value['bbox']
        for line in value['lines']:
            bbox_of_all[line['text']] = bbox
            text_to_score[line['text']] = line["score"]
            values.append({
                "string": line['text'],
                "bbox": bbox,
                "center": get_center(bbox)
            })

    # HP: bbox is rebuilt from the x/y/w/h keys.
    for value in hp_predictions:
        hp_bbox = {
            'top': value['y'],
            'height': value['h'],
            'width': value['w'],
            'left': value['x']
        }
        bbox_of_all[value['text']] = hp_bbox
        text_to_score[value['text']] = value["score"]
        values.append({
            "string": value['text'],
            "bbox": hp_bbox,
            "center": get_center(hp_bbox)
        })

    print('Calling ML fields_match....')
    # NOTE(review): unlike the predictors above this one is created without
    # `sagemaker_session`, so the credentials from the request are not used
    # for it - confirm this is intended.
    ml_field_matching = MXNetPredictor("field-match-ml-2019-01-20-1")

    dist_thresh = 300
    score_thresh = 0.7
    matches_only = []

    print('Query Siamese with NN ....')
    for field in fields:
        candidates = []
        for value in values:
            if value['string'] != '':
                l2_dist = l2_distance(field, value)
                if l2_dist < dist_thresh:
                    candidates.append((value, l2_dist))

        nearest = [
            candidate["string"] for candidate, _ in sorted(
                candidates, key=lambda item: item[1])[:5]
        ]
        if not nearest:
            # Previously a score-0 placeholder was recorded here and then
            # filtered out again; skipping the field gives the same result.
            continue

        results = ml_field_matching.predict({
            "field_names": [field["string"]],
            "field_values": nearest
        })  # siamese string field match

        for result in sorted(results, key=lambda item: -item["score"]):
            # Results at or below the threshold used to be stored with
            # score 0 and dropped by the final filter.
            if result["score"] > score_thresh:
                matches_only.append({
                    "field": result["field"],
                    "value": result["value"],
                    "score": result["score"],
                    "value_detection_score": text_to_score[result["value"]],
                    "value_bbox": bbox_of_all[result["value"]],
                    "field_bbox": bbox_of_all[result["field"]]
                })

    print('Filter out non matched fields....')
    print('Finished')

    return matches_only, output_content_type
Example #4
0
def transform_fn(loaded_model, data, input_content_type, output_content_type):
    """Re-pair detected fields with nearby values via the field-match model.

    ``data`` is a JSON string whose ``field_match_output`` list holds one
    entry per detected field with the keys ``field_name``, ``bbox``,
    ``confidence`` and ``value`` (itself a dict with ``field_value``,
    ``bbox`` and ``confidence``).  A value whose bbox is all ``-1`` is not
    used as a candidate.

    For every field, the (at most) five candidate values closer than 100
    according to ``l2_distance`` are scored by the
    ``field-match-ml-2019-01-20-1`` endpoint.  A field without any nearby
    candidate yields a single placeholder record with an empty value,
    score ``0``, value detection score ``''`` and an all ``-1`` value bbox.

    Args:
        loaded_model: Unused here.
        data: JSON request body described above.
        input_content_type: Unused here.
        output_content_type: Returned unchanged.

    Returns:
        tuple: ``(json_string, output_content_type)``; the JSON encodes a
        list of records with the keys ``field``, ``value``, ``score``,
        ``field_detection_score``, ``value_detection_score``, ``value_bbox``
        and ``field_bbox``, grouped per field and ordered by descending
        score within a field.
    """
    print('Global EP')

    initial_matching = json.loads(data)
    no_bbox = {'top': -1, 'height': -1, 'width': -1, 'left': -1}

    # Debug table of the incoming pairs.  The header used to list "values"
    # before "field score", which did not match the order of the rows.
    original_match = prettytable.PrettyTable(
        ["field", "field score", "values", "value score"])
    fields = []
    values = []
    bbox_of_all = {}
    text_to_score = {}
    for pair in initial_matching['field_match_output']:
        field_name = pair['field_name']
        detected_value = pair["value"]

        fields.append({
            "string": field_name,
            "bbox": pair['bbox'],
            "center": get_center(pair['bbox'])
        })
        bbox_of_all[field_name] = pair['bbox']
        text_to_score[field_name] = pair["confidence"]

        if detected_value['bbox'] != no_bbox:
            values.append({
                "string": detected_value['field_value'],
                "bbox": detected_value['bbox'],
                "center": get_center(detected_value['bbox'])
            })
            text_to_score[
                detected_value['field_value']] = detected_value['confidence']
            bbox_of_all[detected_value['field_value']] = detected_value['bbox']

        original_match.add_row([
            field_name, pair["confidence"],
            detected_value['field_value'], detected_value['confidence']
        ])

    print('Calling ML fields_match')
    ml_field_matching = MXNetPredictor("field-match-ml-2019-01-20-1")

    # Debug table of the model output (printing is currently disabled).
    predictions_act = prettytable.PrettyTable(
        ["field", "field score", "values", "value score", "score"])
    dist_thresh = 100
    matched_results = []
    for field in fields:
        candidates = []
        for value in values:
            l2_dist = l2_distance(field, value)
            if l2_dist < dist_thresh:
                candidates.append((value, l2_dist))

        nearest = [
            candidate["string"] for candidate, _ in sorted(
                candidates, key=lambda item: item[1])[:5]
        ]
        if nearest:
            results = ml_field_matching.predict({
                "field_names": [field["string"]],
                "field_values": nearest
            })  # siamese string field match
        else:
            # No candidate nearby: emit a placeholder and register the empty
            # string so the lookups below succeed.
            results = [{"field": field["string"], "value": '', "score": 0}]
            text_to_score[''] = ''
            bbox_of_all[''] = {
                'width': -1,
                'top': -1,
                'height': -1,
                'left': -1
            }

        for result in sorted(results, key=lambda item: -item["score"]):
            field_score = text_to_score[result["field"]]
            value_score = text_to_score[result["value"]]
            predictions_act.add_row([
                result["field"],
                field_score,
                result["value"],
                value_score,
                result["score"],
            ])
            matched_results.append({
                "field": result["field"],
                "value": result["value"],
                "score": result["score"],
                "field_detection_score": field_score,
                "value_detection_score": value_score,
                "value_bbox": bbox_of_all[result["value"]],
                "field_bbox": bbox_of_all[result["field"]]
            })

    print('finished')

    return json.dumps(matched_results), output_content_type
Example #5
0
def transform_fn(loaded_model, data, input_content_type, output_content_type):
    """Localize a page, run field matching and re-pair fields with values.

    ``data`` is a JSON string.  ``bucket`` and ``file_name`` locate the page
    image; ``loc_endpoint``, ``fm_endpoint``, ``hw_endpoint``,
    ``hp_endpoint`` and ``sp_endpoint`` optionally override the default
    endpoint names.

    The localizer endpoint is called first; its ``bucket_name``, ``hw_key``
    and ``hp_key`` outputs are forwarded to the field-match endpoint.  Each
    entry of the returned ``field_match_output`` becomes a field, and its
    value (unless the value bbox is all ``-1``) becomes a candidate.  For
    every field the (at most) five candidates closer than 100 according to
    ``l2_distance`` are scored with ``local_ml_pairing`` using
    ``loaded_model``.  A field without nearby candidates yields a single
    placeholder record with an empty value and score ``0``.

    Args:
        loaded_model: Model handed to ``local_ml_pairing``.
        data: JSON request body described above.
        input_content_type: Unused here.
        output_content_type: Returned unchanged.

    Returns:
        tuple: ``(json_string, output_content_type)``; the JSON encodes a
        list of records with the keys ``field``, ``value``, ``score``,
        ``field_detection_score``, ``value_detection_score``, ``value_bbox``
        and ``field_bbox``.

    Raises:
        Exception: Whatever the localizer or field-match endpoint call
            raises.  These errors used to be swallowed, which then failed
            with ``UnboundLocalError`` on the next statement.
    """
    parsed = json.loads(data)

    loc_endpoint = parsed.get("loc_endpoint", "localization-model-2019-01-29")
    fm_endpoint = parsed.get("fm_endpoint",
                             'field-match-2019-01-24-12-39-05-522')

    hw_endpoint = parsed.get(
        "hw_endpoint", "pytorch-handwriting-ocr-2019-01-29-02-06-44-538")
    hp_endpoint = parsed.get("hp_endpoint", "hand-printed-model-2019-01-29-1")
    sp_endpoint = parsed.get("sp_endpoint", "hand-printed-model-2019-01-29-1")

    # NOTE(review): "aws_access_key_id" / "aws_secret_access_key" in the
    # request were read here but never used; the predictors below rely on the
    # `sagemaker_session` defined outside this function.

    bucket = parsed.get("bucket")
    file_name = parsed.get("file_name")

    loc_predictor = MXNetPredictor(loc_endpoint, sagemaker_session)
    field_matching = JSONPredictor(fm_endpoint, sagemaker_session)
    try:
        loc_out = loc_predictor.predict(
            {"url": "s3://{}/{}".format(bucket, file_name)})
    except Exception:
        # Log and propagate the real error instead of continuing with an
        # unbound `loc_out`.
        print(traceback.format_exc())
        raise
    print("localized")
    loc_out = loc_out["result"]
    print(loc_out)

    data = {
        "hw_endpoint": hw_endpoint,
        "hp_endpoint": hp_endpoint,
        "sp_endpoint": sp_endpoint,
        "field_names": [{
            "bucket": "ahmedb-test",
            "filename": "field_name_list.txt"
        }, {
            "bucket": "unum-files",
            "filename": "unum_field_names.txt"
        }],
        "field_names_ignore": [{
            "bucket": "ahmedb-test",
            "filename": "must_ignore.txt"
        }, {
            "bucket": "unum-files",
            "filename": "unum_must_ignore_field_names.txt"
        }],
        "hw_pickle": {
            "bucket": loc_out['bucket_name'],
            "filename": loc_out['hw_key']
        },
        "hp_pickle": {
            "bucket": loc_out['bucket_name'],
            "filename": loc_out['hp_key']
        },
        "page_image": {
            "bucket": bucket,
            "filename": file_name
        },
    }

    try:
        initial_matching = field_matching.predict(data)
    except Exception:
        # Same reasoning as for the localizer call above.
        print(traceback.format_exc())
        raise

    no_bbox = {'top': -1, 'height': -1, 'width': -1, 'left': -1}
    fields = []
    values = []
    text_to_score = {}
    bbox_of_all = {}
    for pair in initial_matching['field_match_output']:
        field_name = pair['field_name']
        detected_value = pair["value"]

        fields.append({
            "string": field_name,
            "bbox": pair['bbox'],
            "center": get_center(pair['bbox'])
        })
        bbox_of_all[field_name] = pair['bbox']
        text_to_score[field_name] = pair["confidence"]

        if detected_value['bbox'] != no_bbox:
            values.append({
                "string": detected_value['field_value'],
                "bbox": detected_value['bbox'],
                "center": get_center(detected_value['bbox'])
            })
            text_to_score[
                detected_value['field_value']] = detected_value['confidence']
            bbox_of_all[detected_value['field_value']] = detected_value['bbox']

    print('Calling ML fields_match')

    predictions_act = prettytable.PrettyTable(
        ["field", "field score", "values", "value score", "score"])
    dist_thresh = 100
    matched_results = []
    for field in fields:
        print(field["string"])
        candidates = []
        for value in values:
            print(value["string"])
            l2_dist = l2_distance(field, value)
            if l2_dist < dist_thresh:
                candidates.append((value, l2_dist))
                print(str(l2_dist))

        nearest = [
            candidate["string"] for candidate, _ in sorted(
                candidates, key=lambda item: item[1])[:5]
        ]
        if nearest:
            # siamese string field match, run on the locally loaded model
            results = local_ml_pairing(
                {
                    "field_names": [field["string"]],
                    "field_values": nearest
                }, loaded_model)
        else:
            # No candidate nearby: emit a placeholder and register the empty
            # string so the lookups below succeed.
            results = [{"field": field["string"], "value": '', "score": 0}]
            text_to_score[''] = ''
            bbox_of_all[''] = {
                'width': -1,
                'top': -1,
                'height': -1,
                'left': -1
            }

        for result in sorted(results, key=lambda item: -item["score"]):
            field_score = text_to_score[result["field"]]
            value_score = text_to_score[result["value"]]
            predictions_act.add_row([
                result["field"],
                field_score,
                result["value"],
                value_score,
                result["score"],
            ])
            matched_results.append({
                "field": result["field"],
                "value": result["value"],
                "score": result["score"],
                "field_detection_score": field_score,
                "value_detection_score": value_score,
                'value_bbox': bbox_of_all[result["value"]],
                'field_bbox': bbox_of_all[result["field"]]
            })

    print(predictions_act)

    return json.dumps(matched_results), output_content_type
Example #6
0
import mxnet as mx
import numpy as np
import time
import json

from sagemaker.session import Session
from sagemaker.mxnet import MXNetPredictor

#predictor = MXNetPredictor('mxnet-inference-2019-09-11-23-33-38-737', Session())
# Time a single prediction of one random 1x3x224x224 input against the
# deployed endpoint and print the elapsed seconds.
predictor = MXNetPredictor('mxnet-inference-2019-09-13-18-42-06-926',
                           Session())

data = np.random.rand(1, 3, 224, 224)
# NOTE(review): this wrapped payload is built but the raw array is what gets
# sent below - confirm which form the endpoint expects.
input_data = {"instances": data}

# perf_counter() is monotonic and high-resolution, so the measured interval
# cannot be distorted by system clock adjustments the way time.time() can.
start = time.perf_counter()
scores = predictor.predict(data)
end = time.perf_counter()

print(end - start)
def transform_fn(loaded_model, data, input_content_type, output_content_type):
    """Fuse the incoming field/value pairing with the field-match model's.

    ``data`` is a JSON string whose ``field_match_output`` list holds one
    entry per detected field with the keys ``field_name``, ``bbox``,
    ``confidence`` and ``value`` (itself a dict with ``field_value``,
    ``bbox`` and ``confidence``).  A value whose bbox is all ``-1`` is not
    used as a candidate.

    For every field, the (at most) five candidates closer than 100
    according to ``l2_distance`` are scored by the
    ``field-match-ml-2019-01-20-1`` endpoint and the highest-scoring result
    is kept as the model's pairing for that field.  A field without nearby
    candidates gets a placeholder pairing with an empty value and score
    ``0``.

    Fusion, per field: an incoming value detection score of ``0`` is first
    replaced by ``1``; the incoming pairing is then kept when its value
    detection score is strictly greater than that of the model's pairing,
    otherwise the model's pairing is used.

    Args:
        loaded_model: Unused here.
        data: JSON request body described above.
        input_content_type: Unused here.
        output_content_type: Returned unchanged.

    Returns:
        tuple: ``(json_string, output_content_type)``; the JSON encodes one
        record per field with the keys ``field``, ``value``, ``score``,
        ``field_detection_score``, ``value_detection_score``, ``value_bbox``
        and ``field_bbox``.
    """
    print('Global EP')
    initial_matching = json.loads(data)
    no_bbox = {'top': -1, 'height': -1, 'width': -1, 'left': -1}

    # Debug table of the incoming pairs (not printed).
    original_match = prettytable.PrettyTable(
        ["field", "field score", "values", "value score"])
    fields = []
    values = []
    bbox_of_all = {}
    text_to_score = {}
    matched_results_dict = {}
    for field_id, pair in enumerate(initial_matching['field_match_output']):
        field_name = pair['field_name']
        detected_value = pair["value"]

        fields.append({
            "id": field_id,
            "string": field_name,
            "bbox": pair['bbox'],
            "center": get_center(pair['bbox'])
        })
        bbox_of_all[field_name] = pair['bbox']
        text_to_score[field_name] = pair["confidence"]

        if detected_value['bbox'] != no_bbox:
            values.append({
                "string": detected_value['field_value'],
                "bbox": detected_value['bbox'],
                "center": get_center(detected_value['bbox'])
            })
            text_to_score[
                detected_value['field_value']] = detected_value['confidence']
            bbox_of_all[detected_value['field_value']] = detected_value['bbox']

        original_match.add_row([
            field_name, pair["confidence"],
            detected_value['field_value'], detected_value['confidence']
        ])
        matched_results_dict[field_id] = {
            'field': field_name,
            "value": detected_value['field_value'],
            "score": pair["confidence"],
            "field_detection_score": pair["confidence"],
            "value_detection_score": detected_value['confidence'],
            "value_bbox": detected_value['bbox'],
            "field_bbox": pair['bbox']
        }

    print('Calling ML fields_match')
    ml_field_matching = MXNetPredictor("field-match-ml-2019-01-20-1")

    # Debug table of the model output (printing is currently disabled).
    predictions_act = prettytable.PrettyTable(
        ["field", "field score", "values", "value score", "score"])
    dist_thresh = 100
    ml_matched_results_dict = {}

    for field in fields:
        candidates = []
        for value in values:
            l2_dist = l2_distance(field, value)
            if l2_dist < dist_thresh:
                candidates.append((value, l2_dist))

        nearest = [
            candidate["string"] for candidate, _ in sorted(
                candidates, key=lambda item: item[1])[:5]
        ]
        if nearest:
            results = ml_field_matching.predict({
                "field_names": [field["string"]],
                "field_values": nearest
            })  # siamese string field match
        else:
            # No candidate nearby: use a placeholder and register the empty
            # string so the lookups below succeed.
            results = [{"field": field["string"], "value": '', "score": 0}]
            text_to_score[''] = 0
            bbox_of_all[''] = {
                'width': -1,
                'top': -1,
                'height': -1,
                'left': -1
            }

        for result in sorted(results, key=lambda item: -item["score"]):
            field_score = text_to_score[result["field"]]
            value_score = text_to_score[result["value"]]
            predictions_act.add_row([
                result["field"],
                field_score,
                result["value"],
                value_score,
                result["score"],
            ])
            # Results arrive best-first.  The entry used to be overwritten on
            # every iteration, leaving the *lowest* scoring candidate;
            # setdefault keeps the first, i.e. the best, one.
            ml_matched_results_dict.setdefault(
                field["id"], {
                    "field": result["field"],
                    "value": result["value"],
                    "score": result["score"],
                    "field_detection_score": field_score,
                    "value_detection_score": value_score,
                    "value_bbox": bbox_of_all[result["value"]],
                    "field_bbox": bbox_of_all[result["field"]]
                })

    # Fuse textual with visual clues
    final_matched_results = []
    for field_id, ml_match in ml_matched_results_dict.items():
        initial_match = matched_results_dict[field_id]
        if initial_match['value_detection_score'] == 0:
            initial_match['value_detection_score'] = 1
        if (initial_match['value_detection_score'] >
                ml_match['value_detection_score']):
            final_matched_results.append(initial_match)
        else:
            final_matched_results.append(ml_match)

    print('finished')

    return json.dumps(final_matched_results), output_content_type
def transform_fn(loaded_model, data, input_content_type, output_content_type):
    """Localize a page, cluster fields and values spatially and pair them.

    ``data`` is a JSON string.  ``bucket`` and ``file_name`` locate the page
    image; ``loc_endpoint``, ``fm_endpoint``, ``hw_endpoint``,
    ``hp_endpoint`` and ``sp_endpoint`` optionally override the default
    endpoint names.

    The localizer endpoint is called first; its ``bucket_name``, ``hw_key``
    and ``hp_key`` outputs are forwarded to the field-match endpoint.  Each
    entry of the returned ``field_match_output`` becomes a field, and its
    value (unless the value bbox is all ``-1``) becomes a candidate value.
    The centers of all fields and values are clustered with
    ``hcluster.fclusterdata`` (distance criterion, threshold 250); clusters
    holding more values than field names are merged into their nearest
    neighbour; each remaining cluster is paired with ``local_ml_pairing``
    using ``loaded_model``.  Field names in a cluster that lacks names or
    values are reported with an empty value and score ``0``.

    Args:
        loaded_model: Model handed to ``local_ml_pairing``.
        data: JSON request body described above.
        input_content_type: Unused here.
        output_content_type: Returned unchanged.

    Returns:
        tuple: ``(json_string, output_content_type)``; the JSON encodes a
        list of records with the keys ``value_detection_score``, ``value``,
        ``field_detection_score``, ``score``, ``field``, ``value_bbox`` and
        ``field_bbox``.

    Raises:
        Exception: Whatever the localizer or field-match endpoint call
            raises.  These errors used to be swallowed, which then failed
            with ``UnboundLocalError`` on the next statement.
    """
    parsed = json.loads(data)

    loc_endpoint = parsed.get("loc_endpoint", "localization-model-2019-01-29")
    fm_endpoint = parsed.get("fm_endpoint", 'field-match-2019-01-24-12-39-05-522')

    hw_endpoint = parsed.get("hw_endpoint", "pytorch-handwriting-ocr-2019-01-29-02-06-44-538")
    hp_endpoint = parsed.get("hp_endpoint", "hand-printed-model-2019-01-29-1")
    sp_endpoint = parsed.get("sp_endpoint", "hand-printed-model-2019-01-29-1")

    # NOTE(review): "aws_access_key_id" / "aws_secret_access_key" in the
    # request were read here but never used; the predictors below rely on the
    # `sagemaker_session` defined outside this function.

    bucket = parsed.get("bucket")
    file_name = parsed.get("file_name")

    loc_predictor = MXNetPredictor(loc_endpoint, sagemaker_session)
    field_matching = JSONPredictor(fm_endpoint, sagemaker_session)
    try:
        loc_out = loc_predictor.predict({"url": "s3://{}/{}".format(bucket, file_name)})
    except Exception:
        # Log and propagate the real error instead of continuing with an
        # unbound `loc_out`.
        print(traceback.format_exc())
        raise
    print("localized")
    loc_out = loc_out["result"]
    print(loc_out)

    data = {
        "hw_endpoint": hw_endpoint,
        "hp_endpoint": hp_endpoint,
        "sp_endpoint": sp_endpoint,

        "field_names": [{"bucket": "ahmedb-test", "filename": "field_name_list.txt"},
                        {"bucket": "unum-files", "filename": "unum_field_names.txt"}],
        "field_names_ignore": [
            {"bucket": "ahmedb-test", "filename": "must_ignore.txt"},
            {"bucket": "unum-files", "filename": "unum_must_ignore_field_names.txt"}
        ],

        "hw_pickle": {"bucket": loc_out['bucket_name'], "filename": loc_out['hw_key']},
        "hp_pickle": {"bucket": loc_out['bucket_name'], "filename": loc_out['hp_key']},
        "page_image": {"bucket": bucket, "filename": file_name},
    }

    try:
        initial_matching = field_matching.predict(data)
    except Exception:
        # Same reasoning as for the localizer call above.
        print(traceback.format_exc())
        raise

    no_bbox = {'top': -1, 'height': -1, 'width': -1, 'left': -1}
    fields = []
    values = []
    text_to_score = {}
    bbox_of_all = {}
    for pair in initial_matching['field_match_output']:
        field_name = pair['field_name']
        detected_value = pair["value"]

        fields.append({"string": field_name, "bbox": pair['bbox'], "center": get_center(pair['bbox'])})
        bbox_of_all[field_name] = pair['bbox']
        text_to_score[field_name] = pair["confidence"]
        if detected_value['bbox'] != no_bbox:
            values.append({"string": detected_value['field_value'],
                           "bbox": detected_value['bbox'],
                           "center": get_center(detected_value['bbox'])})
            text_to_score[detected_value['field_value']] = detected_value['confidence']
            bbox_of_all[detected_value['field_value']] = detected_value['bbox']

    # Field centers first, value centers after: the cluster label at index i
    # belongs to fields[i] for i < len(fields), otherwise to a value.
    points_2d = np.array([item["center"] for item in fields + values])

    # clustering
    thresh = 250
    if len(points_2d) < 2:
        # Hierarchical clustering needs at least two observations; zero or
        # one point trivially forms (at most) one cluster.
        clusters = [1] * len(points_2d)
    else:
        clusters = hcluster.fclusterdata(points_2d, thresh, criterion="distance")

    groupings = defaultdict(lambda: {'field_names': [], 'field_values': []})
    for index, class_ in enumerate(clusters):
        if index >= len(fields):
            member = values[index - len(fields)]
            groupings[class_]["field_values"].append({"string": member["string"], "center": member["center"]})
        else:
            member = fields[index]
            groupings[class_]["field_names"].append({"string": member["string"], "center": member["center"]})

    # Replace the member dicts by their strings and store the mean center.
    for group in groupings.values():
        member_centers = ([item["center"] for item in group['field_names']] +
                          [item["center"] for item in group['field_values']])
        if member_centers:
            center = np.mean(member_centers, axis=0)
        else:
            center = np.array([np.inf, np.inf])

        group['field_names'] = [item["string"] for item in group['field_names']]
        group['field_values'] = [item["string"] for item in group['field_values']]
        group['center'] = center

    _merge_unbalanced_groups(groupings)

    final_output_json = []

    for group in groupings.values():
        # Duplicated strings inside one cluster are paired only once.
        cluster = {"field_names": list(set(group["field_names"])),
                   "field_values": list(set(group["field_values"]))}
        if cluster["field_names"] and cluster["field_values"]:
            results = local_ml_pairing(cluster, loaded_model)

            for result in sorted(results, key=lambda item: -item["score"]):
                final_output_json.append({
                    'value_detection_score': text_to_score[result["value"]],
                    'value': result["value"],
                    'field_detection_score': text_to_score[result["field"]],
                    'score': result["score"],
                    'field': result["field"],
                    'value_bbox': bbox_of_all[result["value"]],
                    'field_bbox': bbox_of_all[result["field"]]
                })
        else:
            for field_name in cluster["field_names"]:
                final_output_json.append({
                    'value_detection_score': 0,
                    'value': '',
                    'field_detection_score': text_to_score[field_name],
                    'score': 0,
                    'field': field_name,
                    'value_bbox': {'width': -1, 'top': -1, 'height': -1, 'left': -1},
                    'field_bbox': bbox_of_all[field_name]
                })

    return json.dumps(final_output_json), output_content_type


def _merge_unbalanced_groups(groupings):
    """Merge clusters holding more values than field names, in place.

    Each such cluster is removed from ``groupings`` and its names and values
    are appended to the remaining cluster whose center is closest; that
    cluster's center becomes the mean of the two centers.  Passes repeat
    until no cluster has more values than names, or until only a single
    cluster is left and nothing can be merged any more.
    """
    while True:
        unbalanced = [
            key for key, group in groupings.items()
            if len(group["field_values"]) > len(group["field_names"])
        ]
        if not unbalanced:
            return

        for key in unbalanced:
            # Read the group again: it may have absorbed another cluster
            # earlier in this pass, and the snapshot taken when the list was
            # built would silently drop those members.
            group = groupings[key]
            if len(group["field_values"]) <= len(group["field_names"]):
                continue
            if len(groupings) < 2:
                # No other cluster to merge into; this used to raise
                # IndexError.
                return

            del groupings[key]
            # The cluster itself is already removed, so the closest remaining
            # center is the nearest neighbour (index 1 of the sorted
            # distances used to pick the second nearest one).
            nearest = min(
                groupings,
                key=lambda other: np.linalg.norm(
                    groupings[other]["center"] - group["center"]))
            target = groupings[nearest]
            groupings[nearest] = {
                "field_values": target["field_values"] + group["field_values"],
                "field_names": target["field_names"] + group["field_names"],
                "center": np.mean([target["center"], group["center"]], axis=0),
            }