Python MXNetPredictor.MXNetPredictor Examples

Programming Language: Python

Namespace/Package Name: sagemaker.mxnet

Class/Type: MXNetPredictor

Method/Function: MXNetPredictor

Examples at hotexamples.com: 8

Python MXNetPredictor.MXNetPredictor - 8 examples found. These are the top-rated real-world Python examples of sagemaker.mxnet.MXNetPredictor.MXNetPredictor, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

MXNetPredictor(8)

predict(7)

Frequently Used Methods

MXNetPredictor (8)

predict (7)

Example #1

Show file

def get_match_score_ml(fields_with_candidates, bbox_of_all, text_to_score):
    ml_field_matching = MXNetPredictor("field-match-ml-2019-01-20-1")
    matched_results = []
    ''' NN
    for field in fields_with_candidates:
        
        input_to_matching = {"field_names": [field["string"]], "field_values": fields_with_candidates[field["string"]]['candidates']}
        if(len(nearest) != 0):
            results = ml_field_matching.predict(input_to_matching)  # siamese string field match
        else:
            results = [{"field": field["string"], "value": '', "score": 0}]
            text_to_score[''] = ''
            bbox_of_all[''] = {'width': -1, 'top': -1, 'height': -1, 'left': -1}
            
        for result in sorted(results, key=lambda item: -item["score"]):
            matched_results.append({"field": result["field"], 
                                    "value": result["value"], 
                                    "score": result["score"], 
                                    "field_detection_score": text_to_score[result["field"]], 
                                    "value_detection_score": text_to_score[result["value"]], 
                                    "value_bbox": bbox_of_all[result["value"]], 
                                    "field_bbox": bbox_of_all[result["field"]] })
    return results
    '''
    ''' Hundarian

Example #2

Show file

File: app.py Project: kbajaria12/project3

def mxnet():
    """Send the first ``image`` query argument to the MXNet endpoint.

    The argument is parsed as a Python literal, passed to the predictor, and
    the first row of the response is paired with the labels 0-9 and ranked so
    that the largest value comes first.

    Returns:
        A string of the form ``"Most likely answer: (label, value)"``.
    """
    image_args = request.args.getlist('image')
    endpoint = MXNetPredictor('sagemaker-mxnet-2018-04-12-21-02-24-757')

    payload = ast.literal_eval(image_args[0])
    prediction = endpoint.predict(payload)

    # Ascending order of (1 - value) is descending order of value.
    ranked = sorted(zip(range(10), prediction[0]),
                    key=lambda pair: 1.0 - pair[1])

    return "Most likely answer: " + str(ranked[0])

Example #3

Show file

def transform_fn_inner(loaded_model, data, input_content_type,
                       output_content_type):
    """Localize a page image, run both OCR endpoints and match fields to values.

    ``data`` is a JSON document.  Required keys: ``bucket``,
    ``s3_image_file``, ``field_names``, ``loc_endpoint``, ``hw_endpoint`` and
    ``hp_endpoint``.  Optional keys: ``hw_endpoint_model`` and
    ``hp_endpoint_model`` (both default to ``"new"``), ``aws_access_key_id``
    and ``aws_secret_access_key``.

    Args:
        loaded_model: Not used by this function.
        data: JSON request body described above.
        input_content_type: Not used by this function.
        output_content_type: Returned unchanged next to the result.

    Returns:
        ``(matches, output_content_type)`` where ``matches`` is a list of
        dicts with the keys ``field``, ``value``, ``score``,
        ``value_detection_score``, ``value_bbox`` and ``field_bbox``.  Only
        pairings whose score is above the score threshold are included.

    Raises:
        KeyError: If a required key is missing from the request or from an
            endpoint response.
    """
    print('Global EP')
    input_json = json.loads(data)
    bucket = input_json['bucket']
    image_file_name_s3 = input_json['s3_image_file']

    fields_names = input_json['field_names']
    loc_endpoint = input_json['loc_endpoint']
    hw_endpoint = input_json['hw_endpoint']
    hp_endpoint = input_json['hp_endpoint']
    hw_endpoint_model = input_json.get("hw_endpoint_model", "new")
    hp_endpoint_model = input_json.get("hp_endpoint_model", "new")

    # Optional explicit credentials for the boto3 session.
    aws_access_key_id = input_json.get("aws_access_key_id")
    aws_secret_access_key = input_json.get("aws_secret_access_key")

    session = boto3.Session(region_name='us-west-2',
                            aws_access_key_id=aws_access_key_id,
                            aws_secret_access_key=aws_secret_access_key)
    sagemaker_session = sagemaker.Session(boto_session=session)

    bbox_of_all = {}
    text_to_score = {}

    print('Get fields.....')
    fields = []
    for field_id, (att, val) in enumerate(fields_names.items()):
        bbox = {'top': -1, 'left': -1, 'width': -1, 'height': -1}
        boxes_seen = 0
        for item in val:
            # Only entries whose first element is itself a list carry boxes.
            if isinstance(item[0], list):
                for bbx in item:
                    if boxes_seen == 0:
                        # The first box supplies all four coordinates.
                        bbox['top'] = bbx[0]
                        bbox['left'] = bbx[1]
                        bbox['height'] = bbx[2]
                        bbox['width'] = bbx[3]
                    else:
                        # Every later box only widens the field box.
                        bbox['width'] += bbx[3]
                    boxes_seen += 1

        fields.append({
            "id": field_id,
            "string": att,
            "bbox": bbox,
            "center": get_center(bbox)
        })
        bbox_of_all[att] = bbox

    print('Get the values.....')

    print('Call the localizer.....')
    loc_predictor = MXNetPredictor(loc_endpoint, sagemaker_session)
    loc_out = loc_predictor.predict(
        {"url": "s3://{}/{}".format(bucket, image_file_name_s3)})
    print("localized")
    loc_out = loc_out['result']

    print('Call the HW.....')
    hw_predictor = JSONPredictor(hw_endpoint, sagemaker_session)
    hw_predictions = hw_predictor.predict({
        "bucket": loc_out["bucket_name"],
        "file_name": loc_out["hw_key"],
        "model": hw_endpoint_model
    })["result"]

    print('Call the HP.....')
    # Only the JSON predictor is needed; the MXNetPredictor that used to be
    # created here was overwritten before it was ever used.
    hp_predictor = JSONPredictor(hp_endpoint, sagemaker_session)
    hp_predictions = hp_predictor.predict({
        "bucket": loc_out["bucket_name"],
        "file_name": loc_out["hp_key"],
        "model": hp_endpoint_model
    })["result"]

    values = []

    # HW: every line of a prediction shares that prediction's bounding box.
    for value in hw_predictions:
        bbox = value['bbox']
        for line in value['lines']:
            bbox_of_all[line['text']] = bbox
            text_to_score[line['text']] = line["score"]
            values.append({
                "string": line['text'],
                "bbox": bbox,
                "center": get_center(bbox)
            })

    # HP: each prediction carries its own x/y/w/h box.
    for value in hp_predictions:
        bbox = {
            'top': value['y'],
            'height': value['h'],
            'width': value['w'],
            'left': value['x']
        }
        bbox_of_all[value['text']] = bbox
        text_to_score[value['text']] = value["score"]
        values.append({
            "string": value['text'],
            "bbox": bbox,
            "center": get_center(bbox)
        })

    print('Calling ML fields_match....')
    # NOTE(review): unlike the predictors above, this one is created without
    # ``sagemaker_session`` - confirm that is intentional.
    ml_field_matching = MXNetPredictor("field-match-ml-2019-01-20-1")

    dist_thresh = 300
    score_thresh = 0.7
    matches_only = []

    print('Query Siamese with NN ....')
    for field in fields:
        candidates = []
        for value in values:
            if value['string'] != '':
                l2_dist = l2_distance(field, value)
                if l2_dist < dist_thresh:
                    candidates.append((value, l2_dist))

        # The five closest non-empty values are the matching candidates.
        nearest = [
            item[0]["string"]
            for item in sorted(candidates, key=lambda item: item[1])[:5]
        ]
        if not nearest:
            # Without candidates the field can never reach the score
            # threshold, so it contributes nothing to the output.
            continue

        results = ml_field_matching.predict({
            "field_names": [field["string"]],
            "field_values": nearest
        })  # siamese string field match

        for result in sorted(results, key=lambda item: -item["score"]):
            # Pairings at or below the threshold were previously recorded
            # with score 0 and then filtered out; skip them directly.
            if result["score"] > score_thresh:
                matches_only.append({
                    "field": result["field"],
                    "value": result["value"],
                    "score": result["score"],
                    "value_detection_score": text_to_score[result["value"]],
                    "value_bbox": bbox_of_all[result["value"]],
                    "field_bbox": bbox_of_all[result["field"]]
                })

    print('Finished')

    return matches_only, output_content_type

Example #4

Show file

def transform_fn(loaded_model, data, input_content_type, output_content_type):
    """Re-match detected fields to nearby values with the ML matching endpoint.

    ``data`` is a JSON document whose ``field_match_output`` list holds one
    entry per detected field with the keys ``field_name``, ``bbox``,
    ``confidence`` and ``value`` (itself a dict with ``field_value``,
    ``bbox`` and ``confidence``).

    Args:
        loaded_model: Not used by this function.
        data: JSON request body described above.
        input_content_type: Not used by this function.
        output_content_type: Returned unchanged next to the result.

    Returns:
        ``(json_string, output_content_type)`` where ``json_string`` encodes
        a list of dicts with the keys ``field``, ``value``, ``score``,
        ``field_detection_score``, ``value_detection_score``, ``value_bbox``
        and ``field_bbox``.
    """
    print('Global EP')

    initial_matching = json.loads(data)

    # Bounding box used in the input to mark "no value detected".
    missing_bbox = {'top': -1, 'height': -1, 'width': -1, 'left': -1}

    # Debug table of the incoming matches.  The header order now matches the
    # order of the cells added below (it previously listed "values" before
    # "field score", mislabelling two columns).
    original_match = prettytable.PrettyTable(
        ["field", "field score", "values", "value score"])

    fields = []
    values = []
    bbox_of_all = {}
    text_to_score = {}
    for pair in initial_matching['field_match_output']:
        detected = pair["value"]
        fields.append({
            "string": pair['field_name'],
            "bbox": pair['bbox'],
            "center": get_center(pair['bbox'])
        })
        bbox_of_all[pair['field_name']] = pair['bbox']
        text_to_score[pair['field_name']] = pair["confidence"]

        if detected['bbox'] != missing_bbox:
            values.append({
                "string": detected['field_value'],
                "bbox": detected['bbox'],
                "center": get_center(detected['bbox'])
            })
            text_to_score[detected['field_value']] = detected['confidence']
            bbox_of_all[detected['field_value']] = detected['bbox']

        original_match.add_row([
            pair['field_name'], pair["confidence"],
            detected['field_value'], detected['confidence']
        ])

    print('Calling ML fields_match')
    ml_field_matching = MXNetPredictor("field-match-ml-2019-01-20-1")

    # Debug table of the ML pairings (not printed).
    predictions_act = prettytable.PrettyTable(
        ["field", "field score", "values", "value score", "score"])
    dist_thresh = 100
    matched_results = []
    for field in fields:
        candidates = []
        for value in values:
            l2_dist = l2_distance(field, value)
            if l2_dist < dist_thresh:
                candidates.append((value, l2_dist))

        # The five closest values are the matching candidates.
        nearest = [
            item[0]["string"]
            for item in sorted(candidates, key=lambda item: item[1])[:5]
        ]
        if nearest:
            results = ml_field_matching.predict({
                "field_names": [field["string"]],
                "field_values": nearest
            })  # siamese string field match
        else:
            # No candidate nearby: emit an empty pairing and register the
            # empty string so the lookups below succeed.
            results = [{"field": field["string"], "value": '', "score": 0}]
            text_to_score[''] = ''
            bbox_of_all[''] = {
                'width': -1,
                'top': -1,
                'height': -1,
                'left': -1
            }

        for result in sorted(results, key=lambda item: -item["score"]):
            field_score = text_to_score[result["field"]]
            value_score = text_to_score[result["value"]]
            predictions_act.add_row([
                result["field"],
                field_score,
                result["value"],
                value_score,
                result["score"],
            ])
            matched_results.append({
                "field": result["field"],
                "value": result["value"],
                "score": result["score"],
                "field_detection_score": field_score,
                "value_detection_score": value_score,
                "value_bbox": bbox_of_all[result["value"]],
                "field_bbox": bbox_of_all[result["field"]]
            })

    print('finished')

    return json.dumps(matched_results), output_content_type

Example #5

Show file

def transform_fn(loaded_model, data, input_content_type, output_content_type):
    """Localize a page, run the field-match endpoint and re-pair locally.

    ``data`` is a JSON document.  ``bucket`` and ``file_name`` identify the
    page image; ``loc_endpoint``, ``fm_endpoint``, ``hw_endpoint``,
    ``hp_endpoint`` and ``sp_endpoint`` override the default endpoint names.

    Args:
        loaded_model: Passed through to ``local_ml_pairing``.
        data: JSON request body described above.
        input_content_type: Not used by this function.
        output_content_type: Returned unchanged next to the result.

    Returns:
        ``(json_string, output_content_type)`` where ``json_string`` encodes
        a list of dicts with the keys ``field``, ``value``, ``score``,
        ``field_detection_score``, ``value_detection_score``, ``value_bbox``
        and ``field_bbox``.

    Raises:
        Exception: Whatever the localization or field-match endpoint call
            raises is logged and re-raised.  Previously it was swallowed and
            the function then failed on an unbound local variable, hiding
            the real error.
    """
    parsed = json.loads(data)

    loc_endpoint = parsed.get("loc_endpoint", "localization-model-2019-01-29")
    fm_endpoint = parsed.get("fm_endpoint",
                             'field-match-2019-01-24-12-39-05-522')

    hw_endpoint = parsed.get(
        "hw_endpoint", "pytorch-handwriting-ocr-2019-01-29-02-06-44-538")
    hp_endpoint = parsed.get("hp_endpoint", "hand-printed-model-2019-01-29-1")
    sp_endpoint = parsed.get("sp_endpoint", "hand-printed-model-2019-01-29-1")

    # NOTE(review): the access keys are read but never used here, and
    # ``sagemaker_session`` below is not defined in this function - it
    # presumably comes from module scope; confirm.
    aws_access_key_id = parsed.get("aws_access_key_id", None)
    aws_secret_access_key = parsed.get("aws_secret_access_key", None)

    bucket = parsed.get("bucket")
    file_name = parsed.get("file_name")

    loc_predictor = MXNetPredictor(loc_endpoint, sagemaker_session)
    field_matching = JSONPredictor(fm_endpoint, sagemaker_session)
    try:
        loc_out = loc_predictor.predict(
            {"url": "s3://{}/{}".format(bucket, file_name)})
    except Exception as ex:
        # Log, then propagate: nothing below can run without ``loc_out``.
        print(ex)
        print(traceback.format_exc())
        raise
    print("localized")
    loc_out = loc_out["result"]
    print(loc_out)

    data = {
        "hw_endpoint": hw_endpoint,
        "hp_endpoint": hp_endpoint,
        "sp_endpoint": sp_endpoint,
        "field_names": [{
            "bucket": "ahmedb-test",
            "filename": "field_name_list.txt"
        }, {
            "bucket": "unum-files",
            "filename": "unum_field_names.txt"
        }],
        "field_names_ignore": [{
            "bucket": "ahmedb-test",
            "filename": "must_ignore.txt"
        }, {
            "bucket": "unum-files",
            "filename": "unum_must_ignore_field_names.txt"
        }],
        "hw_pickle": {
            "bucket": loc_out['bucket_name'],
            "filename": loc_out['hw_key']
        },
        "hp_pickle": {
            "bucket": loc_out['bucket_name'],
            "filename": loc_out['hp_key']
        },
        "page_image": {
            "bucket": bucket,
            "filename": file_name
        },
    }

    try:
        initial_matching = field_matching.predict(data)
    except Exception as ex:
        # Log, then propagate: the loop below needs ``initial_matching``.
        print(ex)
        print(traceback.format_exc())
        raise

    # Bounding box used in the response to mark "no value detected".
    missing_bbox = {'top': -1, 'height': -1, 'width': -1, 'left': -1}

    fields = []
    values = []
    text_to_score = {}
    bbox_of_all = {}
    for pair in initial_matching['field_match_output']:
        detected = pair["value"]
        fields.append({
            "string": pair['field_name'],
            "bbox": pair['bbox'],
            "center": get_center(pair['bbox'])
        })
        bbox_of_all[pair['field_name']] = pair['bbox']
        text_to_score[pair['field_name']] = pair["confidence"]
        if detected['bbox'] != missing_bbox:
            values.append({
                "string": detected['field_value'],
                "bbox": detected['bbox'],
                "center": get_center(detected['bbox'])
            })
            text_to_score[detected['field_value']] = detected['confidence']
            bbox_of_all[detected['field_value']] = detected['bbox']

    print('Calling ML fields_match')

    predictions_act = prettytable.PrettyTable(
        ["field", "field score", "values", "value score", "score"])
    dist_thresh = 100
    matched_results = []
    for field in fields:
        print(field["string"])
        candidates = []
        for value in values:
            print(value["string"])
            l2_dist = l2_distance(field, value)
            if l2_dist < dist_thresh:
                candidates.append((value, l2_dist))
                print(str(l2_dist))

        # The five closest values are the pairing candidates.
        nearest = [
            item[0]["string"]
            for item in sorted(candidates, key=lambda item: item[1])[:5]
        ]
        if nearest:
            results = local_ml_pairing(
                {
                    "field_names": [field["string"]],
                    "field_values": nearest
                }, loaded_model)
        else:
            # No candidate nearby: emit an empty pairing and register the
            # empty string so the lookups below succeed.
            results = [{"field": field["string"], "value": '', "score": 0}]
            text_to_score[''] = ''
            bbox_of_all[''] = {
                'width': -1,
                'top': -1,
                'height': -1,
                'left': -1
            }

        for result in sorted(results, key=lambda item: -item["score"]):
            field_score = text_to_score[result["field"]]
            value_score = text_to_score[result["value"]]
            predictions_act.add_row([
                result["field"],
                field_score,
                result["value"],
                value_score,
                result["score"],
            ])
            matched_results.append({
                "field": result["field"],
                "value": result["value"],
                "score": result["score"],
                "field_detection_score": field_score,
                "value_detection_score": value_score,
                'value_bbox': bbox_of_all[result["value"]],
                'field_bbox': bbox_of_all[result["field"]]
            })

    print(predictions_act)

    return json.dumps(matched_results), output_content_type

Example #6

Show file

File: predict.py Project: Elizaaaaa/mxnet-python-sdk

import mxnet as mx
import numpy as np
import time
import json

from sagemaker.session import Session
from sagemaker.mxnet import MXNetPredictor

# Client for the deployed MXNet inference endpoint.
predictor = MXNetPredictor('mxnet-inference-2019-09-13-18-42-06-926',
                           Session())

# Random payload of shape (1, 3, 224, 224).
data = np.random.rand(1, 3, 224, 224)
# NOTE(review): this wrapper is built but never sent; predict() below
# receives the bare array. Confirm which payload the endpoint expects.
input_data = {"instances": data}

# perf_counter() is the clock meant for measuring short durations; time.time()
# follows the wall clock and can jump if the system time is adjusted.
start = time.perf_counter()
scores = predictor.predict(data)
end = time.perf_counter()

# Elapsed seconds for one prediction round trip.
print(end - start)

Example #7

Show file

File: global_endpoint_improvements.py Project: aitecheg/field_match_ml

def transform_fn(loaded_model, data, input_content_type, output_content_type):
    """Fuse the incoming field/value matches with ML-based re-matching.

    ``data`` is a JSON document whose ``field_match_output`` list holds one
    entry per detected field with the keys ``field_name``, ``bbox``,
    ``confidence`` and ``value`` (itself a dict with ``field_value``,
    ``bbox`` and ``confidence``).  For every field the best ML pairing among
    the nearest values is compared with the incoming pairing, and the one
    with the larger ``value_detection_score`` is kept.

    Args:
        loaded_model: Not used by this function.
        data: JSON request body described above.
        input_content_type: Not used by this function.
        output_content_type: Returned unchanged next to the result.

    Returns:
        ``(json_string, output_content_type)`` where ``json_string`` encodes
        a list of dicts with the keys ``field``, ``value``, ``score``,
        ``field_detection_score``, ``value_detection_score``, ``value_bbox``
        and ``field_bbox``.
    """
    print('Global EP')
    initial_matching = json.loads(data)

    # Bounding box used in the input to mark "no value detected".
    missing_bbox = {'top': -1, 'height': -1, 'width': -1, 'left': -1}

    fields = []
    values = []
    bbox_of_all = {}
    text_to_score = {}
    matched_results_dict = {}
    for field_id, pair in enumerate(initial_matching['field_match_output']):
        detected = pair["value"]
        fields.append({
            "id": field_id,
            "string": pair['field_name'],
            "bbox": pair['bbox'],
            "center": get_center(pair['bbox'])
        })
        bbox_of_all[pair['field_name']] = pair['bbox']
        text_to_score[pair['field_name']] = pair["confidence"]
        if detected['bbox'] != missing_bbox:
            values.append({
                "string": detected['field_value'],
                "bbox": detected['bbox'],
                "center": get_center(detected['bbox'])
            })
            text_to_score[detected['field_value']] = detected['confidence']
            bbox_of_all[detected['field_value']] = detected['bbox']

        # The incoming pairing, keyed by field id for the fusion step.
        matched_results_dict[field_id] = {
            'field': pair['field_name'],
            "value": detected['field_value'],
            "score": pair["confidence"],
            "field_detection_score": pair["confidence"],
            "value_detection_score": detected['confidence'],
            "value_bbox": detected['bbox'],
            "field_bbox": pair['bbox']
        }

    print('Calling ML fields_match')
    ml_field_matching = MXNetPredictor("field-match-ml-2019-01-20-1")

    dist_thresh = 100
    ml_matched_results_dict = {}

    for field in fields:
        candidates = []
        for value in values:
            l2_dist = l2_distance(field, value)
            if l2_dist < dist_thresh:
                candidates.append((value, l2_dist))

        # The five closest values are the matching candidates.
        nearest = [
            item[0]["string"]
            for item in sorted(candidates, key=lambda item: item[1])[:5]
        ]
        if nearest:
            results = ml_field_matching.predict({
                "field_names": [field["string"]],
                "field_values": nearest
            })  # siamese string field match
        else:
            # No candidate nearby: use an empty pairing and register the
            # empty string so the lookups below succeed.
            results = [{"field": field["string"], "value": '', "score": 0}]
            text_to_score[''] = 0
            bbox_of_all[''] = {
                'width': -1,
                'top': -1,
                'height': -1,
                'left': -1
            }

        # Keep the highest-scoring pairing for this field.  The old loop
        # overwrote the entry while walking the results in descending score
        # order, so the lowest-scoring pairing was the one left behind.
        best = max(results, key=lambda item: item["score"], default=None)
        if best is not None:
            ml_matched_results_dict[field["id"]] = {
                "field": best["field"],
                "value": best["value"],
                "score": best["score"],
                "field_detection_score": text_to_score[best["field"]],
                "value_detection_score": text_to_score[best["value"]],
                "value_bbox": bbox_of_all[best["value"]],
                "field_bbox": bbox_of_all[best["field"]]
            }

    # Fuse textual with visual clues.
    final_matched_results = []
    for field_id, ml_match in ml_matched_results_dict.items():
        incoming = matched_results_dict[field_id]
        # An incoming value score of exactly 0 is promoted to 1 before the
        # comparison, so such a pairing beats any ML score below 1.
        if incoming['value_detection_score'] == 0:
            incoming['value_detection_score'] = 1
        if (incoming['value_detection_score'] >
                ml_match['value_detection_score']):
            final_matched_results.append(incoming)
        else:
            final_matched_results.append(ml_match)

    print('finished')

    return json.dumps(final_matched_results), output_content_type

Example #8

Show file

File: global_endpoint-cluster.py Project: aitecheg/field_match_ml

def transform_fn(loaded_model, data, input_content_type, output_content_type):
    """Localize a page, cluster fields and values spatially and pair them.

    ``data`` is a JSON document.  ``bucket`` and ``file_name`` identify the
    page image; ``loc_endpoint``, ``fm_endpoint``, ``hw_endpoint``,
    ``hp_endpoint`` and ``sp_endpoint`` override the default endpoint names.

    Field and value centers are clustered by distance.  A cluster holding
    more values than field names is merged into its nearest neighbour until
    no such cluster is left (or only one cluster remains).  Each cluster
    with both names and values is then paired by ``local_ml_pairing``.

    Args:
        loaded_model: Passed through to ``local_ml_pairing``.
        data: JSON request body described above.
        input_content_type: Not used by this function.
        output_content_type: Returned unchanged next to the result.

    Returns:
        ``(json_string, output_content_type)`` where ``json_string`` encodes
        a list of dicts with the keys ``value_detection_score``, ``value``,
        ``field_detection_score``, ``score``, ``field``, ``value_bbox`` and
        ``field_bbox``.

    Raises:
        Exception: Whatever the localization or field-match endpoint call
            raises is logged and re-raised.  Previously it was swallowed and
            the function then failed on an unbound local variable, hiding
            the real error.
    """
    parsed = json.loads(data)

    loc_endpoint = parsed.get("loc_endpoint", "localization-model-2019-01-29")
    fm_endpoint = parsed.get("fm_endpoint", 'field-match-2019-01-24-12-39-05-522')

    hw_endpoint = parsed.get("hw_endpoint", "pytorch-handwriting-ocr-2019-01-29-02-06-44-538")
    hp_endpoint = parsed.get("hp_endpoint", "hand-printed-model-2019-01-29-1")
    sp_endpoint = parsed.get("sp_endpoint", "hand-printed-model-2019-01-29-1")

    # NOTE(review): the access keys are read but never used here, and
    # ``sagemaker_session`` below is not defined in this function - it
    # presumably comes from module scope; confirm.
    aws_access_key_id = parsed.get("aws_access_key_id", None)
    aws_secret_access_key = parsed.get("aws_secret_access_key", None)

    bucket = parsed.get("bucket")
    file_name = parsed.get("file_name")

    loc_predictor = MXNetPredictor(loc_endpoint, sagemaker_session)
    field_matching = JSONPredictor(fm_endpoint, sagemaker_session)
    try:
        loc_out = loc_predictor.predict({"url": "s3://{}/{}".format(bucket, file_name)})
    except Exception as ex:
        # Log, then propagate: nothing below can run without ``loc_out``.
        print(ex)
        print(traceback.format_exc())
        raise
    print("localized")
    loc_out = loc_out["result"]
    print(loc_out)

    data = {
        "hw_endpoint": hw_endpoint,
        "hp_endpoint": hp_endpoint,
        "sp_endpoint": sp_endpoint,

        "field_names": [{"bucket": "ahmedb-test", "filename": "field_name_list.txt"},
                        {"bucket": "unum-files", "filename": "unum_field_names.txt"}],
        "field_names_ignore": [
            {"bucket": "ahmedb-test", "filename": "must_ignore.txt"},
            {"bucket": "unum-files", "filename": "unum_must_ignore_field_names.txt"}
        ],

        "hw_pickle": {"bucket": loc_out['bucket_name'], "filename": loc_out['hw_key']},
        "hp_pickle": {"bucket": loc_out['bucket_name'], "filename": loc_out['hp_key']},
        "page_image": {"bucket": bucket, "filename": file_name},
    }

    try:
        initial_matching = field_matching.predict(data)
    except Exception as ex:
        # Log, then propagate: the loop below needs ``initial_matching``.
        print(ex)
        print(traceback.format_exc())
        raise

    # Bounding box used in the response to mark "no value detected".
    missing_bbox = {'top': -1, 'height': -1, 'width': -1, 'left': -1}

    fields = []
    values = []
    text_to_score = {}
    bbox_of_all = {}
    for pair in initial_matching['field_match_output']:
        detected = pair["value"]
        fields.append({"string": pair['field_name'], "bbox": pair['bbox'], "center": get_center(pair['bbox'])})
        bbox_of_all[pair['field_name']] = pair['bbox']
        text_to_score[pair['field_name']] = pair["confidence"]
        if detected['bbox'] != missing_bbox:
            values.append({"string": detected['field_value'], "bbox": detected['bbox'], "center": get_center(detected['bbox'])})
            text_to_score[detected['field_value']] = detected['confidence']
            bbox_of_all[detected['field_value']] = detected['bbox']

    # Field centers first, then value centers: the cluster label at index i
    # belongs to a field when i < len(fields), otherwise to a value.
    points_2d = np.array([field["center"] for field in fields] +
                         [value["center"] for value in values])

    # clustering
    thresh = 250
    clusters = hcluster.fclusterdata(points_2d, thresh, criterion="distance")

    groupings = defaultdict(lambda: {'field_names': [], 'field_values': []})
    for index, class_ in enumerate(clusters):
        if index >= len(fields):
            member = values[index - len(fields)]
            groupings[class_]["field_values"].append({"string": member["string"], "center": member["center"]})
        else:
            member = fields[index]
            groupings[class_]["field_names"].append({"string": member["string"], "center": member["center"]})

    # Reduce each group to plain strings plus the mean center of its members.
    for group in groupings.values():
        member_centers = [item["center"] for item in group['field_names']] + \
                         [item["center"] for item in group['field_values']]
        if member_centers:
            center = np.mean(member_centers, axis=0)
        else:
            center = np.array([np.inf, np.inf])

        group['field_names'] = [item["string"] for item in group['field_names']]
        group['field_values'] = [item["string"] for item in group['field_values']]
        group['center'] = center

    while True:  # merging things
        unbalanced_keys = [key for key, group in groupings.items()
                           if len(group["field_values"]) > len(group["field_names"])]

        # Stop when every group is balanced, or when no neighbour is left to
        # merge into (the old code raised IndexError in that situation).
        if not unbalanced_keys or len(groupings) < 2:
            break

        for key in unbalanced_keys:
            if len(groupings) < 2:
                break
            # Take the group's current contents, so anything merged into it
            # earlier in this pass is carried along instead of being lost.
            group = groupings.pop(key)
            # Merge into the nearest remaining group.  The old code removed
            # the group first and then picked index 1 of the sorted
            # distances, i.e. the second-nearest neighbour.
            nearest_key = min(groupings, key=lambda other: np.linalg.norm(groupings[other]["center"] - group["center"]))
            target = groupings[nearest_key]
            groupings[nearest_key] = {"field_values": target["field_values"] + group["field_values"],
                                      "field_names": target["field_names"] + group["field_names"],
                                      "center": np.mean([target["center"], group["center"]], axis=0)
                                      }

    final_output_json = []

    for grouping in list(groupings.values()):
        # De-duplicate while keeping first-seen order, so the output order
        # does not depend on string hashing.
        cluster = {"field_names": list(dict.fromkeys(grouping["field_names"])),
                   "field_values": list(dict.fromkeys(grouping["field_values"]))}
        if cluster["field_names"] and cluster["field_values"]:
            results = local_ml_pairing(cluster, loaded_model)

            for result in sorted(results, key=lambda item: -item["score"]):
                final_output_json.append({'value_detection_score': text_to_score[result["value"]],
                                          'value': result["value"],
                                          'field_detection_score': text_to_score[result["field"]],
                                          'score': result["score"],
                                          'field': result["field"],
                                          'value_bbox': bbox_of_all[result["value"]],
                                          'field_bbox': bbox_of_all[result["field"]]
                                          })
        else:
            # Groups without values: report each field with an empty match.
            for field_name in cluster["field_names"]:
                final_output_json.append(
                    {'value_detection_score': 0,
                     'value': '',
                     'field_detection_score': text_to_score[field_name],
                     'score': 0,
                     'field': field_name,
                     'value_bbox': {'width': -1, 'top': -1, 'height': -1, 'left': -1},
                     'field_bbox': bbox_of_all[field_name]
                     })

    return json.dumps(final_output_json), output_content_type