Example #1
0
 def test_custom_form_empty_model_id(self, formrecognizer_test_endpoint,
                                     formrecognizer_test_api_key):
     """Check that an empty ``model_id`` is rejected with ``ValueError``."""
     credential = AzureKeyCredential(formrecognizer_test_api_key)
     form_client = FormRecognizerClient(formrecognizer_test_endpoint,
                                        credential)
     with self.assertRaises(ValueError):
         form_client.begin_recognize_custom_forms(model_id="", form=b"xx")
 def test_custom_form_empty_model_id(self, resource_group, location,
                                     form_recognizer_account,
                                     form_recognizer_account_key):
     """Check that an empty ``model_id`` is rejected with ``ValueError``."""
     credential = AzureKeyCredential(form_recognizer_account_key)
     form_client = FormRecognizerClient(form_recognizer_account, credential)
     with self.assertRaises(ValueError):
         form_client.begin_recognize_custom_forms(model_id="", form=b"xx")
Example #3
0
    def recognize_custom_forms(self):
        """Recognize one sample form with two custom models and print the fields.

        The endpoint, key and both model IDs are read from the environment
        (``AZURE_FORM_RECOGNIZER_ENDPOINT``, ``AZURE_FORM_RECOGNIZER_KEY``,
        ``ID_OF_MODEL_TRAINED_WITH_LABELS``,
        ``ID_OF_MODEL_TRAINED_WITHOUT_LABELS``); a missing variable raises
        ``KeyError``. Bounding boxes are rendered with ``format_bounding_box``,
        which is defined outside this method.
        """
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer import FormRecognizerClient

        endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
        key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
        labeled_model_id = os.environ["ID_OF_MODEL_TRAINED_WITH_LABELS"]
        unlabeled_model_id = os.environ["ID_OF_MODEL_TRAINED_WITHOUT_LABELS"]

        credential = AzureKeyCredential(key)
        client = FormRecognizerClient(endpoint=endpoint, credential=credential)

        # Make sure your form's type is included in the list of form types
        # the custom model can recognize.
        with open("sample_forms/forms/Form_1.jpg", "rb") as form_file:
            form_bytes = form_file.read()

        # Start both operations before waiting on either one: calling
        # result() only afterwards allows for server-side parallelization.
        labeled_poller = client.begin_recognize_custom_forms(
            model_id=labeled_model_id, form=form_bytes)
        unlabeled_poller = client.begin_recognize_custom_forms(
            model_id=unlabeled_model_id, form=form_bytes)
        labeled_forms = labeled_poller.result()
        unlabeled_forms = unlabeled_poller.result()

        value_line = (
            "...Field '{}' has value '{}' based on '{}' within bounding box '{}', with a confidence score of {}"
        )
        label_line = (
            "...Field '{}' has label '{}' within bounding box '{}', with a confidence score of {}"
        )

        # For a model trained with labels, the field name is its
        # training-time label; otherwise it is a numeric index.
        # Label data is not returned for a model trained with labels.
        print(
            "---------Recognizing forms with models trained with labeled data---------"
        )
        for recognized in labeled_forms:
            for name, field in recognized.fields.items():
                value_data = field.value_data
                print(value_line.format(
                    name, field.value, value_data.text,
                    format_bounding_box(value_data.bounding_box),
                    field.confidence))

        print(
            "-----------------------------------------------------------------------"
        )
        print(
            "-------Recognizing forms with models trained with unlabeled data-------"
        )
        for recognized in unlabeled_forms:
            for name, field in recognized.fields.items():
                label_data = field.label_data
                print(label_line.format(
                    name, label_data.text,
                    format_bounding_box(label_data.bounding_box),
                    field.confidence))
                value_data = field.value_data
                print(value_line.format(
                    name, field.value, value_data.text,
                    format_bounding_box(value_data.bounding_box),
                    field.confidence))
Example #4
0
 def test_authentication_bad_key(self, formrecognizer_test_endpoint,
                                 formrecognizer_test_api_key):
     """Check that the key ``"xxxx"`` leads to ``ClientAuthenticationError``."""
     bad_credential = AzureKeyCredential("xxxx")
     form_client = FormRecognizerClient(formrecognizer_test_endpoint,
                                        bad_credential)
     with self.assertRaises(ClientAuthenticationError):
         # The returned poller is irrelevant; only the raised error matters.
         form_client.begin_recognize_custom_forms(
             model_id="xx", form=b"xx", content_type="image/jpeg")
Example #5
0
    def recognize_custom_forms(self):
        """Recognize ``Form_1.jpg`` with ``self.model_id`` and print each field.

        Uses ``self.endpoint`` and ``self.key`` to build the client. For every
        recognized form the type is printed, followed by each field's value
        and, when label data is present, its label.
        """
        # [START recognize_custom_forms]
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer import FormRecognizerClient
        endpoint = self.endpoint
        credential = AzureKeyCredential(self.key)
        client = FormRecognizerClient(endpoint=endpoint, credential=credential)

        # Make sure your form's type is included in the list of form types
        # the custom model can recognize.
        with open("sample_forms/forms/Form_1.jpg", "rb") as form_file:
            poller = client.begin_recognize_custom_forms(
                model_id=self.model_id, stream=form_file)
        forms = poller.result()

        value_line = "...Field '{}' has value '{}' with a confidence score of {}"
        label_line = "...Field '{}' has label '{}' with a confidence score of {}"

        for idx, form in enumerate(forms):
            print("--------Recognizing Form #{}--------".format(idx))
            print("Form {} has type {}".format(idx, form.form_type))
            for name, field in form.fields.items():
                # Each field is a FormField. Its value can itself be a
                # FormField or a list of FormFields; in this sample it is
                # just a FormField.
                print(value_line.format(name, field.value, field.confidence))
                # Label data is populated for a model trained with unlabeled
                # data, since the service has to predict the labels itself.
                label_data = field.label_data
                if not label_data:
                    continue
                print(label_line.format(name, label_data.text,
                                        field.confidence))
            print("-----------------------------------")
 def test_authentication_bad_key(self, resource_group, location,
                                 form_recognizer_account,
                                 form_recognizer_account_key):
     """Check that the key ``"xxxx"`` leads to ``ClientAuthenticationError``."""
     bad_credential = AzureKeyCredential("xxxx")
     form_client = FormRecognizerClient(form_recognizer_account,
                                        bad_credential)
     with self.assertRaises(ClientAuthenticationError):
         # The returned poller is irrelevant; only the raised error matters.
         form_client.begin_recognize_custom_forms(
             model_id="xx", stream=b"xx", content_type="image/jpeg")
    def test_auto_detect_unsupported_stream_content(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
        """Check that the bytes of ``self.unsupported_content_py``, sent without
        an explicit content type, are rejected with ``ValueError``."""
        credential = AzureKeyCredential(form_recognizer_account_key)
        form_client = FormRecognizerClient(form_recognizer_account, credential)
        with open(self.unsupported_content_py, "rb") as source:
            payload = source.read()

        with self.assertRaises(ValueError):
            form_client.begin_recognize_custom_forms(model_id="xxx",
                                                     form=payload)
Example #8
0
 def test_custom_form_bad_endpoint(self, formrecognizer_test_endpoint,
                                   formrecognizer_test_api_key):
     """Check that using ``http://notreal.azure.com`` as the endpoint raises
     ``ServiceRequestError``."""
     with open(self.form_jpg, "rb") as source:
         payload = source.read()
     with self.assertRaises(ServiceRequestError):
         # Client construction stays inside the context so that an error
         # raised at either step satisfies the assertion.
         credential = AzureKeyCredential(formrecognizer_test_api_key)
         bogus_client = FormRecognizerClient("http://notreal.azure.com",
                                             credential)
         bogus_client.begin_recognize_custom_forms(model_id="xx",
                                                   form=payload)
Example #9
0
 def test_passing_unsupported_url_content_type(self,
                                               formrecognizer_test_endpoint,
                                               formrecognizer_test_api_key):
     """Check that a URL string passed with ``content_type`` raises
     ``TypeError``."""
     credential = AzureKeyCredential(formrecognizer_test_api_key)
     form_client = FormRecognizerClient(formrecognizer_test_endpoint,
                                        credential)
     request = {
         "model_id": "xx",
         "form": "https://badurl.jpg",
         "content_type": "application/json",
     }
     with self.assertRaises(TypeError):
         form_client.begin_recognize_custom_forms(**request)
    def test_passing_unsupported_url_content_type(self, resource_group,
                                                  location,
                                                  form_recognizer_account,
                                                  form_recognizer_account_key):
        """Check that a URL string passed with ``content_type`` raises
        ``TypeError``."""
        credential = AzureKeyCredential(form_recognizer_account_key)
        form_client = FormRecognizerClient(form_recognizer_account, credential)
        request = {
            "model_id": "xx",
            "stream": "https://badurl.jpg",
            "content_type": "application/json",
        }

        with self.assertRaises(TypeError):
            form_client.begin_recognize_custom_forms(**request)
    def recognize_custom_forms(self):
        """Recognize the bundled ``Form_1.jpg`` with a custom model and print it.

        The sample form is located relative to this file. The endpoint, key
        and model ID come from ``AZURE_FORM_RECOGNIZER_ENDPOINT``,
        ``AZURE_FORM_RECOGNIZER_KEY`` and ``CUSTOM_TRAINED_MODEL_ID``; a
        missing variable raises ``KeyError``.
        """
        this_file = os.path.abspath(__file__)
        path_to_sample_forms = os.path.abspath(
            os.path.join(this_file, "..", "./sample_forms/forms/Form_1.jpg"))
        # [START recognize_custom_forms]
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer import FormRecognizerClient

        endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
        key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
        model_id = os.environ["CUSTOM_TRAINED_MODEL_ID"]

        credential = AzureKeyCredential(key)
        client = FormRecognizerClient(endpoint=endpoint, credential=credential)

        # Make sure your form's type is included in the list of form types
        # the custom model can recognize.
        with open(path_to_sample_forms, "rb") as form_file:
            poller = client.begin_recognize_custom_forms(model_id=model_id,
                                                         form=form_file)
        forms = poller.result()

        label_line = "...Field '{}' has label '{}' with a confidence score of {}"
        value_line = "...Label '{}' has value '{}' with a confidence score of {}"

        for number, form in enumerate(forms, start=1):
            print("--------Recognizing Form #{}--------".format(number))
            print("Form has type {}".format(form.form_type))
            print("Form has form type confidence {}".format(
                form.form_type_confidence))
            print("Form was analyzed with model with ID {}".format(
                form.model_id))
            for name, field in form.fields.items():
                # Each field is a FormField. label_data is populated for a
                # model trained without labels, since the service has to
                # predict the labels itself.
                label_data = field.label_data
                if label_data:
                    print(label_line.format(name, label_data.text,
                                            field.confidence))
                    shown_label = label_data.text
                else:
                    shown_label = name
                # The value can also be a Dict[str, FormField] or a
                # List[FormField]; in this sample it is not.
                print(value_line.format(shown_label, field.value,
                                        field.confidence))

            print("-----------------------------------")
    def test_recognize_tables_dynamic_rows(self, custom_model_id):
        """Recognize ``label_table_dynamic_rows1.pdf`` and print its fields.

        The model ID is ``MODEL_ID_DYNAMIC_ROW_TABLES`` from the environment,
        falling back to ``custom_model_id``. The field named ``"table"`` is
        printed row by row; every other field is printed as a single value.
        """
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer import FormRecognizerClient

        endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
        key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
        model_id_dynamic_rows_table = os.getenv("MODEL_ID_DYNAMIC_ROW_TABLES",
                                                custom_model_id)

        credential = AzureKeyCredential(key)
        client = FormRecognizerClient(endpoint=endpoint, credential=credential)

        this_file = os.path.abspath(__file__)
        path_to_sample_forms = os.path.abspath(
            os.path.join(this_file, "..", "..",
                         "./sample_forms/forms/label_table_dynamic_rows1.pdf"))

        with open(path_to_sample_forms, "rb") as form_file:
            form_bytes = form_file.read()
        poller = client.begin_recognize_custom_forms(
            model_id=model_id_dynamic_rows_table, form=form_bytes)

        result = poller.result()

        column_line = "...Column '{}' with value '{}' and a confidence score of {}"
        field_line = "...Field '{}' has value '{}' with a confidence score of {}"

        print(
            "\n\n--------Recognizing labeled table with dynamic rows--------\n"
        )
        for recognized in result:
            for name, field in recognized.fields.items():
                # Substitute "table" with the label given to the table tag
                # during training (if different than sample training docs).
                if name != "table":
                    # non-table tagged FormField
                    print(field_line.format(name, field.value,
                                            field.confidence))
                    continue
                for row_number, row in enumerate(field.value, start=1):
                    print("Row {}".format(row_number))
                    for column_name, cell in row.value.items():
                        print(column_line.format(column_name, cell.value,
                                                 cell.confidence))
Example #13
0
def main():
    """Analyze ``test1.jpg`` with a custom model and print every recognized field.

    Configuration is read from the environment after ``load_dotenv()``:
    ``FORM_ENDPOINT``, ``FORM_KEY`` and ``MODEL_ID``. For each recognized form
    the form type is printed, followed by one line per field with its name,
    label (the field name when no label data is present), value and
    confidence.

    Any exception raised along the way is caught and printed instead of being
    propagated.
    """

    try:

        # Get configuration settings
        load_dotenv()
        form_endpoint = os.getenv('FORM_ENDPOINT')
        form_key = os.getenv('FORM_KEY')
        # Echo only the endpoint. The API key is a secret and must not be
        # written to stdout, where it can end up in logs or terminal history.
        print(form_endpoint)

        # Create client using endpoint and key
        form_recognizer_client = FormRecognizerClient(
            form_endpoint, AzureKeyCredential(form_key))

        # Model ID from when you trained your model.
        model_id = os.getenv('MODEL_ID')

        # Test trained model with a new form
        with open('test1.jpg', "rb") as f:
            poller = form_recognizer_client.begin_recognize_custom_forms(
                model_id=model_id, form=f)

        result = poller.result()

        for recognized_form in result:
            print("Form type: {}".format(recognized_form.form_type))
            for name, field in recognized_form.fields.items():
                print(
                    "Field '{}' has label '{}' with value '{}' and a confidence score of {}"
                    .format(
                        name,
                        field.label_data.text if field.label_data else name,
                        field.value, field.confidence))

    except Exception as ex:
        # Top-level boundary of the script: report the failure and exit
        # normally.
        print(ex)
Example #14
0
def main():
    """Analyze ``test1.jpg`` with a custom model and print every recognized field.

    The endpoint, key and model ID are taken from the ``FORM_ENDPOINT``,
    ``FORM_KEY`` and ``MODEL_ID`` environment variables when they are set.
    Otherwise the values previously embedded in this script are used, so
    running it without any environment configuration behaves as before.

    Any exception raised along the way is caught and printed instead of being
    propagated.
    """
    import os  # local import: only this function needs it

    try:

        # Get configuration settings.
        # NOTE(review): the literal fallbacks below are credentials committed
        # to source control. They are kept only for backward compatibility;
        # rotate the key and drop the defaults once the environment variables
        # are provisioned.
        form_endpoint = os.getenv(
            'FORM_ENDPOINT', "https://doors1.cognitiveservices.azure.com/")
        form_key = os.getenv('FORM_KEY', "70b2796924584d8da912296e8dea613a")

        # Create client using endpoint and key
        form_recognizer_client = FormRecognizerClient(
            form_endpoint, AzureKeyCredential(form_key))

        # Model ID from when you trained your model.
        model_id = os.getenv('MODEL_ID',
                             "8bf5a901-3ef6-4d1b-8a5b-f82ca3c1b05c")

        # Test trained model with a new form
        with open('test1.jpg', "rb") as f:
            poller = form_recognizer_client.begin_recognize_custom_forms(
                model_id=model_id, form=f)

        result = poller.result()

        for recognized_form in result:
            print("Form type: {}".format(recognized_form.form_type))
            for name, field in recognized_form.fields.items():
                print(
                    "Field '{}' has label '{}' with value '{}' and a confidence score of {}"
                    .format(
                        name,
                        field.label_data.text if field.label_data else name,
                        field.value, field.confidence))

    except Exception as ex:
        # Top-level boundary of the script: report the failure and exit
        # normally.
        print(ex)
Example #15
0
    def get_bounding_boxes(self):
        """Recognize ``Form_1.jpg`` with text content and print bounding boxes.

        Uses ``self.endpoint``, ``self.key`` and ``self.model_id``. For every
        form the fields are printed with the bounding box of their value, then
        each page's dimensions and every table cell together with the words
        and lines it is based on. Bounding boxes are rendered with
        ``format_bounding_box``, which is defined outside this method.
        """
        from azure.ai.formrecognizer import FormWord, FormLine
        # [START create_form_recognizer_client]
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer import FormRecognizerClient
        form_recognizer_client = FormRecognizerClient(
            endpoint=self.endpoint, credential=AzureKeyCredential(self.key))
        # [END create_form_recognizer_client]

        # Make sure your form's type is included in the list of form types
        # the custom model can recognize.
        with open("sample_forms/forms/Form_1.jpg", "rb") as form_file:
            poller = form_recognizer_client.begin_recognize_custom_forms(
                model_id=self.model_id,
                stream=form_file.read(),
                include_text_content=True)
        forms = poller.result()

        field_line = (
            "...Field '{}' has value '{}' based on '{}' within bounding box '{}', with a confidence score of {}"
        )
        page_line = (
            "Has width '{}' and height '{}' measure with unit: {}, and has text angle '{}'"
        )
        cell_line = (
            "...Cell[{}][{}] has text '{}' with confidence {} based on the following words: "
        )
        cell_word_line = (
            "......Word '{}' within bounding box '{}' has a confidence of {}"
        )
        text_line = (
            "......Line '{}' within bounding box '{}' has the following words: "
        )
        line_word_line = (
            ".........Word '{}' within bounding box '{}' has a confidence of {}"
        )

        for idx, form in enumerate(forms):
            print("--------RECOGNIZING FORM #{}--------".format(idx))
            print("Form has type {}".format(form.form_type))
            for name, field in form.fields.items():
                # Each field is a FormField. Its value can also be a
                # FormField or a list of FormFields; in this sample it is not.
                value_data = field.value_data
                print(field_line.format(
                    name, field.value, value_data.text,
                    format_bounding_box(value_data.bounding_box),
                    field.confidence))
            for page in form.pages:
                print("-------Recognizing Page #{} of Form #{}-------".format(
                    page.page_number, idx))
                print(page_line.format(page.width, page.height, page.unit,
                                       page.text_angle))
                for table in page.tables:
                    for cell in table.cells:
                        print(cell_line.format(cell.row_index,
                                               cell.column_index, cell.text,
                                               cell.confidence))
                        # text_content is only populated when
                        # include_text_content=True is passed to the
                        # recognize call. It is a heterogeneous list of
                        # FormWord and FormLine.
                        for content in cell.text_content:
                            if isinstance(content, FormWord):
                                print(cell_word_line.format(
                                    content.text,
                                    format_bounding_box(content.bounding_box),
                                    content.confidence))
                                continue
                            if not isinstance(content, FormLine):
                                continue
                            print(text_line.format(
                                content.text,
                                format_bounding_box(content.bounding_box)))
                            for word in content.words:
                                print(line_word_line.format(
                                    word.text,
                                    format_bounding_box(word.bounding_box),
                                    word.confidence))

                print("---------------------------------------------------")
            print("-----------------------------------")
Example #16
0
def main():
    """Text and image recognizer Streamlit page.

    Lets the user upload an image, displays it and saves it under
    ``test_images/``. It then runs a custom Form Recognizer model on
    ``test_images/test1.png`` and collects the recognized fields in a dict,
    asks Computer Vision for the objects in that same file, and displays the
    image cropped to the rectangle of the last detected object.

    Returns early when no file has been uploaded, or when no object is
    detected (in which case there is no rectangle to crop to).
    """

    st.markdown("bla bla")

    uploaded_image = st.file_uploader("Selectione une image")
    if uploaded_image is None:
        return

    image = Image.open(uploaded_image)
    st.image(image, use_column_width=True)
    image.save('test_images/test11111.png', 'PNG')

    # Dictionary collecting the extracted information
    dic = {}

    ## Form Recognizer configuration ##

    # Endpoint and key
    # NOTE(review): credentials are hard-coded here and in the Computer Vision
    # section below; they should come from configuration and be rotated.
    endpoint_f = "https://recettesfromrecognizer.cognitiveservices.azure.com/"
    key_f = "dee1ba127bbf442489c58a86932ae162"

    # Authenticate the client object
    form_recognizer_client = FormRecognizerClient(
        endpoint_f, AzureKeyCredential(key_f))

    # Model ID (the model trained with the Azure labeling tool)
    model_id = "491ce87f-878c-4eaf-8bc4-a336a4a209a5"

    # Image path
    # NOTE(review): the upload is saved as test11111.png above, but
    # test1.png is what gets analysed here - confirm this is intended.
    image_path = os.path.join("test_images", "test1.png")

    # Open the image and submit it for recognition
    with open(image_path, "rb") as f:
        poller = form_recognizer_client.begin_recognize_custom_forms(
            model_id=model_id, form=f)

    # Recognition result for the image
    forms = poller.result()

    # Collect field names and values
    for recognized_form in forms:
        for name, field in recognized_form.fields.items():
            print(" '{}' : ({}) Accuracy  \n '{}' \n".format(
                name,
                field.confidence,
                field.value,
            ))
            dic[name] = field.value

    ## Computer Vision configuration ##

    # Endpoint and key
    endpoint_c = "https://testdeletes123.cognitiveservices.azure.com/"
    key_c = "de6ba6ab6d3246e48cdba750fbd0f17e"

    # Authenticate the client object
    computervision_client = ComputerVisionClient(
        endpoint_c, CognitiveServicesCredentials(key_c))

    # Detect objects in the image file; the context manager guarantees the
    # file handle is released once the request has been sent.
    with open(image_path, "rb") as img:
        detected_object = computervision_client.detect_objects_in_stream(img)

    if not detected_object.objects:
        # Without a detection there is no bounding box to crop to.
        print("No objects detected.")
        return

    # Each detection overwrites the previous box, so the crop uses the
    # rectangle of the last object reported.
    for detected in detected_object.objects:
        rect = detected.rectangle
        box = (rect.x, rect.y, rect.x + rect.w, rect.y + rect.h)

    # Crop the image to the detected object and display it
    im = Image.open(image_path)
    area = im.crop(box)
    st.image(area, use_column_width=True)

    # Convert the cropped image to an array
    image_array = asarray(area)
    dic["image"] = image_array
    def recognize_custom_forms(self, custom_model_id):
        """Recognize the bundled ``Form_1.jpg`` and print fields and page content.

        The model ID is ``CUSTOM_TRAINED_MODEL_ID`` from the environment,
        falling back to ``custom_model_id``. The endpoint and key come from
        ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and ``AZURE_FORM_RECOGNIZER_KEY``.
        Besides the fields, the tables, lines and selection marks of every
        page are printed.
        """
        this_file = os.path.abspath(__file__)
        path_to_sample_forms = os.path.abspath(
            os.path.join(this_file, "..", "..",
                         "./sample_forms/forms/Form_1.jpg"))
        # [START recognize_custom_forms]
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer import FormRecognizerClient

        endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
        key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
        model_id = os.getenv("CUSTOM_TRAINED_MODEL_ID", custom_model_id)

        credential = AzureKeyCredential(key)
        client = FormRecognizerClient(endpoint=endpoint, credential=credential)

        # Make sure your form's type is included in the list of form types
        # the custom model can recognize.
        with open(path_to_sample_forms, "rb") as form_file:
            poller = client.begin_recognize_custom_forms(
                model_id=model_id, form=form_file, include_field_elements=True)
        forms = poller.result()

        label_line = "...Field '{}' has label '{}' with a confidence score of {}"
        value_line = "...Label '{}' has value '{}' with a confidence score of {}"

        for number, form in enumerate(forms, start=1):
            print("--------Recognizing Form #{}--------".format(number))
            print("Form has type {}".format(form.form_type))
            print("Form has form type confidence {}".format(
                form.form_type_confidence))
            print("Form was analyzed with model with ID {}".format(
                form.model_id))
            for name, field in form.fields.items():
                # Each field is a FormField. label_data is populated for a
                # model trained without labels, since the service has to
                # predict the labels itself.
                label_data = field.label_data
                if label_data:
                    print(label_line.format(name, label_data.text,
                                            field.confidence))
                    shown_label = label_data.text
                else:
                    shown_label = name
                print(value_line.format(shown_label, field.value,
                                        field.confidence))

            # Iterate over tables, lines, and selection marks on each page.
            for page in form.pages:
                for table_number, table in enumerate(page.tables, start=1):
                    print("\nTable {} on page {}".format(table_number,
                                                         table.page_number))
                    for cell in table.cells:
                        print("...Cell[{}][{}] has text '{}' with confidence {}".format(
                            cell.row_index, cell.column_index, cell.text,
                            cell.confidence))
                print("\nLines found on page {}".format(page.page_number))
                for line in page.lines:
                    print("...Line '{}' is made up of the following words: ".format(
                        line.text))
                    for word in line.words:
                        print("......Word '{}' has a confidence of {}".format(
                            word.text, word.confidence))
                marks = page.selection_marks
                if not marks:
                    continue
                print("\nSelection marks found on page {}".format(
                    page.page_number))
                for mark in marks:
                    print("......Selection mark is '{}' and has a confidence of {}".format(
                        mark.state, mark.confidence))

            print("-----------------------------------")
Example #18
0
# Form Recognizer endpoint and key.
# NOTE(review): the key is hard-coded in source; it should be loaded from
# configuration and rotated.
endpoint_f = "https://recettesfromrecognizer.cognitiveservices.azure.com/"
key_f = "dee1ba127bbf442489c58a86932ae162"

# Authenticate the client object
form_recognizer_client = FormRecognizerClient(endpoint_f,
                                              AzureKeyCredential(key_f))

# Model ID (the model that was trained with the Azure labeling tool)
model_id = "491ce87f-878c-4eaf-8bc4-a336a4a209a5"

# Path of the image to analyse
image_path = os.path.join("test_images", "test3.jpg")

# Open the image and submit it for recognition with the custom model
with open(image_path, "rb") as f:
    poller = form_recognizer_client.begin_recognize_custom_forms(
        model_id=model_id, form=f)
# Recognition result for the image
forms = poller.result()

# Print each field's name, confidence and value, and store the value under
# the field name in `dic` (which is defined outside this excerpt).
for recognized_form in forms:
    for name, field in recognized_form.fields.items():
        print(" '{}' : ({}) Accuracy  \n '{}' \n".format(
            name,
            field.confidence,
            field.value,
        ))
        dic[name] = field.value

## Costum Vision Configuration ##
    def recognize_custom_forms(self, labeled_model_id, unlabeled_model_id):
        """Compare recognition output of a labeled and an unlabeled custom model.

        The model IDs are ``ID_OF_MODEL_TRAINED_WITH_LABELS`` and
        ``ID_OF_MODEL_TRAINED_WITHOUT_LABELS`` from the environment, falling
        back to ``labeled_model_id`` and ``unlabeled_model_id``. The bundled
        ``Form_1.jpg`` is sent to both models and the fields of each result
        are printed, followed by a lookup of one specific field per model.
        Bounding boxes are rendered with ``format_bounding_box``, which is
        defined outside this method.
        """
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer import FormRecognizerClient

        endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
        key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
        model_trained_with_labels_id = os.getenv(
            "ID_OF_MODEL_TRAINED_WITH_LABELS", labeled_model_id)
        model_trained_without_labels_id = os.getenv(
            "ID_OF_MODEL_TRAINED_WITHOUT_LABELS", unlabeled_model_id)

        credential = AzureKeyCredential(key)
        client = FormRecognizerClient(endpoint=endpoint, credential=credential)

        this_file = os.path.abspath(__file__)
        path_to_sample_forms = os.path.abspath(
            os.path.join(this_file, "..", "..",
                         "./sample_forms/forms/Form_1.jpg"))

        with open(path_to_sample_forms, "rb") as form_file:
            form_bytes = form_file.read()

        # Start both operations before waiting on either one: calling
        # result() only afterwards allows for server-side parallelization.
        labeled_poller = client.begin_recognize_custom_forms(
            model_id=model_trained_with_labels_id, form=form_bytes)
        unlabeled_poller = client.begin_recognize_custom_forms(
            model_id=model_trained_without_labels_id, form=form_bytes)
        labeled_forms = labeled_poller.result()
        unlabeled_forms = unlabeled_poller.result()

        value_line = (
            "...Field '{}' has value '{}' within bounding box '{}', with a confidence score of {}"
        )
        label_line = (
            "...Field '{}' has label '{}' within bounding box '{}', with a confidence score of {}"
        )

        # For a model trained with labels, the field name is the label given
        # during training. `value` holds the typed field value and
        # `value_data` holds information about it. `label_data` is not
        # populated, because the given label was used to extract the key.
        print(
            "---------Recognizing forms using models trained with labeled data---------"
        )
        for recognized in labeled_forms:
            for name, field in recognized.fields.items():
                print(value_line.format(
                    name, field.value,
                    format_bounding_box(field.value_data.bounding_box),
                    field.confidence))

        # Find a specific labeled field. Substitute "Merchant" with your
        # specific training-time label.
        training_time_label = "Merchant"
        try:
            print(
                "\nValue for a specific labeled field using the training-time label:"
            )
            for recognized in labeled_forms:
                merchant = recognized.fields[training_time_label]
                print("The Merchant is {}\n".format(merchant.value))
        except KeyError:
            print(
                "'Merchant' training-time label does not exist. Substitute with your own training-time label.\n"
            )

        # For a model trained without labels, the field name is a numeric
        # index. Non-unique form field label names are in `label_data.text`;
        # information about the label and the value is in `label_data` and
        # `value_data`.
        print(
            "-----------------------------------------------------------------------"
        )
        print(
            "-------Recognizing forms using models trained with unlabeled data-------"
        )
        for recognized in unlabeled_forms:
            for name, field in recognized.fields.items():
                label_data = field.label_data
                print(label_line.format(
                    name, label_data.text,
                    format_bounding_box(label_data.bounding_box),
                    field.confidence))
                print(value_line.format(
                    name, field.value,
                    format_bounding_box(field.value_data.bounding_box),
                    field.confidence))

        # Find the value of a specific unlabeled field. It will only be found
        # if the sample training forms were used.
        print("\nValue for a specific unlabeled field:")
        field_label = "Vendor Name:"
        for recognized in unlabeled_forms:
            for field in recognized.fields.values():
                if field.label_data.text != field_label:
                    continue
                print("The Vendor Name is {}\n".format(field.value))
Example #20
0
class RecognizeCustomForms(PerfStressTest):
    """Perf/stress test that runs custom-form recognition on a sample JPEG.

    ``global_setup`` trains a labeled model and stores its ID,
    ``global_cleanup`` deletes that model, and ``run_sync``/``run_async``
    each submit the same in-memory form to the model and wait for the result.
    """

    def __init__(self, arguments):
        super().__init__(arguments)

        # Read the sample form into memory once; the run methods reuse the bytes.
        sample_form_path = os.path.abspath(
            os.path.join(os.path.abspath(__file__), "..",
                         "./../sample_forms/forms/Form_1.jpg"))
        with open(sample_form_path, "rb") as form_file:
            self.custom_form_jpg = form_file.read()

        # Test configuration is taken from the environment (KeyError if unset).
        self.formrecognizer_storage_container_sas_url = os.environ[
            "FORMRECOGNIZER_TRAINING_DATA_CONTAINER_SAS_URL"]
        endpoint = os.environ["FORMRECOGNIZER_TEST_ENDPOINT"]
        api_key = os.environ["FORMRECOGNIZER_TEST_API_KEY"]

        # Sync and async recognition clients exercised by the perf runs.
        self.service_client = FormRecognizerClient(
            endpoint, AzureKeyCredential(api_key))
        self.async_service_client = AsyncFormRecognizerClient(
            endpoint, AzureKeyCredential(api_key))

        # Training client: used only to create and delete the model.
        self.async_training_client = AsyncFormTrainingClient(
            endpoint, AzureKeyCredential(api_key))

    async def global_setup(self):
        """Train a labeled model and remember its ID for the runs."""
        training_poller = await self.async_training_client.begin_training(
            self.formrecognizer_storage_container_sas_url,
            use_training_labels=True,
            model_name="labeled")
        trained_model = await training_poller.result()
        self.model_id = trained_model.model_id

    async def global_cleanup(self):
        """Delete the model that ``global_setup`` trained."""
        await self.async_training_client.delete_model(self.model_id)

    async def close(self):
        """Close all three clients, then let the base class close."""
        await self.async_service_client.close()
        self.service_client.close()
        await self.async_training_client.close()
        await super().close()

    def run_sync(self):
        """Recognize the sample form with the synchronous client."""
        recognized_forms = self.service_client.begin_recognize_custom_forms(
            self.model_id,
            self.custom_form_jpg,
            content_type=FormContentType.IMAGE_JPEG).result()
        assert recognized_forms

    async def run_async(self):
        """Recognize the sample form with the asynchronous client."""
        recognize_poller = (
            await self.async_service_client.begin_recognize_custom_forms(
                self.model_id,
                self.custom_form_jpg,
                content_type=FormContentType.IMAGE_JPEG))
        recognized_forms = await recognize_poller.result()
        assert recognized_forms
    def get_bounding_boxes(self):
        """Recognize a sample form with a custom model and print bounding boxes.

        Reads the service endpoint, API key and model ID from the environment
        variables ``AZURE_FORM_RECOGNIZER_ENDPOINT``,
        ``AZURE_FORM_RECOGNIZER_KEY`` and ``CUSTOM_TRAINED_MODEL_ID``, submits
        ``sample_forms/forms/Form_1.jpg`` (resolved relative to this file) with
        ``include_field_elements=True``, and prints each recognized field,
        each page, and for every table cell the words/lines it is built from.

        Raises:
            KeyError: if one of the environment variables is not set.
        """
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer import FormRecognizerClient
        from azure.ai.formrecognizer import FormWord, FormLine

        endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
        key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
        model_id = os.environ["CUSTOM_TRAINED_MODEL_ID"]

        form_recognizer_client = FormRecognizerClient(
            endpoint=endpoint, credential=AzureKeyCredential(key))

        path_to_sample_forms = os.path.abspath(
            os.path.join(os.path.abspath(__file__), "..",
                         "./sample_forms/forms/Form_1.jpg"))
        # Make sure your form's type is included in the list of form types the custom model can recognize
        with open(path_to_sample_forms, "rb") as f:
            poller = form_recognizer_client.begin_recognize_custom_forms(
                model_id=model_id, form=f, include_field_elements=True)
        forms = poller.result()

        for idx, form in enumerate(forms):
            print("--------RECOGNIZING FORM #{}--------".format(idx))
            print("Form has type: {}".format(form.form_type))
            for name, field in form.fields.items():
                # each field is of type FormField
                # The value of the field can also be a Dict[str, FormField], or a List[FormField] - in our sample, it is not.
                # value_data gets the same None guard that label_data already
                # has, so a field without located value text prints "N/A"
                # instead of raising AttributeError.
                if field.value_data:
                    value_box = format_bounding_box(
                        field.value_data.bounding_box)
                else:
                    value_box = "N/A"
                print(
                    "...Field '{}' has label '{}' with value '{}' within bounding box '{}', with a confidence score of {}"
                    .format(
                        name,
                        field.label_data.text if field.label_data else name,
                        field.value,
                        value_box,
                        field.confidence))
            for page in form.pages:
                print("-------Recognizing Page #{} of Form #{}-------".format(
                    page.page_number, idx))
                print(
                    "Has width '{}' and height '{}' measure with unit: {}, and has text angle '{}'"
                    .format(page.width, page.height, page.unit,
                            page.text_angle))
                for table in page.tables:
                    for cell in table.cells:
                        print(
                            "...Cell[{}][{}] has text '{}' with confidence {} based on the following words: "
                            .format(cell.row_index, cell.column_index,
                                    cell.text, cell.confidence))
                        # field_elements is only populated if you set include_field_elements to True in your call
                        # to begin_recognize_custom_forms
                        # It is a heterogeneous list of FormWord and FormLine.
                        # NOTE(review): `or []` is defensive - presumably a
                        # cell with no elements can report None here; confirm
                        # against the SDK model.
                        for content in cell.field_elements or []:
                            if isinstance(content, FormWord):
                                print(
                                    "......Word '{}' within bounding box '{}' has a confidence of {}"
                                    .format(
                                        content.text,
                                        format_bounding_box(
                                            content.bounding_box),
                                        content.confidence))
                            elif isinstance(content, FormLine):
                                print(
                                    "......Line '{}' within bounding box '{}' has the following words: "
                                    .format(
                                        content.text,
                                        format_bounding_box(
                                            content.bounding_box)))
                                for word in content.words:
                                    print(
                                        ".........Word '{}' within bounding box '{}' has a confidence of {}"
                                        .format(
                                            word.text,
                                            format_bounding_box(
                                                word.bounding_box),
                                            word.confidence))

                print("---------------------------------------------------")
            print("-----------------------------------")
Example #22
0
    def recognize_custom_forms(self):
        path_to_sample_forms = os.path.join('receipt.pdf')
        # [START recognize_custom_forms]
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer import FormRecognizerClient

        endpoint = 'https://gsdp-formrecog.cognitiveservices.azure.com/'
        key = '0176c0aa83a0451ea4aa8c0ae6845aa3'
        model_id = 'b131c408-ad86-424b-aa01-388b98a48d9d'

        form_recognizer_client = FormRecognizerClient(
            endpoint=endpoint, credential=AzureKeyCredential(key)
        )

        # Make sure your form's type is included in the list of form types the custom model can recognize
        with open(path_to_sample_forms, "rb") as f:
            poller = form_recognizer_client.begin_recognize_custom_forms(
                model_id=model_id, form=f
            )
        forms = poller.result()
        # we will eventually use confidence scores to decide whether to reject/accept customer data
        #90%/0.9 threshold for 'good' - subject to change
        #potentially anything below 70% and customer is asked to try again

        for idx, form in enumerate(forms):
            print("--------Recognizing Form #{}--------".format(idx+1))
            # clinicName = receipt.fields.get("clinic name")
            #     if clinicName:
            #     print("The clinic name is: {}" .format(clinicName.value))
            print("Form has type {}".format(form.form_type))
            # print("Form has form type confidence {}".format(form.form_type_confidence))
            # print("Form was analyzed with model with ID {}".format(form.model_id))
            clinic_address = form.fields.get("clinic address")
            if clinic_address:
                print("Clinic address: {}".format(clinic_address.value))
            
            clinicName = form.fields.get("clinic name")
            if clinicName:
                print("Clinic name: {}" .format(clinicName.value))

            items = form.fields.get("item-names")
            # items_string = items.value
            # split = items_string.split()
            # items: fixing this later (afternoon) to split into individual;s, waiting on Elanco

            # can we access the split parts individually? possible solution
            if items:
                print("\t - Item Name: {}".format(items.value))
            prices = form.fields.get("prices")
            # prices_string = prices.value
            # prices_split = prices_string.split()
            #accessing individual values here? split by space, assign to decimal array
            
            if prices:
                print("\t - Price: {}".format(prices.value))
            invoiceDate = form.fields.get("invoice date")
            if invoiceDate:
                print("\t - Invoice Date: {}".format(invoiceDate.value))
                patientName = form.fields.get("patientName")
            if patientName:
                print("\t - Patient Name: {}".format(patientName.value))
                
            microchip = form.fields.get("microchip")
            customerName = form.fields.get("customer name")
            if customerName:
                print("\t - Customer Name: {}".format(customerName.value))
                
                
            transactionNo = form.fields.get("transaction no")
            if transactionNo:
                print("Transaction number: {}".format(transactionNo.value))
            quantity = form.fields.get("quantity")
            if quantity:
                print("quantity: {}".format(quantity.value))