Beispiel #1
0
    async def test_passing_unsupported_url_content_type(
            self, resource_group, location, form_recognizer_account,
            form_recognizer_account_key):
        """Expect ``TypeError`` when a URL string is sent with a JSON content type.

        The call runs inside ``async with client`` so the client's context
        is exited (and its resources released) even though the call raises.
        """
        client = FormRecognizerClient(
            form_recognizer_account,
            AzureKeyCredential(form_recognizer_account_key))

        with self.assertRaises(TypeError):
            async with client:
                await client.recognize_receipts(
                    "https://badurl.jpg", content_type="application/json")
Beispiel #2
0
 async def test_receipt_bad_endpoint(self, resource_group, location,
                                     form_recognizer_account,
                                     form_recognizer_account_key):
     """Expect ``ServiceRequestError`` when the client targets
     ``http://notreal.azure.com`` instead of the real account endpoint.
     """
     with open(self.receipt_jpg, "rb") as fd:
         myfile = fd.read()
     with self.assertRaises(ServiceRequestError):
         client = FormRecognizerClient(
             "http://notreal.azure.com",
             AzureKeyCredential(form_recognizer_account_key))
         # Enter the client so its context is exited even though the
         # request fails.
         async with client:
             await client.recognize_receipts(myfile)
 async def test_id_document_bad_endpoint(self, formrecognizer_test_endpoint,
                                         formrecognizer_test_api_key):
     """ID-document recognition against ``http://notreal.azure.com`` must
     raise ``ServiceRequestError``.
     """
     with open(self.id_document_jpg, "rb") as stream:
         document_bytes = stream.read()

     with self.assertRaises(ServiceRequestError):
         credential = AzureKeyCredential(formrecognizer_test_api_key)
         bad_client = FormRecognizerClient("http://notreal.azure.com",
                                           credential)
         async with bad_client:
             await bad_client.begin_recognize_id_documents(document_bytes)
Beispiel #4
0
 async def test_content_authentication_successful_key(
         self, resource_group, location, form_recognizer_account,
         form_recognizer_account_key):
     """Run content recognition on the invoice PDF using the account key
     supplied by the test fixture.

     The test passes when the operation completes without raising.
     """
     client = FormRecognizerClient(
         form_recognizer_account,
         AzureKeyCredential(form_recognizer_account_key))
     with open(self.invoice_pdf, "rb") as fd:
         myfile = fd.read()
     # Use the client as a context manager so it is released after the call.
     async with client:
         poller = await client.begin_recognize_content(myfile)
         await poller.result()
Beispiel #5
0
    async def test_pass_stream_into_url(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
        """Expect ``HttpResponseError`` when an open file object is passed
        as ``form_url`` to the URL-based custom-form API.
        """
        client = FormRecognizerClient(form_recognizer_account, AzureKeyCredential(form_recognizer_account_key))

        with open(self.unsupported_content_py, "rb") as fd:
            with self.assertRaises(HttpResponseError):
                # Enter the client so its context is exited even though
                # the call raises.
                async with client:
                    poller = await client.begin_recognize_custom_forms_from_url(
                        model_id="xxx",
                        form_url=fd,
                    )
                    await poller.result()
 async def test_content_authentication_bad_key(self,
                                               formrecognizer_test_endpoint,
                                               formrecognizer_test_api_key):
     """Content recognition with the key ``"xxxx"`` must raise
     ``ClientAuthenticationError``.
     """
     bogus_credential = AzureKeyCredential("xxxx")
     bad_key_client = FormRecognizerClient(formrecognizer_test_endpoint,
                                           bogus_credential)
     with self.assertRaises(ClientAuthenticationError):
         async with bad_key_client:
             operation = await bad_key_client.begin_recognize_content(
                 b"xxx", content_type="application/pdf")
             await operation.result()
 async def test_custom_form_none_model_id(self,
                                          formrecognizer_test_endpoint,
                                          formrecognizer_test_api_key):
     """``model_id=None`` on the URL-based custom-form API must raise
     ``ValueError``.
     """
     credential = AzureKeyCredential(formrecognizer_test_api_key)
     form_client = FormRecognizerClient(formrecognizer_test_endpoint,
                                        credential)
     with self.assertRaises(ValueError):
         async with form_client:
             await form_client.begin_recognize_custom_forms_from_url(
                 model_id=None,
                 form_url="https://badurl.jpg",
             )
 async def test_custom_form_none_model_id(self, resource_group, location,
                                          form_recognizer_account,
                                          form_recognizer_account_key):
     """``model_id=None`` on the bytes-based custom-form API must raise
     ``ValueError``.
     """
     credential = AzureKeyCredential(form_recognizer_account_key)
     form_client = FormRecognizerClient(form_recognizer_account, credential)
     with self.assertRaises(ValueError):
         async with form_client:
             await form_client.begin_recognize_custom_forms(
                 model_id=None,
                 form=b"xx",
             )
Beispiel #9
0
    async def test_get_form_training_client(self, resource_group, location,
                                            form_recognizer_account,
                                            form_recognizer_account_key):
        """Check that the transport session stays set before, during and
        after using a training client obtained from the recognizer client.
        """
        shared_transport = AioHttpTransport()
        recognizer = FormRecognizerClient(
            endpoint=form_recognizer_account,
            credential=AzureKeyCredential(form_recognizer_account_key),
            transport=shared_transport,
        )

        async with recognizer:
            await recognizer.recognize_receipts_from_url(self.receipt_url_jpg)
            assert shared_transport.session is not None

            async with recognizer.get_form_training_client() as training_client:
                assert shared_transport.session is not None
                await training_client.get_account_properties()

            # The recognizer must still be usable once the training
            # client's context has been exited.
            await recognizer.recognize_receipts_from_url(self.receipt_url_jpg)
            assert shared_transport.session is not None
Beispiel #10
0
 async def test_authentication_successful_key(self, resource_group,
                                              location,
                                              form_recognizer_account,
                                              form_recognizer_account_key):
     """Recognize the receipt JPG using the account key supplied by the
     test fixture.

     The test passes when the call completes without raising.
     """
     client = FormRecognizerClient(
         form_recognizer_account,
         AzureKeyCredential(form_recognizer_account_key))
     with open(self.receipt_jpg, "rb") as fd:
         myfile = fd.read()
     # Use the client as a context manager so it is released after the call.
     async with client:
         await client.recognize_receipts(myfile)
Beispiel #11
0
    async def test_passing_bad_url(self, resource_group, location,
                                   form_recognizer_account,
                                   form_recognizer_account_key):
        """Expect ``HttpResponseError`` when custom-form recognition is
        given the URL ``https://badurl.jpg``.
        """
        client = FormRecognizerClient(
            form_recognizer_account,
            AzureKeyCredential(form_recognizer_account_key))

        with self.assertRaises(HttpResponseError):
            # Enter the client so its context is exited even though the
            # call raises.
            async with client:
                await client.recognize_custom_forms_from_url(
                    model_id="xx", form_url="https://badurl.jpg")
    async def test_auto_detect_unsupported_stream_content(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
        """Expect ``ValueError`` when the bytes of ``self.unsupported_content_py``
        are submitted for content recognition without an explicit content type.
        """
        client = FormRecognizerClient(form_recognizer_account, AzureKeyCredential(form_recognizer_account_key))

        with open(self.unsupported_content_py, "rb") as fd:
            myfile = fd.read()

        with self.assertRaises(ValueError):
            # Enter the client so its context is exited even though the
            # call raises.
            async with client:
                await client.recognize_content(
                    myfile
                )
 async def test_url_authentication_bad_key(self, resource_group, location,
                                           form_recognizer_account,
                                           form_recognizer_account_key):
     """URL-based custom-form recognition with the key ``"xxxx"`` must
     raise ``ClientAuthenticationError``.
     """
     bogus_credential = AzureKeyCredential("xxxx")
     bad_key_client = FormRecognizerClient(form_recognizer_account,
                                           bogus_credential)
     with self.assertRaises(ClientAuthenticationError):
         async with bad_key_client:
             operation = await bad_key_client.begin_recognize_custom_forms_from_url(
                 model_id="xx",
                 form_url=self.form_url_jpg,
             )
             await operation.result()
Beispiel #14
0
 async def test_receipt_url_bad_endpoint(self, formrecognizer_test_endpoint,
                                         formrecognizer_test_api_key):
     """URL-based receipt recognition against ``http://notreal.azure.com``
     must raise ``ServiceRequestError``.
     """
     with self.assertRaises(ServiceRequestError):
         credential = AzureKeyCredential(formrecognizer_test_api_key)
         bad_client = FormRecognizerClient("http://notreal.azure.com",
                                           credential)
         async with bad_client:
             operation = await bad_client.begin_recognize_receipts_from_url(
                 self.receipt_url_jpg)
             await operation.result()
Beispiel #15
0
 async def test_damaged_file_bytes_fails_autodetect_content_type(
         self, resource_group, location, form_recognizer_account,
         form_recognizer_account_key):
     """Expect ``ValueError`` when the submitted bytes do not match any
     known file signature and no content type is given.
     """
     client = FormRecognizerClient(
         form_recognizer_account,
         AzureKeyCredential(form_recognizer_account_key))
     damaged_pdf = b"\x50\x44\x46\x55\x55\x55"  # doesn't match any magic file numbers
     with self.assertRaises(ValueError):
         # Enter the client so its context is exited even though the
         # call raises.
         async with client:
             poller = await client.begin_recognize_receipts(damaged_pdf, )
             await poller.result()
 async def test_custom_form_empty_model_id(self, resource_group, location,
                                           form_recognizer_account,
                                           form_recognizer_account_key):
     """An empty ``model_id`` on the URL-based custom-form API must raise
     ``ValueError``.
     """
     credential = AzureKeyCredential(form_recognizer_account_key)
     form_client = FormRecognizerClient(form_recognizer_account, credential)
     with self.assertRaises(ValueError):
         async with form_client:
             await form_client.begin_recognize_custom_forms_from_url(
                 model_id="",
                 form_url="https://badurl.jpg",
             )
Beispiel #17
0
 async def test_damaged_file_passed_as_bytes(self, resource_group, location,
                                             form_recognizer_account,
                                             form_recognizer_account_key):
     """Expect ``HttpResponseError`` for bytes that start with the PDF
     signature but are otherwise damaged.
     """
     client = FormRecognizerClient(
         form_recognizer_account,
         AzureKeyCredential(form_recognizer_account_key))
     damaged_pdf = b"\x25\x50\x44\x46\x55\x55\x55"  # still has correct bytes to be recognized as PDF
     with self.assertRaises(HttpResponseError):
         # Enter the client so its context is exited even though the
         # call raises.
         async with client:
             poller = await client.begin_recognize_receipts(damaged_pdf, )
             await poller.result()
 async def test_content_url_bad_endpoint(self, resource_group, location,
                                         form_recognizer_account,
                                         form_recognizer_account_key):
     """Expect ``ServiceRequestError`` when URL-based content recognition
     targets ``http://notreal.azure.com``.
     """
     with self.assertRaises(ServiceRequestError):
         client = FormRecognizerClient(
             "http://notreal.azure.com",
             AzureKeyCredential(form_recognizer_account_key))
         # Enter the client so its context is exited even though the
         # request fails.
         async with client:
             poller = await client.begin_recognize_content_from_url(
                 self.invoice_url_pdf)
             await poller.result()
Beispiel #19
0
 async def test_authentication_bad_key(self, resource_group, location,
                                       form_recognizer_account,
                                       form_recognizer_account_key):
     """Receipt recognition with the key ``"xxxx"`` must raise
     ``ClientAuthenticationError``.
     """
     bogus_credential = AzureKeyCredential("xxxx")
     bad_key_client = FormRecognizerClient(form_recognizer_account,
                                           bogus_credential)
     with self.assertRaises(ClientAuthenticationError):
         async with bad_key_client:
             operation = await bad_key_client.begin_recognize_receipts(
                 b"xx", content_type="image/jpeg")
             await operation.result()
 async def test_custom_form_empty_model_id(self,
                                           formrecognizer_test_endpoint,
                                           formrecognizer_test_api_key):
     """An empty ``model_id`` on the bytes-based custom-form API must raise
     ``ValueError``.
     """
     credential = AzureKeyCredential(formrecognizer_test_api_key)
     form_client = FormRecognizerClient(formrecognizer_test_endpoint,
                                        credential)
     with self.assertRaises(ValueError):
         async with form_client:
             await form_client.begin_recognize_custom_forms(
                 model_id="",
                 form=b"xx",
             )
Beispiel #21
0
    async def recognize_custom_forms(self):
        """Recognize one sample form with two custom models and print the fields.

        The same form bytes are sent to the model identified by
        ``self.model_trained_with_labels_id`` and to the one identified by
        ``self.model_trained_without_labels_id``; every recognized field is
        printed together with its bounding box and confidence.
        """
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer.aio import FormRecognizerClient

        this_file = os.path.abspath(__file__)
        sample_form_path = os.path.abspath(
            os.path.join(this_file, "..", "..",
                         "./sample_forms/forms/Form_1.jpg"))

        # Output templates shared by the two reporting loops below.
        value_line = "...Field '{}' has value '{}' based on '{}' within bounding box '{}', with a confidence score of {}"
        label_line = "...Field '{}' has label '{}' within bounding box '{}', with a confidence score of {}"

        client = FormRecognizerClient(
            endpoint=self.endpoint, credential=AzureKeyCredential(self.key))
        async with client as form_recognizer_client:

            # The form's type must be one of the form types the custom model can recognize.
            with open(sample_form_path, "rb") as form_file:
                form_bytes = form_file.read()
            labeled_results = await form_recognizer_client.recognize_custom_forms(
                model_id=self.model_trained_with_labels_id, form=form_bytes)
            unlabeled_results = await form_recognizer_client.recognize_custom_forms(
                model_id=self.model_trained_without_labels_id, form=form_bytes)

            # For a model trained with labels the field name is the
            # training-time label; otherwise it is a numeric index.
            # No label data is returned for a model trained with labels.
            print(
                "---------Recognizing forms with models trained with labels---------"
            )
            for form in labeled_results:
                for name, field in form.fields.items():
                    print(value_line.format(
                        name,
                        field.value,
                        field.value_data.text,
                        format_bounding_box(field.value_data.bounding_box),
                        field.confidence,
                    ))

            print(
                "------------------------------------------------------------------"
            )
            print(
                "-------Recognizing forms with models trained without labels-------"
            )
            for form in unlabeled_results:
                for name, field in form.fields.items():
                    # A model trained without labels also reports data about the label itself.
                    print(label_line.format(
                        name,
                        field.label_data.text,
                        format_bounding_box(field.label_data.bounding_box),
                        field.confidence,
                    ))
                    print(value_line.format(
                        name,
                        field.value,
                        field.value_data.text,
                        format_bounding_box(field.value_data.bounding_box),
                        field.confidence,
                    ))
    async def test_receipt_multipage_transform_url(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
        """Compare the transformed receipt models with the raw service result
        for the multi-page PDF referenced by ``self.multipage_url_pdf``.

        A ``cls`` callback captures both the deserialized raw response and
        the output of ``prepare_receipt`` so the two can be checked field by
        field.
        """
        client = FormRecognizerClient(form_recognizer_account, AzureKeyCredential(form_recognizer_account_key))

        responses = []

        def callback(raw_response, _, headers):
            # Store the raw result first and the transformed receipts second;
            # the indices used below rely on this order.
            analyze_result = client._client._deserialize(AnalyzeOperationResult, raw_response)
            extracted_receipt = prepare_receipt(analyze_result)
            responses.append(analyze_result)
            responses.append(extracted_receipt)

        # Enter the client so it is released once the operation has finished.
        async with client:
            poller = await client.begin_recognize_receipts_from_url(
                self.multipage_url_pdf,
                include_text_content=True,
                cls=callback
            )
            await poller.result()

        raw_response = responses[0]
        returned_model = responses[1]
        read_results = raw_response.analyze_result.read_results
        document_results = raw_response.analyze_result.document_results

        # Fields whose transformed value is compared directly with the raw one.
        compared_fields = (
            "MerchantAddress",
            "MerchantName",
            "MerchantPhoneNumber",
            "Subtotal",
            "Tax",
            "Tip",
            "Total",
            "TransactionDate",
            "TransactionTime",
        )

        # check hardcoded values
        for receipt, actual in zip(returned_model, document_results):
            if actual.fields is None:  # second page is blank
                continue

            # check dict values
            for field_name in compared_fields:
                self.assertFormFieldTransformCorrect(
                    receipt.fields.get(field_name),
                    actual.fields.get(field_name),
                    read_results,
                )

            # check page range
            self.assertEqual(receipt.page_range.first_page_number, actual.page_range[0])
            self.assertEqual(receipt.page_range.last_page_number, actual.page_range[1])

            # check receipt type
            receipt_type = receipt.fields.get("ReceiptType")
            self.assertEqual(receipt_type.confidence, actual.fields["ReceiptType"].confidence)
            self.assertEqual(receipt_type.value, actual.fields["ReceiptType"].value_string)

            # check receipt items
            self.assertReceiptItemsTransformCorrect(receipt.fields["Items"].value, actual.fields["Items"], read_results)

        # Check form pages
        self.assertFormPagesTransformCorrect(returned_model, read_results)
Beispiel #23
0
    async def recognize_receipts_from_url(self):
        """Recognize a receipt from a public URL and print its fields.

        The endpoint and key are read from the environment variables
        ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and ``AZURE_FORM_RECOGNIZER_KEY``.
        For each recognized receipt, every field that is present is printed
        with its value and confidence, including each line item.

        Raises:
            KeyError: if either environment variable is not set.
        """
        # [START recognize_receipts_from_url_async]
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer.aio import FormRecognizerClient

        endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
        key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

        # (field name, printed label) pairs, in output order.
        header_fields = (
            ("ReceiptType", "Receipt Type"),
            ("MerchantName", "Merchant Name"),
            ("TransactionDate", "Transaction Date"),
        )
        item_fields = (
            ("Name", "Item Name"),
            ("Quantity", "Item Quantity"),
            ("Price", "Individual Item Price"),
            ("TotalPrice", "Total Item Price"),
        )
        summary_fields = (
            ("Subtotal", "Subtotal"),
            ("Tax", "Tax"),
            ("Tip", "Tip"),
            ("Total", "Total"),
        )

        async with FormRecognizerClient(
            endpoint=endpoint, credential=AzureKeyCredential(key)
        ) as form_recognizer_client:
            url = "https://raw.githubusercontent.com/Azure/azure-sdk-for-python/main/sdk/formrecognizer/azure-ai-formrecognizer/tests/sample_forms/receipt/contoso-receipt.png"
            poller = await form_recognizer_client.begin_recognize_receipts_from_url(receipt_url=url)
            receipts = await poller.result()

            for receipt_number, receipt in enumerate(receipts, start=1):
                print("--------Recognizing receipt #{}--------".format(receipt_number))
                for field_name, label in header_fields:
                    field = receipt.fields.get(field_name)
                    if field:
                        print("{}: {} has confidence: {}".format(label, field.value, field.confidence))

                items = receipt.fields.get("Items")
                if items:
                    print("Receipt items:")
                    # A separate counter name avoids shadowing the receipt index.
                    for item_number, item in enumerate(items.value, start=1):
                        print("...Item #{}".format(item_number))
                        for field_name, label in item_fields:
                            field = item.value.get(field_name)
                            if field:
                                print("......{}: {} has confidence: {}".format(label, field.value, field.confidence))

                for field_name, label in summary_fields:
                    field = receipt.fields.get(field_name)
                    if field:
                        print("{}: {} has confidence: {}".format(label, field.value, field.confidence))
                print("--------------------------------------")
 async def test_damaged_file_bytes_io_fails_autodetect(
         self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
     """A ``BytesIO`` stream whose bytes match no known file signature must
     raise ``ValueError`` when submitted for receipt recognition.
     """
     credential = AzureKeyCredential(formrecognizer_test_api_key)
     receipt_client = FormRecognizerClient(formrecognizer_test_endpoint,
                                           credential)
     # doesn't match any magic file numbers
     unrecognized_stream = BytesIO(b"\x50\x44\x46\x55\x55\x55")
     with pytest.raises(ValueError):
         async with receipt_client:
             operation = await receipt_client.begin_recognize_receipts(
                 unrecognized_stream)
             await operation.result()
Beispiel #25
0
 async def test_content_bad_endpoint(self, formrecognizer_test_endpoint,
                                     formrecognizer_test_api_key, **kwargs):
     """Content recognition against ``http://notreal.azure.com`` must raise
     ``ServiceRequestError``.
     """
     with open(self.invoice_pdf, "rb") as stream:
         invoice_bytes = stream.read()

     with pytest.raises(ServiceRequestError):
         credential = AzureKeyCredential(formrecognizer_test_api_key)
         bad_client = FormRecognizerClient("http://notreal.azure.com",
                                           credential)
         async with bad_client:
             operation = await bad_client.begin_recognize_content(
                 invoice_bytes)
             await operation.result()
Beispiel #26
0
    async def test_content_url_pass_stream(self, resource_group, location,
                                           form_recognizer_account,
                                           form_recognizer_account_key):
        """Expect ``HttpResponseError`` when raw bytes are passed to the
        URL-based content recognition API.
        """
        client = FormRecognizerClient(
            form_recognizer_account,
            AzureKeyCredential(form_recognizer_account_key))
        with open(self.receipt_jpg, "rb") as fd:
            receipt = fd.read(4)  # makes the recording smaller

        with self.assertRaises(HttpResponseError):
            # Enter the client so its context is exited even though the
            # call raises.
            async with client:
                await client.recognize_content_from_url(receipt)
Beispiel #27
0
    async def test_content_multipage_url(self, resource_group, location,
                                         form_recognizer_account,
                                         form_recognizer_account_key):
        """Recognize content from ``self.multipage_url_pdf`` and check that
        three pages with values are returned.
        """
        client = FormRecognizerClient(
            form_recognizer_account,
            AzureKeyCredential(form_recognizer_account_key))
        # Use the client as a context manager so it is released after the call.
        async with client:
            result = await client.recognize_content_from_url(
                self.multipage_url_pdf)

        self.assertEqual(len(result), 3)
        self.assertFormPagesHasValues(result)
Beispiel #28
0
 async def test_custom_form_url_bad_endpoint(self, resource_group, location,
                                             form_recognizer_account,
                                             form_recognizer_account_key):
     """URL-based custom-form recognition against
     ``http://notreal.azure.com`` must raise ``ServiceRequestError``.
     """
     with self.assertRaises(ServiceRequestError):
         credential = AzureKeyCredential(form_recognizer_account_key)
         bad_client = FormRecognizerClient("http://notreal.azure.com",
                                           credential)
         async with bad_client:
             operation = await bad_client.begin_recognize_custom_forms_from_url(
                 model_id="xx",
                 form_url=self.form_url_jpg,
             )
             await operation.result()
Beispiel #29
0
 async def test_passing_enum_content_type(self, resource_group, location,
                                          form_recognizer_account,
                                          form_recognizer_account_key):
     """Recognize the receipt PNG with the content type given as
     ``FormContentType.image_png`` and check that a result is returned.
     """
     client = FormRecognizerClient(
         form_recognizer_account,
         AzureKeyCredential(form_recognizer_account_key))
     with open(self.receipt_png, "rb") as fd:
         myfile = fd.read()
     # Use the client as a context manager so it is released after the call.
     async with client:
         result = await client.recognize_receipts(
             myfile, content_type=FormContentType.image_png)
     self.assertIsNotNone(result)
Beispiel #30
0
 async def test_passing_bad_content_type_param_passed(
         self, resource_group, location, form_recognizer_account,
         form_recognizer_account_key):
     """Expect ``ValueError`` when receipt recognition is called with the
     content type ``"application/jpeg"``.
     """
     client = FormRecognizerClient(
         form_recognizer_account,
         AzureKeyCredential(form_recognizer_account_key))
     with open(self.receipt_jpg, "rb") as fd:
         myfile = fd.read()
     with self.assertRaises(ValueError):
         # Enter the client so its context is exited even though the
         # call raises.
         async with client:
             await client.recognize_receipts(
                 myfile, content_type="application/jpeg")