async def test_passing_unsupported_url_content_type(
        self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Expect ``TypeError`` when a URL string is given to ``recognize_receipts`` with a content type.

    ``form_recognizer_account`` is used as the client endpoint and
    ``form_recognizer_account_key`` as its API key; ``resource_group`` and
    ``location`` are not used by this test.
    """
    client = FormRecognizerClient(
        form_recognizer_account, AzureKeyCredential(form_recognizer_account_key))
    with self.assertRaises(TypeError):
        # Enter the client as an async context manager so it is closed when
        # the block exits, even though the call is expected to raise.
        async with client:
            await client.recognize_receipts(
                "https://badurl.jpg", content_type="application/json")
async def test_receipt_bad_endpoint(
        self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Expect ``ServiceRequestError`` when receipts are sent to a made-up endpoint.

    Only ``form_recognizer_account_key`` is used (as the API key); the other
    fixture arguments are not read by this test.
    """
    with open(self.receipt_jpg, "rb") as fd:
        myfile = fd.read()
    with self.assertRaises(ServiceRequestError):
        client = FormRecognizerClient(
            "http://notreal.azure.com", AzureKeyCredential(form_recognizer_account_key))
        # Close the client on exit so the failed request does not leave it open.
        async with client:
            await client.recognize_receipts(myfile)
async def test_id_document_bad_endpoint(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
    """ID document recognition against a made-up endpoint must raise ``ServiceRequestError``."""
    with open(self.id_document_jpg, "rb") as stream:
        document_bytes = stream.read()

    with self.assertRaises(ServiceRequestError):
        bad_client = FormRecognizerClient(
            "http://notreal.azure.com",
            AzureKeyCredential(formrecognizer_test_api_key),
        )
        async with bad_client:
            operation = await bad_client.begin_recognize_id_documents(document_bytes)
async def test_content_authentication_successful_key(
        self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Recognize content from the invoice PDF using key authentication; passes if nothing raises.

    ``form_recognizer_account`` is the client endpoint and
    ``form_recognizer_account_key`` its API key; the remaining fixture
    arguments are not used here.
    """
    client = FormRecognizerClient(
        form_recognizer_account, AzureKeyCredential(form_recognizer_account_key))
    with open(self.invoice_pdf, "rb") as fd:
        myfile = fd.read()
    # Use the client as an async context manager so it is closed afterwards.
    async with client:
        poller = await client.begin_recognize_content(myfile)
        await poller.result()
async def test_pass_stream_into_url(
        self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Expect ``HttpResponseError`` when an open file object is passed as ``form_url``.

    ``form_recognizer_account`` is the client endpoint and
    ``form_recognizer_account_key`` its API key.
    """
    client = FormRecognizerClient(
        form_recognizer_account, AzureKeyCredential(form_recognizer_account_key))
    with open(self.unsupported_content_py, "rb") as fd:
        with self.assertRaises(HttpResponseError):
            # Close the client on exit, including when the call raises.
            async with client:
                poller = await client.begin_recognize_custom_forms_from_url(
                    model_id="xxx",
                    form_url=fd,
                )
                await poller.result()
async def test_content_authentication_bad_key(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
    """Content recognition with the key ``"xxxx"`` must raise ``ClientAuthenticationError``."""
    wrong_credential = AzureKeyCredential("xxxx")
    recognizer = FormRecognizerClient(formrecognizer_test_endpoint, wrong_credential)

    with self.assertRaises(ClientAuthenticationError):
        async with recognizer:
            operation = await recognizer.begin_recognize_content(
                b"xxx",
                content_type="application/pdf",
            )
            outcome = await operation.result()
async def test_custom_form_none_model_id(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
    """``model_id=None`` passed to the from-URL custom form call must raise ``ValueError``."""
    credential = AzureKeyCredential(formrecognizer_test_api_key)
    recognizer = FormRecognizerClient(formrecognizer_test_endpoint, credential)

    with self.assertRaises(ValueError):
        async with recognizer:
            await recognizer.begin_recognize_custom_forms_from_url(
                model_id=None,
                form_url="https://badurl.jpg",
            )
async def test_custom_form_none_model_id(
        self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """``model_id=None`` passed to the bytes-based custom form call must raise ``ValueError``."""
    credential = AzureKeyCredential(form_recognizer_account_key)
    recognizer = FormRecognizerClient(form_recognizer_account, credential)

    with self.assertRaises(ValueError):
        async with recognizer:
            await recognizer.begin_recognize_custom_forms(
                model_id=None,
                form=b"xx",
            )
async def test_get_form_training_client(
        self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Check the transport session stays set while a derived training client is opened and closed.

    A single ``AioHttpTransport`` is handed to the recognizer client; the
    session is asserted to be non-``None`` after the first call, inside the
    training client's context, and after a further call once that context
    has exited.
    """
    shared_transport = AioHttpTransport()
    recognizer = FormRecognizerClient(
        endpoint=form_recognizer_account,
        credential=AzureKeyCredential(form_recognizer_account_key),
        transport=shared_transport,
    )

    async with recognizer:
        receipts = await recognizer.recognize_receipts_from_url(self.receipt_url_jpg)
        assert shared_transport.session is not None

        async with recognizer.get_form_training_client() as trainer:
            assert shared_transport.session is not None
            account_properties = await trainer.get_account_properties()

        # The recognizer must still be usable after the training client closed.
        receipts = await recognizer.recognize_receipts_from_url(self.receipt_url_jpg)
        assert shared_transport.session is not None
async def test_authentication_successful_key(
        self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Recognize the receipt JPG using key authentication; passes if nothing raises.

    ``form_recognizer_account`` is the client endpoint and
    ``form_recognizer_account_key`` its API key.
    """
    client = FormRecognizerClient(
        form_recognizer_account, AzureKeyCredential(form_recognizer_account_key))
    with open(self.receipt_jpg, "rb") as fd:
        myfile = fd.read()
    # Use the client as an async context manager so it is closed afterwards.
    async with client:
        await client.recognize_receipts(myfile)
async def test_passing_bad_url(
        self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Expect ``HttpResponseError`` for a custom form request with a bad form URL.

    ``form_recognizer_account`` is the client endpoint and
    ``form_recognizer_account_key`` its API key.
    """
    client = FormRecognizerClient(
        form_recognizer_account, AzureKeyCredential(form_recognizer_account_key))
    with self.assertRaises(HttpResponseError):
        # Close the client on exit, including when the call raises.
        async with client:
            await client.recognize_custom_forms_from_url(
                model_id="xx", form_url="https://badurl.jpg")
async def test_auto_detect_unsupported_stream_content(
        self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Expect ``ValueError`` when the bytes of the unsupported-content file are recognized.

    No ``content_type`` is supplied, so the client has to work it out from the
    bytes themselves.
    """
    client = FormRecognizerClient(
        form_recognizer_account, AzureKeyCredential(form_recognizer_account_key))
    with open(self.unsupported_content_py, "rb") as fd:
        myfile = fd.read()
    with self.assertRaises(ValueError):
        # Close the client on exit, including when the call raises.
        async with client:
            await client.recognize_content(myfile)
async def test_url_authentication_bad_key(
        self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """From-URL custom form recognition with the key ``"xxxx"`` must raise ``ClientAuthenticationError``."""
    wrong_credential = AzureKeyCredential("xxxx")
    recognizer = FormRecognizerClient(form_recognizer_account, wrong_credential)

    with self.assertRaises(ClientAuthenticationError):
        async with recognizer:
            operation = await recognizer.begin_recognize_custom_forms_from_url(
                model_id="xx",
                form_url=self.form_url_jpg,
            )
            outcome = await operation.result()
async def test_receipt_url_bad_endpoint(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
    """From-URL receipt recognition against a made-up endpoint must raise ``ServiceRequestError``."""
    with self.assertRaises(ServiceRequestError):
        bad_client = FormRecognizerClient(
            "http://notreal.azure.com",
            AzureKeyCredential(formrecognizer_test_api_key),
        )
        async with bad_client:
            operation = await bad_client.begin_recognize_receipts_from_url(self.receipt_url_jpg)
            outcome = await operation.result()
async def test_damaged_file_bytes_fails_autodetect_content_type(
        self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Expect ``ValueError`` for bytes whose leading bytes match no known file signature.

    ``form_recognizer_account`` is the client endpoint and
    ``form_recognizer_account_key`` its API key.
    """
    client = FormRecognizerClient(
        form_recognizer_account, AzureKeyCredential(form_recognizer_account_key))
    damaged_pdf = b"\x50\x44\x46\x55\x55\x55"  # doesn't match any magic file numbers
    with self.assertRaises(ValueError):
        # Close the client on exit, including when the call raises.
        async with client:
            poller = await client.begin_recognize_receipts(damaged_pdf)
            await poller.result()
async def test_custom_form_empty_model_id(
        self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """An empty ``model_id`` passed to the from-URL custom form call must raise ``ValueError``."""
    credential = AzureKeyCredential(form_recognizer_account_key)
    recognizer = FormRecognizerClient(form_recognizer_account, credential)

    with self.assertRaises(ValueError):
        async with recognizer:
            await recognizer.begin_recognize_custom_forms_from_url(
                model_id="",
                form_url="https://badurl.jpg",
            )
async def test_damaged_file_passed_as_bytes(
        self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Expect ``HttpResponseError`` for damaged bytes that still start with the ``%PDF`` signature.

    ``form_recognizer_account`` is the client endpoint and
    ``form_recognizer_account_key`` its API key.
    """
    client = FormRecognizerClient(
        form_recognizer_account, AzureKeyCredential(form_recognizer_account_key))
    # still has correct bytes to be recognized as PDF
    damaged_pdf = b"\x25\x50\x44\x46\x55\x55\x55"
    with self.assertRaises(HttpResponseError):
        # Close the client on exit, including when the call raises.
        async with client:
            poller = await client.begin_recognize_receipts(damaged_pdf)
            await poller.result()
async def test_content_url_bad_endpoint(
        self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Expect ``ServiceRequestError`` when content is recognized from a URL via a made-up endpoint.

    Only ``form_recognizer_account_key`` is used (as the API key); the other
    fixture arguments are not read by this test.
    """
    with self.assertRaises(ServiceRequestError):
        client = FormRecognizerClient(
            "http://notreal.azure.com", AzureKeyCredential(form_recognizer_account_key))
        # Close the client on exit so the failed request does not leave it open.
        async with client:
            poller = await client.begin_recognize_content_from_url(self.invoice_url_pdf)
            await poller.result()
async def test_authentication_bad_key(
        self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Receipt recognition with the key ``"xxxx"`` must raise ``ClientAuthenticationError``."""
    wrong_credential = AzureKeyCredential("xxxx")
    recognizer = FormRecognizerClient(form_recognizer_account, wrong_credential)

    with self.assertRaises(ClientAuthenticationError):
        async with recognizer:
            operation = await recognizer.begin_recognize_receipts(
                b"xx",
                content_type="image/jpeg",
            )
            outcome = await operation.result()
async def test_custom_form_empty_model_id(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
    """An empty ``model_id`` passed to the bytes-based custom form call must raise ``ValueError``."""
    credential = AzureKeyCredential(formrecognizer_test_api_key)
    recognizer = FormRecognizerClient(formrecognizer_test_endpoint, credential)

    with self.assertRaises(ValueError):
        async with recognizer:
            await recognizer.begin_recognize_custom_forms(
                model_id="",
                form=b"xx",
            )
async def recognize_custom_forms(self):
    """Recognize one sample form with a labeled and an unlabeled custom model and print every field.

    Reads ``self.endpoint``, ``self.key``, ``self.model_trained_with_labels_id``
    and ``self.model_trained_without_labels_id``.
    """
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import FormRecognizerClient

    sample_location = os.path.join(
        os.path.abspath(__file__), "..", "..", "./sample_forms/forms/Form_1.jpg")
    path_to_sample_forms = os.path.abspath(sample_location)

    # Shared layout for the "value" line printed for both kinds of model.
    value_line = "...Field '{}' has value '{}' based on '{}' within bounding box '{}', with a confidence score of {}"
    label_line = "...Field '{}' has label '{}' within bounding box '{}', with a confidence score of {}"

    async with FormRecognizerClient(
        endpoint=self.endpoint, credential=AzureKeyCredential(self.key)
    ) as form_recognizer_client:

        # Make sure your form's type is included in the list of form types the custom model can recognize
        with open(path_to_sample_forms, "rb") as sample_file:
            form_bytes = sample_file.read()

        labeled_results = await form_recognizer_client.recognize_custom_forms(
            model_id=self.model_trained_with_labels_id,
            form=form_bytes,
        )
        unlabeled_results = await form_recognizer_client.recognize_custom_forms(
            model_id=self.model_trained_without_labels_id,
            form=form_bytes,
        )

        # For a model trained with labels, the field 'name' key is its training-time label;
        # otherwise it is denoted by numeric indices.
        # Label data is not returned for a model trained with labels.
        print("---------Recognizing forms with models trained with labels---------")
        for recognized in labeled_results:
            for name, field in recognized.fields.items():
                print(value_line.format(
                    name,
                    field.value,
                    field.value_data.text,
                    format_bounding_box(field.value_data.bounding_box),
                    field.confidence,
                ))

        print("------------------------------------------------------------------")
        print("-------Recognizing forms with models trained without labels-------")
        for recognized in unlabeled_results:
            for name, field in recognized.fields.items():
                # A model trained without labels also returns data about the field's label
                print(label_line.format(
                    name,
                    field.label_data.text,
                    format_bounding_box(field.label_data.bounding_box),
                    field.confidence,
                ))
                print(value_line.format(
                    name,
                    field.value,
                    field.value_data.text,
                    format_bounding_box(field.value_data.bounding_box),
                    field.confidence,
                ))
async def test_receipt_multipage_transform_url(
        self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Compare the transformed multipage receipt model with the raw service response.

    A ``cls`` callback stores the deserialized ``AnalyzeOperationResult`` and
    the output of ``prepare_receipt`` for it, so the two can be checked
    against each other field by field.

    ``form_recognizer_account`` is the client endpoint and
    ``form_recognizer_account_key`` its API key.
    """
    client = FormRecognizerClient(
        form_recognizer_account, AzureKeyCredential(form_recognizer_account_key))
    responses = []

    def callback(raw_response, _, headers):
        # Keep both the raw result and its transformed form: [0] raw, [1] transformed.
        analyze_result = client._client._deserialize(AnalyzeOperationResult, raw_response)
        extracted_receipt = prepare_receipt(analyze_result)
        responses.append(analyze_result)
        responses.append(extracted_receipt)

    # Use the client as an async context manager so it is closed afterwards.
    async with client:
        poller = await client.begin_recognize_receipts_from_url(
            self.multipage_url_pdf,
            include_text_content=True,
            cls=callback,
        )
        await poller.result()

    raw_response = responses[0]
    returned_model = responses[1]
    read_results = raw_response.analyze_result.read_results
    document_results = raw_response.analyze_result.document_results

    # Fields compared one-to-one between the transformed and the raw document.
    compared_fields = (
        "MerchantAddress",
        "MerchantName",
        "MerchantPhoneNumber",
        "Subtotal",
        "Tax",
        "Tip",
        "Total",
        "TransactionDate",
        "TransactionTime",
    )

    # check hardcoded values
    for receipt, document in zip(returned_model, document_results):
        if document.fields is None:  # second page is blank
            continue

        # check dict values
        for field_name in compared_fields:
            self.assertFormFieldTransformCorrect(
                receipt.fields.get(field_name),
                document.fields.get(field_name),
                read_results,
            )

        # check page range
        self.assertEqual(receipt.page_range.first_page_number, document.page_range[0])
        self.assertEqual(receipt.page_range.last_page_number, document.page_range[1])

        # check receipt type
        receipt_type = receipt.fields.get("ReceiptType")
        self.assertEqual(receipt_type.confidence, document.fields["ReceiptType"].confidence)
        self.assertEqual(receipt_type.value, document.fields["ReceiptType"].value_string)

        # check receipt items
        self.assertReceiptItemsTransformCorrect(
            receipt.fields["Items"].value, document.fields["Items"], read_results)

    # Check form pages
    self.assertFormPagesTransformCorrect(returned_model, read_results)
async def recognize_receipts_from_url(self):
    """Recognize a receipt from a fixed URL and print the fields that were found.

    The endpoint and key are read from the ``AZURE_FORM_RECOGNIZER_ENDPOINT``
    and ``AZURE_FORM_RECOGNIZER_KEY`` environment variables. A field is
    printed only when it is present and truthy.
    """
    # [START recognize_receipts_from_url_async]
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import FormRecognizerClient

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    # (field key, output template) pairs, listed in the order they are printed.
    heading_fields = (
        ("ReceiptType", "Receipt Type: {} has confidence: {}"),
        ("MerchantName", "Merchant Name: {} has confidence: {}"),
        ("TransactionDate", "Transaction Date: {} has confidence: {}"),
    )
    per_item_fields = (
        ("Name", "......Item Name: {} has confidence: {}"),
        ("Quantity", "......Item Quantity: {} has confidence: {}"),
        ("Price", "......Individual Item Price: {} has confidence: {}"),
        ("TotalPrice", "......Total Item Price: {} has confidence: {}"),
    )
    amount_fields = (
        ("Subtotal", "Subtotal: {} has confidence: {}"),
        ("Tax", "Tax: {} has confidence: {}"),
        ("Tip", "Tip: {} has confidence: {}"),
        ("Total", "Total: {} has confidence: {}"),
    )

    async with FormRecognizerClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    ) as form_recognizer_client:
        url = "https://raw.githubusercontent.com/Azure/azure-sdk-for-python/main/sdk/formrecognizer/azure-ai-formrecognizer/tests/sample_forms/receipt/contoso-receipt.png"
        poller = await form_recognizer_client.begin_recognize_receipts_from_url(receipt_url=url)
        receipts = await poller.result()

        for receipt_number, receipt in enumerate(receipts, start=1):
            print("--------Recognizing receipt #{}--------".format(receipt_number))

            for field_key, template in heading_fields:
                found = receipt.fields.get(field_key)
                if found:
                    print(template.format(found.value, found.confidence))

            items_field = receipt.fields.get("Items")
            if items_field:
                print("Receipt items:")
                for item_number, item in enumerate(items_field.value, start=1):
                    print("...Item #{}".format(item_number))
                    for field_key, template in per_item_fields:
                        found = item.value.get(field_key)
                        if found:
                            print(template.format(found.value, found.confidence))

            for field_key, template in amount_fields:
                found = receipt.fields.get(field_key)
                if found:
                    print(template.format(found.value, found.confidence))

            print("--------------------------------------")
async def test_damaged_file_bytes_io_fails_autodetect(
        self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
    """A ``BytesIO`` whose leading bytes match no known file signature must raise ``ValueError``."""
    credential = AzureKeyCredential(formrecognizer_test_api_key)
    recognizer = FormRecognizerClient(formrecognizer_test_endpoint, credential)

    # doesn't match any magic file numbers
    corrupt_stream = BytesIO(b"\x50\x44\x46\x55\x55\x55")

    with pytest.raises(ValueError):
        async with recognizer:
            operation = await recognizer.begin_recognize_receipts(corrupt_stream)
            outcome = await operation.result()
async def test_content_bad_endpoint(self, formrecognizer_test_endpoint, formrecognizer_test_api_key, **kwargs):
    """Content recognition against a made-up endpoint must raise ``ServiceRequestError``."""
    with open(self.invoice_pdf, "rb") as stream:
        invoice_bytes = stream.read()

    with pytest.raises(ServiceRequestError):
        bad_client = FormRecognizerClient(
            "http://notreal.azure.com",
            AzureKeyCredential(formrecognizer_test_api_key),
        )
        async with bad_client:
            operation = await bad_client.begin_recognize_content(invoice_bytes)
            outcome = await operation.result()
async def test_content_url_pass_stream(
        self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Expect ``HttpResponseError`` when raw bytes are passed where a URL is expected.

    ``form_recognizer_account`` is the client endpoint and
    ``form_recognizer_account_key`` its API key.
    """
    client = FormRecognizerClient(
        form_recognizer_account, AzureKeyCredential(form_recognizer_account_key))
    with open(self.receipt_jpg, "rb") as fd:
        receipt = fd.read(4)  # makes the recording smaller
    with self.assertRaises(HttpResponseError):
        # Close the client on exit, including when the call raises.
        async with client:
            await client.recognize_content_from_url(receipt)
async def test_content_multipage_url(
        self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Recognize content from the multipage PDF URL and expect three pages with values.

    ``form_recognizer_account`` is the client endpoint and
    ``form_recognizer_account_key`` its API key.
    """
    client = FormRecognizerClient(
        form_recognizer_account, AzureKeyCredential(form_recognizer_account_key))
    # Use the client as an async context manager so it is closed afterwards.
    async with client:
        result = await client.recognize_content_from_url(self.multipage_url_pdf)

    self.assertEqual(len(result), 3)
    self.assertFormPagesHasValues(result)
async def test_custom_form_url_bad_endpoint(
        self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """From-URL custom form recognition against a made-up endpoint must raise ``ServiceRequestError``."""
    with self.assertRaises(ServiceRequestError):
        bad_client = FormRecognizerClient(
            "http://notreal.azure.com",
            AzureKeyCredential(form_recognizer_account_key),
        )
        async with bad_client:
            operation = await bad_client.begin_recognize_custom_forms_from_url(
                model_id="xx",
                form_url=self.form_url_jpg,
            )
            outcome = await operation.result()
async def test_passing_enum_content_type(
        self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Recognize the receipt PNG with ``FormContentType.image_png`` and expect a non-``None`` result.

    ``form_recognizer_account`` is the client endpoint and
    ``form_recognizer_account_key`` its API key.
    """
    client = FormRecognizerClient(
        form_recognizer_account, AzureKeyCredential(form_recognizer_account_key))
    with open(self.receipt_png, "rb") as fd:
        myfile = fd.read()
    # Use the client as an async context manager so it is closed afterwards.
    async with client:
        result = await client.recognize_receipts(
            myfile, content_type=FormContentType.image_png)
    self.assertIsNotNone(result)
async def test_passing_bad_content_type_param_passed(
        self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Expect ``ValueError`` when the receipt JPG is sent with content type ``"application/jpeg"``.

    ``form_recognizer_account`` is the client endpoint and
    ``form_recognizer_account_key`` its API key.
    """
    client = FormRecognizerClient(
        form_recognizer_account, AzureKeyCredential(form_recognizer_account_key))
    with open(self.receipt_jpg, "rb") as fd:
        myfile = fd.read()
    with self.assertRaises(ValueError):
        # Close the client on exit, including when the call raises.
        async with client:
            await client.recognize_receipts(
                myfile, content_type="application/jpeg")