async def test_receipt_multipage(self, client):
    """Analyze the multipage receipt PDF and verify the extracted fields.

    The file at ``self.multipage_receipt_pdf`` is sent to the
    ``prebuilt-receipt`` model. The result is round-tripped through
    ``to_dict()`` / ``AnalyzeResult.from_dict()`` before any assertion is
    made, so the checks exercise the dictionary conversion as well.

    :param client: asynchronous client exposing ``begin_analyze_document``;
        it is used as an async context manager.
    :return: an empty dict (NOTE(review): presumably expected by the
        surrounding test harness -- confirm).
    """
    with open(self.multipage_receipt_pdf, "rb") as fd:
        receipt_bytes = fd.read()

    async with client:
        poller = await client.begin_analyze_document("prebuilt-receipt", receipt_bytes)
        result = await poller.result()

    # Round trip through a plain dictionary before asserting.
    d = result.to_dict()
    result = AnalyzeResult.from_dict(d)

    assert len(result.documents) == 2

    receipt = result.documents[0]
    # This check used to be ``assert value, '<text>'``: the expected text was
    # only the assertion message, so the address was never compared.
    assert receipt.fields.get("MerchantAddress").value == '123 Main Street Redmond, WA 98052'
    assert receipt.fields.get("MerchantName").value == 'Contoso'
    assert receipt.fields.get("MerchantPhoneNumber").value == '+19876543210'
    assert receipt.fields.get("Subtotal").value == 11.7
    assert receipt.fields.get("TotalTax").value == 1.17
    assert receipt.fields.get("Tip").value == 1.63
    assert receipt.fields.get("Total").value == 14.5
    assert receipt.fields.get("TransactionDate").value == date(year=2019, month=6, day=10)
    assert receipt.fields.get("TransactionTime").value == time(hour=13, minute=59, second=0)
    assert receipt.doc_type == "receipt.retailMeal"

    receipt = result.documents[1]
    # Same repair as for the first document: compare instead of using the
    # expected text as the assertion message.
    assert receipt.fields.get("MerchantAddress").value == '123 Main Street Redmond, WA 98052'
    assert receipt.fields.get("MerchantName").value == 'Contoso'
    assert receipt.fields.get("Subtotal").value == 1098.99
    assert receipt.fields.get("TotalTax").value == 104.4
    assert receipt.fields.get("Total").value == 1203.39
    assert receipt.fields.get("TransactionDate").value == date(year=2019, month=6, day=10)
    assert receipt.fields.get("TransactionTime").value == time(hour=13, minute=59, second=0)
    assert receipt.doc_type == "receipt.retailMeal"

    assert len(result.pages) == 2

    return {}
async def test_receipt_multipage(self, client):
    """Check the fields extracted from the multipage receipt PDF.

    The PDF at ``self.multipage_receipt_pdf`` is analyzed with the
    ``prebuilt-receipt`` model, the result is converted to a dictionary and
    back, and the expected field values of both documents are compared with
    ``self.assertEqual``.

    :param client: asynchronous client exposing ``begin_analyze_document``;
        it is used as an async context manager.
    """
    with open(self.multipage_receipt_pdf, "rb") as stream:
        pdf_bytes = stream.read()

    async with client:
        poller = await client.begin_analyze_document("prebuilt-receipt", pdf_bytes)
        analyzed = await poller.result()

    # All assertions run against the model rebuilt from its dictionary form.
    result = AnalyzeResult.from_dict(analyzed.to_dict())
    self.assertEqual(len(result.documents), 2)

    # Expected (field name, value) pairs, one list per document, in the
    # order in which they are verified.
    expected_per_document = [
        [
            ("MerchantAddress", '123 Main Street Redmond, WA 98052'),
            ("MerchantName", 'Contoso'),
            ("MerchantPhoneNumber", '+19876543210'),
            ("Subtotal", 11.7),
            ("Tax", 1.17),
            ("Tip", 1.63),
            ("Total", 14.5),
            ("TransactionDate", date(year=2019, month=6, day=10)),
            ("TransactionTime", time(hour=13, minute=59, second=0)),
        ],
        [
            ("MerchantAddress", '123 Main Street Redmond, WA 98052'),
            ("MerchantName", 'Contoso'),
            ("Subtotal", 1098.99),
            ("Tax", 104.4),
            ("Total", 1203.39),
            ("TransactionDate", date(year=2019, month=6, day=10)),
            ("TransactionTime", time(hour=13, minute=59, second=0)),
        ],
    ]

    for index, expected_fields in enumerate(expected_per_document):
        document = result.documents[index]
        for field_name, expected_value in expected_fields:
            self.assertEqual(document.fields.get(field_name).value, expected_value)

        # Both documents are expected to be itemized receipts.
        receipt_type = document.fields.get("ReceiptType")
        self.assertIsNotNone(receipt_type.confidence)
        self.assertEqual(receipt_type.value, 'Itemized')

    self.assertEqual(len(result.pages), 2)
async def convert_to_and_from_dict_async():
    """Analyze a sample form and round-trip the result through a dictionary.

    The sample image is analyzed with the ``prebuilt-document`` model. The
    returned model is converted to a dictionary, written to ``data.json``
    in the current working directory, rebuilt with
    ``AnalyzeResult.from_dict`` and a few of its attributes are printed.

    The endpoint and key are read from the ``AZURE_FORM_RECOGNIZER_ENDPOINT``
    and ``AZURE_FORM_RECOGNIZER_KEY`` environment variables; a missing
    variable raises ``KeyError``.
    """
    from azure.core.serialization import AzureJSONEncoder
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient
    from azure.ai.formrecognizer import AnalyzeResult

    # Location of the sample image, resolved relative to this file.
    this_file = os.path.abspath(__file__)
    sample_path = os.path.abspath(
        os.path.join(this_file, "..", "..", "..", "./sample_forms/forms/Form_1.jpg")
    )

    client = DocumentAnalysisClient(
        endpoint=os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"],
        credential=AzureKeyCredential(os.environ["AZURE_FORM_RECOGNIZER_KEY"]),
    )

    async with client:
        with open(path_to_sample := sample_path, "rb") as sample_file:
            poller = await client.begin_analyze_document(
                "prebuilt-document", document=sample_file
            )
        result = await poller.result()

    # convert the received model to a dictionary
    as_dict = result.to_dict()

    # save the dictionary as JSON content in a JSON file, use the AzureJSONEncoder
    # to help make types, such as dates, JSON serializable
    # NOTE: AzureJSONEncoder is only available with azure.core>=1.18.0.
    with open('data.json', 'w') as json_file:
        json.dump(as_dict, json_file, cls=AzureJSONEncoder)

    # convert the dictionary back to the original model
    restored = AnalyzeResult.from_dict(as_dict)

    # use the model as normal
    print("----Converted from dictionary AnalyzeResult----")
    print("Model ID: '{}'".format(restored.model_id))
    print("Number of pages analyzed {}".format(len(restored.pages)))
    print("API version used: {}".format(restored.api_version))
    print("----------------------------------------")
def test_receipt_multipage_transform(self, client):
    """Compare the public model with the generated one for a multipage receipt.

    A ``cls`` callback captures both the deserialized
    ``AnalyzeResultOperation`` and the ``AnalyzeResult`` built from it. The
    latter is round-tripped through ``to_dict()`` / ``from_dict()`` and then
    compared, part by part, with the generated analyze result.

    :param client: synchronous client exposing ``begin_analyze_document``
        and ``_deserialize``.
    :return: an empty dict (NOTE(review): presumably expected by the
        surrounding test harness -- confirm).
    """
    captured = []

    def callback(raw_response, _, headers):
        # Keep the generated operation and the converted public model,
        # in that order, for the comparisons below.
        operation = client._deserialize(AnalyzeResultOperation, raw_response)
        converted = AnalyzeResult._from_generated(operation.analyze_result)
        captured.extend((operation, converted))

    with open(self.multipage_receipt_pdf, "rb") as stream:
        pdf_bytes = stream.read()

    poller = client.begin_analyze_document(
        "prebuilt-receipt", document=pdf_bytes, cls=callback
    )
    # The return value is not needed; the checks below read ``captured``.
    poller.result()

    generated = captured[0].analyze_result
    round_tripped = AnalyzeResult.from_dict(captured[1].to_dict())

    # Check AnalyzeResult
    for attribute in ("model_id", "api_version", "content"):
        assert getattr(round_tripped, attribute) == getattr(generated, attribute)

    self.assertDocumentPagesTransformCorrect(round_tripped.pages, generated.pages)
    self.assertDocumentTransformCorrect(round_tripped.documents, generated.documents)
    self.assertDocumentTablesTransformCorrect(round_tripped.tables, generated.tables)
    self.assertDocumentKeyValuePairsTransformCorrect(
        round_tripped.key_value_pairs, generated.key_value_pairs
    )
    self.assertDocumentEntitiesTransformCorrect(
        round_tripped.entities, generated.entities
    )
    self.assertDocumentStylesTransformCorrect(round_tripped.styles, generated.styles)

    # check page range
    assert len(generated.pages) == len(round_tripped.pages)

    return {}
def test_document_line_get_words_error(self, client, **kwargs):
    """Expect ``get_words()`` to raise ``ValueError`` on dict-rebuilt lines.

    The invoice PDF is analyzed with the ``prebuilt-document`` model. A
    ``ValueError`` is expected both for a line reached through an
    ``AnalyzeResult`` rebuilt from a dictionary and for a ``DocumentLine``
    rebuilt directly from its own dictionary.

    :param client: synchronous client exposing ``begin_analyze_document``.
    :param kwargs: accepted but not used by this test.
    """
    with open(self.invoice_pdf, "rb") as stream:
        pdf_bytes = stream.read()

    analyzed = client.begin_analyze_document("prebuilt-document", pdf_bytes).result()

    # check the error occurs when converting a larger element that encompasses a document line
    rebuilt_result = AnalyzeResult.from_dict(analyzed.to_dict())
    with pytest.raises(ValueError):
        rebuilt_result.pages[0].lines[0].get_words()

    # check that the error occurs when directly converting a DocumentLine from a dict
    rebuilt_line = DocumentLine.from_dict(analyzed.pages[0].lines[0].to_dict())
    with pytest.raises(ValueError):
        rebuilt_line.get_words()
def test_invoice_jpg(self, client, **kwargs):
    """Analyze the invoice JPG and verify the extracted fields.

    The image at ``self.invoice_jpg`` is sent to the ``prebuilt-invoice``
    model. The result is converted to a dictionary, which must be JSON
    serializable with ``AzureJSONEncoder``, and rebuilt with
    ``AnalyzeResult.from_dict`` before the field values are checked.

    :param client: synchronous client exposing ``begin_analyze_document``.
    :param kwargs: accepted but not used by this test.
    """
    with open(self.invoice_jpg, "rb") as fd:
        invoice_bytes = fd.read()

    poller = client.begin_analyze_document("prebuilt-invoice", invoice_bytes)
    result = poller.result()

    d = result.to_dict()
    # Only checks that serialization does not raise; the output is discarded.
    json.dumps(d, cls=AzureJSONEncoder)
    result = AnalyzeResult.from_dict(d)

    assert len(result.documents) == 1
    invoice = result.documents[0]
    assert result.pages

    fields = invoice.fields

    # check dict values
    #
    # NOTE: the address and date checks below used to be written as
    # ``assert value, "<expected>"``, which only tests truthiness and uses
    # the expected text as the failure message. They are real comparisons
    # now; the expected literals restore the ", " that had been replaced by
    # " == " in those messages.
    assert fields.get("AmountDue").value.amount == 610.0
    assert fields.get("AmountDue").value.symbol == "$"
    assert fields.get("BillingAddress").value == "123 Bill St, Redmond WA, 98052"
    assert fields.get("BillingAddressRecipient").value == "Microsoft Finance"
    assert fields.get("CustomerAddress").value == "123 Other St, Redmond WA, 98052"
    assert fields.get("CustomerAddressRecipient").value == "Microsoft Corp"
    assert fields.get("CustomerId").value == "CID-12345"
    assert fields.get("CustomerName").value == "MICROSOFT CORPORATION"
    assert fields.get("DueDate").value == date(2019, 12, 15)
    assert fields.get("InvoiceDate").value == date(2019, 11, 15)
    assert fields.get("InvoiceId").value == "INV-100"
    assert fields.get("InvoiceTotal").value.amount == 110.0
    assert fields.get("PreviousUnpaidBalance").value.amount == 500.0
    assert fields.get("PurchaseOrder").value == "PO-3333"
    assert fields.get("RemittanceAddress").value == "123 Remit St New York, NY, 10001"
    assert fields.get("RemittanceAddressRecipient").value == "Contoso Billing"
    assert fields.get("ServiceAddress").value == "123 Service St, Redmond WA, 98052"
    assert fields.get("ServiceAddressRecipient").value == "Microsoft Services"
    assert fields.get("ServiceEndDate").value == date(2019, 11, 14)
    assert fields.get("ServiceStartDate").value == date(2019, 10, 14)
    assert fields.get("ShippingAddress").value == "123 Ship St, Redmond WA, 98052"
    assert fields.get("ShippingAddressRecipient").value == "Microsoft Delivery"
    assert fields.get("SubTotal").value.amount == 100.0
    assert fields.get("SubTotal").value.symbol == "$"
    assert fields.get("TotalTax").value.amount == 10.0
    assert fields.get("TotalTax").value.symbol == "$"
    assert fields.get("VendorName").value == "CONTOSO LTD."
    assert fields.get("VendorAddress").value == "123 456th St New York, NY, 10001"
    assert fields.get("VendorAddressRecipient").value == "Contoso Headquarters"

    # First line item of the invoice.
    first_item = fields.get("Items").value[0].value
    assert first_item["Amount"].value.amount == 100.0
    assert first_item["Amount"].value.symbol == "$"
    assert first_item["Description"].value == "Consulting service"
    assert first_item["Quantity"].value == 1.0
    assert first_item["UnitPrice"].value.amount == 1.0
    # No currency symbol is expected for the unit price.
    assert first_item["UnitPrice"].value.symbol is None