async def test_document_analysis_empty_model_id(self, **kwargs):
    """Analyzing from a URL with an empty model string must raise ``ValueError``."""
    # The endpoint and key arrive as keyword arguments and are popped in this order.
    endpoint = kwargs.pop("formrecognizer_test_endpoint")
    api_key = kwargs.pop("formrecognizer_test_api_key")

    credential = AzureKeyCredential(api_key)
    client = DocumentAnalysisClient(endpoint, credential)

    with pytest.raises(ValueError):
        async with client:
            await client.begin_analyze_document_from_url(
                model="",
                document_url="https://badurl.jpg",
            )
async def test_receipt_bad_endpoint(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
    """Analyzing a receipt against an unreachable endpoint must raise ``ServiceRequestError``."""
    # Read the receipt image up front so the file is closed before any request is made.
    with open(self.receipt_jpg, "rb") as receipt_stream:
        receipt_bytes = receipt_stream.read()

    with self.assertRaises(ServiceRequestError):
        unreachable_client = DocumentAnalysisClient(
            "http://notreal.azure.com",
            AzureKeyCredential(formrecognizer_test_api_key),
        )
        async with unreachable_client:
            poller = await unreachable_client.begin_analyze_document("prebuilt-receipt", receipt_bytes)
            await poller.result()
async def test_analyze_document_empty_model_id(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
    """Analyzing raw bytes with an empty model string must raise ``ValueError``."""
    credential = AzureKeyCredential(formrecognizer_test_api_key)
    client = DocumentAnalysisClient(formrecognizer_test_endpoint, credential)

    with self.assertRaises(ValueError):
        async with client:
            await client.begin_analyze_document(model="", document=b"xx")
async def test_active_directory_auth_async(self):
    """A client built from the OAuth token and endpoint returns a non-``None`` receipt analysis."""
    oauth_token = self.generate_oauth_token()
    oauth_endpoint = self.get_oauth_endpoint()
    client = DocumentAnalysisClient(oauth_endpoint, oauth_token)

    async with client:
        poller = await client.begin_analyze_document_from_url(
            "prebuilt-receipt",
            self.receipt_url_jpg,
        )
        analysis = await poller.result()

    assert analysis is not None
async def test_receipt_url_auth_bad_key(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
    """A client created with the key ``"xxxx"`` must fail with ``ClientAuthenticationError``."""
    bad_credential = AzureKeyCredential("xxxx")
    client = DocumentAnalysisClient(formrecognizer_test_endpoint, bad_credential)

    with self.assertRaises(ClientAuthenticationError):
        async with client:
            poller = await client.begin_analyze_document_from_url(
                "prebuilt-receipt",
                self.receipt_url_jpg,
            )
            await poller.result()
async def test_receipt_url_bad_endpoint(self, formrecognizer_test_endpoint, formrecognizer_test_api_key, **kwargs):
    """Analyzing a receipt URL against an unreachable endpoint must raise ``ServiceRequestError``."""
    set_bodiless_matcher()

    with pytest.raises(ServiceRequestError):
        # The client is built inside the ``raises`` block, exactly where the original builds it.
        unreachable_client = DocumentAnalysisClient(
            "http://notreal.azure.com",
            AzureKeyCredential(formrecognizer_test_api_key),
        )
        async with unreachable_client:
            poller = await unreachable_client.begin_analyze_document_from_url(
                "prebuilt-receipt",
                self.receipt_url_jpg,
            )
            await poller.result()
async def test_document_analysis_none_model(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
    """Analyzing from a URL with ``model=None`` must raise ``ValueError``."""
    credential = AzureKeyCredential(formrecognizer_test_api_key)
    client = DocumentAnalysisClient(formrecognizer_test_endpoint, credential)

    with self.assertRaises(ValueError):
        async with client:
            await client.begin_analyze_document_from_url(
                model=None,
                document_url="https://badurl.jpg",
            )
async def test_analyze_document_none_model_id(self, formrecognizer_test_endpoint, formrecognizer_test_api_key, **kwargs):
    """Analyzing raw bytes with ``model=None`` must raise ``ValueError``."""
    credential = AzureKeyCredential(formrecognizer_test_api_key)
    client = DocumentAnalysisClient(formrecognizer_test_endpoint, credential)

    with pytest.raises(ValueError):
        async with client:
            await client.begin_analyze_document(model=None, document=b"xx")
async def test_receipt_url_auth_bad_key(self, formrecognizer_test_endpoint, formrecognizer_test_api_key, **kwargs):
    """A client created with the key ``"xxxx"`` must fail with ``ClientAuthenticationError``."""
    set_bodiless_matcher()

    bad_credential = AzureKeyCredential("xxxx")
    client = DocumentAnalysisClient(formrecognizer_test_endpoint, bad_credential)

    with pytest.raises(ClientAuthenticationError):
        async with client:
            poller = await client.begin_analyze_document_from_url(
                "prebuilt-receipt",
                self.receipt_url_jpg,
            )
            await poller.result()
async def test_polling_interval(self, formrecognizer_test_endpoint, formrecognizer_test_api_key, **kwargs):
    """Check that a per-call ``polling_interval`` overrides the client-level value for that call only.

    Returns an empty dict, as the original does.
    """
    set_bodiless_matcher()

    credential = AzureKeyCredential(formrecognizer_test_api_key)
    client = DocumentAnalysisClient(formrecognizer_test_endpoint, credential, polling_interval=7)
    assert client._client._config.polling_interval == 7

    async with client:
        # First call: an explicit per-call interval is passed.
        overridden_poller = await client.begin_analyze_document_from_url(
            "prebuilt-receipt",
            self.receipt_url_jpg,
            polling_interval=6,
        )
        await overridden_poller.wait()
        assert overridden_poller._polling_method._timeout == 6

        # Second call: no override is passed.
        default_poller = await client.begin_analyze_document_from_url(
            "prebuilt-receipt",
            self.receipt_url_jpg,
        )
        await default_poller.wait()
        assert default_poller._polling_method._timeout == 7  # goes back to client default

    return {}
async def authentication_with_api_key_credential_document_analysis_client_async():
    """Build a key-authenticated client and run a ``prebuilt-layout`` analysis.

    The endpoint and key are read from ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and
    ``AZURE_FORM_RECOGNIZER_KEY``. ``url`` is not defined in this function; it
    must be supplied by the enclosing scope.
    """
    # [START create_da_client_with_key_async]
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    document_analysis_client = DocumentAnalysisClient(endpoint, AzureKeyCredential(key))
    # [END create_da_client_with_key_async]

    async with document_analysis_client:
        layout_poller = await document_analysis_client.begin_analyze_document_from_url(
            "prebuilt-layout",
            url,
        )
        # The result is awaited only to complete the operation; it is not used.
        await layout_poller.result()
async def convert_to_and_from_dict_async():
    """Analyze a sample form, round-trip the result through a dict, and print a summary.

    The analysis result is converted with ``to_dict()``, written to
    ``data.json`` using ``AzureJSONEncoder``, and rebuilt with
    ``AnalyzeResult.from_dict`` before its model ID, page count, and API
    version are printed.
    """
    sample_path = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "..",
            "./sample_forms/forms/Form_1.jpg",
        )
    )

    from azure.core.serialization import AzureJSONEncoder
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient
    from azure.ai.formrecognizer import AnalyzeResult

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    credential = AzureKeyCredential(key)
    document_analysis_client = DocumentAnalysisClient(endpoint=endpoint, credential=credential)

    async with document_analysis_client:
        with open(sample_path, "rb") as document_stream:
            poller = await document_analysis_client.begin_analyze_document(
                "prebuilt-document", document=document_stream
            )
        result = await poller.result()

    # Convert the received model to a plain dictionary.
    analyze_result_dict = result.to_dict()

    # Persist the dictionary as JSON; AzureJSONEncoder makes types such as dates
    # JSON serializable.
    # NOTE: AzureJSONEncoder is only available with azure.core>=1.18.0.
    with open('data.json', 'w') as json_file:
        json.dump(analyze_result_dict, json_file, cls=AzureJSONEncoder)

    # Rebuild the original model from the dictionary and use it as normal.
    model = AnalyzeResult.from_dict(analyze_result_dict)
    page_count = len(model.pages)

    print("----Converted from dictionary AnalyzeResult----")
    print("Model ID: '{}'".format(model.model_id))
    print("Number of pages analyzed {}".format(page_count))
    print("API version used: {}".format(model.api_version))
    print("----------------------------------------")
async def test_receipt_url_auth_bad_key(self, formrecognizer_test_endpoint, formrecognizer_test_api_key, **kwargs):
    """A client created with the key ``"xxxx"`` must fail with ``ClientAuthenticationError``."""
    # this can be reverted to set_bodiless_matcher() after tests are re-recorded and don't contain these headers
    ignored_headers = "Authorization,Content-Length,x-ms-client-request-id,x-ms-request-id"
    set_custom_default_matcher(compare_bodies=False, excluded_headers=ignored_headers)

    bad_credential = AzureKeyCredential("xxxx")
    client = DocumentAnalysisClient(formrecognizer_test_endpoint, bad_credential)

    with pytest.raises(ClientAuthenticationError):
        async with client:
            poller = await client.begin_analyze_document_from_url(
                "prebuilt-receipt",
                self.receipt_url_jpg,
            )
            await poller.result()
async def get_words_on_document_line_async():
    """Analyze a sample form with ``prebuilt-document`` and print every line with its words.

    For each page the dimensions are printed, then each line's word count,
    text, and bounding box, then each word's content and confidence.
    """
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient

    sample_path = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "..",
            "./sample_forms/forms/Form_1.jpg",
        )
    )

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )
    async with document_analysis_client:
        with open(sample_path, "rb") as document_stream:
            poller = await document_analysis_client.begin_analyze_document(
                "prebuilt-document", document=document_stream
            )
        result = await poller.result()

    # Page numbers are reported one-based.
    for page_number, page in enumerate(result.pages, start=1):
        print("----Analyzing lines and words from page #{}----".format(page_number))
        print(
            "Page has width: {} and height: {}, measured with unit: {}".format(
                page.width, page.height, page.unit
            )
        )

        # Line indices are reported zero-based.
        for line_idx, line in enumerate(page.lines):
            line_words = line.get_words()
            print(
                "...Line # {} has word count {} and text '{}' within bounding box '{}'".format(
                    line_idx,
                    len(line_words),
                    line.content,
                    format_bounding_box(line.bounding_box),
                )
            )
            for word in line_words:
                print(
                    "......Word '{}' has a confidence of {}".format(
                        word.content, word.confidence
                    )
                )

    print("----------------------------------------")
async def test_mock_quota_exceeded_429(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
    """A mocked 429 response must surface as ``HttpResponseError`` carrying the service message."""
    quota_message = (
        "Out of call volume quota for FormRecognizer F0 pricing tier. "
        "Please retry after 1 day. To increase your call volume switch to a paid tier."
    )
    error_body = json.dumps({"error": {"code": "429", "message": quota_message}})

    # Canned response that the mock transport returns for every request.
    response = mock.Mock(
        status_code=429,
        headers={"Retry-After": 186688, "Content-Type": "application/json"},
        reason="Bad Request",
    )
    response.text = lambda encoding=None: error_body
    response.content_type = "application/json"

    transport = AsyncMockTransport(send=wrap_in_future(lambda request, **kwargs: response))
    credential = AzureKeyCredential(formrecognizer_test_api_key)
    client = DocumentAnalysisClient(formrecognizer_test_endpoint, credential, transport=transport)

    with pytest.raises(HttpResponseError) as e:
        await client.begin_analyze_document_from_url("prebuilt-receipt", self.receipt_url_jpg)

    raised = e.value
    assert raised.status_code == 429
    assert raised.error.message == quota_message
async def test_polling_interval(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
    """Check that a per-call ``polling_interval`` overrides the client-level value for that call only."""
    credential = AzureKeyCredential(formrecognizer_test_api_key)
    client = DocumentAnalysisClient(formrecognizer_test_endpoint, credential, polling_interval=7)
    self.assertEqual(client._client._config.polling_interval, 7)

    async with client:
        # First call: an explicit per-call interval is passed.
        overridden_poller = await client.begin_analyze_document_from_url(
            "prebuilt-receipt",
            self.receipt_url_jpg,
            polling_interval=6,
        )
        await overridden_poller.wait()
        self.assertEqual(overridden_poller._polling_method._timeout, 6)

        # Second call: no override is passed.
        default_poller = await client.begin_analyze_document_from_url(
            "prebuilt-receipt",
            self.receipt_url_jpg,
        )
        await default_poller.wait()
        self.assertEqual(default_poller._polling_method._timeout, 7)  # goes back to client default
async def authentication_with_azure_active_directory_document_analysis_client_async():
    # [START create_da_client_with_aad_async]
    """Build an AAD-authenticated client and run a ``prebuilt-layout`` analysis.

    DefaultAzureCredential will use the values from these environment
    variables: AZURE_CLIENT_ID, AZURE_TENANT_ID, AZURE_CLIENT_SECRET

    The endpoint is read from ``AZURE_FORM_RECOGNIZER_ENDPOINT``. ``url`` is
    not defined in this function; it must be supplied by the enclosing scope.
    """
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient
    from azure.identity.aio import DefaultAzureCredential

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    credential = DefaultAzureCredential()

    document_analysis_client = DocumentAnalysisClient(endpoint, credential)
    # [END create_da_client_with_aad_async]

    # azure-identity documents that async credentials should be closed when no
    # longer needed, so the credential is entered here together with the client.
    # It is entered first and therefore closed last.
    async with credential, document_analysis_client:
        poller = await document_analysis_client.begin_analyze_document_from_url(
            "prebuilt-layout", url
        )
        # The result is awaited only to complete the operation; it is not used.
        await poller.result()
async def test_logging_info_dac_client(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
    """At INFO level only the API key header may appear as REDACTED in "azure" log records.

    A ``MockHandler`` is attached to the "azure" logger while a receipt
    analysis runs. The handler is removed and the logger's previous level is
    restored afterwards, so the global logger state does not leak into other
    tests.
    """
    client = DocumentAnalysisClient(formrecognizer_test_endpoint, AzureKeyCredential(formrecognizer_test_api_key))
    mock_handler = MockHandler()

    logger = logging.getLogger("azure")
    previous_level = logger.level
    logger.addHandler(mock_handler)
    logger.setLevel(logging.INFO)
    try:
        async with client:
            poller = await client.begin_analyze_document_from_url("prebuilt-receipt", self.receipt_url_jpg)
            await poller.result()
    finally:
        # Undo the changes to the shared "azure" logger even if the call fails.
        logger.removeHandler(mock_handler)
        logger.setLevel(previous_level)

    for message in mock_handler.messages:
        if message.levelname != "INFO":
            continue
        # not able to use json.loads here. At INFO level only API key should be REDACTED
        if "Ocp-Apim-Subscription-Key" in message.message:
            assert "REDACTED" in message.message
        else:
            assert "REDACTED" not in message.message
async def test_polling_interval(self, formrecognizer_test_endpoint, formrecognizer_test_api_key, **kwargs):
    """Check that a per-call ``polling_interval`` overrides the client-level value for that call only."""
    # this can be reverted to set_bodiless_matcher() after tests are re-recorded and don't contain these headers
    ignored_headers = "Authorization,Content-Length,x-ms-client-request-id,x-ms-request-id"
    set_custom_default_matcher(compare_bodies=False, excluded_headers=ignored_headers)

    credential = AzureKeyCredential(formrecognizer_test_api_key)
    client = DocumentAnalysisClient(formrecognizer_test_endpoint, credential, polling_interval=7)
    assert client._client._config.polling_interval == 7

    async with client:
        # First call: an explicit per-call interval is passed.
        overridden_poller = await client.begin_analyze_document_from_url(
            "prebuilt-receipt",
            self.receipt_url_jpg,
            polling_interval=6,
        )
        await overridden_poller.wait()
        assert overridden_poller._polling_method._timeout == 6

        # Second call: no override is passed.
        default_poller = await client.begin_analyze_document_from_url(
            "prebuilt-receipt",
            self.receipt_url_jpg,
        )
        await default_poller.wait()
        assert default_poller._polling_method._timeout == 7  # goes back to client default
def test_bad_api_version_document_analysis_client(self):
    """Constructing the client with ``api_version="9"`` must raise a ``ValueError`` listing valid versions."""
    supported_versions = ", ".join(v.value for v in DocumentAnalysisApiVersion)
    expected_message = "Unsupported API version '9'. Please select from: {}".format(supported_versions)

    with pytest.raises(ValueError) as excinfo:
        DocumentAnalysisClient("url", "key", api_version="9")

    assert expected_message == str(excinfo.value)
async def analyze_invoice_async():
    """Analyze the sample invoice with ``prebuilt-invoice`` and print its fields.

    The endpoint and key are read from ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and
    ``AZURE_FORM_RECOGNIZER_KEY``. For each returned document, every field
    that is present is printed with its value and confidence. Line items are
    printed only when the invoice has an ``Items`` field, so an invoice
    without line items no longer raises ``AttributeError``.
    """
    path_to_sample_documents = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "..",
            "./sample_forms/forms/sample_invoice.jpg",
        )
    )

    # [START analyze_invoices_async]
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )
    async with document_analysis_client:
        with open(path_to_sample_documents, "rb") as f:
            poller = await document_analysis_client.begin_analyze_document(
                "prebuilt-invoice", document=f, locale="en-US"
            )
        invoices = await poller.result()

    def print_field(fields, name, label, prefix=""):
        # Print one field's value and confidence; absent or falsy fields are skipped.
        field = fields.get(name)
        if field:
            print(
                "{}{}: {} has confidence: {}".format(
                    prefix, label, field.value, field.confidence
                )
            )

    # (field name, printed label) pairs, in the order they are reported.
    header_fields = (
        ("VendorName", "Vendor Name"),
        ("VendorAddress", "Vendor Address"),
        ("VendorAddressRecipient", "Vendor Address Recipient"),
        ("CustomerName", "Customer Name"),
        ("CustomerId", "Customer Id"),
        ("CustomerAddress", "Customer Address"),
        ("CustomerAddressRecipient", "Customer Address Recipient"),
        ("InvoiceId", "Invoice Id"),
        ("InvoiceDate", "Invoice Date"),
        ("InvoiceTotal", "Invoice Total"),
        ("DueDate", "Due Date"),
        ("PurchaseOrder", "Purchase Order"),
        ("BillingAddress", "Billing Address"),
        ("BillingAddressRecipient", "Billing Address Recipient"),
        ("ShippingAddress", "Shipping Address"),
        ("ShippingAddressRecipient", "Shipping Address Recipient"),
    )
    item_fields = (
        ("Description", "Description"),
        ("Quantity", "Quantity"),
        ("Unit", "Unit"),
        ("UnitPrice", "Unit Price"),
        ("ProductCode", "Product Code"),
        ("Date", "Date"),
        ("Tax", "Tax"),
        ("Amount", "Amount"),
    )
    summary_fields = (
        ("SubTotal", "Subtotal"),
        ("TotalTax", "Total Tax"),
        ("PreviousUnpaidBalance", "Previous Unpaid Balance"),
        ("AmountDue", "Amount Due"),
        ("ServiceStartDate", "Service Start Date"),
        ("ServiceEndDate", "Service End Date"),
        ("ServiceAddress", "Service Address"),
        ("ServiceAddressRecipient", "Service Address Recipient"),
        ("RemittanceAddress", "Remittance Address"),
        ("RemittanceAddressRecipient", "Remittance Address Recipient"),
    )

    for idx, invoice in enumerate(invoices.documents):
        print("--------Recognizing invoice #{}--------".format(idx + 1))

        for name, label in header_fields:
            print_field(invoice.fields, name, label)

        # Guard against invoices without line items; every other field is
        # already optional, and calling `.value` on a missing field would fail.
        items = invoice.fields.get("Items")
        if items:
            print("Invoice items:")
            # A separate index name avoids clobbering the invoice index above.
            for item_idx, item in enumerate(items.value):
                print("...Item #{}".format(item_idx + 1))
                for name, label in item_fields:
                    print_field(item.value, name, label, prefix="......")

        for name, label in summary_fields:
            print_field(invoice.fields, name, label)
async def analyze_read():
    """Analyze a sample form with ``prebuilt-read`` and print languages, lines, words, and selection marks."""
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient

    sample_path = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "..",
            "./sample_forms/forms/Form_1.jpg",
        )
    )

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )
    async with document_analysis_client:
        with open(sample_path, "rb") as document_stream:
            poller = await document_analysis_client.begin_analyze_document(
                "prebuilt-read", document=document_stream
            )
        result = await poller.result()

    print("----Languages detected in the document----")
    for detected in result.languages:
        print(
            "Language code: '{}' with confidence {}".format(
                detected.language_code, detected.confidence
            )
        )

    for page in result.pages:
        print("----Analyzing document from page #{}----".format(page.page_number))
        print(
            "Page has width: {} and height: {}, measured with unit: {}".format(
                page.width, page.height, page.unit
            )
        )

        # Line indices are reported zero-based.
        for line_idx, line in enumerate(page.lines):
            line_words = line.get_words()
            print(
                "...Line # {} has {} words and text '{}' within bounding box '{}'".format(
                    line_idx,
                    len(line_words),
                    line.content,
                    format_bounding_box(line.bounding_box),
                )
            )
            for word in line_words:
                print(
                    "......Word '{}' has a confidence of {}".format(
                        word.content, word.confidence
                    )
                )

        for mark in page.selection_marks:
            print(
                "...Selection mark is '{}' within bounding box '{}' and has a confidence of {}".format(
                    mark.state,
                    format_bounding_box(mark.bounding_box),
                    mark.confidence,
                )
            )

    print("----------------------------------------")
async def analyze_layout_async():
    """Analyze a sample form with ``prebuilt-layout`` and print styles, pages, and tables.

    Pages are printed with their lines, words, and selection marks; tables
    are printed with their bounding regions and cells.
    """
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient

    sample_path = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "..",
            "./sample_forms/forms/form_selection_mark.png",
        )
    )

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )
    async with document_analysis_client:
        with open(sample_path, "rb") as document_stream:
            poller = await document_analysis_client.begin_analyze_document(
                "prebuilt-layout", document=document_stream
            )
        result = await poller.result()

    # One line per style entry, saying whether it is handwritten.
    for style in result.styles:
        if style.is_handwritten:
            handwriting = "handwritten"
        else:
            handwriting = "no handwritten"
        print("Document contains {} content".format(handwriting))

    # Page numbers are reported one-based; line indices zero-based.
    for page_number, page in enumerate(result.pages, start=1):
        print("----Analyzing layout from page #{}----".format(page_number))
        print(
            "Page has width: {} and height: {}, measured with unit: {}".format(
                page.width, page.height, page.unit
            )
        )

        for line_idx, line in enumerate(page.lines):
            line_words = line.get_words()
            print(
                "...Line # {} has word count {} and text '{}' within bounding box '{}'".format(
                    line_idx,
                    len(line_words),
                    line.content,
                    format_bounding_box(line.bounding_box),
                )
            )
            for word in line_words:
                print(
                    "......Word '{}' has a confidence of {}".format(
                        word.content, word.confidence
                    )
                )

        for mark in page.selection_marks:
            print(
                "Selection mark is '{}' within bounding box '{}' and has a confidence of {}".format(
                    mark.state,
                    format_bounding_box(mark.bounding_box),
                    mark.confidence,
                )
            )

    for table_idx, table in enumerate(result.tables):
        print(
            "Table # {} has {} rows and {} columns".format(
                table_idx, table.row_count, table.column_count
            )
        )
        for table_region in table.bounding_regions:
            print(
                "Table # {} location on page: {} is {}".format(
                    table_idx,
                    table_region.page_number,
                    format_bounding_box(table_region.bounding_box),
                )
            )
        for cell in table.cells:
            print(
                "...Cell[{}][{}] has text '{}'".format(
                    cell.row_index,
                    cell.column_index,
                    cell.content,
                )
            )
            for cell_region in cell.bounding_regions:
                print(
                    "...content on page {} is within bounding box '{}'".format(
                        cell_region.page_number,
                        format_bounding_box(cell_region.bounding_box),
                    )
                )

    print("----------------------------------------")
async def analyze_business_card_async():
    """Analyze the sample business card with ``prebuilt-businessCard`` and print its fields.

    The endpoint and key are read from ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and
    ``AZURE_FORM_RECOGNIZER_KEY``. Every field is optional. A contact name
    that lacks a first or last name is now skipped for that part instead of
    raising ``KeyError``.
    """
    path_to_sample_documents = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "..",
            "./sample_forms/business_cards/business-card-english.jpg",
        )
    )

    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )
    async with document_analysis_client:
        with open(path_to_sample_documents, "rb") as f:
            poller = await document_analysis_client.begin_analyze_document(
                "prebuilt-businessCard", document=f, locale="en-US"
            )
        business_cards = await poller.result()

    # (sub-field name, printed label) for each part of a contact name.
    name_parts = (
        ("FirstName", "Contact First Name"),
        ("LastName", "Contact Last Name"),
    )
    # (field name, printed label, attribute printed for each entry), in report order.
    # NOTE(review): the other phone fields print `.content` while "OtherPhones"
    # prints `.value`; preserved as in the original — confirm which is intended.
    list_fields = (
        ("CompanyNames", "Company Name", "value"),
        ("Departments", "Department", "value"),
        ("JobTitles", "Job Title", "value"),
        ("Emails", "Email", "value"),
        ("Websites", "Website", "value"),
        ("Addresses", "Address", "value"),
        ("MobilePhones", "Mobile phone number", "content"),
        ("Faxes", "Fax number", "content"),
        ("WorkPhones", "Work phone number", "content"),
        ("OtherPhones", "Other phone number", "value"),
    )

    for idx, business_card in enumerate(business_cards.documents):
        print("--------Analyzing business card #{}--------".format(idx + 1))

        contact_names = business_card.fields.get("ContactNames")
        if contact_names:
            for contact_name in contact_names.value:
                for part_name, part_label in name_parts:
                    # `.get` tolerates names that carry only one of the two parts.
                    part = contact_name.value.get(part_name)
                    if part:
                        print(
                            "{}: {} has confidence: {}".format(
                                part_label, part.value, part.confidence
                            )
                        )

        for field_name, label, attribute in list_fields:
            field = business_card.fields.get(field_name)
            if not field:
                continue
            for entry in field.value:
                print(
                    "{}: {} has confidence: {}".format(
                        label, getattr(entry, attribute), entry.confidence
                    )
                )
async def analyze_receipts_from_url_async():
    """Analyze the sample receipt image by URL with ``prebuilt-receipt`` and print its fields.

    Every field is optional; line items are printed only when the receipt
    has an ``Items`` field.
    """
    # [START analyze_receipts_from_url_async]
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
    url = "https://raw.githubusercontent.com/Azure/azure-sdk-for-python/main/sdk/formrecognizer/azure-ai-formrecognizer/tests/sample_forms/receipt/contoso-receipt.png"

    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )
    async with document_analysis_client:
        poller = await document_analysis_client.begin_analyze_document_from_url(
            "prebuilt-receipt", document_url=url
        )
        receipts = await poller.result()

    def report(template, field):
        # Print the field's value and confidence with `template`; skip falsy fields.
        if field:
            print(template.format(field.value, field.confidence))

    # Receipt and item numbers are reported one-based.
    for receipt_number, receipt in enumerate(receipts.documents, start=1):
        print("--------Recognizing receipt #{}--------".format(receipt_number))
        report("Receipt Type: {} has confidence: {}", receipt.fields.get("ReceiptType"))
        report("Merchant Name: {} has confidence: {}", receipt.fields.get("MerchantName"))
        report("Transaction Date: {} has confidence: {}", receipt.fields.get("TransactionDate"))

        line_items = receipt.fields.get("Items")
        if line_items:
            print("Receipt items:")
            for item_number, item in enumerate(line_items.value, start=1):
                print("...Item #{}".format(item_number))
                report("......Item Name: {} has confidence: {}", item.value.get("Name"))
                report("......Item Quantity: {} has confidence: {}", item.value.get("Quantity"))
                report("......Individual Item Price: {} has confidence: {}", item.value.get("Price"))
                report("......Total Item Price: {} has confidence: {}", item.value.get("TotalPrice"))

        report("Subtotal: {} has confidence: {}", receipt.fields.get("Subtotal"))
        report("Tax: {} has confidence: {}", receipt.fields.get("Tax"))
        report("Tip: {} has confidence: {}", receipt.fields.get("Tip"))
        report("Total: {} has confidence: {}", receipt.fields.get("Total"))
        print("--------------------------------------")
async def analyze_tax_us_w2_async():
    """Analyze the sample US W-2 form and print every recognized field.

    The sample image is resolved relative to this file, sent to the
    ``prebuilt-tax.us.w2`` model with the ``en-US`` locale, and each field
    found on the returned documents is printed with its confidence. The
    endpoint and key come from the ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and
    ``AZURE_FORM_RECOGNIZER_KEY`` environment variables.
    """
    sample_path = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "..",
            "./sample_forms/tax/sample_w2.png",
        )
    )

    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient

    def report(template, field):
        # Print the template filled with value/confidence; skip missing fields.
        if field:
            print(template.format(field.value, field.confidence))

    def report_each(container, entries):
        # Report the listed (key, template) pairs of a mapping, in order.
        for field_key, template in entries:
            report(template, container.get(field_key))

    service_endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    api_key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    client = DocumentAnalysisClient(
        endpoint=service_endpoint, credential=AzureKeyCredential(api_key)
    )
    async with client:
        with open(sample_path, "rb") as stream:
            poller = await client.begin_analyze_document(
                "prebuilt-tax.us.w2", document=stream, locale="en-US"
            )
        analysis = await poller.result()

    for form_number, form in enumerate(analysis.documents, start=1):
        print("--------Recognizing US Tax W-2 Form #{}--------".format(form_number))
        fields = form.fields

        report_each(
            fields,
            (
                ("W2FormVariant", "Form variant: {} has confidence: {}"),
                ("TaxYear", "Tax year: {} has confidence: {}"),
                ("W2Copy", "W-2 Copy: {} has confidence: {}"),
            ),
        )

        employee = fields.get("Employee")
        if employee:
            print("Employee data:")
            report_each(
                employee.value,
                (
                    ("Name", "...Name: {} has confidence: {}"),
                    ("SocialSecurityNumber", "...SSN: {} has confidence: {}"),
                    ("Address", "...Address: {} has confidence: {}"),
                    ("ZipCode", "...Zipcode: {} has confidence: {}"),
                ),
            )

        report("Control Number: {} has confidence: {}", fields.get("ControlNumber"))

        employer = fields.get("Employer")
        if employer:
            print("Employer data:")
            report_each(
                employer.value,
                (
                    ("Name", "...Name: {} has confidence: {}"),
                    ("IdNumber", "...ID Number: {} has confidence: {}"),
                    ("Address", "...Address: {} has confidence: {}"),
                    ("ZipCode", "...Zipcode: {} has confidence: {}"),
                ),
            )

        report_each(
            fields,
            (
                (
                    "WagesTipsAndOtherCompensation",
                    "Wages, tips, and other compensation: {} has confidence: {}",
                ),
                (
                    "FederalIncomeTaxWithheld",
                    "Federal income tax withheld: {} has confidence: {}",
                ),
                (
                    "SocialSecurityWages",
                    "Social Security wages: {} has confidence: {}",
                ),
                (
                    "SocialSecurityTaxWithheld",
                    "Social Security tax withheld: {} has confidence: {}",
                ),
                (
                    "MedicareWagesAndTips",
                    "Medicare wages and tips: {} has confidence: {}",
                ),
                (
                    "MedicareTaxWithheld",
                    "Medicare tax withheld: {} has confidence: {}",
                ),
                (
                    "SocialSecurityTips",
                    "Social Security tips: {} has confidence: {}",
                ),
                ("AllocatedTips", "Allocated tips: {} has confidence: {}"),
                ("VerificationCode", "Verification code: {} has confidence: {}"),
                (
                    "DependentCareBenefits",
                    "Dependent care benefits: {} has confidence: {}",
                ),
                ("NonQualifiedPlans", "Non-qualified plans: {} has confidence: {}"),
            ),
        )

        additional_info = fields.get("AdditionalInfo")
        if additional_info:
            print("Additional information:")
            for entry in additional_info.value:
                report_each(
                    entry.value,
                    (
                        ("LetterCode", "...Letter code: {} has confidence: {}"),
                        ("Amount", "...Amount: {} has confidence: {}"),
                    ),
                )

        report_each(
            fields,
            (
                (
                    "IsStatutoryEmployee",
                    "Is statutory employee: {} has confidence: {}",
                ),
                ("IsRetirementPlan", "Is retirement plan: {} has confidence: {}"),
                (
                    "IsThirdPartySickPay",
                    "Is third party sick pay: {} has confidence: {}",
                ),
                ("Other", "Other information: {} has confidence: {}"),
            ),
        )

        state_tax_info = fields.get("StateTaxInfos")
        if state_tax_info:
            print("State Tax info:")
            for state_entry in state_tax_info.value:
                report_each(
                    state_entry.value,
                    (
                        ("State", "...State: {} has confidence: {}"),
                        (
                            "EmployerStateIdNumber",
                            "...Employer state ID number: {} has confidence: {}",
                        ),
                        (
                            "StateWagesTipsEtc",
                            "...State wages, tips, etc: {} has confidence: {}",
                        ),
                        (
                            "StateIncomeTax",
                            "...State income tax: {} has confidence: {}",
                        ),
                    ),
                )

        local_tax_info = fields.get("LocalTaxInfos")
        if local_tax_info:
            print("Local Tax info:")
            for local_entry in local_tax_info.value:
                report_each(
                    local_entry.value,
                    (
                        (
                            "LocalWagesTipsEtc",
                            "...Local wages, tips, etc: {} has confidence: {}",
                        ),
                        (
                            "LocalIncomeTax",
                            "...Local income tax: {} has confidence: {}",
                        ),
                        (
                            "LocalityName",
                            "...Locality name: {} has confidence: {}",
                        ),
                    ),
                )
async def get_elements_with_spans_async():
    """Analyze the sample form and relate tables and entities to other elements via spans.

    The sample image is sent to the ``prebuilt-document`` model. For every
    table the lines overlapping the table's spans are printed; for every
    entity the handwritten styles overlapping the entity's spans are printed.
    ``get_lines``, ``get_page`` and ``get_styles`` are called as module-level
    helpers and are expected to be defined elsewhere in this module.
    """
    sample_path = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "..",
            "./sample_forms/forms/Form_1.jpg",
        )
    )

    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient

    service_endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    api_key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    client = DocumentAnalysisClient(
        endpoint=service_endpoint, credential=AzureKeyCredential(api_key)
    )
    async with client:
        with open(sample_path, "rb") as stream:
            poller = await client.begin_analyze_document(
                "prebuilt-document", document=stream
            )
        result = await poller.result()

    # Searching for the lines of a particular element by using spans.
    # DocumentTable is used here, but any element exposing a `spans` or `span`
    # field can be used the same way to find related elements (lines, in this
    # case). For an example that searches for words, which have a `span`
    # field, see `sample_get_words_on_document_line.py` under the samples
    # v3.2-beta directory.
    for table_number, table in enumerate(result.tables):
        summary = "Table # {} has {} rows and {} columns".format(
            table_number, table.row_count, table.column_count
        )
        print(summary)

        table_lines = []
        for bounding_region in table.bounding_regions:
            page_number = bounding_region.page_number
            print(
                "Table # {} location on page: {}".format(table_number, page_number)
            )
            page = get_page(bounding_region.page_number, result.pages)
            table_lines.extend(get_lines(table.spans, page))

        print("Found # {} lines in the table".format(len(table_lines)))
        for table_line in table_lines:
            message = "...Line '{}' is within bounding box: '{}'".format(
                table_line.content, table_line.bounding_box
            )
            print(message)

    # Searching for the style of a particular element by using spans.
    # DocumentEntity is used here, but any element exposing a `spans` or
    # `span` field can be used the same way to look up document text style.
    for entity in result.entities:
        entity_styles = get_styles(entity.spans, result.styles)
        header = "Found entity '{}' of type '{}' with style:".format(
            entity.content, entity.category
        )
        print(header)
        if not entity_styles:
            print("...no handwritten text found")
        for entity_style in entity_styles:
            if not entity_style.is_handwritten:
                continue
            print(
                "...handwritten with confidence {}".format(entity_style.confidence)
            )

    print("----------------------------------------")
async def analyze_receipts_async():
    """Analyze the sample receipt image from disk and print the recognized fields.

    The sample image is resolved relative to this file and sent to the
    ``prebuilt-receipt`` model with the ``en-US`` locale. The receipt type,
    merchant name, transaction date, line items, subtotal, tax, tip and total
    of every returned document are printed with their confidence when the
    field is present. The endpoint and key come from the
    ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and ``AZURE_FORM_RECOGNIZER_KEY``
    environment variables.

    Raises:
        KeyError: If either environment variable is not set.
    """
    path_to_sample_documents = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "..",
            "./sample_forms/receipt/contoso-allinone.jpg",
        )
    )

    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )
    async with document_analysis_client:
        with open(path_to_sample_documents, "rb") as f:
            poller = await document_analysis_client.begin_analyze_document(
                "prebuilt-receipt", document=f, locale="en-US"
            )
        receipts = await poller.result()

    for idx, receipt in enumerate(receipts.documents):
        print("--------Recognizing receipt #{}--------".format(idx + 1))
        receipt_type = receipt.fields.get("ReceiptType")
        if receipt_type:
            print(
                "Receipt Type: {} has confidence: {}".format(
                    receipt_type.value, receipt_type.confidence
                )
            )
        merchant_name = receipt.fields.get("MerchantName")
        if merchant_name:
            print(
                "Merchant Name: {} has confidence: {}".format(
                    merchant_name.value, merchant_name.confidence
                )
            )
        transaction_date = receipt.fields.get("TransactionDate")
        if transaction_date:
            print(
                "Transaction Date: {} has confidence: {}".format(
                    transaction_date.value, transaction_date.confidence
                )
            )
        # Look the item list up once and reuse it for both the check and the loop.
        items = receipt.fields.get("Items")
        if items:
            print("Receipt items:")
            # A separate index name keeps the receipt counter `idx` intact.
            for item_idx, item in enumerate(items.value):
                print("...Item #{}".format(item_idx + 1))
                item_name = item.value.get("Name")
                if item_name:
                    print(
                        "......Item Name: {} has confidence: {}".format(
                            item_name.value, item_name.confidence
                        )
                    )
                item_quantity = item.value.get("Quantity")
                if item_quantity:
                    print(
                        "......Item Quantity: {} has confidence: {}".format(
                            item_quantity.value, item_quantity.confidence
                        )
                    )
                item_price = item.value.get("Price")
                if item_price:
                    print(
                        "......Individual Item Price: {} has confidence: {}".format(
                            item_price.value, item_price.confidence
                        )
                    )
                item_total_price = item.value.get("TotalPrice")
                if item_total_price:
                    print(
                        "......Total Item Price: {} has confidence: {}".format(
                            item_total_price.value, item_total_price.confidence
                        )
                    )
        subtotal = receipt.fields.get("Subtotal")
        if subtotal:
            print(
                "Subtotal: {} has confidence: {}".format(
                    subtotal.value, subtotal.confidence
                )
            )
        tax = receipt.fields.get("Tax")
        if tax:
            print("Tax: {} has confidence: {}".format(tax.value, tax.confidence))
        tip = receipt.fields.get("Tip")
        if tip:
            print("Tip: {} has confidence: {}".format(tip.value, tip.confidence))
        total = receipt.fields.get("Total")
        if total:
            print(
                "Total: {} has confidence: {}".format(total.value, total.confidence)
            )
        print("--------------------------------------")
async def analyze_identity_documents_async():
    """Analyze the sample driver license image and print the recognized ID fields.

    The sample image is resolved relative to this file and sent to the
    ``prebuilt-idDocument`` model. For every returned document, each field
    listed in the table below is printed with its confidence when present.
    The endpoint and key come from the ``AZURE_FORM_RECOGNIZER_ENDPOINT`` and
    ``AZURE_FORM_RECOGNIZER_KEY`` environment variables.
    """
    sample_path = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "..",
            "./sample_forms/id_documents/license.jpg",
        )
    )

    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient

    # (field key, output template) pairs, listed in the order they are printed.
    id_field_templates = (
        ("FirstName", "First Name: {} has confidence: {}"),
        ("LastName", "Last Name: {} has confidence: {}"),
        ("DocumentNumber", "Document Number: {} has confidence: {}"),
        ("DateOfBirth", "Date of Birth: {} has confidence: {}"),
        ("DateOfExpiration", "Date of Expiration: {} has confidence: {}"),
        ("Sex", "Sex: {} has confidence: {}"),
        ("Address", "Address: {} has confidence: {}"),
        ("CountryRegion", "Country/Region: {} has confidence: {}"),
        ("Region", "Region: {} has confidence: {}"),
    )

    service_endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    api_key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    client = DocumentAnalysisClient(
        endpoint=service_endpoint, credential=AzureKeyCredential(api_key)
    )
    async with client:
        with open(sample_path, "rb") as stream:
            poller = await client.begin_analyze_document(
                "prebuilt-idDocument", document=stream
            )
        analysis = await poller.result()

    for doc_number, id_doc in enumerate(analysis.documents, start=1):
        print("--------Recognizing ID document #{}--------".format(doc_number))
        for field_key, template in id_field_templates:
            found = id_doc.fields.get(field_key)
            if not found:
                # Field was not returned for this document; nothing to print.
                continue
            print(template.format(found.value, found.confidence))
async def analyze_custom_documents_async(custom_model_id):
    """Analyze the sample form with a custom model and print the results.

    Prints, for every returned document, its type, confidence and fields;
    then the lines, words and selection marks of every page; then the page
    number(s) and cells of every table.

    Args:
        custom_model_id: Model ID used when the ``CUSTOM_BUILT_MODEL_ID``
            environment variable is not set; the environment variable takes
            precedence when present.

    Raises:
        KeyError: If ``AZURE_FORM_RECOGNIZER_ENDPOINT`` or
            ``AZURE_FORM_RECOGNIZER_KEY`` is not set.
    """
    path_to_sample_documents = os.path.abspath(
        os.path.join(
            os.path.abspath(__file__),
            "..",
            "..",
            "..",
            "./sample_forms/forms/Form_1.jpg",
        )
    )
    # [START analyze_custom_documents_async]
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer.aio import DocumentAnalysisClient

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
    model_id = os.getenv("CUSTOM_BUILT_MODEL_ID", custom_model_id)

    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )

    async with document_analysis_client:
        # Make sure your document's type is included in the list of document types the custom model can analyze
        with open(path_to_sample_documents, "rb") as f:
            poller = await document_analysis_client.begin_analyze_document(
                model=model_id, document=f
            )
        result = await poller.result()

    for idx, document in enumerate(result.documents):
        print("--------Analyzing document #{}--------".format(idx + 1))
        print("Document has type {}".format(document.doc_type))
        print("Document has document type confidence {}".format(document.confidence))
        print("Document was analyzed with model with ID {}".format(result.model_id))
        # Only the field objects are printed, so iterate the values directly.
        for field in document.fields.values():
            # Fall back to the raw text content when the parsed value is missing or falsy.
            field_value = field.value if field.value else field.content
            print(
                "......found field of type '{}' with value '{}' and with confidence {}".format(
                    field.value_type, field_value, field.confidence
                )
            )

    # iterate over lines, words, and selection marks on each page
    for page in result.pages:
        print("\nLines found on page {}".format(page.page_number))
        for line in page.lines:
            print("...Line '{}'".format(line.content))
        for word in page.words:
            print(
                "...Word '{}' has a confidence of {}".format(
                    word.content, word.confidence
                )
            )
        if page.selection_marks:
            print("\nSelection marks found on page {}".format(page.page_number))
            for selection_mark in page.selection_marks:
                print(
                    "...Selection mark is '{}' and has a confidence of {}".format(
                        selection_mark.state, selection_mark.confidence
                    )
                )

    for i, table in enumerate(result.tables):
        print("\nTable {} can be found on page:".format(i + 1))
        for region in table.bounding_regions:
            # Print the page number itself; the single placeholder previously
            # received the table index because it was passed first.
            print("...{}".format(region.page_number))
        for cell in table.cells:
            print(
                "...Cell[{}][{}] has text '{}'".format(
                    cell.row_index, cell.column_index, cell.content
                )
            )
    print("-----------------------------------")