def test_content_url_transform_pdf(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Compare the transformed layout with the raw analyze result for the invoice PDF URL."""
    credential = AzureKeyCredential(form_recognizer_account_key)
    client = FormRecognizerClient(form_recognizer_account, credential)
    captured = []

    def callback(raw_response, _, headers):
        # Record the deserialized operation result first, then its transformed layout.
        operation_result = client._client._deserialize(AnalyzeOperationResult, raw_response)
        captured.extend([operation_result, prepare_content_result(operation_result)])

    # The poller's own return value is not needed; the callback captures both views.
    client.begin_recognize_content_from_url(self.invoice_url_pdf, cls=callback).result()

    operation_result = captured[0]
    layout = captured[1]
    analyze_result = operation_result.analyze_result

    # Check form pages
    self.assertFormPagesTransformCorrect(
        layout,
        analyze_result.read_results,
        analyze_result.page_results,
    )
def test_content_multipage_url(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Recognize content from the multi-page PDF URL and expect three populated pages."""
    credential = AzureKeyCredential(form_recognizer_account_key)
    client = FormRecognizerClient(form_recognizer_account, credential)

    pages = client.begin_recognize_content_from_url(self.multipage_url_pdf).result()

    self.assertEqual(len(pages), 3)
    self.assertFormPagesHasValues(pages)
def test_content_continuation_token(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Start a recognition operation, then obtain its result via a continuation token."""
    credential = AzureKeyCredential(form_recognizer_account_key)
    client = FormRecognizerClient(form_recognizer_account, credential)

    first_poller = client.begin_recognize_content_from_url(self.form_url_jpg)
    token = first_poller.continuation_token()

    # A second poller is created for the same URL from the saved token.
    resumed_poller = client.begin_recognize_content_from_url(
        self.form_url_jpg,
        continuation_token=token,
    )
    self.assertIsNotNone(resumed_poller.result())

    first_poller.wait()  # necessary so azure-devtools doesn't throw assertion error
def test_content_url_auth_bad_key(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
    """Expect ClientAuthenticationError when the client is built with the key "xxxx"."""
    bad_credential = AzureKeyCredential("xxxx")
    client = FormRecognizerClient(formrecognizer_test_endpoint, bad_credential)

    with self.assertRaises(ClientAuthenticationError):
        client.begin_recognize_content_from_url(self.invoice_url_pdf)
def test_content_url_auth_bad_key(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Expect ClientAuthenticationError when the account is called with the key "xxxx"."""
    bad_credential = AzureKeyCredential("xxxx")
    client = FormRecognizerClient(form_recognizer_account, bad_credential)

    with self.assertRaises(ClientAuthenticationError):
        client.begin_recognize_content_from_url(self.invoice_url_pdf)
def test_content_url_auth_successful_key(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Run content recognition on the invoice PDF URL using the account key.

    No assertions are made on the result; the test passes when the call and
    the poller's ``result()`` complete without raising.
    """
    credential = AzureKeyCredential(form_recognizer_account_key)
    client = FormRecognizerClient(form_recognizer_account, credential)

    client.begin_recognize_content_from_url(self.invoice_url_pdf).result()
def test_content_url_auth_bad_key(self, formrecognizer_test_endpoint, formrecognizer_test_api_key, **kwargs):
    """Expect ClientAuthenticationError when the client is built with the key "xxxx"."""
    set_bodiless_matcher()

    bad_credential = AzureKeyCredential("xxxx")
    client = FormRecognizerClient(formrecognizer_test_endpoint, bad_credential)

    with pytest.raises(ClientAuthenticationError):
        client.begin_recognize_content_from_url(self.invoice_url_pdf)
def test_content_url_bad_endpoint(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
    """Expect ServiceRequestError when recognizing content via "http://notreal.azure.com".

    The client is constructed before entering ``assertRaises`` so that only
    the service call itself can satisfy the assertion; an error raised while
    building the client now fails the test instead of passing it.
    """
    client = FormRecognizerClient(
        "http://notreal.azure.com",
        AzureKeyCredential(formrecognizer_test_api_key),
    )

    with self.assertRaises(ServiceRequestError):
        client.begin_recognize_content_from_url(self.invoice_url_pdf)
def test_content_url_auth_bad_key(self, formrecognizer_test_endpoint, formrecognizer_test_api_key, **kwargs):
    """Expect ClientAuthenticationError when the client is built with the key "xxxx"."""
    # this can be reverted to set_bodiless_matcher() after tests are re-recorded and don't contain these headers
    set_custom_default_matcher(
        compare_bodies=False,
        excluded_headers="Authorization,Content-Length,x-ms-client-request-id,x-ms-request-id",
    )

    bad_credential = AzureKeyCredential("xxxx")
    client = FormRecognizerClient(formrecognizer_test_endpoint, bad_credential)

    with pytest.raises(ClientAuthenticationError):
        client.begin_recognize_content_from_url(self.invoice_url_pdf)
def test_content_url_pass_stream(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Expect HttpResponseError when an open file object is passed where a URL is expected."""
    credential = AzureKeyCredential(form_recognizer_account_key)
    client = FormRecognizerClient(form_recognizer_account, credential)

    # The file is opened first and closed last, exactly as with nested blocks.
    with open(self.receipt_jpg, "rb") as stream, self.assertRaises(HttpResponseError):
        client.begin_recognize_content_from_url(stream)
def test_content_bad_url(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Expect HttpResponseError when recognizing content from "https://badurl.jpg"."""
    credential = AzureKeyCredential(form_recognizer_account_key)
    client = FormRecognizerClient(form_recognizer_account, credential)

    with self.assertRaises(HttpResponseError):
        client.begin_recognize_content_from_url("https://badurl.jpg")
def authentication_with_api_key_credential_form_recognizer_client(self):
    """Create a FormRecognizerClient from an API key and run one recognition call.

    The endpoint and key are read from the AZURE_FORM_RECOGNIZER_ENDPOINT and
    AZURE_FORM_RECOGNIZER_KEY environment variables; a missing variable raises
    KeyError. After the client is built, content recognition is started for
    ``self.url`` and the poller's result is awaited.
    """
    # NOTE(review): the [START]/[END] markers presumably delimit a region that
    # is extracted into published docs -- confirm before editing between them.
    # [START create_fr_client_with_key]
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer import FormRecognizerClient
    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    form_recognizer_client = FormRecognizerClient(endpoint, AzureKeyCredential(key))
    # [END create_fr_client_with_key]
    # Exercise the freshly built client; the result itself is not inspected here.
    poller = form_recognizer_client.begin_recognize_content_from_url(self.url)
    result = poller.result()
def test_content_url_pdf(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
    """Recognize the invoice PDF URL and check page count, page number and first table size."""
    credential = AzureKeyCredential(form_recognizer_account_key)
    client = FormRecognizerClient(form_recognizer_account, credential)

    pages = client.begin_recognize_content_from_url(self.invoice_url_pdf).result()
    self.assertEqual(len(pages), 1)

    first_page = pages[0]
    self.assertEqual(first_page.page_number, 1)
    self.assertFormPagesHasValues(pages)

    # The first table on the page is expected to be 2 rows by 6 columns.
    first_table = first_page.tables[0]
    self.assertEqual(first_table.row_count, 2)
    self.assertEqual(first_table.column_count, 6)
def authentication_with_azure_active_directory_form_recognizer_client(self):
    # Sample: build a FormRecognizerClient with DefaultAzureCredential, then
    # start content recognition for self.url and wait for the poller's result.
    # The endpoint is read from AZURE_FORM_RECOGNIZER_ENDPOINT; a missing
    # variable raises KeyError.
    # NOTE(review): the [START]/[END] markers presumably delimit a region that
    # is extracted into published docs -- confirm before editing between them.
    # [START create_fr_client_with_aad]
    """DefaultAzureCredential will use the values from these environment variables:
    AZURE_CLIENT_ID, AZURE_TENANT_ID, AZURE_CLIENT_SECRET
    """
    from azure.ai.formrecognizer import FormRecognizerClient
    from azure.identity import DefaultAzureCredential

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    credential = DefaultAzureCredential()

    form_recognizer_client = FormRecognizerClient(endpoint, credential)
    # [END create_fr_client_with_aad]
    # Exercise the freshly built client; the result itself is not inspected here.
    poller = form_recognizer_client.begin_recognize_content_from_url(self.url)
    result = poller.result()
# <snippet_creds> endpoint = "<paste-your-form-recognizer-endpoint-here>" key = "<paste-your-form-recognizer-key-here>" # </snippet_creds> # <snippet_auth> form_recognizer_client = FormRecognizerClient(endpoint, AzureKeyCredential(key)) form_training_client = FormTrainingClient(endpoint, AzureKeyCredential(key)) # </snippet_auth> # <snippet_getcontent> formUrl = "https://raw.githubusercontent.com/Azure/azure-sdk-for-python/master/sdk/formrecognizer/azure-ai-formrecognizer/tests/sample_forms/forms/Form_1.jpg" poller = form_recognizer_client.begin_recognize_content_from_url(formUrl) page = poller.result() table = page[0].tables[0] # page 1, table 1 print("Table found on page {}:".format(table.page_number)) for cell in table.cells: print("Cell text: {}".format(cell.text)) print("Location: {}".format(cell.bounding_box)) print("Confidence score: {}\n".format(cell.confidence)) # </snippet_getcontent> # <snippet_receipts> receiptUrl = "https://raw.githubusercontent.com/Azure/azure-sdk-for-python/master/sdk/formrecognizer/azure-ai-formrecognizer/tests/sample_forms/receipt/contoso-receipt.png" poller = form_recognizer_client.begin_recognize_receipts_from_url(receiptUrl) result = poller.result()
cosmos_database_client = cosmos_client.get_database_client( getenv("AZURE_COSMOS_DB", default="azimageai")) cosmos_container_client = cosmos_database_client.get_container_client( getenv("AZURE_COSMOS_CONTAINER", default="images")) while True: print("Receiving messages...") batches = queue_client.receive_messages(messages_per_page=getenv( "AZURE_STORAGE_QUEUE_MSG_COUNT", default="10")) for batch in batches.by_page(): for message in batch: message_json = DotMap(json.loads(message.content)) fr_poller = fr_client.begin_recognize_content_from_url( message_json.url) fr_result = fr_poller.result() lines_of_text = [] for page in fr_result: for line in page.lines: lines_of_text.append(line.text) text = " ".join(lines_of_text) lines_of_text.clear() print(text) message_json.text = text if text: ta_response = ta_client.analyze_sentiment([text]) for doc in ta_response: