def test_document_analysis_none_model(self, formrecognizer_test_endpoint,
                                      formrecognizer_test_api_key):
    """Check that analyzing from a URL with ``model=None`` raises ``ValueError``."""
    credential = AzureKeyCredential(formrecognizer_test_api_key)
    client = DocumentAnalysisClient(formrecognizer_test_endpoint, credential)
    with self.assertRaises(ValueError):
        client.begin_analyze_document_from_url(
            model=None,
            document_url="https://badurl.jpg",
        )
Beispiel #2
0
 def test_document_analysis_none_model(self, **kwargs):
     """Check that analyzing from a URL with ``model=None`` raises ``ValueError``.

     The endpoint and API key are taken out of ``kwargs``.
     """
     endpoint = kwargs.pop("formrecognizer_test_endpoint")
     api_key = kwargs.pop("formrecognizer_test_api_key")
     client = DocumentAnalysisClient(endpoint, AzureKeyCredential(api_key))
     with pytest.raises(ValueError):
         client.begin_analyze_document_from_url(
             model=None,
             document_url="https://badurl.jpg",
         )
Beispiel #3
0
    def test_polling_interval(self, formrecognizer_test_endpoint, formrecognizer_test_api_key):
        """Check the client-level polling interval and its per-call override.

        The client is built with ``polling_interval=7``; one call overrides it
        with 6 and a second call without an override is expected to use 7.
        """
        credential = AzureKeyCredential(formrecognizer_test_api_key)
        client = DocumentAnalysisClient(
            formrecognizer_test_endpoint, credential, polling_interval=7
        )
        self.assertEqual(client._client._config.polling_interval, 7)

        # Per-call override.
        overridden = client.begin_analyze_document_from_url(
            "prebuilt-receipt", self.receipt_url_jpg, polling_interval=6
        )
        overridden.wait()
        self.assertEqual(overridden._polling_method._timeout, 6)

        # No override: goes back to the client default.
        defaulted = client.begin_analyze_document_from_url(
            "prebuilt-receipt", self.receipt_url_jpg
        )
        defaulted.wait()
        self.assertEqual(defaulted._polling_method._timeout, 7)
 def test_document_analysis_empty_model_id(self,
                                           formrecognizer_test_endpoint,
                                           formrecognizer_test_api_key,
                                           **kwargs):
     """Check that analyzing from a URL with an empty model string raises ``ValueError``."""
     credential = AzureKeyCredential(formrecognizer_test_api_key)
     client = DocumentAnalysisClient(formrecognizer_test_endpoint, credential)
     with pytest.raises(ValueError):
         client.begin_analyze_document_from_url(
             model="",
             document_url="https://badurl.jpg",
         )
Beispiel #5
0
    def test_polling_interval(self, formrecognizer_test_endpoint, formrecognizer_test_api_key, **kwargs):
        """Check the client-level polling interval and its per-call override.

        The client is built with ``polling_interval=7``; one call overrides it
        with 6 and a second call without an override is expected to use 7.
        """
        # this can be reverted to set_bodiless_matcher() after tests are re-recorded and don't contain these headers
        set_custom_default_matcher(
            compare_bodies=False,
            excluded_headers="Authorization,Content-Length,x-ms-client-request-id,x-ms-request-id",
        )
        credential = AzureKeyCredential(formrecognizer_test_api_key)
        client = DocumentAnalysisClient(
            formrecognizer_test_endpoint, credential, polling_interval=7
        )
        assert client._client._config.polling_interval == 7

        # Per-call override.
        overridden = client.begin_analyze_document_from_url(
            "prebuilt-receipt", self.receipt_url_jpg, polling_interval=6
        )
        overridden.wait()
        assert overridden._polling_method._timeout == 6

        # No override: goes back to the client default.
        defaulted = client.begin_analyze_document_from_url(
            "prebuilt-receipt", self.receipt_url_jpg
        )
        defaulted.wait()
        assert defaulted._polling_method._timeout == 7
Beispiel #6
0
 def test_active_directory_auth(self):
     """Check that a client built from the OAuth token and endpoint returns a receipt result."""
     # Token is fetched before the endpoint, in the same order as before.
     credential = self.generate_oauth_token()
     oauth_endpoint = self.get_oauth_endpoint()
     client = DocumentAnalysisClient(oauth_endpoint, credential)
     analysis = client.begin_analyze_document_from_url(
         "prebuilt-receipt", self.receipt_url_jpg
     ).result()
     assert analysis is not None
Beispiel #7
0
 def test_receipt_url_auth_bad_key(self, formrecognizer_test_endpoint,
                                   formrecognizer_test_api_key):
     """Check that the placeholder key "xxxx" leads to ``ClientAuthenticationError``.

     ``formrecognizer_test_api_key`` is accepted but not used by this test.
     """
     bad_credential = AzureKeyCredential("xxxx")
     client = DocumentAnalysisClient(formrecognizer_test_endpoint, bad_credential)
     with self.assertRaises(ClientAuthenticationError):
         client.begin_analyze_document_from_url(
             "prebuilt-receipt", self.receipt_url_jpg
         )
Beispiel #8
0
    def test_mock_quota_exceeded_403(self, formrecognizer_test_endpoint,
                                     formrecognizer_test_api_key):
        """Check that a mocked 403 quota response surfaces as ``HttpResponseError``.

        A mock transport hands back the same canned 403 response for every
        request; the test then checks the status code and message carried by
        the raised error.
        """
        quota_message = (
            "Out of call volume quota for FormRecognizer F0 pricing tier. "
            "Please retry after 1 day. To increase your call volume switch to a paid tier."
        )
        error_body = {"error": {"code": "403", "message": quota_message}}

        response = mock.Mock(
            status_code=403,
            headers={
                "Retry-After": 186688,
                "Content-Type": "application/json",
            },
            reason="Bad Request",
        )
        # The body is serialized on every call; the encoding argument is accepted and ignored.
        response.text = lambda encoding=None: json.dumps(error_body)
        response.content_type = "application/json"

        def send(request, **kwargs):
            return response

        transport = mock.Mock(send=send)

        credential = AzureKeyCredential(formrecognizer_test_api_key)
        client = DocumentAnalysisClient(
            formrecognizer_test_endpoint, credential, transport=transport
        )

        with pytest.raises(HttpResponseError) as raised:
            client.begin_analyze_document_from_url(
                "prebuilt-receipt", self.receipt_url_jpg
            )
        assert raised.value.status_code == 403
        assert raised.value.error.message == quota_message
 def test_receipt_url_auth_bad_key(self, formrecognizer_test_endpoint,
                                   **kwargs):
     """Check that the placeholder key "xxxx" leads to ``ClientAuthenticationError``."""
     set_bodiless_matcher()
     bad_credential = AzureKeyCredential("xxxx")
     client = DocumentAnalysisClient(formrecognizer_test_endpoint, bad_credential)
     with pytest.raises(ClientAuthenticationError):
         client.begin_analyze_document_from_url(
             "prebuilt-receipt", self.receipt_url_jpg
         )
Beispiel #10
0
 def test_receipt_url_bad_endpoint(self, formrecognizer_test_endpoint,
                                   formrecognizer_test_api_key):
     """Check that the endpoint "http://notreal.azure.com" leads to ``ServiceRequestError``.

     ``formrecognizer_test_endpoint`` is accepted but not used by this test.
     """
     # Client construction stays inside the context manager, as before.
     with self.assertRaises(ServiceRequestError):
         credential = AzureKeyCredential(formrecognizer_test_api_key)
         client = DocumentAnalysisClient("http://notreal.azure.com", credential)
         client.begin_analyze_document_from_url(
             "prebuilt-receipt", self.receipt_url_jpg
         )
Beispiel #11
0
 def test_receipt_url_auth_bad_key(self, formrecognizer_test_endpoint, **kwargs):
     """Check that the placeholder key "xxxx" leads to ``ClientAuthenticationError``."""
     # this can be reverted to set_bodiless_matcher() after tests are re-recorded and don't contain these headers
     set_custom_default_matcher(
         compare_bodies=False,
         excluded_headers="Authorization,Content-Length,x-ms-client-request-id,x-ms-request-id",
     )
     bad_credential = AzureKeyCredential("xxxx")
     client = DocumentAnalysisClient(formrecognizer_test_endpoint, bad_credential)
     with pytest.raises(ClientAuthenticationError):
         client.begin_analyze_document_from_url(
             "prebuilt-receipt", self.receipt_url_jpg
         )
Beispiel #12
0
 def test_receipt_url_bad_endpoint(self, formrecognizer_test_api_key, **kwargs):
     """Check that the endpoint "http://notreal.azure.com" leads to ``ServiceRequestError``."""
     # this can be reverted to set_bodiless_matcher() after tests are re-recorded and don't contain these headers
     set_custom_default_matcher(
         compare_bodies=False,
         excluded_headers="Authorization,Content-Length,x-ms-client-request-id,x-ms-request-id",
     )
     # Client construction stays inside the context manager, as before.
     with pytest.raises(ServiceRequestError):
         credential = AzureKeyCredential(formrecognizer_test_api_key)
         client = DocumentAnalysisClient("http://notreal.azure.com", credential)
         client.begin_analyze_document_from_url(
             "prebuilt-receipt", self.receipt_url_jpg
         )
Beispiel #13
0
 def test_receipt_url_bad_endpoint(self, formrecognizer_test_api_key,
                                   **kwargs):
     """Check that the endpoint "http://notreal.azure.com" leads to ``ServiceRequestError``."""
     set_bodiless_matcher()
     # Client construction stays inside the context manager, as before.
     with pytest.raises(ServiceRequestError):
         credential = AzureKeyCredential(formrecognizer_test_api_key)
         client = DocumentAnalysisClient("http://notreal.azure.com", credential)
         client.begin_analyze_document_from_url(
             "prebuilt-receipt", self.receipt_url_jpg
         )
    def test_polling_interval(self, formrecognizer_test_endpoint,
                              formrecognizer_test_api_key, **kwargs):
        """Check the client-level polling interval and its per-call override.

        The client is built with ``polling_interval=7``; one call overrides it
        with 6 and a second call without an override is expected to use 7.
        """
        set_bodiless_matcher()
        credential = AzureKeyCredential(formrecognizer_test_api_key)
        client = DocumentAnalysisClient(
            formrecognizer_test_endpoint, credential, polling_interval=7
        )
        assert client._client._config.polling_interval == 7

        # Per-call override.
        overridden = client.begin_analyze_document_from_url(
            "prebuilt-receipt", self.receipt_url_jpg, polling_interval=6
        )
        overridden.wait()
        assert overridden._polling_method._timeout == 6

        # No override: goes back to the client default.
        defaulted = client.begin_analyze_document_from_url(
            "prebuilt-receipt", self.receipt_url_jpg
        )
        defaulted.wait()
        assert defaulted._polling_method._timeout == 7
Beispiel #15
0
def authentication_with_api_key_credential_document_analysis_client():
    """Create a DocumentAnalysisClient from an API key and run one layout analysis.

    Reads the endpoint and key from the AZURE_FORM_RECOGNIZER_ENDPOINT and
    AZURE_FORM_RECOGNIZER_KEY environment variables (KeyError if either is
    unset), then analyzes ``url`` with the "prebuilt-layout" model and waits
    for the result. Nothing is returned.
    """
    # NOTE(review): the START/END markers look like a documentation snippet
    # region; keep the lines between them stable — confirm with the docs build.
    # [START create_da_client_with_key]
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer import DocumentAnalysisClient

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    document_analysis_client = DocumentAnalysisClient(endpoint, AzureKeyCredential(key))
    # [END create_da_client_with_key]
    # `url` (and `os`) are not defined in this function; presumably they are
    # module-level names — verify against the rest of the file.
    poller = document_analysis_client.begin_analyze_document_from_url(
        "prebuilt-layout", url
    )
    # Wait for the operation to finish; the result itself is not used.
    result = poller.result()
Beispiel #16
0
def analyze_document(endpoint, key, data):
    """Analyze the document described by ``data`` and return the extracted sections.

    ``data`` must provide "recordId", "formUrl", "formSasToken" and "model";
    a missing key raises KeyError. The document URL is the form URL with the
    SAS token appended. The sections in the returned dict depend on the model:

    * "prebuilt-layout": "tables", "pages"
    * "prebuilt-document": "kvp", "entities", "tables", "pages"
    * "prebuilt-receipt" / "prebuilt-idDocument" / "prebuilt-invoice":
      "fields", "tables", "pages"
    * any other model: "kvp" (filled from get_fields), "tables", "pages"
    """
    # The value is unused, but the lookup is kept so a missing "recordId"
    # still raises KeyError exactly as before.
    _ = data["recordId"]
    document_url = data["formUrl"] + data["formSasToken"]
    model_id = data["model"]

    client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key))
    analysis = client.begin_analyze_document_from_url(
        model_id, document_url).result()

    if model_id == "prebuilt-layout":
        return {
            "tables": get_tables(analysis),
            "pages": get_pages(analysis),
        }

    if model_id == "prebuilt-document":
        return {
            "kvp": get_key_value_pairs(analysis),
            "entities": get_entities(analysis),
            "tables": get_tables(analysis),
            "pages": get_pages(analysis),
        }

    # Every remaining model yields the same three sections; only the name of
    # the first key differs between the listed prebuilt models and the rest.
    models_with_fields = (
        "prebuilt-receipt",
        "prebuilt-idDocument",
        "prebuilt-invoice",
    )
    first_key = "fields" if model_id in models_with_fields else "kvp"
    return {
        first_key: get_fields(analysis),
        "tables": get_tables(analysis),
        "pages": get_pages(analysis),
    }
Beispiel #17
0
def authentication_with_azure_active_directory_document_analysis_client():
    # Creates a DocumentAnalysisClient with DefaultAzureCredential, then
    # analyzes `url` with the "prebuilt-layout" model and waits for the result.
    # Nothing is returned.
    # NOTE(review): the START/END markers look like a documentation snippet
    # region; keep the lines between them stable — confirm with the docs build.
    # [START create_da_client_with_aad]
    """DefaultAzureCredential will use the values from these environment
    variables: AZURE_CLIENT_ID, AZURE_TENANT_ID, AZURE_CLIENT_SECRET
    """
    from azure.ai.formrecognizer import DocumentAnalysisClient
    from azure.identity import DefaultAzureCredential

    # Raises KeyError when AZURE_FORM_RECOGNIZER_ENDPOINT is not set.
    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    credential = DefaultAzureCredential()

    document_analysis_client = DocumentAnalysisClient(endpoint, credential)
    # [END create_da_client_with_aad]
    # `url` (and `os`) are not defined in this function; presumably they are
    # module-level names — verify against the rest of the file.
    poller = document_analysis_client.begin_analyze_document_from_url(
        "prebuilt-layout", url
    )
    # Wait for the operation to finish; the result itself is not used.
    result = poller.result()
Beispiel #18
0
    def test_logging_info_dac_client(self, formrecognizer_test_endpoint,
                                     formrecognizer_test_api_key):
        """Check that INFO-level "azure" logs redact only the API key header.

        A MockHandler is attached to the "azure" logger while one receipt
        analysis runs. For every captured INFO record: a message mentioning
        "Ocp-Apim-Subscription-Key" must contain "REDACTED", and any other
        message must not.

        The handler and the logger's previous level are restored afterwards,
        so this test does not change logging for tests that run later.
        """
        client = DocumentAnalysisClient(
            formrecognizer_test_endpoint,
            AzureKeyCredential(formrecognizer_test_api_key))
        mock_handler = MockHandler()

        logger = logging.getLogger("azure")
        previous_level = logger.level
        logger.addHandler(mock_handler)
        logger.setLevel(logging.INFO)
        try:
            poller = client.begin_analyze_document_from_url(
                "prebuilt-receipt", self.receipt_url_jpg)
            # Wait for completion so all request/response logs are captured.
            poller.result()
        finally:
            # The "azure" logger is process-wide: undo both changes even when
            # the service call fails.
            logger.removeHandler(mock_handler)
            logger.setLevel(previous_level)

        for message in mock_handler.messages:
            if message.levelname != "INFO":
                continue
            # not able to use json.loads here. At INFO level only API key should be REDACTED
            if "Ocp-Apim-Subscription-Key" in message.message:
                assert "REDACTED" in message.message
            else:
                assert "REDACTED" not in message.message
class AnalyzeDocumentFromUrlRequestPreparation(PerfStressTest):
    """Perf test that starts a "prebuilt-document" analysis of a sample form URL.

    Each run only begins the operation and asserts that a poller came back;
    it does not wait for the analysis result.
    """

    def __init__(self, arguments):
        """Store the sample URL and build the sync and async clients.

        The endpoint and key come from the FORMRECOGNIZER_TEST_ENDPOINT and
        FORMRECOGNIZER_TEST_API_KEY environment variables.
        """
        super().__init__(arguments)

        self.document_jpg_url = "https://raw.githubusercontent.com/Azure/azure-sdk-for-python/main/sdk/formrecognizer/azure-ai-formrecognizer/tests/sample_forms/forms/Form_1.jpg"

        # Test configuration from the environment (KeyError when unset).
        endpoint = os.environ["FORMRECOGNIZER_TEST_ENDPOINT"]
        api_key = os.environ["FORMRECOGNIZER_TEST_API_KEY"]

        # Each client gets its own credential object, as before.
        self.service_client = DocumentAnalysisClient(
            endpoint, AzureKeyCredential(api_key)
        )
        self.async_service_client = AsyncDocumentAnalysisClient(
            endpoint, AzureKeyCredential(api_key)
        )

    async def close(self):
        """Close the async client, then the sync client, then the base class."""
        await self.async_service_client.close()
        self.service_client.close()
        await super().close()

    def run_sync(self):
        """Start one analysis with the synchronous client."""
        sync_poller = self.service_client.begin_analyze_document_from_url(
            "prebuilt-document", self.document_jpg_url
        )
        assert sync_poller

    async def run_async(self):
        """Start one analysis with the asynchronous client."""
        async_poller = await self.async_service_client.begin_analyze_document_from_url(
            "prebuilt-document", self.document_jpg_url
        )
        assert async_poller
def analyze_document(endpoint, key, recordId, data):
    """Analyze the document described by ``data`` and wrap the outcome in a record.

    ``data`` must provide "formUrl", "formSasToken" and "model". On success the
    returned dict is ``{"recordId": ..., "data": {...}}``; the sections inside
    "data" depend on the model:

    * "prebuilt-layout": "tables", "pages"
    * "prebuilt-document": "kvp", "entities", "tables", "pages"
    * "prebuilt-receipt" / "prebuilt-idDocument" / "prebuilt-invoice":
      "fields", "tables", "pages"
    * any other model: "kvp" (filled from get_fields), "tables", "pages"

    Any exception raised while analyzing or formatting is caught and reported
    as ``{"recordId": ..., "errors": [{"message": "Error: ..."}]}`` instead of
    propagating. The final record is logged as JSON before it is returned.
    """
    try:
        document_url = data["formUrl"] + data["formSasToken"]
        model_id = data["model"]
        logging.info("Model: " + model_id)

        client = DocumentAnalysisClient(
            endpoint=endpoint, credential=AzureKeyCredential(key)
        )
        analysis = client.begin_analyze_document_from_url(
            model_id, document_url
        ).result()
        logging.info("Result from Form Recognizer before formatting: " + str(analysis))

        if model_id == "prebuilt-layout":
            extracted = {
                "tables": get_tables(analysis),
                "pages": get_pages(analysis),
            }
        elif model_id == "prebuilt-document":
            extracted = {
                "kvp": get_key_value_pairs(analysis),
                "entities": get_entities(analysis),
                "tables": get_tables(analysis),
                "pages": get_pages(analysis),
            }
        else:
            # Every remaining model yields the same three sections; only the
            # name of the first key differs.
            models_with_fields = (
                "prebuilt-receipt",
                "prebuilt-idDocument",
                "prebuilt-invoice",
            )
            first_key = "fields" if model_id in models_with_fields else "kvp"
            extracted = {
                first_key: get_fields(analysis),
                "tables": get_tables(analysis),
                "pages": get_pages(analysis),
            }

        output_record = {"recordId": recordId, "data": extracted}

    except Exception as error:
        # Failures are reported per record rather than raised to the caller.
        output_record = {
            "recordId": recordId,
            "errors": [{"message": "Error: " + str(error)}],
        }

    logging.info("Output record: " + json.dumps(output_record, ensure_ascii=False, cls=DateTimeEncoder))
    return output_record

        
def analyze_receipts_from_url():
    """Analyze a sample receipt image by URL and print the recognized fields.

    Builds a DocumentAnalysisClient from the AZURE_FORM_RECOGNIZER_ENDPOINT and
    AZURE_FORM_RECOGNIZER_KEY environment variables (KeyError if either is
    unset), runs the "prebuilt-receipt" model on a fixed sample URL, and prints
    the value and confidence of each field that is present. Nothing is returned.
    """
    # [START analyze_receipts_from_url]
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.formrecognizer import DocumentAnalysisClient

    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )
    url = "https://raw.githubusercontent.com/Azure/azure-sdk-for-python/main/sdk/formrecognizer/azure-ai-formrecognizer/tests/sample_forms/receipt/contoso-receipt.png"
    poller = document_analysis_client.begin_analyze_document_from_url(
        "prebuilt-receipt", document_url=url
    )
    receipts = poller.result()

    for idx, receipt in enumerate(receipts.documents):
        print("--------Recognizing receipt #{}--------".format(idx + 1))
        print("Receipt type: {}".format(receipt.doc_type or "N/A"))
        merchant_name = receipt.fields.get("MerchantName")
        if merchant_name:
            print(
                "Merchant Name: {} has confidence: {}".format(
                    merchant_name.value, merchant_name.confidence
                )
            )
        transaction_date = receipt.fields.get("TransactionDate")
        if transaction_date:
            print(
                "Transaction Date: {} has confidence: {}".format(
                    transaction_date.value, transaction_date.confidence
                )
            )
        # Look the items field up once and reuse it for the check and the loop.
        items = receipt.fields.get("Items")
        if items:
            print("Receipt items:")
            # A separate index name keeps the receipt index `idx` intact.
            for item_idx, item in enumerate(items.value):
                print("...Item #{}".format(item_idx + 1))
                item_name = item.value.get("Name")
                if item_name:
                    print(
                        "......Item Name: {} has confidence: {}".format(
                            item_name.value, item_name.confidence
                        )
                    )
                item_quantity = item.value.get("Quantity")
                if item_quantity:
                    print(
                        "......Item Quantity: {} has confidence: {}".format(
                            item_quantity.value, item_quantity.confidence
                        )
                    )
                item_price = item.value.get("Price")
                if item_price:
                    print(
                        "......Individual Item Price: {} has confidence: {}".format(
                            item_price.value, item_price.confidence
                        )
                    )
                item_total_price = item.value.get("TotalPrice")
                if item_total_price:
                    print(
                        "......Total Item Price: {} has confidence: {}".format(
                            item_total_price.value, item_total_price.confidence
                        )
                    )
        subtotal = receipt.fields.get("Subtotal")
        if subtotal:
            print(
                "Subtotal: {} has confidence: {}".format(
                    subtotal.value, subtotal.confidence
                )
            )
        tax = receipt.fields.get("Tax")
        if tax:
            print("Tax: {} has confidence: {}".format(tax.value, tax.confidence))
        tip = receipt.fields.get("Tip")
        if tip:
            print("Tip: {} has confidence: {}".format(tip.value, tip.confidence))
        total = receipt.fields.get("Total")
        if total:
            print("Total: {} has confidence: {}".format(total.value, total.confidence))
        print("--------------------------------------")