async def test_form_multipage_labeled(
            self, client,
            formrecognizer_multipage_storage_container_sas_url_v2, **kwargs):
        """Train with labels, then recognize ``multipage_invoice1.pdf`` by URL.

        Each recognized form must report the ``custom:<model_id>`` form type
        and pass ``assertLabeledRecognizedFormHasValues`` against the model.
        """
        # this can be reverted to set_bodiless_matcher() after tests are
        # re-recorded and don't contain these headers
        ignored_headers = "Authorization,Content-Length,x-ms-client-request-id,x-ms-request-id"
        set_custom_default_matcher(
            compare_bodies=False, excluded_headers=ignored_headers)

        container_sas_url = formrecognizer_multipage_storage_container_sas_url_v2
        fr_client = client.get_form_recognizer_client()
        invoice_url = _get_blob_url(
            container_sas_url,
            "multipage-training-data",
            "multipage_invoice1.pdf",
        )

        async with client:
            train_op = await client.begin_training(
                container_sas_url, use_training_labels=True)
            model = await train_op.result()

            async with fr_client:
                recognize_op = await fr_client.begin_recognize_custom_forms_from_url(
                    model.model_id, invoice_url)
                forms = await recognize_op.result()

        for recognized in forms:
            assert recognized.form_type == "custom:" + model.model_id
            self.assertLabeledRecognizedFormHasValues(recognized, model)
    async def test_custom_form_multipage_unlabeled(
            self, client,
            formrecognizer_multipage_storage_container_sas_url_v2, **kwargs):
        """Train without labels, then recognize ``multipage_invoice1.pdf`` by URL.

        Forms whose ``form_type`` is ``None`` are skipped as blank pages; every
        other form must be typed ``form-0`` and pass
        ``assertUnlabeledRecognizedFormHasValues`` against the model.
        """
        set_bodiless_matcher()

        container_sas_url = formrecognizer_multipage_storage_container_sas_url_v2
        fr_client = client.get_form_recognizer_client()
        invoice_url = _get_blob_url(
            container_sas_url,
            "multipage-training-data",
            "multipage_invoice1.pdf",
        )

        async with client:
            train_op = await client.begin_training(
                container_sas_url, use_training_labels=False)
            model = await train_op.result()

            async with fr_client:
                recognize_op = await fr_client.begin_recognize_custom_forms_from_url(
                    model.model_id, invoice_url)
                forms = await recognize_op.result()

        for recognized in forms:
            # A missing form type marks a blank page; nothing to verify there.
            if recognized.form_type is not None:
                assert recognized.form_type == "form-0"
                self.assertUnlabeledRecognizedFormHasValues(recognized, model)
    async def test_pages_kwarg_specified(
            self, client, formrecognizer_testing_data_container_sas_url,
            **kwargs):
        """Check that ``pages`` is sent as a query parameter when recognizing.

        Trains a model without labels on the testing-data container, starts
        recognition of ``multi1.pdf`` with ``pages=["1"]``, asserts that the
        initial request's ``pages`` query value is ``'1'``, and finally checks
        that the operation produces a truthy result.
        """
        # this can be reverted to set_bodiless_matcher() after tests are
        # re-recorded and don't contain these headers
        set_custom_default_matcher(
            compare_bodies=False,
            excluded_headers=
            "Authorization,Content-Length,x-ms-client-request-id,x-ms-request-id"
        )
        fr_client = client.get_form_recognizer_client()
        blob_sas_url = _get_blob_url(
            formrecognizer_testing_data_container_sas_url, "testingdata",
            "multi1.pdf")

        # Training runs on `client`, so enter it as a context manager too
        # (the same nesting the other async tests use) instead of relying only
        # on the derived recognizer client; this makes sure the training
        # client is closed when the test finishes.
        async with client:
            training_poller = await client.begin_training(
                formrecognizer_testing_data_container_sas_url,
                use_training_labels=False)
            model = await training_poller.result()

            async with fr_client:
                poller = await fr_client.begin_recognize_custom_forms_from_url(
                    model.model_id, blob_sas_url, pages=["1"])

                # Reaches into the poller's private state to inspect the
                # request that started the operation.
                initial_request = (
                    poller._polling_method._initial_response.http_response.request)
                assert initial_request.query['pages'] == '1'

                result = await poller.result()
                assert result
    async def test_multipage_labeled_transform(
            self, client,
            formrecognizer_multipage_storage_container_sas_url_v2, **kwargs):
        """Compare the raw analyze result with the forms built from it (labeled).

        A ``cls`` callback captures the deserialized ``AnalyzeOperationResult``
        and the output of ``prepare_form_result``. The test then checks the
        page transform and, per document result, the page range, form type,
        type confidence, model id and field transform.
        """
        # this can be reverted to set_bodiless_matcher() after tests are
        # re-recorded and don't contain these headers
        ignored_headers = "Authorization,Content-Length,x-ms-client-request-id,x-ms-request-id"
        set_custom_default_matcher(
            compare_bodies=False, excluded_headers=ignored_headers)

        container_sas_url = formrecognizer_multipage_storage_container_sas_url_v2
        fr_client = client.get_form_recognizer_client()
        invoice_url = _get_blob_url(
            container_sas_url,
            "multipage-training-data",
            "multipage_invoice1.pdf",
        )
        captured = []

        def callback(raw_response, _, headers):
            # `model` is assigned further down, before recognition is started.
            operation_result = fr_client._deserialize(
                AnalyzeOperationResult, raw_response)
            built_forms = prepare_form_result(operation_result, model.model_id)
            captured.append(operation_result)
            captured.append(built_forms)

        async with client:
            train_op = await client.begin_training(
                container_sas_url, use_training_labels=True)
            model = await train_op.result()

            async with fr_client:
                recognize_op = await fr_client.begin_recognize_custom_forms_from_url(
                    model.model_id,
                    invoice_url,
                    include_field_elements=True,
                    cls=callback,
                )
                # Only the callback's captures are inspected below.
                await recognize_op.result()

        raw_result = captured[0]
        recognized_forms = captured[1]
        read_results = raw_result.analyze_result.read_results
        page_results = raw_result.analyze_result.page_results
        document_results = raw_result.analyze_result.document_results

        self.assertFormPagesTransformCorrect(
            recognized_forms, read_results, page_results)

        for recognized, document in zip(recognized_forms, document_results):
            assert recognized.page_range.first_page_number == document.page_range[0]
            assert recognized.page_range.last_page_number == document.page_range[1]
            assert recognized.form_type == "custom:" + model.model_id
            assert recognized.form_type_confidence is not None
            assert recognized.model_id == model.model_id
            self.assertFormFieldsTransformCorrect(
                recognized.fields, document.fields, read_results)
    async def test_multipage_unlabeled_transform(
            self, client,
            formrecognizer_multipage_storage_container_sas_url_v2, **kwargs):
        """Compare the raw analyze result with the forms built from it (unlabeled).

        A ``cls`` callback captures the deserialized ``AnalyzeOperationResult``
        and the output of ``prepare_form_result``. The test then checks the
        page transform and, per page result, the page range, the absence of a
        type confidence, the model id and the key/value field transform.
        """
        set_bodiless_matcher()

        container_sas_url = formrecognizer_multipage_storage_container_sas_url_v2
        fr_client = client.get_form_recognizer_client()
        invoice_url = _get_blob_url(
            container_sas_url,
            "multipage-training-data",
            "multipage_invoice1.pdf",
        )
        captured = []

        def callback(raw_response, _, headers):
            # `model` is assigned further down, before recognition is started.
            operation_result = fr_client._deserialize(
                AnalyzeOperationResult, raw_response)
            built_forms = prepare_form_result(operation_result, model.model_id)
            captured.append(operation_result)
            captured.append(built_forms)

        async with client:
            train_op = await client.begin_training(
                container_sas_url, use_training_labels=False)
            model = await train_op.result()

            async with fr_client:
                recognize_op = await fr_client.begin_recognize_custom_forms_from_url(
                    model.model_id,
                    invoice_url,
                    include_field_elements=True,
                    cls=callback,
                )
                # Only the callback's captures are inspected below.
                await recognize_op.result()

        raw_result = captured[0]
        recognized_forms = captured[1]
        read_results = raw_result.analyze_result.read_results
        page_results = raw_result.analyze_result.page_results

        self.assertFormPagesTransformCorrect(
            recognized_forms, read_results, page_results)

        for recognized, page in zip(recognized_forms, page_results):
            assert recognized.page_range.first_page_number == page.page
            assert recognized.page_range.last_page_number == page.page
            assert recognized.form_type_confidence is None
            assert recognized.model_id == model.model_id
            self.assertUnlabeledFormFieldDictTransformCorrect(
                recognized.fields, page.key_value_pairs, read_results)
Esempio n. 6
0
    def test_custom_form_multipage_vendor_set_labeled_transform(
            self, client,
            formrecognizer_multipage_storage_container_sas_url_2_v2, **kwargs):
        """Compare the raw analyze result with the built forms (vendor set, labeled).

        Trains with labels on the vendor-forms container, recognizes
        ``multi1.pdf`` with a ``cls`` callback that captures the deserialized
        ``AnalyzeOperationResult`` and the output of ``prepare_form_result``,
        then checks the page transform and, per document result, the page
        range, form type, type confidence, model id and field transform.
        """
        set_bodiless_matcher()

        container_sas_url = formrecognizer_multipage_storage_container_sas_url_2_v2
        fr_client = client.get_form_recognizer_client()
        document_url = _get_blob_url(
            container_sas_url, "multipage-vendor-forms", "multi1.pdf")

        train_op = client.begin_training(
            container_sas_url, use_training_labels=True)
        model = train_op.result()

        captured = []

        def callback(raw_response, _, headers):
            operation_result = fr_client._deserialize(
                AnalyzeOperationResult, raw_response)
            built_forms = prepare_form_result(operation_result, model.model_id)
            captured.append(operation_result)
            captured.append(built_forms)

        recognize_op = fr_client.begin_recognize_custom_forms_from_url(
            model.model_id,
            document_url,
            include_field_elements=True,
            cls=callback,
        )
        # Only the callback's captures are inspected below.
        recognize_op.result()

        raw_result = captured[0]
        recognized_forms = captured[1]
        read_results = raw_result.analyze_result.read_results
        page_results = raw_result.analyze_result.page_results
        document_results = raw_result.analyze_result.document_results

        self.assertFormPagesTransformCorrect(
            recognized_forms, read_results, page_results)

        for recognized, document in zip(recognized_forms, document_results):
            assert recognized.page_range.first_page_number == document.page_range[0]
            assert recognized.page_range.last_page_number == document.page_range[1]
            assert recognized.form_type == "custom:" + model.model_id
            assert recognized.form_type_confidence is not None
            assert recognized.model_id == model.model_id
            self.assertFormFieldsTransformCorrect(
                recognized.fields, document.fields, read_results)
Esempio n. 7
0
    def test_pages_kwarg_specified(
            self, client, formrecognizer_testing_data_container_sas_url,
            **kwargs):
        """Check that ``pages`` is sent as a query parameter when recognizing.

        Trains a model without labels, starts recognition of ``multi1.pdf``
        with ``pages=["1"]``, asserts that the initial request's ``pages``
        query value is ``'1'``, and checks the operation result is truthy.
        """
        set_bodiless_matcher()

        container_sas_url = formrecognizer_testing_data_container_sas_url
        fr_client = client.get_form_recognizer_client()
        document_url = _get_blob_url(
            container_sas_url, "testingdata", "multi1.pdf")

        train_op = client.begin_training(
            container_sas_url, use_training_labels=False)
        model = train_op.result()

        recognize_op = fr_client.begin_recognize_custom_forms_from_url(
            model.model_id, document_url, pages=["1"])

        # Reaches into the poller's private state to inspect the request that
        # started the operation.
        initial_request = (
            recognize_op._polling_method._initial_response.http_response.request)
        assert '1' == initial_request.query['pages']

        outcome = recognize_op.result()
        assert outcome
Esempio n. 8
0
    def test_form_multipage_labeled(
            self, client,
            formrecognizer_multipage_storage_container_sas_url_v2, **kwargs):
        """Train with labels, then recognize ``multipage_invoice1.pdf`` by URL.

        Each recognized form must report the ``custom:<model_id>`` form type
        and pass ``assertLabeledRecognizedFormHasValues`` against the model.
        """
        set_bodiless_matcher()

        container_sas_url = formrecognizer_multipage_storage_container_sas_url_v2
        invoice_url = _get_blob_url(
            container_sas_url,
            "multipage-training-data",
            "multipage_invoice1.pdf",
        )
        fr_client = client.get_form_recognizer_client()

        train_op = client.begin_training(
            container_sas_url, use_training_labels=True)
        model = train_op.result()

        recognize_op = fr_client.begin_recognize_custom_forms_from_url(
            model.model_id, invoice_url)
        forms = recognize_op.result()

        for recognized in forms:
            assert recognized.form_type == "custom:" + model.model_id
            self.assertLabeledRecognizedFormHasValues(recognized, model)