Exemple #1
0
    async def test_extract_summary_partial_results(self, client):
        """Check a batch where one document fails and the other is summarized.

        The first document has empty text and is expected to come back as an
        ``InvalidDocument`` error; the second is expected to produce an
        ``ExtractSummaryResult``.
        """
        empty_doc = {"id": "1", "language": "en", "text": ""}
        valid_doc = {"id": "2", "language": "en", "text": "hello world"}

        async with client:
            poller = await client.begin_analyze_actions(
                [empty_doc, valid_doc],
                actions=[ExtractSummaryAction()],
                show_stats=True,
                polling_interval=self._interval(),
            )
            response = await poller.result()

            # Drain the async iterable; each item is one document's results.
            document_results = [doc async for doc in response]

            failed = document_results[0][0]
            assert failed.is_error
            assert failed.error.code == "InvalidDocument"

            summarized = document_results[1][0]
            assert not summarized.is_error
            assert isinstance(summarized, ExtractSummaryResult)
Exemple #2
0
    def test_disable_service_logs(self, client):
        """Verify ``disable_service_logs=True`` reaches the request as ``loggingOptOut``.

        Every action is built with the flag set; a raw-response hook then
        inspects the JSON body of the outgoing request and checks the flag
        on the first entry of each task group.
        """
        action_types = (
            RecognizeEntitiesAction,
            ExtractKeyPhrasesAction,
            RecognizePiiEntitiesAction,
            RecognizeLinkedEntitiesAction,
            AnalyzeSentimentAction,
            ExtractSummaryAction,
        )
        actions = [
            action_type(disable_service_logs=True)
            for action_type in action_types
        ]

        assert all(action.disable_service_logs for action in actions)

        def verify_opt_out(raw):
            # The request body carries one task group per submitted action.
            request_tasks = json.loads(raw.http_request.body)["tasks"]
            assert len(request_tasks) == len(actions)
            for task_group in request_tasks.values():
                assert task_group[0]["parameters"]["loggingOptOut"]

        poller = client.begin_analyze_actions(
            documents=["Test for logging disable"],
            actions=actions,
            polling_interval=self._interval(),
            raw_response_hook=verify_opt_out,
        )
        poller.result()
Exemple #3
0
    async def test_all_successful_passing_dict_extract_summary_action(
            self, client):
        """Run ``ExtractSummaryAction`` over dict inputs and validate each result.

        Two documents (a long English text with id "1" and a short Spanish
        text with id "2") are submitted with a single summary action. Each
        document must yield exactly one ``ExtractSummaryResult`` with
        statistics, an id, and fully populated sentences.
        """
        long_english_text = (
            "The government of British Prime Minster Theresa May has been plunged into turmoil with the resignation"
            " of two senior Cabinet ministers in a deep split over her Brexit strategy. The Foreign Secretary Boris "
            "Johnson, quit on Monday, hours after the resignation late on Sunday night of the minister in charge of "
            "Brexit negotiations, David Davis. Their decision to leave the government came three days after May "
            "appeared to have agreed a deal with herfractured Cabinet on the UK's post Brexit relationship with "
            "the EU. That plan is now in tatters and her political future appears uncertain. May appeared in Parliament"
            " on Monday afternoon to defend her plan, minutes after Downing Street confirmed the departure of Johnson. "
            "May acknowledged the splits in her statement to MPs, saying of the ministers who quit: We do not agree "
            "about the best way of delivering our shared commitment to honoring the result of the referendum. The "
            "Prime Minister's latest plitical drama began late on Sunday night when Davis quit, declaring he could "
            "not support May's Brexit plan. He said it involved too close a relationship with the EU and gave only "
            "an illusion of control being returned to the UK after it left the EU. It seems to me we're giving too "
            "much away, too easily, and that's a dangerous strategy at this time, Davis said in a BBC radio "
            "interview Monday morning. Johnson's resignation came Monday afternoon local time, just before the "
            "Prime Minister was due to make a scheduled statement in Parliament. This afternoon, the Prime Minister "
            "accepted the resignation of Boris Johnson as Foreign Secretary, a statement from Downing Street said."
        )
        docs = [
            {"id": "1", "language": "en", "text": long_english_text},
            {
                "id": "2",
                "language": "es",
                "text": "Microsoft fue fundado por Bill Gates y Paul Allen",
            },
        ]

        async with client:
            poller = await client.begin_analyze_actions(
                docs,
                actions=[ExtractSummaryAction()],
                show_stats=True,
                polling_interval=self._interval(),
            )
            response = await poller.result()

            document_results = []
            async for doc in response:
                document_results.append(doc)

            assert len(document_results) == 2
            for document_result in document_results:
                # One action was submitted, so one result per document.
                assert len(document_result) == 1
                for result in document_result:
                    assert isinstance(result, ExtractSummaryResult)
                    assert result.statistics
                    assert result.id is not None
                    # The previous form, `assert len(...) == 3 if id == 0 else 1`,
                    # parsed as `assert ((len(...) == 3) if id == 0 else 1)` and,
                    # with string ids, always asserted the truthy constant 1.
                    # Parenthesise the expected value and compare against the
                    # real id of the long document.
                    # NOTE(review): counts 3 / 1 are taken from the intent of
                    # the original assertion - confirm against the recording.
                    expected_sentence_count = 3 if result.id == "1" else 1
                    assert len(result.sentences) == expected_sentence_count
                    for sentence in result.sentences:
                        assert sentence.text
                        assert sentence.rank_score is not None
                        assert sentence.offset is not None
                        assert sentence.length is not None
Exemple #4
0
def sample_extractive_summarization():
    """Summarize one document with ``ExtractSummaryAction`` and print the outcome.

    Reads the service endpoint and key from the
    ``AZURE_TEXT_ANALYTICS_ENDPOINT`` and ``AZURE_TEXT_ANALYTICS_KEY``
    environment variables, submits a single document, waits for the
    long-running operation, and prints either the joined summary sentences
    or the error code and message.

    Raises:
        KeyError: if either environment variable is not set.
    """
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.textanalytics import (
        TextAnalyticsClient,
        ExtractSummaryAction
    )

    endpoint = os.environ["AZURE_TEXT_ANALYTICS_ENDPOINT"]
    key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]

    text_analytics_client = TextAnalyticsClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key),
    )

    document = [
        "At Microsoft, we have been on a quest to advance AI beyond existing techniques, by taking a more holistic, "
        "human-centric approach to learning and understanding. As Chief Technology Officer of Azure AI Cognitive "
        "Services, I have been working with a team of amazing scientists and engineers to turn this quest into a "
        "reality. In my role, I enjoy a unique perspective in viewing the relationship among three attributes of "
        "human cognition: monolingual text (X), audio or visual sensory signals, (Y) and multilingual (Z). At the "
        "intersection of all three, there's magic-what we call XYZ-code as illustrated in Figure 1-a joint "
        "representation to create more powerful AI that can speak, hear, see, and understand humans better. "
        "We believe XYZ-code will enable us to fulfill our long-term vision: cross-domain transfer learning, "
        "spanning modalities and languages. The goal is to have pretrained models that can jointly learn "
        "representations to support a broad range of downstream AI tasks, much in the way humans do today. "
        "Over the past five years, we have achieved human performance on benchmarks in conversational speech "
        "recognition, machine translation, conversational question answering, machine reading comprehension, "
        "and image captioning. These five breakthroughs provided us with strong signals toward our more ambitious "
        "aspiration to produce a leap in AI capabilities, achieving multisensory and multilingual learning that "
        "is closer in line with how humans learn and understand. I believe the joint XYZ-code is a foundational "
        "component of this aspiration, if grounded with external knowledge sources in the downstream AI tasks."
    ]

    poller = text_analytics_client.begin_analyze_actions(
        document,
        actions=[
            ExtractSummaryAction(),
        ],
    )

    document_results = poller.result()
    for result in document_results:
        # Each `result` holds the action results for one document; only one
        # action was submitted, so index 0 is the summary result.
        extract_summary_result = result[0]
        if extract_summary_result.is_error:
            # Code and message live on the nested `error` object, not on the
            # error result itself.
            print("...Is an error with code '{}' and message '{}'".format(
                extract_summary_result.error.code,
                extract_summary_result.error.message,
            ))
        else:
            print("Summary extracted: \n{}".format(
                " ".join([sentence.text for sentence in extract_summary_result.sentences]))
            )
Exemple #5
0
    async def test_multiple_pages_of_results_returned_successfully(
            self, client):
        """Submit 25 identical documents with six actions and check every page.

        Each returned page must belong to the document at the same index and
        list its action results in the order the actions were submitted.
        """
        single_doc = "hello world"
        # max number of documents is 25
        docs = [{"id": str(idx), "text": single_doc} for idx in range(25)]

        submitted_actions = [
            RecognizeEntitiesAction(),
            ExtractKeyPhrasesAction(),
            RecognizePiiEntitiesAction(),
            RecognizeLinkedEntitiesAction(),
            AnalyzeSentimentAction(),
            ExtractSummaryAction(),
        ]

        async with client:
            poller = await client.begin_analyze_actions(
                docs,
                actions=submitted_actions,
                show_stats=True,
                polling_interval=self._interval(),
            )
            result = await poller.result()

            pages = [page async for page in result]

            assert len(pages) == len(docs)

        # Expected result order mirrors the order of the submitted actions.
        action_order = [
            _AnalyzeActionsType.RECOGNIZE_ENTITIES,
            _AnalyzeActionsType.EXTRACT_KEY_PHRASES,
            _AnalyzeActionsType.RECOGNIZE_PII_ENTITIES,
            _AnalyzeActionsType.RECOGNIZE_LINKED_ENTITIES,
            _AnalyzeActionsType.ANALYZE_SENTIMENT,
            _AnalyzeActionsType.EXTRACT_SUMMARY,
        ]
        results_by_action = defaultdict(list)

        for doc_idx, page in enumerate(pages):
            expected_id = str(doc_idx)
            for action_idx, document_result in enumerate(page):
                self.assertEqual(document_result.id, expected_id)
                action_type = self.document_result_to_action_type(
                    document_result)
                self.assertEqual(action_type, action_order[action_idx])
                results_by_action[action_type].append(document_result)

        # Every action type must have produced one result per document.
        assert len(results_by_action) == len(action_order)
        for grouped_results in results_by_action.values():
            assert len(grouped_results) == len(docs)
Exemple #6
0
    async def test_out_of_order_ids_multiple_tasks(self, client):
        """Check results keep the input order when document ids are not sorted.

        Four documents with ids "56", "0", "19", "1" are run through six
        actions; each returned page must match the input position's id and
        list its results in the order the actions were submitted.
        """
        document_order = ["56", "0", "19", "1"]
        texts = [":)", ":(", ":P", ":D"]
        docs = [
            {"id": doc_id, "text": text}
            for doc_id, text in zip(document_order, texts)
        ]

        async with client:
            poller = await client.begin_analyze_actions(
                docs,
                actions=[
                    RecognizeEntitiesAction(),
                    ExtractKeyPhrasesAction(),
                    RecognizePiiEntitiesAction(),
                    RecognizeLinkedEntitiesAction(),
                    AnalyzeSentimentAction(),
                    ExtractSummaryAction(),
                ],
                polling_interval=self._interval(),
            )
            response = await poller.result()

            results = [page async for page in response]
            assert len(results) == len(docs)

            action_order = [
                _AnalyzeActionsType.RECOGNIZE_ENTITIES,
                _AnalyzeActionsType.EXTRACT_KEY_PHRASES,
                _AnalyzeActionsType.RECOGNIZE_PII_ENTITIES,
                _AnalyzeActionsType.RECOGNIZE_LINKED_ENTITIES,
                _AnalyzeActionsType.ANALYZE_SENTIMENT,
                _AnalyzeActionsType.EXTRACT_SUMMARY,
            ]
            for doc_idx, document_results in enumerate(results):
                assert len(document_results) == 6
                expected_id = document_order[doc_idx]
                for action_idx, document_result in enumerate(document_results):
                    self.assertEqual(document_result.id, expected_id)
                    actual_type = self.document_result_to_action_type(
                        document_result)
                    self.assertEqual(actual_type, action_order[action_idx])
Exemple #7
0
 async def test_bad_credentials(self, client):
     """Expect ``ClientAuthenticationError`` when running the six actions with this client."""
     with self.assertRaises(ClientAuthenticationError):
         async with client:
             poller = await client.begin_analyze_actions(
                 ["This is written in English."],
                 actions=[
                     RecognizeEntitiesAction(),
                     ExtractKeyPhrasesAction(),
                     RecognizePiiEntitiesAction(),
                     RecognizeLinkedEntitiesAction(),
                     AnalyzeSentimentAction(),
                     ExtractSummaryAction(),
                 ],
                 polling_interval=self._interval(),
             )
             await poller.result()
Exemple #8
0
 def test_empty_credential_class(self, client):
     """Expect ``ClientAuthenticationError`` as soon as the analyze operation is started."""
     with self.assertRaises(ClientAuthenticationError):
         all_actions = [
             RecognizeEntitiesAction(),
             ExtractKeyPhrasesAction(),
             RecognizePiiEntitiesAction(),
             RecognizeLinkedEntitiesAction(),
             AnalyzeSentimentAction(),
             ExtractSummaryAction(),
         ]
         # Only starting the operation is exercised; the poller is never awaited.
         client.begin_analyze_actions(
             ["This is written in English."],
             actions=all_actions,
             polling_interval=self._interval(),
         )
Exemple #9
0
 async def test_missing_input_records_error(self, client):
     """Expect a ``ValueError`` with a descriptive message for an empty document list."""
     docs = []
     with pytest.raises(ValueError) as raised:
         async with client:
             poller = await client.begin_analyze_actions(
                 docs,
                 actions=[
                     RecognizeEntitiesAction(),
                     ExtractKeyPhrasesAction(),
                     RecognizePiiEntitiesAction(),
                     RecognizeLinkedEntitiesAction(),
                     AnalyzeSentimentAction(),
                     ExtractSummaryAction(),
                 ],
                 polling_interval=self._interval(),
             )
             await poller.result()
     error_text = str(raised.value)
     assert "Input documents can not be empty or None" in error_text
Exemple #10
0
    async def test_disable_service_logs(self, client):
        """Run all six actions with ``disable_service_logs=True`` and await completion.

        Checks that each action object exposes the flag, then submits the
        actions and waits for the operation result.
        """
        action_types = (
            RecognizeEntitiesAction,
            ExtractKeyPhrasesAction,
            RecognizePiiEntitiesAction,
            RecognizeLinkedEntitiesAction,
            AnalyzeSentimentAction,
            ExtractSummaryAction,
        )
        actions = [
            action_type(disable_service_logs=True)
            for action_type in action_types
        ]

        assert all(action.disable_service_logs for action in actions)

        poller = await client.begin_analyze_actions(
            documents=["Test for logging disable"],
            actions=actions,
            polling_interval=self._interval(),
        )
        await poller.result()
Exemple #11
0
    async def test_too_many_documents(self, client):
        """Expect an HTTP 400 error when 26 documents are sent in one request."""
        # Maximum number of documents per request is 25
        docs = ["input document"] * 26

        with pytest.raises(HttpResponseError) as raised:
            async with client:
                poller = await client.begin_analyze_actions(
                    docs,
                    actions=[
                        RecognizeEntitiesAction(),
                        ExtractKeyPhrasesAction(),
                        RecognizePiiEntitiesAction(),
                        RecognizeLinkedEntitiesAction(),
                        AnalyzeSentimentAction(),
                        ExtractSummaryAction(),
                    ],
                    polling_interval=self._interval(),
                )
                await poller.result()
        assert raised.value.status_code == 400
Exemple #12
0
    async def test_bad_model_version_error_multiple_tasks(self, client):
        """Expect ``HttpResponseError`` when most actions request ``model_version="bad"``.

        Only the entity-recognition action asks for ``"latest"``; the other
        five ask for ``"bad"``.
        """
        docs = [{
            "id": "1",
            "language": "english",
            "text": "I did not like the hotel we stayed at.",
        }]

        async with client:
            with pytest.raises(HttpResponseError):
                poller = await client.begin_analyze_actions(
                    docs,
                    actions=[
                        RecognizeEntitiesAction(model_version="latest"),
                        ExtractKeyPhrasesAction(model_version="bad"),
                        RecognizePiiEntitiesAction(model_version="bad"),
                        RecognizeLinkedEntitiesAction(model_version="bad"),
                        AnalyzeSentimentAction(model_version="bad"),
                        ExtractSummaryAction(model_version="bad"),
                    ],
                    polling_interval=self._interval(),
                )
                await poller.result()
Exemple #13
0
    async def test_bad_model_version_error_all_tasks(
            self, client):  # TODO: verify behavior of service
        """Expect ``HttpResponseError`` when every action requests ``model_version="bad"``."""
        docs = [{
            "id": "1",
            "language": "english",
            "text": "I did not like the hotel we stayed at.",
        }]

        with self.assertRaises(HttpResponseError):
            async with client:
                poller = await client.begin_analyze_actions(
                    docs,
                    actions=[
                        RecognizeEntitiesAction(model_version="bad"),
                        ExtractKeyPhrasesAction(model_version="bad"),
                        RecognizePiiEntitiesAction(model_version="bad"),
                        RecognizeLinkedEntitiesAction(model_version="bad"),
                        AnalyzeSentimentAction(model_version="bad"),
                        ExtractSummaryAction(model_version="bad"),
                    ],
                    polling_interval=self._interval(),
                )
                await poller.result()
Exemple #14
0
    def test_invalid_language_hint_method(self, client):
        """Check every action result is an error when ``language="notalanguage"`` is passed."""
        poller = client.begin_analyze_actions(
            [
                "This should fail because we're passing in an invalid language hint"
            ],
            language="notalanguage",
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
                RecognizeLinkedEntitiesAction(),
                AnalyzeSentimentAction(),
                ExtractSummaryAction(),
            ],
            polling_interval=self._interval(),
        )
        response = list(poller.result())

        # Outer level: one entry per document; inner level: one per action.
        for per_document in response:
            for action_result in per_document:
                assert action_result.is_error
Exemple #15
0
    async def test_show_stats_and_model_version_multiple_tasks(self, client):
        """Check task counters and statistics when six actions run with ``show_stats=True``.

        A raw-response hook validates the ``tasks`` section of the raw
        response (overall counters plus per-task statistics); the returned
        pages are then checked for per-document statistics.
        """
        docs = [
            {"id": "56", "text": ":)"},
            {"id": "0", "text": ":("},
            {"id": "19", "text": ":P"},
            {"id": "1", "text": ":D"},
        ]

        def callback(resp):
            assert resp.raw_response
            tasks = resp.raw_response['tasks']

            # Overall counters: all six tasks finished, none pending or failed.
            expected_counters = {
                'completed': 6,
                'inProgress': 0,
                'failed': 0,
                'total': 6,
            }
            for counter_name, expected in expected_counters.items():
                assert tasks[counter_name] == expected

            # Entries whose key contains "Tasks" hold the per-action task lists.
            task_groups = [
                group for name, group in tasks.items() if "Tasks" in name
            ]
            for group in task_groups:
                assert len(group) == 1
                stats = group[0]['results']['statistics']
                assert stats['documentsCount'] == 4
                assert stats['validDocumentsCount'] == 4
                assert stats['erroneousDocumentsCount'] == 0
                assert stats['transactionsCount'] == 4
            assert len(task_groups) == 6

        async with client:
            poller = await client.begin_analyze_actions(
                docs,
                actions=[
                    RecognizeEntitiesAction(model_version="latest"),
                    ExtractKeyPhrasesAction(model_version="latest"),
                    RecognizePiiEntitiesAction(model_version="latest"),
                    RecognizeLinkedEntitiesAction(model_version="latest"),
                    AnalyzeSentimentAction(model_version="latest"),
                    ExtractSummaryAction(model_version="latest"),
                ],
                show_stats=True,
                polling_interval=self._interval(),
                raw_response_hook=callback,
            )
            response = await poller.result()

            pages = [page async for page in response]
            assert len(pages) == len(docs)

            action_order = [
                _AnalyzeActionsType.RECOGNIZE_ENTITIES,
                _AnalyzeActionsType.EXTRACT_KEY_PHRASES,
                _AnalyzeActionsType.RECOGNIZE_PII_ENTITIES,
                _AnalyzeActionsType.RECOGNIZE_LINKED_ENTITIES,
                _AnalyzeActionsType.ANALYZE_SENTIMENT,
                _AnalyzeActionsType.EXTRACT_SUMMARY,
            ]
            expected_result_count = len(action_order)
            for document_results in pages:
                assert len(document_results) == expected_result_count
                for document_result in document_results:
                    doc_stats = document_result.statistics
                    assert doc_stats
                    assert document_result.statistics.character_count
                    assert document_result.statistics.transaction_count