async def test_missing_input_records_error(self, client):
    """An empty document list must raise ValueError with a clear message."""
    empty_docs = []
    every_action = [
        RecognizeEntitiesAction(),
        ExtractKeyPhrasesAction(),
        RecognizePiiEntitiesAction(),
        RecognizeLinkedEntitiesAction(),
        AnalyzeSentimentAction(),
    ]
    with pytest.raises(ValueError) as excinfo:
        async with client:
            poller = await client.begin_analyze_actions(
                empty_docs,
                actions=every_action,
                polling_interval=self._interval(),
            )
            await poller.result()
    assert "Input documents can not be empty or None" in str(excinfo.value)
# Example 2
    def test_too_many_documents(self, client):
        """Submitting 26 documents (one over the 25-document cap) must fail with HTTP 400."""
        docs = ["input document"] * 26  # maximum number of documents per request is 25

        with pytest.raises(HttpResponseError) as excinfo:
            client.begin_analyze_batch_actions(
                docs,
                actions=[
                    RecognizeEntitiesAction(),
                    ExtractKeyPhrasesAction(),
                    RecognizePiiEntitiesAction(),
                ],
                polling_interval=self._interval(),
            )
        assert excinfo.value.status_code == 400
    async def test_duplicate_ids_error(self, client):  # TODO: verify behavior of service
        """Two documents sharing id "1" should be rejected with HttpResponseError."""
        docs = [
            {"id": "1", "text": "hello world"},
            {"id": "1", "text": "I did not like the hotel we stayed at."},
        ]

        with self.assertRaises(HttpResponseError):
            async with client:
                poller = await client.begin_analyze_batch_actions(
                    docs,
                    actions=[
                        RecognizeEntitiesAction(),
                        ExtractKeyPhrasesAction(),
                        RecognizePiiEntitiesAction(),
                    ],
                    polling_interval=self._interval(),
                )
                await poller.result()
 def test_mixing_inputs(self, client):
     """Mixing dict / TextDocumentInput documents with a raw string must raise TypeError."""
     docs = [
         {"id": "1", "text": "Microsoft was founded by Bill Gates and Paul Allen."},
         TextDocumentInput(id="2", text="I did not like the hotel we stayed at. It was too expensive."),
         u"You cannot mix string input with the above inputs"
     ]
     with self.assertRaises(TypeError):
         client.begin_analyze_batch_actions(
             docs,
             actions=[
                 RecognizeEntitiesAction(),
                 ExtractKeyPhrasesAction(),
                 RecognizePiiEntitiesAction(),
             ],
             polling_interval=self._interval(),
         ).result()
    async def test_bad_model_version_error_all_tasks(self, client):  # TODO: verify behavior of service
        """An invalid model_version on every action should surface as HttpResponseError."""
        docs = [{"id": "1", "language": "english", "text": "I did not like the hotel we stayed at."}]
        bad_actions = [
            action_cls(model_version="bad")
            for action_cls in (
                RecognizeEntitiesAction,
                ExtractKeyPhrasesAction,
                RecognizePiiEntitiesAction,
                RecognizeLinkedEntitiesAction,
                AnalyzeSentimentAction,
            )
        ]

        with self.assertRaises(HttpResponseError):
            async with client:
                poller = await client.begin_analyze_actions(
                    docs,
                    actions=bad_actions,
                    polling_interval=self._interval(),
                )
                await poller.result()
# Example 6
    def test_out_of_order_ids_multiple_tasks(self, client):
        """Document results must preserve submission order even with non-sequential ids.

        NOTE(review): five actions are submitted but only three action results
        are asserted below — this looks copied from a three-action variant of
        the test; confirm the expected count against actual service behavior.
        """
        docs = [{
            "id": "56",
            "text": ":)"
        }, {
            "id": "0",
            "text": ":("
        }, {
            "id": "19",
            "text": ":P"
        }, {
            "id": "1",
            "text": ":D"
        }]

        response = client.begin_analyze_batch_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(model_version="bad"),  # deliberately invalid
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
                RecognizeLinkedEntitiesAction(),
                AnalyzeSentimentAction()
            ],
            polling_interval=self._interval(),
        ).result()

        action_results = list(response)
        assert len(action_results) == 3

        assert action_results[0].is_error
        assert action_results[
            1].action_type == AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES
        assert action_results[
            2].action_type == AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES

        action_results = [r for r in action_results if not r.is_error]
        # Every successful action must return one result per input document.
        # (The original `all([x for x in xs if cond])` was vacuously true: the
        # comprehension filters, so all() only ever sees truthy items.)
        assert all(
            len(action_result.document_results) == len(docs)
            for action_result in action_results
        )

        in_order = ["56", "0", "19", "1"]

        for action_result in action_results:
            for idx, resp in enumerate(action_result.document_results):
                self.assertEqual(resp.id, in_order[idx])
# Example 7
    def test_out_of_order_ids_multiple_tasks(self, client):
        """Six actions over four out-of-order document ids.

        Uses the document-grouped result shape: one entry per input document,
        each containing one result per action in submission order.
        """
        docs = [{
            "id": "56",
            "text": ":)"
        }, {
            "id": "0",
            "text": ":("
        }, {
            "id": "19",
            "text": ":P"
        }, {
            "id": "1",
            "text": ":D"
        }]

        response = client.begin_analyze_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
                RecognizeLinkedEntitiesAction(),
                AnalyzeSentimentAction(),
                ExtractSummaryAction()
            ],
            polling_interval=self._interval(),
        ).result()

        results = list(response)
        # One grouped result per input document.
        assert len(results) == len(docs)

        # Documents must come back in submission order; within each document,
        # results must follow the submitted action order.
        document_order = ["56", "0", "19", "1"]
        action_order = [
            _AnalyzeActionsType.RECOGNIZE_ENTITIES,
            _AnalyzeActionsType.EXTRACT_KEY_PHRASES,
            _AnalyzeActionsType.RECOGNIZE_PII_ENTITIES,
            _AnalyzeActionsType.RECOGNIZE_LINKED_ENTITIES,
            _AnalyzeActionsType.ANALYZE_SENTIMENT,
            _AnalyzeActionsType.EXTRACT_SUMMARY
        ]
        for doc_idx, document_results in enumerate(results):
            assert len(document_results) == 6
            for action_idx, document_result in enumerate(document_results):
                self.assertEqual(document_result.id, document_order[doc_idx])
                self.assertEqual(
                    self.document_result_to_action_type(document_result),
                    action_order[action_idx])
    async def test_all_successful_passing_text_document_input_entities_task(
            self, client):
        """RecognizeEntitiesAction over three TextDocumentInput languages:
        a single action result covering all documents, four entities each."""
        docs = [
            TextDocumentInput(
                id="1",
                text=
                "Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975",
                language="en"),
            TextDocumentInput(
                id="2",
                text=
                "Microsoft fue fundado por Bill Gates y Paul Allen el 4 de abril de 1975.",
                language="es"),
            TextDocumentInput(
                id="3",
                text=
                "Microsoft wurde am 4. April 1975 von Bill Gates und Paul Allen gegründet.",
                language="de"),
        ]

        async with client:
            poller = await client.begin_analyze_batch_actions(
                docs,
                actions=[RecognizeEntitiesAction()],
                show_stats=True,
                polling_interval=self._interval(),
            )
            response = await poller.result()

            # Drain the async pager; one submitted action => one action result.
            action_results = []
            async for p in response:
                action_results.append(p)
            assert len(action_results) == 1
            action_result = action_results[0]

            assert action_result.action_type == AnalyzeBatchActionsType.RECOGNIZE_ENTITIES
            assert len(action_result.document_results) == len(docs)

            for doc in action_result.document_results:
                self.assertEqual(len(doc.entities), 4)
                self.assertIsNotNone(doc.id)
                # NOTE(review): show_stats=True but the statistics check is
                # disabled — confirm whether doc.statistics should be asserted.
                # self.assertIsNotNone(doc.statistics)
                for entity in doc.entities:
                    self.assertIsNotNone(entity.text)
                    self.assertIsNotNone(entity.category)
                    self.assertIsNotNone(entity.offset)
                    self.assertIsNotNone(entity.confidence_score)
# Example 9
    def test_multiple_pages_of_results_returned_successfully(self, client):
        """25 identical documents x 6 actions: results come grouped per document,
        action types appear in submission order, and each action type
        accumulates exactly one result per document."""
        single_doc = "hello world"
        docs = [{
            "id": str(idx),
            "text": val
        } for (idx, val) in enumerate(list(itertools.repeat(single_doc, 25)))
                ]  # max number of documents is 25

        result = client.begin_analyze_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
                RecognizeLinkedEntitiesAction(),
                AnalyzeSentimentAction(),
                ExtractSummaryAction()
            ],
            show_stats=True,
            polling_interval=self._interval(),
        ).result()

        pages = list(result)
        # One grouped page per input document.
        assert len(pages) == len(docs)
        action_order = [
            _AnalyzeActionsType.RECOGNIZE_ENTITIES,
            _AnalyzeActionsType.EXTRACT_KEY_PHRASES,
            _AnalyzeActionsType.RECOGNIZE_PII_ENTITIES,
            _AnalyzeActionsType.RECOGNIZE_LINKED_ENTITIES,
            _AnalyzeActionsType.ANALYZE_SENTIMENT,
            _AnalyzeActionsType.EXTRACT_SUMMARY
        ]
        # Bucket each document result by the action type that produced it.
        action_type_to_document_results = defaultdict(list)

        for doc_idx, page in enumerate(pages):
            for action_idx, document_result in enumerate(page):
                self.assertEqual(document_result.id, str(doc_idx))
                action_type = self.document_result_to_action_type(
                    document_result)
                self.assertEqual(action_type, action_order[action_idx])
                action_type_to_document_results[action_type].append(
                    document_result)

        # Every action type was seen, and each covered all 25 documents.
        assert len(action_type_to_document_results) == len(action_order)
        for document_results in action_type_to_document_results.values():
            assert len(document_results) == len(docs)
    def test_invalid_language_hint_method(self, client):
        """A bogus language hint should make every per-document result an error."""
        poller = client.begin_analyze_actions(
            ["This should fail because we're passing in an invalid language hint"],
            language="notalanguage",
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
                RecognizeLinkedEntitiesAction(),
                AnalyzeSentimentAction()
            ],
            polling_interval=self._interval(),
        )
        response = list(poller.result())

        for action_result in response:
            assert all(doc.is_error for doc in action_result.document_results)
# Example 11
def sample_model_version():
    """Sample: pin model_version='latest' for recognize_entities and for a
    RecognizeEntitiesAction submitted through begin_analyze_actions.

    Reads the service endpoint and key from the AZURE_TEXT_ANALYTICS_ENDPOINT
    and AZURE_TEXT_ANALYTICS_KEY environment variables.
    """
    print("--------------Choosing model_version sample--------------")
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.textanalytics import TextAnalyticsClient, RecognizeEntitiesAction

    endpoint = os.environ["AZURE_TEXT_ANALYTICS_ENDPOINT"]
    key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]

    text_analytics_client = TextAnalyticsClient(
        endpoint=endpoint, credential=AzureKeyCredential(key))
    documents = [
        "I work for Foo Company, and we hired Contoso for our annual founding ceremony. The food \
        was amazing and we all can't say enough good words about the quality and the level of service."
    ]

    print("\nSetting model_version='latest' with recognize_entities")
    result = text_analytics_client.recognize_entities(documents,
                                                      model_version="latest")
    # Keep only successful per-document results.
    result = [review for review in result if not review.is_error]

    print("...Results of Recognize Entities:")
    for review in result:
        for entity in review.entities:
            print(
                f"......Entity '{entity.text}' has category '{entity.category}'"
            )

    print(
        "\nSetting model_version='latest' with recognize entities action in begin_analyze_actions"
    )
    poller = text_analytics_client.begin_analyze_actions(
        documents, actions=[RecognizeEntitiesAction(model_version="latest")])

    print("...Results of Recognize Entities Action:")
    document_results = poller.result()
    for action_results in document_results:
        # Only one action was submitted, so its result is first per document.
        recognize_entities_result = action_results[0]
        if recognize_entities_result.is_error:
            print("......Is an error with code '{}' and message '{}'".format(
                recognize_entities_result.code,
                recognize_entities_result.message))
        else:
            for entity in recognize_entities_result.entities:
                print(
                    f"......Entity '{entity.text}' has category '{entity.category}'"
                )
    async def test_disable_service_logs(self, client):
        """disable_service_logs=True must stick on each action and be accepted by the service."""
        action_classes = (
            RecognizeEntitiesAction,
            ExtractKeyPhrasesAction,
            RecognizePiiEntitiesAction,
            RecognizeLinkedEntitiesAction,
            AnalyzeSentimentAction,
        )
        actions = [cls(disable_service_logs=True) for cls in action_classes]

        assert all(action.disable_service_logs for action in actions)

        poller = await client.begin_analyze_actions(
            documents=["Test for logging disable"],
            actions=actions,
            polling_interval=self._interval(),
        )
        await poller.result()
    async def test_bad_model_version_error_multiple_tasks(self, client):
        """A bad model_version on some actions should fail the whole job with HttpResponseError."""
        docs = [{"id": "1", "language": "english", "text": "I did not like the hotel we stayed at."}]

        async with client:
            with pytest.raises(HttpResponseError):
                poller = await client.begin_analyze_actions(
                    docs,
                    actions=[
                        RecognizeEntitiesAction(model_version="latest"),
                        ExtractKeyPhrasesAction(model_version="bad"),
                        RecognizePiiEntitiesAction(model_version="bad"),
                        RecognizeLinkedEntitiesAction(model_version="bad"),
                        AnalyzeSentimentAction(model_version="bad")
                    ],
                    polling_interval=self._interval()
                )
                await poller.result()
# Example 14
    def test_all_successful_passing_text_document_input_entities_task(
            self, client):
        """RecognizeEntitiesAction over three languages: one result page per
        document, each with exactly four recognized entities carrying full
        metadata (text, category, offset, confidence score)."""
        docs = [
            TextDocumentInput(
                id="1",
                text=
                "Microsoft was founded by Bill Gates and Paul Allen on April 4, 1975",
                language="en"),
            TextDocumentInput(
                id="2",
                text=
                "Microsoft fue fundado por Bill Gates y Paul Allen el 4 de abril de 1975.",
                language="es"),
            TextDocumentInput(
                id="3",
                text=
                "Microsoft wurde am 4. April 1975 von Bill Gates und Paul Allen gegründet.",
                language="de"),
        ]

        response = client.begin_analyze_actions(
            docs,
            actions=[RecognizeEntitiesAction()],
            show_stats=True,
            polling_interval=self._interval(),
        ).result()

        pages = list(response)
        assert len(pages) == len(docs)

        for document_results in pages:
            assert len(document_results) == 1
            document_result = document_results[0]
            assert isinstance(document_result, RecognizeEntitiesResult)
            assert len(document_result.entities) == 4
            assert document_result.id is not None
            # Removed three duplicate unittest-style assertions that re-checked
            # category/offset/confidence_score immediately after these asserts.
            for entity in document_result.entities:
                assert entity.text is not None
                assert entity.category is not None
                assert entity.offset is not None
                assert entity.confidence_score is not None
    async def test_multiple_pages_of_results_returned_successfully(self, client):
        """25 documents x 3 actions: action results are paged (20 docs, then 5)
        per action, interleaved entities/key-phrases/PII, and the pages of each
        action must jointly cover every input document."""
        single_doc = "hello world"
        docs = [{"id": str(idx), "text": val} for (idx, val) in
                enumerate(list(itertools.repeat(single_doc, 25)))]  # max number of documents is 25

        async with client:
            result = await (await client.begin_analyze_batch_actions(
                docs,
                actions=[
                    RecognizeEntitiesAction(),
                    ExtractKeyPhrasesAction(),
                    RecognizePiiEntitiesAction(),
                ],
                show_stats=True,
                polling_interval=self._interval()
            )).result()

            pages = []
            async for p in result:
                pages.append(p)

            recognize_entities_results = []
            extract_key_phrases_results = []
            recognize_pii_entities_results = []

            # Action results repeat in submission order: entities, key phrases, PII.
            for idx, action_result in enumerate(pages):
                if idx % 3 == 0:
                    assert action_result.action_type == AnalyzeBatchActionsType.RECOGNIZE_ENTITIES
                    recognize_entities_results.append(action_result)
                elif idx % 3 == 1:
                    assert action_result.action_type == AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES
                    extract_key_phrases_results.append(action_result)
                else:
                    assert action_result.action_type == AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES
                    recognize_pii_entities_results.append(action_result)
                if idx < 3:  # first page of task results
                    assert len(action_result.document_results) == 20
                else:
                    assert len(action_result.document_results) == 5

            # The original three asserts used all([x for x in xs if cond]),
            # which is vacuously true (a filtering comprehension never yields a
            # falsy item). The intent — each action's pages jointly cover all
            # 25 documents (20 + 5) — is asserted for real here.
            for results_for_action in (
                recognize_entities_results,
                extract_key_phrases_results,
                recognize_pii_entities_results,
            ):
                assert sum(
                    len(page.document_results) for page in results_for_action
                ) == len(docs)
# Example 16
    async def test_too_many_documents(self, client):
        """26 documents exceed the 25-document request cap and must yield HTTP 400."""
        docs = ["input document"] * 26  # maximum number of documents per request is 25

        with pytest.raises(HttpResponseError) as excinfo:
            async with client:
                poller = await client.begin_analyze_actions(
                    docs,
                    actions=[
                        RecognizeEntitiesAction(),
                        ExtractKeyPhrasesAction(),
                        RecognizePiiEntitiesAction(),
                        RecognizeLinkedEntitiesAction(),
                        AnalyzeSentimentAction(),
                        ExtractSummaryAction()
                    ],
                    polling_interval=self._interval())
                await poller.result()
        assert excinfo.value.status_code == 400
    def test_bad_model_version_error_multiple_tasks(self, client):  # TODO: verify behavior of service
        """With a bad model_version on two of three actions, only those two
        action results come back flagged as InvalidRequest errors."""
        docs = [{"id": "1", "language": "english", "text": "I did not like the hotel we stayed at."}]

        response = client.begin_analyze_batch_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(model_version="latest"),
                ExtractKeyPhrasesAction(model_version="bad"),
                RecognizePiiEntitiesAction(model_version="bad")
            ],
            polling_interval=self._interval(),
        ).result()

        action_results = list(response)
        # PEP 8: never compare to True/False with ==; assert truthiness directly.
        assert not action_results[0].is_error
        assert action_results[0].action_type == AnalyzeBatchActionsType.RECOGNIZE_ENTITIES
        assert action_results[1].is_error
        assert action_results[1].error.code == "InvalidRequest"
        assert action_results[2].is_error
        assert action_results[2].error.code == "InvalidRequest"
    async def test_show_stats_and_model_version_multiple_tasks(self, client):
        """show_stats=True with model_version='latest' on three actions:
        one action result per action, in submission order, each covering
        all four documents."""
        docs = [{
            "id": "56",
            "text": ":)"
        }, {
            "id": "0",
            "text": ":("
        }, {
            "id": "19",
            "text": ":P"
        }, {
            "id": "1",
            "text": ":D"
        }]

        async with client:
            response = await (await client.begin_analyze_batch_actions(
                docs,
                actions=[
                    RecognizeEntitiesAction(model_version="latest"),
                    ExtractKeyPhrasesAction(model_version="latest"),
                    RecognizePiiEntitiesAction(model_version="latest")
                ],
                show_stats=True,
                polling_interval=self._interval())).result()

            action_results = []
            async for p in response:
                action_results.append(p)
            assert len(action_results) == 3
            assert action_results[
                0].action_type == AnalyzeBatchActionsType.RECOGNIZE_ENTITIES
            assert action_results[
                1].action_type == AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES
            assert action_results[
                2].action_type == AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES

            # Each action must return a result for every input document.
            # (The original all([x for x in xs if cond]) was vacuously true.)
            assert all(
                len(action_result.document_results) == len(docs)
                for action_result in action_results
            )
    def test_show_stats_and_model_version_multiple_tasks(self, client):
        """Statistics must be populated per action and per document when show_stats=True."""

        def callback(resp):
            # No-op hook: touching raw_response just confirms the pipeline
            # response was materialized (handy when debugging recordings).
            if resp.raw_response:
                pass

        docs = [{"id": "56", "text": ":)"},
                {"id": "0", "text": ":("},
                {"id": "19", "text": ":P"},
                {"id": "1", "text": ":D"}]

        poller = client.begin_analyze_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(model_version="latest"),
                ExtractKeyPhrasesAction(model_version="latest"),
                RecognizePiiEntitiesAction(model_version="latest"),
                RecognizeLinkedEntitiesAction(model_version="latest"),
                AnalyzeSentimentAction(model_version="latest")
            ],
            show_stats=True,
            polling_interval=self._interval(),
            raw_response_hook=callback,
        )

        response = poller.result()

        action_results = list(response)
        assert len(action_results) == 5
        assert action_results[0].action_type == AnalyzeActionsType.RECOGNIZE_ENTITIES
        assert action_results[1].action_type == AnalyzeActionsType.EXTRACT_KEY_PHRASES
        assert action_results[2].action_type == AnalyzeActionsType.RECOGNIZE_PII_ENTITIES
        assert action_results[3].action_type == AnalyzeActionsType.RECOGNIZE_LINKED_ENTITIES
        assert action_results[4].action_type == AnalyzeActionsType.ANALYZE_SENTIMENT

        # Each action must cover every document.
        # (The original all([x for x in xs if cond]) asserted nothing: the
        # filtering comprehension never yields a falsy item.)
        assert all(
            len(action_result.document_results) == len(docs)
            for action_result in action_results
        )

        for action_result in action_results:
            assert action_result.statistics
            for doc in action_result.document_results:
                assert doc.statistics
    async def test_user_agent(self, client):
        """Every request must carry the Azure SDK User-Agent identifier."""
        def callback(resp):
            expected = "azsdk-python-ai-textanalytics/{} Python/{} ({})".format(
                VERSION, platform.python_version(), platform.platform())
            self.assertIn(expected, resp.http_request.headers["User-Agent"])

        docs = [{"id": "1", "text": "I will go to the park."},
                {"id": "2", "text": "I did not like the hotel we stayed at."},
                {"id": "3", "text": "The restaurant had really good food."}]

        async with client:
            poller = await client.begin_analyze_batch_actions(
                docs,
                actions=[
                    RecognizeEntitiesAction(),
                ],
                polling_interval=self._interval(),
                raw_response_hook=callback,
            )
            await poller.wait()
# Example 21
    def test_poller_metadata(self, client):
        """The analyze poller must expose job metadata: timestamps, action counts, and an id."""
        docs = [{"id": "56", "text": ":)"}]

        poller = client.begin_analyze_batch_actions(
            docs,
            actions=[RecognizeEntitiesAction(model_version="latest")],
            show_stats=True,
            polling_interval=self._interval(),
        )

        response = poller.result()

        assert isinstance(poller.created_on, datetime.datetime)
        # NOTE(review): bare attribute access with no assertion — presumably
        # just exercises the private polling method's display_name property;
        # confirm intent (an assert may have been dropped here).
        poller._polling_method.display_name
        assert isinstance(poller.expires_on, datetime.datetime)
        assert poller.actions_failed_count == 0
        assert poller.actions_in_progress_count == 0
        assert poller.actions_succeeded_count == 1
        assert isinstance(poller.last_modified_on, datetime.datetime)
        assert poller.total_actions_count == 1
        assert poller.id
    async def test_out_of_order_ids_multiple_tasks(self, client):
        """Five actions; action results arrive in submission order and each
        preserves the original (non-sequential) document id order."""
        docs = [{"id": "56", "text": ":)"},
                {"id": "0", "text": ":("},
                {"id": "19", "text": ":P"},
                {"id": "1", "text": ":D"}]

        async with client:
            response = await (await client.begin_analyze_actions(
                docs,
                actions=[
                    RecognizeEntitiesAction(),
                    ExtractKeyPhrasesAction(),
                    RecognizePiiEntitiesAction(),
                    RecognizeLinkedEntitiesAction(),
                    AnalyzeSentimentAction()
                ],
                polling_interval=self._interval()
            )).result()

            action_results = []
            async for p in response:
                action_results.append(p)
            assert len(action_results) == 5

            assert action_results[0].action_type == AnalyzeActionsType.RECOGNIZE_ENTITIES
            assert action_results[1].action_type == AnalyzeActionsType.EXTRACT_KEY_PHRASES
            assert action_results[2].action_type == AnalyzeActionsType.RECOGNIZE_PII_ENTITIES
            assert action_results[3].action_type == AnalyzeActionsType.RECOGNIZE_LINKED_ENTITIES
            assert action_results[4].action_type == AnalyzeActionsType.ANALYZE_SENTIMENT

            action_results = [r for r in action_results if not r.is_error]

            # Each successful action must return one result per input document.
            # (The original all([x for x in xs if cond]) was vacuously true.)
            assert all(
                len(action_result.document_results) == len(docs)
                for action_result in action_results
            )

            in_order = ["56", "0", "19", "1"]

            for action_result in action_results:
                for idx, resp in enumerate(action_result.document_results):
                    self.assertEqual(resp.id, in_order[idx])
# Example 23
    def test_disable_service_logs(self, client):
        """disable_service_logs must be set on every action and serialized as loggingOptOut."""
        action_classes = (
            RecognizeEntitiesAction,
            ExtractKeyPhrasesAction,
            RecognizePiiEntitiesAction,
            RecognizeLinkedEntitiesAction,
            AnalyzeSentimentAction,
        )
        actions = [cls(disable_service_logs=True) for cls in action_classes]

        assert all(action.disable_service_logs for action in actions)

        def callback(resp):
            # Inspect the outgoing payload: every task carries loggingOptOut.
            tasks = json.loads(resp.http_request.body)["tasks"]
            assert len(tasks) == len(actions)
            for task in tasks.values():
                assert task[0]["parameters"]["loggingOptOut"]

        client.begin_analyze_actions(
            documents=["Test for logging disable"],
            actions=actions,
            polling_interval=self._interval(),
            raw_response_hook=callback,
        ).result()
# Example 24
    def analyze(self):
        """Sample: submit five analysis actions over one restaurant review and
        print each action's results (entities, PII with redaction, key phrases,
        linked entities, sentiment).

        Reads the service endpoint and key from the AZURE_TEXT_ANALYTICS_ENDPOINT
        and AZURE_TEXT_ANALYTICS_KEY environment variables.
        """
        # [START analyze]
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.textanalytics import (
            TextAnalyticsClient,
            RecognizeEntitiesAction,
            RecognizeLinkedEntitiesAction,
            RecognizePiiEntitiesAction,
            ExtractKeyPhrasesAction,
            AnalyzeSentimentAction,
            PiiEntityDomainType,
        )

        endpoint = os.environ["AZURE_TEXT_ANALYTICS_ENDPOINT"]
        key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]

        text_analytics_client = TextAnalyticsClient(
            endpoint=endpoint,
            credential=AzureKeyCredential(key),
        )

        documents = [
            "We went to Contoso Steakhouse located at midtown NYC last week for a dinner party, and we adore the spot! \
            They provide marvelous food and they have a great menu. The chief cook happens to be the owner (I think his name is John Doe) \
            and he is super nice, coming out of the kitchen and greeted us all. We enjoyed very much dining in the place! \
            The Sirloin steak I ordered was tender and juicy, and the place was impeccably clean. You can even pre-order from their \
            online menu at www.contososteakhouse.com, call 312-555-0176 or send email to [email protected]! \
            The only complaint I have is the food didn't come fast enough. Overall I highly recommend it!"
        ]

        poller = text_analytics_client.begin_analyze_actions(
            documents,
            display_name="Sample Text Analysis",
            actions=[
                RecognizeEntitiesAction(),
                # Restrict PII detection to the protected-health-information domain.
                RecognizePiiEntitiesAction(domain_filter=PiiEntityDomainType.
                                           PROTECTED_HEALTH_INFORMATION),
                ExtractKeyPhrasesAction(),
                RecognizeLinkedEntitiesAction(),
                AnalyzeSentimentAction()
            ],
        )

        result = poller.result()
        # Keep only successful action results; per-document errors are filtered
        # separately below for each action.
        action_results = [
            action_result for action_result in list(result)
            if not action_result.is_error
        ]

        # Action results come back in submission order, so index 0 is entities.
        first_action_result = action_results[0]
        print("Results of Entities Recognition action:")
        docs = [
            doc for doc in first_action_result.document_results
            if not doc.is_error
        ]

        for idx, doc in enumerate(docs):
            print("\nDocument text: {}".format(documents[idx]))
            for entity in doc.entities:
                print("Entity: {}".format(entity.text))
                print("...Category: {}".format(entity.category))
                print("...Confidence Score: {}".format(
                    entity.confidence_score))
                print("...Offset: {}".format(entity.offset))
                print("...Length: {}".format(entity.length))
            print("------------------------------------------")

        second_action_result = action_results[1]
        print("Results of PII Entities Recognition action:")
        docs = [
            doc for doc in second_action_result.document_results
            if not doc.is_error
        ]

        for idx, doc in enumerate(docs):
            print("Document text: {}".format(documents[idx]))
            print("Document text with redactions: {}".format(
                doc.redacted_text))
            for entity in doc.entities:
                print("Entity: {}".format(entity.text))
                print("...Category: {}".format(entity.category))
                print("...Confidence Score: {}\n".format(
                    entity.confidence_score))
                print("...Offset: {}".format(entity.offset))
                print("...Length: {}".format(entity.length))
            print("------------------------------------------")

        third_action_result = action_results[2]
        print("Results of Key Phrase Extraction action:")
        docs = [
            doc for doc in third_action_result.document_results
            if not doc.is_error
        ]

        for idx, doc in enumerate(docs):
            print("Document text: {}\n".format(documents[idx]))
            print("Key Phrases: {}\n".format(doc.key_phrases))
            print("------------------------------------------")

        fourth_action_result = action_results[3]
        print("Results of Linked Entities Recognition action:")
        docs = [
            doc for doc in fourth_action_result.document_results
            if not doc.is_error
        ]

        for idx, doc in enumerate(docs):
            print("Document text: {}\n".format(documents[idx]))
            for linked_entity in doc.entities:
                print("Entity name: {}".format(linked_entity.name))
                print("...Data source: {}".format(linked_entity.data_source))
                print("...Data source language: {}".format(
                    linked_entity.language))
                print("...Data source entity ID: {}".format(
                    linked_entity.data_source_entity_id))
                print("...Data source URL: {}".format(linked_entity.url))
                print("...Document matches:")
                for match in linked_entity.matches:
                    print("......Match text: {}".format(match.text))
                    print(".........Confidence Score: {}".format(
                        match.confidence_score))
                    print(".........Offset: {}".format(match.offset))
                    print(".........Length: {}".format(match.length))
            print("------------------------------------------")

        fifth_action_result = action_results[4]
        print("Results of Sentiment Analysis action:")
        docs = [
            doc for doc in fifth_action_result.document_results
            if not doc.is_error
        ]

        for doc in docs:
            print("Overall sentiment: {}".format(doc.sentiment))
            print("Scores: positive={}; neutral={}; negative={} \n".format(
                doc.confidence_scores.positive,
                doc.confidence_scores.neutral,
                doc.confidence_scores.negative,
            ))
            print("------------------------------------------")
# Example 25
def sample_analyze_actions():
    """Submit two restaurant reviews for batch analysis with five Text
    Analytics actions and print, per document, the outcome of each action
    (or its error code and message).

    Reads the endpoint and key from the AZURE_TEXT_ANALYTICS_ENDPOINT and
    AZURE_TEXT_ANALYTICS_KEY environment variables (KeyError if unset).
    """
    # [START analyze]
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.textanalytics import (
        TextAnalyticsClient,
        RecognizeEntitiesAction,
        RecognizeLinkedEntitiesAction,
        RecognizePiiEntitiesAction,
        ExtractKeyPhrasesAction,
        AnalyzeSentimentAction,
    )

    endpoint = os.environ["AZURE_TEXT_ANALYTICS_ENDPOINT"]
    key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]

    text_analytics_client = TextAnalyticsClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key),
    )

    documents = [
        'We went to Contoso Steakhouse located at midtown NYC last week for a dinner party, and we adore the spot! '
        'They provide marvelous food and they have a great menu. The chief cook happens to be the owner (I think his name is John Doe) '
        'and he is super nice, coming out of the kitchen and greeted us all.',
        'We enjoyed very much dining in the place! '
        'The Sirloin steak I ordered was tender and juicy, and the place was impeccably clean. You can even pre-order from their '
        'online menu at www.contososteakhouse.com, call 312-555-0176 or send email to [email protected]! '
        'The only complaint I have is the food didn\'t come fast enough. Overall I highly recommend it!'
    ]

    # One long-running operation runs all five actions over the batch.
    poller = text_analytics_client.begin_analyze_actions(
        documents,
        display_name="Sample Text Analysis",
        actions=[
            RecognizeEntitiesAction(),
            RecognizePiiEntitiesAction(),
            ExtractKeyPhrasesAction(),
            RecognizeLinkedEntitiesAction(),
            AnalyzeSentimentAction(),
        ],
    )

    def show_error(action_result):
        # Every failed action is reported in the same format.
        print("...Is an error with code '{}' and message '{}'".format(
            action_result.code, action_result.message))

    document_results = poller.result()
    # Per-document results are positional: results[N] belongs to the N-th
    # action in the list passed above.
    for doc_text, results in zip(documents, document_results):
        print("\nDocument text: {}".format(doc_text))

        entities_result = results[0]
        print("...Results of Recognize Entities Action:")
        if entities_result.is_error:
            show_error(entities_result)
        else:
            for entity in entities_result.entities:
                print("......Entity: {}".format(entity.text))
                print(".........Category: {}".format(entity.category))
                print(".........Confidence Score: {}".format(entity.confidence_score))
                print(".........Offset: {}".format(entity.offset))

        pii_result = results[1]
        print("...Results of Recognize PII Entities action:")
        if pii_result.is_error:
            show_error(pii_result)
        else:
            for entity in pii_result.entities:
                print("......Entity: {}".format(entity.text))
                print(".........Category: {}".format(entity.category))
                print(".........Confidence Score: {}".format(entity.confidence_score))

        key_phrases_result = results[2]
        print("...Results of Extract Key Phrases action:")
        if key_phrases_result.is_error:
            show_error(key_phrases_result)
        else:
            print("......Key Phrases: {}".format(key_phrases_result.key_phrases))

        linked_result = results[3]
        print("...Results of Recognize Linked Entities action:")
        if linked_result.is_error:
            show_error(linked_result)
        else:
            for linked_entity in linked_result.entities:
                print("......Entity name: {}".format(linked_entity.name))
                print(".........Data source: {}".format(linked_entity.data_source))
                print(".........Data source language: {}".format(linked_entity.language))
                print(".........Data source entity ID: {}".format(linked_entity.data_source_entity_id))
                print(".........Data source URL: {}".format(linked_entity.url))
                print(".........Document matches:")
                for match in linked_entity.matches:
                    print("............Match text: {}".format(match.text))
                    print("............Confidence Score: {}".format(match.confidence_score))
                    print("............Offset: {}".format(match.offset))
                    print("............Length: {}".format(match.length))

        sentiment_result = results[4]
        print("...Results of Analyze Sentiment action:")
        if sentiment_result.is_error:
            show_error(sentiment_result)
        else:
            print("......Overall sentiment: {}".format(sentiment_result.sentiment))
            print("......Scores: positive={}; neutral={}; negative={} \n".format(
                sentiment_result.confidence_scores.positive,
                sentiment_result.confidence_scores.neutral,
                sentiment_result.confidence_scores.negative,
            ))
        print("------------------------------------------")
    async def analyze_async(self):
        """Async sample for ``begin_analyze_actions``: submit two restaurant
        reviews with five actions and print each action's per-document output,
        or its error code and message when the action failed for a document.

        Reads AZURE_TEXT_ANALYTICS_ENDPOINT / AZURE_TEXT_ANALYTICS_KEY from
        the environment (KeyError if unset).
        """
        # [START analyze_async]
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.textanalytics.aio import TextAnalyticsClient
        from azure.ai.textanalytics import (
            RecognizeEntitiesAction,
            RecognizeLinkedEntitiesAction,
            RecognizePiiEntitiesAction,
            ExtractKeyPhrasesAction,
            AnalyzeSentimentAction,
        )

        endpoint = os.environ["AZURE_TEXT_ANALYTICS_ENDPOINT"]
        key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]

        text_analytics_client = TextAnalyticsClient(
            endpoint=endpoint,
            credential=AzureKeyCredential(key),
        )

        # NOTE(review): unlike the sync sample, these continuation pieces have
        # no separating spaces, so adjacent sentences run together in the
        # joined string -- confirm whether that is intentional.
        documents = [
            'We went to Contoso Steakhouse located at midtown NYC last week for a dinner party, and we adore the spot!'\
            'They provide marvelous food and they have a great menu. The chief cook happens to be the owner (I think his name is John Doe)'\
            'and he is super nice, coming out of the kitchen and greeted us all.'\
            ,

            'We enjoyed very much dining in the place!'\
            'The Sirloin steak I ordered was tender and juicy, and the place was impeccably clean. You can even pre-order from their'\
            'online menu at www.contososteakhouse.com, call 312-555-0176 or send email to [email protected]!'\
            'The only complaint I have is the food didn\'t come fast enough. Overall I highly recommend it!'\
        ]

        async with text_analytics_client:
            # Kick off one long-running operation covering all five actions.
            poller = await text_analytics_client.begin_analyze_actions(
                documents,
                display_name="Sample Text Analysis",
                actions=[
                    RecognizeEntitiesAction(),
                    RecognizePiiEntitiesAction(),
                    ExtractKeyPhrasesAction(),
                    RecognizeLinkedEntitiesAction(),
                    AnalyzeSentimentAction()
                ])

            pages = await poller.result()

            # To enumerate / zip for async, unless you install a third party library,
            # you have to read in all of the elements into memory first.
            # If you're not looking to enumerate / zip, we recommend you just asynchronously
            # loop over it immediately, without going through this step of reading them into memory
            document_results = []
            async for page in pages:
                document_results.append(page)

            # Pair each input with its per-document result list; the sample
            # assumes action_results[N] corresponds to the N-th action above.
            for doc, action_results in zip(documents, document_results):
                print("\nDocument text: {}".format(doc))
                recognize_entities_result = action_results[0]
                print("...Results of Recognize Entities Action:")
                if recognize_entities_result.is_error:
                    print("...Is an error with code '{}' and message '{}'".
                          format(recognize_entities_result.code,
                                 recognize_entities_result.message))
                else:
                    for entity in recognize_entities_result.entities:
                        print("......Entity: {}".format(entity.text))
                        print(".........Category: {}".format(entity.category))
                        print(".........Confidence Score: {}".format(
                            entity.confidence_score))
                        print(".........Offset: {}".format(entity.offset))

                recognize_pii_entities_result = action_results[1]
                print("...Results of Recognize PII Entities action:")
                if recognize_pii_entities_result.is_error:
                    print("...Is an error with code '{}' and message '{}'".
                          format(recognize_pii_entities_result.code,
                                 recognize_pii_entities_result.message))
                else:
                    for entity in recognize_pii_entities_result.entities:
                        print("......Entity: {}".format(entity.text))
                        print(".........Category: {}".format(entity.category))
                        print(".........Confidence Score: {}".format(
                            entity.confidence_score))

                extract_key_phrases_result = action_results[2]
                print("...Results of Extract Key Phrases action:")
                if extract_key_phrases_result.is_error:
                    print("...Is an error with code '{}' and message '{}'".
                          format(extract_key_phrases_result.code,
                                 extract_key_phrases_result.message))
                else:
                    print("......Key Phrases: {}".format(
                        extract_key_phrases_result.key_phrases))

                recognize_linked_entities_result = action_results[3]
                print("...Results of Recognize Linked Entities action:")
                if recognize_linked_entities_result.is_error:
                    print("...Is an error with code '{}' and message '{}'".
                          format(recognize_linked_entities_result.code,
                                 recognize_linked_entities_result.message))
                else:
                    for linked_entity in recognize_linked_entities_result.entities:
                        print("......Entity name: {}".format(
                            linked_entity.name))
                        print(".........Data source: {}".format(
                            linked_entity.data_source))
                        print(".........Data source language: {}".format(
                            linked_entity.language))
                        print(".........Data source entity ID: {}".format(
                            linked_entity.data_source_entity_id))
                        print(".........Data source URL: {}".format(
                            linked_entity.url))
                        print(".........Document matches:")
                        for match in linked_entity.matches:
                            print("............Match text: {}".format(
                                match.text))
                            print("............Confidence Score: {}".format(
                                match.confidence_score))
                            print("............Offset: {}".format(
                                match.offset))
                            print("............Length: {}".format(
                                match.length))

                analyze_sentiment_result = action_results[4]
                print("...Results of Analyze Sentiment action:")
                if analyze_sentiment_result.is_error:
                    print("...Is an error with code '{}' and message '{}'".
                          format(analyze_sentiment_result.code,
                                 analyze_sentiment_result.message))
                else:
                    print("......Overall sentiment: {}".format(
                        analyze_sentiment_result.sentiment))
                    print(
                        "......Scores: positive={}; neutral={}; negative={} \n"
                        .format(
                            analyze_sentiment_result.confidence_scores.
                            positive,
                            analyze_sentiment_result.confidence_scores.neutral,
                            analyze_sentiment_result.confidence_scores.
                            negative,
                        ))
                print("------------------------------------------")
# --- Example #27 ("예제" = "example"; scrape-artifact separator) ---
    def test_multiple_pages_of_results_returned_successfully(self, client):
        """Submit 25 documents (the per-request maximum) with 5 actions and
        verify the paged action results arrive in action order across two
        pages (20 docs, then 5), with every document accounted for per action.
        """
        single_doc = "hello world"
        # max number of documents is 25
        docs = [{"id": str(idx), "text": single_doc} for idx in range(25)]

        result = client.begin_analyze_batch_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
                RecognizeLinkedEntitiesAction(),
                AnalyzeSentimentAction()
            ],
            show_stats=True,
            polling_interval=self._interval(),
        ).result()

        recognize_entities_results = []
        extract_key_phrases_results = []
        recognize_pii_entities_results = []
        recognize_linked_entities_results = []
        analyze_sentiment_results = []

        action_results = list(result)

        # do 2 pages of 5 task results; within each page the action results
        # repeat in the order the actions were submitted.
        for idx, action_result in enumerate(action_results):
            if idx % 5 == 0:
                assert action_result.action_type == AnalyzeBatchActionsType.RECOGNIZE_ENTITIES
                recognize_entities_results.append(action_result)
            elif idx % 5 == 1:
                assert action_result.action_type == AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES
                extract_key_phrases_results.append(action_result)
            elif idx % 5 == 2:
                assert action_result.action_type == AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES
                recognize_pii_entities_results.append(action_result)
            elif idx % 5 == 3:
                assert action_result.action_type == AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES
                recognize_linked_entities_results.append(action_result)
            else:
                assert action_result.action_type == AnalyzeBatchActionsType.ANALYZE_SENTIMENT
                analyze_sentiment_results.append(action_result)
            if idx < 5:  # first page of task results
                assert len(action_result.document_results) == 20
            else:
                assert len(action_result.document_results) == 5

        # BUG FIX: the original asserted `all([x for x in xs if cond])`, which
        # is vacuously true -- all() over a list of truthy result objects (or
        # an empty list when the filter removes everything) always passes, so
        # the condition was never actually checked. The meaningful invariant
        # is that, for each action, its pages together cover every document
        # submitted (20 + 5 == 25).
        for per_action_results in (
            recognize_entities_results,
            extract_key_phrases_results,
            recognize_pii_entities_results,
            recognize_linked_entities_results,
            analyze_sentiment_results,
        ):
            assert sum(
                len(action_result.document_results)
                for action_result in per_action_results
            ) == len(docs)
    async def analyze_async(self):
        """Async sample for ``begin_analyze_batch_actions``: run entity, PII,
        and key-phrase actions over one document and print the results grouped
        by action type.

        Reads AZURE_TEXT_ANALYTICS_ENDPOINT / AZURE_TEXT_ANALYTICS_KEY from
        the environment (KeyError if unset).

        Raises:
            ValueError: if any of the submitted actions failed.
        """
        # [START analyze_async]
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.textanalytics.aio import TextAnalyticsClient
        from azure.ai.textanalytics import (
            RecognizeEntitiesAction,
            RecognizePiiEntitiesAction,
            ExtractKeyPhrasesAction,
            AnalyzeBatchActionsType
        )

        endpoint = os.environ["AZURE_TEXT_ANALYTICS_ENDPOINT"]
        key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]

        text_analytics_client = TextAnalyticsClient(
            endpoint=endpoint,
            credential=AzureKeyCredential(key),
        )

        # A single multi-sentence review; the backslash continuations keep it
        # as one string literal.
        documents = [
            "We went to Contoso Steakhouse located at midtown NYC last week for a dinner party, and we adore the spot! \
            They provide marvelous food and they have a great menu. The chief cook happens to be the owner (I think his name is John Doe) \
            and he is super nice, coming out of the kitchen and greeted us all. We enjoyed very much dining in the place! \
            The Sirloin steak I ordered was tender and juicy, and the place was impeccably clean. You can even pre-order from their \
            online menu at www.contososteakhouse.com, call 312-555-0176 or send email to [email protected]! \
            The only complaint I have is the food didn't come fast enough. Overall I highly recommend it!"
        ]

        async with text_analytics_client:
            # One long-running operation runs all three actions.
            poller = await text_analytics_client.begin_analyze_batch_actions(
                documents,
                display_name="Sample Text Analysis",
                actions=[
                    RecognizeEntitiesAction(),
                    RecognizePiiEntitiesAction(),
                    ExtractKeyPhrasesAction()
                ]
            )

            # The operation result is iterated asynchronously, one entry per
            # action; each entry is dispatched on its action_type below.
            result = await poller.result()

            async for action_result in result:
                # Any failed action aborts the whole sample.
                if action_result.is_error:
                    raise ValueError(
                        "Action has failed with message: {}".format(
                            action_result.error.message
                        )
                    )
                if action_result.action_type == AnalyzeBatchActionsType.RECOGNIZE_ENTITIES:
                    print("Results of Entities Recognition action:")
                    for idx, doc in enumerate(action_result.document_results):
                        print("\nDocument text: {}".format(documents[idx]))
                        for entity in doc.entities:
                            print("Entity: {}".format(entity.text))
                            print("...Category: {}".format(entity.category))
                            print("...Confidence Score: {}".format(entity.confidence_score))
                            print("...Offset: {}".format(entity.offset))
                        print("------------------------------------------")

                if action_result.action_type == AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES:
                    print("Results of PII Entities Recognition action:")
                    for idx, doc in enumerate(action_result.document_results):
                        print("Document text: {}".format(documents[idx]))
                        for entity in doc.entities:
                            print("Entity: {}".format(entity.text))
                            print("Category: {}".format(entity.category))
                            print("Confidence Score: {}\n".format(entity.confidence_score))
                        print("------------------------------------------")

                if action_result.action_type == AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES:
                    print("Results of Key Phrase Extraction action:")
                    for idx, doc in enumerate(action_result.document_results):
                        print("Document text: {}\n".format(documents[idx]))
                        print("Key Phrases: {}\n".format(doc.key_phrases))
                        print("------------------------------------------")
# --- Example #29 ("예제" = "example"; scrape-artifact separator) ---
    async def test_show_stats_and_model_version_multiple_tasks(self, client):

        docs = [{
            "id": "56",
            "text": ":)"
        }, {
            "id": "0",
            "text": ":("
        }, {
            "id": "19",
            "text": ":P"
        }, {
            "id": "1",
            "text": ":D"
        }]

        def callback(resp):
            assert resp.raw_response
            tasks = resp.raw_response['tasks']
            assert tasks['completed'] == 6
            assert tasks['inProgress'] == 0
            assert tasks['failed'] == 0
            assert tasks['total'] == 6
            num_tasks = 0
            for key, task in tasks.items():
                if "Tasks" in key:
                    num_tasks += 1
                    assert len(task) == 1
                    task_stats = task[0]['results']['statistics']
                    assert task_stats['documentsCount'] == 4
                    assert task_stats['validDocumentsCount'] == 4
                    assert task_stats['erroneousDocumentsCount'] == 0
                    assert task_stats['transactionsCount'] == 4
            assert num_tasks == 6

        async with client:
            response = await (await client.begin_analyze_actions(
                docs,
                actions=[
                    RecognizeEntitiesAction(model_version="latest"),
                    ExtractKeyPhrasesAction(model_version="latest"),
                    RecognizePiiEntitiesAction(model_version="latest"),
                    RecognizeLinkedEntitiesAction(model_version="latest"),
                    AnalyzeSentimentAction(model_version="latest"),
                    ExtractSummaryAction(model_version="latest")
                ],
                show_stats=True,
                polling_interval=self._interval(),
                raw_response_hook=callback,
            )).result()

            pages = []
            async for p in response:
                pages.append(p)
            assert len(pages) == len(docs)

            action_order = [
                _AnalyzeActionsType.RECOGNIZE_ENTITIES,
                _AnalyzeActionsType.EXTRACT_KEY_PHRASES,
                _AnalyzeActionsType.RECOGNIZE_PII_ENTITIES,
                _AnalyzeActionsType.RECOGNIZE_LINKED_ENTITIES,
                _AnalyzeActionsType.ANALYZE_SENTIMENT,
                _AnalyzeActionsType.EXTRACT_SUMMARY
            ]
            for document_results in pages:
                assert len(document_results) == len(action_order)
                for document_result in document_results:
                    assert document_result.statistics
                    assert document_result.statistics.character_count
                    assert document_result.statistics.transaction_count