async def test_all_successful_passing_text_document_input(self, client):
        """Linked-entity recognition accepts a batch of ``TextDocumentInput`` objects.

        The same sentence is submitted in English and in Spanish. Each
        returned document is expected to hold exactly three linked entities
        whose descriptive fields are populated, and every match must carry
        an offset.
        """
        documents = [
            TextDocumentInput(id="1", text="Microsoft was founded by Bill Gates and Paul Allen"),
            TextDocumentInput(id="2", text="Microsoft fue fundado por Bill Gates y Paul Allen"),
        ]
        populated_fields = (
            "name",
            "language",
            "data_source_entity_id",
            "url",
            "data_source",
            "matches",
        )

        results = await client.recognize_linked_entities(documents)
        for result in results:
            assert len(result.entities) == 3
            for linked_entity in result.entities:
                for field_name in populated_fields:
                    assert getattr(linked_entity, field_name) is not None, field_name
                for occurrence in linked_entity.matches:
                    assert occurrence.offset is not None
 async def test_mixing_inputs(self, client):
     """``extract_key_phrases`` raises ``TypeError`` for a batch of mixed input kinds.

     The batch combines a dict, a ``TextDocumentInput`` and a bare string.
     """
     dict_document = {
         "id": "1",
         "text": "Microsoft was founded by Bill Gates and Paul Allen.",
     }
     object_document = TextDocumentInput(
         id="2",
         text="I did not like the hotel we stayed at. It was too expensive.",
     )
     string_document = u"You cannot mix string input with the above inputs"
     mixed_batch = [dict_document, object_document, string_document]

     with self.assertRaises(TypeError):
         await client.extract_key_phrases(mixed_batch)
    async def test_all_successful_passing_text_document_input(self, client):
        """PII recognition accepts ``TextDocumentInput`` objects and returns statistics.

        Three documents containing an SSN, an ABA routing number and a
        Brazilian CPF number are analysed with ``show_stats=True``. Only the
        SSN text/category and the ABA text are pinned; see the notes below
        for the checks that are currently disabled.
        """
        documents = [
            TextDocumentInput(id="1", text="My SSN is 859-98-0987."),
            TextDocumentInput(
                id="2",
                text="Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check.",
            ),
            TextDocumentInput(id="3", text="Is 998.214.865-68 your Brazilian CPF number?"),
        ]

        results = await client.recognize_pii_entities(documents, show_stats=True)

        self.assertEqual(results[0].entities[0].text, "859-98-0987")
        self.assertEqual(results[0].entities[0].category, "USSocialSecurityNumber")
        self.assertEqual(results[1].entities[0].text, "111000025")
        # Disabled: the service is currently returning PhoneNumber for the ABA number.
        # self.assertEqual(results[1].entities[0].category, "ABA Routing Number")
        # Disabled: the service is currently not returning the Brazil CPF entity.
        # self.assertEqual(results[2].entities[0].text, "998.214.865-68")
        # self.assertEqual(results[2].entities[0].category, "Brazil CPF Number")

        for result in results:
            self.assertIsNotNone(result.id)
            self.assertIsNotNone(result.statistics)
            for pii_entity in result.entities:
                for field_name in ("text", "category", "offset", "confidence_score"):
                    self.assertIsNotNone(getattr(pii_entity, field_name), field_name)
 async def test_mixing_inputs(self, client):
     """``recognize_linked_entities`` raises ``TypeError`` for mixed input kinds.

     The batch combines a dict, a ``TextDocumentInput`` and a bare string.
     """
     dict_document = {
         "id": "1",
         "text": "Microsoft was founded by Bill Gates and Paul Allen.",
     }
     object_document = TextDocumentInput(
         id="2",
         text="I did not like the hotel we stayed at. It was too expensive.",
     )
     string_document = "You cannot mix string input with the above inputs"
     mixed_batch = [dict_document, object_document, string_document]

     with pytest.raises(TypeError):
         await client.recognize_linked_entities(mixed_batch)
Example #5
0
    async def test_all_successful_passing_text_document_input(self, resource_group, location, text_analytics_account, text_analytics_account_key):
        """Language detection accepts a batch of ``TextDocumentInput`` objects.

        Builds a client from the account endpoint and API key, submits
        English, Spanish, Japanese and German text, and checks the detected
        language names, their ISO 639-1 codes and that a score is present.
        """
        credential = TextAnalyticsApiKeyCredential(text_analytics_account_key)
        text_analytics = TextAnalyticsClient(text_analytics_account, credential)

        documents = [
            TextDocumentInput(id="1", text="I should take my cat to the veterinarian"),
            TextDocumentInput(id="2", text="Este es un document escrito en Español."),
            TextDocumentInput(id="3", text="猫は幸せ"),
            TextDocumentInput(id="4", text="Fahrt nach Stuttgart und dann zum Hotel zu Fu."),
        ]
        expected_names = ("English", "Spanish", "Japanese", "German")
        expected_codes = ("en", "es", "ja", "de")

        results = await text_analytics.detect_language(documents)

        # All names are verified before any ISO code, as two separate passes.
        for position, language_name in enumerate(expected_names):
            self.assertEqual(results[position].primary_language.name, language_name)
        for position, iso_code in enumerate(expected_codes):
            self.assertEqual(results[position].primary_language.iso6391_name, iso_code)

        for result in results:
            self.assertIsNotNone(result.primary_language.score)
Example #6
0
    def test_whole_batch_language_hint_and_obj_input(
            self, resource_group, location, text_analytics_account,
            text_analytics_account_key):
        """A batch-level ``language`` hint is applied to every object input.

        Three ``TextDocumentInput`` objects without their own language are
        sent to ``analyze_sentiment`` with ``language="de"``; the raw
        response hook checks that the request body contains the "de" hint
        three times.
        """
        credential = TextAnalyticsApiKeyCredential(text_analytics_account_key)
        text_analytics = TextAnalyticsClient(text_analytics_account, credential)

        def verify_request_body(raw_response):
            # Count occurrences of the serialized hint in the outgoing request.
            hint_count = raw_response.http_request.body.count('"language": "de"')
            self.assertEqual(hint_count, 3)

        documents = [
            TextDocumentInput(id="1", text="I should take my cat to the veterinarian."),
            TextDocumentInput(id="4", text="Este es un document escrito en Español."),
            TextDocumentInput(id="3", text="猫は幸せ"),
        ]

        text_analytics.analyze_sentiment(
            documents,
            language="de",
            raw_response_hook=verify_request_body,
        )
    async def test_whole_batch_language_hint_and_obj_per_item_hints(
            self, client):
        """Per-document language hints are sent alongside the batch-level hint.

        Two documents carry ``language="es"`` and one has no language; the
        call passes ``language="en"``. The raw response hook checks that the
        request body contains the "es" hint twice and the "en" hint once.
        """
        def verify_request_body(raw_response):
            request_body = raw_response.http_request.body
            # Same order as the expectations: "es" is checked before "en".
            for hint, expected_count in (
                ("\"language\": \"es\"", 2),
                ("\"language\": \"en\"", 1),
            ):
                self.assertEqual(request_body.count(hint), expected_count)

        documents = [
            TextDocumentInput(
                id="1",
                text="I should take my cat to the veterinarian.",
                language="es",
            ),
            TextDocumentInput(
                id="2",
                text="Este es un document escrito en Español.",
                language="es",
            ),
            TextDocumentInput(id="3", text="猫は幸せ"),
        ]

        await client.extract_key_phrases(
            documents,
            language="en",
            raw_response_hook=verify_request_body,
        )
    def test_all_successful_passing_text_document_input(self, client):
        """PII recognition accepts ``TextDocumentInput`` objects and returns statistics.

        Three documents containing an SSN, an ABA routing number and a
        Brazilian CPF number are analysed with ``show_stats=True``. Only the
        SSN text/category and the ABA text are pinned; see the notes below
        for the checks that are currently disabled.
        """
        documents = [
            TextDocumentInput(id="1", text="My SSN is 859-98-0987."),
            TextDocumentInput(
                id="2",
                text="Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check.",
            ),
            TextDocumentInput(id="3", text="Is 998.214.865-68 your Brazilian CPF number?"),
        ]

        results = client.recognize_pii_entities(documents, show_stats=True)

        assert results[0].entities[0].text == "859-98-0987"
        assert results[0].entities[0].category == "USSocialSecurityNumber"
        assert results[1].entities[0].text == "111000025"
        # Disabled: the service is currently returning PhoneNumber for the ABA number.
        # assert results[1].entities[0].category == "ABA Routing Number"
        # Disabled: the service is currently not returning the Brazil CPF entity.
        # assert results[2].entities[0].text == "998.214.865-68"
        # assert results[2].entities[0].category == "Brazil CPF Number"

        for result in results:
            assert result.id is not None
            assert result.statistics is not None
            for pii_entity in result.entities:
                for field_name in ("text", "category", "offset", "confidence_score"):
                    assert getattr(pii_entity, field_name) is not None, field_name
Example #9
0
    async def test_all_successful_passing_text_document_input(self, client):
        """Healthcare analysis accepts ``TextDocumentInput`` objects.

        Runs the long-running healthcare operation on two documents, then
        checks that no statistics are returned, that every document has an
        id and entities, and that in the first document "high" is related to
        "blood pressure" with relation type ``ValueOfExamination``.
        """
        documents = [
            TextDocumentInput(id="1", text="Patient does not suffer from high blood pressure."),
            TextDocumentInput(id="2", text="Prescribed 100mg ibuprofen, taken twice daily."),
        ]

        async with client:
            poller = await client.begin_analyze_healthcare_entities(
                documents, polling_interval=self._interval())
            paged_result = await poller.result()

        self.assertIsNone(paged_result.statistics)  # show_stats=False by default

        # Drain the async iterable into a list so it can be indexed below.
        analyzed = [item async for item in paged_result]

        for document in analyzed:
            self.assertIsNotNone(document.id)
            self.assertIsNone(document.statistics)
            self.assertIsNotNone(document.entities)

        self.assertEqual(len(analyzed[0].entities), 2)
        high = [e for e in analyzed[0].entities if e.text == "high"][0]
        blood_pressure = [
            e for e in analyzed[0].entities if e.text == "blood pressure"
        ][0]

        self.assertEqual(len(high.related_entities), 1)
        # popitem() removes and returns the only (entity, relation) pair.
        related, relation = high.related_entities.popitem()
        self.assertEqual(related, blood_pressure)
        self.assertEqual(relation, "ValueOfExamination")

        self.assertEqual(len(blood_pressure.related_entities), 0)
    def test_all_successful_passing_text_document_input(self, client):
        """Sentiment analysis accepts a batch of ``TextDocumentInput`` objects.

        Checks the document-level sentiment of three documents, validates
        each document's confidence scores, and verifies how each document
        was split into sentences.
        """
        documents = [
            TextDocumentInput(id="1", text="Microsoft was founded by Bill Gates and Paul Allen."),
            TextDocumentInput(id="2", text="I did not like the hotel we stayed at. It was too expensive."),
            TextDocumentInput(id="3", text="The restaurant had really good food. I recommend you try it."),
        ]
        expected_sentiments = ("neutral", "negative", "positive")
        expected_sentences = (
            ("Microsoft was founded by Bill Gates and Paul Allen.",),
            ("I did not like the hotel we stayed at.", "It was too expensive."),
            ("The restaurant had really good food.", "I recommend you try it."),
        )

        results = client.analyze_sentiment(documents)

        for position, sentiment in enumerate(expected_sentiments):
            assert results[position].sentiment == sentiment

        for result in results:
            self.validateConfidenceScores(result.confidence_scores)
            assert result.sentences is not None

        # For each document: sentence count first, then each sentence's text.
        for position, sentence_texts in enumerate(expected_sentences):
            assert len(results[position].sentences) == len(sentence_texts)
            for sentence_index, sentence_text in enumerate(sentence_texts):
                assert results[position].sentences[sentence_index].text == sentence_text
Example #11
0
    def test_whole_batch_language_hint_and_obj_input(self, client):
        """``begin_analyze`` succeeds for object inputs with a batch-level language hint.

        Three ``TextDocumentInput`` objects are analysed with
        ``language="en"`` across entity recognition, key phrase extraction
        and PII recognition tasks. For each task type, the first page of the
        result must contain exactly one task result and none of its
        per-document results may be an error.
        """
        # NOTE(review): this test previously defined a response callback that
        # counted '"language": "de"' in the request body, but it was never
        # passed as ``raw_response_hook`` and did not match the "en" hint sent
        # below, so it never ran. It has been removed as dead code; the
        # request body is therefore not inspected by this test.
        docs = [
            TextDocumentInput(
                id="1", text="I should take my cat to the veterinarian."),
            TextDocumentInput(id="4",
                              text="Este es un document escrito en Español."),
            TextDocumentInput(id="3", text="猫は幸せ"),
        ]

        response = list(
            client.begin_analyze(
                docs,
                entities_recognition_tasks=[EntitiesRecognitionTask()],
                key_phrase_extraction_tasks=[KeyPhraseExtractionTask()],
                pii_entities_recognition_tasks=[PiiEntitiesRecognitionTask()],
                language="en",
                polling_interval=self._interval(),
            ).result())

        task_types = [
            "entities_recognition_results", "key_phrase_extraction_results",
            "pii_entities_recognition_results"
        ]

        for task_type in task_types:
            task_results = getattr(response[0], task_type)
            self.assertEqual(len(task_results), 1)

            for doc_result in task_results[0].results:
                self.assertFalse(doc_result.is_error)
 def test_mixing_inputs(self, client):
     """``begin_analyze_healthcare_entities`` raises ``TypeError`` for mixed input kinds.

     The batch combines a dict, a ``TextDocumentInput`` and a bare string.
     """
     dict_document = {
         "id": "1",
         "text": "Microsoft was founded by Bill Gates and Paul Allen.",
     }
     object_document = TextDocumentInput(
         id="2",
         text="I did not like the hotel we stayed at. It was too expensive.",
     )
     string_document = u"You cannot mix string input with the above inputs"
     mixed_batch = [dict_document, object_document, string_document]

     with self.assertRaises(TypeError):
         client.begin_analyze_healthcare_entities(
             mixed_batch, polling_interval=self._interval())
    async def test_all_successful_passing_text_document_input(
            self, resource_group, location, text_analytics_account,
            text_analytics_account_key):
        """PII recognition accepts ``TextDocumentInput`` objects and returns statistics.

        Builds a client from the account endpoint and API key, analyses
        documents containing an SSN, an ABA routing number and a Brazilian
        CPF number with ``show_stats=True``, and checks the first entity of
        each document plus the populated fields of every entity.
        """
        credential = TextAnalyticsApiKeyCredential(text_analytics_account_key)
        text_analytics = TextAnalyticsClient(text_analytics_account, credential)

        documents = [
            TextDocumentInput(id="1", text="My SSN is 555-55-5555."),
            TextDocumentInput(
                id="2",
                text="Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check.",
            ),
            TextDocumentInput(id="3", text="Is 998.214.865-68 your Brazilian CPF number?"),
        ]

        results = await text_analytics.recognize_pii_entities(
            documents, show_stats=True)

        self.assertEqual(results[0].entities[0].text, "555-55-5555")
        self.assertEqual(
            results[0].entities[0].category, "U.S. Social Security Number (SSN)")
        self.assertEqual(results[1].entities[0].text, "111000025")
        # Disabled: the service is currently returning PhoneNumber for the ABA number.
        # self.assertEqual(results[1].entities[0].category, "ABA Routing Number")
        self.assertEqual(results[2].entities[0].text, "998.214.865-68")
        self.assertEqual(results[2].entities[0].category, "Brazil CPF Number")

        entity_fields = (
            "text",
            "category",
            "grapheme_offset",
            "grapheme_length",
            "confidence_score",
        )
        for result in results:
            self.assertIsNotNone(result.id)
            self.assertIsNotNone(result.statistics)
            for pii_entity in result.entities:
                for field_name in entity_fields:
                    self.assertIsNotNone(getattr(pii_entity, field_name), field_name)
    async def test_output_same_order_as_input_multiple_tasks(self, client):
        """Batch-action results preserve the input document order.

        Five documents are submitted with three actions: PII recognition,
        key phrase extraction, and a PII recognition action configured with
        ``model_version="bad"``. The test expects three action results in
        submission order, the third being an error, and every non-error
        result to contain one document result per input document with ids
        "1" through "5" in order.
        """
        docs = [
            TextDocumentInput(id="1", text="one"),
            TextDocumentInput(id="2", text="two"),
            TextDocumentInput(id="3", text="three"),
            TextDocumentInput(id="4", text="four"),
            TextDocumentInput(id="5", text="five")
        ]

        async with client:
            response = await (await client.begin_analyze_batch_actions(
                docs,
                actions=[
                    RecognizePiiEntitiesAction(),
                    ExtractKeyPhrasesAction(),
                    RecognizePiiEntitiesAction(model_version="bad"),
                ],
                polling_interval=self._interval()
            )).result()

            action_results = []
            async for p in response:
                action_results.append(p)

            assert len(action_results) == 3

            assert action_results[0].action_type == AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES
            assert action_results[1].action_type == AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES
            assert action_results[2].is_error
            # Check the document count of every successful action. The previous
            # form put this condition in a comprehension *filter*, so all()
            # only saw the surviving objects and the count was never enforced.
            assert all(
                len(result.document_results) == len(docs)
                for result in action_results
                if not result.is_error
            )

            for action_result in action_results:
                if not action_result.is_error:
                    for idx, doc in enumerate(action_result.document_results):
                        self.assertEqual(str(idx + 1), doc.id)
    def test_all_successful_passing_text_document_input(self, client):
        """Language detection accepts a batch of ``TextDocumentInput`` objects.

        Submits English, Spanish, Japanese and German text and checks the
        detected language names, their ISO 639-1 codes and that a score is
        present on every result.
        """
        documents = [
            TextDocumentInput(id="1", text="I should take my cat to the veterinarian"),
            TextDocumentInput(id="2", text="Este es un document escrito en Español."),
            TextDocumentInput(id="3", text="猫は幸せ"),
            TextDocumentInput(id="4", text="Fahrt nach Stuttgart und dann zum Hotel zu Fu."),
        ]
        expected_names = ("English", "Spanish", "Japanese", "German")
        expected_codes = ("en", "es", "ja", "de")

        results = client.detect_language(documents)

        # All names are verified before any ISO code, as two separate passes.
        for position, language_name in enumerate(expected_names):
            self.assertEqual(results[position].primary_language.name, language_name)
        for position, iso_code in enumerate(expected_codes):
            self.assertEqual(results[position].primary_language.iso6391_name, iso_code)

        for result in results:
            self.assertIsNotNone(result.primary_language.score)
    async def test_all_successful_passing_text_document_input(
            self, resource_group, location, text_analytics_account,
            text_analytics_account_key):
        """Key phrase extraction accepts a batch of ``TextDocumentInput`` objects.

        Builds a client from the account endpoint and key, submits the same
        sentence in English and Spanish (each tagged with its language), and
        expects "Paul Allen", "Bill Gates" and "Microsoft" among the key
        phrases of every result, along with a document id.
        """
        credential = AzureKeyCredential(text_analytics_account_key)
        text_analytics = TextAnalyticsClient(text_analytics_account, credential)

        documents = [
            TextDocumentInput(
                id="1",
                text="Microsoft was founded by Bill Gates and Paul Allen",
                language="en",
            ),
            TextDocumentInput(
                id="2",
                text="Microsoft fue fundado por Bill Gates y Paul Allen",
                language="es",
            ),
        ]
        required_phrases = ("Paul Allen", "Bill Gates", "Microsoft")

        results = await text_analytics.extract_key_phrases(documents)
        for result in results:
            for phrase in required_phrases:
                self.assertIn(phrase, result.key_phrases)
            self.assertIsNotNone(result.id)
    def test_all_successful_passing_text_document_input(self, client):
        """Sentiment analysis accepts a batch of ``TextDocumentInput`` objects.

        Checks the document-level sentiment of three documents, that each
        result exposes confidence scores and sentences, and how each
        document was split into sentences.
        """
        documents = [
            TextDocumentInput(
                id="1",
                text="Microsoft was founded by Bill Gates and Paul Allen.",
            ),
            TextDocumentInput(
                id="2",
                text="I did not like the hotel we stayed at. It was too expensive.",
            ),
            TextDocumentInput(
                id="3",
                text="The restaurant had really good food. I recommend you try it.",
            ),
        ]
        expected_sentiments = ("neutral", "negative", "positive")
        expected_sentences = (
            ("Microsoft was founded by Bill Gates and Paul Allen.",),
            ("I did not like the hotel we stayed at.", "It was too expensive."),
            ("The restaurant had really good food.", "I recommend you try it."),
        )

        results = client.analyze_sentiment(documents)

        for position, sentiment in enumerate(expected_sentiments):
            self.assertEqual(results[position].sentiment, sentiment)

        for result in results:
            self.assertIsNotNone(result.confidence_scores)
            self.assertIsNotNone(result.sentences)

        # For each document: sentence count first, then each sentence's text.
        for position, sentence_texts in enumerate(expected_sentences):
            self.assertEqual(len(results[position].sentences), len(sentence_texts))
            for sentence_index, sentence_text in enumerate(sentence_texts):
                self.assertEqual(
                    results[position].sentences[sentence_index].text, sentence_text)
    def test_all_successful_passing_text_document_input(self, client):
        """Linked-entity recognition accepts a batch of ``TextDocumentInput`` objects.

        The same sentence is submitted in English and in Spanish. Each
        returned document is expected to hold exactly three linked entities
        with populated descriptive fields, and every match must have an
        offset and a non-zero length.
        """
        documents = [
            TextDocumentInput(id="1", text="Microsoft was founded by Bill Gates and Paul Allen"),
            TextDocumentInput(id="2", text="Microsoft fue fundado por Bill Gates y Paul Allen"),
        ]
        populated_fields = (
            "name",
            "language",
            "data_source_entity_id",
            "url",
            "data_source",
            "matches",
        )

        results = client.recognize_linked_entities(documents)
        for result in results:
            self.assertEqual(len(result.entities), 3)
            for linked_entity in result.entities:
                for field_name in populated_fields:
                    self.assertIsNotNone(getattr(linked_entity, field_name), field_name)
                for occurrence in linked_entity.matches:
                    self.assertIsNotNone(occurrence.offset)
                    self.assertIsNotNone(occurrence.length)
                    self.assertNotEqual(occurrence.length, 0)
 def test_mixing_inputs(self, client):
     """``begin_analyze_batch_actions`` raises ``TypeError`` for mixed input kinds.

     The batch combines a dict, a ``TextDocumentInput`` and a bare string.
     """
     dict_document = {
         "id": "1",
         "text": "Microsoft was founded by Bill Gates and Paul Allen.",
     }
     object_document = TextDocumentInput(
         id="2",
         text="I did not like the hotel we stayed at. It was too expensive.",
     )
     string_document = u"You cannot mix string input with the above inputs"
     mixed_batch = [dict_document, object_document, string_document]

     with self.assertRaises(TypeError):
         client.begin_analyze_batch_actions(
             mixed_batch,
             actions=[
                 RecognizeEntitiesAction(),
                 ExtractKeyPhrasesAction(),
                 RecognizePiiEntitiesAction(),
             ],
             polling_interval=self._interval(),
         ).result()
Example #20
0
 def test_mixing_inputs(
         self, resource_group, location, text_analytics_account,
         text_analytics_account_key):
     """``analyze_sentiment`` raises ``TypeError`` for a batch of mixed input kinds.

     The client is built directly from the account endpoint and key; the
     batch combines a dict, a ``TextDocumentInput`` and a bare string.
     """
     text_analytics = TextAnalyticsClient(
         text_analytics_account, text_analytics_account_key)

     dict_document = {
         "id": "1",
         "text": "Microsoft was founded by Bill Gates and Paul Allen.",
     }
     object_document = TextDocumentInput(
         id="2",
         text="I did not like the hotel we stayed it. It was too expensive.",
     )
     string_document = u"You cannot mix string input with the above inputs"
     mixed_batch = [dict_document, object_document, string_document]

     with self.assertRaises(TypeError):
         text_analytics.analyze_sentiment(mixed_batch)
Example #21
0
 def test_mixing_inputs(self, client):
     """``begin_analyze`` raises ``TypeError`` for a batch of mixed input kinds.

     The batch combines a dict, a ``TextDocumentInput`` and a bare string.
     """
     dict_document = {
         "id": "1",
         "text": "Microsoft was founded by Bill Gates and Paul Allen.",
     }
     object_document = TextDocumentInput(
         id="2",
         text="I did not like the hotel we stayed at. It was too expensive.",
     )
     string_document = u"You cannot mix string input with the above inputs"
     mixed_batch = [dict_document, object_document, string_document]

     with self.assertRaises(TypeError):
         client.begin_analyze(
             mixed_batch,
             entities_recognition_tasks=[EntitiesRecognitionTask()],
             key_phrase_extraction_tasks=[KeyPhraseExtractionTask()],
             pii_entities_recognition_tasks=[PiiEntitiesRecognitionTask()],
             polling_interval=self._interval(),
         ).result()