def test_output_same_order_as_input_multiple_tasks(self, client):
    """All non-error action results must list documents in the order they were submitted."""
    docs = [
        TextDocumentInput(id="1", text="one"),
        TextDocumentInput(id="2", text="two"),
        TextDocumentInput(id="3", text="three"),
        TextDocumentInput(id="4", text="four"),
        TextDocumentInput(id="5", text="five"),
    ]

    response = client.begin_analyze_batch_actions(
        docs,
        actions=[
            RecognizePiiEntitiesAction(),
            ExtractKeyPhrasesAction(),
            RecognizePiiEntitiesAction(model_version="bad"),  # intentionally invalid
        ],
        polling_interval=self._interval(),
    ).result()

    action_results = list(response)
    assert len(action_results) == 3
    assert action_results[0].action_type == AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES
    assert action_results[1].action_type == AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES
    assert action_results[2].is_error

    # The old `all([x for x in xs if cond])` assertion was vacuously true whenever
    # the filter matched nothing; assert each successful result directly instead.
    for action_result in action_results:
        if not action_result.is_error:
            assert len(action_result.document_results) == len(docs)
            for idx, doc in enumerate(action_result.document_results):
                self.assertEqual(str(idx + 1), doc.id)
def test_show_stats_and_model_version_multiple_tasks(self, client):
    """Requesting stats plus pinned model versions must yield one full result per action."""

    def callback(resp):
        # Debug-only hook: provides a breakpoint target when a raw response arrives.
        if resp.raw_response:
            a = "b"

    docs = [{"id": "56", "text": ":)"},
            {"id": "0", "text": ":("},
            {"id": "19", "text": ":P"},
            {"id": "1", "text": ":D"}]

    poller = client.begin_analyze_batch_actions(
        docs,
        actions=[
            RecognizeEntitiesAction(model_version="latest"),
            ExtractKeyPhrasesAction(model_version="latest"),
            RecognizePiiEntitiesAction(model_version="latest")
        ],
        show_stats=True,
        polling_interval=self._interval(),
        raw_response_hook=callback,
    )
    response = poller.result()
    # assert response.statistics  # TODO: service does not surface top-level statistics yet

    action_results = list(response)
    assert len(action_results) == 3
    assert action_results[0].action_type == AnalyzeBatchActionsType.RECOGNIZE_ENTITIES
    assert action_results[1].action_type == AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES
    assert action_results[2].action_type == AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES

    # The old `all([x for x in xs if cond])` check was vacuously true when the
    # filter matched nothing; assert each action result directly.
    for action_result in action_results:
        assert len(action_result.document_results) == len(docs)
def test_all_successful_passing_string_pii_entities_task(self, client):
    """A lone PII action over plain strings returns one RecognizePiiEntitiesResult per document."""
    docs = [
        "My SSN is 859-98-0987.",
        "Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check.",
        "Is 998.214.865-68 your Brazilian CPF number?",
    ]

    poller = client.begin_analyze_actions(
        docs,
        actions=[RecognizePiiEntitiesAction()],
        show_stats=True,
        polling_interval=self._interval(),
    )
    pages = list(poller.result())
    assert len(pages) == len(docs)

    for doc_idx, page in enumerate(pages):
        assert len(page) == 1
        pii_result = page[0]
        assert isinstance(pii_result, RecognizePiiEntitiesResult)
        if doc_idx == 0:
            first_entity = pii_result.entities[0]
            assert first_entity.text == "859-98-0987"
            assert first_entity.category == "USSocialSecurityNumber"
        elif doc_idx == 1:
            assert pii_result.entities[0].text == "111000025"
        # every recognized entity carries the full set of attributes
        for entity in pii_result.entities:
            assert entity.text is not None
            assert entity.category is not None
            assert entity.offset is not None
            assert entity.confidence_score is not None
def test_bad_model_version_error_multiple_tasks(self, client):
    # TODO: verify behavior of service
    """Only the action with a valid model version succeeds; the rest report InvalidRequest."""
    docs = [{"id": "1", "language": "english",
             "text": "I did not like the hotel we stayed at."}]

    response = client.begin_analyze_actions(
        docs,
        actions=[
            RecognizeEntitiesAction(model_version="latest"),
            ExtractKeyPhrasesAction(model_version="bad"),
            RecognizePiiEntitiesAction(model_version="bad"),
            RecognizeLinkedEntitiesAction(model_version="bad"),
            AnalyzeSentimentAction(model_version="bad")
        ],
        polling_interval=self._interval(),
    ).result()

    action_results = list(response)
    # idiomatic truthiness instead of `== True` / `== False`
    assert not action_results[0].is_error
    assert action_results[0].action_type == AnalyzeActionsType.RECOGNIZE_ENTITIES
    # every remaining action used model_version="bad" and must fail identically
    for failed in action_results[1:]:
        assert failed.is_error
        assert failed.error.code == "InvalidRequest"
async def test_bad_model_version_error_multiple_tasks(
        self, client):
    # TODO: verify behavior of service
    """Only the action with a valid model version succeeds; the others report InvalidRequest."""
    docs = [{
        "id": "1",
        "language": "english",
        "text": "I did not like the hotel we stayed at."
    }]

    async with client:
        response = await (await client.begin_analyze_batch_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(model_version="latest"),
                ExtractKeyPhrasesAction(model_version="bad"),
                RecognizePiiEntitiesAction(model_version="bad")
            ],
            polling_interval=self._interval())).result()

        action_results = []
        async for p in response:
            action_results.append(p)

        # idiomatic truthiness instead of `== True` / `== False`
        assert not action_results[0].is_error
        assert action_results[0].action_type == AnalyzeBatchActionsType.RECOGNIZE_ENTITIES
        # the two actions with model_version="bad" must both fail identically
        for failed in action_results[1:]:
            assert failed.is_error
            assert failed.error.code == "InvalidRequest"
def test_disable_service_logs(self, client):
    """disable_service_logs=True on every action must be sent as loggingOptOut in the request body."""
    actions = [
        RecognizeEntitiesAction(disable_service_logs=True),
        ExtractKeyPhrasesAction(disable_service_logs=True),
        RecognizePiiEntitiesAction(disable_service_logs=True),
        RecognizeLinkedEntitiesAction(disable_service_logs=True),
        AnalyzeSentimentAction(disable_service_logs=True),
        ExtractSummaryAction(disable_service_logs=True),
    ]
    assert all(action.disable_service_logs for action in actions)

    def callback(resp):
        # Inspect the wire payload: each task must carry loggingOptOut=True.
        tasks = json.loads(resp.http_request.body)["tasks"]
        assert len(tasks) == len(actions)
        for task in tasks.values():
            assert task[0]["parameters"]["loggingOptOut"]

    client.begin_analyze_actions(
        documents=["Test for logging disable"],
        actions=actions,
        polling_interval=self._interval(),
        raw_response_hook=callback,
    ).result()
async def test_multiple_pages_of_results_with_errors_returned_successfully(
        self, client):
    """A failed action stays an error on every page; successful actions carry no error docs."""
    single_doc = "hello world"
    docs = [{
        "id": str(idx),
        "text": val
    } for (idx, val) in enumerate(list(itertools.repeat(single_doc, 25)))
    ]  # max number of documents is 25

    async with client:
        result = await (await client.begin_analyze_batch_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(model_version="bad"),  # intentionally invalid
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
            ],
            polling_interval=self._interval())).result()

        pages = []
        async for p in result:
            pages.append(p)

        # pages arrive per action in submission order; every 3rd page is the bad action
        for idx, action_result in enumerate(pages):
            if idx % 3 == 0:
                assert action_result.is_error
            else:
                # The old `all([d for d in xs if not d.is_error])` was vacuously
                # true if every doc errored; assert each document directly.
                assert all(not doc.is_error
                           for doc in action_result.document_results)
def test_out_of_order_ids_multiple_tasks(self, client):
    """Successful actions echo documents back in submission order even with unsorted ids."""
    docs = [{"id": "56", "text": ":)"},
            {"id": "0", "text": ":("},
            {"id": "19", "text": ":P"},
            {"id": "1", "text": ":D"}]

    response = client.begin_analyze_batch_actions(
        docs,
        actions=[
            RecognizeEntitiesAction(model_version="bad"),  # intentionally invalid
            ExtractKeyPhrasesAction(),
            RecognizePiiEntitiesAction(),
        ],
        polling_interval=self._interval(),
    ).result()

    action_results = list(response)
    assert len(action_results) == 3
    assert action_results[0].is_error
    assert action_results[1].action_type == AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES
    assert action_results[2].action_type == AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES

    action_results = [r for r in action_results if not r.is_error]
    in_order = ["56", "0", "19", "1"]
    # The old `all([x for x in xs if cond])` assertion was vacuously true when
    # the filter matched nothing; assert each successful result directly.
    for action_result in action_results:
        assert len(action_result.document_results) == len(docs)
        for idx, resp in enumerate(action_result.document_results):
            self.assertEqual(resp.id, in_order[idx])
async def test_show_stats_and_model_version_multiple_tasks(self, client):
    """show_stats with pinned model versions returns a complete result for each action."""
    docs = [{"id": "56", "text": ":)"},
            {"id": "0", "text": ":("},
            {"id": "19", "text": ":P"},
            {"id": "1", "text": ":D"}]

    async with client:
        response = await (await client.begin_analyze_batch_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(model_version="latest"),
                ExtractKeyPhrasesAction(model_version="latest"),
                RecognizePiiEntitiesAction(model_version="latest")
            ],
            show_stats=True,
            polling_interval=self._interval()
        )).result()

        action_results = []
        async for p in response:
            action_results.append(p)

        assert len(action_results) == 3
        assert action_results[0].action_type == AnalyzeBatchActionsType.RECOGNIZE_ENTITIES
        assert action_results[1].action_type == AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES
        assert action_results[2].action_type == AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES

        # The old `all([x for x in xs if cond])` check was vacuously true when
        # the filter matched nothing; assert each action result directly.
        for action_result in action_results:
            assert len(action_result.document_results) == len(docs)
async def test_multiple_pages_of_results_returned_successfully(
        self, client):
    """25 docs paginate as 20 + 5 per action; each action must see every document exactly once."""
    single_doc = "hello world"
    docs = [{
        "id": str(idx),
        "text": val
    } for (idx, val) in enumerate(list(itertools.repeat(single_doc, 25)))
    ]  # max number of documents is 25

    async with client:
        result = await (await client.begin_analyze_batch_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
            ],
            show_stats=True,
            polling_interval=self._interval())).result()

        pages = []
        async for p in result:
            pages.append(p)

        recognize_entities_results = []
        extract_key_phrases_results = []
        recognize_pii_entities_results = []
        for idx, action_result in enumerate(pages):
            if idx % 3 == 0:
                assert action_result.action_type == AnalyzeBatchActionsType.RECOGNIZE_ENTITIES
                recognize_entities_results.append(action_result)
            elif idx % 3 == 1:
                assert action_result.action_type == AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES
                extract_key_phrases_results.append(action_result)
            else:
                assert action_result.action_type == AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES
                recognize_pii_entities_results.append(action_result)
            if idx < 3:  # first page of task results
                assert len(action_result.document_results) == 20
            else:
                assert len(action_result.document_results) == 5

        # The old `all([x for x in xs if len(...) == len(docs)])` asserts were
        # vacuously true (no single page holds all 25 docs); the intended check
        # is that each action's pages together cover every document once.
        for bucket in (recognize_entities_results,
                       extract_key_phrases_results,
                       recognize_pii_entities_results):
            assert sum(len(r.document_results) for r in bucket) == len(docs)
async def test_out_of_order_ids_multiple_tasks(self, client):
    """Each successful action reports documents in submission order despite unsorted ids."""
    docs = [{
        "id": "56",
        "text": ":)"
    }, {
        "id": "0",
        "text": ":("
    }, {
        "id": "19",
        "text": ":P"
    }, {
        "id": "1",
        "text": ":D"
    }]

    async with client:
        response = await (await client.begin_analyze_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
                RecognizeLinkedEntitiesAction(),
                AnalyzeSentimentAction()
            ],
            polling_interval=self._interval())).result()

        action_results = []
        async for p in response:
            action_results.append(p)

        assert len(action_results) == 5
        assert action_results[0].action_type == AnalyzeActionsType.RECOGNIZE_ENTITIES
        assert action_results[1].action_type == AnalyzeActionsType.EXTRACT_KEY_PHRASES
        assert action_results[2].action_type == AnalyzeActionsType.RECOGNIZE_PII_ENTITIES
        assert action_results[3].action_type == AnalyzeActionsType.RECOGNIZE_LINKED_ENTITIES
        assert action_results[4].action_type == AnalyzeActionsType.ANALYZE_SENTIMENT

        action_results = [r for r in action_results if not r.is_error]
        in_order = ["56", "0", "19", "1"]
        # The old `all([x for x in xs if cond])` assertion was vacuously true
        # when the filter matched nothing; assert each result directly.
        for action_result in action_results:
            assert len(action_result.document_results) == len(docs)
            for idx, resp in enumerate(action_result.document_results):
                self.assertEqual(resp.id, in_order[idx])
def test_empty_credential_class(self, client):
    """A client built with an empty credential must raise ClientAuthenticationError."""
    with self.assertRaises(ClientAuthenticationError):
        client.begin_analyze_batch_actions(
            ["This is written in English."],
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
            ],
            polling_interval=self._interval(),
        )
async def test_bad_credentials(self, client):
    """Invalid credentials must surface as ClientAuthenticationError."""
    with self.assertRaises(ClientAuthenticationError):
        async with client:
            poller = await client.begin_analyze_batch_actions(
                ["This is written in English."],
                actions=[
                    RecognizeEntitiesAction(),
                    ExtractKeyPhrasesAction(),
                    RecognizePiiEntitiesAction(),
                ],
                polling_interval=self._interval())
            await poller.result()
def test_multiple_pages_of_results_returned_successfully(self, client):
    """25 docs paginate as 20 + 5 per action; each action must see every document exactly once."""
    single_doc = "hello world"
    docs = [{"id": str(idx), "text": val} for (idx, val) in
            enumerate(list(itertools.repeat(single_doc, 25)))]  # max number of documents is 25

    result = client.begin_analyze_actions(
        docs,
        actions=[
            RecognizeEntitiesAction(),
            ExtractKeyPhrasesAction(),
            RecognizePiiEntitiesAction(),
            RecognizeLinkedEntitiesAction(),
            AnalyzeSentimentAction()
        ],
        show_stats=True,
        polling_interval=self._interval(),
    ).result()

    recognize_entities_results = []
    extract_key_phrases_results = []
    recognize_pii_entities_results = []
    recognize_linked_entities_results = []
    analyze_sentiment_results = []

    action_results = list(result)

    # do 2 pages of 5 task results
    for idx, action_result in enumerate(action_results):
        if idx % 5 == 0:
            assert action_result.action_type == AnalyzeActionsType.RECOGNIZE_ENTITIES
            recognize_entities_results.append(action_result)
        elif idx % 5 == 1:
            assert action_result.action_type == AnalyzeActionsType.EXTRACT_KEY_PHRASES
            extract_key_phrases_results.append(action_result)
        elif idx % 5 == 2:
            assert action_result.action_type == AnalyzeActionsType.RECOGNIZE_PII_ENTITIES
            recognize_pii_entities_results.append(action_result)
        elif idx % 5 == 3:
            assert action_result.action_type == AnalyzeActionsType.RECOGNIZE_LINKED_ENTITIES
            recognize_linked_entities_results.append(action_result)
        else:
            assert action_result.action_type == AnalyzeActionsType.ANALYZE_SENTIMENT
            analyze_sentiment_results.append(action_result)
        if idx < 5:  # first page of task results
            assert len(action_result.document_results) == 20
        else:
            assert len(action_result.document_results) == 5

    # The old `all([x for x in xs if len(...) == len(docs)])` asserts were
    # vacuously true (no single page holds all 25 docs); the intended check is
    # that each action's pages together cover every document once.
    for bucket in (recognize_entities_results,
                   extract_key_phrases_results,
                   recognize_pii_entities_results,
                   recognize_linked_entities_results,
                   analyze_sentiment_results):
        assert sum(len(r.document_results) for r in bucket) == len(docs)
async def test_bad_document_input(self, client):
    """Passing a bare string instead of a list of documents raises TypeError."""
    docs = "This is the wrong type"
    with self.assertRaises(TypeError):
        async with client:
            poller = await client.begin_analyze_batch_actions(
                docs,
                actions=[
                    RecognizeEntitiesAction(),
                    ExtractKeyPhrasesAction(),
                    RecognizePiiEntitiesAction(),
                ],
                polling_interval=self._interval())
            await poller.result()
async def test_not_passing_list_for_docs(self, client):
    """A single dict (not wrapped in a list) is rejected with a descriptive TypeError."""
    docs = {"id": "1", "text": "hello world"}
    with pytest.raises(TypeError) as excinfo:
        async with client:
            poller = await client.begin_analyze_batch_actions(
                docs,
                actions=[
                    RecognizeEntitiesAction(),
                    ExtractKeyPhrasesAction(),
                    RecognizePiiEntitiesAction()
                ],
                polling_interval=self._interval())
            await poller.result()
    assert "Input documents cannot be a dict" in str(excinfo.value)
async def test_empty_credential_class(self, client):
    """A client built with an empty credential must raise ClientAuthenticationError."""
    with self.assertRaises(ClientAuthenticationError):
        async with client:
            poller = await client.begin_analyze_actions(
                ["This is written in English."],
                actions=[
                    RecognizeEntitiesAction(),
                    ExtractKeyPhrasesAction(),
                    RecognizePiiEntitiesAction(),
                    RecognizeLinkedEntitiesAction(),
                    AnalyzeSentimentAction()
                ],
                polling_interval=self._interval())
            await poller.result()
async def test_missing_input_records_error(self, client):
    """An empty document list is rejected client-side with ValueError."""
    with pytest.raises(ValueError) as excinfo:
        async with client:
            poller = await client.begin_analyze_batch_actions(
                [],
                actions=[
                    RecognizeEntitiesAction(),
                    ExtractKeyPhrasesAction(),
                    RecognizePiiEntitiesAction()
                ],
                polling_interval=self._interval())
            await poller.result()
    assert "Input documents can not be empty or None" in str(excinfo.value)
def test_bad_model_version_error_all_tasks(self, client):
    # TODO: verify behavior of service
    """When every action has a bad model version the whole job fails with HttpResponseError."""
    docs = [{"id": "1", "language": "english",
             "text": "I did not like the hotel we stayed at."}]

    with self.assertRaises(HttpResponseError):
        client.begin_analyze_batch_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(model_version="bad"),
                ExtractKeyPhrasesAction(model_version="bad"),
                RecognizePiiEntitiesAction(model_version="bad")
            ],
            polling_interval=self._interval(),
        ).result()
async def test_multiple_of_same_action_fail(self, client):
    """Submitting two actions of the same type is rejected client-side with ValueError."""
    docs = [
        {"id": "1", "language": "en",
         "text": "I did not like the hotel we stayed at."},
        {"id": "2", "language": "en",
         "text": "I did not like the hotel we stayed at."},
    ]

    with pytest.raises(ValueError) as e:
        await client.begin_analyze_actions(
            docs,
            actions=[
                RecognizePiiEntitiesAction(domain_filter="phi"),
                RecognizePiiEntitiesAction(),  # duplicate action type triggers the error
            ],
            polling_interval=self._interval(),
        )
    assert "Multiple of the same action is not currently supported." in str(
        e.value)
async def test_multiple_pages_of_results_returned_successfully(
        self, client):
    """Every document gets one result per action, delivered in action order across pages."""
    single_doc = "hello world"
    docs = [{"id": str(idx), "text": text}
            for idx, text in enumerate(itertools.repeat(single_doc, 25))
            ]  # max number of documents is 25

    async with client:
        result = await (await client.begin_analyze_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
                RecognizeLinkedEntitiesAction(),
                AnalyzeSentimentAction(),
                ExtractSummaryAction()
            ],
            show_stats=True,
            polling_interval=self._interval())).result()

        pages = []
        async for page in result:
            pages.append(page)
        assert len(pages) == len(docs)

        action_order = [
            _AnalyzeActionsType.RECOGNIZE_ENTITIES,
            _AnalyzeActionsType.EXTRACT_KEY_PHRASES,
            _AnalyzeActionsType.RECOGNIZE_PII_ENTITIES,
            _AnalyzeActionsType.RECOGNIZE_LINKED_ENTITIES,
            _AnalyzeActionsType.ANALYZE_SENTIMENT,
            _AnalyzeActionsType.EXTRACT_SUMMARY,
        ]
        # bucket every document result by its action type while verifying
        # document order and per-page action order
        action_type_to_document_results = defaultdict(list)
        for doc_idx, page in enumerate(pages):
            for action_idx, document_result in enumerate(page):
                self.assertEqual(document_result.id, str(doc_idx))
                action_type = self.document_result_to_action_type(
                    document_result)
                self.assertEqual(action_type, action_order[action_idx])
                action_type_to_document_results[action_type].append(
                    document_result)

        assert len(action_type_to_document_results) == len(action_order)
        for document_results in action_type_to_document_results.values():
            assert len(document_results) == len(docs)
async def test_out_of_order_ids_multiple_tasks(self, client):
    """Per-document pages keep submission order and hold one result per action, in action order."""
    docs = [
        {"id": "56", "text": ":)"},
        {"id": "0", "text": ":("},
        {"id": "19", "text": ":P"},
        {"id": "1", "text": ":D"},
    ]

    async with client:
        response = await (await client.begin_analyze_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
                RecognizeLinkedEntitiesAction(),
                AnalyzeSentimentAction(),
                ExtractSummaryAction()
            ],
            polling_interval=self._interval())).result()

        results = []
        async for page in response:
            results.append(page)
        assert len(results) == len(docs)

        document_order = ["56", "0", "19", "1"]
        action_order = [
            _AnalyzeActionsType.RECOGNIZE_ENTITIES,
            _AnalyzeActionsType.EXTRACT_KEY_PHRASES,
            _AnalyzeActionsType.RECOGNIZE_PII_ENTITIES,
            _AnalyzeActionsType.RECOGNIZE_LINKED_ENTITIES,
            _AnalyzeActionsType.ANALYZE_SENTIMENT,
            _AnalyzeActionsType.EXTRACT_SUMMARY,
        ]
        for doc_idx, document_results in enumerate(results):
            assert len(document_results) == 6
            for action_idx, document_result in enumerate(document_results):
                self.assertEqual(document_result.id, document_order[doc_idx])
                self.assertEqual(
                    self.document_result_to_action_type(document_result),
                    action_order[action_idx])
def test_too_many_documents(self, client):
    """Exceeding the per-request document limit is rejected with a 400 HttpResponseError."""
    docs = list(itertools.repeat("input document", 26))  # Maximum number of documents per request is 25

    with pytest.raises(HttpResponseError) as excinfo:
        client.begin_analyze_batch_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction()
            ],
            polling_interval=self._interval(),
        )
    assert excinfo.value.status_code == 400
def test_bad_credentials(self, client):
    """Invalid credentials must surface as ClientAuthenticationError."""
    with self.assertRaises(ClientAuthenticationError):
        client.begin_analyze_actions(
            ["This is written in English."],
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
                RecognizeLinkedEntitiesAction(),
                AnalyzeSentimentAction(),
                ExtractSummaryAction()
            ],
            polling_interval=self._interval(),
        )
def test_missing_input_records_error(self, client):
    """An empty document list is rejected client-side with ValueError."""
    with pytest.raises(ValueError) as excinfo:
        client.begin_analyze_actions(
            [],
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
                RecognizeLinkedEntitiesAction(),
                AnalyzeSentimentAction()
            ],
            polling_interval=self._interval(),
        )
    assert "Input documents can not be empty or None" in str(excinfo.value)
def test_duplicate_ids_error(self, client):
    # TODO: verify behavior of service
    """Duplicate document ids make the service reject the whole request."""
    docs = [{"id": "1", "text": "hello world"},
            {"id": "1", "text": "I did not like the hotel we stayed at."}]

    with self.assertRaises(HttpResponseError):
        client.begin_analyze_batch_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
            ],
            polling_interval=self._interval(),
        ).result()
def test_invalid_language_hint_method(self, client):
    """An invalid language hint produces a per-document error in every action's results."""
    poller = client.begin_analyze_batch_actions(
        ["This should fail because we're passing in an invalid language hint"],
        language="notalanguage",
        actions=[
            RecognizeEntitiesAction(),
            ExtractKeyPhrasesAction(),
            RecognizePiiEntitiesAction()
        ],
        polling_interval=self._interval(),
    )
    for action_result in poller.result():
        for doc in action_result.document_results:
            assert doc.is_error
def test_mixing_inputs(self, client):
    """Mixing dict, TextDocumentInput, and plain-string documents raises TypeError."""
    docs = [
        {"id": "1", "text": "Microsoft was founded by Bill Gates and Paul Allen."},
        TextDocumentInput(id="2", text="I did not like the hotel we stayed at. It was too expensive."),
        u"You cannot mix string input with the above inputs",
    ]

    with self.assertRaises(TypeError):
        client.begin_analyze_batch_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
            ],
            polling_interval=self._interval(),
        ).result()
async def test_all_successful_passing_string_pii_entities_task(
        self, client):
    """A lone PII action over plain strings yields one result per input document."""
    docs = [
        "My SSN is 859-98-0987.",
        "Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check.",
        "Is 998.214.865-68 your Brazilian CPF number?"
    ]

    async with client:
        response = await (await client.begin_analyze_batch_actions(
            docs,
            actions=[RecognizePiiEntitiesAction()],
            show_stats=True,
            polling_interval=self._interval())).result()

        action_results = []
        async for page in response:
            action_results.append(page)

        assert len(action_results) == 1
        pii_action_result = action_results[0]
        assert pii_action_result.action_type == AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES
        assert len(pii_action_result.document_results) == len(docs)

        ssn_doc = pii_action_result.document_results[0]
        aba_doc = pii_action_result.document_results[1]
        self.assertEqual(ssn_doc.entities[0].text, "859-98-0987")
        self.assertEqual(ssn_doc.entities[0].category, "USSocialSecurityNumber")
        self.assertEqual(aba_doc.entities[0].text, "111000025")
        # self.assertEqual(results[1].entities[0].category, "ABA Routing Number")  # Service is currently returning PhoneNumber here
        # commenting out brazil cpf, currently service is not returning it
        # self.assertEqual(action_result.document_results[2].entities[0].text, "998.214.865-68")
        # self.assertEqual(action_result.document_results[2].entities[0].category, "Brazil CPF Number")

        for doc in pii_action_result.document_results:
            self.assertIsNotNone(doc.id)
            # self.assertIsNotNone(doc.statistics)
            for entity in doc.entities:
                self.assertIsNotNone(entity.text)
                self.assertIsNotNone(entity.category)
                self.assertIsNotNone(entity.offset)
                self.assertIsNotNone(entity.confidence_score)
async def test_disable_service_logs(self, client):
    """Every action type accepts disable_service_logs=True and the job still completes."""
    actions = [
        RecognizeEntitiesAction(disable_service_logs=True),
        ExtractKeyPhrasesAction(disable_service_logs=True),
        RecognizePiiEntitiesAction(disable_service_logs=True),
        RecognizeLinkedEntitiesAction(disable_service_logs=True),
        AnalyzeSentimentAction(disable_service_logs=True),
    ]
    assert all(action.disable_service_logs for action in actions)

    poller = await client.begin_analyze_actions(
        documents=["Test for logging disable"],
        actions=actions,
        polling_interval=self._interval(),
    )
    await poller.result()