async def test_extract_summary_partial_results(self, client):
    """An empty document is rejected per-document while the valid one still succeeds."""
    docs = [
        {"id": "1", "language": "en", "text": ""},
        {"id": "2", "language": "en", "text": "hello world"},
    ]

    async with client:
        poller = await client.begin_analyze_actions(
            docs,
            actions=[ExtractSummaryAction()],
            show_stats=True,
            polling_interval=self._interval(),
        )
        response = await poller.result()
        document_results = [page async for page in response]

    # Empty text is invalid input: the first document carries a document-level error.
    assert document_results[0][0].is_error
    assert document_results[0][0].error.code == "InvalidDocument"
    # The second document is processed normally.
    assert not document_results[1][0].is_error
    assert isinstance(document_results[1][0], ExtractSummaryResult)
def test_disable_service_logs(self, client):
    """disable_service_logs=True on every action must surface as loggingOptOut in each task payload."""
    actions = [
        RecognizeEntitiesAction(disable_service_logs=True),
        ExtractKeyPhrasesAction(disable_service_logs=True),
        RecognizePiiEntitiesAction(disable_service_logs=True),
        RecognizeLinkedEntitiesAction(disable_service_logs=True),
        AnalyzeSentimentAction(disable_service_logs=True),
        ExtractSummaryAction(disable_service_logs=True),
    ]
    # Each action object should report the flag as set.
    assert all(action.disable_service_logs for action in actions)

    def callback(resp):
        # Inspect the raw request body to confirm the flag reached the wire format.
        tasks = json.loads(resp.http_request.body)["tasks"]
        assert len(tasks) == len(actions)
        for task in tasks.values():
            assert task[0]["parameters"]["loggingOptOut"]

    client.begin_analyze_actions(
        documents=["Test for logging disable"],
        actions=actions,
        polling_interval=self._interval(),
        raw_response_hook=callback,
    ).result()
async def test_all_successful_passing_dict_extract_summary_action(self, client):
    """Both documents succeed; the long document yields 3 summary sentences, the short one yields 1.

    Fixes the original assertion `len(result.sentences) == 3 if result.id == 0 else 1`:
    due to conditional-expression precedence it parsed as
    `(len(result.sentences) == 3) if (result.id == 0) else 1`, and since ``result.id``
    is a string ("1"/"2"), never the int 0, the assert always took the truthy `1`
    branch and verified nothing.
    """
    docs = [{
        "id": "1",
        "language": "en",
        "text": "The government of British Prime Minster Theresa May has been plunged into turmoil with the resignation"
                " of two senior Cabinet ministers in a deep split over her Brexit strategy. The Foreign Secretary Boris "
                "Johnson, quit on Monday, hours after the resignation late on Sunday night of the minister in charge of "
                "Brexit negotiations, David Davis. Their decision to leave the government came three days after May "
                "appeared to have agreed a deal with herfractured Cabinet on the UK's post Brexit relationship with "
                "the EU. That plan is now in tatters and her political future appears uncertain. May appeared in Parliament"
                " on Monday afternoon to defend her plan, minutes after Downing Street confirmed the departure of Johnson. "
                "May acknowledged the splits in her statement to MPs, saying of the ministers who quit: We do not agree "
                "about the best way of delivering our shared commitment to honoring the result of the referendum. The "
                "Prime Minister's latest plitical drama began late on Sunday night when Davis quit, declaring he could "
                "not support May's Brexit plan. He said it involved too close a relationship with the EU and gave only "
                "an illusion of control being returned to the UK after it left the EU. It seems to me we're giving too "
                "much away, too easily, and that's a dangerous strategy at this time, Davis said in a BBC radio "
                "interview Monday morning. Johnson's resignation came Monday afternoon local time, just before the "
                "Prime Minister was due to make a scheduled statement in Parliament. This afternoon, the Prime Minister "
                "accepted the resignation of Boris Johnson as Foreign Secretary, a statement from Downing Street said."
    }, {
        "id": "2",
        "language": "es",
        "text": "Microsoft fue fundado por Bill Gates y Paul Allen"
    }]

    async with client:
        response = await (await client.begin_analyze_actions(
            docs,
            actions=[ExtractSummaryAction()],
            show_stats=True,
            polling_interval=self._interval(),
        )).result()
        document_results = []
        async for doc in response:
            document_results.append(doc)

    assert len(document_results) == 2
    for document_result in document_results:
        assert len(document_result) == 1
        for result in document_result:
            assert isinstance(result, ExtractSummaryResult)
            assert result.statistics
            # Long document "1" produces the default 3 summary sentences;
            # the single-sentence document "2" produces 1.
            assert len(result.sentences) == (3 if result.id == "1" else 1)
            for sentence in result.sentences:
                assert sentence.text
                assert sentence.rank_score is not None
                assert sentence.offset is not None
                assert sentence.length is not None
            assert result.id is not None
def sample_extractive_summarization():
    """Demonstrate extractive summarization over a single long document.

    Fixes the error branch: a document-level error exposes its code and message on
    ``result.error`` (a TextAnalyticsError), not directly on the result object, so
    ``extract_summary_result.code`` / ``.message`` would raise AttributeError.
    """
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.textanalytics import (
        TextAnalyticsClient,
        ExtractSummaryAction
    )

    endpoint = os.environ["AZURE_TEXT_ANALYTICS_ENDPOINT"]
    key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]

    text_analytics_client = TextAnalyticsClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key),
    )

    document = [
        "At Microsoft, we have been on a quest to advance AI beyond existing techniques, by taking a more holistic, "
        "human-centric approach to learning and understanding. As Chief Technology Officer of Azure AI Cognitive "
        "Services, I have been working with a team of amazing scientists and engineers to turn this quest into a "
        "reality. In my role, I enjoy a unique perspective in viewing the relationship among three attributes of "
        "human cognition: monolingual text (X), audio or visual sensory signals, (Y) and multilingual (Z). At the "
        "intersection of all three, there's magic-what we call XYZ-code as illustrated in Figure 1-a joint "
        "representation to create more powerful AI that can speak, hear, see, and understand humans better. "
        "We believe XYZ-code will enable us to fulfill our long-term vision: cross-domain transfer learning, "
        "spanning modalities and languages. The goal is to have pretrained models that can jointly learn "
        "representations to support a broad range of downstream AI tasks, much in the way humans do today. "
        "Over the past five years, we have achieved human performance on benchmarks in conversational speech "
        "recognition, machine translation, conversational question answering, machine reading comprehension, "
        "and image captioning. These five breakthroughs provided us with strong signals toward our more ambitious "
        "aspiration to produce a leap in AI capabilities, achieving multisensory and multilingual learning that "
        "is closer in line with how humans learn and understand. I believe the joint XYZ-code is a foundational "
        "component of this aspiration, if grounded with external knowledge sources in the downstream AI tasks."
    ]

    poller = text_analytics_client.begin_analyze_actions(
        document,
        actions=[
            ExtractSummaryAction(),
        ],
    )

    document_results = poller.result()
    for result in document_results:
        extract_summary_result = result[0]  # first document, first result
        if extract_summary_result.is_error:
            # Document errors carry their details on the .error attribute.
            print("...Is an error with code '{}' and message '{}'".format(
                extract_summary_result.error.code, extract_summary_result.error.message
            ))
        else:
            print("Summary extracted: \n{}".format(
                " ".join([sentence.text for sentence in extract_summary_result.sentences]))
            )
async def test_multiple_pages_of_results_returned_successfully(self, client):
    """25 documents (the per-request maximum) across all 6 actions come back complete and in order."""
    single_doc = "hello world"
    # max number of documents is 25
    docs = [
        {"id": str(idx), "text": text}
        for idx, text in enumerate(itertools.repeat(single_doc, 25))
    ]

    async with client:
        poller = await client.begin_analyze_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
                RecognizeLinkedEntitiesAction(),
                AnalyzeSentimentAction(),
                ExtractSummaryAction()
            ],
            show_stats=True,
            polling_interval=self._interval())
        result = await poller.result()
        pages = [page async for page in result]

    assert len(pages) == len(docs)

    action_order = [
        _AnalyzeActionsType.RECOGNIZE_ENTITIES,
        _AnalyzeActionsType.EXTRACT_KEY_PHRASES,
        _AnalyzeActionsType.RECOGNIZE_PII_ENTITIES,
        _AnalyzeActionsType.RECOGNIZE_LINKED_ENTITIES,
        _AnalyzeActionsType.ANALYZE_SENTIMENT,
        _AnalyzeActionsType.EXTRACT_SUMMARY
    ]
    # Group each document result by the action that produced it while checking order.
    grouped_by_action = defaultdict(list)
    for doc_idx, page in enumerate(pages):
        for action_idx, document_result in enumerate(page):
            self.assertEqual(document_result.id, str(doc_idx))
            action_type = self.document_result_to_action_type(document_result)
            self.assertEqual(action_type, action_order[action_idx])
            grouped_by_action[action_type].append(document_result)

    # Every action type appeared, and each one produced a result for every document.
    assert len(grouped_by_action) == len(action_order)
    for document_results in grouped_by_action.values():
        assert len(document_results) == len(docs)
async def test_out_of_order_ids_multiple_tasks(self, client):
    """Results preserve the caller's (non-sorted) document id order across all 6 actions."""
    docs = [
        {"id": "56", "text": ":)"},
        {"id": "0", "text": ":("},
        {"id": "19", "text": ":P"},
        {"id": "1", "text": ":D"},
    ]

    async with client:
        poller = await client.begin_analyze_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
                RecognizeLinkedEntitiesAction(),
                AnalyzeSentimentAction(),
                ExtractSummaryAction()
            ],
            polling_interval=self._interval())
        response = await poller.result()
        results = [page async for page in response]

    assert len(results) == len(docs)

    expected_document_order = ["56", "0", "19", "1"]
    expected_action_order = [
        _AnalyzeActionsType.RECOGNIZE_ENTITIES,
        _AnalyzeActionsType.EXTRACT_KEY_PHRASES,
        _AnalyzeActionsType.RECOGNIZE_PII_ENTITIES,
        _AnalyzeActionsType.RECOGNIZE_LINKED_ENTITIES,
        _AnalyzeActionsType.ANALYZE_SENTIMENT,
        _AnalyzeActionsType.EXTRACT_SUMMARY
    ]
    for doc_idx, document_results in enumerate(results):
        # One result per action for each document.
        assert len(document_results) == 6
        for action_idx, document_result in enumerate(document_results):
            self.assertEqual(document_result.id, expected_document_order[doc_idx])
            self.assertEqual(
                self.document_result_to_action_type(document_result),
                expected_action_order[action_idx])
async def test_bad_credentials(self, client):
    """A client built with invalid credentials surfaces ClientAuthenticationError."""
    with self.assertRaises(ClientAuthenticationError):
        async with client:
            poller = await client.begin_analyze_actions(
                ["This is written in English."],
                actions=[
                    RecognizeEntitiesAction(),
                    ExtractKeyPhrasesAction(),
                    RecognizePiiEntitiesAction(),
                    RecognizeLinkedEntitiesAction(),
                    AnalyzeSentimentAction(),
                    ExtractSummaryAction()
                ],
                polling_interval=self._interval())
            await poller.result()
def test_empty_credential_class(self, client):
    """An empty credential is rejected with ClientAuthenticationError.

    Removed the unused ``response`` binding: begin_analyze_actions raises before
    returning a poller here, so the assignment was never observable.
    """
    with self.assertRaises(ClientAuthenticationError):
        client.begin_analyze_actions(
            ["This is written in English."],
            actions=[
                RecognizeEntitiesAction(),
                ExtractKeyPhrasesAction(),
                RecognizePiiEntitiesAction(),
                RecognizeLinkedEntitiesAction(),
                AnalyzeSentimentAction(),
                ExtractSummaryAction()
            ],
            polling_interval=self._interval(),
        )
async def test_missing_input_records_error(self, client):
    """An empty documents list is rejected client-side with ValueError."""
    with pytest.raises(ValueError) as excinfo:
        async with client:
            poller = await client.begin_analyze_actions(
                [],
                actions=[
                    RecognizeEntitiesAction(),
                    ExtractKeyPhrasesAction(),
                    RecognizePiiEntitiesAction(),
                    RecognizeLinkedEntitiesAction(),
                    AnalyzeSentimentAction(),
                    ExtractSummaryAction()
                ],
                polling_interval=self._interval())
            await poller.result()
    assert "Input documents can not be empty or None" in str(excinfo.value)
async def test_disable_service_logs(self, client):
    """Every action type accepts disable_service_logs=True and reports the flag as set."""
    actions = [
        RecognizeEntitiesAction(disable_service_logs=True),
        ExtractKeyPhrasesAction(disable_service_logs=True),
        RecognizePiiEntitiesAction(disable_service_logs=True),
        RecognizeLinkedEntitiesAction(disable_service_logs=True),
        AnalyzeSentimentAction(disable_service_logs=True),
        ExtractSummaryAction(disable_service_logs=True),
    ]
    assert all(action.disable_service_logs for action in actions)

    poller = await client.begin_analyze_actions(
        documents=["Test for logging disable"],
        actions=actions,
        polling_interval=self._interval(),
    )
    await poller.result()
async def test_too_many_documents(self, client):
    """Submitting 26 documents (limit is 25 per request) yields an HTTP 400."""
    # Maximum number of documents per request is 25
    docs = ["input document"] * 26

    with pytest.raises(HttpResponseError) as excinfo:
        async with client:
            poller = await client.begin_analyze_actions(
                docs,
                actions=[
                    RecognizeEntitiesAction(),
                    ExtractKeyPhrasesAction(),
                    RecognizePiiEntitiesAction(),
                    RecognizeLinkedEntitiesAction(),
                    AnalyzeSentimentAction(),
                    ExtractSummaryAction()
                ],
                polling_interval=self._interval())
            await poller.result()
    assert excinfo.value.status_code == 400
async def test_bad_model_version_error_multiple_tasks(self, client):
    """One bad model_version among otherwise-valid actions fails the job with HttpResponseError.

    Removed the unused ``response`` binding: the awaited result raises inside the
    ``pytest.raises`` block, so the assignment could never be observed.
    """
    docs = [{"id": "1", "language": "english", "text": "I did not like the hotel we stayed at."}]

    async with client:
        with pytest.raises(HttpResponseError):
            await (await client.begin_analyze_actions(
                docs,
                actions=[
                    RecognizeEntitiesAction(model_version="latest"),  # deliberately valid
                    ExtractKeyPhrasesAction(model_version="bad"),
                    RecognizePiiEntitiesAction(model_version="bad"),
                    RecognizeLinkedEntitiesAction(model_version="bad"),
                    AnalyzeSentimentAction(model_version="bad"),
                    ExtractSummaryAction(model_version="bad")
                ],
                polling_interval=self._interval())).result()
async def test_bad_model_version_error_all_tasks(self, client):  # TODO: verify behavior of service
    """All actions with a bad model_version fail the job with HttpResponseError.

    Removed the unused ``result`` binding: the awaited result raises inside the
    ``assertRaises`` block, so the assignment could never be observed.
    """
    docs = [{"id": "1", "language": "english", "text": "I did not like the hotel we stayed at."}]

    with self.assertRaises(HttpResponseError):
        async with client:
            await (await client.begin_analyze_actions(
                docs,
                actions=[
                    RecognizeEntitiesAction(model_version="bad"),
                    ExtractKeyPhrasesAction(model_version="bad"),
                    RecognizePiiEntitiesAction(model_version="bad"),
                    RecognizeLinkedEntitiesAction(model_version="bad"),
                    AnalyzeSentimentAction(model_version="bad"),
                    ExtractSummaryAction(model_version="bad")
                ],
                polling_interval=self._interval())).result()
def test_invalid_language_hint_method(self, client):
    """An invalid language hint makes every document come back as a document-level error."""
    poller = client.begin_analyze_actions(
        [
            "This should fail because we're passing in an invalid language hint"
        ],
        language="notalanguage",
        actions=[
            RecognizeEntitiesAction(),
            ExtractKeyPhrasesAction(),
            RecognizePiiEntitiesAction(),
            RecognizeLinkedEntitiesAction(),
            AnalyzeSentimentAction(),
            ExtractSummaryAction()
        ],
        polling_interval=self._interval(),
    )
    for document_results in poller.result():
        for doc in document_results:
            assert doc.is_error
async def test_show_stats_and_model_version_multiple_tasks(self, client):
    """show_stats=True returns per-task statistics for all 6 actions over 4 documents."""
    docs = [
        {"id": "56", "text": ":)"},
        {"id": "0", "text": ":("},
        {"id": "19", "text": ":P"},
        {"id": "1", "text": ":D"},
    ]

    def callback(resp):
        # Validate the raw job status payload: all 6 tasks finished cleanly.
        assert resp.raw_response
        tasks = resp.raw_response['tasks']
        assert tasks['completed'] == 6
        assert tasks['inProgress'] == 0
        assert tasks['failed'] == 0
        assert tasks['total'] == 6
        # Each per-action entry ("...Tasks" keys) carries stats for all 4 documents.
        task_keys = [key for key in tasks if "Tasks" in key]
        assert len(task_keys) == 6
        for key in task_keys:
            task = tasks[key]
            assert len(task) == 1
            task_stats = task[0]['results']['statistics']
            assert task_stats['documentsCount'] == 4
            assert task_stats['validDocumentsCount'] == 4
            assert task_stats['erroneousDocumentsCount'] == 0
            assert task_stats['transactionsCount'] == 4

    async with client:
        poller = await client.begin_analyze_actions(
            docs,
            actions=[
                RecognizeEntitiesAction(model_version="latest"),
                ExtractKeyPhrasesAction(model_version="latest"),
                RecognizePiiEntitiesAction(model_version="latest"),
                RecognizeLinkedEntitiesAction(model_version="latest"),
                AnalyzeSentimentAction(model_version="latest"),
                ExtractSummaryAction(model_version="latest")
            ],
            show_stats=True,
            polling_interval=self._interval(),
            raw_response_hook=callback,
        )
        response = await poller.result()
        pages = [page async for page in response]

    assert len(pages) == len(docs)

    action_order = [
        _AnalyzeActionsType.RECOGNIZE_ENTITIES,
        _AnalyzeActionsType.EXTRACT_KEY_PHRASES,
        _AnalyzeActionsType.RECOGNIZE_PII_ENTITIES,
        _AnalyzeActionsType.RECOGNIZE_LINKED_ENTITIES,
        _AnalyzeActionsType.ANALYZE_SENTIMENT,
        _AnalyzeActionsType.EXTRACT_SUMMARY
    ]
    for document_results in pages:
        assert len(document_results) == len(action_order)
        for document_result in document_results:
            # show_stats=True must populate per-document statistics.
            assert document_result.statistics
            assert document_result.statistics.character_count
            assert document_result.statistics.transaction_count