def test_rotate_subscription_key(self, resource_group, location, text_analytics_account, text_analytics_account_key):
    """Verify the client honors AzureKeyCredential rotation.

    Flow: succeed with the real key, fail after swapping in a bogus key,
    then succeed again once the real key is restored.
    """
    credential = AzureKeyCredential(text_analytics_account_key)
    client = TextAnalyticsClient(text_analytics_account, credential)

    docs = [
        {"id": "1", "text": "I will go to the park."},
        {"id": "2", "text": "I did not like the hotel we stayed at."},
        {"id": "3", "text": "The restaurant had really good food."},
    ]

    # Baseline: the original key authenticates successfully.
    result = client.begin_analyze_healthcare(docs, polling_interval=self._interval()).result()
    self.assertIsNotNone(result)

    # Rotate to a bad key -> the service must now reject the call.
    credential.update("xxx")  # Make authentication fail
    with self.assertRaises(ClientAuthenticationError):
        client.begin_analyze_healthcare(docs, polling_interval=self._interval()).result()

    # Rotate back to the real key -> calls succeed again.
    credential.update(text_analytics_account_key)  # Authenticate successfully again
    result = client.begin_analyze_healthcare(docs, polling_interval=self._interval()).result()
    self.assertIsNotNone(result)
def health_with_cancellation(self):
    """Sample: start a long-running healthcare analysis job, then cancel it."""
    # [START health_with_cancellation]
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.textanalytics import TextAnalyticsClient

    # Endpoint and key come from the environment, as in the other samples.
    endpoint = os.environ["AZURE_TEXT_ANALYTICS_ENDPOINT"]
    key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]

    text_analytics_client = TextAnalyticsClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key),
    )

    documents = [
        "RECORD #333582770390100 | MH | 85986313 | | 054351 | 2/14/2001 12:00:00 AM | \ CORONARY ARTERY DISEASE | Signed | DIS | Admission Date: 5/22/2001 \ Report Status: Signed Discharge Date: 4/24/2001 ADMISSION DIAGNOSIS: \ CORONARY ARTERY DISEASE. HISTORY OF PRESENT ILLNESS: \ The patient is a 54-year-old gentleman with a history of progressive angina over the past several months. \ The patient had a cardiac catheterization in July of this year revealing total occlusion of the RCA and \ 50% left main disease , with a strong family history of coronary artery disease with a brother dying at \ the age of 52 from a myocardial infarction and another brother who is status post coronary artery bypass grafting. \ The patient had a stress echocardiogram done on July , 2001 , which showed no wall motion abnormalities ,\ but this was a difficult study due to body habitus. The patient went for six minutes with minimal ST depressions \ in the anterior lateral leads , thought due to fatigue and wrist pain , his anginal equivalent. Due to the patient's \ increased symptoms and family history and history left main disease with total occasional of his RCA was referred \ for revascularization with open heart surgery."
    ]

    # Kick off the job, request cancellation, and block until the poller settles.
    poller = text_analytics_client.begin_analyze_healthcare(documents)
    text_analytics_client.begin_cancel_analyze_healthcare(poller)
    poller.wait()
def main(config):
    """Run healthcare entity analysis over the sample documents in ./docs.

    :param config: mapping providing config["azure"]["endpoint"] and
        config["azure"]["key"] for the Text Analytics resource.

    Prints every recognized entity (with any ontology links) and every
    relation to stdout. Error documents returned by the service are skipped.
    """
    endpoint = config["azure"]["endpoint"]
    key = config["azure"]["key"]
    print("Endpoint: " + endpoint)

    client = TextAnalyticsClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key),
        api_version="v3.1-preview.3")

    # load sample documents from docs folder
    path = os.path.join(os.curdir, 'docs')
    reader = sample_reader.SampleReader(path)
    documents = reader.process()

    # begin_analyze_healthcare returns a long-running-operation poller;
    # .result() blocks until the job completes.
    poller = client.begin_analyze_healthcare(documents, show_stats=True)
    result = poller.result()

    # Skip per-document service errors; only successful docs are reported.
    docs = [doc for doc in result if not doc.is_error]

    print("Results of Healthcare Analysis:")
    for doc in docs:  # index from the old enumerate was never used
        for entity in doc.entities:
            print("Entity: {}".format(entity.text))
            print("...Category: {}".format(entity.category))
            print("...Subcategory: {}".format(entity.subcategory))
            print("...Offset: {}".format(entity.offset))
            print("...Confidence score: {}".format(entity.confidence_score))
            if entity.links is not None:
                print("...Links:")
                for link in entity.links:
                    print("......ID: {}".format(link.id))
                    print("......Data source: {}".format(link.data_source))
        for relation in doc.relations:
            print("Relation:")
            print("...Source: {}".format(relation.source.text))
            print("...Target: {}".format(relation.target.text))
            print("...Type: {}".format(relation.relation_type))
            print("...Bidirectional: {}".format(relation.is_bidirectional))
        print("------------------------------------------")
def analyze_healthcare(self):
    """Sample: analyze one document for healthcare entities and relations.

    Reads the endpoint/key from the environment, runs the long-running
    healthcare analysis operation, and prints entities (with links) and
    relations for every non-error document.
    """
    # [START analyze_healthcare]
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.textanalytics import TextAnalyticsClient

    endpoint = os.environ["AZURE_TEXT_ANALYTICS_ENDPOINT"]
    key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]

    text_analytics_client = TextAnalyticsClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key),
        api_version="v3.1-preview.3")

    documents = ["Subject is taking 100mg of ibuprofen twice daily"]

    # .result() blocks until the long-running operation completes.
    poller = text_analytics_client.begin_analyze_healthcare(
        documents, show_stats=True)
    result = poller.result()

    # Skip per-document service errors.
    docs = [doc for doc in result if not doc.is_error]

    print("Results of Healthcare Analysis:")
    for doc in docs:  # index from the old enumerate was never used
        for entity in doc.entities:
            print("Entity: {}".format(entity.text))
            print("...Category: {}".format(entity.category))
            print("...Subcategory: {}".format(entity.subcategory))
            print("...Offset: {}".format(entity.offset))
            print("...Confidence score: {}".format(
                entity.confidence_score))
            if entity.links is not None:
                print("...Links:")
                for link in entity.links:
                    print("......ID: {}".format(link.id))
                    print("......Data source: {}".format(link.data_source))
        for relation in doc.relations:
            print("Relation:")
            print("...Source: {}".format(relation.source.text))
            print("...Target: {}".format(relation.target.text))
            print("...Type: {}".format(relation.relation_type))
            print("...Bidirectional: {}".format(relation.is_bidirectional))
        print("------------------------------------------")
def mars(config, argv):
    """Batch-process MARS records through the healthcare analysis API.

    :param config: mapping with config["azure"]["endpoint"] and
        config["azure"]["key"].
    :param argv: CLI-style args: argv[1]=directory, argv[2]=input file,
        argv[3]=output sub-directory.

    Writes two JSON files into directory/outputDir: the annotated records
    (output_<epoch>.json) and per-record API timings (metrics_<epoch>.json).
    """
    if len(argv) < 4:
        print("Invalid inputs")
        return

    dtStart = datetime.datetime.utcnow()
    apiTime = []  # per-record API latency measurements

    directory = argv[1]
    inputFile = argv[2]
    outputDir = argv[3]

    endpoint = config["azure"]["endpoint"]
    key = config["azure"]["key"]
    client = TextAnalyticsClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key),
        api_version="v3.1-preview.3")

    # load sample documents from docs folder
    path = os.path.join(directory, inputFile)
    print("Input File: {}".format(path))
    reader = mars_reader.MarsReader(path)
    data = reader.process()
    print("Documents read: {}".format(len(data)))

    # temp take only a small part of total
    data = data[100:200]

    # --------------------------------------------------------------------
    # per Microsoft documentation concerning document size and data limits
    # [https://docs.microsoft.com/en-us/azure/cognitive-services/text-analytics/concepts/data-limits?tabs=version-3]
    # the size of an individual document cannot exceed 5,120 characters and
    # the max number of docs per request is 10/1000 (web vs container).
    # To accommodate larger documents and adhere to the request batch size,
    # each document is sent individually, with larger documents broken into
    # multiple sub-documents. Larger documents are split at 4500 characters
    # per block, seeking the end of the current sentence within the block.
    # --------------------------------------------------------------------
    formatter = mars_formatter.MarsFormatter(limit=4500, noexceed=5100)

    for d in data:
        print("Document: {} -- processing".format(d["id"]))
        batch = formatter.prepare(d)

        # save chunking offsets so that positional findings in results can
        # be re-attributed to the original document
        chunks = []
        offset = 0
        for item in batch:
            chunk = {"id": item["id"], "start": offset, "length": len(item["text"])}
            offset += len(item["text"])
            chunks.append(chunk)
        d["chunks"] = chunks

        try:
            dtApiStart = datetime.datetime.utcnow()
            poller = client.begin_analyze_healthcare(batch, show_stats=True)
            result = poller.result()
            dtApiEnd = datetime.datetime.utcnow()
            apiTime.append({"record": d["id"], "seconds": (dtApiEnd - dtApiStart).total_seconds()})

            rez = []
            results = [r for r in result if not r.is_error]
            for r in results:
                rezd = {"id": r.id}
                entities = []
                for entity in r.entities:
                    e = {
                        "entity": entity.text,
                        "category": entity.category,
                        "subcategory": entity.subcategory,
                        "offset": entity.offset,
                        "score": entity.confidence_score,
                        "links": [],
                    }
                    if entity.links is not None:
                        e["links"] = [
                            {"id": link.id, "source": link.data_source}
                            for link in entity.links
                        ]
                    entities.append(e)
                # NOTE: the original rebound the loop variable `r` here,
                # shadowing the result document; use a distinct name.
                relations = [
                    {
                        "source": relation.source.text,
                        "target": relation.target.text,
                        "type": relation.relation_type,
                        "is_bidirectional": relation.is_bidirectional,
                    }
                    for relation in r.relations
                ]
                rezd["entities"] = entities
                rezd["relations"] = relations
                rez.append(rezd)

            d["results"] = rez
            d["error"] = False
        except Exception as exc:
            # Was a bare `except:` that silently swallowed everything,
            # including SystemExit/KeyboardInterrupt. Narrow and report.
            print("Document: {} --- failed: {}".format(d["id"], exc))
            d["error"] = True
        finally:
            print("Document: {} --- complete".format(d["id"]))

    dt = datetime.datetime.utcnow()
    print("Process complete, Total run time {} seconds".format((dt - dtStart).total_seconds()))

    # Seconds since the Unix epoch, used to uniquify output filenames.
    eo = datetime.datetime(1970, 1, 1)
    epoch = (dt - eo).total_seconds()

    outputFile = os.path.join(directory, outputDir, "output_{}.json".format(epoch))
    print("Writing output file [{}].".format(outputFile))
    with open(outputFile, 'w+') as out:
        json.dump(data, out)

    apiMetricsFile = os.path.join(directory, outputDir, "metrics_{}.json".format(epoch))
    with open(apiMetricsFile, 'w+') as out:
        json.dump(apiTime, out)
def main(config):
    """Analyze the sample documents in ./docs and dump results to JSON.

    :param config: mapping with config["azure"]["endpoint"] and
        config["azure"]["key"].

    Large documents are split into sub-documents to respect the service
    data limits, each batch is analyzed, and the annotated documents are
    written to <script dir>/dumps/out.json.
    """
    endpoint = config["azure"]["endpoint"]
    key = config["azure"]["key"]
    print("Endpoint: " + endpoint)

    client = TextAnalyticsClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key),
        api_version="v3.1-preview.3")

    # load sample documents from docs folder
    path = os.path.join(os.curdir, 'docs')
    reader = sample_reader.SampleReader(path)
    data = reader.process()
    print("Documents [{}]".format(len(data)))

    # --------------------------------------------------------------------
    # per Microsoft documentation concerning document size and data limits
    # [https://docs.microsoft.com/en-us/azure/cognitive-services/text-analytics/concepts/data-limits?tabs=version-3]
    # the size of an individual document cannot exceed 5,120 characters and
    # the max number of docs per request is 10/1000 (web vs container).
    # To accommodate larger documents and adhere to the request batch size,
    # each document is sent individually, with larger documents broken into
    # multiple sub-documents. Larger documents are split at 4000 characters
    # per block, seeking the end of the current sentence within the block.
    # --------------------------------------------------------------------
    formatter = sample_formatter.SampleFormatter(limit=4000, noexceed=5100)

    for document in data:
        print("Document:{} Size:{}".format(document["name"], len(document["text"])))
        batch = formatter.prepare(document=document)
        for item in batch:
            print("Id:{} Size:{}".format(item["id"], len(item["text"])))

        print("Processing batch with MS Text Analytics..")
        poller = client.begin_analyze_healthcare(batch, show_stats=True)
        result = poller.result()

        print("Results of Healthcare Analysis:")
        rez = []
        docs = [doc for doc in result if not doc.is_error]
        for doc in docs:  # index from the old enumerate was never used
            rezd = {"id": doc.id}
            rezd["entities"] = [
                {
                    "entity": entity.text,
                    "category": entity.category,
                    "subcategory": entity.subcategory,
                    "offset": entity.offset,
                    "score": entity.confidence_score,
                }
                for entity in doc.entities
            ]
            rezd["relations"] = [
                {
                    "source": relation.source.text,
                    "target": relation.target.text,
                    "type": relation.relation_type,
                    "is_bidirectional": relation.is_bidirectional,
                }
                for relation in doc.relations
            ]
            rez.append(rezd)
        document["results"] = rez

    # write output
    outputFile = os.path.join(os.path.dirname(os.path.abspath(__file__)), "dumps", "out.json")
    print("Writing output file [{}].".format(outputFile))
    with open(outputFile, 'w+') as out:
        json.dump(data, out)