Code example #1
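    # Assumes the SDK test suite's module-level imports and fixtures:
    # TextAnalyticsClient, AzureKeyCredential, ClientAuthenticationError,
    # and the decorator-provided resource_group / text_analytics_account values.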
    def test_rotate_subscription_key(self, resource_group, location,
                                     text_analytics_account,
                                     text_analytics_account_key):

        credential = AzureKeyCredential(text_analytics_account_key)
        client = TextAnalyticsClient(text_analytics_account, credential)

        docs = [{
            "id": "1",
            "text": "I will go to the park."
        }, {
            "id": "2",
            "text": "I did not like the hotel we stayed at."
        }, {
            "id": "3",
            "text": "The restaurant had really good food."
        }]

        response = client.begin_analyze_healthcare(
            docs, polling_interval=self._interval()).result()
        self.assertIsNotNone(response)

        credential.update("xxx")  # Make authentication fail
        with self.assertRaises(ClientAuthenticationError):
            response = client.begin_analyze_healthcare(
                docs, polling_interval=self._interval()).result()

        credential.update(
            text_analytics_account_key)  # Authenticate successfully again
        response = client.begin_analyze_healthcare(
            docs, polling_interval=self._interval()).result()
        self.assertIsNotNone(response)
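The pattern of interest here is AzureKeyCredential.update(), which rotates the key on a live client without re-creating it; requests made while the wrong key is set fail with ClientAuthenticationError, and updating back to the valid key restores access.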
Code example #2
    def health_with_cancellation(self):
        # [START health_with_cancellation]
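        # `os` is assumed to be imported at module level in the original sample file.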
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.textanalytics import TextAnalyticsClient

        endpoint = os.environ["AZURE_TEXT_ANALYTICS_ENDPOINT"]
        key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]

        text_analytics_client = TextAnalyticsClient(
            endpoint=endpoint,
            credential=AzureKeyCredential(key),
        )

        documents = [
            "RECORD #333582770390100 | MH | 85986313 | | 054351 | 2/14/2001 12:00:00 AM | \
            CORONARY ARTERY DISEASE | Signed | DIS | Admission Date: 5/22/2001 \
            Report Status: Signed Discharge Date: 4/24/2001 ADMISSION DIAGNOSIS: \
            CORONARY ARTERY DISEASE. HISTORY OF PRESENT ILLNESS: \
            The patient is a 54-year-old gentleman with a history of progressive angina over the past several months. \
            The patient had a cardiac catheterization in July of this year revealing total occlusion of the RCA and \
            50% left main disease , with a strong family history of coronary artery disease with a brother dying at \
            the age of 52 from a myocardial infarction and another brother who is status post coronary artery bypass grafting. \
            The patient had a stress echocardiogram done on July , 2001 , which showed no wall motion abnormalities ,\
            but this was a difficult study due to body habitus. The patient went for six minutes with minimal ST depressions \
            in the anterior lateral leads , thought due to fatigue and wrist pain , his anginal equivalent. Due to the patient's \
            increased symptoms and family history and history left main disease with total occasional of his RCA was referred \
            for revascularization with open heart surgery."
        ]

        poller = text_analytics_client.begin_analyze_healthcare(documents)
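        # Request cancellation of the job started above, then wait for the poller to finish.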
        text_analytics_client.begin_cancel_analyze_healthcare(poller)
        poller.wait()
Code example #3
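# Assumes module-level imports not shown in this fragment: os, TextAnalyticsClient,
# AzureKeyCredential, and the project-local sample_reader module.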
def main(config):
    endpoint = config["azure"]["endpoint"]
    key = config["azure"]["key"]

    print("Endpoint: " + endpoint)

    client = TextAnalyticsClient(endpoint=endpoint,
                                 credential=AzureKeyCredential(key),
                                 api_version="v3.1-preview.3")

    #documents = [
    #    {"id":"1", "language":"en" ,"text":"Subject is taking 100mg of ibuprofen twice daily"},
    #]

    # load sample documents from docs folder
    path = os.path.join(os.curdir, 'docs')
    reader = sample_reader.SampleReader(path)
    documents = reader.process()

    poller = client.begin_analyze_healthcare(documents, show_stats=True)
    result = poller.result()

    docs = [doc for doc in result if not doc.is_error]

    print("Results of Healthcare Analysis:")
    for idx, doc in enumerate(docs):
        for entity in doc.entities:
            print("Entity: {}".format(entity.text))
            print("...Category: {}".format(entity.category))
            print("...Subcategory: {}".format(entity.subcategory))
            print("...Offset: {}".format(entity.offset))
            print("...Confidence score: {}".format(entity.confidence_score))
            if entity.links is not None:
                print("...Links:")
                for link in entity.links:
                    print("......ID: {}".format(link.id))
                    print("......Data source: {}".format(link.data_source))
        for relation in doc.relations:
            print("Relation:")
            print("...Source: {}".format(relation.source.text))
            print("...Target: {}".format(relation.target.text))
            print("...Type: {}".format(relation.relation_type))
            print("...Bidirectional: {}".format(relation.is_bidirectional))
        print("------------------------------------------")
Code example #4
    def analyze_healthcare(self):
        # [START analyze_healthcare]
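        # `os` is imported at module level; api_version pins the preview
        # service version this sample was written against.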
        from azure.core.credentials import AzureKeyCredential
        from azure.ai.textanalytics import TextAnalyticsClient

        endpoint = os.environ["AZURE_TEXT_ANALYTICS_ENDPOINT"]
        key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]

        text_analytics_client = TextAnalyticsClient(
            endpoint=endpoint,
            credential=AzureKeyCredential(key),
            api_version="v3.1-preview.3")

        documents = ["Subject is taking 100mg of ibuprofen twice daily"]

        poller = text_analytics_client.begin_analyze_healthcare(
            documents, show_stats=True)
        result = poller.result()

        docs = [doc for doc in result if not doc.is_error]

        print("Results of Healthcare Analysis:")
        for idx, doc in enumerate(docs):
            for entity in doc.entities:
                print("Entity: {}".format(entity.text))
                print("...Category: {}".format(entity.category))
                print("...Subcategory: {}".format(entity.subcategory))
                print("...Offset: {}".format(entity.offset))
                print("...Confidence score: {}".format(
                    entity.confidence_score))
                if entity.links is not None:
                    print("...Links:")
                    for link in entity.links:
                        print("......ID: {}".format(link.id))
                        print("......Data source: {}".format(link.data_source))
            for relation in doc.relations:
                print("Relation:")
                print("...Source: {}".format(relation.source.text))
                print("...Target: {}".format(relation.target.text))
                print("...Type: {}".format(relation.relation_type))
                print("...Bidirectional: {}".format(relation.is_bidirectional))
            print("------------------------------------------")
Code example #5
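# Assumes module-level imports not shown in this fragment: os, json, datetime,
# TextAnalyticsClient, AzureKeyCredential, and the project-local mars_reader /
# mars_formatter modules.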
def mars(config, argv):

    if len(argv) < 4:
        print("Usage: mars <directory> <inputFile> <outputDir>")
        return

    dtStart = datetime.datetime.utcnow()
    apiTime = []

    directory = argv[1]
    inputFile = argv[2]
    outputDir = argv[3]

    endpoint = config["azure"]["endpoint"]
    key = config["azure"]["key"]
    client = TextAnalyticsClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key),
        api_version="v3.1-preview.3")

    # load sample documents from docs folder
    path = os.path.join(directory, inputFile)

    print("Input File: {}".format(path))

    reader = mars_reader.MarsReader(path)
    data = reader.process()

    print("Documents read: {}".format(len(data)))

    # temporary: process only a small slice of the total for testing
    data = data[100:200]

    # --------------------------------------------------------------------
    # Per Microsoft documentation on document size and data limits
    # [https://docs.microsoft.com/en-us/azure/cognitive-services/text-analytics/concepts/data-limits?tabs=version-3]
    # an individual document cannot exceed 5,120 characters, and the max
    #   number of documents per request is 10 (web) vs. 1,000 (container).
    # To accommodate larger documents and adhere to the request batch size,
    #   each document is sent individually, with larger documents broken
    #   into multiple sub-documents.
    # Note: larger documents are split at 4,500 characters per block, seeking
    #   the end of the current sentence within the same block
    #   (see the sketch after this example for one possible implementation).
    # --------------------------------------------------------------------
    formatter = mars_formatter.MarsFormatter(limit=4500, noexceed=5100)

    for d in data:
        print("Document: {} -- processing".format(d["id"]))
        batch = formatter.prepare(d)

        chunks = []
        offset = 0

        # save chunk offsets so positional findings in the results can be re-attributed to the original document
        for item in batch:
            chunk = {"id": item["id"], "start": offset, "length": len(item["text"])}
            offset += len(item["text"])
            chunks.append(chunk)
        d["chunks"] = chunks

        try:
            dtApiStart = datetime.datetime.utcnow()
            poller = client.begin_analyze_healthcare(batch, show_stats=True)
            result = poller.result()
            dtApiEnd = datetime.datetime.utcnow()

            ttr = {"record": d["id"],  "seconds": (dtApiEnd-dtApiStart).total_seconds()}
            apiTime.append(ttr)

            rez = []
            results = [r for r in result if not r.is_error]
            for idx, r in enumerate(results):
                rezd = {"id": r.id}
                entities = []
                relations = []
                for entity in r.entities:
                    e = {
                        "entity": entity.text,
                        "category": entity.category,
                        "subcategory": entity.subcategory,
                        "offset": entity.offset,
                        "score": entity.confidence_score,
                        "links": [],
                    }
                    if entity.links is not None:
                        links = []
                        for link in entity.links:
                            links.append({"id": link.id, "source": link.data_source})
                        e["links"] = links

                    entities.append(e)

                for relation in r.relations:
                    # use a distinct name so the result document `r` is not shadowed
                    rel = {
                        "source": relation.source.text,
                        "target": relation.target.text,
                        "type": relation.relation_type,
                        "is_bidirectional": relation.is_bidirectional
                    }
                    relations.append(rel)
                rezd["entities"] = entities
                rezd["relations"] = relations
                rez.append(rezd)
            d["results"] = rez
            d["error"] = False
        except Exception as ex:
            print("Document: {} --- failed: {}".format(d["id"], ex))
            d["error"] = True
        finally:
            print("Document: {} --- complete".format(d["id"]))

    dt = datetime.datetime.utcnow()

    print("Process complete, Total run time {} seconds".format((dt-dtStart).total_seconds()))

    # seconds since the Unix epoch, used to make the output file names unique
    eo = datetime.datetime(1970, 1, 1)
    epoch = (dt - eo).total_seconds()

    outputFile = os.path.join(directory, outputDir, "output_{}.json".format(epoch))
    print("Writing output file [{}].".format(outputFile))
    with open(outputFile, 'w+') as out:
        json.dump(data, out)

    apiMetricsFile = os.path.join(directory, outputDir, "metrics_{}.json".format(epoch))
    with open(apiMetricsFile, 'w+') as out:
        json.dump(apiTime, out)
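MarsReader and MarsFormatter are project-local modules whose source is not shown here. As a rough illustration of the splitting strategy the comment block describes, a minimal sketch might look like the following (split_document and to_original_offset are hypothetical helpers, not the actual MarsFormatter API):

def split_document(doc_id, text, limit=4500, noexceed=5100):
    """Split text into sub-documents, preferring to end each block at a
    sentence boundary after `limit` characters, never exceeding `noexceed`."""
    chunks = []
    start, part = 0, 0
    while start < len(text):
        end = min(start + limit, len(text))
        if end < len(text):
            # look for the end of the current sentence within the hard cap
            stop = text.find(". ", end, start + noexceed)
            if stop != -1:
                end = stop + 1
        chunks.append({"id": "{}_{}".format(doc_id, part),
                       "text": text[start:end],
                       "start": start})
        start, part = end, part + 1
    return chunks

def to_original_offset(chunks, chunk_id, offset):
    """Map a chunk-relative entity offset back into the original document."""
    for chunk in chunks:
        if chunk["id"] == chunk_id:
            return chunk["start"] + offset
    raise KeyError(chunk_id)

With the chunk start positions saved alongside each sub-document, re-attributing an entity offset reported by the service is just an addition, which is what the chunks list built in the loop above enables.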
Code example #6
File: main.py  Project: karlmoad/analytics
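# Assumes module-level imports not shown in this fragment: os, json,
# TextAnalyticsClient, AzureKeyCredential, and the project-local
# sample_reader / sample_formatter modules.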
def main(config):
    endpoint = config["azure"]["endpoint"]
    key = config["azure"]["key"]

    print("Endpoint: " + endpoint)

    client = TextAnalyticsClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key),
        api_version="v3.1-preview.3")

    # load sample documents from docs folder
    path = os.path.join(os.curdir, 'docs')
    reader = sample_reader.SampleReader(path)
    data = reader.process()

    print("Documents [{}]".format(len(data)))

    # --------------------------------------------------------------------
    # Per Microsoft documentation on document size and data limits
    # [https://docs.microsoft.com/en-us/azure/cognitive-services/text-analytics/concepts/data-limits?tabs=version-3]
    # an individual document cannot exceed 5,120 characters, and the max
    #   number of documents per request is 10 (web) vs. 1,000 (container).
    # To accommodate larger documents and adhere to the request batch size,
    #   each document is sent individually, with larger documents broken
    #   into multiple sub-documents.
    # Note: larger documents are split at 4,000 characters per block, seeking
    #   the end of the current sentence within the same block
    #   (see the sketch after code example #5 for one possible approach).
    # --------------------------------------------------------------------

    formatter = sample_formatter.SampleFormatter(limit=4000, noexceed=5100)

    for document in data:
        print("Document:{} Size:{}".format(document["name"], len(document["text"])))
        batch = formatter.prepare(document=document)
        for item in batch:
            print("Id:{} Size:{}".format(item["id"], len(item["text"])))

        print("Processing batch with MS Text Analytics..")
        poller = client.begin_analyze_healthcare(batch, show_stats=True)
        result = poller.result()

        print("Results of Healthcare Analysis:")
        rez = []
        docs = [doc for doc in result if not doc.is_error]
        for idx, doc in enumerate(docs):
            rezd = {"id": doc.id}
            entities = []
            relations = []
            for entity in doc.entities:
                e = {
                    "entity": entity.text,
                    "category": entity.category,
                    "subcategory": entity.subcategory,
                    "offset": entity.offset,
                    "score": entity.confidence_score,
                }
                entities.append(e)

            for relation in doc.relations:
                r = {
                    "source": relation.source.text,
                    "target": relation.target.text,
                    "type": relation.relation_type,
                    "is_bidirectional": relation.is_bidirectional
                }
                relations.append(r)
            rezd["entities"] = entities
            rezd["relations"] = relations
            rez.append(rezd)
        document["results"] = rez

    # write output

    outputFile = os.path.join(os.path.dirname(os.path.abspath(__file__)), "dumps", "out.json")
    print("Writing output file [{}].".format(outputFile))
    with open(outputFile, 'w+') as out:
        json.dump(data, out)