Example #1
0
    def test_sentence_resolver_training(self):
        """Fit the `train.resolve_sentence` pipeline and print its predictions.

        A tab-separated file is read without a header row and its columns are
        named ``y``, ``_y`` and ``text``. The pipeline is fitted on that frame
        and then asked to predict on the very same frame; every output column
        is printed.
        """
        import pandas as pd

        # NOTE(review): absolute, machine-specific path - the test only runs
        # where this exact file exists.
        train_file = '/home/ckl/Documents/freelance/jsl/nlu/nlu4realgit2/tests/datasets/AskAPatient.fold-0.train.txt'
        dataset = pd.read_csv(train_file, sep="\t", encoding="ISO-8859-1", header=None)
        dataset.columns = ["y", "_y", "text"]

        # Credentials are read from the `sct` secrets object.
        nlu.auth(
            sct.SPARK_NLP_LICENSE,
            sct.AWS_ACCESS_KEY_ID,
            sct.AWS_SECRET_ACCESS_KEY,
            sct.JSL_SECRET,
        )

        pipe = nlu.load('train.resolve_sentence', verbose=True)
        pipe.print_info()
        predictions = pipe.fit(dataset).predict(dataset, multithread=False)

        for column in predictions:
            print(column)
            print(predictions[column])
Example #2
0
    def test_chunk_resolver_training_custom_embeds(self):
        """Fit `train.resolve_chunks` with an explicitly requested embedding.

        Training a chunk resolver requires word embeddings. If none are
        specified, the default `glove` embeddings are used; an embedding
        reference placed before `train.resolve_chunks` in the load string
        (here `en.embed.glove.biovec`) is used instead of that default.
        """
        texts = ['super sleepy', 'bleeding from ears','bleeding from nose','bleeding from mouth']
        extra_info = ['bad disease', 'bad disease!', 'very bad', ' super bad  ']
        labels = ['lol','kek','lol','kek']
        dataset = pd.DataFrame({
            'text': texts,
            '_extra_info': extra_info,
            'label': labels,
        })

        nlu.auth(
            sct.SPARK_NLP_LICENSE,
            sct.AWS_ACCESS_KEY_ID,
            sct.AWS_SECRET_ACCESS_KEY,
            sct.JSL_SECRET,
        )

        # TODO: bert/elmo give weird storage-ref errors; only glove works.
        # TODO: write an issue in the healthcare lib about this.
        pipe = nlu.load('en.embed.glove.biovec train.resolve_chunks', verbose=True)
        pipe.print_info()
        predictions = pipe.fit(dataset).predict(dataset, multithread=False)

        for column in predictions:
            print(column)
            print(predictions[column])
Example #3
0
    def test_chunk_resolver(self):
        """Run a clinical NER plus ICD-O chunk resolver over sample notes and print each column."""
        nlu.auth(
            sct.SPARK_NLP_LICENSE,
            sct.AWS_ACCESS_KEY_ID,
            sct.AWS_SECRET_ACCESS_KEY,
            sct.JSL_SECRET,
        )

        # s0 is defined but not part of the predicted batch below.
        s0 = """DIAGNOSIS: Left breast adenocarcinoma stage T3 N1b M0, stage IIIA. She has been found more recently to have stage IV disease with metastatic deposits and recurrence involving the chest wall and lower left neck lymph nodes. PHYSICAL EXAMINATION NECK: On physical examination palpable lymphadenopathy is present in the left lower neck and supraclavicular area. No other cervical lymphadenopathy or supraclavicular lymphadenopathy is present. RESPIRATORY: Good air entry bilaterally. Examination of the chest wall reveals a small lesion where the chest wall recurrence was resected. No lumps, bumps or evidence of disease involving the right breast is present. ABDOMEN: Normal bowel sounds, no hepatomegaly. No tenderness on deep palpation. She has just started her last cycle of chemotherapy today, and she wishes to visit her daughter in Brooklyn, New York. After this she will return in approximately 3 to 4 weeks and begin her radiotherapy treatment at that time."""
        s1 = 'The patient has COVID. He got very sick with it.'
        s2 = 'Peter got the Corona Virus!'
        s3 = 'COVID 21 has been diagnosed on the patient'
        s4 = """This is an 82 - year-old male with a history of prior tobacco use , hypertension , chronic renal insufficiency , COPD , gastritis , and TIA who initially presented to Braintree with a non-ST elevation MI and Guaiac positive stools , transferred to St . Margaret's Center for Women & Infants for cardiac catheterization with PTCA to mid LAD lesion complicated by hypotension and bradycardia requiring Atropine , IV fluids and transient dopamine possibly secondary to vagal reaction , subsequently transferred to CCU for close monitoring , hemodynamically stable at the time of admission to the CCU"""
        s5 = "The patient has cancer and high fever and will die from Leukemia"
        s6 = 'This is an 11-year-old female who comes in for two different things. 1. She was seen by the allergist. No allergies present, so she stopped her Allegra, but she is still real congested and does a lot of snorting. They do not notice a lot of snoring at night though, but she seems to be always like that. 2. On her right great toe, she has got some redness and erythema. Her skin is kind of peeling a little bit, but it has been like that for about a week and a half now. General: Well-developed female, in no acute distress, afebrile. HEENT: Sclerae and conjunctivae clear. Extraocular muscles intact. TMs clear. Nares patent. A little bit of swelling of the turbinates on the left. Oropharynx is essentially clear. Mucous membranes are moist. Neck: No lymphadenopathy. Chest: Clear. Abdomen: Positive bowel sounds and soft. Dermatologic: She has got redness along the lateral portion of her right great toe, but no bleeding or oozing. Some dryness of her skin. Her toenails themselves are very short and even on her left foot and her left great toe the toenails are very short.'
        samples = [s1, s2, s3, s4, s5, s6]

        pipe = nlu.load('med_ner.jsl.wip.clinical resolve_chunk.icdo.clinical', verbose=True)
        res = pipe.predict(samples, drop_irrelevant_cols=False, metadata=True)

        print(res.columns)
        for column in res.columns:
            print(res[column])
Example #4
0
    def test_assertion_dl_model(self):
        """Load `en.ner.diseases` together with an ICD-10-CM sentence resolver and print the output.

        NOTE(review): despite the method name, no assertion model reference is
        loaded in this test.
        """
        nlu.auth(
            sct.SPARK_NLP_LICENSE,
            sct.AWS_ACCESS_KEY_ID,
            sct.AWS_SECRET_ACCESS_KEY,
            sct.JSL_SECRET,
        )

        # TODO: en.ner.ade error - not accessible in 2.7.6?
        samples = [
            'The patient has COVID. He got very sick with it.',
            'Peter got the Corona Virus!',
            'COVID 21 has been diagnosed on the patient',
        ]

        # TODO: marked "not correct" by the original author.
        resolver_ref = 'en.resolve_sentence.icd10cm.augmented_billable'
        pipe = nlu.load(f'en.ner.diseases {resolver_ref}', verbose=True)
        res = pipe.predict(samples, drop_irrelevant_cols=False, metadata=True)

        print(res.columns)
        for column in res:
            print(column)
            print(res[column])

        print(res)
Example #5
0
    def test_chunk_resolver_training(self):
        """Fit the `train.assert_dl` pipeline on a tiny frame and print its predictions.

        NOTE(review): the method name says "chunk resolver", but the pipeline
        loaded here is the trainable assertion one; the name is kept so that
        existing test selectors keep working.
        """
        dataset = pd.DataFrame({
            'text': ['super sleepy', 'bleeding from ears', 'bleeding from nose', 'bleeding from mouth'],
            '_extra_info': ['bad disease', 'bad disease!', 'very bad', ' super bad  '],
            'label': ['lol', 'kek', 'lol', 'kek'],
        })

        nlu.auth(
            sct.SPARK_NLP_LICENSE,
            sct.AWS_ACCESS_KEY_ID,
            sct.AWS_SECRET_ACCESS_KEY,
            sct.JSL_SECRET,
        )

        # Only one pipeline is loaded: the former extra load of 'train.assert'
        # was overwritten on the next line and never used.
        trainable_pipe = nlu.load('train.assert_dl', verbose=True)
        trainable_pipe.print_info()
        fitted_pipe = trainable_pipe.fit(dataset)
        res = fitted_pipe.predict(dataset, multithread=False)

        for c in res:
            print(c)
            print(res[c])
Example #6
0
    def test_generic_classifier(self):
        """Predict one sentence with `bert elmo` loaded and print every output column.

        Per the original notes, the elmo_embeddings and bert_embeddings outputs
        are what should be passed on to the feature assembler / generic
        classifier.
        """
        nlu.auth(
            sct.SPARK_NLP_LICENSE,
            sct.AWS_ACCESS_KEY_ID,
            sct.AWS_SECRET_ACCESS_KEY,
            sct.JSL_SECRET,
        )

        sentence = 'DR Johnson administerd to the patient Peter Parker last week 30 MG of penicilin'
        pipe = nlu.load('bert elmo', verbose=True)
        res = pipe.predict(sentence)

        for column in res:
            print(column)
            print(res[column])
Example #7
0
    def test_modin(self):
        """Resolve ICD-10-CM chunks for a slice of a tweet text column and print each column.

        The original notes state that this works with the RAY and DASK
        backends. Authentication uses a secrets JSON file instead of `sct`.
        """
        # NOTE(review): both paths are absolute and machine-specific.
        tweets_csv = '/home/ckl/old_home/Documents/freelance/jsl/nlu/nlu4realgit/tests/datasets/covid/covid19_tweets.csv'
        pdf = pd.read_csv(tweets_csv).iloc[:10]
        secrets_json_path = '/home/ckl/old_home/Documents/freelance/jsl/nlu/nlu4realgit/tests/nlu_hc_tests/spark_nlp_for_healthcare.json'

        # Other variants tried by the original author: a single text, a longer
        # series, and 'med_ner.jsl.wip.clinical resolve.icd10pcs' as reference.
        authed = nlu.auth(secrets_json_path)
        pipe = authed.load('en.resolve_chunk.icd10cm.clinical', verbose=True)
        res = pipe.predict(pdf.text[0:7], output_level='chunk')

        for column in res.columns:
            print(res[column])
Example #8
0
    def test_chunk_resolver_training_custom_embeds(self):
        """Fit `train.resolve_chunks` with an explicitly requested embedding.

        Training a chunk resolver requires word embeddings. If none are
        specified, the default `glove` embeddings are used; an embedding
        reference placed before `train.resolve_chunks` in the load string
        (here `en.embed.glove.healthcare_100d`) is used instead of that default.

        The training frame is read from a tab-separated file without a header
        row and its columns are named ``y``, ``_y`` and ``text``.
        """
        # NOTE(review): absolute, machine-specific path - the test only runs
        # where this exact file exists.
        p = '/home/ckl/Documents/freelance/jsl/nlu/nlu4realgit2/tests/datasets/AskAPatient.fold-0.train.txt'
        dataset = pd.read_csv(p, sep="\t", encoding="ISO-8859-1", header=None)
        dataset.columns = ["y", "_y", "text"]

        nlu.auth(
            sct.SPARK_NLP_LICENSE,
            sct.AWS_ACCESS_KEY_ID,
            sct.AWS_SECRET_ACCESS_KEY,
            sct.JSL_SECRET,
        )

        # TODO: bert/elmo give weird storage-ref errors; only glove works.
        # TODO: write an issue in the healthcare lib about this.
        trainable_pipe = nlu.load(
            'en.embed.glove.healthcare_100d train.resolve_chunks')
        trainable_pipe['chunk_resolver'].setNeighbours(350)

        trainable_pipe.print_info()
        fitted_pipe = trainable_pipe.fit(dataset)
        res = fitted_pipe.predict(dataset, multithread=False)

        for c in res:
            print(c)
            print(res[c])
Example #9
0
    def test_assertion_dl_model(self):
        """Predict with `en.assert.healthcare` on one clinical sentence and print the result."""
        nlu.auth(
            sct.SPARK_NLP_LICENSE,
            sct.AWS_ACCESS_KEY_ID,
            sct.AWS_SECRET_ACCESS_KEY,
            sct.JSL_SECRET,
        )

        sentence = 'Patient has a headache for the last 2 weeks and appears anxious when she walks fast. No alopecia noted. She denies pain'
        pipe = nlu.load('en.assert.healthcare', verbose=True)
        res = pipe.predict(sentence, metadata=True)

        print(res.columns)
        for column in res:
            print(res[column])
        print(res)
Example #10
0
    def test_pretrained_pipe(self):
        """Predict with the `en.explain_doc.era` reference on two sentences and print the result."""
        nlu.auth(
            sct.SPARK_NLP_LICENSE,
            sct.AWS_ACCESS_KEY_ID,
            sct.AWS_SECRET_ACCESS_KEY,
            sct.JSL_SECRET,
        )

        sentences = [
            'The patient has cancer and high fever and will die next week.',
            ' She had a seizure.',
        ]
        pipe = nlu.load('en.explain_doc.era', verbose=True)
        res = pipe.predict(sentences)

        for column in res:
            print(res[column])

        print(res)
Example #11
0
    def test_LicensedClassifier(self):
        """Predict with `en.classify.ade.conversational` on one sentence and print the result."""
        nlu.auth(
            sct.SPARK_NLP_LICENSE,
            sct.AWS_ACCESS_KEY_ID,
            sct.AWS_SECRET_ACCESS_KEY,
            sct.JSL_SECRET,
        )

        sentence = 'DR Johnson administerd to the patient Peter Parker last week 30 MG of penicilin'
        pipe = nlu.load('en.classify.ade.conversational', verbose=True)
        res = pipe.predict(sentence)

        print(res)
        for column in res:
            print(column)
            print(res[column])
Example #12
0
    def test_relation_extraction(self):
        """Run posology NER with the drug-drug-interaction relation reference and print each column."""
        nlu.auth(
            sct.SPARK_NLP_LICENSE,
            sct.AWS_ACCESS_KEY_ID,
            sct.AWS_SECRET_ACCESS_KEY,
            sct.JSL_SECRET,
        )

        note = 'The patient was prescribed 1 unit of Advil for 5 days after meals. The patient was also given 1 unit of Metformin daily. He was seen by the endocrinology service and she was discharged on 40 units of insulin glargine at night , 12 units of insulin lispro with meals , and metformin 1000 mg two times a day.'
        pipe = nlu.load('med_ner.posology relation.drug_drug_interaction', verbose=True)
        res = pipe.predict([note], drop_irrelevant_cols=False, metadata=True)

        for column in res:
            print(column)
            print(res[column])
Example #13
0
    def test_relation_extraction(self):
        """Run `en.ner.posology` with the drug-drug-interaction relation extractor and print each column."""
        nlu.auth(
            sct.SPARK_NLP_LICENSE,
            sct.AWS_ACCESS_KEY_ID,
            sct.AWS_SECRET_ACCESS_KEY,
            sct.JSL_SECRET,
        )

        # Author's note: basically all the relation extractor does is take POS
        # for every token and a bunch of converted NER chunks.
        # TODO: a bare 'en.extract_relation' reference crashes, both alone and
        #       combined with 'en.pos.clinical', 'en.ner.posology' or 'en.ner.jsl'.
        # TODO: the relation extractor should be fed 3 different NER models.
        note = 'The patient was prescribed 1 unit of Advil for 5 days after meals. The patient was also given 1 unit of Metformin daily. He was seen by the endocrinology service and she was discharged on 40 units of insulin glargine at night , 12 units of insulin lispro with meals , and metformin 1000 mg two times a day.'
        pipe = nlu.load('en.ner.posology en.extract_relation.drug_drug_interaction', verbose=True)
        res = pipe.predict([note])

        for column in res:
            print(column)
            print(res[column])
Example #14
0
    def test_multi_ner_pipe(self):
        """Load four NER references in one pipeline, predict on two sentences and print the result."""
        nlu.auth(
            sct.SPARK_NLP_LICENSE,
            sct.AWS_ACCESS_KEY_ID,
            sct.AWS_SECRET_ACCESS_KEY,
            sct.JSL_SECRET,
        )

        sentences = [
            'The patient has cancer and high fever and will die next week.',
            ' She had a seizure.',
        ]
        # The trailing space in the reference string is kept as in the original.
        pipe = nlu.load(
            'en.med_ner.tumour en.med_ner.radiology en.med_ner.diseases en.ner.onto ',
            verbose=True,
        )
        res = pipe.predict(sentences)

        for column in res:
            print(res[column])

        print(res)
Example #15
0
    def test_simple_dataset(self):
        """Fit `train.resolve_sentence` on a four-row frame and predict on its text column."""
        import pandas as pd
        import nlu

        nlu.auth(
            sct.SPARK_NLP_LICENSE,
            sct.AWS_ACCESS_KEY_ID,
            sct.AWS_SECRET_ACCESS_KEY,
            sct.JSL_SECRET,
        )

        texts = ['The Tesla company is good to invest is', 'TSLA is good to invest','TESLA INC. we should buy','PUT ALL MONEY IN TSLA inc!!']
        dataset = pd.DataFrame({
            'text': texts,
            'y': ['23'] * 4,
            '_y': ['TESLA'] * 4,
        })

        fitted = nlu.load('train.resolve_sentence').fit(dataset)
        res = fitted.predict(dataset.text)

        print(res.columns)
        for column in res:
            print(column)
            print(res[column])
    def test_deidentification(self):
        """Predict with `en.de_identify` on one sentence and print every output column."""
        nlu.auth(
            sct.SPARK_NLP_LICENSE,
            sct.AWS_ACCESS_KEY_ID,
            sct.AWS_SECRET_ACCESS_KEY,
            sct.JSL_SECRET,
        )

        sentence = 'DR Johnson administerd to the patient Peter Parker last week 30 MG of penicilin'
        pipe = nlu.load('en.de_identify', verbose=True)
        res = pipe.predict(sentence, drop_irrelevant_cols=False, metadata=True)

        for column in res:
            print(column)
            print(res[column])
Example #17
0
    def test_drug_normalizer(self):
        """Predict with `norm_drugs` at document level on five drug strings and print the result."""
        nlu.auth(
            sct.SPARK_NLP_LICENSE,
            sct.AWS_ACCESS_KEY_ID,
            sct.AWS_SECRET_ACCESS_KEY,
            sct.JSL_SECRET,
        )

        drug_strings = [
            "Agnogenic one half cup",
            "adalimumab 54.5 + 43.2 gm",
            "aspirin 10 meq/ 5 ml oral sol",
            "interferon alfa-2b 10 million unit ( 1 ml ) injec",
            "Sodium Chloride/Potassium Chloride 13bag",
        ]
        pipe = nlu.load('norm_drugs')
        res = pipe.predict(drug_strings, output_level='document')

        print(res.columns)
        for column in res:
            print(res[column])

        print(res)
Example #18
0
    def test_chunk_resolver(self):
        """Predict with `en.med_ner.ade.clinical` at chunk level and print every output column.

        NOTE(review): the method name says "chunk resolver", but only the ADE
        NER reference is loaded here; the name is kept so that existing test
        selectors keep working.
        """
        nlu.auth(
            sct.SPARK_NLP_LICENSE,
            sct.AWS_ACCESS_KEY_ID,
            sct.AWS_SECRET_ACCESS_KEY,
            sct.JSL_SECRET,
        )

        # Only the text that is actually predicted on is kept; the earlier
        # sample strings were assigned and then overwritten or never read.
        data = 'This is an 11-year-old female who comes in for two different things. 1. She was seen by the allergist. No allergies present, so she stopped her Allegra, but she is still real congested and does a lot of snorting. They do not notice a lot of snoring at night though, but she seems to be always like that. 2. On her right great toe, she has got some redness and erythema. Her skin is kind of peeling a little bit, but it has been like that for about a week and a half now. General: Well-developed female, in no acute distress, afebrile. HEENT: Sclerae and conjunctivae clear. Extraocular muscles intact. TMs clear. Nares patent. A little bit of swelling of the turbinates on the left. Oropharynx is essentially clear. Mucous membranes are moist. Neck: No lymphadenopathy. Chest: Clear. Abdomen: Positive bowel sounds and soft. Dermatologic: She has got redness along the lateral portion of her right great toe, but no bleeding or oozing. Some dryness of her skin. Her toenails themselves are very short and even on her left foot and her left great toe the toenails are very short.'

        # The prediction must be bound to `res`: the printing code below reads
        # `res`, and binding it to another name raised NameError.
        # output_level="chunk" yields one row per entity (per the original note).
        res = nlu.load('en.med_ner.ade.clinical').predict(data, output_level="chunk")

        print(res)
        for c in res.columns:
            print(res[c])
Example #19
0
    def test_context_parser(self):
        """Configure the `match.context` pipeline's context matcher and predict on a clinical note.

        Notes carried over from the original author:

        - contextLength defines the maximum distance prefix and suffix words
          can be away from the word to match, whereas context are words that
          must be immediately after or before the word to match.
        - The dictionary parameter defines the set of words to match and the
          word that will replace each match.
        """
        nlu.auth(
            sct.SPARK_NLP_LICENSE,
            sct.AWS_ACCESS_KEY_ID,
            sct.AWS_SECRET_ACCESS_KEY,
            sct.JSL_SECRET,
        )
        ContextParserTests().dump_data()

        # This short sample is replaced by the longer note further down.
        data = 'Patient has a headache for the last 2 weeks and appears anxious when she walks fast. No alopecia noted. She denies pain'

        context_pipe = nlu.load('match.context')
        context_pipe.print_info()

        # Switch off case sensitivity, combined prefix+suffix matching and
        # context matching on the matcher component.
        context_pipe['context_matcher'].setCaseSensitive(False)
        context_pipe['context_matcher'].setPrefixAndSuffixMatch(False)
        context_pipe['context_matcher'].setContextMatch(False)

        # NOTE(review): absolute, machine-specific resource paths.
        context_pipe['context_matcher'].setJsonPath(
            '/home/ckl/Documents/freelance/jsl/nlu/nlu4realgit2/tmp/trasgh/gender.json')
        context_pipe['context_matcher'].setDictionary(
            '/home/ckl/Documents/freelance/jsl/nlu/nlu4realgit2/tmp/trasgh/gender.csv',
            read_as=ReadAs.TEXT,
            options={"delimiter": ","},
        )

        data = """A 28-year-old female with a history of gestational diabetes mellitus diagnosed eight years prior to 
    presentation and subsequent type two diabetes mellitus ( T2DM ), one prior episode of HTG-induced pancreatitis 
    three years prior to presentation , associated with an acute hepatitis , and obesity with a body mass index 
    ( BMI ) of 33.5 kg/m2 , presented with a one-week history of polyuria , polydipsia , poor appetite , and vomiting.
    Two weeks prior to presentation , she was treated with a five-day course of amoxicillin for a respiratory tract infection . 
    She was on metformin , glipizide , and dapagliflozin for T2DM and atorvastatin and gemfibrozil for HTG . 
    She had been on dapagliflozin for six months at the time of presentation . Physical examination on presentation was 
    significant for dry oral mucosa ; significantly , her abdominal examination was benign with no tenderness , guarding , 
    or rigidity . Pertinent laboratory findings on admission were : serum glucose 111 mg/dl , bicarbonate 18 mmol/l , 
    anion gap 20 , creatinine 0.4 mg/dL , triglycerides 508 mg/dL , total cholesterol 122 mg/dL , glycated hemoglobin 
    ( HbA1c ) 10% , and venous pH 7.27 . Serum lipase was normal at 43 U/L . Serum acetone levels could not be assessed 
    as blood samples kept hemolyzing due to significant lipemia .
    The patient was initially admitted for starvation ketosis , as she reported poor oral intake for three days prior 
    to admission . However , serum chemistry obtained six hours after presentation revealed her glucose was 186 mg/dL , 
    the anion gap was still elevated at 21 , serum bicarbonate was 16 mmol/L , triglyceride level peaked at 2050 mg/dL , 
    and lipase was 52 U/L .
     β-hydroxybutyrate level was obtained and found to be elevated at 5.29 mmol/L - the original sample was centrifuged 
     and the chylomicron layer removed prior to analysis due to interference from turbidity caused by lipemia again . 
     The patient was treated with an insulin drip for euDKA and HTG with a reduction in the anion gap to 13 and triglycerides 
     to 1400 mg/dL , within 24 hours .
     Twenty days ago.
     Her euDKA was thought to be precipitated by her respiratory tract infection in the setting of SGLT2 inhibitor use . 
     At birth the typical boy is growing slightly faster than the typical girl, but the velocities become equal at about 
     seven months, and then the girl grows faster until four years. 
     From then until adolescence no differences in velocity 
     can be detected. 21-02-2020 
    21/04/2020
    """

        res = context_pipe.predict(data, metadata=True)

        print(res.columns)
        for column in res:
            print(res[column])
        print(res)