Beispiel #1
0
    def test_modin(self):
        """Check that ``predict`` returns the dataframe flavour it was given.

        The 'sentiment' pipeline is run on a modin, a pandas and a Spark
        dataframe built from the same three sentences, and each result is
        checked against the matching dataframe class.
        """
        # ## works with RAY and DASK backends
        data = {
            "text": [
                'This day sucks but tomorrow will be better ! ',
                'I love this day', 'I dont like Sami'
            ]
        }
        mdf = mpd.DataFrame(data)
        res = nlu.load('sentiment').predict(mdf)

        print(res)
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(type(res) == ...), which only reports "False is not true".
        self.assertIsInstance(res, mpd.DataFrame)
        print(data)
        pdf = pd.DataFrame(data)
        print(pdf)
        res = nlu.load('sentiment').predict(pdf)
        print(res)
        self.assertIsInstance(res, pd.DataFrame)

        print('TESTING SDF')
        sdf = nlu.spark.createDataFrame(pdf)
        res = nlu.load('sentiment', verbose=True).predict(sdf)
        self.assertIsInstance(res, pyspark.sql.dataframe.DataFrame)

        res.show()
Beispiel #2
0
    def test_chunk_resolver_training(self):
        """Fit the 'train.assert_dl' pipeline on a tiny frame and print its predictions.

        A four-row frame with 'text', '_extra_info' and 'label' columns is
        used both for fitting and for predicting; every output column is
        printed by name followed by its values.

        NOTE(review): despite the test name, the references loaded here are
        assertion-training ones, not a chunk resolver - confirm the intent.
        """
        texts = ['super sleepy', 'bleeding from ears','bleeding from nose','bleeding from mouth']
        extra_info = ['bad disease', 'bad disease!', 'very bad', ' super bad  ']
        labels = ['lol','kek','lol','kek']
        train_df = pd.DataFrame({
            'text': texts,
            '_extra_info': extra_info,
            'label': labels,
        })

        nlu.auth(sct.SPARK_NLP_LICENSE, sct.AWS_ACCESS_KEY_ID,
                 sct.AWS_SECRET_ACCESS_KEY, sct.JSL_SECRET)

        # NOTE(review): the 'train.assert' pipeline is replaced right away by
        # the 'train.assert_dl' one; only the latter is fitted.
        pipe = nlu.load('train.assert', verbose=True)
        pipe = nlu.load('train.assert_dl', verbose=True)
        pipe.print_info()

        fitted = pipe.fit(train_df)
        predictions = fitted.predict(train_df, multithread=False)

        for column in predictions:
            print(column)
            print(predictions[column])
Beispiel #3
0
    def test_bad_ref(self):
        """Predict 'Hello world' with two NER references and print each result.

        For every reference the resulting frame and its columns are printed.
        The result of each prediction is kept, so the second printout shows
        the second reference's output rather than repeating the first one.
        """
        for nlu_ref in ('en.ner.onto.sm', 'en.ner.dl.bert'):
            df = nlu.load(nlu_ref).predict('Hello world')

            print(df)
            print(df.columns)
 def test_every_default_component(self, nlu_reference, id):
     """Load ``nlu_reference``, predict one fixed sentence and print the outcome.

     ``id`` is accepted but not used inside this method.
     """
     import nlu
     print('TESTING NLU REFERENCE : ', nlu_reference)
     pipeline = nlu.load(nlu_reference)
     predictions = pipeline.predict('What a wonderful day!')
     print(predictions)
     print(predictions.columns)
     print('TESTING DONE FOR NLU REFERENCE : ', nlu_reference)
Beispiel #5
0
    def test_quick(self):
        """Load a pipeline from a local directory and print sentence-level output.

        The pipeline is read from a hard-coded absolute path and applied to
        the frame returned by ``get_sample_pdf_with_extra_cols_and_entities``;
        every output column is printed.
        """
        # TODO COL OVERLAPS!! --> each annotator (or at least classifiers/overlaps)
        # will be named with <type>@<nlu_ref_leaf> === <type>@identifier(nlu_ref_leaf)
        # TODO 'en.extract_relation.bodypart.problem' is a bad reference
        # TODO 'en.med_ner.diseases' may hit an auth bug
        df = get_sample_pdf_with_extra_cols_and_entities()

        ref = '/home/loan/tmp/nlu_models_offline_test/analyze_sentiment_en_3.0.0_3.0_1616544471011'
        res = nlu.load(path=ref, verbose=True).predict(df, output_level='sentence')

        for c in res:
            print(res[c])
Beispiel #6
0
    def test_chunk_resolver_training_custom_embeds(self):
        """Train a chunk resolver on top of an explicitly chosen word embedding.

        Training a chunk resolver requires word embeddings. If the load
        string names none, the default `glove` embeddings are used; an
        embedding reference placed before `train.resolve_chunks` is used
        instead of that default. Here 'en.embed.glove.biovec' is requested.
        """
        frame_columns = {
            'text': ['super sleepy', 'bleeding from ears','bleeding from nose','bleeding from mouth'],
            '_extra_info': ['bad disease', 'bad disease!', 'very bad', ' super bad  '],
            'label': ['lol','kek','lol','kek'],
        }
        train_df = pd.DataFrame(frame_columns)

        nlu.auth(sct.SPARK_NLP_LICENSE, sct.AWS_ACCESS_KEY_ID,
                 sct.AWS_SECRET_ACCESS_KEY, sct.JSL_SECRET)

        # TODO bert/elmo embeddings give weird storage ref errors here and only
        # glove works; write an issue in the healthcare lib about this.
        pipe = nlu.load('en.embed.glove.biovec train.resolve_chunks', verbose=True)
        pipe.print_info()

        fitted = pipe.fit(train_df)
        predictions = fitted.predict(train_df, multithread=False)

        for column in predictions:
            print(column)
            print(predictions[column])
Beispiel #7
0
    def test_generic_classifier(self):
        """Run the 'bert elmo' pipeline on one sentence and print every output column."""
        nlu.auth(sct.SPARK_NLP_LICENSE, sct.AWS_ACCESS_KEY_ID,
                 sct.AWS_SECRET_ACCESS_KEY, sct.JSL_SECRET)

        sentence = 'DR Johnson administerd to the patient Peter Parker last week 30 MG of penicilin'
        pipeline = nlu.load('bert elmo', verbose=True)
        res = pipeline.predict(sentence)

        # elmo_embeddings and bert_embeddings are what should be passed to
        # the feature assembler / generic classifier.
        for column in res:
            print(column)
            print(res[column])
Beispiel #8
0
    def test_sentence_resolver_training(self):
        """Fit 'train.resolve_sentence' on a tab-separated dataset and print the predictions.

        The file is read without a header row and its columns are named
        'y', '_y' and 'text'. The same frame is used for fitting and for
        predicting.
        """
        import pandas as pd

        path = '/home/ckl/Documents/freelance/jsl/nlu/nlu4realgit2/tests/datasets/AskAPatient.fold-0.train.txt'
        train_df = pd.read_csv(path, sep="\t", encoding="ISO-8859-1", header=None)
        train_df.columns = ["y", "_y", "text"]

        nlu.auth(sct.SPARK_NLP_LICENSE, sct.AWS_ACCESS_KEY_ID,
                 sct.AWS_SECRET_ACCESS_KEY, sct.JSL_SECRET)

        pipe = nlu.load('train.resolve_sentence', verbose=True)
        pipe.print_info()

        fitted = pipe.fit(train_df)
        predictions = fitted.predict(train_df, multithread=False)

        for column in predictions:
            print(column)
            print(predictions[column])
Beispiel #9
0
    def test_assertion_dl_model(self):
        """Run 'en.ner.diseases' with a sentence resolver reference and print the output.

        Three short sentences are predicted with irrelevant columns kept and
        metadata enabled; the columns, each column's values and the whole
        frame are printed.
        """
        nlu.auth(sct.SPARK_NLP_LICENSE, sct.AWS_ACCESS_KEY_ID,
                 sct.AWS_SECRET_ACCESS_KEY, sct.JSL_SECRET)

        # TODO en.ner.ade: error, not accessible in 2.7.6??
        sentences = [
            'The patient has COVID. He got very sick with it.',
            'Peter got the Corona Virus!',
            'COVID 21 has been diagnosed on the patient',
        ]

        # TODO Not correct
        ner_ref = 'en.ner.diseases'
        resolver_ref = 'en.resolve_sentence.icd10cm.augmented_billable'
        pipeline = nlu.load(' '.join((ner_ref, resolver_ref)), verbose=True)
        res = pipeline.predict(sentences, drop_irrelevant_cols=False, metadata=True)

        print(res.columns)
        for column in res:
            print(column)
            print(res[column])

        print(res)
    def test_chunk_resolver(self):
        """Run clinical NER plus the ICD-O chunk resolver reference and print each column.

        Six clinical texts are predicted with
        'med_ner.jsl.wip.clinical resolve_chunk.icdo.clinical', keeping
        irrelevant columns and metadata. The column index is printed first,
        then the values of every column.
        """
        nlu.auth(sct.SPARK_NLP_LICENSE, sct.AWS_ACCESS_KEY_ID,
                 sct.AWS_SECRET_ACCESS_KEY, sct.JSL_SECRET)

        s1 = 'The patient has COVID. He got very sick with it.'
        s2 = 'Peter got the Corona Virus!'
        s3 = 'COVID 21 has been diagnosed on the patient'
        s4 = """This is an 82 - year-old male with a history of prior tobacco use , hypertension , chronic renal insufficiency , COPD , gastritis , and TIA who initially presented to Braintree with a non-ST elevation MI and Guaiac positive stools , transferred to St . Margaret's Center for Women & Infants for cardiac catheterization with PTCA to mid LAD lesion complicated by hypotension and bradycardia requiring Atropine , IV fluids and transient dopamine possibly secondary to vagal reaction , subsequently transferred to CCU for close monitoring , hemodynamically stable at the time of admission to the CCU"""
        s5 = "The patient has cancer and high fever and will die from Leukemia"
        s6 = 'This is an 11-year-old female who comes in for two different things. 1. She was seen by the allergist. No allergies present, so she stopped her Allegra, but she is still real congested and does a lot of snorting. They do not notice a lot of snoring at night though, but she seems to be always like that. 2. On her right great toe, she has got some redness and erythema. Her skin is kind of peeling a little bit, but it has been like that for about a week and a half now. General: Well-developed female, in no acute distress, afebrile. HEENT: Sclerae and conjunctivae clear. Extraocular muscles intact. TMs clear. Nares patent. A little bit of swelling of the turbinates on the left. Oropharynx is essentially clear. Mucous membranes are moist. Neck: No lymphadenopathy. Chest: Clear. Abdomen: Positive bowel sounds and soft. Dermatologic: She has got redness along the lateral portion of her right great toe, but no bleeding or oozing. Some dryness of her skin. Her toenails themselves are very short and even on her left foot and her left great toe the toenails are very short.'
        data = [s1, s2, s3, s4, s5, s6]
        res = nlu.load('med_ner.jsl.wip.clinical resolve_chunk.icdo.clinical',
                       verbose=True).predict(
                           data,
                           drop_irrelevant_cols=False,
                           metadata=True,
                       )

        print(res.columns)
        for c in res.columns:
            print(res[c])
def test_every_default_component(nlu_reference, id):
    """Predict one fixed sentence with ``nlu_reference`` unless the case is skipped.

    A garbage collection is triggered and the reference is printed first.
    When ``id`` is below ``skip_to_test`` (a name defined outside this
    function) nothing is loaded and the function returns early.
    """
    gc.collect()
    print('param =', nlu_reference)
    print('TESTING NLU REFERENCE : ', nlu_reference)
    if id < skip_to_test:
        return

    pipeline = nlu.load(nlu_reference)
    predictions = pipeline.predict('What a wonderful day!')
    print(predictions)
    print('TESTING DONE FOR NLU REFERENCE : ', nlu_reference)
    def test_sarcasm_stack(self):
        """Predict 'Hello world' with 'sarcasm elmo' at token level and print the result.

        The whole frame, its columns and the 'sarcasm' column are printed.
        """
        # Alternatives tried before: 'sentiment elmo' with output_level
        # 'document' or 'sentence'.
        pipeline = nlu.load('sarcasm elmo',verbose=True)
        token_df = pipeline.predict('Hello world', output_level='token')

        print(token_df)
        print(token_df.columns)
        print(token_df['sarcasm'])
Beispiel #13
0
    def test_sentiment_stack(self):
        """Predict 'Hello world' with 'sentiment elmo' at token level and print each column."""
        # Other output levels that can be tried here: 'document', 'sentence'.
        pipeline = nlu.load('sentiment elmo', verbose=True)
        token_df = pipeline.predict('Hello world', output_level='token')

        for column in token_df.columns:
            print(token_df[column])
Beispiel #14
0
    def test_py_arrow(self):
        """Predict document-level 'bert' output on up to 10000 repeated CSV rows.

        The first 1000 rows of a local CSV file are repeated until at most
        10000 rows are available; the result columns are printed one by one.
        """
        pipe = nlu.load('bert', verbose=True)
        # data = self.load_pandas_dataset()
        data = pd.read_csv('/home/ckl/Documents/self/steuern/2021/JUL/pakistan/en_lang_filtered_sample.csv')[0:1000]

        # Repeat the sample with pd.concat (DataFrame.append was removed in
        # pandas 2.0). Only as many copies as the 10000-row slice needs are
        # built, capped at 2048 copies so short inputs are not inflated
        # further than the former doubling loop did.
        max_rows = 10000
        n_rows = len(data)
        copies = min(2048, -(-max_rows // n_rows)) if n_rows else 1
        big_df = pd.concat([data] * copies)

        df = pipe.predict(big_df[:max_rows], output_level='document')

        for c in df.columns:
            print(df[c])
Beispiel #15
0
    def test_data_batching(self):
        """Predict 'ner' on 10000 identical sentences and print every result column."""
        d = pd.DataFrame({'text': ["Hello WOrld I like RAM"] * 10000})
        print('GONNA PREDICT!')
        df = nlu.load('ner').predict(d)

        # Print each column once instead of dumping the whole frame once per
        # column.
        for c in df.columns:
            print(df[c])
Beispiel #16
0
    def test_range_index(self):
        """Predict document-level 'sentiment' on a frame with an extra feature column.

        The resulting frame and its columns are printed.
        """
        text_df = pd.DataFrame({
            "text": ['This day sucks', 'I love this day', 'I dont like Sami'],
            "some feature": [1, 1, 0],
        })
        pipeline = nlu.load('sentiment', verbose=True)
        res = pipeline.predict(text_df, output_level='document')
        print(res)
        print(res.columns)
Beispiel #17
0
    def test_chunk_resolver_training_custom_embeds(self):
        """Train a chunk resolver on top of an explicitly chosen word embedding.

        Training a chunk resolver requires word embeddings. If the load
        string names none, the default `glove` embeddings are used; an
        embedding reference placed before `train.resolve_chunks` is used
        instead of that default. Here 'en.embed.glove.healthcare_100d' is
        requested and the resolver's neighbour count is set to 350.

        The training data is a tab-separated file without a header row whose
        columns are named 'y', '_y' and 'text'; the same frame is used for
        fitting and for predicting.
        """
        cols = ["y", "_y", "text"]
        p = '/home/ckl/Documents/freelance/jsl/nlu/nlu4realgit2/tests/datasets/AskAPatient.fold-0.train.txt'
        dataset = pd.read_csv(p, sep="\t", encoding="ISO-8859-1", header=None)
        dataset.columns = cols

        nlu.auth(sct.SPARK_NLP_LICENSE, sct.AWS_ACCESS_KEY_ID,
                 sct.AWS_SECRET_ACCESS_KEY, sct.JSL_SECRET)

        # TODO bert/elmo embeddings give weird storage ref errors here and only
        # glove works; write an issue in the healthcare lib about this.
        trainable_pipe = nlu.load(
            'en.embed.glove.healthcare_100d train.resolve_chunks')
        trainable_pipe['chunk_resolver'].setNeighbours(350)

        trainable_pipe.print_info()
        fitted_pipe = trainable_pipe.fit(dataset)
        res = fitted_pipe.predict(dataset, multithread=False)

        for c in res:
            print(c)
            print(res[c])
Beispiel #18
0
    def test_assertion_dl_model(self):
        """Predict with 'en.assert.healthcare' on one clinical note and print the output.

        Metadata is requested; the columns, each column's values and the
        whole frame are printed.
        """
        nlu.auth(sct.SPARK_NLP_LICENSE, sct.AWS_ACCESS_KEY_ID,
                 sct.AWS_SECRET_ACCESS_KEY, sct.JSL_SECRET)

        note = 'Patient has a headache for the last 2 weeks and appears anxious when she walks fast. No alopecia noted. She denies pain'
        pipeline = nlu.load('en.assert.healthcare', verbose=True)
        res = pipeline.predict(note, metadata=True)

        print(res.columns)
        for column in res:
            print(res[column])
        print(res)
def test_every_default_component(nlu_reference, id):
    """Predict one fixed sentence with ``nlu_reference`` after printing a memory summary.

    Active pipes are cleared and a garbage collection is triggered first.
    The ten pympler summary rows with the largest value at index 2 are then
    printed (presumably the total size per type - confirm against pympler).
    ``id`` is not used inside this function.
    """
    import nlu
    nlu.active_pipes.clear()
    gc.collect()

    from pympler import tracker
    # TODO add temporary model cleanup in /tmp, then this can run in CI/CD
    summary_rows = tracker.SummaryTracker().create_summary()
    largest_first = sorted(summary_rows, key=lambda row: row[2], reverse=True)
    print("MEMORY", largest_first[:10])

    print('param =', nlu_reference)
    print('TESTING NLU REFERENCE : ', nlu_reference)
    # Skipping by id (id < skip_to_test) is currently disabled.
    pipeline = nlu.load(nlu_reference)
    predictions = pipeline.predict('What a wonderful day!')
    print(predictions)
    print(predictions.columns)
    print('TESTING DONE FOR NLU REFERENCE : ', nlu_reference)
    def test_pretrained_pipe(self):
        """Predict two sentences with 'en.explain_doc.era' and print the output.

        Each column's values are printed, followed by the whole frame.
        """
        nlu.auth(sct.SPARK_NLP_LICENSE, sct.AWS_ACCESS_KEY_ID,
                 sct.AWS_SECRET_ACCESS_KEY, sct.JSL_SECRET)

        sentences = [
            'The patient has cancer and high fever and will die next week.',
            ' She had a seizure.',
        ]
        pipeline = nlu.load('en.explain_doc.era', verbose=True)
        res = pipeline.predict(sentences)

        for column in res:
            print(res[column])

        print(res)
Beispiel #21
0
    def test_bert_seq_classifier(self):
        """Predict document-level output for each listed BERT sequence classifier reference.

        Irrelevant columns are kept and every result column is printed.
        """
        import pandas as pd

        # Further references that can be enabled here:
        #   en.classify.bert_sequence.imdb_large, en.classify.bert_sequence.imdb,
        #   en.classify.bert_sequence.ag_news, en.classify.bert_sequence.dbpedia_14,
        #   en.classify.bert_sequence.finbert
        references = ['en.classify.bert_sequence.dehatebert_mono']

        for reference in references:
            pipe = nlu.load(reference, verbose=True)
            documents = ['Peter love pancaces. I hate Mondays', 'I love Fridays']
            df = pipe.predict(documents,
                              output_level='document',
                              drop_irrelevant_cols=False)
            for column in df.columns:
                print(df[column])
Beispiel #22
0
    def test_LicensedClassifier(self):
        """Predict one sentence with 'en.classify.ade.conversational' and print the output.

        The whole frame is printed first, then every column by name and value.
        """
        nlu.auth(sct.SPARK_NLP_LICENSE, sct.AWS_ACCESS_KEY_ID,
                 sct.AWS_SECRET_ACCESS_KEY, sct.JSL_SECRET)

        sentence = 'DR Johnson administerd to the patient Peter Parker last week 30 MG of penicilin'
        pipeline = nlu.load('en.classify.ade.conversational', verbose=True)
        res = pipeline.predict(sentence)

        print(res)
        for column in res:
            print(column)
            print(res[column])
Beispiel #23
0
    def test_relation_extraction(self):
        """Predict with posology NER plus the drug-drug-interaction relation reference.

        One posology text is predicted with irrelevant columns kept and
        metadata enabled; every output column is printed by name and value.
        """
        nlu.auth(sct.SPARK_NLP_LICENSE, sct.AWS_ACCESS_KEY_ID,
                 sct.AWS_SECRET_ACCESS_KEY, sct.JSL_SECRET)

        posology_text = 'The patient was prescribed 1 unit of Advil for 5 days after meals. The patient was also given 1 unit of Metformin daily. He was seen by the endocrinology service and she was discharged on 40 units of insulin glargine at night , 12 units of insulin lispro with meals , and metformin 1000 mg two times a day.'
        pipeline = nlu.load('med_ner.posology relation.drug_drug_interaction',
                            verbose=True)
        res = pipeline.predict(
            [posology_text],
            drop_irrelevant_cols=False,
            metadata=True,
        )

        for column in res:
            print(column)
            print(res[column])
Beispiel #24
0
    def test_relation_extraction(self):
        """Predict with 'en.ner.posology en.extract_relation.drug_drug_interaction'.

        One posology text is predicted and every output column is printed by
        name and value.
        """
        nlu.auth(sct.SPARK_NLP_LICENSE, sct.AWS_ACCESS_KEY_ID,
                 sct.AWS_SECRET_ACCESS_KEY, sct.JSL_SECRET)

        # Basically all the relation extractor does is take POS for every
        # token and a bunch of converted NER chunks.
        # TODO plain 'en.extract_relation' crashes, also when combined with
        # 'en.pos.clinical', 'en.ner.posology' or 'en.ner.jsl' (posology_re).
        # TODO relation extractor should be fed 3 different NER models
        posology_text = 'The patient was prescribed 1 unit of Advil for 5 days after meals. The patient was also given 1 unit of Metformin daily. He was seen by the endocrinology service and she was discharged on 40 units of insulin glargine at night , 12 units of insulin lispro with meals , and metformin 1000 mg two times a day.'
        pipeline = nlu.load('en.ner.posology en.extract_relation.drug_drug_interaction', verbose=True)
        res = pipeline.predict([posology_text])

        for column in res:
            print(column)
            print(res[column])
Beispiel #25
0
    def test_multi_ner_pipe(self):
        """Predict two sentences with four NER references in one pipeline and print the output.

        Each column's values are printed, followed by the whole frame.
        """
        nlu.auth(sct.SPARK_NLP_LICENSE, sct.AWS_ACCESS_KEY_ID,
                 sct.AWS_SECRET_ACCESS_KEY, sct.JSL_SECRET)

        sentences = [
            'The patient has cancer and high fever and will die next week.',
            ' She had a seizure.',
        ]
        pipeline = nlu.load(
            'en.med_ner.tumour en.med_ner.radiology en.med_ner.diseases en.ner.onto ',
            verbose=True)
        res = pipeline.predict(sentences)

        for column in res:
            print(res[column])

        print(res)
Beispiel #26
0
    def test_drug_normalizer(self):
        """Predict document-level 'norm_drugs' output for five drug strings and print it.

        The columns, each column's values and the whole frame are printed.
        """
        nlu.auth(sct.SPARK_NLP_LICENSE, sct.AWS_ACCESS_KEY_ID,
                 sct.AWS_SECRET_ACCESS_KEY, sct.JSL_SECRET)

        drug_strings = [
            "Agnogenic one half cup",
            "adalimumab 54.5 + 43.2 gm",
            "aspirin 10 meq/ 5 ml oral sol",
            "interferon alfa-2b 10 million unit ( 1 ml ) injec",
            "Sodium Chloride/Potassium Chloride 13bag",
        ]
        pipeline = nlu.load('norm_drugs')
        res = pipeline.predict(drug_strings, output_level='document')

        print(res.columns)
        for column in res:
            print(res[column])

        print(res)
    def test_deidentification(self):
        """Predict one sentence with 'en.de_identify' and print every output column.

        Irrelevant columns are kept and metadata is enabled.
        """
        nlu.auth(sct.SPARK_NLP_LICENSE, sct.AWS_ACCESS_KEY_ID,
                 sct.AWS_SECRET_ACCESS_KEY, sct.JSL_SECRET)

        sentence = 'DR Johnson administerd to the patient Peter Parker last week 30 MG of penicilin'
        pipeline = nlu.load('en.de_identify', verbose=True)
        res = pipeline.predict(
            sentence,
            drop_irrelevant_cols=False,
            metadata=True,
        )

        for column in res:
            print(column)
            print(res[column])
Beispiel #28
0
    def test_simple_dataset(self):
        """Fit 'train.resolve_sentence' on a four-row frame and predict its text column.

        The frame has 'text', 'y' and '_y' columns. The columns of the
        result and then every column by name and value are printed.
        """
        import pandas as pd
        import nlu

        nlu.auth(sct.SPARK_NLP_LICENSE, sct.AWS_ACCESS_KEY_ID,
                 sct.AWS_SECRET_ACCESS_KEY, sct.JSL_SECRET)

        texts = ['The Tesla company is good to invest is', 'TSLA is good to invest','TESLA INC. we should buy','PUT ALL MONEY IN TSLA inc!!']
        train_df = pd.DataFrame({
            'text': texts,
            'y': ['23'] * 4,
            '_y': ['TESLA'] * 4,
        })

        pipe = nlu.load('train.resolve_sentence')
        fitted = pipe.fit(train_df)
        res = fitted.predict(train_df['text'])

        print(res.columns)
        for column in res:
            print(column)
            print(res[column])
Beispiel #29
0
    def test_chunk_resolver(self):
        """Predict chunk-level 'en.med_ner.ade.clinical' output for one clinical note.

        The whole result frame is printed first, then each of its columns.
        """
        nlu.auth(sct.SPARK_NLP_LICENSE, sct.AWS_ACCESS_KEY_ID,
                 sct.AWS_SECRET_ACCESS_KEY, sct.JSL_SECRET)

        # by specifying output_level=chunk you will get one row per entity
        # https://nlp.johnsnowlabs.com/2021/02/04/redl_temporal_events_biobert_en.html
        data = 'This is an 11-year-old female who comes in for two different things. 1. She was seen by the allergist. No allergies present, so she stopped her Allegra, but she is still real congested and does a lot of snorting. They do not notice a lot of snoring at night though, but she seems to be always like that. 2. On her right great toe, she has got some redness and erythema. Her skin is kind of peeling a little bit, but it has been like that for about a week and a half now. General: Well-developed female, in no acute distress, afebrile. HEENT: Sclerae and conjunctivae clear. Extraocular muscles intact. TMs clear. Nares patent. A little bit of swelling of the turbinates on the left. Oropharynx is essentially clear. Mucous membranes are moist. Neck: No lymphadenopathy. Chest: Clear. Abdomen: Positive bowel sounds and soft. Dermatologic: She has got redness along the lateral portion of her right great toe, but no bleeding or oozing. Some dryness of her skin. Her toenails themselves are very short and even on her left foot and her left great toe the toenails are very short.'

        # The prediction must be bound to the name that is printed below;
        # binding it to a different name raised NameError on the print.
        res = nlu.load('en.med_ner.ade.clinical').predict(data, output_level="chunk")

        print(res)
        for c in res.columns:
            print(res[c])
def show(session_state):
    """Render the fake news classifier section of the Streamlit app.

    The page has two steps: loading the NLU model on button press and, once
    a model is loaded, classifying user-entered text and showing the results.

    Args:
        session_state: Mutable state object persisted across Streamlit
            reruns. This function reads and writes its ``fakenews_pipe``,
            ``fakenews_is_loaded``, ``fakenews_txt``, ``fakenews_out``,
            ``fakenews_is_predicted`` and ``fakenews_fig`` attributes.
            The attributes that are read before being written here
            (``fakenews_is_loaded``, ``fakenews_txt``,
            ``fakenews_is_predicted``) are expected to be initialised by
            the caller.
    """
    NLU_MODEL_NAMES = ["en.classify.fakenews"]

    # MAIN PAGE
    st.title("Fake News Classifier :newspaper:")
    # Adjacent string literals are concatenated verbatim, so every sentence
    # but the last carries its own trailing space.
    st.info(
        "This is a pre trained language model for fake news detection. "
        "The **fake news classifiers** is an version of the development of "
        "[**John Snow Lab**](https://nlu.johnsnowlabs.com/). "
        "It uses universal sentence embeddings and was trained with the "
        "classifierdl algorithm provided by Spark NLP."
    )

    # Step 1: load a model
    st.header("1. Load a model")
    model_name = st.selectbox("Select model", NLU_MODEL_NAMES)

    if st.button("Download the model from AWS", key="btn_load"):
        with st.spinner("Download started this may take some time ..."):
            session_state.fakenews_pipe = nlu.load(model_name)
            session_state.fakenews_is_loaded = True

    # Nothing else can be rendered until a pipeline is available.
    if not session_state.fakenews_is_loaded:
        st.info("No model loaded. Please load first a model!")
        return

    st.success(f"Download {model_name} done!")

    # Step 2: get a prediction
    st.header("2. Try the algorithm here")
    # The stored sentences are re-joined with "." to pre-fill the text area,
    # then the (possibly edited) text is split again and stored back.
    txt = st.text_area(
        "Enter news text for classification.",
        ".".join(session_state.fakenews_txt),
    )
    session_state.fakenews_txt = sentence_split(txt)

    if st.button("Calculate", key="btn_pred"):
        with st.spinner("Calculation started ..."):
            session_state.fakenews_out = session_state.fakenews_pipe.predict(
                session_state.fakenews_txt
            )
            session_state.fakenews_is_predicted = True

    if not session_state.fakenews_is_predicted:
        return

    st.success("Calculation done!")
    predictions = session_state.fakenews_out

    # Results
    st.header("Result")
    # NOTE(review): debug dump of the raw prediction output; kept to preserve
    # the current page content - consider removing before release.
    st.write("DEBUG", predictions)
    fake_confidence, real_confidence = get_weighted_confidence_scores(
        predictions
    )
    # Ties fall through to the REAL branch, as only a strictly larger FAKE
    # score triggers the warning.
    if fake_confidence > real_confidence:
        st.warning(
            f"The news are FAKE with a certainty of {fake_confidence}"
        )
    else:
        st.info(f"The news are REAL with a certainty of {real_confidence}")
    st.write(
        "*Note: the decision is infered from the weighted mean of as FAKE or REAL detected sentences.*"
    )

    st.header("Deep Dive")
    st.dataframe(get_describe_over_fake_real_classes(predictions))
    session_state.fakenews_fig = px.histogram(predictions, x="fakenews")
    st.plotly_chart(session_state.fakenews_fig)

    st.write("**Sentence Embeddings**")
    st.dataframe(predictions)