Example #1
def setUp(self):
    self.txt = 'There is fluid collection in the abdomen. There is no hematoma near the liver. Evaluate for abscess.'
    self.sentenceSpanPairs = helpers.my_sentence_splitter(self.txt)
    self.sentences = [x.text for x in self.sentenceSpanPairs]
    self.spans = [x.span for x in self.sentenceSpanPairs]
    # Modifier and target item data are defined at module level in fc.
    self.modifiers = fc.modifiers
    self.targets = fc.targets
    self.markups = [fc.markup_sentence(x) for x in self.sentences]
    self.document = fc.create_context_doc(self.markups)
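The helpers module is not shown in these examples. For reference, here is a minimal, hypothetical sketch of what my_sentence_splitter could look like, inferred only from the usage above (each element exposes .text and .span and also unpacks as a pair); the naive split on '. ' is a stand-in, not the real implementation.

from collections import namedtuple

SentenceSpanPair = namedtuple('SentenceSpanPair', ['text', 'span'])

def my_sentence_splitter(text):
    # Naive stand-in: split on sentence-final periods while tracking
    # (start, end) character offsets into the original string.
    pairs = []
    start = 0
    for chunk in text.split('. '):
        sentence = chunk if chunk.endswith('.') else chunk + '.'
        end = start + len(sentence)
        pairs.append(SentenceSpanPair(sentence, (start, end)))
        start = end + 1  # skip the space removed by the split
    return pairs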
Example #2
def extract_markups_from_text(report, targets, modifiers):
    report_name, text = report
    # Split the report into (sentence, span) pairs.
    split_report = helpers.my_sentence_splitter(text)
    markups = [
        create_markup(s=sentence, span=span, modifiers=modifiers, targets=targets)
        for (sentence, span) in split_report
    ]
    # Drop sentences that produced no markup.
    markups = [m for m in markups if len(m) != 0]
    markups = classify_markups(markups, report_name)
    return markups
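A hypothetical call site for extract_markups_from_text, for context: report is a (report_name, text) pair, matching the unpacking on its first line.

# Illustrative only; the report name and text are made up, and targets
# and modifiers are itemData collections as loaded in the other examples.
report = ('note_001.txt', 'There is fluid collection in the abdomen.')
classified = extract_markups_from_text(report, targets, modifiers)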
Example #3
def setUp(self):
    self.txt = 'There is fluid collection in the abdomen. There is no hematoma near the liver. Evaluate for abscess.'
    self.sentenceSpanPairs = helpers.my_sentence_splitter(self.txt)
    self.sentences = [x.text for x in self.sentenceSpanPairs]
    self.spans = [x.span for x in self.sentenceSpanPairs]
    #self.sentences = self.sentences.remove('')
    # Load modifier and target item data from local TSV knowledge bases;
    # instantiateFromCSVtoitemData accepts plain paths and file:// URLs.
    self.modifiers = itemData.instantiateFromCSVtoitemData(
        "/Users/alec/Box Sync/Bucher_Surgical_MIMICIII/pyConText_implement/fcFinder/modifiers.tsv"
    )
    self.targets = itemData.instantiateFromCSVtoitemData(
        "file:///Users/alec/Box Sync/Bucher_Surgical_MIMICIII/pyConText_implement/fcFinder/targets.tsv"
    )
    self.markups = [fc.markup_sentence(x) for x in self.sentences]
    self.first_markup = self.markups[0]
    self.document = fc.create_context_doc(self.markups)
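The TSV files loaded above follow pyConTextNLP's four-column itemData layout (literal, category, regular expression, rule). An illustrative modifiers row, not taken from the actual files:

Lex	Type	Regex	Direction
no	DEFINITE_NEGATED_EXISTENCE	\bno\b	forward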
Example #4
def setUp(self):
    self.txt = 'There is fluid collection in the abdomen. There is no hematoma near the liver. Evaluate for abscess.'
    self.sentenceSpanPairs = helpers.my_sentence_splitter(self.txt)
    self.sentences = [x.text for x in self.sentenceSpanPairs]
    self.spans = [x.span for x in self.sentenceSpanPairs]
    self.modifiers = fc.modifiers
    self.targets = fc.targets
    self.markups = [fc.markup_sentence(x) for x in self.sentences]
    self.document = fc.create_context_doc(self.markups)
    self.empty_markup = pyConText.ConTextMarkup()
    self.first_markup = self.markups[0]
    self.second_markup = self.markups[1]
    self.third_markup = self.markups[2]
    # One condition classifier per sentence markup, plus one built from
    # an empty markup to exercise the degenerate case.
    self.first_classifier = fc.markup_conditions(markup=self.first_markup)
    self.second_classifier = fc.markup_conditions(markup=self.second_markup)
    self.third_classifier = fc.markup_conditions(markup=self.third_markup)
    self.classifier = fc.markup_conditions(markup=self.empty_markup)
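The fc module under test is not shown here; its markup_sentence presumably wraps pyConTextNLP's standard marking sequence. A sketch of that sequence, assuming module-level modifiers and targets as in this example:

import pyConTextNLP.pyConText as pyConText

def markup_sentence(sentence):
    # Mark modifier and target phrases, prune overlapping marks, then
    # attach in-scope modifiers to their targets.
    markup = pyConText.ConTextMarkup()
    markup.setRawText(sentence)
    markup.cleanText()
    markup.markItems(modifiers, mode="modifier")
    markup.markItems(targets, mode="target")
    markup.pruneMarks()
    markup.applyModifiers()
    return markup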
Example #5
def test_sentences_are_split_txt(self):
    self.assertEqual(
        self.sentences,
        [x.text for x in helpers.my_sentence_splitter(self.txt)])
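A natural companion check, hypothetical rather than from the original suite, assuming each span is a (start, end) character-offset pair into the source text:

def test_spans_index_into_text(self):
    for pair in helpers.my_sentence_splitter(self.txt):
        start, end = pair.span
        self.assertEqual(self.txt[start:end], pair.text)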
Example #6
def main():
    modifiers = itemData.instantiateFromCSVtoitemData(MODIFIERS_FILE)
    targets = itemData.instantiateFromCSVtoitemData(TARGETS_FILE)

    df = pd.read_pickle(SOURCE_DF)
    df = df[df.train_val == 'val']
    print(df.head())
    print(len(df))
    #df = df.iloc[:10]
    ref = pd.read_excel(REFERENCE_STANDARD)
    ref = update_reference_df(ref)
    reports = list(zip(df['note_name'], df['text']))
    # NB: Pool.apply blocks until each call returns, so this comprehension
    # runs the reports one at a time despite the eight worker processes
    # (a parallel starmap sketch follows this example).
    pool = Pool(processes=8)
    list_of_classified_markups = [
        pool.apply(extract_markups_from_text,
                   args=(name_and_text, targets, modifiers))
        for name_and_text in reports
    ]
    pool.close()
    pool.join()
    # DataFrame.append was removed in pandas 2.0; concatenating the
    # per-report results is equivalent (this assumes classify_markups
    # returns DataFrame-compatible rows).
    classified_markups = pd.concat(
        [pd.DataFrame(m, columns=['m', 'doc_span', 'markup_class', 'text'])
         for m in list_of_classified_markups],
        ignore_index=True)
    print(classified_markups.head())
    exit()  # WIP: everything below is unreachable scratch work

    # Flatten the per-report markups into one row dict per markup.
    classified_markups = [{
        'm': m,
        'doc_span': m.docSpan,
        'markup_class': m.markup_class,
        'text': m.text
    } for markups in list_of_classified_markups for m in markups]

    # TODO: Make this one long dataframe, like classified_markups
    df['markups'] = df.apply(
        lambda row: extract_markups_from_text((row.note_name, row.text),
                                              targets, modifiers),
        axis=1)
    print(df.head())
    classified_markups = pd.DataFrame(
        columns=['m', 'doc_span', 'markup_class', 'text'])
    for idx, row in df.iterrows():
        # Get all annotations from reference standard with this report name
        #annotations = ref[ref['File Name with extension'] == row.note_name]
        row_markups = classify_markups(row.markups, row.note_name)
        print(classified_markups)
        classified_markups = pd.concat(
            [classified_markups, pd.DataFrame(row_markups)],
            ignore_index=True)
    print(len(classified_markups))
    print(classified_markups.head())
    evaluate_markups(ref, classified_markups)

    exit()
    reports = list(df[df.train_val == 'train']['text'])
    reports = [helpers.preprocess(report) for report in reports]
    split_reports = [
        helpers.my_sentence_splitter(report) for report in reports
    ]
    markups = []
    for report in split_reports[:10]:
        # Each report is a list of sentence span pairs
        for text, span in report:
            m = create_markup(s=text,
                              modifiers=modifiers,
                              targets=targets,
                              span=span)
            markups.append(m)
    print(markups)
    exit()

    # Note: sentence_span_pairs is not defined anywhere above; this is
    # unreachable scratch code kept for reference.
    markups = [
        create_markup(s=sentence,
                      modifiers=modifiers,
                      targets=targets,
                      span=span) for (sentence, span) in sentence_span_pairs
    ]

    report_names = list(set(df.note_name))
    for report in report_names:
        report_df = df[df.note_name == report]
        evaluate_report(report_df)
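As noted in the comment above the Pool block, Pool.apply blocks until each call returns, so main() processes reports serially. A sketch of the same step actually fanned out across the workers, using the same function and arguments and changing only the dispatch:

from multiprocessing import Pool

# Each tuple becomes one extract_markups_from_text(report, targets, modifiers)
# call, distributed across the eight worker processes.
with Pool(processes=8) as pool:
    list_of_classified_markups = pool.starmap(
        extract_markups_from_text,
        [(name_and_text, targets, modifiers) for name_and_text in reports])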