def setUp(self):
    """Build a three-sentence sample report and its pyConText fixtures.

    Caches the sentence/span pairs, the module-level modifier and target
    item data from ``fc``, per-sentence markups, and the combined document.
    """
    self.txt = ('There is fluid collection in the abdomen. '
                'There is no hematoma near the liver. '
                'Evaluate for abscess.')
    self.sentenceSpanPairs = helpers.my_sentence_splitter(self.txt)
    self.sentences = [pair.text for pair in self.sentenceSpanPairs]
    self.spans = [pair.span for pair in self.sentenceSpanPairs]
    self.modifiers = fc.modifiers
    self.targets = fc.targets
    self.markups = [fc.markup_sentence(sentence) for sentence in self.sentences]
    self.document = fc.create_context_doc(self.markups)
def extract_markups_from_text(report, targets, modifiers):
    """Split one report into sentences, mark each up, and classify the result.

    Parameters
    ----------
    report : tuple
        ``(report_name, text)`` pair for a single clinical note.
    targets, modifiers :
        pyConText item data passed through to ``create_markup``.

    Returns
    -------
    Whatever ``classify_markups`` produces for the non-empty markups
    (project-defined type).
    """
    report_name, report_text = report
    # Fixed: the comprehension variable below used to be named ``text`` as
    # well, shadowing the unpacked report text — renamed for clarity.
    split_report = helpers.my_sentence_splitter(report_text)
    markups = [
        create_markup(s=sentence, span=span, modifiers=modifiers, targets=targets)
        for (sentence, span) in split_report
    ]
    # Drop sentences that produced no markup nodes before classification.
    # (Explicit len() check kept: graph-like markup objects may not define
    # truthiness consistently.)
    markups = [m for m in markups if len(m) != 0]
    return classify_markups(markups, report_name)
def setUp(self):
    """Build fixtures with modifier/target item data loaded from TSV files.

    Same sample report as the other fixtures, but modifiers/targets are
    instantiated from on-disk TSVs rather than taken from ``fc``.
    """
    self.txt = ('There is fluid collection in the abdomen. '
                'There is no hematoma near the liver. '
                'Evaluate for abscess.')
    self.sentenceSpanPairs = helpers.my_sentence_splitter(self.txt)
    self.sentences = [x.text for x in self.sentenceSpanPairs]
    self.spans = [x.span for x in self.sentenceSpanPairs]
    # TODO(review): hard-coded absolute user path — move to a constant or
    # test resource directory so the suite runs on other machines.
    base = ('/Users/alec/Box Sync/Bucher_Surgical_MIMICIII/'
            'pyConText_implement/fcFinder/')
    self.modifiers = itemData.instantiateFromCSVtoitemData(base + 'modifiers.tsv')
    # NOTE(review): targets uses a file:// URL while modifiers uses a plain
    # path — presumably the loader accepts both; confirm before unifying.
    self.targets = itemData.instantiateFromCSVtoitemData('file://' + base + 'targets.tsv')
    self.markups = [fc.markup_sentence(x) for x in self.sentences]
    self.first_markup = self.markups[0]
    self.document = fc.create_context_doc(self.markups)
def setUp(self):
    """Create markups for the three sample sentences plus classifiers.

    Builds one ``markup_conditions`` classifier per sentence markup and one
    for an empty ``ConTextMarkup`` (edge-case fixture).
    """
    self.txt = ('There is fluid collection in the abdomen. '
                'There is no hematoma near the liver. '
                'Evaluate for abscess.')
    pairs = helpers.my_sentence_splitter(self.txt)
    self.sentenceSpanPairs = pairs
    self.sentences = [p.text for p in pairs]
    self.spans = [p.span for p in pairs]
    self.modifiers = fc.modifiers
    self.targets = fc.targets
    self.markups = [fc.markup_sentence(s) for s in self.sentences]
    self.document = fc.create_context_doc(self.markups)
    self.empty_markup = pyConText.ConTextMarkup()
    self.first_markup = self.markups[0]
    self.second_markup = self.markups[1]
    self.third_markup = self.markups[2]
    self.first_classifier = fc.markup_conditions(markup=self.first_markup)
    self.second_classifier = fc.markup_conditions(markup=self.second_markup)
    self.third_classifier = fc.markup_conditions(markup=self.third_markup)
    self.classifier = fc.markup_conditions(markup=self.empty_markup)
def test_sentences_are_split_txt(self):
    """Re-splitting self.txt must reproduce the cached sentence texts."""
    resplit = helpers.my_sentence_splitter(self.txt)
    expected = [pair.text for pair in resplit]
    self.assertEqual(self.sentences, expected)
def main():
    """Run the validation pipeline: load the 'val' split, extract classified
    markups from each report in parallel, and print the combined frame.

    NOTE(review): everything after the first ``exit()`` in the original was
    unreachable scratch code that referenced undefined names
    (``list_of_markups``, ``sentence_span_pairs``); it has been removed.
    The evaluation step (``evaluate_markups``) was never reached and is
    still TODO.
    """
    modifiers = itemData.instantiateFromCSVtoitemData(MODIFIERS_FILE)
    # Fixed: original had a redundant double assignment
    # (``targets = targets = ...``).
    targets = itemData.instantiateFromCSVtoitemData(TARGETS_FILE)

    df = pd.read_pickle(SOURCE_DF)
    df = df[df.train_val == 'val']
    print(df.head())
    print(len(df))

    ref = pd.read_excel(REFERENCE_STANDARD)
    ref = update_reference_df(ref)

    reports = list(zip(df['note_name'], df['text']))

    # Fixed: ``pool.apply`` blocks until each call returns, so the original
    # loop ran serially despite the pool; starmap actually fans the work
    # out across the 8 workers.
    with Pool(processes=8) as pool:
        list_of_classified_markups = pool.starmap(
            extract_markups_from_text,
            [(name_and_text, targets, modifiers) for name_and_text in reports])

    # Fixed: DataFrame.append was removed in pandas 2.0; pd.concat is the
    # replacement. Assumes classify_markups returns DataFrame-compatible
    # pieces, as the original .append() usage implied — TODO confirm.
    empty = pd.DataFrame(columns=['m', 'doc_span', 'markup_class', 'text'])
    classified_markups = pd.concat([empty] + list(list_of_classified_markups))
    print(classified_markups.head())
    # TODO("PICK up here" in original): evaluation against the reference
    # standard (evaluate_markups) is unfinished; stopping here as the
    # original did with exit().
    return