def annotate(self, model):
    """
    Mark up every sentence with `model` and collect the resulting findings.

    `self.annotations['hai_detect']` is reset to an empty list. Each sentence
    in `self.sentences` is passed to `model.markup_sentence`, and one
    Annotation is built from every marked target through
    `Annotation.from_markup`. Annotations without a classification are
    dropped and the remainder is filtered by `self.prune_annotations`.
    Every surviving annotation whose `annotation_type` is not 'infection' or
    'discharge' is appended to `self.annotations['hai_detect']`, and its
    sentence index is appended to `self.sentences_with_annotations` (one
    entry per kept annotation).
    """
    excluded_types = ['infection', 'discharge']
    self.annotations['hai_detect'] = []
    collected = self.annotations['hai_detect']

    for idx, sent in enumerate(self.sentences):
        markup = model.markup_sentence(sent['text'])

        candidates = []
        for target in markup.getMarkedTargets():
            anno = Annotation()
            anno.from_markup(target, markup, sent['text'], sent['span'],
                             rpt_id=self.rpt_id)
            # A markup without a classification is disregarded
            if anno.classification:
                anno.sentence_num = idx
                candidates.append(anno)

        for anno in self.prune_annotations(candidates):
            if anno.annotation_type in excluded_types:
                continue
            self.sentences_with_annotations.append(idx)
            collected.append(anno)
def setUp(self):
    """
    Create a fresh Annotation for each test and remove its 'assertion' and
    'temporality' attributes if they are present.
    """
    self.annotation = Annotation()
    # Remove each key independently: under a single try/except, a missing
    # 'assertion' raised KeyError before 'temporality' was ever popped.
    for key in ('assertion', 'temporality'):
        self.annotation.attributes.pop(key, None)
def test_classify(self):
    """
    Check the label returned by `Annotation.classify` for an
    'Evidence of SSI' annotation, first without and then with an anatomy
    attribute, and call it once more with an unrecognised annotation type.
    """
    anno = Annotation()
    anno.annotation_type = 'Evidence of SSI'
    self.assertEqual(anno.classify(), 'Positive Evidence of SSI - No Anatomy')

    anno.attributes['anatomy'] = ['Arm']
    self.assertEqual(anno.classify(), 'Positive Evidence of SSI')

    # The result for this type is not asserted; the call only has to
    # complete without raising.
    anno.annotation_type = "HELLO WORLD"
    anno.classify()
def import_from_xlsx(corpus_dir, file_name):
    """
    Build ClinicalTextDocuments from the annotation rows of an xlsx workbook.

    Only the active worksheet is read. Its first row is used solely for the
    column-count check (presumably a header row - confirm against the
    spreadsheet format); every following row is turned into an Annotation
    with `Annotation.from_ehost_xlsx` and attached to the document named in
    the row's second column. Rows whose file does not exist under
    `corpus_dir` are skipped.

    Args:
        corpus_dir: directory holding the text files named in the workbook.
        file_name: path of the xlsx workbook to read.

    Returns:
        dict mapping the second-column file name to its ClinicalTextDocument,
        each carrying the row annotations in `annotations['gold_standard']`.

    Raises:
        AssertionError: if `corpus_dir` does not exist.
        ValueError: if the first row does not have exactly 11 cells.
        IndexError: if the worksheet yields no rows at all.
    """
    print(corpus_dir)
    assert os.path.exists(corpus_dir)
    print(file_name)
    wb = load_workbook(filename=file_name, read_only=False)
    ws = wb.active  # just getting the first worksheet regardless of its name

    # Materialize the worksheet once. Rebuilding list(ws) for the row count
    # and again for every single row made the import quadratic in the number
    # of rows.
    rows = list(ws)
    if len(rows[0]) != 11:  # no more no less
        raise ValueError("MalformedHostExcelRow")

    documents = dict()
    print("{} rows".format(len(rows)))
    for row in rows[1:]:
        full_file_name = row[1].value  # second column
        full_file_path = os.path.join(corpus_dir, full_file_name)
        if not os.path.exists(full_file_path):
            continue

        anno = Annotation()
        anno.from_ehost_xlsx(row)

        doc = documents.setdefault(full_file_name, ClinicalTextDocument())
        doc.annotations['gold_standard'].append(anno)
        doc.filepath = full_file_name
        # Report id is the file's base name without its extension
        rpt_id = os.path.splitext(os.path.basename(doc.filepath))[0]
        doc.rpt_id = rpt_id
        doc.processText(filepath=full_file_path, rpt_id=rpt_id)
    return documents
def test_from_ehost_xlsx(self):
    """
    Exercise `Annotation.from_ehost_xlsx` with a path-like argument: the
    annotation id is expected to be "12345" after the first call, the sample
    workbook must exist on disk, and the call is expected to return False
    for 'myPath' and True for the sample workbook.
    """
    sample_path = "sample_annotations.xlsx"
    other_path = 'myPath'

    anno = Annotation()
    anno.from_ehost_xlsx(other_path)
    self.assertEqual(anno.id, "12345")

    self.assertTrue(os.path.isfile(sample_path))

    for path, expected in ((other_path, False), (sample_path, True)):
        self.assertEqual(anno.from_ehost_xlsx(path), expected)
class test_Annotation(unittest.TestCase):
    """Unit tests for Annotation: classify, from_ehost_xlsx, isOverlap, isSimilar."""

    annotation = Annotation()

    def setUp(self):
        """
        Create a fresh Annotation for each test and remove its 'assertion'
        and 'temporality' attributes if they are present.
        """
        self.annotation = Annotation()
        # Remove each key independently: under a single try/except, a missing
        # 'assertion' raised KeyError before 'temporality' was ever popped.
        for key in ('assertion', 'temporality'):
            self.annotation.attributes.pop(key, None)

    def tearDown(self):
        pass

    def test_classify(self):
        """
        Check the label returned by `classify` for an 'Evidence of SSI'
        annotation without and with anatomy, then call it once with an
        unrecognised annotation type.
        """
        annotation = Annotation()
        annotation.annotation_type = 'Evidence of SSI'
        self.assertEqual(annotation.classify(),
                         'Positive Evidence of SSI - No Anatomy')

        annotation.attributes['anatomy'] = ['Arm']
        self.assertEqual(annotation.classify(), 'Positive Evidence of SSI')

        # The result for this type is not asserted; the call only has to
        # complete without raising.
        annotation.annotation_type = "HELLO WORLD"
        annotation.classify()

    def _fresh_annotation(self):
        """Re-run setUp and return the annotation it created."""
        self.setUp()
        return self.annotation

    def _assert_no_attributes(self, annotation):
        """Assert that looking up each of the three attribute keys raises KeyError."""
        for key in ('classification', 'assertion', 'temporality'):
            self.assertRaises(KeyError, lambda: annotation.attributes[key])

    def test_from_ehost_xlsx(self):
        """
        Load rows 2-6 of the sample workbook into fresh annotations and check
        the parsed sentence, type, span and attributes; rows 5 and 6 are
        expected to raise MalformedSpanValue and leave the annotation unset.
        """
        sampleFile = "sample_annotations.xlsx"
        self.assertEqual(os.path.isfile(sampleFile), True)
        wb = load_workbook(filename=sampleFile, read_only=False)
        ws = wb.active  # just getting the first worksheet regardless of its name
        rows = list(ws)  # read the sheet once instead of once per row

        # Row 2: annotation without any attributes
        annotation = self._fresh_annotation()
        annotation.from_ehost_xlsx(rows[2])
        self.assertEqual(annotation.sentence, "drains")
        self.assertEqual(annotation.annotation_type, "DRAINAGE")
        self.assertEqual(annotation.span_in_sentence, (2608, 2614))
        self._assert_no_attributes(annotation)

        # Rows 3 and 4: fully attributed annotations
        attributed_rows = [
            (3, "3 drains in place with serosanguinous output",
             "Evidence of SSI", (1234, 15069),
             "superficial", "negated", "current"),
            (4, "Patient has a history of UTI",
             "Evidence of UTI", (2261, 2305),
             "Evidence of UTI", "positive", "historical"),
        ]
        for (index, sentence, annotation_type, span,
             classification, assertion, temporality) in attributed_rows:
            annotation = self._fresh_annotation()
            annotation.from_ehost_xlsx(rows[index])
            self.assertEqual(annotation.sentence, sentence)
            self.assertEqual(annotation.annotation_type, annotation_type)
            self.assertEqual(annotation.span_in_sentence, span)
            self.assertEqual(annotation.attributes['classification'],
                             classification)
            self.assertEqual(annotation.attributes['assertion'], assertion)
            self.assertEqual(annotation.attributes['temporality'], temporality)

        # Rows 5 and 6: malformed span, nothing may be set on the annotation
        for index in (5, 6):
            annotation = self._fresh_annotation()
            row = rows[index]
            self.assertRaises(MalformedSpanValue,
                              lambda: annotation.from_ehost_xlsx(row))
            self.assertIs(annotation.sentence, None)
            self.assertIs(annotation.annotation_type, None)
            self.assertIs(annotation.span_in_sentence, None)
            self._assert_no_attributes(annotation)

    def _anno_overlap(self, left, right, threshold=0.01):
        """Return a truthy value only if the overlap check holds in both directions."""
        return (left.isOverlap(right, threshold)
                and right.isOverlap(left, threshold))

    def test_isOverlap(self):
        """Check the symmetric overlap of span pairs at several thresholds."""
        # (left span, right span, extra threshold argument, overlap expected)
        cases = [
            (None, None, (), False),
            ((1, 10), None, (), False),
            ((1, 10), (11, 20), (), False),
            ((1, 10), (10, 20), (), False),
            ((1, 11), (10, 20), (0.30,), False),
            ((1, 11), (10, 20), (0.20,), False),
            ((1, 11), (10, 20), (0.10,), True),
            ((1, 10), (1, 10), (), True),
            ((1, 10), (8, 20), (), True),
            ((1, 10), (9, 10), (), True),
            ((1, 10), (1, 5), (), True),
            ((11, 11), (9, 11), (), False),
        ]
        left = Annotation()
        right = Annotation()
        for left_span, right_span, threshold, expected in cases:
            left.span_in_sentence = left_span
            right.span_in_sentence = right_span
            outcome = self._anno_overlap(left, right, *threshold)
            if expected:
                self.assertTrue(outcome)
            else:
                self.assertFalse(outcome)

    def test_isSimilar(self):
        """
        Step two annotations through progressively richer states and check
        `isSimilar` after each change: default state, overlapping spans,
        classification mismatch/match and attribute mismatch/match.
        """
        def attributes(temporality, anatomy):
            # A new dict and list per call so no state is shared between steps
            return {
                'assertion': 'positive',
                'temporality': temporality,
                'anatomy': list(anatomy),
            }

        def place(left_classification, right_classification):
            left.span_in_sentence = (1, 10)
            right.span_in_sentence = (2, 10)
            left.classification = left_classification
            right.classification = right_classification

        left = Annotation()
        right = Annotation()
        # empty annotations are not similar
        self.assertFalse(left.isSimilar(right))

        # overlapping spans and nothing else set: similar
        left.span_in_sentence = (1, 10)
        right.span_in_sentence = (2, 10)
        self.assertTrue(left.isSimilar(right))

        # overlap but classification differs
        place("SSI", "")
        self.assertFalse(left.isSimilar(right))

        # overlap and same classification
        place("SSI", "SSI")
        self.assertTrue(left.isSimilar(right))

        # Same spans and classification; only the attributes vary.
        # (left attributes, right attributes or None for {}, similar expected)
        attribute_steps = [
            (('current', ()), None, False),                     # right has none
            (('current', ()), ('current', ()), True),           # identical
            (('current', ("surgical site",)),
             ('future', ()), False),                            # temporality, anatomy differ
            (('current', ("surgical site",)),
             ('current', ()), False),                           # anatomy differs
            (('current', ("surgical site",)),
             ('current', ("surgical site",)), True),            # identical
        ]
        for left_spec, right_spec, expected in attribute_steps:
            place("SSI", "SSI")
            left.attributes = attributes(*left_spec)
            right.attributes = {} if right_spec is None else attributes(*right_spec)
            if expected:
                self.assertTrue(left.isSimilar(right))
            else:
                self.assertFalse(left.isSimilar(right))
def test_isSimilar(self):
    """
    Step two annotations through progressively richer states and check
    `isSimilar` after each change: default state, overlapping spans,
    classification mismatch/match and attribute mismatch/match.
    """
    def attributes(temporality, anatomy):
        # A new dict and list per call, like the literals this replaces
        return {
            'assertion': 'positive',
            'temporality': temporality,
            'anatomy': list(anatomy),
        }

    def place(left_classification, right_classification):
        left.span_in_sentence = (1, 10)
        right.span_in_sentence = (2, 10)
        left.classification = left_classification
        right.classification = right_classification

    left, right = Annotation(), Annotation()
    # empty annotations are not similar
    self.assertFalse(left.isSimilar(right))

    # overlapping spans and nothing else set: similar
    left.span_in_sentence = (1, 10)
    right.span_in_sentence = (2, 10)
    self.assertTrue(left.isSimilar(right))

    # overlap but classification differs
    place("SSI", "")
    self.assertFalse(left.isSimilar(right))

    # overlap and same classification
    place("SSI", "SSI")
    self.assertTrue(left.isSimilar(right))

    # Same spans and classification; only the attributes vary.
    # (left attributes, right attributes or None for {}, similar expected)
    attribute_steps = (
        (('current', ()), None, False),
        (('current', ()), ('current', ()), True),
        (('current', ("surgical site",)), ('future', ()), False),
        (('current', ("surgical site",)), ('current', ()), False),
        (('current', ("surgical site",)), ('current', ("surgical site",)), True),
    )
    for left_spec, right_spec, expected in attribute_steps:
        place("SSI", "SSI")
        left.attributes = attributes(*left_spec)
        right.attributes = {} if right_spec is None else attributes(*right_spec)
        if expected:
            self.assertTrue(left.isSimilar(right))
        else:
            self.assertFalse(left.isSimilar(right))
def test_isOverlap(self):
    """
    Check `self._anno_overlap` for a series of span pairs, some with an
    explicit threshold and the rest with the helper's default.
    """
    # (left span, right span, extra threshold argument, overlap expected)
    cases = (
        (None, None, (), False),
        ((1, 10), None, (), False),
        ((1, 10), (11, 20), (), False),
        ((1, 10), (10, 20), (), False),
        ((1, 11), (10, 20), (0.30,), False),
        ((1, 11), (10, 20), (0.20,), False),
        ((1, 11), (10, 20), (0.10,), True),
        ((1, 10), (1, 10), (), True),
        ((1, 10), (8, 20), (), True),
        ((1, 10), (9, 10), (), True),
        ((1, 10), (1, 5), (), True),
        ((11, 11), (9, 11), (), False),
    )
    left, right = Annotation(), Annotation()
    for left_span, right_span, threshold, expected in cases:
        left.span_in_sentence = left_span
        right.span_in_sentence = right_span
        outcome = self._anno_overlap(left, right, *threshold)
        if expected:
            self.assertTrue(outcome)
        else:
            self.assertFalse(outcome)
def setup(self):
    """
    Create an Annotation with id "1234567890" and keep it on
    `self.annotation`.
    """
    annotation = Annotation()
    annotation.id = "1234567890"
    # Previously the object was bound only to a local name and discarded on
    # return, so this fixture had no effect; store it where tests can use it.
    # NOTE(review): unittest only runs `setUp` automatically - confirm that
    # this lowercase name is what the test runner in use expects.
    self.annotation = annotation
def _anno(self, span, classy, attri):
    """
    Return a new Annotation whose `span_in_sentence`, `classification` and
    `attributes` are set to `span`, `classy` and `attri` respectively.
    """
    built = Annotation()
    fields = (
        ('span_in_sentence', span),
        ('classification', classy),
        ('attributes', attri),
    )
    for field_name, value in fields:
        setattr(built, field_name, value)
    return built