def annotate(self, model):
        """
        Run `model` over every sentence and collect the resulting annotations.

        Each sentence's text is handed to `model.markup_sentence`; every marked
        target of the returned markup is converted into an Annotation. Targets
        whose annotation ends up without a classification are dropped, the
        survivors are passed through `self.prune_annotations`, and whatever is
        left (minus the excluded annotation types) is stored in
        `self.annotations['hai_detect']`, which is reset at the start of the call.
        The index of the originating sentence is appended to
        `self.sentences_with_annotations` once per stored annotation.
        """
        excluded_types = ('infection', 'discharge')
        collected = []
        self.annotations['hai_detect'] = collected

        for idx, sent in enumerate(self.sentences):
            text = sent['text']
            markup = model.markup_sentence(text)

            candidates = []
            for target in markup.getMarkedTargets():
                anno = Annotation()
                anno.from_markup(target,
                                 markup,
                                 text,
                                 sent['span'],
                                 rpt_id=self.rpt_id)
                # A falsy classification means this markup should be disregarded
                if anno.classification:
                    anno.sentence_num = idx
                    candidates.append(anno)

            for anno in self.prune_annotations(candidates):
                if anno.annotation_type in excluded_types:
                    continue
                self.sentences_with_annotations.append(idx)
                collected.append(anno)
예제 #2
0
    def setUp(self):
        """Create a fresh Annotation with 'assertion' and 'temporality' removed.

        The result is stored on ``self.annotation``.
        """
        self.annotation = Annotation()
        # Remove each key independently. With both pops inside one try block,
        # a missing 'assertion' key raised KeyError first and left
        # 'temporality' in place.
        for key in ('assertion', 'temporality'):
            self.annotation.attributes.pop(key, None)
예제 #3
0
    def test_classify(self):
        """Check classify() before and after an anatomy is attached.

        The last call, with an unrecognised annotation type, is only executed;
        its return value is not asserted.
        """
        subject = Annotation()
        subject.annotation_type = 'Evidence of SSI'

        # Before any anatomy is assigned by this test
        self.assertEqual(subject.classify(),
                         'Positive Evidence of SSI - No Anatomy')

        # Once an anatomy is present
        subject.attributes['anatomy'] = ['Arm']
        self.assertEqual(subject.classify(), 'Positive Evidence of SSI')

        subject.annotation_type = "HELLO WORLD"
        subject.classify()
예제 #4
0
def import_from_xlsx(corpus_dir, file_name):
    """Build documents with gold-standard annotations from an eHOST workbook.

    The active worksheet of `file_name` is read. Its first row must have
    exactly 11 columns and is otherwise skipped; every following row is turned
    into an Annotation via `Annotation.from_ehost_xlsx`. The second column of a
    row names the text file (relative to `corpus_dir`) the annotation belongs
    to; rows whose file does not exist are ignored.

    Args:
        corpus_dir: Directory containing the annotated text files.
        file_name: Path of the .xlsx workbook to load.

    Returns:
        dict mapping each text file name to its ClinicalTextDocument, with the
        row annotations appended to `doc.annotations['gold_standard']`.

    Raises:
        AssertionError: if `corpus_dir` does not exist.
        ValueError: if the first row does not have exactly 11 columns.
    """
    print(corpus_dir)
    assert os.path.exists(corpus_dir)
    print(file_name)
    wb = load_workbook(filename=file_name, read_only=False)
    ws = wb.active  # just getting the first worksheet regardless of its name

    # Materialise the rows once; rebuilding list(ws) for every row made the
    # import quadratic in the number of rows.
    rows = list(ws)

    col_size = len(rows[0])
    if col_size != 11:  # no more, no less
        raise ValueError("MalformedHostExcelRow")

    documents = dict()
    row_cnt = len(rows)
    print("{} rows".format(row_cnt))

    for row in rows[1:]:
        full_file_name = row[1].value  # second column
        full_file_path = os.path.join(corpus_dir, full_file_name)
        if not os.path.exists(full_file_path):
            continue

        anno = Annotation()
        anno.from_ehost_xlsx(row)
        doc = documents.setdefault(full_file_name, ClinicalTextDocument())
        doc.annotations['gold_standard'].append(anno)
        doc.filepath = full_file_name
        # Report id is the file name without directory or extension
        rpt_id = os.path.splitext(os.path.basename(doc.filepath))[0]
        doc.rpt_id = rpt_id
        doc.processText(filepath=full_file_path, rpt_id=rpt_id)

    return documents
예제 #5
0
    def test_from_ehost_xlsx(self):
        """Exercise from_ehost_xlsx() with a bogus path and the sample workbook."""
        sample_path = "sample_annotations.xlsx"
        bogus_path = 'myPath'

        subject = Annotation()
        subject.from_ehost_xlsx(bogus_path)
        self.assertEqual(subject.id, "12345")

        # The sample workbook has to be present in the working directory
        sample_present = os.path.isfile(sample_path)
        self.assertEqual(sample_present, True)

        # The return value is expected to report whether the load succeeded
        self.assertEqual(subject.from_ehost_xlsx(bogus_path), False)
        self.assertEqual(subject.from_ehost_xlsx(sample_path), True)
예제 #6
0
class test_Annotation(unittest.TestCase):
    """Tests for Annotation: classify, eHOST xlsx import, overlap, similarity."""

    # Class-level instance kept for backward compatibility; the tests use the
    # per-test instance that setUp() stores on ``self``.
    annotation = Annotation()

    def setUp(self):
        """Create a fresh Annotation with 'assertion' and 'temporality' removed."""
        self.annotation = Annotation()
        # Remove each key independently. With both pops inside one try block,
        # a missing 'assertion' key raised KeyError first and left
        # 'temporality' in place.
        for key in ('assertion', 'temporality'):
            self.annotation.attributes.pop(key, None)

    def tearDown(self):
        pass

    def _assert_attributes_missing(self, annotation, *keys):
        """Assert that looking up each of `keys` in the attributes raises KeyError."""
        for key in keys:
            with self.assertRaises(KeyError):
                annotation.attributes[key]

    def test_classify(self):
        """Check classify() before and after an anatomy is attached."""
        annotation = Annotation()
        annotation.annotation_type = 'Evidence of SSI'
        retVal = annotation.classify()
        self.assertEqual(retVal, 'Positive Evidence of SSI - No Anatomy')

        annotation.attributes['anatomy'] = ['Arm']
        retVal = annotation.classify()
        self.assertEqual(retVal, 'Positive Evidence of SSI')

        # Unrecognised type: only checks that classify() can be called;
        # the return value is not asserted.
        annotation.annotation_type = "HELLO WORLD"
        annotation.classify()

    def test_from_ehost_xlsx(self):
        """Load rows of the sample workbook and check the parsed fields.

        Rows at index 2-4 are expected to parse; rows at index 5 and 6 are
        expected to raise MalformedSpanValue and leave the annotation empty.
        """
        sampleFile = "sample_annotations.xlsx"
        self.assertEqual(os.path.isfile(sampleFile), True)

        wb = load_workbook(filename=sampleFile, read_only=False)
        ws = wb.active  # just getting the first worksheet regardless of its name
        rows = list(ws)  # materialise once instead of once per row lookup

        # Row without classification / assertion / temporality
        self.setUp()
        annotation = self.annotation
        annotation.from_ehost_xlsx(rows[2])

        self.assertEqual(annotation.sentence, "drains")
        self.assertEqual(annotation.annotation_type, "DRAINAGE")
        self.assertEqual(annotation.span_in_sentence, (2608, 2614))
        self._assert_attributes_missing(annotation, 'classification',
                                        'assertion', 'temporality')

        # Fully attributed row (negated, current)
        self.setUp()
        annotation = self.annotation
        annotation.from_ehost_xlsx(rows[3])

        self.assertEqual(annotation.sentence,
                         "3 drains in place with serosanguinous output")
        self.assertEqual(annotation.annotation_type, "Evidence of SSI")
        self.assertEqual(annotation.span_in_sentence, (1234, 15069))
        self.assertEqual(annotation.attributes['classification'],
                         "superficial")
        self.assertEqual(annotation.attributes['assertion'], "negated")
        self.assertEqual(annotation.attributes['temporality'], "current")

        # Fully attributed row (positive, historical)
        self.setUp()
        annotation = self.annotation
        annotation.from_ehost_xlsx(rows[4])

        self.assertEqual(annotation.sentence, "Patient has a history of UTI")
        self.assertEqual(annotation.annotation_type, "Evidence of UTI")
        self.assertEqual(annotation.span_in_sentence, (2261, 2305))
        self.assertEqual(annotation.attributes['classification'],
                         "Evidence of UTI")
        self.assertEqual(annotation.attributes['assertion'], "positive")
        self.assertEqual(annotation.attributes['temporality'], "historical")

        # Rows that must be rejected and must leave the annotation untouched
        for index in (5, 6):
            self.setUp()
            annotation = self.annotation
            with self.assertRaises(MalformedSpanValue):
                annotation.from_ehost_xlsx(rows[index])

            self.assertIs(annotation.sentence, None)
            self.assertIs(annotation.annotation_type, None)
            self.assertIs(annotation.span_in_sentence, None)
            self._assert_attributes_missing(annotation, 'classification',
                                            'assertion', 'temporality')

    def _anno_overlap(self, left, right, threshold=0.01):
        """Return True only when the overlap holds in both directions."""
        return (left.isOverlap(right, threshold)
                and right.isOverlap(left, threshold))

    def test_isOverlap(self):
        """Check symmetric overlap for a series of span pairs and thresholds."""
        left = Annotation()
        right = Annotation()

        # (left span, right span, extra positional args, expected result)
        cases = [
            (None, None, (), False),
            ((1, 10), None, (), False),
            ((1, 10), (11, 20), (), False),
            ((1, 10), (10, 20), (), False),
            # Same one-position overlap judged against decreasing thresholds
            ((1, 11), (10, 20), (0.30,), False),
            ((1, 11), (10, 20), (0.20,), False),
            ((1, 11), (10, 20), (0.10,), True),
            ((1, 10), (1, 10), (), True),
            ((1, 10), (8, 20), (), True),
            ((1, 10), (9, 10), (), True),
            ((1, 10), (1, 5), (), True),
            # Zero-length left span
            ((11, 11), (9, 11), (), False),
        ]
        for left_span, right_span, extra, expected in cases:
            left.span_in_sentence = left_span
            right.span_in_sentence = right_span
            outcome = self._anno_overlap(left, right, *extra)
            detail = "spans {} / {} with args {}".format(
                left_span, right_span, extra)
            if expected:
                self.assertTrue(outcome, detail)
            else:
                self.assertFalse(outcome, detail)

    def test_isSimilar(self):
        """Walk isSimilar() through progressively richer annotation pairs.

        The same two annotations are reused for every step, so values set in
        one step remain in place for later steps unless overwritten.
        """
        left = Annotation()
        right = Annotation()

        def attrs(temporality, anatomy):
            # New dict (and caller-supplied new list) on every call so the two
            # annotations never share attribute objects.
            return {
                'assertion': 'positive',
                'temporality': temporality,
                'anatomy': anatomy,
            }

        def similar(classes=None, attributes=None):
            left.span_in_sentence = (1, 10)
            right.span_in_sentence = (2, 10)
            if classes is not None:
                left.classification, right.classification = classes
            if attributes is not None:
                left.attributes, right.attributes = attributes
            return left.isSimilar(right)

        same_class = ("SSI", "SSI")

        # Empty annotations are not similar
        self.assertFalse(left.isSimilar(right))

        # Overlapping spans and nothing else set: similar
        self.assertTrue(similar())

        # Overlap but classification differs
        self.assertFalse(similar(classes=("SSI", "")))

        # Overlap and same classification
        self.assertTrue(similar(classes=same_class))

        # One side has attributes, the other has none
        self.assertFalse(
            similar(classes=same_class,
                    attributes=(attrs('current', []), {})))

        # Identical attributes on both sides
        self.assertTrue(
            similar(classes=same_class,
                    attributes=(attrs('current', []), attrs('current', []))))

        # Temporality and anatomy differ
        self.assertFalse(
            similar(classes=same_class,
                    attributes=(attrs('current', ["surgical site"]),
                                attrs('future', []))))

        # Only anatomy differs
        self.assertFalse(
            similar(classes=same_class,
                    attributes=(attrs('current', ["surgical site"]),
                                attrs('current', []))))

        # Identical attributes including anatomy
        self.assertTrue(
            similar(classes=same_class,
                    attributes=(attrs('current', ["surgical site"]),
                                attrs('current', ["surgical site"]))))
예제 #7
0
    def test_isSimilar(self):
        """Walk isSimilar() through progressively richer annotation pairs.

        The same two annotations are reused for every step, so values set in
        one step remain in place for later steps unless overwritten.
        """
        first = Annotation()
        second = Annotation()

        def attrs(temporality, anatomy):
            # New dict on every call so the two sides never share one object
            return {
                'assertion': 'positive',
                'temporality': temporality,
                'anatomy': anatomy,
            }

        def similar(classes=None, attributes=None):
            first.span_in_sentence = (1, 10)
            second.span_in_sentence = (2, 10)
            if classes is not None:
                first.classification, second.classification = classes
            if attributes is not None:
                first.attributes, second.attributes = attributes
            return first.isSimilar(second)

        both_ssi = ("SSI", "SSI")

        # Empty annotations are not similar
        self.assertFalse(first.isSimilar(second))

        # Overlapping spans and nothing else set: similar
        self.assertTrue(similar())

        # Overlap but classification differs
        self.assertFalse(similar(classes=("SSI", "")))

        # Overlap and same classification
        self.assertTrue(similar(classes=both_ssi))

        # One side has attributes, the other has none
        self.assertFalse(
            similar(classes=both_ssi,
                    attributes=(attrs('current', []), {})))

        # Identical attributes on both sides
        self.assertTrue(
            similar(classes=both_ssi,
                    attributes=(attrs('current', []), attrs('current', []))))

        # Temporality and anatomy differ
        self.assertFalse(
            similar(classes=both_ssi,
                    attributes=(attrs('current', ["surgical site"]),
                                attrs('future', []))))

        # Only anatomy differs
        self.assertFalse(
            similar(classes=both_ssi,
                    attributes=(attrs('current', ["surgical site"]),
                                attrs('current', []))))

        # Identical attributes including anatomy
        self.assertTrue(
            similar(classes=both_ssi,
                    attributes=(attrs('current', ["surgical site"]),
                                attrs('current', ["surgical site"]))))
예제 #8
0
    def test_isOverlap(self):
        """Check self._anno_overlap for a series of span pairs and thresholds.

        One pair of annotations is reused; only their spans change per case.
        """
        first = Annotation()
        second = Annotation()

        # (first span, second span, extra positional args, expected result)
        scenarios = (
            (None, None, (), False),
            ((1, 10), None, (), False),
            ((1, 10), (11, 20), (), False),
            ((1, 10), (10, 20), (), False),
            # Same pair of spans judged against decreasing thresholds
            ((1, 11), (10, 20), (0.30,), False),
            ((1, 11), (10, 20), (0.20,), False),
            ((1, 11), (10, 20), (0.10,), True),
            ((1, 10), (1, 10), (), True),
            ((1, 10), (8, 20), (), True),
            ((1, 10), (9, 10), (), True),
            ((1, 10), (1, 5), (), True),
            # Zero-length first span
            ((11, 11), (9, 11), (), False),
        )
        for first_span, second_span, extra, expected in scenarios:
            first.span_in_sentence = first_span
            second.span_in_sentence = second_span
            outcome = self._anno_overlap(first, second, *extra)
            if expected:
                self.assertTrue(outcome)
            else:
                self.assertFalse(outcome)
예제 #9
0
 def setup(self):
     """Build an Annotation with a fixed id and keep it on ``self.annotation``."""
     annotation = Annotation()
     annotation.id = "1234567890"
     # Previously the instance was only bound to a local and discarded, so
     # the fixture had no observable effect.
     self.annotation = annotation
예제 #10
0
 def _anno(self, span, classy, attri):
     """Return a new Annotation carrying the given span, classification and attributes."""
     built = Annotation()
     assignments = (
         ('span_in_sentence', span),
         ('classification', classy),
         ('attributes', attri),
     )
     for field, value in assignments:
         setattr(built, field, value)
     return built