Beispiel #1
0
def test_parse_doc():
    """Run the parser over a document and check the stored parse tree.

    A regular sentence must get the expected bracketed tree in its
    ``'parse tree'`` infon; an empty sentence must get ``None``.
    """
    sentence_text = 'No pneumothorax.'
    expected_tree = '(S1 (S (S (NP (DT No) (NN pneumothorax))) (. .)))'

    parsed = parser(text_to_bioc([sentence_text], type='d/p/s'))
    first_sentence = parsed.passages[0].sentences[0]
    assert first_sentence.infons['parse tree'] == expected_tree

    # An empty sentence is expected to carry None as its parse tree.
    parsed = parser(text_to_bioc([''], type='d/p/s'))
    first_sentence = parsed.passages[0].sentences[0]
    assert first_sentence.infons['parse tree'] is None
Beispiel #2
0
def test_parse():
    """Parse a negated sentence, convert it, and check tokens and dependencies.

    The sentence is parsed and compared with the expected tree; the tree is
    then attached to a fresh document, converted, and the resulting token
    texts and dependency labels are compared position by position.
    """
    ud_converter = NegBioPtb2DepConverter(representation='CCprocessed',
                                          universal=True)
    # Dependencies noted by the original author for this sentence:
    # neg(evidence-2, no-1)
    # !root(ROOT-0, evidence-2)
    # !case(infiltrate-5, of-3)
    # amod(infiltrate-5, focal-4)
    # nmod:of(evidence-2, infiltrate-5)
    # nmod:of(evidence-2, effusion-7)
    # conj:or(infiltrate-5, effusion-7)
    # cc(infiltrate-5, or-8)
    # nmod:of(evidence-2, pneumothorax-9)
    # conj:or(infiltrate-5, pneumothorax-9)
    text = 'no evidence of focal infiltrate, effusion or pneumothorax.'
    tree = (
        '(S1 (S (S (NP (NP (DT no) (NN evidence)) (PP (IN of) (NP (NP (JJ focal)'
        ' (NN infiltrate)) (, ,) (NP (NN effusion)) (CC or) (NP (NN pneumothorax)))))) (. .)))'
    )
    parsed = parser.parse(text)
    assert str(parsed) == tree

    doc = text_to_bioc([text], type='d/p/s')
    sentence = doc.passages[0].sentences[0]
    sentence.infons['parse tree'] = tree
    ud_converter(doc)

    # Index-based comparison on purpose: a result that is too short must
    # fail with an IndexError instead of being silently accepted.
    expected_tokens = (
        'no evidence of focal infiltrate , effusion or pneumothorax .'.split()
    )
    for position, token in enumerate(expected_tokens):
        assert sentence.annotations[position].text == token

    expected_labels = (
        'neg case amod nmod:of punct nmod:of conj:or cc nmod:of conj:or punct'
        .split()
    )
    for position, label in enumerate(expected_labels):
        assert sentence.relations[position].infons['dependency'] == label
Beispiel #3
0
def test_extend():
    """Check which passage annotations receive the 'negation' infon from _extend.

    After detection on the 'pneumothorax' annotation, two further annotations
    are added: 'eumothor' (located inside the 'pneumothorax' span) and 'foo'.
    ``_extend`` must mark the former as negated and leave the latter alone,
    also after 'CUI' infons have been set on annotations 0 and 2.
    """
    def _add_annotation(doc, ann_text, offset, length):
        # Attach a new annotation with a single location to the first passage.
        ann = bioc.BioCAnnotation()
        ann.text = ann_text
        ann.add_location(bioc.BioCLocation(offset, length))
        doc.passages[0].add_annotation(ann)

    d = text_to_bioc(['findings: no pneumothorax.'], type='d/p/s')
    _add_annotation(d, 'pneumothorax', 13, 12)
    detector(d)

    # Fake annotations added after detection has run.
    _add_annotation(d, 'eumothor', 15, 8)
    _add_annotation(d, 'foo', 27, 3)

    _extend(d, 'negation')

    annotations = d.passages[0].annotations
    assert annotations[1].infons['negation'] == 'True'
    assert 'negation' not in annotations[2].infons

    d.passages[0].annotations[0].infons['CUI'] = 'xxx'
    d.passages[0].annotations[2].infons['CUI'] = 'xxx'
    _extend(d, 'negation')
    assert 'negation' not in d.passages[0].annotations[2].infons
Beispiel #4
0
def _get_document(text, tree, sen_ann_index):
    """Build a converted one-sentence document with one token promoted.

    Args:
        text: sentence text used to create the 'd/p/s' document.
        tree: parse tree stored in the sentence's ``'parse tree'`` infon
            before conversion.
        sen_ann_index: index of the sentence annotation that is additionally
            added to the passage.

    Returns:
        The document after conversion with a default NegBioPtb2DepConverter.
    """
    doc = text_to_bioc([text], type='d/p/s')
    doc.passages[0].sentences[0].infons['parse tree'] = tree

    converter = NegBioPtb2DepConverter()
    converter(doc)

    passage = doc.passages[0]
    chosen = passage.sentences[0].annotations[sen_ann_index]
    passage.add_annotation(chosen)
    return doc
def test_normalize():
    """Exercise normalize on a bracketed placeholder and on edge-case documents.

    The ``[**...**]`` placeholder of a single-passage document must be
    replaced by spaces of the same length. A ``None`` passage text and a
    document without passages are only required not to raise, and a document
    with two passages must come back unchanged.
    """
    raw = '[**Hospital 9**] MEDICAL CONDITION'
    blanked = '                 MEDICAL CONDITION'

    doc = normalize(text_to_bioc([raw], 'd/p'))
    assert doc.passages[0].text == blanked

    # A passage whose text is None: only checks that no exception is raised.
    doc.passages[0].text = None
    normalize(doc)

    # Skipped when there is more than one passage: the text stays as it was.
    doc = normalize(text_to_bioc([raw, raw], 'd/p'))
    assert doc.passages[0].text == raw

    # A document with no passages at all: again, must simply not raise.
    del doc.passages[:]
    normalize(doc)
Beispiel #6
0
    def test_convert_doc2(self):
        """Smoke-test conversion of a long tree with nested parentheticals.

        There are no assertions: the test passes as long as the converter
        handles the tree without raising.
        """
        text = "Can't exclude 1 cm lesion in or near lower esophagus (for example series 2 image 91) BOOKMARK (1.1 cm) appearing or better demonstrated."
        # The first "or" is tagged CC (coordinating conjunction). The tag had
        # been corrupted to "test_convert_doc2CC", which is not a valid
        # part-of-speech tag.
        tree = "(S1 (S (S (VP (MD Can) (RB n't) (VP (VB exclude) (NP (NP (ADJP (CD 1) (NN cm)) (NN lesion)) (PP (IN in) (NP (NP (NP (CC or) (JJ near) (NP (NP (JJR lower) (NN esophagus)) (PRN (-LRB- -LRB-) (PP (IN for) (NP (NN example))) (NP (NN series) (CD 2) (NN image) (CD 91)) (-RRB- -RRB-))) (NN BOOKMARK)) (PRN (-LRB- -LRB-) (NP (CD 1.1) (NN cm)) (-RRB- -RRB-))) (VP (VBG appearing) (ADVP (CC or) (ADVP (RBR better))) (VP (VBN demonstrated))))))))) (. .)))"
        d = text_to_bioc([text], type='d/p/s')
        s = d.passages[0].sentences[0]
        s.infons['parse tree'] = tree

        c = NegBioPtb2DepConverter()
        c(d)
Beispiel #7
0
def create_collections():
    """Write ten BioC collection files into a fresh temporary directory.

    Each file ``<i>.xml`` (i = 0..9) holds the collection built by
    ``text_to_bioc(['No pneumothorax.'], 'c/d/p')``.

    Returns:
        list of str: the paths of the files written, in creation order.
        The directory comes from ``tempfile.mkdtemp`` and is not removed
        here; cleaning it up is left to the caller.
    """
    top_dir = tempfile.mkdtemp()
    filenames = []
    for i in range(10):
        collection = text_to_bioc(['No pneumothorax.'], 'c/d/p')
        filename = os.path.join(top_dir, '{}.xml'.format(i))
        # Pin the encoding so the output does not depend on the platform's
        # locale default.
        with open(filename, 'w', encoding='utf8') as fp:
            bioc.dump(collection, fp)
        filenames.append(filename)
    return filenames
Beispiel #8
0
def test_neg_regex():
    """Check the negation regex and the detector on 'no pneumothorax'.

    ``is_neg_regex`` must accept the text, and after running the detector the
    'pneumothorax' annotation must carry ``infons['negation'] == 'True'``.
    """
    text = 'findings: no pneumothorax.'
    assert is_neg_regex(text)

    doc = text_to_bioc([text], type='d/p/s')

    finding = bioc.BioCAnnotation()
    finding.text = 'pneumothorax'
    finding.add_location(bioc.BioCLocation(13, 12))
    doc.passages[0].add_annotation(finding)

    detector(doc)

    first_annotation = doc.passages[0].annotations[0]
    assert first_annotation.infons['negation'] == 'True'
Beispiel #9
0
 def test_convert_doc_no_jpype(self):
     """Convert with the 'subprocess' backend and check no lemma is stored.

     The converter's backend is switched to ``'subprocess'`` by hand; after
     conversion the second token annotation must have no ``'lemma'`` infon.
     """
     converter = NegBioPtb2DepConverter()
     # Replace the backend and its StanfordDependencies instance in place.
     converter._backend = 'subprocess'
     converter._sd = StanfordDependencies.get_instance(
         backend=converter._backend)

     doc = text_to_bioc(['No pneumothorax.'], type='d/p/s')
     sentence = doc.passages[0].sentences[0]
     sentence.infons['parse tree'] = (
         '(S1 (S (S (NP (DT No) (NN pneumothorax))) (. .)))'
     )

     doc = converter(doc)
     sentence = doc.passages[0].sentences[0]
     assert 'lemma' not in sentence.annotations[1].infons
Beispiel #10
0
    def test_split_doc(self, splitter):
        """Split a two-line passage and check sentence texts and offsets.

        Before splitting, the passage holds the full text and no sentences.
        Afterwards it must hold two 'No pneumothorax.' sentences at offsets
        0 and 17.
        """
        text = 'No pneumothorax.\nNo pneumothorax.'
        doc = text_to_bioc([text], 'd/p')
        passage = doc.passages[0]
        assert passage.text == text
        assert len(passage.sentences) == 0

        doc = splitter(doc)
        sentences = doc.passages[0].sentences
        assert len(sentences) == 2
        # Exactly two sentences are guaranteed by the assertion above.
        for sentence, expected_offset in zip(sentences, (0, 17)):
            assert sentence.text == 'No pneumothorax.'
            assert sentence.offset == expected_offset
Beispiel #11
0
def test_split_document():
    text = """findings: pa and lat cxr at 7:34 p.m.. heart and mediastinum are
stable. lungs are unchanged. air- filled cystic changes. no
pneumothorax. osseous structures unchanged scoliosis
impression: stable chest.
dictating 
"""
    d = text_to_bioc([text], type='d/p')
    d = SectionSplitter().__call__(d)
    assert len(d.passages) == 4
    assert d.passages[0].text == 'findings:'
    assert d.passages[
        1].text == """pa and lat cxr at 7:34 p.m.. heart and mediastinum are
stable. lungs are unchanged. air- filled cystic changes. no
pneumothorax. osseous structures unchanged scoliosis"""
    assert d.passages[2].text == 'impression:'
    assert d.passages[3].text == """stable chest.
Beispiel #12
0
    def test_convert_doc(self):
        """Convert 'No pneumothorax.' and verify every token and dependency.

        Checks text, tag, lemma and offset of the three token annotations and
        label and node references of the two relations. Then re-runs the
        converter with the parse tree missing and with it set to ``None``;
        both runs only need to complete without raising.
        """
        doc = text_to_bioc(['No pneumothorax.'], type='d/p/s')
        sentence = doc.passages[0].sentences[0]
        sentence.infons['parse tree'] = (
            '(S1 (S (S (NP (DT No) (NN pneumothorax))) (. .)))'
        )

        converter = NegBioPtb2DepConverter()
        doc = converter(doc)
        sentence = doc.passages[0].sentences[0]

        assert len(sentence.annotations) == 3, len(sentence.annotations)
        assert len(sentence.relations) == 2

        # (text, tag, lemma, offset) per token, in sentence order.
        expected_tokens = (
            ('No', 'DT', 'no', 0),
            ('pneumothorax', 'NN', 'pneumothorax', 3),
            ('.', '.', '.', 15),
        )
        for position, (word, tag, lemma, offset) in enumerate(expected_tokens):
            token = sentence.annotations[position]
            assert token.text == word
            assert token.infons['tag'] == tag
            assert token.infons['lemma'] == lemma
            assert token.total_span.offset == offset

        # (dependency label, refid of node 0, refid of node 1) per relation.
        expected_relations = (
            ('neg', 'T0', 'T1'),
            ('punct', 'T2', 'T1'),
        )
        for position, (label, first, second) in enumerate(expected_relations):
            relation = sentence.relations[position]
            assert relation.infons['dependency'] == label
            assert relation.nodes[0].refid == first
            assert relation.nodes[1].refid == second

        # Empty parse tree: first with the infon removed, then set to None.
        del sentence.annotations[:]

        del sentence.infons['parse tree']
        converter(doc)

        sentence.infons['parse tree'] = None
        converter(doc)
Beispiel #13
0
def test_clean_sentences():
    """Check that CleanUp removes sentences and optionally sorts annotations.

    Ten passage annotations are added at offsets 10 down to 1. After a plain
    clean-up the sentences are gone and the annotations keep that order;
    with ``sort_anns=True`` they come back ordered by offset 1..10.
    """
    cleaner = CleanUp()

    doc = text_to_bioc(['No pneumothorax.', 'No pneumothorax.'], type='d/p/s')
    passage = doc.passages[0]
    for offset in range(10, 0, -1):
        marker = bioc.BioCAnnotation()
        marker.add_location(bioc.BioCLocation(offset, 1))
        passage.add_annotation(marker)

    assert len(doc.passages[0].sentences) == 2

    doc = cleaner(doc)
    assert len(doc.passages[0].sentences) == 0
    assert len(doc.passages[0].annotations) == 10
    # Insertion order (descending offsets) is preserved without sorting.
    for position, expected_offset in enumerate(range(10, 0, -1)):
        annotation = doc.passages[0].annotations[position]
        assert annotation.total_span.offset == expected_offset

    doc = cleaner(doc, sort_anns=True)
    # With sorting requested the offsets must be ascending.
    for position, expected_offset in enumerate(range(1, 11)):
        annotation = doc.passages[0].annotations[position]
        assert annotation.total_span.offset == expected_offset
Beispiel #14
0
def test_lemmatize_doc():
    """Check that Lemmatizer restores the lemmas written by the converter.

    The converter's lemmas are recorded and removed from every token
    annotation; running the lemmatizer must then produce the same lemma for
    each token.
    """
    ud_converter = NegBioPtb2DepConverter(representation='CCprocessed',
                                          universal=True)
    lemmatizer = Lemmatizer()

    text = 'no evidence of focal infiltrate, effusion or pneumothorax.'
    tree = (
        '(S1 (S (S (NP (NP (DT no) (NN evidence)) (PP (IN of) (NP (NP (JJ focal)'
        ' (NN infiltrate)) (, ,) (NP (NN effusion)) (CC or) (NP (NN pneumothorax)))))) (. .)))'
    )
    doc = text_to_bioc([text], type='d/p/s')
    sentence = doc.passages[0].sentences[0]
    sentence.infons['parse tree'] = tree
    ud_converter(doc)

    # Remember each converter lemma and strip it from the annotation.
    expected = [token.infons.pop('lemma') for token in sentence.annotations]

    lemmatizer(doc)
    for position, token in enumerate(sentence.annotations):
        assert expected[position] == token.infons['lemma']