def test_dir_reader2(self):
     """Read the eHOST test corpus with overlap support and check its concepts.

     The directory is read recursively with ``support_overlap=True``. Two
     documents are expected, each exposing seven concept types in
     ``doc._.concepts``; for every type the text of its first span is
     compared with a known value.
     """
     if Doc.has_extension("concepts"):
         Doc.remove_extension("concepts")
     # (concept type, text of the first span stored under that type)
     first_span_by_concept = (
         ('Doc_Level_Purulence_Assessment', 'CHIEF'),
         ('Purulent', 'Abdominal pain'),
         ('Non-Purulent', 'PRESENT'),
         ('Incision_and_Drainage', 'patient'),
         ('PreAnnotated', '71-year-old'),
         ('Nonspecific_SSTI', 'X. The patient'),
         ('Exclusions', 'presented'),
     )
     reader = EhostDirReader(
         nlp=English(),
         support_overlap=True,
         recursive=True,
         schema_file='data/ehost_test_corpus/config/projectschema.xml')
     loaded = reader.read(txt_dir='data/ehost_test_corpus/')
     assert len(loaded) == 2
     for doc in loaded:
         assert len(doc._.concepts) == 7
         for concept, first_text in first_span_by_concept:
             assert concept in doc._.concepts
             assert str(doc._.concepts[concept][0]) == first_text
    def test_read(self):
        """Read a single eHOST document and validate it with ``self.eval``."""
        if Doc.has_extension("concepts"):
            Doc.remove_extension("concepts")
        ereader = EhostDocReader(
            nlp=English(),
            schema_file='data/ehost_test_corpus/config/projectschema.xml')
        doc = ereader.read('data/ehost_test_corpus/corpus/doc1.txt')
        self.eval(doc)

    def test_read_doc_name(self):
        """Check ``doc._.doc_name`` for different ``doc_name_depth`` settings.

        With the reader's default depth only the file name is expected; a
        depth of 1 (set on the instance) adds the parent directory, and a
        depth of 2 (passed to the constructor) adds two directory levels.
        """
        # Same reset as the sibling tests, so this test does not depend on
        # whichever test ran before it.
        if Doc.has_extension("concepts"):
            Doc.remove_extension("concepts")
        ereader = EhostDocReader(
            nlp=English(),
            schema_file='data/ehost_test_corpus/config/projectschema.xml')

        doc = ereader.read('data/ehost_test_corpus/corpus/doc1.txt')
        assert (doc._.doc_name == 'doc1.txt')

        # Depth changed on an existing reader instance.
        ereader.doc_name_depth = 1
        doc = ereader.read('data/ehost_test_corpus/corpus/doc1.txt')
        assert (doc._.doc_name == r'corpus/doc1.txt')

        # Depth supplied at construction time.
        ereader = EhostDocReader(
            nlp=English(),
            schema_file='data/ehost_test_corpus/config/projectschema.xml',
            doc_name_depth=2)
        doc = ereader.read('data/ehost_test_corpus/corpus/doc1.txt')
        assert (doc._.doc_name == r'ehost_test_corpus/corpus/doc1.txt')
 def test_parse_to_dicts(self):
     """Parse one eHOST knowtator XML file and check the collection sizes.

     ``parse_to_dicts`` must return four values; spans, classes and
     attributes are expected to hold 7, 7 and 6 entries respectively.
     The fourth value (relations) is not checked.
     """
     if Doc.has_extension("concepts"):
         Doc.remove_extension("concepts")
     reader = EhostDocReader(nlp=English())
     parsed = reader.parse_to_dicts(
         'data/ehost_test_corpus/saved/doc1.txt.knowtator.xml')
     spans, classes, attributes, _relations = parsed
     expected_sizes = ((spans, 7), (classes, 7), (attributes, 6))
     for collection, size in expected_sizes:
         assert len(collection) == size
 def test_set_attributes(self):
     """Check that a span carries a ``status`` attribute valued 'present'.

     An ``EhostDocReader`` is built from the project schema and discarded;
     the assertions then run on a span from an independent pipeline
     (presumably the reader registers the span extensions globally --
     confirm against the reader implementation).
     """
     if Doc.has_extension("concepts"):
         Doc.remove_extension("concepts")
     # Constructed only for its side effects; the instance itself is unused.
     EhostDocReader(
         nlp=English(),
         schema_file='data/ehost_test_corpus/config/projectschema.xml')
     pipeline = English()
     parsed = pipeline('test status attribute')
     assert hasattr(parsed[1:2]._, 'status')
     status_value = parsed[1:2]._.status
     assert status_value == 'present'
 def test_set_attributes(self):
     """Check that spans expose ``Negation`` and ``Confidence`` attributes.

     A ``BratDocReader`` is built from the brat annotation config and
     discarded; the assertions then run on a span from an independent
     pipeline (presumably the reader registers the span extensions
     globally -- confirm against the reader implementation).
     """
     if Doc.has_extension("concepts"):
         Doc.remove_extension("concepts")
     # Constructed only for its side effects; the instance itself is unused.
     BratDocReader(nlp=English(),
                   schema_file='data/brat_test_corpus/annotation.conf')
     pipeline = English()
     token_span = pipeline('test status attribute')[1:2]
     for attribute_name in ('Negation', 'Confidence'):
         assert hasattr(token_span._, attribute_name)
 def test_parse_to_dicts(self):
     """Parse one brat ``.ann`` file's text and check the collection sizes.

     ``parse_to_dicts`` must return four values; spans, classes, attributes
     and relations are expected to hold 12, 17, 6 and 5 entries.
     """
     if Doc.has_extension("concepts"):
         Doc.remove_extension("concepts")
     reader = BratDocReader(nlp=English())
     annotation_text = Path(
         'data/brat_test_corpus/000-introduction.ann').read_text()
     spans, classes, attributes, relations = reader.parse_to_dicts(
         annotation_text)
     expected_sizes = (
         (spans, 12),
         (classes, 17),
         (attributes, 6),
         (relations, 5),
     )
     for collection, size in expected_sizes:
         assert len(collection) == size
 def test_dir_reader(self):
     """Read the eHOST test corpus recursively and validate every document.

     Two documents are expected; each one is passed to ``self.eval``.
     """
     if Doc.has_extension("concepts"):
         Doc.remove_extension("concepts")
     corpus_reader = EhostDirReader(
         nlp=English(),
         recursive=True,
         schema_file='data/ehost_test_corpus/config/projectschema.xml')
     corpus = corpus_reader.read(txt_dir='data/ehost_test_corpus/')
     assert len(corpus) == 2
     for parsed_doc in corpus:
         self.eval(parsed_doc)
 def test_check_spans(self):
     """Check that each entity's ``span_txt`` is contained in the span text.

     ``doc2.txt`` is read without overlap support and with
     ``store_anno_string=True``. Newlines are replaced by spaces on both
     sides before the containment check; every pair is printed first.
     """
     if Doc.has_extension("concepts"):
         Doc.remove_extension("concepts")
     reader = EhostDocReader(
         nlp=English(),
         schema_file='data/ehost_test_corpus/config/projectschema.xml',
         support_overlap=False,
         store_anno_string=True,
         encoding='UTF8',
         log_level=logging.DEBUG)
     parsed = reader.read('data/ehost_test_corpus/corpus/doc2.txt')
     for ent in parsed.ents:
         print(ent._.span_txt, '<>', ent)
         annotated = ent._.span_txt.replace('\n', ' ')
         extracted = str(ent).replace('\n', ' ')
         assert annotated in extracted
Esempio n. 9
0
def remove_doc_extensions():
    """
    Remove :mod:`textacy.extract` custom property and method doc extensions
    from the global :class:`spacy.tokens.Doc`.

    Names returned by ``get_doc_extensions()`` that are not currently
    registered on ``Doc`` are skipped, so the function can be called
    repeatedly (or before the extensions were ever set) without raising.
    """
    for name in get_doc_extensions():
        # ``Doc.remove_extension`` raises for names that are not registered;
        # checking first makes the cleanup idempotent.
        if Doc.has_extension(name):
            Doc.remove_extension(name)
Esempio n. 10
0
 def test_check_spans(self):
     """Check brat entity spans against their stored ``span_txt``.

     ``000-introduction.txt`` is read with overlap support and
     ``store_anno_string=True``. After replacing newlines by spaces,
     ``span_txt`` must be contained in the span text; the annotation
     'complicated panic' is the one tolerated exception. Mismatching
     pairs are printed before the assertion runs.
     """
     if Doc.has_extension("concepts"):
         Doc.remove_extension("concepts")
     reader = BratDocReader(
         nlp=English(),
         schema_file='data/brat_test_corpus/annotation.conf',
         support_overlap=True,
         store_anno_string=True,
         encoding='UTF8',
         log_level=logging.DEBUG)
     parsed = reader.read('data/brat_test_corpus/000-introduction.txt')
     for ent in parsed.ents:
         annotated = ent._.span_txt.replace('\n', ' ')
         extracted = str(ent).replace('\n', ' ')
         contained = annotated in extracted
         if not contained:
             print(ent._.span_txt, '<>', ent)
         assert ent._.span_txt == 'complicated panic' or contained
Esempio n. 11
0
 def test_read(self):
     """Read one brat document and check its entity labels in order.

     ``000-introduction.txt`` is expected to yield twelve entities whose
     labels match ``expected_labels`` position by position.
     """
     if Doc.has_extension("concepts"):
         Doc.remove_extension("concepts")
     expected_labels = (
         'Gene_expression',
         'Protein',
         'Negative_regulation',
         'Positive_regulation',
         'Protein',
         'Gene_expression',
         'Protein',
         'Complex',
         'Protein',
         'Positive_regulation',
         'Simple_chemical',
         'Protein',
     )
     reader = BratDocReader(
         nlp=English(), schema_file='data/brat_test_corpus/annotation.conf')
     parsed = reader.read('data/brat_test_corpus/000-introduction.txt')
     assert len(parsed.ents) == 12
     for position, label in enumerate(expected_labels):
         assert str(parsed.ents[position].label_) == label
Esempio n. 12
0
    def test_check_spans2(self):
        """Check brat entity spans of ``040-text_span_annotation.txt``.

        The document is read without overlap support and with
        ``store_anno_string=True``. For every entity, ``span_txt`` (newlines
        replaced by spaces) must be contained in the span's own text.
        """
        if Doc.has_extension("concepts"):
            Doc.remove_extension("concepts")
        breader = BratDocReader(
            nlp=English(),
            schema_file='data/brat_test_corpus/annotation.conf',
            support_overlap=False,
            store_anno_string=True,
            encoding='UTF8',
            log_level=logging.DEBUG)
        doc = breader.read(
            'data/brat_test_corpus/040-text_span_annotation.txt')
        for span in doc.ents:
            assert (span._.span_txt.replace('\n', ' ')
                    in str(span).replace('\n', ' '))

    def test_dir_reader(self):
        """Read the brat test corpus recursively and check the first document.

        Two documents are expected. The first must expose exactly six
        concept types in ``doc._.concepts``, each with a known span count.
        """
        if Doc.has_extension("concepts"):
            Doc.remove_extension("concepts")
        dir_reader = BratDirReader(
            nlp=English(),
            support_overlap=True,
            recursive=True,
            schema_file='data/brat_test_corpus/annotation.conf')
        docs = dir_reader.read(txt_dir='data/brat_test_corpus/')
        assert (len(docs) == 2)
        doc = docs[0]
        assert (len(doc._.concepts) == 6)
        assert ('Gene_expression' in doc._.concepts)
        assert ('Protein' in doc._.concepts)
        assert ('Negative_regulation' in doc._.concepts)
        assert ('Positive_regulation' in doc._.concepts)
        assert ('Complex' in doc._.concepts)
        assert ('Simple_chemical' in doc._.concepts)
        assert (len(doc._.concepts['Gene_expression']) == 2)
        assert (len(doc._.concepts['Protein']) == 5)
        assert (len(doc._.concepts['Negative_regulation']) == 1)
        assert (len(doc._.concepts['Positive_regulation']) == 2)
        assert (len(doc._.concepts['Complex']) == 1)
        assert (len(doc._.concepts['Simple_chemical']) == 1)
Esempio n. 13
0
 def test_dir_reader(self):
     """Read the brat test corpus recursively and check the first document.

     Two documents are expected. The first must expose exactly six concept
     types in ``doc._.concepts``; all types are checked for presence first,
     then each type's span count is compared with its expected value.
     """
     if Doc.has_extension("concepts"):
         Doc.remove_extension("concepts")
     # (concept type, expected number of spans in the first document)
     expected_counts = (
         ('Gene_expression', 2),
         ('Protein', 5),
         ('Negative_regulation', 1),
         ('Positive_regulation', 2),
         ('Complex', 1),
         ('Simple_chemical', 1),
     )
     corpus_reader = BratDirReader(
         nlp=English(),
         support_overlap=True,
         recursive=True,
         schema_file='data/brat_test_corpus/annotation.conf')
     corpus = corpus_reader.read(txt_dir='data/brat_test_corpus/')
     assert len(corpus) == 2
     first_doc = corpus[0]
     assert len(first_doc._.concepts) == 6
     for concept, _count in expected_counts:
         assert concept in first_doc._.concepts
     for concept, count in expected_counts:
         assert len(first_doc._.concepts[concept]) == count