def test_readme_03(self):
        """ Test: README.rst snippet that trains from in-code sources and phrases """

        excelcy = ExcelCy()
        storage = excelcy.storage
        storage.base_path = self.test_data_path
        storage.config = Config(
            nlp_base='en_core_web_sm', train_iteration=2, train_drop=0.2)

        # Register the sources first, then the phrase annotations, in the
        # same order as the README snippet.
        sources = (
            ('text', 'Robertus Johansyah is the maintainer ExcelCy'),
            ('textract', 'source/source_01.txt'),
        )
        for source_kind, source_value in sources:
            storage.source.add(kind=source_kind, value=source_value)

        phrases = (
            ('Uber', 'ORG'),
            ('Robertus Johansyah', 'PERSON'),
        )
        for phrase, entity in phrases:
            storage.prepare.add(kind='phrase', value=phrase, entity=entity)

        # Run the pipeline stages in order.
        excelcy.discover()
        excelcy.prepare()
        excelcy.train()

        # The first entity of each sentence must carry the expected label.
        expectations = (
            ('Uber blew through $1 million a week', 'ORG'),
            ('Robertus Johansyah is maintainer ExcelCy', 'PERSON'),
        )
        for sentence, expected_label in expectations:
            first_entity = excelcy.nlp(sentence).ents[0]
            assert first_entity.label_ == expected_label
Beispiel #2
0
def train_excelcy(save=False):
    """Run ExcelCy on the training workbook and print entities found in book 1.

    After executing ``train_model.xlsx`` from ``constants.MODEL_DATA_DIR``,
    the pipeline is applied to the content of book number 1 and two sets are
    printed: the texts of ``SHIP`` entities (with every ``'the '`` / ``'The '``
    occurrence removed) and the texts of ``PERSON`` entities.

    Args:
        save: When true, call ``save_nlp`` with ``constants.MODEL_DIR`` after
            the workbook has been executed.
    """
    excelcy = ExcelCy()
    add_stopwords(excelcy.nlp)

    workbook_path = constants.MODEL_DATA_DIR / 'train_model.xlsx'
    # NOTE(review): the return value of execute() is discarded; confirm that
    # it trains this instance in place rather than returning a new ExcelCy.
    excelcy.execute(str(workbook_path))
    if save:
        excelcy.save_nlp(str(constants.MODEL_DIR))

    book_text = load_book_by_nr(1).content()
    doc = excelcy.nlp(book_text)

    # Collect both entity sets in a single pass over the document.
    ships = set()
    persons = set()
    for entity in doc.ents:
        if entity.label_ == 'SHIP':
            ships.add(re.sub('[tT]he ', '', entity.text))
        elif entity.label_ == 'PERSON':
            persons.add(entity.text)

    print(ships)
    print(persons)
Beispiel #3
0
from excelcy import ExcelCy
from excelcy.storage import Config

# test_string = 'Android Pay expands to Canada'
# excelcy = ExcelCy()
# excelcy.storage.config = Config(nlp_base='en_core_web_sm', train_iteration=50, train_drop=0.2)
# doc = excelcy.nlp(test_string)
# # showing no ORG
# print([(ent.label_, ent.text) for ent in doc.ents])
# excelcy.storage.source.add(kind='text', value=test_string)
# excelcy.discover()
# excelcy.storage.prepare.add(kind='phrase', value='Android Pay', entity='PRODUCT')
# excelcy.prepare()
# excelcy.train()
# doc = excelcy.nlp(test_string)
# print([(ent.label_, ent.text) for ent in doc.ents])

# FAILED tests/test_excelcy.py::ExcelCyTestCase::test_execute - AssertionError: assert ('$1', 'MONEY') in {('$1 million', 'MONEY'), ('Uber', 'ORG')}
# FAILED tests/test_pipe.py::PipeTestCase::test_execute - AssertionError: assert ('$1', 'MONEY') in {('$1 million', 'MONEY'), ('Uber', 'ORG')}
# FAILED tests/test_readme.py::ReadmeTestCase::test_readme_04 - AssertionError: assert ('China' == 'Himalayas'

# Sentence used for both the "before" and "after" entity listings.
sample_text = 'Android Pay expands to Canada'

# Before: entities reported by a freshly constructed ExcelCy pipeline.
excelcy = ExcelCy()
doc = excelcy.nlp(sample_text)
entities_before = [(ent.label_, ent.text) for ent in doc.ents]
print(entities_before)

# After: entities reported once the test workbook has been executed.
excelcy = ExcelCy.execute(file_path='tests/data/test_data_03.xlsx')
doc = excelcy.nlp(sample_text)
entities_after = [(ent.label_, ent.text) for ent in doc.ents]
print(entities_after)