예제 #1
0
    def test_readme_04(self):
        """ Test: README snippet - gold entity of 'Himalayas' in two workbooks """

        # First workbook: gold item 1.1 must tag "Himalayas" as PRODUCT.
        initial_path = self.get_test_data_path(fs_path='test_data_05.xlsx')
        initial_run = ExcelCy.execute(file_path=initial_path)
        initial_gold = initial_run.storage.train.items.get('1').items.get('1.1')
        assert initial_gold.subtext == 'Himalayas'
        assert initial_gold.entity == 'PRODUCT'

        # Second workbook: the same gold item must now carry the FAC entity.
        updated_path = self.get_test_data_path(fs_path='test_data_05a.xlsx')
        updated_run = ExcelCy.execute(file_path=updated_path)
        updated_gold = updated_run.storage.train.items.get('1').items.get('1.1')
        assert updated_gold.subtext == 'Himalayas'
        assert updated_gold.entity == 'FAC'
예제 #2
0
def train_excelcy(save=False):
    """Run ExcelCy on ``train_model.xlsx`` and print SHIP and PERSON entities.

    The entities are taken from the content of book number 1, processed with
    the ExcelCy pipeline after ``execute`` has run.

    :param save: when truthy, call ``save_nlp`` with ``constants.MODEL_DIR``
        once ``execute`` has finished.
    """
    trainer = ExcelCy()
    add_stopwords(trainer.nlp)
    workbook = str(constants.MODEL_DATA_DIR / 'train_model.xlsx')
    trainer.execute(workbook)
    if save:
        trainer.save_nlp(str(constants.MODEL_DIR))

    book_doc = trainer.nlp(load_book_by_nr(1).content())
    ships = set()
    persons = set()
    for ent in book_doc.ents:
        if ent.label_ == 'SHIP':
            # The pattern is not anchored: every "the "/"The " occurrence in
            # the entity text is removed, not only a leading article.
            ships.add(re.sub('[tT]he ', '', ent.text))
        elif ent.label_ == 'PERSON':
            persons.add(ent.text)
    print(ships)
    print(persons)
예제 #3
0
    def test_readme_02(self):
        """ Test: README snippet - first entity of the sample sentence is ORG """

        workbook = self.get_test_data_path(fs_path='test_data_01.xlsx')
        pipeline = ExcelCy.execute(file_path=workbook).nlp
        # Only the first entity reported for the sentence is checked.
        first_entity = pipeline('Google rebrands its business apps').ents[0]
        assert first_entity.label_ == 'ORG'
예제 #4
0
    def test_save(self):
        """ Test: save to a temporary path, then load from that same path """

        source_path = self.get_test_data_path(fs_path='test_data_01.xlsx')
        instance = ExcelCy.execute(file_path=source_path)
        # The file is written to the test tmp location and read straight back;
        # the test passes as long as neither call raises.
        target_path = self.get_test_tmp_path(fs_path='test_data_01.xlsx')
        instance.save(file_path=target_path)
        instance.load(file_path=target_path)
예제 #5
0
 def assert_training(self, file_path: str, entity_tests: dict = None):
     """Assert that every predicted entity is one of the gold annotations.

     For each train item of the executed workbook, the item's text is run
     through the pipeline and each resulting ``(text, label)`` pair must
     appear among that item's gold ``(subtext, entity)`` pairs.

     :param file_path: workbook path handed to ``ExcelCy.execute``.
     :param entity_tests: accepted but not used by this implementation.
     """
     trained = ExcelCy.execute(file_path=file_path)
     pipeline = trained.nlp
     for _, sample in trained.storage.train.items.items():
         expected_pairs = {
             (gold.subtext, gold.entity)
             for _key, gold in sample.items.items()
         }
         parsed = pipeline(sample.text)
         predicted_pairs = {(ent.text, ent.label_) for ent in parsed.ents}
         # Checked one pair at a time so a failure names the offending pair.
         for pair in predicted_pairs:
             assert pair in expected_pairs
예제 #6
0
    def test_save(self):
        """ Test: extracted storage is equal before and after a save/load cycle """

        source_path = self.get_test_data_path(fs_path='test_data_01.xlsx')
        instance = ExcelCy.execute(file_path=source_path)
        saved_path = self.get_test_tmp_path(fs_path='test_data_01.xlsx')
        instance.save_storage(file_path=saved_path)

        # Snapshot the storage, reload it from the saved file, snapshot again.
        before_reload = self.extract_storage(storage=instance.storage)
        instance.load(file_path=saved_path)
        after_reload = self.extract_storage(storage=instance.storage)

        assert before_reload == after_reload
예제 #7
0
 def assert_training(self, file_path: str, entity_tests: dict = None):
     """Assert that the pipeline finds the gold and the requested entities.

     For each train item of the executed workbook, the item's text is run
     through the pipeline; the gold ``(subtext, entity)`` pairs and any
     pairs listed for that item in ``entity_tests`` must all be among the
     predicted ``(text, label)`` pairs.

     :param file_path: workbook path handed to ``ExcelCy.execute``.
     :param entity_tests: optional mapping from train item key to the
         entity pairs required for that item; a missing key means no extra
         requirement.
     """
     trained = ExcelCy.execute(file_path=file_path)
     pipeline = trained.nlp
     extra_checks = entity_tests or {}
     for key, sample in trained.storage.train.items.items():
         gold_pairs = {
             (gold.subtext, gold.entity)
             for _key, gold in sample.items.items()
         }
         parsed = pipeline(sample.text)
         found_pairs = {(ent.text, ent.label_) for ent in parsed.ents}
         # Gold annotations from the workbook must all be predicted.
         assert gold_pairs.issubset(found_pairs)
         # Caller-supplied expectations for this item, if any.
         required = extra_checks.get(key, set())
         assert required <= found_pairs
예제 #8
0
from excelcy import ExcelCy
from excelcy.storage import Config

# test_string = 'Android Pay expands to Canada'
# excelcy = ExcelCy()
# excelcy.storage.config = Config(nlp_base='en_core_web_sm', train_iteration=50, train_drop=0.2)
# doc = excelcy.nlp(test_string)
# # showing no ORG
# print([(ent.label_, ent.text) for ent in doc.ents])
# excelcy.storage.source.add(kind='text', value=test_string)
# excelcy.discover()
# excelcy.storage.prepare.add(kind='phrase', value='Android Pay', entity='PRODUCT')
# excelcy.prepare()
# excelcy.train()
# doc = excelcy.nlp(test_string)
# print([(ent.label_, ent.text) for ent in doc.ents])

# FAILED tests/test_excelcy.py::ExcelCyTestCase::test_execute - AssertionError: assert ('$1', 'MONEY') in {('$1 million', 'MONEY'), ('Uber', 'ORG')}
# FAILED tests/test_pipe.py::PipeTestCase::test_execute - AssertionError: assert ('$1', 'MONEY') in {('$1 million', 'MONEY'), ('Uber', 'ORG')}
# FAILED tests/test_readme.py::ReadmeTestCase::test_readme_04 - AssertionError: assert ('China' == 'Himalayas'

# Same sentence is analysed twice so the two printed entity lists can be compared.
test_string = 'Android Pay expands to Canada'

# Entities reported by a freshly constructed ExcelCy instance.
excelcy = ExcelCy()
doc = excelcy.nlp(test_string)
print([(ent.label_, ent.text) for ent in doc.ents])

# Entities reported after executing the test_data_03 workbook.
excelcy = ExcelCy.execute(file_path='tests/data/test_data_03.xlsx')
doc = excelcy.nlp(test_string)
print([(ent.label_, ent.text) for ent in doc.ents])
예제 #9
0
파일: cli.py 프로젝트: todun/excelcy
def main(argv: list = None):
    """Minimal command-line entry point.

    Only the ``execute`` sub-command is handled here:
    ``<program> execute <file_path>`` calls ``ExcelCy.execute`` on the file.
    Any other sub-command is ignored.

    When the sub-command or the file path is missing, a usage line is written
    to stderr and the function returns, instead of failing with a bare
    ``IndexError``.

    :param argv: full argument vector with the program name at index 0;
        ``sys.argv`` is used when it is ``None`` or empty.
    """
    # quick CLI execution
    args = argv or sys.argv
    usage = 'usage: {} execute <file_path>'.format(args[0] if args else 'excelcy')
    if len(args) < 2:
        # No sub-command was given.
        print(usage, file=sys.stderr)
        return
    if args[1] == 'execute':
        if len(args) < 3:
            # 'execute' needs the path of the file to process.
            print(usage, file=sys.stderr)
            return
        ExcelCy.execute(file_path=args[2])