Beispiel #1
0
def prepare_excelcy_data():
    """Assemble an ExcelCy storage definition and save it as a workbook.

    The function registers one text source and five spreadsheet-based
    prepare entries, runs the ``discover`` and ``prepare`` steps, records
    the phase list and finally writes the storage to ``train_model.xlsx``
    inside ``constants.MODEL_DATA_DIR``.
    """
    # Spreadsheets registered as 'file' prepare entries, in registration order.
    prepare_workbooks = (
        '[base_path]/prepare/pers.xlsx',
        '[base_path]/prepare/orgs.xlsx',
        '[base_path]/prepare/locs.xlsx',
        '[base_path]/prepare/ships.xlsx',
        '[base_path]/prepare/misc.xlsx',
    )
    # Phases recorded in the saved storage, in registration order.
    phase_names = ('discover', 'prepare', 'train', 'retest')

    excelcy = ExcelCy()
    add_stopwords(excelcy.nlp)

    excelcy.storage.config = Config(
        nlp_base='en_core_web_lg',
        train_iteration=20,
        train_drop=0.2,
    )
    model_dir = constants.MODEL_DATA_DIR
    excelcy.storage.base_path = str(model_dir)

    excelcy.storage.source.add(
        kind='textract',
        value='[base_path]/source/training_text.txt',
    )
    excelcy.discover()

    for workbook in prepare_workbooks:
        excelcy.storage.prepare.add(kind='file', value=workbook, entity='')
    excelcy.prepare()

    for phase_name in phase_names:
        excelcy.storage.phase.add(phase_name)

    # NOTE(review): prepare has already run above; presumably this flag keeps
    # it from running again when the saved workbook is executed - confirm.
    excelcy.storage.config.prepare_enabled = False
    excelcy.save_storage(str(model_dir / 'train_model.xlsx'))
Beispiel #2
0
    def test_readme_03(self):
        """Run the README.rst code snippet end to end.

        Configures a storage with one text source and one phrase rule
        ('Uber' -> ORG), runs discover, prepare and train, then checks that
        the first entity found in a sample sentence is labelled 'ORG'.
        """

        excelcy = ExcelCy()
        excelcy.storage.base_path = self.test_data_path
        excelcy.storage.config = Config(
            nlp_base='en_core_web_sm',
            train_iteration=2,
            train_drop=0.2,
        )
        excelcy.storage.source.add(
            kind='textract',
            value='source/source_01.txt',
        )
        excelcy.storage.prepare.add(kind='phrase', value='Uber', entity='ORG')

        excelcy.discover()
        excelcy.prepare()
        excelcy.train()

        parsed = excelcy.nlp('Uber blew through $1 million a week')
        first_entity = parsed.ents[0]
        assert first_entity.label_ == 'ORG'