def prepare_excelcy_data():
    """Assemble the ExcelCy training storage and save it as ``train_model.xlsx``.

    Steps, in order: register one textract source, run ``discover``,
    register five prepare workbooks (each with an empty ``entity``), run
    ``prepare``, record the phase list (discover, prepare, train, retest),
    set ``prepare_enabled`` to ``False`` and write the storage under
    ``constants.MODEL_DATA_DIR``.
    """
    pipeline = ExcelCy()
    add_stopwords(pipeline.nlp)

    training_config = Config(
        nlp_base='en_core_web_lg',
        train_iteration=20,
        train_drop=0.2,
    )
    pipeline.storage.config = training_config

    model_dir = constants.MODEL_DATA_DIR
    pipeline.storage.base_path = str(model_dir)
    pipeline.storage.source.add(
        kind='textract',
        value='[base_path]/source/training_text.txt',
    )
    pipeline.discover()

    # Workbooks are registered in this exact order, all with a blank entity.
    prepare_workbooks = (
        '[base_path]/prepare/pers.xlsx',
        '[base_path]/prepare/orgs.xlsx',
        '[base_path]/prepare/locs.xlsx',
        '[base_path]/prepare/ships.xlsx',
        '[base_path]/prepare/misc.xlsx',
    )
    for workbook in prepare_workbooks:
        pipeline.storage.prepare.add(kind='file', value=workbook, entity='')
    pipeline.prepare()

    for phase_name in ('discover', 'prepare', 'train', 'retest'):
        pipeline.storage.phase.add(phase_name)

    # NOTE(review): presumably switched off because prepare already ran
    # above and the saved workbook should not repeat it -- confirm.
    pipeline.storage.config.prepare_enabled = False

    output_path = model_dir / 'train_model.xlsx'
    pipeline.save_storage(str(output_path))
def test_readme_03(self):
    """ Test: the README.rst code snippet (one source, 'Uber' phrase tagged as ORG) """
    pipeline = ExcelCy()

    # Configure the storage before any pipeline phase runs.
    storage = pipeline.storage
    storage.base_path = self.test_data_path
    storage.config = Config(
        nlp_base='en_core_web_sm',
        train_iteration=2,
        train_drop=0.2,
    )
    storage.source.add(kind='textract', value='source/source_01.txt')
    storage.prepare.add(kind='phrase', value='Uber', entity='ORG')

    pipeline.discover()
    pipeline.prepare()
    pipeline.train()

    # The first entity found in the sample sentence must carry the ORG label.
    parsed = pipeline.nlp('Uber blew through $1 million a week')
    first_entity = parsed.ents[0]
    assert first_entity.label_ == 'ORG'