def retrain():
    """Continue training an existing embedding on the Star Wars corpus.

    Pre-processes ``./data/starwars.txt``, builds a ``WordEmbedding`` from the
    model file ``./embedding/FT/fasttext_cbow_300d.bin`` (presumably a
    pretrained FastText CBOW model, judging by the path -- confirm), trains it
    on the pairs of the processed corpus and saves the result as
    ``starwars.bin`` inside ``./embedding/starwars``.
    """
    corpus = process(PreProcessing('./data/starwars.txt'))

    embedding = WordEmbedding(
        source='./embedding/FT/fasttext_cbow_300d.bin',
    )
    embedding.train(corpus.pairs)
    embedding.save('./embedding/starwars', 'starwars.bin')
def train():
    """Train a word embedding on the Star Wars corpus and save it.

    Opens ``./data/starwars.txt``, pre-processes it, builds a
    ``WordEmbedding`` whose source is the pairs of the processed corpus,
    trains it on those same pairs and saves the result as ``starwars.bin``
    inside ``./embedding/starwars``.
    """
    # Use a context manager so the corpus file is always closed, even when
    # pre-processing or training raises; the handle used to be leaked.
    # Everything that touches ``ds`` stays inside the block in case the
    # pre-processing reads from the file lazily.
    with open('./data/starwars.txt', 'r') as corpus_file:
        ds = process(PreProcessing(corpus_file))
        word_embedding = WordEmbedding(source=ds.pairs)
        word_embedding.train(ds.pairs)

    word_embedding.save(
        target_folder='./embedding/starwars',
        filename='starwars.bin',
    )
def test_load_from_file(self):
    """Train an embedding, save it, then load it back from the saved file.

    The embedding is written to a uniquely named folder under
    ``settings.BASE_DIR/embeddings`` and reloaded by passing the saved file
    path as ``source``. The similarity between 'batendo' and 'porta' in the
    reloaded model is printed; nothing is asserted.
    """
    # A random hex folder name keeps separate runs from sharing a folder.
    target_dir = os.path.join(settings.BASE_DIR, 'embeddings', uuid.uuid4().hex)
    dataset = self.__class__.dataset
    bin_name = f"{dataset.idx!s}.bin"

    trained = WordEmbedding(source=dataset.pairs)
    trained.train()
    trained.save(target_dir, bin_name)

    saved_file = os.path.join(target_dir, bin_name)
    model = WordEmbedding(source=saved_file)
    similarity = model._embedding.wv.similarity('batendo', 'porta')
    print(similarity)
def test_should_generate_training_pairs(self):
    """Check that requesting 2 training pairs from the dataset yields 2.

    Builds a dataset from ``sentences``, trains a non-frozen
    ``WordEmbedding`` on its pairs, and asserts on the length of the result
    of ``dataset.training_pairs(2, ...)``.
    """
    dataset = ds.process(PreProcessing(sentences))

    embedding = WordEmbedding(freeze=False, source=dataset.pairs)
    embedding.train()

    generated = dataset.training_pairs(2, embedding)
    self.assertEqual(len(generated), 2)