Example #1
0
def retrain():
    """Train an embedding loaded from a binary file on the Star Wars data and save it.

    Builds a dataset from ``./data/starwars.txt``, creates a ``WordEmbedding``
    whose source is ``./embedding/FT/fasttext_cbow_300d.bin`` (presumably a
    pretrained fastText CBOW model, judging by the path -- confirm), trains it
    on the dataset's pairs and writes the result to
    ``./embedding/starwars/starwars.bin``.
    """
    pre_processing = PreProcessing('./data/starwars.txt')
    dataset = process(pre_processing)

    embedding = WordEmbedding(
        source='./embedding/FT/fasttext_cbow_300d.bin',
    )
    training_pairs = dataset.pairs
    embedding.train(training_pairs)

    embedding.save('./embedding/starwars', 'starwars.bin')
Example #2
0
def train():
    """Train a word embedding on the Star Wars data and save it to disk.

    Opens ``./data/starwars.txt``, builds a dataset from it, creates a
    ``WordEmbedding`` from the dataset's pairs, trains it on those pairs and
    saves it as ``starwars.bin`` under ``./embedding/starwars``.
    """
    # The corpus file used to be opened inline and never closed. Keep it open
    # for the whole pipeline (process/ds.pairs may read lazily -- not visible
    # from here) and let the context manager close it, even on error.
    with open('./data/starwars.txt', 'r') as corpus:
        ds = process(PreProcessing(corpus))

        word_embedding = WordEmbedding(source=ds.pairs)

        word_embedding.train(ds.pairs)

        word_embedding.save(target_folder='./embedding/starwars', filename='starwars.bin')
Example #3
0
    def test_load_from_file(self):
        """Train and save an embedding, then load a new one from the saved file.

        The embedding is built from the class-level dataset's pairs, saved
        under a randomly named directory inside ``settings.BASE_DIR/embeddings``
        and reloaded by passing the saved file path as ``source``. The
        similarity between 'batendo' and 'porta' is printed, not asserted.
        """
        # A random hex directory name gives each run its own save location.
        target_dir = os.path.join(settings.BASE_DIR, 'embeddings',
                                  uuid.uuid4().hex)
        dataset = self.__class__.dataset
        bin_name = f"{dataset.idx!s}.bin"

        trained = WordEmbedding(source=dataset.pairs)
        trained.train()
        trained.save(target_dir, bin_name)

        saved_path = os.path.join(target_dir, bin_name)
        reloaded = WordEmbedding(source=saved_path)
        similarity = reloaded._embedding.wv.similarity('batendo', 'porta')
        print(similarity)
Example #4
0
 def test_should_generate_training_pairs(self):
     """Check that ``training_pairs(2, embedding)`` yields exactly two items.

     A dataset is processed from the module-level ``sentences``, an embedding
     with ``freeze=False`` is trained from the dataset's pairs, and the length
     of the resulting training pairs is asserted to equal 2.
     """
     dataset = ds.process(PreProcessing(sentences))

     embedding = WordEmbedding(source=dataset.pairs, freeze=False)
     embedding.train()

     generated = dataset.training_pairs(2, embedding)
     self.assertEqual(len(generated), 2)