def test_save_load_with_memmap(self):
        """Round-trip a model built with ``wv_mapfile_path`` through save/load.

        The test expects that constructing the model creates the three
        ``test_emb_*.vectors`` files, that ``save`` writes the model file
        without producing ``test_emb.wv.vectors.npy``, and that the loaded
        model exposes a non-writeable ``vectors_vocab`` plus the expected
        ``vectors`` / ``vectors_ngrams`` shapes.
        """
        ft = FastText(min_count=1, size=5)
        ft.build_vocab(SENTENCES)
        shape = (1000, 1000)
        # Replace the trained vectors with a zero matrix of known shape so the
        # shape can be checked after loading.
        ft.wv.vectors = np.zeros(shape, np.float32)

        p = Path("fse/test/test_data/test_emb")
        p_vecs = Path("fse/test/test_data/test_emb_wv.vectors")
        p_ngrams = Path("fse/test/test_data/test_emb_ngrams.vectors")
        p_vocab = Path("fse/test/test_data/test_emb_vocab.vectors")

        p_not_exists = Path("fse/test/test_data/test_emb.wv.vectors.npy")

        created_files = [p, p_vecs, p_ngrams, p_vocab]
        try:
            se = BaseSentence2VecModel(ft, wv_mapfile_path=str(p))
            self.assertTrue(p_vecs.exists())
            self.assertTrue(p_ngrams.exists())
            self.assertTrue(p_vocab.exists())

            se.save(str(p.absolute()))
            self.assertTrue(p.exists())
            self.assertFalse(p_not_exists.exists())

            se = BaseSentence2VecModel.load(str(p.absolute()))
            self.assertFalse(se.wv.vectors_vocab.flags.writeable)
            self.assertEqual(shape, se.wv.vectors.shape)
            self.assertEqual((2000000, 5), se.wv.vectors_ngrams.shape)
        finally:
            # Always remove the files, even when an assertion fails; stale
            # files would otherwise satisfy the exists() checks of a later run.
            for created in created_files:
                # Guard so a missing file cannot raise here and mask the
                # original test failure.
                if created.exists():
                    created.unlink()
Example #2
0
	def __init__(self, model_path = None):
		"""Build or load the sentence-embedding model selected by ``model_path``.

		A path ending in ``vec`` is read with
		``KeyedVectors.load_word2vec_format`` and wrapped in
		``SIF(..., components=10)``. A path ending in ``pickle`` is loaded
		with ``BaseSentence2VecModel.load``. For any other path ``self.model``
		is left unset.

		Args:
			model_path: Path string of the word-vector file or the saved
				sentence model.

		Raises:
			TypeError: If ``model_path`` is None.
		"""
		if model_path is None:
			# Subscripting None used to fail with an opaque TypeError; keep the
			# exception type but say what is actually wrong.
			raise TypeError("model_path must be a path string, got None")

		# Suffix checks: the former ``model_path[3:] == 'vec'`` dropped the first
		# three characters instead of taking the last three, so ordinary paths
		# such as "model.vec" never matched.
		if model_path.endswith('vec'): # If it is a pre-trained word vector
			ft = KeyedVectors.load_word2vec_format(model_path)
			self.model = SIF(ft, components=10)

		elif model_path.endswith('pickle'): # Already trained sentence vector
			# NOTE(review): presumably a pickle-based load; only pass trusted files.
			self.model = BaseSentence2VecModel.load(model_path)
 def test_save_load(self):
     """Save a model built from ``W2V``, load it back and compare both.

     The loaded model must have the same ``wv.vectors``, ``wv.index2word``
     and ``workers`` as the one that was saved.
     """
     se = BaseSentence2VecModel(W2V)
     p = Path("fse/test/test_data/test_emb.model")
     try:
         se.save(str(p.absolute()))
         self.assertTrue(p.exists())
         se2 = BaseSentence2VecModel.load(str(p.absolute()))
         self.assertTrue((se.wv.vectors == se2.wv.vectors).all())
         # assertEqual reports the differing entries on failure.
         self.assertEqual(se.wv.index2word, se2.wv.index2word)
         self.assertEqual(se.workers, se2.workers)
     finally:
         # Remove the saved model even when an assertion above fails; the
         # guard keeps a missing file from masking the original failure.
         if p.exists():
             p.unlink()