def testPersistence(self):
    """Round-trip a LogEntropyModel through save/load and verify it is unchanged."""
    trained = logentropy_model.LogEntropyModel(self.corpus_ok, normalize=True)
    trained.save(testfile())
    restored = logentropy_model.LogEntropyModel.load(testfile())
    # entropy weights must survive persistence intact
    self.assertTrue(trained.entr == restored.entr)
    empty_doc = []
    # both models must transform the same (empty) document identically
    self.assertTrue(numpy.allclose(trained[empty_doc], restored[empty_doc]))
def testPersistenceCompressed(self):
    """Round-trip a LogEntropyModel through gzip-compressed save/load."""
    fname = testfile() + '.gz'
    trained = logentropy_model.LogEntropyModel(self.corpus_ok, normalize=True)
    trained.save(fname)
    # mmap must be disabled for compressed files
    restored = logentropy_model.LogEntropyModel.load(fname, mmap=None)
    self.assertTrue(trained.entr == restored.entr)
    empty_doc = []
    self.assertTrue(numpy.allclose(trained[empty_doc], restored[empty_doc]))
def testPersistenceCompressed(self):
    """Round-trip a LogEntropyModel through gzip-compressed save/load."""
    fname = get_tmpfile('gensim_models_logentry.tst.gz')
    trained = logentropy_model.LogEntropyModel(self.corpus_ok, normalize=True)
    trained.save(fname)
    # mmap must be disabled for compressed files
    restored = logentropy_model.LogEntropyModel.load(fname, mmap=None)
    self.assertTrue(trained.entr == restored.entr)
    empty_doc = []
    self.assertTrue(np.allclose(trained[empty_doc], restored[empty_doc]))
def test_corpus_validity(self):
    """Check how the model handles a corpus of questionable validity.

    Words that appear in only one context cause trouble in the log-entropy
    normalization and should therefore be filtered out — i.e. context
    diversity should be > 1.
    """
    # constructing the model on the small corpus must not blow up
    logentropy_model.LogEntropyModel(self.corpus_small)
def testTransform(self):
    """Transform one document and compare against precomputed weights."""
    # build the transformation model without normalization
    trained = logentropy_model.LogEntropyModel(self.corpus_ok, normalize=False)
    # apply it to the first document of the corpus
    first_doc = list(self.corpus_ok)[0]
    got = trained[first_doc]
    expected = [
        (0, 0.056633012265132537),
        (1, 0.024757785476437949),
        (3, 0.62707564002906502),
    ]
    self.assertTrue(numpy.allclose(got, expected))
def testTransform(self):
    """Transform one document and compare against precomputed weights."""
    # build the transformation model without normalization
    trained = logentropy_model.LogEntropyModel(self.corpus_ok, normalize=False)
    # apply it to the first document of the corpus
    first_doc = list(self.corpus_ok)[0]
    got = trained[first_doc]
    expected = [
        (0, 0.29155145321295795),
        (1, 0.024757785476437949),
        (3, 1.0569257878828748),
    ]
    self.assertTrue(numpy.allclose(got, expected))
def test_transform(self):
    """Transform one document and compare against precomputed weights."""
    # build the transformation model without normalization
    trained = logentropy_model.LogEntropyModel(self.corpus_ok, normalize=False)
    # apply it to the first document of the corpus
    first_doc = list(self.corpus_ok)[0]
    got = trained[first_doc]
    expected = [
        (0, 0.3748900964125389),
        (1, 0.30730215324230725),
        (3, 1.20941755462856),
    ]
    self.assertTrue(np.allclose(got, expected))