def test_loglikelihood(self):
	"""The average negative log-likelihood should estimate the model's entropy."""

	model = GSM(3, 1)
	data = model.sample(100000)

	# closed-form differential entropy of a Gaussian, 0.5 * logdet(2*pi*e*C)
	scaled_cov = 2. * pi * e * model.covariance / model.scales
	true_entropy = 0.5 * slogdet(scaled_cov)[1]

	# Monte Carlo estimate of the entropy from the samples
	est_entropy = -mean(model.loglikelihood(data))

	self.assertAlmostEqual(true_entropy, est_entropy, 1)
def test_basics(self):
	"""Check basic GSM attributes and the covariance setter."""

	gsm = GSM(3, 5)

	# fixed: assertTrue(x, y) treats y as a failure *message* and passes for
	# any truthy x, so the original checks were vacuous; assertEqual is the
	# intended comparison
	self.assertEqual(gsm.scales.size, 5)
	self.assertEqual(gsm.dim, 3)

	# assigning a covariance should store it (up to numerical precision)
	covariance = cov(randn(gsm.dim, 10))
	gsm.covariance = covariance

	self.assertLess(max(abs(gsm.covariance - covariance)), 1e-8)
def test_pickle(self):
	"""Mixture models should survive a pickle round-trip with all parameters intact."""

	models = [
		Mixture(dim=5),
		MoGSM(dim=3, num_components=4, num_scales=7)]

	for _ in range(3):
		models[0].add_component(GSM(models[0].dim, 7))

	for model0 in models:
		tmp_file = mkstemp()[1]

		# store model (binary mode is required by pickle under Python 3
		# and is also safe under Python 2)
		with open(tmp_file, 'wb') as handle:
			dump({'model': model0}, handle)

		# load model
		with open(tmp_file, 'rb') as handle:
			model1 = load(handle)['model']

		# make sure parameters haven't changed
		self.assertEqual(model0.dim, model1.dim)
		self.assertEqual(model0.num_components, model1.num_components)

		for k in range(model0.num_components):
			# fixed: the original compared model0[k].scales with itself,
			# leaving the scales round-trip unchecked
			self.assertLess(max(abs(model0[k].scales - model1[k].scales)), 1e-10)
			self.assertLess(max(abs(model0[k].priors - model1[k].priors)), 1e-10)
			self.assertLess(max(abs(model0[k].mean - model1[k].mean)), 1e-10)
			self.assertLess(max(abs(model0[k].covariance - model1[k].covariance)), 1e-10)
def test_train(self):
	"""Training a mixture initialized at the truth should stay close to it."""

	model = Mixture(3)
	model.add_component(GSM(3, 1))
	model.add_component(GSM(3, 1))

	# dimension mismatch and out-of-range indexing should raise
	self.assertRaises(Exception, model.add_component, GSM(5))
	self.assertRaises(Exception, model.__getitem__, 2)
	self.assertIsInstance(model[1], GSM)

	# ground-truth parameters of a two-component mixture
	prior0 = 0.3
	prior1 = 0.7
	num_samples = 20000
	mean0 = array([[2], [0], [0]])
	mean1 = array([[0], [2], [1]])
	cov0 = cov(randn(model.dim, model.dim**2))
	cov1 = cov(randn(model.dim, model.dim**2))

	# draw samples from the ground-truth mixture
	data = hstack([
		dot(cholesky(cov0), randn(model.dim, int(prior0 * num_samples))) + mean0,
		dot(cholesky(cov1), randn(model.dim, int(prior1 * num_samples))) + mean1])

	# without this call, train() would initialize the parameters itself
	model.initialize(data)

	# start training from the ground-truth parameters
	model[0].mean = mean0
	model[1].mean = mean1
	model[0].covariance = cov0
	model[1].covariance = cov1
	model[0].scales = [1.]
	model[1].scales = [1.]

	# training shouldn't move the parameters far from the truth
	model.train(data, parameters={
		'verbosity': 0,
		'max_iter': 20,
		'threshold': 1e-7})

	self.assertLess(abs(1. - model.priors[0] / prior0), 0.1)
	self.assertLess(abs(1. - model.priors[1] / prior1), 0.1)
	self.assertLess(max(abs(model[0].mean - mean0)), 0.2)
	self.assertLess(max(abs(model[1].mean - mean1)), 0.2)
	self.assertLess(max(abs(model[0].covariance / model[0].scales - cov0)), 0.2)
	self.assertLess(max(abs(model[1].covariance / model[1].scales - cov1)), 0.2)
def test_pickle(self):
	"""A GSM should survive a pickle round-trip with all parameters intact."""

	model0 = GSM(7, 9)
	model0.mean = randn(7, 1)
	model0.covariance = cov(randn(7, 20))

	tmp_file = mkstemp()[1]

	# store model (binary mode is required by pickle under Python 3
	# and is also safe under Python 2)
	with open(tmp_file, 'wb') as handle:
		dump({'model': model0}, handle)

	# load model
	with open(tmp_file, 'rb') as handle:
		model1 = load(handle)['model']

	# make sure parameters haven't changed
	self.assertEqual(model0.dim, model1.dim)
	self.assertEqual(model0.num_scales, model1.num_scales)
	# fixed: the original compared model0.scales against itself,
	# leaving the scales round-trip unchecked
	self.assertLess(max(abs(model0.scales - model1.scales)), 1e-10)
	self.assertLess(max(abs(model0.priors - model1.priors)), 1e-10)
	self.assertLess(max(abs(model0.mean - model1.mean)), 1e-10)
	self.assertLess(max(abs(model0.covariance - model1.covariance)), 1e-10)
def test_train(self):
	"""Training should approximately recover GSM parameters, with and without weights."""

	gsm0 = GSM(3, 2)
	gsm0.mean = [1, 1, 1]
	gsm0.scales = [1, 5]
	gsm0.priors = [0.7, 0.3]
	# fix the determinant of the covariance so parameters are identifiable
	gsm0.covariance = gsm0.covariance / power(det(gsm0.covariance), 1. / gsm0.dim)

	samples = gsm0.sample(50000)

	def check_recovery(gsm1):
		# apply the same determinant normalization as for gsm0, then
		# compare all parameters against the ground truth
		f = power(det(gsm1.covariance), 1. / gsm1.dim)
		gsm1.covariance = gsm1.covariance / f
		gsm1.scales = gsm1.scales / f

		self.assertLess(max(abs(gsm1.mean - gsm0.mean)), 0.2)
		self.assertLess(max(abs(1. - sort(gsm1.priors.ravel()) / sort(gsm0.priors.ravel()))), 0.2)
		self.assertLess(max(abs(1. - sort(gsm1.scales.ravel()) / sort(gsm0.scales.ravel()))), 0.2)
		self.assertLess(max(abs(gsm1.covariance - gsm0.covariance)), 0.2)

	# try to recover parameters from unweighted samples
	gsm1 = GSM(3, 2)
	gsm1.train(samples, parameters={'max_iter': 50})
	check_recovery(gsm1)

	# repeat with random, normalized sample weights
	weights = rand(1, samples.shape[1])
	weights /= sum(weights)

	gsm1 = GSM(3, 2)
	gsm1.train(samples, weights=weights, parameters={'max_iter': 100})
	check_recovery(gsm1)
def test_train(self):
	"""Training should approximately recover the parameters of a GSM."""

	truth = GSM(3, 2)
	truth.mean = [1, 1, 1]
	truth.scales = [1, 5]
	truth.priors = [0.7, 0.3]
	# normalize the covariance determinant so parameters are identifiable
	truth.covariance = truth.covariance / power(det(truth.covariance), 1. / truth.dim)

	samples = truth.sample(50000)

	# try to recover the parameters from unweighted samples
	learned = GSM(3, 2)
	learned.train(samples, parameters={'max_iter': 50})

	# apply the same normalization to the trained model
	norm = power(det(learned.covariance), 1. / learned.dim)
	learned.covariance = learned.covariance / norm
	learned.scales = learned.scales / norm

	self.assertLess(max(abs(learned.mean - truth.mean)), 0.2)
	self.assertLess(
		max(abs(1. - sort(learned.priors.ravel()) / sort(truth.priors.ravel()))), 0.2)
	self.assertLess(
		max(abs(1. - sort(learned.scales.ravel()) / sort(truth.scales.ravel()))), 0.2)
	self.assertLess(max(abs(learned.covariance - truth.covariance)), 0.2)

	# repeat with random, normalized sample weights
	weights = rand(1, samples.shape[1])
	weights /= sum(weights)

	learned = GSM(3, 2)
	learned.train(samples, weights=weights, parameters={'max_iter': 100})

	norm = power(det(learned.covariance), 1. / learned.dim)
	learned.covariance = learned.covariance / norm
	learned.scales = learned.scales / norm

	self.assertLess(max(abs(learned.mean - truth.mean)), 0.2)
	self.assertLess(
		max(abs(1. - sort(learned.priors.ravel()) / sort(truth.priors.ravel()))), 0.2)
	self.assertLess(
		max(abs(1. - sort(learned.scales.ravel()) / sort(truth.scales.ravel()))), 0.2)
	self.assertLess(max(abs(learned.covariance - truth.covariance)), 0.2)