def _create_model(self, d, k, mode, nframes, emiter):
    """Build a random mixture, sample from it and fit a model with EM.

    Side effects: stores the sampled frames in ``self.data``, a shallow
    copy of the freshly initialised model in ``self.gm0`` and the model
    given to the trainer in ``self.gm``.
    """
    # Reference mixture with k components in d dimensions.
    weights, means, variances = GM.gen_param(d, k, mode, spread=1.5)
    reference = GM.fromvalues(weights, means, variances)

    # Draw nframes frames from the reference mixture.
    frames = reference.sample(nframes)

    # Model to be learned, initialised with the 'kmean' method.
    learned = GM(d, k, mode)
    trainer = GMM(learned, "kmean")
    trainer.init(frames, niter=KM_ITER)
    # Copy taken right after initialisation, before any EM iteration.
    self.gm0 = copy.copy(trainer.gm)

    # emiter rounds: compute responsibilities, then update the parameters.
    for _ in range(emiter):
        resp, _unused = trainer.compute_responsabilities(frames)
        trainer.update_em(frames, resp)

    self.data = frames
    self.gm = learned
def _create_model(self, d, k, mode, nframes, emiter):
    """Sample data from a random mixture and run ``emiter`` EM iterations.

    Sets ``self.gm0`` (shallow copy of the model right after
    initialisation), ``self.data`` (the sampled frames) and ``self.gm``
    (the model object passed to the GMM trainer).
    """
    # --- synthetic data: k components, d dimensions, nframes frames ---
    true_model = GM.fromvalues(*GM.gen_param(d, k, mode, spread=1.5))
    samples = true_model.sample(nframes)

    # --- classical EM on a fresh model ---
    fitted = GM(d, k, mode)
    em_model = GMM(fitted, 'kmean')
    em_model.init(samples, niter=KM_ITER)
    self.gm0 = copy.copy(em_model.gm)

    iteration = 0
    while iteration < emiter:
        # Only the first returned value is needed for the update.
        resp = em_model.compute_responsabilities(samples)[0]
        em_model.update_em(samples, resp)
        iteration += 1

    self.data = samples
    self.gm = fitted
def test_conf_ellip(self):
    """Smoke test: ``conf_ellipses`` must run without raising.

    The result itself is not checked here; judging whether it is correct
    requires plotting it.
    """
    ndim, ncomp = 3, 3
    weights, means, variances = GM.gen_param(ndim, ncomp)
    mixture = GM.fromvalues(weights, means, variances)
    mixture.conf_ellipses()
def test_1d_bogus(self):
    """Check that functions which do not make sense for 1d fail nicely.

    ``conf_ellipses`` is called on a one-dimensional mixture and is
    expected to raise ``ValueError``.

    Raises:
        AssertionError: if ``conf_ellipses`` returns without raising.
    """
    d = 1
    k = 2
    w, mu, va = GM.gen_param(d, k)
    gm = GM.fromvalues(w, mu, va)
    try:
        gm.conf_ellipses()
    except ValueError as e:
        # A parenthesised single-argument print produces the same output
        # as a Python 2 statement and as the Python 3 function.
        print("Ok, conf_ellipses failed as expected (with msg: " + str(e) + ")")
    else:
        # Reached only when no exception was raised at all.
        raise AssertionError("This should not work !")
def _run_pure_online(self, d, k, mode, nframes):
    """Train a mixture on ``self.data`` with frame-by-frame online EM.

    The model is initialised on the first ``nframes // 20`` frames of
    ``self.data``; every one of the first ``nframes`` frames is then fed
    to the online trainer once.

    Returns:
        ``ogmm.gm`` after its parameters have been set from the trainer's
        current ``cw``/``cmu``/``cva`` values.
    """
    ogm = GM(d, k, mode)
    ogmm = OnGMM(ogm, 'kmean')
    # Floor division keeps the slice bound an integer on every Python
    # version (plain `/` yields a float under true division).
    init_data = self.data[0:nframes // 20, :]
    ogmm.init(init_data)

    # Forgetting param
    ku = 0.005
    t0 = 200
    lamb = 1 - 1 / (N.arange(-1, nframes - 1) * ku + t0)
    nu0 = 0.2
    nu = N.zeros((len(lamb), 1))
    nu[0] = nu0
    # nu follows the recursion nu[i] = 1 / (1 + lamb[i] / nu[i - 1]).
    for i in range(1, len(lamb)):
        nu[i] = 1. / (1 + lamb[i] / nu[i - 1])

    # Object version of online EM
    for t in range(nframes):
        # These asserts check that no unintended copy of the parameters
        # has been created: p* and c* must be the very same objects.
        assert ogmm.pw is ogmm.cw
        assert ogmm.pmu is ogmm.cmu
        assert ogmm.pva is ogmm.cva
        ogmm.compute_sufficient_statistics_frame(self.data[t], nu[t])
        ogmm.update_em_frame()

    ogmm.gm.set_param(ogmm.cw, ogmm.cmu, ogmm.cva)

    return ogmm.gm
def test_get_va(self):
    """Check ``GM._get_va`` on a full-covariance model.

    The covariance array is replaced by ``arange`` values so that each
    entry encodes its own position; the expected sub-array for the
    dimensions in ``dim`` is rebuilt by hand and compared entry by entry.
    """
    d = 3
    k = 2
    ld = 2
    dim = [0, 2]
    w, mu, va = GM.gen_param(d, k, 'full')
    # Entry (row, col) of component c holds d*d*c + d*row + col.
    va = N.arange(d*d*k).reshape(d*k, d)
    gm = GM.fromvalues(w, mu, va)

    tva = N.empty(ld * ld * k)
    for i in range(k * ld * ld):
        # Flat index -> (component, selected row, selected column).
        # divmod keeps everything integral on every Python version.
        comp, within = divmod(i, ld * ld)
        row, col = divmod(within, ld)
        tva[i] = d * d * comp + d * dim[row] + dim[col]
    tva = tva.reshape(ld * k, ld)

    sva = gm._get_va(dim)
    assert N.all(sva == tva)
def _check(self, d, k, mode, nframes, emiter):
    """Compare epoch-wise online EM against the stored offline model.

    Reads ``self.data``, ``self.gm0`` and ``self.gm``. The online trainer
    is initialised on the whole data set, its initial parameters are
    checked against ``self.gm0``, and after ``emiter`` epochs its
    parameters are compared with ``self.gm`` at increasing precision.

    Raises:
        AssertionError: if the initialisations differ, or if the two
            trainers stop agreeing below ``AR_AS_PREC`` decimals.
    """
    # Learn the model with online EM
    ogm = GM(d, k, mode)
    ogmm = OnGMM(ogm, 'kmean')
    init_data = self.data
    ogmm.init(init_data, niter=KM_ITER)

    # Check that the online kmean init is the same as the offline one
    ogm0 = copy.copy(ogm)
    assert_array_equal(ogm0.w, self.gm0.w)
    assert_array_equal(ogm0.mu, self.gm0.mu)
    assert_array_equal(ogm0.va, self.gm0.va)

    # Forgetting param
    lamb = N.ones((nframes, 1))
    lamb[0] = 0
    nu0 = 1.0
    nu = N.zeros((len(lamb), 1))
    nu[0] = nu0
    for i in range(1, len(lamb)):
        nu[i] = 1. / (1 + lamb[i] / nu[i - 1])

    # Object version of online EM: the p* attributes are refreshed only
    # once per epoch, which is meant to be equivalent to one full
    # iteration of the classic EM algorithm.
    ogmm.pw = ogmm.cw.copy()
    ogmm.pmu = ogmm.cmu.copy()
    ogmm.pva = ogmm.cva.copy()
    for _epoch in range(emiter):
        for t in range(nframes):
            ogmm.compute_sufficient_statistics_frame(self.data[t], nu[t])
            ogmm.update_em_frame()

        # Change the p* attributes only at each epoch
        ogmm.pw = ogmm.cw.copy()
        ogmm.pmu = ogmm.cmu.copy()
        ogmm.pva = ogmm.cva.copy()

    # For equivalence between offline and online training a margin of
    # error is allowed, because of round-off errors.
    # Parenthesised single-argument prints behave the same as Python 2
    # statements and as the Python 3 function.
    print(" Checking precision of equivalence with offline EM trainer ")
    maxtestprec = 18
    try:
        for i in range(maxtestprec):
            assert_array_almost_equal(self.gm.w, ogmm.pw, decimal=i)
            assert_array_almost_equal(self.gm.mu, ogmm.pmu, decimal=i)
            assert_array_almost_equal(self.gm.va, ogmm.pva, decimal=i)
        # NOTE(review): placed after the loop (printed once when every
        # precision passes); confirm against the original layout.
        print("\t !! Precision up to %d decimals !! " % i)
    except AssertionError:
        if i < AR_AS_PREC:
            # NOTE(review): the original message was a triple-quoted
            # literal; confirm its internal whitespace.
            msg = ("\t !!NOT OK: Precision up to %d decimals only, "
                   "outside the allowed range (%d) !! " % (i, AR_AS_PREC))
            print(msg)
            # Carry the diagnostic in the exception instead of raising
            # a bare AssertionError.
            raise AssertionError(msg)
        else:
            print("\t !!OK: Precision up to %d decimals !! " % i)
def _create_model_and_run_em(self, d, k, mode, nframes):
    """Sample from a random mixture and train a fresh model on the sample.

    Nothing is stored or returned; the method only exercises the
    sample / init / ``EM.train`` pipeline.
    """
    # Reference mixture: k components, d dimensions.
    weights, means, variances = GM.gen_param(d, k, mode, spread=1.5)
    source = GM.fromvalues(weights, means, variances)

    # nframes frames drawn from the reference mixture.
    frames = source.sample(nframes)

    # Fresh model wrapped in a trainer using the 'kmean' init method.
    trainer = GMM(GM(d, k, mode), 'kmean')

    # Classical EM; the value returned by train() is not used.
    EM().train(frames, trainer)
def _create_model_and_run_em(self, d, k, mode, nframes):
    """Generate data from a random k-component, d-dimensional mixture and
    run ``EM.train`` on a newly created model.

    The training result is discarded.
    """
    # Generate the model and sample nframes frames from it.
    generator = GM.fromvalues(*GM.gen_param(d, k, mode, spread=1.5))
    observations = generator.sample(nframes)

    # Approximate the model with classical EM.
    candidate = GM(d, k, mode)
    wrapped = GMM(candidate, 'kmean')
    algorithm = EM()
    algorithm.train(observations, wrapped)
def _test(self, dataset, log):
    """Train from a dataset's initial parameters and compare the outcome
    with the parameters stored in that dataset.

    ``dataset`` is passed to ``load_dataset``; ``log`` is forwarded to
    ``EM.train``. The comparison uses ``DEF_DEC`` decimals.
    """
    ref = load_dataset(dataset)

    start = GM.fromvalues(ref['w0'], ref['mu0'], ref['va0'])
    trainer = GMM(start, 'test')
    EM().train(ref['data'], trainer, log=log)

    # The dataset keys match the attribute names on the trained model.
    for field in ('w', 'mu', 'va'):
        assert_array_almost_equal(
            getattr(trainer.gm, field), ref[field], DEF_DEC)
def _test(self, dataset, log):
    """Run ``EM.train`` on a stored dataset and check the learned weights,
    means and variances against the stored ones (``DEF_DEC`` decimals).
    """
    stored = load_dataset(dataset)
    observations = stored['data']

    initial = GM.fromvalues(stored['w0'], stored['mu0'], stored['va0'])
    wrapped = GMM(initial, 'test')

    algorithm = EM()
    algorithm.train(observations, wrapped, log=log)

    # Learned parameters versus reference parameters.
    assert_array_almost_equal(wrapped.gm.w, stored['w'], DEF_DEC)
    assert_array_almost_equal(wrapped.gm.mu, stored['mu'], DEF_DEC)
    assert_array_almost_equal(wrapped.gm.va, stored['va'], DEF_DEC)
def test_2d_diag_logpdf(self):
    """Compare ``GM.pdf(..., log=True)`` with the log of a directly
    computed weighted sum of component densities, on random 2d points.
    """
    d = 2
    weights = N.array([0.4, 0.6])
    means = N.array([[0., 2], [-1, -2]])
    variances = N.array([[1, 0.5], [0.5, 1]])

    # 100 random evaluation points of dimension d.
    points = N.random.randn(100, d)

    mixture = GM.fromvalues(weights, means, variances)

    # Linear-domain reference: weight each component density, sum over
    # components (axis 1).
    component_dens = multiple_gauss_den(points, means, variances)
    linear_pdf = N.sum(component_dens * weights, 1)

    log_pdf = mixture.pdf(points, log=True)
    assert_array_almost_equal(N.log(linear_pdf), log_pdf)
def _test_common(self, d, k, mode):
    """Check log-domain responsibilities against the log of the
    linear-domain ones for the dataset ``'<mode>_<d>d_<k>k.mat'``.

    Both values returned by ``compute_responsabilities`` are logged and
    compared (``DEF_DEC`` decimals) with the matching value returned by
    ``compute_log_responsabilities``. When the logged array contains
    non-finite entries the comparison is skipped and a notice is printed
    instead.
    """
    dic = load_dataset('%s_%dd_%dk.mat' % (mode, d, k))

    gm = GM.fromvalues(dic['w0'], dic['mu0'], dic['va0'])
    gmm = GMM(gm, 'test')

    a, na = gmm.compute_responsabilities(dic['data'])
    la, nla = gmm.compute_log_responsabilities(dic['data'])

    # (log of linear-domain value, log-domain value) pairs; both logs are
    # evaluated up front, in the same order as before.
    pairs = ((N.log(a), la), (N.log(na), nla))
    for logged, reference in pairs:
        if not N.all(N.isfinite(logged)):
            # Parenthesised single-argument print gives the same output
            # as a Python 2 statement and as the Python 3 function.
            print("precision problem for %s, %dd, %dk, test need fixing"
                  % (mode, d, k))
        else:
            assert_array_almost_equal(logged, reference, DEF_DEC)
def _test_common(self, d, k, mode):
    """Compare ``compute_log_responsabilities`` with the logarithm of
    ``compute_responsabilities`` on the dataset ``'<mode>_<d>d_<k>k.mat'``.

    Each of the two returned values is compared to ``DEF_DEC`` decimals.
    If taking the log of the linear-domain value yields non-finite
    entries, that comparison is skipped and a notice is printed.
    """
    dic = load_dataset('%s_%dd_%dk.mat' % (mode, d, k))

    gm = GM.fromvalues(dic['w0'], dic['mu0'], dic['va0'])
    gmm = GMM(gm, 'test')

    a, na = gmm.compute_responsabilities(dic['data'])
    la, nla = gmm.compute_log_responsabilities(dic['data'])

    ta = N.log(a)
    tna = N.log(na)

    # Built once; printed with a parenthesised single-argument print so
    # the line is valid both as a Python 2 statement and as the Python 3
    # function call.
    notice = "precision problem for %s, %dd, %dk, test need fixing" % (
        mode, d, k)

    if N.all(N.isfinite(ta)):
        assert_array_almost_equal(ta, la, DEF_DEC)
    else:
        print(notice)

    if N.all(N.isfinite(tna)):
        assert_array_almost_equal(tna, nla, DEF_DEC)
    else:
        print(notice)