def generate_dataset(d, k, mode, nframes):
    """Build a synthetic dataset for EM and GMM testing.

    Returns
    -------
    data : ndarray
        samples drawn from the true model.
    tgm : GM
        the true model (randomly generated).
    gm0 : GM
        the model right after random initialization.
    gm : GM
        the model after EM training.
    """
    # Draw random ground-truth parameters and wrap them in a GM instance.
    weights, means, covs = GM.gen_param(d, k, mode, spread=2.0)
    true_gm = GM.fromvalues(weights, means, covs)

    # Sample the training data from the true model.
    data = true_gm.sample(nframes)

    # Random initialization, run separately so the initial model can be
    # snapshotted before training.
    trainer = GMM(GM(d, k, mode), 'test')
    trainer.init_random(data)
    initial_gm = copy.copy(trainer.gm)

    # Re-wrap a copy of the initialized model and train it with EM.
    trainer = GMM(copy.copy(trainer.gm), 'test')
    EM().train(data, trainer)

    return data, true_gm, initial_gm, trainer.gm
def cluster(data, k, mode='full'):
    """Fit a k-component Gaussian mixture to data with regularized EM.

    Returns the fitted GM (trained in place) and its BIC score on data.
    """
    dim = data.shape[1]
    model = GM(dim, k, mode)
    gmm = GMM(model, 'random')
    # NOTE(review): pcnt and pval are expected to be module-level
    # constants defined elsewhere in this file — confirm.
    trainer = RegularizedEM(pcnt=pcnt, pval=pval)
    trainer.train(data, gmm, maxiter=20)
    return model, gmm.bic(data)
def cluster(data, k, mode='full'):
    """Train a k-component GMM on data (at most 20 EM iterations).

    Returns the trained GM instance.
    """
    dim = data.shape[1]
    model = GM(dim, k, mode)
    gmm = GMM(model)
    EM().train(data, gmm, maxiter=20)
    return model
def _create_model(self, d, k, mode, nframes, emiter):
    """Create a random k-component, d-dimensional reference model, sample
    nframes points from it, then run `emiter` iterations of classical
    (batch) EM on a k-means-initialized model.

    Side effects: stores the samples in self.data, the post-init model in
    self.gm0 and the trained model in self.gm.
    """
    # Random ground-truth parameters wrapped in a GM instance.
    w, mu, va = GM.gen_param(d, k, mode, spread=1.5)
    reference = GM.fromvalues(w, mu, va)

    # Training data drawn from the reference model.
    data = reference.sample(nframes)

    # Model to be learned, initialized with k-means.
    learned = GM(d, k, mode)
    gmm = GMM(learned, 'kmean')
    gmm.init(data, niter=KM_ITER)

    # Snapshot the model right after initialization for later comparison.
    self.gm0 = copy.copy(gmm.gm)

    # Batch EM: E step (responsibilities), then M step, emiter times.
    for _ in range(emiter):
        responsibilities, _ = gmm.compute_responsabilities(data)
        gmm.update_em(data, responsibilities)

    self.data = data
    self.gm = learned
def _run_pure_online(self, d, k, mode, nframes):
    """Train a k-component, d-dimensional mixture on self.data with pure
    online (frame-by-frame) EM and return the trained GM.

    Assumes self.data was already created (e.g. by _create_model) and has
    at least nframes rows — TODO confirm against callers.
    """
    #++++++++++++++++++++++++++++++++++++++++
    # Approximate the models with online EM
    #++++++++++++++++++++++++++++++++++++++++
    ogm = GM(d, k, mode)
    ogmm = OnGMM(ogm, 'kmean')
    # Initialize on the first 5% of the data only (Python 2 integer
    # division of nframes / 20).
    init_data = self.data[0:nframes / 20, :]
    ogmm.init(init_data)

    # Forgetting param: lamb is the per-frame forgetting factor, from
    # which the step sizes nu are derived by the recursion below.
    ku = 0.005
    t0 = 200
    lamb = 1 - 1 / (N.arange(-1, nframes - 1) * ku + t0)
    nu0 = 0.2
    nu = N.zeros((len(lamb), 1))
    nu[0] = nu0
    for i in range(1, len(lamb)):
        nu[i] = 1. / (1 + lamb[i] / nu[i - 1])

    # object version of online EM: one sufficient-statistics update and
    # one parameter update per frame.
    for t in range(nframes):
        # the assert are here to check we do not create copies
        # unvoluntary for parameters: in pure online mode the "previous"
        # and "current" parameter arrays must alias the same objects.
        assert ogmm.pw is ogmm.cw
        assert ogmm.pmu is ogmm.cmu
        assert ogmm.pva is ogmm.cva
        ogmm.compute_sufficient_statistics_frame(self.data[t], nu[t])
        ogmm.update_em_frame()

    # Push the final online estimates into the GM instance and return it.
    ogmm.gm.set_param(ogmm.cw, ogmm.cmu, ogmm.cva)
    return ogmm.gm
def cluster(data, k):
    """Train a k-component GMM on data (default covariance mode, at most
    20 EM iterations) and return the trained GM with its BIC on data."""
    dim = data.shape[1]
    model = GM(dim, k)
    gmm = GMM(model)
    EM().train(data, gmm, maxiter=20)
    return model, gmm.bic(data)
def _check(self, d, k, mode, nframes, emiter):
    """Run epoch-based online EM and check it matches the offline EM
    result (self.gm, produced beforehand) up to AR_AS_PREC decimals.

    Raises AssertionError when the achieved precision is below AR_AS_PREC.
    Assumes self.data and self.gm0 were set up by a prior call (e.g.
    _create_model) — TODO confirm against callers.
    """
    #++++++++++++++++++++++++++++++++++++++++
    # Approximate the models with online EM
    #++++++++++++++++++++++++++++++++++++++++
    # Learn the model with Online EM
    ogm = GM(d, k, mode)
    ogmm = OnGMM(ogm, 'kmean')
    init_data = self.data
    ogmm.init(init_data, niter=KM_ITER)

    # Check that online kmean init is the same than kmean offline init
    ogm0 = copy.copy(ogm)
    assert_array_equal(ogm0.w, self.gm0.w)
    assert_array_equal(ogm0.mu, self.gm0.mu)
    assert_array_equal(ogm0.va, self.gm0.va)

    # Forgetting param: lamb == 1 everywhere except frame 0, which makes
    # the step sizes nu below reduce to 1/(t+1) — plain running averages.
    lamb = N.ones((nframes, 1))
    lamb[0] = 0
    nu0 = 1.0
    nu = N.zeros((len(lamb), 1))
    nu[0] = nu0
    for i in range(1, len(lamb)):
        nu[i] = 1. / (1 + lamb[i] / nu[i - 1])

    # object version of online EM: the p* arguments are updated only at each
    # epoch, which is equivalent to on full EM iteration on the
    # classic EM algorithm
    ogmm.pw = ogmm.cw.copy()
    ogmm.pmu = ogmm.cmu.copy()
    ogmm.pva = ogmm.cva.copy()
    for e in range(emiter):
        for t in range(nframes):
            ogmm.compute_sufficient_statistics_frame(self.data[t], nu[t])
            ogmm.update_em_frame()

        # Change pw args only a each epoch
        ogmm.pw = ogmm.cw.copy()
        ogmm.pmu = ogmm.cmu.copy()
        ogmm.pva = ogmm.cva.copy()

    # For equivalence between off and on, we allow a margin of error,
    # because of round-off errors.
    print " Checking precision of equivalence with offline EM trainer "
    maxtestprec = 18
    # Probe increasing decimal precision until the comparison fails; the
    # loop variable i deliberately leaks out of the for/try so the except
    # and else branches can report the precision reached.
    try:
        for i in range(maxtestprec):
            assert_array_almost_equal(self.gm.w, ogmm.pw, decimal=i)
            assert_array_almost_equal(self.gm.mu, ogmm.pmu, decimal=i)
            assert_array_almost_equal(self.gm.va, ogmm.pva, decimal=i)
        print "\t !! Precision up to %d decimals !! " % i
    except AssertionError:
        # i is the first decimal count at which the comparison failed.
        if i < AR_AS_PREC:
            print """\t !!NOT OK: Precision up to %d decimals only, outside the allowed range (%d) !! """ % (i, AR_AS_PREC)
            raise AssertionError
        else:
            print "\t !!OK: Precision up to %d decimals !! " % i
def _create_model_and_run_em(self, d, k, mode, nframes):
    """Sample nframes points from a random k-component, d-dimensional
    model, then fit a k-means-initialized GMM to them with classical EM."""
    # Random reference parameters wrapped in a GM instance.
    w, mu, va = GM.gen_param(d, k, mode, spread=1.5)
    reference = GM.fromvalues(w, mu, va)

    # Training data drawn from the reference model.
    data = reference.sample(nframes)

    # Model to be learned, initialized with k-means, trained by batch EM.
    learned = GM(d, k, mode)
    gmm = GMM(learned, 'kmean')
    trainer = EM()
    likelihoods = trainer.train(data, gmm)
# - variances are also expteced to be rank 2. For diagonal, one row # is one diagonal, for full, the first d rows are the first variance, # etc... In this case, the variance matrix should be k*d rows and d # colums w = N.array([0.2, 0.45, 0.35]) mu = N.array([[4.1, 3], [1, 5], [-2, -3]]) va = N.array([[1, 1.5], [3, 4], [2, 3.5]]) #----------------------------------------- # First method: directly from parameters: # Both methods are equivalents. gm = GM.fromvalues(w, mu, va) #------------------------------------- # Second method to build a GM instance: gm = GM(d, k, mode='diag') # The set_params checks that w, mu, and va corresponds to k, d and m gm.set_param(w, mu, va) # Once set_params is called, both methods are equivalent. The 2d # method is useful when using a GM object for learning (where # the learner class will set the params), whereas the first one # is useful when there is a need to quickly sample a model # from existing values, without a need to give the hyper parameters # Create a Gaussian Mixture from the parameters, and sample # 1000 items from it (one row = one 2 dimension sample) data = gm.sample(1000) # Plot the samples P.plot(data[:, 0], data[:, 1], '.')
# Example: create a 2-component, 2-dimensional diagonal mixture, sample it,
# and re-learn its parameters with EM.
k = 2
d = 2
mode = 'diag'
# Sample count must be an integer: the original float literal 1e3 breaks
# numpy-based sampling, which rejects non-integer sizes.
nframes = 1000

#+++++++++++++++++++++++++++++++++++++++++++
# Create an artificial GM model, samples it
#+++++++++++++++++++++++++++++++++++++++++++
w, mu, va = GM.gen_param(d, k, mode, spread=1.5)
gm = GM.fromvalues(w, mu, va)

# Sample nframes frames from the model
data = gm.sample(nframes)

#++++++++++++++++++++++++
# Learn the model with EM
#++++++++++++++++++++++++
# Create a Model from a Gaussian mixture with kmean initialization
lgm = GM(d, k, mode)
gmm = GMM(lgm, 'kmean')

# The actual EM, with likelihood computation. The threshold
# is compared to the (linearly approximated) derivative of the likelihood
em = EM()
like = em.train(data, gmm, maxiter=30, thresh=1e-8)

# The computed parameters are in gmm.gm, which is the same object as lgm
# (remember, python does not copy most objects by default). You can for
# example plot lgm against gm to compare the learned and true models.