def generate_dataset(d, k, mode, nframes):
    """Generate a dataset useful for EM and GMM testing.

    returns:
        data : ndarray
            data sampled from the true model.
        tgm : GM
            the true model (randomly generated)
        gm0 : GM
            the initial model
        gm : GM
            the trained model
    """
    # Build a random "true" model and draw samples from it.
    true_w, true_mu, true_va = GM.gen_param(d, k, mode, spread=2.0)
    tgm = GM.fromvalues(true_w, true_mu, true_va)
    data = tgm.sample(nframes)

    # Run EM, doing the (random) initialization separately so that the
    # initial model can be returned alongside the trained one.
    trainer = GMM(GM(d, k, mode), 'test')
    trainer.init_random(data)
    gm0 = copy.copy(trainer.gm)

    trainer = GMM(copy.copy(trainer.gm), 'test')
    EM().train(data, trainer)

    return data, tgm, gm0, trainer.gm
def _create_model(self, d, k, mode, nframes, emiter):
    """Generate a random GM model, sample it, and fit a new model with EM.

    Stores the sampled data in self.data, the k-means-initialized model
    in self.gm0 and the trained model in self.gm.
    """
    # Generate a model with k components, d dimensions.
    w, mu, va = GM.gen_param(d, k, mode, spread=1.5)
    ref_gm = GM.fromvalues(w, mu, va)

    # Sample nframes frames from the model.
    data = ref_gm.sample(nframes)

    # Approximate the model with classical EM, starting from a
    # k-means initialization.
    lgm = GM(d, k, mode)
    gmm = GMM(lgm, 'kmean')
    gmm.init(data, niter=KM_ITER)
    self.gm0 = copy.copy(gmm.gm)

    # The actual EM iterations (responsabilities + update step).
    for _ in range(emiter):
        resp = gmm.compute_responsabilities(data)[0]
        gmm.update_em(data, resp)

    self.data = data
    self.gm = lgm
def test_conf_ellip(self):
    """Only test whether the call succeeds.

    To check whether the result is OK, you have to plot the results."""
    d, k = 3, 3
    params = GM.gen_param(d, k)
    gm = GM.fromvalues(*params)
    gm.conf_ellipses()
def _test(self, dataset, log):
    """Train EM on a stored dataset and compare the resulting parameters
    against the reference values saved with it."""
    dic = load_dataset(dataset)
    gmm = GMM(GM.fromvalues(dic['w0'], dic['mu0'], dic['va0']), 'test')
    EM().train(dic['data'], gmm, log=log)

    # Trained weights, means and variances must match the references.
    for attr in ('w', 'mu', 'va'):
        assert_array_almost_equal(getattr(gmm.gm, attr), dic[attr], DEF_DEC)
def test_2d_diag_logpdf(self):
    """Check the log pdf of a 2d diagonal mixture against a direct
    linear-domain computation."""
    d = 2
    w = N.array([0.4, 0.6])
    mu = N.array([[0., 2], [-1, -2]])
    va = N.array([[1, 0.5], [0.5, 1]])
    x = N.random.randn(100, 2)

    gm = GM.fromvalues(w, mu, va)

    # Reference density: per-component densities weighted and summed.
    ref = N.sum(multiple_gauss_den(x, mu, va) * w, 1)
    got = gm.pdf(x, log=True)
    assert_array_almost_equal(N.log(ref), got)
def test_1d_bogus(self): """Check that functions which do not make sense for 1d fail nicely.""" d = 1 k = 2 w, mu, va = GM.gen_param(d, k) gm = GM.fromvalues(w, mu, va) try: gm.conf_ellipses() raise AssertionError("This should not work !") except ValueError, e: print "Ok, conf_ellipses failed as expected (with msg: " + str( e) + ")"
def _create_model_and_run_em(self, d, k, mode, nframes):
    """Generate a random GM model, sample nframes points from it, and
    fit a fresh model with EM (k-means initialization)."""
    # Generate a model with k components, d dimensions.
    w, mu, va = GM.gen_param(d, k, mode, spread=1.5)
    ref_gm = GM.fromvalues(w, mu, va)

    # Sample nframes frames from the model.
    data = ref_gm.sample(nframes)

    # Approximate the model with classical EM.
    learned = GM(d, k, mode)
    gmm = GMM(learned, 'kmean')
    EM().train(data, gmm)
def test_get_va(self):
    """Test _get_va for diag and full mode.

    Builds a full-covariance model whose variances are a known ramp,
    computes the expected sub-variances for the dimensions in `dim` in
    closed form, and checks _get_va returns exactly those values.
    """
    d = 3
    k = 2
    ld = 2
    dim = [0, 2]
    w, mu, va = GM.gen_param(d, k, 'full')
    # Overwrite variances with 0..d*d*k-1 so each expected entry can be
    # computed from its flat index below.
    va = N.arange(d * d * k).reshape(d * k, d)
    gm = GM.fromvalues(w, mu, va)

    # Expected values. Use floor division (//): the original integer `/`
    # relied on Python 2 semantics and would yield float (wrong) indices
    # under Python 3 true division. Also use ld * ld instead of the
    # hard-coded 4 so the computation tracks ld.
    tva = N.empty(ld * ld * k)
    for i in range(k * ld * ld):
        tva[i] = dim[i % ld] \
                 + (i % (ld * ld)) // ld * dim[1] * d \
                 + d * d * (i // (ld * ld))
    tva = tva.reshape(ld * k, ld)

    sva = gm._get_va(dim)
    assert N.all(sva == tva)
def _test_common(self, d, k, mode): dic = load_dataset('%s_%dd_%dk.mat' % (mode, d, k)) gm = GM.fromvalues(dic['w0'], dic['mu0'], dic['va0']) gmm = GMM(gm, 'test') a, na = gmm.compute_responsabilities(dic['data']) la, nla = gmm.compute_log_responsabilities(dic['data']) ta = N.log(a) tna = N.log(na) if not N.all(N.isfinite(ta)): print "precision problem for %s, %dd, %dk, test need fixing" % (mode, d, k) else: assert_array_almost_equal(ta, la, DEF_DEC) if not N.all(N.isfinite(tna)): print "precision problem for %s, %dd, %dk, test need fixing" % (mode, d, k) else: assert_array_almost_equal(tna, nla, DEF_DEC)
#------------------------------------------------------- # Values for weights, mean and (diagonal) variances # - the weights are an array of rank 1 # - mean is expected to be rank 2 with one row for one component # - variances are also expteced to be rank 2. For diagonal, one row # is one diagonal, for full, the first d rows are the first variance, # etc... In this case, the variance matrix should be k*d rows and d # colums w = N.array([0.2, 0.45, 0.35]) mu = N.array([[4.1, 3], [1, 5], [-2, -3]]) va = N.array([[1, 1.5], [3, 4], [2, 3.5]]) #----------------------------------------- # First method: directly from parameters: # Both methods are equivalents. gm = GM.fromvalues(w, mu, va) #------------------------------------- # Second method to build a GM instance: gm = GM(d, k, mode='diag') # The set_params checks that w, mu, and va corresponds to k, d and m gm.set_param(w, mu, va) # Once set_params is called, both methods are equivalent. The 2d # method is useful when using a GM object for learning (where # the learner class will set the params), whereas the first one # is useful when there is a need to quickly sample a model # from existing values, without a need to give the hyper parameters # Create a Gaussian Mixture from the parameters, and sample # 1000 items from it (one row = one 2 dimension sample)
# Meta parameters of the model
# - k: Number of components
# - d: dimension of each Gaussian
# - mode: Mode of covariance matrix: full or diag (string)
# - nframes: number of frames (frame = one data point = one
#   row of d elements)
k = 2
d = 2
mode = 'diag'
# Use an int literal: nframes is a sample count, and the original 1e3
# is a float, which integer-count APIs may reject or silently truncate.
nframes = 1000

#+++++++++++++++++++++++++++++++++++++++++++
# Create an artificial GM model, sample it
#+++++++++++++++++++++++++++++++++++++++++++
w, mu, va = GM.gen_param(d, k, mode, spread=1.5)
gm = GM.fromvalues(w, mu, va)

# Sample nframes frames from the model
data = gm.sample(nframes)

#++++++++++++++++++++++++
# Learn the model with EM
#++++++++++++++++++++++++
# Create a Model from a Gaussian mixture with kmean initialization
lgm = GM(d, k, mode)
gmm = GMM(lgm, 'kmean')

# The actual EM, with likelihood computation. The threshold
# is compared to the (linearly approximated) derivative of the likelihood
em = EM()