Example #1
    def _create_model(self, d, k, mode, nframes, emiter):
        #+++++++++++++++++++++++++++++++++++++++++++++++++
        # Generate a model with k components, d dimensions
        #+++++++++++++++++++++++++++++++++++++++++++++++++
        w, mu, va = GM.gen_param(d, k, mode, spread=1.5)
        gm = GM.fromvalues(w, mu, va)
        # Sample nframes frames from the model
        data = gm.sample(nframes)

        #++++++++++++++++++++++++++++++++++++++++++
        # Approximate the models with classical EM
        #++++++++++++++++++++++++++++++++++++++++++
        # Init the model
        lgm = GM(d, k, mode)
        gmm = GMM(lgm, 'kmean')
        gmm.init(data, niter=KM_ITER)

        self.gm0 = copy.copy(gmm.gm)
        # The actual EM, with likelihood computation
        for i in range(emiter):
            g, tgd = gmm.compute_responsabilities(data)
            gmm.update_em(data, g)

        self.data = data
        self.gm = lgm
Example #2

def generate_dataset(d, k, mode, nframes):
    """Generate a dataset useful for EM and GMM testing.
    
    returns:
        data : ndarray
            data from the true model.
        tgm : GM
            the true model (randomly generated)
        gm0 : GM
            the initial model
        gm : GM
            the trained model
    """
    # Generate a model
    w, mu, va = GM.gen_param(d, k, mode, spread=2.0)
    tgm = GM.fromvalues(w, mu, va)

    # Generate data from the model
    data = tgm.sample(nframes)

    # Run EM on the model, by running the initialization separately.
    gmm = GMM(GM(d, k, mode), 'test')
    gmm.init_random(data)
    gm0 = copy.copy(gmm.gm)

    gmm = GMM(copy.copy(gmm.gm), 'test')
    em = EM()
    em.train(data, gmm)

    return data, tgm, gm0, gmm.gm
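# A minimal usage sketch for generate_dataset; the d, k, mode and nframes
# values below are illustrative assumptions, not taken from the examples
data, tgm, gm0, gm = generate_dataset(2, 3, 'diag', 1000)
print "true weights:   ", tgm.w
print "trained weights:", gm.w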
Example #3

    def test_conf_ellip(self):
        """Only test whether the call succeeds. To check whether the result
        is OK, you have to plot the results."""
        d = 3
        k = 3
        w, mu, va = GM.gen_param(d, k)
        gm = GM.fromvalues(w, mu, va)
        gm.conf_ellipses()
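# A plotting sketch to check the ellipses visually. It assumes conf_ellipses
# returns per-component lists of x and y coordinates (Xe, Ye); verify this
# against the API before relying on it.
import pylab as P
w, mu, va = GM.gen_param(2, 2)
gm = GM.fromvalues(w, mu, va)
Xe, Ye = gm.conf_ellipses()
for x, y in zip(Xe, Ye):
    P.plot(x, y)
P.show()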
Example #4

    def test_1d_bogus(self):
        """Check that functions which do not make sense for 1d fail nicely."""
        d = 1
        k = 2
        w, mu, va = GM.gen_param(d, k)
        gm = GM.fromvalues(w, mu, va)
        try:
            gm.conf_ellipses()
            raise AssertionError("This should not work!")
        except ValueError as e:
            print "Ok, conf_ellipses failed as expected (with msg: %s)" % str(e)
Example #5

def cluster(data, k, mode='full'):
    d = data.shape[1]
    gm = GM(d, k, mode)
    gmm = GMM(gm)
    em = EM()
    em.train(data, gmm, maxiter=20)
    return gm
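# A usage sketch for cluster on synthetic data; the true model and the sizes
# below are illustrative assumptions
w, mu, va = GM.gen_param(2, 2, 'full', spread=2.0)
true_gm = GM.fromvalues(w, mu, va)
data = true_gm.sample(500)
fitted = cluster(data, 2, mode='full')
print "fitted weights:", fitted.w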
Example #6

def cluster(data, k, mode='full', pcnt=0.05, pval=1e-2):
    # pcnt and pval control the regularization; the defaults here are
    # illustrative assumptions (the original snippet read them from an
    # enclosing scope)
    d = data.shape[1]
    gm = GM(d, k, mode)
    gmm = GMM(gm, 'random')
    em = RegularizedEM(pcnt=pcnt, pval=pval)
    em.train(data, gmm, maxiter=20)
    return gm, gmm.bic(data)
Example #7
    def _run_pure_online(self, d, k, mode, nframes):
        #++++++++++++++++++++++++++++++++++++++++
        # Approximate the models with online EM
        #++++++++++++++++++++++++++++++++++++++++
        ogm = GM(d, k, mode)
        ogmm = OnGMM(ogm, 'kmean')
        init_data = self.data[0:nframes // 20, :]
        ogmm.init(init_data)

        # Forgetting parameters: lamb is the forgetting schedule, nu the
        # resulting step sizes for the online updates
        ku = 0.005
        t0 = 200
        lamb = 1 - 1 / (N.arange(-1, nframes - 1) * ku + t0)
        nu0 = 0.2
        nu = N.zeros((len(lamb), 1))
        nu[0] = nu0
        for i in range(1, len(lamb)):
            nu[i] = 1. / (1 + lamb[i] / nu[i - 1])

        # object version of online EM
        for t in range(nframes):
            # the asserts are here to check that we do not involuntarily
            # create copies of the parameters
            assert ogmm.pw is ogmm.cw
            assert ogmm.pmu is ogmm.cmu
            assert ogmm.pva is ogmm.cva
            ogmm.compute_sufficient_statistics_frame(self.data[t], nu[t])
            ogmm.update_em_frame()

        ogmm.gm.set_param(ogmm.cw, ogmm.cmu, ogmm.cva)

        return ogmm.gm
Example #8
def cluster(data, k):
    d = data.shape[1]
    gm = GM(d, k)
    gmm = GMM(gm)
    em = EM()
    em.train(data, gmm, maxiter=20)
    return gm, gmm.bic(data)
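# A model-selection sketch using the returned BIC: pick the best k among a
# few candidates. This assumes the convention that a larger value of
# gmm.bic(data) is better; verify the sign convention before relying on it.
# Here 'data' stands for any (nframes, d) ndarray.
best_k, best_bic, best_gm = None, float('-inf'), None
for kc in range(1, 5):
    gmc, bic = cluster(data, kc)
    if bic > best_bic:
        best_k, best_bic, best_gm = kc, bic, gmc
print "best number of components:", best_k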
Example #9
    def _check(self, d, k, mode, nframes, emiter):
        #++++++++++++++++++++++++++++++++++++++++
        # Approximate the models with online EM
        #++++++++++++++++++++++++++++++++++++++++
        # Learn the model with Online EM
        ogm = GM(d, k, mode)
        ogmm = OnGMM(ogm, 'kmean')
        init_data = self.data
        ogmm.init(init_data, niter=KM_ITER)

        # Check that online kmeans init is the same as the offline kmeans init
        ogm0 = copy.copy(ogm)
        assert_array_equal(ogm0.w, self.gm0.w)
        assert_array_equal(ogm0.mu, self.gm0.mu)
        assert_array_equal(ogm0.va, self.gm0.va)

        # Forgetting param
        lamb = N.ones((nframes, 1))
        lamb[0] = 0
        nu0 = 1.0
        nu = N.zeros((len(lamb), 1))
        nu[0] = nu0
        for i in range(1, len(lamb)):
            nu[i] = 1. / (1 + lamb[i] / nu[i - 1])

        # object version of online EM: the p* parameters are updated only at
        # each epoch, which is equivalent to one full iteration of the classic
        # (batch) EM algorithm
        ogmm.pw = ogmm.cw.copy()
        ogmm.pmu = ogmm.cmu.copy()
        ogmm.pva = ogmm.cva.copy()
        for e in range(emiter):
            for t in range(nframes):
                ogmm.compute_sufficient_statistics_frame(self.data[t], nu[t])
                ogmm.update_em_frame()

            # Update the p* parameters only at each epoch
            ogmm.pw = ogmm.cw.copy()
            ogmm.pmu = ogmm.cmu.copy()
            ogmm.pva = ogmm.cva.copy()

        # For equivalence between the offline and online versions, we allow a
        # margin of error, because of round-off errors.
        print " Checking precision of equivalence with offline EM trainer "
        maxtestprec = 18
        try:
            for i in range(maxtestprec):
                assert_array_almost_equal(self.gm.w, ogmm.pw, decimal=i)
                assert_array_almost_equal(self.gm.mu, ogmm.pmu, decimal=i)
                assert_array_almost_equal(self.gm.va, ogmm.pva, decimal=i)
            print "\t !! Precision up to %d decimals !! " % i
        except AssertionError:
            if i < AR_AS_PREC:
                print """\t !!NOT OK: Precision up to %d decimals only, 
                    outside the allowed range (%d) !! """ % (i, AR_AS_PREC)
                raise AssertionError
            else:
                print "\t !!OK: Precision up to %d decimals !! " % i
Example #10
    def test_get_va(self):
        """Test _get_va for diag and full mode."""
        d = 3
        k = 2
        ld = 2
        dim = [0, 2]
        w, mu, va = GM.gen_param(d, k, 'full')
        # overwrite va with deterministic values, so that the expected result
        # tva can be computed in closed form below
        va = N.arange(d * d * k).reshape(d * k, d)
        gm = GM.fromvalues(w, mu, va)

        tva = N.empty(ld * ld * k)
        for i in range(k * ld * ld):
            tva[i] = dim[i % ld] + (i % 4) // ld * dim[1] * d \
                     + d * d * (i // (ld * ld))
        tva = tva.reshape(ld * k, ld)
        sva = gm._get_va(dim)
        assert N.all(sva == tva)
Example #11
    def _create_model_and_run_em(self, d, k, mode, nframes):
        #+++++++++++++++++++++++++++++++++++++++++++++++++
        # Generate a model with k components, d dimensions
        #+++++++++++++++++++++++++++++++++++++++++++++++++
        w, mu, va   = GM.gen_param(d, k, mode, spread = 1.5)
        gm          = GM.fromvalues(w, mu, va)
        # Sample nframes frames from the model
        data        = gm.sample(nframes)

        #++++++++++++++++++++++++++++++++++++++++++
        # Approximate the models with classical EM
        #++++++++++++++++++++++++++++++++++++++++++
        # Init the model
        lgm = GM(d, k, mode)
        gmm = GMM(lgm, 'kmean')

        em  = EM()
        lk  = em.train(data, gmm)
Example #12
    def _test(self, dataset, log):
        dic = load_dataset(dataset)

        gm = GM.fromvalues(dic['w0'], dic['mu0'], dic['va0'])
        gmm = GMM(gm, 'test')
        EM().train(dic['data'], gmm, log = log)

        assert_array_almost_equal(gmm.gm.w, dic['w'], DEF_DEC)
        assert_array_almost_equal(gmm.gm.mu, dic['mu'], DEF_DEC)
        assert_array_almost_equal(gmm.gm.va, dic['va'], DEF_DEC)
Example #13
    def test_2d_diag_logpdf(self):
        d = 2
        w = N.array([0.4, 0.6])
        mu = N.array([[0., 2], [-1, -2]])
        va = N.array([[1, 0.5], [0.5, 1]])
        x = N.random.randn(100, 2)
        gm = GM.fromvalues(w, mu, va)
        y1 = N.sum(multiple_gauss_den(x, mu, va) * w, 1)
        y2 = gm.pdf(x, log=True)
        assert_array_almost_equal(N.log(y1), y2)
Example #14
    def _test_common(self, d, k, mode):
        dic = load_dataset('%s_%dd_%dk.mat' % (mode, d, k))

        gm = GM.fromvalues(dic['w0'], dic['mu0'], dic['va0'])
        gmm = GMM(gm, 'test')

        a, na = gmm.compute_responsabilities(dic['data'])
        la, nla = gmm.compute_log_responsabilities(dic['data'])

        ta = N.log(a)
        tna = N.log(na)
        if not N.all(N.isfinite(ta)):
            print "precision problem for %s, %dd, %dk, test need fixing" % (mode, d, k)
        else:
            assert_array_almost_equal(ta, la, DEF_DEC)

        if not N.all(N.isfinite(tna)):
            print "precision problem for %s, %dd, %dk, test need fixing" % (mode, d, k)
        else:
            assert_array_almost_equal(tna, nla, DEF_DEC)
Example #15
#-------------------------------------------------------
# Values for weights, means and (diagonal) variances
#   - the weights are an array of rank 1
#   - mean is expected to be rank 2, with one row per component
#   - variances are also expected to be rank 2. For diagonal mode, one row
#   is one diagonal; for full mode, the first d rows are the first covariance
#   matrix, etc. In that case, the variance matrix should have k*d rows and
#   d columns
w   = N.array([0.2, 0.45, 0.35])
mu  = N.array([[4.1, 3], [1, 5], [-2, -3]])
va  = N.array([[1, 1.5], [3, 4], [2, 3.5]])
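# For full mode, the same model would instead stack the k d-by-d covariance
# matrices vertically into a (k*d, d) array; an illustration with made-up
# values (not used below):
va_full = N.array([[1.0, 0.2],
                   [0.2, 1.5],   # covariance of component 1
                   [3.0, 0.5],
                   [0.5, 4.0],   # covariance of component 2
                   [2.0, 0.0],
                   [0.0, 3.5]])  # covariance of component 3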

#-----------------------------------------
# First method: build directly from the parameters
# (both methods are equivalent)
gm      = GM.fromvalues(w, mu, va)

#-------------------------------------
# Second method to build a GM instance:
k, d    = mu.shape
gm      = GM(d, k, mode = 'diag')
# set_param checks that w, mu and va are consistent with d, k and the mode
gm.set_param(w, mu, va)

# Once set_param is called, both methods are equivalent. The second
# method is useful when using a GM object for learning (where
# the learner class will set the parameters), whereas the first one
# is useful when there is a need to quickly sample a model
# from existing values, without giving the hyperparameters explicitly

# Create a Gaussian mixture from the parameters, and sample
# 1000 items from it (one row = one 2-dimensional sample)
data    = gm.sample(1000)
Example #16
#+++++++++++++++++++++++++++++
# Meta parameters of the model
#   - k: Number of components
#   - d: dimension of each Gaussian
#   - mode: Mode of covariance matrix: full or diag (string)
#   - nframes: number of frames (frame = one data point = one
#   row of d elements)
k       = 2
d       = 2
mode    = 'diag'
nframes = 1000

#+++++++++++++++++++++++++++++++++++++++++++
# Create an artificial GM model and sample it
#+++++++++++++++++++++++++++++++++++++++++++
w, mu, va   = GM.gen_param(d, k, mode, spread = 1.5)
gm          = GM.fromvalues(w, mu, va)

# Sample nframes frames from the model
data    = gm.sample(nframes)

#++++++++++++++++++++++++
# Learn the model with EM
#++++++++++++++++++++++++

# Create a Model from a Gaussian mixture with kmean initialization
lgm = GM(d, k, mode)
gmm = GMM(lgm, 'kmean')

# The actual EM, with likelihood computation. The threshold
# is compared to the (linearly approximated) derivative of the likelihood
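# A hedged completion: run EM with an iteration cap and a convergence
# threshold. The exact 'thresh' keyword and the values are assumptions about
# this version of the train API.
em   = EM()
like = em.train(data, gmm, maxiter=30, thresh=1e-8)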
Example #18
# This is a simple test to check whether plotting confidence ellipsoids and
# isodensity contours match
import numpy as N

import pylab as P

from scikits.learn.machine.em import EM, GM, GMM

# Generate a simple mixture model, plot its confidence ellipses + isodensity
# curves for both diagonal and full covariance matrices
d = 3
k = 3
dim = [0, 2]
# diag model
w, mu, va = GM.gen_param(d, k)
dgm = GM.fromvalues(w, mu, va)
# full model
w, mu, va = GM.gen_param(d, k, 'full', spread = 1)
fgm = GM.fromvalues(w, mu, va)

def plot_model(gm, dim):
    X, Y, Z, V = gm.density_on_grid(dim = dim)
    h = gm.plot(dim = dim)
    for i in h:
        i.set_linestyle('-.')
    P.contour(X, Y, Z, V)
    data = gm.sample(200)
    P.plot(data[:, dim[0]], data[:, dim[1]], '.')

# Plot the contours and the ellipsoids of confidence
P.subplot(2, 1, 1)
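# A hedged completion of the truncated snippet: reuse plot_model for both
# models, one subplot each (the 2x1 layout is an assumption)
plot_model(dgm, dim)

P.subplot(2, 1, 2)
plot_model(fgm, dim)
P.show()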