Example 1
# imports shared by all the snippets below; the GMM module path is an
# assumption and may differ across nipy versions
import numpy as np
import numpy.random as nr
from nipy.algorithms.clustering.gmm import GMM
from nose.tools import assert_true


def test_em_gmm_heterosc(verbose=0):
    # testing the model on very ellipsoidal data: compute the BIC values
    # for several values of k and check that the maximum is reached for
    # 1 or 2 classes

    # generate some data
    dim = 2
    x = nr.randn(100, dim)
    x[:50, :] += 3
    # x[:,0]*=10

    # estimate different GMMs of that data
    maxiter = 100
    delta = 1.0e-4

    bic = np.zeros(5)
    for k in range(1, 6):
        lgmm = GMM(k, dim)
        lgmm.initialize(x)
        bic[k - 1] = lgmm.estimate(x, maxiter, delta, 0)
        if verbose:
            print "bic of the %d-classes model" % k, bic

    if verbose:
        # plot the result
        z = lgmm.map_label(x)
        from test_bgmm import plot2D

        plot2D(x, lgmm, z, show=1, verbose=0)
    assert bic[4] < bic[1]
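
The same select-by-BIC loop can be sketched with scikit-learn, whose GaussianMixture class exposes a bic() method directly. This is an illustrative analogue, not part of the test suite; note that sklearn's BIC is a penalty to minimize, whereas the nipy tests treat larger values as better.

import numpy as np
from sklearn.mixture import GaussianMixture

x = np.random.randn(100, 2)
x[:50] += 3

# fit GMMs with 1..5 components and record the BIC of each
bics = []
for k in range(1, 6):
    gm = GaussianMixture(n_components=k, max_iter=100, tol=1e-4).fit(x)
    bics.append(gm.bic(x))

# sklearn's bic() is lower-is-better, so the preferred model is the argmin
best_k = 1 + int(np.argmin(bics))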
Example 2
def test_em_gmm_multi(verbose=0):
    # Playing with various initializations on the same data

    # generate some data
    dim = 2
    x = np.concatenate((nr.randn(1000, dim), 3 + 2 * nr.randn(100, dim)))

    # estimate different GMMs of that data
    maxiter = 100
    delta = 1.0e-4
    ninit = 5
    k = 2

    lgmm = GMM(k, dim)
    bgmm = lgmm.initialize_and_estimate(x, maxiter, delta, ninit, verbose)
    bic = bgmm.evidence(x)

    if verbose:
        print "bic of the best model", bic

    if verbose:
        # plot the result
        from test_bgmm import plot2D

        z = bgmm.map_label(x)  # bgmm is the best of the ninit fits
        plot2D(x, bgmm, z, show=1, verbose=0)

    assert_true(np.isfinite(bic))
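
The ninit restarts used here map onto scikit-learn's n_init parameter; a minimal analogue of this test, assuming scikit-learn rather than the nipy GMM class:

import numpy as np
from sklearn.mixture import GaussianMixture

x = np.concatenate((np.random.randn(1000, 2), 3 + 2 * np.random.randn(100, 2)))
# n_init=5 keeps the best of five EM runs, like initialize_and_estimate
gm = GaussianMixture(n_components=2, n_init=5, tol=1e-4).fit(x)
assert np.isfinite(gm.bic(x))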
Example 3
def test_em_gmm_largedim(verbose=0):
    # testing the GMM model in larger dimensions

    # generate some data
    dim = 10
    x = nr.randn(100, dim)
    x[:30, :] += 1

    # estimate different GMMs of that data
    maxiter = 100
    delta = 1.0e-4

    for k in range(1, 3):
        lgmm = GMM(k, dim)
        lgmm.initialize(x)
        bic = lgmm.estimate(x, maxiter, delta, verbose)
        if verbose:
            print "bic of the %d-classes model" % k, bic

    z = lgmm.map_label(x)

    # define the correct labelling
    u = np.zeros(100)
    u[:30] = 1

    # check the correlation between the true labelling
    # and the computed one
    eta = np.absolute(np.dot(z - z.mean(), u - u.mean()) / (np.std(z) * np.std(u) * 100))
    assert_true(eta > 0.3)
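
The eta expression is just the absolute Pearson correlation between the predicted labels z and the true labels u (np.std defaults to ddof=0 and the dot product is divided by n = 100), so it can be written equivalently as:

eta = np.absolute(np.corrcoef(z, u)[0, 1])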
Example 4
def test_em_gmm_diag(verbose=0):
    # Computing the BIC value for GMMs with different numbers of classes,
    # with diagonal covariance models. The BIC should be maximal for a
    # number of classes of 1 or 2

    # generate some data
    dim = 2
    x = np.concatenate((nr.randn(1000, dim), 3 + 2 * nr.randn(1000, dim)))

    # estimate different GMMs of that data
    maxiter = 100
    delta = 1.0e-8
    prec_type = "diag"

    bic = np.zeros(5)
    for k in range(1, 6):
        lgmm = GMM(k, dim, prec_type)
        lgmm.initialize(x)
        bic[k - 1] = lgmm.estimate(x, maxiter, delta, verbose)
        if verbose:
            print "bic of the %d-classes model" % k, bic

    z = lgmm.map_label(x)

    assert_true((z.max() + 1 == lgmm.k) & (bic[4] < bic[1]))
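
For reference, the prec_type="diag" choice corresponds to scikit-learn's covariance_type="diag" (scikit-learn names, not nipy ones):

import numpy as np
from sklearn.mixture import GaussianMixture

x = np.concatenate((np.random.randn(1000, 2), 3 + 2 * np.random.randn(1000, 2)))
gm = GaussianMixture(n_components=2, covariance_type="diag", tol=1e-8).fit(x)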
Example 5
def test_em_gmm_largedim(verbose=0):
    """
    testing the GMM model in larger dimensions
    """
    # generate some data
    dim = 10
    x = nr.randn(100,dim)
    x[:30,:] += 2
    
    # estimate different GMMs of that data
    maxiter = 100
    delta = 1.e-4
    
    for k in range(2,3):
        lgmm = GMM(k,dim)
        bgmm = lgmm.initialize_and_estimate(x, None, maxiter, delta, ninit=5)
        
    z = bgmm.map_label(x)
    
    # define the correct labelling
    u = np.zeros(100)
    u[:30]=1

    #check the correlation between the true labelling
    # and the computed one
    eta = np.absolute(np.dot(z-z.mean(),u-u.mean())/(np.std(z)*np.std(u)*100))
    assert (eta>0.3)
Example 6
def test_em_gmm_full(verbose=0):
    # Computing the BIC value for different configurations of a GMM with
    # full covariance matrices. The BIC should be maximal for a number of
    # classes of 1 or 2

    # generate some data
    dim = 2
    x = np.concatenate((nr.randn(100, dim), 3 + 2 * nr.randn(100, dim)))

    # estimate different GMMs of that data
    maxiter = 100
    delta = 1.0e-4

    bic = np.zeros(5)
    for k in range(1, 6):
        lgmm = GMM(k, dim)
        lgmm.initialize(x)
        bic[k - 1] = lgmm.estimate(x, maxiter, delta, verbose)
        if verbose:
            print "bic of the %d-classes model" % k, bic

    z = lgmm.map_label(x)
    assert_true(bic[4] < bic[1])
Example 7
def test_em_loglike1():
    dim = 1
    k = 3
    n = 1000
    x = nr.randn(n, dim)
    lgmm = GMM(k, dim)
    lgmm.initialize(x)
    lgmm.estimate(x)
    ll = lgmm.average_log_like(x)
    ent = 0.5 * (1 + np.log(2 * np.pi))
    print(ll, ent)
    assert_true(np.absolute(ll + ent) < 3.0 / np.sqrt(n))
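
The comparison against ent works because the average log-likelihood of an ML-fitted Gaussian converges to the negative differential entropy, 0.5 * log(2 * pi * e * sigma**2). A standalone numeric check using scipy.stats.norm (a scipy name, not part of the test module):

import numpy as np
from scipy.stats import norm

rng = np.random.default_rng(0)
x = rng.standard_normal(1000)

# ML fit of a single Gaussian: sample mean and (biased) sample std
mu, sigma = x.mean(), x.std()

# the average log-likelihood under the fitted model ...
ll = norm.logpdf(x, loc=mu, scale=sigma).mean()
# ... approaches minus the differential entropy 0.5 * (1 + log(2*pi))
ent = 0.5 * (1 + np.log(2 * np.pi))
assert abs(ll + ent) < 3.0 / np.sqrt(1000)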
Example 8
def test_em_loglike4():
    dim = 5
    k = 1
    n = 1000
    scale = 3.0
    offset = 4.0
    x = offset + scale * nr.randn(n, dim)
    lgmm = GMM(k, dim)
    lgmm.initialize(x)
    lgmm.estimate(x)
    ll = lgmm.average_log_like(x)
    ent = dim * 0.5 * (1 + np.log(2 * np.pi * scale ** 2))
    print(ll, ent)
    assert_true(np.absolute(ll + ent) < dim * 3.0 / np.sqrt(n))
Example 9
def test_em_loglike6():
    """
    """
    dim = 1
    k = 1
    n = 100
    offset = 3.
    x = nr.randn(n,dim)
    y = offset+nr.randn(n,dim)
    lgmm = GMM(k,dim)
    lgmm.initialize(x)
    lgmm.estimate(x)
    ll1 =  lgmm.average_log_like(x)
    ll2 = lgmm.average_log_like(y)
    ent = 0.5*(1+np.log(2*np.pi))
    dkl = 0.5*offset**2
    print ll2, ll1,dkl
    assert ll2<ll1
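
The dkl printed above is the Kullback-Leibler divergence D(N(offset, 1) || N(0, 1)) = offset**2 / 2, i.e. 4.5 nats for offset = 3, which is the expected gap between ll1 and ll2; this is why the assertion holds for any positive offset. A standalone check of that identity (scipy.stats.norm is an assumption, not part of the test module):

import numpy as np
from scipy.stats import norm

rng = np.random.default_rng(1)
offset, n = 3.0, 100000
y = offset + rng.standard_normal(n)

# average log-likelihood of the shifted sample under the N(0, 1) model
ll2 = norm.logpdf(y).mean()
# it falls short of the negative entropy by roughly D_KL = offset**2 / 2
ent = 0.5 * (1 + np.log(2 * np.pi))
gap = -ent - ll2
assert abs(gap - 0.5 * offset ** 2) < 0.1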
Example 10
def test_em_loglike2():
    """
    """
    dim = 1
    k = 1
    n = 1000
    scale = 3.
    offset = 4.
    x = offset + scale * nr.randn(n,dim)
    lgmm = GMM(k,dim)
    lgmm.initialize(x)
    lgmm.estimate(x)
    ll = lgmm.average_log_like(x)
    ent = 0.5*(1+np.log(2*np.pi*scale**2))
    print ll, ent
    assert np.absolute(ll+ent)<3./np.sqrt(n)
Example 11
def test_em_gmm_cv(verbose=0):
    # Comparison of different GMMs using cross-validation

    # generate some data
    dim = 2
    xtrain = np.concatenate((nr.randn(100, dim), 3 + 2 * nr.randn(100, dim)))
    xtest = np.concatenate((nr.randn(1000, dim), 3 + 2 * nr.randn(1000, dim)))

    # estimate different GMMs on xtrain and test them on xtest
    prec_type = "full"
    k = 2
    maxiter = 300
    delta = 1.0e-4
    ll = []

    # model 1
    lgmm = GMM(k, dim, prec_type)
    lgmm.initialize(xtrain)
    bic = lgmm.estimate(xtrain, maxiter, delta)
    ll.append(lgmm.test(xtest).mean())

    prec_type = "diag"
    # model 2
    lgmm = GMM(k, dim, prec_type)
    lgmm.initialize(xtrain)
    bic = lgmm.estimate(xtrain, maxiter, delta)
    ll.append(lgmm.test(xtest).mean())

    for k in [1, 3, 10]:
        lgmm = GMM(k, dim, prec_type)
        lgmm.initialize(xtrain)
        bic = lgmm.estimate(xtrain, maxiter, delta)
        ll.append(lgmm.test(xtest).mean())

    assert_true(ll[4] < ll[1])
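
The same comparison can be sketched with scikit-learn, where GaussianMixture.score() returns the mean per-sample log-likelihood of its argument, i.e. exactly the held-out quantity collected in ll; this analogue uses scikit-learn names rather than the nipy API:

import numpy as np
from sklearn.mixture import GaussianMixture

rng = np.random.default_rng(2)
dim = 2
xtrain = np.concatenate((rng.standard_normal((100, dim)),
                         3 + 2 * rng.standard_normal((100, dim))))
xtest = np.concatenate((rng.standard_normal((1000, dim)),
                        3 + 2 * rng.standard_normal((1000, dim))))

# held-out average log-likelihood for a few model sizes
ll = {k: GaussianMixture(n_components=k, max_iter=300).fit(xtrain).score(xtest)
      for k in (1, 2, 3, 10)}
# an overfit 10-component model should score no better than k = 2 on xtest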