Example #1
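The listing omits the module-level imports these snippets share; the set below is an assumed reconstruction (the import paths of GMM and assert_true are guesses, so they are left commented out):

import numpy as np
import numpy.random as nr
# from nipy.algorithms.clustering.gmm import GMM   # assumed location of the class under test
# from nose.tools import assert_true               # assumed test helper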
def test_em_gmm_heterosc(verbose=0):
    # testing the model on very ellipsoidal data: compute the bic values
    # for several values of k and check that the maximum is reached for
    # k = 1 or 2

    # generate some data
    dim = 2
    x = nr.randn(100, dim)
    x[:50, :] += 3
    # x[:,0]*=10

    # estimate different GMMs of that data
    maxiter = 100
    delta = 1.0e-4

    bic = np.zeros(5)
    for k in range(1, 6):
        lgmm = GMM(k, dim)
        lgmm.initialize(x)
        bic[k - 1] = lgmm.estimate(x, maxiter, delta, 0)
        if verbose:
            print "bic of the %d-classes model" % k, bic

    if verbose:
        # plot the result
        z = lgmm.map_label(x)
        from test_bgmm import plot2D

        plot2D(x, lgmm, z, show=1, verbose=0)
    assert bic[4] < bic[1]
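For comparison, here is a minimal sketch of the same BIC-based model selection using scikit-learn's GaussianMixture (an outside library, not the GMM class exercised above); note that sklearn's bic() is lower-is-better, the opposite sign convention of the value returned by GMM.estimate().

import numpy as np
import numpy.random as nr
from sklearn.mixture import GaussianMixture

nr.seed(0)
dim = 2
x = nr.randn(100, dim)
x[:50, :] += 3                      # two well-separated blobs, as above

bics = [GaussianMixture(n_components=k, random_state=0).fit(x).bic(x)
        for k in range(1, 6)]
print("best k by sklearn BIC (lower is better):", int(np.argmin(bics)) + 1)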
Example #2
def test_em_gmm_largedim(verbose=0):
    # testing the GMM model in larger dimensions

    # generate some data
    dim = 10
    x = nr.randn(100, dim)
    x[:30, :] += 1

    # estimate different GMMs of that data
    maxiter = 100
    delta = 1.0e-4

    for k in range(1, 3):
        lgmm = GMM(k, dim)
        lgmm.initialize(x)
        bic = lgmm.estimate(x, maxiter, delta, verbose)
        if verbose:
            print "bic of the %d-classes model" % k, bic

    z = lgmm.map_label(x)

    # define the correct labelling
    u = np.zeros(100)
    u[:30] = 1

    # check the correlation between the true labelling
    # and the computed one
    eta = np.absolute(np.dot(z - z.mean(), u - u.mean()) / (np.std(z) * np.std(u) * 100))
    assert_true(eta > 0.3)
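The eta statistic above is just the absolute Pearson correlation between the predicted and the true labelling; a small self-contained check against np.corrcoef (z here is a random stand-in for lgmm.map_label(x)):

import numpy as np

n = 100
rng = np.random.RandomState(0)
z = rng.randint(0, 2, n).astype(float)   # stand-in for the GMM labels
u = np.zeros(n)
u[:30] = 1                               # the "true" labelling of the test

eta = np.absolute(np.dot(z - z.mean(), u - u.mean())
                  / (np.std(z) * np.std(u) * n))
assert np.allclose(eta, np.abs(np.corrcoef(z, u)[0, 1]))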
Example #3
def test_em_gmm_diag(verbose=0):
    # Computing the BIC value for GMMs with different numbers of classes,
    # with diagonal covariance models. The BIC should be maximal for a
    # number of classes of 1 or 2

    # generate some data
    dim = 2
    x = np.concatenate((nr.randn(1000, dim), 3 + 2 * nr.randn(1000, dim)))

    # estimate different GMMs of that data
    maxiter = 100
    delta = 1.0e-8
    prec_type = "diag"

    bic = np.zeros(5)
    for k in range(1, 6):
        lgmm = GMM(k, dim, prec_type)
        lgmm.initialize(x)
        bic[k - 1] = lgmm.estimate(x, maxiter, delta, verbose)
        if verbose:
            print "bic of the %d-classes model" % k, bic

    z = lgmm.map_label(x)

    assert_true((z.max() + 1 == lgmm.k) & (bic[4] < bic[1]))
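The "diag" precision model is cheaper in parameters than the default full model, which is what the BIC penalty trades off against goodness of fit. A rough free-parameter count under a common GMM parameterization (an assumption for illustration, not read from the GMM class itself):

def n_free_params(k, d, prec_type="full"):
    # k - 1 free weights, k * d means, plus the covariance parameters
    weights, means = k - 1, k * d
    if prec_type == "full":
        covs = k * d * (d + 1) // 2      # symmetric d x d matrix per class
    else:                                 # "diag"
        covs = k * d                      # one variance per axis per class
    return weights + means + covs

for k in range(1, 6):
    print(k, n_free_params(k, 2, "diag"), n_free_params(k, 2, "full"))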
Example #4
def test_em_gmm_multi(verbose=0):
    # Playing with various initializations on the same data

    # generate some data
    dim = 2
    x = np.concatenate((nr.randn(1000, dim), 3 + 2 * nr.randn(100, dim)))

    # estimate different GMMs of that data
    maxiter = 100
    delta = 1.0e-4
    ninit = 5
    k = 2

    lgmm = GMM(k, dim)
    bgmm = lgmm.initialize_and_estimate(x, maxiter, delta, ninit, verbose)
    bic = bgmm.evidence(x)

    if verbose:
        print "bic of the best model", bic

    if verbose:
        # plot the result
        from test_bgmm import plot2D

        z = lgmm.map_label(x)
        plot2D(x, lgmm, z, show=1, verbose=0)

    assert_true(np.isfinite(bic))
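initialize_and_estimate appears to run ninit random initializations and keep the best fit; the same pattern can be sketched with scikit-learn's n_init argument (again an outside library, used only for illustration):

import numpy as np
import numpy.random as nr
from sklearn.mixture import GaussianMixture

nr.seed(1)
x = np.concatenate((nr.randn(1000, 2), 3 + 2 * nr.randn(100, 2)))

# n_init=5 refits from 5 random starts and keeps the run with the best
# likelihood, analogous to ninit above.
gm = GaussianMixture(n_components=2, n_init=5, random_state=0).fit(x)
print("mean per-sample log-likelihood:", gm.score(x))
assert np.isfinite(gm.bic(x))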
Example #5
def test_em_gmm_full(verbose=0):
    # Computing the BIC value for different configurations of a GMM with
    # full covariance matrices. The BIC should be maximal for a number of
    # classes of 1 or 2

    # generate some data
    dim = 2
    x = np.concatenate((nr.randn(100, dim), 3 + 2 * nr.randn(100, dim)))

    # estimate different GMMs of that data
    maxiter = 100
    delta = 1.0e-4

    bic = np.zeros(5)
    for k in range(1, 6):
        lgmm = GMM(k, dim)
        lgmm.initialize(x)
        bic[k - 1] = lgmm.estimate(x, maxiter, delta, verbose)
        if verbose:
            print "bic of the %d-classes model" % k, bic

    z = lgmm.map_label(x)
    assert_true(bic[4] < bic[1])