Beispiel #1
0
def test_gaussian_em():
    """Check that Gaussian EM recovers the parameters of a generated mixture.

    Builds a model with ``GaussianMixtureModel.generate``, draws samples from
    it, fits ``GaussianMixtureEM`` to the samples and compares the estimated
    means, sigmas and weights with the model's own (``gmm.means``,
    ``gmm.sigmas``, ``gmm.weights``). Estimated means and weights are first
    re-ordered with ``closest_permuted_matrix`` / ``closest_permuted_vector``
    so that they line up with the reference ordering.

    The error figures are printed before they are asserted so that they are
    visible even when the test fails.

    Raises:
        AssertionError: If the relative error of the means is not below
            1e-1, any absolute sigma difference is not below 1, or the
            weight error norm is not below 1e-2.
    """
    fname = "gmm-3-10-0.7.npz"
    gmm = GaussianMixtureModel.generate(fname, 3, 3)
    k, d, M, S, w = gmm.k, gmm.d, gmm.means, gmm.sigmas, gmm.weights
    # NOTE(review): these are floats, exactly as before; confirm that
    # ``gmm.sample`` accepts non-integer counts.
    N, n = 1e6, 1e5

    X = gmm.sample(N, n)

    algo = GaussianMixtureEM(k, d)

    def report(i, O_, lhood):
        """Callback handed to ``algo.run``; deliberately does nothing."""

    # Only the estimated parameters (third return value) are inspected.
    _, _, O_ = algo.run(X, None, report)
    M_, S_, w_ = O_

    # Align the estimates with the reference ordering before comparing.
    M_ = closest_permuted_matrix(M, M_)
    w_ = closest_permuted_vector(w, w_)

    # Compute each error once and reuse it for both reporting and asserting.
    mean_err = norm(M - M_) / norm(M)
    sigma_diff = abs(S - S_)
    weight_err = norm(w - w_)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("%s %s" % (w, w_))
    print(mean_err)
    print(sigma_diff.max())
    print(weight_err)

    assert mean_err < 1e-1, "relative error of the means is too large"
    assert (sigma_diff < 1).all(), "sigma estimate is off by 1 or more"
    assert weight_err < 1e-2, "error of the weights is too large"
Beispiel #2
0
def test_univarate(n=3, d=20, e=20):
    """Run the border-basis solver on 100 generated univariate problems.

    Each trial generates a problem with
    ``generate_univariate_problem(n, d, e)``, passes ``I`` through
    ``add_noise`` (with ``sigma = 0``) and tries the tolerances 1e-7 .. 1e-4
    in increasing order until
    ``BB.BorderBasisFactory(eps + sigma).generate(R, I).zeros()`` completes
    without raising ``AssertionError``. The values found are then matched
    against ``V`` with ``closest_permuted_vector`` and compared.

    Progress and diagnostics are printed to standard output.

    Args:
        n, d, e: Forwarded unchanged to ``generate_univariate_problem``.

    Returns:
        The ``(R, I, V)`` triple of the first failing trial, so that it can
        be inspected or replayed. A trial fails when no tolerance worked,
        when the number of values found differs from ``len(V)``, or when the
        largest absolute difference exceeds ``max(1e-3, sigma)``. Returns
        ``None`` when every trial passes.
    """
    # for sigma in [0, 1e-4, 1e-3, 1e-2]:
    sigma = 0  # Noise level; constant across trials, so set once.
    for _ in range(100):
        R, I, V = generate_univariate_problem(n, d, e)
        print("V %s" % (V,))
        I = add_noise(I, sigma)
        for eps in [1e-7, 1e-6, 1e-5, 1e-4]:
            # Only the solver call is guarded: an AssertionError from it
            # means this tolerance did not work, so try the next one.
            try:
                V_ = BB.BorderBasisFactory(eps + sigma).generate(R, I).zeros()
            except AssertionError:
                continue
            V_ = array(V_).flatten()
            print("V_ %s" % (V_,))
            print("e %s" % (eps,))
            break
        else:
            # The tolerance loop finished without a successful solve.
            print("could not find 0")
            return R, I, V
        if len(V) != len(V_):
            print("incorrect lengths!")
            return R, I, V
        # Re-order V_ so that it lines up with V before comparing.
        V_ = closest_permuted_vector(V, V_)
        print("diff %s" % (V - V_,))
        if max(abs(V - V_)) > max(1e-3, sigma):
            print("error too large!")
            return R, I, V
    return None
Beispiel #3
0
def get_univariate_stats(n=3, d=20, e=20, sigma=0.0, tries=100):
    """Collect solver statistics over randomly generated univariate problems.

    For each of ``tries`` problems produced by
    ``generate_univariate_problem(n, d, e)``, ``I`` is passed through
    ``add_noise(I, sigma)`` and the tolerances 1e-7 .. 1e-1 are tried in
    increasing order until
    ``BB.BorderBasisFactory(sigma + eps).generate(R, I).zeros()`` completes
    without raising ``AssertionError``.

    Args:
        n, d, e: Forwarded unchanged to ``generate_univariate_problem``.
        sigma: Noise level passed to ``add_noise`` and added to every
            tolerance tried.
        tries: Number of problems to generate.

    Returns:
        A tuple ``(status, epses, err)`` of three lists with one entry per
        problem:

        * ``status`` -- 1: no tolerance worked; 2: fewer values were found
          than ``len(V)``; 3: more values were found than ``len(V)``;
          4: the counts match.
        * ``epses`` -- the first tolerance that worked, or the sentinel 1
          when none did.
        * ``err`` -- ``norm(V - V_)`` after zero-padding the shorter vector
          and aligning ``V_`` to ``V``, or the sentinel 1 when no tolerance
          worked.
    """
    status, epses, err = [], [], []
    for _ in range(tries):
        R, I, V = generate_univariate_problem(n, d, e)
        I = add_noise(I, sigma)
        for eps in [1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]:
            # Only the solver call is guarded: an AssertionError from it
            # means this tolerance did not work, so try the next one.
            try:
                V_ = BB.BorderBasisFactory(sigma + eps).generate(R, I).zeros()
            except AssertionError:
                continue
            V_ = array(V_).flatten()
            epses.append(eps)
            break
        else:
            # No tolerance worked: record sentinel values for this problem.
            status.append(1)
            epses.append(1)
            err.append(1)
            continue
        if len(V_) < len(V):
            status.append(2)
            # Pad V_ with zeros up to the length of V.
            V_ = np.hstack((V_, np.zeros(len(V) - len(V_))))
        elif len(V_) > len(V):
            status.append(3)
            # Pad V with zeros up to the length of V_.
            V = np.hstack((V, np.zeros(len(V_) - len(V))))
        else:
            status.append(4)

        # Re-order V_ so that it lines up with V before measuring the error.
        V_ = closest_permuted_vector(V, V_)
        err.append(norm(V - V_))
    return status, epses, err
Beispiel #4
0
def test_gaussian_em():
    """Check that Gaussian EM recovers the parameters of a generated mixture.

    Builds a model with ``GaussianMixtureModel.generate``, draws samples from
    it, fits ``GaussianMixtureEM`` to the samples and compares the estimated
    means, sigmas and weights with the model's own (``gmm.means``,
    ``gmm.sigmas``, ``gmm.weights``). Estimated means and weights are first
    re-ordered with ``closest_permuted_matrix`` / ``closest_permuted_vector``
    so that they line up with the reference ordering.

    The error figures are printed before they are asserted so that they are
    visible even when the test fails.

    Raises:
        AssertionError: If the relative error of the means is not below
            1e-1, any absolute sigma difference is not below 1, or the
            weight error norm is not below 1e-2.
    """
    fname = "gmm-3-10-0.7.npz"
    gmm = GaussianMixtureModel.generate(fname, 3, 3)
    k, d, M, S, w = gmm.k, gmm.d, gmm.means, gmm.sigmas, gmm.weights
    # NOTE(review): these are floats, exactly as before; confirm that
    # ``gmm.sample`` accepts non-integer counts.
    N, n = 1e6, 1e5

    X = gmm.sample(N, n)

    algo = GaussianMixtureEM(k, d)

    def report(i, O_, lhood):
        """Callback handed to ``algo.run``; deliberately does nothing."""

    # Only the estimated parameters (third return value) are inspected.
    _, _, O_ = algo.run(X, None, report)
    M_, S_, w_ = O_

    # Align the estimates with the reference ordering before comparing.
    M_ = closest_permuted_matrix(M, M_)
    w_ = closest_permuted_vector(w, w_)

    # Compute each error once and reuse it for both reporting and asserting.
    mean_err = norm(M - M_) / norm(M)
    sigma_diff = abs(S - S_)
    weight_err = norm(w - w_)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("%s %s" % (w, w_))
    print(mean_err)
    print(sigma_diff.max())
    print(weight_err)

    assert mean_err < 1e-1, "relative error of the means is too large"
    assert (sigma_diff < 1).all(), "sigma estimate is off by 1 or more"
    assert weight_err < 1e-2, "error of the weights is too large"
Beispiel #5
0
def test_univarate(n=3, d=20, e=20):
    """Run the border-basis solver on 100 generated univariate problems.

    Each trial generates a problem with
    ``generate_univariate_problem(n, d, e)``, passes ``I`` through
    ``add_noise`` (with ``sigma = 0``) and tries the tolerances 1e-7 .. 1e-4
    in increasing order until
    ``BB.BorderBasisFactory(eps + sigma).generate(R, I).zeros()`` completes
    without raising ``AssertionError``. The values found are then matched
    against ``V`` with ``closest_permuted_vector`` and compared.

    Progress and diagnostics are printed to standard output.

    Args:
        n, d, e: Forwarded unchanged to ``generate_univariate_problem``.

    Returns:
        The ``(R, I, V)`` triple of the first failing trial, so that it can
        be inspected or replayed. A trial fails when no tolerance worked,
        when the number of values found differs from ``len(V)``, or when the
        largest absolute difference exceeds ``max(1e-3, sigma)``. Returns
        ``None`` when every trial passes.
    """
    #for sigma in [0, 1e-4, 1e-3, 1e-2]:
    sigma = 0  # Noise level; constant across trials, so set once.
    for _ in range(100):
        R, I, V = generate_univariate_problem(n, d, e)
        print("V %s" % (V,))
        I = add_noise(I, sigma)
        for eps in [1e-7, 1e-6, 1e-5, 1e-4]:
            # Only the solver call is guarded: an AssertionError from it
            # means this tolerance did not work, so try the next one.
            try:
                V_ = BB.BorderBasisFactory(eps + sigma).generate(R, I).zeros()
            except AssertionError:
                continue
            V_ = array(V_).flatten()
            print("V_ %s" % (V_,))
            print("e %s" % (eps,))
            break
        else:
            # The tolerance loop finished without a successful solve.
            print("could not find 0")
            return R, I, V
        if len(V) != len(V_):
            print("incorrect lengths!")
            return R, I, V
        # Re-order V_ so that it lines up with V before comparing.
        V_ = closest_permuted_vector(V, V_)
        print("diff %s" % (V - V_,))
        if max(abs(V - V_)) > max(1e-3, sigma):
            print("error too large!")
            return R, I, V
    return None
Beispiel #6
0
def get_univariate_stats(n=3, d=20, e=20, sigma=0., tries=100):
    """Collect solver statistics over randomly generated univariate problems.

    For each of ``tries`` problems produced by
    ``generate_univariate_problem(n, d, e)``, ``I`` is passed through
    ``add_noise(I, sigma)`` and the tolerances 1e-7 .. 1e-1 are tried in
    increasing order until
    ``BB.BorderBasisFactory(sigma + eps).generate(R, I).zeros()`` completes
    without raising ``AssertionError``.

    Args:
        n, d, e: Forwarded unchanged to ``generate_univariate_problem``.
        sigma: Noise level passed to ``add_noise`` and added to every
            tolerance tried.
        tries: Number of problems to generate.

    Returns:
        A tuple ``(status, epses, err)`` of three lists with one entry per
        problem:

        * ``status`` -- 1: no tolerance worked; 2: fewer values were found
          than ``len(V)``; 3: more values were found than ``len(V)``;
          4: the counts match.
        * ``epses`` -- the first tolerance that worked, or the sentinel 1
          when none did.
        * ``err`` -- ``norm(V - V_)`` after zero-padding the shorter vector
          and aligning ``V_`` to ``V``, or the sentinel 1 when no tolerance
          worked.
    """
    status, epses, err = [], [], []
    for _ in range(tries):
        R, I, V = generate_univariate_problem(n, d, e)
        I = add_noise(I, sigma)
        for eps in [1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]:
            # Only the solver call is guarded: an AssertionError from it
            # means this tolerance did not work, so try the next one.
            try:
                V_ = BB.BorderBasisFactory(sigma + eps).generate(R, I).zeros()
            except AssertionError:
                continue
            V_ = array(V_).flatten()
            epses.append(eps)
            break
        else:
            # No tolerance worked: record sentinel values for this problem.
            status.append(1)
            epses.append(1)
            err.append(1)
            continue
        if len(V_) < len(V):
            status.append(2)
            # Pad V_ with zeros up to the length of V.
            V_ = np.hstack((V_, np.zeros(len(V) - len(V_))))
        elif len(V_) > len(V):
            status.append(3)
            # Pad V with zeros up to the length of V_.
            V = np.hstack((V, np.zeros(len(V_) - len(V))))
        else:
            status.append(4)

        # Re-order V_ so that it lines up with V before measuring the error.
        V_ = closest_permuted_vector(V, V_)
        err.append(norm(V - V_))
    return status, epses, err