Esempio n. 1
0
def gmmmemberships(mu,S,priors,x,weights=1,initcovars=None):
    """Compute Gaussian-mixture memberships and the negative log-likelihood.

    Parameters
    ----------
    mu : array indexed as mu[j,:], one mean per cluster (k rows).
    S : array indexed as S[:,:,j], one covariance matrix per cluster.
        NOTE: modified in place -- if the Cholesky factorization of
        S[:,:,j] fails, that slice is overwritten with initcovars[:,:,j].
    priors : per-cluster mixing weights, broadcast against the (n,k)
        density matrix.
    x : array of shape (n,d), one data point per row.
    weights : scalar or per-point weights applied to each point's
        log-likelihood (default 1).
    initcovars : fallback covariances indexed like S. When None, a copy of
        S taken on entry is used (so the fallback then retries the very
        same matrix and the second factorization raises as well).

    Returns
    -------
    (gamma, e) : gamma is the (n,k) matrix of memberships; each row is
        normalized to sum to 1, except rows whose mixture density is
        exactly 0, which are left as all zeros. e is the weighted negative
        log-likelihood, computed *before* the zero guard, so it is +inf if
        any point has zero mixture density.

    Raises
    ------
    num.linalg.LinAlgError if the fallback covariance cannot be factorized
    either.
    """
    if initcovars is None:
        initcovars = S.copy()

    # number of data points
    n = x.shape[0]
    # dimensionality of data
    d = x.shape[1]
    # number of clusters
    k = mu.shape[0]

    # allocate output
    gamma = num.zeros((n,k))

    # normalization constant (2*pi)^(d/2)
    normal = (2.0*num.pi)**(num.double(d)/2.0)

    # compute the activation (unnormalized by prior) for each data point
    for j in range(k):
        diffs = x - mu[j,:]
        try:
            c = decomp.cholesky(S[:,:,j])
        except num.linalg.LinAlgError:
            if DEBUG: print('S[:,:,%d] = '%j + str(S[:,:,j]) + ' is singular')
            if DEBUG: print('Reverting to initcovars[:,:,%d] = '%j + str(initcovars[:,:,j]))
            S[:,:,j] = initcovars[:,:,j]
            c = decomp.cholesky(S[:,:,j])
        # NOTE(review): assumes decomp.cholesky returns the upper-triangular
        # factor (scipy.linalg.cholesky's default); solving against its
        # transpose then whitens diffs, so the row-wise sum of squares is
        # the Mahalanobis distance and prod(diag(c)) is sqrt(det(S_j)).
        temp = num.transpose(num.linalg.solve(num.transpose(c),num.transpose(diffs)))
        gamma[:,j] = num.exp(-.5*num.sum(temp**2,axis=1))/(normal*num.prod(num.diag(c)))

    # include prior
    gamma *= priors

    # mixture density of each data point (computed once, used twice)
    s = num.sum(gamma,axis=1)

    # compute negative log likelihood
    e = -num.sum(num.log(s)*weights)

    # make sure we don't divide by 0
    s[s==0] = 1
    gamma /= s.reshape(n,1)

    return (gamma,e)
# Debug comparison script. Relies on x, c, S, normal, nclusts and n being
# defined earlier (not visible here). print() is called with a single
# argument so the output is the same on Python 2 and Python 3.

# Squared distance from every data point to every cluster center, computed
# once via broadcasting (D) and once via an explicit tile (E); the printed
# rows are expected to match.
D = num.zeros((nclusts,n))
E = num.zeros((nclusts,n))
for i in range(nclusts):
    D[i,:] = num.sum( (x - c[i,:])**2, axis=1 )
    E[i,:] = num.sum((x - num.tile(c[i,:],[n,1]))**2,axis=1)
    print("D[%d] = "%i + str(D[i,:]))
    print("E[%d] = "%i + str(E[i,:]))

# Gaussian density of every point under every cluster, computed two ways:
# gamma1 uses the closed-form inverse/determinant of a 2x2 matrix (only
# elements [0,0], [0,1] and [1,1] of S are read, so S is treated as
# symmetric 2x2), gamma2 uses a Cholesky factorization.
gamma1 = num.zeros((n,nclusts))
gamma2 = num.zeros((n,nclusts))
for j in range(nclusts):
    print("j = " + str(j))
    print("c.shape = " + str(c.shape))
    diffs = x - c[j,:]
    # determinant of the 2x2 covariance
    zz = S[0,0,j]*S[1,1,j] - S[0,1,j]**2
    # quadratic form diffs' * inv(S) * diffs via the 2x2 adjugate
    temp1 = (diffs[:,0]**2*S[1,1,j]
            - 2*diffs[:,0]*diffs[:,1]*S[0,1,j]
            + diffs[:,1]**2*S[0,0,j]) / zz

    print("temp1 = " + str(temp1))
    # NOTE(review): assumes decomp.cholesky returns the upper-triangular
    # factor, so solving against its transpose whitens diffs -- confirm.
    ch = decomp.cholesky(S[:,:,j])
    temp2 = num.transpose(num.linalg.solve(num.transpose(ch),num.transpose(diffs)))
    temp2 = num.sum(temp2**2,axis=1)
    gamma1[:,j] = num.exp(-.5*temp1)/(normal*num.sqrt(zz))
    gamma2[:,j] = num.exp(-.5*temp2)/(normal*num.prod(num.diag(ch)))
    print("temp2 = " + str(temp2))
    print("sigma1 = " + str(num.sqrt(zz)))
    print("sigma2 = " + str(num.prod(num.diag(ch))))
    print("gamma1 = " + str(gamma1[:,j]))
    print("gamma2 = " + str(gamma2[:,j]))
Esempio n. 3
0
def fixsmallpriors(x, mu, S, priors, initcovars, gamma=None):
    """Re-seed mixture components whose prior has fallen below MINPRIOR.

    Each component with prior < 0.01 is moved onto the data point that
    currently has the lowest mixture density, its covariance is reset to
    the matching slice of initcovars, and the priors are rescaled so the
    re-seeded component has prior 0.01.

    Parameters
    ----------
    x : array of shape (n,d), one data point per row.
    mu : array indexed as mu[i,:], one mean per component. Modified in place.
    S : array indexed as S[:,:,i], one covariance per component. Modified
        in place.
    priors : array of component priors; must support in-place float
        scaling. Modified in place.
    initcovars : array indexed like S, used to reset covariances.
    gamma : optional (n,k) array of per-component densities of each point
        (without the priors). If given, the columns of re-seeded components
        are updated in place; if None, it is computed locally and discarded.

    Returns
    -------
    None. All results are communicated through the in-place updates.
    """
    MINPRIOR = 0.01
    issmall = priors < MINPRIOR
    if not issmall.any():
        return

    n = x.shape[0]
    d = x.shape[1]
    k = mu.shape[0]

    # normalization constant (2*pi)^(d/2)
    normal = (2.0 * num.pi) ** (num.double(d) / 2.0)

    def component_density(i):
        # Density of every row of x under component i, using the current
        # mu[i,:] and S[:,:,i].
        # NOTE(review): assumes decomp.cholesky returns the upper-triangular
        # factor, so solving against its transpose whitens diffs -- confirm.
        diffs = x - mu[i, :]
        c = decomp.cholesky(S[:, :, i])
        temp = num.transpose(num.linalg.solve(num.transpose(c), num.transpose(diffs)))
        return num.exp(-0.5 * num.sum(temp ** 2, axis=1)) / (normal * num.prod(num.diag(c)))

    if gamma is None:
        # compute the density for each x from each mixture component
        gamma = num.zeros((n, k))
        for i in range(k):
            gamma[:, i] = component_density(i)

    # loop through all small priors
    smalli, = num.where(issmall)
    for i in smalli:

        print("fixing cluster %d with small prior = %f: " % (i, priors[i]))

        # mixture density of each data point; recomputed every iteration
        # because both gamma and priors change as components are re-seeded
        p = num.sum(gamma * priors, axis=1)

        # choose the point with the smallest probability
        j = p.argmin()

        print("lowest density sample: x[%d] = " % j + str(x[j, :]))

        # create a new cluster centered on that point
        mu[i, :] = x[j, :]
        S[:, :, i] = initcovars[:, :, i]
        # scale every prior so that, once priors[i] is overwritten with
        # MINPRIOR, the others keep their relative proportions and share
        # the remaining 1 - MINPRIOR
        priors *= (1 - MINPRIOR) / (1.0 - priors[i])
        priors[i] = MINPRIOR

        print("reset cluster %d to: " % i)
        print("mu = " + str(mu[i, :]))
        print("S = ")
        print(S[:, :, i])
        print("S.shape: " + str(S[:, :, i].shape))
        print("prior = " + str(priors[i]))

        # update gamma for the re-seeded component
        gamma[:, i] = component_density(i)