# Example #1
0
def _histgmm_weighted_pdf( x, mu, sigma, prior, n ):
    """Return prior * normpdf( x, mu, sigma ) in make_vect() layout.

    'mu', 'sigma' and 'prior' hold one value per column of 'x'.  Each is
    tiled down 'n' rows with kron() so that it has the same shape as 'x'
    before being passed through make_vect().
    NOTE(review): make_vect() and normpdf() are defined outside this
    block; make_vect() presumably flattens its argument -- confirm."""
    rows = num.ones( (n,1) )
    return normpdf( make_vect( x ).T,
                    make_vect( kron( rows, mu ) ).T,
                    make_vect( kron( rows, sigma ) ) ) \
                    * make_vect( kron( rows, prior ) )

def histgmm_main( mu0, sigma0, gamma, prior, w, x,
                  maxiters, minchange, minsigma, n, m ):
    """Run EM for a two-component Gaussian mixture, column by column.

    Shapes below are inferred from how the arrays are indexed here.

    mu0, sigma0 -- (2,m) starting means / standard deviations; row k
                   belongs to component k
    gamma       -- (n,m) responsibility of component 0 for each sample;
                   OVERWRITTEN IN PLACE
    prior       -- (2,m) component weights; OVERWRITTEN IN PLACE and also
                   returned (same array object)
    w           -- (n,m) sample weights
    x           -- (n,m) sample positions
    maxiters    -- maximum number of EM iterations
    minchange   -- a column stops being updated once none of its means or
                   sigmas moved by at least this much in an iteration
    minsigma    -- lower bound applied to every sigma (scalar, or one
                   value per column)
    n, m        -- number of rows / columns of 'x', 'w' and 'gamma'

    Returns (mu, sigma, prior, score).  'score' is
    sum( w * (gamma*p1 + (1-gamma)*p2) ) taken over the make_vect()
    layout, i.e. one total for all columns together (assuming make_vect()
    returns a 1-D array -- TODO confirm).  If 'maxiters' is 0 no update
    runs and 'mu' and 'sigma' come back as zeros."""
    mu = num.zeros( mu0.shape )
    sigma = num.zeros( mu0.shape )

    # builtin bool: the num.bool alias no longer exists in NumPy >= 1.24
    ischange = num.ones( m, dtype=bool )

    # sys.stdout.write() emits the same text as the old print statements
    # and is valid under both Python 2 and Python 3
    if PRINT_PROGRESS:
        sys.stdout.write( "EM iterations:" )
        sys.stdout.flush()
    for iteration in range( maxiters ):
        if PRINT_PROGRESS:
            sys.stdout.write( " %d" % iteration )
            sys.stdout.flush()

        # E-step: responsibility of component 0, only for the columns
        # that are still changing
        p1 = _histgmm_weighted_pdf( x[:,ischange], mu0[0,ischange],
                                    sigma0[0,ischange], prior[0,ischange], n )
        p2 = _histgmm_weighted_pdf( x[:,ischange], mu0[1,ischange],
                                    sigma0[1,ischange], prior[1,ischange], n )
        total = p1 + p2
        # where both densities vanish, split the sample evenly (0.5/1)
        vanished = total < eps
        p1[vanished] = 0.5
        total[vanished] = 1
        gamma[:,ischange] = (p1/total).reshape( (n,int( ischange.sum() )) )

        # M-step: update the prior, mean and sigma of each component
        for k, resp in ((0, gamma[:,ischange]), (1, 1 - gamma[:,ischange])):
            wresp = w[:,ischange] * resp
            prior[k,ischange] = wresp.sum( 0 )
            # components that received no weight keep their old mu/sigma
            wresp = wresp[:,prior[k,ischange] > 0]
            c_and_p = num.logical_and( ischange, prior[k,:] > 0 )
            if c_and_p.any():
                mu[k,c_and_p] = (wresp * x[:,c_and_p]).sum(0) / prior[k,c_and_p]
                # plain broadcasting: (n,k) samples minus (k,) means.  The
                # former transpose/kron form subtracted a (k,n) array from
                # an (n,k) one, which only lines up by accident.
                dev = x[:,c_and_p] - mu[k,c_and_p]
                sigma[k,c_and_p] = num.sqrt( (wresp * dev**2).sum(0) / prior[k,c_and_p] )

        # clamp from below; where() also broadcasts a per-column minsigma
        sigma = num.where( sigma < minsigma, minsigma, sigma )

        # see if there is a change
        ischange = num.logical_or( (num.abs( mu - mu0 ) >= minchange),
                                   (num.abs( sigma - sigma0 ) >= minchange) ).any(0)
        if not ischange.any(): break

        mu0 = mu.copy()
        sigma0 = sigma.copy()
    # endfor: EM iterations
    if PRINT_PROGRESS:
        sys.stdout.write( "\n" )

    # score the final fit over all columns
    p1 = _histgmm_weighted_pdf( x, mu[0,:], sigma[0,:], prior[0,:], n )
    p2 = _histgmm_weighted_pdf( x, mu[1,:], sigma[1,:], prior[1,:], n )
    resp = make_vect( gamma )
    score = (make_vect( w ) * (resp*p1 + (1-resp)*p2)).sum( 0 )

    return mu, sigma, prior, score
# Example #2
0
def histgmm( x, w, maxiters=100, minchange=None, minsigma=None ):
    """Gaussian mixture modeling using 2 clusters.

    x         -- n sample positions shared by every column of 'w'
                 (presumably histogram bin centers -- confirm with callers)
    w         -- sample weights, either length n or shape (n,m); each
                 column is normalized to sum to 1 on a private copy, the
                 caller's array is left untouched
    maxiters  -- maximum number of EM iterations per run
    minchange -- convergence threshold handed to histgmm_main();
                 defaults to (x[-1]-x[0]) / n/2
    minsigma  -- lower bound on sigma; defaults to (x[-1]-x[0]) / n/2

    EM is run twice from different starting points and the
    better-scoring run is kept.
    Returns 2-D arrays 'mu', 'sigma', 'prior', each of shape (2,m).
    Where one component ends up with a prior below eps, its mu and sigma
    are copied from the other component."""

    # float copy: normalizing in place would rescale the caller's data
    # and breaks for integer weights
    w = num.array( w, dtype=float )
    if len(w.shape) == 1:
        w = w.reshape( (w.size,1) )
    n,m = w.shape
    w /= w.sum( 0 )
    # one copy of the positions per column of w
    x = kron( num.ones((1,m)), num.asarray( x, dtype=float ).reshape( (n,1) ) )

    if minchange is None:
        minchange = (x[-1]-x[0]) / n/2
    if minsigma is None:
        minsigma = (x[-1]-x[0]) / n/2

    ## initialize ##

    # first initialization scheme: means at the two ends of the range,
    # equal priors
    mu0 = num.zeros( (2,m) )
    mu0[0,:] = x[0,:]
    mu0[1,:] = x[-1,:]
    sigma0 = (x[-1]-x[0]) / 4 * num.ones( (2,m) )
    gamma = 0.5 * num.ones( (n,m) )
    prior_a = 0.5 * num.ones( (2,m) )

    mu, sigma, prior, score1 = histgmm_main( mu0, sigma0, gamma, prior_a, w, x,
                                             maxiters, minchange, minsigma, n, m )

    # second initialization scheme: component 0 gets the samples whose
    # preceding cumulative weight (per column) is below one half
    c = num.zeros( (n,m) )
    c[1:,:] = w.cumsum( 0 )[:-1,:]
    gamma = (c < 0.5).astype( float )
    mu0 = num.zeros( (2,m) )
    sigma0 = num.zeros( (2,m) )
    # histgmm_main() updates its 'prior' argument in place and returns that
    # same array, so the second run needs its own array; reusing the first
    # one would overwrite the first run's result
    prior_b = num.zeros( (2,m) )
    for k, resp in ((0, gamma), (1, 1. - gamma)):
        wresp = w * resp
        prior_b[k,:] = wresp.sum( 0 )
        isprior = prior_b[k,:] > 0
        # the 'if' statement is necessary because sum( [] ) returns 0, not empty
        if isprior.any():
            wresp = wresp[:,isprior]
            mu0[k,isprior] = (wresp * x[:,isprior]).sum(0) / prior_b[k,isprior]
            dev = x[:,isprior] - mu0[k,isprior]
            sigma0[k,isprior] = num.sqrt( (wresp * dev**2).sum(0) / prior_b[k,isprior] )
    # clamp from below; where() also broadcasts a per-column minsigma
    sigma0 = num.where( sigma0 < minsigma, minsigma, sigma0 )

    mu2, sigma2, prior2, score2 = histgmm_main( mu0, sigma0, gamma, prior_b, w, x,
                                                maxiters, minchange, minsigma, n, m )

    ## choose best of mu, mu2

    # one flag per column; a single overall score is broadcast to all columns
    inds2 = num.zeros( m, dtype=bool )
    inds2[:] = num.asarray( score2 > score1 ).reshape( -1 )
    if inds2.any():
        mu[:,inds2] = mu2[:,inds2]
        sigma[:,inds2] = sigma2[:,inds2]
        prior[:,inds2] = prior2[:,inds2]

    # a component without weight mirrors the other one
    noprior = prior[0,:] < eps
    if noprior.any():
        mu[0,noprior] = mu[1,noprior]
        sigma[0,noprior] = sigma[1,noprior]
    noprior = prior[1,:] < eps
    if noprior.any():
        mu[1,noprior] = mu[0,noprior]
        sigma[1,noprior] = sigma[0,noprior]

    return mu, sigma, prior