Python gmmmemberships Examples

Programming Language: Python

Namespace/Package Name: kcluster

Method/Function: gmmmemberships

Examples at hotexamples.com: 4

Python gmmmemberships - 4 examples found. These are the top-rated real-world Python examples of kcluster.gmmmemberships extracted from open-source projects. You can rate examples to help us improve their quality.

Example #1

Show file

File: estconncomps.py Project: BackupTheBerlios/ctrax-svn

def trysplit(ellipses,i,isdone,L,dfore):
    """Try to split the connected component fit by ellipses[i] into several ellipses.

    Two strategies are tried in order:

    1. Raise the foreground threshold over the component's bounding box
       until its pixels break into more than one connected component
       (pieces of fewer than 3 pixels are discarded).  The pieces
       initialize a Gaussian mixture that is refined with one
       kcluster.gmmmemberships / kcluster.gmmupdate step, and mixture
       components whose ellipse area is <= params.minshape.area are dropped.
    2. If fewer than two components survive step 1, fit mixtures with
       2, 3, ... components using kcluster.gmm and keep the largest
       number of components for which the summed area error still
       decreased.

    Parameters
    ----------
    ellipses : list of ellipse objects.  ellipses[i] is overwritten in
        place with the first piece of a successful split; the remaining
        pieces are appended.
    i : index of the target to split.  Its pixels are those where
        L == i+1 (labels are 1-based, targets are 0-based).
    isdone : array of flags, one per ellipse.  isdone[i] is set in place.
    L : label image.  Pixels assigned to a newly appended ellipse are
        relabeled in place with len(ellipses) at the time of appending.
    dfore : per-pixel values indexed like L; used as data weights and
        compared against the thresholds in params.
        (presumably the background-normalized foreground distance --
        TODO confirm against caller)

    Returns
    -------
    isdone, extended by one entry per appended ellipse.  num.append
    builds a new array, so callers must use the returned value rather
    than rely on the argument having been extended.

    Relies on the module-level names DEBUG, num, params, meas, kcluster,
    cov2ell and Ellipse, none of which are defined in this block.
    """

    if DEBUG: print 'trying to split target i=%d: '%i
    if DEBUG: print str(ellipses[i])

    # get datapoints
    # x has one row per pixel of the target, columns are (column, row)
    # i.e. (x, y) image coordinates; w holds the matching dfore values.
    (r,c) = num.where(L==i+1)
    x = num.hstack((c.reshape(c.size,1),r.reshape(r.size,1)))
    w = dfore[L==i+1]
    # NOTE(review): ndata is not referenced again in this function.
    ndata = r.size

    ## try increasing threshold

    # get a bounding box around L == i+1
    c1 = num.min(c);
    c2 = num.max(c);
    r1 = num.min(r);
    r2 = num.max(r);
    # work on a copy so that zeroing pixels below does not touch dfore
    dforebox = dfore[r1:r2+1,c1:c2+1].copy()
    # NOTE(review): dforebox0 is not referenced again in this function.
    dforebox0 = dforebox.copy()
    if DEBUG: print 'range r = [%d, %d], range c = [%d, %d]'%(r1,r2,c1,c2)

    # only look at cc i+1
    # pixels in the box that belong to other labels are zeroed so they
    # can never pass a threshold
    Lbox = L[r1:r2+1,c1:c2+1].copy()
    # NOTE(review): isforebox0 is not referenced again in this function.
    isforebox0 = Lbox == i+1
    dforebox[Lbox!=i+1] = 0

    # 20 evenly spaced thresholds from params.n_bg_std_thresh_low up to the
    # smaller of params.n_bg_std_thresh and the largest value in the box
    for currthresh in num.linspace(params.n_bg_std_thresh_low,
                                   min(params.n_bg_std_thresh,
                                       num.max(dforebox)),20):

        # try raising threshold to currthresh
        isforebox = dforebox >= currthresh

        # compute connected components
        # Lbox is rebound to the new label image (labels 1..ncomponents,
        # as used by the comparisons with j+1 below)
        (Lbox,ncomponents) = meas.label(isforebox)

        if DEBUG: print 'for thresh = %.2f, ncomponents = %d'%(currthresh,ncomponents)

        if ncomponents == 1:
            continue

        # remove components with too small area
        # (fewer than 3 pixels); removed collects their 0-based indices
        removed = []
        for j in range(ncomponents):
            areaj = num.sum(Lbox==j+1)
            if areaj < 3:
                Lbox[Lbox==j+1] = 0
                removed += j,
        if DEBUG: print 'removed = ' + str(removed)
        # renumber the surviving labels so they are contiguous again:
        # each label moves down by the number of removed labels below it
        for j in range(ncomponents):
            if num.any(num.array(removed)==j):
                continue
            nsmaller = num.sum(num.array(removed)<j)
            Lbox[Lbox==j+1] = j+1-nsmaller
        ncomponents -= len(removed)
        if DEBUG: print 'ncomponents = ' + str(ncomponents)

        # if we've created a new connected component
        if ncomponents > 1:
            if DEBUG: print 'found %d components at thresh %f'%(ncomponents,currthresh)
            break
    # end loop trying to increase threshold
    # ncomponents now holds the count from the last threshold tried; it may
    # be 0 or 1 if no threshold produced a split

    if ncomponents > 1:

        # succeeded in splitting into multiple connected components 
        # by raising the threshold, use this as initialization for GMM

        # get clusters for each cc
        # mu is ncomponents x 2 (x, y), S is 2 x 2 x ncomponents
        mu = num.zeros([ncomponents,2])
        S = num.zeros([2,2,ncomponents])
        priors = num.zeros(ncomponents)
        for j in range(ncomponents):
            BWI = Lbox == (j+1)
            wj = dforebox[BWI]
            # normalize weights
            # guard against division by zero when all weights are zero
            Z = sum(wj)
            if Z == 0:
                Z = 1
            # compute mean
            # rj, cj are box-relative; the box origin (c1, r1) is added back
            # to express the mean in full-image coordinates
            (rj,cj) = num.where(BWI)
            centerX = sum(cj*wj)/Z
            centerY = sum(rj*wj)/Z
            mu[j,0] = centerX + c1
            mu[j,1] = centerY + r1
            # compute variance
            # weighted second moments minus squared means (translation
            # invariant, so box-relative coordinates are fine here)
            S[0,0,j] = sum(wj*cj**2)/Z - centerX**2
            S[1,1,j] = sum(wj*rj**2)/Z - centerY**2
            S[0,1,j] = sum(wj*cj*rj)/Z - centerX*centerY
            S[1,0,j] = S[0,1,j]
            # fix small variances
            # clamp eigenvalues below .01 and rebuild the covariance
            [D,V] = num.linalg.eig(S[:,:,j])
            if num.any(D<.01):
                D[D<.01] = .01
                S[:,:,j] = num.dot(V, num.dot(num.diag(D), V.T ))

            # prior is proportional to the pixel count of the piece
            priors[j] = rj.size
            if DEBUG: print 'component %d: mu = '%j + str(mu[j,:]) + ', S = ' + str(S[:,:,j]) + ', prior = ' + str(priors[j])
        priors = priors / num.sum(priors)
        # label all points
        (gamma,e) = kcluster.gmmmemberships(mu,S,priors,x,w)
        # recompute clusters
        # NOTE(review): the return value is discarded, so this presumably
        # updates mu, S and priors in place -- confirm in kcluster
        kcluster.gmmupdate(mu,S,priors,gamma,x,w)

        if DEBUG: print 'after updating, '
        if DEBUG:
            for j in range(ncomponents):
                print 'component %d: mu = '%j + str(mu[j,:]) + ', S = ' + str(S[:,:,j]) + ', prior = ' + str(priors[j])

        # ellipse area of every mixture component
        # NOTE(review): the factor 4.0 suggests cov2ell returns quarter-axis
        # lengths -- confirm against cov2ell
        area = num.zeros(ncomponents)
        for j in range(ncomponents):
            (major,minor,angle) = cov2ell(S[:,:,j])
            area[j] = major*minor*num.pi*4.0
        
        # drop mixture components that are not larger than the minimum area
        removed, = num.where(area <= params.minshape.area)
        if removed.size > 0:
            if DEBUG: print 'removing components ' + str(removed)
            mu = num.delete(mu,removed,axis=0)
            S = num.delete(S,removed,axis=2)
            priors = num.delete(priors,removed)
            ncomponents -= removed.size
            if DEBUG: print "now there are " + str(ncomponents) + " components"

        if ncomponents > 1:
            # recompute memberships
            (gamma,e) = kcluster.gmmmemberships(mu,S,priors,x,w)
            
            # store
            # the *0 variables hold the accepted split that the final
            # section of the function writes back into ellipses / L
            mu0 = num.zeros([ncomponents,2])
            mu0[:,0] = mu[:,0]
            mu0[:,1] = mu[:,1]
            gamma0 = gamma
            major0 = num.zeros(ncomponents)
            minor0 = num.zeros(ncomponents)
            angle0 = num.zeros(ncomponents)
            area0 = num.zeros(ncomponents)
            #if ncomponents > 2:
            #    print 'Split component %d into %d components'%(i,ncomponents)
            #    params.DOBREAK = True
            for j in range(ncomponents):
                (major0[j],minor0[j],angle0[j]) = cov2ell(S[:,:,j])
                area0[j] = major0[j]*minor0[j]*num.pi*4.0
                if DEBUG: print 'component %d: mu = '%j + str(mu0[j,:]) + ', major = ' + str(major0[j]) + ', minor = ' + str(minor0[j]) + ', angle = ' + str(angle0[j]) + ', area = ' + str(area0[j])

            ## are any of the components too small?
            #if num.any(area0 < params.minshape.area):
            #    print 'split by raising threshold, but one of the components was too small, minarea = ' + str(params.minshape.area)
            #    # undo split
            #    ncomponents = 1

    # end if ncomponents > 1 (true if raising threshold successfully 
    # split the component)

    # ncomponents can be 0 here (nothing passed a threshold, or every
    # component was removed); treat that like "no split found".
    # This message is printed regardless of DEBUG.
    if ncomponents < 1:
        print "ncomponents = " + str(ncomponents) + " resetting to 1"
        ncomponents = 1

    if ncomponents == 1:

        # not able to split the connected component by raising 
        # the threshold 

        if DEBUG: print 'clustering '
        # compute the difference between the observation area and the
        # mean area
        # err0 is the error of the best configuration accepted so far
        err0 = num.abs(ellipses[i].area - params.meanshape.area)    

        # try splitting into more clusters
        ncomponents = 2
        while True:
            if DEBUG: print 'ncomponents = %d'%ncomponents
            (mu,S,priors,gamma,negloglik) = kcluster.gmm(x,ncomponents,weights=w,kmeansthresh=.1,emthresh=.1,mincov=.25)
            #(mu,S,priors,gamma,negloglik) = gmm(x,ncomponents,weights=w,nreplicates=4,kmeansiter=10,kmeansthresh=.1,emiters=10,emthresh=.1)
            if DEBUG: print 'negloglik = %.2f'%negloglik

            # compute the average distance between each clusters area and the
            # mean area; greatly penalize areas smaller than minarea
            # (the code sums the per-cluster errors; it does not divide by
            # the number of clusters)
            err = 0
            major = num.zeros(ncomponents)
            minor = num.zeros(ncomponents)
            angle = num.zeros(ncomponents)
            area = num.zeros(ncomponents)
            for j in range(ncomponents):
                (major[j],minor[j],angle[j]) = cov2ell(S[:,:,j])
                area[j] = major[j]*minor[j]*num.pi*4.0
                if area[j] < params.minshape.area:
                    err += 10000
                    if DEBUG: print 'area[%d] < params.minshape.area = %d, incrementing error by 10000'%(j,round(params.minshape.area))
                else:
                    err += num.abs(params.meanshape.area - area[j])
                    if DEBUG: print 'difference between mean area = %d and area[%d] = %d is %d'%(round(params.meanshape.area),j,round(area[j]),round(num.abs(params.meanshape.area - area[j])))
            # end for j in range(ncomponents)
            if DEBUG: print 'error for ncomponents = %d is %f'%(ncomponents,err)

            # stop as soon as adding a component no longer lowers the error;
            # the *0 variables then still hold the previous (better) fit
            if err >= err0:
                break
            ncomponents += 1
            mu0 = mu.copy()
            major0 = major.copy()
            minor0 = minor.copy()
            angle0 = angle.copy()
            area0 = area.copy()
            err0 = err
            gamma0 = gamma.copy()

        # end while True
    
        # the loop exits with ncomponents one past the last accepted fit;
        # if even 2 components were rejected this leaves ncomponents == 1
        ncomponents -= 1
    
    # end if ncomponents == 1 (was not able to form multiple ccs by 
    # raising threshold)

    if ncomponents == 1:
        isdone[i] = True
        if DEBUG: print 'decided not to split'
        return isdone
    else:
        # get id
        # idx[p] is the component with the largest membership for pixel p
        idx = num.argmax(gamma0,axis=1)
        # replace
        # component 0 takes over the slot (and label i+1) of the original
        ellipses[i].center.x = mu0[0,0]
        ellipses[i].center.y = mu0[0,1]
        ellipses[i].major = major0[0]
        ellipses[i].minor = minor0[0]
        ellipses[i].angle = angle0[0]
        ellipses[i].area = area0[0]
        # if small enough, set to done
        isdone[i] = ellipses[i].area <= params.maxshape.area
        # add new
        for j in range(1,ncomponents):
            # NOTE(review): minor is passed before major here -- confirm this
            # matches the parameter order of the Ellipse constructor
            ellipse = Ellipse(mu0[j,0],mu0[j,1],minor0[j],major0[j],angle0[j],area0[j])
            ellipses.append(ellipse)
            # num.append returns a new array, hence the rebinding of isdone
            isdone = num.append(isdone,ellipse.area <= params.maxshape.area)
            # relabel this component's pixels with the new ellipse's
            # 1-based index (its list position plus one)
            L[r[idx==j],c[idx==j]] = len(ellipses)
        if DEBUG: print 'split into %d ellipses: '%ncomponents
        if DEBUG: print 'ellipses[%d] = '%i + str(ellipses[i])
        if DEBUG:
            for j in range(1,ncomponents):
                print 'ellipses[%d] = '%(len(ellipses)-j) + str(ellipses[-j])
        return isdone

Example #2

Show file

        else:

            (mu, S, priors) = kcluster.gmminit(x, k, weights=w)
            random.set_state(state)
            (mu0, S0, priors0) = kcluster0.gmminit(x, k, weights=w)
            if True or \
                    num.max(num.abs(mu - mu0)) > .001 or \
                    num.max(num.abs(S-S0)) > .001 or \
                    num.max(num.abs(priors - priors0)) > .001:
                print "max(|mu - mu0|) = " + str(num.max(num.abs(mu - mu0)))
                print "max(|S - S0|) = " + str(num.max(num.abs(S - S0)))
                print "max(|priors - priors0|) = " + str(
                    num.max(num.abs(priors - priors0)))
            for i in range(100):
                [gamma, newe] = kcluster.gmmmemberships(mu, S, priors, x, w)
                [gamma0,
                 newe0] = kcluster0.gmmmemberships(mu0, S0, priors0, x, w)
                if True or \
                        num.max(num.abs(gamma-gamma0)) > .001 or \
                        num.abs(newe-newe0) > .001:
                    print "i = %d: max(|gamma - gamma0|) = " % i + str(
                        num.max(num.abs(gamma - gamma0)))
                    print "|newe - newe0| = " + str(num.abs(newe - newe0))
                kcluster.gmmupdate(mu, S, priors, gamma, x, w)
                kcluster0.gmmupdate(mu0, S0, priors0, gamma0, x, w)
                if True or \
                        num.max(num.abs(mu - mu0)) > .001 or \
                        num.max(num.abs(S-S0)) > .001 or \
                        num.max(num.abs(priors - priors0)) > .001:
                    print "i = %d: max(|mu - mu0|) = " % i + str(

Example #3

Show file

File: newestconncomps.py Project: BackupTheBerlios/ctrax-svn

def trysplit(ellipses,i,isdone,L,dfore):
    """Try to split the connected component fit by ellipses[i] into several ellipses.

    Two strategies are tried in order:

    1. Raise the foreground threshold over the component's bounding box
       (10 steps from params.n_bg_std_thresh up to at most three times
       that value) until its pixels break into more than one connected
       component; pieces of fewer than 3 pixels are discarded.  The
       pieces initialize a Gaussian mixture that is refined with one
       kcluster.gmmmemberships / kcluster.gmmupdate step.  The split is
       undone if any resulting ellipse is smaller than
       params.minshape.area.
    2. If step 1 does not yield a split, fit mixtures with 2, 3, ...
       components using kcluster.gmm and keep the largest number of
       components for which the summed area error still decreased.

    Parameters
    ----------
    ellipses : list of ellipse objects.  ellipses[i] is overwritten in
        place with the first piece of a successful split; the remaining
        pieces are appended.
    i : index of the target to split.  Its pixels are those where
        L == i+1 (labels are 1-based, targets are 0-based).
    isdone : array of flags, one per ellipse.  isdone[i] is set in place.
    L : label image.  Pixels assigned to a newly appended ellipse are
        relabeled in place with len(ellipses) at the time of appending.
    dfore : per-pixel values indexed like L; used as data weights and
        compared against the thresholds.  It is not modified.

    Returns
    -------
    isdone, extended by one entry per appended ellipse.  num.append
    builds a new array, so the extension is only visible through the
    return value.  Callers that ignore the return value see the same
    in-place effects as before (isdone[i], ellipses, L).

    Relies on the module-level names num, params, meas, kcluster,
    cov2ell and Ellipse, none of which are defined in this block.
    """

    # single-argument print(...) behaves the same as the print statement
    # under Python 2 and is also valid Python 3
    print('trying to split target i=%d'%i)

    # get datapoints: one row per pixel, columns are (column, row)
    (r,c) = num.where(L==i+1)
    x = num.hstack((c.reshape(c.size,1),r.reshape(r.size,1)))
    w = dfore[L==i+1]

    ## try increasing threshold

    # get a bounding box around L == i+1
    c1 = num.min(c)
    c2 = num.max(c)
    r1 = num.min(r)
    r2 = num.max(r)
    # copy: the slice is a view, and zeroing pixels below must not wipe
    # out other components' values in the caller's dfore
    dforebox = dfore[r1:r2+1,c1:c2+1].copy()

    # only look at cc i+1
    Lbox = L[r1:r2+1,c1:c2+1].copy()
    dforebox[Lbox!=i+1] = 0

    for currthresh in num.linspace(params.n_bg_std_thresh,
                                   min(3*params.n_bg_std_thresh,
                                       num.max(dforebox)),10):

        # try raising threshold to currthresh
        isforebox = dforebox >= currthresh

        # compute connected components (labels 1..ncomponents)
        (Lbox,ncomponents) = meas.label(isforebox)

        if ncomponents == 1:
            continue

        # remove components with too small area
        removed = []
        for j in range(ncomponents):
            areaj = num.sum(Lbox==j+1)
            if areaj < 3:
                Lbox[Lbox==j+1] = 0
                removed.append(j)
        # renumber the survivors so labels are contiguous again; going in
        # increasing order guarantees the target label is already vacant
        for j in range(ncomponents):
            if j in removed:
                continue
            nsmaller = sum(1 for k in removed if k < j)
            if nsmaller > 0:
                Lbox[Lbox==j+1] = j+1-nsmaller
        ncomponents -= len(removed)

        # if we've created a new connected component
        if ncomponents > 1:
            print('found %d components at thresh %f'%(ncomponents,currthresh))
            break

    if ncomponents > 1:

        # get clusters for each cc
        # mu is ncomponents x 2 (x, y), S is 2 x 2 x ncomponents
        mu = num.zeros([ncomponents,2])
        S = num.zeros([2,2,ncomponents])
        priors = num.zeros(ncomponents)
        for j in range(ncomponents):
            BWI = Lbox == (j+1)
            wj = dforebox[BWI]
            # normalize weights; avoid dividing by zero
            Z = sum(wj)
            if Z == 0:
                Z = 1
            # compute mean in box coordinates, then shift by the box
            # origin to get full-image coordinates
            (rj,cj) = num.where(BWI)
            centerX = sum(cj*wj)/Z
            centerY = sum(rj*wj)/Z
            mu[j,0] = centerX + c1
            mu[j,1] = centerY + r1
            # compute variance (weighted second moments minus squared means)
            S[0,0,j] = sum(wj*cj**2)/Z - centerX**2
            S[1,1,j] = sum(wj*rj**2)/Z - centerY**2
            S[0,1,j] = sum(wj*cj*rj)/Z - centerX*centerY
            S[1,0,j] = S[0,1,j]
            # prior is proportional to the pixel count of the piece
            priors[j] = rj.size
            print('component %d: mu = '%j + str(mu[j,:]) + ', S = ' + str(S[:,:,j]) + ', prior = ' + str(priors[j]))
        priors = priors / num.sum(priors)
        # label all points
        (gamma,e) = kcluster.gmmmemberships(mu,S,priors,x,w)
        # recompute clusters
        # NOTE(review): the return value is discarded, so this presumably
        # updates mu, S and priors in place -- confirm in kcluster
        kcluster.gmmupdate(mu,S,priors,gamma,x,w)

        print('after updating, ')
        for j in range(ncomponents):
            print('component %d: mu = '%j + str(mu[j,:]) + ', S = ' + str(S[:,:,j]) + ', prior = ' + str(priors[j]))

        # remove components with too small area
        area = num.zeros(ncomponents)
        for j in range(ncomponents):
            (major,minor,angle) = cov2ell(S[:,:,j])
            area[j] = major*minor*num.pi*4.0
            print('component %d: mu = '%j + str(mu[j,:]) + ', major = ' + str(major) + ', minor = ' + str(minor) + ', angle = ' + str(angle) + ', area = ' + str(area[j]))

        removed, = num.where(area <= params.maxareadelete)
        if removed.size > 0:
            print('removing components ' + str(removed))
            mu = num.delete(mu,removed,axis=0)
            S = num.delete(S,removed,axis=2)
            # priors must be pruned together with mu and S, otherwise the
            # membership computation below gets mismatched arguments
            priors = num.delete(priors,removed)
            ncomponents -= removed.size

        if ncomponents > 1:
            # recompute memberships
            (gamma,e) = kcluster.gmmmemberships(mu,S,priors,x,w)

            # store the accepted split in the *0 variables, which the
            # final section writes back into ellipses / L
            mu0 = num.zeros([ncomponents,2])
            mu0[:,0] = mu[:,0]
            mu0[:,1] = mu[:,1]
            gamma0 = gamma
            major0 = num.zeros(ncomponents)
            minor0 = num.zeros(ncomponents)
            angle0 = num.zeros(ncomponents)
            area0 = num.zeros(ncomponents)
            for j in range(ncomponents):
                (major0[j],minor0[j],angle0[j]) = cov2ell(S[:,:,j])
                area0[j] = major0[j]*minor0[j]*num.pi*4.0
                print('component %d: mu = '%j + str(mu0[j,:]) + ', major = ' + str(major0[j]) + ', minor = ' + str(minor0[j]) + ', angle = ' + str(angle0[j]) + ', area = ' + str(area0[j]))

            # are any of the components too small?
            if num.any(area0 < params.minshape.area):
                print('split by raising threshold, but one of the components was too small, minarea = ' + str(params.minshape.area))
                # undo split
                ncomponents = 1

    # ncomponents can be 0 here (nothing passed a threshold, or every
    # component was removed); treat that like "no split found" so the
    # clustering fallback runs instead of falling through to the final
    # section with mu0/gamma0 undefined
    if ncomponents < 1:
        print("ncomponents = " + str(ncomponents) + " resetting to 1")
        ncomponents = 1

    if ncomponents == 1:

        # compute the difference between the observation area and the
        # mean area; err0 is the error of the best fit accepted so far
        err0 = num.abs(ellipses[i].area - params.meanshape.area)

        # try splitting into more clusters
        ncomponents = 2
        while True:
            (mu,S,priors,gamma,negloglik) = kcluster.gmm(x,ncomponents,weights=w,kmeansthresh=.1,emthresh=.1)

            # sum the distance between each cluster's area and the mean
            # area; greatly penalize areas smaller than minarea
            err = 0
            major = num.zeros(ncomponents)
            minor = num.zeros(ncomponents)
            angle = num.zeros(ncomponents)
            area = num.zeros(ncomponents)
            for j in range(ncomponents):
                (major[j],minor[j],angle[j]) = cov2ell(S[:,:,j])
                area[j] = major[j]*minor[j]*num.pi*4.0
                if area[j] < params.minshape.area:
                    err += 10000
                else:
                    err += num.abs(params.meanshape.area - area[j])
            # end for j in range(ncomponents)

            # stop as soon as adding a component no longer lowers the
            # error; the *0 variables still hold the previous fit
            if err >= err0:
                break
            ncomponents += 1
            mu0 = mu.copy()
            major0 = major.copy()
            minor0 = minor.copy()
            angle0 = angle.copy()
            area0 = area.copy()
            err0 = err
            gamma0 = gamma.copy()

        # end while True

        # the loop exits one past the last accepted fit; if even two
        # components were rejected this leaves ncomponents == 1
        ncomponents -= 1

    if ncomponents == 1:
        isdone[i] = True
        print('decided not to split')
        return isdone

    # get id: idx[p] is the component with the largest membership for pixel p
    idx = num.argmax(gamma0,axis=1)
    # replace: component 0 takes over the slot (and label i+1) of the original
    ellipses[i].center.x = mu0[0,0]
    ellipses[i].center.y = mu0[0,1]
    ellipses[i].major = major0[0]
    ellipses[i].minor = minor0[0]
    ellipses[i].angle = angle0[0]
    ellipses[i].area = area0[0]
    # if small enough, set to done
    isdone[i] = ellipses[i].area <= params.maxshape.area
    # add new
    for j in range(1,ncomponents):
        # NOTE(review): minor is passed before major here -- confirm this
        # matches the parameter order of the Ellipse constructor
        ellipse = Ellipse(mu0[j,0],mu0[j,1],minor0[j],major0[j],angle0[j],area0[j])
        ellipses.append(ellipse)
        # num.append returns a new array, hence the rebinding and the
        # explicit return below
        isdone = num.append(isdone,ellipse.area <= params.maxshape.area)
        # relabel this component's pixels with the new ellipse's 1-based index
        L[r[idx==j],c[idx==j]] = len(ellipses)
    print('split into %d ellipses: '%ncomponents)
    print(ellipses[i])
    # report the ellipses appended above (they sit at the end of the
    # list), not whatever happens to be stored at indices 1..ncomponents-1
    for j in range(1,ncomponents):
        print(ellipses[-j])
    return isdone

Example #4

Show file

File: test_kcluster.py Project: BackupTheBerlios/ctrax-svn

                print "(err - err0)/err0 = " + str((err-err0)/err0)

        else:

            (mu,S,priors) = kcluster.gmminit(x,k,weights=w)
            random.set_state(state)
            (mu0,S0,priors0) = kcluster0.gmminit(x,k,weights=w)
            if True or \
                    num.max(num.abs(mu - mu0)) > .001 or \
                    num.max(num.abs(S-S0)) > .001 or \
                    num.max(num.abs(priors - priors0)) > .001:
                print "max(|mu - mu0|) = " + str(num.max(num.abs(mu - mu0)))
                print "max(|S - S0|) = " + str(num.max(num.abs(S-S0)))
                print "max(|priors - priors0|) = " + str(num.max(num.abs(priors - priors0)))
            for i in range(100):
                [gamma,newe] = kcluster.gmmmemberships(mu,S,priors,x,w)
                [gamma0,newe0] = kcluster0.gmmmemberships(mu0,S0,priors0,x,w)
                if True or \
                        num.max(num.abs(gamma-gamma0)) > .001 or \
                        num.abs(newe-newe0) > .001:
                    print "i = %d: max(|gamma - gamma0|) = "%i + str(num.max(num.abs(gamma-gamma0)))
                    print "|newe - newe0| = " + str(num.abs(newe-newe0))
                kcluster.gmmupdate(mu,S,priors,gamma,x,w)
                kcluster0.gmmupdate(mu0,S0,priors0,gamma0,x,w)
                if True or \
                        num.max(num.abs(mu - mu0)) > .001 or \
                        num.max(num.abs(S-S0)) > .001 or \
                        num.max(num.abs(priors - priors0)) > .001:
                    print "i = %d: max(|mu - mu0|) = "%i + str(num.max(num.abs(mu - mu0)))
                    print "max(|S - S0|) = " + str(num.max(num.abs(S-S0)))
                    print "max(|priors - priors0|) = " + str(num.max(num.abs(priors - priors0)))