def testarg1(self):
    # kmeans should cope gracefully with non-integer and negative k
    X = nr.randn(10, 2)
    A = np.vstack((np.ones((7, 2)), np.zeros((3, 2))))
    X = X + 3 * A
    C, L, J = fc.kmeans(X, 2.0)
    C, L, J = fc.kmeans(X, 0.5)
    C, L, J = fc.kmeans(X, -42)
    self.assert_(True)

def testarg2(self):
    # kmeans should cope with float-valued and out-of-range initial labels
    X = nr.randn(10, 2)
    A = np.vstack((np.ones((7, 2)), np.zeros((3, 2))))
    L = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
    C, L, J = fc.kmeans(X, 2, L)
    L = np.array([0.0, 0, 0, 0, 0, 1, 1, 1, 1, 1.42])
    C, L, J = fc.kmeans(X, 2, L)
    L = np.array([0.0, 0, 0, 0, 0, 1, 1, 1, 1, -1])
    C, L, J = fc.kmeans(X, 2, L)
    self.assert_(True)

def testarg3(self):
    # kmeans should cope with integer data, single-sample data, and k > n
    A = np.vstack((np.ones((7, 2)), np.zeros((3, 2))))
    X = (nr.randn(10, 2) * 100).astype(np.int)
    C, L, J = fc.kmeans(X, 2)
    C, L, J = fc.kmeans(X, 2)
    X = nr.randn(1, 2)
    C, L, J = fc.kmeans(X, 2)
    X = nr.randn(1, 2)
    C, L, J = fc.kmeans(X, 2)
    X = nr.randn(2, 5)
    C, L, J = fc.kmeans(X, 30)
    self.assert_(True)

def initialize(self, x):
    """
    This function initializes self according to a certain dataset x:
    1. sets the regularizing hyper-parameters
    2. initializes z using a k-means algorithm, then
    3. updates the parameters

    Parameters
    ----------
    x, array of shape (n_samples, self.dim)
       the data used in the estimation process
    """
    import nipy.neurospin.clustering.clustering as fc
    n = x.shape[0]

    # 1. set the priors
    self.guess_regularizing(x, bcheck=1)

    # 2. initialize the memberships
    if self.k > 1:
        cent, z, J = fc.kmeans(x, self.k)
    else:
        z = np.zeros(n).astype(np.int)
    l = np.zeros((n, self.k))
    l[np.arange(n), z] = 1

    # 3. update the parameters
    self.update(x, l)

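# A minimal standalone sketch of the membership construction used in
# initialize() above: hard k-means labels z become a one-hot (n, k)
# membership array before the parameter update. The helper name is
# illustrative, not part of this module.
def _one_hot_memberships(z, k):
    import numpy as np
    n = z.shape[0]
    l = np.zeros((n, k))
    l[np.arange(n), z] = 1  # row i carries a single 1 in column z[i]
    return l
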
def testkmeans1(self):
    # on well-separated blobs the two halves should receive distinct labels
    X = nr.randn(10, 2)
    A = np.concatenate([np.ones((7, 2)), np.zeros((3, 2))])
    X = X + 3 * A
    L = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
    C, L, J = fc.kmeans(X, 2, L)
    self.assert_(np.mean(L[:7]) < 0.5)

def VB_estimate(self, x, niter=100, delta=0.0001):
    """
    Estimation of the BGMM using a Variational Bayes approach

    Parameters
    ----------
    x: array of shape (nbitems, dim)
       the input data
    niter=100, the maximal number of iterations of the VB algorithm
    delta=0.0001, the increment in log-likelihood to declare convergence

    Returns
    -------
    label: array of shape (nbitems): resulting MAP labelling
    """
    x = self.check_data(x)

    # pre-cluster the data (this improves convergence...)
    label = np.zeros(x.shape[0])
    nit = 10
    mean, label, J = fc.kmeans(x, self.k, label, nit)

    label, mean, meansc, prec, we, dof, Li = fc.bayesian_gmm(
        x, self.prior_means, self.prior_precisions, self.prior_shrinkage,
        self.prior_weights, self.prior_dof, label, niter, delta)

    self.estimated = 1
    self.means = mean
    self.shrinkage = meansc
    self.precisions = prec
    self.weights = we
    self.dof = dof
    return label

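# Sketch of the pre-clustering step used by VB_estimate: a short k-means run
# supplies the initial labelling for the variational loop. It relies only on
# the fc.kmeans signature already used above; the helper name is illustrative.
def _precluster(x, k, nit=10):
    import numpy as np
    import nipy.neurospin.clustering.clustering as fc
    label = np.zeros(x.shape[0])
    mean, label, J = fc.kmeans(x, k, label, nit)
    return label
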
def estimate(self, data, Labels=None, maxiter=300, delta=0.001, ninit=1):
    """
    Estimation of the GMM based on data and an EM algorithm

    Parameters
    ----------
    data: (n, p) feature array, n = nb items, p = feature dimension
    Labels=None: prior labelling of the data (this may improve convergence)
    maxiter=300: max number of iterations of the EM algorithm
    delta=0.001: criterion on the log-likelihood increments
                 to declare convergence
    ninit=1: number of restarts of the GMM estimation

    Returns
    -------
    Labels: array of shape (n), type np.int:
            discrete labelling of the data items into clusters
    LL: (float) average log-likelihood of the data
    bic: (float) associated bic criterion
    """
    data = self.check_x(data)
    if Labels is None:
        Labels = np.zeros(data.shape[0], np.int)
        nit = 10
        C, Labels, J = fc.kmeans(data, self.k, Labels, nit)
    if self.k > data.shape[0] - 1:
        print "too many clusters"
        self.k = data.shape[0] - 1
    if self.prec_type == 'full':
        prec_type = 0
    if self.prec_type == 'diag':
        prec_type = 1
    C, P, W, Labels, bll = fc.gmm(data, self.k, Labels, prec_type,
                                  maxiter, delta)
    self.means = C
    if self.prec_type == 'diag':
        self.precisions = P
    if self.prec_type == 'full':
        self.precisions = np.reshape(P, (self.k, self.dim, self.dim))
    self.weights = W
    self.check()

    # optional restarts: keep the solution with the best log-likelihood
    for i in range(ninit - 1):
        Labels = np.zeros(data.shape[0])
        C, P, W, labels, ll = fc.gmm(data, self.k, Labels, prec_type,
                                     maxiter, delta)
        if ll > bll:
            self.means = C
            if self.prec_type == 'diag':
                self.precisions = P
            if self.prec_type == 'full':
                self.precisions = np.reshape(P, (self.k, self.dim, self.dim))
            self.weights = W
            self.check()
            bll = ll
            Labels = labels
    return Labels, bll, self.bic_from_all(bll, data.shape[0])

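# A generic sketch of the multi-start pattern that estimate() implements:
# run the EM fit ninit times and keep the parameters with the best
# log-likelihood. fit_once is a stand-in for a single fc.gmm call.
def _best_of_restarts(fit_once, ninit=1):
    import numpy as np
    best_params, best_ll = None, -np.inf
    for _ in range(ninit):
        params, ll = fit_once()
        if best_params is None or ll > best_ll:
            best_params, best_ll = params, ll
    return best_params, best_ll
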
def optimize_with_bic(self, data, kvals=None, maxiter=300, delta=0.001,
                      ninit=1, verbose=0):
    """
    Find the optimal GMM using the bic criterion.
    The method is run for all the values of k in kvals.

    Parameters
    ----------
    data: (n, p) feature array, n = nb items, p = feature dimension
    kvals=None: range of values for k; if kvals is None, self.k is used
    maxiter=300: max number of iterations of the EM algorithm
    delta=0.001: criterion on the log-likelihood increments
                 to declare convergence
    ninit=1: number of restarts of the GMM estimation
    verbose=0: verbosity mode

    Returns
    -------
    Labels: array of shape (n), type np.int,
            discrete labelling of the data items into clusters
    LL: array of shape (n): log-likelihood of the data
    bic: (float) associated bic criterion
    """
    data = self.check_x(data)
    if kvals is None:
        Labels, LogLike, bic = self.estimate(data, None, maxiter,
                                             delta, ninit)
        return Labels, LogLike, bic

    bic_ref = -np.infty
    for k in kvals:
        self.k = k
        nit = 10
        mean, label, J = fc.kmeans(data, k, Labels=None)
        Lab, LL, bic = self.estimate(data, label, maxiter, delta, ninit)
        if bic > bic_ref:
            kopt = k
            C = self.means.copy()
            P = self.precisions.copy()
            W = self.weights.copy()
            bic_ref = bic
        if verbose:
            print k, LL, bic, kopt

    self.means = C
    self.precisions = P
    self.weights = W
    self.k = kopt
    if self.prec_type == 'full':
        precisions = np.reshape(self.precisions,
                                (self.k, self.dim * self.dim))
    else:
        precisions = self.precisions
    Labels, LogLike = fc.gmm_partition(data, self.means, precisions,
                                       self.weights)
    return Labels, LogLike, self.bic_from_ll(LogLike)

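# Sketch of the model-selection loop performed by optimize_with_bic: fit one
# model per candidate k and keep the k with the highest BIC. fit_and_bic is a
# stand-in for the estimate() call above.
def _select_k_by_bic(fit_and_bic, kvals):
    best_k, best_bic = None, -float('inf')
    for k in kvals:
        bic = fit_and_bic(k)
        if best_k is None or bic > best_bic:
            best_k, best_bic = k, bic
    return best_k
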
def testkmeans2(self):
    # with 10000 points the two blobs must be recovered almost perfectly
    X = nr.randn(10000, 2)
    A = np.concatenate([np.ones((7000, 2)), np.zeros((3000, 2))])
    X = X + 3 * A
    L = np.concatenate([np.ones(5000), np.zeros(5000)]).astype(np.int)
    C, L, J = fc.kmeans(X, 2, L)
    l = L[:7000].astype(np.float)
    self.assert_(np.mean(l) > 0.9)

def initialize(self, x):
    """
    Initialize z using a k-means algorithm, then update the parameters

    Parameters
    ----------
    x: array of shape (nb_samples, self.dim)
       the data used in the estimation process
    """
    if self.k > 1:
        cent, z, J = fc.kmeans(x, self.k)
    else:
        z = np.zeros(x.shape[0]).astype(np.int)
    self.update(x, z)

def initialize(self, x):
    """
    Initialize z using a k-means algorithm, then update the parameters

    Parameters
    ----------
    x: array of shape (nb_samples, self.dim)
       the data used in the estimation process
    """
    n = x.shape[0]
    if self.k > 1:
        cent, z, J = fc.kmeans(x, self.k)
    else:
        z = np.zeros(x.shape[0]).astype(np.int)
    l = np.zeros((n, self.k))
    l[np.arange(n), z] = 1
    self._Mstep(x, l)

def VB_estimate_and_sample(self, x, niter=1000, delta=0.0001, gd=None,
                           verbose=0):
    """
    Estimation of the BGMM using a Variational Bayes approach,
    and sampling of the model on test points in order to have
    an estimate of the posterior on these points

    Parameters
    ----------
    x: array of shape (nbitems, dim)
       the input data
    niter=1000, the maximal number of iterations of the VB algorithm
    delta=0.0001, the increment in log-likelihood to declare convergence
    gd=None: a grid descriptor, i.e. the grid on which the model is sampled;
             if gd is None, x is used as grid
    verbose=0: the verbosity mode

    Returns
    -------
    Li: array of shape (nbnodes): the average log-posterior
    label: array of shape (nbitems): resulting MAP labelling
    """
    x = self.check_data(x)

    # pre-cluster the data (this improves convergence...)
    label = np.zeros(x.shape[0])
    nit = 10
    mean, label, J = fc.kmeans(x, self.k, label, nit)

    if gd is None:
        grid = x
    else:
        grid = gd.make_grid()

    label, mean, meansc, prec, we, dof, Li = fc.bayesian_gmm(
        x, self.prior_means, self.prior_precisions, self.prior_shrinkage,
        self.prior_weights, self.prior_dof, label, niter, delta, grid)

    self.estimated = 1
    self.means = mean
    self.shrinkage = meansc
    self.precisions = prec
    self.weights = we
    self.dof = dof
    if verbose:
        self.show(x, gd, np.exp(Li))
    return Li, label

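# A minimal sketch of the MAP labelling that VB_estimate_and_sample returns:
# given per-component posteriors on the sampled points, each item is assigned
# to its most probable component (illustrative helper, not this module's API).
def _map_labels(posteriors):
    import numpy as np
    # posteriors: array of shape (n_items, k)
    return np.argmax(posteriors, 1)
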
def mask_parcellation(mask_images, nb_parcel, output_image=None):
    """
    Performs the parcellation of a certain mask

    Parameters
    ----------
    mask_images: list of strings,
                 paths of the mask images that define the common space
    nb_parcel: int, number of desired parcels
    output_image: string, optional
                  path of the output image

    Returns
    -------
    wim: Nifti1Image instance, the resulting parcellation
    """
    from ..mask import intersect_masks

    # compute the group mask
    affine = load(mask_images[0]).get_affine()
    shape = load(mask_images[0]).get_shape()
    mask = intersect_masks(mask_images, threshold=0) > 0
    ijk = np.where(mask)
    ijk = np.array(ijk).T
    nvox = ijk.shape[0]

    # get and cluster the coordinates (voxel -> mm via the affine)
    ijk = np.hstack((ijk, np.ones((nvox, 1))))
    coord = np.dot(ijk, affine.T)[:, :3]
    cent, tlabs, J = kmeans(coord, nb_parcel)

    # write the results
    label = -np.ones(shape)
    label[mask] = tlabs
    wim = Nifti1Image(label, affine)
    wim.get_header()['descrip'] = 'Label image in %d parcels' % nb_parcel
    if output_image is not None:
        save(wim, output_image)
    return wim

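# Standalone sketch of the voxel-to-mm conversion used in mask_parcellation:
# voxel indices are made homogeneous and mapped through the image affine.
# The helper name is illustrative.
def _vox_to_mm(ijk, affine):
    import numpy as np
    nvox = ijk.shape[0]
    homog = np.hstack((ijk, np.ones((nvox, 1))))  # append the (i, j, k, 1) column
    return np.dot(homog, affine.T)[:, :3]         # drop the homogeneous coordinate
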
def one_subj_parcellation(MaskImage, betas, nbparcel, nn=6, method='ward',
                          write_dir=None, mu=10., verbose=0, fullpath=None):
    """
    Parcellation of a one-subject dataset
    Returns a tuple (Parcellation instance, parcellation labels)

    Parameters
    ----------
    MaskImage: path to the mask-defining image of the subject
    betas: list of paths to activation images from the subject
    nbparcel, int: number of desired parcels
    nn=6: number of nearest neighbors to define the image topology
          (6, 18 or 26)
    method='ward': clustering method used, to be chosen among
                   'ward', 'gkm', 'ward_and_gkm', 'kmeans'
                   'ward': Ward's clustering algorithm
                   'gkm': Geodesic k-means algorithm, random initialization
                   'ward_and_gkm': idem, initialized by Ward's clustering
    write_dir=None: write directory. If fullpath is None too,
                    then no file output.
    mu=10., float: the relative weight of anatomical information
    verbose=0: verbosity mode
    fullpath=None, string, path of the output image.
        If write_dir and fullpath are None then no file output.
        If only fullpath is None then it is the write dir + a name
        depending on the method.

    Note
    ----
    Ward's method takes time (about 6 minutes for a 60K voxels dataset)
    Geodesic k-means is 'quick and dirty'
    Ward's + GKM is expensive but quite good
    To reduce CPU time, rather use nn=6 (especially with Ward)
    """
    import nipy.neurospin.graph as fg
    import nipy.neurospin.graph.field as ff

    if method not in ['ward', 'gkm', 'ward_and_gkm', 'kmeans']:
        raise ValueError('unknown method')
    if nn not in [6, 18, 26]:
        raise ValueError('nn should be 6, 18 or 26')
    nbeta = len(betas)

    # step 1: load the data
    # 1.1 the mask image
    nim = load(MaskImage)
    ref_dim = nim.get_shape()
    affine = nim.get_affine()
    mask = nim.get_data()
    xyz = np.array(np.where(mask > 0)).T
    nvox = xyz.shape[0]
    lmask = mask > 0  # default mask, refined below when a graph is built

    if method != 'kmeans':
        # 1.2 get the main cc of the graph
        # to remove the small connected components
        g = fg.WeightedGraph(nvox)
        g.from_3d_grid(xyz.astype(np.int), nn)
        aux = np.zeros(g.V).astype('bool')
        imc = g.main_cc()
        aux[imc] = True
        if np.sum(aux) == 0:
            raise ValueError('empty mask. Cannot proceed')
Cannot proceed" g = g.subgraph(aux) lmask = np.zeros(ref_dim) lmask[xyz[:,0],xyz[:,1],xyz[:,2]]=aux xyz = xyz[aux,:] nvox = xyz.shape[0] # 1.3 from vox to mm xyz2 = np.hstack((xyz,np.ones((nvox,1)))) coord = np.dot(xyz2, affine.T)[:,:3] # 1.4 read the functional data beta = [] for b in range(nbeta): rbeta = load(betas[b]) lbeta = rbeta.get_data() lbeta = lbeta[lmask>0] beta.append(lbeta) beta = np.array(beta).T #step 2: parcel the data --------------------------- feature = np.hstack((beta, mu*coord/np.std(coord))) if method is not 'kmeans': g = ff.Field(nvox, g.edges, g.weights, feature) if method=='kmeans': cent, u, J = kmeans(feature, nbparcel) if method=='ward': u, J0 = g.ward(nbparcel) if method=='gkm': seeds = np.argsort(np.random.rand(g.V))[:nbparcel] seeds, u, J1 = g.geodesic_kmeans(seeds) if method=='ward_and_gkm': w,J0 = g.ward(nbparcel) seeds, u, J1 = g.geodesic_kmeans(label=w) lpa = Parcellation(nbparcel, xyz, np.reshape(u,(nvox,1))) if verbose: pi = np.reshape(lpa.population(), nbparcel) vi = np.sum(lpa.var_feature_intra([beta])[0], 1) vf = np.dot(pi,vi)/nvox va = np.dot(pi,np.sum(lpa.var_feature_intra([coord])[0],1))/nvox print nbparcel, "functional variance", vf, "anatomical variance",va # step3: write the resulting label image Label = -np.ones(ref_dim,'int16') Label[lmask>0] = u if fullpath is not None: LabelImage = fullpath elif write_dir is not None: if method=='kmeans': LabelImage = os.path.join(write_dir,"parcel_kmeans.nii") if method=='ward': LabelImage = os.path.join(write_dir,"parcel_wards.nii") elif method=='gkm': LabelImage = os.path.join(write_dir,"parcel_gkmeans.nii") elif method=='ward_and_gkm': LabelImage = os.path.join(write_dir,"parcel_wgkmeans.nii") else: LabelImage = None if LabelImage is not None: wim = Nifti1Image(Label, affine) hdr = wim.get_header() hdr['descrip'] = 'Intra-subject parcellation image' save(wim, LabelImage) print "Wrote the parcellation images as %s" %LabelImage return lpa, Label
def optim_hparcel(Ranat, RFeature, Feature, Pa, Gs, anat_coord, lamb=1.,
                  dmax=10., chunksize=1.e5, niter=5, verbose=0):
    """
    Core function of the hierarchical parcellation procedure.

    Parameters
    ----------
    Ranat: array of shape (n, 3): set of positions sampled from the data
    RFeature: array of shape (n, f): associated feature
    Feature: list of subject-related feature arrays
    Pa: parcellation instance that is updated
    Gs: graph that represents the topology of the parcellation
    anat_coord: array of shape (nvox, 3) space-defining set of coordinates
    lamb=1.0: parameter to weight position and feature impact
              on the algorithm
    dmax=10: locality parameter (in the space of anat_coord)
             to limit the search volume (CPU save)
    chunksize=1.e5: not used here (to be removed)
    niter=5: number of iterations in the algorithm
    verbose=0: verbosity level

    Returns
    -------
    U: list of arrays of length nsubj, subject-dependent parcellations
    Proto_anat: array of shape (nvox),
                labelling of the common space (template parcellation)
    """
    Sess = Pa.nb_subj

    # a1. perform a rough clustering of the data to make prototypes
    proto, Labs, J = fc.kmeans(RFeature, Pa.k, Labels=None, maxiter=10)
    proto_anat = [np.mean(Ranat[Labs == k], 0) for k in range(Pa.k)]
    proto_anat = np.array(proto_anat)
    proto = [np.mean(RFeature[Labs == k], 0) for k in range(Pa.k)]
    proto = np.array(proto)
    proto_anat_old = proto_anat.copy()

    # a2. topological model of the parcellation: group-level part
    spatial_proto = ff.Field(Pa.k)
    spatial_proto.set_field(proto_anat)
    spatial_proto.Voronoi_diagram(proto_anat, anat_coord)
    spatial_proto.set_gaussian(proto_anat)
    spatial_proto.normalize()

    for git in range(niter):
        LP = []
        LPA = []
        U = []
        Energy = 0
        for s in range(Sess):
            # b. subject-specific instances of the model
            # b.0 subject-specific information
            Fs = Feature[s]
            lac = anat_coord[Pa.label[:, s] > -1]
            target = proto_anat.copy()
            for nit in range(1):
                lseeds = np.zeros(Pa.k, 'i')
                aux = np.argsort(rand(Pa.k))
                tata = 0
                toto = np.zeros(lac.shape[0])
                for j in range(Pa.k):
                    # b.1 speed-up: only take a small ball
                    i = aux[j]
                    dX = lac - target[i, :]
                    iz = np.nonzero(np.sum(dX ** 2, 1) < dmax ** 2)
                    iz = np.reshape(iz, np.size(iz))
                    if np.size(iz) == 0:
                        iz = np.array([np.argmin(np.sum(dX ** 2, 1))])

                    # b.2: anatomical constraints
                    lanat = np.reshape(lac[iz, :],
                                       (np.size(iz), anat_coord.shape[1]))
                    pot = np.zeros(np.size(iz))
                    JM, rmin = _exclusion_map(i, spatial_proto, target, lanat)
                    pot[JM < 0] = np.infty
                    pot[JM >= 0] = -JM[JM >= 0]

                    # b.3: add feature discrepancy
                    dF = Fs[iz] - proto[i]
                    dF = np.reshape(dF, (np.size(iz), proto.shape[1]))
                    pot += lamb * np.sum(dF ** 2, 1)

                    # b.4: solution
                    pb = 0
                    if np.sum(np.isinf(pot)) == np.size(pot):
                        pot = np.sum(dX[iz, :] ** 2, 1)
                        tata += 1
                        pb = 1
                    sol = iz[np.argmin(pot)]
                    target[i] = lac[sol]
                    if toto[sol] == 1:
                        print "pb", pb
                        ln = spatial_proto.list_of_neighbors()
                        argtoto = np.squeeze(np.nonzero(lseeds == sol))
                        print i, argtoto, ln[i]
                        if np.size(argtoto) == 1:
                            print [ln[argtoto]]
                            print target[argtoto]
                        print target[i]
                        print rmin
                        print JM[iz == lseeds[argtoto]]
                        print pot[iz == lseeds[argtoto]]
                    lseeds[i] = sol
                    toto[sol] = 1
                if verbose > 1:
                    jm = _Field_Gradient_Jac(spatial_proto, target)
                    print jm.min(), jm.max(), np.sum(toto > 0), tata

            # c. subject-specific parcellation
            g = Gs[s]
            f = ff.Field(g.V, g.edges, g.weights, Fs)
            u = f.constrained_voronoi(lseeds)
            U.append(u)
            Energy += np.sum((Fs - proto[u]) * (Fs - proto[u])) / \
                np.sum(Pa.label[:, s] > -1)

            # recompute the prototypes (average in subject s)
            lproto = [np.mean(Fs[u == k], 0) for k in range(Pa.k)]
            lproto = np.array(lproto)
            lproto[np.isnan(lproto)] = proto[np.isnan(lproto)]
            lproto_anat = [np.mean(lac[u == k], 0) for k in range(Pa.k)]
            lproto_anat = np.array(lproto_anat)
            lproto_anat[np.isnan(lproto_anat)] = \
                proto_anat[np.isnan(lproto_anat)]
            LP.append(lproto)
            LPA.append(lproto_anat)

        # recompute the prototypes across subjects
        proto_mem = proto.copy()
        proto_anat_mem = proto_anat.copy()
        proto = np.mean(np.array(LP), 0)
        proto_anat = np.mean(np.array(LPA), 0)
        displ = np.sqrt(np.sum((proto_mem - proto) ** 2, 1).max())
        if verbose:
            print 'energy', Energy, 'displacement', displ

        # recompute the topological model
        spatial_proto.set_field(proto_anat)
        spatial_proto.Voronoi_diagram(proto_anat, anat_coord)
        spatial_proto.set_gaussian(proto_anat)
        spatial_proto.normalize()

        if displ < 1.e-4 * dmax:
            break
    return U, proto_anat

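# Standalone sketch of the prototype back-fill used in the loop above:
# per-subject prototypes of empty parcels come out of np.mean as NaN and are
# replaced by the current group prototypes (illustrative helper).
def _backfill_empty_prototypes(lproto, proto):
    import numpy as np
    lproto = lproto.copy()
    lproto[np.isnan(lproto)] = proto[np.isnan(lproto)]
    return lproto
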
def test_parcel_one_subj_4():
    nbparcel = 10
    g = make_data_field()
    _, u, _ = fc.kmeans(g.field, nbparcel)
    assert((np.unique(u) == np.arange(nbparcel)).all())