예제 #1
0
    def estimate(self, data, Labels=None, maxiter=300, delta=0.001,
                 ninit=1):
        """
        Estimation of the GMM based on data and an EM algorithm

        Parameters
        ----------
        data : (n*p) feature array, n = nb items, p=feature dimension
        Labels=None : prior labelling of the data
            (this may improve convergence); when None, an initial
            labelling is computed with fc.kmeans (10 iterations)
        maxiter=300 : max number of iterations of the EM algorithm
        delta = 0.001 : criterion on the log-likelihood
            increments to declare convergence
        ninit=1 : number of runs of the GMM estimation; the run with
            the highest log-likelihood is kept

        Returns
        -------
        Labels : array of shape(n):
            discrete labelling of the data items into clusters,
            taken from the best run
        LL : (float) average log-likelihood of the data
        bic : (float) associated bic criterion

        Raises
        ------
        ValueError : if self.prec_type is neither 'full' nor 'diag'

        Notes
        -----
        self.k is reduced to n-1 when more clusters than that are requested.
        self.means, self.precisions and self.weights are overwritten.
        """
        data = self.check_x(data)
        n_items = data.shape[0]

        # Clamp the number of clusters *before* any clustering call, so that
        # the k-means initialisation and the EM runs all use the same k.
        if self.k > n_items - 1:
            print("too many clusters")
            self.k = n_items - 1

        # Integer code expected by fc.gmm for the precision type.
        if self.prec_type == 'full':
            prec_type = 0
        elif self.prec_type == 'diag':
            prec_type = 1
        else:
            raise ValueError("unknown prec_type: %r" % (self.prec_type,))

        # `is None` rather than `== None`: the latter is evaluated
        # element-wise when Labels is an array.
        if Labels is None:
            Labels = np.zeros(n_items, int)
            nit = 10
            C, Labels, J = fc.kmeans(data, self.k, Labels, nit)

        def store_parameters(C, P, W):
            # Copy the output of one fc.gmm run into the model attributes.
            self.means = C
            if self.prec_type == 'diag':
                self.precisions = P
            else:
                # 'full': one (dim, dim) precision matrix per component
                self.precisions = np.reshape(P, (self.k, self.dim, self.dim))
            self.weights = W
            self.check()

        C, P, W, Labels, bll = fc.gmm(data, self.k, Labels, prec_type,
                                      maxiter, delta)
        store_parameters(C, P, W)

        for _ in range(ninit - 1):
            # Restart from an all-zero labelling, kept in its own variable so
            # that the best labelling found so far is never overwritten.
            # NOTE(review): how fc.gmm treats an all-zero labelling is not
            # visible here -- confirm that restarts actually differ.
            init_labels = np.zeros(n_items)
            C, P, W, labels, ll = fc.gmm(data, self.k, init_labels,
                                         prec_type, maxiter, delta)
            if ll > bll:
                store_parameters(C, P, W)
                bll = ll
                Labels = labels
        return Labels, bll, self.bic_from_all(bll, n_items)
예제 #2
0
 def testgmm3(self):
     # Chain three fc.gmm runs (4th argument 2, then 1, then 0), feeding the
     # labels of each run into the next, and check the final labelling.
     # NOTE(review): the 4th argument is presumably the precision-type
     # code -- confirm against the fc.gmm signature.
     X = nr.randn(10000, 2)
     # Shift the first 7000 points by +3 on both axes: two separated groups.
     A = np.concatenate([np.ones((7000, 2)), np.zeros((3000, 2))])
     X = X + 3 * A
     # Prior labelling (5000/5000) that does not match the 7000/3000 split.
     # Builtin int is used because the np.int alias was removed in NumPy 1.24.
     L = np.concatenate([np.ones(5000), np.zeros(5000)]).astype(int)
     C, P, W, L, J = fc.gmm(X, 2, L, 2)
     C, P, W, L, J = fc.gmm(X, 2, L, 1)
     C, P, W, L, J = fc.gmm(X, 2, L, 0)
     l = L[:7000].astype('d')
     # More than 90% of the shifted points must carry label 1.
     # assertTrue replaces the assert_ alias, removed in Python 3.12.
     self.assertTrue(np.mean(l) > 0.9)
예제 #3
0
 def testgmm2(self):
     # Run fc.gmm without a prior labelling and check that the two returned
     # weights are clearly different from each other.
     X = nr.randn(10000, 2)
     # Shift the first 7000 points by +3 on both axes: two separated groups.
     A = np.concatenate([np.ones((7000, 2)), np.zeros((3000, 2))])
     X = X + 3 * A
     C, P, W, L, J = fc.gmm(X, 2)
     # Squared difference, so the check does not depend on which component
     # comes first; for weights 0.7 / 0.3 it would be 0.16.
     weight_gap = W[0] - W[1]
     squared_gap = weight_gap * weight_gap
     # assertTrue replaces the assert_ alias, removed in Python 3.12.
     self.assertTrue(squared_gap > 0.1)
예제 #4
0
 def testgmm1_andCheckResult(self):
     # Fit a 2-component model with fc.gmm on seeded data and compare the
     # returned values with reference values.
     np.random.seed(0)  # force the random sequence
     try:
         X = nr.randn(10000, 2)
         # Shift the first 7000 points by +3 on both axes.
         A = np.concatenate([np.ones((7000, 2)), np.zeros((3000, 2))])
         X = X + 3 * A
         # Builtin int is used: the np.int alias was removed in NumPy 1.24.
         L = np.concatenate([np.ones(5000), np.zeros(5000)]).astype(int)
         C, P, W, L, J = fc.gmm(X, 2, L)
     finally:
         # Re-randomize the seed even when the fit raises, so that a failure
         # here does not leave the global generator seeded.
         np.random.seed(None)
     # results for randomseed = 0
     expectC = np.concatenate([np.zeros((1, 2)), 3 * np.ones((1, 2))])
     expectP = np.ones((2, 2))
     expectW = np.array([0.3, 0.7])
     expectSL = 7000
     # np.allclose positional arguments are (a, b, rtol, atol).
     # assertTrue replaces the assert_ alias, removed in Python 3.12.
     self.assertTrue(np.allclose(C, expectC, 0.01, 0.05))
     self.assertTrue(np.allclose(P, expectP, 0.03, 0.05))
     self.assertTrue(np.allclose(W, expectW, 0.03, 0.05))
     self.assertTrue(np.allclose(expectSL, L.sum(), 0.01))