コード例 #1
0
    def optimize_with_bic(self,data, kvals=None, maxiter = 300,
                          delta = 0.001, ninit=1,verbose = 0):
        """
        Find the optimal GMM using the BIC criterion.

        One model is estimated for each value of k in kvals; the
        means, precisions and weights of the model with the largest BIC
        are written back into self, together with the selected k.

        Parameters
        ----------
        data : (n,p) feature array, n = nb items, p=feature dimension
        kvals=None : iterable of candidate values for k.
            if kvals is None, a single model is estimated with self.k
        maxiter=300 : max number of iterations of the EM algorithm
        delta = 0.001 : criterion on the log-likelihood
            increments to declare convergence
        ninit=1 : number of possible iterations of the GMM estimation
            (forwarded unchanged to self.estimate)
        verbose=0: verbosity mode; if true, one line is printed per
            candidate k

        Returns
        -------
        Labels : array of shape(n), type np.int,
            discrete labelling of the data items into clusters
        LL : array of shape(n): log-likelihood of the data
        bic : (float) associated bic criterion

        Raises
        ------
        ValueError : if kvals is given but no candidate could be selected
            (kvals is empty, or no candidate had a BIC above -inf)
        """
        data = self.check_x(data)
        # Identity test on purpose: `kvals == None` is evaluated
        # element-wise when kvals is a numpy array and cannot serve as a
        # condition.
        if kvals is None:
            # NOTE(review): the outputs of self.estimate are unpacked in a
            # different order here than in the loop below, and self.bic is
            # called instead of self.bic_from_ll. Behaviour is kept as is;
            # confirm against self.estimate which form is intended.
            LogLike, Labels, bic = self.estimate(data, None, maxiter,
                                                 delta, ninit)
            return Labels, LogLike, self.bic(LogLike)

        kopt = None
        bic_ref = -np.inf
        for k in kvals:
            self.k = k
            # k-means labelling used to initialise the estimation
            mean, label, J = fc.kmeans(data, k, Labels=None)
            Lab, LL, bic = self.estimate(data, label, maxiter, delta, ninit)

            if bic > bic_ref:
                # remember the best model seen so far
                kopt = k
                C = self.means.copy()
                P = self.precisions.copy()
                W = self.weights.copy()
                bic_ref = bic
            if verbose:
                # same text under Python 2 and Python 3
                print("%s %s %s %s" % (k, LL, bic, kopt))

        if kopt is None:
            raise ValueError("optimize_with_bic: no model could be selected "
                             "from kvals (empty kvals or no valid bic)")

        # restore the parameters of the best model
        self.means = C
        self.precisions = P
        self.weights = W
        self.k = kopt

        if self.prec_type == 'full':
            # fc.gmm_partition is given one flattened (dim*dim) row per
            # component for full precisions
            precisions = np.reshape(self.precisions,
                                    (self.k, self.dim * self.dim))
        else:
            precisions = self.precisions
        Labels, LogLike = fc.gmm_partition(data, self.means, precisions,
                                           self.weights)

        return Labels, LogLike, self.bic_from_ll(LogLike)
コード例 #2
0
 def testpartition(self):
     """
     Check fc.gmm_partition on a two-component toy problem.

     10000 two-dimensional samples are drawn with nr.randn and the first
     7000 rows are shifted by 3 on both coordinates. The model passed to
     fc.gmm_partition has centers (0,0) and (3,3), all-ones precisions
     and equal weights. The test requires the mean of the first output
     over the 7000 shifted rows to exceed 0.5 (presumably that output
     holds component indices, so most shifted rows map to component 1).

     NOTE: the random draw is not seeded in this test.
     """
     X = nr.randn(10000,2)
     A = np.concatenate([np.ones((7000,2)),np.zeros((3000,2))])
     X = X+3*A
     C = np.array([[0,0],[3,3]])
     P = np.array([[1,1],[1,1]])
     W = np.array([0.5, 0.5])
     L,G = fc.gmm_partition(X,C,P,W)
     l = L[:7000].astype('d')
     # assertTrue instead of the deprecated alias assert_, which was
     # removed from unittest in Python 3.12
     self.assertTrue(np.mean(l)>0.5)
コード例 #3
0
    def sample(self,gd,x,verbose=0):
        """
        Evaluate the GMM on the grid produced by gd

        Parameters
        ----------
        gd : object with a make_grid() method; the grid it returns is
            the data on which the model is evaluated
        x : forwarded to self.show() together with gd when verbose is set
        verbose=0 : if true, self.show() is called with the
            exponentiated log-likelihood

        Returns
        -------
        LL : second output of fc.gmm_partition for the grid
            (presumably one log-likelihood value per grid point)
        """
        grid = gd.make_grid()

        # full precisions are handed over as one flattened row per component
        prec = self.precisions
        if self.prec_type == 'full':
            prec = np.reshape(prec, (self.k, self.dim * self.dim))

        _, log_like = fc.gmm_partition(grid, self.means, prec, self.weights)

        if verbose:
            density = np.exp(log_like)
            self.show(x, gd, density)
        return log_like