def optimize_with_bic(self, data, kvals=None, maxiter=300, delta=0.001,
                      ninit=1, verbose=0):
    """Find the optimal GMM according to the BIC criterion.

    The EM estimation is run once for every candidate number of
    components in `kvals`, and the model with the highest BIC is kept
    (its means/precisions/weights are restored on self before return).

    Parameters
    ----------
    data : array of shape (n, p)
        n = number of items, p = feature dimension
    kvals : sequence of int or None, optional
        candidate values for the number of components k;
        if None, the current self.k is used as-is
    maxiter : int, optional
        maximum number of iterations of the EM algorithm
    delta : float, optional
        threshold on log-likelihood increments to declare convergence
    ninit : int, optional
        number of initializations of the GMM estimation
    verbose : int, optional
        verbosity mode

    Returns
    -------
    Labels : array of shape (n), type np.int
        discrete labelling of the data items into clusters
    LL : array of shape (n)
        log-likelihood of the data
    bic : float
        associated BIC criterion

    Raises
    ------
    ValueError
        if `kvals` is given but no candidate yields a finite BIC
        (including the empty-`kvals` case, which previously surfaced
        as an UnboundLocalError)
    """
    data = self.check_x(data)
    if kvals is None:
        # Single-model case: estimate with the current self.k.
        # NOTE(review): the unpacking order here (LogLike, Labels, bic)
        # differs from the loop below (Labels, LL, bic) -- verify
        # against self.estimate's actual return signature.
        LogLike, Labels, bic = self.estimate(data, None, maxiter,
                                             delta, ninit)
        return Labels, LogLike, self.bic(LogLike)

    # Model selection: retain the parameters of the best-BIC model.
    # np.infty was removed in NumPy 2.0; np.inf is the supported alias.
    bic_ref = -np.inf
    kopt = None
    for k in kvals:
        self.k = k
        # kmeans gives an initial labelling; centroids and inertia unused
        _, label, _ = fc.kmeans(data, k, Labels=None)
        Lab, LL, bic = self.estimate(data, label, maxiter, delta, ninit)
        if bic > bic_ref:
            kopt = k
            C = self.means.copy()
            P = self.precisions.copy()
            W = self.weights.copy()
            bic_ref = bic
        if verbose:
            print(k, LL, bic, kopt)
    if kopt is None:
        raise ValueError('no candidate in kvals yielded a finite BIC')

    # Restore the parameters of the winning model.
    self.means = C
    self.precisions = P
    self.weights = W
    self.k = kopt
    # Full precision matrices are stored flattened per component.
    if self.prec_type == 'full':
        precisions = np.reshape(self.precisions,
                                (self.k, self.dim * self.dim))
    else:
        precisions = self.precisions
    Labels, LogLike = fc.gmm_partition(data, self.means, precisions,
                                       self.weights)
    return Labels, LogLike, self.bic_from_ll(LogLike)
def testpartition(self):
    """Check that gmm_partition separates two well-spread blobs.

    Draws 10000 standard-normal 2D samples, shifts the first 7000 by
    (3, 3), and verifies that a majority of the shifted samples are
    assigned to the component centred at (3, 3).
    """
    X = nr.randn(10000, 2)
    # Offset mask: first 7000 rows get shifted by 3 in both dimensions.
    A = np.concatenate([np.ones((7000, 2)), np.zeros((3000, 2))])
    X = X + 3 * A
    C = np.array([[0, 0], [3, 3]])   # component means
    P = np.array([[1, 1], [1, 1]])   # component precisions (diagonal)
    W = np.array([0.5, 0.5])         # equal mixing weights
    L, G = fc.gmm_partition(X, C, P, W)
    shifted_labels = L[:7000].astype('d')
    # Shifted samples should mostly be labelled 1, the (3, 3) component.
    # assert_ is a deprecated unittest alias; assertTrue is the
    # supported spelling.
    self.assertTrue(np.mean(shifted_labels) > 0.5)
def sample(self, gd, x, verbose=0):
    """Evaluate the GMM on the grid of positions described by `gd`.

    (The previous docstring documented a nonexistent `data` parameter;
    the evaluation points actually come from gd.make_grid().)

    Parameters
    ----------
    gd : grid descriptor
        object whose make_grid() returns the (n, p) array of positions
        at which the model is evaluated
    x : array
        data passed to self.show for display; only used when verbose
    verbose : int, optional
        if non-zero, plot exp(log-likelihood) via self.show

    Returns
    -------
    LogLike : array of shape (n)
        log-likelihood of the grid positions under the model
    """
    data = gd.make_grid()
    # Full precision matrices are stored flattened per component.
    if self.prec_type == 'full':
        precisions = np.reshape(self.precisions,
                                (self.k, self.dim * self.dim))
    else:
        precisions = self.precisions
    Labels, LogLike = fc.gmm_partition(data, self.means, precisions,
                                       self.weights)
    if verbose:
        self.show(x, gd, np.exp(LogLike))
    return LogLike