Esempio n. 1
0
    def _fit_dpgmm(self, x):
        # clustering
        k = max(self.crange)
        for r in xrange(self.repeats):
            # info
            if self.debug is True:
                print '\t[%s][c:%d][r:%d]' % (self.clus_type, k, r + 1),

            # fit and evaluate model
            model_kwargs = {}
            if 'alpha' in self.clus_kwargs:
                model_kwargs.update(alpha=self.clus_kwargs['alpha'])
            if 'conv_thresh' in self.clus_kwargs:
                model_kwargs.update(thresh=self.clus_kwargs['conv_thresh'])
            if 'max_iter' in self.clus_kwargs:
                model_kwargs.update(n_iter=self.clus_kwargs['max_iter'])

            model = DPGMM(n_components=k,
                          covariance_type=self.cvtype,
                          **model_kwargs)
            model.fit(x)
            self._labels[r] = model.predict(x)
            self._parameters[r] = model.means_
            self._ll[r] = model.score(x).sum()

            # evaluate goodness of fit for this run
            #self._gof[r] = self.gof(x, self._ll[r], k)
            if self.gof_type == 'aic':
                self._gof[r] = model.aic(x)
            if self.gof_type == 'bic':
                self._gof[r] = model.bic(x)

            # debug
            if self.debug is True:
                print self._gof[r], model.n_components, model.weights_.shape[0]
Esempio n. 2
0
    def _fit_dpgmm(self, x):
        # clustering
        k = max(self.crange)
        for r in xrange(self.repeats):
            # info
            if self.debug is True:
                print '\t[%s][c:%d][r:%d]' % (self.clus_type, k, r + 1),

            # fit and evaluate model
            model_kwargs = {}
            if 'alpha' in self.clus_kwargs:
                model_kwargs.update(alpha=self.clus_kwargs['alpha'])
            if 'conv_thresh' in self.clus_kwargs:
                model_kwargs.update(thresh=self.clus_kwargs['conv_thresh'])
            if 'max_iter' in self.clus_kwargs:
                model_kwargs.update(n_iter=self.clus_kwargs['max_iter'])

            model = DPGMM(n_components=k, covariance_type=self.cvtype,
                          **model_kwargs)
            model.fit(x)
            self._labels[r] = model.predict(x)
            self._parameters[r] = model.means_
            self._ll[r] = model.score(x).sum()

            # evaluate goodness of fit for this run
            #self._gof[r] = self.gof(x, self._ll[r], k)
            if self.gof_type == 'aic':
                self._gof[r] = model.aic(x)
            if self.gof_type == 'bic':
                self._gof[r] = model.bic(x)

            # debug
            if self.debug is True:
                print self._gof[r], model.n_components, model.weights_.shape[0]
Esempio n. 3
0
for chunks in np.arange(1, opts.size, step = 3):
  # Sample the specified number of points from X_unlabeled
  size = np.cumsum(chunk_sizes[:chunks])[-1]
  
  # Fit a Dirichlet process mixture of Gaussians using up to  ten components
  dpgmm = DPGMM(n_components=10, alpha=10.0, covariance_type='full')
  indices = np.arange(X_unlabeled.shape[0])
  np.random.shuffle(indices)
  X = X_unlabeled[indices[:size],]
  
  print("fitting a model with", size, "data points")
  with timeit():
    dpgmm.fit(X)
  print("Done!")
  print("AIC for this model & data: ", dpgmm.aic(X))
  print("BIC for this model & data: ", dpgmm.bic(X))
  Y_hat = dpgmm.predict(X)
  print ("Model assigned points to", np.max(Y_hat), "components")
  

# How can I best check this out? 
#color_iter = itertools.cycle(['r', 'g', 'b', 'c', 'm'])
#for i, (clf, title) in enumerate([(gmm, 'GMM'),
                                  #(dpgmm, 'Dirichlet Process GMM')]):
    #splot = plt.subplot(2, 1, 1 + i)
    #Y_ = clf.predict(X)
    #for i, (mean, covar, color) in enumerate(zip(
            #clf.means_, clf._get_covars(), color_iter)):
        #v, w = linalg.eigh(covar)
        #u = w[0] / linalg.norm(w[0])