def _fit_vbgmm(self, x): # clustering for c in xrange(len(self.crange)): k = self.crange[c] for r in xrange(self.repeats): # info if self.debug is True: print '\t[%s][c:%d][r:%d]' % ( self.clus_type, self.crange[c], r + 1), idx = c * self.repeats + r # fit and evaluate model model_kwargs = {} if 'alpha' in self.clus_kwargs: model_kwargs.update(alpha=self.clus_kwargs['alpha']) if 'conv_thresh' in self.clus_kwargs: model_kwargs.update(thresh=self.clus_kwargs['conv_thresh']) model = VBGMM(n_components=k, covariance_type=self.cvtype, **model_kwargs) model.n_features = self.input_dim fit_kwargs = {} if 'max_iter' in self.clus_kwargs: fit_kwargs.update(n_iter=self.clus_kwargs['max_iter']) model.fit(x, params='wmc', init_params='wmc', **fit_kwargs) self._labels[idx] = model.predict(x) self._parameters[idx] = model.means self._ll[idx] = model.score(x).sum() # evaluate goodness of fit self._gof[idx] = self.gof(x, self._ll[idx], k) # debug if self.debug is True: print self._gof[idx], model.converged_
def _fit_vbgmm(self, x): # clustering for c in xrange(len(self.crange)): k = self.crange[c] for r in xrange(self.repeats): # info if self.debug is True: print '\t[%s][c:%d][r:%d]' % (self.clus_type, self.crange[c], r + 1), idx = c * self.repeats + r # fit and evaluate model model_kwargs = {} if 'alpha' in self.clus_kwargs: model_kwargs.update(alpha=self.clus_kwargs['alpha']) if 'conv_thresh' in self.clus_kwargs: model_kwargs.update(thresh=self.clus_kwargs['conv_thresh']) model = VBGMM(n_components=k, covariance_type=self.cvtype, **model_kwargs) model.n_features = self.input_dim fit_kwargs = {} if 'max_iter' in self.clus_kwargs: fit_kwargs.update(n_iter=self.clus_kwargs['max_iter']) model.fit(x, params='wmc', init_params='wmc', **fit_kwargs) self._labels[idx] = model.predict(x) self._parameters[idx] = model.means self._ll[idx] = model.score(x).sum() # evaluate goodness of fit self._gof[idx] = self.gof(x, self._ll[idx], k) # debug if self.debug is True: print self._gof[idx], model.converged_
def test_vbgmm_no_modify_alpha():
    """Fitting must not mutate the user-supplied ``alpha``; the fitted
    ``alpha_`` equals the prior divided by the number of components."""
    prior_alpha = 2.
    n_comp = 3
    X, _ = make_blobs(random_state=1)
    model = VBGMM(n_components=n_comp, alpha=prior_alpha, n_iter=1)
    assert_equal(model.alpha, prior_alpha)
    assert_equal(model.fit(X).alpha_, float(prior_alpha) / n_comp)
def main(method,cluster_num=30,alpha=.5): f ='/Users/davidgreenfield/Downloads/features_csv_tmp.csv' #f ='/Users/davidgreenfield/Downloads/features_f500.csv' cols=range(1,4096) feats =np.loadtxt(open(f,"rb"),delimiter=",",skiprows=1,usecols=(cols)) asins = np.loadtxt(open(f,"rb"),delimiter=",",skiprows=1,usecols=([0]),dtype=str) if method == 'kmeans': k_means=cluster.KMeans(n_clusters=cluster_num) k_means.fit(feats) y = k_means.labels_ if MAKE_GRAPH==1: print "hello 1" create_graph(k_means) elif method == 'GMM_VB': gmm_vb = VBGMM.fit(feats,n_components=50,alpha=.5) y = gmm_vb.predict(feats) cluster_no = len(np.unique(y)) elif method == 'GMM_DP': gmm_dp = DPGMM(n_components=50,alpha=alpha) gmm_dp.fit(feats) y = gmm_dp.predict(feats) cluster_no = len(np.unique(y)) clusters=[] groups={} data=load_data('./data/boots_aws.csv') for i in range(0,cluster_num): groups[i]=np.where(y==i) ids=asins[groups[i]] clusters.append(ids) links=[data[x]['url'] for x in ids] create_html(links,"templates/groups/group"+str(i)+".html") output_clusters(clusters,"outputs/clusters.csv")