def classification_distance(ref, test, test_data=None, ndraw=100000):
    """
    Compare how two models classify a common set of points.

    Parameters
    ----------
    ref, test : model objects
        Both must provide ``classify(data)`` and a ``clusters``
        sequence; ``test`` must additionally provide ``draw(n)``.
    test_data : array, optional
        Points classified by both models.  When omitted, ``ndraw``
        points are drawn from ``test``.
    ndraw : int
        Number of points drawn from ``test`` if ``test_data`` is None.

    Returns
    -------
    A newly allocated array with one row per cluster of ``ref`` and one
    column per cluster of ``test``, holding the cost matrix divided by
    the number of points.
    """
    if test_data is None:
        test_data = test.draw(ndraw)

    test_labels = test.classify(test_data)
    ref_labels = ref.classify(test_data)

    n_points = test_data.shape[0]
    n_test_clusters = len(test.clusters)
    n_ref_clusters = len(ref.clusters)

    # Every (test cluster, ref cluster) pair starts at the maximal cost,
    # i.e. the number of points.
    cost = n_points * np.ones((n_test_clusters, n_ref_clusters),
                              dtype=np.double)

    # _get_cost's return value is not used, so it is expected to update
    # ``cost`` in place -- presumably lowering entries where the two
    # classifications agree; confirm against its definition.
    _get_cost(test_labels, ref_labels, cost)

    normalised = cost / n_points
    # Transpose so rows follow ``ref``; copy to hand back an independent array.
    return normalised.T.copy()
def classification_distance(ref, test, test_data=None, ndraw=100000):
    """
    Return a normalised cost matrix relating the clusters of ``ref`` to
    the clusters of ``test``.

    Both models classify the same points (``test_data``, or ``ndraw``
    points drawn from ``test`` when ``test_data`` is None).  A cost
    matrix with one row per ``test`` cluster and one column per ``ref``
    cluster is initialised to the number of points, handed to
    ``_get_cost`` together with the two label vectors, divided by the
    number of points, and returned transposed (rows follow ``ref``) as
    a fresh copy.
    """
    data = test.draw(ndraw) if test_data is None else test_data

    labels_from_test = test.classify(data)
    labels_from_ref = ref.classify(data)

    total = data.shape[0]
    dims = (len(test.clusters), len(ref.clusters))
    cost = total * np.ones(dims, dtype=np.double)

    # The result of the call is discarded: ``cost`` is expected to be
    # modified in place by _get_cost (verify against its definition).
    _get_cost(labels_from_test, labels_from_ref, cost)

    scaled = cost / total
    return scaled.T.copy()
def sample(self, niter=1000, nburn=100, thin=1, tune_interval=100,
           ident=False):
    r"""
    Perform MCMC sampling of the posterior.

    \beta must be sampled using Metropolis-Hastings; its proposal
    distribution is tuned (through ``self._tune()``) every
    ``tune_interval`` iterations during the burn-in period, so an ample
    burn-in is suggested.  The AR parameters store the acceptance rate
    for the stick weights of \beta and \alpha_0.

    Parameters
    ----------
    niter : int
        Number of post-burn-in iterations.  The draw of iteration ``i``
        (``0 <= i < niter``) is written to ``self.beta[i]``,
        ``self.weights[i]``, ``self.alpha[i]``, ``self.alpha0[i]``,
        ``self.mu[i]`` and ``self.Sigma[i]``.
    nburn : int
        Number of burn-in iterations run first; their draws are not
        stored.
    thin : int
        Forwarded to ``self._setup_storage`` together with ``niter``.
        NOTE(review): this loop stores every iteration at index ``i``;
        confirm that ``_setup_storage`` sizes the storage accordingly.
    tune_interval : int
        Tuning period, in iterations, used during burn-in.
    ident : bool
        If True, the classification obtained at iteration 0 is kept as
        a reference and every later draw is permuted to match it, using
        ``munkres`` on a cost matrix filled by ``_get_cost``.

    Notes
    -----
    GPU workers and CPU workers started by this method are shut down
    again even when an iteration raises.  ``self.stick_beta`` is set to
    a copy of the final stick weights only when the run completes.
    """
    if self.verbose:
        if self.gpu:
            print("starting GPU enabled MCMC")
        else:
            print("starting MCMC")

    gpu_started = False
    workers_started = False
    try:
        # multiGPU init
        if self.gpu:
            self.gpu_workers = init_GPUWorkers(self.data, self.dev_list)
            gpu_started = True
        if self.parallel:
            # Flag first: queues still get their sentinel if a start() fails.
            workers_started = True
            for w in self.workers:
                w.start()

        self._ident = ident
        self._setup_storage(niter, thin)
        self._tune_interval = tune_interval

        alpha = self._alpha0
        alpha0 = self._alpha00
        weights = self._weights0
        beta = self._beta0
        stick_beta = self._stick_beta0
        mu = self._mu0
        Sigma = self._Sigma0

        # Negative indices are burn-in, non-negative ones are stored.
        for i in range(-nburn, niter):
            # An integer (non-bool) verbosity doubles as the print interval.
            if (isinstance(self.verbose, int) and self.verbose
                    and not isinstance(self.verbose, bool)
                    and i % self.verbose == 0):
                print(i)

            ## update labels
            labels, zhat = self._update_labels(mu, Sigma, weights)

            ## Get initial reference if needed
            if i == 0 and ident:
                zref = [zhat[ii].copy() for ii in range(self.ngroups)]
                c0 = np.zeros((self.ncomp, self.ncomp), dtype=np.double)
                # Row j starts at the number of reference points labelled j,
                # summed over all groups.
                for j in range(self.ncomp):
                    for z in zref:
                        c0[j, :] += np.sum(z == j)

            ## update mu and sigma
            mu, Sigma, counts = self._update_mu_Sigma(Sigma, labels,
                                                      self.alldata)

            ## update weights, masks
            stick_weights, weights = self._update_stick_weights(
                counts, beta, alpha0)
            stick_beta, beta = self._update_beta(
                stick_beta, beta, stick_weights, alpha0, alpha)

            ## hyper parameters
            alpha = self._update_alpha(stick_beta)
            alpha0 = self._update_alpha0(stick_weights, beta, alpha0)

            ## Relabel
            if i > 0 and ident:
                cost = c0.copy()
                for Z, Zr in zip(zhat, zref):
                    _get_cost(Zr, Z, cost)
                # Column indices of the assignment give the permutation
                # applied to every component-indexed quantity.
                _, iii = np.where(munkres(cost))
                beta = beta[iii]
                weights = weights[:, iii]
                mu = mu[iii]
                Sigma = Sigma[iii]

            ## save
            if i >= 0:
                self.beta[i] = beta
                self.weights[i] = weights
                self.alpha[i] = alpha
                self.alpha0[i] = alpha0
                self.mu[i] = mu
                self.Sigma[i] = Sigma
            elif (nburn + i + 1) % self._tune_interval == 0:
                self._tune()

        self.stick_beta = stick_beta.copy()
    finally:
        # Always release workers, otherwise a failed run leaves them alive.
        if gpu_started:
            kill_GPUWorkers(self.gpu_workers)
        if workers_started:
            # A None on each work queue is the shutdown signal.
            for ii, _worker in enumerate(self.workers):
                self.work_queue[ii].put(None)
def sample(self, niter=1000, nburn=0, thin=1, ident=False):
    """
    Run ``nburn + niter`` sampler iterations, storing the last ``niter``.

    Parameters
    ----------
    niter : int
        Number of stored iterations.  The draw of iteration ``i``
        (``0 <= i < niter``) is written to ``self.weights[i]``,
        ``self.alpha[i]``, ``self.mu[i]`` and ``self.Sigma[i]``.
    nburn : int
        Number of burn-in iterations run first; their draws are not
        stored.
    thin : int
        Currently unused by this method: every one of the ``niter``
        draws is stored.
    ident : bool
        If True the munkres identification algorithm is used, matching
        each draw to a reference classification computed at iteration 0
        (i.e. from the INITIAL VALUES when ``nburn`` is 0).  These
        should be selected with great care; we recommend using the EM
        algorithm.  Burn-in doesn't make much sense in this case.

    Raises
    ------
    IndexError
        Re-raised from ``_get_cost`` (after printing a diagnostic) when
        the reference and current labels cannot be tabulated.

    Notes
    -----
    GPU workers started by this method are shut down again even when an
    iteration raises.
    """
    self._setup_storage(niter)

    # NOTE: unlike the GPU workers, no CPU worker threads are started or
    # stopped here.
    gpu_started = False
    try:
        if self.gpu:
            self.gpu_workers = init_GPUWorkers(self.data, self.dev_list)
            gpu_started = True

        alpha = self._alpha0
        weights = self._weights0
        mu = self._mu0
        Sigma = self._Sigma0

        if self.verbose:
            if self.gpu:
                print("starting GPU enabled MCMC")
            else:
                print("starting MCMC")

        # Negative indices are burn-in, non-negative ones are stored.
        for i in range(-nburn, niter):
            if i == 0 and ident:
                # Extra labelling pass whose classification becomes the
                # reference for all later relabelling.
                _, zref = self._update_labels(mu, Sigma, weights, True)
                c0 = np.zeros((self.ncomp, self.ncomp), dtype=np.double)
                # Row j starts at the number of reference points labelled j.
                for j in range(self.ncomp):
                    c0[j, :] = np.sum(zref == j)

            # An integer (non-bool) verbosity doubles as the print interval.
            if (isinstance(self.verbose, int) and self.verbose
                    and not isinstance(self.verbose, bool)
                    and i % self.verbose == 0):
                print(i)

            labels, zhat = self._update_labels(mu, Sigma, weights, ident)
            # mu and Sigma are not rebound here, so _update_mu_Sigma is
            # presumably updating them in place.  NOTE(review): until the
            # first relabelling they alias self._mu0 / self._Sigma0 --
            # confirm that overwriting the initial values is intended.
            counts = self._update_mu_Sigma(mu, Sigma, labels)
            stick_weights, weights = self._update_stick_weights(counts, alpha)
            alpha = self._update_alpha(stick_weights)

            ## relabel if needed:
            if i > 0 and ident:
                cost = c0.copy()
                try:
                    _get_cost(zref, zhat, cost)  # cython!!
                except IndexError:
                    # Fail loudly instead of dropping into a debugger and
                    # then relabelling with a partially filled cost matrix.
                    print('Something stranged happened ... do zref and zhat look correct?')
                    raise
                # Column indices of the assignment give the permutation
                # applied to every component-indexed quantity.
                _, iii = np.where(munkres(cost))
                weights = weights[iii]
                mu = mu[iii]
                Sigma = Sigma[iii]

            if i >= 0:
                self.weights[i] = weights
                self.alpha[i] = alpha
                self.mu[i] = mu
                self.Sigma[i] = Sigma
    finally:
        # Always release the GPU workers, also after a failed iteration.
        if gpu_started:
            kill_GPUWorkers(self.gpu_workers)
def sample(self, niter=1000, nburn=100, thin=1, tune_interval=100,
           ident=False):
    r"""
    Perform MCMC sampling of the posterior.

    \beta must be sampled using Metropolis-Hastings; its proposal
    distribution is tuned (through ``self._tune()``) every
    ``tune_interval`` iterations during the burn-in period, so an ample
    burn-in is suggested.  The AR parameters store the acceptance rate
    for the stick weights of \beta and \alpha_0.

    Parameters
    ----------
    niter : int
        Number of post-burn-in iterations.  The draw of iteration ``i``
        (``0 <= i < niter``) is written to ``self.beta[i]``,
        ``self.weights[i]``, ``self.alpha[i]``, ``self.alpha0[i]``,
        ``self.mu[i]`` and ``self.Sigma[i]``.
    nburn : int
        Number of burn-in iterations run first; their draws are not
        stored.
    thin : int
        Forwarded to ``self._setup_storage`` together with ``niter``.
        NOTE(review): this loop stores every iteration at index ``i``;
        confirm that ``_setup_storage`` sizes the storage accordingly.
    tune_interval : int
        Tuning period, in iterations, used during burn-in.
    ident : bool
        If True, the classification obtained at iteration 0 is kept as
        a reference and every later draw is permuted to match it, using
        ``munkres`` on a cost matrix filled by ``_get_cost``.

    Notes
    -----
    GPU workers started by this method are shut down again even when an
    iteration raises.  ``self.stick_beta`` is set to a copy of the
    final stick weights only when the run completes.
    """
    if self.verbose:
        if self.gpu:
            print("starting GPU enabled MCMC")
        else:
            print("starting MCMC")

    gpu_started = False
    try:
        # multiGPU init
        if self.gpu:
            self.gpu_workers = init_GPUWorkers(self.data, self.dev_list)
            gpu_started = True

        self._ident = ident
        self._setup_storage(niter, thin)
        self._tune_interval = tune_interval

        alpha = self._alpha0
        alpha0 = self._alpha00
        weights = self._weights0
        beta = self._beta0
        stick_beta = self._stick_beta0
        mu = self._mu0
        Sigma = self._Sigma0

        # Negative indices are burn-in, non-negative ones are stored.
        for i in range(-nburn, niter):
            # An integer (non-bool) verbosity doubles as the print interval.
            if (isinstance(self.verbose, int) and self.verbose
                    and not isinstance(self.verbose, bool)
                    and i % self.verbose == 0):
                print(i)

            # update labels
            labels, zhat = self._update_labels(mu, Sigma, weights)

            # Get initial reference if needed
            if i == 0 and ident:
                zref = [zhat[ii].copy() for ii in range(self.ngroups)]
                c0 = np.zeros((self.ncomp, self.ncomp), dtype=np.double)
                # Row j starts at the number of reference points labelled j,
                # summed over all groups.
                for j in range(self.ncomp):
                    for z in zref:
                        c0[j, :] += np.sum(z == j)

            # update mu and sigma
            # mu and Sigma are not rebound here, so _update_mu_Sigma is
            # presumably updating them in place.  NOTE(review): until the
            # first relabelling they alias self._mu0 / self._Sigma0 --
            # confirm that overwriting the initial values is intended.
            counts = self._update_mu_Sigma(mu, Sigma, labels, self.alldata)

            # update weights, masks
            stick_weights, weights = self._update_stick_weights(
                counts, beta, alpha0)
            stick_beta, beta = sampler.sample_beta(
                stick_beta, beta, stick_weights, alpha0,
                alpha, self.AR, self.prop_scale, self.parallel
            )

            # hyper parameters
            alpha = self._update_alpha(stick_beta)
            alpha0 = sampler.sample_alpha0(
                stick_weights, beta, alpha0, self.e0, self.f0,
                self.prop_scale, self.AR)

            # Relabel
            if i > 0 and ident:
                cost = c0.copy()
                for Z, Zr in zip(zhat, zref):
                    _get_cost(Zr, Z, cost)
                # Column indices of the assignment give the permutation
                # applied to every component-indexed quantity.
                _, iii = np.where(munkres(cost))
                beta = beta[iii]
                weights = weights[:, iii]
                mu = mu[iii]
                Sigma = Sigma[iii]

            # save
            if i >= 0:
                self.beta[i] = beta
                self.weights[i] = weights
                self.alpha[i] = alpha
                self.alpha0[i] = alpha0
                self.mu[i] = mu
                self.Sigma[i] = Sigma
            elif (nburn + i + 1) % self._tune_interval == 0:
                self._tune()

        self.stick_beta = stick_beta.copy()
    finally:
        # Always release the GPU workers, also after a failed iteration.
        if gpu_started:
            kill_GPUWorkers(self.gpu_workers)
def sample(self, niter=1000, nburn=0, thin=1, ident=False):
    """
    Run ``nburn + niter`` sampler iterations, storing the last ``niter``.

    Parameters
    ----------
    niter : int
        Number of stored iterations.  The draw of iteration ``i``
        (``0 <= i < niter``) is written to ``self.weights[i]``,
        ``self.alpha[i]``, ``self.mu[i]`` and ``self.Sigma[i]``.
    nburn : int
        Number of burn-in iterations run first; their draws are not
        stored.
    thin : int
        Currently unused by this method: every one of the ``niter``
        draws is stored.
    ident : bool
        If True the munkres identification algorithm is used, matching
        each draw to a reference classification computed at iteration 0
        (i.e. from the INITIAL VALUES when ``nburn`` is 0).  These
        should be selected with great care; we recommend using the EM
        algorithm.  Burn-in doesn't make much sense in this case.

    Raises
    ------
    IndexError
        Re-raised from ``_get_cost`` (after printing a diagnostic) when
        the reference and current labels cannot be tabulated.

    Notes
    -----
    CPU workers and GPU workers started by this method are shut down
    again even when an iteration raises.
    """
    self._setup_storage(niter)

    workers_started = False
    gpu_started = False
    try:
        # start threads
        if self.parallel:
            # Flag first: queues still get their sentinel if a start() fails.
            workers_started = True
            for w in self.workers:
                w.start()

        if self.gpu:
            self.gpu_workers = init_GPUWorkers(self.data, self.dev_list)
            gpu_started = True

        alpha = self._alpha0
        weights = self._weights0
        mu = self._mu0
        Sigma = self._Sigma0

        if self.verbose:
            if self.gpu:
                print("starting GPU enabled MCMC")
            else:
                print("starting MCMC")

        # Negative indices are burn-in, non-negative ones are stored.
        for i in range(-nburn, niter):
            if i == 0 and ident:
                # Extra labelling pass whose classification becomes the
                # reference for all later relabelling.
                _, zref = self._update_labels(mu, Sigma, weights, True)
                c0 = np.zeros((self.ncomp, self.ncomp), dtype=np.double)
                # Row j starts at the number of reference points labelled j.
                for j in range(self.ncomp):
                    c0[j, :] = np.sum(zref == j)

            # An integer (non-bool) verbosity doubles as the print interval.
            if (isinstance(self.verbose, int) and self.verbose
                    and not isinstance(self.verbose, bool)
                    and i % self.verbose == 0):
                print(i)

            labels, zhat = self._update_labels(mu, Sigma, weights, ident)
            mu, Sigma, counts = self._update_mu_Sigma(Sigma, labels)
            stick_weights, weights = self._update_stick_weights(counts, alpha)
            alpha = self._update_alpha(stick_weights)

            ## relabel if needed:
            if i > 0 and ident:
                cost = c0.copy()
                try:
                    _get_cost(zref, zhat, cost)  # cython!!
                except IndexError:
                    # Fail loudly instead of dropping into a debugger and
                    # then relabelling with a partially filled cost matrix.
                    print('Something stranged happened ... do zref and zhat look correct?')
                    raise
                # Column indices of the assignment give the permutation
                # applied to every component-indexed quantity.
                _, iii = np.where(munkres(cost))
                weights = weights[iii]
                mu = mu[iii]
                Sigma = Sigma[iii]

            if i >= 0:
                self.weights[i] = weights
                self.alpha[i] = alpha
                self.mu[i] = mu
                self.Sigma[i] = Sigma
    finally:
        # clean up threads: a None on each work queue is the shutdown signal.
        if workers_started:
            for core in range(self.num_cores):
                self.work_queue[core].put(None)
        if gpu_started:
            kill_GPUWorkers(self.gpu_workers)