def optimize(self, maxiter=1000, perdiff=0.1):
    """
    Optimizes the posterior distribution given the data.

    Every iteration calls maximize_mu, maximize_Sigma, maximize_weights,
    expected_alpha and expected_labels (in that order) and then
    re-evaluates log_posterior.  The algorithm terminates when either the
    maximum number of iterations is reached or the change in the log
    posterior between two successive iterations is no larger than
    0.01 * perdiff.

    Parameters
    ----------
    maxiter : int
        Upper bound on the number of iterations.
    perdiff : float
        Convergence tolerance.  It is scaled by 0.01 and compared with the
        absolute change in log_posterior().
        NOTE(review): this was described as a "percent difference", but the
        comparison is not normalised by the posterior value -- confirm
        which of the two is intended before changing the criterion.

    Notes
    -----
    If self.verbose is truthy a start banner is printed; if it is a
    non-bool int, the iteration number and current log posterior are
    printed every self.verbose iterations.

    Worker shutdown happens in a ``finally`` clause so that the CPU worker
    queues and the GPU workers are released even when one of the update
    steps raises.
    """
    cpu_workers_started = False
    gpu_workers_started = False
    try:
        if self.parallel:
            from multiprocessing import RawArray
            # Flat shared buffer of doubles, viewed as an (nobs, ncomp) array
            # on this side and handed to every worker.
            self.shared_dens_mem = RawArray('d', self.nobs * self.ncomp)
            self.shared_dens = np.frombuffer(self.shared_dens_mem).reshape(
                self.nobs, self.ncomp)
            cpu_workers_started = True
            for w in self.workers:
                w.set_dens(self.shared_dens_mem)
                w.start()

        # start threads
        if self.gpu:
            self.gpu_workers = init_GPUWorkers(self.data, self.dev_list)
            gpu_workers_started = True

        self.expected_labels()
        ll_2 = self.log_posterior()
        # Start from +inf rather than an arbitrary finite value so that the
        # first comparison cannot accidentally look "converged" (the former
        # start value 1 skipped the whole loop when the initial log
        # posterior happened to be close to 1).
        ll_1 = np.inf
        it = 0
        if self.verbose:
            if self.gpu:
                print("starting GPU enabled BEM")
            else:
                print("starting BEM")

        while np.abs(ll_1 - ll_2) > 0.01 * perdiff and it < maxiter:
            # Progress output only for integer (non-bool) verbosity levels.
            if (isinstance(self.verbose, int) and self.verbose
                    and not isinstance(self.verbose, bool)):
                if it % self.verbose == 0:
                    print("%d:, %f" % (it, ll_2))
            it += 1

            self.maximize_mu()
            self.maximize_Sigma()
            self.maximize_weights()
            self.expected_alpha()
            self.expected_labels()
            ll_1 = ll_2
            ll_2 = self.log_posterior()
    finally:
        if gpu_workers_started:
            kill_GPUWorkers(self.gpu_workers)
        if cpu_workers_started:
            # A None item on each work queue tells the worker to stop.
            for core in range(self.num_cores):
                self.work_queue[core].put(None)
def optimize(self, maxiter=1000, perdiff=0.1):
    """
    Optimizes the posterior distribution given the data.

    One iteration consists of maximize_mu, maximize_Sigma,
    maximize_weights, expected_alpha and expected_labels followed by a
    fresh evaluation of log_posterior.  Iteration stops as soon as
    ``maxiter`` iterations have been run or the log posterior changed by
    at most ``0.01 * perdiff`` between two successive iterations.

    Parameters
    ----------
    maxiter : int
        Maximum number of iterations.
    perdiff : float
        Tolerance; multiplied by 0.01 and compared with the absolute
        difference of successive log_posterior() values.
        NOTE(review): documented elsewhere as a percent difference, yet the
        code does not divide by the posterior -- confirm the intent.

    Notes
    -----
    A truthy self.verbose prints a start banner; an integer (non-bool)
    self.verbose additionally prints ``iteration, log posterior`` every
    self.verbose iterations.

    Clean-up of the CPU workers (a None on every work queue) and of the
    GPU workers is done in ``finally`` so it also runs when an update step
    fails.
    """
    started_cpu = False
    started_gpu = False
    try:
        if self.parallel:
            from multiprocessing import RawArray
            # Shared doubles, exposed locally as an (nobs, ncomp) array.
            self.shared_dens_mem = RawArray('d', self.nobs * self.ncomp)
            self.shared_dens = np.frombuffer(
                self.shared_dens_mem).reshape(self.nobs, self.ncomp)
            started_cpu = True
            for worker in self.workers:
                worker.set_dens(self.shared_dens_mem)
                worker.start()

        # start threads
        if self.gpu:
            self.gpu_workers = init_GPUWorkers(self.data, self.dev_list)
            started_gpu = True

        self.expected_labels()
        ll_2 = self.log_posterior()
        # +inf guarantees the first convergence test is decided by maxiter
        # alone; a finite start value (previously 1) could match the initial
        # posterior by coincidence and skip the optimisation.
        ll_1 = np.inf
        it = 0
        if self.verbose:
            if self.gpu:
                print("starting GPU enabled BEM")
            else:
                print("starting BEM")

        while np.abs(ll_1 - ll_2) > 0.01 * perdiff and it < maxiter:
            # Only integer (non-bool) verbosity levels trigger progress lines.
            if (isinstance(self.verbose, int) and self.verbose
                    and not isinstance(self.verbose, bool)):
                if it % self.verbose == 0:
                    print("%d:, %f" % (it, ll_2))
            it += 1

            self.maximize_mu()
            self.maximize_Sigma()
            self.maximize_weights()
            self.expected_alpha()
            self.expected_labels()
            ll_1 = ll_2
            ll_2 = self.log_posterior()
    finally:
        if started_gpu:
            kill_GPUWorkers(self.gpu_workers)
        if started_cpu:
            for core in range(self.num_cores):
                self.work_queue[core].put(None)
def optimize(self, maxiter=1000, perdiff=0.1):
    """
    Optimizes the posterior distribution given the data.

    Every iteration calls maximize_mu, maximize_Sigma, maximize_weights,
    expected_alpha and expected_labels and then re-evaluates
    log_posterior.  The algorithm terminates when either the maximum
    number of iterations is reached or the change in the log posterior
    between two successive iterations is no larger than 0.01 * perdiff.

    Parameters
    ----------
    maxiter : int
        Upper bound on the number of iterations.
    perdiff : float
        Convergence tolerance, scaled by 0.01 and compared with the
        absolute change in log_posterior().
        NOTE(review): described as a "percent difference" historically,
        but the comparison is not relative -- confirm the intended
        criterion.

    Notes
    -----
    If self.verbose is truthy a start banner is printed; if it is a
    non-bool int, the iteration number and log posterior are printed
    every self.verbose iterations.  GPU workers are shut down in a
    ``finally`` clause so they are released even if an update step raises.
    """
    gpu_workers_started = False
    try:
        # start threads
        if self.gpu:
            self.gpu_workers = init_GPUWorkers(self.data, self.dev_list)
            gpu_workers_started = True

        self.expected_labels()
        ll_2 = self.log_posterior()
        # +inf instead of the former finite start value 1: an initial log
        # posterior close to 1 must not be mistaken for convergence.
        ll_1 = np.inf
        it = 0
        if self.verbose:
            if self.gpu:
                print("starting GPU enabled BEM")
            else:
                print("starting BEM")

        while np.abs(ll_1 - ll_2) > 0.01 * perdiff and it < maxiter:
            # Progress output only for integer (non-bool) verbosity levels.
            if (isinstance(self.verbose, int) and self.verbose
                    and not isinstance(self.verbose, bool)):
                if it % self.verbose == 0:
                    print("%d:, %f" % (it, ll_2))
            it += 1

            self.maximize_mu()
            self.maximize_Sigma()
            self.maximize_weights()
            self.expected_alpha()
            self.expected_labels()
            ll_1 = ll_2
            ll_2 = self.log_posterior()
    finally:
        if gpu_workers_started:
            kill_GPUWorkers(self.gpu_workers)
def optimize(self, maxiter=1000, perdiff=0.1):
    """
    Optimizes the posterior distribution given the data.

    One iteration runs maximize_mu, maximize_Sigma, maximize_weights,
    expected_alpha and expected_labels, then evaluates log_posterior
    again.  Iteration stops once ``maxiter`` iterations were run or the
    log posterior moved by at most ``0.01 * perdiff`` between two
    successive iterations.

    Parameters
    ----------
    maxiter : int
        Maximum number of iterations.
    perdiff : float
        Tolerance; multiplied by 0.01 and compared with the absolute
        difference of successive log_posterior() values.
        NOTE(review): referred to as a percent difference, but no
        normalisation by the posterior takes place -- confirm the intent.

    Notes
    -----
    A truthy self.verbose prints a start banner; an integer (non-bool)
    self.verbose also prints ``iteration, log posterior`` every
    self.verbose iterations.  The GPU workers are killed in ``finally`` so
    a failing update step does not leave them running.
    """
    started_gpu = False
    try:
        # start threads
        if self.gpu:
            self.gpu_workers = init_GPUWorkers(self.data, self.dev_list)
            started_gpu = True

        self.expected_labels()
        ll_2 = self.log_posterior()
        # Previously initialised to 1, which ended the loop before it began
        # whenever the initial log posterior was within tolerance of 1.
        ll_1 = np.inf
        it = 0
        if self.verbose:
            if self.gpu:
                print("starting GPU enabled BEM")
            else:
                print("starting BEM")

        while np.abs(ll_1 - ll_2) > 0.01 * perdiff and it < maxiter:
            # Only integer (non-bool) verbosity levels trigger progress lines.
            if (isinstance(self.verbose, int) and self.verbose
                    and not isinstance(self.verbose, bool)):
                if it % self.verbose == 0:
                    print("%d:, %f" % (it, ll_2))
            it += 1

            self.maximize_mu()
            self.maximize_Sigma()
            self.maximize_weights()
            self.expected_alpha()
            self.expected_labels()
            ll_1 = ll_2
            ll_2 = self.log_posterior()
    finally:
        if started_gpu:
            kill_GPUWorkers(self.gpu_workers)
def sample(self, niter=1000, nburn=100, thin=1, tune_interval=100,
           ident=False):
    r"""
    Performs MCMC sampling of the posterior.

    \beta must be sampled using Metropolis Hastings and its proposal
    distribution will be tuned every tune_interval iterations during the
    burnin period. It is suggested that an ample burnin is used; the AR
    parameter stores the acceptance rate for the stick weights of \beta
    and \alpha_0.

    Parameters
    ----------
    niter : int
        Number of iterations whose draws are stored (indices 0..niter-1).
    nburn : int
        Number of burn-in iterations run before storing starts; self._tune()
        is called during burn-in every ``tune_interval`` iterations.
    thin : int
        Forwarded to self._setup_storage(niter, thin); this method itself
        stores a draw on every non-burn-in iteration.
    tune_interval : int
        Spacing (in burn-in iterations) between calls to self._tune().
    ident : bool
        If true, the labels obtained at iteration 0 are kept as reference
        and every later iteration is relabelled by solving an assignment
        problem (munkres) on the cost built by _get_cost.

    Notes
    -----
    The docstring is a raw string so that ``\beta`` and ``\alpha_0`` are
    not interpreted as the escape sequences ``\b`` and ``\a``.  Worker
    shutdown (GPU workers, and a None on each CPU work queue) is performed
    in ``finally`` so it also happens when an update step raises.
    """
    if self.verbose:
        if self.gpu:
            print("starting GPU enabled MCMC")
        else:
            print("starting MCMC")

    gpu_workers_started = False
    cpu_workers_started = False
    try:
        # multiGPU init
        if self.gpu:
            self.gpu_workers = init_GPUWorkers(self.data, self.dev_list)
            gpu_workers_started = True
        if self.parallel:
            cpu_workers_started = True
            for w in self.workers:
                w.start()

        self._ident = ident
        self._setup_storage(niter, thin)
        self._tune_interval = tune_interval

        alpha = self._alpha0
        alpha0 = self._alpha00
        weights = self._weights0
        beta = self._beta0
        stick_beta = self._stick_beta0
        mu = self._mu0
        Sigma = self._Sigma0

        # Negative indices are burn-in iterations; storage starts at 0.
        for i in range(-nburn, niter):
            if (isinstance(self.verbose, int) and self.verbose
                    and not isinstance(self.verbose, bool)):
                if i % self.verbose == 0:
                    print(i)

            ## update labels
            labels, zhat = self._update_labels(mu, Sigma, weights)

            ## Get initial reference if needed
            if i == 0 and ident:
                zref = [zhat[ii].copy() for ii in range(self.ngroups)]
                # Row j of c0 holds the number of reference labels equal to
                # j, summed over all groups (same value in every column).
                c0 = np.zeros((self.ncomp, self.ncomp), dtype=np.double)
                for j in range(self.ncomp):
                    for ii in range(self.ngroups):
                        c0[j, :] += np.sum(zref[ii] == j)

            ## update mu and sigma
            mu, Sigma, counts = self._update_mu_Sigma(Sigma, labels,
                                                      self.alldata)

            ## update weights, masks
            stick_weights, weights = self._update_stick_weights(
                counts, beta, alpha0)
            stick_beta, beta = self._update_beta(stick_beta, beta,
                                                 stick_weights, alpha0, alpha)

            ## hyper parameters
            alpha = self._update_alpha(stick_beta)
            alpha0 = self._update_alpha0(stick_weights, beta, alpha0)

            ## Relabel
            if i > 0 and ident:
                cost = c0.copy()
                for Z, Zr in zip(zhat, zref):
                    _get_cost(Zr, Z, cost)
                _, iii = np.where(munkres(cost))
                beta = beta[iii]
                weights = weights[:, iii]
                mu = mu[iii]
                Sigma = Sigma[iii]

            ## save
            if i >= 0:
                self.beta[i] = beta
                self.weights[i] = weights
                self.alpha[i] = alpha
                self.alpha0[i] = alpha0
                self.mu[i] = mu
                self.Sigma[i] = Sigma
            elif (nburn + i + 1) % self._tune_interval == 0:
                self._tune()

        self.stick_beta = stick_beta.copy()
    finally:
        if gpu_workers_started:
            kill_GPUWorkers(self.gpu_workers)
        if cpu_workers_started:
            # A None item on each work queue tells the worker to stop.
            for ii in range(len(self.workers)):
                self.work_queue[ii].put(None)
def sample(self, niter=1000, nburn=0, thin=1, ident=False):
    """
    Samples niter + nburn iterations, only storing the last niter draws.

    If ident is True the munkres identification algorithm will be used,
    matching to the labels obtained from the INITIAL VALUES at iteration
    0. These should be selected with great care. We recommend using the
    EM algorithm. Also .. burning doesn't make much sense in this case.

    Parameters
    ----------
    niter : int
        Number of stored iterations (indices 0..niter-1).
    nburn : int
        Number of iterations run before storing starts.
    thin : int
        NOTE(review): accepted but not referenced anywhere in this method
        (storage is set up with niter only) -- confirm whether thinning is
        expected to happen here.
    ident : bool
        Enable relabelling against the reference labels of iteration 0.

    Raises
    ------
    IndexError
        Re-raised from _get_cost after printing a hint; this used to drop
        into an interactive pdb session, which blocks unattended runs.

    Notes
    -----
    GPU workers are shut down in ``finally`` so they are released even
    when an update step raises.
    """
    self._setup_storage(niter)

    gpu_workers_started = False
    try:
        # start threads
        # (starting the CPU workers via w.start() is disabled in this
        # version of the sampler)
        if self.gpu:
            self.gpu_workers = init_GPUWorkers(self.data, self.dev_list)
            gpu_workers_started = True

        alpha = self._alpha0
        weights = self._weights0
        mu = self._mu0
        Sigma = self._Sigma0

        if self.verbose:
            if self.gpu:
                print("starting GPU enabled MCMC")
            else:
                print("starting MCMC")

        # Negative indices are burn-in iterations; storage starts at 0.
        for i in range(-nburn, niter):
            if i == 0 and ident:
                # Extra label draw that serves as relabelling reference.
                _, zref = self._update_labels(mu, Sigma, weights, True)
                # Row j of c0 holds the number of reference labels equal
                # to j (same value in every column).
                c0 = np.zeros((self.ncomp, self.ncomp), dtype=np.double)
                for j in range(self.ncomp):
                    c0[j, :] = np.sum(zref == j)

            if (isinstance(self.verbose, int) and self.verbose
                    and not isinstance(self.verbose, bool)):
                if i % self.verbose == 0:
                    print(i)

            labels, zhat = self._update_labels(mu, Sigma, weights, ident)
            counts = self._update_mu_Sigma(mu, Sigma, labels)
            stick_weights, weights = self._update_stick_weights(counts,
                                                                alpha)
            alpha = self._update_alpha(stick_weights)

            ## relabel if needed:
            if i > 0 and ident:
                cost = c0.copy()
                try:
                    _get_cost(zref, zhat, cost)  # cython!!
                except IndexError:
                    print('Something stranged happened ... do zref and zhat look correct?')
                    raise
                _, iii = np.where(munkres(cost))
                weights = weights[iii]
                mu = mu[iii]
                Sigma = Sigma[iii]

            if i >= 0:
                self.weights[i] = weights
                self.alpha[i] = alpha
                self.mu[i] = mu
                self.Sigma[i] = Sigma
    finally:
        # clean up threads
        # (stopping the CPU workers through the work queues is disabled in
        # this version of the sampler)
        if gpu_workers_started:
            kill_GPUWorkers(self.gpu_workers)
# Shuffle the observation order and keep a shuffled copy of the data.
ind = np.arange(N)
np.random.shuffle(ind)
all_data = data[ind].copy()

# Starting parameters: unit weights, zero means, identity covariances.
w = np.ones(ncomps)
mu = np.zeros((ncomps, J))
Sigma = np.zeros((ncomps, J, J))
Sigma[:] = np.identity(J)

workers = multigpu.init_GPUWorkers(data, gpus)
starttime = time.time()

# Time 1000 rounds of the expected-label and label computations.
for i in range(1000):
    if not i % 100:
        print(i)
    #import pdb; pdb.set_trace()
    ll, ct, xbar, dens = multigpu.get_expected_labels_GPU(
        workers, w, mu, Sigma)
    labels = multigpu.get_labelsGPU(workers, w, mu, Sigma, True)

## make sure host GPU is ok ...
from pycuda.gpuarray import to_gpu
from pycuda.gpuarray import sum as gsum
test = to_gpu(np.ones(100000, dtype=np.int32))
print(gsum(test))

multigpu.kill_GPUWorkers(workers)

print("DONE! it took " + str(time.time() - starttime))
def sample(self, niter=1000, nburn=100, thin=1, tune_interval=100,
           ident=False):
    r"""
    Performs MCMC sampling of the posterior.

    \beta must be sampled using Metropolis Hastings and its proposal
    distribution will be tuned every tune_interval iterations during the
    burnin period. It is suggested that an ample burnin is used; the AR
    parameter stores the acceptance rate for the stick weights of \beta
    and \alpha_0.

    Parameters
    ----------
    niter : int
        Number of iterations whose draws are stored (indices 0..niter-1).
    nburn : int
        Number of burn-in iterations run before storing starts; self._tune()
        is called during burn-in every ``tune_interval`` iterations.
    thin : int
        Forwarded to self._setup_storage(niter, thin); this method itself
        stores a draw on every non-burn-in iteration.
    tune_interval : int
        Spacing (in burn-in iterations) between calls to self._tune().
    ident : bool
        If true, the labels obtained at iteration 0 are kept as reference
        and every later iteration is relabelled by solving an assignment
        problem (munkres) on the cost built by _get_cost.

    Notes
    -----
    The docstring is a raw string so that ``\beta`` and ``\alpha_0`` are
    not interpreted as the escape sequences ``\b`` and ``\a``.  The GPU
    workers are killed in ``finally`` so they are released even when an
    update step raises.
    """
    if self.verbose:
        if self.gpu:
            print("starting GPU enabled MCMC")
        else:
            print("starting MCMC")

    gpu_workers_started = False
    try:
        # multiGPU init
        if self.gpu:
            self.gpu_workers = init_GPUWorkers(self.data, self.dev_list)
            gpu_workers_started = True

        self._ident = ident
        self._setup_storage(niter, thin)
        self._tune_interval = tune_interval

        alpha = self._alpha0
        alpha0 = self._alpha00
        weights = self._weights0
        beta = self._beta0
        stick_beta = self._stick_beta0
        mu = self._mu0
        Sigma = self._Sigma0

        # Negative indices are burn-in iterations; storage starts at 0.
        for i in range(-nburn, niter):
            if (isinstance(self.verbose, int) and self.verbose
                    and not isinstance(self.verbose, bool)):
                if i % self.verbose == 0:
                    print(i)

            # update labels
            labels, zhat = self._update_labels(mu, Sigma, weights)

            # Get initial reference if needed
            if i == 0 and ident:
                zref = [zhat[ii].copy() for ii in range(self.ngroups)]
                # Row j of c0 holds the number of reference labels equal to
                # j, summed over all groups (same value in every column).
                c0 = np.zeros((self.ncomp, self.ncomp), dtype=np.double)
                for j in range(self.ncomp):
                    for ii in range(self.ngroups):
                        c0[j, :] += np.sum(zref[ii] == j)

            # update mu and sigma
            # NOTE(review): only counts is returned here, so mu and Sigma
            # are presumably updated in place -- confirm in
            # _update_mu_Sigma.
            counts = self._update_mu_Sigma(mu, Sigma, labels, self.alldata)

            # update weights, masks
            stick_weights, weights = self._update_stick_weights(
                counts, beta, alpha0)
            stick_beta, beta = sampler.sample_beta(
                stick_beta, beta, stick_weights, alpha0,
                alpha, self.AR, self.prop_scale, self.parallel
            )

            # hyper parameters
            alpha = self._update_alpha(stick_beta)
            alpha0 = sampler.sample_alpha0(stick_weights, beta, alpha0,
                                           self.e0, self.f0,
                                           self.prop_scale, self.AR)

            # Relabel
            if i > 0 and ident:
                cost = c0.copy()
                for Z, Zr in zip(zhat, zref):
                    _get_cost(Zr, Z, cost)
                _, iii = np.where(munkres(cost))
                beta = beta[iii]
                weights = weights[:, iii]
                mu = mu[iii]
                Sigma = Sigma[iii]

            # save
            if i >= 0:
                self.beta[i] = beta
                self.weights[i] = weights
                self.alpha[i] = alpha
                self.alpha0[i] = alpha0
                self.mu[i] = mu
                self.Sigma[i] = Sigma
            elif (nburn + i + 1) % self._tune_interval == 0:
                self._tune()

        self.stick_beta = stick_beta.copy()
    finally:
        if gpu_workers_started:
            kill_GPUWorkers(self.gpu_workers)
def sample(self, niter=1000, nburn=0, thin=1, ident=False):
    """
    Samples niter + nburn iterations, only storing the last niter draws.

    If ident is True the munkres identification algorithm will be used,
    matching to the labels obtained from the INITIAL VALUES at iteration
    0. These should be selected with great care. We recommend using the
    EM algorithm. Also .. burning doesn't make much sense in this case.

    Parameters
    ----------
    niter : int
        Number of stored iterations (indices 0..niter-1).
    nburn : int
        Number of iterations run before storing starts.
    thin : int
        NOTE(review): accepted but not referenced anywhere in this method
        (storage is set up with niter only) -- confirm whether thinning is
        expected to happen here.
    ident : bool
        Enable relabelling against the reference labels of iteration 0.

    Raises
    ------
    IndexError
        Re-raised from _get_cost after printing a hint; this used to drop
        into an interactive pdb session, which blocks unattended runs.

    Notes
    -----
    Worker shutdown (a None on each CPU work queue, and killing the GPU
    workers) is performed in ``finally`` so it also happens when an update
    step raises.
    """
    self._setup_storage(niter)

    cpu_workers_started = False
    gpu_workers_started = False
    try:
        # start threads
        if self.parallel:
            cpu_workers_started = True
            for w in self.workers:
                w.start()

        if self.gpu:
            self.gpu_workers = init_GPUWorkers(self.data, self.dev_list)
            gpu_workers_started = True

        alpha = self._alpha0
        weights = self._weights0
        mu = self._mu0
        Sigma = self._Sigma0

        if self.verbose:
            if self.gpu:
                print("starting GPU enabled MCMC")
            else:
                print("starting MCMC")

        # Negative indices are burn-in iterations; storage starts at 0.
        for i in range(-nburn, niter):
            if i == 0 and ident:
                # Extra label draw that serves as relabelling reference.
                _, zref = self._update_labels(mu, Sigma, weights, True)
                # Row j of c0 holds the number of reference labels equal
                # to j (same value in every column).
                c0 = np.zeros((self.ncomp, self.ncomp), dtype=np.double)
                for j in range(self.ncomp):
                    c0[j, :] = np.sum(zref == j)

            if (isinstance(self.verbose, int) and self.verbose
                    and not isinstance(self.verbose, bool)):
                if i % self.verbose == 0:
                    print(i)

            labels, zhat = self._update_labels(mu, Sigma, weights, ident)
            mu, Sigma, counts = self._update_mu_Sigma(Sigma, labels)
            stick_weights, weights = self._update_stick_weights(counts,
                                                                alpha)
            alpha = self._update_alpha(stick_weights)

            ## relabel if needed:
            if i > 0 and ident:
                cost = c0.copy()
                try:
                    _get_cost(zref, zhat, cost)  # cython!!
                except IndexError:
                    print('Something stranged happened ... do zref and zhat look correct?')
                    raise
                _, iii = np.where(munkres(cost))
                weights = weights[iii]
                mu = mu[iii]
                Sigma = Sigma[iii]

            if i >= 0:
                self.weights[i] = weights
                self.alpha[i] = alpha
                self.mu[i] = mu
                self.Sigma[i] = Sigma
    finally:
        # clean up threads
        if cpu_workers_started:
            # A None item on each work queue tells the worker to stop.
            for core in range(self.num_cores):
                self.work_queue[core].put(None)
        if gpu_workers_started:
            kill_GPUWorkers(self.gpu_workers)
# Draw a random ordering of the observations and copy the data in that order.
ind = np.arange(N)
np.random.shuffle(ind)
all_data = data[ind].copy()

# Initial mixture parameters: unit weights, zero means, identity covariances.
w = np.ones(ncomps)
mu = np.zeros((ncomps, J))
Sigma = np.zeros((ncomps, J, J))
Sigma[:] = np.identity(J)

workers = multigpu.init_GPUWorkers(data, gpus)
starttime = time.time()

# 1000 timed rounds of the two multi-GPU label routines, with a progress
# line every 100 rounds.
for i in range(1000):
    if not i % 100:
        print(i)
    #import pdb; pdb.set_trace()
    ll, ct, xbar, dens = multigpu.get_expected_labels_GPU(workers, w, mu,
                                                          Sigma)
    labels = multigpu.get_labelsGPU(workers, w, mu, Sigma, True)

## make sure host GPU is ok ...
from pycuda.gpuarray import to_gpu
from pycuda.gpuarray import sum as gsum
test = to_gpu(np.ones(100000, dtype=np.int32))
print(gsum(test))

multigpu.kill_GPUWorkers(workers)

print("DONE! it took " + str(time.time() - starttime))