def fit(self, datasets, verbose=False, tune_interval=100):
    """
    Fit the hierarchical mixture model to a group of datasets.

    datasets = FCMCollection (converted with to_list()) or a sequence of
               2-d arrays; every array must have the same number of columns
    verbose = forwarded to HDPNormalMixture
    tune_interval = forwarded to HDPNormalMixture.sample

    Each dataset is standardized with the mean and standard deviation of
    the pooled data (kept in self.m and self.s) before sampling.

    Returns whatever self.get_results() returns.
    Raises RuntimeError when the datasets do not share a column count.
    """
    if isinstance(datasets, FCMCollection):
        datasets = datasets.to_list()

    self.d = datasets[0].shape[1]
    # Validate before pooling: vstack (presumably numpy's) refuses arrays of
    # differing width with its own ValueError, so checking afterwards could
    # never raise the intended RuntimeError.
    for i in datasets:
        if i.shape[1] != self.d:
            raise RuntimeError("Datasets shape do not match")

    datasets = [i.copy().astype('double') for i in datasets]
    self.ndatasets = len(datasets)

    # Pooled statistics across all datasets, per column.
    total_data = vstack(datasets)
    self.m = mean(total_data, 0)
    self.s = std(total_data, 0)
    standardized = [(i - self.m) / self.s for i in datasets]

    if self.prior_mu is not None:
        self._load_mu_at_fit()
    if self.prior_sigma is not None:
        self._load_sigma_at_fit()

    if self.seed is not None:
        seed(self.seed)
    else:
        # No seed supplied: fall back to the current microsecond count.
        from datetime import datetime
        seed(datetime.now().microsecond)

    self.hdp = HDPNormalMixture(
        standardized,
        ncomp=self.nclusts,
        gamma0=self.gamma0,
        m0=self.m0,
        nu0=self.nu0,
        Phi0=self.Phi0,
        e0=self.e0,
        f0=self.f0,
        g0=self.g0,
        h0=self.h0,
        mu0=self._prior_mu,
        Sigma0=self._prior_sigma,
        weights0=self._prior_pi,
        alpha0=self.alpha0,
        gpu=self.device,
        parallel=self.parallel,
        verbose=verbose)
    self.hdp.sample(
        niter=self.niter,
        nburn=self.burnin,
        thin=1,
        ident=self.ident,
        tune_interval=tune_interval)

    self._run = True  # we've fit the mixture model

    return self.get_results()
def fit(self, datasets, verbose=False, tune_interval=100):
    """
    Fit the hierarchical mixture model to a group of datasets.

    datasets = FCMcollection (converted with to_list()) or a sequence of
               2-d arrays; every array must have the same number of columns
    verbose = forwarded to HDPNormalMixture
    tune_interval = forwarded to HDPNormalMixture.sample

    Each dataset is standardized with the mean and standard deviation of
    the pooled data (kept in self.m and self.s) before sampling.

    Returns whatever self.get_results() returns.
    Raises RuntimeError when the datasets do not share a column count.
    """
    if isinstance(datasets, FCMcollection):
        datasets = datasets.to_list()

    self.d = datasets[0].shape[1]
    # Validate before pooling: vstack (presumably numpy's) refuses arrays of
    # differing width with its own ValueError, so checking afterwards could
    # never raise the intended RuntimeError.
    for i in datasets:
        if i.shape[1] != self.d:
            raise RuntimeError("Datasets shape do not match")

    datasets = [i.copy().astype('double') for i in datasets]
    self.ndatasets = len(datasets)

    # Pooled statistics across all datasets, per column.
    total_data = vstack(datasets)
    self.m = mean(total_data, 0)
    self.s = std(total_data, 0)
    standardized = [(i - self.m) / self.s for i in datasets]

    if self.prior_mu is not None:
        self._load_mu_at_fit()
    if self.prior_sigma is not None:
        self._load_sigma_at_fit()

    if self.seed is not None:
        seed(self.seed)
    else:
        # No seed supplied: fall back to the current microsecond count.
        from datetime import datetime
        seed(datetime.now().microsecond)

    self.hdp = HDPNormalMixture(
        standardized,
        ncomp=self.nclusts,
        gamma0=self.gamma0,
        m0=self.m0,
        nu0=self.nu0,
        Phi0=self.Phi0,
        e0=self.e0,
        f0=self.f0,
        g0=self.g0,
        h0=self.h0,
        mu0=self._prior_mu,
        Sigma0=self._prior_sigma,
        weights0=self._prior_pi,
        alpha0=self.alpha0,
        gpu=self.device,
        parallel=self.parallel,
        verbose=verbose)
    self.hdp.sample(
        niter=self.niter,
        nburn=self.burnin,
        thin=1,
        ident=self.ident,
        tune_interval=tune_interval)

    self._run = True  # we've fit the mixture model

    return self.get_results()
import numpy.random as npr from dpmix import HDPNormalMixture if __name__ == '__main__': nclust = 256 niter = 10 burnin = 10 device = 1 max_events = 50000 num_files = 10 seed = 9 #npr.seed(seed) for it in range(10, 20): xs = [] for i in range(num_files): print i, xs.append(npr.uniform(-5,5,(max_events, 5))) print mcmc = HDPNormalMixture(xs, ncomp=nclust, gpu=device, parallel=True, verbose=2) mcmc.sample(burnin, nburn=0, tune_interval=5) imcmc = HDPNormalMixture(mcmc, verbose=2) imcmc.sample(niter, nburn=0, ident=True) del mcmc del imcmc
from dpmix import HDPNormalMixture #import gpustats as gs if __name__ == '__main__': N = int(1e5) K = 2 J = 4 ncomps = 3 true_labels, data = generate_data(n=N, k=K, ncomps=ncomps) data = data - data.mean(0) data = data / data.std(0) #shuffle the data ... ind = np.arange(N) np.random.shuffle(ind) all_data = data[ind].copy() data = [all_data[(N / J * i):(N / J * (i + 1))].copy() for i in range(J)] mcmc = HDPNormalMixture(data, ncomp=100, gpu=[0, 1, 2], parallel=True, verbose=100) mcmc.sample(2, nburn=5, tune_interval=100) imcmc = HDPNormalMixture(mcmc, verbose=100) imcmc.sample(2, nburn=0, ident=True) print imcmc.mu[-1] print imcmc.weights[-1] print imcmc.beta[-1]
class HDPMixtureModel(DPMixtureModel):
    '''
    HDPMixtureModel(nclusts, niter=1000, burnin= 100, last= None)
    nclusts = number of clusters to fit
    niter = number of mcmc iterations
    burnin = number of mcmc burnin iterations
    last = number of mcmc iterations to draw samples from. if None last = niter
    '''

    def __init__(self, *args, **kwargs):
        """
        Forward all arguments to DPMixtureModel and set g0 and h0 to 0.1.
        """
        super(HDPMixtureModel, self).__init__(*args, **kwargs)
        self.g0 = 0.1
        self.h0 = 0.1

    def fit(self, datasets, verbose=False, tune_interval=100):
        """
        Fit the hierarchical mixture model to a group of datasets.

        datasets = FCMcollection (converted with to_list()) or a sequence
                   of 2-d arrays; every array must have the same number of
                   columns
        verbose = forwarded to HDPNormalMixture
        tune_interval = forwarded to HDPNormalMixture.sample

        Each dataset is standardized with the mean and standard deviation
        of the pooled data (kept in self.m and self.s) before sampling.

        Returns whatever self.get_results() returns.
        Raises RuntimeError when the datasets do not share a column count.
        """
        if isinstance(datasets, FCMcollection):
            datasets = datasets.to_list()

        self.d = datasets[0].shape[1]
        # Validate before pooling: vstack (presumably numpy's) refuses
        # arrays of differing width with its own ValueError, so checking
        # afterwards could never raise the intended RuntimeError.
        for i in datasets:
            if i.shape[1] != self.d:
                raise RuntimeError("Datasets shape do not match")

        datasets = [i.copy().astype('double') for i in datasets]
        self.ndatasets = len(datasets)

        # Pooled statistics across all datasets, per column.
        total_data = vstack(datasets)
        self.m = mean(total_data, 0)
        self.s = std(total_data, 0)
        standardized = [(i - self.m) / self.s for i in datasets]

        if self.prior_mu is not None:
            self._load_mu_at_fit()
        if self.prior_sigma is not None:
            self._load_sigma_at_fit()

        if self.seed is not None:
            seed(self.seed)
        else:
            # No seed supplied: fall back to the current microsecond count.
            from datetime import datetime
            seed(datetime.now().microsecond)

        self.hdp = HDPNormalMixture(
            standardized,
            ncomp=self.nclusts,
            gamma0=self.gamma0,
            m0=self.m0,
            nu0=self.nu0,
            Phi0=self.Phi0,
            e0=self.e0,
            f0=self.f0,
            g0=self.g0,
            h0=self.h0,
            mu0=self._prior_mu,
            Sigma0=self._prior_sigma,
            weights0=self._prior_pi,
            alpha0=self.alpha0,
            gpu=self.device,
            parallel=self.parallel,
            verbose=verbose)
        self.hdp.sample(
            niter=self.niter,
            nburn=self.burnin,
            thin=1,
            ident=self.ident,
            tune_interval=tune_interval)

        self._run = True  # we've fit the mixture model

        return self.get_results()

    def get_results(self):
        """
        get the results of the fitted mixture model

        Uses the final self.last draws of the sampler (self.last is set to
        self.niter when it is None). Means and covariances are mapped back
        from standardized units with self.m and self.s.

        Returns an HDPMixture when self._run is true; otherwise falls
        through and returns None.
        """
        if self.last is None:
            self.last = self.niter

        if self._run:
            # One flattened weight vector per dataset over the kept draws.
            pis = array([
                self.hdp.weights[-self.last:, k, :].flatten()
                for k in range(self.ndatasets)])
            # Undo the standardization applied in fit().
            mus = (
                self.hdp.mu[-self.last:].reshape(
                    self.nclusts * self.last, self.d) * self.s + self.m)
            sigmas = (
                self.hdp.Sigma[-self.last:].reshape(
                    self.nclusts * self.last, self.d, self.d)
                * outer(self.s, self.s))
            return HDPMixture(
                pis, mus, sigmas, self.last, self.m, self.s, self.ident)
import numpy.random as npr from dpmix import HDPNormalMixture if __name__ == '__main__': nclust = 256 niter = 10 burnin = 10 device = 1 max_events = 50000 num_files = 10 seed = 9 #npr.seed(seed) for it in range(10, 20): xs = [] for i in range(num_files): print i, xs.append(npr.uniform(-5, 5, (max_events, 5))) print mcmc = HDPNormalMixture(xs, ncomp=nclust, gpu=device, parallel=True, verbose=2) mcmc.sample(burnin, nburn=0, tune_interval=5) imcmc = HDPNormalMixture(mcmc, verbose=2) imcmc.sample(niter, nburn=0, ident=True) del mcmc del imcmc
class HDPMixtureModel(DPMixtureModel):
    '''
    HDPMixtureModel(nclusts, niter=1000, burnin= 100, last= None)
    nclusts = number of clusters to fit
    niter = number of mcmc iterations
    burnin = number of mcmc burnin iterations
    last = number of mcmc iterations to draw samples from. if None last = niter
    '''

    def fit(self, datasets, verbose=False, tune_interval=100):
        """
        Fit the hierarchical mixture model to a group of datasets.

        datasets = FCMcollection (converted with to_list()) or a sequence
                   of 2-d arrays; every array must have the same number of
                   columns
        verbose = forwarded to HDPNormalMixture
        tune_interval = forwarded to HDPNormalMixture.sample

        Each dataset is standardized with the mean and standard deviation
        of the pooled data (kept in self.m and self.s) before sampling.

        Returns whatever self.get_results() returns.
        Raises RuntimeError when the datasets do not share a column count.
        """
        if isinstance(datasets, FCMcollection):
            datasets = datasets.to_list()

        self.d = datasets[0].shape[1]
        # Validate before pooling: vstack (presumably numpy's) refuses
        # arrays of differing width with its own ValueError, so checking
        # afterwards could never raise the intended RuntimeError.
        for i in datasets:
            if i.shape[1] != self.d:
                raise RuntimeError("Datasets shape do not match")

        datasets = [i.copy() for i in datasets]
        self.ndatasets = len(datasets)

        # Pooled statistics across all datasets, per column.
        total_data = vstack(datasets)
        self.m = mean(total_data, 0)
        self.s = std(total_data, 0)
        standardized = [(i - self.m) / self.s for i in datasets]

        if self.prior_mu is not None:
            self._load_mu_at_fit()
        if self.prior_sigma is not None:
            self._load_sigma_at_fit()

        if self.seed is not None:
            seed(self.seed)
        else:
            # No seed supplied: fall back to the current microsecond count.
            from datetime import datetime
            seed(datetime.now().microsecond)

        self.hdp = HDPNormalMixture(
            standardized,
            ncomp=self.nclusts,
            gamma0=self.gamma0,
            m0=self.m0,
            nu0=self.nu0,
            Phi0=self.Phi0,
            e0=self.e0,
            f0=self.f0,
            mu0=self._prior_mu,
            Sigma0=self._prior_sigma,
            weights0=self._prior_pi,
            alpha0=self.alpha0,
            gpu=self.device,
            parallel=self.parallel,
            verbose=verbose)
        self.hdp.sample(
            niter=self.niter,
            nburn=self.burnin,
            thin=1,
            ident=self.ident,
            tune_interval=tune_interval)

        self._run = True  # we've fit the mixture model

        return self.get_results()

    def get_results(self):
        """
        get the results of the fitted mixture model

        Uses the final self.last draws of the sampler (self.last is set to
        self.niter when it is None), walking from the newest draw
        backwards. Each cluster's mean and covariance are mapped back from
        standardized units with self.m and self.s; the standardized values
        are kept on the cluster as nmu and nsigma.

        Returns a list with one DPMixture per dataset when self._run is
        true; otherwise falls through and returns None.
        """
        if self.last is None:
            self.last = self.niter

        if self._run:
            # Covariance rescaling factor; identical for every cluster, so
            # compute it once instead of inside the innermost loop.
            scale = outer(self.s, self.s)

            allresults = []
            for k in range(self.ndatasets):
                rslts = []
                for i in range(self.last):
                    draw = -(i + 1)  # index counted from the last draw
                    for j in range(self.nclusts):
                        tmp = DPCluster(
                            self.hdp.weights[draw, k, j],
                            (self.hdp.mu[draw, j] * self.s) + self.m,
                            self.hdp.Sigma[draw, j] * scale)
                        tmp.nmu = self.hdp.mu[draw, j]
                        tmp.nsigma = self.hdp.Sigma[draw, j]
                        rslts.append(tmp)
                allresults.append(
                    DPMixture(rslts, self.last, self.m, self.s, self.ident))
            return allresults
all_data = np.r_[all_data, data[i]] dmean = all_data.mean(0) dstd = all_data.std(0) for d in data: d -= dmean d /= dstd ## run some benchmarks! if __name__ == '__main__': t1 = time() mcmc = HDPNormalMixture(data, ncomp=100, gpu=[0, 1, 2], parallel=True, verbose=100) mcmc.sample(1000, nburn=2000, tune_interval=50) imcmc = HDPNormalMixture(mcmc, verbose=100) imcmc.sample(1000, nburn=0, ident=True) t1 = time() - t1 print 'ALL GPU: ' + str(t1) t2 = time() mcmc = HDPNormalMixture(data, ncomp=100, gpu=[0], parallel=False, verbose=100) mcmc.sample(1000, nburn=2000, tune_interval=50)
class HDPMixtureModel(DPMixtureModel):
    """
    HDPMixtureModel(nclusts, niter=1000, burnin= 100, last= None)
    nclusts = number of clusters to fit
    niter = number of mcmc iterations
    burnin = number of mcmc burnin iterations
    last = number of mcmc iterations to draw samples from. if None last = niter
    """

    def __init__(self, *args, **kwargs):
        """
        Forward all arguments to DPMixtureModel and set g0 and h0 to 0.1.
        """
        super(HDPMixtureModel, self).__init__(*args, **kwargs)
        self.g0 = 0.1
        self.h0 = 0.1

    def fit(self, datasets, verbose=False, tune_interval=100):
        """
        Fit the hierarchical mixture model to a group of datasets.

        datasets = FCMCollection (converted with to_list()) or a sequence
                   of 2-d arrays; every array must have the same number of
                   columns
        verbose = forwarded to HDPNormalMixture
        tune_interval = forwarded to HDPNormalMixture.sample

        Each dataset is standardized with the mean and standard deviation
        of the pooled data (kept in self.m and self.s) before sampling.

        Returns whatever self.get_results() returns.
        Raises RuntimeError when the datasets do not share a column count.
        """
        if isinstance(datasets, FCMCollection):
            datasets = datasets.to_list()

        self.d = datasets[0].shape[1]
        # Validate before pooling: vstack (presumably numpy's) refuses
        # arrays of differing width with its own ValueError, so checking
        # afterwards could never raise the intended RuntimeError.
        for i in datasets:
            if i.shape[1] != self.d:
                raise RuntimeError("Datasets shape do not match")

        datasets = [i.copy().astype('double') for i in datasets]
        self.ndatasets = len(datasets)

        # Pooled statistics across all datasets, per column.
        total_data = vstack(datasets)
        self.m = mean(total_data, 0)
        self.s = std(total_data, 0)
        standardized = [(i - self.m) / self.s for i in datasets]

        if self.prior_mu is not None:
            self._load_mu_at_fit()
        if self.prior_sigma is not None:
            self._load_sigma_at_fit()

        if self.seed is not None:
            seed(self.seed)
        else:
            # No seed supplied: fall back to the current microsecond count.
            from datetime import datetime
            seed(datetime.now().microsecond)

        self.hdp = HDPNormalMixture(
            standardized,
            ncomp=self.nclusts,
            gamma0=self.gamma0,
            m0=self.m0,
            nu0=self.nu0,
            Phi0=self.Phi0,
            e0=self.e0,
            f0=self.f0,
            g0=self.g0,
            h0=self.h0,
            mu0=self._prior_mu,
            Sigma0=self._prior_sigma,
            weights0=self._prior_pi,
            alpha0=self.alpha0,
            gpu=self.device,
            parallel=self.parallel,
            verbose=verbose)
        self.hdp.sample(
            niter=self.niter,
            nburn=self.burnin,
            thin=1,
            ident=self.ident,
            tune_interval=tune_interval)

        self._run = True  # we've fit the mixture model

        return self.get_results()

    def get_results(self):
        """
        get the results of the fitted mixture model

        Uses the final self.last draws of the sampler (self.last is set to
        self.niter when it is None). Means and covariances are mapped back
        from standardized units with self.m and self.s.

        Returns an HDPMixture when self._run is true; otherwise falls
        through and returns None.
        """
        if self.last is None:
            self.last = self.niter

        if self._run:
            # One flattened weight vector per dataset over the kept draws.
            pis = array([
                self.hdp.weights[-self.last:, k, :].flatten()
                for k in range(self.ndatasets)
            ])
            # Undo the standardization applied in fit().
            mus = (
                self.hdp.mu[-self.last:].reshape(
                    self.nclusts * self.last, self.d) * self.s + self.m)
            sigmas = (
                self.hdp.Sigma[-self.last:].reshape(
                    self.nclusts * self.last, self.d, self.d)
                * outer(self.s, self.s))
            return HDPMixture(
                pis, mus, sigmas, self.last, self.m, self.s, self.ident)
from dpmix import HDPNormalMixture #import gpustats as gs if __name__ == '__main__': N = int(1e5) K = 2 J = 4 ncomps = 3 true_labels, data = generate_data(n=N, k=K, ncomps=ncomps) data = data - data.mean(0) data = data/data.std(0) #shuffle the data ... ind = np.arange(N); np.random.shuffle(ind); all_data = data[ind].copy() data = [ all_data[(N/J*i):(N/J*(i+1))].copy() for i in range(J) ] mcmc = HDPNormalMixture(data, ncomp=100, gpu=[0,1,2], parallel=True, verbose=100) mcmc.sample(2, nburn=5, tune_interval=100) imcmc = HDPNormalMixture(mcmc, verbose=100) imcmc.sample(2, nburn=0, ident=True) print imcmc.mu[-1] print imcmc.weights[-1] print imcmc.beta[-1]
# Pool data[1] and data[2] onto all_data (all_data and data are defined
# earlier in the file, outside this excerpt).
for i in range(1, 3):
    all_data = np.r_[all_data, data[i]]

# Standardize every dataset in place with the pooled per-column statistics.
dmean = all_data.mean(0)
dstd = all_data.std(0)
for d in data:
    d -= dmean
    d /= dstd

## run some benchmarks!
if __name__ == "__main__":
    # Benchmark 1: devices 0, 1 and 2 with parallel=True.
    # NOTE(review): time() is presumably time.time -- confirm against imports.
    t1 = time()
    mcmc = HDPNormalMixture(data, ncomp=100, gpu=[0, 1, 2], parallel=True, verbose=100)
    mcmc.sample(1000, nburn=2000, tune_interval=50)
    # Second stage is built from the first sampler and run with ident=True.
    imcmc = HDPNormalMixture(mcmc, verbose=100)
    imcmc.sample(1000, nburn=0, ident=True)
    t1 = time() - t1  # elapsed time for benchmark 1
    print "ALL GPU: " + str(t1)

    # Benchmark 2: same workload on device 0 only with parallel=False.
    t2 = time()
    mcmc = HDPNormalMixture(data, ncomp=100, gpu=[0], parallel=False, verbose=100)
    mcmc.sample(1000, nburn=2000, tune_interval=50)
    imcmc = HDPNormalMixture(mcmc, verbose=100)
    imcmc.sample(1000, nburn=0, ident=True)
    t2 = time() - t2  # elapsed time for benchmark 2
    print "One GPU: " + str(t2)

    # Start of a further timed section; the rest lies outside this excerpt.
    t4 = time()