def fit(self, datasets, verbose=False, tune_interval=100):
    """Fit the hierarchical mixture model to several datasets at once.

    Every dataset is cast to double and standardized with the column-wise
    mean and standard deviation of the pooled data (kept on ``self.m`` and
    ``self.s``). The standardized datasets are handed to
    ``HDPNormalMixture``, which is then sampled.

    Parameters
    ----------
    datasets : FCMCollection or sequence of 2-D array-likes
        An ``FCMCollection`` is first converted with ``to_list()``. Every
        dataset must have the same number of columns as the first one.
    verbose : optional
        Forwarded unchanged to the ``HDPNormalMixture`` constructor.
    tune_interval : int, optional
        Forwarded unchanged to ``HDPNormalMixture.sample``.

    Returns
    -------
    Whatever ``self.get_results()`` returns.

    Raises
    ------
    RuntimeError
        If a dataset's column count differs from the first dataset's.
    """
    if isinstance(datasets, FCMCollection):
        datasets = datasets.to_list()

    self.d = datasets[0].shape[1]
    datasets = [i.copy().astype('double') for i in datasets]
    self.ndatasets = len(datasets)

    # Validate before pooling: vstack would otherwise fail first with a
    # less helpful ValueError and this check would never be reached.
    for i in datasets:
        if i.shape[1] != self.d:
            raise RuntimeError("Datasets shape do not match")

    # Standardize every dataset with the statistics of the pooled data.
    # NOTE(review): a zero-variance column makes self.s contain 0 and the
    # division below produce inf/nan -- confirm callers never pass one.
    total_data = vstack(datasets)
    self.m = mean(total_data, 0)
    self.s = std(total_data, 0)
    standardized = [(i - self.m) / self.s for i in datasets]

    if self.prior_mu is not None:
        self._load_mu_at_fit()
    if self.prior_sigma is not None:
        self._load_sigma_at_fit()

    if self.seed is not None:
        seed(self.seed)
    else:
        # No explicit seed: derive one from the current microsecond.
        from datetime import datetime
        seed(datetime.now().microsecond)

    self.hdp = HDPNormalMixture(
        standardized,
        ncomp=self.nclusts,
        gamma0=self.gamma0,
        m0=self.m0,
        nu0=self.nu0,
        Phi0=self.Phi0,
        e0=self.e0,
        f0=self.f0,
        g0=self.g0,
        h0=self.h0,
        mu0=self._prior_mu,
        Sigma0=self._prior_sigma,
        weights0=self._prior_pi,
        alpha0=self.alpha0,
        gpu=self.device,
        parallel=self.parallel,
        verbose=verbose,
    )
    self.hdp.sample(
        niter=self.niter,
        nburn=self.burnin,
        thin=1,
        ident=self.ident,
        tune_interval=tune_interval,
    )

    self._run = True  # we've fit the mixture model
    return self.get_results()
from dpmix import HDPNormalMixture
#import gpustats as gs

# Demo script: fit an HDP normal mixture to J equal slices of one
# standardized, shuffled dataset and print the last sampled parameters.
# `np` and `generate_data` are expected to be provided elsewhere in the file.
if __name__ == '__main__':
    N = int(1e5)   # total number of points requested from generate_data
    K = 2          # passed to generate_data as `k`
    J = 4          # number of slices handed to the sampler
    ncomps = 3     # passed to generate_data as `ncomps`

    true_labels, data = generate_data(n=N, k=K, ncomps=ncomps)

    # Standardize each column to zero mean and unit standard deviation.
    data = data - data.mean(0)
    data = data / data.std(0)

    #shuffle the data ...
    ind = np.arange(N)
    np.random.shuffle(ind)
    all_data = data[ind].copy()

    # Floor division keeps the slice bounds integral on Python 3 as well;
    # on Python 2 it is identical to the original int `/`.
    chunk = N // J
    data = [all_data[(chunk * i):(chunk * (i + 1))].copy() for i in range(J)]

    mcmc = HDPNormalMixture(data, ncomp=100, gpu=[0, 1, 2],
                            parallel=True, verbose=100)
    mcmc.sample(2, nburn=5, tune_interval=100)

    # A second sampler is built from the first one and run with ident=True.
    imcmc = HDPNormalMixture(mcmc, verbose=100)
    imcmc.sample(2, nburn=0, ident=True)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(imcmc.mu[-1])
    print(imcmc.weights[-1])
    print(imcmc.beta[-1])
import sys

import numpy.random as npr
from dpmix import HDPNormalMixture

# Stress script: repeatedly build random uniform datasets, run a burn-in
# sampler, then a second sampler constructed from the first with ident=True.
if __name__ == '__main__':
    nclust = 256
    niter = 10
    burnin = 10
    device = 1
    max_events = 50000
    num_files = 10
    seed = 9
    #npr.seed(seed)

    for it in range(10, 20):
        xs = []
        for i in range(num_files):
            # Progress indicator: indices on one line, space separated.
            # sys.stdout.write is used instead of the Python 2-only
            # `print i,` statement so the script also runs on Python 3.
            sys.stdout.write('%s%d' % (' ' if i else '', i))
            xs.append(npr.uniform(-5, 5, (max_events, 5)))
        sys.stdout.write('\n')

        mcmc = HDPNormalMixture(xs, ncomp=nclust, gpu=device,
                                parallel=True, verbose=2)
        mcmc.sample(burnin, nburn=0, tune_interval=5)

        imcmc = HDPNormalMixture(mcmc, verbose=2)
        imcmc.sample(niter, nburn=0, ident=True)

        # Drop both samplers before the next repetition.
        del mcmc
        del imcmc