def simulate_data(nCells = 5*10**4, nPersons = 40, seed = 123456, ratio_P = [1., 1., 0.8, 0.1]):
    """
    Simulate per-person mixture data following the instructions presented
    in the article.

    Four latent classes in three dimensions are hard-coded (means, the
    between-person covariance of the means, the class covariances and the
    class weights).  For every person the active classes get a perturbed
    mean and an inverse-Wishart distributed covariance, the class weights
    are jittered and renormalised, and ``nCells`` observations are drawn
    through a ``GMM.mixture`` object.

    *nCells*   number of observations requested per person
    *nPersons* number of persons
    *seed*     if not None, seeds numpy's global random generator
    *ratio_P*  per class, the fraction of persons (taken from the start of
               the person list, rounded up) for which the class is active;
               only read, never modified

    Returns the tuple ``(Y, act_Class, mus.T, Thetas, Sigmas, P)``:

    *Y*         list with one entry per person, as returned by
                ``mix_obj.simulate_data(nCells)``
    *act_Class* (nPersons x 4) array, 1. where the class is active
    *mus.T*     transposed array of the per-person class means
                (NaN for inactive classes)
    *Thetas*    list of latent class means
    *Sigmas*    list of latent class covariances
    *P*         list of latent class weights
    """
    if seed is not None:
        npr.seed(seed)

    nClass = 4
    dim = 3
    P = [0.49, 0.3, 0.2, 0.01]
    Thetas = [
        np.array([0., 0, 0]),
        np.array([0, -2, 1]),
        np.array([1., 2, 0]),
        np.array([-2, 2, 1.5]),
    ]
    Z_Sigma = [
        np.array([[1.27, 0.25, 0], [0.25, 0.27, -0.001], [0., -0.001, 0.001]]),
        np.array([[0.06, 0.04, -0.03], [0.04, 0.05, 0], [-0.03, 0., 0.09]]),
        np.array([[0.44, 0.08, 0.08], [0.08, 0.16, 0], [0.08, 0., 0.16]]),
        0.01 * np.eye(3),
    ]
    Sigmas = [
        0.1 * np.eye(3),
        0.1 * spl.toeplitz([2., 0.5, 0]),
        0.1 * spl.toeplitz([2., -0.5, 1]),
        0.1 * spl.toeplitz([1., .3, .3]),
    ]

    act_Class = np.zeros((nPersons, nClass))
    for i in range(nClass):
        # np.ceil returns a float; slice bounds must be integers.
        n_active = int(np.ceil(nPersons * ratio_P[i]))
        act_Class[:n_active, i] = 1.

    Y = []
    nu = 100
    mus = []
    for i in range(nPersons):
        # Indices of the classes that are active for this person.
        active = np.where(act_Class[i, :] == 1)[0]
        mix_obj = GMM.mixture(K=int(np.sum(act_Class[i, :])))

        theta_temp = []
        sigma_temp = []
        for j in range(nClass):
            if act_Class[i, j] == 1:
                theta_temp.append(
                    Thetas[j] + npr.multivariate_normal(np.zeros(dim), Z_Sigma[j]))
                sigma_temp.append(
                    wishart.invwishartrand(nu, (nu - dim - 1) * Sigmas[j]))
            else:
                # Inactive classes are kept as NaN placeholders so that the
                # returned means have the same layout for every person.
                theta_temp.append(np.full(dim, np.nan))
                sigma_temp.append(np.full((dim, dim), np.nan))

        mix_obj.mu = [theta_temp[k] for k in active]
        mus.append(theta_temp)
        mix_obj.sigma = [sigma_temp[k] for k in active]

        # Scale every active weight by a uniform factor in [0.9, 1.1) and
        # renormalise so the weights sum to one.
        p_ = np.array([(0.2 * np.random.rand() + 0.9) * P[k] for k in active])
        p_ /= np.sum(p_)
        mix_obj.p = p_
        mix_obj.d = dim
        Y.append(mix_obj.simulate_data(nCells))

    mus = np.array(mus)
    return Y, act_Class, mus.T, Thetas, Sigmas, P
def main_python(K = 5):
    """
    Benchmark ``mix.sample()`` on the flow cytometry data file.

    Loads columns 1-6 of '../data/flowcym.dat' (skipping the first row),
    builds a ``GMM.mixture`` with *K* components, calls ``sample()`` 100
    times and prints the elapsed time.

    *K* number of mixture components passed to ``GMM.mixture``
    """
    sim = 100
    data = np.ascontiguousarray(
        np.loadtxt('../data/flowcym.dat', skiprows=1, usecols=(1, 2, 3, 4, 5, 6)))
    mix = GMM.mixture(data, K)

    # perf_counter is monotonic and high resolution; time.time() can jump
    # when the system clock is adjusted, which would corrupt the measurement.
    t0 = time.perf_counter()
    for _ in range(sim):
        mix.sample()
    t1 = time.perf_counter()
    print("mixture took %.4f sec"%(t1-t0))
def main_python(K=5):
    """
    Benchmark ``mix.sample()`` on the flow cytometry data file.

    Loads columns 1-6 of '../data/flowcym.dat' (skipping the first row),
    builds a ``GMM.mixture`` with *K* components, calls ``sample()`` 100
    times and prints the elapsed time.

    *K* number of mixture components passed to ``GMM.mixture``
    """
    sim = 100
    data = np.ascontiguousarray(
        np.loadtxt('../data/flowcym.dat', skiprows=1,
                   usecols=(1, 2, 3, 4, 5, 6)))
    mix = GMM.mixture(data, K)

    # perf_counter is monotonic and high resolution; time.time() can jump
    # when the system clock is adjusted, which would corrupt the measurement.
    t0 = time.perf_counter()
    for _ in range(sim):
        mix.sample()
    t1 = time.perf_counter()
    print("mixture took %.4f sec" % (t1 - t0))
def test_sampling(self):
    """
    Smoke test: the module-level sigma sampler and the mixture's own
    ``sample_x`` / ``sample_sigma`` must run without raising.
    """
    mix = self.mix
    observations = mix.data
    first_mean = mix.mu[0]
    sigma_prior = mix.prior[0]["sigma"]

    # Module-level sampler, fed with the first component's mean and prior.
    GMM.sample_sigma(observations, first_mean,
                     sigma_prior["Q"], sigma_prior["nu"])

    # Instance-level samplers.
    mix.sample_x()
    mix.sample_sigma()
def test_error(self):
    """
    Building a mixture from a 1 x 1 data array with two components must
    raise a ValueError carrying the expected message.
    """
    # assertRaisesRegexp is a deprecated alias that was removed in
    # Python 3.12; assertRaisesRegex is the supported name.
    # The pattern is kept verbatim (including its spelling) because it is
    # matched against the message of the raised exception.
    with self.assertRaisesRegex(
            ValueError,
            "the number of observations must be larger then the dimenstion"):
        GMM.mixture(np.ones((1, 1)), 2)
def setUp(self):
    """
    Create the shared fixtures: a two-component setting, ``self.n`` random
    two-dimensional observations shifted by one, and two mixtures built on
    that data, the second one with ``high_memory`` switched off.
    """
    n_components = 2
    # Uniform samples on [0, 1) moved to [1, 2).
    samples = 1 + np.random.rand(self.n, 2)

    self.K = n_components
    self.data = samples
    self.mix = GMM.mixture(samples, n_components)

    low_memory_mix = GMM.mixture(samples, n_components)
    low_memory_mix.high_memory = False
    self.mix2 = low_memory_mix
def simulate_data_(thetas,
                   sigma_theta,
                   sigmas,
                   weights,
                   nu=100,
                   ratio_act=None,
                   n_cells=5 * 10**4,
                   n_persons=40,
                   seed=None,
                   silent=True):
    """
    simulating data given:

    *thetas*      list of latent means
    *sigma_theta* variation between the means
    *sigmas*      list of latent covariances
    *weights*     list of probabilities
    *nu*          inverse Wishart parameter
    *ratio_act*   per cluster, the fraction of persons (taken from the
                  start of the person list, rounded up) for which the
                  cluster is active; None activates every cluster for
                  every person
    *n_cells*     number of cells at a person
    *n_persons*   number of persons
    *seed*        if not None, seeds numpy's global random generator
    *silent*      if False, progress is printed for every person

    Returns the tuple ``(Y, act_class, mus.T, x)``:

    *Y*         list with, per person, the first value returned by
                ``mix_obj.simulate_data2(n_cells)``
    *act_class* (n_persons x K) array, 1. where the cluster is active
    *mus.T*     transposed array of the per-person cluster means
                (NaN for inactive clusters)
    *x*         list with, per person, the second value returned by
                ``mix_obj.simulate_data2(n_cells)``
    """
    # Seed only when a seed was actually supplied (the previous check was
    # inverted and therefore ignored every user-provided seed).
    if seed is not None:
        npr.seed(seed)

    K = len(weights)
    dim = thetas[0].shape[0]
    if ratio_act is None:
        ratio_act = np.ones(K)

    act_class = np.zeros((n_persons, K))
    for i in range(K):
        act_class[:int(np.ceil(n_persons * ratio_act[i])), i] = 1.

    Y = []
    x = []
    mus = []
    for i in range(n_persons):
        if not silent:
            print("setting up person_{i}: ".format(i=i), end='')
            sys.stdout.flush()

        # Indices of the clusters that are active for this person.
        active = np.where(act_class[i, :] == 1)[0]
        mix_obj = GMM.mixture(K=int(np.sum(act_class[i, :])))

        theta_temp = []
        sigma_temp = []
        for j in range(K):
            if act_class[i, j] == 1:
                theta_temp.append(
                    thetas[j] + util.rmvn(np.zeros((dim, 1)), sigma_theta[j]))
                # The caller's nu is used here; it is no longer overwritten
                # by a hard-coded 100 (which equals the default).
                sigma_temp.append(
                    wishart.invwishartrand(nu, (nu - dim - 1) * sigmas[j]))
            else:
                # Inactive clusters are kept as NaN placeholders so that the
                # returned means have the same layout for every person.
                theta_temp.append(np.full(dim, np.nan))
                sigma_temp.append(np.full((dim, dim), np.nan))

        mix_obj.mu = [theta_temp[k] for k in active]
        mus.append(theta_temp)
        mix_obj.sigma = [sigma_temp[k] for k in active]

        # Scale every active weight by a uniform factor in [0.9, 1.1) and
        # renormalise so the weights sum to one.
        p_ = np.array(
            [(0.2 * np.random.rand() + 0.9) * weights[k] for k in active])
        p_ /= np.sum(p_)
        mix_obj.p = p_
        mix_obj.d = dim

        # NOTE: an earlier, disabled variant replaced 1% of the cells by
        # broad Gaussian noise; only the plain simulation is performed.
        Y_, x_ = mix_obj.simulate_data2(n_cells)
        Y.append(Y_)
        x.append(x_)

        if not silent:
            print("done")
            sys.stdout.flush()

    mus = np.array(mus)
    return Y, act_class, mus.T, x
mix.sample() mus[i, :data.shape[1]] = mix.mu[0] mus[i, data.shape[1]:] = mix.mu[1] t1 = time.time() if 1: for k in range(mix.K): plt.plot(mix.data[mix.x == k, 0], mix.data[mix.x == k, 1], 'o') plt.figure() for k in range(mix.K): plt.plot(mus[:, (2 * k):(2 * (k + 1))]) plt.show() print("mixture took %.4f sec" % (t1 - t0)) mix2 = GMM.mixture(data, K) mus = np.zeros((sim, 4)) t0 = time.time() for i in range(sim): mix2.sample() t1 = time.time() print("Python mixture took %.4f sec" % (t1 - t0)) if 0: import pstats, cProfile import pyximport pyximport.install() import bayesianmixture.distributions.rng_cython as rng_cython #cProfile.runctx("rng_cython.sample_mu_rep(np.sum(mix.data[mix.x == 0 ,:],1),mix.sigma[0],mix.prior[0]['mu']['theta'].reshape(mix.d),mix.prior[0]['mu']['sigma'],npr.rand(mix.d),10000)", globals(), locals(), "Profile.prof")
mix.sample() mus[i,:data.shape[1]] = mix.mu[0] mus[i,data.shape[1]:] = mix.mu[1] t1 = time.time() if 1: for k in range(mix.K): plt.plot(mix.data[mix.x==k,0],mix.data[mix.x==k,1],'o') plt.figure() for k in range(mix.K): plt.plot(mus[:,(2*k):(2*(k+1))]) plt.show() print("mixture took %.4f sec"%(t1-t0)) mix2 = GMM.mixture(data,K) mus = np.zeros((sim,4)) t0 = time.time() for i in range(sim): mix2.sample() t1 = time.time() print("Python mixture took %.4f sec"%(t1-t0)) if 0: import pstats, cProfile import pyximport pyximport.install() import bayesianmixture.distributions.rng_cython as rng_cython #cProfile.runctx("rng_cython.sample_mu_rep(np.sum(mix.data[mix.x == 0 ,:],1),mix.sigma[0],mix.prior[0]['mu']['theta'].reshape(mix.d),mix.prior[0]['mu']['sigma'],npr.rand(mix.d),10000)", globals(), locals(), "Profile.prof")
def simulate_data_(thetas, sigma_theta, sigmas, weights, nu = 100, ratio_act = None, n_cells = 5*10**4, n_persons = 40, seed = None, silent = True):
    """
    simulating data given:

    *thetas*      list of latent means
    *sigma_theta* variation between the means
    *sigmas*      list of latent covariances
    *weights*     list of probabilities
    *nu*          inverse Wishart parameter
    *ratio_act*   per cluster, the fraction of persons (taken from the
                  start of the person list, rounded up) for which the
                  cluster is active; None activates every cluster for
                  every person
    *n_cells*     number of cells at a person
    *n_persons*   number of persons
    *seed*        if not None, seeds numpy's global random generator
    *silent*      if False, progress is printed for every person

    Returns the tuple ``(Y, act_class, mus.T, x)``:

    *Y*         list with, per person, the first value returned by
                ``mix_obj.simulate_data2(n_cells)``
    *act_class* (n_persons x K) array, 1. where the cluster is active
    *mus.T*     transposed array of the per-person cluster means
                (NaN for inactive clusters)
    *x*         list with, per person, the second value returned by
                ``mix_obj.simulate_data2(n_cells)``
    """
    # Seed only when a seed was actually supplied (the previous check was
    # inverted and therefore ignored every user-provided seed).
    if seed is not None:
        npr.seed(seed)

    K = len(weights)
    dim = thetas[0].shape[0]
    if ratio_act is None:
        ratio_act = np.ones(K)

    act_class = np.zeros((n_persons, K))
    for i in range(K):
        act_class[:int(np.ceil(n_persons * ratio_act[i])), i] = 1.

    Y = []
    x = []
    mus = []
    for i in range(n_persons):
        if not silent:
            print("setting up person_{i}: ".format(i = i),end = '')
            sys.stdout.flush()

        # Indices of the clusters that are active for this person.
        active = np.where(act_class[i, :] == 1)[0]
        mix_obj = GMM.mixture(K = int(np.sum(act_class[i, :])))

        theta_temp = []
        sigma_temp = []
        for j in range(K):
            if act_class[i, j] == 1:
                theta_temp.append(thetas[j] + util.rmvn(np.zeros((dim, 1)), sigma_theta[j]))
                # The caller's nu is used here; it is no longer overwritten
                # by a hard-coded 100 (which equals the default).
                sigma_temp.append(wishart.invwishartrand(nu, (nu - dim - 1) * sigmas[j]))
            else:
                # Inactive clusters are kept as NaN placeholders so that the
                # returned means have the same layout for every person.
                theta_temp.append(np.full(dim, np.nan))
                sigma_temp.append(np.full((dim, dim), np.nan))

        mix_obj.mu = [theta_temp[k] for k in active]
        mus.append(theta_temp)
        mix_obj.sigma = [sigma_temp[k] for k in active]

        # Scale every active weight by a uniform factor in [0.9, 1.1) and
        # renormalise so the weights sum to one.
        p_ = np.array([(0.2*np.random.rand()+0.9) * weights[k] for k in active])
        p_ /= np.sum(p_)
        mix_obj.p = p_
        mix_obj.d = dim

        # NOTE: an earlier, disabled variant replaced 1% of the cells by
        # broad Gaussian noise; only the plain simulation is performed.
        Y_, x_ = mix_obj.simulate_data2(n_cells)
        Y.append(Y_)
        x.append(x_)

        if not silent:
            print("done")
            sys.stdout.flush()

    mus = np.array(mus)
    return Y, act_class, mus.T, x
def simulate_data(nCells=5 * 10**4,
                  nPersons=40,
                  seed=123456,
                  ratio_P=[1., 1., 0.8, 0.1]):
    """
    Simulate per-person mixture data following the instructions presented
    in the article.

    Four latent classes in three dimensions are hard-coded (means, the
    between-person covariance of the means, the class covariances and the
    class weights).  For every person the active classes get a perturbed
    mean and an inverse-Wishart distributed covariance, the class weights
    are jittered and renormalised, and ``nCells`` observations are drawn
    through a ``GMM.mixture`` object.

    *nCells*   number of observations requested per person
    *nPersons* number of persons
    *seed*     if not None, seeds numpy's global random generator
    *ratio_P*  per class, the fraction of persons (taken from the start of
               the person list, rounded up) for which the class is active;
               only read, never modified

    Returns the tuple ``(Y, act_Class, mus.T, Thetas, Sigmas, P)``:

    *Y*         list with one entry per person, as returned by
                ``mix_obj.simulate_data(nCells)``
    *act_Class* (nPersons x 4) array, 1. where the class is active
    *mus.T*     transposed array of the per-person class means
                (NaN for inactive classes)
    *Thetas*    list of latent class means
    *Sigmas*    list of latent class covariances
    *P*         list of latent class weights
    """
    if seed is not None:
        npr.seed(seed)

    nClass = 4
    dim = 3
    P = [0.49, 0.3, 0.2, 0.01]
    Thetas = [
        np.array([0., 0, 0]),
        np.array([0, -2, 1]),
        np.array([1., 2, 0]),
        np.array([-2, 2, 1.5])
    ]
    Z_Sigma = [
        np.array([[1.27, 0.25, 0], [0.25, 0.27, -0.001], [0., -0.001, 0.001]]),
        np.array([[0.06, 0.04, -0.03], [0.04, 0.05, 0], [-0.03, 0., 0.09]]),
        np.array([[0.44, 0.08, 0.08], [0.08, 0.16, 0], [0.08, 0., 0.16]]),
        0.01 * np.eye(3)
    ]
    Sigmas = [
        0.1 * np.eye(3),
        0.1 * spl.toeplitz([2., 0.5, 0]),
        0.1 * spl.toeplitz([2., -0.5, 1]),
        0.1 * spl.toeplitz([1., .3, .3])
    ]

    act_Class = np.zeros((nPersons, nClass))
    for i in range(nClass):
        # np.ceil returns a float; slice bounds must be integers.
        n_active = int(np.ceil(nPersons * ratio_P[i]))
        act_Class[:n_active, i] = 1.

    Y = []
    nu = 100
    mus = []
    for i in range(nPersons):
        # Indices of the classes that are active for this person.
        active = np.where(act_Class[i, :] == 1)[0]
        mix_obj = GMM.mixture(K=int(np.sum(act_Class[i, :])))

        theta_temp = []
        sigma_temp = []
        for j in range(nClass):
            if act_Class[i, j] == 1:
                theta_temp.append(
                    Thetas[j] +
                    npr.multivariate_normal(np.zeros(dim), Z_Sigma[j]))
                sigma_temp.append(
                    wishart.invwishartrand(nu, (nu - dim - 1) * Sigmas[j]))
            else:
                # Inactive classes are kept as NaN placeholders so that the
                # returned means have the same layout for every person.
                theta_temp.append(np.full(dim, np.nan))
                sigma_temp.append(np.full((dim, dim), np.nan))

        mix_obj.mu = [theta_temp[k] for k in active]
        mus.append(theta_temp)
        mix_obj.sigma = [sigma_temp[k] for k in active]

        # Scale every active weight by a uniform factor in [0.9, 1.1) and
        # renormalise so the weights sum to one.
        p_ = np.array([(0.2 * np.random.rand() + 0.9) * P[k] for k in active])
        p_ /= np.sum(p_)
        mix_obj.p = p_
        mix_obj.d = dim
        Y.append(mix_obj.simulate_data(nCells))

    mus = np.array(mus)
    return Y, act_Class, mus.T, Thetas, Sigmas, P