def predictMCMC(n_train, nxx_pv, nxx_npv, nxy_pv, nxy_npv, nyy_pv, nyy_npv,
                B_x, B_y, e, x_test, private, p1=0.25, p2=0.70, p3=0.05):
    d = nxx_pv.shape[0]
    # Generate noise
    if private:
        W = wishart.rvs(d + 1, ((d * pow(B_x, 2)) / (p1 * e)) * np.identity(d), size=1)
        L = np.random.laplace(scale=(2 * d * B_x * B_y) / (p2 * e), size=d)
        V = wishart.rvs(2, pow(B_y, 2) / (2 * p3 * e), size=1)
    else:
        W = 0
        L = 0
        V = 0
    return doMCMC(n_train, nxx_npv + nxx_pv + W, nxy_npv + nxy_pv + L,
                  nyy_npv + nyy_pv + V, x_test)
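# A minimal sketch of the privacy-noise draws used above, with illustrative
# values for the sensitivity bounds (B_x, B_y), the privacy budget e, and the
# budget split (p1, p2, p3); these numbers are not from the source.
import numpy as np
from scipy.stats import wishart

d, B_x, B_y, e = 3, 1.0, 1.0, 0.5
p1, p2, p3 = 0.25, 0.70, 0.05
W = wishart.rvs(d + 1, ((d * B_x**2) / (p1 * e)) * np.identity(d))   # (d, d) noise for the x'x term
L = np.random.laplace(scale=(2 * d * B_x * B_y) / (p2 * e), size=d)  # length-d noise for the x'y term
V = wishart.rvs(2, B_y**2 / (2 * p3 * e))                            # scalar noise for the y'y term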
def Inddist(part, nsample, pvariates, iterations):
    T = []
    for i in range(iterations):
        W1 = wishart.rvs(df=nsample - 1, scale=np.eye(pvariates))
        W2 = wishart.rvs(df=nsample - 1, scale=W1 / (nsample - 1))
        W2_11, W2_12, W2_21, W2_22 = partition(W2, part, part)
        T = np.append(T, det(W2) / (det(W2_11) * det(W2_22)))
    return T
def sample_prior_mixture_components(self, mulinha, Sigmalinha, Hlinha, sigmalinha, d, nsamples=1):
    mu = multivariate_normal.rvs(mean=mulinha, cov=Sigmalinha, size=nsamples).reshape(nsamples, d)
    cov = wishart.rvs(df=sigmalinha, scale=Hlinha, size=nsamples).reshape(nsamples, d, d)
    for i in range(nsamples):
        # Redraw only the ill-conditioned sample rather than the whole batch
        while matrix_is_well_conditioned(cov[i]) is not True:
            cov[i] = wishart.rvs(df=sigmalinha, scale=Hlinha)
    cov_inv = np.linalg.inv(cov)
    return mu, cov_inv, cov
def Sphdist(nsample, pvariates, iterations):
    T = []
    for i in range(iterations):
        W1 = wishart.rvs(df=nsample - 1, scale=np.eye(pvariates) / (nsample - 1))
        W2 = wishart.rvs(df=nsample - 1, scale=np.eye(pvariates))
        T = np.append(T, det(W1.dot(W2))**(1 / pvariates) / np.trace(W1.dot(W2)))
    return T
def Canodist(part, nsample, pvariates, iterations):
    T = []
    for i in range(iterations):
        W1 = wishart.rvs(df=nsample - 1, scale=np.eye(pvariates))
        W2 = wishart.rvs(df=nsample - 1, scale=W1 / (nsample - 1))
        W2_11, W2_12, W2_21, W2_22 = partition(W2, part, part)
        Q = W2_12.dot(inv(W2_22).dot(W2_21))
        T = np.append(T, det(Q) / det(W2_11 - Q))
    return T
def sample_prior_hyperparameters(self, X_mean, X_cov, d):
    mulinha = multivariate_normal.rvs(mean=X_mean, cov=X_cov)
    Sigmalinha = invwishart.rvs(df=d, scale=d * X_cov)
    while matrix_is_well_conditioned(Sigmalinha) is not True:
        Sigmalinha = invwishart.rvs(df=d, scale=d * X_cov)
    Hlinha = wishart.rvs(df=d, scale=X_cov / d)
    while matrix_is_well_conditioned(Hlinha) is not True:
        Hlinha = wishart.rvs(df=d, scale=X_cov / d)
    sigmalinha = invgamma.rvs(1, 1 / d) + d
    return mulinha, Sigmalinha, Hlinha, sigmalinha
def __init__(self, cx=None, cy=None):
    self.df = 1
    if cx is None:
        # draw these separately and use a diagonal
        self.cx = wishart.rvs(self.df, numpy.eye(self.df))
    else:
        self.cx = cx
    if cy is None:
        self.cy = wishart.rvs(self.df, numpy.eye(self.df))
    else:
        self.cy = cy
    self.cov = inv(numpy.diag([self.cx, self.cy]))
def generate_pd_matrix(n: int, as_torch: bool = True):
    """Generate an (n x n) positive-definite matrix."""
    if n < 2:
        raise ValueError(f"Must generate at least a 2x2 matrix, not {n}x{n}")
    A = wishart.rvs(n, tuple(1 for _ in range(n)))
    while not _is_positive_definite(A):
        A = wishart.rvs(n, tuple(1 for _ in range(n)))
    A += A.mean() * np.eye(n)
    if as_torch:
        A_torch = torch.as_tensor(A, dtype=torch.get_default_dtype())
        return A_torch
    return A
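# Example usage of generate_pd_matrix (a sketch; assumes torch, scipy, and the
# module's _is_positive_definite helper are available, as in the source):
M = generate_pd_matrix(4)                     # 4x4 PD matrix as a torch tensor
M_np = generate_pd_matrix(4, as_torch=False)  # the same, as a NumPy array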
def gen_toy_data(toy_data_params):
    """
    Generates a mixture of stationary and non-stationary multivariate normal data.

    toy_data_params is a dictionary containing:
        dimensions_noisy : number of non-stationary sources
        mu_stationary : vector of means of the stationary distribution
        sigma_stationary (optional) : covariance matrix of the stationary distribution

    If sigma_stationary is not supplied, one will be sampled from a
    Wishart(d, I) distribution where d is the number of stationary sources.

    Returns the generated data and the mixing matrix.
    """
    dim_s = len(toy_data_params['mu_stationary'])
    dim_n = toy_data_params['dimensions_noisy']
    n_observations = sum(toy_data_params['epoch_sizes'])
    dim_total = dim_s + dim_n
    if 'sigma_stationary' not in toy_data_params:
        toy_data_params['sigma_stationary'] = wishart.rvs(dim_s, np.identity(dim_s))
    if dim_s == 1:
        DATA_STATIONARY = np.random.normal(
            toy_data_params['mu_stationary'],
            toy_data_params['sigma_stationary'],
            size=n_observations).reshape(n_observations, 1)
    else:
        DATA_STATIONARY = np.random.multivariate_normal(
            toy_data_params['mu_stationary'],
            toy_data_params['sigma_stationary'],
            size=n_observations)
    if dim_n == 1:
        DATA_NOISY = np.hstack([
            np.random.normal(np.random.normal(size=dim_n),
                             np.random.uniform(size=dim_n),
                             size=epoch_size)
            for epoch_size in toy_data_params['epoch_sizes']
        ]).reshape(n_observations, 1)
    else:
        DATA_NOISY = np.vstack([
            np.random.multivariate_normal(np.random.normal(size=dim_n),
                                          wishart.rvs(dim_n, np.identity(dim_n)),
                                          size=epoch_size)
            for epoch_size in toy_data_params['epoch_sizes']
        ])
    toy_data = np.hstack([DATA_STATIONARY, DATA_NOISY])
    mixer = random_rotation(dim_total)
    # "Left" multiplication; actually vstacked mixer.dot(DATA[i])
    toy_data_mixed = toy_data.dot(mixer.T)
    return toy_data_mixed, mixer
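# A hypothetical call to gen_toy_data (parameter values are illustrative;
# random_rotation comes from the surrounding module):
params = {
    'mu_stationary': [0.0, 0.0],    # two stationary sources
    'dimensions_noisy': 3,          # three non-stationary sources
    'epoch_sizes': [200, 200, 100],
}
toy_data_mixed, mixer = gen_toy_data(params)  # shapes (500, 5) and (5, 5)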
def gen_data(K=3, Nk=[400, 300, 125, 100, 75], random_state=123, normalize=True):
    np.random.seed(random_state)
    # for generating random centers
    center = np.array((0, 0))
    dispersion = 10
    mu_k = multivariate_normal(center, np.identity(2) * dispersion, K)
    sd_k = uniform(0.1, 2, size=K)
    data, labels = make_blobs(n_samples=Nk[:K], n_features=2, centers=mu_k, cluster_std=sd_k)
    # rotate data in each cluster using random covariance matrices
    sigmas = wishart.rvs(2, scale=np.identity(2), size=K) + np.identity(2)
    x = data[labels == 0] @ sigmas[0]
    for k in range(1, K):
        x = np.vstack((x, data[labels == k] @ sigmas[k]))
    labels = np.sort(labels)
    if normalize:
        x = StandardScaler().fit_transform(x)
    return x, labels
def sample_factor_x(tau_sparse_tensor, tau_ind, U, V, X, beta0=1):
    """Sampling T-by-R factor matrix X and its hyperparameters."""
    dim3, rank = X.shape
    var_mu_hyper = X[0, :] / (beta0 + 1)
    dx = X[1:, :] - X[:-1, :]
    var_V_hyper = inv(np.eye(rank) + dx.T @ dx
                      + beta0 * np.outer(X[0, :], X[0, :]) / (beta0 + 1))
    var_Lambda_hyper = wishart.rvs(df=dim3 + rank, scale=var_V_hyper)
    var_mu_hyper = mvnrnd_pre(var_mu_hyper, (beta0 + 1) * var_Lambda_hyper)

    var1 = kr_prod(V, U).T
    var2 = kr_prod(var1, var1)
    var3 = (var2 @ ten2mat(tau_ind, 2).T).reshape([rank, rank, dim3])
    var4 = var1 @ ten2mat(tau_sparse_tensor, 2).T
    for t in range(dim3):
        if t == 0:
            X[t, :] = mvnrnd_pre((X[t + 1, :] + var_mu_hyper) / 2,
                                 var3[:, :, t] + 2 * var_Lambda_hyper)
        elif t == dim3 - 1:
            temp1 = var4[:, t] + var_Lambda_hyper @ X[t - 1, :]
            temp2 = var3[:, :, t] + var_Lambda_hyper
            X[t, :] = mvnrnd_pre(solve(temp2, temp1), temp2)
        else:
            temp1 = var4[:, t] + var_Lambda_hyper @ (X[t - 1, :] + X[t + 1, :])
            temp2 = var3[:, :, t] + 2 * var_Lambda_hyper
            X[t, :] = mvnrnd_pre(solve(temp2, temp1), temp2)
    return X
def test_draw_samples_with_broadcast(self, dtype_dof, dtype, degrees_of_freedom,
                                     scale, scale_is_samples, rv_shape, num_samples):
    degrees_of_freedom_mx = mx.nd.array([degrees_of_freedom], dtype=dtype_dof)
    scale_mx = mx.nd.array(scale, dtype=dtype)
    if not scale_is_samples:
        scale_mx = add_sample_dimension(mx.nd, scale_mx)

    rand = np.random.rand(num_samples, *rv_shape)
    rand_gen = MockMXNetRandomGenerator(mx.nd.array(rand.flatten(), dtype=dtype))

    reps = 1000
    mins = np.zeros(reps)
    maxs = np.zeros(reps)
    for i in range(reps):
        rvs = wishart.rvs(df=degrees_of_freedom, scale=scale, size=num_samples)
        mins[i] = rvs.min()
        maxs[i] = rvs.max()
    # rv_samples_np = wishart.rvs(df=degrees_of_freedom, scale=scale, size=num_samples)

    var = Wishart.define_variable(shape=rv_shape, dtype=dtype, rand_gen=rand_gen).factor
    variables = {var.degrees_of_freedom.uuid: degrees_of_freedom_mx,
                 var.scale.uuid: scale_mx}
    draw_samples_rt = var.draw_samples(F=mx.nd, variables=variables)

    assert np.issubdtype(draw_samples_rt.dtype, dtype)
    assert is_sampled_array(mx.nd, draw_samples_rt) == scale_is_samples
    if scale_is_samples:
        assert get_num_samples(mx.nd, draw_samples_rt) == num_samples, \
            (get_num_samples(mx.nd, draw_samples_rt), num_samples)
    assert mins.min() < draw_samples_rt.asnumpy().min()
    assert maxs.max() > draw_samples_rt.asnumpy().max()
def _update_item_params(self):
    N = self.n_item
    X_bar = np.mean(self.item_features_, 0).reshape((self.n_feature, 1))
    S_bar = np.cov(self.item_features_.T)

    diff_X_bar = self.mu0_item - X_bar

    # W_{0}_star
    WI_post = inv(inv(self.WI_item) +
                  N * S_bar +
                  np.dot(diff_X_bar, diff_X_bar.T) *
                  (N * self.beta_item) / (self.beta_item + N))

    # Note: WI_post and WI_post.T should be the same.
    # Just make sure it is symmetric here
    WI_post = (WI_post + WI_post.T) / 2.0

    # update alpha_item
    df_post = self.df_item + N
    self.alpha_item = wishart.rvs(df_post, WI_post, 1, self.rand_state)

    # update mu_item
    mu_mean = (self.beta_item * self.mu0_item + N * X_bar) / \
        (self.beta_item + N)
    mu_var = cholesky(inv(np.dot(self.beta_item + N, self.alpha_item)))
    self.mu_item = mu_mean + np.dot(
        mu_var, self.rand_state.randn(self.n_feature, 1))
def sample_theta(X, theta, Lambda_x, time_lags, beta0=1):
    dim, rank = X.shape
    d = time_lags.shape[0]
    tmax = np.max(time_lags)
    theta_bar = np.mean(theta, axis=0)
    temp = d / (d + beta0)
    var_theta_hyper = inv(np.eye(rank) + cov_mat(theta, theta_bar)
                          + temp * beta0 * np.outer(theta_bar, theta_bar))
    var_Lambda_hyper = wishart.rvs(df=d + rank, scale=var_theta_hyper)
    var_mu_hyper = mvnrnd_pre(temp * theta_bar, (d + beta0) * var_Lambda_hyper)
    for k in range(d):
        theta0 = theta.copy()
        theta0[k, :] = 0
        mat0 = np.zeros((dim - tmax, rank))
        for L in range(d):
            mat0 += X[tmax - time_lags[L]:dim - time_lags[L], :] @ np.diag(theta0[L, :])
        varPi = X[tmax:dim, :] - mat0
        var0 = X[tmax - time_lags[k]:dim - time_lags[k], :]
        var = np.einsum('ij, jk, ik -> j', var0, Lambda_x, varPi)
        var_Lambda = np.einsum('ti, tj, ij -> ij', var0, var0, Lambda_x) + var_Lambda_hyper
        theta[k, :] = mvnrnd_pre(
            solve(var_Lambda, var + var_Lambda_hyper @ var_mu_hyper), var_Lambda)
    return theta
def updateParameters(self, latent_z, n_cluster_samples):
    params = []
    normal_insts = []
    for k in range(self.n_cluster):
        sample_idx = np.where(latent_z == k)[0]
        sample_k = sample[sample_idx]
        n_sample_k = sample_k.shape[0]
        mean_sample_k = np.average(sample_k, 0)
        # scatter matrix of cluster k
        cov_k = np.zeros((n_dimentions, n_dimentions))
        for j in range(n_sample_k):
            deviation = sample_k[j] - mean_sample_k
            cov_k += np.dot(deviation.reshape(-1, 1), deviation.reshape(1, -1))
        deviation = mean_sample_k - self.u0
        tmp1 = np.dot(deviation.reshape(-1, 1), deviation.reshape(1, -1))
        tmp2 = (n_cluster_samples[k] * self.beta) / (n_cluster_samples[k] + self.beta)
        tmp3 = tmp2 * tmp1
        wish_cover = np.linalg.inv(np.linalg.inv(self.covar) + cov_k + tmp3)
        # draw the cluster precision, then the cluster mean given it
        normal_cov = wishart.rvs(self.new + n_cluster_samples[k], wish_cover)
        tmp4 = (n_cluster_samples[k] + self.beta) * normal_cov
        tmp5 = n_cluster_samples[k] + self.beta
        tmp6 = (n_cluster_samples[k] * mean_sample_k + self.beta * self.u0) / tmp5
        normal_mean = multivariate_normal(tmp6, np.linalg.inv(tmp4)).rvs()
        params.append((normal_mean, normal_cov))
        normal_insts.append(multivariate_normal(normal_mean, np.linalg.inv(normal_cov)))
    return params, normal_insts
def _update_user_params(self):
    # same as _update_item_params
    N = self.n_user
    X_bar = np.mean(self.user_features_, 0).reshape((self.n_feature, 1))
    S_bar = np.cov(self.user_features_.T)

    # mu_{0} - U_bar
    diff_X_bar = self.mu0_user - X_bar

    # W_{0}_star
    WI_post = inv(inv(self.WI_user) +
                  N * S_bar +
                  np.dot(diff_X_bar, diff_X_bar.T) *
                  (N * self.beta_user) / (self.beta_user + N))

    # Note: WI_post and WI_post.T should be the same.
    # Just make sure it is symmetric here
    WI_post = (WI_post + WI_post.T) / 2.0

    # update alpha_user
    df_post = self.df_user + N
    # LAMBDA_{U} ~ W(W_{0}_star, df_post)
    self.alpha_user = wishart.rvs(df_post, WI_post, 1, self.rand_state)

    # update mu_user
    # mu_{0}_star = (beta_{0} * mu_{0} + N * U_bar) / (beta_{0} + N)
    mu_mean = (self.beta_user * self.mu0_user + N * X_bar) / \
        (self.beta_user + N)

    # decomposed inv(beta_{0}_star * LAMBDA_{U})
    mu_var = cholesky(inv(np.dot(self.beta_user + N, self.alpha_user)))

    # sample multivariate gaussian
    self.mu_user = mu_mean + np.dot(
        mu_var, self.rand_state.randn(self.n_feature, 1))
def draw_posterior(self, y, x, alpha, sigma):
    """One draw from the posterior for the conjugate Normal prior.

    Parameter y: A (T-p)xk matrix with the LHS of the model, with T the
    length of the original data set, p the number of lags and k the number
    of variables in the model.

    Parameter x: A (T-p)x(k*p+constant) matrix with the RHS of the model,
    with T the length of the original data, p the number of lags, k the
    number of variables in the model, and constant either 0 or 1 depending
    on whether the model has an intercept (constant = 1) or not (constant = 0).

    Parameter alpha: The regression coefficients of the previous draw.

    Parameter sigma: The variance-covariance matrix of the previous draw.

    Returns: One draw of the MCMC, consisting of a draw for the coefficients
    and a draw for the variance-covariance matrix.
    """
    # OLS estimates
    tmp1 = np.linalg.inv(np.matmul(np.transpose(x), x))
    tmp2 = np.matmul(np.transpose(x), y)
    olsestim = np.matmul(tmp1, tmp2)
    resids = y - np.matmul(x, olsestim)
    sse = np.matmul(np.transpose(resids), resids)

    # Posterior for coefficients
    vpost = np.linalg.inv(np.linalg.inv(self.coefpriorvar) + np.matmul(np.transpose(x), x))
    apost = np.matmul(vpost,
                      np.matmul(np.linalg.inv(self.coefpriorvar), self.coefprior)
                      + np.matmul(np.matmul(np.transpose(x), x), olsestim))
    cova = np.kron(sigma, vpost)
    alpha = np.random.multivariate_normal(np.ndarray.flatten(apost), cova)

    # Posterior for variance-covariance matrix
    vpost = self.T + self.varpriordof
    tmp1 = sse + self.varprior + np.matmul(np.transpose(olsestim),
                                           np.matmul(np.transpose(x), np.matmul(x, olsestim)))
    tmp2 = np.matmul(np.matmul(np.transpose(self.coefprior),
                               np.linalg.inv(self.coefpriorvar)), self.coefprior)
    tmp3 = np.matmul(np.matmul(np.transpose(apost),
                               np.linalg.inv(self.coefpriorvar) + np.matmul(np.transpose(x), x)),
                     apost)
    # the apost term enters the posterior scale with a minus sign
    spost = tmp1 + tmp2 - tmp3
    sigma = wishart.rvs(vpost, np.linalg.inv(spost))

    # Return estimates
    retlist = (alpha, sigma)
    return retlist
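# A minimal Gibbs-loop sketch around the sampler above. The `_Prior` stub is
# hypothetical; it just carries the attributes the method reads (coefprior,
# coefpriorvar, varprior, varpriordof, T). Data and prior values are synthetic.
import numpy as np

class _Prior:
    draw_posterior = draw_posterior  # reuse the function defined above as a method

T_obs, k = 100, 2
x_data = np.random.randn(T_obs, k)
y_data = x_data @ np.array([[0.5, 0.0], [0.0, -0.3]]) + 0.1 * np.random.randn(T_obs, k)

prior = _Prior()
prior.coefprior = np.zeros((k, k))     # prior mean of the coefficients
prior.coefpriorvar = 10.0 * np.eye(k)  # prior variance of the coefficients
prior.varprior = np.eye(k)             # prior scale of the error covariance
prior.varpriordof = k + 2              # prior degrees of freedom
prior.T = T_obs

alpha, sigma = np.zeros(k * k), np.eye(k)
for _ in range(100):
    alpha, sigma = prior.draw_posterior(y_data, x_data, alpha, sigma)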
def test_against_scipy_mvn_col_conditional(seeded_rng):
    # Have to be careful to construct everything as a submatrix of a big
    # PSD matrix, else there is no guarantee that anything is invertible.
    cov_np = wishart.rvs(df=m + p + 2, scale=np.eye(m + p))
    rowcov = CovIdentity(size=m)
    colcov = CovUnconstrainedCholesky(Sigma=cov_np[0:n, 0:n])
    A = cov_np[n:, 0:n]
    Q = CovUnconstrainedCholesky(Sigma=cov_np[n:, n:])
    X = rmn(np.eye(m), np.eye(n))

    A_tf = tf.constant(A, "float64")
    X_tf = tf.constant(X, "float64")

    Q_np = Q._cov
    colcov_np = colcov._cov - A.T.dot(np.linalg.inv(Q_np)).dot(A)

    scipy_answer = np.sum(multivariate_normal.logpdf(X, np.zeros([n]), colcov_np))
    tf_answer = matnorm_logp_conditional_col(X_tf, rowcov, colcov, A_tf, Q)
    assert_allclose(scipy_answer, tf_answer, rtol=rtol)
def test_matnorm_regression_unconstrained(seeded_rng):
    # Y = XB + eps
    # Y is m x p, B is n x p, eps is m x p
    X = norm.rvs(size=(m, n))
    B = norm.rvs(size=(n, p))
    Y_hat = X.dot(B)

    rowcov_true = np.eye(m)
    colcov_true = wishart.rvs(p + 2, np.eye(p))
    Y = Y_hat + rmn(rowcov_true, colcov_true)

    row_cov = CovIdentity(size=m)
    col_cov = CovUnconstrainedCholesky(size=p)

    model = MatnormalRegression(time_cov=row_cov, space_cov=col_cov)
    model.fit(X, Y, naive_init=False)
    assert pearsonr(B.flatten(), model.beta_.flatten())[0] >= corrtol
    pred_y = model.predict(X)
    assert pearsonr(pred_y.flatten(), Y_hat.flatten())[0] >= corrtol

    model = MatnormalRegression(time_cov=row_cov, space_cov=col_cov)
    model.fit(X, Y, naive_init=True)
    assert pearsonr(B.flatten(), model.beta_.flatten())[0] >= corrtol
    pred_y = model.predict(X)
    assert pearsonr(pred_y.flatten(), Y_hat.flatten())[0] >= corrtol
def new_cluster(self, X, a, B, c, m):
    """
    X is a np.matrix (one column per point; s below is its column count)
    a is a float
    B is a np.matrix
    c is a float
    m is a np.matrix
    """
    d = float(X.shape[0])
    s = float(X.shape[1])
    x_mean = np.mean(X, axis=1)
    m_p = c / (s + c) * m + 1 / (s + c) * np.sum(X, axis=1)
    c_p = s + c
    a_p = a + s
    sum_B = np.matlib.zeros((d, d))
    for i in np.arange(s):
        sum_B = sum_B + (X - x_mean) * (X - x_mean).T
    B_p = B + sum_B + s / (a * s + 1) * (x_mean - m) * (x_mean - m).T
    sigma_p = wishart.rvs(df=a_p, scale=inv(B_p))
    mean_p = multivariate_normal.rvs(mean=m_p, cov=inv(c_p * sigma_p))  # transpose omitted
    return mean_p, sigma_p
def sample_hyperparam(self, M):
    K = M.size()[0]  # NUM_USERS/NUM_ITEMS
    df0_star = self.embedding_dim + K  # eq 14
    beta0_star = self.beta0 + K  # eq 14
    M_avg = M.mean(0).view(-1, 1)
    mu0_star = (beta0_star * self.mu0 + K * M_avg) / beta0_star  # eq 14
    mu0_star = mu0_star.double().view(-1)
    S_avg = M.transpose(0, 1).mm(M) / K  # eq 14
    W0_star_inv = self.W0_inv + K * S_avg + self.beta0 * K / beta0_star * \
        ((self.mu0 - M_avg) * (self.mu0 - M_avg).transpose(0, 1))  # eq 14
    W0_star = W0_star_inv.inverse()
    lambda_M = wishart.rvs(df=df0_star, scale=W0_star)
    lambda_M = torch.tensor(lambda_M).double()
    covar = (lambda_M * beta0_star).inverse()
    mulvarNormal = MNorm(mu0_star, covariance_matrix=covar)
    mu_M = mulvarNormal.sample()
    return mu_M.float().view(-1, 1), lambda_M.float()
def get_request():
    num_bin = np.random.randint(2, 5)
    num_dim = 4
    num_points = num_bin**num_dim

    v = np.random.normal(size=num_points, scale=10)
    x = np.random.normal(size=1, scale=10)

    # We want to cover the case where the probabilities are generated by the
    # discretized normal distribution.
    if np.random.choice([True, False]):
        q = np.random.uniform(low=0.01, size=num_points)
    else:
        mean = np.random.normal(size=num_dim)
        cov = wishart.rvs(num_dim, np.identity(num_dim))
        q = get_normal_probabilities(num_bin, mean, cov)[0].flatten()

    # We need to remove all zero-probability events.
    is_nonzero = (q > EPS_FLOAT)
    q, v = q[is_nonzero], v[is_nonzero]
    q = q / np.sum(q)

    beta = np.random.uniform()
    gamma = np.random.uniform()
    is_cost = np.random.choice([True, False])

    return x, v, q, beta, gamma, is_cost
def samplewishart(inputs, shape0, scale0, l0):
    if inputs.ndim == 1:
        inputs = inputs[:, np.newaxis]
    # Squared-exponential kernel over the inputs, with jitter for numerical stability
    K = scale0 * np.exp(-0.5 * (inputs - inputs.T)**2 / l0**2) + 1e-6 * np.eye(len(inputs))
    samples = wishart.rvs(df=shape0 + len(inputs), scale=K)
    return samples
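# Example: one draw from samplewishart over five 1-D inputs; the
# hyperparameter values here are illustrative, not from the source.
import numpy as np

x_in = np.linspace(0.0, 1.0, 5)
S = samplewishart(x_in, shape0=3.0, scale0=1.0, l0=0.5)
print(S.shape)  # (5, 5): df = shape0 + 5 with an RBF-kernel scale matrix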
def sample_factor_w(tau_sparse_mat, tau_ind, W, X, tau, beta0=1, vargin=0):
    """Sampling N-by-R factor matrix W and its hyperparameters (mu_w, Lambda_w)."""
    dim1, rank = W.shape
    W_bar = np.mean(W, axis=0)
    temp = dim1 / (dim1 + beta0)
    var_W_hyper = inv(np.eye(rank) + cov_mat(W, W_bar)
                      + temp * beta0 * np.outer(W_bar, W_bar))
    var_Lambda_hyper = wishart.rvs(df=dim1 + rank, scale=var_W_hyper)
    var_mu_hyper = mvnrnd_pre(temp * W_bar, (dim1 + beta0) * var_Lambda_hyper)

    if dim1 * rank**2 > 1e+8:
        vargin = 1

    if vargin == 0:
        var1 = X.T
        var2 = kr_prod(var1, var1)
        var3 = (var2 @ tau_ind.T).reshape([rank, rank, dim1]) + var_Lambda_hyper[:, :, None]
        var4 = var1 @ tau_sparse_mat.T + (var_Lambda_hyper @ var_mu_hyper)[:, None]
        for i in range(dim1):
            W[i, :] = mvnrnd_pre(solve(var3[:, :, i], var4[:, i]), var3[:, :, i])
    elif vargin == 1:
        for i in range(dim1):
            pos0 = np.where(tau_sparse_mat[i, :] != 0)
            Xt = X[pos0[0], :]
            var_mu = tau[i] * Xt.T @ tau_sparse_mat[i, pos0[0]] + var_Lambda_hyper @ var_mu_hyper
            var_Lambda = tau[i] * Xt.T @ Xt + var_Lambda_hyper
            W[i, :] = mvnrnd_pre(solve(var_Lambda, var_mu), var_Lambda)
    return W
def draw_posterior(self, y, x, alpha, sigma):
    """One draw from the posterior for the uninformative prior.

    Parameter y: A (T-p)xk matrix with the LHS of the model, with T the
    length of the original data set, p the number of lags and k the number
    of variables in the model.

    Parameter x: A (T-p)x(k*p+constant) matrix with the RHS of the model,
    with T the length of the original data, p the number of lags, k the
    number of variables in the model, and constant either 0 or 1 depending
    on whether the model has an intercept (constant = 1) or not (constant = 0).

    Parameter alpha: The regression coefficients of the previous draw.

    Parameter sigma: The variance-covariance matrix of the previous draw.

    Returns: One draw of the MCMC, consisting of a draw for the coefficients
    and a draw for the variance-covariance matrix.
    """
    # get OLS estimates
    tmp1 = np.linalg.inv(np.matmul(np.transpose(x), x))
    tmp2 = np.matmul(np.transpose(x), y)
    olsestim = np.matmul(tmp1, tmp2)

    # residuals
    resids = y - np.matmul(x, olsestim)
    sse = np.matmul(np.transpose(resids), resids)

    # Draw coefficients (beta)
    vpost = np.kron(sigma, np.linalg.inv(np.matmul(np.transpose(x), x)))
    alpha = np.random.multivariate_normal(np.ndarray.flatten(olsestim), vpost)

    # Draw variance-covariance matrix
    sigma = wishart.rvs(self.T, np.linalg.inv(sse))

    # Return values
    return alpha, sigma
def make_posterior_samples(self, nb_samples=10):
    self._posterior_samples = []
    m = self._sk_model
    nb_states = m.means_.shape[0]

    for i in range(nb_samples):
        _gmm = GMM()
        # draw precision matrices from the Wishart posterior of each state
        _gmm.lmbda = np.array([
            wishart.rvs(m.degrees_of_freedom_[i] + 1.,
                        np.linalg.inv(m.covariances_[i] * m.degrees_of_freedom_[i]))
            for i in range(nb_states)
        ])
        # draw means from the conditional Gaussian posterior
        _gmm.mu = np.array([
            np.random.multivariate_normal(
                m.means_[i],
                np.linalg.inv(m.mean_precision_[i] * _gmm.lmbda[i]))
            for i in range(nb_states)
        ])
        _gmm.priors = m.weights_
        self._posterior_samples += [_gmm]
def draw(self, K=10, N=1 * 10**5, m=3, gaussian=False):
    if self.seed is not None:
        np.random.seed(self.seed)

    alphas = gamma.rvs(5, size=m)  # shape parameters (equivalent sample size)
    self.p = dirichlet.rvs(alpha=alphas, size=1)[0]
    self.phi_is = multinomial.rvs(1, self.p, size=N)  # draw from categorical p.m.f.

    self.x_draws = np.zeros((N, K))
    self.hyper_loc, self.hyper_scale, self.thetas, self.var, self.covs, self.rdraws = \
        dict(), dict(), dict(), tuple(), tuple(), tuple()

    for i in range(m):
        self.hyper_loc["mean" + str(i + 1)] = norm.rvs(size=1, loc=0, scale=5)
        self.hyper_scale["scale" + str(i + 1)] = 1 / gamma.rvs(5, size=1)

        self.thetas["mean" + str(i + 1)] = norm.rvs(
            size=K,
            loc=self.hyper_loc["mean" + str(i + 1)],
            scale=self.hyper_scale["scale" + str(i + 1)])
        self.thetas["Sigma" + str(i + 1)] = np.eye(K) * (1 / gamma.rvs(5, size=K))
        self.thetas["nu" + str(i + 1)] = randint.rvs(K + 2, K + 10, size=1)[0]

        if gaussian:
            self.covs += (self.thetas['Sigma' + str(i + 1)],)
        else:
            self.covs += (wishart.rvs(df=self.thetas['nu' + str(i + 1)],
                                      scale=self.thetas['Sigma' + str(i + 1)],
                                      size=1),)
        # variance-covariance matrix of the Student-t component
        self.var += (self.thetas["nu" + str(i + 1)] /
                     (self.thetas["nu" + str(i + 1)] - 2) * self.covs[i],)

        self.rdraws += (np.random.multivariate_normal(
            self.thetas["mean" + str(i + 1)], self.covs[i], N),)

        # repeat phi vector to match the shape of the random matrix
        self.Phi = np.tile(self.phi_is[:, i], K).reshape(K, N).T
        self.x_draws += np.multiply(self.Phi, self.rdraws[i])

    return self.x_draws
def update_mixture_components(self, X, mulinha, Sigmalinha, Hlinha, sigmalinha, nk, active_components):
    K = self.K_active
    Sigmalinha_inv = np.linalg.inv(Sigmalinha)
    for k in range(K):
        k_inds = np.argwhere(self.z == k).ravel()
        X_k = X[k_inds]  # all the points in current cluster k
        phi_k = self.phi[k_inds]
        phi_k = phi_k.reshape(len(phi_k), -1)
        beta_k = self.beta[k_inds]
        beta_k = beta_k.reshape(len(beta_k), -1)

        covariance_inv = Sigmalinha_inv + self.cov_inv[k].dot(np.sum(phi_k**2 / beta_k))
        covariance = np.linalg.inv(covariance_inv)
        mean = covariance.dot(Sigmalinha_inv.dot(mulinha)
                              + self.cov_inv[k].dot(np.sum(X_k / beta_k, axis=0)))
        self.mu[k] = multivariate_normal.rvs(mean=mean, cov=covariance)

        aux = np.dot((X_k - phi_k * self.mu[k]).T, (X_k - phi_k * self.mu[k]) / beta_k)
        self.cov_inv[k] = wishart.rvs(df=int(np.ceil(sigmalinha)) + nk[k] + 1,
                                      scale=np.linalg.inv(Hlinha + aux))
        self.cov[k] = np.linalg.inv(self.cov_inv[k])
def _update_item_params(self):
    N = self.n_item
    X_bar = np.mean(self.item_features, 0)
    X_bar = np.reshape(X_bar, (self.n_feature, 1))
    S_bar = np.cov(self.item_features.T)

    norm_X_bar = X_bar - self.mu_item

    WI_post = inv(inv(self.WI_item) + N * S_bar +
                  np.dot(norm_X_bar, norm_X_bar.T) *
                  (N * self.beta_item) / (self.beta_item + N))
    # keep the posterior scale matrix numerically symmetric
    WI_post = (WI_post + WI_post.T) / 2.0

    df_post = self.df_item + N

    # update alpha_item
    self.alpha_item = wishart.rvs(df_post, WI_post, 1, self.rand_state)

    # update mu_item
    mu_temp = (self.beta_item * self.mu_item + N * X_bar) / \
        (self.beta_item + N)
    lam = cholesky(inv(np.dot(self.beta_item + N, self.alpha_item)))
    self.mu_item = mu_temp + np.dot(
        lam, self.rand_state.randn(self.n_feature, 1))
def _update_user_params(self):
    # same as _update_item_params
    N = self.n_user
    X_bar = np.mean(self.user_features, 0).T
    X_bar = np.reshape(X_bar, (self.n_feature, 1))
    S_bar = np.cov(self.user_features.T)

    norm_X_bar = X_bar - self.mu_user

    WI_post = inv(inv(self.WI_user) + N * S_bar +
                  np.dot(norm_X_bar, norm_X_bar.T) *
                  (N * self.beta_user) / (self.beta_user + N))
    # keep the posterior scale matrix numerically symmetric
    WI_post = (WI_post + WI_post.T) / 2.0

    df_post = self.df_user + N

    # update alpha_user
    self.alpha_user = wishart.rvs(df_post, WI_post, 1, self.rand_state)

    # update mu_user
    mu_temp = (self.beta_user * self.mu_user + N * X_bar) / \
        (self.beta_user + N)
    lam = cholesky(inv(np.dot(self.beta_user + N, self.alpha_user)))
    self.mu_user = mu_temp + np.dot(
        lam, self.rand_state.randn(self.n_feature, 1))
def simulate(theta, sim_args):
    pz_fid = sim_args[0]
    modes = sim_args[1]
    N = sim_args[2]
    nl = sim_args[3]
    nz = len(pz_fid)
    nmodes = len(modes)

    # Photo-z parameters: shift each n(z) and renormalize
    z = np.linspace(0, pz_fid[0].get_knots()[-1], len(pz_fid[0].get_knots()))
    pz_new = [0] * nz
    for i in range(nz):
        p = pz_fid[i](z + theta[5 + i])
        p = p / np.trapz(p, z)
        pz_new[i] = interpolate.InterpolatedUnivariateSpline(z, p, k=3)
    pz = pz_new

    # Compute theory power spectrum
    C = power_spectrum(theta, [pz, modes, N])

    # Realize noisy power spectrum: one Wishart draw per mode
    C_hat = np.zeros((nz, nz, nmodes))
    for i in range(nmodes):
        C_hat[:, :, i] = wishart.rvs(df=nl[i], scale=C[:, :, i]) / nl[i]

    return C_hat
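# Sanity check for the Wishart realization above: E[Wishart(df, C)] = df * C,
# so wishart.rvs(df=nl, scale=C) / nl has expectation C, and averaging many
# draws should recover the input spectrum (values here are illustrative).
import numpy as np
from scipy.stats import wishart

C = np.array([[2.0, 0.3], [0.3, 1.0]])
nl = 50
draws = wishart.rvs(df=nl, scale=C, size=2000) / nl
print(np.allclose(draws.mean(axis=0), C, atol=0.1))  # ~True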
def __init__(self, n, dim=2, m=None, C=None):
    if m is None:
        m = np.random.randn(dim)
    if C is None:
        C = wishart.rvs(dim + 1, np.identity(dim), 1)
    self.m = m
    self.C = C
    super(Gaussian, self).__init__(n)
def __init__(self, c=None):
    self.df = 2
    if c is None:
        self.c = wishart.rvs(self.df, numpy.eye(self.df))
    else:
        self.c = c
    self.cov = inv(self.c)
def update_sgm():
    aj = a + s
    S_m = S - xbar
    m_S = xbar - self.m
    Bj = B + S_m.T.dot(S_m) + (s / float(s + 1) * m_S.T.dot(m_S))
    lmbdaj = wishart.rvs(aj, Bj)
    cj = s + c
    self.sgm[j] = np.linalg.inv(cj * lmbdaj)
def __init__(self, c=None):
    self.df = 1
    if c is None:
        # let's draw from a 1-D wishart for this
        self.c = wishart.rvs(self.df, numpy.eye(self.df))
    else:
        self.c = c
    self.cov = inv(numpy.diag([self.c, self.c]))
def test_wishart_invwishart_2D_rvs(self):
    dim = 3
    df = 10

    # Construct a simple non-diagonal positive definite matrix
    scale = np.eye(dim)
    scale[0, 1] = 0.5
    scale[1, 0] = 0.5

    # Construct frozen Wishart and inverse Wishart random variables
    w = wishart(df, scale)
    iw = invwishart(df, scale)

    # Get the generated random variables from a known seed
    np.random.seed(248042)
    w_rvs = wishart.rvs(df, scale)
    np.random.seed(248042)
    frozen_w_rvs = w.rvs()
    np.random.seed(248042)
    iw_rvs = invwishart.rvs(df, scale)
    np.random.seed(248042)
    frozen_iw_rvs = iw.rvs()

    # Manually calculate what it should be, based on the Bartlett (1933)
    # decomposition of a Wishart into D A A' D', where D is the Cholesky
    # factorization of the scale matrix and A is the lower triangular matrix
    # with the square root of chi^2 variates on the diagonal and N(0,1)
    # variates in the lower triangle.
    np.random.seed(248042)
    covariances = np.random.normal(size=3)
    variances = np.r_[
        np.random.chisquare(df),
        np.random.chisquare(df - 1),
        np.random.chisquare(df - 2),
    ]**0.5

    # Construct the lower-triangular A matrix
    A = np.diag(variances)
    A[np.tril_indices(dim, k=-1)] = covariances

    # Wishart random variate
    D = np.linalg.cholesky(scale)
    DA = D.dot(A)
    manual_w_rvs = np.dot(DA, DA.T)

    # inverse Wishart random variate
    # Supposing that the inverse wishart has scale matrix `scale`, then the
    # random variate is the inverse of a random variate drawn from a Wishart
    # distribution with scale matrix `inv_scale = np.linalg.inv(scale)`
    iD = np.linalg.cholesky(np.linalg.inv(scale))
    iDA = iD.dot(A)
    manual_iw_rvs = np.linalg.inv(np.dot(iDA, iDA.T))

    # Test for equality
    assert_allclose(w_rvs, manual_w_rvs)
    assert_allclose(frozen_w_rvs, manual_w_rvs)
    assert_allclose(iw_rvs, manual_iw_rvs)
    assert_allclose(frozen_iw_rvs, manual_iw_rvs)
def _update_alpha(self):
    # accumulate the squared prediction error over the training set
    sq_err = 0
    for entry in self.trainData.iterrows():
        keys = [entry[1][0], entry[1][1], entry[1][3]]
        rate = entry[1][2]
        sq_err += (rate - self.predict_with_keys(keys, True)) ** 2
    WI_post = self.WI_alpha + sq_err
    df_post = self.df_alpha + self.L
    self.alpha = wishart.rvs(df=df_post, scale=1. / WI_post)
def _update_time_params(self):
    N = self.rateDao.num_times
    diff_temp = np.diff(self.T, axis=0)
    diff_t0 = self.T[0] - self.mu0_t
    WI_post = self.WI_time + np.dot(diff_temp.T, diff_temp) + \
        self.beta_time / (1 + self.beta_time) * np.outer(diff_t0, diff_t0)
    WI_post = (WI_post + WI_post.T) / 2.
    df_post = self.df_item + N
    self.lambda_time = wishart.rvs(df_post, WI_post)
    mu_temp = (self.beta_time * self.mu0_t + self.T[0]) / (self.beta_time + 1)
    sigma_temp = np.linalg.inv(np.dot(self.beta_time + 1, self.lambda_time))
    self.mu_time = mv_normalrand(mu_temp, sigma_temp, self.factors)
def _update_item_params(self):
    N = self.rateDao.num_items
    X_bar = np.mean(self.Q, 0)
    S_bar = np.cov(self.Q.T)
    norm_X_bar = self.mu0_i - X_bar
    WI_post = self.WI_item + N * S_bar + \
        np.outer(norm_X_bar, norm_X_bar) * (N * self.beta_item) / (self.beta_item + N)
    WI_post = (WI_post + WI_post.T) / 2.
    df_post = self.df_item + N
    self.lambda_item = wishart.rvs(df_post, WI_post)
    mu_temp = (self.beta_item * self.mu0_i + N * X_bar) / (self.beta_item + N)
    sigma_temp = np.linalg.inv(np.dot(self.beta_item + N, self.lambda_item))
    self.mu_item = mv_normalrand(mu_temp, sigma_temp, self.factors)
def _update_user_params(self):
    N = self.rateDao.num_users
    X_bar = np.mean(self.P, 0)
    S_bar = np.cov(self.P.T)
    norm_X_bar = self.mu0_u - X_bar
    WI_post = self.WI_user + N * S_bar + \
        np.outer(norm_X_bar, norm_X_bar) * (N * self.beta_user) / (self.beta_user + N)
    # ensure the matrix's symmetry
    WI_post = (WI_post + WI_post.T) / 2.
    df_post = self.df_user + N
    self.lambda_user = wishart.rvs(df_post, WI_post)
    # for the following, see http://blog.pluskid.org/?p=430
    mu_temp = (self.beta_user * self.mu0_u + N * X_bar) / (self.beta_user + N)
    sigma_temp = np.linalg.inv(np.dot(self.beta_user + N, self.lambda_user))
    self.mu_user = mv_normalrand(mu_temp, sigma_temp, self.factors)
def sample_from_prior(c0, m0, a0, B0):
    # draw a precision matrix from the Wishart prior, then a mean given it
    precision0 = wishart.rvs(df=a0, scale=np.linalg.inv(B0))
    cov = np.linalg.inv(precision0)
    mean = mvn.rvs(mean=m0, cov=cov / c0)
    return mean, cov
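# A hypothetical draw from the prior above for a 2-D model (hyperparameter
# values are illustrative; mvn is scipy.stats.multivariate_normal, as in the source):
import numpy as np

m0 = np.zeros(2)
B0 = np.eye(2)
mean0, cov0 = sample_from_prior(c0=0.1, m0=m0, a0=2.0, B0=B0)
print(mean0.shape, cov0.shape)  # (2,) and (2, 2)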
def propose(self):
    vx = wishart.rvs(self.df, self.cx)
    vy = wishart.rvs(self.df, self.cy)
    # forward-backward log-probability of the proposal
    fb = wishart.logpdf(vx, self.df, self.cx) + wishart.logpdf(vy, self.df, self.cy) - \
        (wishart.logpdf(self.cx, self.df, vx) + wishart.logpdf(self.cy, self.df, vy))
    return AlignedNormal2D(cx=vx, cy=vy), fb
def DP_GMM(self, X, T):
    X = X.todense()
    # jitter the data slightly to avoid degenerate covariance estimates
    for i in np.arange(X.shape[0]):
        for j in np.arange(X.shape[1]):
            X[i, j] += 1e-3 * random.random()
    print(X)

    d = float(X.shape[0])
    N = int(X.shape[1])
    c = 1.0 / 10.0
    a = d
    B = c * d * np.cov(X)  # rows of X are the variables
    alpha = 1.0
    m = np.mean(X, axis=1)

    m_mean = [None] * N
    m_sigma = [None] * N
    m_n = np.zeros(N, dtype=int)  # number of points in each cluster
    m_c = np.zeros(N, dtype=int)  # cluster assignment of each point

    print(B)
    print(B.shape)
    sigma = wishart.rvs(df=a, scale=inv(B))
    m_sigma[0] = sigma
    m_mean[0] = multivariate_normal.rvs(mean=m, cov=inv(c * sigma))  # transpose omitted
    num_clusters = 1
    m_n[0] = N
    num_cluster_list = np.zeros((1, T))

    for t in np.arange(T):
        print(t)
        for i in np.arange(N):
            m_psi = np.zeros(num_clusters + 1)
            # (a) for all clusters with points in them besides x_i
            for j in np.arange(num_clusters):
                nj = m_n[j]
                if m_c[i] == j:
                    nj = nj - 1  # if only x_i is in the cluster, the m_psi prob stays 0
                if nj > 0:
                    mean_j = m_mean[j]
                    sigma_j = m_sigma[j]
                    m_psi[j] = multivariate_normal.pdf(
                        x=X[:, i], mean=mean_j, cov=inv(sigma_j)) * nj / (alpha + N - 1)
            # (b) for a new cluster
            m_psi[num_clusters] = alpha / (alpha + N - 1) * self.marginal(X[:, i], a, B, c, m)
            # (c) normalize m_psi and sample from the discrete distribution
            m_psi = m_psi / np.sum(m_psi)
            # remove this point from its current cluster's count
            m_n[m_c[i]] -= 1
            m_c[i] = self.discrete_dist(m_psi)  # cluster assignment for x_i
            m_n[m_c[i]] += 1
            if m_c[i] == num_clusters:
                # generate a new cluster
                mean_p, sigma_p = self.new_cluster(X[:, i], a, B, c, m)
                m_mean[num_clusters] = mean_p
                m_sigma[num_clusters] = sigma_p
                num_clusters = num_clusters + 1
        # (d) remove clusters with no points, reindex remaining clusters
        m_n = np.zeros(N, dtype=int)
        m_c_temp = m_c
        for j in np.arange(num_clusters):
            indices = []
            for i, item in enumerate(m_c_temp):
                if item == j:
                    indices.append(i)
            count = len(indices)
            m_n[j] = count
            X_sub = X[:, indices]
            mean_p, sigma_p = self.new_cluster(X_sub, a, B, c, m)
            m_mean[j] = mean_p
            m_sigma[j] = sigma_p
def propose(self):
    val = wishart.rvs(self.df, self.c)
    fb = wishart.logpdf(val, self.df, self.c) - wishart.logpdf(self.c, self.df, val)
    return FreeNormal2D(c=val), fb
def sampler_W(df, scale):
    sample = wishart.rvs(df, scale, size=1, random_state=None)
    matrix = sample[0]
    return matrix
def sampler_W(df, scale):
    sample = wishart.rvs(df, scale, size=1, random_state=None)
    return sample
def gmm(X, K, max_iter=100):
    N, D = X.shape

    # parameters for pi, mu, and precision
    alphas = np.ones(K, dtype=np.float32)       # prior parameter for pi (dirichlet)
    orig_alphas = np.ones(K, dtype=np.float32)  # prior parameter for pi (dirichlet)

    # mu_means = np.zeros((K, D), dtype=np.float32)  # prior mean for mu (normal) ### No!
    # mu_covs = np.empty((K, D, D), dtype=np.float32)  # prior covariance for mu (normal)
    orig_c = 10.0
    # for k in range(K):
    #     mu_covs[k] = np.eye(D) * orig_c

    orig_a = np.ones(K, dtype=np.float32) * D
    a = np.ones(K, dtype=np.float32) * D  # prior for precision (wishart)
    orig_B = np.empty((K, D, D))
    B = np.empty((K, D, D))  # precision (wishart)
    empirical_cov = np.cov(X.T)
    for k in range(K):
        B[k] = (D / 10.0) * empirical_cov
        orig_B[k] = (D / 10.0) * empirical_cov

    # try random init instead
    # mu_means = np.random.randn(K, D) * orig_c
    mu_means = np.empty((K, D))
    for j in range(K):
        mu_means[j] = X[np.random.choice(N)]
    mu_covs = wishart.rvs(df=orig_a[0], scale=np.linalg.inv(B[0]), size=K)

    costs = np.zeros(max_iter)
    for iter_idx in range(max_iter):
        # calculate q(c[i]); index i = sample, index j = cluster
        t1 = np.empty(K)
        t2 = np.empty((N, K))
        t3 = np.empty(K)
        t4 = np.empty(K)

        # calculate this first because we will use it multiple times
        Binv = np.empty((K, D, D))
        for j in range(K):
            Binv[j] = np.linalg.inv(B[j])

        for j in range(K):
            # calculate t1
            t1[j] = -np.log(np.linalg.det(B[j]))
            for d in range(D):
                t1[j] += digamma((1 - d + a[j]) / 2.0)

            # calculate t2
            for i in range(N):
                diff_ij = X[i] - mu_means[j]
                t2[i, j] = diff_ij.dot((a[j] * Binv[j]).dot(diff_ij))

            # calculate t3
            t3[j] = np.trace(a[j] * Binv[j].dot(mu_covs[j]))

            # calculate t4
            t4[j] = digamma(alphas[j]) - digamma(alphas.sum())

        # calculate phi from the t's
        # (the 1-D terms broadcast correctly against the 2-D t2 term)
        phi = np.exp(0.5 * t1 - 0.5 * t2 - 0.5 * t3 + t4)
        phi = phi / phi.sum(axis=1, keepdims=True)

        cluster_assignments = phi.argmax(axis=1)

        n = phi.sum(axis=0)  # there should be K of these

        # update q(pi)
        alphas = orig_alphas + n

        # update q(mu)
        for j in range(K):
            mu_covs[j] = np.linalg.inv((1.0 / orig_c) * np.eye(D) + n[j] * a[j] * Binv[j])
            mu_means[j] = mu_covs[j].dot(a[j] * Binv[j]).dot(phi[:, j].dot(X))

        # update q(lambda)
        a = orig_a + n
        for j in range(K):
            B[j] = orig_B[j].copy()
            for i in range(N):
                diff_ij = X[i] - mu_means[j]
                B[j] += phi[i, j] * (np.outer(diff_ij, diff_ij) + mu_covs[j])

        costs[iter_idx] = get_cost(X, K, cluster_assignments, phi, alphas, mu_means,
                                   mu_covs, a, B, orig_alphas, orig_c, orig_a, orig_B)

    plt.plot(costs)
    plt.title("Costs")
    plt.show()

    print("cluster assignments:\n", cluster_assignments)
    plt.scatter(X[:, 0], X[:, 1], c=cluster_assignments, s=100, alpha=0.7)
    plt.show()
def gmm(X, T=500):
    N, D = X.shape
    m0 = X.mean(axis=0)
    c0 = 0.1
    a0 = float(D)
    B0 = c0 * D * np.cov(X.T)
    alpha0 = 1.0

    # cluster assignments - originally everything is assigned to cluster 0
    C = np.zeros(N)

    # keep as many as we need for each gaussian
    # originally we sample from the prior
    # TODO: just use the function above
    precision0 = wishart.rvs(df=a0, scale=np.linalg.inv(B0))
    covariances = [np.linalg.inv(precision0)]
    means = [mvn.rvs(mean=m0, cov=covariances[0] / c0)]

    cluster_counts = [1]
    K = 1
    observations_per_cluster = np.zeros((T, 6))
    for t in range(T):
        if t % 20 == 0:
            print(t)

        # 1) calculate phi[i,j]
        # Notes:
        #   MANY new clusters can be made each iteration
        #   A cluster can be DESTROYED if x[i] is the only point in cluster j
        #   and gets assigned to a new cluster
        list_of_cluster_indices = list(range(K))
        next_cluster_index = K
        for i in range(N):
            phi_i = {}
            # don't loop through range(K) because clusters can be created or
            # destroyed as we loop through i
            for j in list_of_cluster_indices:
                nj_noti = np.sum(C[:i] == j) + np.sum(C[i+1:] == j)
                if nj_noti > 0:
                    # existing cluster:
                    # phi[i,j] = N(x[i] | mu[j], cov[j]) * nj_noti / (alpha0 + N - 1)
                    # using the sampled mu / covs
                    phi_i[j] = mvn.pdf(X[i], mean=means[j], cov=covariances[j]) * \
                        nj_noti / (alpha0 + N - 1.0)

            # new cluster:
            # create a possible new cluster for every sample i, but only keep
            # it if sample i occupies this new cluster j', i.e. if C[i] = j'
            # when we sample C[i]
            # phi[i,j'] = alpha0 / (alpha0 + N - 1) * p(x[i])
            # p(x[i]) is a marginal integrated over mu and precision
            phi_i[next_cluster_index] = alpha0 / (alpha0 + N - 1.0) * marginal(X[i], c0, m0, a0, B0)

            # normalize phi[i] and assign C[i] to its new cluster by sampling from phi[i]
            normalize_phi_hat(phi_i)
            # if C[i] = j' (new cluster), generate mu[j'] and cov[j']
            C[i] = sample_cluster_identity(phi_i)
            if C[i] == next_cluster_index:
                list_of_cluster_indices.append(next_cluster_index)
                next_cluster_index += 1
                new_mean, new_cov = sample_from_prior(c0, m0, a0, B0)
                means.append(new_mean)
                covariances.append(new_cov)

            # destroy any cluster with no points in it
            clusters_to_remove = []
            tot = 0
            for j in list_of_cluster_indices:
                nj = np.sum(C == j)
                tot += nj
                if nj == 0:
                    clusters_to_remove.append(j)
            assert tot == N
            for j in clusters_to_remove:
                list_of_cluster_indices.remove(j)

        # re-order the cluster indexes so they range from 0..new K - 1
        new_C = np.zeros(N)
        for new_j in range(len(list_of_cluster_indices)):
            old_j = list_of_cluster_indices[new_j]
            new_C[C == old_j] = new_j
        C = new_C
        K = len(list_of_cluster_indices)
        list_of_cluster_indices = list(range(K))  # redundant, but if removed will break counts

        cluster_counts.append(K)

        # 2) calculate the new mu, covariance for every currently non-empty
        # cluster, i.e. SAMPLE mu, cov from the new cluster assignments
        means = []
        covariances = []
        for j in range(K):
            # first calculate m', c', a', B', then call the function that
            # samples a mean and covariance using these
            mean, cov = sample_from_X(X[C == j], m0, c0, a0, B0)
            means.append(mean)
            covariances.append(cov)

        # track the number of observations in the 6 most probable clusters
        counts = sorted([np.sum(C == j) for j in list_of_cluster_indices], reverse=True)
        if len(counts) < 6:
            observations_per_cluster[t, :len(counts)] = counts
        else:
            observations_per_cluster[t] = counts[:6]

    # plot number of clusters per iteration
    plt.plot(cluster_counts)
    plt.show()

    # plot number of observations per cluster for the 6 most probable clusters per iteration
    plt.plot(observations_per_cluster)
    plt.show()
def make_step(self):
    """
    Make a single Gibbs sampling step according to [2] algorithm 3.
    """
    # Create an empty base component to evaluate the probability of new components
    base_component = Component(self, [])
    # Iterate over all observations
    for i in range(self.npoints):
        # Remove the observation from its current cluster
        z_i = self.z[i]
        self.components[z_i].remove(i)
        # Remove the component if it is empty
        if len(self.components[z_i].idx) == 0:
            del self.components[z_i]

        # Iterate over all possible components and evaluate the
        # probability to assign the current data point to the cluster
        probabilities = []
        labels = []
        for z, component in self.components.items():
            # Obtain the marginal likelihood under the Gaussian
            p = component.marginal_likelihood(self.X[i])
            # Obtain the contribution from the Dirichlet process
            p *= component.total_weight / (self.alpha + self.npoints - 1.0)
            # Append to the list of probabilities and labels
            probabilities.append(p)
            labels.append(z)

        # Consider the possibility of a new component
        # Define a new label
        z_new = max(self.components.keys()) + 1
        # Probabilities for adding a new cluster
        p = base_component.marginal_likelihood(self.X[i])
        p *= self.alpha / (self.alpha + self.npoints - 1.0)
        # Append to the list of probabilities and labels
        probabilities.append(p)
        labels.append(z_new)

        # Normalise the distribution
        probabilities = np.asarray(probabilities) / np.sum(probabilities)
        # Sample a new cluster
        self.z[i] = z_i = np.random.choice(labels, p=probabilities)
        # If it's a new cluster
        if z_i == z_new:
            self.components[z_i] = Component(self, [i])
        else:
            self.components[z_i].append(i)

    # Sample the density of each cluster
    self.samples_rho.append(np.random.dirichlet(
        self.alpha + np.asarray([component.total_weight
                                 for component in self.components.values()])))

    # Sample the inverse covariance
    row_mu = []
    row_tau = []
    for component in self.components.values():
        # Draw from the Wishart distribution
        tau = np.atleast_2d(wishart.rvs(df=component.pnu, scale=np.linalg.inv(component.ppsi)))
        # Draw from the multivariate normal
        mu = np.random.multivariate_normal(component.pchi, np.linalg.inv(component.pkappa * tau))
        # Append samples
        row_mu.append(mu)
        row_tau.append(tau)

    # Store the results
    self.samples_mu.append(row_mu)
    self.samples_tau.append(row_tau)
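# A commented usage sketch for the Gibbs step above; the constructor signature
# of the surrounding sampler class is not shown in the source, so the names
# here are hypothetical:
#
#   sampler = DirichletProcessGmm(X, alpha=1.0)
#   for _ in range(1000):
#       sampler.make_step()
#   # posterior draws accumulate in sampler.samples_rho, sampler.samples_mu,
#   # and sampler.samples_tau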