def get_moments(xs, k):
    """Estimate first-, second- and third-order moments from three views.

    ``xs`` is unpacked as a 3-way array of shape ``(n, d, v)``; the first
    three slices along the last axis are used as the views.

    Args:
        xs: array with three axes ``(n, d, v)``; requires ``d >= k`` and
            ``v >= 3`` (checked with ``assert``).
        k: lower bound that the second axis length must satisfy.

    Returns:
        ``(m1, M2, M3)`` where ``m1`` is the mean of the first view over
        axis 0 reshaped to ``(d, 1)``, ``M2`` is ``symmetrize`` applied to
        the first/second view cross product divided by ``n``, and ``M3`` is
        ``symmetrize`` applied to ``Triples`` of the three views.
    """
    n_rows, dim, n_views = xs.shape
    assert dim >= k
    assert n_views >= 3

    view_a = xs[:, :, 0]
    view_b = xs[:, :, 1]
    view_c = xs[:, :, 2]

    mean_a = view_a.sum(0) / n_rows
    first_moment = mean_a.reshape(dim, 1)

    cross_ab = view_a.T.dot(view_b) / n_rows
    second_moment = symmetrize(cross_ab)

    third_moment = symmetrize(Triples(view_a, view_b, view_c))

    return first_moment, second_moment, third_moment
def model_conjugate_update(model_prior_params, S, N, project=False, size=1):
    """Sample ``(theta, sigma_squared)`` from the conjugate posterior update.

    Args:
        model_prior_params: 4-tuple ``(mu_0, lambda_0, a_0, b_0)`` of prior
            parameters.
        S: mapping of sufficient statistics; the keys ``'XX'``, ``'Xy'`` and
            ``'yy'`` are read.
        N: count used in the shape update ``a_0 + N / 2`` (presumably the
            number of observations -- confirm against the caller).
        project: when true, the updated scale ``b_n`` is floored at 0.1.
        size: number of inverse-gamma variance draws; one ``theta`` is drawn
            per variance.

    Returns:
        ``(theta, sigma_squared)``: an array stacking one multivariate-normal
        draw per sampled variance, and the array of variance draws.
    """
    mu_0, lambda_0, a_0, b_0 = model_prior_params

    post_precision = S['XX'] + lambda_0
    # NOTE(review): the inverse is taken from the un-repaired matrix, before
    # the PD check below may replace post_precision -- confirm this is
    # intended.
    post_precision_inv = np.linalg.inv(post_precision)

    # Repair either matrix if it is not positive definite.
    if not isPD(post_precision):
        post_precision = nearestPD(post_precision)
    if not isPD(post_precision_inv):
        post_precision_inv = nearestPD(post_precision_inv)

    mu_n = post_precision_inv.dot(S['Xy'] + lambda_0.dot(mu_0))
    a_n = a_0 + .5 * N

    prior_quad = mu_0.T.dot(lambda_0).dot(mu_0)
    post_quad = mu_n.T.dot(post_precision).dot(mu_n)
    b_n = b_0 + .5 * (S['yy'] + prior_quad - post_quad)[0, 0]
    if project:
        b_n = max(b_n, .1)

    sigma_squared = scipy.stats.invgamma.rvs(a=a_n, scale=b_n, size=size)

    # One coefficient draw per sampled variance, in order.
    draws = []
    for variance in sigma_squared:
        scaled_cov = symmetrize(variance * post_precision_inv)
        draws.append(
            scipy.stats.multivariate_normal.rvs(mu_n.flatten(), scaled_cov)
        )
    theta = np.array(draws)

    return theta, sigma_squared
def privatize_suff_stats(S, sensitivity_x, sensitivity_y, epsilon):
    """Return a Laplace-noised copy of the sufficient statistics in ``S``.

    A single scalar sensitivity is computed from the per-feature bounds and
    every entry of ``S`` is perturbed with Laplace noise of scale
    ``sensitivity / epsilon``.

    Args:
        S: mapping of sufficient statistics; ``S['XX']`` must be present and
            its first dimension is used as the data dimension.
        sensitivity_x: sequence of bounds; all but the last element are
            summed.
        sensitivity_y: scalar bound used for the ``Xy`` and ``yy`` terms.
        epsilon: divisor of the noise scale.

    Returns:
        ``(Z, sensitivity)``: the noised statistics (with ``Z['XX']`` passed
        through ``symmetrize`` and ``Z['X']`` taken from its first column)
        and the scalar sensitivity used.
    """
    data_dim = S['XX'].shape[0]

    # Component counts for each statistic.
    # upper triangular, not counting last column which is X
    n_xx = data_dim * (data_dim + 1) / 2
    n_x = data_dim  # last column
    n_xy = data_dim
    n_yy = 1

    x_bound = sum(sensitivity_x[:-1])
    sensitivity = (
        n_xx * x_bound ** 2
        + n_x * x_bound
        + n_xy * x_bound * sensitivity_y
        + n_yy * sensitivity_y ** 2
    )

    noise_scale = sensitivity / epsilon
    Z = {}
    for name, stat in S.items():
        Z[name] = np.random.laplace(loc=stat, scale=noise_scale)

    # symmetrize Z_XX since we only want to add noise to upper triangle
    Z['XX'] = symmetrize(Z['XX'])
    # NOTE(review): the comments above speak of the "last column", but X is
    # read from column 0 here -- confirm which layout is intended.
    Z['X'] = Z['XX'][:, 0][:, None]

    return Z, sensitivity
def solve_mixture_model(model, data):
    """
    Whiten and unwhiten appropriately.

    Builds the second- and third-order moment arrays from the empirical
    moments of ``data``, whitens the third-order array with the whitener
    obtained from the second-order one, decomposes it with ``candecomp``,
    then unwhitens and normalises the result.

    Returns ``(pi_, M_)``: the output of ``make_distribution`` on the
    weights, and the factor matrix with ``make_distribution`` applied to
    each column.
    """
    dim = model["d"]

    # Get moments
    moments = model.empirical_moments(data, model.observed_monomials(3))

    # Second-order moments, indexed by the monomial x_{a+1} * x_{b+1}.
    M2 = zeros((dim, dim))
    for a in xrange(dim):
        for b in xrange(dim):
            pair = sp.sympify('x%d * x%d' % (a + 1, b + 1))
            M2[a, b] = moments[pair]

    # Third-order moments, indexed by x_{a+1} * x_{b+1} * x_{c+1}.
    M3 = zeros((dim, dim, dim))
    for a in xrange(dim):
        for b in xrange(dim):
            for c in xrange(dim):
                triple = sp.sympify(
                    'x%d * x%d * x%d' % (a + 1, b + 1, c + 1))
                M3[a, b, c] = moments[triple]

    n_components = model["k"]

    # Symmetrize
    M2 = symmetrize(M2)
    M3 = symmetrize(M3)
    assert symmetric_skew(M2) < 1e-2
    assert symmetric_skew(M3) < 1e-2

    # Whiten
    whitener, unwhitener = get_whitener(M2, n_components)
    whitened_M3 = einsum('ijk,ia,jb,kc->abc', M3, whitener, whitener, whitener)
    weights, factors, _, _ = candecomp(whitened_M3, n_components)

    # Unwhiten M
    factors = unwhitener.dot(factors.dot(diag(weights)))
    weights = 1. / weights ** 2

    # "Project" onto simplex
    weights = make_distribution(abs(weights))
    normalised_columns = []
    for column in factors.T:
        normalised_columns.append(make_distribution(column))
    factors = array(normalised_columns).T

    return weights, factors
def solve_mixture_model(model, data):
    """
    Whiten and unwhiten appropriately.

    Steps: read the empirical moments of ``data`` into a second-order and a
    third-order array, symmetrize both, whiten the third-order array,
    decompose it with ``candecomp``, unwhiten the factors, and pass weights
    and factor columns through ``make_distribution``.

    Returns the pair ``(pi_, M_)`` of normalised weights and factor matrix.
    """
    n_vars = model["d"]

    # Get moments
    moments = model.empirical_moments(data, model.observed_monomials(3))
    second = zeros((n_vars, n_vars))
    third = zeros((n_vars, n_vars, n_vars))
    for p in xrange(n_vars):
        for q in xrange(n_vars):
            # Monomial names are 1-based: x1, x2, ...
            second[p, q] = moments[
                sp.sympify('x%d * x%d' % (p + 1, q + 1))]
            for r in xrange(n_vars):
                third[p, q, r] = moments[
                    sp.sympify('x%d * x%d * x%d' % (p + 1, q + 1, r + 1))]

    rank = model["k"]

    # Symmetrize
    second, third = symmetrize(second), symmetrize(third)
    assert symmetric_skew(second) < 1e-2
    assert symmetric_skew(third) < 1e-2

    # Whiten
    W, Wt = get_whitener(second, rank)
    third_whitened = einsum('ijk,ia,jb,kc->abc', third, W, W, W)
    decomposition = candecomp(third_whitened, rank)
    pi_, M_, _, _ = decomposition

    # Unwhiten M
    scaled = M_.dot(diag(pi_))
    M_ = Wt.dot(scaled)
    pi_ = 1. / pi_ ** 2

    # "Project" onto simplex
    pi_ = make_distribution(abs(pi_))
    M_ = array([make_distribution(column) for column in M_.T]).T

    return pi_, M_
def NIG_rvs(mu, lam, a, b, size=1):
    """Draw ``(theta, sigma_squared)`` samples, one ``theta`` per variance.

    For ``size == 1`` the call is delegated to ``NIG_rvs_single_variance``.
    Otherwise ``size`` variances are drawn from an inverse gamma with shape
    ``a`` and scale ``b``, and for each variance one multivariate-normal
    vector is drawn with mean ``mu.flatten()`` and covariance
    ``symmetrize(variance * inv(lam))``.

    Returns:
        ``(theta, sigma_squared)``: stacked normal draws and the variance
        draws.
    """
    if size == 1:
        return NIG_rvs_single_variance(mu, lam, a, b)

    sigma_squared = scipy.stats.invgamma.rvs(a=a, scale=b, size=size)
    lam_inverse = inv(lam)

    samples = []
    for variance in sigma_squared:
        covariance = symmetrize(variance * lam_inverse)
        samples.append(
            scipy.stats.multivariate_normal.rvs(mu.flatten(), covariance)
        )
    theta = np.array(samples)

    return theta, sigma_squared
def NIG_rvs_single_variance(mu, lam, a, b, size=1):
    """Draw one variance and ``size`` normal vectors sharing that variance.

    A single ``sigma_squared`` is sampled from an inverse gamma with shape
    ``a`` and scale ``b``. The covariance ``symmetrize(sigma_squared *
    inv(lam))`` is replaced by ``nearestPD`` of itself when ``isPD`` rejects
    it, and ``size`` multivariate-normal draws with mean ``mu.flatten()``
    are taken.

    Returns:
        ``(theta, sigma_squared)``. A float draw is wrapped in a 1-element
        array, and when ``size == 1`` a trailing axis is appended to
        ``theta``.
    """
    sigma_squared = scipy.stats.invgamma.rvs(a=a, scale=b)

    covariance = symmetrize(sigma_squared * inv(lam))
    covariance = covariance if isPD(covariance) else nearestPD(covariance)

    theta = scipy.stats.multivariate_normal.rvs(
        mu.flatten(), covariance, size=size
    )

    # A one-dimensional model yields a bare float; promote it to an array.
    if isinstance(theta, float):
        theta = np.array([theta])
    # Single draws are returned with an extra trailing axis.
    if size == 1:
        theta = theta[:, None]

    return theta, sigma_squared
def update_model_params(S, model_prior_params, N):
    """Sample one ``(theta, sigma_squared)`` pair from the posterior.

    The posterior parameters come from
    ``calc_posterior_params(S, N, model_prior_params)``. A variance is drawn
    from an inverse gamma with shape ``a_n`` and scale ``b_n``, then a
    multivariate-normal vector with mean ``mu_n.flatten()`` and covariance
    ``symmetrize(sigma_squared * inv(lambda_n))`` (replaced by ``nearestPD``
    of itself when ``isPD`` rejects it).

    Returns:
        ``(theta, sigma_squared)`` with a trailing axis appended to
        ``theta``; a float draw is first wrapped in a 1-element array.
    """
    posterior = calc_posterior_params(S, N, model_prior_params)
    mu_n, lambda_n, a_n, b_n = posterior

    sigma_squared = scipy.stats.invgamma.rvs(a=a_n, scale=b_n)

    covariance = symmetrize(sigma_squared * np.linalg.inv(lambda_n))
    covariance = covariance if isPD(covariance) else nearestPD(covariance)

    theta = scipy.stats.multivariate_normal.rvs(mu_n.flatten(), covariance)
    # A one-dimensional model yields a bare float; promote it to an array.
    if isinstance(theta, float):
        theta = np.array([theta])

    return theta[:, None], sigma_squared