def tsf_exact_w(wts, idcs):
    # expand the coreset weights to a full-dataset weight vector
    w = np.zeros(x.shape[0])
    w[idcs] = wts
    # weighted Gaussian posterior under the current coreset weights
    muw, Sigw = gaussian.weighted_post(mu0, Sig0inv, Siginv, x, w)
    # residuals whitened by the likelihood covariance factor
    nu = (x - muw).dot(SigLInv.T)
    # weighted posterior covariance in the whitened coordinates
    Psi = np.dot(SigLInv, np.dot(Sigw, SigLInv.T))
    # scale by a Cholesky factor of Psi and append a constant column
    # proportional to the Frobenius norm of Psi
    nu = np.hstack((nu.dot(np.linalg.cholesky(Psi)),
                    0.25 * np.sqrt(np.trace(np.dot(Psi.T, Psi))) * np.ones(nu.shape[0])[:, np.newaxis]))
    return nu
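# gaussian.weighted_post is defined elsewhere in the repository; the sketch below is
# a minimal, hypothetical version of the two-output variant called above, assuming
# the standard conjugate update for a N(mu0, Sig0) prior and N(th, Sig) likelihood
# with per-point weights w. weighted_post_sketch is illustrative, not the repo's code.
import numpy as np

def weighted_post_sketch(mu0, Sig0inv, Siginv, x, w):
    # posterior covariance: (Sig0^-1 + (sum_i w_i) Sig^-1)^-1
    Sigw = np.linalg.inv(Sig0inv + w.sum() * Siginv)
    # posterior mean: Sigw (Sig0^-1 mu0 + Sig^-1 sum_i w_i x_i)
    muw = Sigw.dot(Sig0inv.dot(mu0) + Siginv.dot((w[:, np.newaxis] * x).sum(axis=0)))
    return muw, Sigw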
def update(self, wts=None, pts=None):
    # with no coreset yet, fall back to a single dummy point with zero weight
    if wts is None or pts is None or pts.shape[0] == 0:
        wts = np.zeros(1)
        pts = np.zeros((1, mu0.shape[0]))
    # recompute the weighted-posterior parameters for the current coreset
    self.muw, self.LSigw, self.LSigwInv = gaussian.weighted_post(
        mu0, Sig0inv, Siginv, pts, wts)
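# The three-output variant of gaussian.weighted_post unpacked above additionally
# returns Cholesky-style factors of the posterior covariance and its inverse. A
# minimal sketch, assuming LSigw satisfies Sigw = LSigw LSigw^T and LSigwInv is the
# lower Cholesky factor of the posterior precision (the repository may use a
# different factor convention):
import numpy as np

def weighted_post3_sketch(mu0, Sig0inv, Siginv, x, w):
    # posterior precision and its lower Cholesky factor
    Sigwinv = Sig0inv + w.sum() * Siginv
    LSigwInv = np.linalg.cholesky(Sigwinv)  # Sigwinv = LSigwInv LSigwInv^T
    # then Sigw = LSigwInv^-T LSigwInv^-1 = LSigw LSigw^T with LSigw = LSigwInv^-T
    LSigw = np.linalg.inv(LSigwInv).T
    # posterior mean via a linear solve against the precision
    muw = np.linalg.solve(Sigwinv, Sig0inv.dot(mu0) + Siginv.dot((w[:, np.newaxis] * x).sum(axis=0)))
    return muw, LSigw, LSigwInv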
Sig0 = np.eye(d)
Sig = np.eye(d)
SigL = np.linalg.cholesky(Sig)
th = np.ones(d)
Sig0inv = np.linalg.inv(Sig0)
Siginv = np.linalg.inv(Sig)
SigLInv = np.linalg.inv(SigL)
logdetSig = np.linalg.slogdet(Sig)[1]

# generate data and compute the true posterior;
# use the trial number as the seed
np.random.seed(int(tr))
# number of processes for parallelizing the pseudocoreset experiment;
# adapt to your computing resources
num_processes = 16
x = np.random.multivariate_normal(th, Sig, N)
mup, LSigp, LSigpInv = gaussian.weighted_post(mu0, Sig0inv, Siginv, x, np.ones(x.shape[0]))
Sigp = LSigp.dot(LSigp.T)
SigpInv = LSigpInv.T.dot(LSigpInv)

# for the algorithm, use the trial number and name as the seed
np.random.seed(int(''.join([str(ord(ch)) for ch in nm + str(tr)])) % 2**32)

# create the log-likelihood function
print('Creating log-likelihood function')
log_likelihood = lambda x, th: gaussian.gaussian_loglikelihood(x, th, Siginv, logdetSig)

print('Creating gradient log-likelihood function')
grad_log_likelihood = lambda x, th: gaussian.gaussian_gradx_loglikelihood(x, th, Siginv)
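# gaussian.gaussian_loglikelihood is part of the repository's helper module; a
# minimal sketch of the per-point Gaussian log density it is assumed to evaluate
# for a single parameter vector th (the repository version may also be vectorized
# over multiple parameter samples):
import numpy as np

def gaussian_loglikelihood_sketch(x, th, Siginv, logdetSig):
    d = x.shape[1]
    diff = x - th                                 # (N, d) residuals
    quad = (diff.dot(Siginv) * diff).sum(axis=1)  # (x_n - th)^T Sig^-1 (x_n - th)
    return -0.5 * d * np.log(2 * np.pi) - 0.5 * logdetSig - 0.5 * quad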
N = 5000  # number of data points
d = 100   # number of dimensions
mu0 = np.zeros(d)
Sig0 = np.eye(d)
Sig = 500 * np.eye(d)
SigL = np.linalg.cholesky(Sig)
th = np.zeros(d)
Sig0inv = np.linalg.inv(Sig0)
Siginv = np.linalg.inv(Sig)
SigLInv = np.linalg.inv(SigL)
logdetSig = np.linalg.slogdet(Sig)[1]
X = np.random.multivariate_normal(th, Sig, N)
mup, LSigp, LSigpInv = gaussian.weighted_post(
    mu0, Sig0inv, Siginv, X, np.ones(X.shape[0]))  # true posterior
Sigp = LSigp.dot(LSigp.T)
SigpInv = LSigpInv.dot(LSigpInv.T)
# corrupt the dataset with three clusters of outliers
Xoutliers1 = np.random.multivariate_normal(th + 200, 0.5 * Sig, int(N / 50.))
Xoutliers2 = np.random.multivariate_normal(th + 150, 0.1 * Sig, int(N / 50.))
Xoutliers3 = np.random.multivariate_normal(th, 10 * Sig, int(N / 10.))
Xcorrupted = np.concatenate((X, Xoutliers1, Xoutliers2, Xoutliers3))

# create function to output the log-likelihood given parameter samples
print('Creating log-likelihood function')
log_likelihood = lambda x, th: gaussian_loglikelihood(x, th, Siginv, logdetSig)

print('Creating gradient log-likelihood function')
grad_log_likelihood = lambda x, th: gaussian_grad_x_loglikelihood(x, th, Siginv)
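# gaussian_grad_x_loglikelihood is likewise defined elsewhere; a minimal sketch of
# the gradient of the Gaussian log density with respect to the data point x, which
# for log N(x; th, Sig) is -Sig^-1 (x - th) (Siginv is symmetric, so the row form
# below is equivalent):
import numpy as np

def gaussian_grad_x_loglikelihood_sketch(x, th, Siginv):
    # one gradient row per data point
    return -(x - th).dot(Siginv)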
Sig = np.eye(d)
SigL = np.linalg.cholesky(Sig)
th = np.ones(d)
Sig0inv = np.linalg.inv(Sig0)
Siginv = np.linalg.inv(Sig)
SigLInv = np.linalg.inv(SigL)
nm = sys.argv[1]
tr = sys.argv[2]

# generate data and compute the true posterior;
# use the trial number as the seed
np.random.seed(int(tr))
x = np.random.multivariate_normal(th, Sig, N)
mup, Sigp = gaussian.weighted_post(mu0, Sig0inv, Siginv, x, np.ones(x.shape[0]))
Sigpinv = np.linalg.inv(Sigp)

# for the algorithm, use the trial number and name as the seed
np.random.seed(int(''.join([str(ord(ch)) for ch in nm + tr])) % 2**32)

# compute constants for the log-likelihood function
xSiginv = x.dot(Siginv)
xSiginvx = (xSiginv * x).sum(axis=1)
logdetSig = np.linalg.slogdet(Sig)[1]

# create the log_likelihood function
log_likelihood = lambda samples: gaussian.gaussian_potentials(
    Siginv, xSiginvx, xSiginv, logdetSig, x, samples)

# create the sampler for the "optimally-tuned" Hilbert coreset
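# gaussian.gaussian_potentials is defined in the repository's helper module; a
# minimal sketch, assuming it uses the precomputed constants above to evaluate the
# N x S matrix of per-point Gaussian log-likelihoods at S parameter samples (rows
# index data points, columns index samples):
import numpy as np

def gaussian_potentials_sketch(Siginv, xSiginvx, xSiginv, logdetSig, x, samples):
    d = x.shape[1]
    # th_s^T Sig^-1 th_s for each sample, shape (S,)
    thSiginvth = (samples.dot(Siginv) * samples).sum(axis=1)
    # expand (x_n - th_s)^T Sig^-1 (x_n - th_s) using the precomputed terms
    quad = xSiginvx[:, np.newaxis] - 2. * xSiginv.dot(samples.T) + thSiginvth[np.newaxis, :]
    return -0.5 * d * np.log(2 * np.pi) - 0.5 * logdetSig - 0.5 * quad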