w = np.zeros(Z.shape[0]) w[idcs] = wts muw, Sigw = get_laplace(w, Z, mu0) else: muw, Sigw = mu0, Sig0 return np.random.multivariate_normal(muw, Sigw, sz) tsf_w = bc.BayesianTangentSpaceFactory(lambda th: log_likelihood_2d2d(Z, th), sampler_w, projection_dim) print('Creating coresets object') # create coreset construction objects giga_optimal = bc.HilbertCoreset(tsf_optimal) giga_realistic = bc.HilbertCoreset(tsf_realistic) unif = bc.UniformSamplingCoreset(Z.shape[0]) sparsevi = bc.SparseVICoreset(tsf_w, opt_itrs=opt_itrs, step_sched=learning_rate) iht = bc.IHTCoreset(tsf_realistic, projection_dim, 'IHT') iht_ii = bc.IHTCoreset(tsf_realistic, projection_dim, 'IHT-2') algs = { 'SVI': sparsevi, 'GIGAO': giga_optimal, 'GIGAR': giga_realistic, 'RAND': unif, 'IHT': iht, 'IHT-2': iht_ii, 'PRIOR': None }
log_likelihood, grad_z_log_likelihood) prj_w = bc.BlackBoxProjector(sampler_w, projection_dim, log_likelihood, grad_z_log_likelihood) print('Creating coresets object') #create coreset construction objects t0 = time.perf_counter() giga_optimal = bc.HilbertCoreset(Z, prj_optimal) gigao_t_setup = time.perf_counter() - t0 t0 = time.perf_counter() giga_realistic = bc.HilbertCoreset(Z, prj_realistic) gigar_t_setup = time.perf_counter() - t0 t0 = time.perf_counter() unif = bc.UniformSamplingCoreset(Z) unif_t_setup = time.perf_counter() - t0 t0 = time.perf_counter() sparsevi = bc.SparseVICoreset(Z, prj_w, opt_itrs=SVI_opt_itrs, n_subsample_opt=n_subsample_opt, n_subsample_select=n_subsample_select, step_sched=SVI_step_sched) sparsevi_t_setup = time.perf_counter() - t0 t0 = time.perf_counter() bpsvi = bc.BatchPSVICoreset(Z, prj_w, opt_itrs=BPSVI_opt_itrs,
def run(arguments):
    """Run one synthetic-Gaussian coreset experiment and save its metrics.

    If ``results.check_exists(arguments)`` is true the process exits via
    ``quit()``.  Otherwise NumPy's RNG is seeded with ``arguments.trial``,
    a dataset is sampled from a Gaussian likelihood, the coreset algorithm
    named by ``arguments.alg`` is built incrementally at each size in ``Ms``,
    and the weighted posterior at each size is compared with the full-data
    posterior.

    Args:
        arguments: object exposing the attributes read below: ``trial``,
            ``verbosity``, ``coreset_size_spacing``, ``coreset_size_max``,
            ``coreset_num_sizes``, ``data_dim``, ``data_num``, ``proj_dim``,
            ``opt_itrs``, ``step_sched`` and ``alg``.

    Side effects:
        Prints progress, calls ``results.save`` and writes
        ``results/coreset_data.pk``.
    """
    # check if result already exists for this run, and if so, quit
    if results.check_exists(arguments):
        print('Results already exist for arguments ' + str(arguments))
        print('Quitting.')
        quit()

    #######################################
    ## Step 0: Setup
    #######################################

    np.random.seed(arguments.trial)
    bc.util.set_verbosity(arguments.verbosity)

    if arguments.coreset_size_spacing == 'log':
        Ms = np.unique(
            np.logspace(0., np.log10(arguments.coreset_size_max),
                        arguments.coreset_num_sizes, dtype=np.int32))
    else:
        Ms = np.unique(
            np.linspace(1, arguments.coreset_size_max,
                        arguments.coreset_num_sizes, dtype=np.int32))

    # make sure the first size to record is 0
    if Ms[0] != 0:
        Ms = np.hstack((0, Ms))

    #######################################
    ## Step 1: Generate a Synthetic Dataset
    #######################################

    # change these to change the prior / likelihood
    mu0 = np.zeros(arguments.data_dim)
    Sig0 = np.eye(arguments.data_dim)
    Sig = np.eye(arguments.data_dim)

    # these are computed
    Sig0inv = np.linalg.inv(Sig0)
    Siginv = np.linalg.inv(Sig)
    LSigInv = np.linalg.cholesky(Siginv)  # Siginv = LL^T, L Lower tri
    USig = sl.solve_triangular(LSigInv,
                               np.eye(LSigInv.shape[0]),
                               lower=True,
                               overwrite_b=True,
                               check_finite=False).T  # Sig = UU^T, U upper tri
    th = np.ones(arguments.data_dim)
    logdetSig = np.linalg.slogdet(Sig)[1]

    #######################################
    ## Step 2: Calculate Likelihoods/Projectors
    #######################################

    print('Computing true posterior')
    x = np.random.multivariate_normal(th, Sig, arguments.data_num)
    mup, USigp, LSigpInv = gaussian.weighted_post(mu0, Sig0inv, Siginv, x,
                                                  np.ones(x.shape[0]))
    Sigp = USigp.dot(USigp.T)
    SigpInv = LSigpInv.dot(LSigpInv.T)

    # create the log_likelihood function
    print('Creating log-likelihood function')
    log_likelihood = lambda x, th: gaussian.log_likelihood(
        x, th, Siginv, logdetSig)

    print('Creating gradient log-likelihood function')
    grad_log_likelihood = lambda x, th: gaussian.gradx_log_likelihood(
        x, th, Siginv)

    print('Creating tuned projector for Hilbert coreset construction')
    # create the sampler for the "optimally-tuned" Hilbert coreset
    sampler_optimal = lambda n, w, pts: mup + np.random.randn(
        n, mup.shape[0]).dot(USigp.T)
    prj_optimal = bc.BlackBoxProjector(sampler_optimal, arguments.proj_dim,
                                       log_likelihood, grad_log_likelihood)

    print('Creating untuned projector for Hilbert coreset construction')
    # create the sampler for the "realistically-tuned" Hilbert coreset
    # (posterior computed from a random subsample of about sqrt(N) points)
    xhat = x[np.random.randint(0, x.shape[0], int(np.sqrt(x.shape[0]))), :]
    muhat, USigHat, LSigHatInv = gaussian.weighted_post(
        mu0, Sig0inv, Siginv, xhat, np.ones(xhat.shape[0]))
    sampler_realistic = lambda n, w, pts: muhat + np.random.randn(
        n, muhat.shape[0]).dot(USigHat.T)
    prj_realistic = bc.BlackBoxProjector(sampler_realistic,
                                         arguments.proj_dim, log_likelihood,
                                         grad_log_likelihood)

    print('Creating black box projector')

    def sampler_w(n, wts, pts):
        # With no coreset yet, fall back to a single zero-weight dummy point
        # so that weighted_post is still called with well-formed inputs.
        if wts is None or pts is None or pts.shape[0] == 0:
            wts = np.zeros(1)
            pts = np.zeros((1, mu0.shape[0]))
        muw, USigw, _ = gaussian.weighted_post(mu0, Sig0inv, Siginv, pts, wts)
        return muw + np.random.randn(n, muw.shape[0]).dot(USigw.T)

    prj_bb = bc.BlackBoxProjector(sampler_w, arguments.proj_dim,
                                  log_likelihood, grad_log_likelihood)

    print('Creating exact projectors')

    # TODO need to fix all the transposes in this...
    class GaussianProjector(bc.Projector):
        """Closed-form projector for the Gaussian model defined above."""

        def project(self, pts, grad=False):
            """Return the projection of ``pts`` (and its gradient if ``grad``)."""
            nu = (pts - self.muw).dot(LSigInv)
            PsiL = LSigInv.T.dot(self.USigw)
            Psi = PsiL.dot(PsiL.T)
            nu = np.hstack(
                (nu.dot(PsiL),
                 np.sqrt(0.5 * np.trace(np.dot(Psi.T, Psi))) *
                 np.ones(nu.shape[0])[:, np.newaxis]))
            nu *= np.sqrt(nu.shape[1])
            if not grad:
                return nu
            # The original code referenced the undefined name ``SigLInv``
            # here (NameError whenever grad=True); ``LSigInv`` is the factor
            # applied to ``pts`` in ``nu`` above, so it is the matching
            # Jacobian factor.
            gnu = np.hstack(
                (LSigInv.dot(PsiL),
                 np.zeros(pts.shape[1])[:, np.newaxis])).T
            gnu = np.tile(gnu, (pts.shape[0], 1, 1))
            gnu *= np.sqrt(gnu.shape[1])
            return nu, gnu

        def update(self, wts=None, pts=None):
            """Recompute the weighted posterior used by ``project``."""
            if wts is None or pts is None or pts.shape[0] == 0:
                wts = np.zeros(1)
                pts = np.zeros((1, mu0.shape[0]))
            self.muw, self.USigw, self.LSigwInv = gaussian.weighted_post(
                mu0, Sig0inv, Siginv, pts, wts)

    prj_optimal_exact = GaussianProjector()
    prj_optimal_exact.update(np.ones(x.shape[0]), x)
    prj_realistic_exact = GaussianProjector()
    prj_realistic_exact.update(np.ones(xhat.shape[0]), xhat)

    #######################################
    ## Step 3: Construct Coreset
    #######################################

    print('Creating coreset construction objects')
    # create coreset construction objects
    # NOTE(review): eval() executes arbitrary code taken from
    # arguments.step_sched; only pass trusted command-line input here.
    sparsevi_exact = bc.SparseVICoreset(x,
                                        GaussianProjector(),
                                        opt_itrs=arguments.opt_itrs,
                                        step_sched=eval(arguments.step_sched))
    sparsevi = bc.SparseVICoreset(x,
                                  prj_bb,
                                  opt_itrs=arguments.opt_itrs,
                                  step_sched=eval(arguments.step_sched))
    giga_optimal = bc.HilbertCoreset(x, prj_optimal)
    giga_optimal_exact = bc.HilbertCoreset(x, prj_optimal_exact)
    giga_realistic = bc.HilbertCoreset(x, prj_realistic)
    giga_realistic_exact = bc.HilbertCoreset(x, prj_realistic_exact)
    unif = bc.UniformSamplingCoreset(x)

    algs = {
        'SVI-EXACT': sparsevi_exact,
        'SVI': sparsevi,
        'GIGA-OPT': giga_optimal,
        'GIGA-OPT-EXACT': giga_optimal_exact,
        'GIGA-REAL': giga_realistic,
        'GIGA-REAL-EXACT': giga_realistic_exact,
        'US': unif
    }
    alg = algs[arguments.alg]

    print('Building coreset')
    w = []
    p = []
    cputs = np.zeros(Ms.shape[0])
    t_build = 0
    for m in range(Ms.shape[0]):
        print('M = ' + str(Ms[m]) + ': coreset construction, ' +
              arguments.alg + ' ' + str(arguments.trial))
        t0 = time.process_time()
        # build() is incremental: only request the additional iterations
        # needed to go from the previous recorded size to this one
        itrs = (Ms[m] if m == 0 else Ms[m] - Ms[m - 1])
        alg.build(itrs)
        t_build += time.process_time() - t0
        wts, pts, idcs = alg.get()

        # store weights/pts/runtime
        w.append(wts)
        p.append(pts)
        cputs[m] = t_build

    ##############################
    ## Step 4: Evaluate coreset
    ##############################

    # computing kld and saving results
    muw = np.zeros((Ms.shape[0], mu0.shape[0]))
    Sigw = np.zeros((Ms.shape[0], mu0.shape[0], mu0.shape[0]))
    rklw = np.zeros(Ms.shape[0])
    fklw = np.zeros(Ms.shape[0])
    csizes = np.zeros(Ms.shape[0])
    mu_errs = np.zeros(Ms.shape[0])
    Sig_errs = np.zeros(Ms.shape[0])
    for m in range(Ms.shape[0]):
        csizes[m] = (w[m] > 0).sum()
        muw[m, :], USigw, LSigwInv = gaussian.weighted_post(
            mu0, Sig0inv, Siginv, p[m], w[m])
        Sigw[m, :, :] = USigw.dot(USigw.T)
        rklw[m] = gaussian.KL(muw[m, :], Sigw[m, :, :], mup, SigpInv)
        fklw[m] = gaussian.KL(mup, Sigp, muw[m, :],
                              LSigwInv.dot(LSigwInv.T))
        # relative 2-norm / Frobenius-norm errors w.r.t. the full posterior
        mu_errs[m] = np.sqrt(((mup - muw[m, :])**2).sum()) / np.sqrt(
            (mup**2).sum())
        Sig_errs[m] = np.sqrt(((Sigp - Sigw[m, :, :])**2).sum()) / np.sqrt(
            (Sigp**2).sum())

    results.save(arguments,
                 csizes=csizes,
                 Ms=Ms,
                 cputs=cputs,
                 rklw=rklw,
                 fklw=fklw,
                 mu_errs=mu_errs,
                 Sig_errs=Sig_errs)

    # also save muw/Sigw/etc for plotting coreset visualizations
    res = (x, mu0, Sig0, Sig, mup, Sigp, w, p, muw, Sigw)
    # context manager guarantees the handle is closed even if dump() raises
    with open('results/coreset_data.pk', 'wb') as f:
        pk.dump(res, f)
mup = np.dot(Sigp, np.dot(Sig0inv,np.ones(out_features)) + (wts[:, np.newaxis]*Y[:,np.newaxis]*X).sum(axis=0)/datastd**2) return np.random.multivariate_normal(mup, Sigp, n) prj_w = bc.BlackBoxProjector(sampler_w, proj_dim, log_likelihood, grad_log_likelihood, nl=nl) prj_bw = bc.BetaBlackBoxProjector(sampler_w, proj_dim, beta_likelihood, log_likelihood, grad_beta, nl=nl) #create coreset construction objects print('Creating coreset construction objects') in_batches = True if in_batches: sparsevi = bc.SparseVICoreset(Z, prj_w, opt_itrs=VI_opt_itrs, n_subsample_opt=n_subsample_opt, n_subsample_select=n_subsample_select, step_sched=SVI_step_sched, wts=np.ones(init_size), idcs=1e7+np.arange(init_size), pts=Z_init, groups=groups, initialized=True, enforce_new=False) bcoresvi = bc.BetaCoreset(Z, prj_bw, opt_itrs=VI_opt_itrs, n_subsample_opt=n_subsample_opt, n_subsample_select=n_subsample_select, step_sched=BCORES_step_sched, beta=beta, learn_beta=False, wts=np.ones(init_size), idcs=1e7+np.arange(init_size), pts=Z_init, groups=groups, initialized=True) unif = bc.UniformSamplingCoreset(Z, wts=np.ones(init_size), idcs=1e7+np.arange(init_size), pts=Z_init, groups=groups) else: raise NotImplementedError("Supported only batch data acquisition") algs = {'BCORES': bcoresvi, #'BPSVI': bpsvi, 'SVI': sparsevi, 'RAND': unif, 'PRIOR': None} alg = algs[algnm] # Diagnostics nlls = np.zeros(M+1) rmses = np.zeros(M+1) # Build coreset
def run(arguments):
    """Run one coreset experiment for the basis-function linear regression.

    If ``results.check_exists(arguments)`` is true the process exits via
    ``quit()``.  Otherwise NumPy's RNG is seeded with ``arguments.trial``,
    ``../data/prices2018.npy`` is loaded and subsampled, radial basis features
    are built, the coreset algorithm named by ``arguments.alg`` is built
    incrementally at each size in ``Ms``, and the weighted posterior at each
    size is compared with the full-data posterior.

    Args:
        arguments: object exposing the attributes read below: ``trial``,
            ``verbosity``, ``coreset_size_spacing``, ``coreset_size_max``,
            ``coreset_num_sizes``, ``data_num``, ``n_bases_per_scale``,
            ``proj_dim``, ``opt_itrs``, ``step_sched`` and ``alg``.

    Side effects:
        Prints progress, calls ``results.save`` and writes
        ``results/coreset_data.pk``.
    """
    # check if result already exists for this run, and if so, quit
    if results.check_exists(arguments):
        print('Results already exist for arguments ' + str(arguments))
        print('Quitting.')
        quit()

    #######################################
    ## Step 0: Setup
    #######################################

    np.random.seed(arguments.trial)
    bc.util.set_verbosity(arguments.verbosity)

    if arguments.coreset_size_spacing == 'log':
        Ms = np.unique(
            np.logspace(0., np.log10(arguments.coreset_size_max),
                        arguments.coreset_num_sizes, dtype=np.int32))
    else:
        Ms = np.unique(
            np.linspace(1, arguments.coreset_size_max,
                        arguments.coreset_num_sizes, dtype=np.int32))

    # make sure the first size to record is 0
    if Ms[0] != 0:
        Ms = np.hstack((0, Ms))

    #######################################
    ## Step 1: Load and preprocess data
    #######################################

    # load data and compute true posterior
    # each row of x is [lat, lon, price]
    print('Loading data')
    x = np.load('../data/prices2018.npy')
    print('dataset size : ', x.shape)

    print('Subsampling down to ' + str(arguments.data_num) + ' points')
    idcs = np.arange(x.shape[0])
    np.random.shuffle(idcs)
    x = x[idcs[:arguments.data_num], :]

    # log transform the prices
    x[:, 2] = np.log10(x[:, 2])

    # get empirical mean/std
    datastd = x[:, 2].std()
    datamn = x[:, 2].mean()

    # bases of increasing size; the last one is effectively a constant
    basis_unique_scales = np.array([.2, .4, .8, 1.2, 1.6, 2., 100])
    basis_unique_counts = np.hstack(
        (arguments.n_bases_per_scale * np.ones(6, dtype=np.int64), 1))

    # the dimension of the scaling vector for the above bases
    d = basis_unique_counts.sum()
    print('Basis dimension: ' + str(d))

    # model params
    mu0 = datamn * np.ones(d)
    Sig0 = (datastd**2 + datamn**2) * np.eye(d)
    Sig0inv = np.linalg.inv(Sig0)

    # generate basis functions by uniformly randomly picking locations in the
    # dataset
    print('Trial ' + str(arguments.trial))
    print('Creating bases')
    basis_scales = np.array([])
    basis_locs = np.zeros((0, 2))
    for i in range(basis_unique_scales.shape[0]):
        basis_scales = np.hstack(
            (basis_scales,
             basis_unique_scales[i] * np.ones(basis_unique_counts[i])))
        idcs = np.random.choice(np.arange(x.shape[0]),
                                replace=False,
                                size=basis_unique_counts[i])
        basis_locs = np.vstack((basis_locs, x[idcs, :2]))

    print('Converting bases and observations into X/Y matrices')
    # convert basis functions + observed data locations into a big X matrix
    X = np.zeros((x.shape[0], basis_scales.shape[0]))
    for i in range(basis_scales.shape[0]):
        X[:, i] = np.exp(-((x[:, :2] - basis_locs[i, :])**2).sum(axis=1) /
                         (2 * basis_scales[i]**2))
    Y = x[:, 2]
    Z = np.hstack((X, Y[:, np.newaxis]))

    # eigh returns eigenvalues in ascending order, so the trailing columns
    # are the eigenvectors of X^T X with the largest eigenvalues
    _, bV = np.linalg.eigh(X.T.dot(X))
    bV = bV[:, -arguments.proj_dim:]

    #######################################
    ## Step 2: Calculate Likelihoods/Projectors
    #######################################

    # get true posterior
    print('Computing true posterior')
    mup, USigp, LSigpInv = model_linreg.weighted_post(mu0, Sig0inv,
                                                      datastd**2, Z,
                                                      np.ones(X.shape[0]))
    Sigp = USigp.dot(USigp.T)
    SigpInv = LSigpInv.dot(LSigpInv.T)

    # create function to output log_likelihood given param samples
    print('Creating log-likelihood function')
    log_likelihood = lambda z, th: model_linreg.log_likelihood(
        z, th, datastd**2)

    print('Creating gradient log-likelihood function')
    grad_log_likelihood = lambda z, th: model_linreg.grad_x_log_likelihood(
        z, th, datastd**2)

    # create tangent space for well-tuned Hilbert coreset alg
    print('Creating tuned projector for Hilbert coreset construction')
    sampler_optimal = lambda n, w, pts: mup + np.random.randn(
        n, mup.shape[0]).dot(USigp.T)
    prj_optimal = bc.BlackBoxProjector(sampler_optimal, arguments.proj_dim,
                                       log_likelihood, grad_log_likelihood)

    # create tangent space for poorly-tuned Hilbert coreset alg
    print('Creating untuned projector for Hilbert coreset construction')
    Zhat = Z[np.random.randint(0, Z.shape[0], int(np.sqrt(Z.shape[0]))), :]
    muhat, USigHat, LSigHatInv = model_linreg.weighted_post(
        mu0, Sig0inv, datastd**2, Zhat, np.ones(Zhat.shape[0]))
    sampler_realistic = lambda n, w, pts: muhat + np.random.randn(
        n, muhat.shape[0]).dot(USigHat.T)
    prj_realistic = bc.BlackBoxProjector(sampler_realistic,
                                         arguments.proj_dim, log_likelihood,
                                         grad_log_likelihood)

    print('Creating black box projector')

    def sampler_w(n, wts, pts):
        if wts is None or pts is None or pts.shape[0] == 0:
            muw = mu0
            # Note: USigw is lower triangular here, below is upper tri.
            # Doesn't matter, just need Sigw = MM^T
            USigw = np.linalg.cholesky(Sig0)
        else:
            muw, USigw, _ = model_linreg.weighted_post(mu0, Sig0inv,
                                                       datastd**2, pts, wts)
        return muw + np.random.randn(n, muw.shape[0]).dot(USigw.T)

    prj_bb = bc.BlackBoxProjector(sampler_w, arguments.proj_dim,
                                  log_likelihood, grad_log_likelihood)

    print('Creating exact projectors')

    ##############################
    ### Exact projection in SparseVI for gradient computation
    # for this model we can do the tangent space projection exactly
    class LinRegProjector(bc.Projector):
        """Closed-form projector for the linear regression model above."""

        def __init__(self, bV):
            self.bV = bV

        def project(self, pts, grad=False):
            """Return the projection of ``pts``; ``grad`` is accepted but unused."""
            X = pts[:, :-1]
            Y = pts[:, -1]
            beta = X.dot(self.USigw)
            nu = Y - X.dot(self.muw)
            # approximation to avoid high memory cost: project the matrix
            # term down to bV.shape[1]**2 dimensions
            beta_proj = beta.dot(self.bV)
            # Flatten using the actual projected width.  The original code
            # hard-coded arguments.proj_dim**2, which fails to reshape when
            # the basis dimension is smaller than proj_dim (bV then has
            # fewer than proj_dim columns).
            n_proj = beta_proj.shape[1]
            outer = (beta_proj[:, :, np.newaxis] *
                     beta_proj[:, np.newaxis, :]).reshape(
                         beta.shape[0], n_proj**2)
            return np.hstack(
                (nu[:, np.newaxis] * beta,
                 1. / np.sqrt(2.) * outer)) / datastd**2

        def update(self, wts=None, pts=None):
            """Recompute the weighted posterior used by ``project``."""
            if wts is None or pts is None or pts.shape[0] == 0:
                self.muw = mu0
                # Note: USigw here is lower triangular, but keeping naming
                # convention for below stuff. Doesn't matter, just need
                # Sigw = MM^T
                self.USigw = np.linalg.cholesky(Sig0)
            else:
                self.muw, self.USigw, _ = model_linreg.weighted_post(
                    mu0, Sig0inv, datastd**2, pts, wts)

    prj_optimal_exact = LinRegProjector(bV)
    prj_optimal_exact.update(np.ones(Z.shape[0]), Z)
    prj_realistic_exact = LinRegProjector(bV)
    prj_realistic_exact.update(np.ones(Zhat.shape[0]), Zhat)

    #######################################
    ## Step 3: Construct Coreset
    #######################################

    print('Creating coreset construction objects')
    # create coreset construction objects
    # NOTE(review): eval() executes arbitrary code taken from
    # arguments.step_sched; only pass trusted command-line input here.
    sparsevi_exact = bc.SparseVICoreset(Z,
                                        LinRegProjector(bV),
                                        opt_itrs=arguments.opt_itrs,
                                        step_sched=eval(arguments.step_sched))
    sparsevi = bc.SparseVICoreset(Z,
                                  prj_bb,
                                  opt_itrs=arguments.opt_itrs,
                                  step_sched=eval(arguments.step_sched))
    giga_optimal = bc.HilbertCoreset(Z, prj_optimal)
    giga_optimal_exact = bc.HilbertCoreset(Z, prj_optimal_exact)
    giga_realistic = bc.HilbertCoreset(Z, prj_realistic)
    giga_realistic_exact = bc.HilbertCoreset(Z, prj_realistic_exact)
    unif = bc.UniformSamplingCoreset(Z)

    algs = {
        'SVI-EXACT': sparsevi_exact,
        'SVI': sparsevi,
        'GIGA-OPT': giga_optimal,
        'GIGA-OPT-EXACT': giga_optimal_exact,
        'GIGA-REAL': giga_realistic,
        'GIGA-REAL-EXACT': giga_realistic_exact,
        'US': unif
    }
    alg = algs[arguments.alg]

    print('Building coreset')
    w = []
    p = []
    cputs = np.zeros(Ms.shape[0])
    t_build = 0
    for m in range(Ms.shape[0]):
        print('M = ' + str(Ms[m]) + ': coreset construction, ' +
              arguments.alg + ' ' + str(arguments.trial))
        t0 = time.process_time()
        # build() is incremental: only request the additional iterations
        # needed to go from the previous recorded size to this one
        itrs = (Ms[m] if m == 0 else Ms[m] - Ms[m - 1])
        alg.build(itrs)
        t_build += time.process_time() - t0
        wts, pts, idcs = alg.get()

        # store weights/pts/runtime
        w.append(wts)
        p.append(pts)
        cputs[m] = t_build

    ##############################
    ## Step 4: Evaluate coreset
    ##############################

    # computing kld and saving results
    muw = np.zeros((Ms.shape[0], mu0.shape[0]))
    Sigw = np.zeros((Ms.shape[0], mu0.shape[0], mu0.shape[0]))
    rklw = np.zeros(Ms.shape[0])
    fklw = np.zeros(Ms.shape[0])
    mu_errs = np.zeros(Ms.shape[0])
    Sig_errs = np.zeros(Ms.shape[0])
    csizes = np.zeros(Ms.shape[0])
    for m in range(Ms.shape[0]):
        csizes[m] = (w[m] > 0).sum()
        muw[m, :], USigw, LSigwInv = model_linreg.weighted_post(
            mu0, Sig0inv, datastd**2, p[m], w[m])
        Sigw[m, :, :] = USigw.dot(USigw.T)
        rklw[m] = model_linreg.KL(muw[m, :], Sigw[m, :, :], mup, SigpInv)
        fklw[m] = model_linreg.KL(mup, Sigp, muw[m, :],
                                  LSigwInv.dot(LSigwInv.T))
        # relative 2-norm / Frobenius-norm errors w.r.t. the full posterior
        mu_errs[m] = np.sqrt(((mup - muw[m, :])**2).sum()) / np.sqrt(
            (mup**2).sum())
        Sig_errs[m] = np.sqrt(((Sigp - Sigw[m, :, :])**2).sum()) / np.sqrt(
            (Sigp**2).sum())

    results.save(arguments,
                 csizes=csizes,
                 Ms=Ms,
                 cputs=cputs,
                 rklw=rklw,
                 fklw=fklw,
                 mu_errs=mu_errs,
                 Sig_errs=Sig_errs)

    # also save muw/Sigw/etc for plotting coreset visualizations
    res = (x, mu0, Sig0, datastd, mup, Sigp, w, p, muw, Sigw)
    # context manager guarantees the handle is closed even if dump() raises
    with open('results/coreset_data.pk', 'wb') as f:
        pk.dump(res, f)
bpsvi = bc.BatchPSVICoreset(Xcorrupted, prj_w, opt_itrs=BPSVI_opt_itrs, n_subsample_opt=n_subsample_opt, step_sched=BPSVI_step_sched) bcoresvi = bc.BetaCoreset(Xcorrupted, prj_bw, opt_itrs=BCORES_opt_itrs, n_subsample_opt=n_subsample_opt, n_subsample_select=n_subsample_select, step_sched=BCORES_step_sched, beta=.1, learn_beta=False) giga_optimal = bc.HilbertCoreset(Xcorrupted, prj_optimal) giga_realistic = bc.HilbertCoreset(Xcorrupted, prj_realistic) unif = bc.UniformSamplingCoreset(Xcorrupted) algs = { 'BCORES': bcoresvi, 'BPSVI': bpsvi, 'SVI': sparsevi, 'GIGAO': giga_optimal, 'GIGAR': giga_realistic, 'RAND': unif, 'PRIOR': None } alg = algs[nm] print('Building coreset') #build coresets w = [np.array([0.])]
def run(arguments):
    """Run one coreset + MCMC experiment for a regression model and save metrics.

    If ``results.check_exists(arguments)`` is true the process exits via
    ``quit()``.  Otherwise NumPy's RNG is seeded with ``arguments.trial``,
    the dataset is loaded, full-data MCMC samples are loaded from (or
    computed and written to) ``mcmc_cache/``, the coreset algorithm named by
    ``arguments.alg`` is built incrementally at each size in ``Ms``, MCMC is
    run on each coreset, and Gaussian approximations of the coreset and full
    posteriors are compared.

    Args:
        arguments: object exposing the attributes read below: ``trial``,
            ``verbosity``, ``coreset_size_spacing``, ``coreset_size_max``,
            ``coreset_num_sizes``, ``model`` (``"lr"`` or ``"poiss"``),
            ``dataset``, ``mcmc_samples_full``, ``mcmc_samples_coreset``,
            ``proj_dim``, ``opt_itrs``, ``step_sched`` and ``alg``.

    Raises:
        ValueError: if ``arguments.model`` is neither ``"lr"`` nor
            ``"poiss"``.

    Side effects:
        Prints progress, may create ``mcmc_cache/`` and a cache file in it,
        and calls ``results.save``.
    """
    # check if result already exists for this run, and if so, quit
    if results.check_exists(arguments):
        print('Results already exist for arguments ' + str(arguments))
        print('Quitting.')
        quit()

    #######################################
    ########### Step 0: Setup #############
    #######################################

    np.random.seed(arguments.trial)
    bc.util.set_verbosity(arguments.verbosity)

    if arguments.coreset_size_spacing == 'log':
        Ms = np.unique(
            np.logspace(0., np.log10(arguments.coreset_size_max),
                        arguments.coreset_num_sizes, dtype=np.int32))
    else:
        Ms = np.unique(
            np.linspace(1, arguments.coreset_size_max,
                        arguments.coreset_num_sizes, dtype=np.int32))

    #######################################
    ## Step 1: Define Model
    #######################################

    if arguments.model == "lr":
        import model_lr as model
    elif arguments.model == "poiss":
        import model_poiss as model
    else:
        # Previously an unknown model left ``model`` unbound and failed
        # later with an obscure error; fail early with a clear message.
        raise ValueError('Unsupported model: ' + str(arguments.model))

    #######################################
    ## Step 2: Load Dataset & run full MCMC / Laplace
    #######################################

    print('Loading dataset ' + arguments.dataset)
    X, Y, Z, Zt, D = model.load_data('../data/' + arguments.dataset + '.npz')

    # NOTE: Sig0 is currently coded as identity in model_lr and model_pr
    # (see log_prior), so if you change Sig0 here things might break.
    # TODO: fix that...
    mu0 = np.zeros(Z.shape[1])
    Sig0 = np.eye(Z.shape[1])
    LSig0 = np.eye(Z.shape[1])

    print('Checking for cached full MCMC samples')
    mcmc_cache_filename = ('mcmc_cache/full_samples_' + arguments.model +
                           '_' + arguments.dataset + '.npz')
    if os.path.exists(mcmc_cache_filename):
        print('Cache exists, loading')
        tmp__ = np.load(mcmc_cache_filename)
        full_samples = tmp__['samples']
        full_mcmc_time_per_itr = tmp__['t']
    else:
        print('Cache doesnt exist, running MCMC')
        # convert Y to Stan LR label format (labels -1 become 0)
        stanY = np.zeros(Y.shape[0])
        stanY[:] = Y
        stanY[stanY == -1] = 0
        sampler_data = {
            'x': X,
            'y': stanY.astype(int),
            'w': np.ones(X.shape[0]),
            'd': X.shape[1],
            'n': X.shape[0]
        }
        full_samples, t_full_mcmc = mcmc.run(sampler_data,
                                             arguments.mcmc_samples_full,
                                             arguments.model,
                                             model.stan_representation,
                                             arguments.trial)
        full_samples = full_samples['theta']
        # TODO right now *2 to account for burn; but this should all be
        # specified via tunable arguments
        full_mcmc_time_per_itr = t_full_mcmc / (arguments.mcmc_samples_full *
                                                2)
        # exist_ok avoids the check-then-create race when several trials
        # start concurrently
        os.makedirs('mcmc_cache', exist_ok=True)
        np.savez(mcmc_cache_filename,
                 samples=full_samples,
                 t=full_mcmc_time_per_itr)

    #######################################
    ## Step 3: Calculate Likelihoods/Projectors
    #######################################

    # get Gaussian approximation to the true posterior
    print('Approximating true posterior')
    mup = full_samples.mean(axis=0)
    Sigp = np.cov(full_samples, rowvar=False)
    LSigp = np.linalg.cholesky(Sigp)
    LSigpInv = solve_triangular(LSigp,
                                np.eye(LSigp.shape[0]),
                                lower=True,
                                overwrite_b=True,
                                check_finite=False)

    # create tangent space for well-tuned Hilbert coreset alg
    print('Creating tuned projector for Hilbert coreset construction')
    muHat, LSigHat, LSigHatInv = get_laplace(np.ones(Z.shape[0]), Z,
                                             np.zeros(Z.shape[1]), model,
                                             diag=False)
    sampler_optimal = lambda n, w, pts: muHat + np.random.randn(
        n, muHat.shape[0]).dot(LSigHat.T)
    prj_optimal = bc.BlackBoxProjector(sampler_optimal, arguments.proj_dim,
                                       model.log_likelihood,
                                       model.grad_z_log_likelihood)

    # create tangent space for poorly-tuned Hilbert coreset alg
    print('Creating untuned projector for Hilbert coreset construction')
    Zhat = Z[np.random.randint(0, Z.shape[0], int(np.sqrt(Z.shape[0]))), :]
    muHat2, LSigHat2, LSigHat2Inv = get_laplace(np.ones(Zhat.shape[0]), Zhat,
                                                np.zeros(Zhat.shape[1]),
                                                model, diag=False)
    sampler_realistic = lambda n, w, pts: muHat2 + np.random.randn(
        n, muHat2.shape[0]).dot(LSigHat2.T)
    prj_realistic = bc.BlackBoxProjector(sampler_realistic,
                                         arguments.proj_dim,
                                         model.log_likelihood,
                                         model.grad_z_log_likelihood)

    print('Creating black box projector')

    def sampler_w(n, wts, pts):
        # with no coreset yet, sample from the prior
        if wts is None or pts is None or pts.shape[0] == 0:
            muw = mu0
            LSigw = LSig0
        else:
            muw, LSigw, _ = get_laplace(wts, pts, np.zeros(Z.shape[1]),
                                        model, diag=False)
        return muw + np.random.randn(n, muw.shape[0]).dot(LSigw.T)

    prj_bb = bc.BlackBoxProjector(sampler_w, arguments.proj_dim,
                                  model.log_likelihood,
                                  model.grad_z_log_likelihood)

    #######################################
    ## Step 4: Construct Coreset
    #######################################

    print('Creating coreset construction objects')
    # create coreset construction objects
    # NOTE(review): eval() executes arbitrary code taken from
    # arguments.step_sched; only pass trusted command-line input here.
    sparsevi = bc.SparseVICoreset(Z,
                                  prj_bb,
                                  opt_itrs=arguments.opt_itrs,
                                  step_sched=eval(arguments.step_sched))
    giga_optimal = bc.HilbertCoreset(Z, prj_optimal)
    giga_realistic = bc.HilbertCoreset(Z, prj_realistic)
    unif = bc.UniformSamplingCoreset(Z)

    algs = {
        'SVI': sparsevi,
        'GIGA-OPT': giga_optimal,
        'GIGA-REAL': giga_realistic,
        'US': unif
    }
    alg = algs[arguments.alg]

    cputs = np.zeros(Ms.shape[0])
    mcmc_time_per_itr = np.zeros(Ms.shape[0])
    csizes = np.zeros(Ms.shape[0])
    Fs = np.zeros(Ms.shape[0])
    rklw = np.zeros(Ms.shape[0])
    fklw = np.zeros(Ms.shape[0])
    mu_errs = np.zeros(Ms.shape[0])
    Sig_errs = np.zeros(Ms.shape[0])

    # The full-data gradients only depend on Z and the full MCMC samples,
    # not on the coreset, so compute them once instead of once per size.
    # NOTE(review): assumes model.grad_th_log_joint is deterministic and
    # does not modify its arguments; confirm against the model modules.
    full_wts = np.ones(Z.shape[0])
    gfs = np.array([
        model.grad_th_log_joint(Z, full_samples[i, :], full_wts)
        for i in range(full_samples.shape[0])
    ])

    print('Running coreset construction / MCMC for ' + arguments.dataset +
          ' ' + arguments.alg + ' ' + str(arguments.trial))
    t_alg = 0.
    for m in range(Ms.shape[0]):
        print('M = ' + str(Ms[m]) + ': coreset construction, ' +
              arguments.alg + ' ' + arguments.dataset + ' ' +
              str(arguments.trial))
        # this runs alg up to a level of M; on the next iteration, it will
        # continue from where it left off
        t0 = time.process_time()
        itrs = (Ms[m] if m == 0 else Ms[m] - Ms[m - 1])
        alg.build(itrs)
        t_alg += time.process_time() - t0
        wts, pts, idcs = alg.get()

        print('M = ' + str(Ms[m]) + ': MCMC')
        # Use MCMC on the coreset, measure time taken
        stanY = np.zeros(idcs.shape[0])
        stanY[:] = Y[idcs]
        stanY[stanY == -1] = 0
        sampler_data = {
            'x': X[idcs, :],
            'y': stanY.astype(int),
            'w': wts,
            'd': X.shape[1],
            'n': idcs.shape[0]
        }
        cst_samples, t_cst_mcmc = mcmc.run(sampler_data,
                                           arguments.mcmc_samples_coreset,
                                           arguments.model,
                                           model.stan_representation,
                                           arguments.trial)
        cst_samples = cst_samples['theta']
        # TODO see note above re: full mcmc sampling
        t_cst_mcmc_per_step = t_cst_mcmc / (arguments.mcmc_samples_coreset *
                                            2)

        print('M = ' + str(Ms[m]) + ': Approximating posterior with Gaussian')
        muw = cst_samples.mean(axis=0)
        Sigw = np.cov(cst_samples, rowvar=False)
        LSigw = np.linalg.cholesky(Sigw)
        LSigwInv = solve_triangular(LSigw,
                                    np.eye(LSigw.shape[0]),
                                    lower=True,
                                    overwrite_b=True,
                                    check_finite=False)

        print('M = ' + str(Ms[m]) + ': Computing metrics')
        cputs[m] = t_alg
        mcmc_time_per_itr[m] = t_cst_mcmc_per_step
        csizes[m] = (wts > 0).sum()
        gcs = np.array([
            model.grad_th_log_joint(Z[idcs, :], full_samples[i, :], wts)
            for i in range(full_samples.shape[0])
        ])
        # mean squared norm of the coreset-vs-full gradient difference
        Fs[m] = (((gcs - gfs)**2).sum(axis=1)).mean()
        rklw[m] = KL(muw, Sigw, mup, LSigpInv.T.dot(LSigpInv))
        fklw[m] = KL(mup, Sigp, muw, LSigwInv.T.dot(LSigwInv))
        # relative 2-norm / Frobenius-norm errors w.r.t. the full posterior
        mu_errs[m] = np.sqrt(((mup - muw)**2).sum()) / np.sqrt((mup**2).sum())
        Sig_errs[m] = np.sqrt(((Sigp - Sigw)**2).sum()) / np.sqrt(
            (Sigp**2).sum())

    results.save(arguments,
                 csizes=csizes,
                 Ms=Ms,
                 cputs=cputs,
                 Fs=Fs,
                 full_mcmc_time_per_itr=full_mcmc_time_per_itr,
                 mcmc_time_per_itr=mcmc_time_per_itr,
                 rklw=rklw,
                 fklw=fklw,
                 mu_errs=mu_errs,
                 Sig_errs=Sig_errs)