Ejemplo n.º 1
0
def run(arguments):
    """Run one coreset-construction trial on a synthetic Gaussian model.

    Draws a synthetic dataset, builds a coreset with the algorithm selected
    by ``arguments.alg`` at every size of a size grid, compares each
    weighted-coreset posterior against the full-data posterior, and stores
    the resulting metrics through ``results.save``.

    Args:
        arguments: experiment configuration object. Attributes read here:
            ``trial``, ``verbosity``, ``coreset_size_spacing``,
            ``coreset_size_max``, ``coreset_num_sizes``, ``data_dim``,
            ``data_num``, ``proj_dim``, ``opt_itrs``, ``step_sched`` and
            ``alg``.

    Side effects:
        Calls ``quit()`` when ``results.check_exists(arguments)`` is true,
        seeds NumPy's global RNG with ``arguments.trial``, and writes
        ``results/coreset_data.pk``.

    Raises:
        KeyError: if ``arguments.alg`` is not one of the keys of ``algs``.
    """

    # check if result already exists for this run, and if so, quit
    if results.check_exists(arguments):
        print('Results already exist for arguments ' + str(arguments))
        print('Quitting.')
        quit()

    #######################################
    #######################################
    ## Step 0: Setup
    #######################################
    #######################################

    np.random.seed(arguments.trial)
    bc.util.set_verbosity(arguments.verbosity)

    # grid of coreset sizes at which the coreset is recorded; np.unique
    # removes the duplicates created by the integer cast
    if arguments.coreset_size_spacing == 'log':
        Ms = np.unique(
            np.logspace(0.,
                        np.log10(arguments.coreset_size_max),
                        arguments.coreset_num_sizes,
                        dtype=np.int32))
    else:
        Ms = np.unique(
            np.linspace(1,
                        arguments.coreset_size_max,
                        arguments.coreset_num_sizes,
                        dtype=np.int32))

    #make sure the first size to record is 0
    if Ms[0] != 0:
        Ms = np.hstack((0, Ms))

    #######################################
    #######################################
    ## Step 1: Generate a Synthetic Dataset
    #######################################
    #######################################

    #change these to change the prior / likelihood
    mu0 = np.zeros(arguments.data_dim)
    Sig0 = np.eye(arguments.data_dim)
    Sig = np.eye(arguments.data_dim)

    #these are computed
    Sig0inv = np.linalg.inv(Sig0)
    Siginv = np.linalg.inv(Sig)
    LSigInv = np.linalg.cholesky(Siginv)  #Siginv = LL^T, L Lower tri
    USig = sl.solve_triangular(LSigInv,
                               np.eye(LSigInv.shape[0]),
                               lower=True,
                               overwrite_b=True,
                               check_finite=False).T  # Sig = UU^T, U upper tri
    th = np.ones(arguments.data_dim)
    logdetSig = np.linalg.slogdet(Sig)[1]

    #######################################
    #######################################
    ## Step 2: Calculate Likelihoods/Projectors
    #######################################
    #######################################

    print('Computing true posterior')
    x = np.random.multivariate_normal(th, Sig, arguments.data_num)
    mup, USigp, LSigpInv = gaussian.weighted_post(mu0, Sig0inv, Siginv, x,
                                                  np.ones(x.shape[0]))
    Sigp = USigp.dot(USigp.T)
    SigpInv = LSigpInv.dot(LSigpInv.T)

    #create the log_likelihood function
    print('Creating log-likelihood function')
    log_likelihood = lambda x, th: gaussian.log_likelihood(
        x, th, Siginv, logdetSig)

    print('Creating gradient log-likelihood function')
    grad_log_likelihood = lambda x, th: gaussian.gradx_log_likelihood(
        x, th, Siginv)

    print('Creating tuned projector for Hilbert coreset construction')
    #create the sampler for the "optimally-tuned" Hilbert coreset
    #(ignores w/pts: always samples from the full-data posterior)
    sampler_optimal = lambda n, w, pts: mup + np.random.randn(n, mup.shape[0]
                                                              ).dot(USigp.T)
    prj_optimal = bc.BlackBoxProjector(sampler_optimal, arguments.proj_dim,
                                       log_likelihood, grad_log_likelihood)

    print('Creating untuned projector for Hilbert coreset construction')
    #create the sampler for the "realistically-tuned" Hilbert coreset:
    #posterior of a random (with replacement) subset of sqrt(N) points
    xhat = x[np.random.randint(0, x.shape[0], int(np.sqrt(x.shape[0]))), :]
    muhat, USigHat, _ = gaussian.weighted_post(mu0, Sig0inv, Siginv, xhat,
                                               np.ones(xhat.shape[0]))
    sampler_realistic = lambda n, w, pts: muhat + np.random.randn(
        n, muhat.shape[0]).dot(USigHat.T)
    prj_realistic = bc.BlackBoxProjector(sampler_realistic, arguments.proj_dim,
                                         log_likelihood, grad_log_likelihood)

    print('Creating black box projector')

    def sampler_w(n, wts, pts):
        """Draw ``n`` samples from the posterior of the weighted points.

        With no coreset yet (``None`` inputs or zero rows) a single
        zero-weight dummy point is substituted before calling
        ``gaussian.weighted_post``.
        """
        if wts is None or pts is None or pts.shape[0] == 0:
            wts = np.zeros(1)
            pts = np.zeros((1, mu0.shape[0]))
        muw, USigw, _ = gaussian.weighted_post(mu0, Sig0inv, Siginv, pts, wts)
        return muw + np.random.randn(n, muw.shape[0]).dot(USigw.T)

    prj_bb = bc.BlackBoxProjector(sampler_w, arguments.proj_dim,
                                  log_likelihood, grad_log_likelihood)

    print('Creating exact projectors')

    #TODO need to fix all the transposes in this...
    class GaussianProjector(bc.Projector):
        """Closed-form projector for this Gaussian model.

        ``update`` must be called before ``project``: it stores the
        weighted-posterior quantities (``muw``, ``USigw``, ``LSigwInv``)
        that ``project`` reads.
        """

        def project(self, pts, grad=False):
            """Project ``pts`` and optionally return the Jacobian too.

            Returns ``nu`` with one row per point, or ``(nu, gnu)`` when
            ``grad`` is true, where ``gnu`` is one Jacobian tiled once per
            point.
            """
            nu = (pts - self.muw).dot(LSigInv)
            PsiL = LSigInv.T.dot(self.USigw)
            Psi = PsiL.dot(PsiL.T)
            # last column is a constant (identical for every point)
            nu = np.hstack(
                (nu.dot(PsiL), np.sqrt(0.5 * np.trace(np.dot(Psi.T, Psi))) *
                 np.ones(nu.shape[0])[:, np.newaxis]))
            nu *= np.sqrt(nu.shape[1])
            if not grad:
                return nu
            else:
                # Jacobian of nu w.r.t. pts: the non-constant part of nu is
                # (pts - muw).dot(LSigInv).dot(PsiL), hence LSigInv.dot(PsiL);
                # the constant column contributes zeros.
                # (Previously referenced the undefined name `SigLInv`.)
                gnu = np.hstack(
                    (LSigInv.dot(PsiL), np.zeros(pts.shape[1])[:,
                                                               np.newaxis])).T
                gnu = np.tile(gnu, (pts.shape[0], 1, 1))
                gnu *= np.sqrt(gnu.shape[1])
                return nu, gnu

        def update(self, wts=None, pts=None):
            """Recompute the weighted posterior used by ``project``."""
            if wts is None or pts is None or pts.shape[0] == 0:
                # no coreset yet: substitute one zero-weight dummy point
                wts = np.zeros(1)
                pts = np.zeros((1, mu0.shape[0]))
            self.muw, self.USigw, self.LSigwInv = gaussian.weighted_post(
                mu0, Sig0inv, Siginv, pts, wts)

    prj_optimal_exact = GaussianProjector()
    prj_optimal_exact.update(np.ones(x.shape[0]), x)
    prj_realistic_exact = GaussianProjector()
    prj_realistic_exact.update(np.ones(xhat.shape[0]), xhat)

    #######################################
    #######################################
    ## Step 3: Construct Coreset
    #######################################
    #######################################

    ##############################
    print('Creating coreset construction objects')
    #create coreset construction objects
    # NOTE(review): eval() executes arguments.step_sched as Python code; this
    # is only safe while that string comes from a trusted command line.
    sparsevi_exact = bc.SparseVICoreset(x,
                                        GaussianProjector(),
                                        opt_itrs=arguments.opt_itrs,
                                        step_sched=eval(arguments.step_sched))
    sparsevi = bc.SparseVICoreset(x,
                                  prj_bb,
                                  opt_itrs=arguments.opt_itrs,
                                  step_sched=eval(arguments.step_sched))
    giga_optimal = bc.HilbertCoreset(x, prj_optimal)
    giga_optimal_exact = bc.HilbertCoreset(x, prj_optimal_exact)
    giga_realistic = bc.HilbertCoreset(x, prj_realistic)
    giga_realistic_exact = bc.HilbertCoreset(x, prj_realistic_exact)
    unif = bc.UniformSamplingCoreset(x)

    algs = {
        'SVI-EXACT': sparsevi_exact,
        'SVI': sparsevi,
        'GIGA-OPT': giga_optimal,
        'GIGA-OPT-EXACT': giga_optimal_exact,
        'GIGA-REAL': giga_realistic,
        'GIGA-REAL-EXACT': giga_realistic_exact,
        'US': unif
    }
    alg = algs[arguments.alg]

    print('Building coreset')
    w = []
    p = []
    cputs = np.zeros(Ms.shape[0])
    t_build = 0
    for m in range(Ms.shape[0]):
        print('M = ' + str(Ms[m]) + ': coreset construction, ' +
              arguments.alg + ' ' + str(arguments.trial))
        t0 = time.process_time()
        # build() is called with the increment since the previous grid size
        itrs = (Ms[m] if m == 0 else Ms[m] - Ms[m - 1])
        alg.build(itrs)
        t_build += time.process_time() - t0
        wts, pts, _ = alg.get()

        #store weights/pts/runtime (cumulative CPU time of build calls only)
        w.append(wts)
        p.append(pts)
        cputs[m] = t_build

    ##############################
    ##############################
    ## Step 4: Evaluate coreset
    ##############################
    ##############################

    # computing kld and saving results
    muw = np.zeros((Ms.shape[0], mu0.shape[0]))
    Sigw = np.zeros((Ms.shape[0], mu0.shape[0], mu0.shape[0]))
    rklw = np.zeros(Ms.shape[0])
    fklw = np.zeros(Ms.shape[0])
    csizes = np.zeros(Ms.shape[0])
    mu_errs = np.zeros(Ms.shape[0])
    Sig_errs = np.zeros(Ms.shape[0])
    for m in range(Ms.shape[0]):
        csizes[m] = (w[m] > 0).sum()
        muw[m, :], USigw, LSigwInv = gaussian.weighted_post(
            mu0, Sig0inv, Siginv, p[m], w[m])
        Sigw[m, :, :] = USigw.dot(USigw.T)
        rklw[m] = gaussian.KL(muw[m, :], Sigw[m, :, :], mup, SigpInv)
        fklw[m] = gaussian.KL(mup, Sigp, muw[m, :], LSigwInv.dot(LSigwInv.T))
        # relative errors of the posterior mean / covariance
        mu_errs[m] = np.sqrt(((mup - muw[m, :])**2).sum()) / np.sqrt(
            (mup**2).sum())
        Sig_errs[m] = np.sqrt(((Sigp - Sigw[m, :, :])**2).sum()) / np.sqrt(
            (Sigp**2).sum())

    results.save(arguments,
                 csizes=csizes,
                 Ms=Ms,
                 cputs=cputs,
                 rklw=rklw,
                 fklw=fklw,
                 mu_errs=mu_errs,
                 Sig_errs=Sig_errs)

    #also save muw/Sigw/etc for plotting coreset visualizations
    res = (x, mu0, Sig0, Sig, mup, Sigp, w, p, muw, Sigw)
    # context manager closes the file even if pickling fails
    with open('results/coreset_data.pk', 'wb') as f:
        pk.dump(res, f)
Ejemplo n.º 2
0
print('Building projectors')


def sampler_optimal(sz, w, pts):
    """Return ``sz`` Gaussian draws built from ``mup`` and ``LSigp``.

    ``w`` and ``pts`` are accepted but not used.
    """
    standard_normal = np.random.randn(sz, mup.shape[0])
    return mup + standard_normal.dot(LSigp.T)


def sampler_realistic(sz, w, pts):
    """Return ``sz`` Gaussian draws built from ``muhat`` and ``LSighat``.

    ``w`` and ``pts`` are accepted but not used.
    """
    standard_normal = np.random.randn(sz, muhat.shape[0])
    return muhat + standard_normal.dot(LSighat.T)


def sampler_w(sz, w, pts, diag=graddiag):
    """Return ``sz`` Gaussian draws for the weighted points ``(w, pts)``.

    The mean and the matrix factor come from ``get_laplace``; when ``pts``
    has no rows, a single zero-weight all-zeros point is passed instead.
    """
    if pts.shape[0] == 0:
        # placeholder input so get_laplace still receives one row
        w, pts = np.zeros(1), np.zeros((1, Z.shape[1]))
    center, factor, _ = get_laplace(w, pts, mu0, diag)
    standard_normal = np.random.randn(sz, center.shape[0])
    return center + standard_normal.dot(factor.T)


# One black-box projector per sampler; all three share the same projection
# dimension, log-likelihood and gradient functions.
prj_optimal = bc.BlackBoxProjector(sampler_optimal, projection_dim,
                                   log_likelihood, grad_z_log_likelihood)
prj_realistic = bc.BlackBoxProjector(sampler_realistic, projection_dim,
                                     log_likelihood, grad_z_log_likelihood)
prj_w = bc.BlackBoxProjector(sampler_w, projection_dim, log_likelihood,
                             grad_z_log_likelihood)

print('Creating coresets object')
#create coreset construction objects
# Each construction is wrapped in its own perf_counter pair so that the
# wall-clock setup time of each Hilbert coreset is recorded separately.
t0 = time.perf_counter()
giga_optimal = bc.HilbertCoreset(Z, prj_optimal)
gigao_t_setup = time.perf_counter() - t0

t0 = time.perf_counter()
giga_realistic = bc.HilbertCoreset(Z, prj_realistic)
gigar_t_setup = time.perf_counter() - t0
Ejemplo n.º 3
0
  muw, LSigw, LSigwInv = weighted_post(mu0, Sig0inv, datastd**2, deep_encoder(nl, pts), wts)
  return muw + np.random.randn(n, muw.shape[0]).dot(LSigw.T)
'''
def sampler_w(n, wts, pts):
  """Draw ``n`` samples from a Gaussian fitted to the encoded weighted points.

  ``pts`` is passed through ``deep_encoder``; the last encoded column is
  used as the response and the remaining columns as the design matrix of a
  weighted linear-Gaussian posterior. When ``pts`` has no rows, a single
  zero-weight all-zeros point is used instead.
  """
  if pts.shape[0] == 0:
    wts, pts = np.zeros(1), np.zeros((1, Z.shape[1]))
  noise_var = datastd**2
  encoded = deep_encoder(nl, pts)
  design, response = encoded[:, :-1], encoded[:, -1]
  col_wts = wts[:, np.newaxis]
  # posterior covariance: (prior precision + weighted Gram matrix / noise)^-1
  post_cov = np.linalg.inv(Sig0inv + (col_wts*design).T.dot(design)/noise_var)
  prior_part = np.dot(Sig0inv, np.ones(out_features))
  data_part = (col_wts*response[:, np.newaxis]*design).sum(axis=0)/noise_var
  post_mean = np.dot(post_cov, prior_part + data_part)
  return np.random.multivariate_normal(post_mean, post_cov, n)

# Both projectors draw their samples from sampler_w.
prj_w = bc.BlackBoxProjector(
    sampler_w, proj_dim, log_likelihood, grad_log_likelihood, nl=nl)
prj_bw = bc.BetaBlackBoxProjector(
    sampler_w, proj_dim, beta_likelihood, log_likelihood, grad_beta, nl=nl)

#create coreset construction objects
print('Creating coreset construction objects')

in_batches = True
if not in_batches:
  raise NotImplementedError("Supported only batch data acquisition")

# Every construction below is seeded with the same initial batch: unit
# weights, indices offset by 1e7, and the points in Z_init.
sparsevi = bc.SparseVICoreset(
    Z,
    prj_w,
    opt_itrs=VI_opt_itrs,
    n_subsample_opt=n_subsample_opt,
    n_subsample_select=n_subsample_select,
    step_sched=SVI_step_sched,
    wts=np.ones(init_size),
    idcs=1e7+np.arange(init_size),
    pts=Z_init,
    groups=groups,
    initialized=True,
    enforce_new=False)
bcoresvi = bc.BetaCoreset(
    Z,
    prj_bw,
    opt_itrs=VI_opt_itrs,
    n_subsample_opt=n_subsample_opt,
    n_subsample_select=n_subsample_select,
    step_sched=BCORES_step_sched,
    beta=beta,
    learn_beta=False,
    wts=np.ones(init_size),
    idcs=1e7+np.arange(init_size),
    pts=Z_init,
    groups=groups,
    initialized=True)
unif = bc.UniformSamplingCoreset(
    Z,
    wts=np.ones(init_size),
    idcs=1e7+np.arange(init_size),
    pts=Z_init,
    groups=groups)
Ejemplo n.º 4
0
# Gaussian summary around the optimizer result: mean is the optimum, and the
# covariance is the negated inverse of the first slice returned by
# hess_th_log_joint evaluated there with unit weights.
mu = res.x
cov = -np.linalg.inv(hess_th_log_joint(Z, mu, np.ones(Z.shape[0]))[0, :, :])

# This step can be swapped for many alternatives, e.g.
# - pick a uniform data subset, weight it uniformly, and run MCMC
# - the same idea with variational inference, INLA, SGLD, etc

##########################################################################
##########################################################################
## Step 3: Compute a random finite projection of the tangent space
##########################################################################
##########################################################################

projection_dim = 500  # random projection dimension


def sampler(sz, w, p):
    """Return ``sz`` draws from N(mu, cov) as a 2-D array; ``w``/``p`` unused."""
    draws = np.random.multivariate_normal(mu, cov, sz)
    return np.atleast_2d(draws)


projector = bc.BlackBoxProjector(sampler, projection_dim, log_likelihood)

############################
############################
## Step 4: Build the Coreset
############################
############################


print('Building the coreset...')

# M bounds both the number of build iterations and the coreset size
M = 500
# coreset construction over the projected (discretized) log-likelihoods
coreset = bc.HilbertCoreset(Z, projector)
coreset.build(M)
# weights, points and their indices for the built coreset
wts, pts, idcs = coreset.get()
Ejemplo n.º 5
0
def run(arguments):
    """Run one coreset-construction trial for basis-function linear regression.

    Loads the price dataset, builds radial-basis features, constructs a
    coreset with the algorithm selected by ``arguments.alg`` at every size
    of a size grid, compares each weighted-coreset posterior against the
    full-data posterior, and stores the metrics through ``results.save``.

    Args:
        arguments: experiment configuration object. Attributes read here:
            ``trial``, ``verbosity``, ``coreset_size_spacing``,
            ``coreset_size_max``, ``coreset_num_sizes``, ``data_num``,
            ``n_bases_per_scale``, ``proj_dim``, ``opt_itrs``,
            ``step_sched`` and ``alg``.

    Side effects:
        Calls ``quit()`` when ``results.check_exists(arguments)`` is true,
        seeds NumPy's global RNG with ``arguments.trial``, reads
        ``../data/prices2018.npy`` and writes ``results/coreset_data.pk``.

    Raises:
        KeyError: if ``arguments.alg`` is not one of the keys of ``algs``.
    """
    # check if result already exists for this run, and if so, quit
    if results.check_exists(arguments):
        print('Results already exist for arguments ' + str(arguments))
        print('Quitting.')
        quit()

    #######################################
    #######################################
    ## Step 0: Setup
    #######################################
    #######################################

    np.random.seed(arguments.trial)
    bc.util.set_verbosity(arguments.verbosity)

    # grid of coreset sizes at which the coreset is recorded; np.unique
    # removes the duplicates created by the integer cast
    if arguments.coreset_size_spacing == 'log':
        Ms = np.unique(
            np.logspace(0.,
                        np.log10(arguments.coreset_size_max),
                        arguments.coreset_num_sizes,
                        dtype=np.int32))
    else:
        Ms = np.unique(
            np.linspace(1,
                        arguments.coreset_size_max,
                        arguments.coreset_num_sizes,
                        dtype=np.int32))

    #make sure the first size to record is 0
    if Ms[0] != 0:
        Ms = np.hstack((0, Ms))

    #######################################
    #######################################
    ## Step 1: Load and preprocess data
    #######################################
    #######################################

    #load data and compute true posterior
    #each row of x is [lat, lon, price]
    print('Loading data')

    x = np.load('../data/prices2018.npy')
    print('dataset size : ', x.shape)

    print('Subsampling down to ' + str(arguments.data_num) + ' points')
    idcs = np.arange(x.shape[0])
    np.random.shuffle(idcs)
    x = x[idcs[:arguments.data_num], :]

    #log transform the prices
    x[:, 2] = np.log10(x[:, 2])

    #get empirical mean/std
    datastd = x[:, 2].std()
    datamn = x[:, 2].mean()

    #bases of increasing size; the last one is effectively a constant
    basis_unique_scales = np.array([.2, .4, .8, 1.2, 1.6, 2., 100])
    basis_unique_counts = np.hstack(
        (arguments.n_bases_per_scale * np.ones(6, dtype=np.int64), 1))

    #the dimension of the scaling vector for the above bases
    d = basis_unique_counts.sum()
    print('Basis dimension: ' + str(d))

    #model params
    mu0 = datamn * np.ones(d)
    Sig0 = (datastd**2 + datamn**2) * np.eye(d)
    Sig0inv = np.linalg.inv(Sig0)

    #generate basis functions by uniformly randomly picking locations in the dataset
    print('Trial ' + str(arguments.trial))
    print('Creating bases')
    basis_scales = np.array([])
    basis_locs = np.zeros((0, 2))
    for i in range(basis_unique_scales.shape[0]):
        basis_scales = np.hstack(
            (basis_scales,
             basis_unique_scales[i] * np.ones(basis_unique_counts[i])))
        idcs = np.random.choice(np.arange(x.shape[0]),
                                replace=False,
                                size=basis_unique_counts[i])
        basis_locs = np.vstack((basis_locs, x[idcs, :2]))

    print('Converting bases and observations into X/Y matrices')
    #convert basis functions + observed data locations into a big X matrix
    X = np.zeros((x.shape[0], basis_scales.shape[0]))
    for i in range(basis_scales.shape[0]):
        X[:, i] = np.exp(-((x[:, :2] - basis_locs[i, :])**2).sum(axis=1) /
                         (2 * basis_scales[i]**2))
    Y = x[:, 2]
    # Z stacks the features with the response as the last column
    Z = np.hstack((X, Y[:, np.newaxis]))

    # eigh returns eigenvalues in ascending order, so the trailing columns
    # are the eigenvectors of X^T X with the largest eigenvalues
    _, bV = np.linalg.eigh(X.T.dot(X))
    bV = bV[:, -arguments.proj_dim:]

    #######################################
    #######################################
    ## Step 2: Calculate Likelihoods/Projectors
    #######################################
    #######################################

    #get true posterior
    print('Computing true posterior')
    mup, USigp, LSigpInv = model_linreg.weighted_post(mu0, Sig0inv, datastd**2,
                                                      Z, np.ones(X.shape[0]))
    Sigp = USigp.dot(USigp.T)
    SigpInv = LSigpInv.dot(LSigpInv.T)

    #create function to output log_likelihood given param samples
    print('Creating log-likelihood function')
    log_likelihood = lambda z, th: model_linreg.log_likelihood(
        z, th, datastd**2)

    print('Creating gradient log-likelihood function')
    grad_log_likelihood = lambda z, th: model_linreg.grad_x_log_likelihood(
        z, th, datastd**2)

    #create tangent space for well-tuned Hilbert coreset alg
    #(ignores w/pts: always samples from the full-data posterior)
    print('Creating tuned projector for Hilbert coreset construction')
    sampler_optimal = lambda n, w, pts: mup + np.random.randn(n, mup.shape[0]
                                                              ).dot(USigp.T)
    prj_optimal = bc.BlackBoxProjector(sampler_optimal, arguments.proj_dim,
                                       log_likelihood, grad_log_likelihood)

    #create tangent space for poorly-tuned Hilbert coreset alg:
    #posterior of a random (with replacement) subset of sqrt(N) rows
    print('Creating untuned projector for Hilbert coreset construction')
    Zhat = Z[np.random.randint(0, Z.shape[0], int(np.sqrt(Z.shape[0]))), :]
    muhat, USigHat, _ = model_linreg.weighted_post(mu0, Sig0inv, datastd**2,
                                                   Zhat,
                                                   np.ones(Zhat.shape[0]))
    sampler_realistic = lambda n, w, pts: muhat + np.random.randn(
        n, muhat.shape[0]).dot(USigHat.T)
    prj_realistic = bc.BlackBoxProjector(sampler_realistic, arguments.proj_dim,
                                         log_likelihood, grad_log_likelihood)

    print('Creating black box projector')

    def sampler_w(n, wts, pts):
        """Draw ``n`` samples from the posterior of the weighted points.

        With no coreset yet (``None`` inputs or zero rows) the prior
        ``N(mu0, Sig0)`` is sampled instead.
        """
        if wts is None or pts is None or pts.shape[0] == 0:
            muw = mu0
            # Note: USigw is lower triangular here, below is upper tri.
            # Doesn't matter, just need Sigw = MM^T
            USigw = np.linalg.cholesky(Sig0)
        else:
            muw, USigw, _ = model_linreg.weighted_post(mu0, Sig0inv,
                                                       datastd**2, pts, wts)
        return muw + np.random.randn(n, muw.shape[0]).dot(USigw.T)

    prj_bb = bc.BlackBoxProjector(sampler_w, arguments.proj_dim,
                                  log_likelihood, grad_log_likelihood)

    print('Creating exact projectors')

    ##############################
    ###Exact projection in SparseVI for gradient computation
    #for this model we can do the tangent space projection exactly
    class LinRegProjector(bc.Projector):
        """Closed-form projector for the linear-regression model.

        ``update`` must be called before ``project``: it stores ``muw`` and
        ``USigw``, which ``project`` reads.
        """

        def __init__(self, bV):
            # basis used to compress the quadratic term in project()
            self.bV = bV

        def project(self, pts, grad=False):
            """Return the projection of ``pts`` (features + response column).

            ``grad`` is accepted but not used: no gradient is returned.
            """
            X = pts[:, :-1]
            Y = pts[:, -1]
            beta = X.dot(self.USigw)
            nu = Y - X.dot(self.muw)
            #approximation to avoid high memory cost: project the matrix term down to bV.shape[1]**2 dimensions
            beta_proj = beta.dot(self.bV)
            return np.hstack(
                (nu[:, np.newaxis] * beta, 1. / np.sqrt(2.) *
                 (beta_proj[:, :, np.newaxis] * beta_proj[:, np.newaxis, :]).
                 reshape(beta.shape[0], arguments.proj_dim**2))) / datastd**2

        def update(self, wts, pts):
            """Recompute ``muw``/``USigw`` from the weighted points.

            Falls back to the prior when there is no coreset yet.
            """
            if wts is None or pts is None or pts.shape[0] == 0:
                self.muw = mu0
                # Note: USigw here is lower triangular, but keeping naming
                # convention for below stuff. Doesn't matter, just need
                # Sigw = MM^T
                self.USigw = np.linalg.cholesky(Sig0)
            else:
                self.muw, self.USigw, _ = model_linreg.weighted_post(
                    mu0, Sig0inv, datastd**2, pts, wts)

    prj_optimal_exact = LinRegProjector(bV)
    prj_optimal_exact.update(np.ones(Z.shape[0]), Z)
    prj_realistic_exact = LinRegProjector(bV)
    prj_realistic_exact.update(np.ones(Zhat.shape[0]), Zhat)

    #######################################
    #######################################
    ## Step 3: Construct Coreset
    #######################################
    #######################################

    ##############################
    print('Creating coreset construction objects')
    #create coreset construction objects
    # NOTE(review): eval() executes arguments.step_sched as Python code; this
    # is only safe while that string comes from a trusted command line.
    sparsevi_exact = bc.SparseVICoreset(Z,
                                        LinRegProjector(bV),
                                        opt_itrs=arguments.opt_itrs,
                                        step_sched=eval(arguments.step_sched))
    sparsevi = bc.SparseVICoreset(Z,
                                  prj_bb,
                                  opt_itrs=arguments.opt_itrs,
                                  step_sched=eval(arguments.step_sched))
    giga_optimal = bc.HilbertCoreset(Z, prj_optimal)
    giga_optimal_exact = bc.HilbertCoreset(Z, prj_optimal_exact)
    giga_realistic = bc.HilbertCoreset(Z, prj_realistic)
    giga_realistic_exact = bc.HilbertCoreset(Z, prj_realistic_exact)
    unif = bc.UniformSamplingCoreset(Z)

    algs = {
        'SVI-EXACT': sparsevi_exact,
        'SVI': sparsevi,
        'GIGA-OPT': giga_optimal,
        'GIGA-OPT-EXACT': giga_optimal_exact,
        'GIGA-REAL': giga_realistic,
        'GIGA-REAL-EXACT': giga_realistic_exact,
        'US': unif
    }
    alg = algs[arguments.alg]

    print('Building coreset')
    w = []
    p = []
    cputs = np.zeros(Ms.shape[0])
    t_build = 0
    for m in range(Ms.shape[0]):
        print('M = ' + str(Ms[m]) + ': coreset construction, ' +
              arguments.alg + ' ' + str(arguments.trial))
        t0 = time.process_time()
        # build() is called with the increment since the previous grid size
        itrs = (Ms[m] if m == 0 else Ms[m] - Ms[m - 1])
        alg.build(itrs)
        t_build += time.process_time() - t0
        wts, pts, _ = alg.get()

        #store weights/pts/runtime (cumulative CPU time of build calls only)
        w.append(wts)
        p.append(pts)
        cputs[m] = t_build

    ##############################
    ##############################
    ## Step 4: Evaluate coreset
    ##############################
    ##############################

    # computing kld and saving results
    muw = np.zeros((Ms.shape[0], mu0.shape[0]))
    Sigw = np.zeros((Ms.shape[0], mu0.shape[0], mu0.shape[0]))
    rklw = np.zeros(Ms.shape[0])
    fklw = np.zeros(Ms.shape[0])
    mu_errs = np.zeros(Ms.shape[0])
    Sig_errs = np.zeros(Ms.shape[0])
    csizes = np.zeros(Ms.shape[0])
    for m in range(Ms.shape[0]):
        csizes[m] = (w[m] > 0).sum()
        muw[m, :], USigw, LSigwInv = model_linreg.weighted_post(
            mu0, Sig0inv, datastd**2, p[m], w[m])
        Sigw[m, :, :] = USigw.dot(USigw.T)
        rklw[m] = model_linreg.KL(muw[m, :], Sigw[m, :, :], mup, SigpInv)
        fklw[m] = model_linreg.KL(mup, Sigp, muw[m, :],
                                  LSigwInv.dot(LSigwInv.T))
        # relative errors of the posterior mean / covariance
        mu_errs[m] = np.sqrt(((mup - muw[m, :])**2).sum()) / np.sqrt(
            (mup**2).sum())
        Sig_errs[m] = np.sqrt(((Sigp - Sigw[m, :, :])**2).sum()) / np.sqrt(
            (Sigp**2).sum())

    results.save(arguments,
                 csizes=csizes,
                 Ms=Ms,
                 cputs=cputs,
                 rklw=rklw,
                 fklw=fklw,
                 mu_errs=mu_errs,
                 Sig_errs=Sig_errs)

    #also save muw/Sigw/etc for plotting coreset visualizations
    res = (x, mu0, Sig0, datastd, mup, Sigp, w, p, muw, Sigw)
    # context manager closes the file even if pickling fails
    with open('results/coreset_data.pk', 'wb') as f:
        pk.dump(res, f)
Ejemplo n.º 6
0
def run(arguments):

    # check if result already exists for this run, and if so, quit
    if results.check_exists(arguments):
        print('Results already exist for arguments ' + str(arguments))
        print('Quitting.')
        quit()

    #######################################
    #######################################
    ########### Step 0: Setup #############
    #######################################
    #######################################

    np.random.seed(arguments.trial)
    bc.util.set_verbosity(arguments.verbosity)

    if arguments.coreset_size_spacing == 'log':
        Ms = np.unique(
            np.logspace(0.,
                        np.log10(arguments.coreset_size_max),
                        arguments.coreset_num_sizes,
                        dtype=np.int32))
    else:
        Ms = np.unique(
            np.linspace(1,
                        arguments.coreset_size_max,
                        arguments.coreset_num_sizes,
                        dtype=np.int32))

    #######################################
    #######################################
    ## Step 1: Define Model
    #######################################
    #######################################

    if arguments.model == "lr":
        import model_lr as model
    elif arguments.model == "poiss":
        import model_poiss as model

    #######################################
    #######################################
    ## Step 2: Load Dataset & run full MCMC / Laplace
    #######################################
    #######################################

    print('Loading dataset ' + arguments.dataset)
    X, Y, Z, Zt, D = model.load_data('../data/' + arguments.dataset + '.npz')

    #NOTE: Sig0 is currently coded as identity in model_lr and model_pr (see log_prior).
    #so if you change Sig0 here things might break.
    #TODO: fix that...
    mu0 = np.zeros(Z.shape[1])
    Sig0 = np.eye(Z.shape[1])
    LSig0 = np.eye(Z.shape[1])

    print('Checking for cached full MCMC samples')
    mcmc_cache_filename = 'mcmc_cache/full_samples_' + arguments.model + '_' + arguments.dataset + '.npz'
    if os.path.exists(mcmc_cache_filename):
        print('Cache exists, loading')
        tmp__ = np.load(mcmc_cache_filename)
        full_samples = tmp__['samples']
        full_mcmc_time_per_itr = tmp__['t']
    else:
        print('Cache doesnt exist, running MCMC')
        #convert Y to Stan LR label format
        stanY = np.zeros(Y.shape[0])
        stanY[:] = Y
        stanY[stanY == -1] = 0
        sampler_data = {
            'x': X,
            'y': stanY.astype(int),
            'w': np.ones(X.shape[0]),
            'd': X.shape[1],
            'n': X.shape[0]
        }
        full_samples, t_full_mcmc = mcmc.run(sampler_data,
                                             arguments.mcmc_samples_full,
                                             arguments.model,
                                             model.stan_representation,
                                             arguments.trial)
        full_samples = full_samples['theta']
        #TODO right now *2 to account for burn; but this should all be specified via tunable arguments
        full_mcmc_time_per_itr = t_full_mcmc / (arguments.mcmc_samples_full *
                                                2)
        if not os.path.exists('mcmc_cache'):
            os.mkdir('mcmc_cache')
        np.savez(mcmc_cache_filename,
                 samples=full_samples,
                 t=full_mcmc_time_per_itr)

    #######################################
    #######################################
    ## Step 3: Calculate Likelihoods/Projectors
    #######################################
    #######################################

    #get Gaussian approximation to the true posterior
    print('Approximating true posterior')
    mup = full_samples.mean(axis=0)
    Sigp = np.cov(full_samples, rowvar=False)
    LSigp = np.linalg.cholesky(Sigp)
    LSigpInv = solve_triangular(LSigp,
                                np.eye(LSigp.shape[0]),
                                lower=True,
                                overwrite_b=True,
                                check_finite=False)

    #create tangent space for well-tuned Hilbert coreset alg
    print('Creating tuned projector for Hilbert coreset construction')
    muHat, LSigHat, LSigHatInv = get_laplace(np.ones(Z.shape[0]),
                                             Z,
                                             np.zeros(Z.shape[1]),
                                             model,
                                             diag=False)
    sampler_optimal = lambda n, w, pts: muHat + np.random.randn(
        n, muHat.shape[0]).dot(LSigHat.T)
    prj_optimal = bc.BlackBoxProjector(sampler_optimal, arguments.proj_dim,
                                       model.log_likelihood,
                                       model.grad_z_log_likelihood)

    #create tangent space for poorly-tuned Hilbert coreset alg
    print('Creating untuned projector for Hilbert coreset construction')
    Zhat = Z[np.random.randint(0, Z.shape[0], int(np.sqrt(Z.shape[0]))), :]
    muHat2, LSigHat2, LSigHat2Inv = get_laplace(np.ones(Zhat.shape[0]),
                                                Zhat,
                                                np.zeros(Zhat.shape[1]),
                                                model,
                                                diag=False)
    sampler_realistic = lambda n, w, pts: muHat2 + np.random.randn(
        n, muHat2.shape[0]).dot(LSigHat2.T)
    prj_realistic = bc.BlackBoxProjector(sampler_realistic, arguments.proj_dim,
                                         model.log_likelihood,
                                         model.grad_z_log_likelihood)

    print('Creating black box projector')

    def sampler_w(n, wts, pts):
        """Return ``n`` Gaussian draws for the weighted points ``(wts, pts)``.

        Uses ``mu0``/``LSig0`` when there is no coreset yet (``None`` inputs
        or zero rows); otherwise takes the mean and matrix factor from
        ``get_laplace``.
        """
        has_coreset = not (wts is None or pts is None or pts.shape[0] == 0)
        if has_coreset:
            center, factor, _ = get_laplace(wts,
                                            pts,
                                            np.zeros(Z.shape[1]),
                                            model,
                                            diag=False)
        else:
            center, factor = mu0, LSig0
        standard_normal = np.random.randn(n, center.shape[0])
        return center + standard_normal.dot(factor.T)

    prj_bb = bc.BlackBoxProjector(sampler_w, arguments.proj_dim,
                                  model.log_likelihood,
                                  model.grad_z_log_likelihood)

    #######################################
    #######################################
    ## Step 4: Construct Coreset
    #######################################
    #######################################

    print('Creating coreset construction objects')
    #create coreset construction objects
    sparsevi = bc.SparseVICoreset(Z,
                                  prj_bb,
                                  opt_itrs=arguments.opt_itrs,
                                  step_sched=eval(arguments.step_sched))
    giga_optimal = bc.HilbertCoreset(Z, prj_optimal)
    giga_realistic = bc.HilbertCoreset(Z, prj_realistic)
    unif = bc.UniformSamplingCoreset(Z)

    algs = {
        'SVI': sparsevi,
        'GIGA-OPT': giga_optimal,
        'GIGA-REAL': giga_realistic,
        'US': unif
    }
    alg = algs[arguments.alg]

    cputs = np.zeros(Ms.shape[0])
    mcmc_time_per_itr = np.zeros(Ms.shape[0])
    csizes = np.zeros(Ms.shape[0])
    Fs = np.zeros(Ms.shape[0])
    rklw = np.zeros(Ms.shape[0])
    fklw = np.zeros(Ms.shape[0])
    mu_errs = np.zeros(Ms.shape[0])
    Sig_errs = np.zeros(Ms.shape[0])

    print('Running coreset construction / MCMC for ' + arguments.dataset +
          ' ' + arguments.alg + ' ' + str(arguments.trial))
    t_alg = 0.
    for m in range(Ms.shape[0]):
        print('M = ' + str(Ms[m]) + ': coreset construction, ' +
              arguments.alg + ' ' + arguments.dataset + ' ' +
              str(arguments.trial))
        #this runs alg up to a level of M; on the next iteration, it will continue from where it left off
        t0 = time.process_time()
        itrs = (Ms[m] if m == 0 else Ms[m] - Ms[m - 1])
        alg.build(itrs)
        t_alg += time.process_time() - t0
        wts, pts, idcs = alg.get()

        print('M = ' + str(Ms[m]) + ': MCMC')
        # Use MCMC on the coreset, measure time taken
        stanY = np.zeros(idcs.shape[0])
        stanY[:] = Y[idcs]
        stanY[stanY == -1] = 0
        sampler_data = {
            'x': X[idcs, :],
            'y': stanY.astype(int),
            'w': wts,
            'd': X.shape[1],
            'n': idcs.shape[0]
        }
        cst_samples, t_cst_mcmc = mcmc.run(sampler_data,
                                           arguments.mcmc_samples_coreset,
                                           arguments.model,
                                           model.stan_representation,
                                           arguments.trial)
        cst_samples = cst_samples['theta']
        #TODO see note above re: full mcmc sampling
        t_cst_mcmc_per_step = t_cst_mcmc / (arguments.mcmc_samples_coreset * 2)

        print('M = ' + str(Ms[m]) + ': Approximating posterior with Gaussian')
        muw = cst_samples.mean(axis=0)
        Sigw = np.cov(cst_samples, rowvar=False)
        LSigw = np.linalg.cholesky(Sigw)
        LSigwInv = solve_triangular(LSigw,
                                    np.eye(LSigw.shape[0]),
                                    lower=True,
                                    overwrite_b=True,
                                    check_finite=False)

        print('M = ' + str(Ms[m]) + ': Computing metrics')
        cputs[m] = t_alg
        mcmc_time_per_itr[m] = t_cst_mcmc_per_step
        csizes[m] = (wts > 0).sum()
        gcs = np.array([
            model.grad_th_log_joint(Z[idcs, :], full_samples[i, :], wts)
            for i in range(full_samples.shape[0])
        ])
        gfs = np.array([
            model.grad_th_log_joint(Z, full_samples[i, :], np.ones(Z.shape[0]))
            for i in range(full_samples.shape[0])
        ])
        Fs[m] = (((gcs - gfs)**2).sum(axis=1)).mean()
        rklw[m] = KL(muw, Sigw, mup, LSigpInv.T.dot(LSigpInv))
        fklw[m] = KL(mup, Sigp, muw, LSigwInv.T.dot(LSigwInv))
        mu_errs[m] = np.sqrt(((mup - muw)**2).sum()) / np.sqrt((mup**2).sum())
        Sig_errs[m] = np.sqrt(((Sigp - Sigw)**2).sum()) / np.sqrt(
            (Sigp**2).sum())

    results.save(arguments,
                 csizes=csizes,
                 Ms=Ms,
                 cputs=cputs,
                 Fs=Fs,
                 full_mcmc_time_per_itr=full_mcmc_time_per_itr,
                 mcmc_time_per_itr=mcmc_time_per_itr,
                 rklw=rklw,
                 fklw=fklw,
                 mu_errs=mu_errs,
                 Sig_errs=Sig_errs)
Ejemplo n.º 7
0
# D-by-D identity matrix
# NOTE(review): presumably the prior covariance paired with mu0 -- confirm
Sig0 = np.eye(D)

# progress message printed before the projector objects are constructed
print('Building projectors')


def sampler_w(sz, w, pts, diag=graddiag):
    """Draw ``sz`` Gaussian samples for the weighted points ``(w, pts)``.

    If ``pts`` has zero rows, ``w`` is replaced by ``np.zeros(1)`` and
    ``pts`` by a single all-zero row of width ``Z.shape[1]`` before calling
    ``get_laplace(w, pts, mu0, diag)``. Only the first two values returned
    by ``get_laplace`` are used.

    Returns ``mean + randn(sz, len(mean)).dot(factor.T)``.
    """
    if pts.shape[0] == 0:
        # empty coreset: substitute one zero-weight, all-zero point
        w, pts = np.zeros(1), np.zeros((1, Z.shape[1]))
    center, factor, _ = get_laplace(w, pts, mu0, diag)
    std_normal = np.random.randn(sz, center.shape[0])
    return center + std_normal.dot(factor.T)


def grad_beta(x, th, beta):
    """Forward to ``gaussian_beta_gradient`` with ``Siginv`` and ``logdetSig``.

    Both extra arguments are module-level names looked up at call time.
    """
    return gaussian_beta_gradient(x, th, beta, Siginv, logdetSig)


# projector built from sampler_w, the log-likelihood and its gradient
prj_w = bc.BlackBoxProjector(
    sampler_w,
    projection_dim,
    log_likelihood,
    grad_z_log_likelihood,
)
# beta projector: beta_likelihood is passed for both the third and fourth
# positional arguments, followed by grad_beta
prj_bw = bc.BetaBlackBoxProjector(
    sampler_w,
    projection_dim,
    beta_likelihood,
    beta_likelihood,
    grad_beta,
)

print('Creating coresets object')
# coreset construction objects, all built over the same data Z

unif = bc.UniformSamplingCoreset(Z)
sparsevi = bc.SparseVICoreset(
    Z,
    prj_w,
    opt_itrs=SVI_opt_itrs,
    n_subsample_opt=n_subsample_opt,
    n_subsample_select=n_subsample_select,
    step_sched=SVI_step_sched,
)
bpsvi = bc.BatchPSVICoreset(Z,
                            prj_w,