Example #1
def analyse(path, **kwargs):
    restore = res.load(path)
    ##print(restore.files)
    ##pt=restore['pt']
    ##if 'ptsim' in restore.keys():
    ##	ptsim=restore['ptsim']

    py = restore['py']
    x, X = discrprogr(py, **kwargs)

    y = []
    Y = []
    if 'pysim' in restore.keys():
        pysim = restore['pysim']
        y, Y = discrprogr(pysim, **kwargs)

    res.save('../../anres/anresults{}{}'.format(kwargs.get('string', ''), kwargs.get('mode', '')),
             x=x, X=X, y=y, Y=Y)
    res.dump('../../anres/anresults{}{}'.format(kwargs.get('string', ''), kwargs.get('mode', '')),
             x=x, X=X, y=y, Y=Y)
Example #2
def evaluate(path, **kwargs):
    restore = res.load(path)
    ##print(restore.files)
    ##pt=restore['pt']
    ##if 'ptsim' in restore.keys():
    ##	ptsim=restore['ptsim']

    py = restore['py']
    t0 = restore['t0']
    dy, DY, disc = evaldiscr(py, **kwargs)

    y = []
    Y = []
    if 'pysim' in restore.keys():
        pysim = restore['pysim']
        y, Y, trash = evaldiscr(pysim, **kwargs)

    res.save('../../anres/evresults{}{}'.format(kwargs.get('string', ''), kwargs.get('mode', 'limit')),
             dy=dy, DY=DY, y=y, Y=Y, t0=disc[:, t0])
    res.dump('../../anres/evresults{}{}'.format(kwargs.get('string', ''), kwargs.get('mode', 'limit')),
             dy=dy, DY=DY, y=y, Y=Y, t0=disc[:, t0])
Example #3
def run(arguments):

    # check if result already exists for this run, and if so, quit
    if results.check_exists(arguments):
        print('Results already exist for arguments ' + str(arguments))
        print('Quitting.')
        quit()

    #######################################
    #######################################
    ## Step 0: Setup
    #######################################
    #######################################

    np.random.seed(arguments.trial)
    bc.util.set_verbosity(arguments.verbosity)

    if arguments.coreset_size_spacing == 'log':
        Ms = np.unique(
            np.logspace(0.,
                        np.log10(arguments.coreset_size_max),
                        arguments.coreset_num_sizes,
                        dtype=np.int32))
    else:
        Ms = np.unique(
            np.linspace(1,
                        arguments.coreset_size_max,
                        arguments.coreset_num_sizes,
                        dtype=np.int32))

    #make sure the first size to record is 0
    if Ms[0] != 0:
        Ms = np.hstack((0, Ms))
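    # For illustration (hypothetical values): coreset_size_max=1000 with
    # coreset_num_sizes=7 and log spacing yields Ms = [1, 3, 10, 31, 100,
    # 316, 1000]; np.unique sorts and drops duplicates caused by integer
    # truncation, and the prepended 0 records the empty coreset as a baseline.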

    #######################################
    #######################################
    ## Step 1: Generate a Synthetic Dataset
    #######################################
    #######################################

    #change these to change the prior / likelihood
    mu0 = np.zeros(arguments.data_dim)
    Sig0 = np.eye(arguments.data_dim)
    Sig = np.eye(arguments.data_dim)

    #these are computed
    Sig0inv = np.linalg.inv(Sig0)
    Siginv = np.linalg.inv(Sig)
    LSigInv = np.linalg.cholesky(Siginv)  #Siginv = LL^T, L Lower tri
    USig = sl.solve_triangular(LSigInv,
                               np.eye(LSigInv.shape[0]),
                               lower=True,
                               overwrite_b=True,
                               check_finite=False).T  # Sig = UU^T, U upper tri
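    # Since Siginv = LSigInv @ LSigInv.T, the triangular solve above gives
    # USig = inv(LSigInv).T, and hence USig @ USig.T = inv(Siginv) = Sig:
    # USig is an upper-triangular square root of Sig.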
    th = np.ones(arguments.data_dim)
    logdetSig = np.linalg.slogdet(Sig)[1]

    #######################################
    #######################################
    ## Step 2: Calculate Likelihoods/Projectors
    #######################################
    #######################################

    print('Computing true posterior')
    x = np.random.multivariate_normal(th, Sig, arguments.data_num)
    mup, USigp, LSigpInv = gaussian.weighted_post(mu0, Sig0inv, Siginv, x,
                                                  np.ones(x.shape[0]))
    Sigp = USigp.dot(USigp.T)
    SigpInv = LSigpInv.dot(LSigpInv.T)
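    # gaussian.weighted_post is assumed to implement the standard conjugate
    # update for a Gaussian likelihood with known covariance:
    #   SigpInv = Sig0inv + (sum_i w_i) * Siginv
    #   mup = Sigp @ (Sig0inv @ mu0 + Siginv @ sum_i w_i * x_i)
    # returning the mean together with triangular square roots
    # USigp (Sigp = USigp @ USigp.T) and LSigpInv (SigpInv = LSigpInv @ LSigpInv.T).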

    #create the log_likelihood function
    print('Creating log-likelihood function')
    log_likelihood = lambda x, th: gaussian.log_likelihood(
        x, th, Siginv, logdetSig)

    print('Creating gradient log-likelihood function')
    grad_log_likelihood = lambda x, th: gaussian.gradx_log_likelihood(
        x, th, Siginv)

    print('Creating tuned projector for Hilbert coreset construction')
    #create the sampler for the "optimally-tuned" Hilbert coreset
    sampler_optimal = lambda n, w, pts: mup + np.random.randn(
        n, mup.shape[0]).dot(USigp.T)
    prj_optimal = bc.BlackBoxProjector(sampler_optimal, arguments.proj_dim,
                                       log_likelihood, grad_log_likelihood)

    print('Creating untuned projector for Hilbert coreset construction')
    #create the sampler for the "realistically-tuned" Hilbert coreset
    xhat = x[np.random.randint(0, x.shape[0], int(np.sqrt(x.shape[0]))), :]
    muhat, USigHat, LSigHatInv = gaussian.weighted_post(
        mu0, Sig0inv, Siginv, xhat, np.ones(xhat.shape[0]))
    sampler_realistic = lambda n, w, pts: muhat + np.random.randn(
        n, muhat.shape[0]).dot(USigHat.T)
    prj_realistic = bc.BlackBoxProjector(sampler_realistic, arguments.proj_dim,
                                         log_likelihood, grad_log_likelihood)

    print('Creating black box projector')

    def sampler_w(n, wts, pts):
        if wts is None or pts is None or pts.shape[0] == 0:
            wts = np.zeros(1)
            pts = np.zeros((1, mu0.shape[0]))
        muw, USigw, _ = gaussian.weighted_post(mu0, Sig0inv, Siginv, pts, wts)
        return muw + np.random.randn(n, muw.shape[0]).dot(USigw.T)

    prj_bb = bc.BlackBoxProjector(sampler_w, arguments.proj_dim,
                                  log_likelihood, grad_log_likelihood)

    print('Creating exact projectors')

    #TODO need to fix all the transposes in this...
    class GaussianProjector(bc.Projector):
        def project(self, pts, grad=False):
            nu = (pts - self.muw).dot(LSigInv)
            PsiL = LSigInv.T.dot(self.USigw)
            Psi = PsiL.dot(PsiL.T)
            nu = np.hstack(
                (nu.dot(PsiL), np.sqrt(0.5 * np.trace(np.dot(Psi.T, Psi))) *
                 np.ones(nu.shape[0])[:, np.newaxis]))
            nu *= np.sqrt(nu.shape[1])
            if not grad:
                return nu
            else:
                gnu = np.hstack(
                    (LSigInv.dot(PsiL),
                     np.zeros(pts.shape[1])[:, np.newaxis])).T
                gnu = np.tile(gnu, (pts.shape[0], 1, 1))
                gnu *= np.sqrt(gnu.shape[1])
                return nu, gnu

        def update(self, wts=None, pts=None):
            if wts is None or pts is None or pts.shape[0] == 0:
                wts = np.zeros(1)
                pts = np.zeros((1, mu0.shape[0]))
            self.muw, self.USigw, self.LSigwInv = gaussian.weighted_post(
                mu0, Sig0inv, Siginv, pts, wts)

    prj_optimal_exact = GaussianProjector()
    prj_optimal_exact.update(np.ones(x.shape[0]), x)
    prj_realistic_exact = GaussianProjector()
    prj_realistic_exact.update(np.ones(xhat.shape[0]), xhat)

    #######################################
    #######################################
    ## Step 3: Construct Coreset
    #######################################
    #######################################

    ##############################
    print('Creating coreset construction objects')
    #create coreset construction objects
    sparsevi_exact = bc.SparseVICoreset(x,
                                        GaussianProjector(),
                                        opt_itrs=arguments.opt_itrs,
                                        step_sched=eval(arguments.step_sched))
    sparsevi = bc.SparseVICoreset(x,
                                  prj_bb,
                                  opt_itrs=arguments.opt_itrs,
                                  step_sched=eval(arguments.step_sched))
    giga_optimal = bc.HilbertCoreset(x, prj_optimal)
    giga_optimal_exact = bc.HilbertCoreset(x, prj_optimal_exact)
    giga_realistic = bc.HilbertCoreset(x, prj_realistic)
    giga_realistic_exact = bc.HilbertCoreset(x, prj_realistic_exact)
    unif = bc.UniformSamplingCoreset(x)

    algs = {
        'SVI-EXACT': sparsevi_exact,
        'SVI': sparsevi,
        'GIGA-OPT': giga_optimal,
        'GIGA-OPT-EXACT': giga_optimal_exact,
        'GIGA-REAL': giga_realistic,
        'GIGA-REAL-EXACT': giga_realistic_exact,
        'US': unif
    }
    alg = algs[arguments.alg]

    print('Building coreset')
    w = []
    p = []
    cputs = np.zeros(Ms.shape[0])
    t_build = 0
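    # Each pass below grows the same coreset incrementally: build(itrs) adds
    # only the iterations needed to reach size Ms[m], and t_build accumulates
    # the total construction time recorded in cputs.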
    for m in range(Ms.shape[0]):
        print('M = ' + str(Ms[m]) + ': coreset construction, ' +
              arguments.alg + ' ' + str(arguments.trial))
        t0 = time.process_time()
        itrs = (Ms[m] if m == 0 else Ms[m] - Ms[m - 1])
        alg.build(itrs)
        t_build += time.process_time() - t0
        wts, pts, idcs = alg.get()

        #store weights/pts/runtime
        w.append(wts)
        p.append(pts)
        cputs[m] = t_build

    ##############################
    ##############################
    ## Step 4: Evaluate coreset
    ##############################
    ##############################

    # compute KL divergences and error metrics, then save results
    muw = np.zeros((Ms.shape[0], mu0.shape[0]))
    Sigw = np.zeros((Ms.shape[0], mu0.shape[0], mu0.shape[0]))
    rklw = np.zeros(Ms.shape[0])
    fklw = np.zeros(Ms.shape[0])
    csizes = np.zeros(Ms.shape[0])
    mu_errs = np.zeros(Ms.shape[0])
    Sig_errs = np.zeros(Ms.shape[0])
    for m in range(Ms.shape[0]):
        csizes[m] = (w[m] > 0).sum()
        muw[m, :], USigw, LSigwInv = gaussian.weighted_post(
            mu0, Sig0inv, Siginv, p[m], w[m])
        Sigw[m, :, :] = USigw.dot(USigw.T)
        rklw[m] = gaussian.KL(muw[m, :], Sigw[m, :, :], mup, SigpInv)
        fklw[m] = gaussian.KL(mup, Sigp, muw[m, :], LSigwInv.dot(LSigwInv.T))
        mu_errs[m] = np.sqrt(((mup - muw[m, :])**2).sum()) / np.sqrt(
            (mup**2).sum())
        Sig_errs[m] = np.sqrt(((Sigp - Sigw[m, :, :])**2).sum()) / np.sqrt(
            (Sigp**2).sum())
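    # gaussian.KL is assumed to take the second covariance as its inverse and
    # implement the closed form KL(N(mu0, S0) || N(mu1, S1)) = 0.5 * (
    # tr(S1^-1 S0) + (mu1 - mu0)^T S1^-1 (mu1 - mu0) - d + logdet S1 -
    # logdet S0); rklw/fklw are thus the reverse/forward KL, and
    # mu_errs/Sig_errs are relative l2/Frobenius errors of the coreset
    # posterior mean and covariance.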

    results.save(arguments,
                 csizes=csizes,
                 Ms=Ms,
                 cputs=cputs,
                 rklw=rklw,
                 fklw=fklw,
                 mu_errs=mu_errs,
                 Sig_errs=Sig_errs)

    #also save muw/Sigw/etc for plotting coreset visualizations
    with open('results/coreset_data.pk', 'wb') as f:
        res = (x, mu0, Sig0, Sig, mup, Sigp, w, p, muw, Sigw)
        pk.dump(res, f)
Example #4
                bombarolo = nemici[random2]
                bomba = bombe.append(Bomb(bombarolo.x_n, bombarolo.y_n))
        if 0 <= x <= 693:
            paintEdo(x, y)
        elif x < 0:
            paintEdo(0, y)
        else:
            paintEdo(693, y)
        # pygame.display.update() updates only part of the screen
        pygame.display.flip()  # update the whole screen

# RESULT SAVING AND RECORD CHECKING---------------
if not quitting:
    screen.blit(gameoverdisplay, (0, 0))
    pygame.display.flip()  # update the whole screen
    stringarecord = results.save(missili_sparati, bombe_evitate, nemici_sconfitti)
    pygame.time.wait(4000)
    record = False
    if (
        int(stringarecord[0]) == bombe_evitate
        or int(stringarecord[1]) == missili_sparati
        or int(stringarecord[2]) == nemici_sconfitti
    ):
        record = True

# RESULT VISUALIZATION------------------------------------

if fine:
    pygame.font.init()
    font = pygame.font.Font("freesansbold.ttf", 32)
    stringaresult = (
Example #5
def run(arguments):
    # check if result already exists for this run, and if so, quit
    if results.check_exists(arguments):
        print('Results already exist for arguments ' + str(arguments))
        print('Quitting.')
        quit()

    #######################################
    #######################################
    ## Step 0: Setup
    #######################################
    #######################################

    np.random.seed(arguments.trial)
    bc.util.set_verbosity(arguments.verbosity)

    if arguments.coreset_size_spacing == 'log':
        Ms = np.unique(
            np.logspace(0.,
                        np.log10(arguments.coreset_size_max),
                        arguments.coreset_num_sizes,
                        dtype=np.int32))
    else:
        Ms = np.unique(
            np.linspace(1,
                        arguments.coreset_size_max,
                        arguments.coreset_num_sizes,
                        dtype=np.int32))

    #make sure the first size to record is 0
    if Ms[0] != 0:
        Ms = np.hstack((0, Ms))

    #######################################
    #######################################
    ## Step 1: Load and preprocess data
    #######################################
    #######################################

    #load data and compute true posterior
    #each row of x is [lat, lon, price]
    print('Loading data')

    x = np.load('../data/prices2018.npy')
    print('dataset size : ', x.shape)

    print('Subsampling down to ' + str(arguments.data_num) + ' points')
    idcs = np.arange(x.shape[0])
    np.random.shuffle(idcs)
    x = x[idcs[:arguments.data_num], :]

    #log transform the prices
    x[:, 2] = np.log10(x[:, 2])

    #get empirical mean/std
    datastd = x[:, 2].std()
    datamn = x[:, 2].mean()

    #bases of increasing scale; the last one is effectively a constant
    basis_unique_scales = np.array([.2, .4, .8, 1.2, 1.6, 2., 100])
    basis_unique_counts = np.hstack(
        (arguments.n_bases_per_scale * np.ones(6, dtype=np.int64), 1))

    #the dimension of the scaling vector for the above bases
    d = basis_unique_counts.sum()
    print('Basis dimension: ' + str(d))

    #model params
    mu0 = datamn * np.ones(d)
    Sig0 = (datastd**2 + datamn**2) * np.eye(d)
    Sig0inv = np.linalg.inv(Sig0)

    #generate basis functions by uniformly randomly picking locations in the dataset
    print('Trial ' + str(arguments.trial))
    print('Creating bases')
    basis_scales = np.array([])
    basis_locs = np.zeros((0, 2))
    for i in range(basis_unique_scales.shape[0]):
        basis_scales = np.hstack(
            (basis_scales,
             basis_unique_scales[i] * np.ones(basis_unique_counts[i])))
        idcs = np.random.choice(np.arange(x.shape[0]),
                                replace=False,
                                size=basis_unique_counts[i])
        basis_locs = np.vstack((basis_locs, x[idcs, :2]))

    print('Converting bases and observations into X/Y matrices')
    #convert basis functions + observed data locations into a big X matrix
    X = np.zeros((x.shape[0], basis_scales.shape[0]))
    for i in range(basis_scales.shape[0]):
        X[:, i] = np.exp(-((x[:, :2] - basis_locs[i, :])**2).sum(axis=1) /
                         (2 * basis_scales[i]**2))
    Y = x[:, 2]
    Z = np.hstack((X, Y[:, np.newaxis]))
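    # Each column of X is a Gaussian radial basis function evaluated at the
    # observed (lat, lon) locations, X[j, i] = exp(-||x_j - c_i||^2 / (2 * s_i^2)),
    # with centers c_i drawn from the data and scales s_i from
    # basis_unique_scales; appending Y as the last column of Z lets the
    # coreset algorithms treat each (features, response) row as one point.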

    _, bV = np.linalg.eigh(X.T.dot(X))
    bV = bV[:, -arguments.proj_dim:]

    #######################################
    #######################################
    ## Step 2: Calculate Likelihoods/Projectors
    #######################################
    #######################################

    #get true posterior
    print('Computing true posterior')
    mup, USigp, LSigpInv = model_linreg.weighted_post(mu0, Sig0inv, datastd**2,
                                                      Z, np.ones(X.shape[0]))
    Sigp = USigp.dot(USigp.T)
    SigpInv = LSigpInv.dot(LSigpInv.T)

    #create function to output log_likelihood given param samples
    print('Creating log-likelihood function')
    log_likelihood = lambda z, th: model_linreg.log_likelihood(
        z, th, datastd**2)

    print('Creating gradient log-likelihood function')
    grad_log_likelihood = lambda z, th: model_linreg.grad_x_log_likelihood(
        z, th, datastd**2)

    #create tangent space for well-tuned Hilbert coreset alg
    print('Creating tuned projector for Hilbert coreset construction')
    sampler_optimal = lambda n, w, pts: mup + np.random.randn(
        n, mup.shape[0]).dot(USigp.T)
    prj_optimal = bc.BlackBoxProjector(sampler_optimal, arguments.proj_dim,
                                       log_likelihood, grad_log_likelihood)

    #create tangent space for poorly-tuned Hilbert coreset alg
    print('Creating untuned projector for Hilbert coreset construction')
    Zhat = Z[np.random.randint(0, Z.shape[0], int(np.sqrt(Z.shape[0]))), :]
    muhat, USigHat, LSigHatInv = model_linreg.weighted_post(
        mu0, Sig0inv, datastd**2, Zhat, np.ones(Zhat.shape[0]))
    sampler_realistic = lambda n, w, pts: muhat + np.random.randn(
        n, muhat.shape[0]).dot(USigHat.T)
    prj_realistic = bc.BlackBoxProjector(sampler_realistic, arguments.proj_dim,
                                         log_likelihood, grad_log_likelihood)

    print('Creating black box projector')

    def sampler_w(n, wts, pts):
        if wts is None or pts is None or pts.shape[0] == 0:
            muw = mu0
            USigw = np.linalg.cholesky(
                Sig0
            )  #Note: USigw is lower triangular here (upper triangular below); it doesn't matter, we only need Sigw = MM^T
        else:
            muw, USigw, _ = model_linreg.weighted_post(mu0, Sig0inv,
                                                       datastd**2, pts, wts)
        return muw + np.random.randn(n, muw.shape[0]).dot(USigw.T)

    prj_bb = bc.BlackBoxProjector(sampler_w, arguments.proj_dim,
                                  log_likelihood, grad_log_likelihood)

    print('Creating exact projectors')

    ##############################
    ###Exact projection in SparseVI for gradient computation
    #for this model we can do the tangent space projection exactly
    class LinRegProjector(bc.Projector):
        def __init__(self, bV):
            self.bV = bV

        def project(self, pts, grad=False):
            X = pts[:, :-1]
            Y = pts[:, -1]
            #beta = X.dot(self.V*np.sqrt(np.maximum(self.lmb, 0.)))
            beta = X.dot(self.USigw)
            nu = Y - X.dot(self.muw)
            #approximation to avoid high memory cost: project the matrix term down to bV.shape[1]**2 dimensions
            beta_proj = beta.dot(self.bV)
            #lmb2, V2 = np.linalg.eigh(beta.T.dot(beta))
            #beta_proj = beta.dot(V2[:, -arguments.proj_dim:])
            return np.hstack(
                (nu[:, np.newaxis] * beta, 1. / np.sqrt(2.) *
                 (beta_proj[:, :, np.newaxis] * beta_proj[:, np.newaxis, :]).
                 reshape(beta.shape[0], arguments.proj_dim**2))) / datastd**2

        def update(self, wts, pts):
            if wts is None or pts is None or pts.shape[0] == 0:
                self.muw = mu0
                self.USigw = np.linalg.cholesky(
                    Sig0
                )  #Note: USigw here is lower triangular; the name is kept for consistency with the code below. All we need is Sigw = MM^T
            else:
                self.muw, self.USigw, _ = model_linreg.weighted_post(
                    mu0, Sig0inv, datastd**2, pts, wts)
            #if pts.shape[0] == 0:
            #    self.muw = mu0
            #    self.Sigw = Sig0
            #else:
            #    self.muw, self.Sigw = model_linreg.weighted_post(mu0, Sig0inv, datastd**2, pts, wts)
            #self.lmb, self.V = np.linalg.eigh(self.LSigw.dot(self.LSigw.T))

    prj_optimal_exact = LinRegProjector(bV)
    prj_optimal_exact.update(np.ones(Z.shape[0]), Z)
    prj_realistic_exact = LinRegProjector(bV)
    prj_realistic_exact.update(np.ones(Zhat.shape[0]), Zhat)

    #######################################
    #######################################
    ## Step 3: Construct Coreset
    #######################################
    #######################################

    ##############################
    print('Creating coreset construction objects')
    #create coreset construction objects
    sparsevi_exact = bc.SparseVICoreset(Z,
                                        LinRegProjector(bV),
                                        opt_itrs=arguments.opt_itrs,
                                        step_sched=eval(arguments.step_sched))
    sparsevi = bc.SparseVICoreset(Z,
                                  prj_bb,
                                  opt_itrs=arguments.opt_itrs,
                                  step_sched=eval(arguments.step_sched))
    giga_optimal = bc.HilbertCoreset(Z, prj_optimal)
    giga_optimal_exact = bc.HilbertCoreset(Z, prj_optimal_exact)
    giga_realistic = bc.HilbertCoreset(Z, prj_realistic)
    giga_realistic_exact = bc.HilbertCoreset(Z, prj_realistic_exact)
    unif = bc.UniformSamplingCoreset(Z)

    algs = {
        'SVI-EXACT': sparsevi_exact,
        'SVI': sparsevi,
        'GIGA-OPT': giga_optimal,
        'GIGA-OPT-EXACT': giga_optimal_exact,
        'GIGA-REAL': giga_realistic,
        'GIGA-REAL-EXACT': giga_realistic_exact,
        'US': unif
    }
    alg = algs[arguments.alg]

    print('Building coreset')
    w = []
    p = []
    cputs = np.zeros(Ms.shape[0])
    t_build = 0
    for m in range(Ms.shape[0]):
        print('M = ' + str(Ms[m]) + ': coreset construction, ' +
              arguments.alg + ' ' + str(arguments.trial))
        t0 = time.process_time()
        itrs = (Ms[m] if m == 0 else Ms[m] - Ms[m - 1])
        alg.build(itrs)
        t_build += time.process_time() - t0
        wts, pts, idcs = alg.get()

        #store weights/pts/runtime
        w.append(wts)
        p.append(pts)
        cputs[m] = t_build

    ##############################
    ##############################
    ## Step 4: Evaluate coreset
    ##############################
    ##############################

    # compute KL divergences and error metrics, then save results
    muw = np.zeros((Ms.shape[0], mu0.shape[0]))
    Sigw = np.zeros((Ms.shape[0], mu0.shape[0], mu0.shape[0]))
    rklw = np.zeros(Ms.shape[0])
    fklw = np.zeros(Ms.shape[0])
    mu_errs = np.zeros(Ms.shape[0])
    Sig_errs = np.zeros(Ms.shape[0])
    csizes = np.zeros(Ms.shape[0])
    for m in range(Ms.shape[0]):
        csizes[m] = (w[m] > 0).sum()
        muw[m, :], USigw, LSigwInv = model_linreg.weighted_post(
            mu0, Sig0inv, datastd**2, p[m], w[m])
        Sigw[m, :, :] = USigw.dot(USigw.T)
        rklw[m] = model_linreg.KL(muw[m, :], Sigw[m, :, :], mup, SigpInv)
        fklw[m] = model_linreg.KL(mup, Sigp, muw[m, :],
                                  LSigwInv.dot(LSigwInv.T))
        mu_errs[m] = np.sqrt(((mup - muw[m, :])**2).sum()) / np.sqrt(
            (mup**2).sum())
        Sig_errs[m] = np.sqrt(((Sigp - Sigw[m, :, :])**2).sum()) / np.sqrt(
            (Sigp**2).sum())

    results.save(arguments,
                 csizes=csizes,
                 Ms=Ms,
                 cputs=cputs,
                 rklw=rklw,
                 fklw=fklw,
                 mu_errs=mu_errs,
                 Sig_errs=Sig_errs)

    #also save muw/Sigw/etc for plotting coreset visualizations
    with open('results/coreset_data.pk', 'wb') as f:
        res = (x, mu0, Sig0, datastd, mup, Sigp, w, p, muw, Sigw)
        pk.dump(res, f)
Example #6
'''
Main file that takes arguments from encrypt.py and executes the functions documented below.
'''
import imgpros
import sys
import utilities
import model
import results
'''Initializes the inputs taken from encrypt.py and stores them for further use'''
utilities.intializer(sys.argv)
'''Displays a preview of the first 50 frames to help the user crop the rotating cell'''
utilities.preview()
'''Allows the user to select a free-size rectangular portion of the first frame containing a cell for further analysis'''
imgpros.crop()
'''Performs linear regression analysis on the cropped portion of all the frames and outputs
	the centre-of-mass (COM) data and a plot of the COM for further use'''
model.analyze()
'''Calculates the frequency, change in angle per frame, total clockwise/counter-clockwise time intervals, number of frames, and
	all other necessary outputs. Also writes the final analyzed data as CSV files and graphs'''
model.compute()
'''Saves the graphs and CSV files obtained after analysis in the required folder'''
results.save()
Example #7
def run(arguments):
    # check if result already exists for this run, and if so, quit
    if results.check_exists(arguments):
        print('Results already exist for arguments ' + str(arguments))
        print('Quitting.')
        quit()

    #######################################
    #######################################
    ########### Step 0: Setup #############
    #######################################
    #######################################

    np.random.seed(arguments.trial)
    bc.util.set_verbosity(arguments.verbosity)
    algs = {
        'FW': bc.snnls.FrankWolfe,
        'GIGA': bc.snnls.GIGA,
        'OMP': bc.snnls.OrthoPursuit,
        'US': bc.snnls.UniformSampling
    }

    if arguments.coreset_size_spacing == 'log':
        Ms = np.unique(
            np.logspace(0.,
                        np.log10(arguments.coreset_size_max),
                        arguments.coreset_num_sizes,
                        dtype=np.int32))
    else:
        Ms = np.unique(
            np.linspace(1,
                        arguments.coreset_size_max,
                        arguments.coreset_num_sizes,
                        dtype=np.int32))

    #######################################
    #######################################
    ## Step 1: Generate a Synthetic Dataset
    #######################################
    #######################################

    if arguments.data_type == 'normal':
        X = np.random.randn(arguments.data_num, arguments.data_dim)
    else:
        X = np.eye(arguments.data_num)

    ############################
    ############################
    ## Step 2: Build/Evaluate the Coreset
    ############################
    ############################

    data_type = arguments.data_type
    fldr = arguments.results_folder

    err = np.zeros(Ms.shape[0])
    csize = np.zeros(Ms.shape[0])
    cput = np.zeros(Ms.shape[0])

    print('data: ' + arguments.data_type + ', trial ' + str(arguments.trial) +
          ', alg: ' + arguments.alg)

    class IDProjector(bc.Projector):
        def update(self, wts, pts):
            pass

        def project(self, pts, grad=False):
            return pts

    alg = bc.HilbertCoreset(X, IDProjector(), snnls=algs[arguments.alg])

    for m, M in enumerate(Ms):
        t0 = time.process_time()
        itrs = (Ms[m] if m == 0 else Ms[m] - Ms[m - 1])
        alg.build(itrs)
        tf = time.process_time()
        cput[m] = tf - t0 + (cput[m - 1] if m > 0 else 0)
        wts, pts, idcs = alg.get()
        csize[m] = (wts > 0).sum()
        err[m] = alg.error()

    ############################
    ############################
    ## Step 3: Save Results
    ############################
    ############################

    results.save(arguments, err=err, csize=csize, Ms=Ms, cput=cput)
Example #8
def run(region, N=1000):
    """Run model simulation.
    
    Args:
        region (str): Region to run the simulation for.
        N (int, optional): Number of samples.
    """
    region = region.upper().strip()
    print(region)
    # load config
    with open("model/regions.json") as fp:
        _config = json.load(fp)
    config = _config[region]
    config = {
        'dates': ('2020-08-01', '2021-03-13'),
        'window': 7,
        'weekly': False,
        'attributes': 'IRD',
        'initial': {
            'E': .1,
            'I': .1,
            'R': 0,
            'D': 0
        },
        'emission': {
            'I': (1, 1),
            'R': (1, 1),
            'D': (1, 1)
        },
        **config
    }
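    # Because **config is expanded last in the literal above, any key present
    # in model/regions.json overrides these defaults, e.g. (hypothetical)
    # {'window': 7, **{'window': 14}} evaluates to {'window': 14}.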
    POP = population.get_population(region)
    # parse
    dates = [datetime.strptime(d, "%Y-%m-%d") for d in config['dates']]
    window = config['window']
    weekly = config.get('weekly', False)
    attributes = config['attributes'].upper()
    initial = [
        1 - sum(config['initial'].values()),  # S
        config['initial'].get('E', 0),  # E
        config['initial'].get('I', 0),  # I
        config['initial'].get('R', 0),  # R
        config['initial'].get('D', 0)  # D
    ]
    emission = [
        config['emission'].get('I', (1, 1)),
        config['emission'].get('R', (1, 1)),
        config['emission'].get('D', (1, 1))
    ]
    # optimize
    params = optimize_spline(region,
                             dates,
                             initial=initial,
                             attributes=attributes,
                             emission=emission,
                             window=window,
                             weekly=weekly)
    # simulate result
    (sim_lat,
     sim_obs), last_values = posterior.simulate_posterior(region=region,
                                                          params=params,
                                                          dates=dates,
                                                          N=N,
                                                          initial=initial,
                                                          parI=emission[0],
                                                          parR=emission[1],
                                                          parD=emission[2])
    # save result
    _results.save((sim_lat, sim_obs), dates, region, params)
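
A minimal usage sketch for the example above, assuming its module-level dependencies (json, population, optimize_spline, posterior, _results) resolve; the region key is hypothetical and must exist in model/regions.json:

if __name__ == '__main__':
    run('CZ', N=500)  # hypothetical region key from model/regions.json
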
Example #9
def fit(cell,
        method,
        search_transformation='a',
        sample_transformation='a',
        start_from_m1=False,
        method_1b=False,
        repeats=50,
        cap=None):
    """
    Performs a fit to data from cell ``cell``, using method ``method`` and the
    given configuration.

    If ``start_from_m1`` is set to ``True``, a single repeat will be run. Else,
    the number of repeats will be set by ``repeats`` and ``cap``.
    """
    # Check cell and method (better checking happens below)
    cell = int(cell)
    method = int(method)

    # Check compatibility of arguments; detailed checking happens later via
    # results.reserve_base_name()
    if method == 1 and not method_1b:
        raise ValueError('Only Method 1b is supported by fit(), not method 1.')
    if method != 1 and method_1b:
        raise ValueError('Method 1b can only be used if method 1 is chosen.')

    # Set method name for screen output
    method_name = str(method)
    if start_from_m1:
        method_name += 'b'

    # Create transformation objects
    search_transformation = transformations.create(search_transformation)
    sample_transformation = transformations.create(sample_transformation)

    # Define boundaries
    bounds = boundaries.Boundaries(
        search_transformation,
        sample_transformation,
        None if method == 1 else cells.lower_conductance(cell),
    )

    # Define error function
    if method == 1:
        g_fixed = results.load_parameters(cell, 1)[-1]
        f = errors.E1(cell, search_transformation, fixed_conductance=g_fixed)
    elif method == 2:
        f = errors.E2(cell, search_transformation)
    elif method == 3:
        f = errors.E3(cell, search_transformation)
    elif method == 4:
        f = errors.E4(cell, search_transformation)
    elif method == 5:
        f = errors.EAP(cell, search_transformation)
    else:
        raise ValueError('Method not supported: ' + str(method))

    # Check number of repeats
    if start_from_m1:
        repeats = 1
    else:
        repeats = int(repeats)
        if repeats < 1:
            raise ValueError('Number of repeats must be at least 1.')
        if debug:
            repeats = min(3, repeats)

    # Check cap on total number of runs
    if start_from_m1:
        cap = None
    elif cap is not None:
        cap = int(cap)
        if cap < 1:
            raise ValueError(
                'Cap on total number of runs must be at least 1 (or None).')

    # Run
    scores = []
    for i in range(repeats):

        # Cap max runs
        cap_info = ''
        if cap:
            n = results.count(cell, method, search_transformation.code(),
                              sample_transformation.code(), start_from_m1,
                              method_1b, False)
            if n >= cap:
                print()
                print('Maximum number of runs reached: terminating.')
                print()
                return
            cap_info = ' (run ' + str(n + 1) + ', capped at ' + str(cap) + ')'

        # Show configuration
        print()
        print('Cell   ' + str(cell))
        print('Method ' + method_name)
        print('Search ' + search_transformation.name())
        print('Sample ' + sample_transformation.name())
        print('Repeat ' + str(1 + i) + ' of ' + str(repeats) + cap_info)
        print()
        if start_from_m1:
            print('Starting from Method 1 result.')
        else:
            print('Starting point sampled from boundaries.')

        # Get base filename to store results in
        with results.reserve_base_name(cell, method,
                                       search_transformation.code(),
                                       sample_transformation.code(),
                                       start_from_m1, method_1b) as base:
            print('Storing results using base ' + base)

            # Choose starting point
            if start_from_m1:
                # Start from method 1 results
                p0 = results.load_parameters(cell, 1)  # Model space
                q0 = search_transformation.transform(p0)  # Search space
            else:
                # Choose random starting point
                # Allow resampling, in case error calculation fails
                print('Choosing starting point')
                q0 = f0 = float('inf')
                while not np.isfinite(f0):
                    q0 = bounds.sample()  # Search space
                    f0 = f(q0)  # Initial score

            # Create optimiser
            opt = pints.OptimisationController(f,
                                               q0,
                                               boundaries=bounds,
                                               method=pints.CMAES)
            opt.set_log_to_file(base + '.csv', True)
            opt.set_max_iterations(3 if debug else None)
            #opt.set_parallel(True)
            opt.set_parallel(False)

            # Run optimisation
            with np.errstate(all='ignore'):  # Ignore numpy warnings
                q, s = opt.run()  # Search space
            p = search_transformation.detransform(q)  # Model space
            if method_1b:
                p = np.concatenate((p, [g_fixed]))

            # Store results for this run
            results.save(base, p, s, opt.time(), opt.evaluations())

        scores.append(s)

    # Order scores
    order = np.argsort(scores)
    scores = np.asarray(scores)[order]

    # Show results
    print('Best scores:')
    for score in scores[:10]:
        print(score)
    print('Mean & std of score:')
    print(np.mean(scores))
    print(np.std(scores))
    print('Worst score:')
    print(scores[-1])
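
A minimal invocation sketch for fit() above, assuming the modules it relies on (results, transformations, boundaries, errors, cells, pints) and the module-level debug flag are defined; the cell and method values are hypothetical:

if __name__ == '__main__':
    # Fit cell 5 with method 2: starting points sampled from the boundaries,
    # three repeats, no cap on the total number of runs.
    fit(5, 2, search_transformation='a', sample_transformation='a',
        repeats=3, cap=None)
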
Example #10
def run(arguments):

    # check if result already exists for this run, and if so, quit
    if results.check_exists(arguments):
        print('Results already exist for arguments ' + str(arguments))
        print('Quitting.')
        quit()

    #######################################
    #######################################
    ########### Step 0: Setup #############
    #######################################
    #######################################

    np.random.seed(arguments.trial)
    bc.util.set_verbosity(arguments.verbosity)

    if arguments.coreset_size_spacing == 'log':
        Ms = np.unique(
            np.logspace(0.,
                        np.log10(arguments.coreset_size_max),
                        arguments.coreset_num_sizes,
                        dtype=np.int32))
    else:
        Ms = np.unique(
            np.linspace(1,
                        arguments.coreset_size_max,
                        arguments.coreset_num_sizes,
                        dtype=np.int32))

    #######################################
    #######################################
    ## Step 1: Define Model
    #######################################
    #######################################

    if arguments.model == "lr":
        import model_lr as model
    elif arguments.model == "poiss":
        import model_poiss as model

    #######################################
    #######################################
    ## Step 2: Load Dataset & run full MCMC / Laplace
    #######################################
    #######################################

    print('Loading dataset ' + arguments.dataset)
    X, Y, Z, Zt, D = model.load_data('../data/' + arguments.dataset + '.npz')

    #NOTE: Sig0 is currently coded as identity in model_lr and model_poiss (see log_prior),
    #so if you change Sig0 here things might break.
    #TODO: fix that...
    mu0 = np.zeros(Z.shape[1])
    Sig0 = np.eye(Z.shape[1])
    LSig0 = np.eye(Z.shape[1])

    print('Checking for cached full MCMC samples')
    mcmc_cache_filename = 'mcmc_cache/full_samples_' + arguments.model + '_' + arguments.dataset + '.npz'
    if os.path.exists(mcmc_cache_filename):
        print('Cache exists, loading')
        tmp__ = np.load(mcmc_cache_filename)
        full_samples = tmp__['samples']
        full_mcmc_time_per_itr = tmp__['t']
    else:
        print("Cache doesn't exist, running MCMC")
        #convert Y to Stan LR label format
        stanY = np.zeros(Y.shape[0])
        stanY[:] = Y
        stanY[stanY == -1] = 0
        sampler_data = {
            'x': X,
            'y': stanY.astype(int),
            'w': np.ones(X.shape[0]),
            'd': X.shape[1],
            'n': X.shape[0]
        }
        full_samples, t_full_mcmc = mcmc.run(sampler_data,
                                             arguments.mcmc_samples_full,
                                             arguments.model,
                                             model.stan_representation,
                                             arguments.trial)
        full_samples = full_samples['theta']
        #TODO right now *2 to account for burn-in; this should all be specified via tunable arguments
        full_mcmc_time_per_itr = t_full_mcmc / (arguments.mcmc_samples_full *
                                                2)
        if not os.path.exists('mcmc_cache'):
            os.mkdir('mcmc_cache')
        np.savez(mcmc_cache_filename,
                 samples=full_samples,
                 t=full_mcmc_time_per_itr)

    #######################################
    #######################################
    ## Step 3: Calculate Likelihoods/Projectors
    #######################################
    #######################################

    #get Gaussian approximation to the true posterior
    print('Approximating true posterior')
    mup = full_samples.mean(axis=0)
    Sigp = np.cov(full_samples, rowvar=False)
    LSigp = np.linalg.cholesky(Sigp)
    LSigpInv = solve_triangular(LSigp,
                                np.eye(LSigp.shape[0]),
                                lower=True,
                                overwrite_b=True,
                                check_finite=False)
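    # mup and Sigp moment-match a Gaussian to the full MCMC samples; LSigpInv
    # is the inverse of the lower Cholesky factor of Sigp, so
    # SigpInv = LSigpInv.T @ LSigpInv (used in the KL computations below).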

    #create tangent space for well-tuned Hilbert coreset alg
    print('Creating tuned projector for Hilbert coreset construction')
    muHat, LSigHat, LSigHatInv = get_laplace(np.ones(Z.shape[0]),
                                             Z,
                                             np.zeros(Z.shape[1]),
                                             model,
                                             diag=False)
    sampler_optimal = lambda n, w, pts: muHat + np.random.randn(
        n, muHat.shape[0]).dot(LSigHat.T)
    prj_optimal = bc.BlackBoxProjector(sampler_optimal, arguments.proj_dim,
                                       model.log_likelihood,
                                       model.grad_z_log_likelihood)

    #create tangent space for poorly-tuned Hilbert coreset alg
    print('Creating untuned projector for Hilbert coreset construction')
    Zhat = Z[np.random.randint(0, Z.shape[0], int(np.sqrt(Z.shape[0]))), :]
    muHat2, LSigHat2, LSigHat2Inv = get_laplace(np.ones(Zhat.shape[0]),
                                                Zhat,
                                                np.zeros(Zhat.shape[1]),
                                                model,
                                                diag=False)
    sampler_realistic = lambda n, w, pts: muHat2 + np.random.randn(
        n, muHat2.shape[0]).dot(LSigHat2.T)
    prj_realistic = bc.BlackBoxProjector(sampler_realistic, arguments.proj_dim,
                                         model.log_likelihood,
                                         model.grad_z_log_likelihood)

    print('Creating black box projector')

    def sampler_w(n, wts, pts):
        if wts is None or pts is None or pts.shape[0] == 0:
            muw = mu0
            LSigw = LSig0
        else:
            muw, LSigw, _ = get_laplace(wts,
                                        pts,
                                        np.zeros(Z.shape[1]),
                                        model,
                                        diag=False)
        return muw + np.random.randn(n, muw.shape[0]).dot(LSigw.T)

    prj_bb = bc.BlackBoxProjector(sampler_w, arguments.proj_dim,
                                  model.log_likelihood,
                                  model.grad_z_log_likelihood)

    #######################################
    #######################################
    ## Step 4: Construct Coreset
    #######################################
    #######################################

    print('Creating coreset construction objects')
    #create coreset construction objects
    sparsevi = bc.SparseVICoreset(Z,
                                  prj_bb,
                                  opt_itrs=arguments.opt_itrs,
                                  step_sched=eval(arguments.step_sched))
    giga_optimal = bc.HilbertCoreset(Z, prj_optimal)
    giga_realistic = bc.HilbertCoreset(Z, prj_realistic)
    unif = bc.UniformSamplingCoreset(Z)

    algs = {
        'SVI': sparsevi,
        'GIGA-OPT': giga_optimal,
        'GIGA-REAL': giga_realistic,
        'US': unif
    }
    alg = algs[arguments.alg]

    cputs = np.zeros(Ms.shape[0])
    mcmc_time_per_itr = np.zeros(Ms.shape[0])
    csizes = np.zeros(Ms.shape[0])
    Fs = np.zeros(Ms.shape[0])
    rklw = np.zeros(Ms.shape[0])
    fklw = np.zeros(Ms.shape[0])
    mu_errs = np.zeros(Ms.shape[0])
    Sig_errs = np.zeros(Ms.shape[0])

    print('Running coreset construction / MCMC for ' + arguments.dataset +
          ' ' + arguments.alg + ' ' + str(arguments.trial))
    t_alg = 0.
    for m in range(Ms.shape[0]):
        print('M = ' + str(Ms[m]) + ': coreset construction, ' +
              arguments.alg + ' ' + arguments.dataset + ' ' +
              str(arguments.trial))
        #this runs alg up to a level of M; on the next iteration, it will continue from where it left off
        t0 = time.process_time()
        itrs = (Ms[m] if m == 0 else Ms[m] - Ms[m - 1])
        alg.build(itrs)
        t_alg += time.process_time() - t0
        wts, pts, idcs = alg.get()

        print('M = ' + str(Ms[m]) + ': MCMC')
        # Use MCMC on the coreset, measure time taken
        stanY = np.zeros(idcs.shape[0])
        stanY[:] = Y[idcs]
        stanY[stanY == -1] = 0
        sampler_data = {
            'x': X[idcs, :],
            'y': stanY.astype(int),
            'w': wts,
            'd': X.shape[1],
            'n': idcs.shape[0]
        }
        cst_samples, t_cst_mcmc = mcmc.run(sampler_data,
                                           arguments.mcmc_samples_coreset,
                                           arguments.model,
                                           model.stan_representation,
                                           arguments.trial)
        cst_samples = cst_samples['theta']
        #TODO see note above re: full mcmc sampling
        t_cst_mcmc_per_step = t_cst_mcmc / (arguments.mcmc_samples_coreset * 2)

        print('M = ' + str(Ms[m]) + ': Approximating posterior with Gaussian')
        muw = cst_samples.mean(axis=0)
        Sigw = np.cov(cst_samples, rowvar=False)
        LSigw = np.linalg.cholesky(Sigw)
        LSigwInv = solve_triangular(LSigw,
                                    np.eye(LSigw.shape[0]),
                                    lower=True,
                                    overwrite_b=True,
                                    check_finite=False)

        print('M = ' + str(Ms[m]) + ': Computing metrics')
        cputs[m] = t_alg
        mcmc_time_per_itr[m] = t_cst_mcmc_per_step
        csizes[m] = (wts > 0).sum()
        gcs = np.array([
            model.grad_th_log_joint(Z[idcs, :], full_samples[i, :], wts)
            for i in range(full_samples.shape[0])
        ])
        gfs = np.array([
            model.grad_th_log_joint(Z, full_samples[i, :], np.ones(Z.shape[0]))
            for i in range(full_samples.shape[0])
        ])
        Fs[m] = (((gcs - gfs)**2).sum(axis=1)).mean()
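        # Fs[m] is the mean squared l2 distance between log-joint gradients
        # under the weighted coreset (gcs) and the full dataset (gfs),
        # averaged over the full-posterior MCMC samples.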
        rklw[m] = KL(muw, Sigw, mup, LSigpInv.T.dot(LSigpInv))
        fklw[m] = KL(mup, Sigp, muw, LSigwInv.T.dot(LSigwInv))
        mu_errs[m] = np.sqrt(((mup - muw)**2).sum()) / np.sqrt((mup**2).sum())
        Sig_errs[m] = np.sqrt(((Sigp - Sigw)**2).sum()) / np.sqrt(
            (Sigp**2).sum())

    results.save(arguments,
                 csizes=csizes,
                 Ms=Ms,
                 cputs=cputs,
                 Fs=Fs,
                 full_mcmc_time_per_itr=full_mcmc_time_per_itr,
                 mcmc_time_per_itr=mcmc_time_per_itr,
                 rklw=rklw,
                 fklw=fklw,
                 mu_errs=mu_errs,
                 Sig_errs=Sig_errs)