def test_simulation(self):
    """ CorrFitter.simulated_data_iter """
    # Single-correlator model; fit it once to get reference parameters.
    models = [ self.mkcorr(a="a", b="a", dE="dE", tp=None) ]
    fitter = self.dofit(models)
    data = self.data
    diter = gv.BufferDict()  # NOTE(review): overwritten below; dead assignment
    k = list(data.keys())[0]
    # make n config dataset corresponding to data
    n = 100
    # Inflate the covariance by n so that averaging n random configs
    # reproduces the original data covariance.
    diter = gv.raniter(
        g = gv.gvar(gv.mean(self.data[k]), gv.evalcov(self.data[k]) * n),
        n = n
        )
    dataset = gv.dataset.Dataset()
    for d in diter:
        dataset.append(k, d)
    pexact = fitter.fit.pmean
    covexact = gv.evalcov(gv.dataset.avg_data(dataset)[k])
    # Each simulated data set should fit consistently with pexact and
    # carry the same covariance as the averaged dataset.
    for sdata in fitter.simulated_data_iter(n=2, dataset=dataset):
        sfit = fitter.lsqfit(
            data=sdata, prior=self.prior, p0=pexact, print_fit=False
            )
        diff = dict()
        for i in ['a', 'logdE']:
            diff[i] = sfit.p[i][0] - pexact[i][0]
        c2 = gv.chi2(diff)
        self.assertLess(c2/c2.dof, 15.)
        # NOTE(review): placement inside the loop assumed from collapsed
        # source — verify against upstream history.
        self.assert_arraysclose(gv.evalcov(sdata[k]), covexact)
def _assert_similar_gvars(g, h, rtol, atol):
    """Assert that g and h agree: close means and close covariance matrices."""
    # Elementwise comparison of the central values.
    np.testing.assert_allclose(gvar.mean(g), gvar.mean(h), rtol=rtol, atol=atol)
    # Covariances are compared on the flattened arrays.
    gflat = np.reshape(g, -1)
    hflat = np.reshape(h, -1)
    assert_close_matrices(
        gvar.evalcov(gflat), gvar.evalcov(hflat), rtol=rtol, atol=atol)
def test_data():
    """All equivalent ways of passing data to empbayes_fit give the same fit."""
    # Hyperprior: a single positive scale parameter, log-transformed.
    hp = gvar.BufferDict({'log(sdev)': gvar.log(gvar.gvar(1, 1))})
    x = np.linspace(0, 5, 10)
    def gpfactory(hp):
        # GP whose kernel amplitude is set by the hyperparameter.
        gp = lgp.GP(lgp.ExpQuad() * hp['sdev']**2)
        gp.addx(x, 'x')
        return gp
    truehp = gvar.sample(hp)
    truegp = gpfactory(truehp)
    trueprior = truegp.prior()
    def makeerr(bd, err):
        # BufferDict with the same keys but a constant error buffer.
        return gvar.BufferDict(bd, buf=np.full_like(bd.buf, err))
    data_noerr = gvar.sample(trueprior)
    error = makeerr(data_noerr, 0.1)
    zeroerror = makeerr(data_noerr, 0)
    zerocov = gvar.evalcov(gvar.gvar(data_noerr, zeroerror))
    data_err = gvar.make_fake_data(gvar.gvar(data_noerr, error))
    # Each inner list holds representations of the *same* data:
    # plain values, gvars, tuples with covariance, and callables.
    datas = [
        [
            data_noerr,
            gvar.gvar(data_noerr),
            (data_noerr, ),
            (data_noerr, zerocov),
            lambda _: data_noerr,
            lambda _: gvar.gvar(data_noerr),
            lambda _: (data_noerr, ),
            lambda _: (data_noerr, zerocov),
        ],
        [
            data_err,
            (data_err, ),
            (gvar.mean(data_err), gvar.evalcov(data_err)),
            lambda _: data_err,
            lambda _: (data_err, ),
            lambda _: (gvar.mean(data_err), gvar.evalcov(data_err)),
        ],
    ]
    for datasets in datas:
        fits = []
        for data in datasets:
            fit = lgp.empbayes_fit(hp, gpfactory, data)
            fits.append(fit)
        # All representations must converge to the same minimum.
        p = fits[0].minresult.x
        for fit in fits[1:]:
            np.testing.assert_allclose(fit.minresult.x, p, atol=1e-6)
def test_cov():
    """evalcov_sparse matches gvar.evalcov for primary and transformed gvars."""
    def check(z):
        # Sparse and dense covariance must agree exactly.
        assert np.all(evalcov_sparse(z) == gvar.evalcov(z))
    # Random PSD covariance built from an integer matrix.
    a = np.random.randint(4, size=(10, 10))
    x = gvar.gvar(np.zeros(10), a.T @ a)
    check(x)
    # Linear transformation of correlated gvars.
    transf = np.random.randint(4, size=(5, 10))
    check(transf @ x)
def parse_gvar_to_str(obj: "GVar | str | ndarray | None") -> str:
    """Inverts `parse_str_to_gvar`.

    Renders ``obj`` as ``"mean | covariance"``. ``None`` maps to the empty
    string and strings pass through unchanged.
    """
    if obj is None:
        return ""
    elif isinstance(obj, str):
        return obj
    elif isinstance(obj, ndarray):
        # Array of gvars: comma-separated means, then the covariance matrix.
        return (
            array2string(mean(obj), separator=",", formatter={"float": str})
            + " | "
            + array2string(
                evalcov(obj), separator=",", formatter={"float": str}))
    elif isinstance(obj, GVar):
        # Scalar gvar: mean, then evalcov (presumably the variance) — the
        # str() forms must match what parse_str_to_gvar expects.
        return str(mean(obj)) + " | " + str(evalcov(obj))
    else:
        raise TypeError(f"Cannot parse {obj} ({type(obj)}) to str")
def test_ravgdict_unwgtd(self): " unweighted RAvgDict " # scalar mean_s = np.random.uniform(-10., 10.) sdev_s = 0.1 x_s = gv.gvar(mean_s, sdev_s) # array mean_a = np.random.uniform(-10., 10., (2, )) cov_a = np.array([[1., 0.5], [0.5, 2.]]) / 10. x_a = gv.gvar(mean_a, cov_a) N = 30 r_a = gv.raniter(x_a, N) ravg = RAvgDict(dict(scalar=1.0, array=[[2., 3.]]), weighted=False) for ri in r_a: ravg.add( dict(scalar=gv.gvar(x_s(), sdev_s), array=[gv.gvar(ri, cov_a)])) np_assert_allclose(ravg['scalar'].sdev, x_s.sdev / (N**0.5)) self.assertLess(abs(ravg['scalar'].mean - mean_s), 5 * ravg['scalar'].sdev) np_assert_allclose(gv.evalcov(ravg['array'].flat), cov_a / N) for i in range(2): self.assertLess(abs(mean_a[i] - ravg['array'][0, i].mean), 5 * ravg['array'][0, i].sdev) self.assertEqual(ravg.dof, 2 * N - 2 + N - 1) self.assertGreater(ravg.Q, 1e-3)
def test_ravgdict_wgtd(self): " weighted RAvgDict " # scalar mean_s = np.random.uniform(-10., 10.) xbig_s = gv.gvar(mean_s, 1.) xsmall_s = gv.gvar(mean_s, 0.1) # array mean_a = np.random.uniform(-10., 10., (2, )) cov_a = np.array([[1., 0.5], [0.5, 2.]]) invcov = np.linalg.inv(cov_a) N = 30 xbig_a = gv.gvar(mean_a, cov_a) rbig_a = gv.raniter(xbig_a, N) xsmall_a = gv.gvar(mean_a, cov_a / 10.) rsmall_a = gv.raniter(xsmall_a, N) ravg = RAvgDict(dict(scalar=1.0, array=[[2., 3.]])) for rb, rw in zip(rbig_a, rsmall_a): ravg.add( dict(scalar=gv.gvar(xbig_s(), 1.), array=[gv.gvar(rb, cov_a)])) ravg.add( dict(scalar=gv.gvar(xsmall_s(), 0.1), array=[gv.gvar(rw, cov_a / 10.)])) np_assert_allclose( ravg['scalar'].sdev, 1 / (N * (1. / xbig_s.var + 1. / xsmall_s.var))**0.5) self.assertLess(abs(ravg['scalar'].mean - mean_s), 5 * ravg['scalar'].sdev) np_assert_allclose(gv.evalcov(ravg['array'].flat), cov_a / (10. + 1.) / N) for i in range(2): self.assertLess(abs(mean_a[i] - ravg['array'][0, i].mean), 5 * ravg['array'][0, i].sdev) self.assertEqual(ravg.dof, 4 * N - 2 + 2 * N - 1) self.assertGreater(ravg.Q, 0.5e-3)
def test_ravgdict_unwgtd(self): " unweighted RAvgDict " # scalar mean_s = np.random.uniform(-10., 10.) sdev_s = 0.1 x_s = gv.gvar(mean_s, sdev_s) # array mean_a = np.random.uniform(-10., 10., (2,)) cov_a = np.array([[1., 0.5], [0.5, 2.]]) / 10. x_a = gv.gvar(mean_a, cov_a) N = 30 r_a = gv.raniter(x_a, N) ravg = RAvgDict(dict(scalar=1.0, array=[[2., 3.]]), weighted=False) for ri in r_a: ravg.add(dict( scalar=gv.gvar(x_s(), sdev_s), array=[gv.gvar(ri, cov_a)] )) np_assert_allclose( ravg['scalar'].sdev, x_s.sdev / (N ** 0.5)) self.assertLess( abs(ravg['scalar'].mean - mean_s), 5 * ravg['scalar'].sdev ) np_assert_allclose(gv.evalcov(ravg['array'].flat), cov_a / N) for i in range(2): self.assertLess( abs(mean_a[i] - ravg['array'][0, i].mean), 5 * ravg['array'][0, i].sdev ) self.assertEqual(ravg.dof, 2 * N - 2 + N - 1) self.assertGreater(ravg.Q, 1e-3)
def test_pickle_gvar(self):
    """BufferDicts of correlated gvars survive a pickle round trip."""
    def check_roundtrip(bd):
        # Values and covariance must be unchanged (compared via str forms).
        restored = pckl.loads(pckl.dumps(bd))
        self.assertEqual(str(bd), str(restored))
        self.assertEqual(str(gv.evalcov(bd)), str(gv.evalcov(restored)))
    b = BufferDict()
    b['a'] = gv.gvar(1, 2)
    b['b'] = [gv.gvar(3, 4), gv.gvar(5, 6)]
    b['b'] += gv.gvar(1, 1)
    b['c'] = gv.gvar(10, 1)
    check_roundtrip(b)
    # no uncorrelated bits
    b['a'] += b['c']
    check_roundtrip(b)
def assert_close_gvar(self, sol, result):
    """Assert result matches sol both in mean (Mahalanobis sense) and covariance."""
    diff = np.reshape(sol - result, -1)
    diffmean = gvar.mean(diff)
    # SVD-regularize before inverting; sol's covariance sets the metric.
    solcov = gvar.evalcov(gvar.svd(sol))
    # Quadratic form: squared distance of the mean difference in units of
    # sol's covariance.
    q = diffmean @ linalg.solve(solcov, diffmean, assume_a='pos')
    # once I got:
    # LinAlgWarning: Ill-conditioned matrix (rcond=5.70425e-17): result may
    # not be accurate.
    np.testing.assert_allclose(q, 0, atol=1e-7)
    # TODO to compare matrices, use the 2-norm.
    # Covariance of the difference must vanish relative to sol's scale,
    # compared via largest eigenvalues.
    diffcov = gvar.evalcov(diff)
    solmax = np.max(linalg.eigvalsh(solcov))
    diffmax = np.max(linalg.eigvalsh(diffcov))
    np.testing.assert_allclose(diffmax / solmax, 0, atol=1e-10)
def test_cov(g):
    """Covariance blocks from evalcov_blocks reassemble into gvar.evalcov."""
    if hasattr(g, 'keys'):
        g = gvar.BufferDict(g)
    g = g.flat[:]
    size = len(g)
    dense = np.zeros((size, size), dtype=float)
    # Scatter every block back into the dense matrix.
    for idx, block in ef.evalcov_blocks(g):
        dense[idx[:, None], idx] = block
    assert str(gvar.evalcov(g)) == str(dense)
def test_expval(self):
    " integrator(f ...) "
    # Wide distribution: sdev of first component is 20 (var 400).
    xarray = gv.gvar([5., 3.], [[400., 0.9], [0.9, 1.]])
    xdict = gv.BufferDict([(0, 1), (1, 1)])
    xdict = gv.BufferDict(xdict, buf=xarray)
    xscalar = xarray[0]
    def fscalar(x):
        if hasattr(x, 'keys'):
            x = x.buf
        return x.flat[0]
    def farray(x):
        # First four moments of the first component.
        if hasattr(x, 'keys'):
            x = x.buf
        return gv.PDFStatistics.moments(x.flat[0])
    def fdict(x):
        if hasattr(x, 'keys'):
            x = x.buf
        return gv.BufferDict([(0, x.flat[0]), (1, x.flat[0]**2),
                              (2, x.flat[0]**3), (3, x.flat[0]**4)])
    for x in [xscalar, xarray, xdict]:
        integ = PDFIntegrator(x)
        # Warm-up run to adapt the integrator to the PDF.
        integ(neval=1000, nitn=5)
        for f in [fscalar, farray, fdict]:
            r = integ(f, neval=1000, nitn=5, adapt=False)
            if f is fscalar:
                self.assertTrue(abs(r.mean - 5) < 5. * r.sdev)
            else:
                if hasattr(r, 'keys'):
                    r = r.buf
                s = gv.PDFStatistics(r)
                self.assertTrue(abs(s.mean.mean - 5.) < 10. * s.mean.sdev)
                self.assertTrue(abs(s.sdev.mean - 20.) < 10. * s.sdev.sdev)
                self.assertTrue(abs(s.skew.mean) < 10. * s.skew.sdev)
                self.assertTrue(abs(s.ex_kurt.mean) < 10. * s.ex_kurt.sdev)
    # covariance test
    # N.B. Integrand has two entries that are identical,
    # which leads to a singular covariance -- so SVD
    # is essential here. The off-diagonal elements
    # of np.outer(x, x) are what cause the singularity.
    def fcov(x):
        return dict(x=x, xx=np.outer(x, x))
    integ = PDFIntegrator(xarray)
    r = integ(fcov, neval=1000, nitn=5)
    rmean = r['x']
    rcov = r['xx'] - np.outer(r['x'], r['x'])
    xmean = gv.mean(xarray)
    xcov = gv.evalcov(xarray)
    for i in [0, 1]:
        self.assertTrue(abs(rmean[i].mean - xmean[i]) < 5. * rmean[i].sdev)
        for j in [0, 1]:
            self.assertTrue(
                abs(rcov[i, j].mean - xcov[i, j]) < 5. * rcov[i, j].sdev)
def test_priorpoints_cache():
    """The same points added under two keys give identical prior covariances."""
    points = np.arange(20)
    process = lgp.GP(lgp.ExpQuad())
    for key in (0, 1):
        process.addx(points, key)
    cov = gvar.evalcov(process.prior())
    # Same inputs => identical auto-covariances and cross-covariance.
    util.assert_equal(cov[0, 0], cov[1, 1])
    util.assert_equal(cov[0, 0], cov[0, 1])
def chisq_test(g, alpha):
    """chisquare test on g being 0"""
    values = flat(g)
    center = gvar.mean(values)
    # Quadratic form of the means in the covariance metric.
    statistic = quad(gvar.evalcov(values), center)
    dist = stats.chi2(len(center))
    # Two-sided check: the statistic sits in neither extreme tail.
    assert dist.sf(statistic) > alpha / 2
    assert dist.cdf(statistic) > alpha / 2
def fit_data(data, fit_range, particles, Pcm, nstates, nsinks, cov=None,
             print_fit=True):
    """Fit correlator data with lsqfit.

    Args:
        data: correlator data (gvars) to fit.
        fit_range: independent variable (time slices) passed as x.
        particles: "pipi" or "pion" — selects priors and fit function.
        Pcm: total momentum, forwarded to the prior builders.
        nstates: number of states in the fit function.
        nsinks: 1 for smeared-only data, otherwise smeared + point sinks.
        cov: optional frozen covariance matrix; if None it is computed
            from the data with gv.evalcov.
        print_fit: print progress and the final fit when True.

    Returns:
        The lsqfit.nonlinear_fit result.

    Fix notes: converted Python-2 print statements to single-argument
    print() calls (valid in both Python 2 and 3) and removed the dead
    ``else: pass`` branches.
    """
    if print_fit:
        if nsinks == 1:
            print("fitting with " + str(nstates) + " states and smeared data")
        else:
            print("fitting with " + str(nstates)
                  + " states and point and smeared data")
    x = fit_range
    y = data
    if cov is None:
        cov_matrix = gv.evalcov(y)
        #cov_matrix = gv.evalcov(y)/len(y)
        #cov_matrix = np.identity(len(x))
        if print_fit:
            print(np.shape(cov_matrix))
    else:
        cov_matrix = cov
    # lsqfit's 3-tuple data form expects central values, not gvars.
    # NOTE(review): in the collapsed original this line's indentation was
    # ambiguous (could have been inside the else branch) — confirm.
    y = gv.mean(y)
    #you'll need to update the pipi function to deal with number of sinks as well as nstates
    if particles == "pipi":
        p = pipi_priors(nstates, nsinks, Pcm)
        #fitc = pipi_fit_function(nstates=nstates,T=len(data))
        if nsinks == 1:
            fitc = pipi_fit_function(nstates=nstates, T=48, sinks=["s"])
        else:
            fitc = pipi_fit_function(nstates=nstates, T=48, sinks=["s", "p"])
    elif particles == "pion":
        p = pion_priors(nstates, nsinks, Pcm)
        p0 = pion_priors(nstates, nsinks, Pcm)  # kept for the commented fit below
        if nsinks == 1:
            fitc = pion_fit_function(nstates=nstates, T=48, sinks=["s"])
        else:
            fitc = pion_fit_function(nstates=nstates, T=48, sinks=["s", "p"])
    #fit = lsqfit.nonlinear_fit(data=(x,y,cov_matrix),prior=p0,fcn=fitc.full_func)
    fit = lsqfit.nonlinear_fit(data=(x, y, cov_matrix), prior=p,
                               fcn=fitc.full_func)
    if print_fit:
        print(fit)
    return fit
def test_priortransf():
    """Raw prior covariance of a transformed key matches the gvar version."""
    process = lgp.GP(lgp.ExpQuad())
    x, y = np.arange(40).reshape(2, -1)
    process.addx(x, 0)
    process.addx(y, 1)
    # Key 2 is a linear combination of keys 0 and 1.
    process.addtransf({0: x, 1: y}, 2)
    raw_cov = process.prior(2, raw=True)
    gvar_cov = gvar.evalcov(process.prior(2))
    np.testing.assert_allclose(raw_cov, gvar_cov, rtol=1e-15)
def gv_to_samples_corr(gv_ls, N_samp):
    '''transform gvar to bs samples with correlation'''
    # Sample a multivariate normal built from the gvars' means and
    # full covariance matrix, preserving their correlations.
    centers = [g.mean for g in gv_ls]
    covariance = gv.evalcov(gv_ls)
    generator = np.random.default_rng()
    return generator.multivariate_normal(centers, covariance, size=N_samp)
def test_expval(self):
    " integrator(f ...) "
    # Narrow variant of this test: var of first component is 4 (sdev 2).
    xarray = gv.gvar([5., 3.], [[4., 0.9], [0.9, 1.]])
    xdict = gv.BufferDict([(0, 1), (1, 1)])
    xdict = gv.BufferDict(xdict, buf=xarray)
    xscalar = xarray[0]
    def fscalar(x):
        if hasattr(x, 'keys'):
            x = x.buf
        return x.flat[0]
    def farray(x):
        # Moments of the first component for PDFStatistics.
        if hasattr(x, 'keys'):
            x = x.buf
        return gv.PDFStatistics.moments(x.flat[0])
    def fdict(x):
        if hasattr(x, 'keys'):
            x = x.buf
        return gv.BufferDict([(0, x.flat[0]), (1, x.flat[0]**2),
                              (2, x.flat[0]**3), (3, x.flat[0]**4)])
    for x in [xscalar, xarray, xdict]:
        integ = PDFIntegrator(x)
        # Adapt the integrator before measuring.
        integ(neval=1000, nitn=5)
        for f in [fscalar, farray, fdict]:
            r = integ(f, neval=1000, nitn=5, adapt=False)
            if f is fscalar:
                self.assertTrue(abs(r.mean - 5) < 5. * r.sdev)
            else:
                if hasattr(r, 'keys'):
                    r = r.buf
                s = gv.PDFStatistics(r)
                self.assertTrue(abs(s.mean.mean - 5.) < 10. * s.mean.sdev)
                self.assertTrue(abs(s.sdev.mean - 2.) < 10. * s.sdev.sdev)
                self.assertTrue(abs(s.skew.mean) < 10. * s.skew.sdev)
                self.assertTrue(abs(s.ex_kurt.mean) < 10. * s.ex_kurt.sdev)
    # covariance test
    def fcov(x):
        return dict(x=x, xx=np.outer(x, x))
    integ = PDFIntegrator(xarray)
    r = integ(fcov, neval=1000, nitn=5)
    rmean = r['x']
    rcov = r['xx'] - np.outer(r['x'], r['x'])
    xmean = gv.mean(xarray)
    xcov = gv.evalcov(xarray)
    for i in [0, 1]:
        self.assertTrue(abs(rmean[i].mean - xmean[i]) < 5. * rmean[i].sdev)
        for j in [0, 1]:
            self.assertTrue(
                abs(rcov[i, j].mean - xcov[i, j]) < 5. * rcov[i, j].sdev)
def assert_same_gvars(g, h, atol=1e-8):
    """Assert g and h are the same gvars: their difference has zero mean
    and zero covariance (within atol)."""
    delta = np.reshape(g - h, -1)
    np.testing.assert_allclose(
        gvar.mean(delta), np.zeros(delta.shape), rtol=0, atol=atol)
    # 2 * shape-tuple duplicates it, producing the square matrix shape.
    assert_close_matrices(
        gvar.evalcov(delta), np.zeros(2 * delta.shape), rtol=0, atol=atol)
def dump_precompute(g, outputfile):
    """Pickle the means and pairwise covariance blocks of BufferDict g.

    Args:
        g: a gvar BufferDict.
        outputfile: a file path or an open binary file object. In either
            case the file is closed on return (matching the original
            behavior, which also closed caller-supplied handles).

    The pickled payload is ``(means, covm)`` where ``covm[keyi, keyj]`` is
    the covariance block between the two keys' slices of ``g.buf``.

    Fix notes: wrapped the body in try/finally so the file is closed even
    if pickling (or covariance evaluation) raises.
    """
    if isinstance(outputfile, str):
        outputfile = open(outputfile, 'wb')
    try:
        mn = gv.mean(g)
        # Covariance of the full flat buffer, sliced per key pair below.
        evb = gv.evalcov(g.buf)
        covm = {}
        for keyi in g:
            for keyj in g:
                covm[keyi, keyj] = evb[g.slice(keyi), g.slice(keyj)]
        pickle.dump((mn, covm), outputfile)
    finally:
        outputfile.close()
def test_cov(g):
    """Compressed evalcov_blocks output reassembles into gvar.evalcov."""
    if hasattr(g, 'keys'):
        g = gvar.BufferDict(g)
    blocks = ef.evalcov_blocks(g, compress=True)
    g = g.flat[:]
    size = len(g)
    dense = np.zeros((size, size), dtype=float)
    # With compress=True the first entry carries standard deviations of
    # the uncorrelated gvars; the rest are full covariance blocks.
    first_idx, sdevs = blocks[0]
    if len(first_idx) > 0:
        dense[first_idx, first_idx] = sdevs**2
    for idx, block in blocks[1:]:
        dense[idx[:, None], idx] = block
    assert str(gvar.evalcov(g)) == str(dense)
def avg_quark_prop(size_list=[8, 8, 8, 8], cfgs_list=range(200, 1200, 50),
                   src_type='evenodd_wall', t0=0, color_list=[0], ens_tag='',
                   prop_tag='eoprop_', mass=0.5, figname='', **lat_kwargs):
    """
    This function loads the propagators calculated in
    :meth:`gauge_tools.examples.staggered_quark_prop` and averages them.
    Most key word arguments are similar to those of
    :meth:`gauge_tools.examples.staggered_quark_prop`, except for
    ``figname``, which if not an empty string, is a signal to create a
    figure and save it as a pdf in `figname`.
    """
    # NOTE(review): mutable default arguments (size_list, color_list) — not
    # mutated here, but consider tuples to be safe.
    fname_load = lambda ind_cfg: "{}{}m{}_{}.npy".format(
        ens_tag, prop_tag, mass, ind_cfg)
    fname_save = "{}{}m{}_avg.p".format(ens_tag, prop_tag, mass)
    import gauge_tools as gt
    lat = gt.lattice(*size_list, **lat_kwargs)
    props_list = []
    props_projected = []
    # Load each configuration's propagator and project onto zero spatial
    # momentum for the first color.
    for n_cfg in cfgs_list:
        prop_v_field = np.load(fname_load(n_cfg), allow_pickle=True)
        props_list.append(prop_v_field)
        props_projected.append(
            gt.util.quark.propagator.ks_project_spatialmom(
                prop_v_field, color=color_list[0]))
    # Ensemble average with correlations (gvar dataset).
    props_proj_gvar = gv.dataset.avg_data(props_projected)
    # NOTE(review): file handle from open() is never closed explicitly.
    pickle.dump(
        dict(mean=gv.mean(props_proj_gvar), cov=gv.evalcov(props_proj_gvar)),
        open(fname_save, 'wb'))
    # Stash results on the function object for interactive inspection.
    avg_quark_prop.props_list = props_list
    avg_quark_prop.props_proj_gvar = props_proj_gvar
    avg_quark_prop.lat = lat
    if figname != '' and PLOTS:
        plt.ion()
        fig = plt.figure()
        plt.errorbar(range(len(props_proj_gvar)),
                     np.abs(gv.mean(props_proj_gvar)),
                     gv.sdev(props_proj_gvar),
                     fmt='.', label='interacting')
        # Overlay the free-theory curve for comparison.
        free_theory(gt, src_type=src_type, t0=t0, mass=mass,
                    color_list=color_list, print_=False)
        # NOTE(review): "qaurk" typo in the displayed title.
        plt.title('qaurk propagator in free and interacting theory')
        plt.legend()
        plt.yscale('log')
        fig.savefig(figname, format="pdf")
def test_expval(self):
    " integrator(f ...) "
    # NOTE(review): near-duplicate of another test_expval in this file.
    xarray = gv.gvar([5., 3.], [[4., 0.9], [0.9, 1.]])
    xdict = gv.BufferDict([(0, 1), (1, 1)])
    xdict = gv.BufferDict(xdict, buf=xarray)
    xscalar = xarray[0]
    def fscalar(x):
        if hasattr(x, 'keys'):
            x = x.buf
        return x.flat[0]
    def farray(x):
        if hasattr(x, 'keys'):
            x = x.buf
        return gv.PDFStatistics.moments(x.flat[0])
    def fdict(x):
        if hasattr(x, 'keys'):
            x = x.buf
        return gv.BufferDict([
            (0, x.flat[0]), (1, x.flat[0] ** 2),
            (2, x.flat[0] ** 3), (3, x.flat[0] ** 4)
            ])
    for x in [xscalar, xarray, xdict]:
        integ = PDFIntegrator(x)
        # Adaptation pass before the measured runs.
        integ(neval=1000, nitn=5)
        for f in [fscalar, farray, fdict]:
            r = integ(f, neval=1000, nitn=5, adapt=False)
            if f is fscalar:
                self.assertTrue(abs(r.mean - 5) < 5. * r.sdev)
            else:
                if hasattr(r, 'keys'):
                    r = r.buf
                s = gv.PDFStatistics(r)
                self.assertTrue(abs(s.mean.mean - 5.) < 10. * s.mean.sdev)
                self.assertTrue(abs(s.sdev.mean - 2.) < 10. * s.sdev.sdev)
                self.assertTrue(abs(s.skew.mean) < 10. * s.skew.sdev)
                self.assertTrue(abs(s.ex_kurt.mean) < 10. * s.ex_kurt.sdev)
    # covariance test
    def fcov(x):
        return dict(x=x, xx=np.outer(x, x))
    integ = PDFIntegrator(xarray)
    r = integ(fcov, neval=1000, nitn=5)
    rmean = r['x']
    rcov = r['xx'] - np.outer(r['x'], r['x'])
    xmean = gv.mean(xarray)
    xcov = gv.evalcov(xarray)
    for i in [0, 1]:
        self.assertTrue(abs(rmean[i].mean - xmean[i]) < 5. * rmean[i].sdev)
        for j in [0, 1]:
            self.assertTrue(
                abs(rcov[i,j].mean - xcov[i,j]) < 5. * rcov[i,j].sdev)
def plot_error_ellipsis(self, x_key, y_key, observable):
    """Plot the error-ellipse principal axes for two posterior quantities.

    Args:
        x_key, y_key: posterior keys for the two quantities.
        observable: which observable's posterior to read.

    Returns:
        The matplotlib figure (closed, ready for saving/embedding).

    Fix notes: the mathtext strings contained invalid escape sequences
    (``'\\sigma'``, ``'\\_'`` in non-raw strings) which raise
    SyntaxWarning on modern Python — rebuilt byte-identically from raw
    string pieces. Renamed the eig outputs for clarity.
    """
    x = self._get_posterior(x_key)[observable]
    y = self._get_posterior(y_key)[observable]
    fig, ax = plt.subplots()
    corr = '{0:.3g}'.format(gv.evalcorr([x, y])[0, 1])
    std_x = '{0:.3g}'.format(gv.sdev(x))
    std_y = '{0:.3g}'.format(gv.sdev(y))
    # Raw pieces keep the literal backslashes for mathtext; '\n' stays a
    # real newline. Byte-identical to the original text.
    text = (('$R_{x, y}=$ %s\n' + r' $\sigma_x =$ %s' + '\n'
             + r' $\sigma_y =$ %s') % (corr, std_x, std_y))
    # these are matplotlib.patch.Patch properties
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    # place a text box in upper left in axes coords
    ax.text(0.05, 0.95, text, transform=ax.transAxes, fontsize=14,
            verticalalignment='top', bbox=props)
    C = gv.evalcov([x, y])
    eigvals, eigvecs = np.linalg.eig(C)
    # Draw each principal axis of the covariance ellipse through the mean.
    for e, v in zip(eigvals, eigvecs.T):
        plt.plot([gv.mean(x) - 1 * np.sqrt(e) * v[0],
                  1 * np.sqrt(e) * v[0] + gv.mean(x)],
                 [gv.mean(y) - 1 * np.sqrt(e) * v[1],
                  1 * np.sqrt(e) * v[1] + gv.mean(y)],
                 'k-', lw=2)
    plt.grid()
    plt.gca().set_aspect('equal', adjustable='datalim')
    # Escape underscores for the (TeX-rendered) axis labels.
    plt.xlabel(x_key.replace('_', r'\_'), fontsize=24)
    plt.ylabel(y_key.replace('_', r'\_'), fontsize=24)
    fig = plt.gcf()
    plt.close()
    return fig
def test_ravgarray_unwgtd(self):
    " unweighted RAvgArray "
    true_mean = np.random.uniform(-10., 10., (2,))
    true_cov = np.array([[1., 0.5], [0.5, 2.]]) / 10.
    nsamp = 30
    sampler = gv.raniter(gv.gvar(true_mean, true_cov), nsamp)
    ravg = RAvgArray((1, 2), weighted=False)
    for sample in sampler:
        ravg.add([gv.gvar(sample, true_cov)])
    # Unweighted average of N iid samples: covariance shrinks by 1/N.
    np_assert_allclose(gv.evalcov(ravg.flat), true_cov / nsamp)
    for i in range(2):
        self.assertLess(
            abs(true_mean[i] - ravg[0, i].mean), 5 * ravg[0, i].sdev)
    self.assertEqual(ravg.dof, 2 * nsamp - 2)
    self.assertGreater(ravg.Q, 1e-3)
def visualize_correlations(self, channel):
    """
    Visualize the correlations by making heatmaps of the correlation
    and covariance matrices and by plotting their eigenvalue spectra.
    Args:
        channel: str, the name of the channel (e.g., 'f_parallel')
    Returns:
        (fig, axarr)

    Fix notes: the eigenvalue-spectrum panel called ``plt.plot(w, ax=ax3,
    ...)`` — pyplot's ``plot`` has no ``ax`` keyword, so this raised at
    runtime; it now draws on ``ax3`` directly. Also corrected the
    misspelled "Covariance matirx" title.
    """
    if channel not in self._valid_channels:
        raise ValueError("Unsupported channel", channel)
    dataframe = self.__getattribute__(channel)
    groups = dataframe.groupby('ens_id')
    ncols = len(groups)
    fig, axarr = plt.subplots(nrows=3, ncols=ncols, figsize=(5*ncols, 15))
    for idx, (ens_id, df) in enumerate(groups):
        ax_col = axarr[:, idx]
        ax1, ax2, ax3 = ax_col
        # Fix an ordering so the matrix structure is reproducible.
        df = df.sort_values(by=['alias_light', 'alias_heavy', 'phat2'])
        corr = gv.evalcorr(df['form_factor'].values)
        sns.heatmap(corr, ax=ax1)
        cov = gv.evalcov(df['form_factor'].values)
        sns.heatmap(cov, ax=ax2)
        # Compare eigenvalue spectra, each normalized to its largest value.
        matrices = {'corr': corr, 'cov:full': cov, 'cov:diag': np.diag(cov)}
        markers = ['o', 's', '^']
        for (label, mat), marker in zip(matrices.items(), markers):
            if label == 'cov:diag':
                w = mat
            else:
                w = np.linalg.eigvals(mat)
            w = np.sort(w)[::-1]
            w /= max(w)
            # BUG FIX: draw on the target Axes instead of plt.plot(..., ax=...).
            ax3.plot(w, label=label, marker=marker)
        ax1.set_title(f"Correlation matrix: {ens_id}")
        ax2.set_title(f"Covariance matrix: {ens_id}")
        ax3.set_title(f"Eigenvalue spectra: {ens_id}")
        ax3.legend()
        ax3.set_yscale("log")
    return fig, axarr
def test_prior_gvar():
    """Raw prior covariances agree with those of the gvar prior, which is
    zero-mean."""
    process = lgp.GP(lgp.ExpQuad())
    process.addx(np.random.randn(20), 0)
    process.addx(np.random.randn(15), 1)
    process.addtransf({0: np.random.randn(15, 20)}, 2)
    process.addlintransf(
        lambda x, y: jnp.real(jnp.fft.rfft(x + y)), [1, 2], 3)
    m = np.random.randn(40, 40)
    process.addcov(m @ m.T, 4)
    process.addtransf({4: np.random.randn(8, 40), 3: np.pi}, 5)
    covs = process.prior(raw=True)
    prior = process.prior()
    # Prior means must all be exactly zero.
    for g in gvar.mean(prior).values():
        np.testing.assert_equal(g, np.zeros_like(g))
    # Raw and gvar-derived covariances must match key by key.
    gcovs = gvar.evalcov(prior)
    for k, cov in covs.items():
        util.assert_close_matrices(cov, gcovs[k], atol=1e-15, rtol=1e-9)
def fitscript_v2(trange, T, data, priors, fcn, result_flag='off'):
    # Python 2 module: print statements and integer division below are
    # load-bearing; do not port mechanically.
    # Scan (tmin, tmax) windows, fit each, and tabulate the results.
    if trange['tmax'][1] > T/2:
        sets = len(data)/T
    else:
        sets = len(data)/(T/2)
    posterior = []
    tmintbl = []
    tmaxtbl = []
    chi2tbl = []
    lgbftbl = []
    lgpostd = []
    for tmin in range(trange['tmin'][0], trange['tmin'][1]+1):
        for tmax in range(trange['tmax'][0], trange['tmax'][1]+1):
            x = x_indep(tmin, tmax)
            y = y_dep(x, data, sets)
            fit = lsqfit.nonlinear_fit(data=(x,y), prior=priors, fcn=fcn)
            if result_flag=='on':
                print tmin, tmax,
                print fit
            posterior.append(fit.p)
            tmintbl.append(tmin)
            tmaxtbl.append(tmax)
            chi2tbl.append(fit.chi2/fit.dof)
            lgbftbl.append(fit.logGBF)
            # log posterior probability
            # factor of log 2pi**k/2
            pifactor = ((tmax-tmin+1)/2.0) * np.log(2*np.pi)
            # log det data covariance
            datacov = gv.evalcov(y)  # data covariance
            L = np.linalg.cholesky(datacov)  # cholesky decomposition
            #logdetA = 2.*np.trace(np.log(L))
            # NOTE(review): logsqrtdetA is computed but unused below.
            logsqrtdetA = np.trace(np.log(L))
            chi2factor = -0.5*fit.chi2
            lgpostd.append(-pifactor-0.5*fit.chi2)
    # Collect all per-window tables; 'rawoutput' is the *last* fit only.
    fittbl = dict()
    fittbl['tmin'] = tmintbl
    fittbl['tmax'] = tmaxtbl
    fittbl['post'] = posterior
    fittbl['chi2dof'] = chi2tbl
    fittbl['logGBF'] = lgbftbl
    fittbl['logposterior'] = lgpostd
    fittbl['rawoutput'] = fit
    return fittbl
def test_ravgarray_wgtd(self):
    " weighted RAvgArray "
    if not have_gvar:
        return
    true_mean = np.random.uniform(-10., 10., (2, ))
    true_cov = np.array([[1., 0.5], [0.5, 2.]])
    invcov = np.linalg.inv(true_cov)
    nsamp = 30
    coarse = gv.raniter(gv.gvar(true_mean, true_cov), nsamp)
    fine = gv.raniter(gv.gvar(true_mean, true_cov / 10.), nsamp)
    ravg = RAvgArray(2)
    # Interleave noisy and precise measurements of the same quantity.
    for rb, rs in zip(coarse, fine):
        ravg.add(gv.gvar(rb, true_cov))
        ravg.add(gv.gvar(rs, true_cov / 10.))
    # Weighted combination: cov / (1 + 10) per pair, / N pairs.
    np_assert_allclose(gv.evalcov(ravg), true_cov / (10. + 1.) / nsamp)
    for i in range(2):
        self.assertLess(abs(true_mean[i] - ravg[i].mean), 5 * ravg[i].sdev)
    self.assertEqual(ravg.dof, 4 * nsamp - 2)
    self.assertGreater(ravg.Q, 1e-3)
def test_ravgarray_wgtd(self):
    " weighted RAvgArray "
    true_mean = np.random.uniform(-10., 10., (2,))
    true_cov = np.array([[1., 0.5], [0.5, 2.]])
    invcov = np.linalg.inv(true_cov)
    nsamp = 30
    coarse = gv.raniter(gv.gvar(true_mean, true_cov), nsamp)
    fine = gv.raniter(gv.gvar(true_mean, true_cov / 10.), nsamp)
    ravg = RAvgArray((1, 2))
    # Alternate high- and low-noise samples of the same quantity.
    for rb, rs in zip(coarse, fine):
        ravg.add([gv.gvar(rb, true_cov)])
        ravg.add([gv.gvar(rs, true_cov / 10.)])
    # Weighted combination: cov / (1 + 10) per pair, / N pairs.
    np_assert_allclose(gv.evalcov(ravg.flat), true_cov / (10. + 1.) / nsamp)
    for i in range(2):
        self.assertLess(
            abs(true_mean[i] - ravg[0, i].mean), 5 * ravg[0, i].sdev)
    self.assertEqual(ravg.dof, 4 * nsamp - 2)
    self.assertGreater(ravg.Q, 1e-3)
def test_ravgdict_wgtd(self): " weighted RAvgDict " # scalar mean_s = np.random.uniform(-10., 10.) xbig_s = gv.gvar(mean_s, 1.) xsmall_s = gv.gvar(mean_s, 0.1) # array mean_a = np.random.uniform(-10., 10., (2,)) cov_a = np.array([[1., 0.5], [0.5, 2.]]) invcov = np.linalg.inv(cov_a) N = 30 xbig_a = gv.gvar(mean_a, cov_a) rbig_a = gv.raniter(xbig_a, N) xsmall_a = gv.gvar(mean_a, cov_a / 10.) rsmall_a = gv.raniter(xsmall_a, N) ravg = RAvgDict(dict(scalar=1.0, array=[[2., 3.]])) for rb, rw in zip(rbig_a, rsmall_a): ravg.add(dict( scalar=gv.gvar(xbig_s(), 1.), array=[gv.gvar(rb, cov_a)] )) ravg.add(dict( scalar=gv.gvar(xsmall_s(), 0.1), array=[gv.gvar(rw, cov_a / 10.)] )) np_assert_allclose( ravg['scalar'].sdev, 1/ (N * ( 1. / xbig_s.var + 1. / xsmall_s.var)) ** 0.5 ) self.assertLess( abs(ravg['scalar'].mean - mean_s), 5 * ravg['scalar'].sdev ) np_assert_allclose(gv.evalcov(ravg['array'].flat), cov_a / (10. + 1.) / N) for i in range(2): self.assertLess( abs(mean_a[i] - ravg['array'][0, i].mean), 5 * ravg['array'][0, i].sdev ) self.assertEqual(ravg.dof, 4 * N - 2 + 2 * N - 1) self.assertGreater(ravg.Q, 1e-3)
def fit_ere2(self, p0, p1, p2):
    """Three-parameter correlated least-squares fit of get_qsq_ere2.

    p0, p1, p2 are the starting values; returns a dict with chi2/dof,
    dof, the fit probability Q, and the fitted parameters as gvars.
    """
    ydata = np.array([self.y[k].mean for k in self.irreps])
    ycov = np.array(gv.evalcov([self.y[k] for k in self.irreps]))
    popt, pcov = curve_fit(
        self.get_qsq_ere2, self.irreps, ydata, p0=[p0, p1, p2],
        sigma=ycov, absolute_sigma=True, method='lm')
    # Correlated chi^2 at the minimum.
    resid = self.get_qsq_ere2(self.irreps, popt[0], popt[1], popt[2]) - ydata
    chisq_min = np.dot(resid, np.dot(np.linalg.inv(ycov), resid))
    ndof = len(ydata) - 3
    return {
        'chisq_dof': chisq_min / ndof,
        'dof': ndof,
        # Upper incomplete gamma = chi^2 tail probability.
        'Q': scsp.gammaincc(0.5 * ndof, 0.5 * chisq_min),
        'p_opt': gv.gvar(popt, pcov),
    }
def correlated_chi2(yfit, ydata): """Computes the correlated chi2 function.""" # Get the fit values, data, and covariance matrix as dicts cov_dict = gv.evalcov(ydata) # Enforce an ordering of keys klist = list(ydata.keys()) # Reserve space for arrays # Implementation note: flatten allows for the special case # of matrix-valued priors, e.g., for the transition matrix Vnn sizes = [len(np.asarray(ydata[key]).flatten()) for key in klist] total_size = sum(sizes) diff = np.empty(total_size) cov_arr = np.zeros((total_size, total_size)) # Infer the start and end points for intervals ends = np.cumsum(sizes) starts = ends - sizes # Populate arrays for start_i, end_i, key_i in zip(starts, ends, klist): diff[start_i:end_i] = np.asarray(gv.mean(ydata[key_i] - yfit[key_i])).flatten() for start_j, end_j, key_j in zip(starts, ends, klist): try: cov_arr[start_i:end_i, start_j:end_j] =\ cov_dict[(key_i, key_j)] except ValueError: # Implementation note: matrix-valued priors have # multi-dimensional covariance matrices, # which must be reshaped in a 2x2 array cov_arr[start_i:end_i, start_j:end_j] = \ cov_dict[(key_i, key_j)].reshape( end_i - start_i, end_j - start_j ) # The "usual" chi2 function (ydata-yfit).cov_inv.(ydata-yfit) try: result = np.dot(diff, np.linalg.solve(cov_arr, diff)) except np.linalg.LinAlgError: result = np.nan return result
def pred(kw, seed, err):
    """Run gp.pred with the given kwargs and return (mean, cov).

    seed fixes the random draw; err adds Cauchy-kernel noise to the data.
    Handles both return conventions of gp.pred: (mean, cov) tuple or an
    array of gvars.
    """
    np.random.seed(seed)
    xdata = np.random.uniform(-5, 5, size=20)
    xtest = np.random.uniform(-10, 10, size=100)
    gp = lgp.GP(lgp.ExpQuad())
    gp.addx(xdata, 'data')
    gp.addx(xtest, 'pred')
    y = np.tanh(xdata)
    if err:
        # Correlated noise from a separate data GP.
        noisegp = lgp.GP(0.1**2 * lgp.Cauchy(scale=0.3))
        noisegp.addx(xdata, 'data')
        y = y + noisegp.prior('data')
    result = gp.pred({'data': y}, 'pred', **kw)
    if isinstance(result, tuple) and len(result) == 2:
        mean, cov = result
    elif isinstance(result, np.ndarray):
        mean = gvar.mean(result)
        cov = gvar.evalcov(result)
    # NOTE: any other result type raises UnboundLocalError here, as in
    # the original.
    return mean, cov
# Extract two linear functionals of the DFT output: the 3rd real
# coefficient, and the difference between the two rows at index 3
# ("pseudo-phase").
gp.addlintransf(lambda x: x[0, 3], ['dft'], '3rd real coef')
gp.addlintransf(lambda x: x[0, 3] - x[1, 3], ['dft'],
                '3rd coef pseudo-phase')
# We will force the 3rd spectrum coefficient to be high because the supervisor
# said that it always comes out high, so the plot can't be different from the
# other articles. The statistics professor said that the correct fitting method
# is "Bayesian statistics" and that everything is subjective, so we may as
# well hardcode the desiratum into the prior.
u = gp.predfromdata({
    '3rd real coef': gvar.gvar(10, 1),
    '3rd coef pseudo-phase': gvar.gvar(5, 1),
})
mean = gvar.mean(u)
sdev = gvar.sdev(u)
cov = gvar.evalcov(u)
fig, axs = plt.subplots(2, 1, num='dft', clear=True, figsize=[6.4, 7])
ax = axs[0]
ax.set_title('Function')
# 1-sigma band of the posterior over the DUO key.
m = mean[DUO]
s = sdev[DUO]
patch = ax.fill_between(xpred, m - s, m + s, alpha=0.5)
color = patch.get_facecolor()[0]
# Overlay posterior samples drawn with the full covariance.
simulated_lines = np.random.multivariate_normal(m, cov[DUO, DUO])
ax.plot(xpred, simulated_lines, '-', color=color)
ax.plot(xdata, np.zeros_like(xdata), '.k', label='discrete lattice')
ax = axs[1]
ax.set_title('DFT')
def main(): gv.ranseed([2009,2010,2011,2012]) # initialize random numbers (opt.) x,y = make_data() # make fit data p0 = None # make larger fits go faster (opt.) sys_stdout = sys.stdout sys.stdout = tee.tee(sys.stdout, open("eg1.out","w")) for nexp in range(1, 11): prior = make_prior(nexp) fit = lsqfit.nonlinear_fit(data=(x,y),fcn=f,prior=prior,p0=p0) #, svdcut=SVDCUT) if fit.chi2/fit.dof<1.: p0 = fit.pmean # starting point for next fit (opt.) if nexp > 5 and nexp < 10: print(".".center(73)) continue elif nexp not in [1]: print("") print '************************************* nexp =',nexp print fit.format() # print the fit results E = fit.p['E'] # best-fit parameters a = fit.p['a'] if nexp > 2: print 'E1/E0 =',E[1]/E[0],' E2/E0 =',E[2]/E[0] print 'a1/a0 =',a[1]/a[0],' a2/a0 =',a[2]/a[0] # redo fit with 4 parameters since that is enough prior = make_prior(4) fit = lsqfit.nonlinear_fit(data=(x,y), fcn=f, prior=prior, p0=fit.pmean) sys.stdout = sys_stdout print fit # extra data 1 print '\n--------------------- fit with extra information' sys.stdout = tee.tee(sys_stdout, open("eg1a.out", "w")) def ratio(p): return p['a'][1] / p['a'][0] newfit = lsqfit.nonlinear_fit(data=gv.gvar(1,1e-5), fcn=ratio, prior=fit.p) print (newfit) E = newfit.p['E'] a = newfit.p['a'] print 'E1/E0 =',E[1]/E[0],' E2/E0 =',E[2]/E[0] print 'a1/a0 =',a[1]/a[0],' a2/a0 =',a[2]/a[0] # alternate method for extra data sys.stdout = tee.tee(sys_stdout, open("eg1b.out", "w")) fit.p['a1/a0'] = fit.p['a'][1] / fit.p['a'][0] new_data = {'a1/a0' : gv.gvar(1,1e-5)} new_p = lsqfit.wavg([fit.p, new_data]) print 'chi2/dof = %.2f\n' % (new_p.chi2 / new_p.dof) print 'E:', new_p['E'][:4] print 'a:', new_p['a'][:4] print 'a1/a0:', new_p['a1/a0'] if DO_BAYES: # Bayesian Fit gv.ranseed([123]) prior = make_prior(4) fit = lsqfit.nonlinear_fit(data=(x,y), fcn=f, prior=prior, p0=fit.pmean) sys.stdout = tee.tee(sys_stdout, open("eg1c.out", "w")) # print fit expval = lsqfit.BayesIntegrator(fit, limit=10.) 
# adapt integrator to PDF expval(neval=10000, nitn=10) # calculate expectation value of function g(p) fit_hist = gv.PDFHistogram(fit.p['E'][0]) def g(p): parameters = [p['a'][0], p['E'][0]] return dict( mean=parameters, outer=np.outer(parameters, parameters), hist=fit_hist.count(p['E'][0]), ) r = expval(g, neval=10000, nitn=10, adapt=False) # print results print r.summary() means = r['mean'] cov = r['outer'] - np.outer(r['mean'], r['mean']) print 'Results from Bayesian Integration:' print 'a0: mean =', means[0], ' sdev =', cov[0,0]**0.5 print 'E0: mean =', means[1], ' sdev =', cov[1,1]**0.5 print 'covariance from Bayesian integral =', np.array2string(cov, prefix=36 * ' ') print print 'Results from Least-Squares Fit:' print 'a0: mean =', fit.p['a'][0].mean, ' sdev =', fit.p['a'][0].sdev print 'E0: mean =', fit.p['E'][0].mean, ' sdev =', fit.p['E'][0].sdev print 'covariance from least-squares fit =', np.array2string(gv.evalcov([fit.p['a'][0], fit.p['E'][0]]), prefix=36*' ',precision=3) sys.stdout = sys_stdout # make histogram of E[0] probabilty plt = fit_hist.make_plot(r['hist']) plt.xlabel('$E_0$') plt.ylabel('probability') plt.savefig('eg1c.png', bbox_inches='tight') # plt.show() # # extra data 2 # sys.stdout = tee.tee(sys_stdout, open("eg1b.out", "w")) # newfit = fit # for i in range(1): # print '\n--------------------- fit with %d extra data sets' % (i+1) # x, ynew = make_data() # prior = newfit.p # newfit = lsqfit.nonlinear_fit(data=(x,ynew), fcn=f, prior=prior) # , svdcut=SVDCUT) # print newfit sys.stdout = sys_stdout # def fcn(x, p): # return f(x, p), f(x, p) # prior = make_prior(nexp) # fit = lsqfit.nonlinear_fit(data=(x, [y, ynew]), fcn=fcn, prior=prior, p0=newfit.pmean) # , svdcut=SVDCUT) # print(fit) if DO_BOOTSTRAP: Nbs = 40 # number of bootstrap copies outputs = {'E1/E0':[], 'E2/E0':[], 'a1/a0':[],'a2/a0':[],'E1':[],'a1':[]} # results for bsfit in fit.bootstrap_iter(n=Nbs): E = bsfit.pmean['E'] # best-fit parameters a = bsfit.pmean['a'] 
outputs['E1/E0'].append(E[1]/E[0]) # accumulate results outputs['E2/E0'].append(E[2]/E[0]) outputs['a1/a0'].append(a[1]/a[0]) outputs['a2/a0'].append(a[2]/a[0]) outputs['E1'].append(E[1]) outputs['a1'].append(a[1]) # print E[:2] # print a[:2] # print bsfit.chi2/bsfit.dof # extract means and standard deviations from the bootstrap output for k in outputs: outputs[k] = gv.gvar(np.mean(outputs[k]),np.std(outputs[k])) print 'Bootstrap results:' print 'E1/E0 =',outputs['E1/E0'],' E2/E1 =',outputs['E2/E0'] print 'a1/a0 =',outputs['a1/a0'],' a2/a0 =',outputs['a2/a0'] print 'E1 =',outputs['E1'],' a1 =',outputs['a1'] if DO_PLOT: import matplotlib.pyplot as plt ratio = y / fit.fcn(x,fit.pmean) plt.xlim(4, 21) plt.xlabel('x') plt.ylabel('y / f(x,p)') plt.errorbar(x=x,y=gv.mean(ratio),yerr=gv.sdev(ratio),fmt='ob') plt.plot([4.0, 21.0], [1.0, 1.0], 'b:') plt.savefig('eg1.png', bbox_inches='tight') plt.show()
taglist.append(('l32v5.bar3pt.'+irrepStr+'.azaz.t-7.p00','azaz','t7','16m')) ## -- consolidated all loading into a single file: start = time.time() print "loading gvar data: start ",start dall = standard_load(taglist,filekey,argsin) print "end ",(time.time() - start) ## -- get entire correlation matrix start = time.time() print "making correlation: start ",start #corall = gv.evalcorr(dall) ## -- super slow corall = gv.evalcorr(dall.buf) ## -- uses precomputed data, need to slice data manually print "end ",(time.time() - start) print "making covariance : start ",start covall = gv.evalcov(dall.buf) ## -- uses precomputed data, need to slice data manually print "end ",(time.time() - start) ## -- test routines, print correlation eigenvalues, eigenvectors to file #for testkey in ['s12','s21','s13','s31','s15','s51','s16','s61']: #for testkey in ['aiais11t6','aiais22t6','aiais33t6','aiais55t6','aiais66t6']: #for testkey in ['aiais11t7','aiais22t7','aiais33t7','aiais55t7','aiais66t7']: #for testkey in ['s11','s22','s33','s55','s66']: # evec = gvl.eigvalsh(corall[dall.slice(testkey),dall.slice(testkey)],True) # f = open('corr.'+testkey+'.dat','w') # f.write('#key : '+testkey+'\n') # f.write('#eigenvalues :\n') # seval = str(evec[0][0]) # for v in evec[0][1:]: # seval += ', ' +str(v) # f.write(seval+'\n')
#print np.shape(raw_data) #pt.plot_data(raw_data,Pcm,"Corr") #pt.plot_data(raw_data,Pcm,"M_eff") #pt.plot_data(raw_data,Pcm,"A_eff") #pt.plot_data(raw_data,Pcm,"E_1") #pt.plot_data(raw_data,Pcm,"A_1") s_data = s_data[:, fit_range] p_data = sources_dict["p"] p_data = dmt.fold_data(p_data) p_data = p_data[:, fit_range] if nsinks == 1: b0_data = s_data else: b0_data = np.concatenate((s_data, p_data), axis=1) gv_b0_data = gv.dataset.avg_data(b0_data) cov_matrix = gv.evalcov(gv_b0_data) / ( N_cfgs - 1) #if you want to freeze the covariance matrix fit0 = fitters.fit_data(gv_b0_data, fit_range, "pion", Pcm, nstates, nsinks, cov=None) params = fit0.p E0 = fit0.p["Es0"] print "E_{0}(t_min = %s, nstates = %s) = %s" % (str(t_min), str(nstates), E0) if nsinks == 1: fit_plot_data = gv_b0_data else: