def test_histogram(self):
    # two correlated Gaussian variables and their sum
    x = gv.gvar([5., 3.], [[4., 0.2], [0.2, 1.]])
    xsum = x[0] + x[1]
    integ = PDFIntegrator(x)
    hist = gv.PDFHistogram(xsum, nbin=40, binwidth=0.2)
    # adapt the integrator to the PDF of x
    integ(neval=1000, nitn=5)
    def fhist(x):
        return hist.count(x[0] + x[1])
    r = integ(fhist, neval=1000, nitn=5, adapt=False)
    bins, prob, stat, norm = hist.analyze(r)
    # check that the histogram reproduces the statistics of xsum
    self.assertTrue(
        abs(gv.mean(np.sum(prob)) - 1.) < 5. * gv.sdev(np.sum(prob))
        )
    self.assertTrue(abs(stat.mean.mean - xsum.mean) < 5. * stat.mean.sdev)
    self.assertTrue(abs(stat.sdev.mean - xsum.sdev) < 5. * stat.sdev.sdev)
    self.assertTrue(abs(stat.skew.mean) < 5. * stat.skew.sdev)
    self.assertTrue(abs(stat.ex_kurt.mean) < 5. * stat.ex_kurt.sdev)
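# The same workflow outside of unittest, as a standalone sketch. It assumes
# only that numpy, gvar, and vegas are installed, and that PDFIntegrator is
# vegas.PDFIntegrator (as in the examples below); every call is copied from
# the test above.
import numpy as np
import gvar as gv
from vegas import PDFIntegrator

x = gv.gvar([5., 3.], [[4., 0.2], [0.2, 1.]])   # correlated Gaussians
xsum = x[0] + x[1]
integ = PDFIntegrator(x)
hist = gv.PDFHistogram(xsum, nbin=40, binwidth=0.2)
integ(neval=1000, nitn=5)                       # adapt to the PDF of x
r = integ(lambda x: hist.count(x[0] + x[1]), neval=1000, nitn=5, adapt=False)
bins, prob, stat, norm = hist.analyze(r)
print(stat)     # mean, sdev, skewness, excess kurtosis of x[0] + x[1]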
def main():
    if not hasattr(lsqfit, 'BayesIntegrator'):
        # fake the run so that ``make run`` still works
        outfile = open('bayes.out', 'r').read()
        print(outfile[:-1])
        return
    x, y = make_data()
    prior = make_prior()
    fit = lsqfit.nonlinear_fit(prior=prior, data=(x, y), fcn=fcn)
    print(fit)
    # Bayesian integrator
    expval = vegas.PDFIntegrator(fit.p, sync_ran=False)
    # mean and covariance matrix, and counts for histograms
    hist = [
        gv.PDFHistogram(fit.p[0]), gv.PDFHistogram(fit.p[1]),
        gv.PDFHistogram(fit.p[2]), gv.PDFHistogram(fit.p[3]),
        ]
    def g(p):
        return dict(
            mean=p,
            outer=np.outer(p, p),
            count=[
                hist[0].count(p[0]), hist[1].count(p[1]),
                hist[2].count(p[2]), hist[3].count(p[3]),
                ],
            pdf=np.exp(fit.logpdf(p)),
            )
    # adapt integrator expval to PDF from fit
    neval = 1000
    nitn = 10
    expval(g, pdfkey='pdf', neval=neval, nitn=nitn)
    # evaluate expectation value of g(p)
    results = expval(g, pdfkey='pdf', neval=neval, nitn=nitn, adapt=False)
    # analyze results
    print('\nIterations:')
    print(results.summary())
    print('Integration Results:')
    pmean = results['mean']
    pcov = results['outer'] - np.outer(pmean, pmean)
    print('    mean(p) =', pmean)
    print('    cov(p) =\n', pcov)
    # create GVars from results
    p = gv.gvar(gv.mean(pmean), gv.mean(pcov))
    print('\nBayesian Parameters:')
    print(gv.tabulate(p))
    print('\nlog(Bayes Factor) =', np.log(results.norm))
    # show histograms
    print('\nHistogram Statistics:')
    count = results['count']
    for i in range(4):
        # print histogram statistics
        print('p[{}]:'.format(i))
        print(hist[i].analyze(count[i]).stats)
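# make_data, make_prior, and fcn are defined elsewhere in the example script.
# A hypothetical stand-in with the right shapes (four fit parameters, arrays
# of x and y data); the real functions differ, and the toy values below are
# illustration only:
import numpy as np
import gvar as gv

def make_data():
    x = np.array([1., 2., 3., 4., 5.])
    y = gv.gvar(['0.5(2)', '1.1(2)', '1.9(2)', '3.2(2)', '5.1(2)'])
    return x, y

def make_prior():
    return gv.gvar(4 * ['0(1)'])        # four O(1) fit parameters

def fcn(x, p):
    # e.g., a cubic polynomial in x with coefficients p
    return p[0] + p[1] * x + p[2] * x ** 2 + p[3] * x ** 3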
def main():
    sys_stdout = sys.stdout
    sys.stdout = tee.tee(sys.stdout, open("eg3a.out", "w"))
    x, y = make_data()
    prior = make_prior()
    fit = lsqfit.nonlinear_fit(prior=prior, data=(x, y), fcn=fcn)
    print(fit)
    print('p1/p0 =', fit.p[1] / fit.p[0], '  p3/p2 =', fit.p[3] / fit.p[2])
    print('corr(p0,p1) =', gv.evalcorr(fit.p[:2])[1, 0])
    if DO_PLOT:
        plt.semilogx()
        plt.errorbar(
            x=gv.mean(x), xerr=gv.sdev(x), y=gv.mean(y), yerr=gv.sdev(y),
            fmt='ob'
            )
        # plot fit line
        xx = np.linspace(0.99 * gv.mean(min(x)), 1.01 * gv.mean(max(x)), 100)
        yy = fcn(xx, fit.pmean)
        plt.xlabel('x')
        plt.ylabel('y')
        plt.plot(xx, yy, ':r')
        plt.savefig('eg3.png', bbox_inches='tight')
        plt.show()
    sys.stdout = sys_stdout
    if DO_BOOTSTRAP:
        gv.ranseed(123)
        sys.stdout = tee.tee(sys_stdout, open('eg3c.out', 'w'))
        print(fit)
        print('p1/p0 =', fit.p[1] / fit.p[0], '  p3/p2 =', fit.p[3] / fit.p[2])
        print('corr(p0,p1) =', gv.evalcorr(fit.p[:2])[1, 0])
        Nbs = 40
        outputs = {'p': [], 'p1/p0': [], 'p3/p2': []}
        for bsfit in fit.bootstrap_iter(n=Nbs):
            p = bsfit.pmean
            outputs['p'].append(p)
            outputs['p1/p0'].append(p[1] / p[0])
            outputs['p3/p2'].append(p[3] / p[2])
        print('\nBootstrap Averages:')
        outputs = gv.dataset.avg_data(outputs, bstrap=True)
        print(gv.tabulate(outputs))
        print('corr(p0,p1) =', gv.evalcorr(outputs['p'][:2])[1, 0])
        # make histograms of p1/p0 and p3/p2
        sys.stdout = sys_stdout
        print()
        sys.stdout = tee.tee(sys_stdout, open('eg3d.out', 'w'))
        print('Histogram Analysis:')
        count = {'p1/p0': [], 'p3/p2': []}
        hist = {
            'p1/p0': gv.PDFHistogram(fit.p[1] / fit.p[0]),
            'p3/p2': gv.PDFHistogram(fit.p[3] / fit.p[2]),
            }
        for bsfit in fit.bootstrap_iter(n=1000):
            p = bsfit.pmean
            count['p1/p0'].append(hist['p1/p0'].count(p[1] / p[0]))
            count['p3/p2'].append(hist['p3/p2'].count(p[3] / p[2]))
        count = gv.dataset.avg_data(count)
        plt.rcParams['figure.figsize'] = [6.4, 2.4]
        pltnum = 1
        for k in count:
            print(k + ':')
            print(hist[k].analyze(count[k]).stats)
            plt.subplot(1, 2, pltnum)
            plt.xlabel(k)
            hist[k].make_plot(count[k], plot=plt)
            if pltnum == 2:
                plt.ylabel('')
            pltnum += 1
        plt.rcParams['figure.figsize'] = [6.4, 4.8]
        plt.savefig('eg3d.png', bbox_inches='tight')
        plt.show()
    if DO_BAYESIAN:
        gv.ranseed(123)
        sys.stdout = tee.tee(sys_stdout, open('eg3e.out', 'w'))
        print(fit)
        expval = lsqfit.BayesIntegrator(fit)
        # adapt integrator to PDF from fit
        neval = 1000
        nitn = 10
        expval(neval=neval, nitn=nitn)
        # <g(p)> gives mean and covariance matrix, and histograms
        hist = [
            gv.PDFHistogram(fit.p[0]), gv.PDFHistogram(fit.p[1]),
            gv.PDFHistogram(fit.p[2]), gv.PDFHistogram(fit.p[3]),
            ]
        def g(p):
            return dict(
                mean=p,
                outer=np.outer(p, p),
                count=[
                    hist[0].count(p[0]), hist[1].count(p[1]),
                    hist[2].count(p[2]), hist[3].count(p[3]),
                    ],
                )
        # evaluate expectation value of g(p)
        results = expval(g, neval=neval, nitn=nitn, adapt=False)
        # analyze results
        print('\nIterations:')
        print(results.summary())
        print('Integration Results:')
        pmean = results['mean']
        pcov = results['outer'] - np.outer(pmean, pmean)
        print('    mean(p) =', pmean)
        print('    cov(p) =\n', pcov)
        # create GVars from results
        p = gv.gvar(gv.mean(pmean), gv.mean(pcov))
        print('\nBayesian Parameters:')
        print(gv.tabulate(p))
        # show histograms
        print('\nHistogram Statistics:')
        count = results['count']
        for i in range(4):
            print('p[{}] -'.format(i))
            print(hist[i].analyze(count[i]).stats)
            plt.subplot(2, 2, i + 1)
            plt.xlabel('p[{}]'.format(i))
            hist[i].make_plot(count[i], plot=plt)
            if i % 2 != 0:
                plt.ylabel('')
        plt.savefig('eg3e.png', bbox_inches='tight')
        plt.show()
    if DO_SIMULATION:
        gv.ranseed(1234)
        sys.stdout = tee.tee(sys_stdout, open('eg3f.out', 'w'))
        print(40 * '*' + ' real fit')
        print(fit.format(True))
        Q = []
        p = []
        for sfit in fit.simulated_fit_iter(n=3, add_priornoise=False):
            print(40 * '=' + ' simulation')
            print(sfit.format(True))
            diff = sfit.p - sfit.pexact
            print('\nsfit.p - pexact =', diff)
            print(gv.fmt_chi2(gv.chi2(diff)))
            print()
    # omit constraint
    sys.stdout = tee.tee(sys_stdout, open("eg3b.out", "w"))
    prior = gv.gvar(4 * ['0(1)'])
    prior[1] = gv.gvar('0(20)')
    fit = lsqfit.nonlinear_fit(prior=prior, data=(x, y), fcn=fcn)
    print(fit)
    print('p1/p0 =', fit.p[1] / fit.p[0], '  p3/p2 =', fit.p[3] / fit.p[2])
    print('corr(p0,p1) =', gv.evalcorr(fit.p[:2])[1, 0])
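# tee is a small local helper used by these example scripts: tee.tee
# duplicates everything written to one stream onto another, so output goes
# both to the screen and to the eg3*.out files. A minimal, hypothetical
# equivalent:
class _Tee:
    def __init__(self, *streams):
        self.streams = streams
    def write(self, text):
        for stream in self.streams:
            stream.write(text)
    def flush(self):
        for stream in self.streams:
            stream.flush()

# usage matching the examples:  sys.stdout = _Tee(sys.stdout, open('log', 'w'))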
def main():
    x, y = make_data()              # make fit data
    # y = gv.gvar(gv.mean(y), 0.75**2 * gv.evalcov(y))
    p0 = None                       # make larger fits go faster (opt.)
    sys_stdout = sys.stdout
    sys.stdout = tee.tee(sys.stdout, open("eg1.out", "w"))
    for nexp in range(1, 7):
        prior = make_prior(nexp)
        fit = lsqfit.nonlinear_fit(data=(x, y), fcn=fcn, prior=prior, p0=p0)
        if fit.chi2 / fit.dof < 1.:
            p0 = fit.pmean          # starting point for next fit (opt.)
        print('************************************* nexp =', nexp)
        print(fit.format())         # print the fit results
        E = fit.p['E']              # best-fit parameters
        a = fit.p['a']
        if nexp > 2:
            print('E1/E0 =', E[1] / E[0], '  E2/E0 =', E[2] / E[0])
            print('a1/a0 =', a[1] / a[0], '  a2/a0 =', a[2] / a[0])
            print()
    # error budget
    outputs = {
        'E1/E0': E[1] / E[0], 'E2/E0': E[2] / E[0],
        'a1/a0': a[1] / a[0], 'a2/a0': a[2] / a[0],
        }
    inputs = collections.OrderedDict()
    inputs['a'] = fit.prior['a']
    inputs['E'] = fit.prior['E']
    inputs['y'] = fit.data[1]
    print('================= Error Budget Analysis')
    print(fit.fmt_values(outputs))
    print(fit.fmt_errorbudget(outputs, inputs))
    sys.stdout = sys_stdout
    # print(gv.gvar(str(a[1])) / gv.gvar(str(a[0])))
    # print(gv.evalcorr([fit.p['a'][1], fit.p['E'][1]]))
    # print(fit.format(True))

    # redo fit with 4 parameters since that is enough
    prior = make_prior(4)
    fit = lsqfit.nonlinear_fit(data=(x, y), fcn=fcn, prior=prior, p0=fit.pmean)
    sys.stdout = tee.tee(sys_stdout, open("eg1a.out", "w"))
    print('--------------------- original fit')
    print(fit.format())
    E = fit.p['E']                  # best-fit parameters
    a = fit.p['a']
    print('E1/E0 =', E[1] / E[0], '  E2/E0 =', E[2] / E[0])
    print('a1/a0 =', a[1] / a[0], '  a2/a0 =', a[2] / a[0])
    print()
    # extra data 1
    print('\n--------------------- new fit to extra information')
    def ratio(p):
        return p['a'][1] / p['a'][0]
    newfit = lsqfit.nonlinear_fit(
        data=gv.gvar(1, 1e-5), fcn=ratio, prior=fit.p
        )
    print(newfit.format())
    E = newfit.p['E']
    a = newfit.p['a']
    print('E1/E0 =', E[1] / E[0], '  E2/E0 =', E[2] / E[0])
    print('a1/a0 =', a[1] / a[0], '  a2/a0 =', a[2] / a[0])
    if DO_PLOT:
        import matplotlib.pyplot as plt
        ratio = y / fit.fcn(x, fit.pmean)
        plt.xlim(4, 15)
        plt.ylim(0.95, 1.05)
        plt.xlabel('x')
        plt.ylabel('y / f(x,p)')
        plt.yticks(
            [0.96, 0.98, 1.00, 1.02, 1.04],
            ['0.96', '0.98', '1.00', '1.02', '1.04']
            )
        plt.errorbar(x=x, y=gv.mean(ratio), yerr=gv.sdev(ratio), fmt='ob')
        plt.plot([4.0, 21.0], [1.0, 1.0], 'b:')
        plt.savefig('eg1.png', bbox_inches='tight')
        plt.show()
    # alternate method for extra data
    sys.stdout = tee.tee(sys_stdout, open("eg1b.out", "w"))
    fit.p['a1/a0'] = fit.p['a'][1] / fit.p['a'][0]
    new_data = {'a1/a0': gv.gvar(1, 1e-5)}
    new_p = lsqfit.wavg([fit.p, new_data])
    print('chi2/dof = %.2f\n' % (new_p.chi2 / new_p.dof))
    print('E:', new_p['E'][:4])
    print('a:', new_p['a'][:4])
    print('a1/a0:', new_p['a1/a0'])
    if DO_BAYES:
        # Bayesian fit
        gv.ranseed([123])
        prior = make_prior(4)
        fit = lsqfit.nonlinear_fit(
            data=(x, y), fcn=fcn, prior=prior, p0=fit.pmean
            )
        sys.stdout = tee.tee(sys_stdout, open("eg1c.out", "w"))
        # print(fit)
        expval = lsqfit.BayesIntegrator(fit, limit=10.)
        # adapt integrator to PDF
        expval(neval=40000, nitn=10)
        # calculate expectation value of function g(p)
        fit_hist = gv.PDFHistogram(fit.p['E'][0])
        def g(p):
            parameters = [p['a'][0], p['E'][0]]
            return dict(
                mean=parameters,
                outer=np.outer(parameters, parameters),
                hist=fit_hist.count(p['E'][0]),
                )
        r = expval(g, neval=40000, nitn=10, adapt=False)
        # print results
        print(r.summary())
        means = r['mean']
        cov = r['outer'] - np.outer(r['mean'], r['mean'])
        print('Results from Bayesian Integration:')
        print('a0: mean =', means[0], ' sdev =', cov[0, 0] ** 0.5)
        print('E0: mean =', means[1], ' sdev =', cov[1, 1] ** 0.5)
        print('covariance from Bayesian integral =', np.array2string(
            cov, prefix=36 * ' '
            ))
        print()
        print('Results from Least-Squares Fit:')
        print('a0: mean =', fit.p['a'][0].mean, ' sdev =', fit.p['a'][0].sdev)
        print('E0: mean =', fit.p['E'][0].mean, ' sdev =', fit.p['E'][0].sdev)
        print('covariance from least-squares fit =', np.array2string(
            gv.evalcov([fit.p['a'][0], fit.p['E'][0]]), prefix=36 * ' ',
            precision=3
            ))
        sys.stdout = sys_stdout
        # make histogram of E[0] probability
        plt = fit_hist.make_plot(r['hist'])
        plt.xlabel('$E_0$')
        plt.ylabel('probability')
        plt.savefig('eg1c.png', bbox_inches='tight')
        # plt.show()
    if DO_BOOTSTRAP:
        Nbs = 40                    # number of bootstrap copies
        outputs = {
            'E1/E0': [], 'E2/E0': [], 'a1/a0': [], 'a2/a0': [],
            'E1': [], 'a1': []
            }                       # results
        for bsfit in fit.bootstrap_iter(n=Nbs):
            E = bsfit.pmean['E']    # best-fit parameters
            a = bsfit.pmean['a']
            outputs['E1/E0'].append(E[1] / E[0])    # accumulate results
            outputs['E2/E0'].append(E[2] / E[0])
            outputs['a1/a0'].append(a[1] / a[0])
            outputs['a2/a0'].append(a[2] / a[0])
            outputs['E1'].append(E[1])
            outputs['a1'].append(a[1])
            # print(E[:2])
            # print(a[:2])
            # print(bsfit.chi2 / bsfit.dof)
        # extract means and standard deviations from the bootstrap output
        for k in outputs:
            outputs[k] = gv.gvar(np.mean(outputs[k]), np.std(outputs[k]))
        print('Bootstrap results:')
        print('E1/E0 =', outputs['E1/E0'], '  E2/E0 =', outputs['E2/E0'])
        print('a1/a0 =', outputs['a1/a0'], '  a2/a0 =', outputs['a2/a0'])
        print('E1 =', outputs['E1'], '  a1 =', outputs['a1'])
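# Module-level setup assumed by the example mains above: a sketch of the
# imports and flags each script declares at its top (the DO_* switches and
# the tee helper are defined per script; matplotlib is only needed when
# plotting is enabled):
import collections
import sys

import numpy as np
import matplotlib.pyplot as plt
import gvar as gv
import lsqfit
import vegas

DO_PLOT = DO_BOOTSTRAP = DO_BAYESIAN = DO_BAYES = DO_SIMULATION = False

if __name__ == '__main__':
    main()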