def test_simulation(self):
    """Check ``CorrFitter.simulated_data_iter`` against a synthetic dataset.

    A 100-configuration dataset is generated for the first correlator in
    ``self.data``.  Two simulated data sets are then drawn from it and
    refit; for each one the leading ``a`` and ``logdE`` parameters must
    agree with the reference fit (chi**2/dof below 15) and the covariance
    of the simulated data must match that of the dataset average.
    """
    models = [self.mkcorr(a="a", b="a", dE="dE", tp=None)]
    fitter = self.dofit(models)
    data = self.data
    k = list(data.keys())[0]

    # Make an n-configuration dataset corresponding to data.  The covariance
    # is scaled up by n before sampling -- presumably so that the average
    # over n configurations has roughly the original covariance (TODO
    # confirm).
    n = 100
    configurations = gv.raniter(
        g=gv.gvar(gv.mean(data[k]), gv.evalcov(data[k]) * n),
        n=n,
    )
    dataset = gv.dataset.Dataset()
    for configuration in configurations:
        dataset.append(k, configuration)

    pexact = fitter.fit.pmean
    covexact = gv.evalcov(gv.dataset.avg_data(dataset)[k])
    for sdata in fitter.simulated_data_iter(n=2, dataset=dataset):
        sfit = fitter.lsqfit(
            data=sdata, prior=self.prior, p0=pexact, print_fit=False,
        )
        # Compare only the leading element of each parameter array.
        diff = {
            name: sfit.p[name][0] - pexact[name][0]
            for name in ['a', 'logdE']
        }
        c2 = gv.chi2(diff)
        self.assertLess(c2 / c2.dof, 15.)
        self.assert_arraysclose(gv.evalcov(sdata[k]), covexact)
def main():
    """Fit the ``etab.h5`` data with 1 through 7 terms and cross-check it.

    Every fit starts from the parameter means of the previous one.  The
    final fit is reported with ``print_results`` and, when ``SHOWPLOTS`` is
    set, plotted.  It is then repeated with ``noise=True`` and the first
    three ``etab.dE`` values of the two fits are compared.
    """
    data, basis = make_data('etab.h5')
    fitter = cf.CorrFitter(models=make_models())
    start = None
    for nterm in range(1, 8):
        print('=' * 30, 'nterm =', nterm)
        prior = make_prior(nterm, basis)
        fit = fitter.lsqfit(data=data, prior=prior, p0=start, svdcut=SVDCUT)
        # Only the last (7-term) fit is formatted with the 'v' style.
        if nterm < 7:
            style = None
        else:
            style = 'v'
        print(fit.format(pstyle=style))
        start = fit.pmean
    print_results(fit, basis, prior, data)
    if SHOWPLOTS:
        fit.show_plots(save='etab.{}.png', view='ratio')

    # Check fit quality by adding noise.
    print('\n==================== add svd, prior noise')
    noisy_fit = fitter.lsqfit(
        data=data,
        prior=prior,
        p0=fit.pmean,
        svdcut=SVDCUT,
        noise=True,
    )
    print(noisy_fit.format(pstyle=None))
    energies = fit.p['etab.dE'][:3]
    noisy_energies = noisy_fit.p['etab.dE'][:3]
    print(' dE:', energies)
    print('noisy dE:', noisy_energies)
    shift_chi2 = gv.chi2(energies - noisy_energies)
    print(' ', gv.fmt_chi2(shift_chi2))
    if SHOWPLOTS:
        fit.qqplot_residuals().show()
def main():
    """Fit the ``etab.h5`` data with 1 through 7 terms, then refit with noise.

    Each fit is seeded with the previous fit's parameter means.  After the
    loop the last fit is summarized by ``print_results`` (and plots are
    displayed when ``DISPLAYPLOTS`` is set); a second fit with
    ``noise=True`` is compared with it through the first three ``etab.dE``
    values.
    """
    data, basis = make_data('etab.h5')
    fitter = cf.CorrFitter(models=make_models())
    previous_means = None
    for n_terms in range(1, 8):
        print('=' * 30, 'nterm =', n_terms)
        prior = make_prior(n_terms, basis)
        fit = fitter.lsqfit(
            data=data, prior=prior, p0=previous_means, svdcut=SVDCUT,
        )
        # The 'm' style is used for the final (7-term) fit only.
        style = 'm' if n_terms >= 7 else None
        print(fit.format(pstyle=style))
        previous_means = fit.pmean
    print_results(fit, basis, prior, data)
    if DISPLAYPLOTS:
        fitter.display_plots()

    print('\n==================== add svd, prior noise')
    noisy_fit = fitter.lsqfit(
        data=data,
        prior=prior,
        p0=fit.pmean,
        svdcut=SVDCUT,
        noise=True,
    )
    print(noisy_fit.format(pstyle=None))
    clean_dE = fit.p['etab.dE'][:3]
    noisy_dE = noisy_fit.p['etab.dE'][:3]
    print(' dE:', clean_dE)
    print('noisy dE:', noisy_dE)
    comparison = gv.chi2(clean_dE - noisy_dE)
    print(' ', gv.fmt_chi2(comparison))
def main():
    """Run chained fits of the ``etas-Ds.h5`` data with 1 through 4 terms.

    The models from ``make_models`` are rearranged into a chain: the first
    two models, a dictionary of fit options, and the last two models as a
    tuple.  Each chained fit starts from the previous fit's means.  The
    last fit is then checked twice: by a refit with ``noise=True`` and by
    two fits to simulated data whose exact parameters are the fit means.
    """
    data = make_data('etas-Ds.h5')
    all_models = make_models()                                  # 1a
    chain = [
        all_models[0], all_models[1],                           # 1b
        {'nterm': (2, 1), 'svdcut': 6.3e-5},                    # 1c
        (all_models[2], all_models[3]),                         # 1d
    ]
    fitter = cf.CorrFitter(models=chain)                        # 1e
    start = None
    for n_terms in (1, 2, 3, 4):
        print('=' * 30, 'nterm =', n_terms)
        prior = make_prior(n_terms)
        fit = fitter.chained_lsqfit(data=data, prior=prior, p0=start)  # 2
        # The 'm' style is used for the final (4-term) fit only.
        style = 'm' if n_terms >= 4 else None
        print(fit.format(pstyle=style))
        start = fit.pmean
    print_results(fit, prior, data)
    if DISPLAYPLOTS:
        fit.show_plots()

    # Check fit quality by adding noise.
    print('\n==================== add svd, prior noise')
    noisy_fit = fitter.chained_lsqfit(
        data=data,
        prior=prior,
        p0=fit.pmean,
        svdcut=SVDCUT,
        noise=True,
    )
    print(noisy_fit.format(pstyle=None))
    fit_keys = key_parameters(fit.p)
    noisy_keys = key_parameters(noisy_fit.p)
    print(' fit:', fit_keys)
    print('noisy fit:', noisy_keys)
    print(' ', gv.fmt_chi2(gv.chi2(fit_keys - noisy_keys)))

    # Simulated fits.
    simulations = fitter.simulated_pdata_iter(
        n=2, dataset=cf.read_dataset('etas-Ds.h5'), p_exact=fit.pmean,
    )
    for sim_pdata in simulations:
        print('\n==================== simulation')
        sim_fit = fitter.chained_lsqfit(
            pdata=sim_pdata,
            prior=prior,
            p0=fit.pmean,
            svdcut=SVDCUT,
        )
        print(sim_fit.format(pstyle=None))
        exact_keys = key_parameters(fit.pmean)
        sim_keys = key_parameters(sim_fit.p)
        print('simulated - exact:', sim_keys - exact_keys)
        print(' ', gv.fmt_chi2(gv.chi2(exact_keys - sim_keys)))
def main():
    """Fit the ``etas-Ds.h5`` data with a fixed prior and ``nterm`` of 1, 2.

    The prior is built once with ``make_prior(8)``; the number of terms
    actually fit is set through ``nterm=(N, N)``.  The last fit is then
    checked by a refit with ``noise=True`` and by two fits to simulated
    data, all using the final ``nterm`` value from the loop.
    """
    data = make_data('etas-Ds.h5')
    fitter = cf.CorrFitter(models=make_models())
    start = None
    prior = make_prior(8)                                           # 1
    for n_terms in (1, 2):                                          # 2
        print('=' * 30, 'nterm =', n_terms)
        fit = fitter.lsqfit(
            data=data,
            prior=prior,
            p0=start,
            nterm=(n_terms, n_terms),                               # 3
            svdcut=SVDCUT,
        )
        print(fit)                                                  # 4
        start = fit.pmean
    print_results(fit, prior, data)
    if DISPLAYPLOTS:
        fit.show_plots()

    # The checks below reuse the nterm setting of the last fit in the loop.
    last_nterm = (n_terms, n_terms)

    # Check fit quality by adding noise.
    print('\n==================== add svd, prior noise')
    noisy_fit = fitter.lsqfit(
        data=data,
        prior=prior,
        p0=fit.pmean,
        svdcut=SVDCUT,
        nterm=last_nterm,
        noise=True,
    )
    print(noisy_fit.format(pstyle=None))
    fit_keys = key_parameters(fit.p)
    noisy_keys = key_parameters(noisy_fit.p)
    print(' fit:', fit_keys)
    print('noisy fit:', noisy_keys)
    print(' ', gv.fmt_chi2(gv.chi2(fit_keys - noisy_keys)))

    # Simulated fits.
    simulations = fitter.simulated_pdata_iter(
        n=2, dataset=cf.read_dataset('etas-Ds.h5'), p_exact=fit.pmean,
    )
    for sim_pdata in simulations:
        print('\n==================== simulation')
        sim_fit = fitter.lsqfit(
            pdata=sim_pdata,
            prior=prior,
            p0=fit.pmean,
            svdcut=SVDCUT,
            nterm=last_nterm,
        )
        print(sim_fit.format(pstyle=None))
        exact_keys = key_parameters(fit.pmean)
        sim_keys = key_parameters(sim_fit.p)
        print('simulated - exact:', sim_keys - exact_keys)
        print(' ', gv.fmt_chi2(gv.chi2(exact_keys - sim_keys)))
def main():
    """Fit the ``Ds-Ds.h5`` data with 1 through 4 terms and validate the fit.

    Each fit starts from the previous fit's parameter means.  The last fit
    is reported with ``print_results`` (and plotted when ``SHOWPLOTS`` is
    set), then checked by a refit with ``noise=True`` and by two fits to
    simulated data generated from the raw dataset in ``Ds-Ds.h5``.
    """
    data = make_data('Ds-Ds.h5')
    fitter = cf.CorrFitter(models=make_models())
    p0 = None
    for N in [1, 2, 3, 4]:
        print(30 * '=', 'nterm =', N)
        prior = make_prior(N)
        fit = fitter.lsqfit(data=data, prior=prior, p0=p0, svdcut=SVDCUT)
        # The 'v' style is used for the final (4-term) fit only.
        print(fit.format(pstyle=None if N < 4 else 'v'))
        p0 = fit.pmean
    print_results(fit, prior, data)
    if SHOWPLOTS:
        fit.show_plots(save='Ds-Ds.{}.png', view='ratio')

    # check fit quality by adding noise
    print('\n==================== add svd, prior noise')
    noisy_fit = fitter.lsqfit(
        data=data, prior=prior, p0=fit.pmean, svdcut=SVDCUT,
        noise=True,
    )
    print(noisy_fit.format(pstyle=None))
    p = key_parameters(fit.p)
    noisy_p = key_parameters(noisy_fit.p)
    print(' fit:', p)
    print('noisy fit:', noisy_p)
    print(' ', gv.fmt_chi2(gv.chi2(p - noisy_p)))

    # simulated fit
    # The HDF5 file is opened in a ``with`` block that spans the whole loop:
    # the iterator may read from it lazily, and the handle is closed as soon
    # as the simulations are done (or if one of them raises).
    with h5py.File('Ds-Ds.h5', 'r') as dataset:
        for sim_pdata in fitter.simulated_pdata_iter(
            n=2, dataset=dataset, p_exact=fit.pmean
        ):
            print('\n==================== simulation')
            sim_fit = fitter.lsqfit(
                pdata=sim_pdata, prior=prior, p0=fit.pmean, svdcut=SVDCUT,
            )
            print(sim_fit.format(pstyle=None))
            p = key_parameters(fit.pmean)
            sim_p = key_parameters(sim_fit.p)
            print('simulated - exact:', sim_p - p)
            print(' ', gv.fmt_chi2(gv.chi2(p - sim_p)))
def f_outlier_list(fit):
    """Return the indices of the outlying data points of a fit.

    The fit function is evaluated as ``fit.fcn(fit.x, fit.p)`` and compared
    element by element with ``fit.y``.  Index ``i`` is included in the
    result when ``gv.chi2`` of the i-th (data, theory) pair is greater
    than 3.
    """
    theory = fit.fcn(fit.x, fit.p)
    measured = fit.y
    return [
        index
        for index, (y_data, y_fit) in enumerate(zip(measured, theory))
        if gv.chi2(y_data, y_fit) > 3.0
    ]
def test_fit(fitter, datafile):
    """Test the fit with simulated data.

    The random seed is fixed, then two simulated data sets are generated
    from the dataset in ``datafile`` with the means of ``fitter.fit`` as
    exact parameters.  Each one is refit, and chi**2 is reported for the
    differences between the first element of every fitted parameter and
    its exact value.
    """
    gv.ranseed((1487942813, 775399747, 906327435))
    print('\nRandom seed:', gv.ranseed.seed)
    dataset = cf.read_dataset(datafile)
    pexact = fitter.fit.pmean
    prior = fitter.fit.prior
    simulations = fitter.simulated_data_iter(
        n=2, dataset=dataset, pexact=pexact,
    )
    for sdata in simulations:
        print('\n============================== simulation')
        sfit = fitter.lsqfit(data=sdata, prior=prior, p0=pexact)
        # Check chi**2 for the leading parameters only.
        leading_diffs = [
            sfit.p[key].flat[0] - pexact[key].flat[0] for key in prior
        ]
        chi2diff = gv.chi2(leading_diffs)
        ratio_text = (
            'Leading parameter chi2/dof [dof] = %.2f'
            % (chi2diff / chi2diff.dof)
        )
        dof_text = '[%d]' % chi2diff.dof
        q_text = ' Q = %.1f' % chi2diff.Q
        print(ratio_text, dof_text, q_text)
def test_fit(fitter, datafile):
    """Test the fit with simulated data.

    After seeding the random-number generator, two simulated data sets are
    drawn from the dataset in ``datafile`` (exact parameters: the means of
    ``fitter.fit``) and refit with ``nterm=(2, 2)``.  For each refit the
    chi**2 of the leading-element differences between fitted and exact
    parameters is printed.
    """
    gv.ranseed((623738625, 435880512, 1745400596))
    print('\nRandom seed:', gv.ranseed.seed)
    dataset = read_dataset(datafile)
    pexact = fitter.fit.pmean
    prior = fitter.fit.prior
    simulations = fitter.simulated_data_iter(
        n=2, dataset=dataset, pexact=pexact,
    )
    for sdata in simulations:
        print('\n============================== simulation')
        sfit = fitter.lsqfit(
            data=sdata, prior=prior, p0=pexact, nterm=(2, 2),
        )
        # Check chi**2 for the leading parameters only.
        leading_diffs = [
            sfit.p[key].flat[0] - pexact[key].flat[0] for key in prior
        ]
        chi2_diff = gv.chi2(leading_diffs)
        ratio_text = (
            'Leading parameter chi2/dof [dof] = %.2f'
            % (chi2_diff / chi2_diff.dof)
        )
        dof_text = '[%d]' % chi2_diff.dof
        q_text = ' Q = %.1f' % chi2_diff.Q
        print(ratio_text, dof_text, q_text)
def main():
    """Run the example fits for nexp = 3 and 4 plus the optional analyses.

    Each fit is printed together with ratios of its leading ``E`` and ``a``
    parameters; a fit whose chi**2/dof is below 1 seeds the next one.  The
    module-level flags then select the follow-up sections:

    * ``DO_ERRORBUDGET`` -- partial errors and an error budget, teed to
      ``eg4c.out`` and ``eg4b.out``;
    * ``DO_SIMULATIONS`` -- three simulated fits compared with their exact
      parameters, teed to ``eg4d.out``;
    * ``DO_EMPBAYES`` -- an empirical-Bayes fit that tunes the width of the
      ``a`` priors, teed to ``eg4a.out``;
    * ``DO_PLOT`` -- a plot of data divided by the fit function.

    All output goes through the ``print()`` function, in forms that give
    the same text under Python 2 and Python 3.
    """
    def emit(*items):
        # Equivalent of the ``print a, b, ...`` statement: every item is
        # converted with str() and the items are separated by one space.
        print(' '.join(str(item) for item in items))

    gv.ranseed([2009, 2010, 2011, 2012])    # initialize random numbers (opt.)
    x, y = make_data()                      # make fit data
    p0 = None                               # make larger fits go faster (opt.)
    for nexp in range(3, 5):
        emit('************************************* nexp =', nexp)
        prior = make_prior(nexp)
        fit = lsqfit.nonlinear_fit(data=(x, y), fcn=f, prior=prior, p0=p0)
        print(fit)                          # print the fit results
        E = fit.p['E']                      # best-fit parameters
        a = fit.p['a']
        emit('E1/E0 =', E[1] / E[0], ' E2/E0 =', E[2] / E[0])
        emit('a1/a0 =', a[1] / a[0], ' a2/a0 =', a[2] / a[0])
        print('')
        if fit.chi2 / fit.dof < 1.:
            p0 = fit.pmean                  # starting point for next fit (opt.)

    # NOTE(review): the files handed to tee.tee below are never closed
    # explicitly; they stay open until they are garbage collected.
    sys_stdout = sys.stdout
    if DO_ERRORBUDGET:
        lines = [
            "E = fit.p['E']",
            "a = fit.p['a']",
            "print(E[1] / E[0])",
            "print((E[1] / E[0]).partialsdev(fit.prior['E']))",
            "print((E[1] / E[0]).partialsdev(fit.prior['a']))",
            "print((E[1] / E[0]).partialsdev(y))",
        ]
        sys.stdout = tee.tee(sys_stdout, open("eg4c.out", "w"))
        for line in lines:
            # Echo each line as if typed at a prompt, then show the value of
            # the expression inside every print(...) line.
            emit(">>>", line)
            if line.startswith("print"):
                # eval() only ever sees the fixed literals listed above.
                print(eval(line[5:]))
        outputs = {
            'E1/E0': E[1] / E[0], 'E2/E0': E[2] / E[0],
            'a1/a0': a[1] / a[0], 'a2/a0': a[2] / a[0],
        }
        inputs = {'E': fit.prior['E'], 'a': fit.prior['a'], 'y': y}
        sys.stdout = tee.tee(sys_stdout, open("eg4b.out", "w"))
        print(fit.fmt_values(outputs))
        print(fit.fmt_errorbudget(outputs, inputs))
        sys.stdout = sys_stdout

    if DO_SIMULATIONS:
        # fit simulations
        sys.stdout = tee.tee(sys_stdout, open("eg4d.out", "w"))
        for sfit in fit.simulated_fit_iter(3):
            print('************************************* simulation')
            print(sfit)
            sE = sfit.p['E']                # best-fit parameters
            sa = sfit.p['a']
            E = sfit.pexact['E']
            a = sfit.pexact['a']
            emit('E1/E0 =', sE[1] / sE[0], ' E2/E0 =', sE[2] / sE[0])
            emit('a1/a0 =', sa[1] / sa[0], ' a2/a0 =', sa[2] / sa[0])
            print('\nSimulated Fit Values - Exact Values:')
            emit(
                'E1/E0:', (sE[1] / sE[0]) - (E[1] / E[0]),
                ' E2/E0:', (sE[2] / sE[0]) - (E[2] / E[0]),
            )
            emit(
                'a1/a0:', (sa[1] / sa[0]) - (a[1] / a[0]),
                ' a2/a0:', (sa[2] / sa[0]) - (a[2] / a[0]),
            )

            # compute chi**2 comparing fit results to exact results
            sim_results = [sE[0], sE[1], sa[0], sa[1]]
            exact_results = [E[0], E[1], a[0], a[1]]
            chi2 = gv.chi2(sim_results, exact_results, svdcut=1e-8)
            emit(
                '\nParameter chi2/dof [dof] = %.2f' % (chi2 / chi2.dof),
                '[%d]' % chi2.dof,
                ' Q = %.1f' % chi2.Q,
            )
            print('')
        sys.stdout = sys_stdout

    if DO_EMPBAYES:
        # The defaults bind the values from the last fit above; note that
        # fitargs updates that prior dictionary in place on every call.
        def fitargs(z, nexp=nexp, prior=prior, f=f, data=(x, y), p0=p0):
            z = gv.exp(z)
            prior['a'] = [gv.gvar(0.5, 0.5 * z[0]) for i in range(nexp)]
            return dict(prior=prior, data=data, fcn=f, p0=p0)

        z0 = [0.0]
        fit, z = lsqfit.empbayes_fit(z0, fitargs, tol=1e-3)
        sys.stdout = tee.tee(sys_stdout, open("eg4a.out", "w"))
        print(fit)                          # print the optimized fit results
        E = fit.p['E']                      # best-fit parameters
        a = fit.p['a']
        emit('E1/E0 =', E[1] / E[0], ' E2/E0 =', E[2] / E[0])
        emit('a1/a0 =', a[1] / a[0], ' a2/a0 =', a[2] / a[0])
        sys.stdout = sys_stdout
        print('')

    if DO_PLOT:
        import pylab as pp
        from gvar import mean, sdev
        fity = f(x, fit.pmean)
        ratio = y / fity
        pp.xlim(0, 21)
        pp.xlabel('x')
        pp.ylabel('y/f(x,p)')
        pp.errorbar(x=x, y=mean(ratio), yerr=sdev(ratio), fmt='ob')
        pp.plot([0.0, 21.0], [1.0, 1.0])
        pp.show()
def main():
    """Run the example fit and the optional follow-up analyses.

    The first fit and two parameter ratios are teed to ``eg3a.out``.  The
    module-level flags then select further sections:

    * ``DO_PLOT`` -- plot the data and the fit curve (``eg3.png``);
    * ``DO_BOOTSTRAP`` -- bootstrap averages (``eg3c.out``) and histograms
      of ``p1/p0`` and ``p3/p2`` (``eg3d.out``, ``eg3d.png``);
    * ``DO_BAYESIAN`` -- Bayesian integration over the fit parameters
      (``eg3e.out``, ``eg3e.png``);
    * ``DO_SIMULATION`` -- three simulated fits compared with their exact
      parameters (``eg3f.out``).

    Finally the fit is repeated with a different prior and teed to
    ``eg3b.out``; ``sys.stdout`` is left pointing at that tee on return.

    All output goes through the ``print()`` function, in forms that give
    the same text under Python 2 and Python 3.
    """
    def emit(*items):
        # Equivalent of the ``print a, b, ...`` statement: every item is
        # converted with str() and the items are separated by one space.
        print(' '.join(str(item) for item in items))

    # NOTE(review): the files handed to tee.tee in this function are never
    # closed explicitly; they stay open until they are garbage collected.
    sys_stdout = sys.stdout
    sys.stdout = tee.tee(sys.stdout, open("eg3a.out", "w"))
    x, y = make_data()
    prior = make_prior()
    fit = lsqfit.nonlinear_fit(prior=prior, data=(x, y), fcn=fcn)
    print(fit)
    emit('p1/p0 =', fit.p[1] / fit.p[0], ' p3/p2 =', fit.p[3] / fit.p[2])
    emit('corr(p0,p1) =', gv.evalcorr(fit.p[:2])[1, 0])

    if DO_PLOT:
        plt.semilogx()
        plt.errorbar(
            x=gv.mean(x), xerr=gv.sdev(x), y=gv.mean(y), yerr=gv.sdev(y),
            fmt='ob',
        )
        # plot fit line
        xx = np.linspace(0.99 * gv.mean(min(x)), 1.01 * gv.mean(max(x)), 100)
        yy = fcn(xx, fit.pmean)
        plt.xlabel('x')
        plt.ylabel('y')
        plt.plot(xx, yy, ':r')
        plt.savefig('eg3.png', bbox_inches='tight')
        plt.show()

    sys.stdout = sys_stdout
    if DO_BOOTSTRAP:
        gv.ranseed(123)
        sys.stdout = tee.tee(sys_stdout, open('eg3c.out', 'w'))
        print(fit)
        emit('p1/p0 =', fit.p[1] / fit.p[0], ' p3/p2 =', fit.p[3] / fit.p[2])
        emit('corr(p0,p1) =', gv.evalcorr(fit.p[:2])[1, 0])
        Nbs = 40
        outputs = {'p': [], 'p1/p0': [], 'p3/p2': []}
        for bsfit in fit.bootstrap_iter(n=Nbs):
            p = bsfit.pmean
            outputs['p'].append(p)
            outputs['p1/p0'].append(p[1] / p[0])
            outputs['p3/p2'].append(p[3] / p[2])
        print('\nBootstrap Averages:')
        outputs = gv.dataset.avg_data(outputs, bstrap=True)
        print(gv.tabulate(outputs))
        emit('corr(p0,p1) =', gv.evalcorr(outputs['p'][:2])[1, 0])

        # make histograms of p1/p0 and p3/p2
        sys.stdout = sys_stdout
        print('')
        sys.stdout = tee.tee(sys_stdout, open('eg3d.out', 'w'))
        print('Histogram Analysis:')
        count = {'p1/p0': [], 'p3/p2': []}
        hist = {
            'p1/p0': gv.PDFHistogram(fit.p[1] / fit.p[0]),
            'p3/p2': gv.PDFHistogram(fit.p[3] / fit.p[2]),
        }
        for bsfit in fit.bootstrap_iter(n=1000):
            p = bsfit.pmean
            count['p1/p0'].append(hist['p1/p0'].count(p[1] / p[0]))
            count['p3/p2'].append(hist['p3/p2'].count(p[3] / p[2]))
        count = gv.dataset.avg_data(count)
        plt.rcParams['figure.figsize'] = [6.4, 2.4]
        for pltnum, k in enumerate(count, 1):
            print(k + ':')
            print(hist[k].analyze(count[k]).stats)
            plt.subplot(1, 2, pltnum)
            plt.xlabel(k)
            hist[k].make_plot(count[k], plot=plt)
            if pltnum == 2:
                plt.ylabel('')
        plt.rcParams['figure.figsize'] = [6.4, 4.8]
        plt.savefig('eg3d.png', bbox_inches='tight')
        plt.show()

    if DO_BAYESIAN:
        gv.ranseed(123)
        sys.stdout = tee.tee(sys_stdout, open('eg3e.out', 'w'))
        print(fit)
        expval = lsqfit.BayesIntegrator(fit)

        # adapt integrator to PDF from fit
        neval = 1000
        nitn = 10
        expval(neval=neval, nitn=nitn)

        # <g(p)> gives mean and covariance matrix, and histograms
        hist = [
            gv.PDFHistogram(fit.p[0]), gv.PDFHistogram(fit.p[1]),
            gv.PDFHistogram(fit.p[2]), gv.PDFHistogram(fit.p[3]),
        ]

        def g(p):
            return dict(
                mean=p,
                outer=np.outer(p, p),
                count=[
                    hist[0].count(p[0]), hist[1].count(p[1]),
                    hist[2].count(p[2]), hist[3].count(p[3]),
                ],
            )

        # evaluate expectation value of g(p)
        results = expval(g, neval=neval, nitn=nitn, adapt=False)

        # analyze results
        print('\nIterations:')
        print(results.summary())
        print('Integration Results:')
        pmean = results['mean']
        pcov = results['outer'] - np.outer(pmean, pmean)
        emit(' mean(p) =', pmean)
        # The print statement wrote no separator after an item that ends in
        # a newline, so the matrix is appended directly here.
        print(' cov(p) =\n' + str(pcov))

        # create GVars from results
        p = gv.gvar(gv.mean(pmean), gv.mean(pcov))
        print('\nBayesian Parameters:')
        print(gv.tabulate(p))

        # show histograms
        print('\nHistogram Statistics:')
        count = results['count']
        for i in range(4):
            print('p[{}] -'.format(i))
            print(hist[i].analyze(count[i]).stats)
            plt.subplot(2, 2, i + 1)
            plt.xlabel('p[{}]'.format(i))
            hist[i].make_plot(count[i], plot=plt)
            if i % 2 != 0:
                plt.ylabel('')
        plt.savefig('eg3e.png', bbox_inches='tight')
        plt.show()

    if DO_SIMULATION:
        gv.ranseed(1234)
        sys.stdout = tee.tee(sys_stdout, open('eg3f.out', 'w'))
        print(40 * '*' + ' real fit')
        print(fit.format(True))
        for sfit in fit.simulated_fit_iter(n=3, add_priornoise=False):
            print(40 * '=' + ' simulation')
            print(sfit.format(True))
            diff = sfit.p - sfit.pexact
            emit('\nsfit.p - pexact =', diff)
            print(gv.fmt_chi2(gv.chi2(diff)))
            print('')

    # omit constraint
    sys.stdout = tee.tee(sys_stdout, open("eg3b.out", "w"))
    prior = gv.gvar(4 * ['0(1)'])
    prior[1] = gv.gvar('0(20)')
    fit = lsqfit.nonlinear_fit(prior=prior, data=(x, y), fcn=fcn)
    print(fit)
    emit('p1/p0 =', fit.p[1] / fit.p[0], ' p3/p2 =', fit.p[3] / fit.p[2])
    emit('corr(p0,p1) =', gv.evalcorr(fit.p[:2])[1, 0])