def test_marginalization(self):
    """MultiFitter.lsqfit(..., mopt=...)

    Runs a fit with ``mopt=True`` and checks that parameter ``'a'`` and the
    formatted chi**2 agree (as strings) with ``self.ref_fit``, and that
    ``'b'`` does not appear among the fit parameters.
    """
    models = self.make_models(ncg=1)
    fitter = MultiFitter(models=models)
    fit4 = fitter.lsqfit(data=self.data, prior=self.prior, mopt=True)

    reference = self.ref_fit
    # Compare through str()/fmt_chi2 so agreement is checked at printed precision.
    fitted_a = str(fit4.p['a'])
    expected_a = str(reference.p['a'])
    self.assertEqual(fitted_a, expected_a)
    self.assertEqual(gv.fmt_chi2(fit4), gv.fmt_chi2(reference))
    self.assertTrue('b' not in fit4.p)
def test_extend(self):
    """MultiFitter.lsqfit(..., extend=True)

    Replaces the prior for ``'a'`` by a prior for ``'log(a)'``, fits with
    ``extend=True``, and checks that ``'a'`` and the formatted chi**2 agree
    (as strings) with ``self.ref_fit`` and that ``'log(a)'`` is among the
    fit parameters.
    """
    fitter = MultiFitter(models=self.make_models(ncg=1))

    # Same prior information as self.prior, but with 'a' entered via its log.
    prior_items = [
        ('log(a)', gv.log(self.prior['a'])),
        ('b', self.prior['b']),
    ]
    prior = gv.BufferDict(prior_items)

    fit5 = fitter.lsqfit(data=self.data, prior=prior, extend=True)

    reference = self.ref_fit
    self.assertEqual(str(fit5.p['a']), str(reference.p['a']))
    self.assertEqual(gv.fmt_chi2(fit5), gv.fmt_chi2(reference))
    self.assertTrue('log(a)' in fit5.p)
def main():
    """Fit the 'etab.h5' data with 1..7 terms, then cross-check with noise.

    Each fit is seeded with the mean parameters of the previous one.  After
    the loop the last fit is reported through ``print_results`` and, when
    ``SHOWPLOTS`` is set, plotted.  A final fit with ``noise=True`` is
    compared with the last fit through the first three ``'etab.dE'`` values.
    """
    data, basis = make_data('etab.h5')
    fitter = cf.CorrFitter(models=make_models())

    start = None
    for nterm in range(1, 8):
        print(30 * '=', 'nterm =', nterm)
        prior = make_prior(nterm, basis)
        fit = fitter.lsqfit(data=data, prior=prior, p0=start, svdcut=SVDCUT)
        # Only the final (7-term) fit prints its parameter table.
        style = 'v' if nterm >= 7 else None
        print(fit.format(pstyle=style))
        start = fit.pmean

    print_results(fit, basis, prior, data)
    if SHOWPLOTS:
        fit.show_plots(save='etab.{}.png', view='ratio')

    # check fit quality by adding noise
    print('\n==================== add svd, prior noise')
    noisy_fit = fitter.lsqfit(
        data=data,
        prior=prior,
        p0=fit.pmean,
        svdcut=SVDCUT,
        noise=True,
    )
    print(noisy_fit.format(pstyle=None))

    first_three = slice(None, 3)
    dE = fit.p['etab.dE'][first_three]
    noisy_dE = noisy_fit.p['etab.dE'][first_three]
    print(' dE:', dE)
    print('noisy dE:', noisy_dE)
    difference = dE - noisy_dE
    print(' ', gv.fmt_chi2(gv.chi2(difference)))

    if SHOWPLOTS:
        fit.qqplot_residuals().show()
def main():
    """Fit the 'etab.h5' data with 1..7 terms, then cross-check with noise.

    Each fit is seeded with the mean parameters of the previous one.  After
    the loop the last fit is reported through ``print_results`` and, when
    ``DISPLAYPLOTS`` is set, ``fitter.display_plots()`` is called.  A final
    fit with ``noise=True`` is compared with the last fit through the first
    three ``'etab.dE'`` values.
    """
    data, basis = make_data('etab.h5')
    fitter = cf.CorrFitter(models=make_models())

    start = None
    for nterm in range(1, 8):
        print(30 * '=', 'nterm =', nterm)
        prior = make_prior(nterm, basis)
        fit = fitter.lsqfit(data=data, prior=prior, p0=start, svdcut=SVDCUT)
        # Only the final (7-term) fit prints its parameter table.
        style = 'm' if nterm >= 7 else None
        print(fit.format(pstyle=style))
        start = fit.pmean

    print_results(fit, basis, prior, data)
    if DISPLAYPLOTS:
        fitter.display_plots()

    print('\n==================== add svd, prior noise')
    noisy_fit = fitter.lsqfit(
        data=data,
        prior=prior,
        p0=fit.pmean,
        svdcut=SVDCUT,
        noise=True,
    )
    print(noisy_fit.format(pstyle=None))

    first_three = slice(None, 3)
    dE = fit.p['etab.dE'][first_three]
    noisy_dE = noisy_fit.p['etab.dE'][first_three]
    print(' dE:', dE)
    print('noisy dE:', noisy_dE)
    difference = dE - noisy_dE
    print(' ', gv.fmt_chi2(gv.chi2(difference)))
def main():
    """Chained fit of the 'etas-Ds.h5' data, with noise and simulation checks.

    The model list handed to ``CorrFitter`` interleaves two single models, a
    dictionary of fitter settings, and a tuple grouping the last two models.
    Fits with 1..4 terms are chained through ``p0``; the last fit is then
    reported, refitted with ``noise=True``, and compared against two
    simulated data sets generated from its mean parameters.
    """
    data = make_data('etas-Ds.h5')
    base_models = make_models()                                 # 1a
    chain = [
        base_models[0], base_models[1],                         # 1b
        {'nterm': (2, 1), 'svdcut': 6.3e-5},                    # 1c
        (base_models[2], base_models[3]),                       # 1d
    ]
    fitter = cf.CorrFitter(models=chain)                        # 1e

    start = None
    for nterm in [1, 2, 3, 4]:
        print(30 * '=', 'nterm =', nterm)
        prior = make_prior(nterm)
        fit = fitter.chained_lsqfit(data=data, prior=prior, p0=start)   # 2
        # Only the final (4-term) fit prints its parameter table.
        style = 'm' if nterm >= 4 else None
        print(fit.format(pstyle=style))
        start = fit.pmean

    print_results(fit, prior, data)
    if DISPLAYPLOTS:
        fit.show_plots()

    # check fit quality by adding noise
    print('\n==================== add svd, prior noise')
    noisy_fit = fitter.chained_lsqfit(
        data=data,
        prior=prior,
        p0=fit.pmean,
        svdcut=SVDCUT,
        noise=True,
    )
    print(noisy_fit.format(pstyle=None))
    p = key_parameters(fit.p)
    noisy_p = key_parameters(noisy_fit.p)
    print(' fit:', p)
    print('noisy fit:', noisy_p)
    print(' ', gv.fmt_chi2(gv.chi2(p - noisy_p)))

    # simulated fit
    simulations = fitter.simulated_pdata_iter(
        n=2,
        dataset=cf.read_dataset('etas-Ds.h5'),
        p_exact=fit.pmean,
    )
    for sim_pdata in simulations:
        print('\n==================== simulation')
        sim_fit = fitter.chained_lsqfit(
            pdata=sim_pdata,
            prior=prior,
            p0=fit.pmean,
            svdcut=SVDCUT,
        )
        print(sim_fit.format(pstyle=None))
        exact_p = key_parameters(fit.pmean)
        sim_p = key_parameters(sim_fit.p)
        print('simulated - exact:', sim_p - exact_p)
        print(' ', gv.fmt_chi2(gv.chi2(exact_p - sim_p)))
def main():
    """Fit 'etas-Ds.h5' with an 8-term prior while varying ``nterm``.

    One prior (``make_prior(8)``) is shared by every fit; the number of
    terms is controlled by the ``nterm=(N, N)`` argument for N = 1, 2.  The
    last fit is then reported, refitted with ``noise=True``, and compared
    against two simulated data sets generated from its mean parameters.
    The noise and simulation fits reuse the final value of N.
    """
    data = make_data('etas-Ds.h5')
    fitter = cf.CorrFitter(models=make_models())
    start = None
    prior = make_prior(8)                                       # 1

    for N in [1, 2]:                                            # 2
        print(30 * '=', 'nterm =', N)
        nterm = (N, N)
        fit = fitter.lsqfit(
            data=data, prior=prior, p0=start,
            nterm=nterm, svdcut=SVDCUT,                         # 3
        )
        print(fit)                                              # 4
        start = fit.pmean

    print_results(fit, prior, data)
    if DISPLAYPLOTS:
        fit.show_plots()

    # check fit quality by adding noise
    print('\n==================== add svd, prior noise')
    noisy_fit = fitter.lsqfit(
        data=data,
        prior=prior,
        p0=fit.pmean,
        svdcut=SVDCUT,
        nterm=(N, N),
        noise=True,
    )
    print(noisy_fit.format(pstyle=None))
    p = key_parameters(fit.p)
    noisy_p = key_parameters(noisy_fit.p)
    print(' fit:', p)
    print('noisy fit:', noisy_p)
    print(' ', gv.fmt_chi2(gv.chi2(p - noisy_p)))

    # simulated fit
    simulations = fitter.simulated_pdata_iter(
        n=2,
        dataset=cf.read_dataset('etas-Ds.h5'),
        p_exact=fit.pmean,
    )
    for sim_pdata in simulations:
        print('\n==================== simulation')
        sim_fit = fitter.lsqfit(
            pdata=sim_pdata,
            prior=prior,
            p0=fit.pmean,
            svdcut=SVDCUT,
            nterm=(N, N),
        )
        print(sim_fit.format(pstyle=None))
        exact_p = key_parameters(fit.pmean)
        sim_p = key_parameters(sim_fit.p)
        print('simulated - exact:', sim_p - exact_p)
        print(' ', gv.fmt_chi2(gv.chi2(exact_p - sim_p)))
def main():
    """Fit the 'Ds-Ds.h5' data with 1..4 terms, then run noise/simulation checks.

    Each fit is seeded with the mean parameters of the previous one.  The
    last fit is reported through ``print_results`` (and plotted when
    ``SHOWPLOTS`` is set), refitted with ``noise=True``, and finally compared
    against two simulated data sets generated from its mean parameters.

    The HDF5 file used as the simulation data set is opened in a ``with``
    block so that its handle is closed once the simulations finish, instead
    of being left open for the rest of the process.
    """
    data = make_data('Ds-Ds.h5')
    fitter = cf.CorrFitter(models=make_models())
    p0 = None
    for N in [1, 2, 3, 4]:
        print(30 * '=', 'nterm =', N)
        prior = make_prior(N)
        fit = fitter.lsqfit(data=data, prior=prior, p0=p0, svdcut=SVDCUT)
        # Only the final (4-term) fit prints its parameter table.
        print(fit.format(pstyle=None if N < 4 else 'v'))
        p0 = fit.pmean
    print_results(fit, prior, data)
    if SHOWPLOTS:
        fit.show_plots(save='Ds-Ds.{}.png', view='ratio')

    # check fit quality by adding noise
    print('\n==================== add svd, prior noise')
    noisy_fit = fitter.lsqfit(
        data=data, prior=prior, p0=fit.pmean, svdcut=SVDCUT,
        noise=True,
    )
    print(noisy_fit.format(pstyle=None))
    p = key_parameters(fit.p)
    noisy_p = key_parameters(noisy_fit.p)
    print(' fit:', p)
    print('noisy fit:', noisy_p)
    print(' ', gv.fmt_chi2(gv.chi2(p - noisy_p)))

    # simulated fit
    # The file must stay open while the iterator is consumed, so the whole
    # loop lives inside the ``with`` block.
    with h5py.File('Ds-Ds.h5', 'r') as dataset:
        for sim_pdata in fitter.simulated_pdata_iter(
            n=2, dataset=dataset, p_exact=fit.pmean
        ):
            print('\n==================== simulation')
            sim_fit = fitter.lsqfit(
                pdata=sim_pdata, prior=prior, p0=fit.pmean, svdcut=SVDCUT,
            )
            print(sim_fit.format(pstyle=None))
            p = key_parameters(fit.pmean)
            sim_p = key_parameters(sim_fit.p)
            print('simulated - exact:', sim_p - p)
            print(' ', gv.fmt_chi2(gv.chi2(p - sim_p)))
def main():
    """Run the example fit and its optional follow-up analyses.

    Steps, in order:

    1. Fit ``fcn`` to the data from ``make_data()`` with the prior from
       ``make_prior()`` and print the result (output also goes to
       ``eg3a.out`` through ``tee.tee``).
    2. If ``DO_PLOT``: plot data and fit line, save ``eg3.png``.
    3. If ``DO_BOOTSTRAP``: bootstrap averages (``eg3c.out``) and histograms
       of the ratios p1/p0 and p3/p2 (``eg3d.out``, ``eg3d.png``).
    4. If ``DO_BAYESIAN``: expectation values from
       ``lsqfit.BayesIntegrator`` (``eg3e.out``, ``eg3e.png``).
    5. If ``DO_SIMULATION``: three simulated fits (``eg3f.out``).
    6. Refit with a different prior and print the result (``eg3b.out``).

    This function uses Python 2 ``print`` statements.  ``sys.stdout`` is
    replaced several times and is left pointing at the last ``tee.tee``
    object on return; the files opened for those objects are not explicitly
    closed here.
    """
    # Keep the real stdout so it can be restored and re-wrapped below.
    sys_stdout = sys.stdout
    sys.stdout = tee.tee(sys.stdout, open("eg3a.out","w"))
    x, y = make_data()
    prior = make_prior()
    fit = lsqfit.nonlinear_fit(prior=prior, data=(x,y), fcn=fcn)
    print fit
    print 'p1/p0 =', fit.p[1] / fit.p[0], ' p3/p2 =', fit.p[3] / fit.p[2]
    print 'corr(p0,p1) =', gv.evalcorr(fit.p[:2])[1,0]

    if DO_PLOT:
        # data points with error bars in both x and y, logarithmic x axis
        plt.semilogx()
        plt.errorbar(
            x=gv.mean(x), xerr=gv.sdev(x), y=gv.mean(y), yerr=gv.sdev(y),
            fmt='ob'
            )
        # plot fit line
        xx = np.linspace(0.99 * gv.mean(min(x)), 1.01 * gv.mean(max(x)), 100)
        yy = fcn(xx, fit.pmean)
        plt.xlabel('x')
        plt.ylabel('y')
        plt.plot(xx, yy, ':r')
        plt.savefig('eg3.png', bbox_inches='tight')
        plt.show()

    # back to the plain stdout before the optional sections re-wrap it
    sys.stdout = sys_stdout
    if DO_BOOTSTRAP:
        # fixed seed for the random number generator
        gv.ranseed(123)
        sys.stdout = tee.tee(sys_stdout, open('eg3c.out', 'w'))
        print fit
        print 'p1/p0 =', fit.p[1] / fit.p[0], ' p3/p2 =', fit.p[3] / fit.p[2]
        print 'corr(p0,p1) =', gv.evalcorr(fit.p[:2])[1,0]
        Nbs = 40
        # collect the mean parameters and the two ratios from each bootstrap fit
        outputs = {'p':[], 'p1/p0':[], 'p3/p2':[]}
        for bsfit in fit.bootstrap_iter(n=Nbs):
            p = bsfit.pmean
            outputs['p'].append(p)
            outputs['p1/p0'].append(p[1] / p[0])
            outputs['p3/p2'].append(p[3] / p[2])
        print '\nBootstrap Averages:'
        outputs = gv.dataset.avg_data(outputs, bstrap=True)
        print gv.tabulate(outputs)
        print 'corr(p0,p1) =', gv.evalcorr(outputs['p'][:2])[1,0]

        # make histograms of p1/p0 and p3/p2
        # the blank line below is printed on the plain stdout only
        sys.stdout = sys_stdout
        print
        sys.stdout = tee.tee(sys_stdout, open('eg3d.out', 'w'))
        print 'Histogram Analysis:'
        count = {'p1/p0':[], 'p3/p2':[]}
        hist = {
            'p1/p0':gv.PDFHistogram(fit.p[1] / fit.p[0]),
            'p3/p2':gv.PDFHistogram(fit.p[3] / fit.p[2]),
            }
        for bsfit in fit.bootstrap_iter(n=1000):
            p = bsfit.pmean
            count['p1/p0'].append(hist['p1/p0'].count(p[1] / p[0]))
            count['p3/p2'].append(hist['p3/p2'].count(p[3] / p[2]))
        count = gv.dataset.avg_data(count)
        # figure size for the 1x2 panel; reset after the loop
        plt.rcParams['figure.figsize'] = [6.4, 2.4]
        pltnum = 1
        for k in count:
            print k + ':'
            print hist[k].analyze(count[k]).stats
            plt.subplot(1, 2, pltnum)
            plt.xlabel(k)
            hist[k].make_plot(count[k], plot=plt)
            # no y label on the second (right-hand) panel
            if pltnum == 2:
                plt.ylabel('')
            pltnum += 1
        plt.rcParams['figure.figsize'] = [6.4, 4.8]
        plt.savefig('eg3d.png', bbox_inches='tight')
        plt.show()

    if DO_BAYESIAN:
        gv.ranseed(123)
        sys.stdout = tee.tee(sys_stdout, open('eg3e.out', 'w'))
        print fit
        expval = lsqfit.BayesIntegrator(fit)

        # adapt integrator to PDF from fit
        neval = 1000
        nitn = 10
        expval(neval=neval, nitn=nitn)

        # <g(p)> gives mean and covariance matrix, and histograms
        hist = [
            gv.PDFHistogram(fit.p[0]), gv.PDFHistogram(fit.p[1]),
            gv.PDFHistogram(fit.p[2]), gv.PDFHistogram(fit.p[3]),
            ]
        def g(p):
            # quantities whose expectation values are wanted: p itself,
            # its outer product, and histogram counts for each component
            return dict(
                mean=p,
                outer=np.outer(p, p),
                count=[
                    hist[0].count(p[0]), hist[1].count(p[1]),
                    hist[2].count(p[2]), hist[3].count(p[3]),
                    ],
                )

        # evaluate expectation value of g(p)
        results = expval(g, neval=neval, nitn=nitn, adapt=False)

        # analyze results
        print('\nIterations:')
        print(results.summary())
        print('Integration Results:')
        pmean = results['mean']
        # covariance = <p p^T> - <p><p>^T
        pcov = results['outer'] - np.outer(pmean, pmean)
        print ' mean(p) =', pmean
        print ' cov(p) =\n', pcov

        # create GVars from results
        p = gv.gvar(gv.mean(pmean), gv.mean(pcov))
        print('\nBayesian Parameters:')
        print(gv.tabulate(p))

        # show histograms
        print('\nHistogram Statistics:')
        count = results['count']
        for i in range(4):
            print('p[{}] -'.format(i))
            print(hist[i].analyze(count[i]).stats)
            plt.subplot(2, 2, i + 1)
            plt.xlabel('p[{}]'.format(i))
            hist[i].make_plot(count[i], plot=plt)
            # no y label on the right-hand column of the 2x2 grid
            if i % 2 != 0:
                plt.ylabel('')
        plt.savefig('eg3e.png', bbox_inches='tight')
        plt.show()

    if DO_SIMULATION:
        gv.ranseed(1234)
        sys.stdout = tee.tee(sys_stdout, open('eg3f.out', 'w'))
        print(40 * '*' + ' real fit')
        print(fit.format(True))

        # NOTE(review): Q and p are assigned here but not read again in
        # this function.
        Q = []
        p = []
        for sfit in fit.simulated_fit_iter(n=3, add_priornoise=False):
            print(40 * '=' + ' simulation')
            print(sfit.format(True))
            # difference between the simulated fit's parameters and sfit.pexact
            diff = sfit.p - sfit.pexact
            print '\nsfit.p - pexact =', diff
            print(gv.fmt_chi2(gv.chi2(diff)))
            print

    # omit constraint
    sys.stdout = tee.tee(sys_stdout, open("eg3b.out", "w"))
    # replacement prior: four 0(1) entries, with entry 1 widened to 0(20)
    prior = gv.gvar(4 * ['0(1)'])
    prior[1] = gv.gvar('0(20)')
    fit = lsqfit.nonlinear_fit(prior=prior, data=(x,y), fcn=fcn)
    print fit
    print 'p1/p0 =', fit.p[1] / fit.p[0], ' p3/p2 =', fit.p[3] / fit.p[2]
    print 'corr(p0,p1) =', gv.evalcorr(fit.p[:2])[1,0]
def print_fit(fit, prior,do_v_symm=False):
    """Print the best-fit parameters of ``fit``, one line per array element.

    Keys of ``fit.p`` are processed in sorted order.  Each key is split on
    ``'_'``: the first piece is handed to ``ut.get_basekey`` and up to two
    further pieces are parsed as integers (``ikey``, ``jkey``).  The result
    of ``ut.get_basekey`` is indexed as ``bkey[0]`` (compared with ``'log'``
    and ``'sqrt'``) and ``bkey[1]`` (the name used for display and for
    recognising energies, ``gn``/``go`` and ``nn``/``no``/``on``/``oo``
    entries).  For energy-like names the running sum of the entries is
    printed, followed by the individual entry for j > 0.

    Parameters:
        fit: fit result; ``fit.p`` is iterated and indexed by key.
        prior: passed through to ``get_sigma_str``.
        do_v_symm: passed to ``fmt_reduced_chi2``.  Note that the branch for
            symmetric 3-point matrices tests ``df.do_v_symmetric`` rather
            than this argument.

    Raises:
        KeyError: if a key contains more than two underscores.

    This function uses Python 2 ``print`` statements.
    """
    ## -- print the fit parameters neatly
    ## -- give both summed and differential energies
    ## -- if variables fit as logs, give both log and linear
    ## --
    do_unicode=False
    do_sigdigit=True
    #
    # NOTE(review): in the flattened source it is ambiguous whether the next
    # print is live or was commented out -- confirm against the original.
    print ' '+gv.fmt_chi2(fit)
    print fmt_reduced_chi2(fit,do_v_symm)
    print
    print "Printing best fit parameters : "
    #
    for skey in sorted(fit.p):
        # split "<name>[_<i>[_<j>]]" into its pieces; -1 marks "no index"
        spkey = skey.split('_')
        keylen = len(spkey)
        if keylen == 1:
            ikey = -1
            jkey = -1
            ksuf = ''
        elif keylen == 2:
            ikey = int(spkey[1])
            jkey = -1
            ksuf = '_'+str(ikey)
        elif keylen == 3:
            ikey = int(spkey[1])
            jkey = int(spkey[2])
            ksuf = '_'+str(ikey)+'_'+str(jkey)
        else:
            raise KeyError("too many underscores in key name")
        bkey = ut.get_basekey(skey.split('_')[0])
        ## -- if variable was fit as a log, print log first
        if bkey[0] == 'log':
            # NOTE(review): efirst is assigned in each branch but never read.
            efirst=0.
            lkey=bkey[1]
            # the loop length comes from the entry without the 'log' prefix,
            # while the printed values are read from fit.p[skey]
            for j in range(len(fit.p[lkey+ksuf])):
                sigstr=get_sigma_str(lkey+ksuf,fit,prior,j,do_unicode)
                if (lkey[-2:] == 'En' or \
                    lkey[-2:] == 'Eo' or \
                    lkey[-1 ] == 'E') and keylen == 1:
                    if j > 0:
                        print '{:>10}'.format(bkey[0]+lkey+ksuf)+'['+'{:>2}'.format(j)+'] : '\
                            +ut.fmt_num(sum(fit.p[skey][:j+1]),do_sigdigit,do_unicode)\
                            +' '+sigstr+' | delE'+'['+'{:>2}'.format(j)+'] : '\
                            +ut.fmt_num(fit.p[skey][j],do_sigdigit,do_unicode)
                    ##else j==0 for energy
                    else:
                        print '{:>10}'.format(bkey[0]+lkey+ksuf)+'['+'{:>2}'.format(j)+'] : '\
                            +ut.fmt_num(sum(fit.p[skey][:j+1]),do_sigdigit,do_unicode)\
                            +' '+sigstr
                elif(lkey[-2:] == 'En' or \
                     lkey[-2:] == 'Eo' or \
                     lkey[-1 ] == 'E') and keylen > 1:
                    # offset added to the running sum: ikey copies of this
                    # key's own first entry
                    efst = 0
                    for i in range(ikey):
                        efst += fit.p[skey][0]
                        #print i,efst,lkey.split('_')[0]+'_'+str(i)
                    if j > 0:
                        print '{:>10}'.format(bkey[0]+lkey+ksuf)+'['+'{:>2}'.format(j)+'] : '\
                            +ut.fmt_num(efst+sum(fit.p[skey][:j+1]),do_sigdigit,do_unicode)\
                            +' '+sigstr+' | delE'+'['+'{:>2}'.format(j)+'] : '\
                            +ut.fmt_num(fit.p[skey][j],do_sigdigit,do_unicode)
                    ##else j==0 for energy
                    else:
                        print '{:>10}'.format(bkey[0]+lkey+ksuf)+'['+'{:>2}'.format(j)+'] : '\
                            +ut.fmt_num(efst+sum(fit.p[skey][:j+1]),do_sigdigit,do_unicode)\
                            +' '+sigstr
                ##else not energy
                else:
                    print '{:>10}'.format(bkey[0]+lkey+ksuf)+'['+'{:>2}'.format(j)+'] : '\
                        +ut.fmt_num(fit.p[skey][j],do_sigdigit,do_unicode)\
                        +' '+sigstr
        ##endif log
        elif bkey[0] == 'sqrt':
            #print "------"
            efirst=0.
            lkey=bkey[1]
            for j in range(len(fit.p[skey])):
                sigstr=get_sigma_str(lkey+ksuf,fit,prior,j,do_unicode)
                if (lkey[-2:] == 'En' or \
                    lkey[-2:] == 'Eo' or \
                    lkey[-1 ] == 'E'):
                    if j > 0:
                        print '{:>10}'.format(bkey[0]+lkey+ksuf)+'['+'{:>2}'.format(j)+'] : '\
                            +ut.fmt_num(sum(fit.p[skey][:j+1]),do_sigdigit,do_unicode)\
                            +' '+sigstr+' | delE'+'['+'{:>2}'.format(j)+'] : '\
                            +ut.fmt_num(fit.p[skey][j],do_sigdigit,do_unicode)
                    ##else j==0 for energy
                    else:
                        print '{:>10}'.format(bkey[0]+lkey+ksuf)+'['+'{:>2}'.format(j)+'] : '\
                            +ut.fmt_num(sum(fit.p[skey][:j+1]),do_sigdigit,do_unicode)\
                            +' '+sigstr
                ##else not energy
                else:
                    print '{:>10}'.format(bkey[0]+lkey+ksuf)+'['+'{:>2}'.format(j)+'] : '\
                        +ut.fmt_num(fit.p[skey][j],do_sigdigit,do_unicode)\
                        +' '+sigstr
        ##endif sqrt
        else: ## not log, sqrt
            efirst=0.
            for j in range(len(fit.p[skey])):
                # for nn/no/on/oo entries sigstr is computed later, inside
                # the branch that prints them
                if bkey[1][-2:] == 'nn' or bkey[1][-2:] == 'no' or\
                   bkey[1][-2:] == 'on' or bkey[1][-2:] == 'oo':
                    pass
                else:
                    sigstr=get_sigma_str(bkey[1]+ksuf,fit,prior,j,do_unicode)
                if (bkey[1][-2:] == 'En' or \
                    bkey[1][-2:] == 'Eo' or \
                    bkey[1][-1 ] == 'E'):
                    if j > 0:
                        print '{:>10}'.format(bkey[1]+ksuf)+'['+'{:>2}'.format(j)+'] : '\
                            +ut.fmt_num(sum(fit.p[skey][:j+1]),do_sigdigit,do_unicode)\
                            +' '+sigstr+' | delE'+'['+'{:>2}'.format(j)+'] : '\
                            +ut.fmt_num(fit.p[skey][j],do_sigdigit,do_unicode)
                    ##else j==0 for energy
                    else:
                        print '{:>10}'.format(bkey[1]+ksuf)+'['+'{:>2}'.format(j)+'] : '\
                            +ut.fmt_num(sum(fit.p[skey][:j+1]),do_sigdigit,do_unicode)\
                            +' '+sigstr
                # NOTE(review): this condition repeats the one on the `if`
                # above, so this branch can never be taken.
                elif(bkey[1][-2:] == 'En' or \
                     bkey[1][-2:] == 'Eo' or \
                     bkey[1][-1 ] == 'E'):
                    if j > 0:
                        print '{:>10}'.format(bkey[1]+ksuf)+'['+'{:>2}'.format(j)+'] : '\
                            +ut.fmt_num(sum(fit.p[skey][:j+1]),do_sigdigit,do_unicode)\
                            +' '+sigstr+' | delE'+'['+'{:>2}'.format(j)+'] : '\
                            +ut.fmt_num(fit.p[skey][j],do_sigdigit,do_unicode)
                    ##else j==0 for energy
                    else:
                        print '{:>10}'.format(bkey[1]+ksuf)+'['+'{:>2}'.format(j)+'] : '\
                            +ut.fmt_num(sum(fit.p[skey][:j+1]),do_sigdigit,do_unicode)\
                            +' '+sigstr
                elif(bkey[1][-2:] == 'gn' or \
                     bkey[1][-2:] == 'go'):
                    # for j > 0 the printed value is entry 0 plus entry j,
                    # followed by entry j on its own
                    if j > 0:
                        print '{:>10}'.format(bkey[1]+ksuf)+'['+'{:>2}'.format(j)+'] : '\
                            +ut.fmt_num(fit.p[skey][0]+fit.p[skey][j],\
                            do_sigdigit,do_unicode)\
                            +' '+sigstr+' | delg'+'['+'{:>2}'.format(j)+'] : '\
                            +ut.fmt_num(fit.p[skey][j],do_sigdigit,do_unicode)
                    ##else j==0 for energy
                    else:
                        print '{:>10}'.format(bkey[1]+ksuf)+'['+'{:>2}'.format(j)+'] : '\
                            +ut.fmt_num(fit.p[skey][0],do_sigdigit,do_unicode)\
                            +' '+sigstr
                ##else not energy
                else:
                    if bkey[1][-2:] == 'nn' or bkey[1][-2:] == 'no' or\
                       bkey[1][-2:] == 'on' or bkey[1][-2:] == 'oo':
                        if df.do_v_symmetric and\
                           ((bkey[1][-2:] == 'nn' or bkey[1][-2:] == 'oo') and\
                            (ikey == jkey) ):
                            # index offset applied to both printed indices
                            if (keylen > 1):
                                xi = 1
                            else:
                                xi = 0
                            ## -- upper triangle matrix 3-point factors
                            # invert len = vlen*(vlen+1)/2 to get the matrix size;
                            # relies on Python 2 integer division for an int vlen
                            vlen = int(np.sqrt(8*len(fit.p[skey])+1)-1)/2
                            ui = np.triu_indices(vlen)
                            # row/column of the j-th upper-triangle element
                            i = ui[0][j]+xi
                            k = ui[1][j]+xi
                            sigstr=get_sigma_str(bkey[1]+ksuf,fit,prior,j,do_unicode)
                            print '{:>10}'.format(bkey[1]+ksuf)+'['+'{:>2}'.format(i)+']'\
                                +'['+'{:>2}'.format(k)+'] : '\
                                +ut.fmt_num(fit.p[skey][j],do_sigdigit,do_unicode)\
                                +' '+sigstr
                        else:
                            ## -- print 3-point factors
                            # here fit.p[skey] is indexed with two indices [j][k]
                            for k in range(len(fit.p[skey][0])):
                                sigstr=get_sigma_str(bkey[1]+ksuf,fit,prior,(j,k),do_unicode)
                                print '{:>10}'.format(bkey[1]+ksuf)+'['+'{:>2}'.format(j)+']'\
                                    +'['+'{:>2}'.format(k)+'] : '\
                                    +ut.fmt_num(fit.p[skey][j][k],do_sigdigit,do_unicode)\
                                    +' '+sigstr
                    else:
                        print '{:>10}'.format(bkey[1]+ksuf)+'['+'{:>2}'.format(j)+'] : '\
                            +ut.fmt_num(fit.p[skey][j],do_sigdigit,do_unicode)\
                            +' '+sigstr
    #
    # NOTE(review): the nesting level of this closing separator is not
    # recoverable from the flattened source; placed after the key loop -- confirm.
    print "------"