def test_simulation(self):
    """ CorrFitter.simulated_data_iter

    Builds an ``n``-configuration dataset from the first entry of
    ``self.data``, then checks two things for each of two simulated data
    sets produced by ``fitter.simulated_data_iter``:

    * the leading ``a`` and ``logdE`` parameters of a refit differ from
      the original fit's means by chi2/dof < 15;
    * the covariance of the simulated data matches the covariance of the
      averaged dataset.
    """
    models = [self.mkcorr(a="a", b="a", dE="dE", tp=None)]
    fitter = self.dofit(models)
    data = self.data
    k = list(data.keys())[0]
    # make n config dataset corresponding to data: the covariance is
    # scaled up by n before sampling n configurations
    n = 100
    diter = gv.raniter(
        g=gv.gvar(gv.mean(data[k]), gv.evalcov(data[k]) * n),
        n=n,
        )
    dataset = gv.dataset.Dataset()
    for d in diter:
        dataset.append(k, d)
    pexact = fitter.fit.pmean
    covexact = gv.evalcov(gv.dataset.avg_data(dataset)[k])
    for sdata in fitter.simulated_data_iter(n=2, dataset=dataset):
        sfit = fitter.lsqfit(
            data=sdata, prior=self.prior, p0=pexact, print_fit=False
            )
        # compare only the leading (index 0) entry of each parameter
        diff = dict()
        for i in ['a', 'logdE']:
            diff[i] = sfit.p[i][0] - pexact[i][0]
        c2 = gv.chi2(diff)
        self.assertLess(c2/c2.dof, 15.)
        self.assert_arraysclose(gv.evalcov(sdata[k]), covexact)
Example #2
0
def main():
    """Fit the 'etab.h5' data with growing priors, then cross-check with a noisy refit.

    Priors are built for nterm = 1..7 and every fit starts from the
    previous fit's parameter means.  The last fit is passed to
    ``print_results``, optionally plotted, and compared with a second fit
    of the same data made with ``noise=True``.
    """
    data, basis = make_data('etab.h5')
    fitter = cf.CorrFitter(models=make_models())
    start = None
    for nterm in range(1, 8):
        print(30 * '=', 'nterm =', nterm)
        prior = make_prior(nterm, basis)
        fit = fitter.lsqfit(data=data, prior=prior, p0=start, svdcut=SVDCUT)
        # pstyle is None for every fit except the last one (nterm = 7)
        pstyle = 'v' if nterm >= 7 else None
        print(fit.format(pstyle=pstyle))
        start = fit.pmean
    print_results(fit, basis, prior, data)
    if SHOWPLOTS:
        fit.show_plots(save='etab.{}.png', view='ratio')

    # refit with noise=True, starting from the final fit's means
    print('\n==================== add svd, prior noise')
    noisy_fit = fitter.lsqfit(
        data=data, prior=prior, p0=fit.pmean, svdcut=SVDCUT, noise=True,
    )
    print(noisy_fit.format(pstyle=None))

    # compare the first three 'etab.dE' entries of the two fits
    key = 'etab.dE'
    dE = fit.p[key][:3]
    noisy_dE = noisy_fit.p[key][:3]
    print('      dE:', dE)
    print('noisy dE:', noisy_dE)
    chi2 = gv.chi2(dE - noisy_dE)
    print('          ', gv.fmt_chi2(chi2))
    if SHOWPLOTS:
        fit.qqplot_residuals().show()
Example #3
0
def main():
    """Run the 'etab.h5' fits for nterm = 1..7 and compare with a noisy refit.

    Each fit is seeded with the parameter means of the one before it.
    After the loop the last fit is reported through ``print_results``,
    plots are displayed when ``DISPLAYPLOTS`` is set, and the data are
    fit once more with ``noise=True`` for comparison.
    """
    data, basis = make_data('etab.h5')
    fitter = cf.CorrFitter(models=make_models())

    previous_means = None
    for nterm in range(1, 8):
        print(30 * '=', 'nterm =', nterm)
        prior = make_prior(nterm, basis)
        fit = fitter.lsqfit(
            data=data, prior=prior, p0=previous_means, svdcut=SVDCUT,
            )
        if nterm < 7:
            report = fit.format(pstyle=None)
        else:
            report = fit.format(pstyle='m')
        print(report)
        previous_means = fit.pmean

    print_results(fit, basis, prior, data)
    if DISPLAYPLOTS:
        fitter.display_plots()

    print('\n==================== add svd, prior noise')
    noisy_fit = fitter.lsqfit(
        data=data, prior=prior, p0=fit.pmean, svdcut=SVDCUT, noise=True,
    )
    print(noisy_fit.format(pstyle=None))

    # first three 'etab.dE' entries from the plain and the noisy fit
    dE = fit.p['etab.dE'][:3]
    noisy_dE = noisy_fit.p['etab.dE'][:3]
    print('      dE:', dE)
    print('noisy dE:', noisy_dE)
    difference = dE - noisy_dE
    print('          ', gv.fmt_chi2(gv.chi2(difference)))
Example #4
0
def main():
    """Chained fits of the 'etas-Ds.h5' data, with noisy and simulated cross-checks.

    The model list handed to ``CorrFitter`` mixes individual models, a
    dictionary of fit options and a tuple of models.  Chained fits are run
    for nterm = 1..4, each seeded by the previous fit's means.  The last
    fit is then compared against (a) a refit with ``noise=True`` and (b)
    fits to two simulated data sets generated with ``p_exact=fit.pmean``.
    """
    data = make_data('etas-Ds.h5')
    base_models = make_models()                                         # 1a
    chain = [
        base_models[0], base_models[1],                                 # 1b
        {'nterm': (2, 1), 'svdcut': 6.3e-5},                            # 1c
        (base_models[2], base_models[3]),                               # 1d
        ]
    fitter = cf.CorrFitter(models=chain)                                # 1e

    start = None
    for nterm in (1, 2, 3, 4):
        print(30 * '=', 'nterm =', nterm)
        prior = make_prior(nterm)
        fit = fitter.chained_lsqfit(data=data, prior=prior, p0=start)   # 2
        # pstyle is None for every fit except the last one (nterm = 4)
        pstyle = 'm' if nterm >= 4 else None
        print(fit.format(pstyle=pstyle))
        start = fit.pmean
    print_results(fit, prior, data)
    if DISPLAYPLOTS:
        fit.show_plots()

    # refit with noise=True, starting from the final fit
    print('\n==================== add svd, prior noise')
    noisy_fit = fitter.chained_lsqfit(
        data=data, prior=prior, p0=fit.pmean, svdcut=SVDCUT, noise=True,
        )
    print(noisy_fit.format(pstyle=None))
    p = key_parameters(fit.p)
    noisy_p = key_parameters(noisy_fit.p)
    print('      fit:', p)
    print('noisy fit:', noisy_p)
    print('          ', gv.fmt_chi2(gv.chi2(p - noisy_p)))

    # fits to simulated data generated from the final fit's means
    simulations = fitter.simulated_pdata_iter(
        n=2, dataset=cf.read_dataset('etas-Ds.h5'), p_exact=fit.pmean
        )
    for sim_pdata in simulations:
        print('\n==================== simulation')
        sim_fit = fitter.chained_lsqfit(
            pdata=sim_pdata, prior=prior, p0=fit.pmean, svdcut=SVDCUT,
            )
        print(sim_fit.format(pstyle=None))
        p = key_parameters(fit.pmean)
        sim_p = key_parameters(sim_fit.p)
        print('simulated - exact:', sim_p - p)
        print('          ', gv.fmt_chi2(gv.chi2(p - sim_p)))
Example #5
0
def main():
    """Fit 'etas-Ds.h5' with a fixed prior while varying ``nterm``.

    A single prior from ``make_prior(8)`` is reused for fits with
    ``nterm=(1, 1)`` and ``nterm=(2, 2)``.  The ``nterm`` value of the last
    fit is then reused for a refit with ``noise=True`` and for fits to two
    simulated data sets generated with ``p_exact=fit.pmean``.
    """
    data = make_data('etas-Ds.h5')
    fitter = cf.CorrFitter(models=make_models())
    start = None
    prior = make_prior(8)                                               # 1
    for count in (1, 2):                                                # 2
        print(30 * '=', 'nterm =', count)
        nterm = (count, count)
        fit = fitter.lsqfit(
            data=data, prior=prior, p0=start, nterm=nterm, svdcut=SVDCUT,  # 3
            )
        print(fit)                                                      # 4
        start = fit.pmean
    print_results(fit, prior, data)
    if DISPLAYPLOTS:
        fit.show_plots()

    # refit with noise=True; nterm still holds the value of the last fit
    print('\n==================== add svd, prior noise')
    noisy_fit = fitter.lsqfit(
        data=data, prior=prior, p0=fit.pmean, svdcut=SVDCUT, nterm=nterm,
        noise=True,
        )
    print(noisy_fit.format(pstyle=None))
    p = key_parameters(fit.p)
    noisy_p = key_parameters(noisy_fit.p)
    print('      fit:', p)
    print('noisy fit:', noisy_p)
    print('          ', gv.fmt_chi2(gv.chi2(p - noisy_p)))

    # fits to simulated data generated from the final fit's means
    simulations = fitter.simulated_pdata_iter(
        n=2, dataset=cf.read_dataset('etas-Ds.h5'), p_exact=fit.pmean
        )
    for sim_pdata in simulations:
        print('\n==================== simulation')
        sim_fit = fitter.lsqfit(
            pdata=sim_pdata, prior=prior, p0=fit.pmean, svdcut=SVDCUT,
            nterm=nterm,
            )
        print(sim_fit.format(pstyle=None))
        p = key_parameters(fit.pmean)
        sim_p = key_parameters(sim_fit.p)
        print('simulated - exact:', sim_p - p)
        print('          ', gv.fmt_chi2(gv.chi2(p - sim_p)))
Example #6
0
def main():
    """Fit the 'Ds-Ds.h5' data, then cross-check with noisy and simulated fits.

    Priors are built for nterm = 1..4 and each fit is seeded with the
    previous fit's parameter means.  The final fit is reported, optionally
    plotted, and compared against (a) a refit with ``noise=True`` and
    (b) fits to two simulated data sets generated with
    ``p_exact=fit.pmean``.
    """
    data = make_data('Ds-Ds.h5')
    fitter = cf.CorrFitter(models=make_models())
    p0 = None
    for N in [1, 2, 3, 4]:
        print(30 * '=', 'nterm =', N)
        prior = make_prior(N)
        fit = fitter.lsqfit(data=data, prior=prior, p0=p0, svdcut=SVDCUT)
        # pstyle is None for every fit except the last one (N = 4)
        print(fit.format(pstyle=None if N < 4 else 'v'))
        p0 = fit.pmean
    print_results(fit, prior, data)
    if SHOWPLOTS:
        fit.show_plots(save='Ds-Ds.{}.png', view='ratio')

    # check fit quality by adding noise
    print('\n==================== add svd, prior noise')
    noisy_fit = fitter.lsqfit(
        data=data, prior=prior, p0=fit.pmean, svdcut=SVDCUT,
        noise=True,
        )
    print(noisy_fit.format(pstyle=None))
    p = key_parameters(fit.p)
    noisy_p = key_parameters(noisy_fit.p)
    print('      fit:', p)
    print('noisy fit:', noisy_p)
    print('          ', gv.fmt_chi2(gv.chi2(p - noisy_p)))

    # simulated fit
    # The HDF5 file is opened in a with-block so the handle is closed once
    # the simulations finish (or if one of the fits raises).
    with h5py.File('Ds-Ds.h5', 'r') as dataset:
        for sim_pdata in fitter.simulated_pdata_iter(
            n=2, dataset=dataset, p_exact=fit.pmean
            ):
            print('\n==================== simulation')
            sim_fit = fitter.lsqfit(
                pdata=sim_pdata, prior=prior, p0=fit.pmean, svdcut=SVDCUT,
                )
            print(sim_fit.format(pstyle=None))
            p = key_parameters(fit.pmean)
            sim_p = key_parameters(sim_fit.p)
            print('simulated - exact:', sim_p - p)
            print('          ', gv.fmt_chi2(gv.chi2(p - sim_p)))
Example #7
0
def f_outlier_list(fit):
    ''' Locate outliers in a fit.

    Evaluates ``fit.fcn(fit.x, fit.p)`` and compares it element by element
    with ``fit.y``.  Returns the list of positions whose ``gv.chi2`` against
    the fit function value is greater than 3.
    '''
    predicted = fit.fcn(fit.x, fit.p)
    observed = fit.y
    # keep the index of every point whose chi-square exceeds 3
    return [
        index
        for index, (obs, pred) in enumerate(zip(observed, predicted))
        if gv.chi2(obs, pred) > 3.0
    ]
Example #8
0
def test_fit(fitter, datafile):
    """ Test the fit with simulated data.

    Seeds the gvar random number generator, reads the dataset from
    ``datafile`` and refits two simulated data sets generated around
    ``fitter.fit.pmean``.  For each one it prints chi2/dof, dof and Q for
    the differences between fitted and exact values of the first element of
    every parameter in the prior.
    """
    gv.ranseed((1487942813, 775399747, 906327435))
    print('\nRandom seed:', gv.ranseed.seed)
    dataset = cf.read_dataset(datafile)
    pexact = fitter.fit.pmean
    prior = fitter.fit.prior
    simulations = fitter.simulated_data_iter(
        n=2, dataset=dataset, pexact=pexact
        )
    for sim_data in simulations:
        print('\n============================== simulation')
        sim_fit = fitter.lsqfit(data=sim_data, prior=prior, p0=pexact)
        # check chi**2 for leading parameters
        deltas = [
            sim_fit.p[key].flat[0] - pexact[key].flat[0] for key in prior
            ]
        chi2 = gv.chi2(deltas)
        ratio_text = 'Leading parameter chi2/dof [dof] = %.2f' % (chi2 / chi2.dof)
        dof_text = '[%d]' % chi2.dof
        q_text = '  Q = %.1f' % chi2.Q
        print(ratio_text, dof_text, q_text)
def test_fit(fitter, datafile):
    """ Test the fit with simulated data.

    Seeds the gvar random number generator, reads the dataset from
    ``datafile`` and refits two simulated data sets (with ``nterm=(2, 2)``)
    generated around ``fitter.fit.pmean``.  For each one it prints chi2/dof,
    dof and Q for the differences between fitted and exact values of the
    first element of every parameter in the prior.
    """
    gv.ranseed((623738625, 435880512, 1745400596))
    print('\nRandom seed:', gv.ranseed.seed)
    dataset = read_dataset(datafile)
    pexact = fitter.fit.pmean
    prior = fitter.fit.prior
    for simulated in fitter.simulated_data_iter(
            n=2, dataset=dataset, pexact=pexact):
        print('\n============================== simulation')
        refit = fitter.lsqfit(
            data=simulated, prior=prior, p0=pexact, nterm=(2, 2)
            )
        # check chi**2 for leading parameters
        leading_diff = [
            refit.p[name].flat[0] - pexact[name].flat[0] for name in prior
            ]
        chi2_diff = gv.chi2(leading_diff)
        summary = (
            'Leading parameter chi2/dof [dof] = %.2f'
            % (chi2_diff / chi2_diff.dof)
            )
        print(summary, '[%d]' % chi2_diff.dof, '  Q = %.1f' % chi2_diff.Q)
Example #10
0
def main():
    """Run the example fits and the optional follow-up analyses (Python 2 code).

    Fits the data from ``make_data()`` with ``f`` for nexp = 3 and 4, then,
    depending on the module-level flags, produces an error budget
    (DO_ERRORBUDGET), fits to simulated data (DO_SIMULATIONS), an
    empirical-Bayes fit (DO_EMPBAYES) and a plot of data / fit (DO_PLOT).
    The optional sections use ``fit``, ``E``, ``a``, ``prior``, ``nexp`` and
    ``p0`` as left behind by the last pass through the fit loop.

    NOTE(review): ``tee.tee`` is defined elsewhere; it presumably duplicates
    output to both of its arguments -- confirm.  The output files opened for
    it are never explicitly closed in this function.
    """
    gv.ranseed([2009,2010,2011,2012]) # initialize random numbers (opt.)
    x,y = make_data()               # make fit data
    p0 = None                       # make larger fits go faster (opt.)
    for nexp in range(3,5):
        print '************************************* nexp =',nexp
        prior = make_prior(nexp)
        fit = lsqfit.nonlinear_fit(data=(x,y),fcn=f,prior=prior,p0=p0)
        print fit                   # print the fit results
        E = fit.p['E']              # best-fit parameters
        a = fit.p['a']
        print 'E1/E0 =',E[1]/E[0],'  E2/E0 =',E[2]/E[0]
        print 'a1/a0 =',a[1]/a[0],'  a2/a0 =',a[2]/a[0]
        print
        # p0 is only updated when chi2/dof < 1; otherwise the old p0 is kept
        if fit.chi2/fit.dof<1.:
            p0 = fit.pmean          # starting point for next fit (opt.)
    # remember the current stream so every section below can restore it
    sys_stdout = sys.stdout
    if DO_ERRORBUDGET:

        # Every entry is echoed after ">>>".  Only entries starting with
        # "print" are evaluated (the text after "print" is passed to eval);
        # the two assignment entries are echoed but not executed, so E and a
        # are the values bound in the fit loop above.
        lines = [
            "E = fit.p['E']",
            "a = fit.p['a']",
            "print(E[1] / E[0])",
            "print((E[1] / E[0]).partialsdev(fit.prior['E']))",
            "print((E[1] / E[0]).partialsdev(fit.prior['a']))",
            "print((E[1] / E[0]).partialsdev(y))"
            ]
        sys.stdout = tee.tee(sys_stdout, open("eg4c.out","w"))
        for line in lines:
            print ">>>", line
            if line[:5] == "print":
                print(eval(line[5:]))
        # print E[1]/E[0]
        # print (E[1]/E[0]).partialsdev(fit.prior['E'])
        # print (E[1]/E[0]).partialsdev(fit.prior['a'])
        # print (E[1]/E[0]).partialsdev(y)
        # quantities to tabulate, and the inputs whose contributions are listed
        outputs = {'E1/E0':E[1]/E[0], 'E2/E0':E[2]/E[0],
                 'a1/a0':a[1]/a[0], 'a2/a0':a[2]/a[0]}
        inputs = {'E':fit.prior['E'],'a':fit.prior['a'],'y':y}

        sys.stdout = tee.tee(sys_stdout, open("eg4b.out","w"))
        print fit.fmt_values(outputs)
        print fit.fmt_errorbudget(outputs,inputs)
        sys.stdout = sys_stdout

    if DO_SIMULATIONS:
        # fit simulations
        sys.stdout = tee.tee(sys_stdout, open("eg4d.out","w"))

        for sfit in fit.simulated_fit_iter(3):
            print '************************************* simulation'
            print(sfit)
            sE = sfit.p['E']             # best-fit parameters
            sa = sfit.p['a']
            # E and a are rebound here to the values in sfit.pexact
            E = sfit.pexact['E']
            a = sfit.pexact['a']
            print 'E1/E0 =', sE[1] / sE[0], '  E2/E0 =', sE[2] / sE[0]
            print 'a1/a0 =', sa[1] / sa[0], '  a2/a0 =', sa[2] / sa[0]
            print '\nSimulated Fit Values - Exact Values:'
            print 'E1/E0:', (sE[1] / sE[0]) - (E[1] / E[0]),\
               '  E2/E0:', (sE[2] / sE[0]) - (E[2] / E[0])
            print 'a1/a0:', (sa[1] / sa[0]) - (a[1] / a[0]),\
               '  a2/a0:', (sa[2] / sa[0]) - (a[2] / a[0])

            # compute chi**2 comparing fit results to exact results
            # (only the first two entries of E and of a are compared)
            sim_results = [sE[0], sE[1], sa[0], sa[1]]
            exact_results = [E[0], E[1], a[0], a[1]]
            chi2 = gv.chi2(sim_results, exact_results, svdcut=1e-8)
            print '\nParameter chi2/dof [dof] = %.2f' % (chi2/chi2.dof), '[%d]' % chi2.dof, '  Q = %.1f' % chi2.Q
            print
        sys.stdout = sys_stdout

    if DO_EMPBAYES:
        # The default arguments bind nexp, prior, f, (x,y) and p0 as they
        # stand when this def executes, i.e. after the fit loop above.
        # Each call overwrites prior['a'] in that shared prior with nexp
        # copies of gvar(0.5, 0.5*exp(z)[0]).
        def fitargs(z,nexp=nexp,prior=prior,f=f,data=(x,y),p0=p0):
            z = gv.exp(z)
            prior['a'] = [gv.gvar(0.5,0.5*z[0]) for i in range(nexp)]
            return dict(prior=prior,data=data,fcn=f,p0=p0)
        ##
        z0 = [0.0]
        # fit is rebound to the fit returned by empbayes_fit
        fit,z = lsqfit.empbayes_fit(z0,fitargs,tol=1e-3)
        sys.stdout = tee.tee(sys_stdout, open("eg4a.out","w"))
        print fit                   # print the optimized fit results
        E = fit.p['E']              # best-fit parameters
        a = fit.p['a']
        print 'E1/E0 =',E[1]/E[0],'  E2/E0 =',E[2]/E[0]
        print 'a1/a0 =',a[1]/a[0],'  a2/a0 =',a[2]/a[0]
        # print "prior['a'] =",fit.prior['a'][0]
        sys.stdout = sys_stdout
        print

    if DO_PLOT:
        # plot the ratio of the data to the fit function evaluated at
        # fit.pmean, together with a reference line at 1
        import pylab as pp
        from gvar import mean,sdev
        fity = f(x,fit.pmean)
        ratio = y/fity
        pp.xlim(0,21)
        pp.xlabel('x')
        pp.ylabel('y/f(x,p)')
        pp.errorbar(x=x,y=mean(ratio),yerr=sdev(ratio),fmt='ob')
        pp.plot([0.0,21.0],[1.0,1.0])
        pp.show()
Example #11
0
def main():
    """Run the example fit and its optional follow-up analyses (Python 2 code).

    Fits the data from ``make_data()`` with ``fcn`` and the prior from
    ``make_prior()``, then, depending on the module-level flags, plots the
    fit (DO_PLOT), runs a bootstrap analysis (DO_BOOTSTRAP), evaluates
    Bayesian expectation values (DO_BAYESIAN) and fits simulated data
    (DO_SIMULATION).  Finally the fit is repeated with a different prior.
    Each section redirects ``sys.stdout`` through ``tee.tee`` to its own
    output file.

    NOTE(review): ``tee.tee`` is defined elsewhere; it presumably duplicates
    output to both of its arguments -- confirm.  The output files opened for
    it are never explicitly closed in the code visible here.
    """
    # remember the original stream; later sections build their tee from it
    sys_stdout = sys.stdout
    sys.stdout = tee.tee(sys.stdout, open("eg3a.out","w"))
    x, y = make_data()
    prior = make_prior()
    fit = lsqfit.nonlinear_fit(prior=prior, data=(x,y), fcn=fcn)
    print fit
    print 'p1/p0 =', fit.p[1] / fit.p[0], '    p3/p2 =', fit.p[3] / fit.p[2]
    print 'corr(p0,p1) =', gv.evalcorr(fit.p[:2])[1,0]

    if DO_PLOT:
        # data points with error bars in both x and y on a log x axis
        plt.semilogx()
        plt.errorbar(
            x=gv.mean(x), xerr=gv.sdev(x), y=gv.mean(y), yerr=gv.sdev(y),
            fmt='ob'
            )
        # plot fit line
        # (100 points from 1% below the smallest x mean to 1% above the largest)
        xx = np.linspace(0.99 * gv.mean(min(x)), 1.01 * gv.mean(max(x)), 100)
        yy = fcn(xx, fit.pmean)
        plt.xlabel('x')
        plt.ylabel('y')
        plt.plot(xx, yy, ':r')
        plt.savefig('eg3.png', bbox_inches='tight')
        plt.show()

    sys.stdout = sys_stdout
    if DO_BOOTSTRAP:
        gv.ranseed(123)
        sys.stdout = tee.tee(sys_stdout, open('eg3c.out', 'w'))
        print fit
        print 'p1/p0 =', fit.p[1] / fit.p[0], '    p3/p2 =', fit.p[3] / fit.p[2]
        print 'corr(p0,p1) =', gv.evalcorr(fit.p[:2])[1,0]
        # collect the parameter means and two ratios from Nbs bootstrap fits
        Nbs = 40
        outputs = {'p':[], 'p1/p0':[], 'p3/p2':[]}
        for bsfit in fit.bootstrap_iter(n=Nbs):
            p = bsfit.pmean
            outputs['p'].append(p)
            outputs['p1/p0'].append(p[1] / p[0])
            outputs['p3/p2'].append(p[3] / p[2])
        print '\nBootstrap Averages:'
        outputs = gv.dataset.avg_data(outputs, bstrap=True)
        print gv.tabulate(outputs)
        print 'corr(p0,p1) =', gv.evalcorr(outputs['p'][:2])[1,0]

        # make histograms of p1/p0 and p3/p2
        sys.stdout = sys_stdout
        print
        sys.stdout = tee.tee(sys_stdout, open('eg3d.out', 'w'))
        print 'Histogram Analysis:'
        count = {'p1/p0':[], 'p3/p2':[]}
        hist = {
            'p1/p0':gv.PDFHistogram(fit.p[1] / fit.p[0]),
            'p3/p2':gv.PDFHistogram(fit.p[3] / fit.p[2]),
            }
        # second, longer bootstrap run (n=1000) used only to fill the histograms
        for bsfit in fit.bootstrap_iter(n=1000):
            p = bsfit.pmean
            count['p1/p0'].append(hist['p1/p0'].count(p[1] / p[0]))
            count['p3/p2'].append(hist['p3/p2'].count(p[3] / p[2]))
        count = gv.dataset.avg_data(count)
        # figure size is changed for the side-by-side plots and set to
        # [6.4, 4.8] again before the figure is saved
        plt.rcParams['figure.figsize'] = [6.4, 2.4]
        pltnum = 1
        for k in count:
            print k + ':'
            print hist[k].analyze(count[k]).stats
            plt.subplot(1, 2, pltnum)
            plt.xlabel(k)
            hist[k].make_plot(count[k], plot=plt)
            # blank the y label on the second subplot
            if pltnum == 2:
                plt.ylabel('')
            pltnum += 1
        plt.rcParams['figure.figsize'] = [6.4, 4.8]
        plt.savefig('eg3d.png', bbox_inches='tight')
        plt.show()

    if DO_BAYESIAN:
        gv.ranseed(123)
        sys.stdout = tee.tee(sys_stdout, open('eg3e.out', 'w'))
        print fit
        expval = lsqfit.BayesIntegrator(fit)

        # adapt integrator to PDF from fit
        neval = 1000
        nitn = 10
        expval(neval=neval, nitn=nitn)

        # <g(p)> gives mean and covariance matrix, and histograms
        hist = [
            gv.PDFHistogram(fit.p[0]), gv.PDFHistogram(fit.p[1]),
            gv.PDFHistogram(fit.p[2]), gv.PDFHistogram(fit.p[3]),
            ]
        # integrand: the parameters, their outer product, and one histogram
        # count per parameter
        def g(p):
            return dict(
                mean=p,
                outer=np.outer(p, p),
                count=[
                    hist[0].count(p[0]), hist[1].count(p[1]),
                    hist[2].count(p[2]), hist[3].count(p[3]),
                    ],
                )

        # evaluate expectation value of g(p)
        results = expval(g, neval=neval, nitn=nitn, adapt=False)

        # analyze results
        print('\nIterations:')
        print(results.summary())
        print('Integration Results:')
        pmean = results['mean']
        # covariance = <p p^T> - <p><p>^T
        pcov =  results['outer'] - np.outer(pmean, pmean)
        print '    mean(p) =', pmean
        print '    cov(p) =\n', pcov

        # create GVars from results
        p = gv.gvar(gv.mean(pmean), gv.mean(pcov))
        print('\nBayesian Parameters:')
        print(gv.tabulate(p))

        # show histograms
        print('\nHistogram Statistics:')
        count = results['count']
        for i in range(4):
            print('p[{}] -'.format(i))
            print(hist[i].analyze(count[i]).stats)
            plt.subplot(2, 2, i + 1)
            plt.xlabel('p[{}]'.format(i))
            hist[i].make_plot(count[i], plot=plt)
            # blank the y label on the right-hand column of the 2x2 grid
            if i % 2 != 0:
                plt.ylabel('')
        plt.savefig('eg3e.png', bbox_inches='tight')
        plt.show()

    if DO_SIMULATION:
        gv.ranseed(1234)
        sys.stdout = tee.tee(sys_stdout, open('eg3f.out', 'w'))
        print(40 * '*' + ' real fit')
        print(fit.format(True))

        # NOTE(review): Q and p are initialised here but not used in the
        # visible part of this section.
        Q = []
        p = []
        for sfit in fit.simulated_fit_iter(n=3, add_priornoise=False):
            print(40 * '=' + ' simulation')
            print(sfit.format(True))
            diff = sfit.p - sfit.pexact
            print '\nsfit.p - pexact =', diff
            print(gv.fmt_chi2(gv.chi2(diff)))
            print

    # omit constraint
    # (refit with a prior of four 0(1) entries, the second replaced by 0(20);
    # prior and fit are rebound here)
    sys.stdout = tee.tee(sys_stdout, open("eg3b.out", "w"))
    prior = gv.gvar(4 * ['0(1)'])
    prior[1] = gv.gvar('0(20)')
    fit = lsqfit.nonlinear_fit(prior=prior, data=(x,y), fcn=fcn)
    print fit
    print 'p1/p0 =', fit.p[1] / fit.p[0], '    p3/p2 =', fit.p[3] / fit.p[2]
    print 'corr(p0,p1) =', gv.evalcorr(fit.p[:2])[1,0]