def divide_and_conquer_correlations(dset):
 ## -- split up correlations into multiple small datasets and average each small set
 ##    take correlations of small sets and stitch them together
 ##    computations of small sets done in parallel
 ##    assumes all keys in dset take form '<correlator key>_<series configuration>'
 pflst = []
 ## -- create a list of prefixes
 for key in dset:
  skey = key.split('_')
  if skey[0] not in pflst:
   pflst.append(skey[0])
 
 rdat = gv.BufferDict()
 call = {}
 ## -- compute diagonals first to construct full dataset
 for key1 in pflst:
  ## -- handled separately because there may be more configurations in full sample
  tdat = compute_correlation_pair(dset,key1,key1)
  rdat[key1] = tdat[key1]
  call[key1,key1] = gv.evalcorr(tdat)[key1,key1]

 for i,key1 in enumerate(pflst):
  for j,key2 in enumerate(pflst):
   if i <= j:
    continue ## -- degenerate with i > j, i == j
   tdat = compute_correlation_pair(dset,key1,key2)
   cdat = gv.evalcorr(tdat)
   call[key1,key2] = cdat[key1,key2]
   call[key2,key1] = cdat[key2,key1]
 ## -- add correlations and return
 rdat = gv.correlate(rdat,call)
 return rdat
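## -- minimal usage sketch (illustration only, not part of the original source):
##    assumes compute_correlation_pair() is defined elsewhere in this module and
##    that 'correlators.bin' is a hypothetical file whose keys follow the
##    '<correlator key>_<series configuration>' convention described above
dset = gv.dataset.Dataset('correlators.bin')
rdat = divide_and_conquer_correlations(dset)
corr = gv.evalcorr(rdat) ## -- stitched correlation matrix of the averaged data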
def data_preparation():
    ################ Delta calculations 
    e_sym2_av =[]
    
    
    for h in range(6):
        e_sym2_av.append ( e_sym2[:,h]  )
        
    ##### Data for plotting purposes
    
    
    e_sym2_pot_av = gv.dataset.avg_data(e_sym2_av,spread=True)- 5/9*T_SM(td)
    
    e_sym2_pot_eff_av = gv.dataset.avg_data(e_sym2_av,spread=True) - T_2_eff(td)
    
    
    
    ### Data for Fitting purposes
    
    s = gv.dataset.svd_diagnosis(e_sym2_av)
    e_sym2_av = gv.dataset.avg_data(e_sym2_av,spread=True)

    e,ev = np.linalg.eig (gv.evalcorr (e_sym2_av) )
    d2 = np.std(  np.absolute(ev[0]) ) **2 
    # print ("N (delta) = ",np.size(td))
    # print ("l_corr (delta) = ", 1 - np.size(td)*d2 )
    
    ################ Eta calculations 
    e_sym2_eta_av =[]
    
    
    for h in range(6):
        e_sym2_eta_av.append ( esym2_eta[:,h]  )
        
    ### Data for plotting purposes
    
    
    e_sym2_eta_pot_av = gv.dataset.avg_data(e_sym2_eta_av,spread=True)- 5/9*T_SM(td)
    
    e_sym2_eta_pot_eff_av = gv.dataset.avg_data(e_sym2_eta_av,spread=True) -T_2_eff(td)
    
    
    
    ### Data for Fitting purposes
    
    s_eta = gv.dataset.svd_diagnosis(e_sym2_eta_av)
    e_sym2_eta_av = gv.dataset.avg_data(e_sym2_eta_av,spread=True)
    
    e,ev = np.linalg.eig (gv.evalcorr (e_sym2_eta_av) )
    d2 = np.std(  np.absolute(ev[0]) ) **2 
    # print ("N (eta) = ",np.size(td))
    # print ("l_corr (eta) = ", 1 - np.size(td)*d2 )
    
    return e_sym2_av,e_sym2_pot_av,e_sym2_pot_eff_av,s,\
        e_sym2_eta_av,e_sym2_eta_pot_av,e_sym2_eta_pot_eff_av,s_eta
Example #3
def main():
    x, y = make_data()
    prior = make_prior()
    fit = lsqfit.nonlinear_fit(prior=prior, data=(x, y), fcn=fcn)
    print(fit)
    print('p1/p0 =', fit.p[1] / fit.p[0], 'p3/p2 =', fit.p[3] / fit.p[2])
    print('corr(p0,p1) = {:.4f}'.format(gv.evalcorr(fit.p[:2])[1, 0]))
Example #4
def main():
    print(
        gv.ranseed(
            (2050203335594632366, 8881439510219835677, 2605204918634240925)))

    log_stdout('eg3a.out')
    integ = vegas.Integrator(4 * [[0, 1]])

    # adapt grid
    training = integ(f(), nitn=10, neval=1000)

    # evaluate multi-integrands
    result = integ(f(), nitn=10, neval=5000)
    print('I[0] =', result[0], '  I[1] =', result[1], '  I[2] =', result[2])
    print('Q = %.2f\n' % result.Q)
    print('<x> =', result[1] / result[0])
    print('sigma_x**2 = <x**2> - <x>**2 =',
          result[2] / result[0] - (result[1] / result[0])**2)
    print('\ncorrelation matrix:\n', gv.evalcorr(result))
    unlog_stdout()
    r = gv.gvar(gv.mean(result), gv.sdev(result))
    print(r[1] / r[0])
    print((r[1] / r[0]).sdev / (result[1] / result[0]).sdev)
    print(r[2] / r[0] - (r[1] / r[0])**2)
    print(result.summary())
Example #5
def nonlinear_shrink(samples, n_eff):
    """
    Shrink the correlation matrix using direct nonlinear shrinkage.

    Works as a wrapper function for shrink.direct_nl_shrink so that the call
    signature is similar to the linear shrinkage functions.

    Args:
        samples: array, of shape (nsamples, p)
        n_eff: the effective number of samples. Usually n <= nsamples
    Returns:
        array, the shrunken correlation matrix
    """
    LOGGER.info('Direct nonlinear shrinkage of correlation matrix.')
    LOGGER.info('Using effective number of samples n=%d.', n_eff)
    corr = gv.evalcorr(gv.dataset.avg_data(samples))
    # Decompose into eigenvalues
    vals, vecs = np.linalg.eig(corr)  # (eigvals, eigvecs)
    # Sort in descending order
    order = np.argsort(vals)[::-1]
    vals = vals[order]
    vecs = vecs[:, order]
    # Shrink the eigenvalue spectrum
    vals_shrink = shrink.direct_nl_shrink(vals, n_eff)
    # Reconstruct eigenvalue matrix: vecs x diag(vals) x vecs^T
    corr_shrink = np.matmul(vecs,
                            np.matmul(np.diag(vals_shrink), vecs.transpose()))
    # Match output of other shrink functions
    pair = (None, corr_shrink)
    return pair
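# Usage sketch (illustration only): apply the nonlinear shrinkage to synthetic
# samples. Assumes the `shrink` module providing direct_nl_shrink is importable,
# as in the function above; the data below are made up.
rng = np.random.default_rng(0)
samples = rng.normal(size=(200, 8))          # (nsamples, p)
_, corr_shrunk = nonlinear_shrink(samples, n_eff=100)
print(corr_shrunk.shape)                     # (8, 8) shrunken correlation matrix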
Example #6
    def plot_error_ellipsis(self, x_key, y_key, observable):
        x = self._get_posterior(x_key)[observable]
        y = self._get_posterior(y_key)[observable]

        fig, ax = plt.subplots()

        corr = '{0:.3g}'.format(gv.evalcorr([x, y])[0, 1])
        std_x = '{0:.3g}'.format(gv.sdev(x))
        std_y = '{0:.3g}'.format(gv.sdev(y))
        text = ('$R_{x, y}=$ %s\n $\\sigma_x =$ %s\n $\\sigma_y =$ %s' %
                (corr, std_x, std_y))

        # these are matplotlib.patch.Patch properties
        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)

        # place a text box in upper left in axes coords
        ax.text(0.05,
                0.95,
                text,
                transform=ax.transAxes,
                fontsize=14,
                verticalalignment='top',
                bbox=props)

        C = gv.evalcov([x, y])
        eVe, eVa = np.linalg.eig(C)
        for e, v in zip(eVe, eVa.T):
            plt.plot([
                gv.mean(x) - 1 * np.sqrt(e) * v[0],
                1 * np.sqrt(e) * v[0] + gv.mean(x)
            ], [
                gv.mean(y) - 1 * np.sqrt(e) * v[1],
                1 * np.sqrt(e) * v[1] + gv.mean(y)
            ],
                     'k-',
                     lw=2)

        #plt.scatter(x-np.mean(x), y-np.mean(y), rasterized=True, marker=".", alpha=100.0/self.bs_N)
        #plt.scatter(x, y, rasterized=True, marker=".", alpha=100.0/self.bs_N)

        plt.grid()
        plt.gca().set_aspect('equal', adjustable='datalim')
        plt.xlabel(x_key.replace('_', '\\_'), fontsize=24)
        plt.ylabel(y_key.replace('_', '\\_'), fontsize=24)

        fig = plt.gcf()
        plt.close()
        return fig
Example #7
    def visualize_correlations(self, channel):
        """
        Visualize the correlations by making heatmaps of the correlation
        and covariance matrices and by plotting their eigenvalue spectra.
        Args:
            channel: str, the name of the channel (e.g., 'f_parallel')
        Returns:
            (fig, axarr)
        """
        if channel not in self._valid_channels:
            raise ValueError("Unsupported channel", channel)
        dataframe = self.__getattribute__(channel)
        groups = dataframe.groupby('ens_id')
        ncols = len(groups)
        fig, axarr = plt.subplots(nrows=3, ncols=ncols, figsize=(5*ncols, 15))

        for idx, (ens_id, df) in enumerate(groups):
            ax_col = axarr[:, idx]
            ax1, ax2, ax3 = ax_col

            df = df.sort_values(by=['alias_light', 'alias_heavy', 'phat2'])
            corr = gv.evalcorr(df['form_factor'].values)
            sns.heatmap(corr, ax=ax1)
            cov = gv.evalcov(df['form_factor'].values)
            sns.heatmap(cov, ax=ax2)

            matrices = {'corr': corr, 'cov:full': cov, 'cov:diag': np.diag(cov)}
            markers = ['o', 's', '^']
            for (label, mat), marker in zip(matrices.items(), markers):
                if label == 'cov:diag':
                    w = mat
                else:
                    w = np.linalg.eigvals(mat)
                w = np.sort(w)[::-1]
                w /= max(w)
                ax3.plot(w, label=label, marker=marker)

            ax1.set_title(f"Correlation matrix: {ens_id}")
            ax2.set_title(f"Covariance matrix: {ens_id}")
            ax3.set_title(f"Eigenvalue spectra: {ens_id}")

            ax3.legend()
            ax3.set_yscale("log")

        return fig, axarr
Example #8
def fit_data(x, y, p):
    prior = make_priors(y, p)
    corr = gv.evalcorr(prior)
    #for k1 in prior:
    #    for k2 in prior:
    #        c = np.squeeze(corr[(k1,k2)])
    #        if c not in [1.0,0.0]:
    #            print k1, k2, c
    #        else: pass
    p['fv']['mpiL'] = y['mpiL']
    p['ma']['mpiL'] = y['mmaL']
    fitc = fit_functions(fv=p['fv'], ma=p['ma'])
    fit = lsqfit.nonlinear_fit(data=(x, y['y']),
                               prior=prior,
                               fcn=fitc.fit_switch,
                               maxit=1000000)
    print(fit.format('v'))
    return {'fit': fit, 'prior': prior, 'fitc': fitc}
Example #9
def main():
    print(gv.ranseed((1814855126, 100213625, 262796317)))

    log_stdout('eg3a.out')
    integ = vegas.Integrator(4 * [[0, 1]])

    # adapt grid
    training = integ(f(), nitn=10, neval=2000)

    # evaluate multi-integrands
    result = integ(f(), nitn=10, neval=10000)
    print('I[0] =', result[0], '  I[1] =', result[1], '  I[2] =', result[2])
    print('Q = %.2f\n' % result.Q)
    print('<x> =', result[1] / result[0])
    print('sigma_x**2 = <x**2> - <x>**2 =',
          result[2] / result[0] - (result[1] / result[0])**2)
    print('\ncorrelation matrix:\n', gv.evalcorr(result))
    unlog_stdout()
    r = gv.gvar(gv.mean(result), gv.sdev(result))
    print(r[1] / r[0])
    print((r[1] / r[0]).sdev / (result[1] / result[0]).sdev)
    print(r[2] / r[0] - (r[1] / r[0])**2)
    print((r[2] / r[0] - (r[1] / r[0])**2).sdev /
          (result[2] / result[0] - (result[1] / result[0])**2).sdev)
    print(result.summary())

    # do it again for a dictionary
    print(gv.ranseed((1814855126, 100213625, 262796317)))
    integ = vegas.Integrator(4 * [[0, 1]])

    # adapt grid
    training = integ(f(), nitn=10, neval=2000)

    # evaluate the integrals
    result = integ(fdict(), nitn=10, neval=10000)
    log_stdout('eg3b.out')
    print(result)
    print('Q = %.2f\n' % result.Q)
    print('<x> =', result['x'] / result['1'])
    print('sigma_x**2 = <x**2> - <x>**2 =',
          result['x**2'] / result['1'] - (result['x'] / result['1'])**2)
    unlog_stdout()
Example #10
min_den = 14
max_den = 17

m_e_inv_kf_red = np.zeros([max_den - min_den, 6, 2])

for i in range(min_den, max_den):
    m_e_inv_kf_red[i - min_den, :, :] = 1 / m_e_kf[i, :, :]

y_SM_1 = []
for h in range(6):
    y_SM_1.append(m_e_inv_kf_red[:, h, 0])

s = gv.dataset.svd_diagnosis(y_SM_1)
y_SM_1 = gv.dataset.avg_data(y_SM_1, spread=True)

e, ev = np.linalg.eig(gv.evalcorr(y_SM_1))
d2 = np.std(np.absolute(ev[0]))**2
print("l_corr (linear,SM) = ", 1 - (max_den - min_den) * d2)


def f_SM_1(x, p):
    ans = 1 + x * p['k1']
    return ans


prior_m_e_inv_SM_1 = {}
prior_m_e_inv_SM_1['k1'] = gv.gvar(0, 100)

x = np.arange(min_den + 1, max_den + 1, 1)
x = x * 0.01
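# A plausible next step (sketch, not part of the original snippet): fit the
# linear ansatz f_SM_1 with lsqfit, using the SVD cut suggested by
# svd_diagnosis above, in the same way as the kappa_SM/kappa_NM routines below.
fit_SM_1 = lsqfit.nonlinear_fit(data=(x, y_SM_1), prior=prior_m_e_inv_SM_1,
                                fcn=f_SM_1, debug=True, svdcut=s.svdcut)
par_SM_1 = fit_SM_1.p
print(fit_SM_1)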
Example #11
print('Scaling 1:')
print(SM1_par)

print('\n')

print('Scaling 2:')
print(SM2_par)

print('\n')

print('Scaling 3:')
print(SM3_par)

print('\n')

e, ev = np.linalg.eig(gv.evalcorr(e_SM_mod_av))
d2 = np.std(np.absolute(ev[0]))**2
print("N (Scale 1,SM) = ", np.size(d_SM))
print("l_corr (Scale 1,SM) = ", 1 - np.size(d_SM) * d2)

print('\n')

e, ev = np.linalg.eig(gv.evalcorr(te_SM_mod_av))
d2 = np.std(np.absolute(ev[0]))**2
print("N (Scale 2,SM) = ", np.size(td))
print("l_corr (Scale 2,SM) = ", 1 - np.size(td) * d2)

print('\n')

e, ev = np.linalg.eig(gv.evalcorr(te_SM_av))
d2 = np.std(np.absolute(ev[0]))**2
def compute_offdiagonal(args):
 ## -- single tuple argument so the function can be mapped over (dset,key1,key2) tuples
 dset,key1,key2 = args
 print("off-diagonal key ",(key1,key2))
 tdat = compute_correlation_pair(dset,key1,key2)
 return (key1,key2,gv.evalcorr(tdat)[key1,key2])
def compute_diagonal(args):
 ## -- single tuple argument so the function can be mapped over (dset,key) tuples
 dset,key = args
 print("diagonal key ",key)
 tdat = compute_correlation_pair(dset,key,key)
 return (key,gv.mean(tdat[key]),gv.sdev(tdat[key]),gv.evalcorr(tdat)[key,key])
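## -- sketch of how the helpers above could be dispatched in parallel
##    (illustration only; assumes dset and pflst are built as in
##    divide_and_conquer_correlations and that the arguments are picklable)
import multiprocessing
pool = multiprocessing.Pool(processes=4)
diag = pool.map(compute_diagonal,[(dset,key) for key in pflst])
offdiag = pool.map(compute_offdiagonal,
 [(dset,key1,key2) for i,key1 in enumerate(pflst) for j,key2 in enumerate(pflst) if i > j])
pool.close()
pool.join()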
Example #14
def main():
    sys_stdout = sys.stdout
    sys.stdout = tee.tee(sys.stdout, open("eg3a.out","w"))
    x, y = make_data()
    prior = make_prior()
    fit = lsqfit.nonlinear_fit(prior=prior, data=(x,y), fcn=fcn)
    print(fit)
    print('p1/p0 =', fit.p[1] / fit.p[0], '    p3/p2 =', fit.p[3] / fit.p[2])
    print('corr(p0,p1) =', gv.evalcorr(fit.p[:2])[1,0])

    if DO_PLOT:
        plt.semilogx()
        plt.errorbar(
            x=gv.mean(x), xerr=gv.sdev(x), y=gv.mean(y), yerr=gv.sdev(y),
            fmt='ob'
            )
        # plot fit line
        xx = np.linspace(0.99 * gv.mean(min(x)), 1.01 * gv.mean(max(x)), 100)
        yy = fcn(xx, fit.pmean)
        plt.xlabel('x')
        plt.ylabel('y')
        plt.plot(xx, yy, ':r')
        plt.savefig('eg3.png', bbox_inches='tight')
        plt.show()

    sys.stdout = sys_stdout
    if DO_BOOTSTRAP:
        gv.ranseed(123)
        sys.stdout = tee.tee(sys_stdout, open('eg3c.out', 'w'))
        print(fit)
        print('p1/p0 =', fit.p[1] / fit.p[0], '    p3/p2 =', fit.p[3] / fit.p[2])
        print('corr(p0,p1) =', gv.evalcorr(fit.p[:2])[1,0])
        Nbs = 40
        outputs = {'p':[], 'p1/p0':[], 'p3/p2':[]}
        for bsfit in fit.bootstrap_iter(n=Nbs):
            p = bsfit.pmean
            outputs['p'].append(p)
            outputs['p1/p0'].append(p[1] / p[0])
            outputs['p3/p2'].append(p[3] / p[2])
        print('\nBootstrap Averages:')
        outputs = gv.dataset.avg_data(outputs, bstrap=True)
        print(gv.tabulate(outputs))
        print('corr(p0,p1) =', gv.evalcorr(outputs['p'][:2])[1,0])

        # make histograms of p1/p0 and p3/p2
        sys.stdout = sys_stdout
        print()
        sys.stdout = tee.tee(sys_stdout, open('eg3d.out', 'w'))
        print('Histogram Analysis:')
        count = {'p1/p0':[], 'p3/p2':[]}
        hist = {
            'p1/p0':gv.PDFHistogram(fit.p[1] / fit.p[0]),
            'p3/p2':gv.PDFHistogram(fit.p[3] / fit.p[2]),
            }
        for bsfit in fit.bootstrap_iter(n=1000):
            p = bsfit.pmean
            count['p1/p0'].append(hist['p1/p0'].count(p[1] / p[0]))
            count['p3/p2'].append(hist['p3/p2'].count(p[3] / p[2]))
        count = gv.dataset.avg_data(count)
        plt.rcParams['figure.figsize'] = [6.4, 2.4]
        pltnum = 1
        for k in count:
            print(k + ':')
            print(hist[k].analyze(count[k]).stats)
            plt.subplot(1, 2, pltnum)
            plt.xlabel(k)
            hist[k].make_plot(count[k], plot=plt)
            if pltnum == 2:
                plt.ylabel('')
            pltnum += 1
        plt.rcParams['figure.figsize'] = [6.4, 4.8]
        plt.savefig('eg3d.png', bbox_inches='tight')
        plt.show()

    if DO_BAYESIAN:
        gv.ranseed(123)
        sys.stdout = tee.tee(sys_stdout, open('eg3e.out', 'w'))
        print(fit)
        expval = lsqfit.BayesIntegrator(fit)

        # adapt integrator to PDF from fit
        neval = 1000
        nitn = 10
        expval(neval=neval, nitn=nitn)

        # <g(p)> gives mean and covariance matrix, and histograms
        hist = [
            gv.PDFHistogram(fit.p[0]), gv.PDFHistogram(fit.p[1]),
            gv.PDFHistogram(fit.p[2]), gv.PDFHistogram(fit.p[3]),
            ]
        def g(p):
            return dict(
                mean=p,
                outer=np.outer(p, p),
                count=[
                    hist[0].count(p[0]), hist[1].count(p[1]),
                    hist[2].count(p[2]), hist[3].count(p[3]),
                    ],
                )

        # evaluate expectation value of g(p)
        results = expval(g, neval=neval, nitn=nitn, adapt=False)

        # analyze results
        print('\nIterations:')
        print(results.summary())
        print('Integration Results:')
        pmean = results['mean']
        pcov =  results['outer'] - np.outer(pmean, pmean)
        print('    mean(p) =', pmean)
        print('    cov(p) =\n', pcov)

        # create GVars from results
        p = gv.gvar(gv.mean(pmean), gv.mean(pcov))
        print('\nBayesian Parameters:')
        print(gv.tabulate(p))

        # show histograms
        print('\nHistogram Statistics:')
        count = results['count']
        for i in range(4):
            print('p[{}] -'.format(i))
            print(hist[i].analyze(count[i]).stats)
            plt.subplot(2, 2, i + 1)
            plt.xlabel('p[{}]'.format(i))
            hist[i].make_plot(count[i], plot=plt)
            if i % 2 != 0:
                plt.ylabel('')
        plt.savefig('eg3e.png', bbox_inches='tight')
        plt.show()

    if DO_SIMULATION:
        gv.ranseed(1234)
        sys.stdout = tee.tee(sys_stdout, open('eg3f.out', 'w'))
        print(40 * '*' + ' real fit')
        print(fit.format(True))

        Q = []
        p = []
        for sfit in fit.simulated_fit_iter(n=3, add_priornoise=False):
            print(40 * '=' + ' simulation')
            print(sfit.format(True))
            diff = sfit.p - sfit.pexact
            print('\nsfit.p - pexact =', diff)
            print(gv.fmt_chi2(gv.chi2(diff)))
            print()

    # omit constraint
    sys.stdout = tee.tee(sys_stdout, open("eg3b.out", "w"))
    prior = gv.gvar(4 * ['0(1)'])
    prior[1] = gv.gvar('0(20)')
    fit = lsqfit.nonlinear_fit(prior=prior, data=(x,y), fcn=fcn)
    print(fit)
    print('p1/p0 =', fit.p[1] / fit.p[0], '    p3/p2 =', fit.p[3] / fit.p[2])
    print('corr(p0,p1) =', gv.evalcorr(fit.p[:2])[1,0])
Example #15
def main():
    ### 1) least-squares fit to the data
    x = np.array([
        0.2, 0.4, 0.6, 0.8, 1.,
        1.2, 1.4, 1.6, 1.8, 2.,
        2.2, 2.4, 2.6, 2.8, 3.,
        3.2, 3.4, 3.6, 3.8
        ])
    y = gv.gvar([
        '0.38(20)', '2.89(20)', '0.85(20)', '0.59(20)', '2.88(20)',
        '1.44(20)', '0.73(20)', '1.23(20)', '1.68(20)', '1.36(20)',
        '1.51(20)', '1.73(20)', '2.16(20)', '1.85(20)', '2.00(20)',
        '2.11(20)', '2.75(20)', '0.86(20)', '2.73(20)'
        ])
    prior = make_prior()
    fit = lsqfit.nonlinear_fit(data=(x, y), prior=prior, fcn=fitfcn, extend=True)
    if LSQFIT_ONLY:
        sys.stdout = tee.tee(STDOUT, open('case-outliers-lsq.out', 'w'))
    elif not MULTI_W:
        sys.stdout = tee.tee(STDOUT, open('case-outliers.out', 'w'))
    print(fit)

    # plot data
    plt.errorbar(x, gv.mean(y), gv.sdev(y), fmt='o', c='b')

    # plot fit function
    xline = np.linspace(x[0], x[-1], 100)
    yline = fitfcn(xline, fit.p)
    plt.plot(xline, gv.mean(yline), 'k:')
    yp = gv.mean(yline) + gv.sdev(yline)
    ym = gv.mean(yline) - gv.sdev(yline)
    plt.fill_between(xline, yp, ym, color='0.8')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.savefig('case-outliers1.png', bbox_inches='tight')
    if LSQFIT_ONLY:
        return

    ### 2) Bayesian integral with modified PDF
    pdf = ModifiedPDF(data=(x, y), fcn=fitfcn, prior=prior)

    # integrator for expectation values with modified PDF
    expval = lsqfit.BayesIntegrator(fit, pdf=pdf)

    # adapt integrator to pdf
    expval(neval=1000, nitn=15)

    # evaluate expectation value of g(p)
    def g(p):
        w = 0.5 + 0.5 * p['2w-1']
        c = p['c']
        return dict(w=[w, w**2], mean=c, outer=np.outer(c,c))

    results = expval(g, neval=1000, nitn=15, adapt=False)
    print(results.summary())
    # expval.map.show_grid(15)

    if MULTI_W:
        sys.stdout = tee.tee(STDOUT, open('case-outliers-multi.out', 'w'))

    # parameters c[i]
    mean = results['mean']
    cov = results['outer'] - np.outer(mean, mean)
    c = mean + gv.gvar(np.zeros(mean.shape), gv.mean(cov))
    print('c =', c)
    print(
        'corr(c) =',
        np.array2string(gv.evalcorr(c), prefix=10 * ' '),
        '\n',
        )

    # parameter w
    wmean, w2mean = results['w']
    wsdev = gv.mean(w2mean - wmean ** 2) ** 0.5
    w = wmean + gv.gvar(np.zeros(np.shape(wmean)), wsdev)
    print('w =', w, '\n')

    # Bayes Factor
    print('logBF =', np.log(expval.norm))
    sys.stdout = STDOUT

    if MULTI_W:
        return

    # add new fit to plot
    yline = fitfcn(xline, dict(c=c))
    plt.plot(xline, gv.mean(yline), 'r--')
    yp = gv.mean(yline) + gv.sdev(yline)
    ym = gv.mean(yline) - gv.sdev(yline)
    plt.fill_between(xline, yp, ym, color='r', alpha=0.2)
    plt.savefig('case-outliers2.png', bbox_inches='tight')
 taglist.append(('l32v5.bar3pt.'+irrepStr+'.ayay.t06.p00','ayay','t6','16m'))
 taglist.append(('l32v5.bar3pt.'+irrepStr+'.ayay.t-7.p00','ayay','t7','16m'))
 taglist.append(('l32v5.bar3pt.'+irrepStr+'.azaz.t06.p00','azaz','t6','16m'))
 taglist.append(('l32v5.bar3pt.'+irrepStr+'.azaz.t-7.p00','azaz','t7','16m'))

## -- consolidated all loading into a single file:
start = time.time()
print "loading gvar data: start ",start
dall = standard_load(taglist,filekey,argsin)
print "end ",(time.time() - start)

## -- get entire correlation matrix
start = time.time()
print "making correlation: start ",start
#corall = gv.evalcorr(dall) ## -- super slow
corall = gv.evalcorr(dall.buf) ## -- uses precomputed data, need to slice data manually
print "end ",(time.time() - start)
print "making covariance : start ",start
covall = gv.evalcov(dall.buf) ## -- uses precomputed data, need to slice data manually
print "end ",(time.time() - start)

## -- test routines, print correlation eigenvalues, eigenvectors to file
#for testkey in ['s12','s21','s13','s31','s15','s51','s16','s61']:
#for testkey in ['aiais11t6','aiais22t6','aiais33t6','aiais55t6','aiais66t6']:
#for testkey in ['aiais11t7','aiais22t7','aiais33t7','aiais55t7','aiais66t7']:
#for testkey in ['s11','s22','s33','s55','s66']:
# evec = gvl.eigvalsh(corall[dall.slice(testkey),dall.slice(testkey)],True)
# f = open('corr.'+testkey+'.dat','w')
# f.write('#key         : '+testkey+'\n')
# f.write('#eigenvalues :\n')
# seval = str(evec[0][0])
Example #17
def kappa_NM(m_e_kf):

    min_den = 14
    max_den = 17
    
    m_e_inv_kf_red = np.zeros([max_den-min_den,6,2])
    
    for i in range(min_den,max_den):
            m_e_inv_kf_red[i-min_den,:,:] = 1/m_e_kf[i,:,:]
    
    y_NM_1 = []
    for h in range(6):
              y_NM_1.append (m_e_inv_kf_red[:,h,1])
    
    
    s = gv.dataset.svd_diagnosis(y_NM_1)
    y_NM_1 = gv.dataset.avg_data(y_NM_1,spread=True)
    
    e,ev = np.linalg.eig (gv.evalcorr (y_NM_1) )
    d2 = np.std(  np.absolute(ev[0]) ) **2 
    #print ("l_corr (linear,NM) = ", 1 - (max_den-min_den)*d2  )
    
    def f_NM_1 (x,p):
        ans =  1 + x*p['k1'] 
        return ans
        
    prior_m_e_inv_NM_1 = {}
    prior_m_e_inv_NM_1['k1'] = gv.gvar(0,100)
    
    
    x = np.arange(min_den+1 , max_den+1, 1)
    x = x*0.01
    
    fit = lsqfit.nonlinear_fit(data=(x, y_NM_1), prior=prior_m_e_inv_NM_1, fcn=f_NM_1, debug=True
                                  ,svdcut=0.25,add_svdnoise=False)
    #print (fit)
    
    par_NM_1 = fit.p
    
    ## Quadratic fit
    
    min_den = 6     
    max_den = 20
    
    m_e_inv_kf_red = np.zeros([max_den-min_den,6,2])
    
    for i in range(min_den,max_den):
            m_e_inv_kf_red[i-min_den,:,:] = 1/m_e_kf[i,:,:]
    
    y_NM_2 = []
    for h in range(6):
              y_NM_2.append (m_e_inv_kf_red[:,h,1])
    
    
    s = gv.dataset.svd_diagnosis(y_NM_2)
    y_NM_2 = gv.dataset.avg_data(y_NM_2,spread=True)
    
    e,ev = np.linalg.eig (gv.evalcorr (y_NM_2) )
    d2 = np.std(  np.absolute(ev[0]) ) **2 
    #print ("l_corr (quadratic,NM) = ", 1 - (max_den-min_den)*d2  )
    
    def f_NM_2 (x,p):
        ans =  1 + x*p['k1']  + x**2 * p['k2']
        return ans
        
    prior_m_e_inv_NM_2 = {}
    prior_m_e_inv_NM_2['k1'] = gv.gvar(0,100)
    prior_m_e_inv_NM_2['k2'] = gv.gvar(0,100)
    
    x = np.arange(min_den+1 , max_den+1, 1)
    x = x*0.01
    
    fit = lsqfit.nonlinear_fit(data=(x, y_NM_2), prior=prior_m_e_inv_NM_2, fcn=f_NM_2, debug=True
                                  ,svdcut=s.svdcut,add_svdnoise=False)
    #print (fit)
    
    par_NM_2 = fit.p
    
    
    
    return f_NM_1, par_NM_1,f_NM_2, par_NM_2
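# Usage sketch (illustration only): evaluate the fitted inverse effective-mass
# ratio in neutron matter on a density grid, assuming the m_e_kf array has been
# loaded elsewhere, as in the surrounding examples.
f1, par1, f2, par2 = kappa_NM(m_e_kf)
den = np.arange(0.07, 0.21, 0.01)
print(f2(den, par2))    # gvar-valued 1/(m*/m) along the density grid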
Example #18
def main():
    ### 1) least-squares fit to the data
    x = np.array([
        0.2, 0.4, 0.6, 0.8, 1., 1.2, 1.4, 1.6, 1.8, 2., 2.2, 2.4, 2.6, 2.8, 3.,
        3.2, 3.4, 3.6, 3.8
    ])
    y = gv.gvar([
        '0.38(20)', '2.89(20)', '0.85(20)', '0.59(20)', '2.88(20)', '1.44(20)',
        '0.73(20)', '1.23(20)', '1.68(20)', '1.36(20)', '1.51(20)', '1.73(20)',
        '2.16(20)', '1.85(20)', '2.00(20)', '2.11(20)', '2.75(20)', '0.86(20)',
        '2.73(20)'
    ])
    prior = make_prior()
    fit = lsqfit.nonlinear_fit(data=(x, y), prior=prior, fcn=fitfcn)
    if LSQFIT_ONLY:
        sys.stdout = tee.tee(STDOUT, open('case-outliers-lsq.out', 'w'))
    elif not MULTI_W:
        sys.stdout = tee.tee(STDOUT, open('case-outliers.out', 'w'))
    print(fit)

    # plot data
    plt.errorbar(x, gv.mean(y), gv.sdev(y), fmt='o', c='b')

    # plot fit function
    xline = np.linspace(x[0], x[-1], 100)
    yline = fitfcn(xline, fit.p)
    plt.plot(xline, gv.mean(yline), 'k:')
    yp = gv.mean(yline) + gv.sdev(yline)
    ym = gv.mean(yline) - gv.sdev(yline)
    plt.fill_between(xline, yp, ym, color='0.8')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.savefig('case-outliers1.png', bbox_inches='tight')
    if LSQFIT_ONLY:
        return

    ### 2) Bayesian integral with modified PDF
    pdf = ModifiedPDF(data=(x, y), fcn=fitfcn, prior=prior)

    # integrator for expectation values with modified PDF
    expval = lsqfit.BayesIntegrator(fit, pdf=pdf)

    # adapt integrator to pdf
    expval(neval=1000, nitn=15)

    # evaluate expectation value of g(p)
    def g(p):
        w = p['w']
        c = p['c']
        return dict(w=[w, w**2], mean=c, outer=np.outer(c, c))

    results = expval(g, neval=1000, nitn=15, adapt=False)
    print(results.summary())
    # expval.map.show_grid(15)

    if MULTI_W:
        sys.stdout = tee.tee(STDOUT, open('case-outliers-multi.out', 'w'))

    # parameters c[i]
    mean = results['mean']
    cov = results['outer'] - np.outer(mean, mean)
    c = mean + gv.gvar(np.zeros(mean.shape), gv.mean(cov))
    print('c =', c)
    print(
        'corr(c) =',
        np.array2string(gv.evalcorr(c), prefix=10 * ' '),
        '\n',
    )

    # parameter w
    wmean, w2mean = results['w']
    wsdev = gv.mean(w2mean - wmean**2)**0.5
    w = wmean + gv.gvar(np.zeros(np.shape(wmean)), wsdev)
    print('w =', w, '\n')

    # Bayes Factor
    print('logBF =', np.log(results.norm))
    sys.stdout = STDOUT

    if MULTI_W:
        return

    # add new fit to plot
    yline = fitfcn(xline, dict(c=c))
    plt.plot(xline, gv.mean(yline), 'r--')
    yp = gv.mean(yline) + gv.sdev(yline)
    ym = gv.mean(yline) - gv.sdev(yline)
    plt.fill_between(xline, yp, ym, color='r', alpha=0.2)
    plt.savefig('case-outliers2.png', bbox_inches='tight')
Example #19
    print "mN/fpi:", result['mN/fpi']
    # for andre
    andre = dict()
    andre['mpi'] = result['mpi'].mean
    andre['mka'] = result['mka'].mean
    andre['fpi'] = result['fpi'].mean
    andre['fka'] = result['fka'].mean
    andre['mN'] = result['mN'].mean
    andre['ZAll'] = result['ZAll'].mean
    andre['ZAls'] = result['ZAls'].mean
    andre['key'] = ['mpi', 'mka', 'fpi', 'fka', 'mN', 'ZAll', 'ZAls']
    andre['cov'] = gv.evalcov([
        result['mpi'], result['mka'], result['fpi'], result['fka'],
        result['mN'], result['ZAll'], result['ZAls']
    ]).tolist()
    andre['corr'] = gv.evalcorr([
        result['mpi'], result['mka'], result['fpi'], result['fka'],
        result['mN'], result['ZAll'], result['ZAls']
    ]).tolist()
    f = open(
        './flow_result/%s_%s_%s.yml' %
        (params['grand_ensemble']['ens']['tag'],
         params['grand_ensemble']['ml'], params['grand_ensemble']['ms']), 'w+')
    yaml.dump(andre, f)
    f.flush()
    f.close()
    # write output
    #pickle.dump(result, open('./pickle_result/flow%s_%s.pickle' %(params['grand_ensemble']['flow'], params['grand_ensemble']['ens']['tag']), 'wb'))
    #g = pickle.load(open('./pickle_result/flow%s_%s.pickle' %((params['grand_ensemble']['flow'],params['grand_ensemble']['ens']['tag']), 'rb'))
    #print g
Example #20
def kappa_SM(m_e_kf):
    min_den = 14
    max_den = 17
    
    m_e_inv_kf_red = np.zeros([max_den-min_den,6,2])
    
    for i in range(min_den,max_den):
            m_e_inv_kf_red[i-min_den,:,:] = 1/m_e_kf[i,:,:]
    
    y_SM_1 = []
    for h in range(6):
              y_SM_1.append (m_e_inv_kf_red[:,h,0])
    
    
    s = gv.dataset.svd_diagnosis(y_SM_1)
    y_SM_1 = gv.dataset.avg_data(y_SM_1,spread=True)
    
    e,ev = np.linalg.eig (gv.evalcorr (y_SM_1) )
    d2 = np.std(  np.absolute(ev[0]) ) **2 
    #print ("l_corr (linear,SM) = ", 1 - (max_den-min_den)*d2  )
    
    def f_SM_1 (x,p):
        ans =  1 + x*p['k1'] 
        return ans
        
    prior_m_e_inv_SM_1 = {}
    prior_m_e_inv_SM_1['k1'] = gv.gvar(0,100)
    
    
    x = np.arange(min_den+1 , max_den+1, 1)
    x = x*0.01
    
    fit = lsqfit.nonlinear_fit(data=(x, y_SM_1), prior=prior_m_e_inv_SM_1, fcn=f_SM_1, debug=True
                                  ,svdcut=s.svdcut,add_svdnoise=False)
    #print (fit)
    
    par_SM_1 = fit.p
    
    ### Quadratic Fit
    
    min_den = 6     
    max_den = 20
    
    m_e_inv_kf_red = np.zeros([max_den-min_den,6,2])
    
    for i in range(min_den,max_den):
            m_e_inv_kf_red[i-min_den,:,:] = 1/m_e_kf[i,:,:]
    
    y_SM_2 = []
    for h in range(6):
              y_SM_2.append (m_e_inv_kf_red[:,h,0])
    
    
    s = gv.dataset.svd_diagnosis(y_SM_2)
    y_SM_2 = gv.dataset.avg_data(y_SM_2,spread=True)
    
    e,ev = np.linalg.eig (gv.evalcorr (y_SM_2) )
    d2 = np.std(  np.absolute(ev[0]) ) **2 
    #print ("l_corr (quadratic,SM) = ", 1 - (max_den-min_den)*d2  )
    
    def f_SM_2 (x,p):
        ans =  1 + x*p['k1']  + x**2 * p['k2']
        return ans
        
    prior_m_e_inv_SM_2 = {}
    prior_m_e_inv_SM_2['k1'] = gv.gvar(0,100)
    prior_m_e_inv_SM_2['k2'] = gv.gvar(0,100)
    
    x = np.arange(min_den+1 , max_den+1, 1)
    x = x*0.01
    
    fit = lsqfit.nonlinear_fit(data=(x, y_SM_2), prior=prior_m_e_inv_SM_2, fcn=f_SM_2, debug=True
                                  ,svdcut=s.svdcut,add_svdnoise=False)
    
    par_SM_2 = fit.p
    
    ##### QQ plot 
    # 
    # residuals = fit.residuals
    # residuals = np.sort(residuals)
    # np.random.seed(73568478)
    # quantiles = np.random.normal(0, 1, np.size(residuals))
    # quantiles = np.sort(quantiles)
    # 
    # r2 = r2_score(residuals, quantiles)
    # r2 = np.around(r2,2)
    # 
    # z = np.polyfit(quantiles, residuals ,deg = 1 )
    # p = np.poly1d(z)
    # x = np.arange(np.min(quantiles),np.max(quantiles),0.001)
    # 
    # fig,ax = plt.subplots(1)
    # plt.plot (quantiles, residuals, 'ob')
    # plt.plot (x,p(x), color='blue')
    # plt.plot (x,x,'r--')
    # ax.text(0.1, 0.9, 'R ='+str(r2)+'' , transform = ax.transAxes,fontsize='13')
    # plt.xlabel ('Theoretical quantiles',fontsize='15')
    # plt.ylabel ('Ordered fit residuals',fontsize='15')
    # ax.tick_params(labelsize='14')
    # plt.show()
    
    
    return f_SM_1, par_SM_1,f_SM_2, par_SM_2
Example #21
def correct_covariance(data,
                       binsize=1,
                       shrink_choice=None,
                       ordered_tags=None,
                       bstrap=False,
                       inflate=1.0):
    """
    Correct the covariance using three steps:
    (a) adjust the size of the diagonal errors (via the variances)
        with "blocking" (a.ka. "binning") in Monte Carlo time,
    (b) adjust the correlations of the *full* dataset with shrinkage, and
    (c) combine the adjusted errors and correlation matrices.
    Args:
        data: dict with the full dataset.
        binsize: int, the binsize to use. Default is 1 (no binning).
        shrink_choice: str, which shrinkage scheme to use. Default is None
            (no shrinkage). Valid options: 'RBLW', 'OA', 'LW', and 'nonlinear'.
    Returns:
        final_cov: the final correct covariance "matrix" as a dictionary
    """
    if ordered_tags is None:
        ordered_tags = sorted(data.keys(), key=str)
    # shapes are (n,p), where n is nsamples and p is ndata
    try:
        sizes = [data[tag].shape[1] for tag in ordered_tags]
    except IndexError:
        # edge case: single datum per sample
        sizes = [1 for tag in ordered_tags]
    total_size = np.sum(sizes)

    shrink_fcns = {
        'RBLW': shrink.rblw_shrink_correlation_identity,
        'OA': shrink.oa_shrink_correlation_identity,
        'LW': shrink.lw_shrink_correlation_identity,
        'nonlinear': nonlinear_shrink,
    }

    # Estimate errors from binned variances
    binned_data = {tag: avg_bin(data[tag], binsize) for tag in ordered_tags}
    binned_cov = gv.evalcov(gv.dataset.avg_data(binned_data, bstrap=bstrap))
    binned_err = {}
    for key_pair in binned_cov:
        key1, key2 = key_pair
        if key1 == key2:
            binned_err[key1] =\
                inflate * np.diag(np.sqrt(np.diag(binned_cov[key_pair])))

    # Estimate correlations from shrunken correlation matrices
    if shrink_choice is None:
        # No shrinkage -- take correlations from full dataset
        corr_shrink = gv.evalcorr(
            gv.dataset.avg_data(
                {tag: binned_data[tag]
                 for tag in ordered_tags}))
    else:
        # Carry out the desired shrinkage
        samples = np.hstack([data[tag] for tag in ordered_tags])
        if total_size == len(ordered_tags):
            # edge case: single datum per sample
            samples = samples.reshape(-1, len(ordered_tags))
        kwargs = {}
        if shrink_choice == 'nonlinear':
            kwargs['n_eff'] = samples.shape[0] // binsize
        (_, corr_shrink_concat) = shrink_fcns[shrink_choice](samples, **kwargs)
        corr_shrink = decomp_blocks(corr_shrink_concat, ordered_tags, sizes)
    # Correlate errors according to the shrunken correlation matrix
    final_cov = {}
    for key_l, key_r in corr_shrink:
        # err x corr x err
        final_cov[(key_l, key_r)] = np.matmul(
            binned_err[key_l],
            np.matmul(corr_shrink[(key_l, key_r)], binned_err[key_r]))
    return final_cov
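# Usage sketch (illustration only, with made-up data): correct the covariance of
# two blocks of observables using binning plus 'OA' linear shrinkage. Assumes the
# avg_bin and decomp_blocks helpers and the shrink module used above are
# available in this module.
rng = np.random.default_rng(7)
raw = rng.normal(size=(400, 8))                       # 400 samples of 8 numbers
data = {'corr_a': raw[:, :4], 'corr_b': raw[:, 4:]}
final_cov = correct_covariance(data, binsize=2, shrink_choice='OA')
print(sorted(final_cov.keys()))                       # block keys like ('corr_a', 'corr_b')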
for h in range(6):
    e_sym2_av.append(e_sym2[:, h])

##### Data for plotting purposes

e_sym2_pot_av = gv.dataset.avg_data(e_sym2_av, spread=True) - 5 / 9 * T_SM(td)

e_sym2_pot_eff_av = gv.dataset.avg_data(e_sym2_av, spread=True) - T_2_eff(td)

##### Data for Fitting purposes

s = gv.dataset.svd_diagnosis(e_sym2_av)
e_sym2_av = gv.dataset.avg_data(e_sym2_av, spread=True)

e, ev = np.linalg.eig(gv.evalcorr(e_sym2_av))
d2 = np.std(np.absolute(ev[0]))**2
print("N (delta) = ", np.size(td))
print("l_corr (delta) = ", 1 - np.size(td) * d2)


def u(alpha, x):
    N = 4
    b_sat = 17
    return 1 - (-3 * x)**(N + 1 - alpha) * np.exp(-b_sat * (1 + 3 * x))


def V2(den, p):
    b_sym = 42 - 17
    x = (den - p['n_sat']) / (3 * p['n_sat'])
Example #23

def f(x, p):
    return p[0] + p[1] * np.exp(-p[2] * x)


p0 = [0.5, 0.4, 0.7]

N = 10000
x = np.linspace(0.2, 1.0, N)
y = make_fake_data(x, p0, f)

sys.stdout = tee.tee(sys_stdout, open('eg9a.out', 'w'))
print('x = [{}  {} ... {}]'.format(x[0], x[1], x[-1]))
print('y = [{}  {} ... {}]'.format(y[0], y[1], y[-1]))
print('corr(y[0],y[9999]) =', gv.evalcorr([y[0], y[-1]])[1, 0])
print()


# fit function and prior
def fcn(x, p):
    return p[0] + p[1] * np.exp(-p[2] * x)


prior = gv.gvar(['0(1)', '0(1)', '0(1)'])

# Nstride fits, each to nfit data points
nfit = 100
Nstride = len(y) // nfit
fit_time = 0.0
for n in range(0, Nstride):
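    # One plausible loop body (sketch; the original body is not shown here): fit
    # each successive chunk of nfit points independently and accumulate the total
    # fitting time. Assumes `import time` and `import lsqfit` at the top of the script.
    t0 = time.time()
    xn = x[n * nfit:(n + 1) * nfit]
    yn = y[n * nfit:(n + 1) * nfit]
    fit = lsqfit.nonlinear_fit(data=(xn, yn), prior=prior, fcn=fcn)
    fit_time += time.time() - t0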
Example #24
 def samples(self, n):
     dim = self.r.shape[1]
     H = linalg.hilbert(dim) / (2 * self.s**2)
     x = gv.gvar(self.r[0], H)
     print(gv.evalcorr(x))
     return np.array([rx for rx in gv.raniter(x, n)])
Example #25
import math
import vegas
import gvar as gv

def integrand(x):
    """
    Integrand function.
    """
    dx2 = 0.0
    for d in range(4):
        dx2 += (x[d] - 0.5) ** 2
    f = math.exp(-200 * dx2)
    # multi integral simultaneously, return a list.
    return [f, f * x[0], f * x[0] ** 2]

integ = vegas.Integrator(4 * [[0, 1]])

# adapt grid
training = integ(integrand, nitn=10, neval=2000)

# final analysis
result = integ(integrand, nitn=10, neval=1e4)
print('I[0] = {}   I[1] = {}   I[2] = {}'.format(*result))
print('Q = %.2f\n' % result.Q)

print('<x> = ', result[1] / result[0])
print('sigma_x^2 = <x^2> - <x>^2 = ',
      result[2] / result[0] - (result[1] / result[0]) ** 2)
print('\ncorrelation matrix:\n', gv.evalcorr(result))