def divide_and_conquer_correlations(dset):
    ## -- split up correlations into multiple small datasets and average each small set
    ## -- take correlations of small sets and stitch them together
    ## -- computations of small sets done in parallel
    ## -- assumes all keys in dset take the form '<correlator key>_<series configuration>'
    pflst = []
    ## -- create a list of prefixes
    for key in dset:
        skey = key.split('_')
        if not (skey[0] in pflst):
            pflst.append(skey[0])
    rdat = gv.BufferDict()
    call = {}
    ## -- compute diagonals first to construct full dataset
    for key1 in pflst:
        ## -- handled separately because there may be more configurations in the full sample
        tdat = compute_correlation_pair(dset, key1, key1)
        rdat[key1] = tdat[key1]
        call[key1, key1] = gv.evalcorr(tdat)[key1, key1]
    for i, key1 in enumerate(pflst):
        for j, key2 in enumerate(pflst):
            if i <= j:
                continue  ## -- degenerate with i > j; i == j done above
            tdat = compute_correlation_pair(dset, key1, key2)
            cdat = gv.evalcorr(tdat)
            call[key1, key2] = cdat[key1, key2]
            call[key2, key1] = cdat[key2, key1]
    ## -- add correlations and return
    rdat = gv.correlate(rdat, call)
    return rdat
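## -- A minimal sketch of the stitching step above, using only public gvar
## -- calls (gv.gvar, gv.correlate, gv.evalcorr). The dictionary form used in
## -- divide_and_conquer_correlations works the same way, keyed by key pairs.
import numpy as np
import gvar as gv

g = gv.gvar([1.0, 2.0], [0.1, 0.2])          # independent means and errors
corr = np.array([[1.0, 0.5], [0.5, 1.0]])    # externally assembled correlations
gc = gv.correlate(g, corr)                   # rebuild correlated GVars
print(gv.evalcorr(gc))                       # off-diagonal is now 0.5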
def data_preperation():
    ################ Delta calculations
    e_sym2_av = []
    for h in range(6):
        e_sym2_av.append(e_sym2[:, h])

    ##### Data for plotting purposes
    e_sym2_pot_av = gv.dataset.avg_data(e_sym2_av, spread=True) - 5 / 9 * T_SM(td)
    e_sym2_pot_eff_av = gv.dataset.avg_data(e_sym2_av, spread=True) - T_2_eff(td)

    ### Data for fitting purposes
    s = gv.dataset.svd_diagnosis(e_sym2_av)
    e_sym2_av = gv.dataset.avg_data(e_sym2_av, spread=True)
    e, ev = np.linalg.eig(gv.evalcorr(e_sym2_av))
    d2 = np.std(np.absolute(ev[0])) ** 2
    # print("N (delta) = ", np.size(td))
    # print("l_corr (delta) = ", 1 - np.size(td) * d2)

    ################ Eta calculations
    e_sym2_eta_av = []
    for h in range(6):
        e_sym2_eta_av.append(esym2_eta[:, h])

    ### Data for plotting purposes
    e_sym2_eta_pot_av = gv.dataset.avg_data(e_sym2_eta_av, spread=True) - 5 / 9 * T_SM(td)
    e_sym2_eta_pot_eff_av = gv.dataset.avg_data(e_sym2_eta_av, spread=True) - T_2_eff(td)

    ### Data for fitting purposes
    s_eta = gv.dataset.svd_diagnosis(e_sym2_eta_av)
    e_sym2_eta_av = gv.dataset.avg_data(e_sym2_eta_av, spread=True)
    e, ev = np.linalg.eig(gv.evalcorr(e_sym2_eta_av))
    d2 = np.std(np.absolute(ev[0])) ** 2
    # print("N (eta) = ", np.size(td))
    # print("l_corr (eta) = ", 1 - np.size(td) * d2)

    return (e_sym2_av, e_sym2_pot_av, e_sym2_pot_eff_av, s,
            e_sym2_eta_av, e_sym2_eta_pot_av, e_sym2_eta_pot_eff_av, s_eta)
def main():
    x, y = make_data()
    prior = make_prior()
    fit = lsqfit.nonlinear_fit(prior=prior, data=(x, y), fcn=fcn)
    print(fit)
    print('p1/p0 =', fit.p[1] / fit.p[0], 'p3/p2 =', fit.p[3] / fit.p[2])
    print('corr(p0,p1) = {:.4f}'.format(gv.evalcorr(fit.p[:2])[1, 0]))
def main():
    print(gv.ranseed(
        (2050203335594632366, 8881439510219835677, 2605204918634240925)))
    log_stdout('eg3a.out')
    integ = vegas.Integrator(4 * [[0, 1]])
    # adapt grid
    training = integ(f(), nitn=10, neval=1000)
    # evaluate multi-integrands
    result = integ(f(), nitn=10, neval=5000)
    print('I[0] =', result[0], ' I[1] =', result[1], ' I[2] =', result[2])
    print('Q = %.2f\n' % result.Q)
    print('<x> =', result[1] / result[0])
    print('sigma_x**2 = <x**2> - <x>**2 =',
          result[2] / result[0] - (result[1] / result[0]) ** 2)
    print('\ncorrelation matrix:\n', gv.evalcorr(result))
    unlog_stdout()
    r = gv.gvar(gv.mean(result), gv.sdev(result))
    print(r[1] / r[0])
    print((r[1] / r[0]).sdev / (result[1] / result[0]).sdev)
    print(r[2] / r[0] - (r[1] / r[0]) ** 2)
    print(result.summary())
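# Why the snippet rebuilds r from means and sdevs: gv.gvar(mean, sdev) drops
# the correlations carried by result, which changes the error on ratios.
# A self-contained illustration (not part of the vegas example above):
import gvar as gv

a, b = gv.gvar(['1.00(10)', '2.00(10)'])
b = b + a                                  # b is now correlated with a
ratio = b / a                              # propagation uses the correlation
stripped = gv.gvar(b.mean, b.sdev) / gv.gvar(a.mean, a.sdev)
print(ratio.sdev, stripped.sdev)           # the stripped error is larger here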
def nonlinear_shrink(samples, n_eff):
    """
    Shrink the correlation matrix using direct nonlinear shrinkage.
    Works as a wrapper function for shrink.direct_nl_shrink so that the
    call signature is similar to the linear shrinkage functions.
    Args:
        samples: array, of shape (nsamples, p)
        n_eff: the effective number of samples. Usually n_eff <= nsamples
    Returns:
        array, the shrunken correlation matrix
    """
    LOGGER.info('Direct nonlinear shrinkage of correlation matrix.')
    LOGGER.info('Using effective number of samples n=%d.', n_eff)
    corr = gv.evalcorr(gv.dataset.avg_data(samples))
    # Decompose into eigenvalues
    vals, vecs = np.linalg.eig(corr)  # (eigvals, eigvecs)
    # Sort in descending order
    order = np.argsort(vals)[::-1]
    vals = vals[order]
    vecs = vecs[:, order]
    # Shrink the eigenvalue spectrum
    vals_shrink = shrink.direct_nl_shrink(vals, n_eff)
    # Reconstruct eigenvalue matrix: vecs x diag(vals_shrink) x vecs^T
    corr_shrink = np.matmul(vecs,
                            np.matmul(np.diag(vals_shrink), vecs.transpose()))
    # Match the output of the other shrink functions
    pair = (None, corr_shrink)
    return pair
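# For contrast with the nonlinear wrapper above, a minimal sketch of *linear*
# shrinkage toward the identity (the idea behind the LW/OA/RBLW functions);
# here the weight lam is left as a free parameter rather than estimated.
import numpy as np

def linear_shrink_identity(corr, lam):
    """Return (1 - lam) * corr + lam * I, for 0 <= lam <= 1."""
    return (1.0 - lam) * corr + lam * np.eye(corr.shape[0])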
def plot_error_ellipsis(self, x_key, y_key, observable):
    x = self._get_posterior(x_key)[observable]
    y = self._get_posterior(y_key)[observable]

    fig, ax = plt.subplots()
    corr = '{0:.3g}'.format(gv.evalcorr([x, y])[0, 1])
    std_x = '{0:.3g}'.format(gv.sdev(x))
    std_y = '{0:.3g}'.format(gv.sdev(y))
    text = ('$R_{x, y}=$ %s\n $\\sigma_x =$ %s\n $\\sigma_y =$ %s'
            % (corr, std_x, std_y))

    # these are matplotlib.patch.Patch properties
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)

    # place a text box in upper left in axes coords
    ax.text(0.05, 0.95, text, transform=ax.transAxes, fontsize=14,
            verticalalignment='top', bbox=props)

    C = gv.evalcov([x, y])
    eVe, eVa = np.linalg.eig(C)   # eigenvalues, eigenvectors (as columns)
    for e, v in zip(eVe, eVa.T):
        plt.plot(
            [gv.mean(x) - 1 * np.sqrt(e) * v[0],
             1 * np.sqrt(e) * v[0] + gv.mean(x)],
            [gv.mean(y) - 1 * np.sqrt(e) * v[1],
             1 * np.sqrt(e) * v[1] + gv.mean(y)],
            'k-', lw=2)

    #plt.scatter(x - np.mean(x), y - np.mean(y), rasterized=True, marker=".", alpha=100.0/self.bs_N)
    #plt.scatter(x, y, rasterized=True, marker=".", alpha=100.0/self.bs_N)

    plt.grid()
    plt.gca().set_aspect('equal', adjustable='datalim')
    plt.xlabel(x_key.replace('_', r'\_'), fontsize=24)
    plt.ylabel(y_key.replace('_', r'\_'), fontsize=24)

    fig = plt.gcf()
    plt.close()
    return fig
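# Minimal standalone version of the eigen-decomposition used above, with two
# synthetic correlated GVars (the numbers are illustrative only): each
# eigenvalue/eigenvector pair of the covariance gives one ellipse semi-axis.
import numpy as np
import gvar as gv

xy = gv.correlate(gv.gvar([0.0, 0.0], [1.0, 0.5]),
                  np.array([[1.0, 0.8], [0.8, 1.0]]))
cov = gv.evalcov(xy)
vals, vecs = np.linalg.eig(cov)
for val, vec in zip(vals, vecs.T):   # columns of vecs are the eigenvectors
    print('semi-axis', np.sqrt(val), 'along', vec)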
def visualize_correlations(self, channel):
    """
    Visualize the correlations by making heatmaps of the correlation
    and covariance matrices and by plotting their eigenvalue spectra.
    Args:
        channel: str, the name of the channel (e.g., 'f_parallel')
    Returns:
        (fig, axarr)
    """
    if channel not in self._valid_channels:
        raise ValueError("Unsupported channel", channel)
    dataframe = self.__getattribute__(channel)
    groups = dataframe.groupby('ens_id')
    ncols = len(groups)
    fig, axarr = plt.subplots(nrows=3, ncols=ncols, figsize=(5 * ncols, 15))
    for idx, (ens_id, df) in enumerate(groups):
        ax_col = axarr[:, idx]
        ax1, ax2, ax3 = ax_col
        df = df.sort_values(by=['alias_light', 'alias_heavy', 'phat2'])
        corr = gv.evalcorr(df['form_factor'].values)
        sns.heatmap(corr, ax=ax1)
        cov = gv.evalcov(df['form_factor'].values)
        sns.heatmap(cov, ax=ax2)
        matrices = {'corr': corr, 'cov:full': cov, 'cov:diag': np.diag(cov)}
        markers = ['o', 's', '^']
        for (label, mat), marker in zip(matrices.items(), markers):
            if label == 'cov:diag':
                w = mat
            else:
                w = np.linalg.eigvals(mat)
            w = np.sort(w)[::-1]
            w /= max(w)
            ax3.plot(w, label=label, marker=marker)  # plt.plot has no 'ax' kwarg
        ax1.set_title(f"Correlation matrix: {ens_id}")
        ax2.set_title(f"Covariance matrix: {ens_id}")
        ax3.set_title(f"Eigenvalue spectra: {ens_id}")
        ax3.legend()
        ax3.set_yscale("log")
    return fig, axarr
def fit_data(x, y, p):
    prior = make_priors(y, p)
    corr = gv.evalcorr(prior)
    #for k1 in prior:
    #    for k2 in prior:
    #        c = np.squeeze(corr[(k1, k2)])
    #        if c not in [1.0, 0.0]:
    #            print(k1, k2, c)
    #        else: pass
    p['fv']['mpiL'] = y['mpiL']
    p['ma']['mpiL'] = y['mmaL']
    fitc = fit_functions(fv=p['fv'], ma=p['ma'])
    fit = lsqfit.nonlinear_fit(data=(x, y['y']), prior=prior,
                               fcn=fitc.fit_switch, maxit=1000000)
    print(fit.format('v'))
    return {'fit': fit, 'prior': prior, 'fitc': fitc}
def main():
    print(gv.ranseed((1814855126, 100213625, 262796317)))
    log_stdout('eg3a.out')
    integ = vegas.Integrator(4 * [[0, 1]])
    # adapt grid
    training = integ(f(), nitn=10, neval=2000)
    # evaluate multi-integrands
    result = integ(f(), nitn=10, neval=10000)
    print('I[0] =', result[0], ' I[1] =', result[1], ' I[2] =', result[2])
    print('Q = %.2f\n' % result.Q)
    print('<x> =', result[1] / result[0])
    print('sigma_x**2 = <x**2> - <x>**2 =',
          result[2] / result[0] - (result[1] / result[0]) ** 2)
    print('\ncorrelation matrix:\n', gv.evalcorr(result))
    unlog_stdout()
    r = gv.gvar(gv.mean(result), gv.sdev(result))
    print(r[1] / r[0])
    print((r[1] / r[0]).sdev / (result[1] / result[0]).sdev)
    print(r[2] / r[0] - (r[1] / r[0]) ** 2)
    print((r[2] / r[0] - (r[1] / r[0]) ** 2).sdev /
          (result[2] / result[0] - (result[1] / result[0]) ** 2).sdev)
    print(result.summary())

    # do it again for a dictionary
    print(gv.ranseed((1814855126, 100213625, 262796317)))
    integ = vegas.Integrator(4 * [[0, 1]])
    # adapt grid
    training = integ(f(), nitn=10, neval=2000)
    # evaluate the integrals
    result = integ(fdict(), nitn=10, neval=10000)
    log_stdout('eg3b.out')
    print(result)
    print('Q = %.2f\n' % result.Q)
    print('<x> =', result['x'] / result['1'])
    print('sigma_x**2 = <x**2> - <x>**2 =',
          result['x**2'] / result['1'] - (result['x'] / result['1']) ** 2)
    unlog_stdout()
min_den = 14
max_den = 17
m_e_inv_kf_red = np.zeros([max_den - min_den, 6, 2])
for i in range(min_den, max_den):
    m_e_inv_kf_red[i - min_den, :, :] = 1 / m_e_kf[i, :, :]

y_SM_1 = []
for h in range(6):
    y_SM_1.append(m_e_inv_kf_red[:, h, 0])

s = gv.dataset.svd_diagnosis(y_SM_1)
y_SM_1 = gv.dataset.avg_data(y_SM_1, spread=True)
e, ev = np.linalg.eig(gv.evalcorr(y_SM_1))
d2 = np.std(np.absolute(ev[0])) ** 2
print("l_corr (linear,SM) = ", 1 - (max_den - min_den) * d2)

def f_SM_1(x, p):
    ans = 1 + x * p['k1']
    return ans

prior_m_e_inv_SM_1 = {}
prior_m_e_inv_SM_1['k1'] = gv.gvar(0, 100)
x = np.arange(min_den + 1, max_den + 1, 1)
x = x * 0.01
print('Scaling 1:')
print(SM1_par)
print('\n')
print('Scaling 2:')
print(SM2_par)
print('\n')
print('Scaling 3:')
print(SM3_par)
print('\n')

e, ev = np.linalg.eig(gv.evalcorr(e_SM_mod_av))
d2 = np.std(np.absolute(ev[0])) ** 2
print("N (Scale 1,SM) = ", np.size(d_SM))
print("l_corr (Scale 1,SM) = ", 1 - np.size(d_SM) * d2)
print('\n')

e, ev = np.linalg.eig(gv.evalcorr(te_SM_mod_av))
d2 = np.std(np.absolute(ev[0])) ** 2
print("N (Scale 2,SM) = ", np.size(td))
print("l_corr (Scale 2,SM) = ", 1 - np.size(td) * d2)
print('\n')

e, ev = np.linalg.eig(gv.evalcorr(te_SM_av))
d2 = np.std(np.absolute(ev[0])) ** 2
def compute_offdiagonal(args):
    ## -- single packed argument (tuple-parameter syntax is Python 2 only)
    dset, key1, key2 = args
    print("off-diagonal key ", (key1, key2))
    tdat = compute_correlation_pair(dset, key1, key2)
    return (key1, key2, gv.evalcorr(tdat)[key1, key2])
def compute_diagonal(args):
    ## -- single packed argument (tuple-parameter syntax is Python 2 only)
    dset, key = args
    print("diagonal key ", key)
    tdat = compute_correlation_pair(dset, key, key)
    return (key, gv.mean(tdat[key]), gv.sdev(tdat[key]),
            gv.evalcorr(tdat)[key, key])
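## -- The packed-tuple argument in the two workers above is the usual idiom
## -- for multiprocessing map drivers, which pass one argument per task.
## -- A hedged sketch of such a driver (parallel_correlations and nproc are
## -- illustrative names, not from the original code):
from multiprocessing import Pool

def parallel_correlations(dset, pflst, nproc=4):
    diag_args = [(dset, key) for key in pflst]
    offd_args = [(dset, k1, k2)
                 for i, k1 in enumerate(pflst)
                 for j, k2 in enumerate(pflst) if i > j]
    with Pool(nproc) as pool:
        diag = pool.map(compute_diagonal, diag_args)
        offd = pool.map(compute_offdiagonal, offd_args)
    return diag, offd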
def main():
    sys_stdout = sys.stdout
    sys.stdout = tee.tee(sys.stdout, open("eg3a.out", "w"))
    x, y = make_data()
    prior = make_prior()
    fit = lsqfit.nonlinear_fit(prior=prior, data=(x, y), fcn=fcn)
    print(fit)
    print('p1/p0 =', fit.p[1] / fit.p[0], ' p3/p2 =', fit.p[3] / fit.p[2])
    print('corr(p0,p1) =', gv.evalcorr(fit.p[:2])[1, 0])
    if DO_PLOT:
        plt.semilogx()
        plt.errorbar(
            x=gv.mean(x), xerr=gv.sdev(x),
            y=gv.mean(y), yerr=gv.sdev(y),
            fmt='ob'
            )
        # plot fit line
        xx = np.linspace(0.99 * gv.mean(min(x)), 1.01 * gv.mean(max(x)), 100)
        yy = fcn(xx, fit.pmean)
        plt.xlabel('x')
        plt.ylabel('y')
        plt.plot(xx, yy, ':r')
        plt.savefig('eg3.png', bbox_inches='tight')
        plt.show()
    sys.stdout = sys_stdout

    if DO_BOOTSTRAP:
        gv.ranseed(123)
        sys.stdout = tee.tee(sys_stdout, open('eg3c.out', 'w'))
        print(fit)
        print('p1/p0 =', fit.p[1] / fit.p[0], ' p3/p2 =', fit.p[3] / fit.p[2])
        print('corr(p0,p1) =', gv.evalcorr(fit.p[:2])[1, 0])
        Nbs = 40
        outputs = {'p': [], 'p1/p0': [], 'p3/p2': []}
        for bsfit in fit.bootstrap_iter(n=Nbs):
            p = bsfit.pmean
            outputs['p'].append(p)
            outputs['p1/p0'].append(p[1] / p[0])
            outputs['p3/p2'].append(p[3] / p[2])
        print('\nBootstrap Averages:')
        outputs = gv.dataset.avg_data(outputs, bstrap=True)
        print(gv.tabulate(outputs))
        print('corr(p0,p1) =', gv.evalcorr(outputs['p'][:2])[1, 0])

        # make histograms of p1/p0 and p3/p2
        sys.stdout = sys_stdout
        print()
        sys.stdout = tee.tee(sys_stdout, open('eg3d.out', 'w'))
        print('Histogram Analysis:')
        count = {'p1/p0': [], 'p3/p2': []}
        hist = {
            'p1/p0': gv.PDFHistogram(fit.p[1] / fit.p[0]),
            'p3/p2': gv.PDFHistogram(fit.p[3] / fit.p[2]),
            }
        for bsfit in fit.bootstrap_iter(n=1000):
            p = bsfit.pmean
            count['p1/p0'].append(hist['p1/p0'].count(p[1] / p[0]))
            count['p3/p2'].append(hist['p3/p2'].count(p[3] / p[2]))
        count = gv.dataset.avg_data(count)
        plt.rcParams['figure.figsize'] = [6.4, 2.4]
        pltnum = 1
        for k in count:
            print(k + ':')
            print(hist[k].analyze(count[k]).stats)
            plt.subplot(1, 2, pltnum)
            plt.xlabel(k)
            hist[k].make_plot(count[k], plot=plt)
            if pltnum == 2:
                plt.ylabel('')
            pltnum += 1
        plt.rcParams['figure.figsize'] = [6.4, 4.8]
        plt.savefig('eg3d.png', bbox_inches='tight')
        plt.show()

    if DO_BAYESIAN:
        gv.ranseed(123)
        sys.stdout = tee.tee(sys_stdout, open('eg3e.out', 'w'))
        print(fit)
        expval = lsqfit.BayesIntegrator(fit)
        # adapt integrator to PDF from fit
        neval = 1000
        nitn = 10
        expval(neval=neval, nitn=nitn)
        # <g(p)> gives mean and covariance matrix, and histograms
        hist = [
            gv.PDFHistogram(fit.p[0]), gv.PDFHistogram(fit.p[1]),
            gv.PDFHistogram(fit.p[2]), gv.PDFHistogram(fit.p[3]),
            ]
        def g(p):
            return dict(
                mean=p,
                outer=np.outer(p, p),
                count=[
                    hist[0].count(p[0]), hist[1].count(p[1]),
                    hist[2].count(p[2]), hist[3].count(p[3]),
                    ],
                )
        # evaluate expectation value of g(p)
        results = expval(g, neval=neval, nitn=nitn, adapt=False)
        # analyze results
        print('\nIterations:')
        print(results.summary())
        print('Integration Results:')
        pmean = results['mean']
        pcov = results['outer'] - np.outer(pmean, pmean)
        print('    mean(p) =', pmean)
        print('    cov(p) =\n', pcov)
        # create GVars from results
        p = gv.gvar(gv.mean(pmean), gv.mean(pcov))
        print('\nBayesian Parameters:')
        print(gv.tabulate(p))
        # show histograms
        print('\nHistogram Statistics:')
        count = results['count']
        for i in range(4):
            print('p[{}] -'.format(i))
            print(hist[i].analyze(count[i]).stats)
            plt.subplot(2, 2, i + 1)
            plt.xlabel('p[{}]'.format(i))
            hist[i].make_plot(count[i], plot=plt)
            if i % 2 != 0:
                plt.ylabel('')
        plt.savefig('eg3e.png', bbox_inches='tight')
        plt.show()

    if DO_SIMULATION:
        gv.ranseed(1234)
        sys.stdout = tee.tee(sys_stdout, open('eg3f.out', 'w'))
        print(40 * '*' + ' real fit')
        print(fit.format(True))
        Q = []
        p = []
        for sfit in fit.simulated_fit_iter(n=3, add_priornoise=False):
            print(40 * '=' + ' simulation')
            print(sfit.format(True))
            diff = sfit.p - sfit.pexact
            print('\nsfit.p - pexact =', diff)
            print(gv.fmt_chi2(gv.chi2(diff)))
            print()

    # omit constraint
    sys.stdout = tee.tee(sys_stdout, open("eg3b.out", "w"))
    prior = gv.gvar(4 * ['0(1)'])
    prior[1] = gv.gvar('0(20)')
    fit = lsqfit.nonlinear_fit(prior=prior, data=(x, y), fcn=fcn)
    print(fit)
    print('p1/p0 =', fit.p[1] / fit.p[0], ' p3/p2 =', fit.p[3] / fit.p[2])
    print('corr(p0,p1) =', gv.evalcorr(fit.p[:2])[1, 0])
def main():
    ### 1) least-squares fit to the data
    x = np.array([
        0.2, 0.4, 0.6, 0.8, 1., 1.2, 1.4, 1.6, 1.8, 2.,
        2.2, 2.4, 2.6, 2.8, 3., 3.2, 3.4, 3.6, 3.8
        ])
    y = gv.gvar([
        '0.38(20)', '2.89(20)', '0.85(20)', '0.59(20)', '2.88(20)',
        '1.44(20)', '0.73(20)', '1.23(20)', '1.68(20)', '1.36(20)',
        '1.51(20)', '1.73(20)', '2.16(20)', '1.85(20)', '2.00(20)',
        '2.11(20)', '2.75(20)', '0.86(20)', '2.73(20)'
        ])
    prior = make_prior()
    fit = lsqfit.nonlinear_fit(data=(x, y), prior=prior, fcn=fitfcn, extend=True)
    if LSQFIT_ONLY:
        sys.stdout = tee.tee(STDOUT, open('case-outliers-lsq.out', 'w'))
    elif not MULTI_W:
        sys.stdout = tee.tee(STDOUT, open('case-outliers.out', 'w'))
    print(fit)

    # plot data
    plt.errorbar(x, gv.mean(y), gv.sdev(y), fmt='o', c='b')

    # plot fit function
    xline = np.linspace(x[0], x[-1], 100)
    yline = fitfcn(xline, fit.p)
    plt.plot(xline, gv.mean(yline), 'k:')
    yp = gv.mean(yline) + gv.sdev(yline)
    ym = gv.mean(yline) - gv.sdev(yline)
    plt.fill_between(xline, yp, ym, color='0.8')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.savefig('case-outliers1.png', bbox_inches='tight')
    if LSQFIT_ONLY:
        return

    ### 2) Bayesian integral with modified PDF
    pdf = ModifiedPDF(data=(x, y), fcn=fitfcn, prior=prior)

    # integrator for expectation values with modified PDF
    expval = lsqfit.BayesIntegrator(fit, pdf=pdf)

    # adapt integrator to pdf
    expval(neval=1000, nitn=15)

    # evaluate expectation value of g(p)
    def g(p):
        w = 0.5 + 0.5 * p['2w-1']
        c = p['c']
        return dict(w=[w, w ** 2], mean=c, outer=np.outer(c, c))
    results = expval(g, neval=1000, nitn=15, adapt=False)
    print(results.summary())
    # expval.map.show_grid(15)

    if MULTI_W:
        sys.stdout = tee.tee(STDOUT, open('case-outliers-multi.out', 'w'))

    # parameters c[i]
    mean = results['mean']
    cov = results['outer'] - np.outer(mean, mean)
    c = mean + gv.gvar(np.zeros(mean.shape), gv.mean(cov))
    print('c =', c)
    print(
        'corr(c) =',
        np.array2string(gv.evalcorr(c), prefix=10 * ' '),
        '\n',
        )

    # parameter w
    wmean, w2mean = results['w']
    wsdev = gv.mean(w2mean - wmean ** 2) ** 0.5
    w = wmean + gv.gvar(np.zeros(np.shape(wmean)), wsdev)
    print('w =', w, '\n')

    # Bayes Factor
    print('logBF =', np.log(expval.norm))
    sys.stdout = STDOUT
    if MULTI_W:
        return

    # add new fit to plot
    yline = fitfcn(xline, dict(c=c))
    plt.plot(xline, gv.mean(yline), 'r--')
    yp = gv.mean(yline) + gv.sdev(yline)
    ym = gv.mean(yline) - gv.sdev(yline)
    plt.fill_between(xline, yp, ym, color='r', alpha=0.2)
    plt.savefig('case-outliers2.png', bbox_inches='tight')
taglist.append(('l32v5.bar3pt.' + irrepStr + '.ayay.t06.p00', 'ayay', 't6', '16m'))
taglist.append(('l32v5.bar3pt.' + irrepStr + '.ayay.t-7.p00', 'ayay', 't7', '16m'))
taglist.append(('l32v5.bar3pt.' + irrepStr + '.azaz.t06.p00', 'azaz', 't6', '16m'))
taglist.append(('l32v5.bar3pt.' + irrepStr + '.azaz.t-7.p00', 'azaz', 't7', '16m'))

## -- consolidated all loading into a single file:
start = time.time()
print("loading gvar data: start ", start)
dall = standard_load(taglist, filekey, argsin)
print("end ", (time.time() - start))

## -- get entire correlation matrix
start = time.time()
print("making correlation: start ", start)
#corall = gv.evalcorr(dall)  ## -- super slow
corall = gv.evalcorr(dall.buf)  ## -- uses precomputed data, need to slice data manually
print("end ", (time.time() - start))
print("making covariance : start ", start)
covall = gv.evalcov(dall.buf)  ## -- uses precomputed data, need to slice data manually
print("end ", (time.time() - start))

## -- test routines, print correlation eigenvalues, eigenvectors to file
#for testkey in ['s12','s21','s13','s31','s15','s51','s16','s61']:
#for testkey in ['aiais11t6','aiais22t6','aiais33t6','aiais55t6','aiais66t6']:
#for testkey in ['aiais11t7','aiais22t7','aiais33t7','aiais55t7','aiais66t7']:
#for testkey in ['s11','s22','s33','s55','s66']:
#    evec = gvl.eigvalsh(corall[dall.slice(testkey),dall.slice(testkey)],True)
#    f = open('corr.'+testkey+'.dat','w')
#    f.write('#key : '+testkey+'\n')
#    f.write('#eigenvalues :\n')
#    seval = str(evec[0][0])
def kappa_NM(m_e_kf):
    ## Linear fit
    min_den = 14
    max_den = 17
    m_e_inv_kf_red = np.zeros([max_den - min_den, 6, 2])
    for i in range(min_den, max_den):
        m_e_inv_kf_red[i - min_den, :, :] = 1 / m_e_kf[i, :, :]
    y_NM_1 = []
    for h in range(6):
        y_NM_1.append(m_e_inv_kf_red[:, h, 1])
    s = gv.dataset.svd_diagnosis(y_NM_1)
    y_NM_1 = gv.dataset.avg_data(y_NM_1, spread=True)
    e, ev = np.linalg.eig(gv.evalcorr(y_NM_1))
    d2 = np.std(np.absolute(ev[0])) ** 2
    #print("l_corr (linear,NM) = ", 1 - (max_den - min_den) * d2)

    def f_NM_1(x, p):
        ans = 1 + x * p['k1']
        return ans

    prior_m_e_inv_NM_1 = {}
    prior_m_e_inv_NM_1['k1'] = gv.gvar(0, 100)
    x = np.arange(min_den + 1, max_den + 1, 1)
    x = x * 0.01
    fit = lsqfit.nonlinear_fit(data=(x, y_NM_1), prior=prior_m_e_inv_NM_1,
                               fcn=f_NM_1, debug=True, svdcut=0.25,
                               add_svdnoise=False)
    #print(fit)
    par_NM_1 = fit.p

    ## Quadratic fit
    min_den = 6
    max_den = 20
    m_e_inv_kf_red = np.zeros([max_den - min_den, 6, 2])
    for i in range(min_den, max_den):
        m_e_inv_kf_red[i - min_den, :, :] = 1 / m_e_kf[i, :, :]
    y_NM_2 = []
    for h in range(6):
        y_NM_2.append(m_e_inv_kf_red[:, h, 1])
    s = gv.dataset.svd_diagnosis(y_NM_2)
    y_NM_2 = gv.dataset.avg_data(y_NM_2, spread=True)
    e, ev = np.linalg.eig(gv.evalcorr(y_NM_2))
    d2 = np.std(np.absolute(ev[0])) ** 2
    #print("l_corr (quadratic,NM) = ", 1 - (max_den - min_den) * d2)

    def f_NM_2(x, p):
        ans = 1 + x * p['k1'] + x ** 2 * p['k2']
        return ans

    prior_m_e_inv_NM_2 = {}
    prior_m_e_inv_NM_2['k1'] = gv.gvar(0, 100)
    prior_m_e_inv_NM_2['k2'] = gv.gvar(0, 100)
    x = np.arange(min_den + 1, max_den + 1, 1)
    x = x * 0.01
    fit = lsqfit.nonlinear_fit(data=(x, y_NM_2), prior=prior_m_e_inv_NM_2,
                               fcn=f_NM_2, debug=True, svdcut=s.svdcut,
                               add_svdnoise=False)
    #print(fit)
    par_NM_2 = fit.p
    return f_NM_1, par_NM_1, f_NM_2, par_NM_2
def main():
    ### 1) least-squares fit to the data
    x = np.array([
        0.2, 0.4, 0.6, 0.8, 1., 1.2, 1.4, 1.6, 1.8, 2.,
        2.2, 2.4, 2.6, 2.8, 3., 3.2, 3.4, 3.6, 3.8
        ])
    y = gv.gvar([
        '0.38(20)', '2.89(20)', '0.85(20)', '0.59(20)', '2.88(20)',
        '1.44(20)', '0.73(20)', '1.23(20)', '1.68(20)', '1.36(20)',
        '1.51(20)', '1.73(20)', '2.16(20)', '1.85(20)', '2.00(20)',
        '2.11(20)', '2.75(20)', '0.86(20)', '2.73(20)'
        ])
    prior = make_prior()
    fit = lsqfit.nonlinear_fit(data=(x, y), prior=prior, fcn=fitfcn)
    if LSQFIT_ONLY:
        sys.stdout = tee.tee(STDOUT, open('case-outliers-lsq.out', 'w'))
    elif not MULTI_W:
        sys.stdout = tee.tee(STDOUT, open('case-outliers.out', 'w'))
    print(fit)

    # plot data
    plt.errorbar(x, gv.mean(y), gv.sdev(y), fmt='o', c='b')

    # plot fit function
    xline = np.linspace(x[0], x[-1], 100)
    yline = fitfcn(xline, fit.p)
    plt.plot(xline, gv.mean(yline), 'k:')
    yp = gv.mean(yline) + gv.sdev(yline)
    ym = gv.mean(yline) - gv.sdev(yline)
    plt.fill_between(xline, yp, ym, color='0.8')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.savefig('case-outliers1.png', bbox_inches='tight')
    if LSQFIT_ONLY:
        return

    ### 2) Bayesian integral with modified PDF
    pdf = ModifiedPDF(data=(x, y), fcn=fitfcn, prior=prior)

    # integrator for expectation values with modified PDF
    expval = lsqfit.BayesIntegrator(fit, pdf=pdf)

    # adapt integrator to pdf
    expval(neval=1000, nitn=15)

    # evaluate expectation value of g(p)
    def g(p):
        w = p['w']
        c = p['c']
        return dict(w=[w, w ** 2], mean=c, outer=np.outer(c, c))
    results = expval(g, neval=1000, nitn=15, adapt=False)
    print(results.summary())
    # expval.map.show_grid(15)

    if MULTI_W:
        sys.stdout = tee.tee(STDOUT, open('case-outliers-multi.out', 'w'))

    # parameters c[i]
    mean = results['mean']
    cov = results['outer'] - np.outer(mean, mean)
    c = mean + gv.gvar(np.zeros(mean.shape), gv.mean(cov))
    print('c =', c)
    print(
        'corr(c) =',
        np.array2string(gv.evalcorr(c), prefix=10 * ' '),
        '\n',
        )

    # parameter w
    wmean, w2mean = results['w']
    wsdev = gv.mean(w2mean - wmean ** 2) ** 0.5
    w = wmean + gv.gvar(np.zeros(np.shape(wmean)), wsdev)
    print('w =', w, '\n')

    # Bayes Factor
    print('logBF =', np.log(results.norm))
    sys.stdout = STDOUT
    if MULTI_W:
        return

    # add new fit to plot
    yline = fitfcn(xline, dict(c=c))
    plt.plot(xline, gv.mean(yline), 'r--')
    yp = gv.mean(yline) + gv.sdev(yline)
    ym = gv.mean(yline) - gv.sdev(yline)
    plt.fill_between(xline, yp, ym, color='r', alpha=0.2)
    plt.savefig('case-outliers2.png', bbox_inches='tight')
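# The ModifiedPDF used in the two outlier examples above is defined elsewhere.
# A minimal sketch of the idea it implements (a hypothetical stand-in, not
# the original class): each residual is either "good" (width sigma) or an
# outlier (width outlier_factor * sigma), mixed with weight w.
import numpy as np

def mixture_pdf(residual, sigma, w, outlier_factor=10.0):
    """Two-Gaussian mixture density for one residual."""
    def gauss(r, s):
        return np.exp(-r ** 2 / (2 * s ** 2)) / np.sqrt(2 * np.pi * s ** 2)
    return (w * gauss(residual, sigma)
            + (1 - w) * gauss(residual, outlier_factor * sigma))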
print "mN/fpi:", result['mN/fpi'] # for andre andre = dict() andre['mpi'] = result['mpi'].mean andre['mka'] = result['mka'].mean andre['fpi'] = result['fpi'].mean andre['fka'] = result['fka'].mean andre['mN'] = result['mN'].mean andre['ZAll'] = result['ZAll'].mean andre['ZAls'] = result['ZAls'].mean andre['key'] = ['mpi', 'mka', 'fpi', 'fka', 'mN', 'ZAll', 'ZAls'] andre['cov'] = gv.evalcov([ result['mpi'], result['mka'], result['fpi'], result['fka'], result['mN'], result['ZAll'], result['ZAls'] ]).tolist() andre['corr'] = gv.evalcorr([ result['mpi'], result['mka'], result['fpi'], result['fka'], result['mN'], result['ZAll'], result['ZAls'] ]).tolist() f = open( './flow_result/%s_%s_%s.yml' % (params['grand_ensemble']['ens']['tag'], params['grand_ensemble']['ml'], params['grand_ensemble']['ms']), 'w+') yaml.dump(andre, f) f.flush() f.close() # write output #pickle.dump(result, open('./pickle_result/flow%s_%s.pickle' %(params['grand_ensemble']['flow'], params['grand_ensemble']['ens']['tag']), 'wb')) #g = pickle.load(open('./pickle_result/flow%s_%s.pickle' %((params['grand_ensemble']['flow'],params['grand_ensemble']['ens']['tag']), 'rb')) #print g
def kappa_SM(m_e_kf):
    ### Linear fit
    min_den = 14
    max_den = 17
    m_e_inv_kf_red = np.zeros([max_den - min_den, 6, 2])
    for i in range(min_den, max_den):
        m_e_inv_kf_red[i - min_den, :, :] = 1 / m_e_kf[i, :, :]
    y_SM_1 = []
    for h in range(6):
        y_SM_1.append(m_e_inv_kf_red[:, h, 0])
    s = gv.dataset.svd_diagnosis(y_SM_1)
    y_SM_1 = gv.dataset.avg_data(y_SM_1, spread=True)
    e, ev = np.linalg.eig(gv.evalcorr(y_SM_1))
    d2 = np.std(np.absolute(ev[0])) ** 2
    #print("l_corr (linear,SM) = ", 1 - (max_den - min_den) * d2)

    def f_SM_1(x, p):
        ans = 1 + x * p['k1']
        return ans

    prior_m_e_inv_SM_1 = {}
    prior_m_e_inv_SM_1['k1'] = gv.gvar(0, 100)
    x = np.arange(min_den + 1, max_den + 1, 1)
    x = x * 0.01
    fit = lsqfit.nonlinear_fit(data=(x, y_SM_1), prior=prior_m_e_inv_SM_1,
                               fcn=f_SM_1, debug=True, svdcut=s.svdcut,
                               add_svdnoise=False)
    #print(fit)
    par_SM_1 = fit.p

    ### Quadratic fit
    min_den = 6
    max_den = 20
    m_e_inv_kf_red = np.zeros([max_den - min_den, 6, 2])
    for i in range(min_den, max_den):
        m_e_inv_kf_red[i - min_den, :, :] = 1 / m_e_kf[i, :, :]
    y_SM_2 = []
    for h in range(6):
        y_SM_2.append(m_e_inv_kf_red[:, h, 0])
    s = gv.dataset.svd_diagnosis(y_SM_2)
    y_SM_2 = gv.dataset.avg_data(y_SM_2, spread=True)
    e, ev = np.linalg.eig(gv.evalcorr(y_SM_2))
    d2 = np.std(np.absolute(ev[0])) ** 2
    #print("l_corr (quadratic,SM) = ", 1 - (max_den - min_den) * d2)

    def f_SM_2(x, p):
        ans = 1 + x * p['k1'] + x ** 2 * p['k2']
        return ans

    prior_m_e_inv_SM_2 = {}
    prior_m_e_inv_SM_2['k1'] = gv.gvar(0, 100)
    prior_m_e_inv_SM_2['k2'] = gv.gvar(0, 100)
    x = np.arange(min_den + 1, max_den + 1, 1)
    x = x * 0.01
    fit = lsqfit.nonlinear_fit(data=(x, y_SM_2), prior=prior_m_e_inv_SM_2,
                               fcn=f_SM_2, debug=True, svdcut=s.svdcut,
                               add_svdnoise=False)
    par_SM_2 = fit.p

    ##### QQ plot
    #
    #residuals = fit.residuals
    #residuals = np.sort(residuals)
    #np.random.seed(73568478)
    #quantiles = np.random.normal(0, 1, np.size(residuals))
    #quantiles = np.sort(quantiles)
    #
    #r2 = r2_score(residuals, quantiles)
    #r2 = np.around(r2, 2)
    #
    #z = np.polyfit(quantiles, residuals, deg=1)
    #p = np.poly1d(z)
    #x = np.arange(np.min(quantiles), np.max(quantiles), 0.001)
    #
    #fig, ax = plt.subplots(1)
    #plt.plot(quantiles, residuals, 'ob')
    #plt.plot(x, p(x), color='blue')
    #plt.plot(x, x, 'r--')
    #ax.text(0.1, 0.9, 'R =' + str(r2), transform=ax.transAxes, fontsize='13')
    #plt.xlabel('Theoretical quantiles', fontsize='15')
    #plt.ylabel('Ordered fit residuals', fontsize='15')
    #ax.tick_params(labelsize='14')
    #plt.show()

    return f_SM_1, par_SM_1, f_SM_2, par_SM_2
def correct_covariance(data, binsize=1, shrink_choice=None,
                       ordered_tags=None, bstrap=False, inflate=1.0):
    """
    Correct the covariance using three steps: (a) adjust the size of the
    diagonal errors (via the variances) with "blocking" (a.k.a. "binning")
    in Monte Carlo time, (b) adjust the correlations of the *full* dataset
    with shrinkage, and (c) combine the adjusted errors and correlation
    matrices.
    Args:
        data: dict with the full dataset.
        binsize: int, the binsize to use. Default is 1 (no binning).
        shrink_choice: str, which shrinkage scheme to use. Default is None
            (no shrinkage). Valid options: 'RBLW', 'OA', 'LW', and 'nonlinear'.
    Returns:
        final_cov: the final corrected covariance "matrix" as a dictionary
    """
    if ordered_tags is None:
        ordered_tags = sorted(data.keys(), key=str)
    # shapes are (n, p), where n is nsamples and p is ndata
    try:
        sizes = [data[tag].shape[1] for tag in ordered_tags]
    except IndexError:
        # edge case: single datum per sample
        sizes = [1 for tag in ordered_tags]
    total_size = np.sum(sizes)
    shrink_fcns = {
        'RBLW': shrink.rblw_shrink_correlation_identity,
        'OA': shrink.oa_shrink_correlation_identity,
        'LW': shrink.lw_shrink_correlation_identity,
        'nonlinear': nonlinear_shrink,
    }
    # Estimate errors from binned variances
    binned_data = {tag: avg_bin(data[tag], binsize) for tag in ordered_tags}
    binned_cov = gv.evalcov(gv.dataset.avg_data(binned_data, bstrap=bstrap))
    binned_err = {}
    for key_pair in binned_cov:
        key1, key2 = key_pair
        if key1 == key2:
            binned_err[key1] =\
                inflate * np.diag(np.sqrt(np.diag(binned_cov[key_pair])))
    # Estimate correlations from shrunken correlation matrices
    if shrink_choice is None:
        # No shrinkage -- take correlations from the full dataset
        corr_shrink = gv.evalcorr(
            gv.dataset.avg_data(
                {tag: binned_data[tag] for tag in ordered_tags}))
    else:
        # Carry out the desired shrinkage
        samples = np.hstack([data[tag] for tag in ordered_tags])
        if total_size == len(ordered_tags):
            # edge case: single datum per sample
            samples = samples.reshape(-1, len(ordered_tags))
        kwargs = {}
        if shrink_choice == 'nonlinear':
            kwargs['n_eff'] = samples.shape[0] // binsize
        (_, corr_shrink_concat) = shrink_fcns[shrink_choice](samples, **kwargs)
        corr_shrink = decomp_blocks(corr_shrink_concat, ordered_tags, sizes)
    # Correlate errors according to the shrunken correlation matrix
    final_cov = {}
    for key_l, key_r in corr_shrink:
        # err x corr x err
        final_cov[(key_l, key_r)] = np.matmul(
            binned_err[key_l],
            np.matmul(corr_shrink[(key_l, key_r)], binned_err[key_r]))
    return final_cov
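# The "err x corr x err" recombination above, spelled out for one block with
# made-up numbers (a sketch, not part of correct_covariance):
import numpy as np

err = np.diag([0.1, 0.2])                    # binned standard deviations
corr = np.array([[1.0, 0.3], [0.3, 1.0]])    # shrunken correlation matrix
cov = err @ corr @ err                       # recombined covariance block
print(cov)  # diagonal 0.01, 0.04; off-diagonal 0.1 * 0.3 * 0.2 = 0.006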
for h in range(6):
    e_sym2_av.append(e_sym2[:, h])

##### Data for plotting purposes
e_sym2_pot_av = gv.dataset.avg_data(e_sym2_av, spread=True) - 5 / 9 * T_SM(td)
e_sym2_pot_eff_av = gv.dataset.avg_data(e_sym2_av, spread=True) - T_2_eff(td)

##### Data for fitting purposes
s = gv.dataset.svd_diagnosis(e_sym2_av)
e_sym2_av = gv.dataset.avg_data(e_sym2_av, spread=True)
e, ev = np.linalg.eig(gv.evalcorr(e_sym2_av))
d2 = np.std(np.absolute(ev[0])) ** 2
print("N (delta) = ", np.size(td))
print("l_corr (delta) = ", 1 - np.size(td) * d2)

def u(alpha, x):
    N = 4
    b_sat = 17
    return 1 - (-3 * x) ** (N + 1 - alpha) * np.exp(-b_sat * (1 + 3 * x))

def V2(den, p):
    b_sym = 42 - 17
    x = (den - p['n_sat']) / (3 * p['n_sat'])
def f(x, p):
    return p[0] + p[1] * np.exp(-p[2] * x)

p0 = [0.5, 0.4, 0.7]
N = 10000
x = np.linspace(0.2, 1.0, N)
y = make_fake_data(x, p0, f)
sys.stdout = tee.tee(sys_stdout, open('eg9a.out', 'w'))
print('x = [{} {} ... {}]'.format(x[0], x[1], x[-1]))
print('y = [{} {} ... {}]'.format(y[0], y[1], y[-1]))
print('corr(y[0],y[9999]) =', gv.evalcorr([y[0], y[-1]])[1, 0])
print()

# fit function and prior
def fcn(x, p):
    return p[0] + p[1] * np.exp(-p[2] * x)

prior = gv.gvar(['0(1)', '0(1)', '0(1)'])

# Nstride fits, each to nfit data points
nfit = 100
Nstride = len(y) // nfit
fit_time = 0.0
for n in range(0, Nstride):
def samples(self, n):
    dim = self.r.shape[1]
    H = linalg.hilbert(dim) / (2 * self.s ** 2)
    x = gv.gvar(self.r[0], H)
    print(gv.evalcorr(x))
    return np.array([rx for rx in gv.raniter(x, n)])
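# Sanity check for the sampler above: the empirical correlation of gv.raniter
# draws should approach gv.evalcorr(x) for large n (synthetic numbers):
import numpy as np
import gvar as gv

x = gv.gvar([1.0, 2.0], [[0.01, 0.005], [0.005, 0.04]])
draws = np.array([rx for rx in gv.raniter(x, 2000)])
print(gv.evalcorr(x))          # exact correlation matrix
print(np.corrcoef(draws.T))    # sample estimate, close to the above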
""" import math import vegas import gvar as gv def integrand(x): """ Integrand function. """ dx2 = 0.0 for d in range(4): dx2 += (x[d] - 0.5) ** 2 f = math.exp(-200 * dx2) # multi integral simultaneously, return a list. return [f, f * x[0], f * x[0] ** 2] integ = vegas.Integrator(4 * [[0, 1]]) # adapt grid training = integ(integrand, nitn=10, neval=2000) # final analysis result = integ(integrand, nitn=10, neval=1e4) print('I[0] = {} I[1] = {} I[2] = {}'.format(*result)) print('Q = %.2f\n' % result.Q) print('<x> = ', result[1] / result[0]) print('sigma_x^2 = <x^2> - <x>^2 = ', result[2] / result[0] - (result[1] / result[0]) ** 2) print('\ncorrelation matrix:\n', gv.evalcorr(result))