def test_const_results(self):
    """Regression test: the ggH 13 TeV prediction must not change.

    Convolutes the ``NNPDF30_nlo_as_0118`` set with the
    ``data/higgs/ggh_13tev.root`` grid (``order=1``) and compares the raw
    values of the first result with numbers stored from an earlier run.
    """
    pdf = PDF('NNPDF30_nlo_as_0118')
    obs = make_observable('data/higgs/ggh_13tev.root', order=1)
    # ``.values`` returns the same array ``as_matrix()`` did, but also
    # exists on pandas >= 1.0, where ``as_matrix`` was removed.
    res = produce_results(pdf, obs)[0]._data.values
    # Reference values recorded from a previous run.
    previous = [
        31.22150023, 28.9888971, 31.3305855, 31.35700134,
        30.66074282, 28.2795592, 31.50932389, 31.43558106,
        31.28601552, 31.36406676, 30.69818304, 30.86453794,
        31.62708945, 31.6582997, 31.11008626, 31.59319744,
        31.52866286, 31.28639456, 31.07427256, 31.41568155,
        31.60044526, 31.36966168, 30.93032191, 33.05392731,
        30.96536474, 31.4317893, 30.72436163, 31.22605137,
        31.24243901, 31.21880536, 31.68798426, 31.07646885,
        30.59035729, 31.34800315, 31.21256586, 32.19365014,
        31.58647388, 31.74649083, 31.04692704, 30.2313781,
        31.4132261, 31.01493873, 31.36481456, 31.26872855,
        31.09018502, 31.51168931, 31.28105268, 30.91102961,
        30.88952926, 31.31389055, 31.22196011, 31.19094082,
        31.03705732, 31.54684624, 31.25938782, 31.10403155,
        30.87467055, 31.83239232, 31.05799506, 31.68930494,
        31.0387522, 31.27327964, 31.30919955, 31.51614659,
        31.06029472, 31.43211917, 31.35963882, 32.68355944,
        30.96018759, 30.60001451, 31.25165147, 31.37222175,
        31.19241142, 29.4805518, 31.66853784, 30.96975209,
        30.61955437, 31.34032813, 32.0724701, 30.9346566,
        31.27130234, 31.38000397, 30.83068422, 31.50593514,
        31.22489125, 30.74042979, 31.21191289, 31.4358684,
        31.27231758, 31.81871636, 31.10093208, 30.64649533,
        31.40680264, 31.12985161, 31.8378625, 31.18650963,
        31.51129514, 31.18025208, 31.87366192, 31.78494394,
        31.45846653,
    ]
    self.assertTrue(
        np.allclose(res, previous),
        "ggH 13 TeV results differ from the stored reference values",
    )
def execute_config(conf, output_dir, db):
    """Run every action group of ``conf`` and collect the produced results.

    For each group in ``conf.actiongroups`` the resources needed by its
    actions are prepared (convolution results, the derived tables and,
    when requested, the PDF correlations) and handed to ``do_actions``
    together with the group's actions.

    Parameters:
        conf: object exposing ``actiongroups``, an iterable of dict-like
            groups providing the keys ``'pdfsets'``, ``'observables'``,
            ``'actions'`` and ``'prefix'``.
        output_dir: forwarded to the actions as the ``'output_dir'``
            resource.
        db: forwarded to ``produce_results`` and ``correlations`` and
            exposed to the actions as the ``'db'`` resource.

    Returns:
        A list with the output of ``produce_results`` for every group in
        which it was computed, in group order.
    """
    # Function-scope imports, as in the original implementation.
    import logging

    import pandas as pd
    import smpdflib.core as lib

    resultset = []
    for group in conf.actiongroups:
        actions = group['actions']
        pdfsets, observables = group['pdfsets'], group['observables']

        # Everything in the group except the actions is a resource.
        resources = group.copy()
        resources.pop('actions')

        needs_results = any(requires_result(act) for act in actions)
        needs_correlations = any(requires_correlations(act)
                                 for act in actions)

        # Perform the convolution. The correlations are derived from
        # ``data_table``, so the tables must also be built when only
        # correlations are requested; otherwise ``data_table`` would be
        # unbound, or left over from the previous group.
        if needs_results or needs_correlations:
            results = lib.produce_results(pdfsets, observables, db)
            resultset.append(results)
            data_table = lib.results_table(results)
            summed_table = lib.summed_results_table(results)
            total = pd.concat((data_table, summed_table), ignore_index=True)
            resources.update({
                'results': results,
                'data_table': data_table,
                'total': total,
                'summed_table': summed_table,
            })

        if needs_correlations:
            pdfcorrlist = lib.correlations(data_table, db=db)
            resources.update({'pdfcorrlist': pdfcorrlist})

        resources.update({
            'output_dir': output_dir,
            'prefix': group['prefix'],
            'pdfsets': pdfsets,
            'db': db,
        })

        # ``do_actions`` is consumed to the end; only the action name is
        # needed here, for the progress message.
        for action, _ in do_actions(actions, resources):
            logging.info("Finalized action '%s'.", action)
    return resultset
# Prior PDF set and the observables (all grids found under the z, w,
# higgs and ttbar data folders, each group in sorted order) used for the
# scan over correlation thresholds.
pdf = PDF("MC900_nlo")
obs = [make_observable(path, order='NLO') for path in itertools.chain(
    sorted(glob.glob("data/z/*.root")),
    sorted(glob.glob("data/w/*.root")),
    sorted(glob.glob("data/higgs/*.root")),
    sorted(glob.glob("data/ttbar/*.root")),
)]

# SMPDF tolerance shared by every run, and the correlation thresholds to
# scan.
tolerance = 0.05
thresholds = [0.0, 0.25, 0.5, 0.75, 0.9, 0.99]

if __name__ == '__main__':
    # Second dimension of the first value returned by get_smpdf_params,
    # one entry per threshold.
    neig = []
    logging.basicConfig(level=logging.INFO)
    db = shelve.open('db/db')
    try:
        results = produce_results([pdf], obs, db=db)
        for t in thresholds:
            V, _, desc = get_smpdf_params(pdf, results,
                                          smpdf_tolerance=tolerance,
                                          correlation_threshold=t,
                                          db=db)
            neig.append(V.shape[1])
            print("For thresholf %.2f we get:" % t)
            print(desc)
        with open("thresholdsladder.json", 'w') as f:
            json.dump([thresholds, neig], f)
    finally:
        # Close the shelf even when one of the steps above raises.
        db.close()
def get_smpdf_params(
    pdf,
    pdf_results,
    smpdf_tolerance,
    full_grid=False,
    db=None,
    correlation_threshold=DEFAULT_CORRELATION_THRESHOLD,
    nonlinear_correction=True,
):
    """Compute the linear combination defining an SMPDF for ``pdf``.

    A first linear combination is obtained with ``get_smpdf_lincomb``.
    When ``nonlinear_correction`` is true, a temporary Hessian set is
    built from it, the observables are recomputed with that set and the
    bins whose error reduction still exceeds ``smpdf_tolerance`` are
    refined with a second ``get_smpdf_lincomb`` call; both combinations
    are then merged with ``merge_lincombs``.

    Parameters:
        pdf: the prior PDF set.
        pdf_results: results of the prior for the observables of
            interest; each item is used through ``obs``, ``pdf``,
            ``_data`` and ``std_error()``.
        smpdf_tolerance: target error passed to ``get_smpdf_lincomb``
            and the threshold on ``1 - std_smpdf / std_prior`` above
            which a bin is refined.
        full_grid: forwarded to ``get_smpdf_lincomb``.
        db: forwarded to ``hessian_from_lincomb``.
        correlation_threshold: forwarded to ``get_smpdf_lincomb``.
        nonlinear_correction: whether to run the refinement step.

    Returns:
        The tuple ``(lincomb, norm, description)``.

    Raises:
        TooMuchPrecision: if the corrected tolerance of some bin
            becomes negative.
    """
    first_res = get_smpdf_lincomb(
        pdf,
        pdf_results,
        full_grid=full_grid,
        target_error=smpdf_tolerance,
        correlation_threshold=correlation_threshold,
    )
    norm = first_res.norm
    lincomb = first_res.lincomb
    description = first_res.desc
    vec = first_res.lincomb / norm

    if nonlinear_correction:
        logging.info("Estimating nonlinear correction")
        with tempfile.TemporaryDirectory() as td:
            # The temporary set is written to ``td``; a unique name is
            # used for it.
            applwrap.setlhapdfpath(td)
            tempname = str(uuid.uuid1())
            logging.info("Creating temporary PDF %s", tempname)
            hessian_from_lincomb(pdf, vec, folder=td, set_name=tempname,
                                 db=db)
            observables = [r.obs for r in pdf_results]
            temppdf = PDF(tempname)
            # NOTE(review): bare attribute access kept from the original;
            # presumably it resolves the set's info file while the
            # temporary folder still exists -- confirm.
            temppdf.infopath
            real_results = produce_results(temppdf, observables)
        logging.info("Real results obtained")

        results_to_refine = []
        newtols = []
        for smpdf_res, prior_res in zip(real_results, pdf_results):
            # Relative loss of error of the temporary set w.r.t. the prior.
            real_error = 1 - smpdf_res.std_error() / prior_res.std_error()
            # Plain boolean ndarray mask. ``np.asarray`` is used because
            # ``np.array(..., copy=False)`` raises on NumPy >= 2 whenever
            # a copy is required.
            bad_bins = np.asarray(real_error > smpdf_tolerance)
            if bad_bins.any():
                lin_errors = list(
                    first_res.errors[str(smpdf_res.obs)].values())
                # Tighten the tolerance by the difference between the
                # recomputed error and the one stored by the first pass.
                newtol = smpdf_tolerance - (
                    real_error[bad_bins] - np.array(lin_errors)[bad_bins])
                impossible = np.argwhere(newtol < 0)
                if len(impossible):
                    raise TooMuchPrecision(smpdf_res.obs, impossible[0] + 1)
                newtols += list(newtol)
                logging.debug("New tolerances for observable %s: %s",
                              prior_res.obs, newtol)
                # Create result with the same type as prior, and only
                # bad_bins. ``.loc`` with a boolean mask selects the same
                # rows ``.ix`` did and still exists on pandas >= 1.0.
                newres = type(prior_res)(prior_res.obs, prior_res.pdf,
                                         prior_res._data.loc[bad_bins])
                results_to_refine.append(newres)

        if results_to_refine:
            logging.info("Calculating eigenvectors to refine")
            ref_res = get_smpdf_lincomb(
                pdf,
                results_to_refine,
                full_grid=full_grid,
                target_error=newtols,
                correlation_threshold=correlation_threshold,
                Rold=first_res.Rold,
            )
            lincomb, description = merge_lincombs(
                first_res.lincomb, ref_res.lincomb,
                description, ref_res.desc)
        else:
            logging.info("All results are within tolerance")
    return lincomb, norm, description
from smpdflib.plots import plot_bindist

# The db folder has to exist before the shelf is opened.
db = shelve.open('db/db')

# Observable -> value passed to ``plot_bindist`` as its bin argument.
to_plot = {
    make_observable('data/applgrid/CMSWCHARM-WpCb-eta4.root', order=1): 4,
    make_observable(
        'data/applgrid/APPLgrid-LHCb-Z0-ee_arXiv1212.4260-eta34.root',
        order=1): 8,
}

# PDF sets to compare; the first one is passed as ``base_pdf``.
pdfs = [
    PDF('MC900_nnlo', label="legendtext"),
    PDF('CMC100_nnlo', label="other"),
    PDF('MCH_nnlo_100', label="another"),
]

if __name__ == '__main__':
    try:
        for obs, bin_spec in to_plot.items():
            results = produce_results(pdfs, [obs], db=db)
            obs_table = results_table(results)
            # Distinct names are used so that neither the outer loop
            # variable nor the ``bin`` builtin is shadowed.
            for (plot_obs, b), fig in plot_bindist(obs_table, bin_spec,
                                                   base_pdf=pdfs[0]):
                ax = fig.axes[0]
                ax.set_xlabel("My X label")
                ax.set_ylabel("MY Y label")
                ax.set_title("My title")
                # The bin number in the file name is ``b + 1``.
                path = "%s_bin_%s.pdf" % (plot_obs, b+1)
                fig.savefig(path)
                plt.close(fig)
    finally:
        # Close the shelf even when plotting fails.
        db.close()
prior_name = "MC900_nnlo" prefixes = ['z2tr' + x for x in '0 10 25 50 75 90 99'.split()] smpdf_names = [prefix + 'smpdf_' + prior_name for prefix in prefixes] lincoef_paths = ['output/%s_lincomb.csv' % name for name in smpdf_names] if __name__ == '__main__': prior = PDF(prior_name) smpdfs = [PDF(name) for name in smpdf_names] pdfs = [prior] + smpdfs obs = make_observable(obs_name, order='NLO') with shelve.open('db/db') as db: res_prior, *res_smpdfs = produce_results(pdfs, [obs], db=db) coefs = [pd.DataFrame.from_csv(path, sep='\t') for path in lincoef_paths] prior_std = res_prior.std_error() real_tols = [1 - res.std_error()/prior_std for res in res_smpdfs] prior_diffs = (res_prior._all_vals.T - res_prior.central_value).as_matrix().ravel() neig = [len(pdf) - 1 for pdf in smpdfs] rotated_tols = [] for path in lincoef_paths: coefs = pd.DataFrame.from_csv(path, sep='\t') rotated_diffs = np.dot(prior_diffs, coefs) rotated_std = la.norm(rotated_diffs)