def create_mc2hessian(pdf, Q, Neig, output_dir, name=None, db=None):
    """Compress *pdf* at scale *Q* to *Neig* Hessian eigenvectors.

    The grid matrix is built, compressed to ``Neig`` directions, and the
    normalized linear combination is saved to *output_dir* before the
    Hessian PDF set is generated from it.  Returns the result of
    ``hessian_from_lincomb``.
    """
    grid_matrix = get_X(pdf, Q, reshape=True)
    eigvecs = compress_X(grid_matrix, Neig)
    scale = _pdf_normalization(pdf)
    # The input hash lets downstream tools detect stale linear combinations.
    meta = {"input_hash": mc2h_input_hash(pdf, Q, Neig)}
    save_lincomb(eigvecs, scale, description=meta,
                 output_dir=output_dir, name=name)
    return hessian_from_lincomb(pdf, eigvecs / scale,
                                folder=output_dir, set_name=name, db=db)
def create_smpdf(
    pdf,
    pdf_results,
    output_dir,
    name,
    smpdf_tolerance,
    full_grid=False,
    db=None,
    correlation_threshold=DEFAULT_CORRELATION_THRESHOLD,
    nonlinear_correction=True,
):
    """Create an SMPDF set *name* in *output_dir* from *pdf*.

    Computes the reduced linear combination via ``get_smpdf_params``,
    saves it together with a YAML description file, and produces the
    final Hessian PDF set.  Returns the result of
    ``hessian_from_lincomb``.
    """
    lincomb, norm, description = get_smpdf_params(
        pdf,
        pdf_results,
        smpdf_tolerance,
        full_grid=full_grid,
        db=db,
        correlation_threshold=correlation_threshold,
        nonlinear_correction=nonlinear_correction,
    )
    vec = lincomb / norm
    description = complete_smpdf_description(
        description, pdf, pdf_results, full_grid=full_grid, target_error=smpdf_tolerance
    )
    # We have do do this because LHAPDF seems to not parse complex structures:
    # serialize the whole description to a YAML string for the .info file.
    parsed_desc = {"smpdf_description": yaml.dump(description, default_flow_style=False)}
    save_lincomb(lincomb, norm, description, output_dir, name)
    with open(osp.join(output_dir, name + "_description.yaml"), "w") as f:
        yaml.dump(description, f, default_flow_style=False)
    # Lazy %-args: the message is only formatted if INFO is enabled.
    logging.info("Final linear combination has %d eigenvectors", lincomb.shape[1])
    return hessian_from_lincomb(pdf, vec, folder=output_dir,
                                set_name=name, db=db, extra_fields=parsed_desc)
def get_smpdf_params(
    pdf,
    pdf_results,
    smpdf_tolerance,
    full_grid=False,
    db=None,
    correlation_threshold=DEFAULT_CORRELATION_THRESHOLD,
    nonlinear_correction=True,
):
    """Compute the SMPDF linear combination, norm, and description.

    First obtains a linear combination with ``get_smpdf_lincomb``.  If
    *nonlinear_correction* is true, a temporary Hessian PDF set is built
    from it, the observables are recomputed with the real (nonlinear)
    convolution, and any bins whose actual error exceeds
    *smpdf_tolerance* are refined with tighter per-bin tolerances; the
    refining eigenvectors are merged into the original combination.

    Returns ``(lincomb, norm, description)``.

    Raises
    ------
    TooMuchPrecision
        If the nonlinear residual alone already exceeds the requested
        tolerance for some bin, so no refinement can reach it.
    """
    first_res = get_smpdf_lincomb(
        pdf,
        pdf_results,
        full_grid=full_grid,
        target_error=smpdf_tolerance,
        correlation_threshold=correlation_threshold,
    )
    norm = first_res.norm
    lincomb = first_res.lincomb
    description = first_res.desc
    vec = first_res.lincomb / norm
    if nonlinear_correction:
        logging.info("Estimating nonlinear correction")
        with tempfile.TemporaryDirectory() as td:
            # NOTE(review): this changes the global LHAPDF path and never
            # restores it — confirm callers do not rely on the old path.
            applwrap.setlhapdfpath(td)
            tempname = str(uuid.uuid1())
            logging.info("Creating temporary PDF %s", tempname)
            hessian_from_lincomb(pdf, vec, folder=td, set_name=tempname, db=db)
            observables = [r.obs for r in pdf_results]
            temppdf = PDF(tempname)
            # Attribute access for its side effect (presumably resolves or
            # validates the .info path of the freshly written set).
            temppdf.infopath
            real_results = produce_results(temppdf, observables)
            logging.info("Real results obtained")
            results_to_refine = []
            newtols = []
            for smpdf_res, prior_res in zip(real_results, pdf_results):
                # Relative error of the SMPDF uncertainty w.r.t. the prior.
                real_error = 1 - smpdf_res.std_error() / prior_res.std_error()
                # pandas indexing is broken, so have to call as_matrix....
                bad_bins = np.array(real_error > smpdf_tolerance, copy=False)
                if bad_bins.any():
                    lin_errors = list(first_res.errors[str(smpdf_res.obs)].values())
                    # Tighten the tolerance by the nonlinear residual
                    # (actual error minus the error the linear estimate saw).
                    newtol = smpdf_tolerance - (
                        real_error[bad_bins] - np.array(lin_errors)[bad_bins]
                    )
                    impossible = np.argwhere(newtol < 0)
                    if len(impossible):
                        # Report the first offending bin (1-based).
                        raise TooMuchPrecision(smpdf_res.obs, impossible[0] + 1)
                    newtols += list(newtol)
                    logging.debug(
                        "New tolerances for observable %s: %s", prior_res.obs, newtol
                    )
                    # Create result with the same type as prior, and only
                    # bad_bins.  ``.ix`` was removed in pandas 1.0; ``.iloc``
                    # accepts the positional boolean mask directly.
                    newres = type(prior_res)(
                        prior_res.obs, prior_res.pdf, prior_res._data.iloc[bad_bins]
                    )
                    results_to_refine.append(newres)
            if results_to_refine:
                logging.info("Calculating eigenvectors to refine")
                ref_res = get_smpdf_lincomb(
                    pdf,
                    results_to_refine,
                    full_grid=full_grid,
                    target_error=newtols,
                    correlation_threshold=correlation_threshold,
                    Rold=first_res.Rold,
                )
                lincomb, description = merge_lincombs(
                    first_res.lincomb, ref_res.lincomb, description, ref_res.desc
                )
            else:
                logging.info("All results are within tolerance")
    return lincomb, norm, description