def create_mc2hessian(pdf, Q, Neig, output_dir, name=None, db=None):
    """Compress `pdf` at scale `Q`, save the result and build the Hessian set.

    The matrix returned by ``get_X(pdf, Q, reshape=True)`` is passed to
    ``compress_X`` together with `Neig`. The resulting linear combination
    is written with ``save_lincomb`` (tagged with the hash of the inputs)
    and then, divided by the PDF normalization, handed to
    ``hessian_from_lincomb``.

    Parameters
    ----------
    pdf
        PDF set to convert.
    Q
        Scale forwarded to ``get_X`` and to the input hash.
    Neig
        Forwarded to ``compress_X``; presumably the number of
        eigenvectors to keep (confirm against ``compress_X``).
    output_dir
        Destination passed to ``save_lincomb`` and, as ``folder``, to
        ``hessian_from_lincomb``.
    name
        Optional set name forwarded to both helpers.
    db
        Optional object forwarded untouched to ``hessian_from_lincomb``.

    Returns
    -------
    Whatever ``hessian_from_lincomb`` returns.
    """
    coefficients = compress_X(get_X(pdf, Q, reshape=True), Neig)
    scale = _pdf_normalization(pdf)

    # The unnormalized combination is what gets stored on disk; the
    # normalization is saved alongside it rather than applied.
    save_lincomb(
        coefficients,
        scale,
        description={"input_hash": mc2h_input_hash(pdf, Q, Neig)},
        output_dir=output_dir,
        name=name,
    )

    return hessian_from_lincomb(
        pdf,
        coefficients / scale,
        folder=output_dir,
        set_name=name,
        db=db,
    )
def plot_correlations(results):
    """Yield one correlation figure per entry of `results`.

    For each result a column of subplots is created, one per flavour
    returned by ``pdf.make_flavors()``. For every bin label of the result
    the values returned by ``bin_corrs_from_X`` are sliced into
    consecutive chunks of ``len(xgrid)`` points (one chunk per flavour)
    and drawn against the x grid. Ranges where the absolute value exceeds
    the threshold returned by ``bin_corrs_from_X`` are redrawn with a
    thicker line and shaded, and the threshold itself is marked with
    dashed red horizontal lines.

    Parameters
    ----------
    results
        Iterable of result objects exposing ``pdf``, ``obs``,
        ``binlabels`` and ``_all_vals``.

    Yields
    ------
    tuple
        ``((obs, pdf), figure)`` for each result.
    """
    for result in results:
        pdf = result.pdf
        obs = result.obs
        Qs = iter(obs.meanQ)
        xgrid = pdf.make_xgrid()
        fl = pdf.make_flavors()
        # Number of x points belonging to each flavour inside `values`.
        step = len(xgrid)

        # squeeze=False guarantees a 2D array of axes even for a single
        # flavour; by default matplotlib would return a bare Axes object,
        # which can be neither zipped nor indexed below.
        figure, axgrid = plt.subplots(
            len(fl),
            sharex=True,
            sharey=True,
            figsize=(8, len(fl) + 3),
            squeeze=False,
        )
        axarr = axgrid[:, 0]

        for b in result.binlabels:
            Q = next(Qs)
            X = get_X(pdf, Q=Q, xgrid=xgrid, fl=fl, reshape=True)
            # `.ix` was removed in pandas 1.0; the bin is selected by label.
            values, threshold = bin_corrs_from_X(result._all_vals.loc[b], X)

            for position, (f, axis) in enumerate(zip(fl, axarr)):
                start = position * step
                current_vals = values[start:start + step]

                line, = axis.plot(xgrid, current_vals)
                stacked = np.array([xgrid, current_vals]).T
                sel_ranges = split_ranges(
                    stacked,
                    abs(current_vals) > threshold,
                    filter_falses=True,
                )
                # Emphasize the regions above threshold, reusing the
                # colour of the curve they belong to.
                for arr in sel_ranges:
                    x, y = arr.T
                    axis.plot(x, y, linewidth=3, color=line.get_color())
                    axis.axvspan(np.min(x), np.max(x), color="#eeeeff")

                axis.set_ylim([-1, 1])
                axis.set_xscale('log')
                axis.set_ylabel("$%s$" % PDG_PARTONS[f])
                axis.axhline(threshold, c='r', ls='--')
                axis.axhline(-threshold, c='r', ls='--')

        axarr[0].set_title(str(obs) + "\n")
        plt.xlabel("$x$")
        # Stack the panels without gaps and keep tick labels only on the
        # bottom one.
        figure.subplots_adjust(hspace=0)
        plt.setp([a.get_xticklabels() for a in figure.axes[:-1]], visible=False)

        yield (obs, pdf), figure
def get_smpdf_lincomb(
    pdf,
    pdf_results,
    target_error,
    full_grid=False,
    correlation_threshold=DEFAULT_CORRELATION_THRESHOLD,
    Rold=None,
):
    """Extract the linear combination that describes the linear part of
    the error of the given results with at least `target_error` precision.
    See <paper> for details.

    `Rold` is returned so computation can be resumed iteratively (and then
    merged) with for example `merge_lincombs`.

    Parameters
    ----------
    pdf
        PDF set the results were computed with.
    pdf_results
        Iterable of results; each must satisfy ``result.pdf == pdf``.
    target_error
        Either a single real number (used for every bin), a container
        with exactly one value per bin across all results, or an iterator
        yielding one value per bin.
    full_grid
        Not used by this function.
    correlation_threshold
        Forwarded to ``_mask_X``.
    Rold
        Optional rotation from a previous call; when given, the search
        starts from the already rotated space.

    Returns
    -------
    SMPDFLincombResult
        With fields ``lincomb`` (the columns actually filled), ``norm``,
        ``desc`` (per observable and 1-based bin, the cumulative number
        of eigenvectors), ``errors`` (per observable and 1-based bin, the
        last error computed) and ``Rold``.

    Raises
    ------
    ValueError
        If `target_error` has the wrong length or an unsupported type, or
        if a result belongs to a different PDF.
    TooMuchPrecision
        If the number of extracted eigenvectors reaches the maximum
        allowed by the grid size and the number of replicas.
    """
    # The ABC aliases in the top-level `collections` namespace were
    # removed in Python 3.10; import them from their real home.
    from collections.abc import Container, Iterator

    # Estimator = norm**2(rotated)/norm**2(total) which is additive when
    # adding eigenvectors
    # Error = (1 - sqrt(1-estimator))
    # TODO: Optimize by calculating estimator instead of error?
    # target_estimator = 1 - (1-target_error)**2
    nxf = len(pdf.make_xgrid()) * len(pdf.make_flavors())
    nrep = len(pdf) - 1
    max_neig = np.min([nxf, nrep])
    # We must divide by norm since we are reproducing the covmat and not XX.T
    norm = _pdf_normalization(pdf)

    # Normalize `target_error` into an iterator yielding one value per bin.
    if isinstance(target_error, Container):
        total_bins = sum(r.nbins for r in pdf_results)
        if len(target_error) != total_bins:
            raise ValueError("Incorrect target error specification")
        target_error = iter(target_error)
    elif isinstance(target_error, numbers.Real):
        target_error = itertools.repeat(target_error)
    elif not isinstance(target_error, Iterator):
        raise ValueError("Target error not understood")

    lincomb = np.zeros(shape=(nrep, max_neig))
    desc = OrderedDict()
    errors = OrderedDict()
    # Number of columns of `lincomb` filled so far.
    index = 0

    for result in pdf_results:
        obs_desc = OrderedDict()
        obs_errors = OrderedDict()
        desc[str(result.obs)] = obs_desc
        errors[str(result.obs)] = obs_errors
        if result.pdf != pdf:
            raise ValueError("PDF results must be for %s" % pdf)

        for b in result.binlabels:
            Xreal = get_X(pdf, Q=result.meanQ[b], reshape=True)
            # `.ix` was removed in pandas 1.0; the bin is selected by label.
            prediction = result._all_vals.loc[b]
            original_diffs = prediction - np.mean(prediction)
            if Rold is not None:
                X = np.dot(Xreal, Rold)
                rotated_diffs = np.dot(original_diffs, Rold)
            else:
                rotated_diffs = original_diffs
                X = Xreal

            eigs_for_bin = 0
            error_val = next(target_error)
            # Would be
            # while _get_error(rotated_diffs, original_diffs) > error_val
            # except that we want to capture current_error
            while True:
                current_error = _get_error(rotated_diffs, original_diffs)
                if current_error < error_val:
                    break
                X = _mask_X(X, rotated_diffs, correlation_threshold=correlation_threshold)
                P, R = _pop_eigenvector(X)
                # Express the new vector and rotation in the original
                # basis before accumulating them.
                if Rold is not None:
                    P = np.dot(Rold, P)
                    R = np.dot(Rold, R)
                Rold = R
                rotated_diffs = np.dot(original_diffs, Rold)
                X = np.dot(Xreal, Rold)
                lincomb[:, index:index + 1] = P
                index += 1
                if index == max_neig:
                    raise TooMuchPrecision(result.obs, b + 1)
                eigs_for_bin += 1

            if eigs_for_bin:
                logging.info(
                    "Obtained %d eigenvector%s for observable %s, "
                    "bin %d",
                    eigs_for_bin,
                    "s" * (eigs_for_bin > 1),
                    result.obs,
                    b + 1,
                )
            else:
                logging.debug(
                    "Observable %s, "
                    "bin %d is already well reproduced.",
                    result.obs,
                    b + 1,
                )
            obs_desc[int(b + 1)] = index
            obs_errors[int(b + 1)] = current_error

    # Prune extra zeros
    lincomb = lincomb[:, :index]
    logging.debug("Linear combination has %d eigenvectors", lincomb.shape[1])

    return SMPDFLincombResult(lincomb=lincomb, norm=norm, desc=desc, errors=errors, Rold=Rold)