Beispiel #1
0
def create_mc2hessian(pdf, Q, Neig, output_dir, name=None, db=None):
    """Build a Hessian set for `pdf` from a compressed linear combination.

    The matrix returned by ``get_X(pdf, Q, reshape=True)`` is passed together
    with `Neig` to ``compress_X``. The resulting linear combination is stored
    through ``save_lincomb`` (tagged with the hash of the inputs) and then,
    divided by the PDF normalization, handed to ``hessian_from_lincomb``.

    Returns whatever ``hessian_from_lincomb`` returns.
    """
    lincomb = compress_X(get_X(pdf, Q, reshape=True), Neig)
    normalization = _pdf_normalization(pdf)

    # The linear combination is saved unnormalized, next to its normalization.
    save_lincomb(
        lincomb,
        normalization,
        description={"input_hash": mc2h_input_hash(pdf, Q, Neig)},
        output_dir=output_dir,
        name=name,
    )

    return hessian_from_lincomb(
        pdf,
        lincomb / normalization,
        folder=output_dir,
        set_name=name,
        db=db,
    )
Beispiel #2
0
def plot_correlations(results):
    """Yield one correlation figure per result.

    For every result a figure is created with one vertically stacked subplot
    per flavor returned by ``pdf.make_flavors()``. For each bin label of the
    result, the values returned by ``bin_corrs_from_X`` are split into
    consecutive chunks of ``len(xgrid)`` entries (one chunk per flavor) and
    drawn against the x grid. The pieces returned by ``split_ranges`` for the
    mask ``abs(values) > threshold`` are redrawn with a thicker line and
    shaded, and the ``+/- threshold`` levels are marked with dashed red lines.

    Yields:
        ``((obs, pdf), figure)`` tuples, one per element of `results`.
        Figures are not closed here; that is left to the caller.
    """
    for result in results:
        pdf = result.pdf
        obs = result.obs

        # One Q value is consumed per bin label, in order.
        Qs = iter(obs.meanQ)
        xgrid = pdf.make_xgrid()
        fl = pdf.make_flavors()
        npoints = len(xgrid)

        # squeeze=False always returns a 2D array of axes; without it a single
        # flavor would yield a bare Axes object and break the zip/indexing
        # below.
        figure, axgrid = plt.subplots(len(fl), sharex=True,
                                      sharey=True,
                                      figsize=(8, len(fl) + 3),
                                      squeeze=False)
        axarr = axgrid[:, 0]

        for b in result.binlabels:
            Q = next(Qs)
            X = get_X(pdf, Q=Q, xgrid=xgrid, fl=fl, reshape=True)
            # `.ix` was removed from pandas; bin labels are index labels, so
            # `.loc` is the label-based replacement.
            values, threshold = bin_corrs_from_X(result._all_vals.loc[b], X)
            for i, (f, axis) in enumerate(zip(fl, axarr)):
                # Chunk of `values` that belongs to flavor number `i`.
                current_vals = values[i * npoints:(i + 1) * npoints]
                line, = axis.plot(xgrid, current_vals)
                stacked = np.array([xgrid, current_vals]).T
                sel_ranges = split_ranges(stacked,
                                          abs(current_vals) > threshold,
                                          filter_falses=True)
                # Emphasize the pieces above threshold in the same color as
                # the base line.
                for arr in sel_ranges:
                    x, y = arr.T
                    axis.plot(x, y, linewidth=3, color=line.get_color())
                    axis.axvspan(np.min(x), np.max(x), color="#eeeeff")
                axis.set_ylim([-1, 1])
                axis.set_xscale('log')
                axis.set_ylabel("$%s$" % PDG_PARTONS[f])

                axis.axhline(threshold, c='r', ls='--')
                axis.axhline(-threshold, c='r', ls='--')

        axarr[0].set_title(str(obs) + "\n")
        # Label the bottom axes explicitly rather than relying on pyplot's
        # notion of the "current" axes.
        axarr[-1].set_xlabel("$x$")
        figure.subplots_adjust(hspace=0)
        plt.setp([a.get_xticklabels() for a in figure.axes[:-1]], visible=False)

        yield (obs, pdf), figure
Beispiel #3
0
def get_smpdf_lincomb(
    pdf, pdf_results, target_error, full_grid=False, correlation_threshold=DEFAULT_CORRELATION_THRESHOLD, Rold=None
):
    """Extract the linear combination that describes the linear part of
    the error of the given results with at least `target_error` precision.
    See <paper> for details.

    `Rold` is returned so computation can be resumed iteratively (and then
    merged) with for example `merge_lincombs`.

    Parameters:
        pdf: the PDF set every entry of `pdf_results` must refer to.
        pdf_results: iterable of results; each one provides ``obs``, ``pdf``,
            ``binlabels``, ``meanQ`` and ``_all_vals``.
        target_error: either a single real number (used for every bin), a
            container with exactly one value per bin across all results, or
            an iterator that yields one value per bin.
        full_grid: accepted for interface compatibility; not used in this
            function.
        correlation_threshold: forwarded to ``_mask_X``.
        Rold: optional rotation from a previous call; when given, the
            computation continues from it.

    Returns:
        ``SMPDFLincombResult`` with the linear combination (trimmed to the
        columns actually filled), the PDF normalization, per-observable
        dictionaries mapping ``bin + 1`` to the cumulative number of
        eigenvectors (`desc`) and to the reached error (`errors`), and the
        final rotation `Rold`.

    Raises:
        ValueError: if `target_error` has the wrong length or type, or if a
            result was computed for a different PDF.
        TooMuchPrecision: if the number of eigenvectors reaches the maximum
            available (``min(n_x * n_flavors, n_replicas)``).
    """
    # The ABCs live in ``collections.abc``; the aliases in ``collections``
    # were removed in Python 3.10. Imported locally to keep the block
    # self-contained.
    from collections.abc import Container, Iterator

    # Estimator = norm**2(rotated)/norm**2(total), which is additive when
    # adding eigenvectors.
    # Error = (1 - sqrt(1-estimator))
    # TODO: Optimize by calculating estimator instead of error?
    # target_estimator = 1 - (1-target_error)**2

    nxf = len(pdf.make_xgrid()) * len(pdf.make_flavors())
    nrep = len(pdf) - 1
    max_neig = min(nxf, nrep)
    # We must divide by norm since we are reproducing the covmat and not XX.T
    norm = _pdf_normalization(pdf)

    # Normalize `target_error` to an iterator yielding one value per bin.
    if isinstance(target_error, Container):
        total_bins = sum(r.nbins for r in pdf_results)
        if len(target_error) != total_bins:
            raise ValueError("Incorrect target error specification")
        target_error = iter(target_error)
    elif isinstance(target_error, numbers.Real):
        target_error = itertools.repeat(target_error)
    elif not isinstance(target_error, Iterator):
        raise ValueError("Target error not understood")

    lincomb = np.zeros(shape=(nrep, max_neig))

    desc = OrderedDict()
    errors = OrderedDict()

    # Next free column of `lincomb`; shared across all observables and bins.
    index = 0

    for result in pdf_results:
        obs_desc = OrderedDict()
        obs_errors = OrderedDict()
        desc[str(result.obs)] = obs_desc
        errors[str(result.obs)] = obs_errors
        if result.pdf != pdf:
            raise ValueError("PDF results must be for %s" % pdf)
        for b in result.binlabels:
            Xreal = get_X(pdf, Q=result.meanQ[b], reshape=True)
            # `.ix` was removed from pandas; bin labels are index labels, so
            # `.loc` is the label-based replacement.
            prediction = result._all_vals.loc[b]
            original_diffs = prediction - np.mean(prediction)
            if Rold is not None:
                X = np.dot(Xreal, Rold)
                rotated_diffs = np.dot(original_diffs, Rold)
            else:
                rotated_diffs = original_diffs
                X = Xreal

            eigs_for_bin = 0
            error_val = next(target_error)

            # Would be
            # while _get_error(rotated_diffs, original_diffs) > error_val
            # except that we want to capture current_error
            while True:
                current_error = _get_error(rotated_diffs, original_diffs)
                if current_error < error_val:
                    break
                X = _mask_X(X, rotated_diffs, correlation_threshold=correlation_threshold)
                P, R = _pop_eigenvector(X)
                # Express the new vector and rotation in the original basis
                # by composing them with the accumulated rotation.
                if Rold is not None:
                    P = np.dot(Rold, P)
                    R = np.dot(Rold, R)
                Rold = R

                rotated_diffs = np.dot(original_diffs, Rold)
                X = np.dot(Xreal, Rold)
                lincomb[:, index : index + 1] = P
                index += 1
                # NOTE(review): this triggers as soon as the last available
                # column has been filled, before checking whether that vector
                # met the target -- confirm this is intended.
                if index == max_neig:
                    raise TooMuchPrecision(result.obs, b + 1)
                eigs_for_bin += 1
            if eigs_for_bin:
                logging.info(
                    "Obtained %d eigenvector%s for observable %s, bin %d",
                    eigs_for_bin,
                    "s" if eigs_for_bin > 1 else "",
                    result.obs,
                    b + 1,
                )
            else:
                logging.debug(
                    "Observable %s, bin %d is already well reproduced.",
                    result.obs,
                    b + 1,
                )
            obs_desc[int(b + 1)] = index
            obs_errors[int(b + 1)] = current_error

    # Prune extra zeros
    lincomb = lincomb[:, :index]
    logging.debug("Linear combination has %d eigenvectors", lincomb.shape[1])

    return SMPDFLincombResult(lincomb=lincomb, norm=norm, desc=desc, errors=errors, Rold=Rold)