def cubic_interp(obs_t, cum_obs):
    """
    Construct a cubic count interpolant
    (which for monotonic counts is a quadratic rate)

    The observations are padded with two extra points on each side, placed
    1 and 2 time units beyond the first/last observation time and carrying
    the first/last cumulative count, so that the interpolant is flat outside
    the observed range.

    Parameters
    ----------
    obs_t : sequence of float
        Observation times; must be non-empty and strictly increasing
        (a requirement of ``PchipInterpolator``).
    cum_obs : sequence of float
        Cumulative counts observed at ``obs_t`` (same length).

    Returns
    -------
    PPoly
        Piecewise cubic polynomial in the power basis, with extrapolation
        enabled.
    """

    # extend with null counts
    # so that it extrapolates, but conservatively
    obs_t = np.concatenate([
        [obs_t[0] - 2, obs_t[0] - 1],
        obs_t,
        [obs_t[-1] + 1, obs_t[-1] + 2]
    ])
    cum_obs = np.concatenate([
        [cum_obs[0], cum_obs[0]],
        cum_obs,
        [cum_obs[-1], cum_obs[-1]]
    ])

    interpolant = PchipInterpolator(
        obs_t,
        cum_obs,
        extrapolate=True
    )

    # Newer SciPy releases implement PchipInterpolator as a PPoly subclass
    # (already in the power basis); PPoly.from_bernstein_basis rejects such
    # objects with a TypeError, so only convert when it really is a
    # Bernstein-basis polynomial (older releases).
    if isinstance(interpolant, PPoly):
        big_n_hat = PPoly(
            interpolant.c,
            interpolant.x,
            extrapolate=interpolant.extrapolate
        )
    else:
        big_n_hat = PPoly.from_bernstein_basis(interpolant)
    return big_n_hat
Example #2
0
    def test_pp_from_bp(self):
        x = [0, 1, 3]
        c = [[3, 3], [1, 1], [4, 2]]
        bp = BPoly(c, x)
        pp = PPoly.from_bernstein_basis(bp)
        bp1 = BPoly.from_power_basis(pp)

        xp = [0.1, 1.4]
        assert_allclose(bp(xp), pp(xp))
        assert_allclose(bp(xp), bp1(xp))
Example #3
0
    def test_pp_from_bp(self):
        x = [0, 1, 3]
        c = [[3, 3], [1, 1], [4, 2]]
        bp = BPoly(c, x)
        pp = PPoly.from_bernstein_basis(bp)
        bp1 = BPoly.from_power_basis(pp)

        xp = [0.1, 1.4]
        assert_allclose(bp(xp), pp(xp))
        assert_allclose(bp(xp), bp1(xp))
Example #4
0
    def test_bp_from_pp_random(self):
        np.random.seed(1234)
        m, k = 5, 8  # number of intervals, order
        x = np.sort(np.random.random(m))
        c = np.random.random((k, m - 1))
        pp = PPoly(c, x)
        bp = BPoly.from_power_basis(pp)
        pp1 = PPoly.from_bernstein_basis(bp)

        xp = np.linspace(x[0], x[-1], 21)
        assert_allclose(pp(xp), bp(xp))
        assert_allclose(pp(xp), pp1(xp))
Example #5
0
    def test_bp_from_pp_random(self):
        np.random.seed(1234)
        m, k = 5, 8   # number of intervals, order
        x = np.sort(np.random.random(m))
        c = np.random.random((k, m-1))
        pp = PPoly(c, x)
        bp = BPoly.from_power_basis(pp)
        pp1 = PPoly.from_bernstein_basis(bp)

        xp = np.linspace(x[0], x[-1], 21)
        assert_allclose(pp(xp), bp(xp))
        assert_allclose(pp(xp), pp1(xp))
Example #6
0
    def test_derivative_ppoly(self):
        # make sure it's consistent w/ power basis
        np.random.seed(1234)
        m, k = 5, 8  # number of intervals, order
        x = np.sort(np.random.random(m))
        c = np.random.random((k, m - 1))
        bp = BPoly(c, x)
        pp = PPoly.from_bernstein_basis(bp)

        for d in range(k):
            bp = bp.derivative()
            pp = pp.derivative()
            xp = np.linspace(x[0], x[-1], 21)
            assert_allclose(bp(xp), pp(xp))
Example #7
0
    def test_derivative_ppoly(self):
        # make sure it's consistent w/ power basis
        np.random.seed(1234)
        m, k = 5, 8   # number of intervals, order
        x = np.sort(np.random.random(m))
        c = np.random.random((k, m-1))
        bp = BPoly(c, x)
        pp = PPoly.from_bernstein_basis(bp)

        for d in range(k):
            bp = bp.derivative()
            pp = pp.derivative()
            xp = np.linspace(x[0], x[-1], 21)
            assert_allclose(bp(xp), pp(xp))
Example #8
0
 def roots(self):
     """
     Return the roots of the interpolated function.

     The instance is first converted to the power basis with
     ``PPoly.from_bernstein_basis`` and that polynomial's ``roots()``
     result is returned unchanged.
     """
     power_form = PPoly.from_bernstein_basis(self)
     return power_form.roots()
    def run(self):
        """
        Interpolate the scale factor histogram of each selected category and write the results.

        For every selected category, the single histogram found in the category's directory of
        the "scale_factors" input file is optionally scaled by a normalization factor. Its bins
        with non-negative centers are interpolated with a PCHIP interpolator and a finely binned
        histogram is filled from that interpolation (constant outside the interpolated range).
        When ``self.fix_normalization`` is set, csv lines and one piecewise ``ROOT.TF1`` per
        category are created and written as well.

        Raises:
            ValueError: If a category directory does not contain exactly one key.
            Exception: If the created TF1 deviates from the interpolator by more than 1e-5 at
                one of the interpolated bin centers.
        """
        import ROOT

        inp = self.input()
        outp = self.output()

        interpolation_bins = 1000

        # cannot get the function from ROOT, use scipy instead
        from scipy.interpolate import PchipInterpolator, BPoly, PPoly

        # get categories in which to fit the scale factors
        categories = []
        for category, _, _ in self.config_inst.walk_categories():
            if len(self.category_tags) > 0 and not category.has_tag(self.category_tags, mode=any):
                continue
            if self.has_c_shift:
                if category.get_aux("region", None) == "c" and category.get_aux("phase_space") == "measure":
                    if category.has_tag(self.b_tagger):
                        categories.append(category)
            else:
                if category.has_tag(("merged", self.b_tagger), mode=all) and category.get_aux("phase_space") == "measure":
                    categories.append(category)

        # get scaling factors for normalization
        if self.fix_normalization:
            norm_factors = inp["norm"].load()[self.effective_shift]

        # contents of .csv file for scale factors
        fit_results = []
        # finely binned histograms to write to the output file
        hist_dict = {}
        # TF1's to write to the output file
        function_dict = {}
        with inp["sf"]["scale_factors"].load("r") as input_file:
            for category in categories:
                region = category.get_aux("region")
                category_dir = input_file.GetDirectory(category.name)

                # get scale factor histogram
                hist_keys = category_dir.GetListOfKeys()
                if len(hist_keys) != 1:
                    # covers both the "no histogram" and the "several histograms" case
                    raise ValueError("Expected exactly one histogram in %s but found %d, cannot "
                        "identify scale factor hist." % (category_dir, len(hist_keys)))
                hist = category_dir.Get(hist_keys[0].GetName())
                nbins = hist.GetNbinsX()
                if self.fix_normalization:
                    hist.Scale(norm_factors[category.name])

                x_axis = hist.GetXaxis()
                interpolation_hist = ROOT.TH1D(hist.GetName() + "_fine", hist.GetTitle(),
                    interpolation_bins, x_axis.GetXmin(), x_axis.GetXmax())
                # interpolation points: centers and contents of all bins with center >= 0
                x_values = ROOT.vector("double")()
                y_values = ROOT.vector("double")()
                for bin_idx in range(1, nbins + 1):
                    if hist.GetBinCenter(bin_idx) < 0:
                        continue
                    x_values.push_back(hist.GetBinCenter(bin_idx))
                    y_values.push_back(hist.GetBinContent(bin_idx))

                interpolator = PchipInterpolator(x_values, y_values)
                # define region in which to use interpolation
                first_point, last_point = min(x_values), max(x_values)

                # create finely binned histogram from either TF1 or interpolator
                # (the range includes bin 0 and bin interpolation_bins + 1)
                for bin_idx in range(interpolation_bins + 2):
                    bin_center = interpolation_hist.GetBinCenter(bin_idx)
                    if bin_center < 0:
                        interpolation_hist.SetBinContent(bin_idx, hist.GetBinContent(1))
                    elif bin_center < first_point:
                        interpolation_hist.SetBinContent(bin_idx, interpolator(first_point))
                    elif bin_center > last_point:
                        interpolation_hist.SetBinContent(bin_idx, interpolator(last_point))
                    else:
                        interpolation_hist.SetBinContent(bin_idx, interpolator(bin_center))
                hist_dict[category] = interpolation_hist

                # fill .csv file in final iteration (after normalization fix)
                # also create piecewise linear TF1's
                if self.fix_normalization:
                    function_pieces = []

                    results = {}
                    results["eta_min"], results["eta_max"] = category.get_aux("eta")
                    pt_range = category.get_aux("pt")
                    results["pt_min"] = pt_range[0]
                    results["pt_max"] = min(pt_range[1], 10000.)  # replace inf
                    results["flavor_id"] = self.config_inst.get_aux("flavor_ids")[region]

                    if self.effective_shift == "nominal":
                        sysType = "central"
                    else:
                        sys_name, direction = self.effective_shift.rsplit("_", 1)
                        sysType = "{}_{}".format(direction,
                            sys_name.replace("c_stats", "cferr").replace("lf_stats", "lfstats").replace("hf_stats", "hfstats")
                        )
                    results["sysType"] = sysType

                    # skip unwanted combinations
                    if "cferr" in sysType and region != "c":
                        continue

                    fit_results_tpl = "3, iterativefit, {sysType}, {flavor_id}, {eta_min}, " \
                        "{eta_max}, {pt_min}, {pt_max}".format(**results)
                    fit_results.append(fit_results_tpl + ", -15, 0, {}".format(hist.GetBinContent(1)))
                    fit_results.append(fit_results_tpl + ", 0, {}, {}".format(first_point,
                        interpolator(first_point)))

                    function_pieces.append("(x < 0) * {}".format(hist.GetBinContent(1)))
                    function_pieces.append("(x >= 0) * (x < {}) * {}".format(first_point,
                        interpolator(first_point)))

                    # intermediate functions
                    # obtain the interpolation in the power basis: depending on the scipy
                    # version, PchipInterpolator is either already a PPoly (power basis), or a
                    # BPoly (bernstein basis) whose coefficients need to be converted first
                    if isinstance(interpolator, PPoly):
                        ppoly_interpolation = interpolator
                    else:
                        bpoly_interpolation = BPoly(interpolator.c, interpolator.x)
                        ppoly_interpolation = PPoly.from_bernstein_basis(bpoly_interpolation)

                    # one cubic piece per pair of neighboring interpolation points
                    interpolator_idx = 0
                    for bin_idx in range(1, nbins):
                        if hist.GetBinCenter(bin_idx) < first_point:
                            continue
                        x_min = hist.GetBinCenter(bin_idx)
                        x_max = hist.GetBinCenter(bin_idx + 1)

                        interpolator_coefficients = ppoly_interpolation.c[:, interpolator_idx]
                        interpolator_x = ppoly_interpolation.x[interpolator_idx]
                        interpolator_idx += 1
                        # coefficient i multiplies (x - interpolator_x)^(3 - i), the explicit
                        # "+" is only needed when the following coefficient carries no sign
                        formula = ""
                        for i in range(3):
                            formula += "{}*".format(interpolator_coefficients[i]) + \
                                "*".join(["(x-{})".format(interpolator_x)] * (3 - i))
                            formula += "+" if interpolator_coefficients[i + 1] >= 0. else ""
                        formula += str(interpolator_coefficients[3])
                        fit_results.append(fit_results_tpl + ", {}, {}, {}".format(x_min,
                            x_max, formula))
                        function_pieces.append("(x >= {}) * (x < {}) * ({})".format(
                            x_min, x_max, formula))

                    fit_results.append(fit_results_tpl + ", {}, 1.1, {}".format(last_point,
                        interpolator(last_point)))
                    function_pieces.append("(x >= {}) * {}".format(last_point,
                        interpolator(last_point)))

                    function = ROOT.TF1("sf_{}".format(category.name),
                        " + ".join(["({})".format(piece) for piece in function_pieces]),
                        -2., 1.1)
                    function_dict[category] = function

                    # sanity check
                    for val in x_values:
                        if (abs(function.Eval(val) - interpolator(val)) > 1e-5):
                            raise Exception("SF Function does not match interpolator values: "
                                "{} vs {}".format(function.Eval(val), interpolator(val)))

            # write to output file
            with outp["sf"].localize("w") as tmp:
                with tmp.dump("RECREATE") as output_file:
                    for category, hist in hist_dict.items():
                        category_dir = output_file.mkdir(category.name)
                        category_dir.cd()
                        hist.Write("sf")
            if self.fix_normalization:
                with outp["csv"].localize("w") as tmp:
                    with tmp.open("w") as result_file:
                        result_file.write("\n".join(fit_results))

                with outp["functions"].localize("w") as tmp:
                    with tmp.dump("RECREATE") as output_file:
                        for category, func in function_dict.items():
                            category_dir = output_file.mkdir(category.name)
                            category_dir.cd()
                            func.Write("sf")