def cubic_interp(obs_t, cum_obs):
    """
    Construct a cubic count interpolant
    (which for monotonic counts is a quadratic rate).

    The observations are padded with two extra knots on each side, spaced
    one time unit apart and carrying the first / last observed count, so
    that the interpolant flattens out instead of extrapolating a trend.

    Parameters
    ----------
    obs_t : array_like
        Observation times, strictly increasing (required by
        ``PchipInterpolator``). Must contain at least one element.
    cum_obs : array_like
        Cumulative counts observed at ``obs_t``; same length as ``obs_t``.

    Returns
    -------
    scipy.interpolate.PPoly
        Piecewise cubic in the power basis, with extrapolation enabled.
    """
    # Local import: the surrounding module is only known to provide
    # ``np``, ``PPoly`` and ``PchipInterpolator``.
    from scipy.interpolate import BPoly

    obs_t = np.asarray(obs_t)
    cum_obs = np.asarray(cum_obs)

    # extend with null counts
    # so that it extrapolates, but conservatively
    obs_t = np.concatenate([
        [obs_t[0] - 2, obs_t[0] - 1],
        obs_t,
        [obs_t[-1] + 1, obs_t[-1] + 2],
    ])
    cum_obs = np.concatenate([
        [cum_obs[0], cum_obs[0]],
        cum_obs,
        [cum_obs[-1], cum_obs[-1]],
    ])

    interpolant = PchipInterpolator(obs_t, cum_obs, extrapolate=True)

    # Depending on the SciPy version, PchipInterpolator is either a BPoly
    # (Bernstein basis) or already a PPoly subclass (power basis).
    # PPoly.from_bernstein_basis rejects anything that is not a BPoly, so
    # only convert when a conversion is actually needed.
    if isinstance(interpolant, BPoly):
        return PPoly.from_bernstein_basis(interpolant)
    return PPoly(
        interpolant.c, interpolant.x, extrapolate=interpolant.extrapolate
    )
def test_pp_from_bp(self): x = [0, 1, 3] c = [[3, 3], [1, 1], [4, 2]] bp = BPoly(c, x) pp = PPoly.from_bernstein_basis(bp) bp1 = BPoly.from_power_basis(pp) xp = [0.1, 1.4] assert_allclose(bp(xp), pp(xp)) assert_allclose(bp(xp), bp1(xp))
def test_bp_from_pp_random(self): np.random.seed(1234) m, k = 5, 8 # number of intervals, order x = np.sort(np.random.random(m)) c = np.random.random((k, m - 1)) pp = PPoly(c, x) bp = BPoly.from_power_basis(pp) pp1 = PPoly.from_bernstein_basis(bp) xp = np.linspace(x[0], x[-1], 21) assert_allclose(pp(xp), bp(xp)) assert_allclose(pp(xp), pp1(xp))
def test_bp_from_pp_random(self): np.random.seed(1234) m, k = 5, 8 # number of intervals, order x = np.sort(np.random.random(m)) c = np.random.random((k, m-1)) pp = PPoly(c, x) bp = BPoly.from_power_basis(pp) pp1 = PPoly.from_bernstein_basis(bp) xp = np.linspace(x[0], x[-1], 21) assert_allclose(pp(xp), bp(xp)) assert_allclose(pp(xp), pp1(xp))
def test_derivative_ppoly(self): # make sure it's consistent w/ power basis np.random.seed(1234) m, k = 5, 8 # number of intervals, order x = np.sort(np.random.random(m)) c = np.random.random((k, m - 1)) bp = BPoly(c, x) pp = PPoly.from_bernstein_basis(bp) for d in range(k): bp = bp.derivative() pp = pp.derivative() xp = np.linspace(x[0], x[-1], 21) assert_allclose(bp(xp), pp(xp))
def test_derivative_ppoly(self): # make sure it's consistent w/ power basis np.random.seed(1234) m, k = 5, 8 # number of intervals, order x = np.sort(np.random.random(m)) c = np.random.random((k, m-1)) bp = BPoly(c, x) pp = PPoly.from_bernstein_basis(bp) for d in range(k): bp = bp.derivative() pp = pp.derivative() xp = np.linspace(x[0], x[-1], 21) assert_allclose(bp(xp), pp(xp))
def roots(self):
    """
    Return the roots of the interpolated function.

    The instance is converted to the power basis with
    ``PPoly.from_bernstein_basis`` and the result of that object's
    ``roots()`` call is returned unchanged.
    """
    power_form = PPoly.from_bernstein_basis(self)
    return power_form.roots()
def run(self):
    """
    Interpolate the scale-factor histograms and write the results.

    For each selected category, the single histogram found in the
    category's directory of the ``inp["sf"]["scale_factors"]`` file is
    read, optionally scaled by the category's normalization factor, and
    interpolated with a PCHIP spline through the (bin center, bin content)
    points of all bins whose center is non-negative. The spline is sampled
    into a finely binned histogram; outside the interpolated range the
    values are held constant.

    Outputs:
        * ``outp["sf"]``: one finely binned histogram ("sf") per category.
        * only if ``self.fix_normalization`` is set:
          ``outp["csv"]`` with one line per piece of the piecewise
          function, and ``outp["functions"]`` with one ``TF1`` ("sf") per
          category.

    Raises:
        ValueError: if a category directory does not contain exactly one
            key.
        Exception: if the assembled ``TF1`` deviates from the interpolator
            by more than 1e-5 at any of the interpolated bin centers.
    """
    import ROOT

    inp = self.input()
    outp = self.output()

    interpolation_bins = 1000

    # cannot get the function from ROOT, use scipy instead
    from scipy.interpolate import PchipInterpolator, BPoly, PPoly

    # get categories in which to fit the scale factors
    categories = []
    for category, _, _ in self.config_inst.walk_categories():
        if len(self.category_tags) > 0 and not category.has_tag(self.category_tags, mode=any):
            continue

        if self.has_c_shift:
            if category.get_aux("region", None) == "c" and category.get_aux("phase_space") == "measure":
                if category.has_tag(self.b_tagger):
                    categories.append(category)
        else:
            if category.has_tag(("merged", self.b_tagger), mode=all) and category.get_aux("phase_space") == "measure":
                categories.append(category)

    # get scaling factors for normalization
    if self.fix_normalization:
        norm_factors = inp["norm"].load()[self.effective_shift]

    # contents of .csv file for scale factors
    fit_results = []
    # finely binned histograms to write to the output file
    hist_dict = {}
    # TF1's to write to the output file
    function_dict = {}

    with inp["sf"]["scale_factors"].load("r") as input_file:
        for category in categories:
            region = category.get_aux("region")
            category_dir = input_file.GetDirectory(category.name)

            # get scale factor histogram
            hist_keys = category_dir.GetListOfKeys()
            if len(hist_keys) != 1:
                # report the actual count: the condition also triggers for
                # an empty directory, not only for "more than one"
                raise ValueError("Expected exactly one histogram in %s but found %d, cannot "
                    "identify scale factor hist." % (category_dir, len(hist_keys)))

            hist = category_dir.Get(hist_keys[0].GetName())
            nbins = hist.GetNbinsX()

            if self.fix_normalization:
                hist.Scale(norm_factors[category.name])

            x_axis = hist.GetXaxis()
            interpolation_hist = ROOT.TH1D(hist.GetName() + "_fine", hist.GetTitle(),
                interpolation_bins, x_axis.GetXmin(), x_axis.GetXmax())

            # interpolation support points: bins with negative centers are
            # excluded and handled by a constant value further below
            x_values = ROOT.vector("double")()
            y_values = ROOT.vector("double")()
            for bin_idx in range(1, nbins + 1):
                if hist.GetBinCenter(bin_idx) < 0:
                    continue
                x_values.push_back(hist.GetBinCenter(bin_idx))
                y_values.push_back(hist.GetBinContent(bin_idx))

            interpolator = PchipInterpolator(x_values, y_values)

            # define region in which to use interpolation
            first_point, last_point = min(x_values), max(x_values)

            # create finely binned histogram from the interpolator
            # (bin 0 and bin interpolation_bins + 1 are under-/overflow)
            for bin_idx in range(interpolation_bins + 2):
                bin_center = interpolation_hist.GetBinCenter(bin_idx)
                if bin_center < 0:
                    interpolation_hist.SetBinContent(bin_idx, hist.GetBinContent(1))
                elif bin_center < first_point:
                    interpolation_hist.SetBinContent(bin_idx, interpolator(first_point))
                elif bin_center > last_point:
                    interpolation_hist.SetBinContent(bin_idx, interpolator(last_point))
                else:
                    interpolation_hist.SetBinContent(bin_idx, interpolator(bin_center))
            hist_dict[category] = interpolation_hist

            # fill .csv file in final iteration (after normalization fix)
            # also create piecewise (constant / cubic) TF1's
            if self.fix_normalization:
                function_pieces = []

                results = {}
                results["eta_min"], results["eta_max"] = category.get_aux("eta")
                pt_range = category.get_aux("pt")
                results["pt_min"] = pt_range[0]
                results["pt_max"] = min(pt_range[1], 10000.)  # replace inf

                results["flavor_id"] = self.config_inst.get_aux("flavor_ids")[region]

                if self.effective_shift == "nominal":
                    sys_type = "central"
                else:
                    sys_name, direction = self.effective_shift.rsplit("_", 1)
                    sys_name = (
                        sys_name
                        .replace("c_stats", "cferr")
                        .replace("lf_stats", "lfstats")
                        .replace("hf_stats", "hfstats")
                    )
                    sys_type = "{}_{}".format(direction, sys_name)
                results["sysType"] = sys_type

                # skip unwanted combinations
                # (the fine histogram stored in hist_dict above is kept)
                if "cferr" in sys_type and region != "c":
                    continue

                fit_results_tpl = "3, iterativefit, {sysType}, {flavor_id}, {eta_min}, " \
                    "{eta_max}, {pt_min}, {pt_max}".format(**results)

                # constant pieces below the interpolated range
                fit_results.append(fit_results_tpl + ", -15, 0, {}".format(hist.GetBinContent(1)))
                fit_results.append(fit_results_tpl + ", 0, {}, {}".format(first_point,
                    interpolator(first_point)))

                function_pieces.append("(x < 0) * {}".format(hist.GetBinContent(1)))
                function_pieces.append("(x >= 0) * (x < {}) * {}".format(first_point,
                    interpolator(first_point)))

                # intermediate functions
                # Obtain the interpolant in the power basis. Depending on the
                # scipy version, PchipInterpolator is either a BPoly
                # (Bernstein basis, needs conversion) or already a PPoly
                # subclass whose coefficients can be used as they are.
                if isinstance(interpolator, BPoly):
                    bpoly_interpolation = BPoly(interpolator.c, interpolator.x)
                    ppoly_interpolation = PPoly.from_bernstein_basis(bpoly_interpolation)
                else:
                    ppoly_interpolation = PPoly(interpolator.c, interpolator.x)

                # one cubic per pair of neighbouring interpolated bin centers
                interpolator_idx = 0
                for bin_idx in range(1, nbins):
                    if hist.GetBinCenter(bin_idx) < first_point:
                        continue
                    x_min = hist.GetBinCenter(bin_idx)
                    x_max = hist.GetBinCenter(bin_idx + 1)

                    interpolator_coefficients = ppoly_interpolation.c[:, interpolator_idx]
                    interpolator_x = ppoly_interpolation.x[interpolator_idx]
                    interpolator_idx += 1

                    # c0*(x-x0)^3 + c1*(x-x0)^2 + c2*(x-x0) + c3, with powers
                    # written out as repeated products; an explicit "+" is
                    # only added in front of non-negative coefficients
                    shifted_x = "(x-{})".format(interpolator_x)
                    formula = ""
                    # range instead of xrange: works on Python 2 and 3
                    for i in range(3):
                        formula += "{}*".format(interpolator_coefficients[i]) + \
                            "*".join([shifted_x] * (3 - i))
                        formula += "+" if interpolator_coefficients[i + 1] >= 0. else ""
                    formula += str(interpolator_coefficients[3])

                    fit_results.append(fit_results_tpl + ", {}, {}, {}".format(x_min, x_max, formula))
                    function_pieces.append("(x >= {}) * (x < {}) * ({})".format(
                        x_min, x_max, formula))

                # constant piece above the interpolated range
                fit_results.append(fit_results_tpl + ", {}, 1.1, {}".format(last_point,
                    interpolator(last_point)))
                function_pieces.append("(x >= {}) * {}".format(last_point,
                    interpolator(last_point)))

                function = ROOT.TF1("sf_{}".format(category.name),
                    " + ".join(["({})".format(piece) for piece in function_pieces]),
                    -2., 1.1)
                function_dict[category] = function

                # sanity check
                for val in x_values:
                    if (abs(function.Eval(val) - interpolator(val)) > 1e-5):
                        raise Exception("SF Function does not match interpolator values: "
                            "{} vs {}".format(function.Eval(val), interpolator(val)))

        # write to output file
        # NOTE(review): done while the input file is still open, since ROOT
        # may attach the histograms created above to that file - confirm
        # the ownership handling of the ROOT formatter in use.
        with outp["sf"].localize("w") as tmp:
            with tmp.dump("RECREATE") as output_file:
                for category, hist in hist_dict.items():
                    category_dir = output_file.mkdir(category.name)
                    category_dir.cd()
                    hist.Write("sf")

    if self.fix_normalization:
        with outp["csv"].localize("w") as tmp:
            with tmp.open("w") as result_file:
                result_file.write("\n".join(fit_results))

        with outp["functions"].localize("w") as tmp:
            with tmp.dump("RECREATE") as output_file:
                for category, func in function_dict.items():
                    category_dir = output_file.mkdir(category.name)
                    category_dir.cd()
                    func.Write("sf")