def get_costh_phi_in_bins(hist_3d):
    """
    Slice a 3d histogram into 2d histograms along its third axis

    Args:
        hist_3d: Histogram with three axes, readable via get_array and
            get_binning

    Returns:
        list: One histogram per index along the last array axis, each
            built by from_array from the bin contents and errors of that
            slice and the X and Y binning of the passed histogram
    """
    contents = get_array(hist_3d)
    xy_binning = np.array([get_binning(hist_3d, 'X'),
                           get_binning(hist_3d, 'Y')])
    uncertainties = get_array(hist_3d, errors=True)

    slices = []
    for ibin in xrange(contents.shape[2]):
        slices.append(from_array(contents[:, :, ibin], xy_binning,
                                 errors=uncertainties[:, :, ibin]))
    return slices
def get_contour(hist):
    """
    Get the outer contour of all filled points in the histogram

    The bin centers of all bins with a content larger than 0 are collected
    and their convex hull is returned as a closed TGraph.

    Args:
        hist: 2d histogram

    Returns:
        ROOT.TGraph: The hull vertices, with the first vertex repeated at the
            end
    """
    is_filled = get_array(hist) > 0

    x_edges, y_edges = get_binning(hist, 0), get_binning(hist, 1)
    x_centers = 0.5 * (x_edges[:-1] + x_edges[1:])
    y_centers = 0.5 * (y_edges[:-1] + y_edges[1:])

    points = np.array([
        [x_cent, y_cent]
        for i_x, x_cent in enumerate(x_centers)
        for i_y, y_cent in enumerate(y_centers)
        if is_filled[i_x, i_y]
    ])

    vertices = ConvexHull(points).vertices

    # Repeat the first vertex at the end to "close" the contour
    x_cont = points[vertices, 0]
    x_cont = np.append(x_cont, x_cont[:1])
    y_cont = points[vertices, 1]
    y_cont = np.append(y_cont, y_cont[:1])

    return r.TGraph(len(vertices) + 1, x_cont, y_cont)
def to_bw_hist(hist):
    """
    Turn a 2d histogram into a "black and white" one

    Returns:
        A histogram built by from_array with the same binning along the first
        two axes, holding True for every bin with non-zero content and False
        for every other bin
    """
    contents = get_array(hist)
    # TODO: generalize and put into hist_utils
    axis_binnings = np.array([get_binning(hist, axis) for axis in (0, 1)])
    return from_array(contents != 0, axis_binnings)
def get_pt_bin(amap, pt_val):
    """
    Get the costh-phi map of one pt bin from the passed (3d) acceptance map

    Args:
        amap: 3d acceptance map, the binning of axis 2 is used to look up
            pt_val
        pt_val: Value for which the bin along axis 2 is determined via
            find_bin

    Returns:
        The 2d slice (contents and errors) of the found bin as a histogram
        with the binning of axes 0 and 1 of the passed map
    """
    pt_binning = get_binning(amap, 2)
    pt_idx = find_bin(pt_binning, np.array([pt_val]))[0]

    contents = get_array(amap)
    uncertainties = get_array(amap, errors=True)
    costh_phi_binning = np.array([get_binning(amap, 0), get_binning(amap, 1)])

    return from_array(contents[:, :, pt_idx], costh_phi_binning,
                      errors=uncertainties[:, :, pt_idx])
def get_combined_ppd_2d(inputfiles, var1, var2):
    """
    Get the combined 2d ppd from all inputfiles

    For every input file the scaled 2d ppd is obtained (with 100 passed for
    both of the trailing arguments of get_scaled_ppd_2d) and the combination
    is the bin-wise maximum over all of them. The binning is taken from the
    ppd of the first file.
    """
    scaled_ppds = []
    for infile in inputfiles:
        scaled_ppds.append(get_scaled_ppd_2d(infile, var1, var2, 100, 100))

    reference = scaled_ppds[0]
    binning = np.array([get_binning(reference, 0), get_binning(reference, 1)])

    stacked_vals = np.array([get_array(ppd) for ppd in scaled_ppds])
    # TODO: at some point find out how argmax works in multiple dimensions
    return from_array(np.max(stacked_vals, axis=0), binning)
def store_hists(outfile, hists, basename, binvar=None):
    """
    Store histograms into the passed output file

    Args:
        outfile: File that is made the current directory (via cd) before
            writing
        hists (dict): Mapping of name to histogram
        basename (str): Prefix that is used in the names of all stored
            histograms
        binvar (optional): If not None, the histograms are treated as 3d maps
            and, additionally to the map itself, one 2d projection per bin of
            the Z axis is stored. Only binvar[0] is used (in the names).
    """
    outfile.cd()

    if binvar is None:
        for name, hist in hists.iteritems():
            hist.SetName('_'.join([basename, name]))
            hist.Write()
        return

    for name, hist in hists.iteritems():
        # store 2d projections onto costh-phi
        projections = get_costh_phi_in_bins(hist)
        var_binning = get_binning(hist, 'Z')
        x_title = hist.GetXaxis().GetTitle()
        y_title = hist.GetYaxis().GetTitle()

        for ibin, proj in enumerate(projections):
            low, high = var_binning[ibin], var_binning[ibin + 1]
            # make the bin borders usable in a name: '.' -> 'p', '-' -> 'm'
            bin_bord = '{:.2f}_{:.2f}'.format(low, high)
            bin_bord = bin_bord.replace('.', 'p').replace('-', 'm')

            proj.GetXaxis().SetTitle(x_title)
            proj.GetYaxis().SetTitle(y_title)
            proj.SetName(
                '_'.join(['proj', basename, binvar[0], bin_bord, name]))
            proj.Write()

        # also store the 3d maps that are used in the lookup
        hist.SetName('_'.join([basename, binvar[0], name]))
        hist.Write()
def main(args):
    """
    Main

    Overlay the coverage of the data onto the correction map and store the
    resulting plot(s) as pdf into args.outdir. For a 2d map one plot is
    produced, otherwise one plot per bin of axis 2 of the map, using only the
    data selected into that 'JpsiPt' bin.
    """
    data = get_dataframe(args.datafile)
    cmfile = r.TFile.Open(args.corrmapfile)
    accmap = get_correction_map(cmfile, not args.no_pt, args.acceptance)

    cond_mkdir(args.outdir)

    # default drawing option, can be overridden from the command line
    plot_args = {'drawOpt': 'colz'}
    if args.plot_arguments is not None:
        user_args = parse_plot_args(args.plot_arguments.split(';;'))
        plot_args.update(user_args)

    if isinstance(accmap, r.TH2):
        plot = make_overlay_plot(accmap, data, **plot_args)
        plot.SaveAs('{}/corrmap_data_overlay_2d.pdf'.format(args.outdir))
        return

    pt_binning = get_binning(accmap, 2)
    for pt_low, pt_high in zip(pt_binning[:-1], pt_binning[1:]):
        pdata = apply_selections(data, select_bin('JpsiPt', pt_low, pt_high))
        # use the center of the bin to look up the map of this pt bin
        pmap = get_pt_bin(accmap, 0.5 * np.sum((pt_low, pt_high)))

        plot = make_overlay_plot(pmap, pdata, **plot_args)
        plot.SaveAs('{}/corrmap_data_overlay_2d_{}_{}.pdf'.format(
            args.outdir, int(pt_low), int(pt_high)))
def test_non_compatible_binning(self, mock_logger):
    """
    Rebinning to the target binning used here has to return None and log an
    error
    """
    hist = _get_hist(1)
    target_binning = np.linspace(0, 1, 7)

    rebinned = hu.rebin_1d_binning(hist, target_binning)
    self.assertTrue(rebinned is None)

    exp_err = 'Cannot rebin histogram with binning {} to target binning {}'
    exp_msg = exp_err.format(hu.get_binning(hist), target_binning)
    mock_logger.error.assert_called_with(exp_msg)
def shift_by_median(ppd, use_val=None):
    """
    Shift the ppd by the passed value to center it around 0

    Args:
        ppd: Histogram of the ppd
        use_val (optional): Value that is subtracted from the binning of the
            ppd. If None, no shift is done.

    Returns:
        The passed ppd itself if use_val is None, otherwise a new histogram
        with the same contents and errors, but shifted binning
    """
    if use_val is None:
        # nothing to shift by, hand back the very same object
        return ppd

    shifted_binning = get_binning(ppd) - use_val
    contents = get_array(ppd)
    uncertainties = get_array(ppd, errors=True)
    return from_array(contents, shifted_binning, errors=uncertainties)
def test_find_bin_nonreg_binning(self):
    """
    find_bin has to agree with TH1::FindBin for a non-equidistant binning
    """
    hist = r.TH1D(create_random_str(8), '', 10, np.linspace(0, 1, 11)**2)
    binning = hu.get_binning(hist)
    values = np.random.uniform(0, 1, 1000)

    # shift the indices obtained from the TH1 by one to compare them to the
    # ones from find_bin
    th1_idcs = [hist.FindBin(val) for val in values]
    exp_idcs = np.array(th1_idcs) - 1

    bin_idcs = hu.find_bin(binning, values)
    npt.assert_equal(bin_idcs, exp_idcs)
def test_find_bin_reg_binning(self):
    """
    find_bin has to agree with TH1::FindBin for an equidistant binning
    """
    hist = r.TH1D(create_random_str(8), '', 10, 0, 1)
    binning = hu.get_binning(hist)
    values = np.random.uniform(0, 1, 1000)

    th1_idcs = [hist.FindBin(val) for val in values]
    # correct for TH1 indexing starting at 1
    exp_idcs = np.array(th1_idcs) - 1

    bin_idcs = hu.find_bin(binning, values)
    npt.assert_equal(bin_idcs, exp_idcs)
def test_find_bin_warning(self, mock_logger):
    """
    find_bin has to emit a warning if at least one of the values is outside
    of the passed binning, both for values below and above the binning

    Args:
        mock_logger: Mock that replaces the logger used by find_bin
            (presumably injected by a patch decorator that is not part of
            this block)
    """
    exp_warn = ('When trying to find the bin indices at least one value '
                'could not be attributed to a bin in the passed binning')
    bins = hu.get_binning(hist=r.TH1D(create_random_str(), '', 10, 0, 1))

    # one value below the lower edge of the binning
    hu.find_bin(bins, np.array([-0.1, 0.2, 0.3, 0.4]))
    mock_logger.warn.assert_called_with(exp_warn)

    # assert_called_with only looks at the most recent call. Without
    # forgetting the warning from above the check below would also pass if
    # the second call to find_bin did not warn at all.
    mock_logger.warn.reset_mock()

    # one value above the upper edge of the binning
    hu.find_bin(bins, np.array([0.1, 0.2, 1.3, 0.4, 0.5]))
    mock_logger.warn.assert_called_with(exp_warn)
def make_overlay_plot(pt_map, pt_data, **kwargs):
    """
    Plot the coverage of the pt_data onto the passed map

    Args:
        pt_map: 2d map, its binning along axes 0 and 1 is used to histogram
            the data
        pt_data: Data with the columns costh_HX_fold and phi_HX_fold
        **kwargs: Forwarded to mkplot when plotting the map

    Returns:
        The canvas returned by mkplot, holding the map, the graph marking all
        bins that contain data and four vertical lines at fixed x values
    """
    amap_x = get_binning(pt_map, 0)
    amap_y = get_binning(pt_map, 1)

    # A binning that starts at 0 is compared against the absolute value
    costh = pt_data.costh_HX_fold
    if np.min(amap_x) == 0:
        costh = costh.abs()

    data_dist = hist2d(costh, pt_data.phi_HX_fold,
                       x_hist_sett=(len(amap_x) - 1, amap_x),
                       y_hist_sett=(len(amap_y) - 1, amap_y))
    has_data = get_array(data_dist) > 0
    cov_graph = get_mask_graph(amap_x, amap_y, has_data)

    can = mkplot(pt_map, **kwargs)

    graph_attr = {'color': r.kRed, 'fillalpha': (r.kRed, 0), 'marker': 1}
    mkplot(cov_graph, can=can, drawOpt='sameE5', attr=[graph_attr])

    y_low, y_high = np.min(amap_y), np.max(amap_y)
    lines = [
        r.TLine(x_pos, y_low, x_pos, y_high)
        for x_pos in [-0.625, -0.45, 0.45, 0.625]
    ]
    line_attr = {'color': 12, 'line': 7, 'width': 2}
    mkplot(lines, attr=[line_attr], can=can, drawOpt='same')

    return can
def get_coverage_contour(hist, coverage=0.683):
    """
    Get the contour from the passed histogram that surpasses the specified
    coverage

    Args:
        hist: 2d histogram
        coverage (float, optional): Fraction of the sum of all bin contents
            that the bins inside the contour should hold (default = 0.683)

    Returns:
        The result of contour_as_tgraph for the bin centers of all bins with a
        content at or above the level that corresponds to the coverage
    """
    vals = get_array(hist)
    sum_vals = np.sum(vals)
    max_val = np.max(vals)

    def _coverage(level):
        """Calculate the coverage corresponding to the passed level"""
        return np.sum(vals * (vals >= level)) / sum_vals

    def _cov_diff(level):
        """Difference between the coverage at level and the desired one"""
        return _coverage(level) - coverage

    # do some pre-processing to start from a slightly better bracket for the
    # root finding: scan the levels in steps of 5 % of the maximum
    scan_cov = np.array([_coverage(0.05 * i * max_val) for i in xrange(21)])
    q_bin = find_bin(scan_cov, np.array([coverage]))
    search_brack = [q_bin * 0.05 * max_val, (q_bin + 1) * 0.05 * max_val]

    cov_level = root_scalar(_cov_diff, bracket=search_brack)
    in_contour = vals >= cov_level.root

    # get the bin centers
    x_edges, y_edges = get_binning(hist, 'X'), get_binning(hist, 'Y')
    x_centers = 0.5 * (x_edges[1:] + x_edges[:-1])
    y_centers = 0.5 * (y_edges[1:] + y_edges[:-1])

    contour_coords = [
        [x_cent, y_cent]
        for i_x, x_cent in enumerate(x_centers)
        for i_y, y_cent in enumerate(y_centers)
        if in_contour[i_x, i_y]
    ]

    return contour_as_tgraph(np.array(contour_coords))
def get_combined_ppd(inputfiles, var):
    """
    Get the combined ppd from all inputfiles

    The combined ppd holds in each bin the largest value found in that bin in
    any of the scaled ppds of the input files, together with the error that
    belongs to that value.
    """
    scaled_ppds = [get_scaled_ppd(infile, var) for infile in inputfiles]
    # PPDs all have the same binning
    binning = get_binning(scaled_ppds[0])

    all_vals = np.array([get_array(ppd) for ppd in scaled_ppds])
    all_errs = np.array([get_array(ppd, errors=True) for ppd in scaled_ppds])

    # For each bin, the index of the ppd with the maximum value
    best_ppd = np.argmax(all_vals, axis=0)
    # Pair each of these with the index of its bin to pick the values and
    # errors from the 2d arrays
    bin_idx = np.arange(0, len(all_vals[0]))

    return from_array(all_vals[best_ppd, bin_idx], binning,
                      errors=all_errs[best_ppd, bin_idx])
def _test_from_array_nd_w_overflow(self, n_dim):
    """
    Check that from_array reproduces contents, binning and errors of an
    n_dim dimensional histogram when the arrays include the overflow bins
    """
    hist = _get_hist(n_dim)
    arr = hu.get_array(hist, overflow=True)

    # 'X' unless two or three dimensions are requested
    axes = {2: 'XY', 3: 'XYZ'}.get(n_dim, 'X')
    binning = np.array([hu.get_binning(hist, ax) for ax in axes])

    arr_hist = hu.from_array(arr, binning)
    npt.assert_equal(hu.get_array(arr_hist, overflow=True), arr)

    checked_axes = ['X']
    if n_dim > 1:
        checked_axes.append('Y')
    if n_dim > 2:
        checked_axes.append('Z')
    for ax in checked_axes:
        npt.assert_equal(hu.get_binning(arr_hist, ax),
                         hu.get_binning(hist, ax))

    # The same again, but also passing the errors
    err = hu.get_array(hist, errors=True, overflow=True)
    arr_err_hist = hu.from_array(arr, binning, errors=err)
    npt.assert_equal(hu.get_array(arr_err_hist, overflow=True), arr)
    npt.assert_equal(
        hu.get_array(arr_err_hist, overflow=True, errors=True), err)
def __init__(self, acc_map, min_acc=0, mask_prec=None, mask=None):
    """
    Set up the correction map (1 / acceptance) from the acceptance map

    Masked bins get a value of -1 in the correction map. After the
    construction the following attributes are set: hist (the passed map),
    corr_map (array of correction values), ndim (number of dimensions of
    corr_map) and var_binnings (list with the binning of each axis).

    Args:
        acc_map (TH2D, TH3D or THnD): costh-phi map or costh-phi-var map
            obtained by applying all cuts and selections (and possibly
            efficiency weightings). For each bin 1 / (bin content) will be
            the weight for the acceptance correction
        min_acc (float, optional): Mask all bins with an acceptance below
            this value (default = 0)
        mask_prec (float, optional): If not None, mask all bins for which
            the relative error is larger than the passed value
        mask (np.array, optional): Array with the same dimensions as the
            acceptance map. All bins containing a non False value will be
            masked. Overrides the min_acc and mask_prec argument (i.e. they
            will be ignored) but still respects zero bin masking
    """
    self.hist = acc_map
    logging.debug('Using acceptance map \'{}\''.format(
        self.hist.GetName()))

    # Corrections are 1 / acceptance map
    acc_values = get_array(self.hist)

    if mask is not None:
        # Any "non False" value masks a bin, so reduce the mask to proper
        # booleans. With e.g. an integer mask the `|` below would yield
        # integers and inverting them would be a bitwise operation instead
        # of a logical one, corrupting the acceptance values.
        mask = np.asarray(mask).astype(bool)

        if mask.shape != acc_values.shape:
            logging.error('mask and acceptance map need to have the same '
                          'dimensions. mask: {}, map: {}'.format(
                              mask.shape, acc_values.shape))
        if min_acc != 0:
            logging.info(
                'Ignoring min_acc={} because a mask is used'.format(
                    min_acc))
        if mask_prec is not None:
            logging.info(
                'Ignoring mask_prec={} because a mask is used'.format(
                    mask_prec))

        # mask the values without acceptance in the acceptance map
        # this will also make them return -1 for the correction map
        logging.debug('Masking {} bins according to the mask'.format(
            np.sum(mask)))
        empty_mask = (acc_values == 0)
        logging.debug('Masking {} empty bins'.format(np.sum(empty_mask)))
        masked_vals = empty_mask | mask
    else:
        if min_acc < 0 or min_acc > 1:
            logging.warning('The minimum acceptance should be a value '
                            'between 0 and 1, but is {}'.format(min_acc))
        masked_vals = (acc_values <= min_acc).astype(bool)
        logging.debug('Minimum acceptance = {}: Masking {} bins'.format(
            min_acc, np.sum(masked_vals)))

        if mask_prec is not None:
            if isinstance(mask_prec, float):
                acc_errs = get_array(self.hist, errors=True)
                # Bins without acceptance keep a relative uncertainty of 0
                rel_uncer = np.zeros_like(acc_errs)
                np.divide(acc_errs, acc_values, out=rel_uncer,
                          where=acc_values != 0)
                mask_uncer = (rel_uncer > mask_prec).astype(bool)
                logging.debug(
                    'Minimum precision = {}: Masking {} bins'.format(
                        mask_prec, np.sum(mask_uncer)))
                masked_vals |= mask_uncer
            else:
                logging.error(
                    'mask_prec has to be a float value. Not using'
                    ' it to mask bins with too low precision.')

    # Masked bins get an acceptance of -1, such that the correction is -1
    # as well. Selecting the values (instead of multiplying with the mask)
    # also gives -1 for masked bins holding a non-finite acceptance.
    acc_values = np.where(masked_vals, -1.0, acc_values)
    logging.debug('{} of {} bins are masked in the correction map'.format(
        np.sum(masked_vals), acc_values.size))

    self.corr_map = 1.0 / acc_values

    self.ndim = self.corr_map.ndim
    self.var_binnings = [get_binning(acc_map, i) for i in range(self.ndim)]
def shift_by_median(ppd, median):
    """
    Shift the ppd by the passed median

    Returns:
        A new histogram with the contents and errors of the passed ppd and a
        binning from which the median has been subtracted
    """
    contents = get_array(ppd)
    shifted_binning = get_binning(ppd) - median
    uncertainties = get_array(ppd, errors=True)
    return from_array(contents, shifted_binning, errors=uncertainties)
def test_handles_binning(self):
    """
    Test if binning arrays are handled correctly (type conversion for ROOT
    to understand)
    """
    contents = np.random.uniform(0, 1, 10)
    int_binning = np.arange(0, 11, 1)

    hist = hu.from_array(contents, int_binning)

    # compare against a freshly created array
    npt.assert_equal(hu.get_binning(hist, 'X'), np.arange(0, 11, 1))