def add_pltables(self, pltables):
    """
    Append the passed plotable object(s) to ``self.pltables``.

    A single object is handled the same way as a collection of objects
    (via ``make_iterable``). Objects that are already contained in
    ``self.pltables`` are not stored a second time.
    """
    for candidate in make_iterable(pltables):
        # do not store duplicates
        if candidate in self.pltables:
            continue
        self.pltables.append(candidate)
def set_hist_opts(hists):
    """Apply some 'sane' default options to the passed histogram(s).

    For every histogram the stat box is switched off and ``Sumw2`` is called.

    Args:
        hists (ROOT.TH1 or list): the histogram(s) to set the options to.
    """
    for histo in make_iterable(hists):
        # disable stat box
        histo.SetStats(0)
        histo.Sumw2()
def _get_ratio_lines(ratio_hist, vals):
    """
    Create the horizontal lines that go into the ratio pad.

    One ``TLine`` is created per entry in ``vals``. Each line spans from the
    minimum to the maximum x-value of ``ratio_hist`` (as determined by
    ``get_x_min`` / ``get_x_max``) at a constant y equal to the entry.

    Returns:
        list: the created lines, in the order of the passed values
    """
    # The maximum is queried before the minimum
    xmax = get_x_max(ratio_hist)
    xmin = get_x_min(ratio_hist)

    lines = []
    for yval in make_iterable(vals):
        line = r.TLine(xmin, yval, xmax, yval)
        set_attributes(line, width=2, line=2, color=12)
        lines.append(line)

    return lines
def _get_y_min_hist(hists):
    """
    Determine the smallest bin content found in the passed histogram(s)

    Args:
        hists (list or ROOT.TH1): list of ROOT.TH1 for which the minimum
            y-value should be obtained

    Returns:
        float: The minimum y-value of all passed histograms
    """
    bin_minima = []
    for hist in make_iterable(hists):
        # content of the bin that ROOT reports as the minimum bin
        bin_minima.append(hist.GetBinContent(hist.GetMinimumBin()))

    return min(bin_minima)
def _get_x_min_hist(hists):
    """
    Determine the smallest lower x-axis edge of the passed histogram(s)

    Args:
        hists (list or ROOT.TH1): list of ROOT.TH1 for which the minimum
            x-value should be obtained

    Returns:
        float: The minimum x-value of all passed histograms
    """
    low_edges = []
    for hist in make_iterable(hists):
        # 0 is underflow bin (thus starting at 1)
        low_edges.append(hist.GetXaxis().GetBinLowEdge(1))

    return min(low_edges)
def apply_selections(dataframe, selections, negate=False):
    """
    Reduce the passed DataFrame to the rows passing all selections.

    Args:
        dataframe (pandas.DataFrame): The data to which the selections should
            be applied
        selections (list of functions, function or numpy.ndarray): Either
            function(s) that take the DataFrame as single argument and return
            one boolean per row of the DataFrame (True meaning the row is
            selected), or a selection array (numpy.ndarray or pandas.Series)
            that can directly be used to index into the DataFrame. If None,
            the DataFrame is returned untouched.
        negate (Boolean): Instead of returning all events fulfilling the
            selection return all events not fulfilling the selection

    Returns:
        pandas.DataFrame: New DataFrame with only the elements of the passed
            DataFrame that pass the selection, unless all elements pass the
            selection, then the original dataframe will be returned
    """
    if selections is None:
        return dataframe

    if isinstance(selections, (np.ndarray, pd.Series)):
        # already a selection array, use it as is
        mask = selections
    else:
        sel_funcs = make_iterable(selections)
        # Only log if something that is not a function has been passed. The
        # evaluation below will fail for it in any case
        if any(not callable(func) for func in sel_funcs):
            logging.error(
                'Passed selections are not all functions and also not'
                ' an array of boolean indices')

        # start with everything selected and AND all selections onto it
        mask = np.ones(dataframe.shape[0], dtype=bool)
        for func in sel_funcs:
            mask &= func(dataframe)

    if negate:
        mask = np.invert(mask)

    if np.sum(mask) == dataframe.shape[0]:
        logging.debug(
            'Sum of selections (after possible negation) selects all '
            'elements from passed DataFrame.')
        return dataframe

    # NOTE: since this indexing uses an array of bools this will always return
    # a copy
    return dataframe[mask]
def _get_extremal_value(pltable, axis, value):
    """
    Helper function doing the actual work of finding an extremal value.

    For each passed plotable the first type in ``EVAL_ORDER`` that it
    inherits from (according to ``InheritsFrom``) determines the function
    that is looked up via ``EXTREMAL_FUNCS[type][axis][value]``. The results
    of all plotables are then combined with the builtin of the name
    ``value``.
    """
    no_match = object()
    candidates = []
    for obj in make_iterable(pltable):
        # We only need to handle each pltable once, so only the first matching
        # type is of interest
        obj_type = next((t for t in EVAL_ORDER if obj.InheritsFrom(t)),
                        no_match)
        if obj_type is not no_match:
            candidates.append(EXTREMAL_FUNCS[obj_type][axis][value](obj))
        # Do nothing if we can't handle it (TODO: introduce logging)

    # somewhat hacky way to get either min or max depending on the desired
    # value
    combine = __builtin__.__dict__[value]
    return combine(candidates)
def _get_x_max_hist(hists):
    """
    Determine the largest upper x-axis edge of the passed histogram(s)

    Args:
        hists (list or ROOT.TH1): list of ROOT.TH1 for which the maximum
            x-value should be obtained

    Returns:
        float: The maximum x-value of all passed histograms
    """
    def _upper_edge(hist):
        """Low edge of the overflow bin (bin index nbins + 1) of the x-axis"""
        xaxis = hist.GetXaxis()
        return xaxis.GetBinLowEdge(xaxis.GetNbins() + 1)

    return max(_upper_edge(hist) for hist in make_iterable(hists))
def _get_x_min_graph(graph):
    """
    Get the minimum x value of a graph (or graphs)

    The x uncertainties are taken into account: the first element returned by
    ``get_errors`` is subtracted from the x values for TGraphAsymmErrors and
    TGraphErrors, all other graphs are treated as having no uncertainties.
    """
    def _lowest_x(single):
        """Smallest (x - x-error) of one graph"""
        xvals = np.array(single.GetX())
        if isinstance(single, r.TGraphAsymmErrors):
            xerr, _, _, _ = get_errors(single)
        elif isinstance(single, r.TGraphErrors):
            xerr, _ = get_errors(single)
        else:
            xerr = np.zeros(single.GetN())
        return np.min(xvals - xerr)

    return np.min([_lowest_x(single) for single in make_iterable(graph)])
def _get_y_max_graph(graph):
    """
    Get the maximum y value of a graph (or graphs)

    The y uncertainties are taken into account: the last element returned by
    ``get_errors`` is added to the y values for TGraphAsymmErrors and
    TGraphErrors, all other graphs are treated as having no uncertainties.
    """
    def _highest_y(single):
        """Largest (y + y-error) of one graph"""
        yvals = np.array(single.GetY())
        if isinstance(single, r.TGraphAsymmErrors):
            _, _, _, yerr = get_errors(single)
        elif isinstance(single, r.TGraphErrors):
            _, yerr = get_errors(single)
        else:
            yerr = np.zeros(single.GetN())
        return np.max(yvals + yerr)

    return np.max([_highest_y(single) for single in make_iterable(graph)])
def _project_THn(hist, axes):
    """
    Internal function for THn projections. For these it should be more straight
    forward to get the expected behavior from ROOT

    Args:
        hist: histogram providing GetNdimensions, GetAxis, Projection and
            ProjectionND
        axes: index (or indices) of the axes onto which should be projected

    Returns:
        The projected histogram. For one to three axes the result of
        ``Projection`` is returned, otherwise the result of ``ProjectionND``.
    """
    axes = np.array(list(make_iterable(axes)), dtype='i8')

    # Set ranges for axes that are integrated over to not include overflow
    for ax_idx in xrange(hist.GetNdimensions()):
        if ax_idx in axes:
            continue
        sum_axis = hist.GetAxis(ax_idx)
        sum_axis.SetRange(1, sum_axis.GetNbins())

    # create a name that does not interfere with the options, by removing all
    # characters that are replaced below from the random string
    name = replace_all(create_random_str(),
                       (('e', '_'), ('E', '_'), ('a', '_'), ('A', '_'),
                        ('o', '_'), ('O', '_')))
    proj_opt = "E_" + name

    # If at all possible return a TH1 (because they are easier to handle)
    if 1 <= len(axes) <= 3:
        proj_args = tuple(axes) + (proj_opt,)
        proj_hist = hist.Projection(*proj_args)
    else:
        proj_hist = hist.ProjectionND(axes.shape[0], axes.astype('i4'),
                                      proj_opt)

    # Make sure that the uncertainties are set / calculated
    # For some reason this seems to work when a TH1 is returned, but for THn
    # this status is somehow "lost", which can lead to trouble later
    if isinstance(proj_hist, r.TH1) and proj_hist.GetSumw2().fN == 0:
        proj_hist.Sumw2()
    if isinstance(proj_hist, r.THnBase) and not proj_hist.GetCalculateErrors():
        proj_hist.Sumw2()

    return proj_hist
def collect_requirements(selections):
    """Gather the variables that the passed selections declare as required

    Every selection that has a ``requires`` attribute contributes all elements
    of it. For selections without such an attribute a warning is logged.

    Returns:
        list: the unique required variables (empty if selections is None)
    """
    if selections is None:
        return []

    required = set()
    for selection in make_iterable(selections):
        if hasattr(selection, 'requires'):
            required.update(selection.requires)
            continue

        # Find something to identify the selection in the warning
        if hasattr(selection, '__name__'):
            sel_name = selection.__name__
        elif hasattr(selection, '__class__'):
            sel_name = selection.__class__
        else:
            sel_name = ''
        logging.warning(
            '\'{}\' does not have a requires field, possibly '
            'cannot get the necessary variables'.format(sel_name))

    return list(required)
def mkplot(pltables, **kwargs):
    """
    Draw all passed pltables onto a canvas and return that canvas

    Args:
        pltables: single plotable ROOT object or list of plotable ROOT objects.
            If this is an empty list an empty canvas (or the passed in canvas)
            will be returned

    Keyword Args:
        colors (list. optional): list of colors to be used for plotting
            (otherwise default colors will be used)
        drawOpt (str, optional): option that will be passed to the Draw() method
        leg (ROOT.TLegend, optional): Put the passed TLegend onto the plot
        legPos (optional): Only used if no leg is passed. If it has exactly
            four elements they are forwarded to setup_legend to create the
            legend
        legEntries (list): list of string (at least as long as the plot list)
            from which the keys for the legend are taken
        yRange, xRange (list): Two element list defining the range for the given
            axis. Valid options are: two floats or any mixture of one float and
            a None value or even two None values. For any passed value that
            value will be used as axis range, for any None value an appropriate
            value will be determined from the passed pltables
        [xy]dscale (float, optional): Factor that is used to extend the plot
            range using the min / max observed value in the plots. Documented
            as resulting in a plot range of
            (min * (1 - dscale), max * (1 - dscale)).
            NOTE(review): the upper bound presumably uses (1 + dscale),
            confirm in the code setting up the plot frame.
            The [xy]Range takes precedence over this one.
        can (ROOT.TCanvas): Do not create new canvas but use passed canvas to
            plot on
        log[xyz] (boolean, optional): Set the [xyz] axis to log scale
        title (str, optional): Use the passed string as title of the plot (will
            be set on the first element of the pltables stored by the canvas,
            which is the plot frame if one has been created)

    Returns:
        TCanvasWrapper: Transparent wrapper class around a TCanvas that forwards
            all calls to the TCanvas but keeps the plotted objects alive as long
            as the TCanvas is alive

    See also:
        plot_on_canvas
    """
    # allow single plots to be handled the same as a list of plots
    pltables = make_iterable(pltables)
    # A single THStack needs special treatment, since THStack is an instance of
    # Iterable. Wrapping it into a list guarantees that plot_on_canvas always
    # receives a list, which makes the handling there easier
    if isinstance(pltables, r.THStack):
        pltables = [pltables]

    # The can kwarg has to be removed from the kwargs here, otherwise
    # plot_on_canvas will receive it twice and complain
    can = _setup_canvas(kwargs.pop('can', None), **kwargs)
    can.cd()  # To make TCanvas.DrawFrame work in any case

    # Without any pltable there is nothing more to do than returning the canvas
    # NOTE: Can't return earlier with None, since a canvas is expected to be
    # returned, and only here is it certain, that we return the right canvas
    if len(pltables) == 0:
        return can

    # Handle the plot frame
    if 'same' not in kwargs.get('drawOpt', ''):
        frame_hist = _setup_plot_hist(can, pltables, **kwargs)
        # only add the plotting frame if it is actually created
        if frame_hist is not None:
            # Force the usage of the plot hist if it is created
            kwargs['drawOpt'] = 'same' + kwargs.pop('drawOpt', '')
            can.add_pltables(frame_hist)

    leg = kwargs.pop('leg', None)
    if leg is None:
        leg_pos = kwargs.pop('legPos', None)
        if leg_pos is not None and len(leg_pos) == 4:
            leg = setup_legend(*leg_pos)

    plot_on_canvas(can, pltables, leg=leg, **kwargs)
    can.add_pltables(pltables)

    title = kwargs.pop('title', None)
    if title is not None:
        can.pltables[0].SetTitle(title)

    if leg is not None:
        can.add_tobject(leg)

    return can
def scale_0(hists):
    """
    Scale the passed histogram(s) such that the content of bin 1 becomes one.

    Each histogram is scaled by the inverse of its content in bin 1 and its
    stat box is switched off.

    Args:
        hists: single histogram or list of histograms (modified in place)

    Returns:
        The passed ``hists`` argument
    """
    for hist in make_iterable(hists):
        first_bin_content = hist.GetBinContent(1)
        hist.Scale(1.0 / first_bin_content)
        hist.SetStats(0)

    return hists
def add_auxiliary_info(pad, years, pos='right', mc=False, prelim=False):
    """Add the auxiliary information to the passed pad

    Draws the luminosity information (via add_lumi_info), the 'CMS' label and
    an optional extra text ('Simulation' and / or 'Preliminary') onto the pad.

    Args:
        pad: The pad onto which the information should be drawn
        years: Year or list of years for which the luminosity information is
            obtained from DATABASE. The information of the individual years
            is joined by ' + '
        pos (str, optional): Horizontal position of the CMS label and the
            extra text, either 'right' or 'left'. Any other value is treated
            as 'left' after logging a warning
        mc (bool, optional): Add 'Simulation' to the extra text
        prelim (bool, optional): Add 'Preliminary' to the extra text
    """
    def get_lumi(year):
        """Get the lumi text for one year"""
        if DATABASE is None:
            logging.warning('DATABASE not initialized.')
            # Has to be a string, since the results are joined below
            return '-1'
        return '{} fb^{{-1}} ({} TeV)'.format(DATABASE.get_int_lumi(year),
                                              DATABASE.get_energy(year))

    # The text alignment below already treats everything that is not 'right'
    # as left-aligned. Do the same for the position instead of failing on an
    # undefined x-position after parts of the info have already been drawn
    if pos not in ('right', 'left'):
        logging.warning('Unknown pos \'{}\' for auxiliary info, using '
                        '\'left\''.format(pos))
        pos = 'left'

    # required text setup
    CMS_TEXT = 'CMS'
    CMS_TEXT_FONT = 61
    CMS_TEXT_SIZE = 0.75

    # Join the parts to avoid a leading blank if only prelim is set
    extra_parts = []
    if mc:
        extra_parts.append('Simulation')
    if prelim:
        extra_parts.append('Preliminary')
    EXTRA_TEXT = ' '.join(extra_parts)
    EXTRA_TEXT_FONT = 52
    EXTRA_TEXT_SIZE = 0.76 * CMS_TEXT_SIZE

    # offsets of the text relative to the frame spanned inside the margins
    REL_POS_X = 0.045
    REL_POS_Y = 0.065
    REL_EXTRA_DY = 1.1

    PAD = get_pad_margins(pad)

    pad.cd()
    # latex used to draw everything
    latex = setup_basic_latex()

    # lumi info
    lumi_text = ' + '.join([get_lumi(y) for y in make_iterable(years)])
    add_lumi_info(pad, lumi_text)

    frame_width = 1 - PAD['l'] - PAD['r']
    frame_height = 1 - PAD['t'] - PAD['b']

    if pos == 'right':
        pos_x = 1 - PAD['r'] - REL_POS_X * frame_width
        text_align = 31
    else:
        pos_x = PAD['l'] + REL_POS_X * frame_width
        text_align = 11
    pos_y = 1 - PAD['t'] - REL_POS_Y * frame_height

    latex.SetTextAlign(text_align)

    # cms text
    latex.SetTextFont(CMS_TEXT_FONT)
    latex.SetTextSize(CMS_TEXT_SIZE * PAD['t'])
    latex.DrawLatex(pos_x, pos_y, CMS_TEXT)

    # extra text (preliminary, etc..), drawn below the cms text
    latex.SetTextFont(EXTRA_TEXT_FONT)
    latex.SetTextSize(EXTRA_TEXT_SIZE * PAD['t'])
    latex.DrawLatex(pos_x, pos_y - REL_EXTRA_DY * CMS_TEXT_SIZE * PAD['t'],
                    EXTRA_TEXT)
def baseline_plot(baseline, compplots, **kwargs):
    """
    Make a plot and compare the compplots with the baseline plots. Divides the
    plot into an absolute value plot in the top and a ratio plot using the
    baseline as denominator in the bottom.

    Args:
        baseline (plotable ROOT object): The baseline plotable that will be
            used as comparison for all the compplots
        compplots (plotable ROOT objects): The plotables that should be
            compared to the baseline. Can be a single plotable or any iterable
            of plotables

    Keyword Args:
        attr (list of dict, optional): Attributes used for the compplots. If
            not passed default_attributes(open_markers=False, size=1.0) is
            used. The baseline is always drawn in black and uses the marker
            size of the compplots if they all share the same size
        basename (str): Legend entry to be used for the baseline
        legEntries (list of str): legend entries to be used for the compplots
        xLabel (str, optional): Label of the x-axis, only used for the ratio
            pad
        can (optional): Canvas to draw on. If not passed a new TCanvas is
            created
        yRangeRatio (tuple of floats): The minimum and maximum y-value for the
            ratio pad
        compname (str): Name describing the whole of the compplots that will be
            used in the y-axis of the ratio pad
        putline (float or list of floats): Put horizontal lines into the ratio
            pad at the given values. Ignored if no ratio has been plotted

    Other Keyword Args are forwarded to mkplot.

    Returns:
        TCanvasWrapper: Transparent wrapper holding the plot and all its objects

    See Also:
        mkplot, plot_on_canvas
    """
    # Materialize the comparison plots exactly once. make_iterable does not
    # necessarily return a list, but a list is needed for prepending the
    # baseline and the plots are needed a second time for the ratios
    compplots = list(make_iterable(compplots))

    comp_attr = kwargs.pop('attr', None)
    if comp_attr is None:
        comp_attr = default_attributes(open_markers=False, size=1.0)

    # the baseline will always be black. Try to match the size of the markers
    # to the one that were requested by the user
    base_attr = {'color': 1, 'marker': 20, 'size': 1.5}
    sizes = np.unique([a['size'] for a in comp_attr if 'size' in a])
    if len(sizes) == 1:
        base_attr['size'] = sizes[0]

    attr = [base_attr] + comp_attr

    # use the xLabel only for the lower plot
    xLabel = kwargs.pop('xLabel', None)

    # add the baseline name to the legend entries (if any)
    legEntries = kwargs.pop('legEntries', None)
    base_name = kwargs.pop('basename', 'baseline')
    if legEntries is not None:
        legEntries = [base_name] + legEntries

    # setup canvas
    can = kwargs.pop('can', None)
    if can is None:
        can_name = create_random_str()
        can = r.TCanvas(can_name, '', 50, 50, 600, 600)
    can.cd()

    ppad = r.TPad('_'.join([can.GetName(), 'plotpad']), 'plotpad',
                  0, 0.3, 1, 1)
    r.SetOwnership(ppad, False)
    ppad.Draw()

    # plot the comparison plot
    ppad = mkplot([baseline] + compplots, attr=attr, can=ppad,
                  legEntries=legEntries, **kwargs)

    can.cd()
    rpad = r.TPad('_'.join([can.GetName(), 'ratiopad']), 'rpad',
                  0, 0, 1, 0.33)
    rpad.SetBottomMargin(0.2)
    r.SetOwnership(rpad, False)
    rpad.Draw()

    # remove the kwargs that only make sense for the upper pad
    for kwarg in ['yLabel', 'legPos', 'leg', 'legEntries', 'yRange', 'logy']:
        kwargs.pop(kwarg, None)

    # determine the ratios and plot them
    ratios = [divide(p, baseline) for p in compplots]

    # These have to be removed from the kwargs before they are forwarded, as
    # the label and the range are passed explicitly for the ratio pad
    ratio_label = ' / '.join([kwargs.pop('compname', 'distribution(s)'),
                              base_name])
    ratio_range = kwargs.pop('yRangeRatio', [None, None])

    rpad = mkplot(ratios, attr=comp_attr, can=rpad, xLabel=xLabel,
                  yLabel=ratio_label, yRange=ratio_range, **kwargs)

    for hist in rpad.pltables:
        _set_ratio_properties(hist)

    putlines = kwargs.pop('putline', None)
    # The lines need a plotted object to determine their x-range from
    if putlines is not None and len(rpad.pltables) > 0:
        # Simply assume that all others follow the same as the first one
        lines = _get_ratio_lines(rpad.pltables[0], putlines)
        for line in lines:
            line.Draw()

        rpad.add_pltables(lines)

    # attach all plots to the returned canvas
    if not isinstance(can, TCanvasWrapper):
        can = TCanvasWrapper(can)

    can.add_tobject(ppad)
    can.add_tobject(rpad)

    return can
def _val_getter(hists):
    """
    Closure to avoid a lambda

    Applies ``fext(gaxis(h))`` to each of the passed histogram(s) and
    combines the results with the builtin of the name ``extremum``.
    NOTE(review): ``extremum``, ``fext`` and ``gaxis`` are not defined in this
    function, presumably they are provided by the enclosing scope.
    """
    combine = __builtin__.__dict__[extremum]
    return combine(fext(gaxis(hist)) for hist in make_iterable(hists))