def make_data_mc_comparison_plot(
        histograms=None,
        histogram_lables=None,
        histogram_colors=None,
        histogram_properties=None,
        data_index=0,
        save_folder='plots/',
        save_as=None,
        normalise=False,
        show_ratio=False,
        show_stat_errors_on_mc=False,
        draw_vertical_line=0,
        systematics_for_ratio=None,
        systematics_for_plot=None,
        histograms_to_compare=None):
    """Draw data on top of a stacked MC prediction and save the figure.

    The histogram at ``data_index`` is drawn as error bars; every other
    histogram is added to a ``HistStack`` and drawn stacked.  Optional
    uncertainty bands, comparison histograms and a data/prediction ratio
    panel can be added.

    Parameters
    ----------
    histograms : sequence of histograms, optional
        Input histograms.  They are deep-copied before any modification.
    histogram_lables : sequence of str, optional
        Title set on each histogram, index-aligned with ``histograms``.
        The legend re-ordering looks for an entry labelled ``'data'``.
    histogram_colors : sequence, optional
        Fill colour for each non-data histogram, index-aligned with
        ``histograms``.
    histogram_properties : Histogram_properties, optional
        Plot configuration.  A fresh ``Histogram_properties()`` is created
        when omitted.  Attributes read here: ``name``, ``set_log_y``,
        ``mc_error``, ``mc_relative_errors``, ``mc_errors_label``,
        ``emptybins``, ``xerr``, ``legend_color``, ``legend_location``,
        ``legend_columns``, ``x_limits``, ``y_limits``, ``y_max_scale``,
        ``ratio_y_limits`` and ``integerXVariable``.
    data_index : int
        Index of the data histogram within ``histograms``.
    save_folder : str
        Output folder; passed through ``check_save_folder``.
    save_as : sequence of str, optional
        File extensions to write (default ``['pdf']``).  ``'root'`` goes
        through ``saveHistogramsToROOTFile``; anything else is handed to
        ``plt.savefig``.
    normalise : bool
        Scale data and the MC stack to unit integral (skipped for a
        histogram/stack whose integral is zero).
    show_ratio : bool
        Add a lower panel with data divided by the summed MC.
    show_stat_errors_on_mc : bool
        Draw a band from the summed MC's per-bin errors.  Only used when
        ``mc_relative_errors`` is empty and ``mc_error`` is not positive.
    draw_vertical_line : number
        x position of a red vertical line; ``0`` disables it.
    systematics_for_ratio : sequence of float, optional
        One relative uncertainty per bin, drawn as a ``1 +/- x`` band in
        the ratio panel.
    systematics_for_plot : sequence of float, optional
        Same format as ``systematics_for_ratio``; drawn as a band of
        ``central * (1 +/- x)`` around the summed MC.  This overlaps in
        purpose with ``mc_error`` / ``mc_relative_errors``, which only
        describe a symmetric up/down error.
    histograms_to_compare : dict, optional
        Extra predictions to overlay.  Keys read: ``'hists'``,
        ``'labels'``, ``'colours'`` and ``'to_replace'``.  Each entry of
        ``'hists'`` is copied, then every input histogram whose title is
        neither ``'to_replace'`` nor ``'data'`` is added to the copy.

    Raises
    ------
    ValueError
        If no legend entry is labelled ``'data'``.
    """
    # None sentinels instead of mutable / shared-instance defaults.
    if histograms is None:
        histograms = []
    if histogram_lables is None:
        histogram_lables = []
    if histogram_colors is None:
        histogram_colors = []
    if histogram_properties is None:
        histogram_properties = Histogram_properties()
    if save_as is None:
        save_as = ['pdf']

    save_folder = check_save_folder(save_folder)

    # Work on copies so the caller's histograms are never modified.
    histograms_ = deepcopy(histograms)
    stack = HistStack()
    for index, histogram in enumerate(histograms_):
        label = histogram_lables[index]
        color = histogram_colors[index]

        histogram.SetTitle(label)
        if normalise:
            histogram.Sumw2()

        if index != data_index:
            histogram.fillstyle = 'solid'
            histogram.fillcolor = color
            histogram.legendstyle = 'F'
            stack.Add(histogram)

    data = histograms_[data_index]
    data.SetMarkerSize(CMS.data_marker_size)
    if normalise:
        n_events_data = data.Integral()
        n_events_mc = stack.Integral()
        # An empty histogram cannot be normalised; leave it untouched
        # instead of dividing by zero.
        if n_events_data != 0:
            data.Scale(1.0 / n_events_data)
        if n_events_mc != 0:
            stack.Scale(1.0 / n_events_mc)

    # zorder convention: the MC stack is always at the very back (z = 1),
    # then the MC uncertainty bands (z = len(histograms_) + 1) and finally
    # the data (z = len(histograms_) + 2).
    band_zorder = len(histograms_) + 1
    data_zorder = len(histograms_) + 2

    # plot with matplotlib
    plt.figure(figsize=CMS.figsize, dpi=CMS.dpi, facecolor=CMS.facecolor)
    if show_ratio:
        ratio = data.Clone('ratio')
        # Zero the MC errors in *every* bin and divide by that very
        # histogram, so the ratio's error bars carry the data uncertainty
        # only.
        mc_sum = sum(stack.GetHists())
        for bin_i in range(1, mc_sum.GetNbinsX() + 1):
            mc_sum.SetBinError(bin_i, 0)
        ratio.Divide(mc_sum)
        ratio.SetMarkerSize(3)
        gs = gridspec.GridSpec(2, 1, height_ratios=[5, 1])
        axes = plt.subplot(gs[0])
    else:
        axes = plt.axes()

    if histogram_properties.set_log_y:
        axes.set_yscale('log', nonposy="clip")
        axes.set_ylim(ymin=1e-2)

    # NOTE(review): the axes are passed to rplt.fill_between by keyword
    # throughout.  Recent rootpy releases appear to take ``logy`` as the
    # third positional parameter, so a positional axes argument would not
    # select the target axes - confirm against the installed rootpy.
    if systematics_for_plot is not None:
        stack_lower = sum(stack.GetHists())
        stack_upper = stack_lower.Clone('upper')
        for bin_i in range(1, stack_lower.GetNbinsX() + 1):
            central_value = stack_lower.GetBinContent(bin_i)
            relative_error = systematics_for_plot[bin_i - 1]
            stack_upper.SetBinContent(
                bin_i, (relative_error + 1) * central_value)
            stack_lower.SetBinContent(
                bin_i, (1 - relative_error) * central_value)
        rplt.fill_between(
            stack_lower, stack_upper, axes=axes,
            hatch='//',
            facecolor='None',
            edgecolor='Grey',
            alpha=1.,
            linewidth=0.,
            zorder=band_zorder)

    mc_error = histogram_properties.mc_error
    mc_relative_errors = histogram_properties.mc_relative_errors
    if mc_relative_errors:
        # Per-bin symmetric relative error on the summed MC.
        stack_lower = sum(stack.GetHists())
        stack_upper = stack_lower.Clone('upper')
        for bin_i in range(1, stack_lower.GetNbinsX() + 1):
            central_value = stack_lower.GetBinContent(bin_i)
            relative_error = mc_relative_errors[bin_i - 1]
            stack_upper.SetBinContent(
                bin_i, central_value * (1 + relative_error))
            stack_lower.SetBinContent(
                bin_i, central_value * (1 - relative_error))
        rplt.fill_between(stack_upper, stack_lower, axes=axes,
                          facecolor='0.75', alpha=0.5, hatch='/',
                          zorder=band_zorder)
    elif mc_error > 0:
        # Flat relative error applied to all bins.
        stack_lower = sum(stack.GetHists())
        stack_upper = stack_lower.Clone('upper')
        stack_lower.Scale(1 - mc_error)
        stack_upper.Scale(1 + mc_error)
        rplt.fill_between(stack_upper, stack_lower, axes=axes,
                          facecolor='0.75', alpha=0.5, hatch='/',
                          zorder=band_zorder)
    elif show_stat_errors_on_mc:
        # Band built from the per-bin errors of the summed MC.
        stack_lower = sum(stack.GetHists())
        mc_errors = list(stack_lower.yerravg())
        stack_upper = stack_lower.Clone('upper')
        for bin_i in range(1, stack_lower.GetNbinsX() + 1):
            central_value = stack_lower.GetBinContent(bin_i)
            error = mc_errors[bin_i - 1]
            stack_lower.SetBinContent(bin_i, central_value - error)
            stack_upper.SetBinContent(bin_i, central_value + error)
        rplt.fill_between(stack_upper, stack_lower, axes=axes,
                          facecolor='0.75', alpha=0.5, hatch='/',
                          zorder=band_zorder)

    rplt.hist(stack, stacked=True, axes=axes, zorder=1)
    rplt.errorbar(data,
                  emptybins=histogram_properties.emptybins,
                  axes=axes,
                  xerr=histogram_properties.xerr,
                  elinewidth=2,
                  capsize=10,
                  capthick=2,
                  zorder=data_zorder)

    if histograms_to_compare:
        h_compare = {}
        skipped_titles = [histograms_to_compare['to_replace'], 'data']
        for hist, compare_label, colour in zip(
                histograms_to_compare['hists'],
                histograms_to_compare['labels'],
                histograms_to_compare['colours']):
            # Accumulate into a copy: ``+=`` works in place and would
            # otherwise change the histogram owned by the caller.
            total = deepcopy(hist)
            for histogram in histograms_:
                if histogram.GetTitle() not in skipped_titles:
                    total += histogram
            h_compare[compare_label] = [total, colour]
            rplt.step(
                total,
                axes=axes,
                label=compare_label,
                color=colour,
                linewidth=4,
            )

    # put legend into the correct order (data is always first!)
    handles, labels = axes.get_legend_handles_labels()
    data_label_index = labels.index('data')
    handles.insert(0, handles.pop(data_label_index))
    labels.insert(0, labels.pop(data_label_index))
    if mc_error > 0 or show_stat_errors_on_mc:
        # Proxy artist so the uncertainty band gets a legend entry.
        p1 = Rectangle((0, 0), 1, 1, fc="0.75", alpha=0.5, hatch='/')
        handles.append(p1)
        labels.append(histogram_properties.mc_errors_label)

    l1 = axes.legend(handles, labels,
                     numpoints=1,
                     frameon=histogram_properties.legend_color,
                     bbox_to_anchor=histogram_properties.legend_location,
                     bbox_transform=plt.gcf().transFigure,
                     prop=CMS.legend_properties,
                     ncol=histogram_properties.legend_columns)
    l1.set_zorder(102)

    set_labels(plt, histogram_properties,
               show_x_label=not show_ratio, axes=axes)

    x_limits = histogram_properties.x_limits
    y_limits = histogram_properties.y_limits
    if len(x_limits) >= 2:
        axes.set_xlim(xmin=x_limits[0], xmax=x_limits[-1])
    if len(y_limits) >= 2:
        axes.set_ylim(ymin=y_limits[0], ymax=y_limits[-1])
    else:
        y_max = get_best_max_y(
            histograms_, x_limits=x_limits) * histogram_properties.y_max_scale
        print("Chosen limits : ", 0, y_max)
        axes.set_ylim(ymin=0, ymax=y_max)
    if histogram_properties.set_log_y:
        if not len(y_limits) == 2:
            # if not user set y-limits, set default
            axes.set_ylim(ymin=1e-1)

    # draw a red vertical line if needed
    if draw_vertical_line != 0:
        plt.axvline(x=draw_vertical_line, color='red', linewidth=3)

    if show_ratio:
        plt.setp(axes.get_xticklabels(), visible=False)
        ax1 = plt.subplot(gs[1])
        ax1.minorticks_on()
        ax1.grid(True, 'major', linewidth=1)
        ax1.axhline(y=1, linewidth=1)
        set_labels(plt, histogram_properties,
                   show_x_label=True, show_title=False)
        plt.ylabel(r'$\frac{\mathrm{data}}{\mathrm{pred.}}$',
                   CMS.y_axis_title)
        ax1.yaxis.set_label_coords(-0.115, 0.8)
        rplt.errorbar(ratio,
                      emptybins=histogram_properties.emptybins,
                      axes=ax1,
                      xerr=histogram_properties.xerr,
                      elinewidth=1.5,
                      capsize=5,
                      capthick=1.5)

        if histograms_to_compare:
            # .items() works on both Python 2 and Python 3.
            for compare_label, (total, colour) in h_compare.items():
                # Divide() works in place; keep a reference to the clone
                # rather than to Divide()'s return value.
                compare_ratio = data.Clone(compare_label)
                compare_ratio.Divide(total)
                rplt.step(
                    compare_ratio,
                    axes=ax1,
                    label='',
                    color=colour,
                    linewidth=2,
                )

        if len(x_limits) >= 2:
            ax1.set_xlim(xmin=x_limits[0], xmax=x_limits[-1])
        if len(histogram_properties.ratio_y_limits) >= 2:
            ax1.set_ylim(ymin=histogram_properties.ratio_y_limits[0],
                         ymax=histogram_properties.ratio_y_limits[-1])

        # dynamic tick placement
        adjust_ratio_ticks(ax1.yaxis, n_ticks=3,
                           y_limits=histogram_properties.ratio_y_limits)

        if histogram_properties.integerXVariable:
            ax1.tick_params(axis='x', which='minor',
                            bottom='off', top='off')

        if systematics_for_ratio is not None:
            ratio_plus_error = ratio.Clone('plus')
            ratio_minus_error = ratio.Clone('minus')
            for bin_i in range(1, ratio_plus_error.GetNbinsX() + 1):
                relative_error = systematics_for_ratio[bin_i - 1]
                ratio_plus_error.SetBinContent(bin_i, relative_error + 1)
                ratio_minus_error.SetBinContent(bin_i, 1 - relative_error)
            # The 1 +/- x band belongs to the ratio panel, not the main one.
            rplt.fill_between(
                ratio_plus_error, ratio_minus_error, axes=ax1,
                hatch='//',
                facecolor='None',
                edgecolor='Grey',
                alpha=1.,
                linewidth=0.,
                zorder=0)

    if CMS.tight_layout:
        plt.tight_layout()

    for save in save_as:
        output_file = save_folder + histogram_properties.name + '.' + save
        if save == 'root':
            saveHistogramsToROOTFile(data, stack, output_file)
        else:
            plt.savefig(output_file)

    plt.close()
# need to set things like min, max, change to log, etc for hstack and soloHists normalizeTo = plots_path.get('normalize', plots_config.get('normalize', None)) if normalizeTo is not None: dataScale = 0. if "unity" in normalizeTo: for hist in hstack: if (hist.integral() != 0): hist.scale(1.0/hist.integral()) for hist in soloHists: if (hist.integral() != 0): hist.scale(1.0/hist.integral()) elif "stack" in normalizeTo: for hist in soloHists: if (hist.integral() != 0): print(hist) print(hstack.Integral()/hist.integral()) exit() hist.scale(hstack.Integral()/hist.integral()) else: if normalizeTo not in [hist.title for hist in soloHists]: raise ValueError("Could not find %s as a solo hist for normalizing to." % normalizeTo) for hist in soloHists: if hist.title == normalizeTo: dataScale = hist.integral() mcScale = 0. for hist in hstack: mcScale += hist.integral() if mcScale != 0.: normalizeFactor = dataScale/mcScale else: normalizeFactor = 1. for hist in hstack: