fill_opts=fill_opts, error_opts=error_opts, ) # DATA plotting hist.plot1d(h1[data].integrate('region', region), ax=ax, clear=False, error_opts=data_err_opts) # -- Ratio Plot hist.plotratio( num=h1[data].integrate('region', region).sum("dataset"), denom=h1[notdata].integrate('region', region).sum("dataset"), ax=rax, error_opts=data_err_opts, denom_fill_opts={}, guide_opts={}, unc="num", ) np.set_printoptions(suppress=True) rax.set_ylabel("Data/MC") rax.set_ylim(0, 2) ax._get_lines.prop_cycler = ax._get_patches_for_fill.prop_cycler ax.autoscale(axis="x", tight=True) ax.set_ylim(ymin, ymax) ax.set_xlim(xmin, xmax)
order=processes) #hist.plot1d(histogram[signal], overlay="dataset", ax=ax, overflow=bins[name]['overflow'], line_opts={'linewidth':3}, clear=False) #hist.plot1d(histogram['750_1_scan'], overlay="dataset", ax=ax, overflow=bins[name]['overflow'], line_opts={'linewidth':3}, clear=False) hist.plot1d(histogram['1000_1_scan'], overlay="dataset", ax=ax, overflow=bins[name]['overflow'], line_opts={'linewidth': 3}, clear=False) if usePseudoData: # build ratio hist.plotratio(num=histogram['pseudodata'].sum("dataset"), denom=histogram[notdata].sum("dataset"), ax=rax, error_opts=data_err_opts, denom_fill_opts={}, guide_opts={}, unc='num', overflow=bins[name]['overflow']) for l in ['linear', 'log']: if usePseudoData: saveFig(fig, ax, rax, plotDir, name, scale=l, shape=False, y_max=y_max) else:
def sf_1d(acc, tag, regex, outputrootfile):
    """Plot V pT spectra with their NLO/LO ratio and store the ratio per bin.

    For each pT definition and selection the histogram
    ``gen_vpt_<selection>_<pt_type>`` is loaded from ``acc``, rebinned,
    passed through ``merge_extensions``, ``scale_xs_lumi`` and
    ``merge_datasets``, restricted to datasets matching ``regex`` and drawn.
    The ratio of the datasets matching ``.*(LHE|amc).*`` to those matching
    ``.*HT.*`` is drawn below and written to ``outputrootfile``.

    :param acc: Accumulator providing ``load`` and item access to histograms.
    :param tag: Process tag. 'dy' and 'wjet' add the 'dress' pT type and use
        a different binning; the tag is also used in output names.
    :param regex: Regular expression selecting the datasets to keep.
    :param outputrootfile: Object supporting item assignment; receives
        ``(ratio_values, bin_edges)`` under ``<tag>_<pt_type>_<selection>``.
    """
    outdir = './output/'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    fig, (ax, rax) = plt.subplots(
        2, 1,
        figsize=(7,7),
        gridspec_kw={"height_ratios": (3, 1)},
        sharex=True,
    )

    # Binning and the list of pT definitions both depend on the process tag
    pt_types = ['stat1']
    if tag in ['dy','wjet']:
        pt_types.append('dress')
        bin_edges = (list(range(100,800,100))
                     + list(range(800,1200,200))
                     + list(range(1200,2800,800)))
    else:
        bin_edges = ([200,250]
                     + list(range(300,800,100))
                     + list(range(800,1400,200)))
    new_ax = hist.Bin('vpt','V $p_{T}$ (GeV)', bin_edges)

    overflow = 'none'

    # Extra axes that must be integrated away for each selection
    axes_to_integrate = {
        'inclusive': (),
        'monojet': ('jpt',),
        'vbf': ('jpt', 'mjj'),
    }

    dataset_pattern = re.compile(regex)
    lo_pattern = re.compile('.*HT.*')
    nlo_pattern = re.compile('.*(LHE|amc).*')

    for pt_type in pt_types:
        for selection in ['inclusive','monojet','vbf']:
            dist = f'gen_vpt_{selection}_{pt_type}'
            acc.load(dist)

            h = copy.deepcopy(acc[dist])
            h = h.rebin(h.axis('vpt'), new_ax)
            for extra in axes_to_integrate[selection]:
                h = h.integrate(h.axis(extra))

            h = merge_extensions(h, acc, reweight_pu=False)
            scale_xs_lumi(h)
            h = merge_datasets(h)
            h = h[dataset_pattern]

            # Upper panel: all selected datasets overlaid
            hist.plot1d(
                h,
                overlay='dataset',
                overflow=overflow,
                binwnorm=True,
                ax=ax)

            # Lower panel: NLO / LO
            lo = h[lo_pattern].integrate('dataset')
            nlo = h[nlo_pattern].integrate('dataset')
            hist.plotratio(nlo, lo,
                           ax=rax,
                           denom_fill_opts={},
                           guide_opts={},
                           unc='num',
                           overflow=overflow,
                           error_opts=data_err_opts,
                           label='2017 NLO/LO ratio'
                           )

            # Reference curves from get_old_kfac, drawn at the bin centres
            old = get_old_kfac(tag)
            old_x = 0.5*(old.bins[:,0]+old.bins[:,1])
            rax.plot(old_x, old.values,'ob-', label='2016 QCD k fac')
            rax.plot(old_x, old.values * pdfwgt_sf(old_x),'or-', label='2016 x ad-hoc DY pdfwgt SF')

            ax.set_yscale('log')
            ax.set_ylim(1e-3,1e6)
            rax.set_ylim(0,2)
            rax.legend()

            fig.savefig(pjoin(outdir,f'{tag}_{dist}.pdf'))

            # Persist the per-bin ratio together with the bin edges
            sf_x = lo.axis('vpt').edges(overflow=overflow)
            sf_y = nlo.values(overflow=overflow)[()] / lo.values(overflow=overflow)[()]
            outputrootfile[f'{tag}_{pt_type}_{selection}'] = (sf_y,sf_x)
def plot_recoil(acc, region_tag="1m", dataset='SingleMuon', year=2018, tag="test", distribution="recoil", xmax=1e3):
    """Plot numerator/denominator spectra and the resulting efficiency.

    Writes three files below ``./output/<tag>``: the overlaid spectra, a
    text table produced by ``content_table`` and the efficiency curve.

    :param acc: Mapping from distribution name to histogram.
    :param region_tag: Selects the regions ``tr_<region_tag>_num`` and
        ``tr_<region_tag>_den``.
    :param dataset: Dataset name; combined with ``year`` as ``<dataset>_<year>``.
    :param year: Data-taking year, also passed to ``lumi`` and ``trgname``.
    :param tag: Output sub-directory name, also passed to ``trgname``.
    :param distribution: Name of the histogram and of the axis to rebin.
    :param xmax: Upper x limit of the efficiency plot and right end of the
        0.95 reference line. The block previously read ``xmax`` as a free
        name that it never defined; it is now an explicit keyword.
    :return: None. Returns early (after printing an error) when plotting
        the numerator raises ``KeyError``.
    """
    h = acc[distribution]
    h = merge_extensions(h)
    h = merge_datasets(h)

    # Rebin: 20 GeV steps up to 400, then 100 GeV steps up to 1000
    newbin = hist.Bin(distribution, f"{distribution} (GeV)",
                      np.array(list(range(0,400,20)) + list(range(400,1100,100))))
    h = h.rebin(h.axis(distribution), newbin)

    # Pick the dataset, then the numerator and denominator regions
    ds = f'{dataset}_{year}'
    h = h.project(h.axis('dataset'), ds)
    hnum = h.project(h.axis('region'), f'tr_{region_tag}_num')
    hden = h.project(h.axis('region'), f'tr_{region_tag}_den')

    # Recoil plot
    try:
        fig, ax, _ = hist.plot1d(hnum, binwnorm=True)
    except KeyError:
        print(f'ERROR: {region_tag}, {dataset}, {year}')
        return
    hist.plot1d(hden, ax=ax, clear=False, binwnorm=True)
    plt.yscale('log')
    plt.gca().set_ylim(0.1,1e6)

    outdir = f"./output/{tag}"
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(outdir, exist_ok=True)

    fig.savefig(pjoin(outdir, f'{distribution}_{region_tag}_{dataset}_{year}.pdf'))
    with open(pjoin(outdir, f'table_{region_tag}_{dataset}_{year}.txt'), "w") as f:
        f.write(content_table(hnum, hden) + "\n")
    plt.close(fig)

    # Efficiency
    fig, ax, _ = hist.plotratio(hnum, hden,
                                guide_opts={},
                                unc='clopper-pearson',
                                error_opts=markers('data')
                                )
    ax.set_ylim(0,1.1)
    ax.set_xlim(0,xmax)
    ax.set_ylabel("Efficiency")

    plt.text(1., 1., r"$\approx$ %.1f fb$^{-1}$ (13 TeV)" % lumi(year),
             fontsize=16,
             horizontalalignment='right',
             verticalalignment='bottom',
             transform=ax.transAxes
             )
    plt.text(0., 1., f'{region_tag}, {year}',
             fontsize=16,
             horizontalalignment='left',
             verticalalignment='bottom',
             transform=ax.transAxes
             )
    plt.text(1., 0., f'{trgname(year, tag)}',
             fontsize=10,
             horizontalalignment='right',
             verticalalignment='bottom',
             transform=ax.transAxes
             )

    # Horizontal reference line at 95 % efficiency
    plt.plot([0,xmax],[0.95,0.95],'r-')

    fig.savefig(pjoin(outdir, f'eff_{region_tag}_{dataset}_{year}.pdf'))
    plt.close(fig)
density=True ) ax.autoscale(axis='x', tight=True) #ax.set_ylim(0, None) ax.set_ylim(1e-8,1e1) ax.set_yscale('log') ax.set_xlabel(None) leg = ax.legend() # now we build the ratio plot hist.plotratio( num=numerator, denom=denominator, ax=rax, error_opts=data_err_opts, denom_fill_opts={}, guide_opts={}, unc='num' ) rax.set_ylabel('Ratio') rax.set_ylim(0,2) # add some labels cms = plt.text(0., 1., u"CMS Simulation work", fontsize=16, horizontalalignment='left', verticalalignment='bottom', transform=ax.transAxes ) lumi = plt.text(1., 1., r"137 fb$^{-1}$ (13 TeV)",
list(range(80, 800, 40)) + list(range(800, 2000, 100))) h = h.rebin(h.axis('vpt'), new_ax) print(h) fig, (ax, rax) = plt.subplots(2, 1, figsize=(7, 7), gridspec_kw={"height_ratios": (3, 1)}, sharex=True) hist.plot1d(h, overlay='type', overflow='all', ax=ax, clear=False, binwnorm=True) ax.legend() hist.plotratio(h['dilepton'].integrate('type'), h['nano'].integrate('type'), ax=rax, denom_fill_opts={}, guide_opts={}, unc='num', overflow='all', error_opts=data_err_opts, label='status 1 dilepton / LHE', clear=False) # rax.set_ylim(0.9,1.1) fig.savefig('test_lhe.pdf')
def probefilter(dataset_type="inclusive"):
    """Draw truth-pT spectra and probe-filter efficiencies for Bu, Bs and Bd.

    The upper panel overlays the "inclusive" and "probefilter" selections of
    each truth-pT histogram taken from the module-level ``histograms``; the
    lower panel shows probefilter / inclusive with Clopper-Pearson
    uncertainties. The figure is saved to
    ``<figure_directory>/probefilter_efficiency_<dataset_type>.png``.

    :param dataset_type: Suffix of the dataset names to integrate over; the
        value "inclusive" additionally switches the lower panel to a log
        scale with fixed limits.
    """
    fig, ax = plt.subplots(2, 1, figsize=(10, 12))
    plt.style.use(mplhep.style.ROOT)

    # One entry per meson:
    #   histogram group, histogram name, dataset, legend labels, colour and
    #   the (selections, linestyle) groups drawn with one plot1d call each.
    species = [
        {
            "group": "Bu",
            "hname": "TruthBuToKMuMu_pt",
            "dataset": f"Bu2KJpsi2KMuMu_{dataset_type}",
            "inclusive_label": r"$B_u$, inclusive",
            "probefilter_label": r"$B_u$, emul. probe filter",
            "color": "red",
            "draw": [(["inclusive"], "-"), (["probefilter"], "--")],
        },
        {
            "group": "Bs",
            "hname": "TruthBsToKKMuMu_pt",
            "dataset": f"Bs2PhiJpsi2KKMuMu_{dataset_type}",
            "inclusive_label": r"$B_s$, inclusive",
            "probefilter_label": r"$B_s$, emul. probe filter",
            "color": "blue",
            "draw": [(["inclusive", "probefilter"], "-")],
        },
        {
            "group": "Bd",
            "hname": "TruthBdToKPiMuMu_pt",
            "dataset": f"Bd2KstarJpsi2KPiMuMu_{dataset_type}",
            "inclusive_label": r"$B_d$, inclusive",
            "probefilter_label": r"$B_d$, emul. probe filter",
            "color": "green",
            "draw": [(["inclusive", "probefilter"], "-")],
        },
    ]

    # Upper panel: spectra, drawn in the order Bu, Bs, Bd
    spectra = []
    for cfg in species:
        h_truthpt = histograms[cfg["group"]][cfg["hname"]].integrate(
            "dataset", cfg["dataset"])
        selection_axis = h_truthpt.axis("selection")
        selection_axis.index("inclusive").label = cfg["inclusive_label"]
        selection_axis.index("probefilter").label = cfg["probefilter_label"]

        for selections, linestyle in cfg["draw"]:
            hist.plot1d(h_truthpt[selections, :],
                        ax=ax[0],
                        clear=False,
                        overlay="selection",
                        line_opts={
                            "color": cfg["color"],
                            "linestyle": linestyle
                        })
        spectra.append((h_truthpt, cfg["color"]))

    # Lower panel: probefilter / inclusive, same meson order
    for h_truthpt, color in spectra:
        h_inclusive = h_truthpt.integrate("selection", "inclusive")
        h_probefilter = h_truthpt.integrate("selection", "probefilter")
        h_probefilter.label = "Efficiency"
        hist.plotratio(
            num=h_probefilter,
            denom=h_inclusive,
            ax=ax[1],
            unc="clopper-pearson",
            clear=False,
            error_opts={
                'color': color,
                'marker': '.'
            },
        )

    ax[0].set_yscale("log")
    ax[0].set_ylim(1., 1.e6)
    if dataset_type == "inclusive":
        ax[1].set_yscale("log")
        ax[1].set_ylim(0.001, 0.2)
    ax[0].legend(fontsize=14)

    plt.tight_layout()
    fig.savefig(
        f"{figure_directory}/probefilter_efficiency_{dataset_type}.png")
def Stack(self, hname={}, xtit='', ytit=''):
    ''' Draw the background histogram(s) with optional data and ratio panel.

        hname can be a single histogram name, a list of histogram names or a
        dictionary 'histoName : xtit'; lists and dictionaries are expanded
        into one call per histogram. The figure is saved as
        <outpath>/<hname>.png.

        xtit and ytit are only forwarded to the recursive calls; this method
        does not use them when drawing.
    '''
    # Expand containers into one call per histogram
    if isinstance(hname, dict):
        for k in hname:
            self.Stack(k, hname[k], ytit)
        return
    if isinstance(hname, list):
        for k in hname:
            self.Stack(k, xtit, ytit)
        return

    density = False
    binwnorm = None
    plt.rcParams.update(self.textParams)

    if self.doData(hname) and self.doRatio:
        fig, (ax, rax) = plt.subplots(2, 1,
                                      figsize=(7, 7),
                                      gridspec_kw={"height_ratios": (3, 1)},
                                      sharex=True)
        fig.subplots_adjust(hspace=.07)
    else:
        fig, ax = plt.subplots(1, 1, figsize=(7, 7))

    # Colors
    from cycler import cycler
    colors = self.GetColors(self.bkglist)

    # The histogram has to exist before the inverted-stack handling below
    # can inspect it; previously `h` was read before being assigned.
    h = self.GetHistogram(hname, self.bkglist)
    if self.invertStack:
        # Keep one colour per process and reverse their order. The axis
        # name "process" is the one used for the overlay further down.
        _n = len(h.identifiers("process")) - 1
        colors = colors[_n::-1]
        # NOTE(review): this reverses a private list of the first axis in
        # place; confirm GetHistogram does not hand out shared axes.
        if isinstance(h._axes[0], hist.hist_tools.Cat):
            h._axes[0]._sorted.reverse()
    ax.set_prop_cycle(cycler(color=colors))

    fill_opts = self.fill_opts
    error_opts = self.error_opts
    data_err_opts = self.data_err_opts
    if not self.doStack:
        error_opts = None
        fill_opts = None

    h.scale(1000. * self.lumi)
    hist.plot1d(h,
                overlay="process",
                ax=ax,
                clear=False,
                stack=self.doStack,
                density=density,
                line_opts=None,
                fill_opts=fill_opts,
                error_opts=error_opts,
                binwnorm=binwnorm)

    if self.doData(hname):
        hData = self.GetHistogram(hname, self.dataName)
        hist.plot1d(hData,
                    ax=ax,
                    clear=False,
                    error_opts=data_err_opts,
                    binwnorm=binwnorm)

    ax.autoscale(axis='x', tight=True)
    ax.set_ylim(0, None)
    ax.set_xlabel(None)

    if self.doLegend:
        handles, labels = ax.get_legend_handles_labels()
        if self.doData(hname):
            # Move the last entry (the data) to the front and call it 'Data'
            handles = handles[-1:] + handles[:-1]
            labels = ['Data'] + labels[:-1]
        ax.legend(handles, labels)

    if self.doData(hname) and self.doRatio:
        hist.plotratio(hData,
                       h.sum("process"),
                       clear=False,
                       ax=rax,
                       error_opts=data_err_opts,
                       denom_fill_opts={},
                       guide_opts={},
                       unc='num')
        rax.set_ylabel(self.yRatioTit)
        rax.set_ylim(self.ratioRange[0], self.ratioRange[1])

    if self.doLogY:
        ax.set_yscale("log")
        ax.set_ylim(1, ax.get_ylim()[1] * 5)

    # Explicit ranges; previously read from undefined bare names
    # xRange / yRange instead of the instance attributes.
    if self.xRange is not None:
        ax.set_xlim(self.xRange[0], self.xRange[1])
    if self.yRange is not None:
        ax.set_ylim(self.yRange[0], self.yRange[1])

    # Labels
    plt.text(0., 1., r"$\bf{CMS}$ Preliminary",
             fontsize=16,
             horizontalalignment='left',
             verticalalignment='bottom',
             transform=ax.transAxes)
    plt.text(1., 1., r"%1.1f %s (%s)" % (self.lumi, self.lumiunit, self.sqrts),
             fontsize=20,
             horizontalalignment='right',
             verticalalignment='bottom',
             transform=ax.transAxes)

    if self.region is not None:
        plt.text(0.03, .98, self.region,
                 fontsize=16,
                 horizontalalignment='left',
                 verticalalignment='top',
                 transform=ax.transAxes)
        # Raise the upper limit by 10 % when the region label is drawn
        ax.set_ylim(0, ax.get_ylim()[1] * 1.1)

    # Save; os.makedirs instead of a shell 'mkdir -p' so that the path is
    # never interpreted by a shell.
    os.makedirs(self.outpath, exist_ok=True)
    fig.savefig(os.path.join(self.outpath, hname + '.png'))
def plot_datamc(outputs):
    """Make data/MC comparison figures of the 'pt_cat' histogram per channel.

    For each channel a 2x2 figure is built: the upper row shows stacked
    background with data for njet in slice(0, 4) (left) and slice(4, 10)
    including overflow (right); the lower row shows the data/background
    ratio for the same two selections.

    :param outputs: Mapping with keys 'bkg' and 'data', each a mapping that
        contains a 'pt_cat' histogram.
    :return: dict mapping channel name to the value returned by
        ``plt.subplots`` for that channel.
    """
    bkg = outputs['bkg']
    data = outputs['data']

    fill_opts = {'edgecolor': (0, 0, 0, 0.3), 'alpha': 0.8}
    error_opts = {
        'label': 'Stat. Unc.',
        'hatch': 'xxx',
        'facecolor': 'none',
        'edgecolor': (0, 0, 0, .5),
        'linewidth': 0
    }
    data_err_opts = {
        'linestyle': 'none',
        'marker': '.',
        'markersize': 10.,
        'color': 'k',
        'elinewidth': 1,
        'emarker': '_'
    }

    CHANNELS = ['2mu2e', '4mu']

    res = {}
    for i, chan in enumerate(CHANNELS, start=1):
        layout = plt.subplots(2, 2,
                              figsize=(16, 8),
                              gridspec_kw={"height_ratios": (4, 1)},
                              sharex=True)
        # Store exactly what plt.subplots returned, then unpack the rows
        res[chan] = layout
        fig, (axes, raxes) = layout
        fig.subplots_adjust(hspace=.07, wspace=0.1)

        # Channel i is selected with slice(i, i + 1)
        channel_slice = slice(i, i + 1)
        bkg_chan = bkg['pt_cat'].integrate('channel', channel_slice)
        data_chan = data['pt_cat'].integrate('channel', channel_slice)

        # (background, data) per column: low njet, then high njet + overflow
        panels = [
            (bkg_chan.integrate('njet', slice(0, 4)),
             data_chan.integrate('njet', slice(0, 4))),
            (bkg_chan.integrate('njet', slice(4, 10), overflow='over'),
             data_chan.integrate('njet', slice(4, 10), overflow='over')),
        ]

        for ax, (bkg_hist, data_hist) in zip(axes, panels):
            hist.plot1d(bkg_hist,
                        overlay='cat',
                        ax=ax,
                        stack=True,
                        overflow='over',
                        line_opts=None,
                        fill_opts=fill_opts,
                        error_opts=error_opts)
            hist.plot1d(data_hist,
                        overlay='cat',
                        ax=ax,
                        overflow='over',
                        clear=False,
                        error_opts=data_err_opts)

        for ax in axes:
            ax.autoscale(axis='both', tight=True)
            ax.set_yscale('symlog')
            ax.set_xlabel(None)
            ax.set_ylabel(ax.get_ylabel(), y=1.0, ha="right")
            ax.text(1, 1, '59.74/fb (13TeV)',
                    ha='right',
                    va='bottom',
                    transform=ax.transAxes)

        axes[0].set_title(f'[{chan}|CR] leptonJets pT, N(AK4PFCHS)<4',
                          x=0.0,
                          ha="left")
        axes[1].set_title(f'[{chan}|CR] leptonJets pT, N(AK4PFCHS)>=4',
                          x=0.0,
                          ha="left")

        # Lower row: data / summed background
        for rax, (bkg_hist, data_hist) in zip(raxes, panels):
            hist.plotratio(data_hist.sum('cat'),
                           bkg_hist.sum('cat'),
                           ax=rax,
                           overflow='over',
                           error_opts=data_err_opts,
                           unc='num',
                           denom_fill_opts={},
                           guide_opts={})

        for rax in raxes:
            rax.set_ylabel('Data/MC')
            rax.set_ylim(0, 2)
            rax.set_xlabel(rax.get_xlabel(), x=1.0, ha="right")

    return res
axis, histogram, notdata, [], [], overflow=bins[name]['overflow'], rebin=bins[name]['bins'], ratio=False, scales=scales) if useData: # build ratio hist.plotratio( num=histogram['Data'].sum("dataset"), denom=histogram[notdata].sum("dataset"), ax=rax, error_opts=data_err_opts, denom_fill_opts= None, # triggers this: https://github.com/CoffeaTeam/coffea/blob/master/coffea/hist/plot.py#L376 guide_opts={}, unc='num', #unc=None, overflow=bins[name]['overflow']) if 'upHists' in bins[name]: addUncertainties(rax, axis, histogram, notdata, [output[x] for x in bins[name]['upHists']], [output[x] for x in bins[name]['downHists']], overflow=bins[name]['overflow'], rebin=bins[name]['bins'],
h1['DY'], ax=ax, clear=False, stack=True, fill_opts=fill_opts, error_opts=error_opts, ) # DATA plotting hist.plot1d(h1['Egamma_RunAB'], ax=ax, clear=False, error_opts=data_err_opts) # Ratio Plot hist.plotratio(num=h1['Egamma_RunAB'].sum("dataset"), denom=h1['DY'].sum("dataset"), ax=rax, error_opts=data_err_opts, denom_fill_opts={}, guide_opts={}, unc='num') rax.set_ylabel('Data/MC') rax.set_ylim(0, 2) ax._get_lines.prop_cycler = ax._get_patches_for_fill.prop_cycler ax.autoscale(axis='x', tight=True) ax.set_ylim(ymin, ymax) ax.set_xlim(xmin, xmax) ax.set_xlabel('') #ax.set_yscale('log') #rax.set_xlabel('# of Priamary vertex')
print("Styling top") ax1.set_xlim(0., 1000.) ax1.set_ylim(0.1, 100000.) ax1.set_yscale("log") print("Integrating for ratio") h_zpt_pdfwgtT = stuff["h_zpt"].integrate("dataset", ("WJetsToQQ_pdfwgt")) h_zpt_pdfwgtF = stuff["h_zpt"].integrate("dataset", ("WJetsToQQ")) print("Plotting bottom") hist.plotratio(h_zpt_pdfwgtT, h_zpt_pdfwgtF, ax=ax2, unc="poisson-ratio", error_opts={ 'color': 'blue', 'marker': '.' }, guide_opts={ 'xmin': 0., 'xmax': 1000. }) ax2.set_xlim(0., 1000.) plt.tight_layout() print("Saving") fig1.savefig("pdfwgt_Vpt.png") fig2, ax = plt.subplots() hist.plot1d(stuff["h_genjetAK8_mass"], overlay="dataset", ax=ax) fig2.savefig("pdfwgt_genjetAK8mass.png")
def plot_recoil(acc,
                xmax=1e3,
                ymin=0,
                ymax=1.1,
                region_tag="1m",
                dataset='SingleMuon',
                year=2018,
                tag="test",
                distribution="recoil",
                axis_name=None,
                noscale=False,
                jeteta_config=None,
                output_format='pdf'):
    """Plot numerator/denominator spectra and the resulting efficiency.

    Writes three files below ``./output/<tag>``: the overlaid spectra, a
    text table produced by ``content_table`` and the efficiency curve.

    :param acc: Mapping from distribution name to histogram.
    :param xmax: Upper x limit of the efficiency plot and right end of the
        0.95 reference line.
    :param ymin: Lower y limit of the efficiency plot.
    :param ymax: Upper y limit of the efficiency plot.
    :param region_tag: Selects the regions ``tr_<region_tag>_num`` and
        ``tr_<region_tag>_den``; a tag containing 'g_' adds a vertical
        line at x = 215.
    :param dataset: Dataset name; combined with ``year`` as ``<dataset>_<year>``.
    :param year: Data-taking year, also passed to ``lumi_by_region`` and
        ``trgname``.
    :param tag: Output sub-directory name, also passed to ``trgname``.
    :param distribution: Name of the histogram to plot.
    :param axis_name: Axis to rebin; defaults to ``distribution``.
    :param noscale: If true, ``scale_xs_lumi`` is skipped and the flag is
        forwarded to ``merge_extensions``.
    :param jeteta_config: Optional suffix appended to the region names and
        the output file names.
    :param output_format: File extension for both saved figures. The
        efficiency figure used to be written as ``.pdf`` regardless of
        this argument; the default is unchanged.
    :return: None. Returns early (after printing diagnostics) when plotting
        the numerator raises ``KeyError``.
    """
    # Select and prepare histogram
    h = copy.deepcopy(acc[distribution])
    h = merge_extensions(h,
                         acc,
                         reweight_pu=('nopu' in distribution),
                         noscale=noscale)
    if not noscale:
        scale_xs_lumi(h)
    h = merge_datasets(h)

    # Rebinning
    axis_name = axis_name or distribution
    if 'photon' in distribution:
        label = f"{axis_name} (GeV)"
        edges = (list(range(0, 250, 10)) + list(range(250, 400, 50)) +
                 list(range(400, 1100, 100)))
    elif distribution == 'mjj':
        label = r'$M_{jj}$ (GeV)'
        edges = (list(range(200, 600, 200)) + list(range(600, 1500, 300)) +
                 [1500, 2000, 2750, 3500])
    else:
        label = f"{axis_name} (GeV)"
        edges = list(range(0, 500, 25)) + list(range(500, 1100, 100))
    newbin = hist.Bin(axis_name, label, np.array(edges))
    h = h.rebin(h.axis(axis_name), newbin)

    # Pick dataset and regions
    ds = f'{dataset}_{year}'
    h = h.integrate(h.axis('dataset'), ds)
    region_suffix = f'_{jeteta_config}' if jeteta_config else ''
    hnum = h.integrate(h.axis('region'),
                       f'tr_{region_tag}_num{region_suffix}')
    hden = h.integrate(h.axis('region'),
                       f'tr_{region_tag}_den{region_suffix}')

    # Recoil plot
    try:
        fig, ax, _ = hist.plot1d(hnum, binwnorm=True)
    except KeyError:
        pprint(h.axis('region').identifiers())
        print(f'ERROR: {region_tag}, {dataset}, {year}')
        return
    hist.plot1d(hden, ax=ax, clear=False, binwnorm=True)
    plt.yscale('log')
    plt.gca().set_ylim(0.1, 1e6)

    outdir = f"./output/{tag}"
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(outdir, exist_ok=True)

    outname = f'{region_tag}{"_noscale_" if noscale else "_"}{distribution}_{dataset}_{year}{"_"+jeteta_config if jeteta_config else ""}'

    fig.savefig(pjoin(outdir, f'{outname}.{output_format}'))
    with open(pjoin(outdir, f'table_{outname}.txt'), "w") as f:
        f.write(content_table(hnum, hden, axis_name) + "\n")
    plt.close(fig)

    # Efficiency plot
    fig, ax, _ = hist.plotratio(hnum,
                                hden,
                                guide_opts={},
                                unc='clopper-pearson',
                                error_opts=markers('data'))
    ax.set_ylim(ymin, ymax)
    ax.set_xlim(0, xmax)
    ax.set_ylabel("Efficiency")

    plt.text(1., 1.,
             r"%.1f fb$^{-1}$ (13 TeV)" % lumi_by_region(region_tag, year),
             fontsize=16,
             horizontalalignment='right',
             verticalalignment='bottom',
             transform=ax.transAxes)
    plt.text(1., 0.95,
             f'{jeteta_config if jeteta_config else ""}',
             fontsize=12,
             horizontalalignment='right',
             verticalalignment='bottom',
             transform=ax.transAxes)
    plt.text(0., 1.,
             f'{region_tag}, {year}',
             fontsize=16,
             horizontalalignment='left',
             verticalalignment='bottom',
             transform=ax.transAxes)
    plt.text(1., 0.,
             f'{trgname(year, tag)}',
             fontsize=10,
             horizontalalignment='right',
             verticalalignment='bottom',
             transform=ax.transAxes)

    if 'g_' in region_tag:
        # Vertical marker at x = 215 for region tags containing 'g_'
        plt.plot([215, 215], [0.8, 1.1], 'r-')
    # Horizontal reference line at 95 % efficiency
    plt.plot([0, xmax], [0.95, 0.95], 'r-')

    fig.savefig(pjoin(outdir, f'eff_{outname}.{output_format}'))
    plt.close(fig)
def make_plot(acc,
              region,
              distribution,
              year,
              data,
              mc,
              signal=None,
              outdir='./output/stack/',
              integrate=None,
              ylim=None,
              xlim=None,
              rylim=None,
              tag=None,
              output_format='pdf',
              ratio=True):
    """Creates a data vs MC comparison plot

    :param acc: Accumulator (processor output)
    :type acc: coffea.processor.accumulator
    :param region: Region identifier integrated out of the 'region' axis;
        also used in the legend title and the output file name.
    :param distribution: Name of the histogram in ``acc`` to plot.
    :param year: Year, passed to ``lumi`` and used in labels / file names.
    :param data: Selector for the data dataset(s); falsy to skip data.
    :param mc: Selector for the background dataset(s), drawn stacked.
    :param signal: Optional selector for signal dataset(s).
    :param outdir: Output directory, created if missing.
    :param integrate: Optional ``(axis, low, high)``; the histogram is
        integrated over ``slice(low, high)`` of that axis.
    :param ylim: Optional ``(low, high)`` y limits of the main panel.
    :param xlim: Optional ``(low, high)`` x limits of the main panel.
    :param rylim: Optional y limits of the ratio panel.
    :param tag: Optional tag inserted into the output file name.
    :param output_format: Comma-separated list of file extensions.
    :param ratio: If true, add a Data / MC ratio panel below the main one.
    """
    # Rebin
    s = Style()
    h = copy.deepcopy(acc[distribution])
    assert (h)
    try:
        newax = s.get_binning(distribution, region)
        h = h.rebin(h.axis(newax.name), newax)
    except KeyError:
        # No dedicated binning known: keep the histogram as it is
        pass

    # Integrate over an extra axis
    inte_tag = ""
    if integrate:
        (inte_axis, inte_low, inte_high) = integrate
        h = h.integrate(inte_axis, slice(inte_low, inte_high))
        inte_tag += "_" + inte_axis + "_" + str(inte_low) + "_" + str(
            inte_high)

    # Pick the region we want to look at
    # E.g. cr_2m_j = Di-Muon control region with monojet selection
    h = h.integrate(h.axis('region'), region)

    # Plotting
    # Add ratio plot at the bottom if specified (default)
    # Otherwise just plot the histogram
    if ratio:
        fig, (ax, rax) = plt.subplots(2, 1,
                                      figsize=(7, 7),
                                      gridspec_kw={"height_ratios": (3, 1)},
                                      sharex=True)
    else:
        fig, ax = plt.subplots(1, 1, figsize=(7, 5))

    data_err_opts = {
        'linestyle': 'none',
        'marker': '.',
        'markersize': 10.,
        'color': 'k',
        'elinewidth': 1,
    }
    signal_err_opts = {
        'linestyle': 'none',
        'marker': '.',
        'markersize': 10.,
        'color': 'r',
        'elinewidth': 1,
    }

    # Plot data
    if data:
        hist.plot1d(h[data],
                    overlay='dataset',
                    error_opts=data_err_opts,
                    ax=ax,
                    overflow='all',
                    binwnorm=True)

    if signal:
        # clear=False: without it the data points drawn just above are
        # wiped, because plot1d clears the axes by default.
        hist.plot1d(h[signal],
                    overlay='dataset',
                    error_opts=signal_err_opts,
                    ax=ax,
                    overflow='all',
                    binwnorm=True,
                    clear=False)

    # Plot MC background samples
    hist.plot1d(h[mc],
                overlay='dataset',
                stack=True,
                clear=False,
                overflow='all',
                ax=ax,
                binwnorm=True)

    # Apply correct colors to BG histograms: the first pattern in the
    # module-level `colors` mapping that matches the legend label wins
    handles, labels = ax.get_legend_handles_labels()
    for handle, label in zip(handles, labels):
        col = None
        for k, v in colors.items():
            if re.match(k, label):
                col = v
                break
        if col:
            handle.set_color(col)
            handle.set_linestyle('-')
            handle.set_edgecolor('k')

    # Update legend
    try:
        region_name = s.region_names[region]
    except KeyError:
        region_name = region
    ax.legend(title=region_name, ncol=1)

    # Ratio plot; `rax` only exists when the ratio panel was requested,
    # so the panel must be checked together with the data selector.
    if data and ratio:
        hist.plotratio(h[data].integrate('dataset'),
                       h[mc].integrate('dataset'),
                       ax=rax,
                       denom_fill_opts={},
                       guide_opts={},
                       unc='num',
                       overflow='all',
                       error_opts=data_err_opts)

    ax.text(1., 0., distribution,
            fontsize=10,
            horizontalalignment='right',
            verticalalignment='bottom',
            transform=ax.transAxes)
    fig.text(1., 1., f'{lumi(year)} fb$^{{-1}}$ ({year})',
             fontsize=14,
             horizontalalignment='right',
             verticalalignment='bottom',
             transform=ax.transAxes)
    fig.text(0., 1., '$\\bf{CMS}$ internal',
             fontsize=14,
             horizontalalignment='left',
             verticalalignment='bottom',
             transform=ax.transAxes)

    # Aesthetics
    ax.set_yscale("log")
    ax.set_ylabel('Events / Bin width')

    # Narrow the settings down by region, then by distribution.
    # NOTE(review): this uses the name `style`, not the local `s`; kept
    # as in the original - confirm which object is intended.
    plot_settings = style.plot_settings()
    if region in plot_settings.keys():
        plot_settings = plot_settings[region]
    if distribution in plot_settings.keys():
        plot_settings = plot_settings[distribution]

    # Explicit arguments take precedence over the stored settings
    if ylim:
        ax.set_ylim(ylim[0], ylim[1])
    elif 'ylim' in plot_settings.keys():
        ax.set_ylim(plot_settings['ylim'])
    else:
        ax.set_ylim(1e-1, 1e6)

    if xlim:
        ax.set_xlim(xlim[0], xlim[1])
    elif 'xlim' in plot_settings.keys():
        ax.set_xlim(plot_settings['xlim'])

    if ratio:
        if rylim:
            rax.set_ylim(*rylim)
        else:
            rax.set_ylim(0.75, 1.25)
        loc1 = matplotlib.ticker.MultipleLocator(base=0.2)
        loc2 = matplotlib.ticker.MultipleLocator(base=0.1)
        rax.yaxis.set_major_locator(loc1)
        rax.yaxis.set_minor_locator(loc2)
        rax.grid(axis='y', which='minor', linestyle='--')
        rax.grid(axis='y', which='major', linestyle='--')
        rax.set_ylabel('Data / MC')

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(outdir, exist_ok=True)

    for form in output_format.split(','):
        outpath = pjoin(
            outdir,
            f"{region}_{distribution}{inte_tag}_{tag + '_' if tag else ''}{year}.{form}"
        )
        fig.savefig(outpath)
        print(f"Saved plot file in {outpath}")
    plt.close('all')
def makePlot(output,
             histo,
             axis,
             bins=None,
             data=[],
             normalize=True,
             log=False,
             save=False,
             axis_label=None,
             ratio_range=None,
             upHists=[],
             downHists=[],
             shape=False,
             ymax=False,
             new_colors=colors,
             new_labels=my_labels,
             order=None,
             signals=[],
             omit=[],
             lumi=60.0,
             binwnorm=None,
             overlay=None,
             use_label=True,
             y_axis_label='Events'):
    """Draw a stacked (or shape) comparison of datasets with optional data.

    :param output: Mapping of histogram names to histograms, or a single
        histogram when ``histo`` is None.
    :param histo: Key of the histogram in ``output``, or None.
    :param axis: Name of the axis to plot.
    :param bins: Optional new binning passed to ``rebin``.
    :param data: Dataset names treated as data. When non-empty a ratio
        panel is added.
    :param normalize: Scale every non-data process by Data/MC. Only applied
        when ``data`` is non-empty; the previous test on the compiled
        pattern was always true and scaled the MC by zero without data.
    :param log: Use a logarithmic y axis.
    :param save: Output path without extension; '.pdf' and '.png' are
        written. Falsy to skip saving.
    :param axis_label: x axis label.
    :param ratio_range: Optional y limits of the ratio panel.
    :param upHists: Suffixes of the up-variation histograms in ``output``.
    :param downHists: Suffixes of the down-variation histograms.
    :param shape: Normalise every process to unit area and draw unstacked.
    :param ymax: Explicit upper y limit; falsy to derive it from the
        histogram contents.
    :param new_colors: Mapping from legend label to colour.
    :param new_labels: Mapping from legend label to display label.
    :param order: Stacking order; defaults to the list of processes.
    :param signals: Dataset names drawn as unstacked lines.
    :param omit: Dataset names excluded from the background selection.
    :param lumi: Luminosity shown in the header text.
    :param binwnorm: Forwarded to ``hist.plot1d``; when set, the
        ``addUncertainties`` calls are skipped.
    :param overlay: Optional extra histogram drawn as unstacked lines.
    :param use_label: Draw the CMS and luminosity header texts.
    :param y_axis_label: y axis label.
    """
    if save:
        finalizePlotDir('/'.join(save.split('/')[:-1]))

    # Selectors: data by name, background via a negative lookahead on
    # everything that is data, signal or omitted
    data_sel = re.compile('|'.join(data))
    excluded = data + signals + omit
    if len(excluded) > 0:
        bkg_sel = re.compile('(?!(%s))' % ('|'.join(excluded)))
    else:
        bkg_sel = re.compile('')

    source = output if histo is None else output[histo]
    processes = [p[0] for p in source.values().keys() if not p[0] in data]
    histogram = source.copy()

    histogram = histogram.project(axis, 'dataset')
    if overlay:
        overlay = overlay.project(axis, 'dataset')
    if bins:
        histogram = histogram.rebin(axis, bins)
        if overlay:
            overlay = overlay.rebin(axis, bins)

    bkg_values = histogram[bkg_sel].sum("dataset").values(
        overflow='over')[()]
    y_max = bkg_values.max()
    print(y_max)

    MC_total = bkg_values.sum()
    Data_total = 0
    if data:
        Data_total = histogram[data_sel].sum("dataset").values(
            overflow='over')[()].sum()

    print("Data:", round(Data_total, 0), "MC:", round(MC_total, 2))

    # A compiled pattern is always truthy, so test the list of data
    # names instead: without data there is nothing to normalise to.
    if normalize and data:
        scales = {process: Data_total / MC_total for process in processes}
        histogram.scale(scales, axis='dataset')
    else:
        scales = {}

    if shape:
        scales = {
            process: 1 / histogram[process].sum("dataset").values(
                overflow='over')[()].sum()
            for process in processes
        }
        histogram.scale(scales, axis='dataset')

    if data:
        fig, (ax, rax) = plt.subplots(2, 1,
                                      figsize=(10, 10),
                                      gridspec_kw={"height_ratios": (3, 1)},
                                      sharex=True)
    else:
        fig, ax = plt.subplots(1, 1, figsize=(10, 10))

    # line_opts, fill_opts and data_err_opts are not defined in this
    # function; presumably module-level style dictionaries.
    if signals:
        for sig in signals:
            ax = hist.plot1d(histogram[sig],
                             overlay="dataset",
                             ax=ax,
                             stack=False,
                             overflow='over',
                             clear=False,
                             line_opts=line_opts,
                             fill_opts=None,
                             binwnorm=binwnorm)
    if overlay:
        ax = hist.plot1d(overlay,
                         overlay="dataset",
                         ax=ax,
                         stack=False,
                         overflow='over',
                         clear=False,
                         line_opts=line_opts,
                         fill_opts=None,
                         binwnorm=binwnorm)

    if shape:
        ax = hist.plot1d(histogram[bkg_sel],
                         overlay="dataset",
                         ax=ax,
                         stack=False,
                         overflow='over',
                         clear=False,
                         line_opts=line_opts,
                         fill_opts=None,
                         binwnorm=binwnorm)
    else:
        ax = hist.plot1d(histogram[bkg_sel],
                         overlay="dataset",
                         ax=ax,
                         stack=True,
                         overflow='over',
                         clear=False,
                         line_opts=None,
                         fill_opts=fill_opts,
                         order=(order if order else processes),
                         binwnorm=binwnorm)

    if data:
        ax = hist.plot1d(histogram[data_sel].sum("dataset"),
                         ax=ax,
                         overflow='over',
                         error_opts=data_err_opts,
                         clear=False,
                         binwnorm=binwnorm)

        hist.plotratio(
            num=histogram[data_sel].sum("dataset"),
            denom=histogram[bkg_sel].sum("dataset"),
            ax=rax,
            error_opts=data_err_opts,
            denom_fill_opts=None,  # triggers this: https://github.com/CoffeaTeam/coffea/blob/master/coffea/hist/plot.py#L376
            guide_opts={},
            unc='num',
            overflow='over')

    # Translate legend labels and colours
    handles, labels = ax.get_legend_handles_labels()
    updated_labels = []
    for handle, label in zip(handles, labels):
        try:
            if label is None or label == 'None':
                updated_labels.append("Observation")
                handle.set_color('#000000')
            else:
                updated_labels.append(new_labels[label])
                handle.set_color(new_colors[label])
        except Exception:
            # Best effort, as before: entries without a known label or
            # colour, or handles that cannot be recoloured, are skipped.
            pass

    if data:
        if ratio_range:
            rax.set_ylim(*ratio_range)
        else:
            rax.set_ylim(0.1, 1.9)
        rax.set_ylabel('Obs./Pred.')
        if axis_label:
            rax.set_xlabel(axis_label)

    ax.set_xlabel(axis_label)
    ax.set_ylabel(y_axis_label)

    if not binwnorm:
        if not shape:
            addUncertainties(ax,
                             axis,
                             histogram,
                             bkg_sel,
                             [output[histo + '_' + x] for x in upHists],
                             [output[histo + '_' + x] for x in downHists],
                             overflow='over',
                             rebin=bins,
                             ratio=False,
                             scales=scales)

        if data:
            addUncertainties(rax,
                             axis,
                             histogram,
                             bkg_sel,
                             [output[histo + '_' + x] for x in upHists],
                             [output[histo + '_' + x] for x in downHists],
                             overflow='over',
                             rebin=bins,
                             ratio=True,
                             scales=scales)

    if log:
        ax.set_yscale('log')

    # Head room above the tallest bin: more on a log axis
    y_mult = 1.7 if not log else 100
    if ymax:
        ax.set_ylim(0.01, ymax)
    else:
        y_max = y_max * y_mult * (Data_total /
                                  MC_total) if data else y_max * y_mult
        ax.set_ylim(0.01, y_max if not shape else 2)

    ax.legend(
        loc='upper right',
        ncol=2,
        borderaxespad=0.0,
        labels=updated_labels,
        handles=handles,
    )
    plt.subplots_adjust(hspace=0)

    if use_label:
        if len(data) > 0:
            fig.text(0.0, 0.995, '$\\bf{CMS}$ Preliminary',
                     fontsize=25,
                     horizontalalignment='left',
                     verticalalignment='bottom',
                     transform=ax.transAxes)
        else:
            fig.text(0.0, 0.995, '$\\bf{CMS}$ Simulation',
                     fontsize=25,
                     horizontalalignment='left',
                     verticalalignment='bottom',
                     transform=ax.transAxes)
        fig.text(0.6, 0.995, r'$%.1f\ fb^{-1}$ (13 TeV)' % (lumi),
                 fontsize=25,
                 horizontalalignment='left',
                 verticalalignment='bottom',
                 transform=ax.transAxes)

    # The Data/MC annotation is only meaningful when data were supplied
    if normalize and data:
        fig.text(0.55, 0.65,
                 'Data/MC = %s' % round(Data_total / MC_total, 2),
                 fontsize=20,
                 horizontalalignment='left',
                 verticalalignment='bottom',
                 transform=ax.transAxes)

    if save:
        fig.savefig("{}.pdf".format(save))
        fig.savefig("{}.png".format(save))
        print("Figure saved in:", save)
def test_plotratio():
    """Exercise ``hist.plot1d`` and ``hist.plotratio`` on a pseudodata/MC plot.

    Builds a pt histogram from ``fill_lepton_kinematics``, adds Poisson
    fluctuated pseudodata as an extra flavor, draws the stacked flavors with
    the pseudodata on top and the pseudodata / sum ratio underneath.
    """
    # histogram creation and manipulation
    from coffea import hist

    # matplotlib
    import matplotlib.pyplot as plt

    plt.switch_backend("agg")

    # Add some pseudodata to a pt histogram so we can make a nice data/mc plot
    pthist = fill_lepton_kinematics().sum("eta")
    centers = pthist.axis("pt").centers()
    expected = pthist.sum("flavor").values()[()]
    pseudo_pt = np.repeat(centers, np.random.poisson(expected))
    pthist.fill(flavor="pseudodata", pt=pseudo_pt)

    # Set nicer labels, by accessing the string bins' label property
    flavor_labels = (
        ("electron", "e Flavor"),
        ("muon", r"$\mu$ Flavor"),
        ("pseudodata", r"Pseudodata from e/$\mu$"),
    )
    for flavor, label in flavor_labels:
        pthist.axis("flavor").index(flavor).label = label

    # using regular expressions on flavor name to select just the data
    # another method would be to fill a separate data histogram
    import re

    notdata = re.compile("(?!pseudodata)")

    # make a nice ratio plot
    plt.rcParams.update({
        "font.size": 14,
        "axes.titlesize": 18,
        "axes.labelsize": 18,
        "xtick.labelsize": 12,
        "ytick.labelsize": 12,
    })
    fig, (ax, rax) = plt.subplots(
        2, 1,
        figsize=(7, 7),
        gridspec_kw={"height_ratios": (3, 1)},
        sharex=True,
    )
    fig.subplots_adjust(hspace=0.07)

    # Here is an example of setting up a color cycler to color the various fill patches
    # http://colorbrewer2.org/#type=qualitative&scheme=Paired&n=6
    from cycler import cycler

    colors = ["#a6cee3", "#1f78b4", "#b2df8a", "#33a02c", "#fb9a99", "#e31a1c"]
    ax.set_prop_cycle(cycler(color=colors))

    fill_opts = {"edgecolor": (0, 0, 0, 0.3), "alpha": 0.8}
    error_opts = {
        "label": "Stat. Unc.",
        "hatch": "///",
        "facecolor": "none",
        "edgecolor": (0, 0, 0, 0.5),
        "linewidth": 0,
    }
    data_err_opts = {
        "linestyle": "none",
        "marker": ".",
        "markersize": 10.0,
        "color": "k",
        "elinewidth": 1,
    }

    # Stacked flavors first, pseudodata points on top
    hist.plot1d(
        pthist[notdata],
        overlay="flavor",
        ax=ax,
        clear=False,
        stack=True,
        line_opts=None,
        fill_opts=fill_opts,
        error_opts=error_opts,
    )
    hist.plot1d(
        pthist["pseudodata"],
        overlay="flavor",
        ax=ax,
        clear=False,
        error_opts=data_err_opts,
    )

    ax.autoscale(axis="x", tight=True)
    ax.set_ylim(0, None)
    ax.set_xlabel(None)
    ax.legend()

    # Ratio of pseudodata to the summed flavors
    hist.plotratio(
        pthist["pseudodata"].sum("flavor"),
        pthist[notdata].sum("flavor"),
        ax=rax,
        error_opts=data_err_opts,
        denom_fill_opts={},
        guide_opts={},
        unc="num",
    )

    rax.set_ylabel("Ratio")
    rax.set_ylim(0, 2)

    # Header texts anchored to the upper panel
    corner_text = dict(verticalalignment="bottom", transform=ax.transAxes)
    plt.text(
        0.0,
        1.0,
        "☕",
        fontsize=28,
        horizontalalignment="left",
        **corner_text,
    )
    plt.text(
        1.0,
        1.0,
        r"1 fb$^{-1}$ (?? TeV)",
        fontsize=16,
        horizontalalignment="right",
        **corner_text,
    )
def ratio_plot(num, denom):
    """Draw two histograms in an upper panel and their ratio underneath.

    :param num: histogram drawn first and used as the ratio numerator
    :param denom: histogram drawn second and used as the ratio denominator
    :return: ``(ax, rax)`` - the upper (main) and lower (ratio) axes
    """
    from cycler import cycler

    # Adjust some font sizes (this updates matplotlib's global rcParams).
    plt.rcParams.update({
        'font.size': 16,
        'axes.titlesize': 18,
        'axes.labelsize': 18,
        'xtick.labelsize': 14,
        'ytick.labelsize': 14,
    })

    fig, (main_ax, ratio_ax) = plt.subplots(
        nrows=2,
        ncols=1,
        figsize=(10, 10),
        gridspec_kw={"height_ratios": (3, 1)},
        sharex=True,
    )
    fig.subplots_adjust(hspace=.07)

    # Colour cycle for the fill patches (palette from http://colorbrewer2.org/).
    palette = [
        '#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#fb9a99',
        '#e31a1c', '#fdbf6f', '#ff7f00', '#cab2d6', '#6a3d9a',
    ]
    main_ax.set_prop_cycle(cycler(color=palette))

    patch_style = {'edgecolor': (0, 0, 0, 0.3), 'alpha': 0.8}
    band_style = {
        'label': 'Stat. Unc.',
        'hatch': '///',
        'facecolor': 'none',
        'edgecolor': (0, 0, 0, .5),
        'linewidth': 0,
    }
    marker_style = {
        'linestyle': 'none',
        'marker': '.',
        'markersize': 10.,
        'color': 'k',
        'elinewidth': 1,
    }

    # Both histograms share the fill/uncertainty styling; only the first call
    # passes line_opts explicitly. clear=False keeps the earlier drawing.
    for histogram, extra in ((num, {'line_opts': None}), (denom, {})):
        hist.plot1d(
            histogram,
            ax=main_ax,
            clear=False,
            fill_opts=patch_style,
            error_opts=band_style,
            **extra
        )

    main_ax.autoscale(axis='x', tight=True)
    main_ax.set_ylim(0, None)
    main_ax.set_xlabel(None)

    # Ratio panel.
    hist.plotratio(
        num=num,
        denom=denom,
        ax=ratio_ax,
        error_opts=marker_style,
        denom_fill_opts={},
        guide_opts={},
        unc='num',
    )
    ratio_ax.set_ylabel('Ratio')
    ratio_ax.set_ylim(0, 15)

    return main_ax, ratio_ax
def drawSolo(h,
             sel,
             var_name,
             var_label,
             plottitle,
             lumifb,
             vars_cut,
             regionsel,
             savename,
             plotData=False,
             plotDensity=False):
    """Plot one variable of ``h`` as an MC stack, optionally with data/MC ratio.

    Four files are written to the current working directory:
    ``solo_<sel>_<var_name>_<savename>_lumi<lumifb>`` as ``.pdf``/``.png``
    (linear y axis) and the same stem with ``_logy`` (log y axis).

    Relies on the module-level names ``fill_opts``, ``err_opts``,
    ``data_err_opts`` and ``process_latex``.

    :param h: histogram with a ``process`` axis (and a ``region`` axis when
        ``regionsel`` is non-empty)
    :param sel: tag used only in the output file names
    :param var_name: name of the axis to plot
    :param var_label: label assigned to that axis
    :param plottitle: legend title, rendered in math mode; falsy for none
    :param lumifb: luminosity shown in the corner label and file names
    :param vars_cut: mapping ``axis name -> (low, high)``; every axis other
        than ``var_name`` is integrated over that range, ``var_name`` itself
        is sliced to it
    :param regionsel: region selection; tested with ``in`` for ``'signal'``
        and ``'vselection'`` and iterated to integrate the ``region`` axis
    :param plotData: also draw data and a Data/MC ratio panel
    :param plotDensity: draw unstacked densities without uncertainty bands
    """
    # Axes that must survive the initial sum over everything else.
    exceptions = ['process', var_name]
    exceptions.extend(vars_cut)
    # Compare by value: the original `is not ''` tested object identity.
    if regionsel != '':
        exceptions.append('region')
    x = h.sum(*[ax for ax in h.axes() if ax.name not in exceptions],
              overflow='allnan')

    mc_processes = ['zqq', 'wqq', 'qcd', 'st', 'wlnu', 'tt']
    data_processes = ['SingleMuon', 'JetHT']

    if 'signal' in regionsel:
        # Fixed QCD k-factor. The original printed an undefined `kfactor`
        # (its computation was commented out); print the value actually used.
        qcd_kfactor = 0.9
        x.scale({'qcd': qcd_kfactor}, 'process')
        print('applying QCD k factor:', qcd_kfactor)

    # NOTE(review): this iterates `regionsel` element by element, so a plain
    # string is walked character by character - presumably callers pass a
    # sequence of region names; confirm against the call sites.
    for reg in regionsel:
        print('integrating ', reg)
        x = x.integrate('region', reg)

    if 'vselection' in regionsel:
        # Scale tt so that total MC matches the SingleMuon yield.
        def _yield(process):
            return np.sum(x.integrate('process', process).values()[()])

        print(x.integrate('process', 'SingleMuon').values()[()])
        data_yield = _yield('SingleMuon')
        ttyield = _yield('tt')
        qcdyield = _yield('qcd')
        wlnuyield = _yield('wlnu')
        styield = _yield('st')
        ttkfac = (data_yield - qcdyield - wlnuyield - styield) / ttyield
        x.scale({'tt': ttkfac}, 'process')
        print('applying tt k factor', ttkfac)

    # Apply the cuts on all axes other than the plotted one.
    for var, val in vars_cut.items():
        if var != var_name:
            print('integrating ', var, val[0], val[1])
            x = x.integrate(var, slice(val[0], val[1]), overflow='none')

    # Restrict the plotted axis itself to its cut range, if one is given.
    if var_name in vars_cut:
        x = x[:, vars_cut[var_name][0]:vars_cut[var_name][1]]

    x.axis(var_name).label = var_label

    mc = x.remove(data_processes, 'process')
    mc.axis('process').sorting = 'integral'
    # Keep JetHT as data in the signal region, SingleMuon otherwise.
    data = x.remove(
        mc_processes + ['SingleMuon']
        if 'signal' in regionsel else mc_processes + ['JetHT'], 'process')

    for ih, hkey in enumerate(mc.identifiers('process')):
        mc.identifiers('process')[ih].label = process_latex[hkey.name]

    if plotData:
        fig, (ax, rax) = plt.subplots(nrows=2,
                                      ncols=1,
                                      figsize=(11, 11),
                                      gridspec_kw={"height_ratios": (3, 1)},
                                      sharex=True)
        fig.subplots_adjust(hspace=.02)
    else:
        fig, ax = plt.subplots()

    hist.plot1d(mc,
                overlay='process',
                ax=ax,
                stack=not plotDensity,
                clear=False,
                fill_opts=fill_opts,
                error_opts=err_opts if not plotDensity else None,
                overflow='allnan',
                density=plotDensity)
    tot = mc.sum('process')

    if plotData:
        hist.plot1d(data,
                    overlay='process',
                    ax=ax,
                    clear=False,
                    error_opts=data_err_opts,
                    overflow='allnan')
        hist.plotratio(num=data.sum('process'),
                       denom=tot,
                       ax=rax,
                       clear=False,
                       overflow='allnan',
                       error_opts=data_err_opts,
                       unc='num')
        rax.set_ylabel('Data/MC')
        rax.set_ylim(0, 2)

    ax.set_ylabel('Events')
    ax.autoscale(axis='y', tight=False)

    old_handles, old_labels = ax.get_legend_handles_labels()
    ax.legend(handles=old_handles,
              labels=old_labels,
              title=r'$%s$' % plottitle if plottitle else None,
              loc='upper right',
              title_fontsize=14,
              fontsize=12,
              facecolor='white',
              framealpha=0.2)

    # Corner labels, positioned in the main panel's axes coordinates.
    plt.text(1.,
             1.,
             r"%.1f fb$^{-1}$ (13 TeV)" % lumifb,
             fontsize=16,
             horizontalalignment='right',
             verticalalignment='bottom',
             transform=ax.transAxes)
    plt.text(0.,
             1.,
             "CMS",
             fontsize=20,
             horizontalalignment='left',
             verticalalignment='bottom',
             transform=ax.transAxes,
             fontweight='bold')
    plt.text(0.085,
             1.,
             "Preliminary" if plotData else "Simulation Preliminary",
             fontsize=16,
             horizontalalignment='left',
             verticalalignment='bottom',
             transform=ax.transAxes,
             style='italic')

    # Linear-scale output.
    print("solo_%s_%s_%s_lumi%i.pdf" % (sel, var_name, savename, lumifb))
    fig.savefig("solo_%s_%s_%s_lumi%i.pdf" % (sel, var_name, savename, lumifb))
    fig.savefig("solo_%s_%s_%s_lumi%i.png" % (sel, var_name, savename, lumifb))

    # Log-scale output of the same figure.
    ax.semilogy()
    ax.set_ylim(0.1, None)
    print("solo_%s_%s_%s_lumi%i_logy.pdf" % (sel, var_name, savename, lumifb))
    fig.savefig("solo_%s_%s_%s_lumi%i_logy.pdf" %
                (sel, var_name, savename, lumifb))
    fig.savefig("solo_%s_%s_%s_lumi%i_logy.png" %
                (sel, var_name, savename, lumifb))
def recoseleff_tag():
    """Plot truth-pT spectra and efficiencies per B type, one PNG each.

    For each of Bu, Bs and Bd the upper panel overlays the truth pT spectrum
    for the four selection stages, and the lower panel shows the ratio of
    each non-inclusive stage to the inclusive one with Clopper-Pearson
    uncertainties. The figure is saved as
    ``<figure_directory>/recosel_efficiency_tag_<btype>.png``.

    Reads the module-level ``histograms`` and ``figure_directory``.
    """
    # btype -> (histogram name, dataset to integrate over)
    truth_sources = {
        "Bu": ("TruthBuToKMuMu_pt", "Bu2KJpsi2KMuMu_inclusive"),
        "Bs": ("TruthBsToKKMuMu_pt", "Bs2PhiJpsi2KKMuMu_inclusive"),
        "Bd": ("TruthBdToKPiMuMu_pt", "Bd2KstarJpsi2KPiMuMu_inclusive"),
    }
    # (selection bin, legend entry, colour), in drawing order
    stages = [
        ("inclusive", "Inclusive", "black"),
        ("matched", "Reco matched", "red"),
        ("matched_sel", "Reco matched * selection", "green"),
        ("matched_tag", "Reco matched * selection * tag", "blue"),
    ]

    for btype in ["Bu", "Bs", "Bd"]:
        fig, ax = plt.subplots(2, 1, figsize=(10, 12))
        plt.style.use(mplhep.style.ROOT)

        hist_name, dataset = truth_sources[btype]
        h_truthpt = histograms[btype][hist_name].integrate(
            "dataset", [dataset])

        # Legend entries
        selection_axis = h_truthpt.axis("selection")
        for stage, legend_entry, _ in stages:
            selection_axis.index(stage).label = legend_entry

        # Top plot: one spectrum per selection stage
        for stage, _, colour in stages:
            hist.plot1d(h_truthpt[[stage], :],
                        ax=ax[0],
                        clear=False,
                        overlay="selection",
                        line_opts={"color": colour})
        ax[0].set_ylim(1., 1.e6)
        ax[0].set_yscale("log")

        # Bottom plot: efficiency of every later stage w.r.t. inclusive.
        # Only the first ratio clears the axes.
        denominator = h_truthpt.integrate("selection", ["inclusive"])
        for position, (stage, _, colour) in enumerate(stages[1:]):
            numerator = h_truthpt.integrate("selection", [stage])
            numerator.label = "Efficiency"
            hist.plotratio(
                num=numerator,
                denom=denominator,
                ax=ax[1],
                unc="clopper-pearson",
                clear=(position == 0),
                error_opts={
                    'color': colour,
                    'marker': '.'
                },
            )

        ax[0].legend(fontsize=14)
        ax[1].set_ylim(0., 0.4)
        plt.tight_layout()
        fig.savefig(f"{figure_directory}/recosel_efficiency_tag_{btype}.png")
def make_plot(acc,
              region,
              distribution,
              year,
              data,
              mc,
              signal=None,
              outdir='./output/stack/',
              integrate=None,
              ylim=None,
              xlim=None,
              rylim=None,
              tag=None,
              output_format='pdf',
              ratio=True,
              mcscale=1):
    """Creates a data vs MC comparison plot

    :param acc: Accumulator (processor output)
    :type acc: coffea.processor.accumulator
    :param region: region to integrate over; also used for binning, legend
        title, channel name and the output file name
    :param distribution: key of the histogram in ``acc``
    :param year: year used for the luminosity label and the file name
    :param data: selector for the data dataset(s); falsy to skip data
    :param mc: compiled regular expression selecting MC datasets, or None
    :param signal: optional selector for signal dataset(s)
    :param outdir: output directory, created if missing
    :param integrate: optional ``(axis, low, high)`` to integrate over
    :param ylim: ``"auto"``, a ``(low, high)`` pair, or None to fall back to
        the style settings / ``(1e-1, 1e6)``
    :param xlim: optional ``(low, high)`` pair
    :param rylim: optional ratio-panel y range (default ``(0.5, 1.5)``)
    :param tag: optional tag inserted into the file name
    :param output_format: comma-separated list of file extensions
    :param ratio: draw the Data / MC ratio panel
    :param mcscale: scale factor applied to MC datasets in regions whose
        name starts with ``"sr"``
    """
    # Rebin
    s = Style()
    h = copy.deepcopy(acc[distribution])

    if region.startswith("sr"):
        h.scale(
            {
                ds: (mcscale if mc.match(ds) else 1)
                for ds in map(str,
                              h.axis("dataset").identifiers())
            },
            axis='dataset')
    assert (h)
    try:
        newax = s.get_binning(distribution, region)
        h = h.rebin(h.axis(newax.name), newax)
    except KeyError:
        # No custom binning configured: keep the original one.
        pass

    # Integrate over an extra axis
    inte_tag = ""
    if integrate:
        (inte_axis, inte_low, inte_high) = integrate
        # An overflow option could be added here.
        h = h.integrate(inte_axis, slice(inte_low, inte_high))
        inte_tag += "_" + inte_axis + "_" + str(inte_low) + "_" + str(
            inte_high)

    # Pick the region we want to look at
    # E.g. cr_2m_j = Di-Muon control region with monojet selection
    h = h.integrate(h.axis('region'), region)

    # Plotting
    # Add ratio plot at the bottom if specified (default)
    # Otherwise just plot the histogram
    if ratio:
        fig, (ax, rax) = plt.subplots(2,
                                      1,
                                      figsize=(7, 7),
                                      gridspec_kw={"height_ratios": (3, 1)},
                                      sharex=True)
    else:
        fig, ax = plt.subplots(1, 1, figsize=(7, 5))

    data_err_opts = {
        'linestyle': 'none',
        'marker': '.',
        'markersize': 10.,
        'color': 'k',
        'elinewidth': 1,
    }
    signal_err_opts = {
        'color': 'crimson',
        'elinewidth': 1,
    }

    # Plot data
    if data:
        hist.plot1d(h[data],
                    overlay='dataset',
                    error_opts=data_err_opts,
                    ax=ax,
                    overflow='all',
                    binwnorm=1)

    if signal:
        hist.plot1d(h[signal],
                    overlay='dataset',
                    error_opts=signal_err_opts,
                    ax=ax,
                    overflow='all',
                    binwnorm=1,
                    clear=False)

    # Plot MC background samples
    # Here we use a regular expression to match data sets we want
    if mc is not None:
        hist.plot1d(h[mc],
                    overlay='dataset',
                    stack=True,
                    clear=False,
                    overflow='all',
                    ax=ax,
                    binwnorm=1)

    # Pick the proper legend labels for the channel and add the common ones.
    # A merged copy is built once: the original called .update() on the
    # module-level dict for every legend entry (mutating shared state) and
    # raised NameError for a channel that is neither VBF nor Monojet/Mono-V.
    channel = channel_name(region)
    if channel == 'VBF':
        channel_labels = legend_labels['VBF']
    elif channel in ['Monojet', 'Mono-V']:
        channel_labels = legend_labels['Monojet/Mono-V']
    else:
        channel_labels = {}
    legend_labels_to_use = {**channel_labels, **legend_labels['Common']}

    # Apply correct colors to BG histograms and translate the labels
    handles, labels = ax.get_legend_handles_labels()
    new_labels = []
    for handle, label in zip(handles, labels):
        # First matching colour pattern wins.
        col = None
        for k, v in colors.items():
            if re.match(k, label):
                col = v
                break
        if col:
            handle.set_color(col)
            handle.set_linestyle('-')
            handle.set_edgecolor('k')

        # Last matching label pattern wins (no break, as in the original).
        l = None
        for k, v in legend_labels_to_use.items():
            if re.match(k, label):
                l = v
        new_labels.append(l if l else label)

    # Update legend
    try:
        region_name = s.region_names[region]
    except KeyError:
        region_name = region
    ax.legend(title=region_name, ncol=2, handles=handles, labels=new_labels)

    # Ratio plot
    if data and ratio:
        hist.plotratio(h[data].integrate('dataset'),
                       h[mc].integrate('dataset'),
                       ax=rax,
                       denom_fill_opts={},
                       guide_opts={},
                       unc='num',
                       overflow='all',
                       error_opts=data_err_opts)

    ax.text(1.,
            0.,
            distribution,
            fontsize=10,
            horizontalalignment='right',
            verticalalignment='bottom',
            transform=ax.transAxes)
    fig.text(0.,
             1.,
             '$\\bf{CMS}$ internal',
             fontsize=14,
             horizontalalignment='left',
             verticalalignment='bottom',
             transform=ax.transAxes)

    fig.text(1.,
             1.,
             f'{channel_name(region)}, {lumi(year):.1f} fb$^{{-1}}$ ({year})',
             fontsize=14,
             horizontalalignment='right',
             verticalalignment='bottom',
             transform=ax.transAxes)

    # Aesthetics
    ax.set_yscale("log")
    ax.set_ylabel('Events / GeV')

    # Narrow the style settings down to this region / distribution.
    plot_settings = style.plot_settings()
    if region in plot_settings.keys():
        plot_settings = plot_settings[region]
    if distribution in plot_settings.keys():
        plot_settings = plot_settings[distribution]

    if ylim:
        if ylim == "auto":
            # Per-bin-width values, to match the binwnorm=1 drawing above.
            width = np.diff([x for x in h.axes()
                             if "dataset" not in x.name][0].edges())
            vmc = h[mc].integrate("dataset").values()[()] / width
            try:
                vdata = h[data].integrate("dataset").values()[()] / width
            except Exception:
                # No usable data selection: fall back to MC for the range.
                vdata = vmc
            if signal:
                vsig = h[signal].integrate("dataset").values()[()] / width
            else:
                vsig = vmc

            ax.set_ylim(
                0.5 * min([
                    np.min(vmc[vmc > 0]),
                    np.min(vdata[vdata > 0]),
                    np.min(vsig[vsig > 0])
                ]),
                # Upper bound uses the maximum of every component (the
                # original took np.min of the signal here).
                1e2 * max([np.max(vmc),
                           np.max(vdata),
                           np.max(vsig)]),
            )
        else:
            ax.set_ylim(ylim[0], ylim[1])
    elif 'ylim' in plot_settings.keys():
        ax.set_ylim(plot_settings['ylim'])
    else:
        ax.set_ylim(1e-1, 1e6)

    if xlim:
        ax.set_xlim(xlim[0], xlim[1])
    elif 'xlim' in plot_settings.keys():
        ax.set_xlim(plot_settings['xlim'])

    if ratio:
        if rylim:
            rax.set_ylim(*rylim)
        else:
            rax.set_ylim(0.5, 1.5)
        loc1 = matplotlib.ticker.MultipleLocator(base=0.2)
        loc2 = matplotlib.ticker.MultipleLocator(base=0.1)
        rax.yaxis.set_major_locator(loc1)
        rax.yaxis.set_minor_locator(loc2)
        rax.grid(axis='y', which='minor', linestyle='--')
        rax.grid(axis='y', which='major', linestyle='--')
        rax.set_ylabel('Data / MC')

    os.makedirs(outdir, exist_ok=True)

    for form in output_format.split(','):
        outpath = pjoin(
            outdir,
            f"{region}_{distribution}{inte_tag}_{tag + '_' if tag else ''}{year}.{form}"
        )
        fig.savefig(outpath)
        print(f"Saved plot file in {outpath}")
    plt.close('all')
def test_plotratio():
    """Exercise ``hist.plot1d`` / ``hist.plotratio`` on a two-panel figure.

    The 'Agg' backend is selected before pyplot is imported. Pseudodata are
    generated from the lepton pT histogram, drawn over the stacked
    simulation, and divided by it in a ratio panel. The test only checks
    that nothing raises.
    """
    # matplotlib: the backend has to be chosen before pyplot is imported
    import matplotlib as mpl
    mpl.use('Agg')
    import matplotlib.pyplot as plt

    # histogram creation and manipulation
    from coffea import hist
    from cycler import cycler
    import re

    kinematics = fill_lepton_kinematics()
    pthist = kinematics.sum('eta')

    # Add pseudodata so there is something to compare to: draw a Poisson
    # count per pt bin around the flavor-summed content and fill the bin
    # centre that many times.
    n_per_bin = np.random.poisson(pthist.sum('flavor').values()[()])
    pthist.fill(flavor='pseudodata',
                pt=np.repeat(pthist.axis('pt').centers(), n_per_bin))

    # Legend labels live on the string bins of the flavor axis.
    pretty_names = {
        'electron': 'e Flavor',
        'muon': r'$\mu$ Flavor',
        'pseudodata': r'Pseudodata from e/$\mu$',
    }
    for name, text in pretty_names.items():
        pthist.axis('flavor').index(name).label = text

    # Regular expression on the flavor name separating simulation from the
    # pseudodata (filling a separate data histogram would also work).
    notdata = re.compile('(?!pseudodata)')

    plt.rcParams.update({
        'font.size': 14,
        'axes.titlesize': 18,
        'axes.labelsize': 18,
        'xtick.labelsize': 12,
        'ytick.labelsize': 12
    })
    fig, axes = plt.subplots(2,
                             1,
                             figsize=(7, 7),
                             gridspec_kw={"height_ratios": (3, 1)},
                             sharex=True)
    ax, rax = axes
    fig.subplots_adjust(hspace=.07)

    # Colour cycler for the fill patches
    # http://colorbrewer2.org/#type=qualitative&scheme=Paired&n=6
    ax.set_prop_cycle(
        cycler(color=[
            '#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#fb9a99', '#e31a1c'
        ]))

    fill_opts = dict(edgecolor=(0, 0, 0, 0.3), alpha=0.8)
    error_opts = dict(label='Stat. Unc.',
                      hatch='///',
                      facecolor='none',
                      edgecolor=(0, 0, 0, .5),
                      linewidth=0)
    data_err_opts = dict(linestyle='none',
                         marker='.',
                         markersize=10.,
                         color='k',
                         elinewidth=1,
                         emarker='_')

    # Main panel: stacked simulation, then pseudodata points on top.
    simulation = pthist[notdata]
    pseudodata = pthist['pseudodata']
    hist.plot1d(simulation,
                overlay="flavor",
                ax=ax,
                clear=False,
                stack=True,
                line_opts=None,
                fill_opts=fill_opts,
                error_opts=error_opts)
    hist.plot1d(pseudodata,
                overlay="flavor",
                ax=ax,
                clear=False,
                error_opts=data_err_opts)
    ax.autoscale(axis='x', tight=True)
    ax.set_ylim(0, None)
    ax.set_xlabel(None)
    ax.legend()

    # Ratio panel.
    hist.plotratio(pthist['pseudodata'].sum("flavor"),
                   pthist[notdata].sum("flavor"),
                   ax=rax,
                   error_opts=data_err_opts,
                   denom_fill_opts={},
                   guide_opts={},
                   unc='num')
    rax.set_ylabel('Ratio')
    rax.set_ylim(0, 2)

    # Decorations above the main panel (left: logo, right: luminosity).
    for xpos, text, size, halign in (
        (0., u"☕", 28, 'left'),
        (1., r"1 fb$^{-1}$ (?? TeV)", 16, 'right'),
    ):
        plt.text(xpos,
                 1.,
                 text,
                 fontsize=size,
                 horizontalalignment=halign,
                 verticalalignment='bottom',
                 transform=ax.transAxes)
clear=False, #error_opts = styles.data_err_opts ) ax.autoscale(axis='x', tight=True) ax.set_ylim(0, None) ax.set_xlabel(None) ax.set_ylabel('Counts') #leg = ax.legend() if not args.isSig: cofplt.plotratio(grouped['data'].sum("process"), grouped[mc].sum("process"), ax=rax, error_opts=styles.data_err_opts, denom_fill_opts={}, guide_opts={}, unc='num') rax.set_ylabel('Ratio') rax.set_xlabel(x_title) rax.set_ylim(0, 2) rax.set_yticks([0, 0.5, 1, 1.5, 2]) lumi = plt.text(1., 1., f'{year["lumi"]/1000}' r" fb$^{-1}$ (13 TeV)", fontsize=16, horizontalalignment='right', verticalalignment='bottom',
def plotWithRatio(h,
                  hData,
                  overlay,
                  invertStack=True,
                  lumi=35.9,
                  label="CMS Preliminary",
                  colors=None,
                  ratio=[0.5, 1.5],
                  xRange=None,
                  yRange=None,
                  logY=False,
                  extraText=None):
    """Draw a stacked histogram, optionally with data and a ratio panel.

    :param h: histogram to stack along ``overlay``
    :param hData: data histogram, or None for a single-panel plot
    :param overlay: name of the axis whose bins form the stack
    :param invertStack: reverse the stacking order (and colours/legend)
    :param lumi: luminosity in fb^-1 for the corner label
    :param label: accepted for compatibility; the body does not use it and
        always writes "CMS Preliminary"
    :param colors: list of colours; a 12-colour ColorBrewer palette if None
    :param ratio: ``(low, high)`` y range of the ratio panel (only read,
        never modified)
    :param xRange: optional ``(low, high)`` x range of the main panel
    :param yRange: optional ``(low, high)`` y range of the main panel
    :param logY: use a logarithmic y axis
    :param extraText: optional text drawn in the upper-left of the panel
    """
    from cycler import cycler

    # make a nice ratio plot
    plt.rcParams.update({
        'font.size': 14,
        'axes.titlesize': 18,
        'axes.labelsize': 18,
        'xtick.labelsize': 12,
        'ytick.labelsize': 12
    })
    if hData is not None:
        fig, (ax, rax) = plt.subplots(2,
                                      1,
                                      figsize=(7, 7),
                                      gridspec_kw={"height_ratios": (3, 1)},
                                      sharex=True)
        fig.subplots_adjust(hspace=.07)
    else:
        fig, ax = plt.subplots(1, 1, figsize=(7, 7))

    # Colour cycler for the fill patches
    # http://colorbrewer2.org/#type=qualitative&scheme=Paired&n=6
    if colors is None:
        colors = [
            '#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#fb9a99', '#e31a1c',
            '#fdbf6f', '#ff7f00', '#cab2d6', '#6a3d9a', '#ffff99', '#b15928'
        ]
    if invertStack:
        # Keep one colour per stack entry, in reversed order.
        _n = len(h.identifiers(overlay)) - 1
        colors = colors[_n::-1]
    ax.set_prop_cycle(cycler(color=colors))

    fill_opts = {'edgecolor': (0, 0, 0, 0.3), 'alpha': 0.8}
    error_opts = {
        'label': 'Stat. Unc.',
        'hatch': '///',
        'facecolor': 'none',
        'edgecolor': (0, 0, 0, .5),
        'linewidth': 0
    }
    data_err_opts = {
        'linestyle': 'none',
        'marker': '.',
        'markersize': 10.,
        'color': 'k',
        'elinewidth': 1,
        'emarker': '_'
    }

    # The stack order is inverted by temporarily reversing the first axis'
    # private bin order; try/finally guarantees the caller's histogram is
    # restored even if plotting raises.
    if invertStack:
        h._axes[0]._sorted.reverse()
    try:
        hist.plot1d(h,
                    overlay=overlay,
                    ax=ax,
                    clear=False,
                    stack=True,
                    line_opts=None,
                    fill_opts=fill_opts,
                    error_opts=error_opts)
    finally:
        if invertStack:
            h._axes[0]._sorted.reverse()

    if hData is not None:
        hist.plot1d(hData,
                    overlay=overlay,
                    ax=ax,
                    clear=False,
                    error_opts=data_err_opts)

    ax.autoscale(axis='x', tight=True)
    ax.set_ylim(0, None)
    ax.set_xlabel(None)

    if invertStack:
        # Reorder the legend entries to follow the inverted stack.
        handles, labels = ax.get_legend_handles_labels()
        if hData is None:
            handles = handles[-2::-1] + handles[-1:-2:-1]
            labels = labels[-2::-1] + labels[-1:-2:-1]
        else:
            handles = handles[-1:-2:-1] + handles[-3::-1] + handles[-2:-3:-1]
            labels = labels[-1:-2:-1] + labels[-3::-1] + labels[-2:-3:-1]
        ax.legend(handles, labels, bbox_to_anchor=(1., 1.), loc='upper left')
    else:
        # NOTE(review): nesting reconstructed from whitespace-collapsed
        # source. The original first called ax.legend(loc='upper right') and
        # immediately replaced that legend with this one, so only this call
        # is kept.
        ax.legend(bbox_to_anchor=(1., 1.))

    if hData is not None:
        hist.plotratio(hData.sum(overlay),
                       h.sum(overlay),
                       ax=rax,
                       error_opts=data_err_opts,
                       denom_fill_opts={},
                       guide_opts={},
                       unc='num')
        rax.set_ylabel('Ratio')
        rax.set_ylim(ratio[0], ratio[1])

    if logY:
        ax.set_yscale("log")
        ax.set_ylim(1, ax.get_ylim()[1] * 5)

    if xRange is not None:
        ax.set_xlim(xRange[0], xRange[1])
    if yRange is not None:
        ax.set_ylim(yRange[0], yRange[1])

    plt.text(0.,
             1.,
             r"$\bf{CMS}$ Preliminary",
             fontsize=16,
             horizontalalignment='left',
             verticalalignment='bottom',
             transform=ax.transAxes)

    if extraText is not None:
        plt.text(0.02,
                 .99,
                 extraText,
                 fontsize=16,
                 horizontalalignment='left',
                 verticalalignment='top',
                 transform=ax.transAxes)
        # Add 10% headroom for the text. On a log axis a zero lower limit is
        # invalid (matplotlib ignores it with a warning), so keep the current
        # lower limit there.
        lower = ax.get_ylim()[0] if logY else 0
        ax.set_ylim(lower, ax.get_ylim()[1] * 1.1)

    plt.text(1.,
             1.,
             r"%.1f fb$^{-1}$ (13 TeV)" % (lumi),
             fontsize=16,
             horizontalalignment='right',
             verticalalignment='bottom',
             transform=ax.transAxes)
def makePlot(output, histo, axis, bins=None, mc_sel=bkgonly, data_sel='MuonEG', normalize=True, log=False, save=False, axis_label=None, ratio_range=None, upHists=[], downHists=[], shape=False, ymax=False, new_colors=colors, new_labels=my_labels, order=None):
    """Draw a data/MC comparison for one axis of a histogram.

    :param output: processor output; ``output[histo]`` is the histogram, or
        ``output`` itself is the histogram when ``histo`` is None
    :param histo: key of the histogram in ``output`` (or None)
    :param axis: axis to project on (together with ``dataset``)
    :param bins: optional new binning passed to ``rebin``
    :param mc_sel: selector for the simulated datasets
    :param data_sel: selector for the data dataset; falsy for MC only
    :param normalize: scale every process by Data/MC when data is present
    :param log: logarithmic y axis
    :param save: output path without extension (``.pdf`` and ``.png`` are
        written); falsy to skip saving
    :param axis_label: x-axis label
    :param ratio_range: ``(low, high)`` of the ratio panel, default (0.1, 1.9)
    :param upHists: suffixes of the up-variation histograms (only read)
    :param downHists: suffixes of the down-variation histograms (only read)
    :param shape: normalise every process to unit area and draw unstacked
    :param ymax: fixed upper y limit; falsy for automatic
    :param new_colors: mapping legend label -> colour
    :param new_labels: mapping legend label -> display label
    :param order: stacking order; defaults to the list of processes

    NOTE(review): the nesting below was reconstructed from
    whitespace-collapsed source; the placement of the "Data:" print and of
    ``ax.set_xlabel`` should be confirmed against the original file.
    """
    if save:
        finalizePlotDir('/'.join(save.split('/')[:-1]))

    if histo is None:
        processes = [p[0] for p in output.values().keys() if not p[0] == 'MuonEG']
        histogram = output.copy()
    else:
        processes = [p[0] for p in output[histo].values().keys() if not p[0] == 'MuonEG']
        histogram = output[histo].copy()

    histogram = histogram.project(axis, 'dataset')
    if bins:
        histogram = histogram.rebin(axis, bins)

    # Summed MC contents (including overflow), used for scaling and y range.
    mc_values = histogram[mc_sel].sum("dataset").values(overflow='over')[()]
    y_max = mc_values.max()
    MC_total = mc_values.sum()
    Data_total = 0
    if data_sel:
        Data_total = histogram[data_sel].sum("dataset").values(overflow='over')[()].sum()
        print("Data:", round(Data_total, 0), "MC:", round(MC_total, 2))

    if normalize and data_sel:
        scales = {process: Data_total/MC_total for process in processes}
        histogram.scale(scales, axis='dataset')
    else:
        scales = {}

    if shape:
        scales = {process: 1/histogram[process].sum("dataset").values(overflow='over')[()].sum() for process in processes}
        histogram.scale(scales, axis='dataset')

    if data_sel:
        fig, (ax, rax) = plt.subplots(2, 1, figsize=(10, 10), gridspec_kw={"height_ratios": (3, 1)}, sharex=True)
    else:
        fig, ax = plt.subplots(1, 1, figsize=(10, 10))

    if shape:
        ax = hist.plot1d(histogram[mc_sel], overlay="dataset", ax=ax, stack=False, overflow='over', clear=False, line_opts=line_opts, fill_opts=None)
    else:
        ax = hist.plot1d(histogram[mc_sel], overlay="dataset", ax=ax, stack=True, overflow='over', clear=False, line_opts=None, fill_opts=fill_opts, order=(order if order else processes))

    if data_sel:
        ax = hist.plot1d(histogram[data_sel], overlay="dataset", ax=ax, overflow='over', error_opts=data_err_opts, clear=False)

        hist.plotratio(
            num=histogram[data_sel].sum("dataset"),
            denom=histogram[mc_sel].sum("dataset"),
            ax=rax,
            error_opts=data_err_opts,
            denom_fill_opts=None,  # triggers this: https://github.com/CoffeaTeam/coffea/blob/master/coffea/hist/plot.py#L376
            guide_opts={},
            unc='num',
            overflow='over'
        )

    # Translate legend labels and recolour the handles. Every handle keeps
    # exactly one label: the original dropped labels missing from
    # `new_labels` (bare except/pass), which shifted all later labels onto
    # the wrong handles in the legend.
    handles, labels = ax.get_legend_handles_labels()
    updated_labels = []
    for handle, label in zip(handles, labels):
        try:
            updated_labels.append(new_labels[label])
        except KeyError:
            updated_labels.append(label)
            continue  # as before, untranslated entries are not recoloured
        if not label == 'MuonEG':
            try:
                handle.set_color(new_colors[label])
            except (KeyError, AttributeError):
                # No colour configured, or a handle type without set_color.
                pass

    if data_sel:
        if ratio_range:
            rax.set_ylim(*ratio_range)
        else:
            rax.set_ylim(0.1, 1.9)
        rax.set_ylabel('Obs./Pred.')

        if axis_label:
            rax.set_xlabel(axis_label)

    ax.set_xlabel(axis_label)
    ax.set_ylabel('Events')

    if not shape:
        addUncertainties(ax, axis, histogram, mc_sel, [output[histo+'_'+x] for x in upHists], [output[histo+'_'+x] for x in downHists], overflow='over', rebin=bins, ratio=False, scales=scales)

        if data_sel:
            addUncertainties(rax, axis, histogram, mc_sel, [output[histo+'_'+x] for x in upHists], [output[histo+'_'+x] for x in downHists], overflow='over', rebin=bins, ratio=True, scales=scales)

    if log:
        ax.set_yscale('log')

    # Head room above the tallest bin: 30% on a linear axis, x100 on a log one.
    y_mult = 1.3 if not log else 100
    if ymax:
        ax.set_ylim(0.01, ymax)
    else:
        ax.set_ylim(0.01, y_max*y_mult if not shape else 2)

    ax.legend(
        loc='upper right',
        ncol=2,
        borderaxespad=0.0,
        labels=updated_labels,
        handles=handles,
    )
    plt.subplots_adjust(hspace=0)

    fig.text(0.0, 0.995, '$\\bf{CMS}$ Preliminary', fontsize=20, horizontalalignment='left', verticalalignment='bottom', transform=ax.transAxes)
    fig.text(0.8, 0.995, '13 TeV', fontsize=20, horizontalalignment='left', verticalalignment='bottom', transform=ax.transAxes)

    # Only annotate the Data/MC factor when it was actually computed and
    # applied; without data it would read 0.0 (or divide by an empty MC sum).
    if normalize and data_sel:
        fig.text(0.55, 0.65, 'Data/MC = %s' % round(Data_total/MC_total, 2), fontsize=20, horizontalalignment='left', verticalalignment='bottom', transform=ax.transAxes)

    if save:
        fig.savefig("{}.pdf".format(save))
        fig.savefig("{}.png".format(save))
        print("Figure saved in:", save)
# Raw mistag-rate plot for one dataset (`iset`) and one analysis category
# (`icat`); both names come from the enclosing scope, which is not shown here.
print(iset)
print(icat)
title = iset + ' mistag ' + icat
filename = 'mistag_' + iset + '_' + icat + '.' + 'png'
print(outputs_unweighted[iset]['numerator'])
# Reduce numerator and denominator to this category and dataset.
Numerator = outputs_unweighted[iset]['numerator'].integrate(
    'anacat', icat).integrate('dataset', iset)
Denominator = outputs_unweighted[iset]['denominator'].integrate(
    'anacat', icat).integrate('dataset', iset)
print(Numerator)
print(Denominator)
# Mistag rate = numerator / denominator, drawn as black points.
mistag = hist.plotratio(num=Numerator,
                        denom=Denominator,
                        error_opts={
                            'marker': '.',
                            'markersize': 10.,
                            'color': 'k',
                            'elinewidth': 1
                        },
                        unc='num')
plt.title(title)
plt.ylim(bottom=0, top=0.12)
plt.xlim(left=100, right=2500)

# ---- Better mistag plots are made in 'TTbarResCoffea_MistagAnalysis-BkgEst' python script ---- #
# ---- However, if one wants to save these raw plots, they may uncomment the following lines ---- #
#plt.xticks(np.array([0, 500, 600, 700]))
#mistag.set_xscale('function', functions=(forward, inverse))
#mistag.set_xscale('log')
#plt.savefig(SaveDirectory+filename, bbox_inches="tight")
output = processor.run_uproot_job( sigDS_2mu2e, treename='ffNtuplizer/ffNtuple', processor_instance=MuEffiResoProcessor(), executor=processor.futures_executor, executor_args=dict(workers=12, flatten=False), chunksize=500000, ) fig, ax = plt.subplots(figsize=(8, 6)) hist.plotratio(num=output['lxy'][longdecay].sum('dataset').integrate( 'reco', 'true'), denom=output['lxy'][longdecay].sum('dataset').integrate( 'reco', 'inclusive'), overflow='over', error_opts={ 'marker': 'o', }, ax=ax, label='PFMu+DSAMu') hist.plotratio(num=output['lxy-pf'][longdecay].sum('dataset').integrate( 'reco', 'true'), denom=output['lxy-pf'][longdecay].sum('dataset').integrate( 'reco', 'inclusive'), overflow='over', error_opts={ 'marker': 'o', 'color': 'tab:red', 'fillstyle': 'none', }, ax=ax,
sigDS, treename='ffNtuplizer/ffNtuple', processor_instance=MuLJVtxProcessor(), executor=processor.futures_executor, executor_args=dict(workers=12, flatten=False), chunksize=500000, ) ## vertex efficiency fig, ax = plt.subplots(figsize=(8, 6)) hist.plotratio( num=output['vertexgood'][longdecay].sum('dataset').integrate( 'reco', 'vertexed'), denom=output['vertexgood'][longdecay].sum('dataset').integrate( 'reco', 'inclusive'), error_opts={ 'marker': 'o', 'xerr': [5 / 2, 65 / 2, 630 / 2] }, ax=ax, ) ax.set_xscale('symlog') ax.autoscale(axis='both', tight=True) ax.set_ylim([0.9, 1.02]) ax.set_yticks(np.arange(0.9, 1.02, 0.02)) ax.set_xticks([0, 1, 2, 3, 4, 5, 10, 20, 30, 50, 70, 100, 300, 500, 700]) ax.set_xticklabels([ '0', '$10^0$', '2', '3', '4', '5', '$10^1$', '20', '30', '50', '70', '$10^2$', '300', '500', '700' ]) ax.grid(axis='y', ls='--')
def plotWithRatio(h,
                  hData,
                  overlay,
                  stacked=True,
                  density=False,
                  invertStack=True,
                  lumi=35.9,
                  label="CMS Preliminary",
                  colors=None,
                  ratioRange=[0.5, 1.5],
                  xRange=None,
                  yRange=None,
                  logY=False,
                  extraText=None,
                  leg='upper right',
                  binwnorm=None):
    """Draw a (stacked) histogram, optionally with data and a ratio panel.

    :param h: histogram drawn along ``overlay``
    :param hData: data histogram, or None for a single-panel plot
    :param overlay: name of the axis whose bins are overlaid/stacked
    :param stacked: stack the entries; when False the fill and uncertainty
        styling is dropped
    :param density: forwarded to ``hist.plot1d``
    :param invertStack: reverse the bin order of the first axis of ``h``
        (and the colour list) before plotting
    :param lumi: luminosity in fb^-1 for the corner label
    :param label: accepted for compatibility; the body does not use it and
        always writes "CMS Preliminary"
    :param colors: optional list of colours for the property cycle
    :param ratioRange: ``(low, high)`` y range of the ratio panel (only read)
    :param xRange: optional ``(low, high)`` x range of the main panel
    :param yRange: optional ``(low, high)`` y range of the main panel
    :param logY: use a logarithmic y axis
    :param extraText: optional text drawn in the upper-left of the panel
    :param leg: legend placement: ``"right"`` (outside, right of the axes),
        ``"upper right"``, ``"upper left"``, any other matplotlib ``loc``
        string, or None for no legend
    :param binwnorm: forwarded to ``hist.plot1d``; also used in the y label
    """
    from cycler import cycler

    # make a nice ratio plot
    plt.rcParams.update({
        'font.size': 14,
        'axes.titlesize': 18,
        'axes.labelsize': 18,
        'xtick.labelsize': 12,
        'ytick.labelsize': 12
    })
    if hData is not None:
        fig, (ax, rax) = plt.subplots(2,
                                      1,
                                      figsize=(7, 7),
                                      gridspec_kw={"height_ratios": (3, 1)},
                                      sharex=True)
        fig.subplots_adjust(hspace=.07)
    else:
        fig, ax = plt.subplots(1, 1, figsize=(7, 7))

    # Colour cycler for the fill patches
    # http://colorbrewer2.org/#type=qualitative&scheme=Paired&n=6
    if colors is not None:
        if invertStack:
            # Keep one colour per overlay entry, in reversed order.
            _n = len(h.identifiers(overlay)) - 1
            colors = colors[_n::-1]
        ax.set_prop_cycle(cycler(color=colors))

    fill_opts = {'edgecolor': (0, 0, 0, 0.3), 'alpha': 0.8}
    error_opts = {
        'label': 'Stat. Unc.',
        'hatch': '///',
        'facecolor': 'none',
        'edgecolor': (0, 0, 0, .5),
        'linewidth': 0
    }
    if not stacked:
        error_opts = None
        fill_opts = None
    data_err_opts = {
        'linestyle': 'none',
        'marker': '.',
        'markersize': 10.,
        'color': 'k',
        'elinewidth': 1,
    }

    # NOTE(review): this reverses the private bin order of the caller's
    # histogram in place and does not restore it (the restoring code is
    # commented out in the original), so repeated calls on the same `h`
    # alternate the order. Left unchanged - confirm whether callers rely on it.
    if invertStack:
        if isinstance(h._axes[0], hist.hist_tools.Cat):
            h._axes[0]._sorted.reverse()

    hist.plot1d(h,
                overlay=overlay,
                ax=ax,
                clear=False,
                stack=stacked,
                density=density,
                line_opts=None,
                fill_opts=fill_opts,
                error_opts=error_opts,
                binwnorm=binwnorm)

    if hData is not None:
        hist.plot1d(hData,
                    ax=ax,
                    clear=False,
                    error_opts=data_err_opts,
                    binwnorm=binwnorm)

    ax.autoscale(axis='x', tight=True)
    ax.set_ylim(0, None)

    if binwnorm is not None:
        ax.set_ylabel(f"<Counts/{binwnorm}>")
        # Append the units when the x label carries them in square brackets.
        if '[' in ax.get_xlabel():
            units = ax.get_xlabel().split('[')[-1].split(']')[0]
            ax.set_ylabel(f"<Counts / {binwnorm} {units}>")

    ax.set_xlabel(None)

    if leg is not None:
        # leg -> (bbox_to_anchor, loc)
        placements = {
            "right": ((1., 1.), 'upper left'),
            "upper right": ((1., 1.), 'upper right'),
            "upper left": ((0., 1.), 'upper left'),
        }
        handles, labels = ax.get_legend_handles_labels()
        if hData is not None:
            # Move the data entry (drawn last) to the front, labelled 'Data'.
            handles = handles[-1:] + handles[:-1]
            labels = ['Data'] + labels[:-1]
        placement = placements.get(leg) if isinstance(leg, str) else None
        if placement is not None:
            leg_anchor, leg_loc = placement
            ax.legend(handles,
                      labels,
                      bbox_to_anchor=leg_anchor,
                      loc=leg_loc)
        else:
            # Any other value is handed to matplotlib as `loc`; the original
            # raised NameError here because no anchor/loc had been set.
            ax.legend(handles, labels, loc=leg)

    if hData is not None:
        hist.plotratio(hData,
                       h.sum(overlay),
                       ax=rax,
                       error_opts=data_err_opts,
                       denom_fill_opts={},
                       guide_opts={},
                       unc='num')
        rax.set_ylabel('Ratio')
        rax.set_ylim(ratioRange[0], ratioRange[1])

    if logY:
        ax.set_yscale("log")
        ax.set_ylim(1, ax.get_ylim()[1] * 5)

    if xRange is not None:
        ax.set_xlim(xRange[0], xRange[1])
    if yRange is not None:
        ax.set_ylim(yRange[0], yRange[1])

    plt.text(0.,
             1.,
             r"$\bf{CMS}$ Preliminary",
             fontsize=16,
             horizontalalignment='left',
             verticalalignment='bottom',
             transform=ax.transAxes)

    if extraText is not None:
        plt.text(0.02,
                 .99,
                 extraText,
                 fontsize=16,
                 horizontalalignment='left',
                 verticalalignment='top',
                 transform=ax.transAxes)
        # NOTE(review): nesting reconstructed from whitespace-collapsed
        # source; this is taken to be head room for the extra text.
        ax.set_ylim(0, ax.get_ylim()[1] * 1.1)

    plt.text(1.,
             1.,
             r"%.1f fb$^{-1}$ (13 TeV)" % (lumi),
             fontsize=16,
             horizontalalignment='right',
             verticalalignment='bottom',
             transform=ax.transAxes)
def plot_data_mc(self, inclusive, channel, region, fig, var, gs, year='2016', normalize=True, logy=True, get_rates=False, mc_factor=1, save_to=''):
    """Draw a two-panel data/MC comparison for ``var`` and optionally save it.

    The top panel shows the grouped backgrounds (stacked when possible), the
    ggH and VBF signal histograms and the data points; the bottom panel shows
    the data/MC ratio.

    Args:
        inclusive: if true, use every background source and sum over the
            'region' axis; otherwise use the sources listed for ``region``.
        channel: identifier used to slice the channel axis of the accumulator.
        region: region identifier; also the key into ``self.norms``,
            ``self.datasets``, ``self.hist_dict``, ``self.hist_syst`` and
            ``self.rates`` when ``get_rates`` is true.
        fig: matplotlib figure to draw on (cleared first when ``save_to`` is set).
        var: name of the variable axis, and key into ``self.accumulators``.
        gs: grid spec providing the two panels; replaced by a freshly built
            one when ``save_to`` is set.
        year: passed to the CMS label and used to look up
            ``parameters["dnn_max"]`` for DNN-score variables.
        normalize: if true, scale the total background to the data integral.
        logy: if true, use a log y axis with fixed limits on the top panel.
        get_rates: if true, rescale every dataset to ``self.norms[region]``
            and store per-dataset histograms, sumw2 and yields on ``self``.
        mc_factor: extra multiplicative factor applied to MC histograms.
        save_to: path prefix for the output PNG; empty means do not save.

    Side effects:
        Adds a weight of 1 to ``self.weights_by_ds`` for every background
        sample that has none; fills ``self.datasets``, ``self.hist_dict``,
        ``self.hist_syst`` and ``self.rates`` when ``get_rates`` is true;
        prints yields; draws on ``fig``.

    NOTE(review): the source of this method had lost its indentation. Two
    scoping choices below were reconstructed from context and are marked
    with NOTE(review) comments -- please confirm them.
    """
    variable = get_variable(var)
    nominal = self.weight_option

    # ------------------------------------------------------------------
    # Dataset grouping. The values must stay lists: they are handed to
    # Hist.group() as identifier lists.
    # ------------------------------------------------------------------
    data_sources = {
        'data': ['data', 'data_A', 'data_B', 'data_C', 'data_D', 'data_E', 'data_F', 'data_G', 'data_H']
    }
    top_samples = ['ttjets_dl', 'ttjets_sl', 'ttw', 'ttz', 'st_tw_top', 'st_tw_antitop']
    diboson_samples = ['ww_2l2nu', 'wz_2l2q', 'wz_1l1nu2q', 'wz_3lnu', 'www', 'wwz', 'wzz', 'zzz']
    higgs_window_sources = {
        'DY': ['dy_m105_160_amc', 'dy_m105_160_vbf_amc'],
        'EWK': [self.ewk_name],
        'TTbar + Single Top': top_samples,
        'VV + VVV': diboson_samples,
    }
    bkg_sources_by_region = {
        "z-peak": {
            'DY': ['dy_0j', 'dy_1j', 'dy_2j'],
            'EWK': ['ewk_lljj_mll50_mjj120'],
            'TTbar + Single Top': top_samples,
            'VV + VVV': diboson_samples,
        },
        "h-sidebands": higgs_window_sources,
        "h-peak": dict(higgs_window_sources),
    }
    all_bkg_sources = {
        'DY': ['dy', 'dy_0j', 'dy_1j', 'dy_2j', 'dy_m105_160_amc', 'dy_m105_160_vbf_amc', 'dy_m105_160_mg', 'dy_m105_160_vbf_mg'],
        'EWK': ['ewk_lljj_mll50_mjj120', 'ewk_lljj_mll105_160', 'ewk_lljj_mll105_160_ptj0'],
        'TTbar + Single Top': top_samples,
        'VV + VVV': diboson_samples,
    }

    # ------------------------------------------------------------------
    # Work on copies so the in-place scale() calls below never touch
    # self.accumulators.
    # ------------------------------------------------------------------
    accumulators_copy = {}
    if inclusive:
        bkg_sources = all_bkg_sources
        # The nominal histogram keeps its 'region' axis here (it is summed
        # later); the systematic variations are summed over 'region' now.
        accumulators_copy[nominal] = self.accumulators[var][:, :, channel, nominal].copy()
        for syst in self.syst_sources:
            accumulators_copy[f'{syst}_up'] = self.accumulators[var].sum('region')[:, channel, f'{syst}_up'].copy()
            accumulators_copy[f'{syst}_down'] = self.accumulators[var].sum('region')[:, channel, f'{syst}_down'].copy()
    else:
        bkg_sources = bkg_sources_by_region[region]
        accumulators_copy[nominal] = self.accumulators[var][:, region, channel, nominal].copy()
        for syst in self.syst_sources:
            accumulators_copy[f'{syst}_up'] = self.accumulators[var][:, region, channel, f'{syst}_up'].copy()
            accumulators_copy[f'{syst}_down'] = self.accumulators[var][:, region, channel, f'{syst}_down'].copy()

    # Flat list of background samples; samples without a weight get 1.
    all_bkg = []
    for bkgs in bkg_sources.values():
        for b in bkgs:
            all_bkg.append(b)
            if b not in self.weights_by_ds:
                self.weights_by_ds[b] = 1

    rate_datasets = all_bkg + data_sources['data'] + [self.ggh_name, self.vbf_name]

    # Per-dataset factors that bring each nominal integral to self.norms[region].
    rescale = {}
    if get_rates:
        for b in rate_datasets:
            accum = accumulators_copy[nominal][b].sum('region').sum('channel').sum('dataset').values()
            if not accum:
                # Dataset absent from the accumulator: nothing to rescale.
                continue
            integral = accum[(nominal,)].sum()
            rescale[b] = self.norms[region][b] / integral

    for syst in accumulators_copy:
        if get_rates:
            accumulators_copy[syst].scale(rescale, axis='dataset')
        accumulators_copy[syst].scale(self.weights_by_ds, axis='dataset')

    # ------------------------------------------------------------------
    # Project onto the plotted variable and group datasets.
    # ------------------------------------------------------------------
    if inclusive:
        if var == "dimuon_mass":
            # Only the 'h-sidebands' and 'z-peak' regions enter the data
            # histogram (presumably to keep 'h-peak' blinded -- confirm).
            data_hist = accumulators_copy[nominal][:, ['h-sidebands', 'z-peak'], :, nominal]
        else:
            data_hist = accumulators_copy[nominal]
        data = data_hist.sum('region').sum('channel').sum('syst').group('dataset', hist.Cat("dataset", "Dataset"), data_sources)
        accumulators_copy[nominal] = accumulators_copy[nominal].sum('region')
        bkg = accumulators_copy[nominal].sum('channel').sum('syst').group('dataset', hist.Cat("dataset", "Dataset"), bkg_sources)
        ggh = accumulators_copy[nominal][self.ggh_name].sum('channel').sum('syst')
        vbf = accumulators_copy[nominal][self.vbf_name].sum('channel').sum('syst')
    else:
        collapsed = accumulators_copy[nominal].sum('region').sum('channel').sum('syst')
        data = collapsed.group('dataset', hist.Cat("dataset", "Dataset"), data_sources)
        bkg = collapsed.group('dataset', hist.Cat("dataset", "Dataset"), bkg_sources)
        ggh = accumulators_copy[nominal][self.ggh_name].sum('region').sum('channel').sum('syst')
        vbf = accumulators_copy[nominal][self.vbf_name].sum('region').sum('channel').sum('syst')

    # ------------------------------------------------------------------
    # Store histograms, sumw2 and yields per dataset.
    # ------------------------------------------------------------------
    if get_rates:
        self.datasets[region] = rate_datasets
        # First element of every sparse key is the dataset name; build the
        # lookup once instead of once per dataset.
        present = {key[0] for key in accumulators_copy[nominal].values()}
        for b in self.datasets[region]:
            if b not in present:
                continue
            projected = accumulators_copy[nominal][b].sum('region').sum('channel').sum('dataset')
            histogram = projected.values()[(nominal,)] * mc_factor
            sumw2 = list(projected._sumw2.values())[0] * mc_factor
            self.hist_dict[region][b] = {'hist': histogram, 'sumw2': sumw2}
            self.hist_syst[region][b] = {}
            self.rates[region][b] = histogram.sum()
            for syst in self.syst_sources:
                for direction, suffix in (('up', 'Up'), ('down', 'Down')):
                    variation = f'{syst}_{direction}'
                    shifted = accumulators_copy[variation][b].sum('region').sum('channel').sum('dataset')
                    self.hist_syst[region][b][syst + suffix] = {
                        'hist': shifted.values()[(variation,)] * mc_factor,
                        'sumw2': list(shifted._sumw2.values())[0] * mc_factor,
                    }

    # values() is an empty dict when nothing was filled, so these double as
    # "is there anything to draw" flags and as the pre-scaling totals.
    data_is_valid = data.sum(var).sum('dataset').values()
    bkg_is_valid = bkg.sum(var).sum('dataset').values()
    bkgs_by_source = bkg.sum(var).values()
    print(bkgs_by_source)
    ggh_is_valid = ggh.sum(var).sum('dataset').values()
    vbf_is_valid = vbf.sum(var).sum('dataset').values()

    bkg.axis('dataset').sorting = 'integral'  # sort backgrounds by event yields

    if normalize and data_is_valid and bkg_is_valid:
        data_int = data_is_valid[()].sum()
        bkg_int = bkg_is_valid[()].sum()
        if bkg_int != 0:
            bkg_sf = data_int / bkg_int
            bkg.scale(bkg_sf)
            print(bkg_sf)
        else:
            # A zero background integral would give an inf/nan scale factor
            # and wipe out the histogram; leave the background untouched.
            print('Background integral is zero, skipping normalization to data')

    # The totals printed below are the values taken before any scaling above.
    if bkg_is_valid:
        bkg.scale(mc_factor)
        print(f'Total bkg: {bkg_is_valid[()]}')
    if ggh_is_valid:
        ggh.scale(mc_factor)
        print(f'Total ggh: {ggh_is_valid[()]}')
    if vbf_is_valid:
        vbf.scale(mc_factor)
        print(f'Total vbf: {vbf_is_valid[()]}')
    if data_is_valid:
        print(f'Total data: {data_is_valid[()]}')

    data_opts = {'color': 'k', 'marker': '.', 'markersize': 15}
    stack_fill_opts = {'alpha': 0.8, 'edgecolor': (0, 0, 0)}
    stack_error_opts = {'label': 'Stat. unc.', 'facecolor': (0, 0, 0, .4), 'hatch': '', 'linewidth': 0}

    # NOTE(review): assumed that the whole figure/grid setup is conditional on
    # save_to, with the caller-supplied fig/gs used as-is otherwise -- confirm.
    if save_to:
        fig.clf()
        plotsize = 12
        ratio_plot_size = 0.25
        fig.set_size_inches(plotsize, plotsize * (1 + ratio_plot_size))
        gs = fig.add_gridspec(2, 1, height_ratios=[(1 - ratio_plot_size), ratio_plot_size], hspace=.05)

    # ------------------------------------------------------------------
    # Top panel: Data vs. MC plot
    # ------------------------------------------------------------------
    plt1 = fig.add_subplot(gs[0])

    if bkg_is_valid:
        bkg_plot_opts = dict(ax=plt1, overlay='dataset', overflow='all',
                             fill_opts=stack_fill_opts, error_opts=stack_error_opts)
        try:
            hist.plot1d(bkg, stack=True, **bkg_plot_opts)
        except Exception:
            # Best-effort fallback: if stacking fails, draw the backgrounds
            # unstacked. Exception (not a bare except) keeps Ctrl-C and
            # SystemExit working.
            hist.plot1d(bkg, stack=False, **bkg_plot_opts)

    # Signal and data are drawn one by one, explicitly on the top panel and
    # without clearing it, so they overlay the backgrounds regardless of
    # which axes pyplot currently considers active.
    if ggh_is_valid:
        hist.plot1d(ggh, ax=plt1, clear=False, overlay='dataset', overflow='all',
                    line_opts={'linewidth': 2, 'color': 'lime'}, error_opts=None)
    if vbf_is_valid:
        hist.plot1d(vbf, ax=plt1, clear=False, overlay='dataset', overflow='all',
                    line_opts={'linewidth': 2, 'color': 'b'}, error_opts=None)
    if data_is_valid:
        hist.plot1d(data, ax=plt1, clear=False, overlay='dataset', overflow='all',
                    line_opts=None, error_opts=data_opts)
    hep.cms.cmslabel(ax=plt1, data=bool(data_is_valid), paper=False, year=year)

    if logy:
        plt1.set_yscale('log')
        plt1.set_ylim(0.001, 1e8)

    plt1.set_xlabel('')
    if 'dnn_score' in var:
        plt1.set_xlim(0, parameters["dnn_max"][year])
    elif 'dimuon_mass' not in var:
        plt1.set_xlim(variable.xmin, variable.xmax)
    plt1.tick_params(axis='x', labelbottom=False)
    plt1.legend(prop={'size': 'xx-small'})

    # ------------------------------------------------------------------
    # Bottom panel: Data/MC ratio plot
    # ------------------------------------------------------------------
    plt2 = fig.add_subplot(gs[1], sharex=plt1)

    if data_is_valid and bkg_is_valid:
        hist.plotratio(num=data.sum('dataset'), ax=plt2, denom=bkg.sum('dataset'),
                       error_opts=data_opts, denom_fill_opts={}, guide_opts={}, unc='num')

    # NOTE(review): assumed the ratio-panel cosmetics apply even when the
    # ratio itself could not be drawn -- confirm.
    plt2.axhline(1, ls='--')
    plt2.set_ylim([0.6, 1.4])
    plt2.set_ylabel('Data/MC')

    lbl = plt2.get_xlabel()
    lbl = lbl if lbl else var
    selection = 'inclusive' if inclusive else region
    plt2.set_xlabel(f'{lbl}, {selection}, {channel} channel')

    if save_to:
        out_name = f"{var}_{channel}_{selection}_{self.suffix}_{self.year}.png"
        fig.savefig(save_to + out_name)
        print(f"Saving {out_name}")
# Fill the 'deltar' histogram once with every entry, then once per masked subset.
deltar_hist = hists['deltar']
deltar_hist.fill(sample=str_all, h1=hvec.pt, weight=weightsW)
for subset_name, subset_mask in ((str_8, ak8_4q), (str_15, ak15_4q)):
    deltar_hist.fill(sample=subset_name, h1=hvec.pt[subset_mask], weight=weightsW[subset_mask])

# hists['genHiggsPt'].fill(sample='HH4b full', h1=higgs_pt[:, 0], weight=weights['fullHH4b'])

# Upper panel: the filled distributions. Lower panel: ratio of each subset to the total.
fig, (axs, rax) = plt.subplots(
    2,
    1,
    gridspec_kw={"height_ratios": (3, 1)},
    sharex=True,
)

hist.plot1d(deltar_hist, line_opts=line_opts, ax=axs)
axs.set_xlabel(None)
# axs.set_ylim(0, 5)
axs.legend(fancybox=True, shadow=True, frameon=True, prop={'size': 16})

# data_err_opts_c is recoloured in place before each ratio is drawn, so it
# is left with the last colour ('orange') afterwards.
total_projection = deltar_hist[str_all].sum('sample')
for subset_name, ratio_color, ratio_label in (
    (str_8, 'blue', '0.8'),
    (str_15, 'orange', '1.5'),
):
    data_err_opts_c['color'] = ratio_color
    hist.plotratio(
        deltar_hist[subset_name].sum('sample'),
        total_projection,
        ax=rax,
        error_opts=data_err_opts_c,
        unc='num',
        clear=False,
        label=ratio_label,
    )

rax.legend(fancybox=True, shadow=True, frameon=True, prop={'size': 16})
rax.set_ylabel("captured/total")
rax.set_ylim(0, 1)
rax.grid(which='both')

plt.savefig('figs/deltar.pdf', bbox_inches='tight')
plt.show()