def test_train(test, train, y_test, y_train, labels=[], bins=25, node=0, plot_dir=None, weight_test=None, weight_train=None):
    """Compare test and train score distributions of one output node, per class.

    For every entry of ``labels`` (the class index is the position in the
    list) the rows whose truth value equals that index are selected, column
    ``node`` of the train and test scores is compared with
    ``scipy.stats.kstest`` and both normalised histograms are drawn on a
    fresh figure (test dashed, train solid, same colour per class).

    Args:
        test, train: 2-D score arrays indexed as ``[:, node]``.
        y_test, y_train: per-row class indices, compared with ``== i``.
        labels: class names; only classes listed here are processed.
        bins: binning forwarded to ``Hist1D``.
        node: column of the score arrays to plot.
        plot_dir: if truthy, the directory is prepared with
            ``finalizePlotDir`` and ``score_node_<node>.png/.pdf`` is saved.
        weight_test, weight_train: optional per-row weights; when ``None``
            the histograms are filled unweighted.

    Returns:
        dict mapping each label to ``(p_value, ks_statistic)`` -- note the
        order: p-value first.
    """
    ks = {}
    fig, ax = plt.subplots(1, 1, figsize=(10, 10))

    def _weight_kwargs(weights, mask):
        # The weight arguments default to None; indexing None would raise a
        # TypeError, so only forward ``weights`` when they were supplied.
        return {} if weights is None else {'weights': weights[mask]}

    h = {}
    for i, label in enumerate(labels):
        in_train = (y_train == i)
        in_test = (y_test == i)
        train_scores = train[:, node][in_train]
        test_scores = test[:, node][in_test]

        # Passing a second sample makes kstest a two-sample comparison.
        _ks, _p = scipy.stats.kstest(train_scores, test_scores)
        ks[label] = (_p, _ks)

        h[label+'_test'] = Hist1D(
            test_scores,
            bins=bins,
            **_weight_kwargs(weight_test, in_test)
        ).normalize()
        h[label+'_train'] = Hist1D(
            train_scores,
            bins=bins,
            label=label+' (p=%.2f, KS=%.2f)'%(_p, _ks),
            **_weight_kwargs(weight_train, in_train)
        ).normalize()

        h[label+'_test'].plot(color=colors[i], histtype="step", ls='--', linewidth=2)
        h[label+'_train'].plot(color=colors[i], histtype="step", linewidth=2)

    if plot_dir:
        finalizePlotDir(plot_dir)
        fig.savefig("{}/score_node_{}.png".format(plot_dir, node))
        fig.savefig("{}/score_node_{}.pdf".format(plot_dir, node))

    return ks
def test_train_cat(test, train, y_test, y_train, labels=[], n_cat=5, plot_dir=None, weight_test=None, weight_train=None):
    """Compare the predicted-category distributions of test and train, per class.

    The predicted category of each row is ``argmax(axis=1)`` of the score
    array.  For every entry of ``labels`` (class index = position in the
    list) the predicted categories of the rows with that truth value are
    compared between train and test with ``scipy.stats.kstest`` and drawn as
    normalised histograms (test dashed, train solid).

    Args:
        test, train: 2-D score arrays; ``argmax(axis=1)`` gives the category.
        y_test, y_train: per-row class indices, compared with ``== i``.
        labels: class names; only classes listed here are processed.
        n_cat: number of categories; bin edges are ``-0.5 ... n_cat - 0.5``
            so that every integer category sits in the centre of a bin.
        plot_dir: if truthy, the directory is prepared with
            ``finalizePlotDir`` and ``categories.png/.pdf`` is saved.
        weight_test, weight_train: optional per-row weights; when ``None``
            the histograms are filled unweighted.

    Returns:
        dict mapping each label to ``(p_value, ks_statistic)`` -- note the
        order: p-value first.
    """
    ks = {}
    bins = [x-0.5 for x in range(n_cat+1)]
    fig, ax = plt.subplots(1, 1, figsize=(10, 10))

    def _weight_kwargs(weights, mask):
        # The weight arguments default to None; indexing None would raise a
        # TypeError, so only forward ``weights`` when they were supplied.
        return {} if weights is None else {'weights': weights[mask]}

    # The predicted categories do not depend on the class being plotted.
    train_cat = train.argmax(axis=1)
    test_cat = test.argmax(axis=1)

    h = {}
    for i, label in enumerate(labels):
        in_train = (y_train == i)
        in_test = (y_test == i)

        # Passing a second sample makes kstest a two-sample comparison.
        _ks, _p = scipy.stats.kstest(train_cat[in_train], test_cat[in_test])
        ks[label] = (_p, _ks)

        h[label+'_test'] = Hist1D(
            test_cat[in_test],
            bins=bins,
            **_weight_kwargs(weight_test, in_test)
        ).normalize()
        h[label+'_train'] = Hist1D(
            train_cat[in_train],
            bins=bins,
            label=label+' (p=%.2f, KS=%.2f)'%(_p, _ks),
            **_weight_kwargs(weight_train, in_train)
        ).normalize()

        h[label+'_test'].plot(color=colors[i], histtype="step", ls='--', linewidth=2)
        h[label+'_train'].plot(color=colors[i], histtype="step", linewidth=2)

    ax.set_ylabel('a.u.')
    ax.set_xlabel('category')
    ax.set_ylim(0, 1/n_cat*5)

    if plot_dir:
        finalizePlotDir(plot_dir)
        fig.savefig("{}/categories.png".format(plot_dir))
        fig.savefig("{}/categories.pdf".format(plot_dir))

    return ks
def saveFig( fig, ax, rax, path, name, scale='linear', shape=False, y_max=-1 ):
    """Finalise an existing figure (scale, legend, header text) and save it.

    The figure is written as ``<path>/<scale>/<name>.pdf`` and ``.png``; the
    output directory is prepared with ``finalizePlotDir``.

    Args:
        fig: figure to annotate and save.
        ax: main axes; its y-scale, y-label and legend are set here.
        rax: ratio axes or a falsy value; if truthy it gets the
            'Obs./Pred.' label and a fixed 0.5-1.5 range.
        path: base output directory.
        name: file name without extension.
        scale: y-axis scale passed to ``ax.set_yscale``; also the name of
            the sub-directory the plot is stored in.
        shape: selects the y-limits used for shape (normalised) plots on
            non-linear scales.
        y_max: maximum used for the non-linear y-limits; a negative value
            means "unknown" and leaves the limits untouched unless ``shape``
            is set.
    """
    outdir = os.path.join(path, scale)
    finalizePlotDir(outdir)

    ax.set_yscale(scale)
    ax.set_ylabel('Events')

    # On a linear scale the automatic limits are kept (the explicit limit was
    # permanently disabled in the previous implementation as well).  On any
    # other scale explicit limits are applied, unless y_max is unknown and
    # this is not a shape plot.
    if scale != 'linear' and not (y_max < 0 and not shape):
        ax.set_ylim(0.000005 if shape else 0.05, 3 if shape else 300*y_max)

    handles, labels = ax.get_legend_handles_labels()
    new_labels = []
    WHcount = 0
    for handle, label in zip(handles, labels):
        if WHcount > 0 and label == 'WH':
            # Any further 'WH' entry after the first one gets a fixed label
            # and is drawn in black.
            text, color = 'WH (1000,0)', '#000000'
        else:
            # Exactly one legend text per handle: fall back to the raw label
            # when no pretty name is known.  Skipping the entry instead would
            # shift all following labels onto the wrong handles.
            try:
                text = my_labels[label]
            except KeyError:
                text = label
            color = None
            if not label == 'pseudodata':
                try:
                    color = colors[label]
                except KeyError:
                    color = None
            if label == 'WH':
                WHcount += 1

        new_labels.append(text)
        if color is not None:
            try:
                handle.set_color(color)
            except AttributeError:
                # Best effort: presumably some legend handles (containers)
                # have no set_color -- keep their colour as it is.
                pass

    if rax:
        plt.subplots_adjust(hspace=0)
        rax.set_ylabel('Obs./Pred.')
        rax.set_ylim(0.5, 1.5)

    ax.legend(title='', ncol=2, handles=handles, labels=new_labels, frameon=False)

    fig.text(0.15, 0.995, '$\\bf{CMS}$', fontsize=20, horizontalalignment='left', verticalalignment='bottom', transform=ax.transAxes )
    fig.text(0.30, 1., '$\\it{Simulation}$', fontsize=14, horizontalalignment='left', verticalalignment='bottom', transform=ax.transAxes )
    fig.text(0.8, 1., '13 TeV', fontsize=14, horizontalalignment='left', verticalalignment='bottom', transform=ax.transAxes )

    fig.savefig(os.path.join(outdir, "{}.pdf".format(name)))
    fig.savefig(os.path.join(outdir, "{}.png".format(name)))
def get_cat_plot(X, y, labels=[], n_cat=5, plot_dir=None, weight=None):
    """Plot the (unnormalised) predicted-category distribution for each class.

    The predicted category of each row is ``X.argmax(axis=1)``.  For every
    entry of ``labels`` (class index = position in the list) the rows with
    that truth value are histogrammed and drawn as a step histogram.

    Args:
        X: 2-D score array; ``argmax(axis=1)`` gives the category.
        y: per-row class indices, compared with ``== i``.
        labels: class names; only classes listed here are drawn.
        n_cat: number of categories; bin edges are ``-0.5 ... n_cat - 0.5``.
        plot_dir: if truthy, the directory is prepared with
            ``finalizePlotDir`` and ``abs_categories.png/.pdf`` is saved.
        weight: optional per-row weights; when ``None`` the histograms are
            filled unweighted.

    Returns:
        None.  The y-axis range is fixed to 0-200.
    """
    bins = [x-0.5 for x in range(n_cat+1)]
    fig, ax = plt.subplots(1, 1, figsize=(10, 10))

    # The predicted categories do not depend on the class being plotted.
    categories = X.argmax(axis=1)

    h = {}
    for i, label in enumerate(labels):
        selected = (y == i)
        # ``weight`` defaults to None; indexing None would raise a TypeError,
        # so only forward ``weights`` when they were supplied.
        weight_kwargs = {} if weight is None else {'weights': weight[selected]}

        h[label+'_train'] = Hist1D(categories[selected], bins=bins, label=label, **weight_kwargs)
        h[label+'_train'].plot(color=colors[i], histtype="step", linewidth=2)

    ax.set_ylabel('a.u.')
    ax.set_xlabel('category')
    ax.set_ylim(0, 200)

    if plot_dir:
        finalizePlotDir(plot_dir)
        fig.savefig("{}/abs_categories.png".format(plot_dir))
        fig.savefig("{}/abs_categories.pdf".format(plot_dir))
def makePlot(output, histo, axis, bins=None, data=[], normalize=True, log=False, save=False, axis_label=None, ratio_range=None, upHists=[], downHists=[], shape=False, ymax=False, new_colors=colors, new_labels=my_labels, order=None, signals=[], omit=[], lumi=60.0, binwnorm=None, overlay=None, use_label=True, y_axis_label='Events'):
    """Draw a background stack (optionally with signals, an overlay and data).

    The histogram is taken from ``output[histo]`` (or ``output`` itself when
    ``histo`` is None), projected onto ``axis`` and 'dataset', optionally
    rebinned, and plotted with ``hist.plot1d``.  When ``data`` is non-empty a
    ratio panel is added with ``hist.plotratio``.

    Args:
        output: mapping of histogram names to histograms, or a histogram
            when ``histo`` is None.
        histo: key into ``output`` or None.
        axis: name of the axis to plot.
        bins: optional new binning passed to ``rebin``.
        data: list of dataset names treated as data (joined into a regex).
        normalize: scale every non-data process by Data/MC.  Only applied
            when ``data`` is non-empty.
        log: use a logarithmic y-axis.
        save: falsy, or the output path without extension; ``.pdf`` and
            ``.png`` are written.
        axis_label: x-axis label.
        ratio_range: ``(low, high)`` for the ratio panel, default 0.1-1.9.
        upHists, downHists: suffixes; ``output[histo + '_' + suffix]`` is
            passed to ``addUncertainties``.
        shape: normalise every process to unit area and draw unstacked lines.
        ymax: explicit upper y-limit; falsy means automatic.
        new_colors, new_labels: mappings from dataset name to colour / legend
            text.
        order: stacking order, defaults to the list of non-data processes.
        signals: dataset names drawn as unstacked lines and excluded from the
            background selection.
        omit: dataset names excluded from the background selection.
        lumi: luminosity printed in the header.
        binwnorm: forwarded to ``hist.plot1d``; when truthy no uncertainty
            bands are added.
        overlay: optional additional histogram drawn as unstacked lines.
        use_label: draw the CMS / luminosity header.
        y_axis_label: y-axis label.
    """
    if save:
        finalizePlotDir('/'.join(save.split('/')[:-1]))

    # Backgrounds are everything that is neither data, signal nor omitted
    # (negative look-ahead); data is selected by name.
    excluded = data + signals + omit
    if len(excluded) > 0:
        bkg_sel = re.compile('(?!(%s))' % ('|'.join(excluded)))
    else:
        bkg_sel = re.compile('')
    data_sel = re.compile('|'.join(data))

    source = output if histo is None else output[histo]
    processes = [p[0] for p in source.values().keys() if not p[0] in data]
    histogram = source.copy()

    histogram = histogram.project(axis, 'dataset')
    if overlay:
        overlay = overlay.project(axis, 'dataset')
    if bins:
        histogram = histogram.rebin(axis, bins)
        if overlay:
            overlay = overlay.rebin(axis, bins)

    bkg_values = histogram[bkg_sel].sum("dataset").values(overflow='over')[()]
    y_max = bkg_values.max()
    print(y_max)
    MC_total = bkg_values.sum()

    Data_total = 0
    if data:
        Data_total = histogram[data_sel].sum("dataset").values(overflow='over')[()].sum()

    print("Data:", round(Data_total, 0), "MC:", round(MC_total, 2))

    # Normalising to data only makes sense when data is present.  ``data_sel``
    # is a compiled pattern and therefore always truthy, so the check has to
    # be on ``data`` -- otherwise every process would be scaled by 0/MC.
    if normalize and data:
        scales = {process: Data_total / MC_total for process in processes}
        histogram.scale(scales, axis='dataset')
    else:
        scales = {}

    if shape:
        scales = {
            process: 1 / histogram[process].sum("dataset").values(overflow='over')[()].sum()
            for process in processes
        }
        histogram.scale(scales, axis='dataset')

    if data:
        fig, (ax, rax) = plt.subplots(2, 1, figsize=(10, 10), gridspec_kw={"height_ratios": (3, 1)}, sharex=True)
    else:
        fig, ax = plt.subplots(1, 1, figsize=(10, 10))

    if signals:
        for sig in signals:
            ax = hist.plot1d(histogram[sig], overlay="dataset", ax=ax, stack=False, overflow='over', clear=False, line_opts=line_opts, fill_opts=None, binwnorm=binwnorm)

    if overlay:
        ax = hist.plot1d(overlay, overlay="dataset", ax=ax, stack=False, overflow='over', clear=False, line_opts=line_opts, fill_opts=None, binwnorm=binwnorm)

    if shape:
        ax = hist.plot1d(histogram[bkg_sel], overlay="dataset", ax=ax, stack=False, overflow='over', clear=False, line_opts=line_opts, fill_opts=None, binwnorm=binwnorm)
    else:
        ax = hist.plot1d(histogram[bkg_sel], overlay="dataset", ax=ax, stack=True, overflow='over', clear=False, line_opts=None, fill_opts=fill_opts, order=(order if order else processes), binwnorm=binwnorm)

    if data:
        ax = hist.plot1d(histogram[data_sel].sum("dataset"), ax=ax, overflow='over', error_opts=data_err_opts, clear=False, binwnorm=binwnorm)

        hist.plotratio(
            num=histogram[data_sel].sum("dataset"),
            denom=histogram[bkg_sel].sum("dataset"),
            ax=rax,
            error_opts=data_err_opts,
            denom_fill_opts=None,  # triggers this: https://github.com/CoffeaTeam/coffea/blob/master/coffea/hist/plot.py#L376
            guide_opts={},
            unc='num',
            overflow='over')

    handles, labels = ax.get_legend_handles_labels()
    updated_labels = []
    for handle, label in zip(handles, labels):
        if label is None or label == 'None':
            # The summed data histogram has no dataset name.
            text, color = "Observation", '#000000'
        else:
            # Exactly one legend text per handle: fall back to the raw label
            # when no pretty name is known.  Skipping the entry instead would
            # shift all following labels onto the wrong handles.
            try:
                text = new_labels[label]
            except KeyError:
                text = label
            try:
                color = new_colors[label]
            except KeyError:
                color = None

        updated_labels.append(text)
        if color is not None:
            try:
                handle.set_color(color)
            except AttributeError:
                # Best effort: presumably some legend handles (containers)
                # have no set_color -- keep their colour as it is.
                pass

    if data:
        if ratio_range:
            rax.set_ylim(*ratio_range)
        else:
            rax.set_ylim(0.1, 1.9)
        rax.set_ylabel('Obs./Pred.')
        if axis_label:
            rax.set_xlabel(axis_label)

    ax.set_xlabel(axis_label)
    ax.set_ylabel(y_axis_label)

    if not binwnorm:
        if not shape:
            addUncertainties(ax, axis, histogram, bkg_sel, [output[histo + '_' + x] for x in upHists], [output[histo + '_' + x] for x in downHists], overflow='over', rebin=bins, ratio=False, scales=scales)

        if data:
            addUncertainties(rax, axis, histogram, bkg_sel, [output[histo + '_' + x] for x in upHists], [output[histo + '_' + x] for x in downHists], overflow='over', rebin=bins, ratio=True, scales=scales)

    if log:
        ax.set_yscale('log')

    # Head-room above the tallest bin: a factor on linear scale, two decades
    # on log scale.
    y_mult = 1.7 if not log else 100
    if ymax:
        ax.set_ylim(0.01, ymax)
    else:
        y_max = y_max * y_mult * (Data_total / MC_total) if data else y_max * y_mult
        ax.set_ylim(0.01, y_max if not shape else 2)

    ax.legend(
        loc='upper right',
        ncol=2,
        borderaxespad=0.0,
        labels=updated_labels,
        handles=handles,
    )
    plt.subplots_adjust(hspace=0)

    if use_label:
        if len(data) > 0:
            fig.text(0.0, 0.995, '$\\bf{CMS}$ Preliminary', fontsize=25, horizontalalignment='left', verticalalignment='bottom', transform=ax.transAxes)
        else:
            fig.text(0.0, 0.995, '$\\bf{CMS}$ Simulation', fontsize=25, horizontalalignment='left', verticalalignment='bottom', transform=ax.transAxes)
        fig.text(0.6, 0.995, r'$%.1f\ fb^{-1}$ (13 TeV)' % (lumi), fontsize=25, horizontalalignment='left', verticalalignment='bottom', transform=ax.transAxes)

    # The Data/MC annotation is only meaningful when data was plotted.
    if normalize and data:
        fig.text(0.55, 0.65, 'Data/MC = %s' % round(Data_total / MC_total, 2), fontsize=20, horizontalalignment='left', verticalalignment='bottom', transform=ax.transAxes)

    if save:
        fig.savefig("{}.pdf".format(save))
        fig.savefig("{}.png".format(save))
        print("Figure saved in:", save)
def makePlot(output, histo, axis, bins=None, mc_sel=bkgonly, data_sel='MuonEG', normalize=True, log=False, save=False, axis_label=None, ratio_range=None, upHists=[], downHists=[], shape=False, ymax=False, new_colors=colors, new_labels=my_labels, order=None):
    """Draw an MC stack with an optional data overlay and ratio panel.

    The histogram is taken from ``output[histo]`` (or ``output`` itself when
    ``histo`` is None), projected onto ``axis`` and 'dataset', optionally
    rebinned, and plotted with ``hist.plot1d``.  When ``data_sel`` is truthy
    a ratio panel is added with ``hist.plotratio``.

    NOTE(review): another function named ``makePlot`` with a different
    signature is visible earlier in this source; if both live in the same
    module the later definition is the one callers get -- confirm that this
    is intended.

    Args:
        output: mapping of histogram names to histograms, or a histogram
            when ``histo`` is None.
        histo: key into ``output`` or None.
        axis: name of the axis to plot.
        bins: optional new binning passed to ``rebin``.
        mc_sel: selection used to index the simulated datasets.
        data_sel: selection used to index the data; falsy disables the data
            overlay, the ratio panel and the normalisation.  The process
            list itself always excludes the dataset named 'MuonEG'.
        normalize: scale every process by Data/MC (needs ``data_sel``).
        log: use a logarithmic y-axis.
        save: falsy, or the output path without extension; ``.pdf`` and
            ``.png`` are written.
        axis_label: x-axis label.
        ratio_range: ``(low, high)`` for the ratio panel, default 0.1-1.9.
        upHists, downHists: suffixes; ``output[histo + '_' + suffix]`` is
            passed to ``addUncertainties``.
        shape: normalise every process to unit area and draw unstacked lines.
        ymax: explicit upper y-limit; falsy means automatic.
        new_colors, new_labels: mappings from dataset name to colour / legend
            text.
        order: stacking order, defaults to the list of processes.
    """
    if save:
        finalizePlotDir( '/'.join(save.split('/')[:-1]) )

    source = output if histo is None else output[histo]
    processes = [ p[0] for p in source.values().keys() if not p[0]=='MuonEG' ]
    histogram = source.copy()

    histogram = histogram.project(axis, 'dataset')
    if bins:
        histogram = histogram.rebin(axis, bins)

    mc_values = histogram[mc_sel].sum("dataset").values(overflow='over')[()]
    y_max = mc_values.max()
    MC_total = mc_values.sum()

    Data_total = 0
    if data_sel:
        Data_total = histogram[data_sel].sum("dataset").values(overflow='over')[()].sum()

    print ("Data:", round(Data_total,0), "MC:", round(MC_total,2))

    if normalize and data_sel:
        scales = { process: Data_total/MC_total for process in processes }
        histogram.scale(scales, axis='dataset')
    else:
        scales = {}

    if shape:
        scales = { process: 1/histogram[process].sum("dataset").values(overflow='over')[()].sum() for process in processes }
        histogram.scale(scales, axis='dataset')

    if data_sel:
        fig, (ax, rax) = plt.subplots(2,1,figsize=(10,10), gridspec_kw={"height_ratios": (3, 1)}, sharex=True)
    else:
        fig, ax = plt.subplots(1,1,figsize=(10,10) )

    if shape:
        ax = hist.plot1d(histogram[mc_sel], overlay="dataset", ax=ax, stack=False, overflow='over', clear=False, line_opts=line_opts, fill_opts=None)
    else:
        ax = hist.plot1d(histogram[mc_sel], overlay="dataset", ax=ax, stack=True, overflow='over', clear=False, line_opts=None, fill_opts=fill_opts, order=(order if order else processes))

    if data_sel:
        ax = hist.plot1d(histogram[data_sel], overlay="dataset", ax=ax, overflow='over', error_opts=data_err_opts, clear=False)

        hist.plotratio(
            num=histogram[data_sel].sum("dataset"),
            denom=histogram[mc_sel].sum("dataset"),
            ax=rax,
            error_opts=data_err_opts,
            denom_fill_opts=None,  # triggers this: https://github.com/CoffeaTeam/coffea/blob/master/coffea/hist/plot.py#L376
            guide_opts={},
            unc='num',
            overflow='over'
        )

    handles, labels = ax.get_legend_handles_labels()
    updated_labels = []
    for handle, label in zip(handles, labels):
        # Exactly one legend text per handle: fall back to the raw label when
        # no pretty name is known.  Skipping the entry instead would shift
        # all following labels onto the wrong handles.
        try:
            updated_labels.append(new_labels[label])
        except KeyError:
            updated_labels.append(label)

        # The 'MuonEG' (data) entry keeps the colour it was drawn with.
        if not label=='MuonEG':
            try:
                handle.set_color(new_colors[label])
            except (KeyError, AttributeError):
                # Best effort: no colour configured, or the handle has no
                # set_color -- keep its colour as it is.
                pass

    if data_sel:
        if ratio_range:
            rax.set_ylim(*ratio_range)
        else:
            rax.set_ylim(0.1,1.9)
        rax.set_ylabel('Obs./Pred.')
        if axis_label:
            rax.set_xlabel(axis_label)

    ax.set_xlabel(axis_label)
    ax.set_ylabel('Events')

    if not shape:
        addUncertainties(ax, axis, histogram, mc_sel, [output[histo+'_'+x] for x in upHists], [output[histo+'_'+x] for x in downHists], overflow='over', rebin=bins, ratio=False, scales=scales)

    if data_sel:
        addUncertainties(rax, axis, histogram, mc_sel, [output[histo+'_'+x] for x in upHists], [output[histo+'_'+x] for x in downHists], overflow='over', rebin=bins, ratio=True, scales=scales)

    if log:
        ax.set_yscale('log')

    # Head-room above the tallest bin: a factor on linear scale, two decades
    # on log scale.
    y_mult = 1.3 if not log else 100
    if ymax:
        ax.set_ylim(0.01, ymax)
    else:
        ax.set_ylim(0.01, y_max*y_mult if not shape else 2)

    ax.legend(
        loc='upper right',
        ncol=2,
        borderaxespad=0.0,
        labels=updated_labels,
        handles=handles,
    )
    plt.subplots_adjust(hspace=0)

    fig.text(0.0, 0.995, '$\\bf{CMS}$ Preliminary', fontsize=20, horizontalalignment='left', verticalalignment='bottom', transform=ax.transAxes )
    fig.text(0.8, 0.995, '13 TeV', fontsize=20, horizontalalignment='left', verticalalignment='bottom', transform=ax.transAxes )

    # The Data/MC annotation is only meaningful when data was plotted.
    if normalize and data_sel:
        fig.text(0.55, 0.65, 'Data/MC = %s'%round(Data_total/MC_total,2), fontsize=20, horizontalalignment='left', verticalalignment='bottom', transform=ax.transAxes )

    if save:
        fig.savefig("{}.pdf".format(save))
        fig.savefig("{}.png".format(save))
        print ("Figure saved in:", save)