def test_blocks_hist(cmdopt, data_gen):
    """Histogram with Bayesian-blocks binning: regenerate or verify stored answers."""
    result = skh_plt.hist(data_gen[0], bins='blocks', scale='binwidth', color='green')
    if cmdopt == "generate":
        # Persist bin contents/edges as the new reference answers.
        with open(answer_dir + '/answers_blocks_hist.npz', 'wb') as f:
            np.savez(f, bc=result[0], be=result[1])
        plt.title('test_blocks_hist')
        plt.show()
    elif cmdopt == "test":
        # Compare against the previously generated reference.
        answers = np.load(answer_dir + '/answers_blocks_hist.npz')
        assert np.all(result[0] == answers['bc'])
        assert np.all(result[1] == answers['be'])
def test_simple_hist4(cmdopt, data_gen):
    """Weighted, normalized bar histogram: regenerate or verify stored answers."""
    result = skh_plt.hist(data_gen[0], weights=data_gen[2], bins=range(5),
                          normed=True, scale='binwidth', color='red',
                          histtype='bar')
    if cmdopt == "generate":
        # Persist bin contents/edges as the new reference answers.
        with open(answer_dir + '/answers_simple_hist4.npz', 'wb') as f:
            np.savez(f, bc=result[0], be=result[1])
        plt.title('test_simple_hist4')
        plt.show()
    elif cmdopt == "test":
        # Compare against the previously generated reference.
        answers = np.load(answer_dir + '/answers_simple_hist4.npz')
        assert np.all(result[0] == answers['bc'])
        assert np.all(result[1] == answers['be'])
def test_error_bars(cmdopt, data_gen):
    """Scaled histogram with error bars: regenerate or verify stored answers."""
    result = skh_plt.hist(data_gen[0], bins=20, errorbars=True,
                          err_return=True, scale=5)
    if cmdopt == "generate":
        # Persist bin contents, edges, and errors as the new reference.
        with open(answer_dir + '/answers_error_bars.npz', 'wb') as f:
            np.savez(f, bc=result[0], be=result[1], berr=result[2])
        plt.title('test_error_bars')
        plt.show()
    elif cmdopt == "test":
        # Compare against the previously generated reference.
        answers = np.load(answer_dir + '/answers_error_bars.npz')
        assert np.all(result[0] == answers['bc'])
        assert np.all(result[1] == answers['be'])
        assert np.all(result[2] == answers['berr'])
def test_error_bars4(cmdopt, data_gen):
    """Step histogram with Poisson errors: regenerate or verify stored answers."""
    result = skh_plt.hist(data_gen[0], bins=50, errorbars=True,
                          err_return=True, histtype='step',
                          err_type='poisson', suppress_zero=True,
                          scale='binwidth')
    if cmdopt == "generate":
        # Persist bin contents, edges, and errors as the new reference.
        with open(answer_dir + '/answers_error_bars4.npz', 'wb') as f:
            np.savez(f, bc=result[0], be=result[1], berr=result[2])
        plt.title('test_error_bars4')
        plt.show()
    elif cmdopt == "test":
        # Compare against the previously generated reference.
        answers = np.load(answer_dir + '/answers_error_bars4.npz')
        assert np.all(result[0] == answers['bc'])
        assert np.all(result[1] == answers['be'])
        assert np.all(result[2] == answers['berr'])
def test_error_bars2(cmdopt, data_gen):
    """Single-bin normalized histogram with Poisson errors: regenerate or verify."""
    result = skh_plt.hist(data_gen[0], bins=1, errorbars=True, scale=0.5,
                          normed=True, err_color='k', alpha=0.1,
                          err_type='poisson', err_return=True)
    if cmdopt == "generate":
        # Persist bin contents, edges, and errors as the new reference.
        with open(answer_dir + '/answers_error_bars2.npz', 'wb') as f:
            np.savez(f, bc=result[0], be=result[1], berr=result[2])
        plt.title('test_error_bars2')
        plt.show()
    elif cmdopt == "test":
        # Compare against the previously generated reference.
        answers = np.load(answer_dir + '/answers_error_bars2.npz')
        assert np.all(result[0] == answers['bc'])
        assert np.all(result[1] == answers['be'])
        assert np.all(result[2] == answers['berr'])
def test_error_bars_stacked3(cmdopt, data_gen):
    """Stacked, weighted, normalized step histograms with error bars.

    Regenerates or verifies the stored reference answers depending on
    ``cmdopt``.  Bug fix: the plot title previously read
    'test_error_bars_stacked2' (copy-paste from the sibling test), which
    mislabeled the generated reference plot for this test.
    """
    output = skh_plt.hist([data_gen[0], data_gen[1]], bins=20,
                          histtype='step', stacked=True,
                          weights=[data_gen[2], data_gen[2]],
                          errorbars=True, err_return=True, normed=True,
                          scale=2)
    if cmdopt == "generate":
        # Persist bin contents, edges, and errors as the new reference.
        with open(answer_dir + '/answers_error_bars_stacked3.npz', 'wb') as f:
            np.savez(f, bc=output[0], be=output[1], berr=output[2])
        plt.title('test_error_bars_stacked3')  # fixed: was '...stacked2'
        plt.show()
    elif cmdopt == "test":
        answers = np.load(answer_dir + '/answers_error_bars_stacked3.npz')
        assert np.all(output[0] == answers['bc'])
        assert np.all(output[1] == answers['be'])
        assert np.all(output[2] == answers['berr'])
def plot_binned_data_error(self, axis, bin_edges, data, wgt_sqrd, *args, **kwargs):
    """Plot pre-binned data on *axis* with per-bin errors from summed squared weights.

    The bin contents in *data* are re-histogrammed by placing one entry at
    each bin centre, weighted by the original content; errors are
    sqrt(sum of squared weights), normalised the same way when density=True.
    """
    binwidth = bin_edges[1] - bin_edges[0]
    errors = np.sqrt(wgt_sqrd)
    # Apply the same normalisation the histogram gets when density=True.
    if kwargs.get('density', False) == True:
        errors = errors / np.sum(data) / binwidth
    # Align the error series to one entry per bin, padding gaps with zero.
    # NOTE(review): .reindex implies wgt_sqrd is a pandas Series — confirm.
    errors = errors.reindex(np.arange(1, len(bin_edges)), fill_value=0)
    # Bin centres are the x positions; bin contents become the weights.
    centres = (bin_edges[1:] + bin_edges[:-1]) / 2.0
    return skh_plt.hist(centres, ax=axis, bins=bin_edges, weights=data,
                        errorbars=errors, *args, **kwargs)
def plot_stacked_binned_data_error(self, axis, bin_edges, data, wgt_sqrd, *args, **kwargs):
    """Plot stacked pre-binned datasets with one combined set of per-bin errors.

    Squared-weight series of all components are summed (aligned by index,
    missing bins treated as zero) before taking the square root.
    """
    # Accumulate the squared weights of every component into one series.
    total_sqrd = wgt_sqrd[0]
    for idx in np.arange(1, len(wgt_sqrd)):
        total_sqrd = total_sqrd.add(wgt_sqrd[idx], fill_value=0)
    errors = np.sqrt(total_sqrd)
    # One error per bin, zero-padded where a bin had no entries.
    errors = np.array(errors.reindex(np.arange(1, len(bin_edges)), fill_value=0))
    # Bin centres repeated per dataset, arranged as (nbins, ndatasets).
    centres = (bin_edges[1:] + bin_edges[:-1]) / 2.0
    x = np.transpose(np.array([centres]).repeat(len(data), axis=0))
    # Bin contents become the weights, transposed to match x.
    weights = np.transpose(data)
    return skh_plt.hist(x, ax=axis, bins=bin_edges, weights=weights,
                        errorbars=errors, stacked=True, *args, **kwargs)
def plotDataMC(setupClient):
    """Produce Data/MC comparison plots for every configured variable.

    For each input file the matching process dataframes (train + test
    splits) are collected and concatenated, then each variable in
    ``setupClient.VariablesToPlot`` is drawn as stacked background
    histograms with the ggF signal shape and the data overlaid.  Linear
    and log-scale versions are saved to ``setupClient.VarPlotPath``.

    Fixes over the original: the repeated Train/Test load pattern is
    driven by a tag->list mapping instead of five copy-pasted branches,
    and a missing 'Data' input now raises a clear RuntimeError instead
    of a NameError when ``dataDF`` is first used.
    """
    # Map a filename tag to the list accumulating that process' dataframes.
    sample_lists = {
        'Top': [],
        'Zjets': [],
        'Wjets': [],
        'Diboson': [],
        'ggF': [],
    }
    dataDF = None
    for itype in setupClient.InputFilesSB.keys():
        for ifile in setupClient.InputFilesSB[itype]:
            print(ifile)
            if 'Data' in ifile:
                dataDF = getDFEvents(setupClient.PDPath, ifile, 'Data')
            # A file may match at most one process tag in practice, but the
            # original checked each tag independently, so we do the same.
            for tag, df_list in sample_lists.items():
                if tag in ifile:
                    df_list.append(getDFEvents(setupClient.PDPath, ifile, '_Train'))
                    df_list.append(getDFEvents(setupClient.PDPath, ifile, '_Test'))
    if dataDF is None:
        # Previously this surfaced later as a NameError on dataDF.
        raise RuntimeError("No 'Data' input file found in setupClient.InputFilesSB")

    topDF = pd.concat(sample_lists['Top'], ignore_index=True)
    zjetsDF = pd.concat(sample_lists['Zjets'], ignore_index=True)
    wjetsDF = pd.concat(sample_lists['Wjets'], ignore_index=True)
    dibosonDF = pd.concat(sample_lists['Diboson'], ignore_index=True)
    signalDF = pd.concat(sample_lists['ggF'], ignore_index=True)

    for var in setupClient.VariablesToPlot:
        print("Plotting variable", var)
        # Binning spans the diboson sample's range, as in the original.
        bins = np.linspace(min(dibosonDF[var]), max(dibosonDF[var]), 20)
        # Stacked backgrounds.
        plt.hist([topDF[var], dibosonDF[var], zjetsDF[var], wjetsDF[var]],
                 histtype='stepfilled', normed=False, bins=bins,
                 weights=[topDF['weight'], dibosonDF['weight'],
                          zjetsDF['weight'], wjetsDF['weight']],
                 label=['Top', 'Diboson', 'Z + jets', 'W + jets'],
                 stacked=True)
        # Signal shape overlay.
        plt.hist(signalDF[var], histtype='step', normed=False, bins=bins,
                 weights=signalDF['weight'], label=r'ggF', linewidth=1,
                 color='red', linestyle='dashed')
        # Data: outline plus marker-with-errorbars overlay.
        plt.hist(dataDF[var], histtype='step', normed=False, bins=bins,
                 label=r'Data', linewidth=2, color='black',
                 linestyle='dashed')
        _ = skh_plt.hist(dataDF[var], bins=bins, errorbars=True,
                         histtype='marker', label='Data', color='black')
        plt.legend(loc='best', prop={'size': 10})
        plt.xlabel(var, fontsize=14)
        plt.savefig(setupClient.VarPlotPath + "/" + var + "_DataMC.png")
        plt.yscale('log')
        plt.savefig(setupClient.VarPlotPath + "/" + var + "_DataMC_log.png")
        plt.clf()
def train_and_validate(steps=10000, minibatch=128, LRrange=[0.0001, 0.00001, 10000, 0],
                       beta1=0.9, beta2=0.999, nafdim=16, depth=2,
                       savedir='abcdnn', seed=100, retrain=False, train=True):
    """Train the ABCDdnn model on background data and produce validation plots.

    Prepares data via prepdata(), trains (optionally) the conditional
    model, generates fake data for each (njet, nbtag) control/signal
    region, and writes comparison/ratio plots plus a ROOT file of the
    generated sample into *savedir*.

    NOTE(review): LRrange is a mutable default argument; it is only
    passed through here, so this is benign, but a None-sentinel would be
    safer.  The f-strings below contain '\\geq' which triggers invalid
    escape-sequence warnings — raw strings would silence them.
    """
    rawinputs, normedinputs, inputmeans, inputsigma, ncat_per_feature = prepdata()
    print(ncat_per_feature)
    inputdim = 4  # first four columns are the model features
    ncat_per_feature = ncat_per_feature[0:inputdim]
    # Remaining columns are the conditional (region) inputs.
    conddim = normedinputs.shape[1] - inputdim
    issignal = (rawinputs['njet'] >= 9) & (rawinputs['nbtag'] >= 3)  # signal_selection
    isbackground = ~issignal
    bkgnormed = normedinputs[isbackground]
    bkg = rawinputs[isbackground]
    # Upper plot range per feature: mean + 5 sigma.
    xmax = np.reshape(inputmeans + 5 * inputsigma, inputmeans.shape[1])
    m = ABCDdnn(ncat_per_feature, inputdim, minibatch=minibatch, conddim=conddim,
                LRrange=LRrange, beta1=beta1, beta2=beta2, nafdim=nafdim,
                depth=depth, savedir=savedir, retrain=retrain, seed=seed)
    m.setrealdata(bkgnormed)
    m.savehyperparameters()
    m.monitorevery = 100
    if train:
        m.train(steps)
        m.display_training()
    nj9cut = True
    if nj9cut:
        ncol = 3  # for plots below
        # One-hot conditional vectors for the six (njet, nbtag) regions.
        condlist = [[[1., 0., 0., 1., 0.,]],
                    [[0., 1., 0., 1., 0.,]],
                    [[0., 0., 1., 1., 0.,]],
                    [[1., 0., 0., 0., 1.,]],
                    [[0., 1., 0., 0., 1.,]],
                    [[0., 0., 1., 0., 1.,]]]
        select0 = (rawinputs['njet'] == 7) & (rawinputs['nbtag'] == 2)
        select1 = (rawinputs['njet'] == 8) & (rawinputs['nbtag'] == 2)
        select2 = (rawinputs['njet'] >= 9) & (rawinputs['nbtag'] == 2)
        select3 = (rawinputs['njet'] == 7) & (rawinputs['nbtag'] >= 3)
        select4 = (rawinputs['njet'] == 8) & (rawinputs['nbtag'] >= 3)
        select5 = (rawinputs['njet'] >= 9) & (rawinputs['nbtag'] >= 3)
        select_data = [select0, select1, select2, select3, select4, select5]
        plottextlist = [f'$N_j=7, N_b=2$', f'$N_j=8, N_b=2$',
                        f'$N_j\geq 9, N_b=2$', f'$N_j=7, N_b\geq 3$',
                        f'$N_j=8, N_b\geq 3$', f'$N_j\geq 9, N_b\geq 3$']
        njlist = [7, 8, 9, 7, 8, 9]
        nblist = [2, 2, 2, 3, 3, 3]
    else:
        ncol = 3  # for plots
        condlist = [[[0., 1., 0., 0., 1., 0.,]],
                    [[0., 0., 1., 0., 1., 0.,]],
                    [[0., 0., 0., 1., 1., 0.,]],
                    [[0., 1., 0., 0., 0., 1.,]],
                    [[0., 0., 1., 0., 0., 1.,]],
                    [[0., 0., 0., 1., 0., 1.,]]]
        select0 = (rawinputs['njet'] == 8) & (rawinputs['nbtag'] == 2)
        select1 = (rawinputs['njet'] == 9) & (rawinputs['nbtag'] == 2)
        select2 = (rawinputs['njet'] >= 10) & (rawinputs['nbtag'] == 2)
        select3 = (rawinputs['njet'] == 8) & (rawinputs['nbtag'] >= 3)
        select4 = (rawinputs['njet'] == 9) & (rawinputs['nbtag'] >= 3)
        select5 = (rawinputs['njet'] >= 10) & (rawinputs['nbtag'] >= 3)
        select_data = [select0, select1, select2, select3, select4, select5]
        plottextlist = [f'$N_j=8, N_b=2$', f'$N_j=9, N_b=2$',
                        f'$N_j\geq 10, N_b=2$', f'$N_j=8, N_b\geq 3$',
                        f'$N_j=9, N_b\geq 3$', f'$N_j\geq 10, N_b\geq 3$']
        njlist = [8, 9, 10, 8, 9, 10]
        nblist = [2, 2, 2, 3, 3, 3]
    # create fake data
    fakedatalist = []
    for cond, nj, nb in zip(condlist, njlist, nblist):
        nmcbatches = int(bkgnormed.shape[0] / minibatch)
        nmcremain = bkgnormed.shape[0] % minibatch
        fakelist = []
        cond_to_append = np.repeat(cond, minibatch, axis=0)
        # Run the generator over full minibatches of background features.
        for _ib in range(nmcbatches):
            xin = bkgnormed[_ib * minibatch:(_ib + 1) * minibatch, :inputdim]
            xin = np.hstack(
                (xin, cond_to_append))  # append conditional to the feature inputs
            xgen = m.model.predict(xin)
            #xgen = m.generate_sample(cond)
            fakelist.append(xgen)
        # last batch
        xin = bkgnormed[nmcbatches * minibatch:, :inputdim]
        xin = np.hstack(
            (xin, np.repeat(cond, nmcremain, axis=0)))  # append conditional to the feature inputs
        xgen = m.model.predict(xin)
        fakelist.append(xgen)
        # all data
        fakedata = np.vstack(fakelist)
        # Undo the normalisation to get back physical feature values.
        fakedata = fakedata * inputsigma[:, :inputdim] + inputmeans[:, :inputdim]
        nfakes = fakedata.shape[0]
        # Append constant njet/nbtag columns identifying the region.
        fakedata = np.hstack((fakedata, np.array([nj]*nfakes).reshape((nfakes,1)),
                              np.array([nb]*nfakes).reshape(nfakes,1)))
        fakedatalist.append(fakedata)
    # [label, column name, plot-range low, plot-range high] per feature.
    labelsindices = [['MET', 'met', 0.0, xmax[0]], ['H_T', 'ht', 0.0, xmax[1]],
                     ['p_{T5}', 'pt5', 0.0, xmax[2]], ['p_{T6}', 'pt6', 0.0, xmax[3]]]
    nbins = 20
    runplots = True
    if runplots:
        yscales = ['log', 'linear']
        for yscale in yscales:
            for li in labelsindices:
                pos = featurevars.index(li[1])
                fig, ax = plt.subplots(2, ncol, figsize=(3 * ncol, 6))
                iplot = 0
                for fakedata, seld, plottext in zip(fakedatalist, select_data, plottextlist):
                    input_data = rawinputs[seld]
                    # Make ratio plots
                    plotaxes = MplPlotter.ratio_plot(
                        dict(x=input_data[li[1]], bins=nbins, range=(li[2], li[3]),
                             errorbars=True, normed=True, histtype='marker'),
                        dict(x=fakedata[:, pos], bins=nbins, range=(li[2], li[3]),
                             errorbars=True, normed=True),
                        ratio_range=(0.25, 1.9))
                    plotfig = plotaxes[0][0].get_figure()
                    plotaxes[0][0].set_yscale(yscale)
                    plotfig.set_size_inches(5, 5)
                    plotfig.savefig(
                        os.path.join(
                            savedir, f'result_{li[1]}_{iplot}_{yscale}_ratio.pdf'))
                    # make matrix of plots
                    row = iplot // ncol
                    col = iplot % ncol
                    iplot += 1
                    plt.sca(ax[row, col])
                    ax[row, col].set_yscale(yscale)
                    ax[row, col].set_xlabel(f"${li[0]}$ (GeV)")
                    # Real data, generated (fake) data, and total background.
                    MplPlotter.hist(input_data[li[1]], bins=nbins, alpha=0.5,
                                    range=(li[2], li[3]), errorbars=True,
                                    histtype='marker', normed=True)
                    MplPlotter.hist(fakedata[:, pos], bins=nbins, alpha=0.5,
                                    range=(li[2], li[3]), errorbars=True,
                                    normed=True)
                    MplPlotter.hist(bkg[li[1]], bins=nbins, alpha=0.5,
                                    range=(li[2], li[3]), histtype='step',
                                    normed=True)
                    plt.text(0.6, 0.8, plottext,
                             transform=ax[row, col].transAxes, fontsize=10)
                fig.tight_layout()
                fig.savefig(
                    os.path.join(savedir, f'result_matrix_{li[1]}_{yscale}.pdf'))
    generatesigsample = True
    if generatesigsample:
        # Write the concatenated generated sample to a ROOT file.
        bkgsigfakedata = np.vstack(fakedatalist)
        datadict = {}
        for var, idx in zip(featurevars, range(len(featurevars))):
            datadict[var] = bkgsigfakedata[:, idx]
        writetorootfile(os.path.join(savedir, 'fakedata_NAF.root'), datadict)
    pass
def comp_study(input_data, n_events, xlims=None, resamples=100, dist_name='2Gauss'):
    """Compare histogram binning methods on one dataset.

    Draws the data under several binning rules (Sturges, Doane, Scott,
    Freedman-Diaconis, Knuth, Rice, Sqrt, equal-population, Bayesian
    Blocks), computes a roughness metric and a linear-interpolation
    error (nominal and bootstrap-averaged over *resamples* resamples)
    for each, and saves the histogram grid plus a metric/ranking figure
    under ``bb_dir``.

    dist_name selects the toy/nominal sample; 'Gauss' and '2LP' generate
    synthetic data, the other values subsample *input_data*.
    """
    bb_dir = os.path.join('/Users/brianpollack/Coding/BayesianBlocks')
    do_log = True
    # data_nom = input_data[:n_events]
    if dist_name == 'Gauss':
        np.random.seed(88)
        data_nom = np.random.normal(125, 2, size=n_events)
        resample_list = np.random.normal(125, 2, size=(resamples, n_events))
        do_log = False
    elif dist_name == '2LP':
        # Two Laplace peaks plus a uniform pedestal.
        np.random.seed(33)
        data_nom = np.concatenate(
            (np.random.laplace(loc=90, scale=5, size=int(n_events * 0.65)),
             np.random.laplace(loc=110, scale=1.5, size=int(n_events * 0.25)),
             np.random.uniform(low=80, high=120, size=int(n_events * 0.10))))
        resample_list = np.concatenate(
            (np.random.laplace(
                loc=90, scale=5, size=(resamples, int(n_events * 0.65))),
             np.random.laplace(
                 loc=110, scale=1.5, size=(resamples, int(n_events * 0.25))),
             np.random.uniform(
                 low=80, high=120, size=(resamples, int(n_events * 0.10)))),
            axis=1)
        do_log = False
    elif dist_name == 'jPT':
        np.random.seed(11)
        data_nom = np.random.choice(input_data, size=n_events, replace=False)
        resample_list = np.random.choice(input_data, size=(resamples, n_events),
                                         replace=True)
    elif dist_name == 'DY':
        np.random.seed(200)
        data_nom = np.random.choice(input_data, size=n_events, replace=False)
        resample_list = np.random.choice(input_data, size=(resamples, n_events),
                                         replace=True)
    else:
        np.random.seed(1)
        data_nom = np.random.choice(input_data, size=n_events, replace=False)
        resample_list = np.random.choice(input_data, size=(resamples, n_events),
                                         replace=True)

    # 3x3 grid: one panel per binning method.
    fig_hist, axes_hist = plt.subplots(3, 3, sharex=True, sharey=False,
                                       constrained_layout=True)
    fig_hist.suptitle(f'{dist_name} Distribution, N={n_events}', fontsize=22)
    # fig_hist.text(-0.03, 0.5, 'Entries/Bin Width', va='center', rotation='vertical', fontsize=20)
    # axes_hist[2][0].get_xaxis().set_ticks([])
    # axes_hist[2][1].get_xaxis().set_ticks([])
    # axes_hist[2][2].get_xaxis().set_ticks([])
    axes_hist[0][0].set_title('Sturges')
    hist_sturges_bw = skh_plt.hist(x=data_nom, histtype='stepfilled',
                                   bins='sturges', errorbars=False, alpha=0.5,
                                   log=do_log, scale='binwidth',
                                   err_type='gaussian', ax=axes_hist[0][0])
    axes_hist[0][1].set_title('Doane')
    hist_doane_bw = skh_plt.hist(x=data_nom, histtype='stepfilled',
                                 bins='doane', errorbars=False, alpha=0.5,
                                 log=do_log, scale='binwidth',
                                 err_type='gaussian', ax=axes_hist[0][1])
    axes_hist[0][2].set_title('Scott')
    hist_scott_bw = skh_plt.hist(x=data_nom, histtype='stepfilled',
                                 bins='scott', errorbars=False, alpha=0.5,
                                 log=do_log, scale='binwidth',
                                 err_type='gaussian', ax=axes_hist[0][2])
    axes_hist[1][0].set_title('Freedman Diaconis')
    axes_hist[1][0].set_ylabel('Entries/Bin Width', fontsize=20)
    hist_fd_bw = skh_plt.hist(x=data_nom, histtype='stepfilled', bins='fd',
                              errorbars=False, alpha=0.5, log=do_log,
                              scale='binwidth', err_type='gaussian',
                              ax=axes_hist[1][0])
    axes_hist[1][1].set_title('Knuth')
    _, bk = knuth_bin_width(data_nom, return_bins=True)
    hist_knuth_bw = skh_plt.hist(x=data_nom, histtype='stepfilled', bins=bk,
                                 errorbars=False, alpha=0.5, log=do_log,
                                 scale='binwidth', err_type='gaussian',
                                 ax=axes_hist[1][1])
    axes_hist[1][2].set_title('Rice')
    hist_rice_bw = skh_plt.hist(x=data_nom, histtype='stepfilled',
                                bins='rice', errorbars=False, alpha=0.5,
                                log=do_log, scale='binwidth',
                                err_type='gaussian', ax=axes_hist[1][2])
    axes_hist[2][0].set_title('Sqrt(N)')
    hist_sqrt_bw = skh_plt.hist(x=data_nom, histtype='stepfilled',
                                bins='sqrt', errorbars=False, alpha=0.5,
                                log=do_log, scale='binwidth',
                                err_type='gaussian', ax=axes_hist[2][0])
    # bep = bep_optimizer(data_nom)
    # _, bep = pd.qcut(data_nom, nep, retbins=True)
    # Un-scaled histograms for the error-metric computations.
    hist_sturges = np.histogram(data_nom, bins='sturges')
    hist_doane = np.histogram(data_nom, bins='doane')
    hist_scott = np.histogram(data_nom, bins='scott')
    hist_fd = np.histogram(data_nom, bins='fd')
    hist_knuth = np.histogram(data_nom, bins=bk)
    hist_rice = np.histogram(data_nom, bins='rice')
    hist_sqrt = np.histogram(data_nom, bins='sqrt')
    # Roughness metric per binning method.
    r_sturges = rough(hist_sturges_bw, plot=False)
    r_doane = rough(hist_doane_bw)
    r_scott = rough(hist_scott_bw)
    r_fd = rough(hist_fd_bw)
    r_knuth = rough(hist_knuth_bw, plot=False)
    r_rice = rough(hist_rice_bw)
    r_sqrt = rough(hist_sqrt_bw, plot=False)
    # Linear-interpolation error on the nominal dataset.
    eli_sturges = err_li(data_nom, hist_sturges)
    eli_doane = err_li(data_nom, hist_doane)
    eli_scott = err_li(data_nom, hist_scott)
    eli_fd = err_li(data_nom, hist_fd)
    eli_knuth = err_li(data_nom, hist_knuth)
    eli_rice = err_li(data_nom, hist_rice)
    eli_sqrt = err_li(data_nom, hist_sqrt)
    # Bootstrap-averaged linear-interpolation error over the resamples.
    avg_eli_sturges = []
    avg_eli_doane = []
    avg_eli_scott = []
    avg_eli_fd = []
    avg_eli_knuth = []
    avg_eli_rice = []
    avg_eli_sqrt = []
    for i in resample_list:
        avg_eli_sturges.append(err_li(i, hist_sturges))
        avg_eli_doane.append(err_li(i, hist_doane))
        avg_eli_scott.append(err_li(i, hist_scott))
        avg_eli_fd.append(err_li(i, hist_fd))
        avg_eli_knuth.append(err_li(i, hist_knuth))
        avg_eli_rice.append(err_li(i, hist_rice))
        avg_eli_sqrt.append(err_li(i, hist_sqrt))
    avg_eli_sturges = np.mean(avg_eli_sturges)
    avg_eli_doane = np.mean(avg_eli_doane)
    avg_eli_scott = np.mean(avg_eli_scott)
    avg_eli_fd = np.mean(avg_eli_fd)
    avg_eli_knuth = np.mean(avg_eli_knuth)
    avg_eli_rice = np.mean(avg_eli_rice)
    avg_eli_sqrt = np.mean(avg_eli_sqrt)
    avg_eli_list = [avg_eli_sturges, avg_eli_doane, avg_eli_scott, avg_eli_fd,
                    avg_eli_knuth, avg_eli_rice, avg_eli_sqrt]
    r_list = [r_sturges, r_doane, r_scott, r_fd, r_knuth, r_rice, r_sqrt]
    elis_list = [eli_sturges, eli_doane, eli_scott, eli_fd, eli_knuth,
                 eli_rice, eli_sqrt]
    # Equal-population binning, tuned against the fixed-rule results.
    axes_hist[2][1].set_title('Equal Population')
    bep = bep_optimizer(data_nom, resample_list, r_list, avg_eli_list)
    hist_ep_bw = skh_plt.hist(x=data_nom, histtype='stepfilled', bins=bep,
                              errorbars=False, alpha=0.5, log=do_log,
                              scale='binwidth', err_type='gaussian',
                              ax=axes_hist[2][1])
    hist_ep = np.histogram(data_nom, bins=bep)
    r_ep = rough(hist_ep_bw)
    eli_ep = err_li(data_nom, hist_ep)
    avg_eli_ep = []
    for i in resample_list:
        avg_eli_ep.append(err_li(i, hist_ep))
    avg_eli_ep = np.mean(avg_eli_ep)
    # Bayesian Blocks binning with an optimized false-positive rate p0.
    axes_hist[2][2].set_title('Bayesian Blocks')
    p0 = bb_optimizer(data_nom, resample_list, r_list, avg_eli_list)
    bb = bayesian_blocks(data_nom, p0=p0)
    if xlims:
        # Pin the outermost edges to the requested plot limits.
        bb[0] = xlims[0]
        bb[-1] = xlims[-1]
    hist_bb_bw = skh_plt.hist(x=data_nom, histtype='stepfilled', bins=bb,
                              errorbars=False, alpha=1, log=do_log,
                              scale='binwidth', err_type='gaussian',
                              ax=axes_hist[2][2])
    # if n_events == 1000 and dist_name == '2LP':
    #     axes_hist[2][2].set_ylim((0, 100))
    hist_bb = np.histogram(data_nom, bins=bb)
    r_bb = rough(hist_bb_bw, plot=False)
    eli_bb = err_li(data_nom, hist_bb)
    avg_eli_bb = []
    for i in resample_list:
        avg_eli_bb.append(err_li(i, hist_bb))
    avg_eli_bb = np.mean(avg_eli_bb)
    r_list.append(r_ep)
    r_list.append(r_bb)
    avg_eli_list.append(avg_eli_ep)
    avg_eli_list.append(avg_eli_bb)
    elis_list.append(eli_ep)
    elis_list.append(eli_bb)
    plt.savefig(bb_dir + f'/plots/bin_comp/hists_{dist_name}_{n_events}.pdf')
    xs = ['Sturges', 'Doane', 'Scott', 'FD', 'Knuth', 'Rice', 'Sqrt', 'EP', 'BB']
    fig_metric, axes_metric = plt.subplots(2, 1, constrained_layout=True)
    # NOTE(review): this re-sets the suptitle of fig_hist (without the
    # fontsize used earlier); it looks like fig_metric was intended — confirm.
    fig_hist.suptitle(f'{dist_name} Distribution, N={n_events}')
    for i in range(len(elis_list)):
        if xs[i] == 'BB':
            # Highlight Bayesian Blocks with a large black star.
            axes_metric[0].scatter(avg_eli_list[i], r_list[i], label=xs[i],
                                   s=400, marker='*', c='k')
        else:
            axes_metric[0].scatter(avg_eli_list[i], r_list[i], label=xs[i],
                                   s=200)
    axes_metric[0].set_ylabel(r'$W_n$ (Wiggles)')
    axes_metric[0].set_xlabel(r'$\hat{E}$ (Average Error)')
    # ax = plt.gca()
    # ax.set_yscale('log')
    # ax.set_xscale('log')
    # ax.relim()
    # ax.autoscale_view()
    axes_metric[0].grid()
    axes_metric[0].legend(ncol=1, bbox_to_anchor=(1.05, 1.15), loc='upper left')
    axes_metric[0].set_title(f'{dist_name} Distribution, N={n_events}',
                             fontsize=22)
    # plt.savefig(bb_dir+f'/plots/bin_comp/scat_{dist_name}_{n_events}.pdf')
    # plt.figure()
    # Stacked bars of the per-method rank under each metric (lower = better).
    rank_rough = rankdata(r_list, method='min')
    rank_avg_eli = rankdata(avg_eli_list, method='min')
    cont = axes_metric[1].bar(xs, rank_rough, 0.35, label=r'$W_n$ Ranking',
                              alpha=0.5)
    cont[-1].set_alpha(1)
    cont = axes_metric[1].bar(xs, rank_avg_eli, 0.35, bottom=rank_rough,
                              label=r'$\hat{E}$ Ranking', alpha=0.5)
    cont[-1].set_alpha(1)
    axes_metric[1].legend(loc='upper left', bbox_to_anchor=(1.0, 0.8))
    # axes_metric[1].set_title(f'Combined Ranking, {dist_name} Distribution, N={n_events}')
    axes_metric[1].set_xlabel('Binning Method')
    axes_metric[1].set_ylabel('Rank')
    plt.savefig(bb_dir + f'/plots/bin_comp/metric_{dist_name}_{n_events}.pdf')
def test_hist_fails(cmdopt, data_gen):
    """Exercise the error paths and degenerate-input behaviour of skh_plt.hist."""
    # Marker histograms are rejected for multiple datasets, stacked or not.
    with pytest.raises(ValueError):
        skh_plt.hist([data_gen[0], data_gen[1]], stacked=True, histtype='marker')
    with pytest.raises(ValueError):
        skh_plt.hist([data_gen[0], data_gen[1]], histtype='marker')
    # Requesting an error return without error bars fails.
    with pytest.raises(KeyError):
        skh_plt.hist(1, err_return=True)
    # Mismatched weights: wrong number of datasets, then wrong length.
    with pytest.raises(ValueError):
        skh_plt.hist([data_gen[0], data_gen[1]], weights=data_gen[2])
    with pytest.raises(ValueError):
        skh_plt.hist(data_gen[0], weights=data_gen[2][0:10])
    # Unknown error type is rejected.
    with pytest.raises(KeyError):
        skh_plt.hist(data_gen[0], err_type='fake', errorbars=True)
    # Degenerate inputs still produce sensible histograms.
    scalar_result = skh_plt.hist(5)
    assert np.all(scalar_result[0] == 1)
    empty_result = skh_plt.hist([], range=(0, 1))
    assert np.all(empty_result[0] == 0)
def plotScores():
    """Plot train/test DNN score distributions for each trained output directory.

    For every directory in the selected list, loads the stored prediction
    arrays for signal/background (train and test splits), overlays them as
    normalized histograms of the DNN score, and saves a log-scale PDF.
    Data points are only loaded and drawn when the analysis is unblinded.

    Improvements over the original: ``== False`` comparisons replaced by
    the ``not`` idiom, and the repeated np.load/os.path.join pattern is
    factored into a small local helper.
    """
    isBlindAnalysis = True
    modelName = 'llqqDNN_100_60_2_0'
    outDirAfterDilep = [
        'Out_AfterDilepton_TrainggF1000_FullStat_1FatJet',
        'Out_AfterDilepton_TrainggF2000_FullStat_1FatJet',
        'Out_AfterDilepton_TrainggF3000_FullStat_1FatJet',
        'Out_AfterDilepton_TrainggF700_FullStat_1FatJet'
    ]
    outDirAfterggF = [
        'Out_AfterggFMerged_TrainggF1000_FullStat_1FatJet',
        'Out_AfterggFMerged_TrainggF2000_FullStat_1FatJet',
        'Out_AfterggFMerged_TrainggF3000_FullStat_1FatJet',
        'Out_AfterggFMerged_TrainggF700_FullStat_1FatJet'
    ]
    for idir in outDirAfterDilep:
        # for idir in outDirAfterggF:
        def _load(fname):
            # Load one stored prediction array for this directory/model.
            return np.load(os.path.join(idir, modelName, fname))

        if not isBlindAnalysis:
            # Data scores are only available/used when unblinded.
            yhat_data = _load("yhat_data.npy")
        yhat_train_signal = _load("yhat_train_signal.npy")
        yhat_train_background = _load("yhat_train_background.npy")
        yhat_test_signal = _load("yhat_test_signal.npy")
        yhat_test_background = _load("yhat_test_background.npy")
        bins = np.linspace(0, 1, 50)
        plt.hist(yhat_train_signal, bins=bins, histtype='step', lw=2,
                 alpha=0.5, color='deepskyblue', label='TrainSignal',
                 normed=True)
        plt.hist(yhat_test_signal, bins=bins, histtype='stepfilled', lw=2,
                 alpha=0.5, color='turquoise', label='TestSignal',
                 normed=True)
        plt.hist(yhat_train_background, bins=bins, histtype='step', lw=2,
                 alpha=0.5, color='deeppink', label='TrainBackground',
                 normed=True)
        plt.hist(yhat_test_background, bins=bins, histtype='stepfilled', lw=2,
                 alpha=0.5, color='plum', label='TestBackground',
                 normed=True)
        if not isBlindAnalysis:
            skh_plt.hist(yhat_data, bins=bins, errorbars=True,
                         histtype='marker', label='Data', color='black',
                         normed=True)
        plt.legend(loc="upper center")
        plt.ylabel('Norm. Entries')
        plt.xlabel('DNN score')
        plt.yscale('log')
        plt.savefig(idir + '/' + modelName + "/MC_TrainTest_Score.pdf")
        # plt.show()
        plt.clf()