def test_ratio_plot_log(cmdopt, data_gen):
    """Regression test for ``skh_plt.ratio_plot`` with log-scaled axes.

    Args:
        cmdopt: Run mode. ``"generate"`` writes the reference arrays to
            ``answers_ratio_plot_log.npz`` and shows the figure;
            ``"test"`` compares the current output against that file.
            Any other value only exercises the plotting call.
        data_gen: Indexable fixture; elements 0 and 1 are used as the two
            samples and element 2 as the weights of the second sample.
    """
    output = skh_plt.ratio_plot(
        dict(x=data_gen[0], errorbars=True, histtype='marker', log=True,
             err_x=False),
        dict(x=data_gen[1], weights=data_gen[2], errorbars=True),
        logx=True, ratio_range=(0, 10))
    answer_path = answer_dir + '/answers_ratio_plot_log.npz'

    if cmdopt == "generate":
        # NOTE(review): output[1] / output[2] look like (bin contents,
        # bin edges) pairs for the two histograms -- confirm against
        # ratio_plot's documentation.
        with open(answer_path, 'wb') as f:
            np.savez(f, bc1=output[1][0], be1=output[1][1],
                     bc2=output[2][0], be2=output[2][1])
        output[0][0].set_title('test_ratio_plot_log')
        plt.show()
    elif cmdopt == "test":
        # np.load returns a lazily-read NpzFile holding an open file handle;
        # the context manager makes sure it is closed even if an assert fails.
        with np.load(answer_path) as answers:
            # array_equal also checks shapes, so a reference array of the
            # wrong length cannot pass through broadcasting.
            assert np.array_equal(output[1][0], answers['bc1'])
            assert np.array_equal(output[1][1], answers['be1'])
            assert np.array_equal(output[2][0], answers['bc2'])
            assert np.array_equal(output[2][1], answers['be2'])
def test_ratio_plot_stacked(cmdopt, data_gen):
    """Regression test for ``skh_plt.ratio_plot`` with stacked histograms.

    Args:
        cmdopt: Run mode. ``"generate"`` writes the reference arrays to
            ``answers_ratio_plot_stacked.npz`` and shows the figure;
            ``"test"`` compares the current output against that file.
            Any other value only exercises the plotting call.
        data_gen: Indexable fixture; elements 0 and 1 are stacked as the
            samples of both histograms and element 2 supplies the weights
            of the second histogram.
    """
    output = skh_plt.ratio_plot(
        dict(x=[data_gen[0], data_gen[1]], stacked=True, errorbars=True),
        dict(x=[data_gen[0], data_gen[1]],
             weights=[data_gen[2], data_gen[2]], stacked=True,
             errorbars=True, err_style='line'),
        range=(-5, 5), bins='blocks')
    answer_path = answer_dir + '/answers_ratio_plot_stacked.npz'

    if cmdopt == "generate":
        # NOTE(review): output[1] / output[2] look like (bin contents,
        # bin edges) pairs for the two histograms -- confirm against
        # ratio_plot's documentation.
        with open(answer_path, 'wb') as f:
            np.savez(f, bc1=output[1][0], be1=output[1][1],
                     bc2=output[2][0], be2=output[2][1])
        output[0][0].set_title('test_ratio_plot_stacked')
        plt.show()
    elif cmdopt == "test":
        # np.load returns a lazily-read NpzFile holding an open file handle;
        # the context manager makes sure it is closed even if an assert fails.
        with np.load(answer_path) as answers:
            # array_equal also checks shapes, so a reference array of the
            # wrong length cannot pass through broadcasting.
            assert np.array_equal(output[1][0], answers['bc1'])
            assert np.array_equal(output[1][1], answers['be1'])
            assert np.array_equal(output[2][0], answers['bc2'])
            assert np.array_equal(output[2][1], answers['be2'])
def test_ratio_plot_quick(cmdopt, data_gen):
    """Check how ``skh_plt.ratio_plot`` handles per-histogram bins/range.

    Conflicting ``bins`` or ``range`` settings on the two histogram dicts
    must raise ``KeyError``; a setting given on only one of them must be
    reflected in the first histogram's returned arrays.

    Args:
        cmdopt: Run-mode fixture; not used by this test.
        data_gen: Indexable fixture; elements 0 and 1 are the two samples.
    """
    first = data_gen[0]
    second = data_gen[1]

    # --- bins -------------------------------------------------------------
    # Two different bin counts cannot be reconciled.
    with pytest.raises(KeyError):
        skh_plt.ratio_plot({'x': first, 'bins': 10},
                           {'x': second, 'bins': 11})

    # A bin count given only on the second histogram shows up in the
    # first histogram's output as well.
    result = skh_plt.ratio_plot({'x': first}, {'x': second, 'bins': 11})
    assert len(result[1][0]) == 11

    # --- range ------------------------------------------------------------
    # Two different ranges cannot be reconciled.
    with pytest.raises(KeyError):
        skh_plt.ratio_plot({'x': first, 'range': (0, 1)},
                           {'x': second, 'range': (1, 2)})

    # A range given on either histogram alone bounds the returned edges.
    lower, upper = -0.1, 0.1
    single_range_cases = (
        ({'x': first, 'range': (lower, upper)}, {'x': second}),
        ({'x': first}, {'x': second, 'range': (lower, upper)}),
    )
    for hist_a, hist_b in single_range_cases:
        result = skh_plt.ratio_plot(hist_a, hist_b)
        edges = result[1][1]
        assert edges[0] >= lower and edges[-1] <= upper
def _abcd_regions(rawinputs, nj9cut):
    """Return the six (njet, nbtag) validation regions.

    Args:
        rawinputs: Table indexable by ``'njet'`` and ``'nbtag'``.
        nj9cut: If true the jet bins are 7, 8, >=9; otherwise 8, 9, >=10.

    Returns:
        Tuple ``(condlist, select_data, plottextlist, njlist, nblist)``;
        all five lists have six entries, in the same region order.
    """
    njet = rawinputs['njet']
    nbtag = rawinputs['nbtag']
    # NOTE(review): each condition vector looks like a one-hot njet category
    # followed by a one-hot nbtag category -- confirm against prepdata().
    if nj9cut:
        condlist = [[[1., 0., 0., 1., 0.]],
                    [[0., 1., 0., 1., 0.]],
                    [[0., 0., 1., 1., 0.]],
                    [[1., 0., 0., 0., 1.]],
                    [[0., 1., 0., 0., 1.]],
                    [[0., 0., 1., 0., 1.]]]
        select_data = [
            (njet == 7) & (nbtag == 2),
            (njet == 8) & (nbtag == 2),
            (njet >= 9) & (nbtag == 2),
            (njet == 7) & (nbtag >= 3),
            (njet == 8) & (nbtag >= 3),
            (njet >= 9) & (nbtag >= 3),
        ]
        # Raw strings: "\g" is not a valid escape sequence in a normal
        # string literal; the resulting text is unchanged.
        plottextlist = [
            r'$N_j=7, N_b=2$',
            r'$N_j=8, N_b=2$',
            r'$N_j\geq 9, N_b=2$',
            r'$N_j=7, N_b\geq 3$',
            r'$N_j=8, N_b\geq 3$',
            r'$N_j\geq 9, N_b\geq 3$',
        ]
        njlist = [7, 8, 9, 7, 8, 9]
    else:
        condlist = [[[0., 1., 0., 0., 1., 0.]],
                    [[0., 0., 1., 0., 1., 0.]],
                    [[0., 0., 0., 1., 1., 0.]],
                    [[0., 1., 0., 0., 0., 1.]],
                    [[0., 0., 1., 0., 0., 1.]],
                    [[0., 0., 0., 1., 0., 1.]]]
        select_data = [
            (njet == 8) & (nbtag == 2),
            (njet == 9) & (nbtag == 2),
            (njet >= 10) & (nbtag == 2),
            (njet == 8) & (nbtag >= 3),
            (njet == 9) & (nbtag >= 3),
            (njet >= 10) & (nbtag >= 3),
        ]
        plottextlist = [
            r'$N_j=8, N_b=2$',
            r'$N_j=9, N_b=2$',
            r'$N_j\geq 10, N_b=2$',
            r'$N_j=8, N_b\geq 3$',
            r'$N_j=9, N_b\geq 3$',
            r'$N_j\geq 10, N_b\geq 3$',
        ]
        njlist = [8, 9, 10, 8, 9, 10]
    nblist = [2, 2, 2, 3, 3, 3]
    return condlist, select_data, plottextlist, njlist, nblist


def _predict_in_batches(model, bkgnormed, cond, inputdim, minibatch):
    """Run ``model.predict`` over all rows of *bkgnormed* for one condition.

    The first *inputdim* columns of each chunk of at most *minibatch* rows
    are concatenated with *cond* repeated once per row, and the per-chunk
    predictions are stacked vertically. A trailing partial chunk is
    processed like any other; no empty chunk is ever passed to the model.
    """
    predictions = []
    for start in range(0, bkgnormed.shape[0], minibatch):
        features = bkgnormed[start:start + minibatch, :inputdim]
        # append conditional to the feature inputs
        conditions = np.repeat(cond, features.shape[0], axis=0)
        predictions.append(model.predict(np.hstack((features, conditions))))
    return np.vstack(predictions)


def _plot_validation(rawinputs, bkg, fakedatalist, select_data, plottextlist,
                     labelsindices, ncol, nbins, savedir):
    """Save per-region ratio plots and a 2 x ncol comparison matrix.

    One set of PDFs is written into *savedir* for every combination of
    y-scale ('log', 'linear') and entry of *labelsindices*.
    """
    for yscale in ['log', 'linear']:
        for label, var, xlow, xhigh in labelsindices:
            pos = featurevars.index(var)
            fig, ax = plt.subplots(2, ncol, figsize=(3 * ncol, 6))
            regions = zip(fakedatalist, select_data, plottextlist)
            for iplot, (fakedata, seld, plottext) in enumerate(regions):
                input_data = rawinputs[seld]

                # Stand-alone ratio plot for this region.
                plotaxes = MplPlotter.ratio_plot(
                    dict(x=input_data[var], bins=nbins, range=(xlow, xhigh),
                         errorbars=True, normed=True, histtype='marker'),
                    dict(x=fakedata[:, pos], bins=nbins, range=(xlow, xhigh),
                         errorbars=True, normed=True),
                    ratio_range=(0.25, 1.9))
                plotfig = plotaxes[0][0].get_figure()
                plotaxes[0][0].set_yscale(yscale)
                plotfig.set_size_inches(5, 5)
                plotfig.savefig(os.path.join(
                    savedir, f'result_{var}_{iplot}_{yscale}_ratio.pdf'))
                # Already on disk; close it so figures do not pile up.
                plt.close(plotfig)

                # Same region as one cell of the matrix of plots.
                cell = ax[iplot // ncol, iplot % ncol]
                plt.sca(cell)
                cell.set_yscale(yscale)
                cell.set_xlabel(f"${label}$ (GeV)")
                MplPlotter.hist(input_data[var], bins=nbins, alpha=0.5,
                                range=(xlow, xhigh), errorbars=True,
                                histtype='marker', normed=True)
                MplPlotter.hist(fakedata[:, pos], bins=nbins, alpha=0.5,
                                range=(xlow, xhigh), errorbars=True,
                                normed=True)
                MplPlotter.hist(bkg[var], bins=nbins, alpha=0.5,
                                range=(xlow, xhigh), histtype='step',
                                normed=True)
                plt.text(0.6, 0.8, plottext, transform=cell.transAxes,
                         fontsize=10)
            fig.tight_layout()
            fig.savefig(os.path.join(
                savedir, f'result_matrix_{var}_{yscale}.pdf'))
            plt.close(fig)


def train_and_validate(steps=10000, minibatch=128, LRrange=None, beta1=0.9,
                       beta2=0.999, nafdim=16, depth=2, savedir='abcdnn',
                       seed=100, retrain=False, train=True):
    """Train an ``ABCDdnn`` model on background events and validate it.

    Loads the data with ``prepdata()``, builds and (optionally) trains the
    model on events outside the signal selection (njet >= 9 and nbtag >= 3),
    generates model output for six (njet, nbtag) regions, writes comparison
    plots as PDFs and the generated sample as ``fakedata_NAF.root`` into
    *savedir*.

    Args:
        steps: Passed to ``m.train``.
        minibatch: Passed to ``ABCDdnn``; also the chunk size used when
            generating model output.
        LRrange: Passed to ``ABCDdnn``. ``None`` (default) means
            ``[0.0001, 0.00001, 10000, 0]``; a fresh list is created per
            call so the default is never shared between calls.
        beta1, beta2, nafdim, depth, seed, retrain: Passed to ``ABCDdnn``.
        savedir: Passed to ``ABCDdnn`` and used as the output directory for
            the plots and the ROOT file.
        train: If false, ``m.train`` is skipped.

    Returns:
        None.
    """
    if LRrange is None:
        LRrange = [0.0001, 0.00001, 10000, 0]

    (rawinputs, normedinputs, inputmeans, inputsigma,
     ncat_per_feature) = prepdata()
    print(ncat_per_feature)

    # The first `inputdim` columns are model features; the remaining
    # columns of the normalised inputs are the conditional inputs.
    inputdim = 4
    ncat_per_feature = ncat_per_feature[0:inputdim]
    conddim = normedinputs.shape[1] - inputdim

    issignal = (rawinputs['njet'] >= 9) & (rawinputs['nbtag'] >= 3)
    isbackground = ~issignal
    bkgnormed = normedinputs[isbackground]
    bkg = rawinputs[isbackground]
    # Upper edge of the plotting range: mean + 5 sigma per column, as 1-D.
    xmax = np.reshape(inputmeans + 5 * inputsigma, inputmeans.shape[1])

    m = ABCDdnn(ncat_per_feature, inputdim, minibatch=minibatch,
                conddim=conddim, LRrange=LRrange, beta1=beta1, beta2=beta2,
                nafdim=nafdim, depth=depth, savedir=savedir, retrain=retrain,
                seed=seed)
    m.setrealdata(bkgnormed)
    m.savehyperparameters()
    m.monitorevery = 100

    if train:
        m.train(steps)
    m.display_training()

    nj9cut = True  # hard-coded switch between the two region layouts
    ncol = 3  # columns in the matrix of plots
    condlist, select_data, plottextlist, njlist, nblist = _abcd_regions(
        rawinputs, nj9cut)

    # create fake data
    fakedatalist = []
    for cond, nj, nb in zip(condlist, njlist, nblist):
        fakedata = _predict_in_batches(m.model, bkgnormed, cond, inputdim,
                                       minibatch)
        # Undo the input normalisation.
        fakedata = (fakedata * inputsigma[:, :inputdim]
                    + inputmeans[:, :inputdim])
        nfakes = fakedata.shape[0]
        # Tag every generated row with the region's njet and nbtag values.
        fakedata = np.hstack((fakedata,
                              np.full((nfakes, 1), nj),
                              np.full((nfakes, 1), nb)))
        fakedatalist.append(fakedata)

    # [axis label, column name, lower plot edge, upper plot edge]
    labelsindices = [['MET', 'met', 0.0, xmax[0]],
                     ['H_T', 'ht', 0.0, xmax[1]],
                     ['p_{T5}', 'pt5', 0.0, xmax[2]],
                     ['p_{T6}', 'pt6', 0.0, xmax[3]]]
    nbins = 20

    runplots = True  # hard-coded switch
    if runplots:
        _plot_validation(rawinputs, bkg, fakedatalist, select_data,
                         plottextlist, labelsindices, ncol, nbins, savedir)

    generatesigsample = True  # hard-coded switch
    if generatesigsample:
        bkgsigfakedata = np.vstack(fakedatalist)
        datadict = {var: bkgsigfakedata[:, idx]
                    for idx, var in enumerate(featurevars)}
        writetorootfile(os.path.join(savedir, 'fakedata_NAF.root'), datadict)