Example #1
def test_blocks_hist(cmdopt, data_gen):

    output = skh_plt.hist(data_gen[0], bins='blocks', scale='binwidth', color='green')

    if cmdopt == "generate":
        with open(answer_dir+'/answers_blocks_hist.npz', 'wb') as f:
            np.savez(f, bc=output[0], be=output[1])
        plt.title('test_blocks_hist')
        plt.show()
    elif cmdopt == "test":
        answers = np.load(answer_dir+'/answers_blocks_hist.npz')
        assert(np.all(output[0] == answers['bc']))
        assert(np.all(output[1] == answers['be']))
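For reference, a minimal standalone sketch of the call pattern these tests exercise. The import path and the shape of the return value are assumptions inferred from how the tests index output; skh_plt is presumably scikit-hep's MplPlotter.

import numpy as np
import matplotlib.pyplot as plt
from skhep.visual import MplPlotter as skh_plt  # assumed import path

data = np.random.normal(0, 1, 1000)

# bins='blocks' selects Bayesian-blocks bin edges; scale='binwidth'
# divides each bin content by its width. The test above only relies on
# output[0] (bin contents) and output[1] (bin edges).
output = skh_plt.hist(data, bins='blocks', scale='binwidth', color='green')
bin_contents, bin_edges = output[0], output[1]
plt.show()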
Example #2
def test_simple_hist4(cmdopt, data_gen):
    output = skh_plt.hist(data_gen[0], weights=data_gen[2], bins=range(5), normed=True,
                          scale='binwidth', color='red', histtype='bar')

    if cmdopt == "generate":
        with open(answer_dir+'/answers_simple_hist4.npz', 'wb') as f:
            np.savez(f, bc=output[0], be=output[1])
        plt.title('test_simple_hist4')
        plt.show()
    elif cmdopt == "test":
        answers = np.load(answer_dir+'/answers_simple_hist4.npz')
        assert(np.all(output[0] == answers['bc']))
        assert(np.all(output[1] == answers['be']))
Example #3
def test_error_bars(cmdopt, data_gen):

    output = skh_plt.hist(data_gen[0], bins=20, errorbars=True, err_return=True, scale=5)

    if cmdopt == "generate":
        with open(answer_dir+'/answers_error_bars.npz', 'wb') as f:
            np.savez(f, bc=output[0], be=output[1], berr=output[2])
        plt.title('test_error_bars')
        plt.show()
    elif cmdopt == "test":
        answers = np.load(answer_dir+'/answers_error_bars.npz')
        assert(np.all(output[0] == answers['bc']))
        assert(np.all(output[1] == answers['be']))
        assert(np.all(output[2] == answers['berr']))
Example #4
def test_error_bars4(cmdopt, data_gen):

    output = skh_plt.hist(data_gen[0], bins=50, errorbars=True, err_return=True,
                          histtype='step', err_type='poisson', suppress_zero=True, scale='binwidth')

    if cmdopt == "generate":
        with open(answer_dir+'/answers_error_bars4.npz', 'wb') as f:
            np.savez(f, bc=output[0], be=output[1], berr=output[2])
        plt.title('test_error_bars4')
        plt.show()
    elif cmdopt == "test":
        answers = np.load(answer_dir+'/answers_error_bars4.npz')
        assert(np.all(output[0] == answers['bc']))
        assert(np.all(output[1] == answers['be']))
        assert(np.all(output[2] == answers['berr']))
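As the assertions in these error-bar tests imply, passing err_return=True adds a third element with the per-bin uncertainties. A hedged sketch of unpacking it (same assumed import as above):

import numpy as np
from skhep.visual import MplPlotter as skh_plt  # assumed import path

data = np.random.exponential(2.0, 5000)

# errorbars=True draws the error bars; err_return=True makes hist() also
# return them, so the output carries (bin contents, bin edges, bin errors).
out = skh_plt.hist(data, bins=50, errorbars=True, err_return=True,
                   histtype='step', err_type='poisson', suppress_zero=True)
bin_contents, bin_edges, bin_err = out[0], out[1], out[2]
print(len(bin_contents), len(bin_edges), len(bin_err))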
Example #5
def test_error_bars2(cmdopt, data_gen):

    output = skh_plt.hist(data_gen[0], bins=1, errorbars=True, scale=0.5, normed=True,
                          err_color='k', alpha=0.1, err_type='poisson', err_return=True)

    if cmdopt == "generate":
        with open(answer_dir+'/answers_error_bars2.npz', 'wb') as f:
            np.savez(f, bc=output[0], be=output[1], berr=output[2])
        plt.title('test_error_bars2')
        plt.show()
    elif cmdopt == "test":
        answers = np.load(answer_dir+'/answers_error_bars2.npz')
        assert(np.all(output[0] == answers['bc']))
        assert(np.all(output[1] == answers['be']))
        assert(np.all(output[2] == answers['berr']))
Example #6
def test_error_bars_stacked3(cmdopt, data_gen):

    output = skh_plt.hist([data_gen[0], data_gen[1]], bins=20, histtype='step', stacked=True,
                          weights=[data_gen[2], data_gen[2]], errorbars=True, err_return=True,
                          normed=True, scale=2)

    if cmdopt == "generate":
        with open(answer_dir+'/answers_error_bars_stacked3.npz', 'wb') as f:
            np.savez(f, bc=output[0], be=output[1], berr=output[2])
        plt.title('test_error_bars_stacked3')
        plt.show()
    elif cmdopt == "test":
        answers = np.load(answer_dir+'/answers_error_bars_stacked3.npz')
        assert(np.all(output[0] == answers['bc']))
        assert(np.all(output[1] == answers['be']))
        assert(np.all(output[2] == answers['berr']))
Example #7
def plot_binned_data_error(self, axis, bin_edges, data, wgt_sqrd, *args,
                           **kwargs):
    binwidth = bin_edges[1] - bin_edges[0]
    errors = np.sqrt(wgt_sqrd)
    if 'density' in kwargs and kwargs['density'] == True:
        errors = errors / np.sum(data) / binwidth
    errors = errors.reindex(np.arange(1, len(bin_edges)), fill_value=0)
    # The dataset values are the bin centres
    x = (bin_edges[1:] + bin_edges[:-1]) / 2.0
    # The weights are the y-values of the input binned data
    weights = data
    return skh_plt.hist(x,
                        ax=axis,
                        bins=bin_edges,
                        weights=weights,
                        errorbars=errors,
                        *args,
                        **kwargs)
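The underlying trick here, feeding the bin centres as the sample and the bin contents as weights so that pre-binned data can be re-histogrammed, also works with plain numpy/matplotlib. A self-contained sketch of just that idea:

import numpy as np
import matplotlib.pyplot as plt

# Pre-binned input: contents per bin and the common bin edges.
bin_edges = np.linspace(0, 10, 11)
contents = np.array([5, 9, 12, 20, 18, 14, 9, 6, 3, 1], dtype=float)

# One entry per bin centre, weighted by that bin's content, reproduces
# the original histogram exactly.
centres = 0.5 * (bin_edges[1:] + bin_edges[:-1])
counts, edges, _ = plt.hist(centres, bins=bin_edges, weights=contents,
                            histtype='step')
assert np.allclose(counts, contents)
plt.show()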
Example #8
def plot_stacked_binned_data_error(self, axis, bin_edges, data, wgt_sqrd,
                                   *args, **kwargs):
    errors = wgt_sqrd[0]
    for i in np.arange(1, len(wgt_sqrd)):
        errors = errors.add(wgt_sqrd[i], fill_value=0)
    errors = np.sqrt(errors)
    errors = np.array(
        errors.reindex(np.arange(1, len(bin_edges)), fill_value=0))
    # The dataset values are the bin centres
    x = (bin_edges[1:] + bin_edges[:-1]) / 2.0
    x = np.array([x]).repeat(len(data), axis=0)
    x = np.transpose(x)
    # The weights are the y-values of the input binned data
    weights = np.transpose(data)
    return skh_plt.hist(x,
                        ax=axis,
                        bins=bin_edges,
                        weights=weights,
                        errorbars=errors,
                        stacked=True,
                        *args,
                        **kwargs)
Example #9
def plotDataMC(setupClient):

    topDF_list = []
    zjetsDF_list = []
    wjetsDF_list = []
    dibosonDF_list = []
    signalDF_list = []

    for itype in setupClient.InputFilesSB.keys():
        for ifile in setupClient.InputFilesSB[itype]:
            print(ifile)
            if 'Top' in ifile:
                topDF_list += [getDFEvents(setupClient.PDPath, ifile, '_Train')]
                topDF_list += [getDFEvents(setupClient.PDPath, ifile, '_Test')]
            if 'Data' in ifile:
                dataDF = getDFEvents(setupClient.PDPath, ifile, 'Data')
            if 'Zjets' in ifile:
                zjetsDF_list += [getDFEvents(setupClient.PDPath, ifile, '_Train')]
                zjetsDF_list += [getDFEvents(setupClient.PDPath, ifile, '_Test')]
            if 'Diboson' in ifile:
                dibosonDF_list += [getDFEvents(setupClient.PDPath, ifile, '_Train')]
                dibosonDF_list += [getDFEvents(setupClient.PDPath, ifile, '_Test')]
            if 'ggF' in ifile:
                signalDF_list += [getDFEvents(setupClient.PDPath, ifile, '_Train')]
                signalDF_list += [getDFEvents(setupClient.PDPath, ifile, '_Test')]
            if 'Wjets' in ifile:
                wjetsDF_list += [getDFEvents(setupClient.PDPath, ifile, '_Train')]
                wjetsDF_list += [getDFEvents(setupClient.PDPath, ifile, '_Test')]

    topDF = pd.concat(topDF_list, ignore_index=True)
    zjetsDF = pd.concat(zjetsDF_list, ignore_index=True)
    wjetsDF = pd.concat(wjetsDF_list, ignore_index=True)
    dibosonDF = pd.concat(dibosonDF_list, ignore_index=True)
    signalDF = pd.concat(signalDF_list, ignore_index=True)

    for var in setupClient.VariablesToPlot:
        print("Plotting variable", var)
        # print ' min:',min(dibosonDF[var]), ' max', max(dibosonDF[var])
        bins = np.linspace(min(dibosonDF[var]), max(dibosonDF[var]), 20)

        plt.hist([topDF[var], dibosonDF[var], zjetsDF[var], wjetsDF[var]],
                 histtype='stepfilled',
                 normed=False,
                 bins=bins,
                 weights=[
                     topDF['weight'], dibosonDF['weight'], zjetsDF['weight'],
                     wjetsDF['weight']
                 ],
                 label=[
                     'Top',
                     'Diboson',
                     'Z + jets',
                     'W + jets',
                 ],
                 stacked=True)

        plt.hist(signalDF[var],
                 histtype='step',
                 normed=False,
                 bins=bins,
                 weights=signalDF['weight'],
                 label=r'ggF',
                 linewidth=1,
                 color='red',
                 linestyle='dashed')
        # plt.hist(dataDF[var], histtype='step', normed=False, bins=bins, label=r'Data', linewidth=2, color='black', linestyle='dashed')
        _ = skh_plt.hist(dataDF[var],
                         bins=bins,
                         errorbars=True,
                         histtype='marker',
                         label='Data',
                         color='black')

        plt.legend(loc='best', prop={'size': 10})
        plt.xlabel(var, fontsize=14)
        plt.savefig(setupClient.VarPlotPath + "/" + var + "_DataMC.png")
        plt.yscale('log')
        plt.savefig(setupClient.VarPlotPath + "/" + var + "_DataMC_log.png")
        plt.clf()
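A stripped-down sketch of the overlay pattern used above, stacked filled MC histograms plus data drawn as markers with error bars, using toy arrays instead of the analysis DataFrames (skh_plt import assumed as before):

import numpy as np
import matplotlib.pyplot as plt
from skhep.visual import MplPlotter as skh_plt  # assumed import path

rng = np.random.default_rng(0)
mc_a = rng.normal(100, 15, 2000)   # toy stand-ins for the MC samples
mc_b = rng.normal(110, 20, 1000)
data = rng.normal(102, 16, 2500)   # toy stand-in for dataDF[var]

bins = np.linspace(50, 160, 20)
plt.hist([mc_a, mc_b], bins=bins, histtype='stepfilled', stacked=True,
         label=['MC A', 'MC B'])
# Data as black markers with statistical error bars, as in plotDataMC().
skh_plt.hist(data, bins=bins, errorbars=True, histtype='marker',
             label='Data', color='black')
plt.legend(loc='best')
plt.show()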
Example #10
def train_and_validate(steps=10000, minibatch=128, LRrange=[0.0001, 0.00001, 10000, 0], beta1=0.9, beta2=0.999, nafdim=16, depth=2, \
    savedir='abcdnn', seed=100, retrain=False, train=True):
    rawinputs, normedinputs, inputmeans, inputsigma, ncat_per_feature = prepdata()
    print(ncat_per_feature)
    inputdim = 4
    ncat_per_feature = ncat_per_feature[0:inputdim]
    conddim = normedinputs.shape[1] - inputdim

    issignal = (rawinputs['njet'] >= 9) & (rawinputs['nbtag'] >= 3)  # signal selection
    isbackground = ~issignal
    bkgnormed = normedinputs[isbackground]
    bkg = rawinputs[isbackground]
    xmax = np.reshape(inputmeans + 5 * inputsigma, inputmeans.shape[1])

    m = ABCDdnn(ncat_per_feature, inputdim, minibatch=minibatch, conddim=conddim, LRrange=LRrange, \
        beta1=beta1, beta2=beta2, nafdim=nafdim, depth=depth, savedir=savedir, retrain=retrain, seed=seed)
    m.setrealdata(bkgnormed)
    m.savehyperparameters()
    m.monitorevery = 100

    if train:
        m.train(steps)
        m.display_training()

    nj9cut = True
    if nj9cut:
        ncol = 3  # for plots below
        condlist = [[[1., 0., 0., 1., 0.]],
                    [[0., 1., 0., 1., 0.]],
                    [[0., 0., 1., 1., 0.]],
                    [[1., 0., 0., 0., 1.]],
                    [[0., 1., 0., 0., 1.]],
                    [[0., 0., 1., 0., 1.]]]
        select0 = (rawinputs['njet'] == 7) & (rawinputs['nbtag'] == 2)
        select1 = (rawinputs['njet'] == 8) & (rawinputs['nbtag'] == 2)
        select2 = (rawinputs['njet'] >= 9) & (rawinputs['nbtag'] == 2)
        select3 = (rawinputs['njet'] == 7) & (rawinputs['nbtag'] >= 3)
        select4 = (rawinputs['njet'] == 8) & (rawinputs['nbtag'] >= 3)
        select5 = (rawinputs['njet'] >= 9) & (rawinputs['nbtag'] >= 3)
        select_data = [select0, select1, select2, select3, select4, select5]

        plottextlist = [
            r'$N_j=7, N_b=2$', r'$N_j=8, N_b=2$', r'$N_j\geq 9, N_b=2$',
            r'$N_j=7, N_b\geq 3$', r'$N_j=8, N_b\geq 3$',
            r'$N_j\geq 9, N_b\geq 3$'
        ]
        njlist = [7, 8, 9, 7, 8, 9]
        nblist = [2, 2, 2, 3, 3, 3]

    else:
        ncol = 3  # for plots
        condlist = [[[0., 1., 0., 0., 1., 0.]],
                    [[0., 0., 1., 0., 1., 0.]],
                    [[0., 0., 0., 1., 1., 0.]],
                    [[0., 1., 0., 0., 0., 1.]],
                    [[0., 0., 1., 0., 0., 1.]],
                    [[0., 0., 0., 1., 0., 1.]]]
        select0 = (rawinputs['njet'] == 8) & (rawinputs['nbtag'] == 2)
        select1 = (rawinputs['njet'] == 9) & (rawinputs['nbtag'] == 2)
        select2 = (rawinputs['njet'] >= 10) & (rawinputs['nbtag'] == 2)
        select3 = (rawinputs['njet'] == 8) & (rawinputs['nbtag'] >= 3)
        select4 = (rawinputs['njet'] == 9) & (rawinputs['nbtag'] >= 3)
        select5 = (rawinputs['njet'] >= 10) & (rawinputs['nbtag'] >= 3)
        select_data = [select0, select1, select2, select3, select4, select5]

        plottextlist = [
            r'$N_j=8, N_b=2$', r'$N_j=9, N_b=2$', r'$N_j\geq 10, N_b=2$',
            r'$N_j=8, N_b\geq 3$', r'$N_j=9, N_b\geq 3$',
            r'$N_j\geq 10, N_b\geq 3$'
        ]

        njlist = [8, 9, 10, 8, 9, 10]
        nblist = [2, 2, 2, 3, 3, 3]

    # create fake data

    fakedatalist = []
    for cond, nj, nb in zip(condlist, njlist, nblist):
        nmcbatches = int(bkgnormed.shape[0] / minibatch)
        nmcremain = bkgnormed.shape[0] % minibatch
        fakelist = []
        cond_to_append = np.repeat(cond, minibatch, axis=0)
        for _ib in range(nmcbatches):
            xin = bkgnormed[_ib * minibatch:(_ib + 1) * minibatch, :inputdim]
            # append the conditional to the feature inputs
            xin = np.hstack((xin, cond_to_append))
            xgen = m.model.predict(xin)
            #xgen = m.generate_sample(cond)
            fakelist.append(xgen)
        # last batch
        xin = bkgnormed[nmcbatches * minibatch:, :inputdim]
        # append the conditional to the feature inputs
        xin = np.hstack((xin, np.repeat(cond, nmcremain, axis=0)))
        xgen = m.model.predict(xin)
        fakelist.append(xgen)

        # all data
        fakedata = np.vstack(fakelist)
        fakedata = fakedata * inputsigma[:, :inputdim] + inputmeans[:, :inputdim]
        nfakes = fakedata.shape[0]

        fakedata = np.hstack((fakedata,
                              np.array([nj] * nfakes).reshape((nfakes, 1)),
                              np.array([nb] * nfakes).reshape((nfakes, 1))))
        fakedatalist.append(fakedata)

    labelsindices = [['MET', 'met', 0.0, xmax[0]], ['H_T', 'ht', 0.0, xmax[1]],\
        ['p_{T5}', 'pt5', 0.0, xmax[2]], ['p_{T6}', 'pt6', 0.0, xmax[3]]]
    nbins = 20
    runplots = True
    if runplots:
        yscales = ['log', 'linear']
        for yscale in yscales:
            for li in labelsindices:
                pos = featurevars.index(li[1])
                fig, ax = plt.subplots(2, ncol, figsize=(3 * ncol, 6))
                iplot = 0
                for fakedata, seld, plottext in zip(fakedatalist, select_data,
                                                    plottextlist):
                    input_data = rawinputs[seld]
                    # Make ratio plots
                    plotaxes = MplPlotter.ratio_plot(
                        dict(x=input_data[li[1]], bins=nbins, range=(li[2], li[3]),
                             errorbars=True, normed=True, histtype='marker'),
                        dict(x=fakedata[:, pos], bins=nbins, range=(li[2], li[3]),
                             errorbars=True, normed=True),
                        ratio_range=(0.25, 1.9))

                    plotfig = plotaxes[0][0].get_figure()
                    plotaxes[0][0].set_yscale(yscale)
                    plotfig.set_size_inches(5, 5)
                    plotfig.savefig(
                        os.path.join(
                            savedir,
                            f'result_{li[1]}_{iplot}_{yscale}_ratio.pdf'))

                    # make matrix of plots
                    row = iplot // ncol
                    col = iplot % ncol
                    iplot += 1
                    plt.sca(ax[row, col])
                    ax[row, col].set_yscale(yscale)
                    ax[row, col].set_xlabel(f"${li[0]}$ (GeV)")
                    MplPlotter.hist(input_data[li[1]],
                                    bins=nbins,
                                    alpha=0.5,
                                    range=(li[2], li[3]),
                                    errorbars=True,
                                    histtype='marker',
                                    normed=True)
                    MplPlotter.hist(fakedata[:, pos],
                                    bins=nbins,
                                    alpha=0.5,
                                    range=(li[2], li[3]),
                                    errorbars=True,
                                    normed=True)
                    MplPlotter.hist(bkg[li[1]],
                                    bins=nbins,
                                    alpha=0.5,
                                    range=(li[2], li[3]),
                                    histtype='step',
                                    normed=True)
                    plt.text(0.6,
                             0.8,
                             plottext,
                             transform=ax[row, col].transAxes,
                             fontsize=10)

                fig.tight_layout()
                fig.savefig(
                    os.path.join(savedir,
                                 f'result_matrix_{li[1]}_{yscale}.pdf'))

    generatesigsample = True
    if generatesigsample:
        bkgsigfakedata = np.vstack(fakedatalist)

        datadict = {}
        for var, idx in zip(featurevars, range(len(featurevars))):
            datadict[var] = bkgsigfakedata[:, idx]

        writetorootfile(os.path.join(savedir, 'fakedata_NAF.root'), datadict)
    pass
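The batching logic above, appending a repeated one-hot condition vector to each feature minibatch before calling model.predict, can be isolated into a small helper. A sketch with a dummy predict function, purely to illustrate the array handling:

import numpy as np

def batched_predict(features, cond, predict, minibatch=128):
    # Append the repeated condition vector to each feature batch and run
    # a user-supplied predict() on it, mirroring the loop in the function above.
    outputs = []
    for start in range(0, len(features), minibatch):
        batch = features[start:start + minibatch]
        xin = np.hstack((batch, np.repeat(cond, len(batch), axis=0)))
        outputs.append(predict(xin))
    return np.vstack(outputs)

# Toy usage with an identity "model" that just returns the features.
feats = np.random.rand(300, 4)
cond = np.array([[1., 0., 0., 1., 0.]])
fake = batched_predict(feats, cond, predict=lambda x: x[:, :4])
print(fake.shape)  # (300, 4)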
Example #11
def comp_study(input_data,
               n_events,
               xlims=None,
               resamples=100,
               dist_name='2Gauss'):
    bb_dir = os.path.join('/Users/brianpollack/Coding/BayesianBlocks')
    do_log = True

    # data_nom = input_data[:n_events]
    if dist_name == 'Gauss':
        np.random.seed(88)
        data_nom = np.random.normal(125, 2, size=n_events)
        resample_list = np.random.normal(125, 2, size=(resamples, n_events))
        do_log = False

    elif dist_name == '2LP':
        np.random.seed(33)
        data_nom = np.concatenate(
            (np.random.laplace(loc=90, scale=5, size=int(n_events * 0.65)),
             np.random.laplace(loc=110, scale=1.5, size=int(n_events * 0.25)),
             np.random.uniform(low=80, high=120, size=int(n_events * 0.10))))
        resample_list = np.concatenate(
            (np.random.laplace(
                loc=90, scale=5, size=(resamples, int(n_events * 0.65))),
             np.random.laplace(
                 loc=110, scale=1.5, size=(resamples, int(n_events * 0.25))),
             np.random.uniform(
                 low=80, high=120, size=(resamples, int(n_events * 0.10)))),
            axis=1)
        do_log = False

    elif dist_name == 'jPT':
        np.random.seed(11)
        data_nom = np.random.choice(input_data, size=n_events, replace=False)
        resample_list = np.random.choice(input_data,
                                         size=(resamples, n_events),
                                         replace=True)

    elif dist_name == 'DY':
        np.random.seed(200)
        data_nom = np.random.choice(input_data, size=n_events, replace=False)
        resample_list = np.random.choice(input_data,
                                         size=(resamples, n_events),
                                         replace=True)
    else:
        np.random.seed(1)
        data_nom = np.random.choice(input_data, size=n_events, replace=False)
        resample_list = np.random.choice(input_data,
                                         size=(resamples, n_events),
                                         replace=True)

    fig_hist, axes_hist = plt.subplots(3,
                                       3,
                                       sharex=True,
                                       sharey=False,
                                       constrained_layout=True)
    fig_hist.suptitle(f'{dist_name} Distribution, N={n_events}', fontsize=22)
    # fig_hist.text(-0.03, 0.5, 'Entries/Bin Width', va='center', rotation='vertical', fontsize=20)
    # axes_hist[2][0].get_xaxis().set_ticks([])
    # axes_hist[2][1].get_xaxis().set_ticks([])
    # axes_hist[2][2].get_xaxis().set_ticks([])

    axes_hist[0][0].set_title('Sturges')
    hist_sturges_bw = skh_plt.hist(x=data_nom,
                                   histtype='stepfilled',
                                   bins='sturges',
                                   errorbars=False,
                                   alpha=0.5,
                                   log=do_log,
                                   scale='binwidth',
                                   err_type='gaussian',
                                   ax=axes_hist[0][0])

    axes_hist[0][1].set_title('Doane')
    hist_doane_bw = skh_plt.hist(x=data_nom,
                                 histtype='stepfilled',
                                 bins='doane',
                                 errorbars=False,
                                 alpha=0.5,
                                 log=do_log,
                                 scale='binwidth',
                                 err_type='gaussian',
                                 ax=axes_hist[0][1])

    axes_hist[0][2].set_title('Scott')
    hist_scott_bw = skh_plt.hist(x=data_nom,
                                 histtype='stepfilled',
                                 bins='scott',
                                 errorbars=False,
                                 alpha=0.5,
                                 log=do_log,
                                 scale='binwidth',
                                 err_type='gaussian',
                                 ax=axes_hist[0][2])

    axes_hist[1][0].set_title('Freedman Diaconis')
    axes_hist[1][0].set_ylabel('Entries/Bin Width', fontsize=20)
    hist_fd_bw = skh_plt.hist(x=data_nom,
                              histtype='stepfilled',
                              bins='fd',
                              errorbars=False,
                              alpha=0.5,
                              log=do_log,
                              scale='binwidth',
                              err_type='gaussian',
                              ax=axes_hist[1][0])

    axes_hist[1][1].set_title('Knuth')
    _, bk = knuth_bin_width(data_nom, return_bins=True)
    hist_knuth_bw = skh_plt.hist(x=data_nom,
                                 histtype='stepfilled',
                                 bins=bk,
                                 errorbars=False,
                                 alpha=0.5,
                                 log=do_log,
                                 scale='binwidth',
                                 err_type='gaussian',
                                 ax=axes_hist[1][1])

    axes_hist[1][2].set_title('Rice')
    hist_rice_bw = skh_plt.hist(x=data_nom,
                                histtype='stepfilled',
                                bins='rice',
                                errorbars=False,
                                alpha=0.5,
                                log=do_log,
                                scale='binwidth',
                                err_type='gaussian',
                                ax=axes_hist[1][2])

    axes_hist[2][0].set_title('Sqrt(N)')
    hist_sqrt_bw = skh_plt.hist(x=data_nom,
                                histtype='stepfilled',
                                bins='sqrt',
                                errorbars=False,
                                alpha=0.5,
                                log=do_log,
                                scale='binwidth',
                                err_type='gaussian',
                                ax=axes_hist[2][0])

    # bep = bep_optimizer(data_nom)
    # _, bep = pd.qcut(data_nom, nep, retbins=True)

    hist_sturges = np.histogram(data_nom, bins='sturges')
    hist_doane = np.histogram(data_nom, bins='doane')
    hist_scott = np.histogram(data_nom, bins='scott')
    hist_fd = np.histogram(data_nom, bins='fd')
    hist_knuth = np.histogram(data_nom, bins=bk)
    hist_rice = np.histogram(data_nom, bins='rice')
    hist_sqrt = np.histogram(data_nom, bins='sqrt')

    r_sturges = rough(hist_sturges_bw, plot=False)
    r_doane = rough(hist_doane_bw)
    r_scott = rough(hist_scott_bw)
    r_fd = rough(hist_fd_bw)
    r_knuth = rough(hist_knuth_bw, plot=False)
    r_rice = rough(hist_rice_bw)
    r_sqrt = rough(hist_sqrt_bw, plot=False)

    eli_sturges = err_li(data_nom, hist_sturges)
    eli_doane = err_li(data_nom, hist_doane)
    eli_scott = err_li(data_nom, hist_scott)
    eli_fd = err_li(data_nom, hist_fd)
    eli_knuth = err_li(data_nom, hist_knuth)
    eli_rice = err_li(data_nom, hist_rice)
    eli_sqrt = err_li(data_nom, hist_sqrt)

    avg_eli_sturges = []
    avg_eli_doane = []
    avg_eli_scott = []
    avg_eli_fd = []
    avg_eli_knuth = []
    avg_eli_rice = []
    avg_eli_sqrt = []
    for i in resample_list:
        avg_eli_sturges.append(err_li(i, hist_sturges))
        avg_eli_doane.append(err_li(i, hist_doane))
        avg_eli_scott.append(err_li(i, hist_scott))
        avg_eli_fd.append(err_li(i, hist_fd))
        avg_eli_knuth.append(err_li(i, hist_knuth))
        avg_eli_rice.append(err_li(i, hist_rice))
        avg_eli_sqrt.append(err_li(i, hist_sqrt))

    avg_eli_sturges = np.mean(avg_eli_sturges)
    avg_eli_doane = np.mean(avg_eli_doane)
    avg_eli_scott = np.mean(avg_eli_scott)
    avg_eli_fd = np.mean(avg_eli_fd)
    avg_eli_knuth = np.mean(avg_eli_knuth)
    avg_eli_rice = np.mean(avg_eli_rice)
    avg_eli_sqrt = np.mean(avg_eli_sqrt)

    avg_eli_list = [
        avg_eli_sturges, avg_eli_doane, avg_eli_scott, avg_eli_fd,
        avg_eli_knuth, avg_eli_rice, avg_eli_sqrt
    ]
    r_list = [r_sturges, r_doane, r_scott, r_fd, r_knuth, r_rice, r_sqrt]

    elis_list = [
        eli_sturges, eli_doane, eli_scott, eli_fd, eli_knuth, eli_rice,
        eli_sqrt
    ]

    axes_hist[2][1].set_title('Equal Population')
    bep = bep_optimizer(data_nom, resample_list, r_list, avg_eli_list)
    hist_ep_bw = skh_plt.hist(x=data_nom,
                              histtype='stepfilled',
                              bins=bep,
                              errorbars=False,
                              alpha=0.5,
                              log=do_log,
                              scale='binwidth',
                              err_type='gaussian',
                              ax=axes_hist[2][1])
    hist_ep = np.histogram(data_nom, bins=bep)
    r_ep = rough(hist_ep_bw)
    eli_ep = err_li(data_nom, hist_ep)
    avg_eli_ep = []
    for i in resample_list:
        avg_eli_ep.append(err_li(i, hist_ep))
    avg_eli_ep = np.mean(avg_eli_ep)

    axes_hist[2][2].set_title('Bayesian Blocks')
    p0 = bb_optimizer(data_nom, resample_list, r_list, avg_eli_list)
    bb = bayesian_blocks(data_nom, p0=p0)
    if xlims:
        bb[0] = xlims[0]
        bb[-1] = xlims[-1]
    hist_bb_bw = skh_plt.hist(x=data_nom,
                              histtype='stepfilled',
                              bins=bb,
                              errorbars=False,
                              alpha=1,
                              log=do_log,
                              scale='binwidth',
                              err_type='gaussian',
                              ax=axes_hist[2][2])
    # if n_events == 1000 and dist_name == '2LP':
    # axes_hist[2][2].set_ylim((0, 100))
    hist_bb = np.histogram(data_nom, bins=bb)
    r_bb = rough(hist_bb_bw, plot=False)
    eli_bb = err_li(data_nom, hist_bb)
    avg_eli_bb = []
    for i in resample_list:
        avg_eli_bb.append(err_li(i, hist_bb))
    avg_eli_bb = np.mean(avg_eli_bb)

    r_list.append(r_ep)
    r_list.append(r_bb)
    avg_eli_list.append(avg_eli_ep)
    avg_eli_list.append(avg_eli_bb)
    elis_list.append(eli_ep)
    elis_list.append(eli_bb)
    plt.savefig(bb_dir + f'/plots/bin_comp/hists_{dist_name}_{n_events}.pdf')

    xs = [
        'Sturges', 'Doane', 'Scott', 'FD', 'Knuth', 'Rice', 'Sqrt', 'EP', 'BB'
    ]

    fig_metric, axes_metric = plt.subplots(2, 1, constrained_layout=True)
    fig_hist.suptitle(f'{dist_name} Distribution, N={n_events}')
    for i in range(len(elis_list)):
        if xs[i] == 'BB':
            axes_metric[0].scatter(avg_eli_list[i],
                                   r_list[i],
                                   label=xs[i],
                                   s=400,
                                   marker='*',
                                   c='k')
        else:
            axes_metric[0].scatter(avg_eli_list[i],
                                   r_list[i],
                                   label=xs[i],
                                   s=200)
    axes_metric[0].set_ylabel(r'$W_n$ (Wiggles)')
    axes_metric[0].set_xlabel(r'$\hat{E}$ (Average Error)')
    # ax = plt.gca()
    # ax.set_yscale('log')
    # ax.set_xscale('log')
    # ax.relim()
    # ax.autoscale_view()
    axes_metric[0].grid()
    axes_metric[0].legend(ncol=1,
                          bbox_to_anchor=(1.05, 1.15),
                          loc='upper left')
    axes_metric[0].set_title(f'{dist_name} Distribution, N={n_events}',
                             fontsize=22)
    # plt.savefig(bb_dir+f'/plots/bin_comp/scat_{dist_name}_{n_events}.pdf')

    # plt.figure()
    rank_rough = rankdata(r_list, method='min')
    rank_avg_eli = rankdata(avg_eli_list, method='min')

    cont = axes_metric[1].bar(xs,
                              rank_rough,
                              0.35,
                              label=r'$W_n$ Ranking',
                              alpha=0.5)
    cont[-1].set_alpha(1)
    cont = axes_metric[1].bar(xs,
                              rank_avg_eli,
                              0.35,
                              bottom=rank_rough,
                              label=r'$\hat{E}$ Ranking',
                              alpha=0.5)
    cont[-1].set_alpha(1)
    axes_metric[1].legend(loc='upper left', bbox_to_anchor=(1.0, 0.8))
    # axes_metric[1].set_title(f'Combined Ranking, {dist_name} Distribution, N={n_events}')
    axes_metric[1].set_xlabel('Binning Method')
    axes_metric[1].set_ylabel('Rank')
    plt.savefig(bb_dir + f'/plots/bin_comp/metric_{dist_name}_{n_events}.pdf')
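The fixed-rule binnings compared above (Sturges, Doane, Scott, FD, Rice, sqrt) are all available directly from numpy; a small sketch that only reports how many bins each rule picks for the same sample:

import numpy as np

data = np.random.normal(125, 2, 1000)
for rule in ['sturges', 'doane', 'scott', 'fd', 'rice', 'sqrt']:
    edges = np.histogram_bin_edges(data, bins=rule)
    print(f'{rule:>8}: {len(edges) - 1} bins')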
Example #12
def test_hist_fails(cmdopt, data_gen):
    with pytest.raises(ValueError):
        skh_plt.hist([data_gen[0], data_gen[1]], stacked=True, histtype='marker')
    with pytest.raises(ValueError):
        skh_plt.hist([data_gen[0], data_gen[1]], histtype='marker')
    with pytest.raises(KeyError):
        skh_plt.hist(1, err_return=True)
    with pytest.raises(ValueError):
        skh_plt.hist([data_gen[0], data_gen[1]], weights=data_gen[2])
    with pytest.raises(ValueError):
        skh_plt.hist(data_gen[0], weights=data_gen[2][0:10])
    with pytest.raises(KeyError):
        skh_plt.hist(data_gen[0], err_type='fake', errorbars=True)

    output1 = skh_plt.hist(5)
    assert(np.all(output1[0] == 1))
    output2 = skh_plt.hist([], range=(0, 1))
    assert(np.all(output2[0] == 0))
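Outside pytest, the same failure modes can be triggered directly; a hedged sketch of the first case, where a stacked marker histogram is rejected with a ValueError (import path assumed as above):

import numpy as np
from skhep.visual import MplPlotter as skh_plt  # assumed import path

x = [np.random.normal(size=100), np.random.normal(size=100)]
try:
    skh_plt.hist(x, stacked=True, histtype='marker')
except ValueError as err:
    print('rejected as expected:', err)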
Example #13
def plotScores():
    isBlindAnalysis = True
    modelName = 'llqqDNN_100_60_2_0'
    outDirAfterDilep = [
        'Out_AfterDilepton_TrainggF1000_FullStat_1FatJet',
        'Out_AfterDilepton_TrainggF2000_FullStat_1FatJet',
        'Out_AfterDilepton_TrainggF3000_FullStat_1FatJet',
        'Out_AfterDilepton_TrainggF700_FullStat_1FatJet'
    ]

    outDirAfterggF = [
        'Out_AfterggFMerged_TrainggF1000_FullStat_1FatJet',
        'Out_AfterggFMerged_TrainggF2000_FullStat_1FatJet',
        'Out_AfterggFMerged_TrainggF3000_FullStat_1FatJet',
        'Out_AfterggFMerged_TrainggF700_FullStat_1FatJet'
    ]

    for idir in outDirAfterDilep:
        # for idir in outDirAfterggF:
        if isBlindAnalysis == False:
            yhat_data = np.load(os.path.join(idir, modelName, "yhat_data.npy"))

        yhat_train_signal = np.load(
            os.path.join(idir, modelName, "yhat_train_signal.npy"))
        yhat_train_background = np.load(
            os.path.join(idir, modelName, "yhat_train_background.npy"))

        yhat_test_signal = np.load(
            os.path.join(idir, modelName, "yhat_test_signal.npy"))
        yhat_test_background = np.load(
            os.path.join(idir, modelName, "yhat_test_background.npy"))

        bins = np.linspace(0, 1, 50)
        plt.hist(yhat_train_signal,
                 bins=bins,
                 histtype='step',
                 lw=2,
                 alpha=0.5,
                 color='deepskyblue',
                 label='TrainSignal',
                 normed=True)
        plt.hist(yhat_test_signal,
                 bins=bins,
                 histtype='stepfilled',
                 lw=2,
                 alpha=0.5,
                 color='turquoise',
                 label='TestSignal',
                 normed=True)
        plt.hist(yhat_train_background,
                 bins=bins,
                 histtype='step',
                 lw=2,
                 alpha=0.5,
                 color='deeppink',
                 label='TrainBackground',
                 normed=True)
        plt.hist(yhat_test_background,
                 bins=bins,
                 histtype='stepfilled',
                 lw=2,
                 alpha=0.5,
                 color='plum',
                 label='TestBackground',
                 normed=True)
        if isBlindAnalysis == False:
            skh_plt.hist(yhat_data,
                         bins=bins,
                         errorbars=True,
                         histtype='marker',
                         label='Data',
                         color='black',
                         normed=True)
        plt.legend(loc="upper center")
        plt.ylabel('Norm. Entries')
        plt.xlabel('DNN score')
        plt.yscale('log')
        plt.savefig(idir + '/' + modelName + "/MC_TrainTest_Score.pdf")
        # plt.show()
        plt.clf()