Ejemplo n.º 1
0
def test_ratio_plot_log(cmdopt, data_gen):
    """Regression test for ``skh_plt.ratio_plot`` with logarithmic options.

    The first histogram is drawn with ``log=True`` and marker style, the
    second one is weighted with ``data_gen[2]``, and the plot is requested
    with ``logx=True``.

    Depending on ``cmdopt``:

    * ``"generate"``: store ``output[1][0]``, ``output[1][1]``,
      ``output[2][0]`` and ``output[2][1]`` in the reference ``.npz`` file
      (keys ``bc1``, ``be1``, ``bc2``, ``be2``; presumably bin contents and
      bin edges of the main and ratio panels) and show the figure.
    * ``"test"``: compare the same four arrays against the stored reference.

    Any other value of ``cmdopt`` only exercises the plotting call.
    """
    output = skh_plt.ratio_plot(dict(x=data_gen[0],
                                     errorbars=True,
                                     histtype='marker',
                                     log=True,
                                     err_x=False),
                                dict(x=data_gen[1],
                                     weights=data_gen[2],
                                     errorbars=True),
                                logx=True,
                                ratio_range=(0, 10))

    if cmdopt == "generate":
        with open(answer_dir + '/answers_ratio_plot_log.npz', 'wb') as f:
            np.savez(f,
                     bc1=output[1][0],
                     be1=output[1][1],
                     bc2=output[2][0],
                     be2=output[2][1])
        output[0][0].set_title('test_ratio_plot_log')
        plt.show()
    elif cmdopt == "test":
        # np.load returns a lazily-read NpzFile that keeps the file open;
        # the context manager guarantees it is closed after the comparisons.
        with np.load(answer_dir + '/answers_ratio_plot_log.npz') as answers:
            # array_equal also checks the shapes, so a reference with a
            # different number of bins cannot pass through broadcasting.
            assert np.array_equal(output[1][0], answers['bc1'])
            assert np.array_equal(output[1][1], answers['be1'])
            assert np.array_equal(output[2][0], answers['bc2'])
            assert np.array_equal(output[2][1], answers['be2'])
Ejemplo n.º 2
0
def test_ratio_plot_stacked(cmdopt, data_gen):
    """Regression test for ``skh_plt.ratio_plot`` with stacked histograms.

    Both histograms stack ``data_gen[0]`` and ``data_gen[1]``; the second
    one is additionally weighted with ``data_gen[2]`` and uses
    ``err_style='line'``.  The plot is requested with ``range=(-5, 5)`` and
    ``bins='blocks'``.

    Depending on ``cmdopt``:

    * ``"generate"``: store ``output[1][0]``, ``output[1][1]``,
      ``output[2][0]`` and ``output[2][1]`` in the reference ``.npz`` file
      (keys ``bc1``, ``be1``, ``bc2``, ``be2``; presumably bin contents and
      bin edges of the main and ratio panels) and show the figure.
    * ``"test"``: compare the same four arrays against the stored reference.

    Any other value of ``cmdopt`` only exercises the plotting call.
    """
    output = skh_plt.ratio_plot(dict(x=[data_gen[0], data_gen[1]],
                                     stacked=True,
                                     errorbars=True),
                                dict(x=[data_gen[0], data_gen[1]],
                                     weights=[data_gen[2], data_gen[2]],
                                     stacked=True,
                                     errorbars=True,
                                     err_style='line'),
                                range=(-5, 5),
                                bins='blocks')

    if cmdopt == "generate":
        with open(answer_dir + '/answers_ratio_plot_stacked.npz', 'wb') as f:
            np.savez(f,
                     bc1=output[1][0],
                     be1=output[1][1],
                     bc2=output[2][0],
                     be2=output[2][1])
        output[0][0].set_title('test_ratio_plot_stacked')
        plt.show()
    elif cmdopt == "test":
        # np.load returns a lazily-read NpzFile that keeps the file open;
        # the context manager guarantees it is closed after the comparisons.
        with np.load(answer_dir + '/answers_ratio_plot_stacked.npz') as answers:
            # array_equal also checks the shapes, so a reference with a
            # different layout cannot pass through broadcasting.
            assert np.array_equal(output[1][0], answers['bc1'])
            assert np.array_equal(output[1][1], answers['be1'])
            assert np.array_equal(output[2][0], answers['bc2'])
            assert np.array_equal(output[2][1], answers['be2'])
Ejemplo n.º 3
0
def test_ratio_plot_quick(cmdopt, data_gen):
    """Check how ``ratio_plot`` handles ``bins``/``range`` given per histogram.

    Supplying the option in both histogram dictionaries is expected to raise
    ``KeyError``; supplying it in only one of them is expected to be applied
    to the resulting plot.
    """
    # --- bins -------------------------------------------------------------
    # Different binning in each dictionary must be rejected.
    with pytest.raises(KeyError):
        skh_plt.ratio_plot({'x': data_gen[0], 'bins': 10},
                           {'x': data_gen[1], 'bins': 11})

    # Binning given on one side only is accepted.
    result = skh_plt.ratio_plot({'x': data_gen[0]},
                                {'x': data_gen[1], 'bins': 11})
    assert len(result[1][0]) == 11

    # --- range ------------------------------------------------------------
    # Different ranges in each dictionary must be rejected.
    with pytest.raises(KeyError):
        skh_plt.ratio_plot({'x': data_gen[0], 'range': (0, 1)},
                           {'x': data_gen[1], 'range': (1, 2)})

    # Range supplied through the first dictionary only.
    result = skh_plt.ratio_plot({'x': data_gen[0], 'range': (-0.1, 0.1)},
                                {'x': data_gen[1]})
    edges = result[1][1]  # presumably the bin edges of the main panel
    assert edges[0] >= -0.1 and edges[-1] <= 0.1

    # Range supplied through the second dictionary only.
    result = skh_plt.ratio_plot({'x': data_gen[0]},
                                {'x': data_gen[1], 'range': (-0.1, 0.1)})
    edges = result[1][1]
    assert edges[0] >= -0.1 and edges[-1] <= 0.1
Ejemplo n.º 4
0
def train_and_validate(steps=10000, minibatch=128, LRrange=None, beta1=0.9, beta2=0.999, nafdim=16, depth=2,
                       savedir='abcdnn', seed=100, retrain=False, train=True):
    """Train an ``ABCDdnn`` model on background events and write validation output.

    The function
      1. loads the data with ``prepdata()`` and selects the background
         (everything that is not ``njet >= 9`` and ``nbtag >= 3``),
      2. builds the model, optionally trains it for ``steps`` steps,
      3. pushes all background events through ``m.model.predict`` once per
         region condition to obtain "fake" data for six (njet, nbtag) regions,
      4. saves ratio plots and a 2 x ``ncol`` plot matrix per variable and
         y-scale as PDF files in ``savedir``,
      5. writes the stacked fake data to ``fakedata_NAF.root`` in ``savedir``.

    Args:
        steps: Number of steps passed to ``m.train``.
        minibatch: Passed to ``ABCDdnn`` and also used as the batch size
            when predicting the fake data.
        LRrange: Forwarded to ``ABCDdnn``.  ``None`` (the default) means
            ``[0.0001, 0.00001, 10000, 0]``; a fresh list is created on every
            call so the default is never shared between calls.
        beta1, beta2, nafdim, depth, retrain, seed: Forwarded unchanged to
            the ``ABCDdnn`` constructor.
        savedir: Forwarded to ``ABCDdnn`` and used as the output directory
            for the PDF plots and the ROOT file.
        train: When true, ``m.train(steps)`` and ``m.display_training()``
            are called; otherwise the model is used as constructed.

    Returns:
        None.
    """
    if LRrange is None:
        LRrange = [0.0001, 0.00001, 10000, 0]

    rawinputs, normedinputs, inputmeans, inputsigma, ncat_per_feature = prepdata()
    print(ncat_per_feature)
    inputdim = 4
    ncat_per_feature = ncat_per_feature[0:inputdim]
    # Columns after the first `inputdim` ones are the conditional inputs.
    conddim = normedinputs.shape[1] - inputdim

    issignal = (rawinputs['njet'] >= 9) & (rawinputs['nbtag'] >= 3)  # signal_selection
    isbackground = ~issignal
    bkgnormed = normedinputs[isbackground]
    bkg = rawinputs[isbackground]
    # Upper plot limit per variable: mean + 5 sigma.
    xmax = np.reshape(inputmeans + 5 * inputsigma, inputmeans.shape[1])

    m = ABCDdnn(ncat_per_feature, inputdim, minibatch=minibatch, conddim=conddim, LRrange=LRrange,
                beta1=beta1, beta2=beta2, nafdim=nafdim, depth=depth, savedir=savedir, retrain=retrain, seed=seed)
    m.setrealdata(bkgnormed)
    m.savehyperparameters()
    m.monitorevery = 100

    if train:
        m.train(steps)
        m.display_training()

    njet = rawinputs['njet']
    nbtag = rawinputs['nbtag']

    nj9cut = True
    if nj9cut:
        ncol = 3  # for plots below
        # One condition vector per region, in the same order as
        # select_data / plottextlist / njlist / nblist below.
        condlist = [
            [[1., 0., 0., 1., 0.]],
            [[0., 1., 0., 1., 0.]],
            [[0., 0., 1., 1., 0.]],
            [[1., 0., 0., 0., 1.]],
            [[0., 1., 0., 0., 1.]],
            [[0., 0., 1., 0., 1.]],
        ]
        select_data = [
            (njet == 7) & (nbtag == 2),
            (njet == 8) & (nbtag == 2),
            (njet >= 9) & (nbtag == 2),
            (njet == 7) & (nbtag >= 3),
            (njet == 8) & (nbtag >= 3),
            (njet >= 9) & (nbtag >= 3),
        ]
        # Raw strings: "\g" is not a valid escape sequence in a normal literal.
        plottextlist = [
            r'$N_j=7, N_b=2$', r'$N_j=8, N_b=2$', r'$N_j\geq 9, N_b=2$',
            r'$N_j=7, N_b\geq 3$', r'$N_j=8, N_b\geq 3$',
            r'$N_j\geq 9, N_b\geq 3$',
        ]
        njlist = [7, 8, 9, 7, 8, 9]
        nblist = [2, 2, 2, 3, 3, 3]
    else:
        ncol = 3  # for plots
        condlist = [
            [[0., 1., 0., 0., 1., 0.]],
            [[0., 0., 1., 0., 1., 0.]],
            [[0., 0., 0., 1., 1., 0.]],
            [[0., 1., 0., 0., 0., 1.]],
            [[0., 0., 1., 0., 0., 1.]],
            [[0., 0., 0., 1., 0., 1.]],
        ]
        select_data = [
            (njet == 8) & (nbtag == 2),
            (njet == 9) & (nbtag == 2),
            (njet >= 10) & (nbtag == 2),
            (njet == 8) & (nbtag >= 3),
            (njet == 9) & (nbtag >= 3),
            (njet >= 10) & (nbtag >= 3),
        ]
        plottextlist = [
            r'$N_j=8, N_b=2$', r'$N_j=9, N_b=2$', r'$N_j\geq 10, N_b=2$',
            r'$N_j=8, N_b\geq 3$', r'$N_j=9, N_b\geq 3$',
            r'$N_j\geq 10, N_b\geq 3$',
        ]
        njlist = [8, 9, 10, 8, 9, 10]
        nblist = [2, 2, 2, 3, 3, 3]

    # create fake data
    # The batch layout depends only on the sample size, so compute it once.
    nmcbatches, nmcremain = divmod(bkgnormed.shape[0], minibatch)

    fakedatalist = []
    for cond, nj, nb in zip(condlist, njlist, nblist):
        fakelist = []
        cond_to_append = np.repeat(cond, minibatch, axis=0)
        for _ib in range(nmcbatches):
            xin = bkgnormed[_ib * minibatch:(_ib + 1) * minibatch, :inputdim]
            # append conditional to the feature inputs
            xin = np.hstack((xin, cond_to_append))
            fakelist.append(m.model.predict(xin))

        # last (partial) batch; skipped when the sample size is an exact
        # multiple of `minibatch` so that predict() never sees empty input
        if nmcremain > 0:
            xin = bkgnormed[nmcbatches * minibatch:, :inputdim]
            xin = np.hstack((xin, np.repeat(cond, nmcremain, axis=0)))
            fakelist.append(m.model.predict(xin))

        # all data, transformed back with the stored sigma and mean
        fakedata = np.vstack(fakelist)
        fakedata = fakedata * inputsigma[:, :inputdim] + inputmeans[:, :inputdim]
        nfakes = fakedata.shape[0]

        # Append constant njet / nbtag columns identifying the region.
        fakedata = np.hstack((fakedata,
                              np.array([nj] * nfakes).reshape((nfakes, 1)),
                              np.array([nb] * nfakes).reshape((nfakes, 1))))
        fakedatalist.append(fakedata)

    # (axis label, variable name, lower plot limit, upper plot limit)
    labelsindices = [['MET', 'met', 0.0, xmax[0]], ['H_T', 'ht', 0.0, xmax[1]],
                     ['p_{T5}', 'pt5', 0.0, xmax[2]], ['p_{T6}', 'pt6', 0.0, xmax[3]]]
    nbins = 20
    runplots = True
    if runplots:
        for yscale in ('log', 'linear'):
            for label, var, xlow, xhigh in labelsindices:
                pos = featurevars.index(var)
                fig, ax = plt.subplots(2, ncol, figsize=(3 * ncol, 6))
                regions = zip(fakedatalist, select_data, plottextlist)
                for iplot, (fakedata, seld, plottext) in enumerate(regions):
                    input_data = rawinputs[seld]
                    # Make ratio plots
                    plotaxes = MplPlotter.ratio_plot(
                        dict(x=input_data[var], bins=nbins, range=(xlow, xhigh),
                             errorbars=True, normed=True, histtype='marker'),
                        dict(x=fakedata[:, pos], bins=nbins, range=(xlow, xhigh),
                             errorbars=True, normed=True),
                        ratio_range=(0.25, 1.9))

                    plotfig = plotaxes[0][0].get_figure()
                    plotaxes[0][0].set_yscale(yscale)
                    plotfig.set_size_inches(5, 5)
                    plotfig.savefig(
                        os.path.join(savedir,
                                     f'result_{var}_{iplot}_{yscale}_ratio.pdf'))

                    # make matrix of plots
                    row, col = divmod(iplot, ncol)
                    cell = ax[row, col]
                    plt.sca(cell)
                    cell.set_yscale(yscale)
                    cell.set_xlabel(f"${label}$ (GeV)")
                    # events selected for this region
                    MplPlotter.hist(input_data[var],
                                    bins=nbins,
                                    alpha=0.5,
                                    range=(xlow, xhigh),
                                    errorbars=True,
                                    histtype='marker',
                                    normed=True)
                    # model prediction for this region
                    MplPlotter.hist(fakedata[:, pos],
                                    bins=nbins,
                                    alpha=0.5,
                                    range=(xlow, xhigh),
                                    errorbars=True,
                                    normed=True)
                    # full background sample used as model input
                    MplPlotter.hist(bkg[var],
                                    bins=nbins,
                                    alpha=0.5,
                                    range=(xlow, xhigh),
                                    histtype='step',
                                    normed=True)
                    plt.text(0.6,
                             0.8,
                             plottext,
                             transform=cell.transAxes,
                             fontsize=10)

                fig.tight_layout()
                fig.savefig(
                    os.path.join(savedir,
                                 f'result_matrix_{var}_{yscale}.pdf'))

    generatesigsample = True
    if generatesigsample:
        bkgsigfakedata = np.vstack(fakedatalist)

        # One column of the stacked fake data per entry of `featurevars`.
        datadict = {var: bkgsigfakedata[:, idx] for idx, var in enumerate(featurevars)}

        writetorootfile(os.path.join(savedir, 'fakedata_NAF.root'), datadict)