예제 #1
0
def efficiency(data, args, feat, title=None):
    """
    Perform study of background efficiency vs. mass for different inclusive
    efficiency cuts

    Saves plot `figures/efficiency_[feat].pdf`, or
    `figures/[title]_efficiency_[feat].pdf` when `title` is given.

    Arguments:
        data: Pandas data frame from which to read data.
        args: Namespace holding command-line arguments.
        feat: Feature for which to study efficiencies
        title: Optional prefix for the output file name.

    Returns:
        Tuple `(c, args, path)`: the object returned by `plot`, the
        (unchanged) `args` and the path the plot was saved to.
    """

    # Define common variables
    msk = data['signal'] == 0
    effs = [5, 10, 20, 40, 80]

    # Background-only inputs. None of these depend on the cut value, so they
    # are extracted once instead of on every loop iteration.
    # NOTE(review): assumes `signal_low` is a pure predicate and that
    # `wpercentile` does not modify its input arrays -- confirm.
    low = signal_low(feat)
    bkg_feat = data.loc[msk, feat].values
    bkg_mass = data.loc[msk, 'm'].values
    bkg_weights = data.loc[msk, 'weight_test'].values

    # Define cuts: one weighted percentile of the background per efficiency
    cuts = [
        wpercentile(bkg_feat,
                    eff if low else 100 - eff,
                    weights=bkg_weights)
        for eff in effs
    ]

    # Compute cut efficiency vs. mass
    profiles = list()
    for cut in cuts:

        # Get correct pass-cut mask (inverted when signal sits at low values)
        bkg_pass = bkg_feat > cut
        if low:
            bkg_pass = ~bkg_pass

        # Fill efficiency profile: mean of the pass flag in each mass bin
        profile = ROOT.TProfile('profile_{}_{}'.format(feat, cut), "",
                                len(MASSBINS) - 1, MASSBINS)

        M = np.vstack((bkg_mass, bkg_pass)).T
        root_numpy.fill_profile(profile, M, weights=bkg_weights)

        # Add to list
        profiles.append(profile)

    # Perform plotting
    c = plot(args, data, feat, profiles, cuts, effs)

    # Output
    if title is None:
        path = 'figures/efficiency_{}.pdf'.format(standardise(feat))
    else:
        path = 'figures/' + title + '_efficiency_{}.pdf'.format(
            standardise(feat))
    c.save(path=path)
    return c, args, path
예제 #2
0
def fill_profile(data, var):
    """
    Create a ROOT.TProfile of `var` versus rhoDDT and fill it from `data`.

    The profile is binned in `BINS`; each entry is weighted by the
    `VAR_WEIGHT` column of `data`. Returns the filled profile.
    """

    name = 'profile_{}'.format(var)
    n_bins = len(BINS) - 1
    profile = ROOT.TProfile(name, "", n_bins, BINS)

    # Two-column array of (x, y) points: rhoDDT first, profiled variable second
    points = data[[VAR_RHODDT, var]].values
    weights = data[VAR_WEIGHT].values

    root_numpy.fill_profile(profile, points, weights=weights)
    return profile
예제 #3
0
    def GetTProfileHistograms(
        self,
        histogram_name,
        data_dictionary,
        variable_x,
        variable_y,
        list_selections=[],
        bins=1,
        range_low=0.000001,
        range_high=1. - 0.00001,
        xlabel="",
        ylabel="",
    ):
        '''Get a TProfile histogram with variable_y profiled against variable_x, after selections list_selections have been applied

        One TProfile is created per channel in `self.channels` and filled from
        every file listed in `self.channelFiles[channel]`.

        Arguments:
            histogram_name: Prefix of the profile name; the channel is appended.
            data_dictionary: `data_dictionary[channel][filename]` must unpack
                to `(variable_dict, selection_dict, weights)`.
            variable_x, variable_y: Objects whose `.name` keys `variable_dict`.
            list_selections: Objects whose `.name` keys `selection_dict`; the
                selections are AND-ed together. The default list is never
                mutated here.
            bins: Either a list of bin edges or a number of uniform bins.
            range_low, range_high: Axis range, used only for uniform bins.
            xlabel, ylabel: Axis titles.

        Returns:
            Dictionary mapping channel to its filled ROOT.TProfile.
        '''

        name_x = variable_x.name
        name_y = variable_y.name
        histogram_dictionary = {}
        for channel in self.channels:
            profile_name = histogram_name + channel
            if isinstance(bins, list):
                # Variable-width binning from explicit edges
                bins_array = array('d', bins)
                profile = ROOT.TProfile(profile_name, profile_name,
                                        len(bins_array) - 1, bins_array)
            else:
                # Uniform binning; the range is nudged inwards as before
                profile = ROOT.TProfile(profile_name, profile_name, bins,
                                        range_low + 0.0000001,
                                        range_high - 0.000001)
            profile.Sumw2()
            histogram_dictionary[channel] = profile

        for channel in self.channels:
            profile = histogram_dictionary[channel]
            for filename in self.channelFiles[channel]:
                variable_dict, selection_dict, weights = data_dictionary[
                    channel][filename]

                # Start from an all-True mask and AND in every selection
                total_selection = np.ones(len(weights)) > 0.0
                for selection in list_selections:
                    total_selection &= selection_dict[selection.name]

                to_weight = weights[total_selection]
                to_fill = np.zeros((len(to_weight), 2))
                to_fill[:, 0] = variable_dict[name_x][total_selection]
                to_fill[:, 1] = variable_dict[name_y][total_selection]

                if self.verbose:
                    print(to_fill)
                    print(to_weight)
                    # The original referenced an undefined name `variable`
                    # here, raising NameError whenever verbose was on.
                    print("Filling Variable " + name_y + " vs " + name_x)
                print("Filling Histogram")
                fill_profile(profile, to_fill, to_weight)
                print("Finished filling histogram")

            profile.GetXaxis().SetTitle(xlabel)
            profile.GetYaxis().SetTitle(ylabel)
        return histogram_dictionary
예제 #4
0
def test_fill_profile():
    """Exercise rnp.fill_profile on 1D/2D/3D profiles and on invalid input."""
    n_samples = 1000

    # Constant per-entry weight of 2 for the weighted 1D case
    w1D = np.full(n_samples, 2.)

    # Random inputs: one more column than the profile dimensionality
    data1D = RNG.randn(n_samples, 2)
    data2D = RNG.randn(n_samples, 3)
    data3D = RNG.randn(n_samples, 4)

    # Unweighted 1D profile
    prof1d = TProfile('th1d', 'test', 100, -5, 5)
    rnp.fill_profile(prof1d, data1D)
    assert_true(prof1d.Integral() != 0)

    # Uniformly weighted 1D profile must match the unweighted one
    prof1d_w = TProfile('th1dw', 'test', 100, -5, 5)
    rnp.fill_profile(prof1d_w, data1D, w1D)
    assert_true(prof1d_w.Integral() != 0)
    assert_equal(prof1d_w.Integral(), prof1d.Integral())

    # 2D profile
    prof2d = TProfile2D('th2d', 'test', 100, -5, 5, 100, -5, 5)
    rnp.fill_profile(prof2d, data2D)
    assert_true(prof2d.Integral() != 0)

    # 3D profile
    prof3d = TProfile3D('th3d', 'test', 10, -5, 5, 10, -5, 5, 10, -5, 5)
    rnp.fill_profile(prof3d, data3D)
    assert_true(prof3d.Integral() != 0)

    # Invalid argument combinations, each expected to raise ValueError
    bad_calls = [
        # array and weights lengths do not match
        (prof3d, data3D, np.ones(10)),
        # weights is not 1D
        (prof3d, data3D, np.ones((data3D.shape[0], 1))),
        # array is not 2D
        (prof3d, np.ones(10)),
    ]
    for call_args in bad_calls:
        assert_raises(ValueError, rnp.fill_profile, *call_args)

    # length of second axis is not one more than dimensionality of the profile
    for prof in (prof1d, prof2d, prof3d):
        assert_raises(ValueError, rnp.fill_profile, prof, RNG.randn(10, 5))

    # wrong type
    wrong_type = TH1D("test", "test", 1, 0, 1)
    assert_raises(TypeError, rnp.fill_profile, wrong_type, data1D)
예제 #5
0
def test_fill_profile():
    """Exercise rnp.fill_profile on 1D/2D/3D profiles and on invalid input.

    Array sizes are given as integers: NumPy rejects float sizes such as
    `1E6` with a TypeError.
    """
    np.random.seed(0)
    n_large = 10 ** 6
    n_small = 10 ** 4

    w1D = np.empty(n_large)
    w1D.fill(2.)
    data1D = np.random.randn(n_large, 2)
    data2D = np.random.randn(n_large, 3)
    data3D = np.random.randn(n_small, 4)

    a = TProfile('th1d', 'test', 1000, -5, 5)
    rnp.fill_profile(a, data1D)
    assert_true(a.Integral() != 0)

    # A uniform weight must not change the profiled means
    a_w = TProfile('th1dw', 'test', 1000, -5, 5)
    rnp.fill_profile(a_w, data1D, w1D)
    assert_true(a_w.Integral() != 0)
    assert_equal(a_w.Integral(), a.Integral())

    b = TProfile2D('th2d', 'test', 100, -5, 5, 100, -5, 5)
    rnp.fill_profile(b, data2D)
    assert_true(b.Integral() != 0)

    c = TProfile3D('th3d', 'test', 10, -5, 5, 10, -5, 5, 10, -5, 5)
    rnp.fill_profile(c, data3D)
    assert_true(c.Integral() != 0)

    # array and weights lengths do not match
    assert_raises(ValueError, rnp.fill_profile, c, data3D, np.ones(10))

    # weights is not 1D
    assert_raises(ValueError, rnp.fill_profile, c, data3D,
                  np.ones((data3D.shape[0], 1)))

    # array is not 2D
    assert_raises(ValueError, rnp.fill_profile, c, np.ones(10))

    # length of second axis is not one more than dimensionality of the profile
    for h in (a, b, c):
        assert_raises(ValueError, rnp.fill_profile, h,
                      np.random.randn(n_small, 5))

    # wrong type
    assert_raises(TypeError, rnp.fill_profile,
                  TH1D("test", "test", 1, 0, 1), data1D)
def fillprofile(profile, arrx, arry):
    """Fill `profile` with the points obtained by combining `arrx` and `arry`.

    The two inputs are merged by `combinevectors` and the result is handed to
    `root_numpy.fill_profile` without weights.
    """
    root_numpy.fill_profile(profile, combinevectors(arrx, arry))
예제 #7
0
def main():
    """
    Load data and trained models, then produce a series of diagnostic plots.

    Steps, in order: read the data via `getData`; add the scaled classifier
    output as 'NN' and, after loading a checkpoint into the adversarial model,
    as 'ANN'; then plot the cost log, 1D distributions, ROC curves,
    substructure profiles versus jet mass and the reverse profiles. The
    weight-sparsity and percentile-contour sections are disabled (`if False`).

    Plots are shown interactively and, when the local `save` flag is set, also
    written as PDF files to the current directory.

    NOTE: this function uses Python 2 `print` statements.
    """

    # Set pyplot style
    plt.style.use('ggplot')

    # Whether to save plots
    save = True

    # Get data
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    substructure_vars = ['jet_tau21', 'jet_D2', 'jet_m']
    decorrelation_vars = ['jet_m']
    X, Y, W, P, signal, background, names = getData(decorrelation_vars)

    # Boolean mask selecting the rows labelled as signal (Y == 1)
    msk_sig = (Y == 1.)

    # Load pre-trained classifier
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    # Load existing classifier model from file
    classifier = load_model('classifier.h5')

    # Add neural network classifier output, without adversarial training
    signal['NN'] = classifier.predict(X[msk_sig], batch_size=1024)
    background['NN'] = classifier.predict(X[~msk_sig], batch_size=1024)

    # Scale to mean 0.5 and sensible range
    #scaler = preprocessing.StandardScaler().fit(background['NN'].reshape(-1,1))
    #signal    ['NN'] = (scaler.transform(signal    ['NN'].reshape(-1,1)) / 4. + 0.5).reshape(signal    ['jet_m'].shape)
    #background['NN'] = (scaler.transform(background['NN'].reshape(-1,1)) / 4. + 0.5).reshape(background['jet_m'].shape)

    # Standardise with the weighted background mean/std, shrink by a factor 8
    # and shift by 0.5; the same transformation is applied to signal.
    wmean, wstd = weighted_avg_and_std(background['NN'].ravel(),
                                       background['weight'].ravel())
    signal['NN'] = ((signal['NN'] - wmean) / wstd / 8. + 0.5).reshape(
        signal['jet_m'].shape)
    background['NN'] = ((background['NN'] - wmean) / wstd / 8. + 0.5).reshape(
        background['jet_m'].shape)

    # Remember to use 'NN' in comparisons later
    substructure_vars += ['NN']

    # Load adversarially trained models
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    # Combined
    adversarial = adversarial_model(classifier, [(64, 'tanh')] * 2, 1,
                                    P.shape[1])
    load_checkpoint(adversarial)

    # Add neural network classifier output again, now stored as 'ANN'.
    # NOTE(review): presumably `load_checkpoint(adversarial)` updates the
    # weights of the wrapped `classifier`, so these are the adversarially
    # trained predictions -- confirm; otherwise 'ANN' is identical to 'NN'.
    signal['ANN'] = classifier.predict(X[msk_sig], batch_size=1024)
    background['ANN'] = classifier.predict(X[~msk_sig], batch_size=1024)

    # Scale to mean 0.5 and sensible range
    #scaler = preprocessing.StandardScaler().fit(background['ANN'].reshape(-1,1))
    #signal    ['ANN'] = (scaler.transform(signal    ['ANN'].reshape(-1,1)) / 4. + 0.5).reshape(signal    ['jet_m'].shape)
    #background['ANN'] = (scaler.transform(background['ANN'].reshape(-1,1)) / 4. + 0.5).reshape(background['jet_m'].shape)

    wmean, wstd = weighted_avg_and_std(background['ANN'].ravel(),
                                       background['weight'].ravel())
    signal['ANN'] = ((signal['ANN'] - wmean) / wstd / 8. + 0.5).reshape(
        signal['jet_m'].shape)
    background['ANN'] = ((background['ANN'] - wmean) / wstd / 8. +
                         0.5).reshape(background['jet_m'].shape)

    # Remember to use 'ANN' in comparisons later
    substructure_vars += ['ANN']

    # Weights sparsity
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    # Disabled section: sorted, normalised absolute weights of each layer
    if False:

        print "\nWeights sparsity:"

        # NOTE(review): `num` is passed as a float (100.), which current NumPy
        # rejects; this value of `bins` is also overwritten inside the loop
        # before it is used.
        bins = np.linspace(0, 1, 100.)

        for ilayer, layer in enumerate(classifier.layers):

            # If layer doesn't have any weights (e.g. input or output layer), continue
            if len(layer.get_weights()) == 0: continue

            # Sort |weights| ascending and normalise to the largest one
            weights = np.sort(np.abs(layer.get_weights()[0]).ravel())
            weights /= weights[-1]
            bins = np.linspace(0, 1, weights.size, endpoint=True)

            plt.plot(bins, weights, alpha=0.4, label='Layer %d' % (ilayer + 1))
            pass

        plt.grid()
        plt.legend()
        plt.show()

        pass

    # Percentile contours
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    # Disabled section: percentile contours of each variable vs. jet mass
    if False:

        print "\nPercentile contours:"

        profile_var = 'jet_m'  # Variable against which to compute and show profile

        for var in substructure_vars:
            print "-- %s" % var

            # Logarithmic x-binning from 30 to 300; fine linear y-binning
            binsx = np.logspace(
                1, 2, 50 + 1, endpoint=True
            ) * 3.  #np.linspace( 0., 300.,  60 + 1, endpoint = True)
            binsy = np.linspace(-50., 500., 10000 + 1, endpoint=True)

            H, _, _ = np.histogram2d(background[profile_var],
                                     background[var], [binsx, binsy],
                                     weights=background['weight'])
            H = np.array(H).T

            num_contours = 15

            # Replace bin edges by bin centres
            binsx = (binsx[:-1] + binsx[1:]) * 0.5
            binsy = (binsy[:-1] + binsy[1:]) * 0.5

            contours = np.zeros((len(binsx), num_contours))

            # For each x-bin, compute the weighted percentiles of the y-values
            # NOTE(review): `eff` lies in (0, 1) here, whereas other callers
            # of `wpercentile` in this code base pass values in percent --
            # confirm the expected scale.
            for bin in range(len(binsx)):
                for c in range(num_contours):
                    eff = (c + 0.5) / float(num_contours)
                    value = wpercentile(binsy, eff, weights=H[:, bin])
                    if value is None: value = np.nan
                    contours[bin, c] = value
                    pass
                pass

            # Draw the central contour thicker when there is one
            if num_contours % 2:  # odd
                linewidths = [1] * (num_contours // 2) + [
                    3
                ] + [1] * (num_contours // 2)
            else:
                linewidths = [1] * num_contours
                pass

            for c in range(num_contours):
                plt.plot(binsx,
                         contours[:, c],
                         linewidth=linewidths[c],
                         color='red')
                pass
            plt.xlabel(r'%s' % displayNameUnit(profile_var, latex=True))
            plt.ylabel(r'%s' % displayNameUnit(var, latex=True))
            plt.xlim([0, 300])
            if var.endswith('NN'):
                plt.ylim([0, 1])
                pass
            if save: plt.savefig('percentile_countours_%s.pdf' % var)
            plt.show()

            pass

        pass

    # Cost log(s)
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    if True:

        print "\nCost log:"

        # Plot cost log
        colors = [c['color'] for c in list(plt.rcParams['axes.prop_cycle'])]

        costlog = np.loadtxt('cost.log', delimiter=',')
        # NOTE: this overwrites the `names` returned by `getData` above. With
        # a single name, `zip` pairs only the first element of
        # `costlog.tolist()`; any further elements are ignored.
        names = ['loss']

        for i, (key, l) in enumerate(zip(names, costlog.tolist())):
            name = key.replace('loss', '').replace('_', '')
            if name:
                name = r'$L_{%s}$' % name
            else:
                name = r'$L_{classifier} - \lambda L_{adversary}$'
                pass
            # Raw curve (transparent) plus a Savitzky-Golay smoothed version
            plt.plot(l, alpha=0.4, label=name, color=colors[i])
            plt.plot(savgol_filter(l, 101, 3), color=colors[i])
            pass

        # NOTE(review): `hist` is not defined anywhere in this function; unless
        # a module-level `hist` exists, the next line raises NameError.
        clf_opt = hist['classifier_loss'][0]
        N = len(hist['classifier_loss'])
        plt.plot([0, N - 1], [clf_opt, clf_opt], color='gray', linestyle='--')
        plt.yscale('log')
        plt.xlabel('Iteration')
        plt.ylabel('Cost')
        plt.legend()
        plt.grid()
        plt.show()
        '''
        c_log, d_log = list(), list()
        with open('cost.log', 'r') as f:
            for line in f:
                fields = line.split(',')
                d_log.append(float(fields[0]))
                c_log.append(float(fields[1]))
                pass
            pass

        plt.plot(c_log, label='Classifier',    alpha=0.4)
        plt.plot(d_log, label='Discriminator', alpha=0.4)
        plt.plot(savgol_filter(c_log,201,3), label='Classifier (smooth)',)
        plt.plot(savgol_filter(d_log,201,3), label='Discriminator (smooth)',)
        plt.legend()
        plt.show()
        '''

        pass

    # Plot 1D distribution(s)
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    if True:

        print "\n1D distributions:"

        h_sig = dict()
        h_bkg = dict()
        for var in substructure_vars:
            print "-- %s" % var
            # 100 bins from 0 up to 4 (jet_D2), 300 (jet_m) or 1 (all others)
            bins = np.linspace(
                0, 4.0 if var == 'jet_D2' else
                (300. if var == 'jet_m' else 1.0), 100 + 1, True)
            h_bkg[var] = plt.hist(background[var],
                                  bins,
                                  weights=background['weight'],
                                  alpha=0.6,
                                  label='Background')
            # Signal weights are scaled up by 20 for visibility (see label)
            h_sig[var] = plt.hist(signal[var],
                                  bins,
                                  weights=signal['weight'] * 20,
                                  alpha=0.6,
                                  label='Signal (x 20)')
            plt.xlim([bins[0], bins[-1]])
            plt.xlabel(r'%s' % displayNameUnit(var, latex=True))
            plt.ylabel(r'Events [fb]')
            plt.legend()
            if save: plt.savefig('distrib_%s.pdf' % var)
            plt.show()
            pass

        pass

    # Plot ROC curve(s)
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    if True:

        print "\nROC curves:"

        eff_sig, eff_bkg = dict(), dict()
        for var in substructure_vars:
            eff_sig[var], eff_bkg[var] = roc(signal[var], background[var],
                                             signal['weight'],
                                             background['weight'])
            pass

        plt.figure(figsize=(6, 6))

        # Diagonal reference line, with the region above it shaded
        plt.plot(np.linspace(0, 1, 100 + 1, True),
                 np.linspace(0, 1, 100 + 1, True),
                 color='gray',
                 linestyle='--')
        plt.fill_between(np.linspace(0, 1, 100 + 1, True),
                         np.linspace(0, 1, 100 + 1, True),
                         np.ones(100 + 1),
                         color='black',
                         alpha=0.1)

        for var in substructure_vars:
            plt.plot(eff_sig[var],
                     eff_bkg[var],
                     label=r'%s' % displayName(var, latex=True))
            pass

        plt.xlabel(r'$\epsilon_{sig.}$')
        plt.ylabel(r'$\epsilon_{bkg.}$')
        plt.legend()
        if save: plt.savefig('ROC.pdf')
        plt.show()

        pass

    # Plot substructure profile(s)
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    if True:

        print "\nSubstructure profiles:"

        profile_var = 'jet_m'
        for var in substructure_vars:
            print "-- %s" % var
            # x-positions used for plotting: 50 points, shifted by half a step.
            # NOTE(review): the spacing of these points is 300/49, while the
            # TProfile below has 50 bins of width 300/50, so the plotted
            # x-positions do not coincide exactly with the bin centres.
            bins = np.linspace(0, 300, 50)
            bins += (bins[1] - bins[0]) / 2.

            # Each r is a (low, high) jet-pT window; (0, 10000) is inclusive
            #for r in [(150, 200), (300, 400), (400, 500), (500, 700), (700, 1000), (1000, 2000), (0, 10000)]:
            for r in [(150, 10000), (200, 10000), (250, 10000), (300, 10000),
                      (0, 10000)]:
                if profile_var == 'jet_m':
                    profile = TProfile("profile_%s_%d_%d" % (var, r[0], r[1]),
                                       "", len(bins), 0, 300)
                else:
                    profile = TProfile("profile_%s_%d_%d" % (var, r[0], r[1]),
                                       "", len(bins), -5, -1)
                    pass

                msk = (background['jet_pt'] >= r[0]) & (background['jet_pt'] <
                                                        r[1])
                # Columns: x = profile_var, y = var; weighted by event weight
                fill_profile(profile,
                             np.vstack((background[profile_var][msk],
                                        background[var][msk])).T,
                             weights=background['weight'][msk])

                # Copy the bin contents (ROOT bins are 1-indexed) and mask
                # bins whose content is exactly zero so they are not drawn
                prof = np.zeros(len(bins))
                for ibin in range(len(bins)):
                    prof[ibin] = profile.GetBinContent(ibin + 1)
                    pass
                prof = np.ma.masked_array(prof, mask=(prof == 0))
                if r[0] == 0:
                    plt.plot(bins,
                             prof,
                             color='black',
                             alpha=0.7,
                             label=r'Incl. $p_{T}$')
                elif r[1] >= 10000:
                    plt.scatter(bins, prof, label=r'$p_{T} > %d$ GeV' % r[0])
                else:
                    plt.scatter(bins,
                                prof,
                                label=r'$p_{T} \in [%d, %d]$ GeV' %
                                (r[0], r[1]))
                    pass
                pass

            # Axis limits are taken from the last profile created above
            plt.xlim(
                [profile.GetXaxis().GetXmin(),
                 profile.GetXaxis().GetXmax()])
            plt.ylim([
                0, 4 if var == 'jet_D2' else (1 if var == 'jet_tau21' else
                                              (300. if var == 'jet_m' else 1.))
            ])
            plt.xlabel(displayNameUnit(profile_var, latex=True))
            plt.ylabel(r'$\langle %s \rangle$' %
                       displayName(var, latex=True).replace('$', ''))
            plt.legend()
            if save: plt.savefig('profile_%s.pdf' % var)
            plt.show()
            pass

            pass

    # Plot reverse substructure profile(s)
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    if True:

        print "\nReverse substructure profiles:"

        profile_var = 'jet_m'
        for var in substructure_vars:
            print "-- %s" % var
            # The x-axis is now `var`; its range depends on the variable
            if var == 'jet_m':
                bins = np.linspace(0, 300, 50)
            elif var == 'jet_D2':
                bins = np.linspace(0, 5, 50)
            else:
                bins = np.linspace(0, 1, 50)
                pass

            bins += (bins[1] - bins[0]) / 2.

            for r in [(150, 200), (300, 400), (400, 500), (500, 700),
                      (700, 1000), (1000, 2000), (0, 10000)]:
                # NOTE: both branches below construct the same profile
                if profile_var == 'jet_m':
                    profile = TProfile("profile_%s_%d_%d" % (var, r[0], r[1]),
                                       "", len(bins), bins[0], bins[-1])
                else:
                    profile = TProfile("profile_%s_%d_%d" % (var, r[0], r[1]),
                                       "", len(bins), bins[0], bins[-1])
                    pass

                msk = (background['jet_pt'] >= r[0]) & (background['jet_pt'] <
                                                        r[1])
                # Columns: x = var, y = profile_var (reverse of the above)
                fill_profile(profile,
                             np.vstack((background[var][msk],
                                        background[profile_var][msk])).T,
                             weights=background['weight'][msk])

                prof = np.zeros(len(bins))
                for ibin in range(len(bins)):
                    prof[ibin] = profile.GetBinContent(ibin + 1)
                    pass
                prof = np.ma.masked_array(prof, mask=(prof == 0))
                if r[0] == 0:
                    plt.plot(bins,
                             prof,
                             color='black',
                             alpha=0.7,
                             label=r'Incl. $p_{T}$')
                else:
                    plt.scatter(bins,
                                prof,
                                label=r'$p_{T} \in [%d, %d]$ GeV' %
                                (r[0], r[1]))
                    pass
                pass

            plt.xlim(
                [profile.GetXaxis().GetXmin(),
                 profile.GetXaxis().GetXmax()])
            #plt.ylim([0, 4 if var == 'jet_D2' else (1 if var == 'jet_tau21' else (300. if var == 'jet_m' else 1.))])
            plt.ylim([0, 300.])
            plt.xlabel(displayName(var, latex=True))
            plt.ylabel(r'$\langle %s \rangle$' %
                       displayName(profile_var, latex=True).replace('$', ''))
            plt.legend()
            if save: plt.savefig('reverse_profile_%s.pdf' % var)
            plt.show()
            pass

        pass

    # ...

    return
예제 #8
0
    def fill_2d_tprofile_histograms(
        self,
        histogram_name,
        data,
        variable_x,
        variable_y,
        selections=[],
        bins_x=1,
        range_low_x=0.000001,
        range_high_x=1. - 0.00001,
        xlabel="",
        bins_y=1,
        range_low_y=0.000001,
        range_high_y=1. - 0.00001,
        ylabel="",
        zlabel="",
    ):
        '''Create and fill one TProfile2D per channel from variable_x and variable_y

        Arguments:
            histogram_name: Prefix of the profile name; the channel is appended.
            data: `data[channel][filename]` must unpack to
                `(variable_dict, selection_dict, weights)`.
            variable_x, variable_y: Objects whose `.name` keys `variable_dict`.
            selections: Objects whose `.name` keys `selection_dict`; the
                selections are AND-ed together. The default list is never
                mutated here.
            bins_x, bins_y: Either both lists of bin edges or both numbers of
                uniform bins.
            range_low_*, range_high_*: Axis ranges, used only for uniform bins.
            xlabel, ylabel, zlabel: Axis titles.

        Returns:
            Dictionary mapping channel to its ROOT.TProfile2D.

        Raises:
            ValueError: If exactly one of `bins_x` / `bins_y` is a list.
        '''
        name_to_fill_x = variable_x.name
        name_to_fill_y = variable_y.name
        x_is_list = isinstance(bins_x, list)
        y_is_list = isinstance(bins_y, list)

        histogram_dictionary = {}
        for channel in self.channels:
            profile_name = histogram_name + channel
            if x_is_list and y_is_list:
                # Variable-width binning from explicit edges
                bins_array_x = array('d', bins_x)
                bins_array_y = array('d', bins_y)
                profile = ROOT.TProfile2D(
                    profile_name, profile_name,
                    len(bins_array_x) - 1, bins_array_x,
                    len(bins_array_y) - 1, bins_array_y)
            elif not x_is_list and not y_is_list:
                # Uniform binning with slightly nudged ranges, kept as-is.
                # NOTE(review): the upper y edge is moved outwards while the
                # upper x edge is moved inwards -- confirm this is intended.
                profile = ROOT.TProfile2D(
                    profile_name, profile_name, bins_x,
                    range_low_x + 0.0000001, range_high_x - 0.000001, bins_y,
                    range_low_y + 0.0000001, range_high_y + 0.0000001)
            else:
                raise ValueError(
                    "both of the bins_x and bins_y variables need to be the same type. Both integers, or both lists"
                )
            profile.GetXaxis().SetTitle(xlabel)
            profile.GetYaxis().SetTitle(ylabel)
            z_axis = profile.GetZaxis()
            z_axis.SetTitle(zlabel)
            z_axis.SetTitleSize(0.035)
            z_axis.SetTitleOffset(1.35)
            profile.Sumw2()
            histogram_dictionary[channel] = profile

        for channel in self.channels:
            for filename in self.channel_files[channel]:
                variable_dict, selection_dict, weights = data[channel][
                    filename]

                # Start from an all-True mask and AND in every selection
                total_selection = np.ones(len(weights)) > 0.0
                for selection in selections:
                    total_selection &= selection_dict[selection.name]

                to_weight = weights[total_selection]
                to_fill = np.zeros((len(to_weight), 2))
                to_fill[:, 0] = variable_dict[name_to_fill_x][total_selection]
                to_fill[:, 1] = variable_dict[name_to_fill_y][total_selection]

                if self.verbose:
                    print(to_fill)
                    print(to_weight)
                    # The original referenced an undefined name `variable`
                    # here, raising NameError whenever verbose was on.
                    print("Filling Variable " + name_to_fill_y + " vs " +
                          name_to_fill_x)
                fill_profile(histogram_dictionary[channel], to_fill, to_weight)
        return histogram_dictionary
예제 #9
0
def main ():
    """
    Load data and trained models, then produce a series of diagnostic plots.

    Steps, in order: read the data via `getData`; add the scaled classifier
    output as 'NN' and, after loading checkpoints, as 'ANN'; then plot 1D
    distributions, ROC curves and profiles of each variable versus mass in
    bins of pT.

    Plots are shown interactively; they are only written to PDF when the local
    `save` flag is set (it is False here).

    NOTE: this function uses Python 2 `print` statements.
    """

    # Set pyplot style
    plt.style.use('ggplot')

    # Whether to save plots
    save = False


    # Get data
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    input_vars = ['m', 'tau21', 'D2']
    X, Y, W, signal, background = getData(sys.argv, input_vars)


    # Load pre-trained classifier
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    # Load existing classifier model from file
    classifier = load_model('classifier.h5')

    # Add neural network classifier output, without adversarial training
    msk_sig = (Y == 1.)
    signal    ['NN'] = classifier.predict(X[ msk_sig], batch_size = 1024)
    background['NN'] = classifier.predict(X[~msk_sig], batch_size = 1024)

    # Scale to mean 0.5 and sensible range: the scaler is fitted on background
    # only, and the same transformation is applied to signal
    scaler = preprocessing.StandardScaler().fit(background['NN'].reshape(-1,1))
    signal    ['NN'] = (scaler.transform(signal    ['NN'].reshape(-1,1)) / 4. + 0.5).reshape(signal    ['m'].shape)
    background['NN'] = (scaler.transform(background['NN'].reshape(-1,1)) / 4. + 0.5).reshape(background['m'].shape)

    # Remember to use 'NN' in comparisons later
    input_vars += ['NN']


    # Load adversarially trained models
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    # Classifier
    load_checkpoint(classifier)

    # Discriminator
    # NOTE: `discriminator` is not used in the visible part of this function
    discriminator = discriminator_model(5)
    load_checkpoint(discriminator)

    # Add neural network classifier output again, now stored as 'ANN'.
    # NOTE(review): presumably `load_checkpoint(classifier)` has replaced the
    # weights with the adversarially trained ones -- confirm.
    msk_sig = (Y == 1.)
    signal    ['ANN'] = classifier.predict(X[ msk_sig], batch_size = 1024)
    background['ANN'] = classifier.predict(X[~msk_sig], batch_size = 1024)

    # Scale to mean 0.5 and sensible range
    scaler = preprocessing.StandardScaler().fit(background['ANN'].reshape(-1,1))
    signal    ['ANN'] = (scaler.transform(signal    ['ANN'].reshape(-1,1)) / 4. + 0.5).reshape(signal    ['m'].shape)
    background['ANN'] = (scaler.transform(background['ANN'].reshape(-1,1)) / 4. + 0.5).reshape(background['m'].shape)

    # Remember to use 'ANN' in comparisons later
    input_vars += ['ANN']


    # Plot 1D distribution(s)
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    print "\n1D distributions:"

    h_sig = dict()
    h_bkg = dict()
    for var in input_vars:
        print "-- %s" % var
        # 100 bins from 0 up to 4 (D2), 300 (m) or 1 (all others)
        bins = np.linspace(0, 4.0 if var == 'D2' else (300. if var == 'm' else 1.0), 100 + 1, True)
        # Signal weights are scaled up by 20 for visibility (see label)
        h_bkg[var] = plt.hist(background[var], bins, weights = background['weight'],      alpha = 0.6, label = 'Background')
        h_sig[var] = plt.hist(signal    [var], bins, weights = signal    ['weight'] * 20, alpha = 0.6, label = 'Signal (x 20)')
        plt.xlim([bins[0], bins[-1]])
        plt.xlabel(r'%s' % displayNameUnit(var, latex = True))
        plt.ylabel(r'Events [fb]')
        plt.legend()
        if save: plt.savefig('distrib_%s.pdf' % var)
        plt.show()
        pass


    # Plot ROC curve(s)
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    print "\nROC curves:"

    eff_sig, eff_bkg = dict(), dict()
    for var in input_vars:
        eff_sig[var], eff_bkg[var] = roc(signal[var], background[var], signal['weight'], background['weight'])
        pass

    plt.figure(figsize=(6,6))
    
    # Diagonal reference line, with the region above it shaded
    plt.plot(np.linspace(0, 1, 100 + 1, True), np.linspace(0, 1, 100 +1, True), color = 'gray', linestyle = '--')
    plt.fill_between(np.linspace(0, 1, 100 + 1, True), 
                     np.linspace(0, 1, 100 + 1, True), 
                     np.ones(100 + 1), color = 'black', alpha = 0.1)

    for var in input_vars:
        plt.plot(eff_sig[var], eff_bkg[var], label = r'%s' % displayName(var, latex = True))
        pass

    plt.xlabel(r'$\epsilon_{sig.}$')
    plt.ylabel(r'$\epsilon_{bkg.}$')
    plt.legend()
    if save: plt.savefig('ROC.pdf')
    plt.show()


    # Plot substructure profile(s)
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    print "\nSubstructure profiles:"

    for var in input_vars:
        print "-- %s" % var
        # x-positions used for plotting: 50 points, shifted by half a step.
        # NOTE(review): the spacing of these points is 300/49, while the
        # TProfile below has 50 bins of width 300/50, so the plotted
        # x-positions do not coincide exactly with the bin centres.
        bins  = np.linspace(0, 300, 50)
        bins += (bins[1] - bins[0]) / 2.

        # Each r is a (low, high) pT window; (0, 10000) is the inclusive case
        for r in [(150, 200), (300, 400), (400, 500), (500, 700), (700, 1000), (1000, 2000), (0, 10000)]:
            profile = TProfile("profile_%s_%d_%d" % (var, r[0], r[1]), "", len(bins), 0, 300)
            msk = (background['pt'] >= r[0]) & (background['pt'] < r[1])
            # Columns: x = mass, y = var; weighted by event weight
            fill_profile(profile, np.vstack((background['m'][msk], background[var][msk])).T, weights = background['weight'][msk])
            

            # Copy the bin contents (ROOT bins are 1-indexed) and mask bins
            # whose content is exactly zero so they are not drawn
            prof = np.zeros(len(bins))
            for ibin in range(len(bins)):
                prof[ibin] = profile.GetBinContent(ibin + 1)
                pass
            prof = np.ma.masked_array(prof, mask = (prof == 0))
            if r[0] == 0:
                plt.plot(bins, prof, color = 'black', alpha = 0.7, label = r'Incl. $p_{T}$')
            else:
                plt.scatter(bins, prof, label = r'$p_{T} \in [%d, %d]$ GeV' % (r[0], r[1]))
                pass
            pass

        plt.xlim([0, 300])
        plt.ylim([0, 4 if var == 'D2' else (1 if var == 'tau21' else (300. if var == 'm' else 1.))])
        plt.xlabel(displayNameUnit('m', latex = True))
        plt.ylabel(r'$\langle %s \rangle$' % displayName(var, latex = True).replace('$', ''))
        plt.legend()
        if save: plt.savefig('profile_%s.pdf' % var)
        plt.show()
        pass

    # ...

    return