def efficiency(data, args, feat, title=None):
    """
    Study the background efficiency versus mass for a fixed set of
    inclusive efficiency working points.

    For each working point a cut on `feat` is derived from the weighted
    percentile of the background distribution, and the per-mass-bin pass
    rate of that cut is stored in a `ROOT.TProfile`.

    Saves plot `figures/efficiency_[feat].pdf`, or
    `figures/[title]_efficiency_[feat].pdf` when `title` is given.

    Arguments:
        data: Pandas data frame from which to read data.
        args: Namespace holding command-line arguments.
        feat: Feature for which to study efficiencies.
        title: Optional prefix for the output file name.

    Returns:
        Tuple `(c, args, path)` of the object returned by `plot`, the
        unchanged `args`, and the path the plot was saved to.
    """

    # Background selection and the inclusive working points
    # (percentages, judging by the `100 - eff` below)
    background = data['signal'] == 0
    effs = [5, 10, 20, 40, 80]

    # Define cuts: one weighted percentile per working point. The percentile
    # is flipped for features where signal does not sit at low values.
    cuts = []
    for eff in effs:
        percentile = eff if signal_low(feat) else 100 - eff
        cuts.append(wpercentile(data.loc[background, feat].values,
                                percentile,
                                weights=data.loc[background, 'weight_test'].values))

    # Compute cut efficiency vs. mass
    profiles = []
    for cut in cuts:

        # Get correct pass-cut mask
        above_cut = data[feat] > cut
        msk_pass = ~above_cut if signal_low(feat) else above_cut

        # Fill efficiency profile: x = mass, y = pass flag, so the profile
        # mean in each bin is the weighted pass fraction
        name = 'profile_{}_{}'.format(feat, cut)
        profile = ROOT.TProfile(name, "", len(MASSBINS) - 1, MASSBINS)
        points = np.vstack((data.loc[background, 'm'].values,
                            msk_pass[background])).T
        root_numpy.fill_profile(profile, points,
                                weights=data.loc[background, 'weight_test'].values)

        profiles.append(profile)

    # Perform plotting
    c = plot(args, data, feat, profiles, cuts, effs)

    # Output
    if title is not None:
        path = 'figures/' + title + '_efficiency_{}.pdf'.format(standardise(feat))
    else:
        path = 'figures/efficiency_{}.pdf'.format(standardise(feat))
    c.save(path=path)

    return c, args, path
def fill_profile(data, var):
    """
    Build a ROOT.TProfile holding the average `var` as a function of rhoDDT.

    Arguments:
        data: Table indexable by a list of column names (`data[[...]]`) and
            exposing `.values`; must contain the `VAR_RHODDT`, `var` and
            `VAR_WEIGHT` columns.
        var: Name of the column to be profiled.

    Returns:
        The filled `ROOT.TProfile`, binned according to `BINS`.
    """
    name = 'profile_{}'.format(var)
    num_bins = len(BINS) - 1
    profile = ROOT.TProfile(name, "", num_bins, BINS)

    # Column 0 is the x-axis (rhoDDT), column 1 the profiled quantity
    columns = [VAR_RHODDT, var]
    root_numpy.fill_profile(profile,
                            data[columns].values,
                            weights=data[VAR_WEIGHT].values)
    return profile
def GetTProfileHistograms(
        self,
        histogram_name,
        data_dictionary,
        variable_x,
        variable_y,
        list_selections=None,
        bins=1,
        range_low=0.000001,
        range_high=1. - 0.00001,
        xlabel="",
        ylabel="",
):
    '''Get a TProfile histogram with variable_y profiled against variable_x,
    after selections list_selections have been applied.

    One `ROOT.TProfile` named `histogram_name + channel` is created per
    entry of `self.channels` and filled from every file listed in
    `self.channelFiles[channel]`.

    Arguments:
        histogram_name: Prefix of the ROOT object name and title.
        data_dictionary: `data_dictionary[channel][filename]` must unpack to
            `(variable_dict, selection_dict, weights)`.
        variable_x: Object whose `.name` keys the x values in `variable_dict`.
        variable_y: Object whose `.name` keys the profiled values.
        list_selections: Iterable of objects whose `.name` keys boolean
            masks in `selection_dict`; all masks are AND-ed. Defaults to no
            selection.
        bins: Either a list of bin edges or a number of bins.
        range_low, range_high: Axis range, used only when `bins` is not a
            list.
        xlabel, ylabel: Axis titles.

    Returns:
        Dictionary mapping channel name to the filled `ROOT.TProfile`.
    '''
    # Avoid a shared mutable default argument
    if list_selections is None:
        list_selections = []

    variableNameToFill_x = variable_x.name
    variableNameToFill_y = variable_y.name

    # Variable-width binning is requested by passing a list of edges
    bins_array = array('d', bins) if isinstance(bins, list) else None

    histogram_dictionary = {}
    for channel in self.channels:
        name = histogram_name + channel
        if bins_array is not None:
            profile = ROOT.TProfile(name, name,
                                    len(bins_array) - 1, bins_array)
        else:
            profile = ROOT.TProfile(name, name, bins,
                                    range_low + 0.0000001,
                                    range_high - 0.000001)
        profile.Sumw2()
        histogram_dictionary[channel] = profile

    for channel in self.channels:
        profile = histogram_dictionary[channel]
        for filename in self.channelFiles[channel]:
            variable_dict, selection_dict, weights = \
                data_dictionary[channel][filename]

            # Start from "everything passes" and AND in each selection mask
            total_selection = np.ones(len(weights)) > 0.0
            for selection in list_selections:
                total_selection &= selection_dict[selection.name]

            to_weight = weights[total_selection]
            to_fill = np.zeros((len(to_weight), 2))
            to_fill[:, 0] = variable_dict[variableNameToFill_x][total_selection]
            to_fill[:, 1] = variable_dict[variableNameToFill_y][total_selection]

            if self.verbose:
                print(to_fill)
                print(to_weight)
                # The original referenced an undefined name `variable` here
                print("Filling Variable " + variableNameToFill_y +
                      " vs. " + variableNameToFill_x)

            print("Filling Histogram")
            fill_profile(profile, to_fill, to_weight)
            print("Finished filling histogram")

        profile.GetXaxis().SetTitle(xlabel)
        profile.GetYaxis().SetTitle(ylabel)

    return histogram_dictionary
def test_fill_profile():
    """Check `rnp.fill_profile` on 1D, 2D and 3D profiles, with and without
    weights, and verify that malformed inputs are rejected."""
    n_samples = 1000
    uniform_weights = np.ones(n_samples) * 2.
    points_1d = RNG.randn(n_samples, 2)
    points_2d = RNG.randn(n_samples, 3)
    points_3d = RNG.randn(n_samples, 4)

    # Unweighted 1D profile
    prof_1d = TProfile('th1d', 'test', 100, -5, 5)
    rnp.fill_profile(prof_1d, points_1d)
    assert_true(prof_1d.Integral() != 0)

    # A constant weight must not change the profile
    prof_1d_weighted = TProfile('th1dw', 'test', 100, -5, 5)
    rnp.fill_profile(prof_1d_weighted, points_1d, uniform_weights)
    assert_true(prof_1d_weighted.Integral() != 0)
    assert_equal(prof_1d_weighted.Integral(), prof_1d.Integral())

    # Higher-dimensional profiles
    prof_2d = TProfile2D('th2d', 'test', 100, -5, 5, 100, -5, 5)
    rnp.fill_profile(prof_2d, points_2d)
    assert_true(prof_2d.Integral() != 0)

    prof_3d = TProfile3D('th3d', 'test', 10, -5, 5, 10, -5, 5, 10, -5, 5)
    rnp.fill_profile(prof_3d, points_3d)
    assert_true(prof_3d.Integral() != 0)

    invalid_arguments = [
        # array and weights lengths do not match
        (points_3d, np.ones(10)),
        # weights is not 1D
        (points_3d, np.ones((points_3d.shape[0], 1))),
        # array is not 2D
        (np.ones(10),),
    ]
    for bad_args in invalid_arguments:
        assert_raises(ValueError, rnp.fill_profile, prof_3d, *bad_args)

    # length of second axis is not one more than dimensionality of the profile
    for profile in (prof_1d, prof_2d, prof_3d):
        assert_raises(ValueError, rnp.fill_profile, profile, RNG.randn(10, 5))

    # wrong type
    plain_hist = TH1D("test", "test", 1, 0, 1)
    assert_raises(TypeError, rnp.fill_profile, plain_hist, points_1d)
def test_fill_profile():
    """Check `rnp.fill_profile` on 1D, 2D and 3D profiles, with and without
    weights, and verify that malformed inputs are rejected."""
    np.random.seed(0)

    # Array sizes must be integers: float literals such as 1E6 are rejected
    # as shapes/dimensions by current NumPy.
    n_large = 1000000
    n_small = 10000

    w1D = np.empty(n_large)
    w1D.fill(2.)
    data1D = np.random.randn(n_large, 2)
    data2D = np.random.randn(n_large, 3)
    data3D = np.random.randn(n_small, 4)

    a = TProfile('th1d', 'test', 1000, -5, 5)
    rnp.fill_profile(a, data1D)
    assert_true(a.Integral() != 0)

    # A constant weight must not change the profile
    a_w = TProfile('th1dw', 'test', 1000, -5, 5)
    rnp.fill_profile(a_w, data1D, w1D)
    assert_true(a_w.Integral() != 0)
    assert_equal(a_w.Integral(), a.Integral())

    b = TProfile2D('th2d', 'test', 100, -5, 5, 100, -5, 5)
    rnp.fill_profile(b, data2D)
    assert_true(b.Integral() != 0)

    c = TProfile3D('th3d', 'test', 10, -5, 5, 10, -5, 5, 10, -5, 5)
    rnp.fill_profile(c, data3D)
    assert_true(c.Integral() != 0)

    # array and weights lengths do not match
    assert_raises(ValueError, rnp.fill_profile, c, data3D, np.ones(10))
    # weights is not 1D
    assert_raises(ValueError, rnp.fill_profile, c, data3D,
                  np.ones((data3D.shape[0], 1)))
    # array is not 2D
    assert_raises(ValueError, rnp.fill_profile, c, np.ones(10))
    # length of second axis is not one more than dimensionality of the profile
    for h in (a, b, c):
        assert_raises(ValueError, rnp.fill_profile, h,
                      np.random.randn(n_small, 5))
    # wrong type
    assert_raises(TypeError, rnp.fill_profile,
                  TH1D("test", "test", 1, 0, 1), data1D)
def fillprofile(profile, arrx, arry):
    """Fill `profile` in place from two separate arrays.

    `arrx` and `arry` are merged by `combinevectors` and the result is
    handed to `root_numpy.fill_profile`. Nothing is returned.
    """
    root_numpy.fill_profile(profile, combinevectors(arrx, arry))
def main():
    """Produce diagnostic plots comparing jet substructure variables with
    the outputs of the stand-alone ('NN') and adversarially trained ('ANN')
    classifier.

    Reads the classifier from 'classifier.h5' and the cost log from
    'cost.log'; when `save` is set, figures are written as PDF files to the
    working directory. Individual studies are toggled by the `if True:` /
    `if False:` guards.
    """

    # Set pyplot style
    plt.style.use('ggplot')

    # Whether to save plots
    save = True

    # Get data
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    substructure_vars = ['jet_tau21', 'jet_D2', 'jet_m']
    decorrelation_vars = ['jet_m']
    X, Y, W, P, signal, background, names = getData(decorrelation_vars)
    msk_sig = (Y == 1.)

    # Load pre-trained classifier
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    # Load existing classifier model from file
    classifier = load_model('classifier.h5')

    # Add neural network classifier output, without adversarial training
    signal['NN'] = classifier.predict(X[msk_sig], batch_size=1024)
    background['NN'] = classifier.predict(X[~msk_sig], batch_size=1024)

    # Standardise w.r.t. the weighted background distribution, then squeeze
    # by a factor 8 and shift so that the background mean sits at 0.5
    wmean, wstd = weighted_avg_and_std(background['NN'].ravel(),
                                       background['weight'].ravel())
    signal['NN'] = ((signal['NN'] - wmean) / wstd / 8. + 0.5).reshape(
        signal['jet_m'].shape)
    background['NN'] = ((background['NN'] - wmean) / wstd / 8. + 0.5).reshape(
        background['jet_m'].shape)

    # Remember to use 'NN' in comparisons later
    substructure_vars += ['NN']

    # Load adversarially trained models
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    # Combined
    adversarial = adversarial_model(classifier, [(64, 'tanh')] * 2, 1,
                                    P.shape[1])
    load_checkpoint(adversarial)

    # Add classifier output after loading the adversarial checkpoint.
    # NOTE(review): this relies on `load_checkpoint(adversarial)` updating
    # the weights of the wrapped `classifier` -- confirm.
    signal['ANN'] = classifier.predict(X[msk_sig], batch_size=1024)
    background['ANN'] = classifier.predict(X[~msk_sig], batch_size=1024)

    # Same rescaling as for 'NN'
    wmean, wstd = weighted_avg_and_std(background['ANN'].ravel(),
                                       background['weight'].ravel())
    signal['ANN'] = ((signal['ANN'] - wmean) / wstd / 8. + 0.5).reshape(
        signal['jet_m'].shape)
    background['ANN'] = ((background['ANN'] - wmean) / wstd / 8.
                         + 0.5).reshape(background['jet_m'].shape)

    # Remember to use 'ANN' in comparisons later
    substructure_vars += ['ANN']

    # Weights sparsity
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    if False:
        print("\nWeights sparsity:")
        for ilayer, layer in enumerate(classifier.layers):

            # If layer doesn't have any weights (e.g. input or output layer),
            # continue
            if len(layer.get_weights()) == 0:
                continue

            # Sorted absolute weights, normalised to the largest one
            weights = np.sort(np.abs(layer.get_weights()[0]).ravel())
            weights /= weights[-1]

            bins = np.linspace(0, 1, weights.size, endpoint=True)
            plt.plot(bins, weights, alpha=0.4,
                     label='Layer %d' % (ilayer + 1))

        plt.grid()
        plt.legend()
        plt.show()

    # Percentile contours
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    if False:
        print("\nPercentile contours:")

        # Variable against which to compute and show profile
        profile_var = 'jet_m'

        for var in substructure_vars:
            print("-- %s" % var)

            binsx = np.logspace(1, 2, 50 + 1, endpoint=True) * 3.
            binsy = np.linspace(-50., 500., 10000 + 1, endpoint=True)

            H, _, _ = np.histogram2d(background[profile_var],
                                     background[var],
                                     [binsx, binsy],
                                     weights=background['weight'])
            H = np.array(H).T

            num_contours = 15

            # From here on, work with bin centres rather than edges
            binsx = (binsx[:-1] + binsx[1:]) * 0.5
            binsy = (binsy[:-1] + binsy[1:]) * 0.5

            contours = np.zeros((len(binsx), num_contours))
            for ibin in range(len(binsx)):
                for c in range(num_contours):
                    eff = (c + 0.5) / float(num_contours)
                    value = wpercentile(binsy, eff, weights=H[:, ibin])
                    if value is None:
                        value = np.nan
                    contours[ibin, c] = value

            # Emphasise the central contour when there is one
            if num_contours % 2:  # odd
                linewidths = ([1] * (num_contours // 2) + [3] +
                              [1] * (num_contours // 2))
            else:
                linewidths = [1] * num_contours

            for c in range(num_contours):
                plt.plot(binsx, contours[:, c], linewidth=linewidths[c],
                         color='red')

            plt.xlabel(r'%s' % displayNameUnit(profile_var, latex=True))
            plt.ylabel(r'%s' % displayNameUnit(var, latex=True))
            plt.xlim([0, 300])
            if var.endswith('NN'):
                plt.ylim([0, 1])
            if save:
                plt.savefig('percentile_countours_%s.pdf' % var)
            plt.show()

    # Cost log(s)
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    if True:
        print("\nCost log:")

        # Plot cost log
        colors = [c['color'] for c in list(plt.rcParams['axes.prop_cycle'])]
        costlog = np.loadtxt('cost.log', delimiter=',')
        names = ['loss']

        for i, (key, values) in enumerate(zip(names, costlog.tolist())):
            name = key.replace('loss', '').replace('_', '')
            if name:
                name = r'$L_{%s}$' % name
            else:
                name = r'$L_{classifier} - \lambda L_{adversary}$'

            # Raw values (faint) overlaid with a smoothed curve
            plt.plot(values, alpha=0.4, label=name, color=colors[i])
            plt.plot(savgol_filter(values, 101, 3), color=colors[i])

        # NOTE(review): `hist` is not defined anywhere in this function;
        # unless it is a module-level global the next line raises NameError.
        # Confirm where the training history is supposed to come from.
        clf_opt = hist['classifier_loss'][0]
        N = len(hist['classifier_loss'])
        plt.plot([0, N - 1], [clf_opt, clf_opt], color='gray',
                 linestyle='--')

        plt.yscale('log')
        plt.xlabel('Iteration')
        plt.ylabel('Cost')
        plt.legend()
        plt.grid()
        plt.show()

    # Plot 1D distribution(s)
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    if True:
        print("\n1D distributions:")

        h_sig = dict()
        h_bkg = dict()

        for var in substructure_vars:
            print("-- %s" % var)

            upper = {'jet_D2': 4.0, 'jet_m': 300.}.get(var, 1.0)
            bins = np.linspace(0, upper, 100 + 1, True)

            h_bkg[var] = plt.hist(background[var], bins,
                                  weights=background['weight'],
                                  alpha=0.6, label='Background')
            h_sig[var] = plt.hist(signal[var], bins,
                                  weights=signal['weight'] * 20,
                                  alpha=0.6, label='Signal (x 20)')

            plt.xlim([bins[0], bins[-1]])
            plt.xlabel(r'%s' % displayNameUnit(var, latex=True))
            plt.ylabel(r'Events [fb]')
            plt.legend()
            if save:
                plt.savefig('distrib_%s.pdf' % var)
            plt.show()

    # Plot ROC curve(s)
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    if True:
        print("\nROC curves:")

        eff_sig, eff_bkg = dict(), dict()
        for var in substructure_vars:
            eff_sig[var], eff_bkg[var] = roc(signal[var], background[var],
                                             signal['weight'],
                                             background['weight'])

        plt.figure(figsize=(6, 6))

        # Diagonal (random guessing) and shaded region above it
        diagonal = np.linspace(0, 1, 100 + 1, True)
        plt.plot(diagonal, diagonal, color='gray', linestyle='--')
        plt.fill_between(diagonal, diagonal, np.ones(100 + 1),
                         color='black', alpha=0.1)

        for var in substructure_vars:
            plt.plot(eff_sig[var], eff_bkg[var],
                     label=r'%s' % displayName(var, latex=True))

        plt.xlabel(r'$\epsilon_{sig.}$')
        plt.ylabel(r'$\epsilon_{bkg.}$')
        plt.legend()
        if save:
            plt.savefig('ROC.pdf')
        plt.show()

    # Plot substructure profile(s)
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    if True:
        print("\nSubstructure profiles:")

        profile_var = 'jet_m'
        nbins = 50
        xmin, xmax = (0, 300) if profile_var == 'jet_m' else (-5, -1)
        ymax_by_var = {'jet_D2': 4, 'jet_tau21': 1, 'jet_m': 300.}

        for var in substructure_vars:
            print("-- %s" % var)

            for r in [(150, 10000), (200, 10000), (250, 10000),
                      (300, 10000), (0, 10000)]:

                profile = TProfile("profile_%s_%d_%d" % (var, r[0], r[1]),
                                   "", nbins, xmin, xmax)

                msk = ((background['jet_pt'] >= r[0]) &
                       (background['jet_pt'] < r[1]))
                fill_profile(profile,
                             np.vstack((background[profile_var][msk],
                                        background[var][msk])).T,
                             weights=background['weight'][msk])

                # Read x positions from the profile itself so that points
                # sit at the true bin centres (ROOT bins are 1-indexed)
                indices = range(1, nbins + 1)
                centres = np.array([profile.GetBinCenter(i)
                                    for i in indices])
                prof = np.array([profile.GetBinContent(i) for i in indices])

                # Hide empty bins
                prof = np.ma.masked_array(prof, mask=(prof == 0))

                if r[0] == 0:
                    plt.plot(centres, prof, color='black', alpha=0.7,
                             label=r'Incl. $p_{T}$')
                elif r[1] >= 10000:
                    plt.scatter(centres, prof,
                                label=r'$p_{T} > %d$ GeV' % r[0])
                else:
                    plt.scatter(centres, prof,
                                label=r'$p_{T} \in [%d, %d]$ GeV' % (r[0],
                                                                     r[1]))

            plt.xlim([profile.GetXaxis().GetXmin(),
                      profile.GetXaxis().GetXmax()])
            plt.ylim([0, ymax_by_var.get(var, 1.)])
            plt.xlabel(displayNameUnit(profile_var, latex=True))
            plt.ylabel(r'$\langle %s \rangle$' %
                       displayName(var, latex=True).replace('$', ''))
            plt.legend()
            if save:
                plt.savefig('profile_%s.pdf' % var)
            plt.show()

    # Plot reverse substructure profile(s)
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    if True:
        print("\nReverse substructure profiles:")

        profile_var = 'jet_m'
        nbins = 50

        for var in substructure_vars:
            print("-- %s" % var)

            if var == 'jet_m':
                bins = np.linspace(0, 300, nbins)
            elif var == 'jet_D2':
                bins = np.linspace(0, 5, nbins)
            else:
                bins = np.linspace(0, 1, nbins)
            bins += (bins[1] - bins[0]) / 2.

            for r in [(150, 200), (300, 400), (400, 500), (500, 700),
                      (700, 1000), (1000, 2000), (0, 10000)]:

                # Axis range is kept exactly as before (first/last shifted
                # point); only the plotted x positions are corrected below
                profile = TProfile("profile_%s_%d_%d" % (var, r[0], r[1]),
                                   "", nbins, bins[0], bins[-1])

                msk = ((background['jet_pt'] >= r[0]) &
                       (background['jet_pt'] < r[1]))
                fill_profile(profile,
                             np.vstack((background[var][msk],
                                        background[profile_var][msk])).T,
                             weights=background['weight'][msk])

                indices = range(1, nbins + 1)
                centres = np.array([profile.GetBinCenter(i)
                                    for i in indices])
                prof = np.array([profile.GetBinContent(i) for i in indices])

                # Hide empty bins
                prof = np.ma.masked_array(prof, mask=(prof == 0))

                if r[0] == 0:
                    plt.plot(centres, prof, color='black', alpha=0.7,
                             label=r'Incl. $p_{T}$')
                else:
                    plt.scatter(centres, prof,
                                label=r'$p_{T} \in [%d, %d]$ GeV' % (r[0],
                                                                     r[1]))

            plt.xlim([profile.GetXaxis().GetXmin(),
                      profile.GetXaxis().GetXmax()])
            plt.ylim([0, 300.])
            plt.xlabel(displayName(var, latex=True))
            plt.ylabel(r'$\langle %s \rangle$' %
                       displayName(profile_var, latex=True).replace('$', ''))
            plt.legend()
            if save:
                plt.savefig('reverse_profile_%s.pdf' % var)
            plt.show()

    # ...

    return
def fill_2d_tprofile_histograms(
        self,
        histogram_name,
        data,
        variable_x,
        variable_y,
        selections=None,
        bins_x=1,
        range_low_x=0.000001,
        range_high_x=1. - 0.00001,
        xlabel="",
        bins_y=1,
        range_low_y=0.000001,
        range_high_y=1. - 0.00001,
        ylabel="",
        zlabel="",
):
    '''Create and fill one `ROOT.TProfile2D` per channel from variable_x and
    variable_y, after the given selections have been applied.

    One profile named `histogram_name + channel` is created per entry of
    `self.channels` and filled from every file listed in
    `self.channel_files[channel]`.

    Arguments:
        histogram_name: Prefix of the ROOT object name and title.
        data: `data[channel][filename]` must unpack to
            `(variable_dict, selection_dict, weights)`.
        variable_x, variable_y: Objects whose `.name` keys the values in
            `variable_dict`.
        selections: Iterable of objects whose `.name` keys boolean masks in
            `selection_dict`; all masks are AND-ed. Defaults to no selection.
        bins_x, bins_y: Either both lists of bin edges, or both numbers of
            bins.
        range_low_*, range_high_*: Axis ranges, used only when the bins are
            given as numbers.
        xlabel, ylabel, zlabel: Axis titles.

    Returns:
        Dictionary mapping channel name to the filled `ROOT.TProfile2D`.

    Raises:
        ValueError: If exactly one of `bins_x` / `bins_y` is a list.
    '''
    # Avoid a shared mutable default argument
    if selections is None:
        selections = []

    name_to_fill_x = variable_x.name
    name_to_fill_y = variable_y.name

    x_is_list = isinstance(bins_x, list)
    y_is_list = isinstance(bins_y, list)
    if x_is_list != y_is_list:
        raise ValueError(
            "both of the bins_x and bins_y variables need to be the same type. Both integers, or both lists"
        )

    histogram_dictionary = {}
    for channel in self.channels:
        name = histogram_name + channel
        if x_is_list:
            bins_array_x = array('d', bins_x)
            bins_array_y = array('d', bins_y)
            profile = ROOT.TProfile2D(
                name, name,
                len(bins_array_x) - 1, bins_array_x,
                len(bins_array_y) - 1, bins_array_y)
        else:
            # NOTE(review): the upper y edge is nudged *up* while the upper
            # x edge is nudged *down*; kept as in the original, but this
            # asymmetry looks unintentional -- confirm.
            profile = ROOT.TProfile2D(
                name, name,
                bins_x, range_low_x + 0.0000001, range_high_x - 0.000001,
                bins_y, range_low_y + 0.0000001, range_high_y + 0.0000001)

        profile.GetXaxis().SetTitle(xlabel)
        profile.GetYaxis().SetTitle(ylabel)
        profile.GetZaxis().SetTitle(zlabel)
        profile.GetZaxis().SetTitleSize(0.035)
        profile.GetZaxis().SetTitleOffset(1.35)
        profile.Sumw2()
        histogram_dictionary[channel] = profile

    for channel in self.channels:
        profile = histogram_dictionary[channel]
        for filename in self.channel_files[channel]:
            variable_dict, selection_dict, weights = data[channel][filename]

            # Start from "everything passes" and AND in each selection mask
            total_selection = np.ones(len(weights)) > 0.0
            for selection in selections:
                total_selection &= selection_dict[selection.name]

            to_weight = weights[total_selection]

            # NOTE(review): only two columns (x, y) are filled. If
            # `fill_profile` is root_numpy's, a TProfile2D expects three
            # columns (x, y, z) -- confirm that a profiled variable is not
            # missing from this interface.
            to_fill = np.zeros((len(to_weight), 2))
            to_fill[:, 0] = variable_dict[name_to_fill_x][total_selection]
            to_fill[:, 1] = variable_dict[name_to_fill_y][total_selection]

            if self.verbose:
                print(to_fill)
                print(to_weight)
                # The original referenced an undefined name `variable` here
                print("Filling Variable " + name_to_fill_x +
                      " vs. " + name_to_fill_y)

            fill_profile(profile, to_fill, to_weight)

    return histogram_dictionary
def main():
    """Produce diagnostic plots comparing the input variables with the
    outputs of the stand-alone ('NN') and checkpoint-loaded ('ANN')
    classifier.

    Reads the classifier from 'classifier.h5'; when `save` is set, figures
    are written as PDF files to the working directory.
    """

    # Set pyplot style
    plt.style.use('ggplot')

    # Whether to save plots
    save = False

    # Get data
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    input_vars = ['m', 'tau21', 'D2']
    X, Y, W, signal, background = getData(sys.argv, input_vars)
    msk_sig = (Y == 1.)

    # Load pre-trained classifier
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    # Load existing classifier model from file
    classifier = load_model('classifier.h5')

    # Add neural network classifier output, without adversarial training
    signal['NN'] = classifier.predict(X[msk_sig], batch_size=1024)
    background['NN'] = classifier.predict(X[~msk_sig], batch_size=1024)

    # Standardise w.r.t. the background distribution, then squeeze by a
    # factor 4 and shift so that the background mean sits at 0.5
    scaler = preprocessing.StandardScaler().fit(
        background['NN'].reshape(-1, 1))
    signal['NN'] = (scaler.transform(signal['NN'].reshape(-1, 1)) / 4.
                    + 0.5).reshape(signal['m'].shape)
    background['NN'] = (scaler.transform(background['NN'].reshape(-1, 1)) / 4.
                        + 0.5).reshape(background['m'].shape)

    # Remember to use 'NN' in comparisons later
    input_vars += ['NN']

    # Load adversarially trained models
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    # Classifier
    load_checkpoint(classifier)

    # Discriminator
    discriminator = discriminator_model(5)
    load_checkpoint(discriminator)

    # Add classifier output after loading the checkpoint
    # (presumably the adversarially trained weights -- confirm)
    signal['ANN'] = classifier.predict(X[msk_sig], batch_size=1024)
    background['ANN'] = classifier.predict(X[~msk_sig], batch_size=1024)

    # Same rescaling as for 'NN'
    scaler = preprocessing.StandardScaler().fit(
        background['ANN'].reshape(-1, 1))
    signal['ANN'] = (scaler.transform(signal['ANN'].reshape(-1, 1)) / 4.
                     + 0.5).reshape(signal['m'].shape)
    background['ANN'] = (scaler.transform(background['ANN'].reshape(-1, 1))
                         / 4. + 0.5).reshape(background['m'].shape)

    # Remember to use 'ANN' in comparisons later
    input_vars += ['ANN']

    # Plot 1D distribution(s)
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    print("\n1D distributions:")

    h_sig = dict()
    h_bkg = dict()

    for var in input_vars:
        print("-- %s" % var)

        upper = {'D2': 4.0, 'm': 300.}.get(var, 1.0)
        bins = np.linspace(0, upper, 100 + 1, True)

        h_bkg[var] = plt.hist(background[var], bins,
                              weights=background['weight'],
                              alpha=0.6, label='Background')
        h_sig[var] = plt.hist(signal[var], bins,
                              weights=signal['weight'] * 20,
                              alpha=0.6, label='Signal (x 20)')

        plt.xlim([bins[0], bins[-1]])
        plt.xlabel(r'%s' % displayNameUnit(var, latex=True))
        plt.ylabel(r'Events [fb]')
        plt.legend()
        if save:
            plt.savefig('distrib_%s.pdf' % var)
        plt.show()

    # Plot ROC curve(s)
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    print("\nROC curves:")

    eff_sig, eff_bkg = dict(), dict()
    for var in input_vars:
        eff_sig[var], eff_bkg[var] = roc(signal[var], background[var],
                                         signal['weight'],
                                         background['weight'])

    plt.figure(figsize=(6, 6))

    # Diagonal (random guessing) and shaded region above it
    diagonal = np.linspace(0, 1, 100 + 1, True)
    plt.plot(diagonal, diagonal, color='gray', linestyle='--')
    plt.fill_between(diagonal, diagonal, np.ones(100 + 1),
                     color='black', alpha=0.1)

    for var in input_vars:
        plt.plot(eff_sig[var], eff_bkg[var],
                 label=r'%s' % displayName(var, latex=True))

    plt.xlabel(r'$\epsilon_{sig.}$')
    plt.ylabel(r'$\epsilon_{bkg.}$')
    plt.legend()
    if save:
        plt.savefig('ROC.pdf')
    plt.show()

    # Plot substructure profile(s)
    # ––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

    print("\nSubstructure profiles:")

    nbins = 50
    ymax_by_var = {'D2': 4, 'tau21': 1, 'm': 300.}

    for var in input_vars:
        print("-- %s" % var)

        for r in [(150, 200), (300, 400), (400, 500), (500, 700),
                  (700, 1000), (1000, 2000), (0, 10000)]:

            profile = TProfile("profile_%s_%d_%d" % (var, r[0], r[1]), "",
                               nbins, 0, 300)

            msk = (background['pt'] >= r[0]) & (background['pt'] < r[1])
            fill_profile(profile,
                         np.vstack((background['m'][msk],
                                    background[var][msk])).T,
                         weights=background['weight'][msk])

            # Read x positions from the profile itself so that points sit
            # at the true bin centres (ROOT bins are 1-indexed)
            indices = range(1, nbins + 1)
            centres = np.array([profile.GetBinCenter(i) for i in indices])
            prof = np.array([profile.GetBinContent(i) for i in indices])

            # Hide empty bins
            prof = np.ma.masked_array(prof, mask=(prof == 0))

            if r[0] == 0:
                plt.plot(centres, prof, color='black', alpha=0.7,
                         label=r'Incl. $p_{T}$')
            else:
                plt.scatter(centres, prof,
                            label=r'$p_{T} \in [%d, %d]$ GeV' % (r[0], r[1]))

        plt.xlim([0, 300])
        plt.ylim([0, ymax_by_var.get(var, 1.)])
        plt.xlabel(displayNameUnit('m', latex=True))
        plt.ylabel(r'$\langle %s \rangle$' %
                   displayName(var, latex=True).replace('$', ''))
        plt.legend()
        if save:
            plt.savefig('profile_%s.pdf' % var)
        plt.show()

    # ...

    return