def main(): opts = parse_options() model = load_model(os.path.join(opts.modelDir, opts.name + '.h5')) scaler = joblib.load(os.path.join(opts.modelDir, opts.name + '_scaler.pkl')) db = (RESOLUTION[2] - RESOLUTION[1] ) / RESOLUTION[0] # bin width in discriminator distribution bins = np.arange(RESOLUTION[1], RESOLUTION[2] + db, db) # bin edges in discriminator distribution center = (bins[:-1] + bins[1:]) / 2 ########################### # Read and evaluate signals ########################### SIGNAL = [opts.signal] Signal = [] for s in SIGNAL: x, y = pickBenchmark(s) df, weight = loadDataFrame(os.path.join(inputDir, s), PRESELECTION, VAR, WEIGHTS, LUMI) y_hat = evaluate(model, df.values, scaler) bin_index = np.digitize( y_hat[:, 0], bins[1:]) # get the bin index of the output score for each event outputWeighted = [] outputWeightedVar = [] outputMC = [] outputMCVar = [] for i in range(len(bins[1:])): w = weight.values[np.where(bin_index == i)[0]] sigma = np.sum(w**2.) outputWeighted.append(w.sum()) outputWeightedVar.append(sigma) outputMC.append(len(w)) outputMCVar.append(np.sqrt(len(w))) Signal.append({ 'name': s, 'm_stop': x, 'm_X': y, 'dataset': df, 'weight': weight, 'nEvents': weight.sum(), 'y_pred': y_hat, 'outputScore': np.array(outputWeighted), 'outputMC': np.array(outputMC), 'output_var': np.array(outputWeightedVar), 'outputMC_var': np.array(outputMCVar) }) del df, weight, y_hat, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar ########################### # Read and evaluate backgrounds ########################### totBkgEvents = 0. totBkgVar = 0. Background = [] for b in BACKGROUND: df, weight = loadDataFrame(os.path.join(inputDir, b), PRESELECTION, VAR, WEIGHTS, LUMI) y_hat = evaluate(model, df.values, scaler) bin_index = np.digitize(y_hat[:, 0], bins[1:]) outputWeighted = [] outputWeightedVar = [] outputMC = [] outputMCVar = [] totBkgEvents += weight.sum() totBkgVar += np.sum(weight.values**2.) for i in range(len(bins[1:])): w = weight.values[np.where(bin_index == i)[0]] sigma = np.sum(w**2.) 
            # per-bin sums of weights, squared weights (variance) and raw MC counts
            outputWeighted.append(w.sum())
            outputWeightedVar.append(sigma)
            outputMC.append(len(w))
            outputMCVar.append(len(w))

        Background.append({
            'name': b,
            'dataset': df,
            'weight': weight,
            'nEvents': weight.sum(),
            'y_pred': y_hat,
            'outputScore': np.array(outputWeighted),
            'outputMC': np.array(outputMC),
            'output_var': np.array(outputWeightedVar),
            'outputMC_var': np.array(outputMCVar)
        })
        del df, weight, y_hat, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar

    totalBkgOutput = np.array([b['outputScore'] for b in Background])
    totalBkgOutput = totalBkgOutput.sum(axis=0)
    totalBkgVar = np.array([b['output_var'] for b in Background])
    totalBkgVar = totalBkgVar.sum(axis=0)

    ###########################
    # Determine Significance  #
    ###########################
    for s in Signal:
        significance = []
        significance_err = []
        asimov = []
        tot_rel = np.sqrt(np.sum(s['output_var'])) / s['nEvents']
        for i in range(len(bins[1:])):
            # efficiencies for a cut keeping everything above the lower edge of bin i
            #eff_sig = s['outputScore'][:i+1].sum() / s['nEvents']
            #eff_bkg = totalBkgOutput[:i+1].sum() / totalBkgOutput.sum()
            eff_sig = s['outputScore'][i:].sum() / s['nEvents']
            eff_bkg = totalBkgOutput[i:].sum() / totalBkgOutput.sum()

            #err_sig = np.sqrt(np.sum(s['output_var'][:i+1])) / s['nEvents']
            #err_bkg = np.sqrt(np.sum(totalBkgVar[:i+1])) / totalBkgOutput.sum()
            err_sig = np.sqrt(np.sum(s['output_var'][i:])) / s['nEvents']
            err_bkg = np.sqrt(np.sum(totalBkgVar[i:])) / totalBkgOutput.sum()

            #if totalBkgOutput[:i+1].sum() > 0.:
            #    rel_err_bkg = np.sqrt(np.sum(totalBkgVar[:i+1])) / totalBkgOutput[:i+1].sum()
            if totalBkgOutput[i:].sum() > 0.:
                rel_err_bkg = np.sqrt(np.sum(totalBkgVar[i:])) / totalBkgOutput[i:].sum()
            else:
                rel_err_bkg = 0.
            #if s['outputScore'][:i+1].sum() > 0.:
            #    rel_err_sig = np.sqrt(np.sum(s['output_var'][:i+1])) / s['outputScore'][:i+1].sum()
            if s['outputScore'][i:].sum() > 0.:
                rel_err_sig = np.sqrt(np.sum(s['output_var'][i:])) / s['outputScore'][i:].sum()
            else:
                rel_err_sig = 0.

            # relative background uncertainty: statistics plus a flat 25% systematic
            #total_rel_err = np.sqrt(rel_err_sig**2. + rel_err_bkg**2. + 0.25**2.)
            total_rel_err = np.sqrt(rel_err_bkg**2. + 0.25**2.)

            if (eff_sig == 0) or (eff_bkg == 0):
                Z = 0.
                Z_err = 0.
                ams = 0.
            elif (err_sig / eff_sig > 0.75) or (err_bkg / eff_bkg > 0.75):
                Z = 0.
                Z_err = 0.
                ams = 0.
else: #Z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(s['outputScore'][:i+1].sum(), totalBkgOutput[:i+1].sum(), total_rel_err) Z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ( s['outputScore'][i:].sum(), totalBkgOutput[i:].sum(), total_rel_err) ams = asimovZ(s['outputScore'][i:].sum(), totalBkgOutput[i:].sum(), np.sqrt(totalBkgVar[i:].sum())) Zplus_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ( (eff_sig + err_sig) * s['nEvents'], eff_bkg * totalBkgOutput.sum(), total_rel_err) Zmins_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ( (eff_sig - err_sig) * s['nEvents'], eff_bkg * totalBkgOutput.sum(), total_rel_err) Zplus_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ( eff_sig * s['nEvents'], (eff_bkg + err_bkg) * totalBkgOutput.sum(), total_rel_err) Zmins_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ( eff_sig * s['nEvents'], (eff_bkg - err_bkg) * totalBkgOutput.sum(), total_rel_err) Z_err_sig = abs(Zplus_sig - Zmins_sig) / 2 Z_err_bkg = abs(Zplus_bkg - Zmins_bkg) / 2 Z_err = np.sqrt(Z_err_sig**2 + Z_err_bkg**2) significance.append(Z) significance_err.append(Z_err) asimov.append(ams) s['sig'] = np.array(significance) s['sig_max'] = s['sig'].max() s['sig_err'] = np.array(significance_err) s['ams'] = np.array(asimov) #print s['sig'] #print s['ams'] sigMax_index = bins[np.where(s['sig'] == s['sig'].max())][0] Z = asimovZ( Signal[0]['outputScore'][np.where(bins[:-1] == sigMax_index)], totalBkgOutput[np.where(bins[:-1] == sigMax_index)], np.sqrt(totalBkgVar[np.where(bins[:-1] == sigMax_index)]), syst=False) Z_syst = asimovZ( Signal[0]['outputScore'][np.where(bins[:-1] == sigMax_index)], totalBkgOutput[np.where(bins[:-1] == sigMax_index)], np.sqrt(totalBkgVar[np.where(bins[:-1] == sigMax_index)]), syst=True) #print s['sig'].max(), sigMax_index, Z, Z_syst x = np.array([s['m_stop'] for s in Signal], dtype=float) y = np.array([s['m_X'] for s in Signal], dtype=float) z = np.array([s['sig_max'] for s in Signal], dtype=float) #print x, y, z #print Signal[0]['outputScore'][np.where(bins[:-1] >= sigMax_index)], Signal[0]['output_var'][np.where(bins[:-1] >= sigMax_index)] #print totalBkgOutput[np.where(bins[:-1] >= sigMax_index)], totalBkgVar[np.where(bins[:-1] >= sigMax_index)] #print Signal[0]['outputScore'], Signal[0]['output_var'] #print totalBkgOutput, totalBkgVar ################################### # Write single bin to .root files # ################################## sigFile = ROOT.TFile(opts.name + "_output_sig.root", "RECREATE") sig_sr = ROOT.TH1D("SR", "SR", 1, 0, 1) sig_sr.SetBinContent( 1, np.sum(Signal[0]['outputScore'][np.where(bins[:-1] >= sigMax_index)])) sig_sr.SetBinError( 1, np.sum( np.sqrt( Signal[0]['output_var'][np.where(bins[:-1] >= sigMax_index)]))) sigFile.Write() sigFile.Close() bkgFile = ROOT.TFile(opts.name + "_output_bkg.root", "RECREATE") bkg_sr = ROOT.TH1D("SR", "SR", 1, 0, 1) bkg_sr.SetBinContent( 1, np.sum(totalBkgOutput[np.where(bins[:-1] >= sigMax_index)])) bkg_sr.SetBinError( 1, np.sum(np.sqrt(totalBkgVar[np.where(bins[:-1] >= sigMax_index)]))) bkgFile.Write() bkgFile.Close() ################################### # Write multi bins to .root files # ################################### multibin_sigFile = ROOT.TFile(opts.name + "_output_sig_multibin.root", "RECREATE") multibin_sig_sr = ROOT.TH1D("SR", "SR", 5, 0, 5) for i in xrange(1, 6): index = -6 + i multibin_sig_sr.SetBinContent(i, Signal[0]['outputScore'][index]) multibin_sig_sr.SetBinError(i, np.sqrt(Signal[0]['output_var'][index])) multibin_sigFile.Write() multibin_sigFile.Close() 
    multibin_bkgFile = ROOT.TFile(opts.name + "_output_bkg_multibin.root",
                                  "RECREATE")
    multibin_bkg_sr = ROOT.TH1D("SR", "SR", 5, 0, 5)
    for i in xrange(1, 6):
        index = -6 + i
        multibin_bkg_sr.SetBinContent(i, totalBkgOutput[index])
        multibin_bkg_sr.SetBinError(i, np.sqrt(totalBkgVar[index]))
    multibin_bkgFile.Write()
    multibin_bkgFile.Close()
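
# ---------------------------------------------------------------------------
# Hedged sketch: asimovZ() is imported from the analysis utilities and is not
# defined in this file. The scans above call it as asimovZ(s, b, sigma_b) and
# with an optional syst flag, so it is assumed to return the median Asimov
# significance, optionally including an absolute background uncertainty
# (Cowan et al., EPJC 71 (2011) 1554). A minimal stand-in under that
# assumption:
# ---------------------------------------------------------------------------
def asimovZ_sketch(s, b, sigma_b, syst=True):
    import numpy as np
    s, b, sigma_b = float(s), float(b), float(sigma_b)
    if s <= 0. or b <= 0.:
        return 0.
    if not syst or sigma_b <= 0.:
        # no background systematic: Z = sqrt(2*((s+b)*ln(1+s/b) - s))
        return np.sqrt(2. * ((s + b) * np.log(1. + s / b) - s))
    var = sigma_b**2
    term1 = (s + b) * np.log((s + b) * (b + var) / (b**2 + (s + b) * var))
    term2 = (b**2 / var) * np.log(1. + var * s / (b * (b + var)))
    return np.sqrt(2. * (term1 - term2))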
def main(): model = load_model(modelDir) scaler = joblib.load(SCALING) infofile = open(modelDir.replace('.h5', '_infofile.txt')) infos = infofile.readlines() analysis = infos[0].replace('Used analysis method: ', '').replace('\n', '') dataset = DatasetDir + infos[3].replace('Used dataset: ', '').replace( '\n', '') recurrent = False if analysis.lower() == 'rnn': recurrent = True seq_scaler = dataset + '_scaling.json' db = (RESOLUTION[2] - RESOLUTION[1] ) / RESOLUTION[0] # bin width in discriminator distribution bins = np.arange(RESOLUTION[1], RESOLUTION[2] + db, db) # bin edges in discriminator distribution center = (bins[:-1] + bins[1:]) / 2 print '#----MODEL----#' print modelDir ########################### # Read and evaluate signals ########################### Signal = [] for s in SIGNAL: x, y = pickBenchmark(s) if not recurrent: df, weight = loadDataFrame(os.path.join(inputDir, s + '/'), PRESELECTION, VAR, WEIGHTS, LUMI) y_hat = evaluate(model, df.values, scaler) else: df, weight, collection = loadSequentialDataFrame( os.path.join(inputDir, s + '/'), PRESELECTION, COLLECTION, REMOVE_VAR, VAR, WEIGHTS, LUMI) y_hat = evaluate(model, df.values, scaler, seq_scaler, rnn=True, col=collection) bin_index = np.digitize( y_hat[:, 0], bins[1:]) # get the bin index of the output score for each event outputWeighted = [] outputWeightedVar = [] outputMC = [] outputMCVar = [] for i in range(len(bins[1:])): w = weight.values[np.where(bin_index == i)[0]] sigma = np.sum(w**2.) outputWeighted.append(w.sum()) outputWeightedVar.append(sigma) outputMC.append(len(w)) outputMCVar.append(np.sqrt(len(w))) Signal.append({ 'name': s, 'm_stop': x, 'm_X': y, 'dataset': df, 'weight': weight, 'nEvents': weight.sum(), 'y_pred': y_hat, 'outputScore': np.array(outputWeighted), 'outputMC': np.array(outputMC), 'output_var': np.array(outputWeightedVar), 'outputMC_var': np.array(outputMCVar) }) del df, weight, y_hat, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar ########################### # Read and evaluate backgrounds ########################### totBkgEvents = 0. totBkgVar = 0. Background = [] for b in BACKGROUND: if not recurrent: df, weight = loadDataFrame(os.path.join(inputDir, b + '/'), PRESELECTION, VAR, WEIGHTS, LUMI) y_hat = evaluate(model, df.values, scaler) else: df, weight, collection = loadSequentialDataFrame( os.path.join(inputDir, b + '/'), PRESELECTION, COLLECTION, REMOVE_VAR, VAR, WEIGHTS, LUMI) y_hat = evaluate(model, df.values, scaler, seq_scaler, rnn=True, col=collection) bin_index = np.digitize(y_hat[:, 0], bins[1:]) outputWeighted = [] outputWeightedVar = [] outputMC = [] outputMCVar = [] totBkgEvents += weight.sum() totBkgVar += np.sum(weight.values**2.) for i in range(len(bins[1:])): w = weight.values[np.where(bin_index == i)[0]] sigma = np.sum(w**2.) 
outputWeighted.append(w.sum()) outputWeightedVar.append(sigma) outputMC.append(len(w)) outputMCVar.append(len(w)) Background.append({ 'name': b, 'dataset': df, 'weight': weight, 'nEvents': weight.sum(), 'y_pred': y_hat, 'outputScore': np.array(outputWeighted), 'outputMC': np.array(outputMC), 'output_var': np.array(outputWeightedVar), 'outputMC_var': np.array(outputMCVar) }) del df, weight, y_hat, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar totalBkgOutput = np.array([b['outputScore'] for b in Background]) totalBkgOutput = totalBkgOutput.sum(axis=0) totalBkgVar = np.array([b['output_var'] for b in Background]) totalBkgVar = totalBkgVar.sum(axis=0) for s in Signal: significance = [] significance_err = [] asimov = [] tot_rel = np.sqrt(np.sum(s['output_var'])) / s['nEvents'] for i in range(len(bins[1:])): #eff_sig = s['outputScore'][:i+1].sum() / s['nEvents'] #eff_bkg = totalBkgOutput[:i+1].sum() / totalBkgOutput.sum() eff_sig = s['outputScore'][i:].sum() / s['nEvents'] eff_bkg = totalBkgOutput[i:].sum() / totalBkgOutput.sum() #err_sig = np.sqrt(np.sum(s['output_var'][:i+1])) / s['nEvents'] #err_bkg = np.sqrt(np.sum(totalBkgVar[:i+1])) / totalBkgOutput.sum() err_sig = np.sqrt(np.sum(s['output_var'][i:])) / s['nEvents'] err_bkg = np.sqrt(np.sum(totalBkgVar[i:])) / totalBkgOutput.sum() #if totalBkgOutput[:i+1].sum() > 0.: # rel_err_bkg = np.sqrt(np.sum(totalBkgVar[:i+1])) / totalBkgOutput[:i+1].sum() if totalBkgOutput[i:].sum() > 0.: rel_err_bkg = np.sqrt(np.sum( totalBkgVar[i:])) / totalBkgOutput[i:].sum() else: rel_err_bkg = 0. #if s['outputScore'][:i+1].sum() > 0.: # rel_err_sig = np.sqrt(np.sum(s['output_var'][:i+1])) / s['outputScore'][:i+1].sum() if s['outputScore'][i:].sum() > 0.: rel_err_sig = np.sqrt(np.sum( s['output_var'][i:])) / s['outputScore'][i:].sum() else: rel_err_sig = 0. #total_rel_err = np.sqrt(rel_err_sig**2. + rel_err_bkg**2. + 0.25**2.) total_rel_err = np.sqrt(rel_err_bkg**2. + 0.25**2.) if (eff_sig == 0) or (eff_bkg == 0): Z = 0. Z_err = 0. ams = 0. elif (err_sig / eff_sig > 0.75) or (err_bkg / eff_bkg > 0.75): Z = 0. Z_err = 0. ams = 0. 
else: #Z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(s['outputScore'][:i+1].sum(), totalBkgOutput[:i+1].sum(), total_rel_err) Z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ( s['outputScore'][i:].sum(), totalBkgOutput[i:].sum(), total_rel_err) ams = asimovZ(s['outputScore'][i:].sum(), totalBkgOutput[i:].sum(), np.sqrt(totalBkgVar[i:].sum())) Zplus_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ( (eff_sig + err_sig) * s['nEvents'], eff_bkg * totalBkgOutput.sum(), total_rel_err) Zmins_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ( (eff_sig - err_sig) * s['nEvents'], eff_bkg * totalBkgOutput.sum(), total_rel_err) Zplus_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ( eff_sig * s['nEvents'], (eff_bkg + err_bkg) * totalBkgOutput.sum(), total_rel_err) Zmins_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ( eff_sig * s['nEvents'], (eff_bkg - err_bkg) * totalBkgOutput.sum(), total_rel_err) Z_err_sig = abs(Zplus_sig - Zmins_sig) / 2 Z_err_bkg = abs(Zplus_bkg - Zmins_bkg) / 2 Z_err = np.sqrt(Z_err_sig**2 + Z_err_bkg**2) significance.append(Z) significance_err.append(Z_err) asimov.append(ams) s['sig'] = np.array(significance) s['sig_max'] = s['sig'].max() s['sig_err'] = np.array(significance_err) s['ams'] = np.array(asimov) #print s['sig'] #print s['ams'] #sigMax_index = bins[np.where(s['sig'] == s['sig'].max())][0] #Z = asimovZ(Signal[0]['outputScore'][np.where(bins[:-1] == sigMax_index)], totalBkgOutput[np.where(bins[:-1] == sigMax_index)], np.sqrt(totalBkgVar[np.where(bins[:-1] == sigMax_index)]), syst=False) #Z_syst = asimovZ(Signal[0]['outputScore'][np.where(bins[:-1] == sigMax_index)], totalBkgOutput[np.where(bins[:-1] == sigMax_index)], np.sqrt(totalBkgVar[np.where(bins[:-1] == sigMax_index)]), syst=True) #print s['sig'].max(), sigMax_index, Z, Z_syst x = np.array([s['m_stop'] for s in Signal], dtype=float) y = np.array([s['m_X'] for s in Signal], dtype=float) z = np.array([s['sig_max'] for s in Signal], dtype=float) #print x, y, z #print Signal[0]['outputScore'][np.where(bins[:-1] >= sigMax_index)], Signal[0]['output_var'][np.where(bins[:-1] >= sigMax_index)] #print totalBkgOutput[np.where(bins[:-1] >= sigMax_index)], totalBkgVar[np.where(bins[:-1] >= sigMax_index)] #print Signal[0]['outputScore'], Signal[0]['output_var'] #print totalBkgOutput, totalBkgVar # Set up a regular grid of interpolation points print('Plotting the output score...') fig = plt.figure(figsize=(8, 6)) ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=3) ax1.set_xlim((bins[0], bins[-1])) ax1.set_ylabel("Events", horizontalalignment='right', y=1.0) sb_ratio = Signal[0]['outputScore'].sum() / totalBkgOutput.sum() #if sb_ratio < 0.2: # #ATTENTION! 
Simplified error propagation (treated as uncorrelated) # scaled = Signal[0]['outputScore'] / Signal[0]['outputScore'].sum() * totalBkgOutput.sum() # scaled_var = scaled*scaled * ( (Signal[0]['output_var']/Signal[0]['outputScore'])**2 + (totalBkgVar.sum()/totalBkgOutput.sum())**2 + (Signal[0]['output_var'].sum()/Signal[0]['outputScore'].sum())**2 ) # scaled_label = 'Signal scaled to Bkg' # #else: scaled = Signal[0]['outputScore'] scaled_var = Signal[0]['output_var'] scaled_label = 'Signal' plt.bar(center, totalBkgOutput / totalBkgOutput.sum(), width=db, yerr=np.sqrt(totalBkgVar) / totalBkgOutput.sum(), color='b', alpha=0.25, error_kw=dict(ecolor='b', lw=1.5), label=Background[0]['name']) plt.bar(center, Signal[0]['outputScore'] / Signal[0]['outputScore'].sum(), width=db, yerr=np.sqrt(Signal[0]['output_var']) / Signal[0]['outputScore'].sum(), label=Signal[0]['name'], color='r', alpha=0.25, error_kw=dict(ecolor='r', lw=1.5)) ax1.set_ylim( (0., np.max([ np.max(totalBkgOutput / totalBkgOutput.sum()), np.max(Signal[0]['outputScore'] / Signal[0]['outputScore'].sum()) ]) * 1.3)) #ax1.set_yscale('log') leg = plt.legend(loc="best", frameon=False) AtlasStyle_mpl.ATLASLabel(ax1, 0.02, 0.925, 'Work in progress') #AtlasStyle_mpl.LumiLabel(ax1, 0.02, 0.875, lumi=LUMI*0.001) ax2 = plt.subplot2grid((4, 4), (3, 0), colspan=4, rowspan=1) getRatio(Signal[0]['outputScore'] / Signal[0]['outputScore'].sum(), bins, np.sqrt(Signal[0]['output_var']) / Signal[0]['outputScore'].sum(), totalBkgOutput / totalBkgOutput.sum(), bins, np.sqrt(totalBkgVar) / totalBkgOutput.sum(), 'r') ax2.set_xlabel('Output score', horizontalalignment='right', x=1.0) ax2.set_ylabel('Reco/Truth') ax2.set_xlim((0., 1.)) ax2.set_ylim((0, 2)) ax2.grid() ax2.tick_params(direction='in') ax2.xaxis.set_ticks_position('both') ax2.yaxis.set_ticks_position('both') plt.savefig("plots/" + modelfile + "_shapeComparison_outputScore.pdf") plt.savefig("plots/" + modelfile + "_shapeComparison_outputScore.png") plt.close()
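
# ---------------------------------------------------------------------------
# Hedged sketch: getRatio() comes from the shared plotting helpers and is not
# shown here. Given how it is called above (numerator hist, bins, numerator
# errors, denominator hist, bins, denominator errors, colour), a minimal
# ratio-panel stand-in could look like this:
# ---------------------------------------------------------------------------
def getRatio_sketch(num, bins, num_err, den, bins_den, den_err, color='r'):
    # draw num/den per bin on the current axes, errors added in quadrature
    center = (bins[:-1] + bins[1:]) / 2.
    den_safe = np.where(den > 0., den, np.nan)
    num_safe = np.where(num != 0., num, np.nan)
    ratio = num / den_safe
    rel_err = np.sqrt((num_err / num_safe)**2 + (den_err / den_safe)**2)
    plt.errorbar(center, ratio, yerr=np.abs(ratio) * rel_err,
                 fmt='o', color=color, markersize=3)
    plt.axhline(1., color='grey', linestyle='--', linewidth=1)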
def plot_classification_datapoint(SignalList, model, preselection, nvar, weight, lumi, save=False, fileName='Test', multiclass=True): ''' Evaluate the classification on certain datapoints. sigList is supposed to be in a form like in config/samples.py ''' print '----- Plotting the Classification for different datapoints-----' print 'Using preselection', preselection #met_cut = False #for pre in preselection: #if pre['name'] == 'met': #met_cut = True #met_threshold = pre['threshold'] #met_cut_addStr = 'met' + str(int(pre['threshold']*0.001)) #if not met_cut: #print 'Using no met-preselection!' input = '/project/etp5/dhandl/samples/SUSY/Stop1L/FullRun2/hdf5/cut_mt30_met60_preselection/' bkgList = [{ 'name': 'powheg_ttbar', 'path': input + 'mc16d_ttbar/' }, { 'name': 'powheg_singletop', 'path': input + 'mc16d_singletop/' }, { 'name': 'sherpa22_Wjets', 'path': input + 'mc16d_Wjets/' }] #Loading background once print 'Loading background...' Background = [] for b in bkgList: print 'Loading background {} from {}...'.format(b['name'], b['path']) Background.append( Sample( b['name'], pT.loadDataFrame(b['path'], preselection, nvar, weight, lumi))) bkg = np.empty([0, Background[0].dataframe[0].shape[1]]) bkg_w = np.empty(0) bkg_y = np.empty(0) for i, b in enumerate(Background): i = i + 1 bkg = np.concatenate((bkg, b.dataframe[0])) bkg_w = np.concatenate((bkg_w, b.dataframe[1])) bkg_y = np.concatenate((bkg_y, np.full(b.dataframe[0].shape[0], i))) print 'Background shape', bkg.shape #Evaluating on signal for each set of points print 'Evaluating on signal sets...' for sigList in SignalList: Signal = [] addStr = '_stop_bWN_' name = False title = '' for s in sigList: if not name: addStr += s['name'].replace('stop_bWN_', '') name = True else: addStr += s['name'].replace(s['name'][:12], '') print 'Loading signal {} from {}...'.format(s['name'], s['path']) Signal.append( Sample( s['name'], pT.loadDataFrame(s['path'], preselection, nvar, weight, lumi))) title = addStr[1:17].replace('_', ' ') mstop = int(addStr[10:13]) mneutralino = int(addStr[14:17]) sample = [ r'$m_{\tilde{t}}$=%i GeV' % mstop, r'$m_{\chi}$=%i GeV' % mneutralino ] sig = np.empty([0, Signal[0].dataframe[0].shape[1]]) sig_w = np.empty(0) sig_y = np.empty(0) for s in Signal: sig = np.concatenate((sig, s.dataframe[0])) sig_w = np.concatenate((sig_w, s.dataframe[1])) sig_y = np.concatenate((sig_y, np.zeros(s.dataframe[0].shape[0]))) X = np.concatenate((sig, bkg)) w = np.concatenate((sig_w, bkg_w)) if multiclass: y = np.concatenate((sig_y, bkg_y)) else: y = [] for _df, ID in [(sig, 0), (bkg, 1)]: y.extend([ID] * _df.shape[0]) y = np.array(y) scaler = StandardScaler() X_scaled = scaler.fit_transform(X) y_predict = model.predict(X_scaled) #if not met_cut: #addStr += '_no_met_cut' #print 'True classes:', y.shape, 'Predicted classes:', y_predict.shape #sig_predicted = deepcopy(y_predict)[y==0] #bkg_predicted = deepcopy(y_predict)[y!=0] #bkg1_predicted= deepcopy(y_predict)[y==1] #bkg2_predicted= deepcopy(y_predict)[y==2] #bkg3_predicted= deepcopy(y_predict)[y==3] #bkg1_w = deepcopy(w)[y==1] #bkg2_w = deepcopy(w)[y==2] #bkg3_w = deepcopy(w)[y==3] variables = nvar plot_classification(y, y_predict, w, save=save, fileName=fileName, weighted=True, sample=sample, addStr=addStr) plot_classification(y[X[:, variables.index('met')] >= 250e3], y_predict[X[:, variables.index('met')] >= 250e3], w[X[:, variables.index('met')] >= 250e3], save=save, fileName=fileName, weighted=True, sample=sample, addStr=addStr + '_met250')
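
# ---------------------------------------------------------------------------
# Hedged sketch: the Sample class used above is imported from elsewhere. The
# code only relies on .name and on .dataframe being the (features, weights)
# tuple returned by pT.loadDataFrame, so a minimal stand-in is a namedtuple:
# ---------------------------------------------------------------------------
import collections

SampleSketch = collections.namedtuple('SampleSketch', ['name', 'dataframe'])
# usage, mirroring the loops above:
#   smp = SampleSketch('powheg_ttbar', pT.loadDataFrame(path, preselection, nvar, weight, lumi))
#   smp.dataframe[0]  -> feature table,  smp.dataframe[1] -> event weights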
def plot_confusion_matrix_datapoint(SignalList, model, preselection, nvar, weight, lumi, save=False, fileName='Test', multiclass=True): ''' Evaluate the confusion matrix on certain datapoints. sigList is supposed to be in a form like in config/samples.py ''' print '----- Plotting the confusion matrices for different datapoints-----' met_cut = False for pre in preselection: if pre['name'] == 'met': met_cut = True if not met_cut: print 'Using no met-preselection!' input = '/project/etp5/dhandl/samples/SUSY/Stop1L/hdf5/cut_mt30_met60_preselection/' bkgList = [{ 'name': 'powheg_ttbar', 'path': input + 'powheg_ttbar/' }, { 'name': 'powheg_singletop', 'path': input + 'powheg_singletop/' }, { 'name': 'sherpa22_Wjets', 'path': input + 'sherpa22_Wjets/' }] #Loading background once print 'Loading background...' Background = [] for b in bkgList: print 'Loading background {} from {}...'.format(b['name'], b['path']) Background.append( Sample( b['name'], pT.loadDataFrame(b['path'], preselection, nvar, weight, lumi))) bkg = np.empty([0, Background[0].dataframe[0].shape[1]]) bkg_w = np.empty(0) bkg_y = np.empty(0) for i, b in enumerate(Background): i = i + 1 bkg = np.concatenate((bkg, b.dataframe[0])) bkg_w = np.concatenate((bkg_w, b.dataframe[1])) bkg_y = np.concatenate((bkg_y, np.full(b.dataframe[0].shape[0], i))) #Evaluating on signal for each set of points print 'Evaluating on signal sets...' for sigList in SignalList: Signal = [] addStr = '_stop_bWN_' name = False for s in sigList: if not name: addStr += s['name'].replace('stop_bWN_', '') name = True else: addStr += s['name'].replace(s['name'][:12], '') print 'Loading signal {} from {}...'.format(s['name'], s['path']) Signal.append( Sample( s['name'], pT.loadDataFrame(s['path'], preselection, nvar, weight, lumi))) sig = np.empty([0, Signal[0].dataframe[0].shape[1]]) sig_w = np.empty(0) sig_y = np.empty(0) for s in Signal: sig = np.concatenate((sig, s.dataframe[0])) sig_w = np.concatenate((sig_w, s.dataframe[1])) sig_y = np.concatenate((sig_y, np.zeros(s.dataframe[0].shape[0]))) X = np.concatenate((sig, bkg)) w = np.concatenate((sig_w, bkg_w)) if multiclass: y = np.concatenate((sig_y, bkg_y)) else: y = [] for _df, ID in [(sig, 0), (bkg, 1)]: y.extend([ID] * _df.shape[0]) y = np.array(y) scaler = StandardScaler() X_scaled = scaler.fit_transform(X) y_predict = model.predict(X_scaled) y_true = y if not met_cut: addStr += '_no_met_cut' plot_confusion_matrix(y_true, y_predict, filename=fileName, save=save, addStr=addStr)
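
# ---------------------------------------------------------------------------
# Note (assumption): both datapoint helpers above fit a fresh StandardScaler
# on the evaluation sample before predicting, while the evaluation mains in
# this section load the scaler persisted at training time ('<model>_scaler.pkl').
# If the training-time scaling is wanted here instead, the pattern would be
# roughly the following (the path is illustrative):
# ---------------------------------------------------------------------------
def scale_with_training_scaler(X, scaler_path):
    # load the scaler saved during training and only *transform* the
    # evaluation data, instead of re-fitting on it
    scaler = joblib.load(scaler_path)
    return scaler.transform(X)

# X_scaled = scale_with_training_scaler(X, 'TrainedModels/models/myModel_scaler.pkl')  # illustrative path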
def main(): # Check number of arguments and act respectively thereof if len(sys.argv) == 2: modelfile = sys.argv[1:][0] else: print 'Usage: evaluate_signal.py <model> (omit directory and file suffix)' return print modelfile, type(modelfile) Dir = 'TrainedModels/models/' DatasetDir = 'TrainedModels/datasets/' modelDir = Dir + modelfile + '.h5' if os.path.exists(os.path.join(Dir, modelfile + '_scaler.pkl')): scaler = joblib.load(os.path.join(Dir, modelfile + '_scaler.pkl')) else: scaler = None infofile = open(modelDir.replace('.h5', '_infofile.txt')) infos = infofile.readlines() analysis = infos[0].replace('Used analysis method: ', '').replace('\n', '') dataset = DatasetDir + infos[3].replace('Used dataset: ', '').replace( '\n', '') VAR = infos[5].replace('Used variables for training: ', '').replace('\n', '').split() print VAR recurrent = False if analysis.lower() == 'rnn': recurrent = True seq_scaler = dataset + '_scaling.json' if 'nn' in analysis.lower(): model = load_model(os.path.join(Dir, modelfile + '.h5')) elif 'bdt' in analysis.lower(): model = joblib.load(os.path.join(Dir, modelfile + '.h5')) db = (RESOLUTION[2] - RESOLUTION[1] ) / RESOLUTION[0] # bin width in discriminator distribution bins = np.arange(RESOLUTION[1], RESOLUTION[2] + db, db) # bin edges in discriminator distribution center = (bins[:-1] + bins[1:]) / 2 print '#----MODEL----#' print '\t', modelDir ########################### # Read and evaluate signals ########################### Signal = [] for smp in SIGNAL: first = True for s in smp: print 'Sample:\t', s x, y = pickBenchmark(s) if not recurrent: _df, _weight = loadDataFrame(os.path.join(inputDir, s + '/'), PRESELECTION, VAR, WEIGHTS, LUMI) print _df.shape, _weight.shape if first: df = _df.copy() weight = _weight.copy() first = False else: df = pd.concat((df, _df), ignore_index=True) weight = pd.concat((weight, _weight), ignore_index=True) else: _df, _weight, collection = loadSequentialDataFrame( os.path.join(inputDir, s + '/'), PRESELECTION, COLLECTION, REMOVE_VAR, VAR, WEIGHTS, LUMI) print _df.shape, _weight.shape, collection[0]['df'].shape if first: df = _df.copy() weight = _weight.copy() seq = collection[0]['df'].copy() first = False else: df = pd.concat((df, _df), ignore_index=True) weight = pd.concat((weight, _weight), ignore_index=True) seq = pd.concat((seq, collection[0]['df']), ignore_index=True) if not recurrent: y_hat = evaluate(model, df.values, scaler, method=analysis) print df.shape, weight.shape else: collection[0]['df'] = seq print df.shape, weight.shape, collection[0]['df'].shape y_hat = evaluate(model, df.values, scaler, seq_scaler, method=analysis, col=collection) bin_index = np.digitize( y_hat[:, 0], bins[1:]) # get the bin index of the output score for each event outputWeighted = [] outputWeightedVar = [] outputMC = [] outputMCVar = [] for i in range(len(bins[1:])): w = weight.values[np.where(bin_index == i)[0]] sigma = np.sum(w**2.) outputWeighted.append(w.sum()) outputWeightedVar.append(sigma) outputMC.append(len(w)) outputMCVar.append(np.sqrt(len(w))) Signal.append({ 'name': s[6:], 'm_stop': x, 'm_X': y, 'dataset': df, 'weight': weight, 'nEvents': weight.sum(), 'y_pred': y_hat, 'outputScore': np.array(outputWeighted), 'outputMC': np.array(outputMC), 'output_var': np.array(outputWeightedVar), 'outputMC_var': np.array(outputMCVar) }) del df, weight, y_hat, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar ########################### # Read and evaluate backgrounds ########################### totBkgEvents = 0. totBkgVar = 0. 
Background = [] for smp in BACKGROUND: first = True for b in smp: print 'Sample:\t', b if not recurrent: _df, _weight = loadDataFrame(os.path.join(inputDir, b + '/'), PRESELECTION, VAR, WEIGHTS, LUMI) print _df.shape, _weight.shape if first: df = _df.copy() weight = _weight.copy() first = False else: df = pd.concat((df, _df), ignore_index=True) weight = pd.concat((weight, _weight), ignore_index=True) else: _df, _weight, collection = loadSequentialDataFrame( os.path.join(inputDir, b + '/'), PRESELECTION, COLLECTION, REMOVE_VAR, VAR, WEIGHTS, LUMI) print _df.shape, _weight.shape, collection[0]['df'].shape if first: df = _df.copy() weight = _weight.copy() seq = collection[0]['df'].copy() first = False else: df = pd.concat((df, _df), ignore_index=True) weight = pd.concat((weight, _weight), ignore_index=True) seq = pd.concat((seq, collection[0]['df']), ignore_index=True) if not recurrent: print df.shape, weight.shape y_hat = evaluate(model, df.values, scaler, method=analysis) else: collection[0]['df'] = seq print df.shape, weight.shape, collection[0]['df'].shape y_hat = evaluate(model, df.values, scaler, seq_scaler, method=analysis, col=collection) bin_index = np.digitize(y_hat[:, 0], bins[1:]) outputWeighted = [] outputWeightedVar = [] outputMC = [] outputMCVar = [] totBkgEvents += weight.sum() totBkgVar += np.sum(weight.values**2.) for i in range(len(bins[1:])): w = weight.values[np.where(bin_index == i)[0]] sigma = np.sum(w**2.) outputWeighted.append(w.sum()) outputWeightedVar.append(sigma) outputMC.append(len(w)) outputMCVar.append(len(w)) Background.append({ 'name': b, 'dataset': df, 'weight': weight, 'nEvents': weight.sum(), 'y_pred': y_hat, 'outputScore': np.array(outputWeighted), 'outputMC': np.array(outputMC), 'output_var': np.array(outputWeightedVar), 'outputMC_var': np.array(outputMCVar) }) del df, weight, y_hat, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar totalBkgOutput = np.array([b['outputScore'] for b in Background]) totalBkgOutput = totalBkgOutput.sum(axis=0) totalBkgVar = np.array([b['output_var'] for b in Background]) totalBkgVar = totalBkgVar.sum(axis=0) print len(Signal), len( Background), Signal[0]['outputScore'][:].sum(), totalBkgOutput for s in Signal: significance = [] significance_err = [] asimov = [] asimov_err = [] roc = [] roc_err = [] tot_rel = np.sqrt(np.sum(s['output_var'])) / s['nEvents'] for i in range(len(bins[1:])): #eff_sig = s['outputScore'][:i+1].sum() / s['nEvents'] #eff_bkg = totalBkgOutput[:i+1].sum() / totalBkgOutput.sum() eff_sig = s['outputScore'][i:].sum() / s['nEvents'] eff_bkg = totalBkgOutput[i:].sum() / totalBkgOutput.sum() #err_sig = np.sqrt(np.sum(s['output_var'][:i+1])) / s['nEvents'] #err_bkg = np.sqrt(np.sum(totalBkgVar[:i+1])) / totalBkgOutput.sum() err_sig = np.sqrt(np.sum(s['output_var'][i:])) / s['nEvents'] err_bkg = np.sqrt(np.sum(totalBkgVar[i:])) / totalBkgOutput.sum() #if totalBkgOutput[:i+1].sum() > 0.: # rel_err_bkg = np.sqrt(np.sum(totalBkgVar[:i+1])) / totalBkgOutput[:i+1].sum() if totalBkgOutput[i:].sum() > 0.: rel_err_bkg = np.sqrt(np.sum( totalBkgVar[i:])) / totalBkgOutput[i:].sum() else: rel_err_bkg = 0. #if s['outputScore'][:i+1].sum() > 0.: # rel_err_sig = np.sqrt(np.sum(s['output_var'][:i+1])) / s['outputScore'][:i+1].sum() if s['outputScore'][i:].sum() > 0.: rel_err_sig = np.sqrt(np.sum( s['output_var'][i:])) / s['outputScore'][i:].sum() else: rel_err_sig = 0. #total_rel_err = np.sqrt(rel_err_sig**2. + rel_err_bkg**2. + 0.25**2.) total_rel_err = np.sqrt(rel_err_bkg**2. + 0.25**2.) 
if float(eff_sig == 0) or float(eff_bkg == 0): Z = 0. Z_err = 0. ams = 0. ams_err = 0. elif (err_sig / eff_sig > 0.75) or (err_bkg / eff_bkg > 0.75): Z = 0. Z_err = 0. ams = 0. ams_err = 0. else: #Z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(s['outputScore'][:i+1].sum(), totalBkgOutput[:i+1].sum(), total_rel_err) Z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ( s['outputScore'][i:].sum(), totalBkgOutput[i:].sum(), total_rel_err) ams = asimovZ(s['outputScore'][i:].sum(), totalBkgOutput[i:].sum(), np.sqrt(totalBkgVar[i:].sum())) roc.append((eff_sig, 1 - eff_bkg)) ams_plus_sig = asimovZ((s['outputScore'][i:].sum() + np.sqrt(np.sum(s['output_var'][i:]))), totalBkgOutput[i:].sum(), np.sqrt(totalBkgVar[i:].sum())) ams_mins_sig = asimovZ((s['outputScore'][i:].sum() - np.sqrt(np.sum(s['output_var'][i:]))), totalBkgOutput[i:].sum(), np.sqrt(totalBkgVar[i:].sum())) ams_plus_bkg = asimovZ(s['outputScore'][i:].sum(), (totalBkgOutput[i:].sum() + np.sqrt(np.sum(totalBkgVar[i:]))), np.sqrt(totalBkgVar[i:].sum())) ams_mins_bkg = asimovZ(s['outputScore'][i:].sum(), (totalBkgOutput[i:].sum() - np.sqrt(np.sum(totalBkgVar[i:]))), np.sqrt(totalBkgVar[i:].sum())) Zplus_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ( (eff_sig + err_sig) * s['nEvents'], eff_bkg * totalBkgOutput.sum(), total_rel_err) Zmins_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ( (eff_sig - err_sig) * s['nEvents'], eff_bkg * totalBkgOutput.sum(), total_rel_err) Zplus_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ( eff_sig * s['nEvents'], (eff_bkg + err_bkg) * totalBkgOutput.sum(), total_rel_err) Zmins_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ( eff_sig * s['nEvents'], (eff_bkg - err_bkg) * totalBkgOutput.sum(), total_rel_err) Z_err_sig = abs(Zplus_sig - Zmins_sig) / 2 Z_err_bkg = abs(Zplus_bkg - Zmins_bkg) / 2 Z_err = np.sqrt(Z_err_sig**2 + Z_err_bkg**2) ams_err_sig = abs(ams_plus_sig - ams_mins_sig) / 2. ams_err_bkg = abs(ams_plus_bkg - ams_mins_bkg) / 2. 
ams_err = np.sqrt(ams_err_sig**2 + ams_err_bkg**2) significance.append(Z) significance_err.append(Z_err) asimov.append(ams) asimov_err.append(ams_err) s['sig'] = np.array(significance) s['sig_max'] = s['sig'].max() s['sig_err'] = np.array(significance_err) s['ams'] = np.array(asimov) s['ams_err'] = np.array(asimov_err) s['roc'] = np.array(roc) print s['sig'] print s['ams'] #print s['roc'] sigMax_index = bins[np.where(s['sig'] == s['sig'].max())][0] amsMax_index = bins[np.where(s['ams'] == s['ams'].max())][0] Z = asimovZ( Signal[0]['outputScore'][np.where(bins[:-1] == sigMax_index)], totalBkgOutput[np.where(bins[:-1] == sigMax_index)], np.sqrt(totalBkgVar[np.where(bins[:-1] == sigMax_index)]), syst=False) Z_syst = asimovZ( Signal[0]['outputScore'][np.where(bins[:-1] == sigMax_index)], totalBkgOutput[np.where(bins[:-1] == sigMax_index)], np.sqrt(totalBkgVar[np.where(bins[:-1] == sigMax_index)]), syst=True) print 'RooStats: ', s['sig'].max(), sigMax_index, Z, Z_syst print 'asmiov : ', s['ams'].max(), amsMax_index x = np.array([s['m_stop'] for s in Signal], dtype=float) y = np.array([s['m_X'] for s in Signal], dtype=float) z = np.array([s['sig_max'] for s in Signal], dtype=float) #print x, y, z print Signal[0]['outputScore'][np.where( bins[:-1] >= sigMax_index)], Signal[0]['output_var'][np.where( bins[:-1] >= sigMax_index)] print totalBkgOutput[np.where( bins[:-1] >= sigMax_index)], totalBkgVar[np.where( bins[:-1] >= sigMax_index)] print np.sum(Signal[0]['outputScore'][np.where( bins[:-1] >= sigMax_index)]), np.sqrt( np.sum(Signal[0]['output_var'][np.where( bins[:-1] >= sigMax_index)]**2)) print np.sum(totalBkgOutput[np.where(bins[:-1] >= sigMax_index)]), np.sqrt( np.sum(totalBkgVar[np.where(bins[:-1] >= sigMax_index)]**2)) print Signal[0]['outputScore'], Signal[0]['output_var'] print totalBkgOutput, totalBkgVar # Set up a regular grid of interpolation points print('Plotting the output score...') fig = plt.figure(figsize=(8, 6)) ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4) ax1.set_xlim((bins[0], bins[-1])) ax1.set_xlabel('Output score', horizontalalignment='right', x=1.0) ax1.set_ylabel("Events", horizontalalignment='right', y=1.0) sb_ratio = Signal[0]['outputScore'].sum() / totalBkgOutput.sum() #if sb_ratio < 0.2: # #ATTENTION! 
Simplified error propagation (treated as uncorrelated) # scaled = Signal[0]['outputScore'] / Signal[0]['outputScore'].sum() * totalBkgOutput.sum() # scaled_var = scaled*scaled * ( (Signal[0]['output_var']/Signal[0]['outputScore'])**2 + (totalBkgVar.sum()/totalBkgOutput.sum())**2 + (Signal[0]['output_var'].sum()/Signal[0]['outputScore'].sum())**2 ) # scaled_label = 'Signal scaled to Bkg' # #else: scaled = Signal[0]['outputScore'] scaled_var = Signal[0]['output_var'] scaled_label = 'Signal' multib = plt.bar(center, Background[4]['outputScore'], width=db, yerr=np.sqrt(Background[4]['output_var']), color='seagreen', alpha=0.5, error_kw=dict(ecolor='seagreen', lw=1.5), label='multiboson') ttV = plt.bar(center, Background[3]['outputScore'], width=db, yerr=np.sqrt(Background[4]['output_var']), color='lightcoral', alpha=0.5, error_kw=dict(ecolor='lightcoral', lw=1.5), label='ttV', bottom=Background[4]['outputScore']) w = plt.bar(center, Background[2]['outputScore'], width=db, yerr=np.sqrt(Background[2]['output_var']), color='gold', alpha=0.5, error_kw=dict(ecolor='gold', lw=1.5), label='W+jets', bottom=Background[4]['outputScore'] + Background[3]['outputScore']) st = plt.bar(center, Background[1]['outputScore'], width=db, yerr=np.sqrt(Background[1]['output_var']), color='limegreen', alpha=0.5, error_kw=dict(ecolor='limegreen', lw=1.5), label='singletop', bottom=Background[4]['outputScore'] + Background[3]['outputScore'] + Background[2]['outputScore']) tt = plt.bar(center, Background[0]['outputScore'], width=db, yerr=np.sqrt(Background[0]['output_var']), color='dodgerblue', alpha=0.5, error_kw=dict(ecolor='dodgerblue', lw=1.5), label='ttbar', bottom=Background[4]['outputScore'] + Background[3]['outputScore'] + Background[2]['outputScore'] + Background[1]['outputScore']) plt.bar(center, Signal[0]['outputScore'], width=db, yerr=np.sqrt(Signal[0]['output_var']), label=Signal[0]['name'], color='r', alpha=0.5, error_kw=dict(ecolor='r', lw=1.5)) #plt.step(center, Signal[0]['outputScore'], width=db, yerr= np.sqrt(Signal[0]['output_var']), label=Signal[0]['name'], color='r', error_kw=dict(ecolor='r', lw=1.5)) ax1.set_ylim((0.1, totalBkgOutput.max() * (15.))) ax1.set_yscale('log') leg = plt.legend(loc="best", frameon=False) AtlasStyle_mpl.ATLASLabel(ax1, 0.14, 0.84, 'Work in progress') AtlasStyle_mpl.LumiLabel(ax1, 0.14, 0.79, lumi=LUMI * 0.001) plt.savefig("plots/" + modelfile + "_eval-bWN-500-380_outputScore.pdf") plt.savefig("plots/" + modelfile + "_eval-bWN-500-380_outputScore.png") plt.close() print('Plotting significance...') fig = plt.figure(figsize=(8, 6)) ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4) ax1.set_xlim((bins[0], bins[-1])) ax1.set_xlabel('Output score', horizontalalignment='right', x=1.0) ax1.set_ylabel("Z", horizontalalignment='right', y=1.0) plt.plot(center, Signal[0]['ams'], 'k-', color='cornflowerblue', label='Asimov Z (max = %0.3f at %0.2f)' % (s['ams'].max(), amsMax_index)) plt.fill_between(center, Signal[0]['ams'] - Signal[0]['ams_err'], Signal[0]['ams'] + Signal[0]['ams_err'], alpha=0.2, edgecolor='cornflowerblue', facecolor='cornflowerblue', linewidth=0) ax1.set_ylim((0., Signal[0]['ams'].max() * (1.5))) plt.plot(center, Signal[0]['sig'], 'k-', color='darkred', label='Binomial Z (max = %0.3f at %0.2f)' % (s['sig'].max(), sigMax_index)) plt.fill_between(center, Signal[0]['sig'] - Signal[0]['sig_err'], Signal[0]['sig'] + Signal[0]['sig_err'], alpha=0.2, edgecolor='darkred', facecolor='darkred', linewidth=0) plt.plot(center, len(center) * [3.], '--', color='grey', 
alpha=0.5) plt.plot(center, len(center) * [5.], '--', color='red', alpha=0.5) leg = plt.legend(loc="best", frameon=False) AtlasStyle_mpl.ATLASLabel(ax1, 0.14, 0.84, 'Work in progress') AtlasStyle_mpl.LumiLabel(ax1, 0.14, 0.79, lumi=LUMI * 0.001) plt.savefig("plots/" + modelfile + "_Significance_bWN-500-380.pdf") plt.savefig("plots/" + modelfile + "_Significance_bWN-500-380.png") plt.close() print('Plotting ROC...') fig = plt.figure(figsize=(8, 6)) ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4) ax1.set_xlim((bins[0], bins[-1])) ax1.set_ylim((0, 1)) ax1.set_xlabel('$\epsilon_{Sig.}$', horizontalalignment='right', x=1.0) ax1.set_ylabel("$r_{Bkg.}$", horizontalalignment='right', y=1.0) auc = np.trapz(s['roc'][:, 0], s['roc'][:, 1], dx=db) print 'Area under ROC?!: ', auc plt.plot(s['roc'][:, 0], s['roc'][:, 1], 'k-', color='cornflowerblue', label='ROC (AUC = %0.4f)' % (auc)) #plt.fill_between(center, Signal[0]['ams']-Signal[0]['ams_err'], Signal[0]['ams']+Signal[0]['ams_err'], alpha=0.2, edgecolor='cornflowerblue', facecolor='cornflowerblue', linewidth=0) plt.plot([0, 1], [1, 0], '--', color=(0.6, 0.6, 0.6), label='Luck') leg = plt.legend(loc="lower left", frameon=False) AtlasStyle_mpl.ATLASLabel(ax1, 0.14, 0.28, 'Work in progress') AtlasStyle_mpl.LumiLabel(ax1, 0.14, 0.23, lumi=LUMI * 0.001) plt.savefig("plots/" + modelfile + "_ROC_bWN-500-380.pdf") plt.savefig("plots/" + modelfile + "_ROC_bWN-500-380.png") plt.close()
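
# ---------------------------------------------------------------------------
# Hedged note: the AUC above is taken as np.trapz(eff_sig, rejection), which
# depends on the ordering and orientation of the scan points (hence the
# 'Area under ROC?!' printout). One way to get the conventional TPR-vs-FPR
# area from the same (eff_sig, 1 - eff_bkg) pairs:
# ---------------------------------------------------------------------------
def roc_auc_from_points(roc_points):
    # roc_points[:, 0] = signal efficiency, roc_points[:, 1] = background rejection
    eff_sig = roc_points[:, 0]
    eff_bkg = 1. - roc_points[:, 1]
    order = np.argsort(eff_bkg)   # integrate along ascending background efficiency
    return np.trapz(eff_sig[order], eff_bkg[order])

# auc = roc_auc_from_points(s['roc'])   # same input as the np.trapz call above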
def main(): ########################### # Read and evaluate signals ########################### Signal = [] for smp in SIGNAL: first = True for s in smp: print 'Sample:\t', s x, y = pickBenchmark(s) _df, _weight = loadDataFrame(os.path.join(inputDir, s + '/'), PRESELECTION, VAR, WEIGHTS, LUMI) print _df.shape, _weight.shape if first: df = _df.copy() weight = _weight.copy() first = False else: df = pd.concat((df, _df), ignore_index=True) weight = pd.concat((weight, _weight), ignore_index=True) sigma = np.sum(weight.values**2.) Signal.append({ 'name': s[6:], 'm_stop': x, 'm_X': y, 'dataset': df, 'weight': weight, 'nEvents': weight.sum(), 'nSigma': np.sqrt(sigma) }) del df, weight ########################### # Read and evaluate backgrounds ########################### totBkgEvents = 0. totBkgVar = 0. Background = [] for smp in BACKGROUND: first = True for b in smp: print 'Sample:\t', b _df, _weight = loadDataFrame(os.path.join(inputDir, b + '/'), PRESELECTION, VAR, WEIGHTS, LUMI) print _df.shape, _weight.shape if first: df = _df.copy() weight = _weight.copy() first = False else: df = pd.concat((df, _df), ignore_index=True) weight = pd.concat((weight, _weight), ignore_index=True) totBkgEvents += weight.sum() totBkgVar += np.sum(weight.values**2.) Background.append({ 'name': b, 'dataset': df, 'weight': weight, 'nEvents': weight.sum(), 'nSigma': np.sqrt(np.sum(weight.values**2.)) }) del df, weight total_rel_err = np.sqrt(totBkgVar / totBkgEvents**2. + (totBkgEvents * 0.25)**2.) print 'Bkg:\t%.2f +/- %.2f' % (totBkgEvents, np.sqrt(totBkgVar)) for s in Signal: significance = [] significance_err = [] asimov = [] asimov_err = [] s['Z'] = ROOT.RooStats.NumberCountingUtils.BinomialExpZ( s['nEvents'], totBkgEvents, total_rel_err) s['ams'] = asimovZ(s['nEvents'], totBkgEvents, np.sqrt(totBkgVar)) print 'Z:\t%.2f' % s['Z'] print 'Asimov:\t%.2f' % s['ams'] print 'Sig %s:\t%.2f +/- %.2f' % (s['name'], s['nEvents'], s['nSigma']) print 'r_bkg:\t%.2f' % (1. - (totBkgEvents / BWN_PRESEL_BKG)) print 'e_sig:\t%.2f' % ((s['nEvents'] / BWN_PRESEL_SIG))
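
# ---------------------------------------------------------------------------
# Hedged reference: BinomialExpZ takes the *relative* background uncertainty
# as its third argument. The per-bin scans earlier in this section build it as
# the statistical part and a flat 25% systematic added in quadrature; a
# self-contained illustration (yields are made up):
# ---------------------------------------------------------------------------
def total_relative_uncertainty_sketch(n_bkg, bkg_var, flat_syst=0.25):
    # relative stat. uncertainty from the sum of squared weights, combined in
    # quadrature with a flat relative systematic
    rel_stat = np.sqrt(bkg_var) / n_bkg if n_bkg > 0. else 0.
    return np.sqrt(rel_stat**2 + flat_syst**2)

# example with illustrative yields:
#   total_relative_uncertainty_sketch(12.3, 2.1**2)  ->  ~0.30
#   Z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(4.2, 12.3, 0.30)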
def main(): for m in MODELS: modelDir = DIR + m['mdir'] + '.h5' DatasetDir = 'TrainedModels/datasets/' if os.path.exists(os.path.join(DIR, m['mdir'] + '_scaler.pkl')): m['scaler'] = joblib.load( os.path.join(DIR, m['mdir'] + '_scaler.pkl')) else: m['scaler'] = None infofile = open(modelDir.replace('.h5', '_infofile.txt')) infos = infofile.readlines() m['analysis'] = infos[0].replace('Used analysis method: ', '').replace('\n', '') m['dataset'] = DatasetDir + infos[3].replace('Used dataset: ', '').replace('\n', '') m['VAR'] = infos[5].replace('Used variables for training: ', '').replace('\n', '').split() m['recurrent'] = False if m['analysis'].lower() == 'rnn': m['recurrent'] = True m['seq_scaler'] = m['dataset'] + '_scaling.json' if 'nn' in m['analysis'].lower(): m['model'] = load_model(os.path.join(DIR, m['mdir'] + '.h5')) elif 'bdt' in m['analysis'].lower(): m['model'] = joblib.load(os.path.join(DIR, m['mdir'] + '.h5')) print '#----MODEL----#' print '\t', m['mdir'] ########################### # Read and evaluate signals ########################### m['Signal'] = [] for smp in SIGNAL: first = True for s in smp: print 'Sample:\t', s x, y = pickBenchmark(s) if not m['recurrent']: _df, _weight = loadDataFrame( os.path.join(inputDir, s + '/'), PRESELECTION, m['VAR'], WEIGHTS, LUMI) print _df.shape, _weight.shape if first: df = _df.copy() weight = _weight.copy() first = False else: df = pd.concat((df, _df), ignore_index=True) weight = pd.concat((weight, _weight), ignore_index=True) else: _df, _weight, collection = loadSequentialDataFrame( os.path.join(inputDir, s + '/'), PRESELECTION, COLLECTION, REMOVE_VAR, m['VAR'], WEIGHTS, LUMI) print _df.shape, _weight.shape, collection[0]['df'].shape if first: df = _df.copy() weight = _weight.copy() seq = collection[0]['df'].copy() first = False else: df = pd.concat((df, _df), ignore_index=True) weight = pd.concat((weight, _weight), ignore_index=True) seq = pd.concat((seq, collection[0]['df']), ignore_index=True) if not m['recurrent']: m['y_pred_sig'] = evaluate(m['model'], df.values, m['scaler'], method=m['analysis']) m['y_sig'] = np.ones(m['y_pred_sig'].shape[0]) else: collection[0]['df'] = seq.copy() m['y_pred_sig'] = evaluate(m['model'], df.values, m['scaler'], m['seq_scaler'], method=m['analysis'], col=collection) m['y_sig'] = np.ones(m['y_pred_sig'].shape[0]) bin_index = np.digitize( m['y_pred_sig'][:, 0], bins[1:] ) # get the bin index of the output score for each event outputWeighted = [] outputWeightedVar = [] outputMC = [] outputMCVar = [] for i in range(len(bins[1:])): w = weight.values[np.where(bin_index == i)[0]] sigma = np.sum(w**2.) outputWeighted.append(w.sum()) outputWeightedVar.append(sigma) outputMC.append(len(w)) outputMCVar.append(np.sqrt(len(w))) m['Signal'].append({ 'name': s[6:], 'm_stop': x, 'm_X': y, 'dataset': df, 'weight': weight, 'nEvents': weight.sum(), 'outputScore': np.array(outputWeighted), 'outputMC': np.array(outputMC), 'output_var': np.array(outputWeightedVar), 'outputMC_var': np.array(outputMCVar) }) del df, weight, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar ############################### # Read and evaluate backgrounds ############################### m['totBkgEvents'] = 0. m['totBkgVar'] = 0. 
m['Background'] = [] for smp in BACKGROUND: first = True for b in smp: print 'Sample:\t', b if not m['recurrent']: _df, _weight = loadDataFrame( os.path.join(inputDir, b + '/'), PRESELECTION, m['VAR'], WEIGHTS, LUMI) print _df.shape, _weight.shape if first: df = _df.copy() weight = _weight.copy() first = False else: df = pd.concat((df, _df), ignore_index=True) weight = pd.concat((weight, _weight), ignore_index=True) else: _df, _weight, collection = loadSequentialDataFrame( os.path.join(inputDir, b + '/'), PRESELECTION, COLLECTION, REMOVE_VAR, m['VAR'], WEIGHTS, LUMI) print _df.shape, _weight.shape, collection[0]['df'].shape if first: df = _df.copy() weight = _weight.copy() seq = collection[0]['df'].copy() first = False else: df = pd.concat((df, _df), ignore_index=True) weight = pd.concat((weight, _weight), ignore_index=True) seq = pd.concat((seq, collection[0]['df']), ignore_index=True) if not m['recurrent']: print df.shape, weight.shape m['_'.join(['y_pred', b])] = evaluate(m['model'], df.values, m['scaler'], method=m['analysis']) m['_'.join(['y', b])] = np.zeros(m['_'.join(['y_pred', b])].shape[0]) else: collection[0]['df'] = seq print df.shape, weight.shape, collection[0]['df'].shape m['_'.join(['y_pred', b])] = evaluate(m['model'], df.values, m['scaler'], m['seq_scaler'], method=m['analysis'], col=collection) m['_'.join(['y', b])] = np.zeros(m['_'.join(['y_pred', b])].shape[0]) bin_index = np.digitize(m['_'.join(['y_pred', b])][:, 0], bins[1:]) outputWeighted = [] outputWeightedVar = [] outputMC = [] outputMCVar = [] m['totBkgEvents'] += weight.sum() m['totBkgVar'] += np.sum(weight.values**2.) for i in range(len(bins[1:])): w = weight.values[np.where(bin_index == i)[0]] sigma = np.sum(w**2.) outputWeighted.append(w.sum()) outputWeightedVar.append(sigma) outputMC.append(len(w)) outputMCVar.append(len(w)) m['Background'].append({ 'name': b, 'dataset': df, 'weight': weight, 'nEvents': weight.sum(), 'outputScore': np.array(outputWeighted), 'outputMC': np.array(outputMC), 'output_var': np.array(outputWeightedVar), 'outputMC_var': np.array(outputMCVar) }) del df, weight, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar m['totalBkgOutput'] = np.array( [b['outputScore'] for b in m['Background']]) m['totalBkgOutput'] = m['totalBkgOutput'].sum(axis=0) m['totalBkgVar'] = np.array([b['output_var'] for b in m['Background']]) m['totalBkgVar'] = m['totalBkgVar'].sum(axis=0) for s in m['Signal']: m['roc'] = [] m['roc_err'] = [] m['tot_rel'] = np.sqrt(np.sum(s['output_var'])) / s['nEvents'] for i in range(len(bins[1:])): eff_sig = s['outputScore'][i:].sum() / s['nEvents'] eff_bkg = m['totalBkgOutput'][i:].sum( ) / m['totalBkgOutput'].sum() err_sig = np.sqrt(np.sum(s['output_var'][i:])) / s['nEvents'] err_bkg = np.sqrt(np.sum( m['totalBkgVar'][i:])) / m['totalBkgOutput'].sum() if m['totalBkgOutput'][i:].sum() > 0.: rel_err_bkg = np.sqrt(np.sum( m['totalBkgVar'][i:])) / m['totalBkgOutput'][i:].sum() else: rel_err_bkg = 0. if s['outputScore'][i:].sum() > 0.: rel_err_sig = np.sqrt(np.sum( s['output_var'][i:])) / s['outputScore'][i:].sum() else: rel_err_sig = 0. m['total_rel_err'] = np.sqrt(rel_err_bkg**2. + 0.25**2.) m['roc'].append((eff_sig, 1 - eff_bkg)) roc_plus_sig = eff_sig + err_sig roc_mins_sig = eff_sig - err_sig roc_plus_bkg = 1 - (eff_bkg + err_bkg) roc_mins_bkg = 1 - (eff_bkg - err_bkg) #roc_err_sig = abs(roc_plus_sig - roc_mins_sig) / 2. roc_err_bkg = abs(roc_plus_bkg - roc_mins_bkg) / 2. 
m['roc_err'].append(roc_err_bkg) m['roc'] = np.array(m['roc']) m['roc_err'] = np.array(m['roc_err']) #m['y_bkg'] = np.empty(0) #m['y_pred_bkg'] = np.empty(0) #for b in BACKGROUND: # m['y_bkg'] = np.concatenate((m['y_bkg'], m['_'.join(['y',b])])) # m['y_pred_bkg'] = np.concatenate((m['y_pred_bkg'], m['_'.join(['y_pred',b])][:,0])) #m['y'] = np.concatenate((m['y_sig'], m['y_bkg'])) #m['y_pred'] = np.concatenate((m['y_pred_sig'][:,0], m['y_pred_bkg'])) #m['fpr'], m['tpr'], m['threshold'] = roc_curve(m['y'], m['y_pred']) #m['auc'] = roc_auc_score(m['y'], m['y_pred']) print('Plotting ROC curve ...') fig = plt.figure(figsize=(8, 6)) ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4) #ax1.set_xlim((bins[0], bins[-1])) #ax1.set_ylim((0, 1)) ax1.set_xlabel('$\epsilon_{Sig.}$', horizontalalignment='right', x=1.0) ax1.set_ylabel('$r_{Bkg.}$', horizontalalignment='right', y=1.0) for m in MODELS: m['auc'] = np.trapz(m['roc'][:, 0], m['roc'][:, 1], dx=db) print 'Area under ROC:\t', m['auc'] if logScale: ax1.set_yscale('log') plt.plot(m['roc'][:, 0], 1. / (1. - m['roc'][:, 1]), 'k-', color=m['color'], label='%s (AUC = %0.4f)' % (m['name'], m['auc'])) plt.fill_between(m['roc'][:, 0], 1. / (1. - (m['roc'][:, 1] - m['roc_err'])), 1. / (1. - (m['roc'][:, 1] + m['roc_err'])), alpha=0.2, edgecolor=m['color'], facecolor=m['color'], linewidth=0) #plt.plot(m['tpr'], 1./m['fpr'], lw=2, label=m['name']+' (AUC = %0.3f)'%(m['auc'])) else: plt.plot(m['roc'][:, 0], m['roc'][:, 1], 'k-', color=m['color'], label='%s (AUC = %0.2f)' % (m['name'], m['auc'])) plt.fill_between(m['roc'][:, 0], (m['roc'][:, 1] - m['roc_err']), (m['roc'][:, 1] + m['roc_err']), alpha=0.2, edgecolor=m['color'], facecolor=m['color'], linewidth=0) #plt.plot(m['tpr'], 1.-m['fpr'], lw=2, label=m['name']+' (AUC = %0.3f)'%(m['auc'])) ax1.set_xlim((0, 0.16)) ax1.set_ylim((0.975, 1.0)) #plt.plot([0, 1], [1, 0], '--', color=(0.6, 0.6, 0.6), label='Luck') for p in WP: p['eff_sig'] = p['sig'] / BWN_PRESEL_SIG p['eff_bkg'] = p['bkg'] / BWN_PRESEL_BKG if p['legend']: plt.plot([p['eff_sig']], [1 - p['eff_bkg']], '.', color=p['color'], label=p['name']) else: plt.plot([p['eff_sig']], [1 - p['eff_bkg']], '.', color=p['color']) leg = plt.legend(loc="lower left", frameon=False) #AtlasStyle_mpl.ATLASLabel(ax1, 0.02, 0.25, 'Work in progress') AtlasStyle_mpl.Text(ax1, 0.14, 0.52, 'Simulation') AtlasStyle_mpl.LumiLabel(ax1, 0.14, 0.46, lumi=LUMI * 0.001) plt.savefig(SAVEDIR + FILENAME + '.pdf') plt.savefig(SAVEDIR + FILENAME + '.png') plt.close()
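
# ---------------------------------------------------------------------------
# Hedged sketch of the commented-out sklearn path above: a per-event ROC curve
# from the stored scores and labels, with the MC event weights passed as
# sample_weight.
# ---------------------------------------------------------------------------
def per_event_roc(scores_sig, scores_bkg, w_sig, w_bkg):
    from sklearn.metrics import roc_curve, roc_auc_score
    y_true = np.concatenate((np.ones(len(scores_sig)), np.zeros(len(scores_bkg))))
    y_score = np.concatenate((scores_sig, scores_bkg))
    weights = np.concatenate((w_sig, w_bkg))
    fpr, tpr, thresholds = roc_curve(y_true, y_score, sample_weight=weights)
    auc = roc_auc_score(y_true, y_score, sample_weight=weights)
    return fpr, tpr, auc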
def evaluate_signalGridCuts(modelDir, resolution=np.array([50,0,1], dtype=float), save=False, fileName='Test'): print('Evaluating singal grid...') if fileName=='Grid_test': fileName=modelDir.replace('TrainedModels/models/','').replace('.h5','') infofile = open(modelDir.replace('.h5','_infofile.txt')) infos = infofile.readlines() #Parse Strings for correct datatypes variables=infos[4].replace('Used variables for training: ','').replace('\n','').split() weights=infos[5].replace('Used weights: ', '').replace('\n','').split() lumi=float(infos[7].replace('Used Lumi: ','').replace('\n','')) background=infos[9].replace('Used background files: ','').replace('; \n','').replace(' ','').split(';') preselection = preselection_evaluate print 'Using the following preselection to evaluate:' , preselection signal = ['stop_bWN_250_100', 'stop_bWN_250_130', 'stop_bWN_250_160', 'stop_bWN_300_150', 'stop_bWN_300_180', 'stop_bWN_300_210', 'stop_bWN_350_185', 'stop_bWN_350_200', 'stop_bWN_350_230', 'stop_bWN_350_260', 'stop_bWN_400_235', 'stop_bWN_400_250', 'stop_bWN_400_280', 'stop_bWN_400_310', 'stop_bWN_450_285', 'stop_bWN_450_300', 'stop_bWN_450_330', 'stop_bWN_450_360', 'stop_bWN_500_335', 'stop_bWN_500_350', 'stop_bWN_500_380', 'stop_bWN_550_385', 'stop_bWN_550_400', 'stop_bWN_550_430', 'stop_bWN_550_460', 'stop_bWN_600_435', 'stop_bWN_600_450', 'stop_bWN_600_480', 'stop_bWN_600_510', 'stop_bWN_650_485', 'stop_bWN_650_500', 'stop_bWN_650_530', 'stop_bWN_650_560'] #Get Scaler and model from modelDir model = load_model(modelDir) scalerDir=modelDir.replace('.h5','_scaler.pkl') scaler=joblib.load(scalerDir) #Evaluate db = (resolution[2] - resolution[1]) / resolution[0] # bin width in discriminator distribution bins = np.arange(resolution[1], resolution[2]+db, db) # bin edges in discriminator distribution ########################### # Read and evaluate signals ########################### Signal = [] for s in signal: x, y = pickBenchmark(s) df, weight = loadDataFrame(os.path.join(inputDirSig, s+'/'), preselection, variables, weights, lumi) y_hat = evaluate(model, df.values, scaler) bin_index = np.digitize(y_hat[:,0], bins[1:]) # get the bin index of the output score for each event outputWeighted = [] outputWeightedVar = [] outputMC = [] outputMCVar = [] for i in range(len(bins[1:])): w = weight.values[np.where(bin_index==i)[0]] sigma = np.sum(w**2.) outputWeighted.append(w.sum()) outputWeightedVar.append(sigma) outputMC.append(len(w)) outputMCVar.append(np.sqrt(len(w))) Signal.append({'name':s, 'm_stop':x, 'm_X':y, 'dataset':df, 'weight':weight, 'nEvents':weight.sum(), 'y_pred':y_hat, 'outputScore':np.array(outputWeighted), 'outputMC':np.array(outputMC), 'output_var':np.array(outputWeightedVar), 'outputMC_var':np.array(outputMCVar)}) del df, weight, y_hat, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar ########################### # Read and evaluate backgrounds ########################### totBkgEvents = 0. totBkgVar = 0. Background = [] for b in background: df, weight = loadDataFrame(os.path.join(inputDirBkg, b+'/'), preselection, variables, weights, lumi) y_hat = evaluate(model, df.values, scaler) bin_index = np.digitize(y_hat[:,0], bins[1:]) outputWeighted = [] outputWeightedVar = [] outputMC = [] outputMCVar = [] totBkgEvents += weight.sum() totBkgVar += np.sum(weight.values**2.) for i in range(len(bins[1:])): w = weight.values[np.where(bin_index==i)[0]] sigma = np.sum(w**2.) 
outputWeighted.append(w.sum()) outputWeightedVar.append(sigma) outputMC.append(len(w)) outputMCVar.append(len(w)) Background.append({'name':b, 'dataset':df, 'weight':weight, 'nEvents':weight.sum(), 'y_pred':y_hat, 'outputScore':np.array(outputWeighted), 'outputMC':np.array(outputMC), 'output_var':np.array(outputWeightedVar), 'outputMC_var':np.array(outputMCVar)}) del df, weight, y_hat, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar totalBkgOutput = np.array([b['outputScore'] for b in Background]) totalBkgOutput = totalBkgOutput.sum(axis=0) totalBkgVar = np.array([b['output_var'] for b in Background]) totalBkgVar = totalBkgVar.sum(axis=0) for s in Signal: significance = [] significance_err = [] tot_rel = np.sqrt(np.sum(s['output_var'])) / s['nEvents'] for i in range(len(bins[1:])): #eff_sig = s['outputScore'][:i+1].sum() / s['nEvents'] #eff_bkg = totalBkgOutput[:i+1].sum() / totalBkgOutput.sum() eff_sig = s['outputScore'][i:-1].sum() / s['nEvents'] eff_bkg = totalBkgOutput[i:-1].sum() / totalBkgOutput.sum() #err_sig = np.sqrt(np.sum(s['output_var'][:i+1])) / s['nEvents'] #err_bkg = np.sqrt(np.sum(totalBkgVar[:i+1])) / totalBkgOutput.sum() err_sig = np.sqrt(np.sum(s['output_var'][i:-1])) / s['nEvents'] err_bkg = np.sqrt(np.sum(totalBkgVar[i:-1])) / totalBkgOutput.sum() #if totalBkgOutput[:i+1].sum() > 0.: # rel_err_bkg = np.sqrt(np.sum(totalBkgVar[:i+1])) / totalBkgOutput[:i+1].sum() if totalBkgOutput[i:-1].sum() > 0.: rel_err_bkg = np.sqrt(np.sum(totalBkgVar[i:-1])) / totalBkgOutput[i:-1].sum() else: rel_err_bkg = 0. #if s['outputScore'][:i+1].sum() > 0.: # rel_err_sig = np.sqrt(np.sum(s['output_var'][:i+1])) / s['outputScore'][:i+1].sum() if s['outputScore'][i:-1].sum() > 0.: rel_err_sig = np.sqrt(np.sum(s['output_var'][i:-1])) / s['outputScore'][i:-1].sum() else: rel_err_sig = 0. total_rel_err = np.sqrt(rel_err_sig**2. + rel_err_bkg**2. + 0.25**2.) if (eff_sig == 0) or (eff_bkg == 0): Z = 0. Z_err = 0. 
elif (err_sig / eff_sig > 0.75) or (err_bkg / eff_bkg > 0.75): Z = 0 Z_err = 0 else: #Z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(s['outputScore'][:i+1].sum(), totalBkgOutput[:i+1].sum(), total_rel_err) Z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(s['outputScore'][i:-1].sum(), totalBkgOutput[i:-1].sum(), total_rel_err) Zplus_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ((eff_sig + err_sig) * s['nEvents'], eff_bkg * totalBkgOutput.sum(), total_rel_err) Zmins_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ((eff_sig - err_sig) * s['nEvents'], eff_bkg * totalBkgOutput.sum(), total_rel_err) Zplus_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(eff_sig * s['nEvents'], (eff_bkg + err_bkg) * totalBkgOutput.sum(), total_rel_err) Zmins_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(eff_sig * s['nEvents'], (eff_bkg - err_bkg) * totalBkgOutput.sum(), total_rel_err) Z_err_sig = abs(Zplus_sig - Zmins_sig) / 2 Z_err_bkg = abs(Zplus_bkg - Zmins_bkg) / 2 Z_err = np.sqrt(Z_err_sig**2 + Z_err_bkg**2) significance.append(Z) significance_err.append(Z_err) s['sig'] = np.array(significance) s['sig_max'] = s['sig'].max() s['sig_err'] = np.array(significance_err) print s['sig'] print s['sig'].max(), bins[np.where(s['sig'] == s['sig'].max())] x = np.array([s['m_stop'] for s in Signal], dtype=float) y = np.array([s['m_X'] for s in Signal], dtype=float) z = np.array([s['sig_max'] for s in Signal],dtype=float) print x, y, z # Set up a regular grid of interpolation points fig, ax1 = plt.subplots(figsize=(8,6)) xi, yi = np.linspace(x.min(), x.max(), 100), np.linspace(y.min(), y.max(), 100) xi, yi = np.meshgrid(xi, yi) # Interpolate rbf = scipy.interpolate.LinearNDInterpolator(points=np.array((x, y)).T, values=z) zi = rbf(xi, yi) im = ax1.imshow(zi, vmin=0., vmax=5., origin='lower', extent=[x.min(), x.max(), y.min(), y.max()]) cbar = plt.colorbar(im) cbar.set_label('Significance') ax1.set_xlabel(r'$m_{\tilde{t}}$') ax1.set_xlim([x.min(), x.max()]) ax1.set_ylabel(r'$m_{\chi}$') ax1.set_ylim([y.min(), y.max()]) plt.scatter(x, y, c='black') plt.plot(x, x-84., color='black') plt.plot(x, x-175., color='black') AtlasStyle_mpl.ATLASLabel(ax1, 0.022, 0.925, 'Work in progress') AtlasStyle_mpl.LumiLabel(ax1, 0.022, 0.875, lumi=lumi*0.001) #plt.show() if save: if not os.path.exists('./plots/'): os.makedirs('./plots/') print('Creating folder plots') isFile = True n = 1 while isFile: filepath = './plots/' + fileName + '_evaluated_grid_cuts_' + str(n) + '_infofile.txt' if os.path.isfile(filepath) and filepath.endswith('.txt'): n += 1 isFile=True else: isFile=False infofile = open(filepath, 'w') print('Saving evaluation informations to ' , filepath) presels = '' for pre in preselection_evaluate: if pre['type'] == 'condition': presels += pre['name'] + '-threshold: ' + str(pre['threshold']) + ' type: ' + pre['type'] + ' variable: ' + pre['variable'] + ' lessthan: ' + str(pre['lessthan']) + ' and morethan: ' + str(pre['morethan']) + '; ' else: presels += pre['name'] + '-threshold: ' + str(pre['threshold']) + ' type: ' + pre['type'] + '; ' infofile.write('Used preselection for evaluation: ' + presels) infofile.close() plt.savefig('plots/'+fileName+'_evaluated_grid_cuts_' + str(n) + '.pdf') plt.savefig('plots/'+fileName+'_evaluated_grid_cuts_' + str(n) + '.png') plt.close()
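
# ---------------------------------------------------------------------------
# Minimal, self-contained version of the interpolation step used above, on toy
# benchmark points (values are illustrative). LinearNDInterpolator returns NaN
# outside the convex hull of the input points, which shows up as blank regions
# in the significance map.
# ---------------------------------------------------------------------------
def interpolation_sketch():
    import scipy.interpolate
    x = np.array([300., 400., 500., 600.])   # illustrative m_stop values
    y = np.array([150., 180., 350., 480.])   # illustrative m_X values
    z = np.array([1.2, 2.5, 0.8, 1.6])       # illustrative significances
    xi, yi = np.meshgrid(np.linspace(x.min(), x.max(), 100),
                         np.linspace(y.min(), y.max(), 100))
    interp = scipy.interpolate.LinearNDInterpolator(points=np.array((x, y)).T,
                                                    values=z)
    return interp(xi, yi)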
def evaluate_signalGrid(modelDir, resolution=np.array([50, 0, 1], dtype=float), save=False, fileName='Test'):
    print('Evaluating signal grid...')

    infofile = open(modelDir.replace('.h5', '_infofile.txt'))
    infos = infofile.readlines()
    infofile.close()

    # Parse strings from the infofile into the correct datatypes
    variables = infos[4].replace('Used variables for training: ', '').replace('\n', '').split()
    weights = infos[5].replace('Used weights: ', '').replace('\n', '').split()
    preselection_raw = infos[6].replace('Used preselection: ', '').replace('; \n', '').split(';')
    preselection = []
    for x in preselection_raw:
        xdict = {}
        xdict['name'] = x.split()[0].split('-')[0]
        xdict['threshold'] = float(x.split()[1])
        xdict['type'] = x.split()[3]
        if xdict['type'] == 'condition':
            xdict['variable'] = x.split()[5]
            xdict['lessthan'] = float(x.split()[7])
            xdict['morethan'] = float(x.split()[10])
        preselection.append(xdict)
    lumi = float(infos[7].replace('Used Lumi: ', '').replace('\n', ''))
    background = infos[9].replace('Used background files: ', '').replace('; \n', '').replace(' ', '').split(';')
    #signal = infos[8].replace('Used signal files: ', '').replace('; \n', '').replace(' ', '').split(';')
    signal = ['stop_bWN_250_100', 'stop_bWN_250_130', 'stop_bWN_250_160', 'stop_bWN_300_150', 'stop_bWN_300_180',
              'stop_bWN_300_210', 'stop_bWN_350_185', 'stop_bWN_350_200', 'stop_bWN_350_230', 'stop_bWN_350_260',
              'stop_bWN_400_235', 'stop_bWN_400_250', 'stop_bWN_400_280', 'stop_bWN_400_310', 'stop_bWN_450_285',
              'stop_bWN_450_300', 'stop_bWN_450_330', 'stop_bWN_450_360', 'stop_bWN_500_335', 'stop_bWN_500_350',
              'stop_bWN_500_380', 'stop_bWN_550_385', 'stop_bWN_550_400', 'stop_bWN_550_430', 'stop_bWN_550_460',
              'stop_bWN_600_435', 'stop_bWN_600_450', 'stop_bWN_600_480', 'stop_bWN_600_510', 'stop_bWN_650_485',
              'stop_bWN_650_500', 'stop_bWN_650_530', 'stop_bWN_650_560']

    # For debugging
    #print variables, type(variables)
    #print weights, type(weights)
    #print preselection, type(preselection[1])
    #print lumi, type(lumi)
    #print signal, type(signal)
    #print background, type(background)

    # Get scaler and model from modelDir
    model = load_model(modelDir)
    scalerDir = modelDir.replace('.h5', '_scaler.pkl')
    scaler = joblib.load(scalerDir)

    # Evaluate
    db = (resolution[2] - resolution[1]) / resolution[0]        # bin width in discriminator distribution
    bins = np.arange(resolution[1], resolution[2] + db, db)     # bin edges in discriminator distribution

    ###########################
    # Read and evaluate signals
    ###########################
    statInfoSig = {}  # raw MC statistics per signal sample
    Signal = []
    for s in signal:
        x, y = pickBenchmark(s)
        df, weight = loadDataFrame(os.path.join(inputDirSig, s + '/'), preselection, variables, weights, lumi)
        statInfoSig[s] = df.shape[0]
        y_hat = evaluate(model, df.values, scaler)
        bin_index = np.digitize(y_hat[:, 0], bins[1:])  # get the bin index of the output score for each event
        outputWeighted = []
        outputWeightedVar = []
        outputMC = []
        outputMCVar = []
        for i in range(len(bins[1:])):
            w = weight.values[np.where(bin_index == i)[0]]
            sigma = np.sum(w**2.)
            outputWeighted.append(w.sum())
            outputWeightedVar.append(sigma)
            outputMC.append(len(w))
            outputMCVar.append(np.sqrt(len(w)))

        Signal.append({'name': s, 'm_stop': x, 'm_X': y, 'dataset': df, 'weight': weight, 'nEvents': weight.sum(),
                       'y_pred': y_hat, 'outputScore': np.array(outputWeighted), 'outputMC': np.array(outputMC),
                       'output_var': np.array(outputWeightedVar), 'outputMC_var': np.array(outputMCVar)})

        del df, weight, y_hat, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar

    ###########################
    # Read and evaluate backgrounds
    ###########################
    statInfoBkg = {}  # raw MC statistics per background sample
    totBkgEvents = 0.
    totBkgVar = 0.
    Background = []
    for b in background:
        df, weight = loadDataFrame(os.path.join(inputDirBkg, b + '/'), preselection, variables, weights, lumi)
        statInfoBkg[b] = df.shape[0]
        y_hat = evaluate(model, df.values, scaler)
        bin_index = np.digitize(y_hat[:, 0], bins[1:])
        outputWeighted = []
        outputWeightedVar = []
        outputMC = []
        outputMCVar = []

        totBkgEvents += weight.sum()
        totBkgVar += np.sum(weight.values**2.)
        for i in range(len(bins[1:])):
            w = weight.values[np.where(bin_index == i)[0]]
            sigma = np.sum(w**2.)
            outputWeighted.append(w.sum())
            outputWeightedVar.append(sigma)
            outputMC.append(len(w))
            outputMCVar.append(np.sqrt(len(w)))  # Poisson uncertainty on the raw MC count, as in the signal loop

        Background.append({'name': b, 'dataset': df, 'weight': weight, 'nEvents': weight.sum(), 'y_pred': y_hat,
                           'outputScore': np.array(outputWeighted), 'outputMC': np.array(outputMC),
                           'output_var': np.array(outputWeightedVar), 'outputMC_var': np.array(outputMCVar)})

        del df, weight, y_hat, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar

    totalBkgOutput = np.array([b['outputScore'] for b in Background])
    totalBkgOutput = totalBkgOutput.sum(axis=0)

    totalBkgVar = np.array([b['output_var'] for b in Background])
    totalBkgVar = totalBkgVar.sum(axis=0)

    ###########################
    # Determine Significance #
    ###########################
    for s in Signal:
        significance = []
        significance_err = []
        tot_rel = np.sqrt(np.sum(s['output_var'])) / s['nEvents']
        for i in range(len(bins[1:])):
            # cumulative sums over bins i..N-2, i.e. events above the cut (last bin excluded)
            #eff_sig = s['outputScore'][:i+1].sum() / s['nEvents']
            #eff_bkg = totalBkgOutput[:i+1].sum() / totalBkgOutput.sum()
            eff_sig = s['outputScore'][i:-1].sum() / s['nEvents']
            eff_bkg = totalBkgOutput[i:-1].sum() / totalBkgOutput.sum()

            #err_sig = np.sqrt(np.sum(s['output_var'][:i+1])) / s['nEvents']
            #err_bkg = np.sqrt(np.sum(totalBkgVar[:i+1])) / totalBkgOutput.sum()
            err_sig = np.sqrt(np.sum(s['output_var'][i:-1])) / s['nEvents']
            err_bkg = np.sqrt(np.sum(totalBkgVar[i:-1])) / totalBkgOutput.sum()

            #if totalBkgOutput[:i+1].sum() > 0.:
            #    rel_err_bkg = np.sqrt(np.sum(totalBkgVar[:i+1])) / totalBkgOutput[:i+1].sum()
            if totalBkgOutput[i:-1].sum() > 0.:
                rel_err_bkg = np.sqrt(np.sum(totalBkgVar[i:-1])) / totalBkgOutput[i:-1].sum()
            else:
                rel_err_bkg = 0.
            #if s['outputScore'][:i+1].sum() > 0.:
            #    rel_err_sig = np.sqrt(np.sum(s['output_var'][:i+1])) / s['outputScore'][:i+1].sum()
            if s['outputScore'][i:-1].sum() > 0.:
                rel_err_sig = np.sqrt(np.sum(s['output_var'][i:-1])) / s['outputScore'][i:-1].sum()
            else:
                rel_err_sig = 0.

            total_rel_err = np.sqrt(rel_err_sig**2. + rel_err_bkg**2. + 0.25**2.)

            if (eff_sig == 0) or (eff_bkg == 0):
                Z = 0.
                Z_err = 0.
            elif (err_sig / eff_sig > 0.75) or (err_bkg / eff_bkg > 0.75):
                Z = 0.
                Z_err = 0.
            else:
                #Z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(s['outputScore'][:i+1].sum(), totalBkgOutput[:i+1].sum(), total_rel_err)
                Z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(s['outputScore'][i:-1].sum(), totalBkgOutput[i:-1].sum(), total_rel_err)

                Zplus_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ((eff_sig + err_sig) * s['nEvents'], eff_bkg * totalBkgOutput.sum(), total_rel_err)
                Zmins_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ((eff_sig - err_sig) * s['nEvents'], eff_bkg * totalBkgOutput.sum(), total_rel_err)
                Zplus_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(eff_sig * s['nEvents'], (eff_bkg + err_bkg) * totalBkgOutput.sum(), total_rel_err)
                Zmins_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(eff_sig * s['nEvents'], (eff_bkg - err_bkg) * totalBkgOutput.sum(), total_rel_err)

                Z_err_sig = abs(Zplus_sig - Zmins_sig) / 2
                Z_err_bkg = abs(Zplus_bkg - Zmins_bkg) / 2
                Z_err = np.sqrt(Z_err_sig**2 + Z_err_bkg**2)

            significance.append(Z)
            significance_err.append(Z_err)

        s['sig'] = np.array(significance)
        s['sig_max'] = s['sig'].max()
        s['sig_err'] = np.array(significance_err)
        #print s['sig']
        print s['m_stop'], s['m_X'], s['sig'].max(), bins[np.where(s['sig'] == s['sig'].max())]

    x = np.array([s['m_stop'] for s in Signal], dtype=float)
    y = np.array([s['m_X'] for s in Signal], dtype=float)
    z = np.array([s['sig_max'] for s in Signal], dtype=float)
    #print x, y, z

    # Set up a regular grid of interpolation points
    fig, ax1 = plt.subplots(figsize=(8, 6))
    xi, yi = np.linspace(x.min(), x.max(), 100), np.linspace(y.min(), y.max(), 100)
    xi, yi = np.meshgrid(xi, yi)

    # Interpolate
    rbf = scipy.interpolate.LinearNDInterpolator(points=np.array((x, y)).T, values=z)
    zi = rbf(xi, yi)

    im = ax1.imshow(zi, vmin=0., vmax=5., origin='lower', extent=[x.min(), x.max(), y.min(), y.max()])
    cbar = plt.colorbar(im)
    cbar.set_label('Significance')
    ax1.set_xlabel(r'$m_{\tilde{t}}$')
    ax1.set_xlim([x.min(), x.max()])
    ax1.set_ylabel(r'$m_{\chi}$')
    ax1.set_ylim([y.min(), y.max()])
    plt.scatter(x, y, c='black')
    plt.plot(x, x - 84., color='black')
    plt.plot(x, x - 175., color='black')
    AtlasStyle_mpl.ATLASLabel(ax1, 0.022, 0.925, 'Work in progress')
    AtlasStyle_mpl.LumiLabel(ax1, 0.022, 0.875, lumi=lumi * 0.001)
    #plt.show()

    if save:
        if not os.path.exists('./plots/'):
            os.makedirs('./plots/')
            print('Creating folder plots')
        plt.savefig('plots/' + fileName + '_evaluated_grid.pdf')
        plt.savefig('plots/' + fileName + '_evaluated_grid.png')
        plt.close()

    diag_165 = {}
    diag_150 = {}
    diag_120 = {}
    diag_90 = {}

    for key, value in statInfoSig.iteritems():
        x, y = pickBenchmark(key)
        deltaM = float(x) - float(y)
        if deltaM == 165.0:
            diag_165[x] = value
        elif deltaM == 150.0:
            diag_150[x] = value
        elif deltaM == 120.0:
            diag_120[x] = value
        elif deltaM == 90.0:
            diag_90[x] = value
        else:
            print 'Error: Unknown diagonal in evaluate_signalGrid'
            return 0

    sortedLabels165 = sorted(diag_165)
    sortedLabels150 = sorted(diag_150)
    sortedLabels120 = sorted(diag_120)
    sortedLabels90 = sorted(diag_90)

    values_165 = []
    values_150 = []
    values_120 = []
    values_90 = []

    for label in sortedLabels165:
        values_165.append(diag_165[label])
    for label in sortedLabels150:
        values_150.append(diag_150[label])
    for label in sortedLabels120:
        values_120.append(diag_120[label])
    for label in sortedLabels90:
        values_90.append(diag_90[label])

    csignal = sum(values_90) + sum(values_120) + sum(values_150) + sum(values_165)
    trainable_count = int(np.sum([K.count_params(p) for p in set(model.trainable_weights)]))

    signalP = mpatches.Patch(color='None', label='signal: ' + str(csignal))
    ttbar = mpatches.Patch(color='None', label=r'$t\overline{t}$: ' + str(statInfoBkg['mc16d_ttbar']))
    singletop = mpatches.Patch(color='None', label='single top: ' + str(statInfoBkg['mc16d_singletop']))
    Wjets = mpatches.Patch(color='None', label=r'$W$ + jets: ' + str(statInfoBkg['mc16d_Wjets']))
    tps = mpatches.Patch(color='None', label='params(t): ' + str(trainable_count))  # trainable parameters

    #print sortedLabels90, sortedLabels120, sortedLabels150
    #print values_90, values_120, values_150

    plt.figure('statistic')
    d165 = plt.plot(sortedLabels165, values_165, 'b-x', label=r'$\Delta M = 165$ GeV')
    d150 = plt.plot(sortedLabels150, values_150, 'c-x', label=r'$\Delta M = 150$ GeV')  # cyan, so the curve stays distinguishable from the 165 GeV diagonal
    d120 = plt.plot(sortedLabels120, values_120, 'r-x', label=r'$\Delta M = 120$ GeV')
    d90 = plt.plot(sortedLabels90, values_90, 'g-x', label=r'$\Delta M = 90$ GeV')
    plt.xlabel(r'$m_{\tilde{t}}$ [GeV]')
    plt.ylabel('Statistics')
    plt.title('Statistics of samples')
    plt.legend(loc='best', handles=[d165[0], d150[0], d120[0], d90[0], signalP, ttbar, singletop, Wjets, tps])

    if save:
        if not os.path.exists('./plots/'):
            os.makedirs('./plots/')
            print('Creating folder plots')
        plt.savefig('plots/' + fileName + '_StatisticTraining.pdf')
        plt.savefig('plots/' + fileName + '_StatisticTraining.png')
        plt.close()
        filepath = 'plots/' + fileName + '_StatisticTrainingValues.txt'
        infofile = open(filepath, 'w')
        infofile.write('M165: ' + ';'.join(sortedLabels165) + ' ' + ';'.join([str(i) for i in values_165]) + '\n')
        infofile.write('M150: ' + ';'.join(sortedLabels150) + ' ' + ';'.join([str(i) for i in values_150]) + '\n')
        infofile.write('M120: ' + ';'.join(sortedLabels120) + ' ' + ';'.join([str(i) for i in values_120]) + '\n')
        infofile.write('M90: ' + ';'.join(sortedLabels90) + ' ' + ';'.join([str(i) for i in values_90]))
        infofile.close()
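# Example call of the function above (the model path is hypothetical):
#   evaluate_signalGrid('TrainedModels/models/myDNN.h5', save=True, fileName='myDNN')

# main() below calls asimovZ(s, b, sigma_b) as an Asimov-style cross-check of the RooStats number.
# If that helper is not already defined or imported elsewhere in this module (an assumption), a
# minimal sketch of the standard Asimov median-significance formula could look like this:
try:
    asimovZ
except NameError:
    def asimovZ(s, b, sigma):
        # Median expected discovery significance for a counting experiment with expected signal s,
        # expected background b and absolute background uncertainty sigma; this is the usual
        # profile-likelihood Asimov approximation, not necessarily the author's original helper.
        if s <= 0. or b <= 0.:
            return 0.
        n = s + b
        b_var = sigma * sigma
        if b_var > 0.:
            term1 = n * np.log(n * (b + b_var) / (b * b + n * b_var))
            term2 = (b * b / b_var) * np.log(1. + b_var * s / (b * (b + b_var)))
        else:
            # sigma -> 0 limit: Z = sqrt(2 * ((s + b) * ln(1 + s/b) - s))
            term1 = n * np.log(n / b)
            term2 = s
        return np.sqrt(2. * (term1 - term2))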
def main():
    model = load_model(modelDir)
    scaler = joblib.load(SCALING)

    infofile = open(modelDir.replace('.h5', '_infofile.txt'))
    infos = infofile.readlines()
    infofile.close()

    analysis = infos[0].replace('Used analysis method: ', '').replace('\n', '')
    dataset = DatasetDir + infos[3].replace('Used dataset: ', '').replace('\n', '')

    recurrent = False
    if analysis.lower() == 'rnn':
        recurrent = True
        seq_scaler = dataset + '_scaling.json'

    db = (RESOLUTION[2] - RESOLUTION[1]) / RESOLUTION[0]        # bin width in discriminator distribution
    bins = np.arange(RESOLUTION[1], RESOLUTION[2] + db, db)     # bin edges in discriminator distribution
    center = (bins[:-1] + bins[1:]) / 2

    print '#----MODEL----#'
    print modelDir

    ###########################
    # Read and evaluate signals
    ###########################
    Signal = []
    for s in SIGNAL:
        print s
        x, y = pickBenchmark(s)
        if not recurrent:
            df, weight = loadDataFrame(os.path.join(inputDirSig, s + '/'), PRESELECTION, VAR, WEIGHTS, LUMI)
            y_hat = evaluate(model, df.values, scaler)
        else:
            df, weight, collection = loadSequentialDataFrame(os.path.join(inputDirSig, s + '/'), PRESELECTION, COLLECTION, REMOVE_VAR, VAR, WEIGHTS, LUMI)
            y_hat = evaluate(model, df.values, scaler, seq_scaler, rnn=True, col=collection)
        bin_index = np.digitize(y_hat[:, 0], bins[1:])  # get the bin index of the output score for each event
        outputWeighted = []
        outputWeightedVar = []
        outputMC = []
        outputMCVar = []
        for i in range(len(bins[1:])):
            w = weight.values[np.where(bin_index == i)[0]]
            sigma = np.sum(w**2.)
            outputWeighted.append(w.sum())
            outputWeightedVar.append(sigma)
            outputMC.append(len(w))
            outputMCVar.append(np.sqrt(len(w)))

        Signal.append({'name': s, 'm_stop': x, 'm_X': y, 'dataset': df, 'weight': weight, 'nEvents': weight.sum(),
                       'y_pred': y_hat, 'outputScore': np.array(outputWeighted), 'outputMC': np.array(outputMC),
                       'output_var': np.array(outputWeightedVar), 'outputMC_var': np.array(outputMCVar)})

        del df, weight, y_hat, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar

    ###########################
    # Read and evaluate backgrounds
    ###########################
    totBkgEvents = 0.
    totBkgVar = 0.
    Background = []
    for b in BACKGROUND:
        if not recurrent:
            df, weight = loadDataFrame(os.path.join(inputDirBkg, b + '/'), PRESELECTION, VAR, WEIGHTS, LUMI)
            y_hat = evaluate(model, df.values, scaler)
        else:
            df, weight, collection = loadSequentialDataFrame(os.path.join(inputDirBkg, b + '/'), PRESELECTION, COLLECTION, REMOVE_VAR, VAR, WEIGHTS, LUMI)
            y_hat = evaluate(model, df.values, scaler, seq_scaler, rnn=True, col=collection)
        bin_index = np.digitize(y_hat[:, 0], bins[1:])
        outputWeighted = []
        outputWeightedVar = []
        outputMC = []
        outputMCVar = []

        totBkgEvents += weight.sum()
        totBkgVar += np.sum(weight.values**2.)
        for i in range(len(bins[1:])):
            w = weight.values[np.where(bin_index == i)[0]]
            sigma = np.sum(w**2.)
            outputWeighted.append(w.sum())
            outputWeightedVar.append(sigma)
            outputMC.append(len(w))
            outputMCVar.append(np.sqrt(len(w)))  # Poisson uncertainty on the raw MC count, as in the signal loop

        Background.append({'name': b, 'dataset': df, 'weight': weight, 'nEvents': weight.sum(), 'y_pred': y_hat,
                           'outputScore': np.array(outputWeighted), 'outputMC': np.array(outputMC),
                           'output_var': np.array(outputWeightedVar), 'outputMC_var': np.array(outputMCVar)})

        del df, weight, y_hat, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar

    totalBkgOutput = np.array([b['outputScore'] for b in Background])
    totalBkgOutput = totalBkgOutput.sum(axis=0)

    totalBkgVar = np.array([b['output_var'] for b in Background])
    totalBkgVar = totalBkgVar.sum(axis=0)

    ###########################
    # Determine Significance #
    ###########################
    for s in Signal:
        significance = []
        significance_err = []
        asimov = []
        tot_rel = np.sqrt(np.sum(s['output_var'])) / s['nEvents']
        for i in range(len(bins[1:])):
            #eff_sig = s['outputScore'][:i+1].sum() / s['nEvents']
            #eff_bkg = totalBkgOutput[:i+1].sum() / totalBkgOutput.sum()
            eff_sig = s['outputScore'][i:-1].sum() / s['nEvents']
            eff_bkg = totalBkgOutput[i:-1].sum() / totalBkgOutput.sum()

            #err_sig = np.sqrt(np.sum(s['output_var'][:i+1])) / s['nEvents']
            #err_bkg = np.sqrt(np.sum(totalBkgVar[:i+1])) / totalBkgOutput.sum()
            err_sig = np.sqrt(np.sum(s['output_var'][i:-1])) / s['nEvents']
            err_bkg = np.sqrt(np.sum(totalBkgVar[i:-1])) / totalBkgOutput.sum()

            #if totalBkgOutput[:i+1].sum() > 0.:
            #    rel_err_bkg = np.sqrt(np.sum(totalBkgVar[:i+1])) / totalBkgOutput[:i+1].sum()
            if totalBkgOutput[i:-1].sum() > 0.:
                rel_err_bkg = np.sqrt(np.sum(totalBkgVar[i:-1])) / totalBkgOutput[i:-1].sum()
            else:
                rel_err_bkg = 0.
            #if s['outputScore'][:i+1].sum() > 0.:
            #    rel_err_sig = np.sqrt(np.sum(s['output_var'][:i+1])) / s['outputScore'][:i+1].sum()
            if s['outputScore'][i:-1].sum() > 0.:
                rel_err_sig = np.sqrt(np.sum(s['output_var'][i:-1])) / s['outputScore'][i:-1].sum()
            else:
                rel_err_sig = 0.

            # flat 25% systematic combined with the background MC statistical uncertainty
            #total_rel_err = np.sqrt(rel_err_sig**2. + rel_err_bkg**2. + 0.25**2.)
            total_rel_err = np.sqrt(rel_err_bkg**2. + 0.25**2.)

            if (eff_sig == 0) or (eff_bkg == 0):
                Z = 0.
                Z_err = 0.
                ams = 0.
            elif (err_sig / eff_sig > 0.75) or (err_bkg / eff_bkg > 0.75):
                Z = 0.
                Z_err = 0.
                ams = 0.
            else:
                #Z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(s['outputScore'][:i+1].sum(), totalBkgOutput[:i+1].sum(), total_rel_err)
                Z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(s['outputScore'][i:-1].sum(), totalBkgOutput[i:-1].sum(), total_rel_err)
                ams = asimovZ(s['outputScore'][i:].sum(), totalBkgOutput[i:].sum(), np.sqrt(totalBkgVar[i:].sum()))

                Zplus_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ((eff_sig + err_sig) * s['nEvents'], eff_bkg * totalBkgOutput.sum(), total_rel_err)
                Zmins_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ((eff_sig - err_sig) * s['nEvents'], eff_bkg * totalBkgOutput.sum(), total_rel_err)
                Zplus_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(eff_sig * s['nEvents'], (eff_bkg + err_bkg) * totalBkgOutput.sum(), total_rel_err)
                Zmins_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(eff_sig * s['nEvents'], (eff_bkg - err_bkg) * totalBkgOutput.sum(), total_rel_err)

                Z_err_sig = abs(Zplus_sig - Zmins_sig) / 2
                Z_err_bkg = abs(Zplus_bkg - Zmins_bkg) / 2
                Z_err = np.sqrt(Z_err_sig**2 + Z_err_bkg**2)

            significance.append(Z)
            significance_err.append(Z_err)
            asimov.append(ams)

        s['sig'] = np.array(significance)
        s['sig_max'] = s['sig'].max()
        s['sig_err'] = np.array(significance_err)
        s['ams'] = np.array(asimov)
        print s['sig']
        print s['ams']
        print s['m_stop'], s['m_X'], s['sig'].max(), bins[np.where(s['sig'] == s['sig'].max())]

    x = np.array([s['m_stop'] for s in Signal], dtype=float)
    y = np.array([s['m_X'] for s in Signal], dtype=float)
    z = np.array([s['sig_max'] for s in Signal], dtype=float)
    #print x, y, z

    # Set up a regular grid of interpolation points
    fig, ax1 = plt.subplots(figsize=(8, 6))
    xi, yi = np.linspace(x.min(), x.max(), 100), np.linspace(y.min(), y.max(), 100)
    xi, yi = np.meshgrid(xi, yi)

    # Interpolate
    rbf = scipy.interpolate.LinearNDInterpolator(points=np.array((x, y)).T, values=z)
    zi = rbf(xi, yi)

    im = ax1.imshow(zi, vmin=0., vmax=5., origin='lower', extent=[x.min(), x.max(), y.min(), y.max()])
    contours = plt.contour(xi, yi, zi, colors='black', levels=[3.])
    cbar = plt.colorbar(im)
    cbar.set_label('Significance')
    ax1.set_xlabel(r'$m_{\tilde{t}}$')
    ax1.set_xlim([x.min(), x.max()])
    ax1.set_ylabel(r'$m_{\chi}$')
    ax1.set_ylim([y.min(), y.max()])
    plt.scatter(x, y, c='black', s=[0.75] * len(x))
    plt.plot(x, x - 84., color='grey')
    plt.plot(x, x - 175., color='grey')
    AtlasStyle_mpl.ATLASLabel(ax1, 0.022, 0.925, 'Work in progress')
    AtlasStyle_mpl.LumiLabel(ax1, 0.022, 0.875, lumi=LUMI * 0.001)
    plt.savefig("plots/" + modelfile + "_eval-Grid.pdf")
    plt.savefig("plots/" + modelfile + "_eval-Grid.png")
    plt.close()
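# If this script is meant to be executed directly and main() is not already invoked elsewhere in the
# module (an assumption), the usual entry point would be:
#
#   if __name__ == '__main__':
#       main()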