def plot():
    """Plot the p-value of every signal point against the number of bins.

    Works entirely on module-level state: ``signal`` (a list of dicts with
    the keys 'cls', 'color' and 'legend'), ``logScale``, ``LUMI``,
    ``SAVEDIR`` and ``FILENAME``. Each entry of ``signal`` gets a new key
    'p' holding ``normQuantileHack`` applied to every element of 'cls'.
    The figure is written to SAVEDIR+FILENAME as .pdf and .png and closed.
    """
    print('test')

    # Convert every stored 'cls' value; the result is kept on the dict.
    for point in signal:
        point['p'] = [normQuantileHack(cls) for cls in point['cls']]

    print('Plotting p-value ...')
    fig = plt.figure(figsize=(8, 6))
    ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)
    ax1.set_xlabel('Number of bins', horizontalalignment='right', x=1.0)
    ax1.set_ylabel('p', horizontalalignment='right', y=1.0)

    # The x positions are fixed to 1..10, so each 'p' list is expected to
    # hold ten entries.
    n_bins = list(range(1, 11))
    for point in signal:
        if logScale:
            ax1.set_yscale('log')
        plt.plot(n_bins, point['p'], 'o-', color=point['color'],
                 label=point['legend'], lw=2)

    ax1.set_xlim((1, 10))
    ax1.set_ylim((0, 2.5))

    leg = plt.legend(loc="upper right", frameon=False)

    #AtlasStyle_mpl.ATLASLabel(ax1, 0.02, 0.25, 'Work in progress')
    AtlasStyle_mpl.Text(ax1, 0.15, 0.83, 'Simulation')
    AtlasStyle_mpl.LumiLabel(ax1, 0.15, 0.77, lumi=LUMI*0.001)

    for suffix in ('.pdf', '.png'):
        plt.savefig(SAVEDIR+FILENAME+suffix)
    plt.close()
def plot_confusion_matrix(y_test, y_hat, classes, normalize=False,
                          title='Confusion matrix', fileName=None,
                          sample_weight=None):
    """
    Print and plot the confusion matrix.

    Args:
    -----
        y_test : true labels, forwarded to ``confusion_matrix``.
        y_hat : predicted labels, forwarded to ``confusion_matrix``.
        classes : sequence of tick labels, one per class.
        normalize : (default = False) if True, every row is divided by its
            sum before printing and plotting.
        title : (default = 'Confusion matrix') title of the plot.
        fileName : (default = None) if truthy, the figure is saved as
            fileName + '.pdf' and fileName + '.png' and then closed.
        sample_weight : (default = None) optional per-event weights,
            forwarded to ``confusion_matrix``. The previous body read this
            name without defining it.
    """
    cm = confusion_matrix(y_test, y_hat, sample_weight=sample_weight)
    np.set_printoptions(precision=3)
    cmap = plt.cm.Blues

    if normalize:
        row_sums = cm.sum(axis=1)[:, np.newaxis].astype('float')
        # A class without any true entries keeps an all-zero row instead of
        # turning into NaN through a division by zero.
        row_sums[row_sums == 0] = 1.
        cm = cm.astype('float') / row_sums
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # The integer format only works for integer counts; a weighted matrix
    # is floating point even when it is not normalized.
    if normalize or not np.issubdtype(cm.dtype, np.integer):
        fmt = '.2f'
    else:
        fmt = 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # White text on dark cells, black text on light cells.
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

    # The label is attached to the axes that imshow drew on; the previous
    # body referred to an undefined name here.
    AtlasStyle_mpl.ATLASLabel(plt.gca(), 0.02, 0.9, 'Work in progress')

    if fileName:
        plt.savefig(fileName + ".pdf")
        plt.savefig(fileName + ".png")
        plt.close()
def plot_event_display(content, output_fname=None, vmin=1e-3, vmax=1, title=''):
    '''
    Draw a 2D event grid as an image with a colour bar.

    Args:
    -----
        content : numpy array, first arg to imshow, content of the image
                  e.g.: images.mean(axis=0) --> the average image
        output_fname : (default = None) string, name of the output file
                  without extension; when given, the plot is saved as
                  '.pdf' and '.png'.
        vmin : (default = 1e-3) float, lower bound of the pixel intensity
                  scale. Currently unused: the LogNorm that consumed it is
                  commented out, so the colour scale is linear.
        vmax : (default = 1) float, upper bound of the pixel intensity
                  scale. Currently unused, see vmin.
        title : (default = '') string, title of the plot, to be displayed
                  on top of the image

    The figure is left open after the call.
    '''
    fig, ax = plt.subplots(figsize=(8, 6))

    # Fixed axis ranges handed to imshow as (left, right, bottom, top).
    image = ax.imshow(content,
                      interpolation='nearest',
                      origin='lower',
                      extent=(-3.2, 3.2, -3, 3))
    #norm=LogNorm(vmin=vmin, vmax=vmax), origin='lower', extent=extent)
    #norm=LogNorm(vmin=vmin, vmax=vmax), extent=extent)

    colorbar = plt.colorbar(image, fraction=0.05, pad=0.05)
    colorbar.set_label(r'1/$m_{eff}$ [GeV]', y=0.85)

    plt.xlabel(r'Azimuthal Angle $(\phi)$')
    plt.ylabel(r'Pseudorapidity $(\eta)$')
    plt.title(title)

    AtlasStyle_mpl.ATLASLabel(ax, 0.02, 0.9, 'Work in progress')

    if output_fname is None:
        return
    for suffix in ('.pdf', '.png'):
        plt.savefig(output_fname+suffix)
def main():
    """Rank the training variables of a trained model and plot n-1 ROC curves.

    Reads the module-level settings ``modelDir``, ``DatasetDir``,
    ``SCALING``, ``COLLECTION`` and ``modelfile``. The analysis method,
    dataset name and variable list are parsed from lines 0, 3 and 5 of the
    '<model>_infofile.txt' file that sits next to the model.

    For every training variable the corresponding column of the scaled
    training matrix is shuffled, the model is re-evaluated and the ROC curve
    and AUC are recorded. The unshuffled result is appended last. A ranking
    table (squared weights, absolute weights, gradients, AUC after shuffle)
    is printed and the ROC curves are saved to
    'plots/<modelfile>_ROC_n-1.pdf' and '.png'.
    """
    # The context manager closes the info file; it used to stay open.
    with open(modelDir.replace('.h5', '_infofile.txt')) as infofile:
        infos = infofile.readlines()

    analysis = infos[0].replace('Used analysis method: ', '').replace('\n', '')
    dataset = DatasetDir + infos[3].replace('Used dataset: ', '').replace(
        '\n', '')
    nvar = infos[5].replace('Used variables for training: ',
                            '').replace('\n', '').split()

    model = load_model(modelDir)
    scaler = joblib.load(SCALING)

    recurrent = analysis.lower() == 'rnn'

    # Everything is copied out of the HDF5 file inside the with-block so the
    # file is closed even if one of the reads fails.
    collection = []
    with h5py.File(dataset + '.h5', 'r') as h5f:
        X_train = h5f['X_train'][:]
        y = h5f['y_train'][:]
        if recurrent:
            collection = [h5f['X_train_' + col][:] for col in COLLECTION]

    # Binary target: label 0 becomes 1, every other label becomes 0.
    y_train = (y == 0).astype(y.dtype)

    X_train[np.isnan(X_train)] = -999.
    X_train = scaler.transform(
        X_train)  # collection already standardized in training

    def predict(features):
        """Evaluate the model, prepending the sequence inputs for an RNN."""
        if recurrent:
            return model.predict(collection + [features])
        return model.predict(features)

    print('#----MODEL----#')
    print(modelDir)
    model.summary()

    ######################################
    # Read in trained and tested dataset #
    ######################################
    y_hat = predict(X_train)

    importanceBySquaredWeight = getImportanceBySquaredWeight(
        model, nvar, recurrent)
    importanceByWeight = getImportanceByWeight(model, nvar, recurrent)
    importanceByGrad = getImportanceByGradient(model, nvar, X_train,
                                               collection, recurrent)

    # Shuffle one variable at a time and re-evaluate. Each shuffled copy is
    # dropped right after its prediction, so only one extra copy of the
    # training matrix is alive at any time.
    roc = []
    auc = []
    for idx in range(len(nvar)):
        shuffled = np.copy(X_train)
        np.random.shuffle(shuffled[:, idx])
        y_predict = predict(shuffled)
        roc.append(roc_curve(y_train, y_predict[:, 0]))
        auc.append(roc_auc_score(y_train, y_predict[:, 0]))
        del shuffled, y_predict

    # The unshuffled reference comes last.
    roc.append(roc_curve(y_train, y_hat[:, 0]))
    auc.append(roc_auc_score(y_train, y_hat[:, 0]))

    print('\n'.join(
        str(item) for item in (auc, importanceBySquaredWeight,
                               importanceByWeight, importanceByGrad)) + '\n')

    print(100 * '#')
    print('\n\t\t\tVariable ranking')
    print('\n sum of squared weights \t sum of absolute weights \t gradients \t AUC (after shuffle)')
    print(100 * '-')
    for i, var in enumerate(nvar):
        print('{}: {}\t{}: {}\t{}: {}\t{}: {}'.format(
            importanceBySquaredWeight[i][0], importanceBySquaredWeight[i][1],
            importanceByWeight[i][0], importanceByWeight[i][1],
            importanceByGrad[i][0], importanceByGrad[i][1], var, auc[i]))
    print(100 * '-')
    print(100 * '#')

    print('Plotting the ROC curves ...')
    plt.figure(figsize=(8, 6))
    ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)
    ax1.set_xlim((0, 1))
    ax1.set_ylim((0, 1))
    ax1.set_xlabel(r'$\epsilon_{Sig.}$', horizontalalignment='right', x=1.0)
    ax1.set_ylabel("$r_{Bkg.}$", horizontalalignment='right', y=1.0)

    # One curve per shuffled variable plus the reference curve.
    labels = ['w/o %s' % var for var in nvar] + ['Default']
    for label, curve, area in zip(labels, roc, auc):
        # roc_curve results are indexed as in the original plot call:
        # element 1 on the x axis, one minus element 0 on the y axis.
        plt.plot(curve[1], 1 - curve[0], '-',
                 label='%s (AUC = %0.4f)' % (label, area))

    plt.plot([0, 1], [1, 0], '--', color=(0.6, 0.6, 0.6), label='Luck')
    plt.legend(loc="lower left", frameon=False)

    AtlasStyle_mpl.ATLASLabel(ax1, 0.13, 0.9, 'Work in progress')
    #AtlasStyle_mpl.LumiLabel(ax1, 0.02, 0.3, lumi=LUMI*0.001)

    plt.savefig("plots/" + modelfile + "_ROC_n-1.pdf")
    plt.savefig("plots/" + modelfile + "_ROC_n-1.png")
    plt.close()
def plot_classification(y_true,
                        y_predict,
                        weights,
                        fileName="Test",
                        save=False,
                        weighted=False,
                        train=False,
                        sample=None,
                        addStr=''):
    """Draw, per true class, a stacked bar of the predicted classes.

    y_predict is reduced to a predicted class with argmax along axis 1.
    Four classes (0..3) are assumed; they are labelled Signal, ttbar,
    Single Top and W + jets.

    weighted=False: each bar segment is the fraction of events of the true
    class that were assigned to the predicted class.
    weighted=True: each segment is the sum of `weights` of those events.
    The combination train and weighted is refused: a message is printed and
    0 is returned.

    sample, if given, must provide two entries that are added as extra
    legend lines. With save=True the figure is written to
    'plots/<fileName>_Classification<addStr>.pdf' and '.png'.
    """
    print('Plotting the classification for true labels...')

    if weighted:
        addStr += '_weighted'
    if train:
        addStr += '_train'
    if train and weighted:
        print('For weighted events, whole dataset has to be used')
        return 0

    y_predict_class = np.argmax(y_predict, axis=1)

    classes = [0, 1, 2, 3]  #Different classes

    # stacks[k][t]: share of true class t that was predicted as class k
    stacks = [[] for _ in classes]
    for true_label in range(0, 4):
        is_true = y_true == true_label
        if weighted:
            for predicted_label in classes:
                selected = np.logical_and(is_true,
                                          y_predict_class == predicted_label)
                stacks[predicted_label].append(np.sum(weights[selected]))
        else:
            predictions = y_predict_class[is_true]
            n = float(predictions.shape[0])
            present, counts = np.unique(predictions, return_counts=True)
            present = present.tolist()
            for predicted_label in classes:
                if predicted_label in present:
                    stacks[predicted_label].append(
                        counts[present.index(predicted_label)] / n)
                else:
                    # Nothing of this true class ended up here.
                    stacks[predicted_label].append(0)

    assignal, astt, assinglet, asWjets = stacks

    explain_patch = mpatches.Patch(color='None', label="predicted label")

    width = 1.
    bar0 = plt.bar(classes, assignal, width, label=r'Signal', color='r')
    bar1 = plt.bar(classes,
                   astt,
                   width,
                   bottom=assignal,
                   label=r'$t\overline{t}$',
                   color='b')
    bar2 = plt.bar(classes,
                   assinglet,
                   width,
                   bottom=np.array(astt) + np.array(assignal),
                   label=r'Single Top',
                   color='g')
    bar3 = plt.bar(classes,
                   asWjets,
                   width,
                   bottom=np.array(assinglet) + np.array(astt) +
                   np.array(assignal),
                   label='$W$ + jets',
                   color='orange')

    plt.xlabel('true label')
    #plt.legend(loc='best',handles=[explain_patch, bar0, bar1, bar2, bar3])
    plt.xticks(np.arange(4),
               (r'Signal', r'$t\overline{t}$', r'Single Top', '$W$ + jets'))
    plt.title('Classification')

    if weighted:
        # Leave a third of the tallest bar as head room.
        tallest = max([
            sig + tt + singlet + wjets for sig, tt, singlet, wjets in zip(
                assignal, astt, assinglet, asWjets)
        ])
        plt.ylim(0, tallest * (1 + 0.33))

    # Shrink the axes to 80% of their width; the legend goes to the right.
    box = plt.gca().get_position()
    plt.gca().set_position([box.x0, box.y0, box.width * 0.8, box.height])

    handles = [explain_patch, bar0, bar1, bar2, bar3]
    if sample is not None:
        handles.append(mpatches.Patch(color='None', label=sample[0]))
        handles.append(mpatches.Patch(color='None', label=sample[1]))
    plt.gca().legend(loc='center left',
                     bbox_to_anchor=(1, 0.5),
                     handles=handles)

    if weighted:
        ax1 = plt.gca()
        AtlasStyle_mpl.ATLASLabel(ax1, 0.02, 0.9, 'Work in progress')
        AtlasStyle_mpl.LumiLabel(ax1, 0.02, 0.8, lumi=140)

    #plt.gca().set_ylim([0,1.2])

    if save:
        if not os.path.exists("./plots/"):
            os.makedirs("./plots/")
            print("Creating folder plots")
        target = "plots/" + fileName + "_Classification" + addStr
        plt.savefig(target + ".pdf")
        plt.savefig(target + ".png")
        plt.close()
def _binned_yields(scores, weights, upper_edges):
    """Return per-bin sum of weights, sum of squared weights and event count.

    Bin i collects the events for which ``np.digitize(scores, upper_edges)``
    equals i. Events that digitize to ``len(upper_edges)`` are not counted.
    """
    bin_index = np.digitize(scores, upper_edges)
    yields = []
    variances = []
    counts = []
    for i in range(len(upper_edges)):
        w = weights[np.where(bin_index == i)[0]]
        yields.append(w.sum())
        variances.append(np.sum(w**2.))
        counts.append(len(w))
    return np.array(yields), np.array(variances), np.array(counts)


def main():
    """Scan the expected significance over the output score and plot shapes.

    Reads the module-level settings ``modelDir``, ``SCALING``,
    ``DatasetDir``, ``RESOLUTION``, ``SIGNAL``, ``BACKGROUND``, ``inputDir``,
    ``PRESELECTION``, ``VAR``, ``WEIGHTS``, ``LUMI``, ``COLLECTION``,
    ``REMOVE_VAR`` and ``modelfile``.

    Every signal and background sample is loaded, evaluated with the model
    and histogrammed in the output score. For each signal and each lower
    cut at a bin edge the binomial expected Z (ROOT) and ``asimovZ`` are
    computed and stored on the signal dict under 'sig', 'sig_max', 'sig_err'
    and 'ams'. Finally the unit-normalised output score shapes of the first
    signal and the summed background are drawn with a ratio panel and saved
    as 'plots/<modelfile>_shapeComparison_outputScore.pdf' and '.png'.
    """
    model = load_model(modelDir)
    scaler = joblib.load(SCALING)

    # The context manager closes the info file; it used to stay open.
    with open(modelDir.replace('.h5', '_infofile.txt')) as infofile:
        infos = infofile.readlines()

    analysis = infos[0].replace('Used analysis method: ', '').replace('\n', '')
    dataset = DatasetDir + infos[3].replace('Used dataset: ', '').replace(
        '\n', '')

    recurrent = analysis.lower() == 'rnn'
    seq_scaler = dataset + '_scaling.json' if recurrent else None

    # float() keeps the bin width from being truncated by integer division
    # when RESOLUTION holds integers.
    db = (RESOLUTION[2] - RESOLUTION[1]) / float(
        RESOLUTION[0])  # bin width in discriminator distribution
    bins = np.arange(RESOLUTION[1], RESOLUTION[2] + db,
                     db)  # bin edges in discriminator distribution
    center = (bins[:-1] + bins[1:]) / 2

    print('#----MODEL----#')
    print(modelDir)

    def load_and_evaluate(sample):
        """Load one sample and return (dataframe, weights, model output)."""
        path = os.path.join(inputDir, sample + '/')
        if not recurrent:
            df, weight = loadDataFrame(path, PRESELECTION, VAR, WEIGHTS, LUMI)
            y_hat = evaluate(model, df.values, scaler)
        else:
            df, weight, collection = loadSequentialDataFrame(
                path, PRESELECTION, COLLECTION, REMOVE_VAR, VAR, WEIGHTS,
                LUMI)
            y_hat = evaluate(model,
                             df.values,
                             scaler,
                             seq_scaler,
                             rnn=True,
                             col=collection)
        return df, weight, y_hat

    ###########################
    # Read and evaluate signals
    ###########################

    Signal = []
    for s in SIGNAL:
        x, y = pickBenchmark(s)
        df, weight, y_hat = load_and_evaluate(s)
        # bin contents of the output score for this sample
        yields, variances, counts = _binned_yields(y_hat[:, 0], weight.values,
                                                   bins[1:])
        Signal.append({
            'name': s,
            'm_stop': x,
            'm_X': y,
            'dataset': df,
            'weight': weight,
            'nEvents': weight.sum(),
            'y_pred': y_hat,
            'outputScore': yields,
            'outputMC': counts,
            'output_var': variances,
            # NOTE(review): signals store sqrt(N) here while backgrounds
            # store N under the same key; kept as found, confirm intent.
            'outputMC_var': np.sqrt(counts)
        })

    ###########################
    # Read and evaluate backgrounds
    ###########################

    Background = []
    for b in BACKGROUND:
        df, weight, y_hat = load_and_evaluate(b)
        yields, variances, counts = _binned_yields(y_hat[:, 0], weight.values,
                                                   bins[1:])
        Background.append({
            'name': b,
            'dataset': df,
            'weight': weight,
            'nEvents': weight.sum(),
            'y_pred': y_hat,
            'outputScore': yields,
            'outputMC': counts,
            'output_var': variances,
            'outputMC_var': np.array(counts)
        })

    # Sum all backgrounds bin by bin.
    totalBkgOutput = np.array([b['outputScore']
                               for b in Background]).sum(axis=0)
    totalBkgVar = np.array([b['output_var'] for b in Background]).sum(axis=0)
    n_bkg_total = totalBkgOutput.sum()

    for s in Signal:
        significance = []
        significance_err = []
        asimov = []

        for i in range(len(bins[1:])):
            # Everything is integrated from bin i upwards, i.e. a lower cut
            # on the output score.
            n_sig = s['outputScore'][i:].sum()
            n_bkg = totalBkgOutput[i:].sum()
            sigma_sig = np.sqrt(np.sum(s['output_var'][i:]))
            sigma_bkg = np.sqrt(np.sum(totalBkgVar[i:]))

            eff_sig = n_sig / s['nEvents']
            eff_bkg = n_bkg / n_bkg_total
            err_sig = sigma_sig / s['nEvents']
            err_bkg = sigma_bkg / n_bkg_total

            if n_bkg > 0.:
                rel_err_bkg = sigma_bkg / n_bkg
            else:
                rel_err_bkg = 0.

            # Only the background term and a constant 0.25 enter; the
            # signal term is not included:
            #total_rel_err = np.sqrt(rel_err_sig**2. + rel_err_bkg**2. + 0.25**2.)
            total_rel_err = np.sqrt(rel_err_bkg**2. + 0.25**2.)

            # No significance is quoted for empty selections or when a
            # relative efficiency error exceeds 75%. The zero checks come
            # first so the divisions are never evaluated with a zero
            # denominator.
            if ((eff_sig == 0) or (eff_bkg == 0)
                    or (err_sig / eff_sig > 0.75)
                    or (err_bkg / eff_bkg > 0.75)):
                Z = 0.
                Z_err = 0.
                ams = 0.
            else:
                binomial_z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ
                Z = binomial_z(n_sig, n_bkg, total_rel_err)
                ams = asimovZ(n_sig, n_bkg, np.sqrt(totalBkgVar[i:].sum()))

                # Vary signal and background by their errors, one at a time,
                # and take half the spread as the error on Z.
                Zplus_sig = binomial_z((eff_sig + err_sig) * s['nEvents'],
                                       eff_bkg * n_bkg_total, total_rel_err)
                Zmins_sig = binomial_z((eff_sig - err_sig) * s['nEvents'],
                                       eff_bkg * n_bkg_total, total_rel_err)
                Zplus_bkg = binomial_z(eff_sig * s['nEvents'],
                                       (eff_bkg + err_bkg) * n_bkg_total,
                                       total_rel_err)
                Zmins_bkg = binomial_z(eff_sig * s['nEvents'],
                                       (eff_bkg - err_bkg) * n_bkg_total,
                                       total_rel_err)

                Z_err_sig = abs(Zplus_sig - Zmins_sig) / 2
                Z_err_bkg = abs(Zplus_bkg - Zmins_bkg) / 2
                Z_err = np.sqrt(Z_err_sig**2 + Z_err_bkg**2)

            significance.append(Z)
            significance_err.append(Z_err)
            asimov.append(ams)

        s['sig'] = np.array(significance)
        s['sig_max'] = s['sig'].max()
        s['sig_err'] = np.array(significance_err)
        s['ams'] = np.array(asimov)

    print('Plotting the output score...')
    plt.figure(figsize=(8, 6))
    ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=3)
    ax1.set_xlim((bins[0], bins[-1]))
    ax1.set_ylabel("Events", horizontalalignment='right', y=1.0)

    # Both shapes are normalised to unit area; only the first signal is
    # drawn.
    n_sig_total = Signal[0]['outputScore'].sum()
    sig_shape = Signal[0]['outputScore'] / n_sig_total
    sig_shape_err = np.sqrt(Signal[0]['output_var']) / n_sig_total
    bkg_shape = totalBkgOutput / n_bkg_total
    bkg_shape_err = np.sqrt(totalBkgVar) / n_bkg_total

    plt.bar(center,
            bkg_shape,
            width=db,
            yerr=bkg_shape_err,
            color='b',
            alpha=0.25,
            error_kw=dict(ecolor='b', lw=1.5),
            label=Background[0]['name'])
    plt.bar(center,
            sig_shape,
            width=db,
            yerr=sig_shape_err,
            label=Signal[0]['name'],
            color='r',
            alpha=0.25,
            error_kw=dict(ecolor='r', lw=1.5))

    ax1.set_ylim((0., np.max([np.max(bkg_shape),
                              np.max(sig_shape)]) * 1.3))
    #ax1.set_yscale('log')

    plt.legend(loc="best", frameon=False)

    AtlasStyle_mpl.ATLASLabel(ax1, 0.02, 0.925, 'Work in progress')
    #AtlasStyle_mpl.LumiLabel(ax1, 0.02, 0.875, lumi=LUMI*0.001)

    ax2 = plt.subplot2grid((4, 4), (3, 0), colspan=4, rowspan=1)
    getRatio(sig_shape, bins, sig_shape_err, bkg_shape, bins, bkg_shape_err,
             'r')
    ax2.set_xlabel('Output score', horizontalalignment='right', x=1.0)
    ax2.set_ylabel('Reco/Truth')
    ax2.set_xlim((0., 1.))
    ax2.set_ylim((0, 2))
    ax2.grid()
    ax2.tick_params(direction='in')
    ax2.xaxis.set_ticks_position('both')
    ax2.yaxis.set_ticks_position('both')

    plt.savefig("plots/" + modelfile + "_shapeComparison_outputScore.pdf")
    plt.savefig("plots/" + modelfile + "_shapeComparison_outputScore.png")
    plt.close()
def plot_TrainTest_score(sig_predicted_train,
                         sig_predicted_test,
                         sig_w_train,
                         sig_w_test,
                         bkg_predicted_train,
                         bkg_predicted_test,
                         bkg_w_train,
                         bkg_w_test,
                         binning,
                         fileName='Test',
                         normed=False,
                         save=False,
                         ratio=True,
                         addStr=''):
    """Overlay the training and test output score of signal and background.

    Training scores are drawn as filled histograms, test scores as points.
    `binning` is read as (number of bins, lower edge, upper edge). All
    histograms are filled without event weights; the test weights are only
    used for the sum-of-weights terms handed to getRatio. The statistic of
    ks_2samp between the training and test histograms is shown in the
    legend. With ratio=True a lower panel is drawn through getRatio. With
    save=True the figure is written to
    'plots/<fileName>_TrainTestScore<addStr>.pdf' and '.png'.
    """
    print('Plotting the train/test score...')

    n_bins = binning[0]
    score_range = (binning[1], binning[2])

    fig = plt.figure(figsize=(8, 6))
    # The main pad gives up its lowest row when a ratio panel is requested.
    ax1 = plt.subplot2grid((4, 4), (0, 0),
                           colspan=4,
                           rowspan=3 if ratio else 4)
    #ax1.xaxis.set_ticks([])

    ax1.tick_params(direction='in')
    ax1.set_xlim(score_range)
    ax1.xaxis.set_ticks_position('both')
    ax1.yaxis.set_ticks_position('both')

    # Settings shared by all four histograms; weights are switched off.
    binned = dict(weights=None, bins=n_bins, range=score_range, density=normed)
    filled = dict(histtype='stepfilled', alpha=0.5)
    filled.update(binned)

    s_histTrain, s_binsTrain, s_patchesTrain = plt.hist(
        sig_predicted_train.ravel(),
        color='r',
        label='Signal (Training)',
        **filled)
    b_histTrain, b_binsTrain, b_patchesTrain = plt.hist(
        bkg_predicted_train.ravel(),
        color='b',
        label='Background (Training)',
        **filled)

    s_histTest, s_binsTest = np.histogram(sig_predicted_test.ravel(), **binned)
    b_histTest, b_binsTest = np.histogram(bkg_predicted_test.ravel(), **binned)

    center = (s_binsTrain[:-1] + s_binsTrain[1:]) / 2
    # TODO define yerr = sqrt( sum w^2 ) per bin!
    s_error = plt.errorbar(center,
                           s_histTest,
                           fmt='o',
                           c='r',
                           label='Signal (Testing)')
    b_error = plt.errorbar(center,
                           b_histTest,
                           fmt='o',
                           c='b',
                           label='Background (Testing)')

    # Only the test statistic is used, the p-value is dropped.
    ks_sig = ks_2samp(s_histTrain, s_histTest)[0]
    ks_bkg = ks_2samp(b_histTrain, b_histTest)[0]

    #sep = getSeparation(s_histTest, s_binsTest, b_histTest, b_binsTest)

    s_w_test = getSumW2(sig_predicted_test.ravel(), sig_w_test, binning)
    b_w_test = getSumW2(bkg_predicted_test.ravel(), bkg_w_test, binning)
    if normed:
        s_w_test = s_w_test / np.sum(sig_w_test)
        b_w_test = b_w_test / np.sum(bkg_w_test)

    #Proxy artist for KS Test
    ks_patch = mpatches.Patch(color='None',
                              label='KS Test S (B): %.3f (%.3f)' %
                              (ks_sig, ks_bkg))

    #print sep

    ax1.set_ylabel('a. u.' if normed else 'Events',
                   horizontalalignment='right',
                   y=1.0)

    leg = plt.legend(loc='best',
                     frameon=False,
                     handles=[
                         s_patchesTrain[0], b_patchesTrain[0], s_error,
                         b_error, ks_patch
                     ])
    leg.get_window_extent()

    if ratio:
        ax2 = plt.subplot2grid((4, 4), (3, 0), colspan=4, rowspan=1)
        getRatio(s_histTest, s_binsTest, s_w_test, b_histTest, b_binsTest,
                 b_w_test, 'r')
        ax2.set_xlabel('EPD', horizontalalignment='right', x=1.0)
        ax2.set_ylabel('S/B')
        ax2.set_xlim(score_range)
        ax2.set_ylim((0, 2))
        ax2.grid()
        ax2.tick_params(direction='in')
        ax2.xaxis.set_ticks_position('both')
        ax2.yaxis.set_ticks_position('both')

    # Head room of 50% above the taller of the two test histograms.
    ax1.set_ylim(0., 1.5 * np.maximum(s_histTest.max(), b_histTest.max()))
    ax1.set_xlabel('EPD', horizontalalignment='right', x=1.0)

    AtlasStyle_mpl.ATLASLabel(ax1, 0.022, 0.925, 'Work in progress')

    if save:
        if not os.path.exists('./plots/'):
            os.makedirs('./plots/')
            print('Creating folder plots')
        target = 'plots/' + fileName + '_TrainTestScore' + addStr
        plt.savefig(target + '.pdf')
        plt.savefig(target + '.png')
        plt.close()
def plot_output_score_multiclass(sig_predicted, sig_w, bkg1_predicted, bkg1_w,
                                 bkg2_predicted, bkg2_w, bkg3_predicted,
                                 bkg3_w, bkg_predicted, bkg_w, binning,
                                 fileName="Test", title='Discriminating power',
                                 normed=False, save=False, ratio=False,
                                 log=False, sample=None, addStr=''):
    """Plot the signal output score on top of the stacked backgrounds.

    The three backgrounds are stacked in the order bkg3 (W+jets), bkg2
    (single top), bkg1 (ttbar). `binning` is read as (number of bins, lower
    edge, upper edge). bkg_predicted and bkg_w are only used for the ratio
    panel.

    Args:
        normed: passed to the histograms as `density`.
        save: if True, write
            'plots/<fileName>_output_score_multiclass<addStr>[_log].pdf'
            and '.png'.
        ratio: if True, draw a lower panel through getRatio.
        log: if True, use a logarithmic y axis and append '_log' to the
            file name.
        sample: optional text added as an extra legend entry.

    Returns:
        (r, s_bins): the return value of getRatio and the signal bin edges.
        Without a ratio panel r is 0.
    """
    print('Plotting the multiclass output score...')
    plt.figure(figsize=(8, 6))
    if ratio:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=3)
        ax1.set_xlabel('', fontsize=0.)
        ax1.set_xticklabels(())
    else:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)

    ax1.tick_params(direction='in')
    ax1.set_xlim((binning[1], binning[2]))
    ax1.xaxis.set_ticks_position('both')
    ax1.yaxis.set_ticks_position('both')

    bkgs = [bkg3_predicted.ravel(), bkg2_predicted.ravel(),
            bkg1_predicted.ravel()]
    bweights = [bkg3_w, bkg2_w, bkg1_w]
    labels = [r'$W$+jets', 'single top', r'$t\overline{t}$']
    colors = ['orange', 'g', 'b']

    s_hist, s_bins, s_patches = plt.hist(
        sig_predicted.ravel(), weights=sig_w, histtype='stepfilled',
        color='r', label='signal', alpha=0.5, bins=binning[0],
        range=(binning[1], binning[2]), density=normed)
    b_hist, b_bins, b_patches = plt.hist(
        bkgs, weights=bweights, histtype='stepfilled', color=colors,
        label=labels, alpha=0.5, bins=binning[0],
        range=(binning[1], binning[2]), density=normed, stacked=True)

    log_str = ''
    if log:
        # NOTE(review): 'nonposy' is the keyword of older matplotlib
        # releases; newer releases call it 'nonpositive'.
        plt.yscale('log', nonposy='clip')
        log_str = '_log'

    #sep = getSeparation(s_histTest, s_binsTest, b_histTest, b_binsTest)
    #print sep

    if normed:
        ax1.set_ylabel("a. u.", ha='left')
    else:
        ax1.set_ylabel("Events", ha='left')

    # b_hist[2] is the top of the stack (see the plt.hist call above).
    #ax1.set_ylim((0, s_hist.max()*(1+0.33)))
    if log:
        ax1.set_ylim((0, b_hist[2].max()*(30)))
    else:
        ax1.set_ylim((0, b_hist[2].max()*(1+0.33)))

    if sample is not None:
        sample_patch = mpatches.Patch(color='None', label=sample)
        plt.legend(loc='best', frameon=False,
                   handles=[s_patches[0], b_patches[0][0], b_patches[1][0],
                            b_patches[2][0], sample_patch])
    else:
        plt.legend(loc='best', frameon=False)

    if title is not None:
        plt.title(title)

    AtlasStyle_mpl.ATLASLabel2(ax1, 0.02, 0.9, 'Work in progress')
    AtlasStyle_mpl.LumiLabel(ax1, 0.02, 0.8, lumi=140)

    # Explicit default instead of catching a NameError at return time.
    r = 0
    if ratio:
        # These two terms used to exist only in commented-out lines, so the
        # getRatio call below failed with a NameError.
        s_w = getSumW2(sig_predicted.ravel(), sig_w, binning)
        b_w = getSumW2(bkg_predicted.ravel(), bkg_w, binning)

        ax2 = plt.subplot2grid((4, 4), (3, 0), colspan=4, rowspan=1)
        # NOTE(review): the ratio uses the top of the background stack
        # together with the weights of the combined background sample;
        # confirm that bkg_predicted/bkg_w is the sum of bkg1..bkg3.
        r = getRatio(b_hist[-1], b_bins, b_w, s_hist, s_bins, s_w, 'r')
        ax2.set_xlabel('Discriminant')
        ax2.set_ylabel('variation/nom.')
        ax2.set_xlim((binning[1], binning[2]))
        ax2.set_ylim((-0.5, 2.5))
        ax2.grid()
        ax2.tick_params(direction='in')
        ax2.xaxis.set_ticks_position('both')
        ax2.yaxis.set_ticks_position('both')

    ax1.set(xlabel='EPD')

    if save:
        if not os.path.exists("./plots/"):
            os.makedirs("./plots/")
            print("Creating folder plots")
        plt.savefig("plots/"+fileName+"_output_score_multiclass"+addStr+log_str+".pdf")
        plt.savefig("plots/"+fileName+"_output_score_multiclass"+addStr+log_str+".png")
        plt.close()

    if not ratio:
        print('ratio is set to False, r is not defined')
    return r, s_bins
def main():
    """Evaluate one trained model on signal and background and plot the result.

    The model name (without directory and file suffix) is taken from the
    single command-line argument. The model, its optional scaler and its info
    file are read from ``TrainedModels/models/``. Every sample group in
    ``SIGNAL`` and ``BACKGROUND`` is loaded, evaluated and filled into a
    weighted output-score histogram whose binning comes from ``RESOLUTION``.
    For every signal the binomial and Asimov significance are scanned as a
    function of a lower cut on the output score. Finally the output-score,
    significance and ROC plots of the first signal are written to ``plots/``.

    Returns:
        None. With a wrong number of arguments only the usage text is printed.

    Raises:
        ValueError: if the analysis method named in the info file contains
            neither 'nn' nor 'bdt', so no model can be loaded.
    """

    def _show(*items):
        # Version-neutral stand-in for a multi-argument print statement.
        print(' '.join(str(item) for item in items))

    # Check number of arguments and act respectively thereof
    if len(sys.argv) != 2:
        print('Usage: evaluate_signal.py <model> (omit directory and file suffix)')
        return
    modelfile = sys.argv[1]
    _show(modelfile, type(modelfile))

    Dir = 'TrainedModels/models/'
    DatasetDir = 'TrainedModels/datasets/'
    modelDir = Dir + modelfile + '.h5'

    scaler_path = os.path.join(Dir, modelfile + '_scaler.pkl')
    scaler = joblib.load(scaler_path) if os.path.exists(scaler_path) else None

    # The info file is only needed for three of its lines; close it right away.
    with open(modelDir.replace('.h5', '_infofile.txt')) as infofile:
        infos = infofile.readlines()

    analysis = infos[0].replace('Used analysis method: ', '').replace('\n', '')
    dataset = DatasetDir + infos[3].replace('Used dataset: ', '').replace('\n', '')
    VAR = infos[5].replace('Used variables for training: ', '').replace('\n', '').split()
    print(VAR)

    method = analysis.lower()
    recurrent = method == 'rnn'
    if recurrent:
        seq_scaler = dataset + '_scaling.json'

    if 'nn' in method:
        model = load_model(os.path.join(Dir, modelfile + '.h5'))
    elif 'bdt' in method:
        model = joblib.load(os.path.join(Dir, modelfile + '.h5'))
    else:
        raise ValueError('Unknown analysis method in info file: %r' % analysis)

    db = (RESOLUTION[2] - RESOLUTION[1]) / RESOLUTION[0]  # bin width in discriminator distribution
    bins = np.arange(RESOLUTION[1], RESOLUTION[2] + db, db)  # bin edges in discriminator distribution
    center = (bins[:-1] + bins[1:]) / 2
    n_bins = len(bins) - 1

    print('#----MODEL----#')
    print('\t{}'.format(modelDir))

    ###########################
    # Read and evaluate signals
    ###########################

    Signal = []
    for smp in SIGNAL:
        # All sub-samples of one group are concatenated before evaluation.
        first = True
        for s in smp:
            print('Sample:\t{}'.format(s))
            x, y = pickBenchmark(s)
            if not recurrent:
                _df, _weight = loadDataFrame(os.path.join(inputDir, s + '/'),
                                             PRESELECTION, VAR, WEIGHTS, LUMI)
                _show(_df.shape, _weight.shape)
                if first:
                    df = _df.copy()
                    weight = _weight.copy()
                    first = False
                else:
                    df = pd.concat((df, _df), ignore_index=True)
                    weight = pd.concat((weight, _weight), ignore_index=True)
            else:
                _df, _weight, collection = loadSequentialDataFrame(
                    os.path.join(inputDir, s + '/'), PRESELECTION, COLLECTION,
                    REMOVE_VAR, VAR, WEIGHTS, LUMI)
                _show(_df.shape, _weight.shape, collection[0]['df'].shape)
                if first:
                    df = _df.copy()
                    weight = _weight.copy()
                    seq = collection[0]['df'].copy()
                    first = False
                else:
                    df = pd.concat((df, _df), ignore_index=True)
                    weight = pd.concat((weight, _weight), ignore_index=True)
                    seq = pd.concat((seq, collection[0]['df']), ignore_index=True)

        if not recurrent:
            y_hat = evaluate(model, df.values, scaler, method=analysis)
            _show(df.shape, weight.shape)
        else:
            collection[0]['df'] = seq
            _show(df.shape, weight.shape, collection[0]['df'].shape)
            y_hat = evaluate(model, df.values, scaler, seq_scaler,
                             method=analysis, col=collection)

        # get the bin index of the output score for each event
        bin_index = np.digitize(y_hat[:, 0], bins[1:])

        outputWeighted = []
        outputWeightedVar = []
        outputMC = []
        outputMCVar = []
        for i in range(n_bins):
            w = weight.values[np.where(bin_index == i)[0]]
            outputWeighted.append(w.sum())
            outputWeightedVar.append(np.sum(w**2.))  # sum of squared weights
            outputMC.append(len(w))
            outputMCVar.append(np.sqrt(len(w)))

        # Name and mass point are those of the last sub-sample of the group.
        Signal.append({
            'name': s[6:],
            'm_stop': x,
            'm_X': y,
            'dataset': df,
            'weight': weight,
            'nEvents': weight.sum(),
            'y_pred': y_hat,
            'outputScore': np.array(outputWeighted),
            'outputMC': np.array(outputMC),
            'output_var': np.array(outputWeightedVar),
            'outputMC_var': np.array(outputMCVar)
        })
        del df, weight, y_hat, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar

    ###########################
    # Read and evaluate backgrounds
    ###########################

    Background = []
    for smp in BACKGROUND:
        first = True
        for b in smp:
            print('Sample:\t{}'.format(b))
            if not recurrent:
                _df, _weight = loadDataFrame(os.path.join(inputDir, b + '/'),
                                             PRESELECTION, VAR, WEIGHTS, LUMI)
                _show(_df.shape, _weight.shape)
                if first:
                    df = _df.copy()
                    weight = _weight.copy()
                    first = False
                else:
                    df = pd.concat((df, _df), ignore_index=True)
                    weight = pd.concat((weight, _weight), ignore_index=True)
            else:
                _df, _weight, collection = loadSequentialDataFrame(
                    os.path.join(inputDir, b + '/'), PRESELECTION, COLLECTION,
                    REMOVE_VAR, VAR, WEIGHTS, LUMI)
                _show(_df.shape, _weight.shape, collection[0]['df'].shape)
                if first:
                    df = _df.copy()
                    weight = _weight.copy()
                    seq = collection[0]['df'].copy()
                    first = False
                else:
                    df = pd.concat((df, _df), ignore_index=True)
                    weight = pd.concat((weight, _weight), ignore_index=True)
                    seq = pd.concat((seq, collection[0]['df']), ignore_index=True)

        if not recurrent:
            _show(df.shape, weight.shape)
            y_hat = evaluate(model, df.values, scaler, method=analysis)
        else:
            collection[0]['df'] = seq
            _show(df.shape, weight.shape, collection[0]['df'].shape)
            y_hat = evaluate(model, df.values, scaler, seq_scaler,
                             method=analysis, col=collection)

        bin_index = np.digitize(y_hat[:, 0], bins[1:])

        outputWeighted = []
        outputWeightedVar = []
        outputMC = []
        outputMCVar = []
        for i in range(n_bins):
            w = weight.values[np.where(bin_index == i)[0]]
            outputWeighted.append(w.sum())
            outputWeightedVar.append(np.sum(w**2.))
            outputMC.append(len(w))
            # NOTE(review): the signal loop stores sqrt(N) under this key, the
            # background loop stores N -- confirm which one is intended.
            outputMCVar.append(len(w))

        Background.append({
            'name': b,
            'dataset': df,
            'weight': weight,
            'nEvents': weight.sum(),
            'y_pred': y_hat,
            'outputScore': np.array(outputWeighted),
            'outputMC': np.array(outputMC),
            'output_var': np.array(outputWeightedVar),
            'outputMC_var': np.array(outputMCVar)
        })
        del df, weight, y_hat, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar

    totalBkgOutput = np.array([b['outputScore'] for b in Background]).sum(axis=0)
    totalBkgVar = np.array([b['output_var'] for b in Background]).sum(axis=0)
    bkg_total = totalBkgOutput.sum()

    _show(len(Signal), len(Background), Signal[0]['outputScore'][:].sum(), totalBkgOutput)

    ###########################
    # Significance scan: keep all events with output score >= lower edge of bin i
    ###########################

    for s in Signal:
        significance = []
        significance_err = []
        asimov = []
        asimov_err = []
        roc = []

        for i in range(n_bins):
            n_sig = s['outputScore'][i:].sum()
            n_bkg = totalBkgOutput[i:].sum()
            sigma_sig = np.sqrt(np.sum(s['output_var'][i:]))
            sigma_bkg = np.sqrt(np.sum(totalBkgVar[i:]))

            eff_sig = n_sig / s['nEvents']
            eff_bkg = n_bkg / bkg_total
            err_sig = sigma_sig / s['nEvents']
            err_bkg = sigma_bkg / bkg_total

            rel_err_bkg = sigma_bkg / n_bkg if n_bkg > 0. else 0.
            # Statistical background uncertainty plus a flat 25% term.
            total_rel_err = np.sqrt(rel_err_bkg**2. + 0.25**2.)

            # Empty selections and selections with a relative statistical
            # uncertainty above 75% are assigned zero significance.
            if (eff_sig == 0 or eff_bkg == 0 or err_sig / eff_sig > 0.75
                    or err_bkg / eff_bkg > 0.75):
                Z = 0.
                Z_err = 0.
                ams = 0.
                ams_err = 0.
            else:
                # NOTE(review): nesting reconstructed from flattened source;
                # ROC points are only recorded for accepted cuts -- confirm.
                Z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(
                    n_sig, n_bkg, total_rel_err)
                ams = asimovZ(n_sig, n_bkg, sigma_bkg)
                roc.append((eff_sig, 1 - eff_bkg))

                # Vary signal and background by one standard deviation each.
                ams_plus_sig = asimovZ(n_sig + sigma_sig, n_bkg, sigma_bkg)
                ams_mins_sig = asimovZ(n_sig - sigma_sig, n_bkg, sigma_bkg)
                ams_plus_bkg = asimovZ(n_sig, n_bkg + sigma_bkg, sigma_bkg)
                ams_mins_bkg = asimovZ(n_sig, n_bkg - sigma_bkg, sigma_bkg)

                Zplus_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(
                    (eff_sig + err_sig) * s['nEvents'], eff_bkg * bkg_total,
                    total_rel_err)
                Zmins_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(
                    (eff_sig - err_sig) * s['nEvents'], eff_bkg * bkg_total,
                    total_rel_err)
                Zplus_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(
                    eff_sig * s['nEvents'], (eff_bkg + err_bkg) * bkg_total,
                    total_rel_err)
                Zmins_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(
                    eff_sig * s['nEvents'], (eff_bkg - err_bkg) * bkg_total,
                    total_rel_err)

                Z_err_sig = abs(Zplus_sig - Zmins_sig) / 2
                Z_err_bkg = abs(Zplus_bkg - Zmins_bkg) / 2
                Z_err = np.sqrt(Z_err_sig**2 + Z_err_bkg**2)

                ams_err_sig = abs(ams_plus_sig - ams_mins_sig) / 2.
                ams_err_bkg = abs(ams_plus_bkg - ams_mins_bkg) / 2.
                ams_err = np.sqrt(ams_err_sig**2 + ams_err_bkg**2)

            significance.append(Z)
            significance_err.append(Z_err)
            asimov.append(ams)
            asimov_err.append(ams_err)

        s['sig'] = np.array(significance)
        s['sig_max'] = s['sig'].max()
        s['sig_err'] = np.array(significance_err)
        s['ams'] = np.array(asimov)
        s['ams_err'] = np.array(asimov_err)
        s['roc'] = np.array(roc)

        print(s['sig'])
        print(s['ams'])

        # Lower bin edge at which each significance estimate peaks; stored per
        # signal so that the plots below can use the values of the plotted one.
        s['sig_max_cut'] = bins[np.where(s['sig'] == s['sig'].max())][0]
        s['ams_max_cut'] = bins[np.where(s['ams'] == s['ams'].max())][0]

        best_bin = np.where(bins[:-1] == s['sig_max_cut'])
        Z = asimovZ(s['outputScore'][best_bin], totalBkgOutput[best_bin],
                    np.sqrt(totalBkgVar[best_bin]), syst=False)
        Z_syst = asimovZ(s['outputScore'][best_bin], totalBkgOutput[best_bin],
                         np.sqrt(totalBkgVar[best_bin]), syst=True)
        _show('RooStats: ', s['sig'].max(), s['sig_max_cut'], Z, Z_syst)
        _show('asmiov : ', s['ams'].max(), s['ams_max_cut'])

    # Everything below refers to the first signal, which is the one plotted.
    ref = Signal[0]
    sigMax_index = ref['sig_max_cut']
    amsMax_index = ref['ams_max_cut']

    above = np.where(bins[:-1] >= sigMax_index)
    _show(ref['outputScore'][above], ref['output_var'][above])
    _show(totalBkgOutput[above], totalBkgVar[above])
    # 'output_var' already holds sums of squared weights, so the uncertainty
    # on the yield is the square root of their sum.
    _show(np.sum(ref['outputScore'][above]), np.sqrt(np.sum(ref['output_var'][above])))
    _show(np.sum(totalBkgOutput[above]), np.sqrt(np.sum(totalBkgVar[above])))
    _show(ref['outputScore'], ref['output_var'])
    _show(totalBkgOutput, totalBkgVar)

    if not os.path.exists('plots'):
        os.makedirs('plots')

    print('Plotting the output score...')
    plt.figure(figsize=(8, 6))
    ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)
    ax1.set_xlim((bins[0], bins[-1]))
    ax1.set_xlabel('Output score', horizontalalignment='right', x=1.0)
    ax1.set_ylabel("Events", horizontalalignment='right', y=1.0)

    # Stacked from bottom to top; the index refers to the position of the
    # sample group in BACKGROUND.
    stack = [
        (4, 'seagreen', 'multiboson'),
        (3, 'lightcoral', 'ttV'),
        (2, 'gold', 'W+jets'),
        (1, 'limegreen', 'singletop'),
        (0, 'dodgerblue', 'ttbar'),
    ]
    bottom = np.zeros_like(totalBkgOutput)
    for idx, colour, label in stack:
        bkg = Background[idx]
        plt.bar(center,
                bkg['outputScore'],
                width=db,
                yerr=np.sqrt(bkg['output_var']),
                color=colour,
                alpha=0.5,
                error_kw=dict(ecolor=colour, lw=1.5),
                label=label,
                bottom=bottom)
        bottom = bottom + bkg['outputScore']

    plt.bar(center,
            ref['outputScore'],
            width=db,
            yerr=np.sqrt(ref['output_var']),
            label=ref['name'],
            color='r',
            alpha=0.5,
            error_kw=dict(ecolor='r', lw=1.5))

    ax1.set_ylim((0.1, totalBkgOutput.max() * (15.)))
    ax1.set_yscale('log')
    plt.legend(loc="best", frameon=False)

    AtlasStyle_mpl.ATLASLabel(ax1, 0.14, 0.84, 'Work in progress')
    AtlasStyle_mpl.LumiLabel(ax1, 0.14, 0.79, lumi=LUMI * 0.001)

    plt.savefig("plots/" + modelfile + "_eval-bWN-500-380_outputScore.pdf")
    plt.savefig("plots/" + modelfile + "_eval-bWN-500-380_outputScore.png")
    plt.close()

    print('Plotting significance...')
    plt.figure(figsize=(8, 6))
    ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)
    ax1.set_xlim((bins[0], bins[-1]))
    ax1.set_xlabel('Output score', horizontalalignment='right', x=1.0)
    ax1.set_ylabel("Z", horizontalalignment='right', y=1.0)

    plt.plot(center,
             ref['ams'],
             '-',
             color='cornflowerblue',
             label='Asimov Z (max = %0.3f at %0.2f)' % (ref['ams'].max(), amsMax_index))
    plt.fill_between(center,
                     ref['ams'] - ref['ams_err'],
                     ref['ams'] + ref['ams_err'],
                     alpha=0.2,
                     edgecolor='cornflowerblue',
                     facecolor='cornflowerblue',
                     linewidth=0)
    ax1.set_ylim((0., ref['ams'].max() * (1.5)))

    plt.plot(center,
             ref['sig'],
             '-',
             color='darkred',
             label='Binomial Z (max = %0.3f at %0.2f)' % (ref['sig'].max(), sigMax_index))
    plt.fill_between(center,
                     ref['sig'] - ref['sig_err'],
                     ref['sig'] + ref['sig_err'],
                     alpha=0.2,
                     edgecolor='darkred',
                     facecolor='darkred',
                     linewidth=0)

    # Horizontal guide lines at Z = 3 and Z = 5.
    plt.plot(center, len(center) * [3.], '--', color='grey', alpha=0.5)
    plt.plot(center, len(center) * [5.], '--', color='red', alpha=0.5)

    plt.legend(loc="best", frameon=False)

    AtlasStyle_mpl.ATLASLabel(ax1, 0.14, 0.84, 'Work in progress')
    AtlasStyle_mpl.LumiLabel(ax1, 0.14, 0.79, lumi=LUMI * 0.001)

    plt.savefig("plots/" + modelfile + "_Significance_bWN-500-380.pdf")
    plt.savefig("plots/" + modelfile + "_Significance_bWN-500-380.png")
    plt.close()

    print('Plotting ROC...')
    plt.figure(figsize=(8, 6))
    ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)
    ax1.set_xlim((bins[0], bins[-1]))
    ax1.set_ylim((0, 1))
    ax1.set_xlabel(r'$\epsilon_{Sig.}$', horizontalalignment='right', x=1.0)
    ax1.set_ylabel("$r_{Bkg.}$", horizontalalignment='right', y=1.0)

    # Signal efficiency integrated over background rejection; the sample
    # points are passed explicitly, so no spacing argument is needed.
    auc = np.trapz(ref['roc'][:, 0], ref['roc'][:, 1])
    _show('Area under ROC?!: ', auc)

    plt.plot(ref['roc'][:, 0],
             ref['roc'][:, 1],
             '-',
             color='cornflowerblue',
             label='ROC (AUC = %0.4f)' % (auc))
    plt.plot([0, 1], [1, 0], '--', color=(0.6, 0.6, 0.6), label='Luck')

    plt.legend(loc="lower left", frameon=False)

    AtlasStyle_mpl.ATLASLabel(ax1, 0.14, 0.28, 'Work in progress')
    AtlasStyle_mpl.LumiLabel(ax1, 0.14, 0.23, lumi=LUMI * 0.001)

    plt.savefig("plots/" + modelfile + "_ROC_bWN-500-380.pdf")
    plt.savefig("plots/" + modelfile + "_ROC_bWN-500-380.png")
    plt.close()
def plot_output_score(sig_predicted,
                      sig_w,
                      bkg_predicted,
                      bkg_w,
                      binning,
                      fileName='Test',
                      normed=False,
                      save=False,
                      addStr='',
                      ratio=True,
                      log=False,
                      sample=None):
    """Plot the binary classifier output for signal and background.

    Args:
        sig_predicted: classifier output for signal events; flattened with
            ``ravel()`` before histogramming.
        sig_w: per-event weights of the signal events.
        bkg_predicted: classifier output for background events.
        bkg_w: per-event weights of the background events.
        binning: sequence ``(number of bins, lower edge, upper edge)``.
        fileName: prefix of the output file names inside ``plots/``.
        normed: if True, draw densities and label the y-axis 'a. u.'.
        save: if True, write ``.pdf`` and ``.png`` files to ``plots/``,
            creating the folder when necessary.
        addStr: extra string appended to the output file name.
        ratio: if True, add a signal/background ratio panel below the main pad.
        log: if True, use a logarithmic y-axis and append '_log' to the name.
        sample: optional text shown as an additional legend entry.

    Returns:
        Tuple ``(r, s_bins)``: the value returned by ``getRatio`` (0 when
        ``ratio`` is False) and the bin edges of the signal histogram.
    """
    print('Plotting the binary output score...')

    plt.figure(figsize=(8, 6))
    if ratio:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=3)
        # The x-axis label and tick labels are drawn on the ratio panel instead.
        ax1.set_xlabel('', fontsize=0.)
        ax1.set_xticklabels(())
    else:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)

    ax1.tick_params(direction='in')
    ax1.set_xlim((binning[1], binning[2]))
    ax1.xaxis.set_ticks_position('both')
    ax1.yaxis.set_ticks_position('both')

    hist_range = (binning[1], binning[2])
    s_hist, s_bins, s_patches = plt.hist(sig_predicted.ravel(),
                                         weights=sig_w,
                                         histtype='stepfilled',
                                         color='r',
                                         label='Signal',
                                         alpha=0.5,
                                         bins=binning[0],
                                         range=hist_range,
                                         density=normed)
    b_hist, b_bins, b_patches = plt.hist(bkg_predicted.ravel(),
                                         weights=bkg_w,
                                         histtype='stepfilled',
                                         color='b',
                                         label='Background',
                                         alpha=0.5,
                                         bins=binning[0],
                                         range=hist_range,
                                         density=normed)

    log_str = ''
    if log:
        plt.yscale('log', nonposy='clip')
        log_str = '_log'

    s_w = getSumW2(sig_predicted.ravel(), sig_w, binning)
    b_w = getSumW2(bkg_predicted.ravel(), bkg_w, binning)

    # Both labels are anchored at the top end of the y-axis.
    if normed:
        ax1.set_ylabel('a. u.', horizontalalignment='right', y=1.0)
    else:
        ax1.set_ylabel('Events', horizontalalignment='right', y=1.0)

    # Leave head-room above the background histogram for legend and labels.
    if log:
        ax1.set_ylim((0, b_hist.max() * (30)))
    else:
        ax1.set_ylim((0, b_hist.max() * (1 + 0.33)))

    if sample is not None:
        # Invisible patch that only contributes its text to the legend.
        sample_patch = mpatches.Patch(color='None', label=sample)
        plt.legend(loc='best',
                   frameon=False,
                   handles=[s_patches[0], b_patches[0], sample_patch])
    else:
        plt.legend(loc='best', frameon=False)

    AtlasStyle_mpl.ATLASLabel2(ax1, 0.02, 0.9, 'Work in progress')
    AtlasStyle_mpl.LumiLabel(ax1, 0.02, 0.8, lumi=140)

    # Without a ratio panel there is no ratio to hand back; 0 is the
    # placeholder used in that case.
    r = 0
    if ratio:
        ax2 = plt.subplot2grid((4, 4), (3, 0), colspan=4, rowspan=1)
        r = getRatio(s_hist, s_bins, s_w, b_hist, b_bins, b_w, 'r')
        ax2.set_xlabel('EPD', horizontalalignment='right', x=1.0)
        ax2.set_ylabel('S/B')
        ax2.set_xlim((binning[1], binning[2]))
        ax2.set_ylim((-0.5, 2.5))
        ax2.grid()
        ax2.tick_params(direction='in')
        ax2.xaxis.set_ticks_position('both')
        ax2.yaxis.set_ticks_position('both')

    ax1.set_xlabel('EPD', horizontalalignment='right', x=1.0)

    if save:
        if not os.path.exists('./plots/'):
            os.makedirs('./plots/')
            print('Creating folder plots')
        base = 'plots/' + fileName + '_output_score' + addStr + log_str
        plt.savefig(base + '.pdf')
        plt.savefig(base + '.png')
        plt.close()

    return r, s_bins
def plotShape(var,
              samples,
              weights,
              color,
              binning,
              xTitle,
              yTitle="Events",
              lumi=100,
              unit=None,
              legend=None,
              log=False,
              ratio=False,
              ratioTitle='1/nominal',
              ratioLimit=(0, 2),
              normed=False,
              savePlot=False,
              fileName=None):
    """Draw the distribution of one variable for one or several samples.

    Args:
        var: name of the variable; used as ``sample[str(var)]``.
        samples: either a list of samples, or a single
            ``(variables, weights)`` tuple.
        weights: per-sample weights, indexed like ``samples``; unused when
            ``samples`` is a tuple.
        color: one colour per sample (a single colour for a tuple sample).
        binning: sequence ``(number of bins, lower edge, upper edge)``.
        xTitle: x-axis label.
        yTitle: y-axis label for un-normalised plots; normalised plots are
            labelled 'a. u.'.
        lumi: luminosity passed on to the luminosity label as a string.
        unit: None or 'MeV' (values used as they are) or 'GeV' (values
            multiplied by 0.001); case-insensitive.
        legend: one legend entry per sample (a single entry for a tuple).
        log: if True, use a logarithmic y-axis.
        ratio: if True, add a panel with the ratio of every further sample to
            the first one.
        ratioTitle: y-axis label of the ratio panel.
        ratioLimit: y-axis range of the ratio panel.
        normed: if True, histogram densities instead of yields.
        savePlot: if True, save ``fileName + '.pdf'`` and ``'.png'``.
        fileName: output path without suffix; needed when ``savePlot`` is set.

    Returns:
        0 if ``samples`` is neither a list nor a tuple, otherwise None.

    Raises:
        ValueError: if ``unit`` is not None, 'MeV' or 'GeV'.
    """
    # Validate the inputs before a figure is created, so that a bad call does
    # not leave an open figure behind.
    if unit is None or unit.lower() == 'mev':
        unit_fact = 1.
    elif unit.lower() == 'gev':
        unit_fact = 0.001
    else:
        raise ValueError(
            "Unsupported unit {!r}; expected None, 'MeV' or 'GeV'".format(unit))

    # Bring the single-sample and the multi-sample form into one shape.
    if isinstance(samples, list):
        frames = samples
        sample_weights = weights
        colors = color
        labels = legend if legend is not None else [None] * len(samples)
    elif isinstance(samples, tuple):
        frames = [samples[0]]
        sample_weights = [samples[1]]
        colors = [color]
        labels = [legend]
    else:
        print("Expected {} sample as tuple of variables and weights!".format(
            samples))
        return 0

    plt.figure(figsize=(8, 6))
    if ratio:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=3)
        # The x-axis label and tick labels are drawn on the ratio panel instead.
        ax1.set_xlabel('', fontsize=0.)
        ax1.set_xticklabels(())
    else:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)

    ax1.tick_params(direction='in')
    ax1.set_xlim((binning[1], binning[2]))
    ax1.xaxis.set_ticks_position('both')
    ax1.yaxis.set_ticks_position('both')

    sumW2 = []
    hists = []
    for i, smp in enumerate(frames):
        values = smp[str(var)].ravel()
        w = sample_weights[i].ravel()
        # NOTE(review): getSumW2 receives the unscaled values while the
        # histogram is filled with values * unit_fact -- confirm this is
        # intended for unit='GeV'.
        sumW2.append(getSumW2(values, w, binning))
        hists.append(
            np.histogram(values * unit_fact,
                         weights=w,
                         bins=binning[0],
                         range=(binning[1], binning[2]),
                         density=normed))
        hist, edges = hists[i]
        width = edges[1] - edges[0]
        center = (edges[:-1] + edges[1:]) / 2
        plt.errorbar(center,
                     hist,
                     xerr=[width / 2.] * binning[0],
                     yerr=sumW2[i].ravel(),
                     fmt='o',
                     color=colors[i],
                     label=labels[i])

    _max = np.max([h[0].max() for h in hists])

    if normed:
        ax1.set_ylabel("a. u.", ha='left')
    else:
        ax1.set_ylabel(yTitle, ha='left')

    if log:
        ax1.set_yscale('log')
        ax1.set_ylim((0.01, _max * 100))
    elif normed:
        ax1.set_ylim((0, 1.5))
    else:
        ax1.set_ylim((0, _max * 1.4))

    plt.legend(loc='best', frameon=False)

    AtlasStyle_mpl.ATLASLabel(ax1, 0.02, 0.9, 'Work in progress')
    AtlasStyle_mpl.LumiLabel(ax1, 0.02, 0.8, lumi=str(lumi))

    if ratio:
        ax2 = plt.subplot2grid((4, 4), (3, 0), colspan=4, rowspan=1)
        # Every further sample is compared with the first one; with a single
        # sample the panel stays empty.
        for i in range(1, len(hists)):
            getRatio(hists[i][0], hists[i][1], sumW2[i], hists[0][0],
                     hists[0][1], sumW2[0], colors[i])
        ax2.set_xlabel(xTitle)
        ax2.set_ylabel(ratioTitle)
        ax2.set_xlim((binning[1], binning[2]))
        ax2.set_ylim(ratioLimit)
        ax2.grid()
        ax2.tick_params(direction='in')
        ax2.xaxis.set_ticks_position('both')
        ax2.yaxis.set_ticks_position('both')

    ax1.set(xlabel=xTitle)

    if savePlot:
        plt.savefig(fileName + ".pdf")
        plt.savefig(fileName + ".png")
        plt.close()
def main():
    """Compare the ROC curves of all models listed in ``MODELS``.

    For every model dictionary the model, its optional scaler and its info
    file are loaded. Every sample group in ``SIGNAL`` and ``BACKGROUND`` is
    evaluated and filled into a weighted output-score histogram using the
    module-level ``bins``. From these, signal efficiency and background
    rejection are computed for a lower cut at every bin edge. The results are
    stored in the model dictionary itself. Afterwards all ROC curves and the
    working points from ``WP`` are drawn into one figure, which is saved as
    ``SAVEDIR + FILENAME`` with the suffixes ``.pdf`` and ``.png``.

    Raises:
        ValueError: if the analysis method named in an info file contains
            neither 'nn' nor 'bdt', so no model can be loaded.
    """

    def _show(*items):
        # Version-neutral stand-in for a multi-argument print statement.
        print(' '.join(str(item) for item in items))

    n_bins = len(bins) - 1

    for m in MODELS:
        modelDir = DIR + m['mdir'] + '.h5'
        DatasetDir = 'TrainedModels/datasets/'

        scaler_path = os.path.join(DIR, m['mdir'] + '_scaler.pkl')
        m['scaler'] = joblib.load(scaler_path) if os.path.exists(scaler_path) else None

        # The info file is only needed for three of its lines; close it right away.
        with open(modelDir.replace('.h5', '_infofile.txt')) as infofile:
            infos = infofile.readlines()

        m['analysis'] = infos[0].replace('Used analysis method: ', '').replace('\n', '')
        m['dataset'] = DatasetDir + infos[3].replace('Used dataset: ', '').replace('\n', '')
        m['VAR'] = infos[5].replace('Used variables for training: ', '').replace('\n', '').split()

        method = m['analysis'].lower()
        m['recurrent'] = method == 'rnn'
        if m['recurrent']:
            m['seq_scaler'] = m['dataset'] + '_scaling.json'

        if 'nn' in method:
            m['model'] = load_model(os.path.join(DIR, m['mdir'] + '.h5'))
        elif 'bdt' in method:
            m['model'] = joblib.load(os.path.join(DIR, m['mdir'] + '.h5'))
        else:
            raise ValueError('Unknown analysis method in info file: %r' % m['analysis'])

        print('#----MODEL----#')
        print('\t{}'.format(m['mdir']))

        ###########################
        # Read and evaluate signals
        ###########################

        m['Signal'] = []
        for smp in SIGNAL:
            # All sub-samples of one group are concatenated before evaluation.
            first = True
            for s in smp:
                print('Sample:\t{}'.format(s))
                x, y = pickBenchmark(s)
                if not m['recurrent']:
                    _df, _weight = loadDataFrame(
                        os.path.join(inputDir, s + '/'), PRESELECTION,
                        m['VAR'], WEIGHTS, LUMI)
                    _show(_df.shape, _weight.shape)
                    if first:
                        df = _df.copy()
                        weight = _weight.copy()
                        first = False
                    else:
                        df = pd.concat((df, _df), ignore_index=True)
                        weight = pd.concat((weight, _weight), ignore_index=True)
                else:
                    _df, _weight, collection = loadSequentialDataFrame(
                        os.path.join(inputDir, s + '/'), PRESELECTION,
                        COLLECTION, REMOVE_VAR, m['VAR'], WEIGHTS, LUMI)
                    _show(_df.shape, _weight.shape, collection[0]['df'].shape)
                    if first:
                        df = _df.copy()
                        weight = _weight.copy()
                        seq = collection[0]['df'].copy()
                        first = False
                    else:
                        df = pd.concat((df, _df), ignore_index=True)
                        weight = pd.concat((weight, _weight), ignore_index=True)
                        seq = pd.concat((seq, collection[0]['df']), ignore_index=True)

            if not m['recurrent']:
                m['y_pred_sig'] = evaluate(m['model'], df.values, m['scaler'],
                                           method=m['analysis'])
            else:
                collection[0]['df'] = seq.copy()
                m['y_pred_sig'] = evaluate(m['model'], df.values, m['scaler'],
                                           m['seq_scaler'],
                                           method=m['analysis'],
                                           col=collection)
            m['y_sig'] = np.ones(m['y_pred_sig'].shape[0])

            # get the bin index of the output score for each event
            bin_index = np.digitize(m['y_pred_sig'][:, 0], bins[1:])

            outputWeighted = []
            outputWeightedVar = []
            outputMC = []
            outputMCVar = []
            for i in range(n_bins):
                w = weight.values[np.where(bin_index == i)[0]]
                outputWeighted.append(w.sum())
                outputWeightedVar.append(np.sum(w**2.))  # sum of squared weights
                outputMC.append(len(w))
                outputMCVar.append(np.sqrt(len(w)))

            # Name and mass point are those of the last sub-sample of the group.
            m['Signal'].append({
                'name': s[6:],
                'm_stop': x,
                'm_X': y,
                'dataset': df,
                'weight': weight,
                'nEvents': weight.sum(),
                'outputScore': np.array(outputWeighted),
                'outputMC': np.array(outputMC),
                'output_var': np.array(outputWeightedVar),
                'outputMC_var': np.array(outputMCVar)
            })
            del df, weight, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar

        ###############################
        # Read and evaluate backgrounds
        ###############################

        m['totBkgEvents'] = 0.
        m['totBkgVar'] = 0.
        m['Background'] = []
        for smp in BACKGROUND:
            first = True
            for b in smp:
                print('Sample:\t{}'.format(b))
                if not m['recurrent']:
                    _df, _weight = loadDataFrame(
                        os.path.join(inputDir, b + '/'), PRESELECTION,
                        m['VAR'], WEIGHTS, LUMI)
                    _show(_df.shape, _weight.shape)
                    if first:
                        df = _df.copy()
                        weight = _weight.copy()
                        first = False
                    else:
                        df = pd.concat((df, _df), ignore_index=True)
                        weight = pd.concat((weight, _weight), ignore_index=True)
                else:
                    _df, _weight, collection = loadSequentialDataFrame(
                        os.path.join(inputDir, b + '/'), PRESELECTION,
                        COLLECTION, REMOVE_VAR, m['VAR'], WEIGHTS, LUMI)
                    _show(_df.shape, _weight.shape, collection[0]['df'].shape)
                    if first:
                        df = _df.copy()
                        weight = _weight.copy()
                        seq = collection[0]['df'].copy()
                        first = False
                    else:
                        df = pd.concat((df, _df), ignore_index=True)
                        weight = pd.concat((weight, _weight), ignore_index=True)
                        seq = pd.concat((seq, collection[0]['df']), ignore_index=True)

            # Predictions and labels are stored under keys that carry the name
            # of the last sub-sample of the group.
            pred_key = '_'.join(['y_pred', b])
            label_key = '_'.join(['y', b])
            if not m['recurrent']:
                _show(df.shape, weight.shape)
                m[pred_key] = evaluate(m['model'], df.values, m['scaler'],
                                       method=m['analysis'])
            else:
                collection[0]['df'] = seq
                _show(df.shape, weight.shape, collection[0]['df'].shape)
                m[pred_key] = evaluate(m['model'], df.values, m['scaler'],
                                       m['seq_scaler'],
                                       method=m['analysis'],
                                       col=collection)
            m[label_key] = np.zeros(m[pred_key].shape[0])

            bin_index = np.digitize(m[pred_key][:, 0], bins[1:])

            outputWeighted = []
            outputWeightedVar = []
            outputMC = []
            outputMCVar = []

            m['totBkgEvents'] += weight.sum()
            m['totBkgVar'] += np.sum(weight.values**2.)

            for i in range(n_bins):
                w = weight.values[np.where(bin_index == i)[0]]
                outputWeighted.append(w.sum())
                outputWeightedVar.append(np.sum(w**2.))
                outputMC.append(len(w))
                # NOTE(review): the signal loop stores sqrt(N) under this key,
                # the background loop stores N -- confirm which is intended.
                outputMCVar.append(len(w))

            m['Background'].append({
                'name': b,
                'dataset': df,
                'weight': weight,
                'nEvents': weight.sum(),
                'outputScore': np.array(outputWeighted),
                'outputMC': np.array(outputMC),
                'output_var': np.array(outputWeightedVar),
                'outputMC_var': np.array(outputMCVar)
            })
            del df, weight, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar

        m['totalBkgOutput'] = np.array(
            [b['outputScore'] for b in m['Background']]).sum(axis=0)
        m['totalBkgVar'] = np.array(
            [b['output_var'] for b in m['Background']]).sum(axis=0)
        bkg_total = m['totalBkgOutput'].sum()

        # ROC points: keep all events with output score >= lower edge of bin i.
        # The lists are reset per signal, so the last signal is the one kept.
        for s in m['Signal']:
            m['roc'] = []
            m['roc_err'] = []
            m['tot_rel'] = np.sqrt(np.sum(s['output_var'])) / s['nEvents']

            for i in range(n_bins):
                n_bkg = m['totalBkgOutput'][i:].sum()
                sigma_bkg = np.sqrt(np.sum(m['totalBkgVar'][i:]))

                eff_sig = s['outputScore'][i:].sum() / s['nEvents']
                eff_bkg = n_bkg / bkg_total
                err_bkg = sigma_bkg / bkg_total

                rel_err_bkg = sigma_bkg / n_bkg if n_bkg > 0. else 0.
                # Statistical background uncertainty plus a flat 25% term.
                m['total_rel_err'] = np.sqrt(rel_err_bkg**2. + 0.25**2.)

                m['roc'].append((eff_sig, 1 - eff_bkg))

                # Half the spread of the rejection under a +-1 sigma shift of
                # the background efficiency.
                roc_plus_bkg = 1 - (eff_bkg + err_bkg)
                roc_mins_bkg = 1 - (eff_bkg - err_bkg)
                roc_err_bkg = abs(roc_plus_bkg - roc_mins_bkg) / 2.

                m['roc_err'].append(roc_err_bkg)

            m['roc'] = np.array(m['roc'])
            m['roc_err'] = np.array(m['roc_err'])

    print('Plotting ROC curve ...')
    plt.figure(figsize=(8, 6))
    ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)
    ax1.set_xlabel(r'$\epsilon_{Sig.}$', horizontalalignment='right', x=1.0)
    ax1.set_ylabel('$r_{Bkg.}$', horizontalalignment='right', y=1.0)

    for m in MODELS:
        eff = m['roc'][:, 0]
        rej = m['roc'][:, 1]
        # Signal efficiency integrated over background rejection; the sample
        # points are passed explicitly, so no spacing argument is needed.
        m['auc'] = np.trapz(eff, rej)
        print('Area under ROC:\t{}'.format(m['auc']))

        if logScale:
            # Log scale shows the inverse background efficiency 1 / (1 - r).
            ax1.set_yscale('log')
            plt.plot(eff,
                     1. / (1. - rej),
                     '-',
                     color=m['color'],
                     label='%s (AUC = %0.4f)' % (m['name'], m['auc']))
            plt.fill_between(eff,
                             1. / (1. - (rej - m['roc_err'])),
                             1. / (1. - (rej + m['roc_err'])),
                             alpha=0.2,
                             edgecolor=m['color'],
                             facecolor=m['color'],
                             linewidth=0)
        else:
            plt.plot(eff,
                     rej,
                     '-',
                     color=m['color'],
                     label='%s (AUC = %0.2f)' % (m['name'], m['auc']))
            plt.fill_between(eff, (rej - m['roc_err']), (rej + m['roc_err']),
                             alpha=0.2,
                             edgecolor=m['color'],
                             facecolor=m['color'],
                             linewidth=0)

    if not logScale:
        # Zoom into the high-rejection corner; these limits describe the
        # rejection itself and therefore only fit the linear plot.
        ax1.set_xlim((0, 0.16))
        ax1.set_ylim((0.975, 1.0))

    # Working points, drawn as (signal efficiency, background rejection).
    for p in WP:
        p['eff_sig'] = p['sig'] / BWN_PRESEL_SIG
        p['eff_bkg'] = p['bkg'] / BWN_PRESEL_BKG
        if p['legend']:
            plt.plot([p['eff_sig']], [1 - p['eff_bkg']],
                     '.',
                     color=p['color'],
                     label=p['name'])
        else:
            plt.plot([p['eff_sig']], [1 - p['eff_bkg']],
                     '.',
                     color=p['color'])

    plt.legend(loc="lower left", frameon=False)

    AtlasStyle_mpl.Text(ax1, 0.14, 0.52, 'Simulation')
    AtlasStyle_mpl.LumiLabel(ax1, 0.14, 0.46, lumi=LUMI * 0.001)

    plt.savefig(SAVEDIR + FILENAME + '.pdf')
    plt.savefig(SAVEDIR + FILENAME + '.png')
    plt.close()
def evaluate_signalGridCuts(modelDir, resolution=np.array([50,0,1], dtype=float), save=False, fileName='Test'):
    """
    Evaluate a trained model on the hard-coded 'stop_bWN_*' signal grid using
    the module-level ``preselection_evaluate`` and plot, for every grid point,
    the best significance found when scanning a lower cut on the output score.

    The training info file next to the model supplies the input variables
    (line 4), weights (line 5), luminosity (line 7) and background sample names
    (line 9).  Signal and background output scores are histogrammed, the
    significance (``BinomialExpZ``) is computed for every lower cut at a bin
    edge, and the maximum per signal point is interpolated linearly over the
    (m_stop, m_X) plane.

    Parameters
    ----------
    modelDir : str
        Path of the '.h5' model file.  '<model>_infofile.txt' and
        '<model>_scaler.pkl' are derived by replacing the '.h5' suffix.
    resolution : array-like of 3 numbers
        ``[number of bins, lower edge, upper edge]`` of the output-score
        histogram.
    save : bool
        If true, write the plot (pdf/png) and a text file listing the
        evaluation preselection into './plots/', numbered so that existing
        files are not overwritten, and close the figure.
    fileName : str
        Prefix of the written files.  The special value 'Grid_test' is replaced
        by a name derived from ``modelDir``.
    """
    print('Evaluating singal grid...')

    if fileName == 'Grid_test':
        fileName = modelDir.replace('TrainedModels/models/', '').replace('.h5', '')

    # The context manager closes the handle again (the file used to stay open).
    with open(modelDir.replace('.h5', '_infofile.txt')) as infofile:
        infos = infofile.readlines()

    # Parse strings for correct datatypes
    variables = infos[4].replace('Used variables for training: ', '').replace('\n', '').split()
    weights = infos[5].replace('Used weights: ', '').replace('\n', '').split()
    lumi = float(infos[7].replace('Used Lumi: ', '').replace('\n', ''))
    background = infos[9].replace('Used background files: ', '').replace('; \n', '').replace(' ', '').split(';')

    preselection = preselection_evaluate
    print('Using the following preselection to evaluate: %s' % (preselection,))

    signal = ['stop_bWN_250_100', 'stop_bWN_250_130', 'stop_bWN_250_160',
              'stop_bWN_300_150', 'stop_bWN_300_180', 'stop_bWN_300_210',
              'stop_bWN_350_185', 'stop_bWN_350_200', 'stop_bWN_350_230', 'stop_bWN_350_260',
              'stop_bWN_400_235', 'stop_bWN_400_250', 'stop_bWN_400_280', 'stop_bWN_400_310',
              'stop_bWN_450_285', 'stop_bWN_450_300', 'stop_bWN_450_330', 'stop_bWN_450_360',
              'stop_bWN_500_335', 'stop_bWN_500_350', 'stop_bWN_500_380',
              'stop_bWN_550_385', 'stop_bWN_550_400', 'stop_bWN_550_430', 'stop_bWN_550_460',
              'stop_bWN_600_435', 'stop_bWN_600_450', 'stop_bWN_600_480', 'stop_bWN_600_510',
              'stop_bWN_650_485', 'stop_bWN_650_500', 'stop_bWN_650_530', 'stop_bWN_650_560']

    # Get scaler and model from modelDir
    model = load_model(modelDir)
    scaler = joblib.load(modelDir.replace('.h5', '_scaler.pkl'))

    # float() guards against integer division if integer bounds are passed
    db = float(resolution[2] - resolution[1]) / resolution[0]  # bin width in discriminator distribution
    bins = np.arange(resolution[1], resolution[2] + db, db)    # bin edges in discriminator distribution
    upperEdges = bins[1:]
    nBins = len(upperEdges)

    ###########################
    # Read and evaluate signals
    ###########################
    Signal = []
    for name in signal:
        m_stop, m_X = pickBenchmark(name)
        df, weight = loadDataFrame(os.path.join(inputDirSig, name + '/'), preselection, variables, weights, lumi)
        y_hat = evaluate(model, df.values, scaler)
        # Bin index of the output score of each event; index i collects the
        # scores in [bins[i], bins[i+1]).
        bin_index = np.digitize(y_hat[:, 0], upperEdges)

        weightValues = weight.values
        outputWeighted = []
        outputWeightedVar = []
        outputMC = []
        outputMCVar = []
        for i in range(nBins):
            w = weightValues[bin_index == i]
            outputWeighted.append(w.sum())
            outputWeightedVar.append(np.sum(w**2.))
            outputMC.append(len(w))
            outputMCVar.append(np.sqrt(len(w)))

        Signal.append({'name': name, 'm_stop': m_stop, 'm_X': m_X, 'dataset': df, 'weight': weight,
                       'nEvents': weight.sum(), 'y_pred': y_hat,
                       'outputScore': np.array(outputWeighted), 'outputMC': np.array(outputMC),
                       'output_var': np.array(outputWeightedVar), 'outputMC_var': np.array(outputMCVar)})

    ###########################
    # Read and evaluate backgrounds
    ###########################
    Background = []
    for name in background:
        df, weight = loadDataFrame(os.path.join(inputDirBkg, name + '/'), preselection, variables, weights, lumi)
        y_hat = evaluate(model, df.values, scaler)
        bin_index = np.digitize(y_hat[:, 0], upperEdges)

        weightValues = weight.values
        outputWeighted = []
        outputWeightedVar = []
        outputMC = []
        outputMCVar = []
        for i in range(nBins):
            w = weightValues[bin_index == i]
            outputWeighted.append(w.sum())
            outputWeightedVar.append(np.sum(w**2.))
            outputMC.append(len(w))
            # NOTE(review): signal stores sqrt(N) here, background stores N.
            # Kept as found because 'outputMC_var' is not read in this function.
            outputMCVar.append(len(w))

        Background.append({'name': name, 'dataset': df, 'weight': weight,
                           'nEvents': weight.sum(), 'y_pred': y_hat,
                           'outputScore': np.array(outputWeighted), 'outputMC': np.array(outputMC),
                           'output_var': np.array(outputWeightedVar), 'outputMC_var': np.array(outputMCVar)})

    totalBkgOutput = np.array([b['outputScore'] for b in Background]).sum(axis=0)
    totalBkgVar = np.array([b['output_var'] for b in Background]).sum(axis=0)
    bkgTotal = totalBkgOutput.sum()

    binomialExpZ = ROOT.RooStats.NumberCountingUtils.BinomialExpZ

    for s in Signal:
        significance = []
        significance_err = []

        for i in range(nBins):
            # Events passing a lower cut at bin i are bins i..last.  The slice
            # used to be [i:-1], which dropped the highest-score bin from every
            # sum and left the last scan point empty.
            nSig = s['outputScore'][i:].sum()
            nBkg = totalBkgOutput[i:].sum()
            sigErrAbs = np.sqrt(np.sum(s['output_var'][i:]))
            bkgErrAbs = np.sqrt(np.sum(totalBkgVar[i:]))

            eff_sig = nSig / s['nEvents']
            eff_bkg = nBkg / bkgTotal
            err_sig = sigErrAbs / s['nEvents']
            err_bkg = bkgErrAbs / bkgTotal

            rel_err_bkg = bkgErrAbs / nBkg if nBkg > 0. else 0.
            rel_err_sig = sigErrAbs / nSig if nSig > 0. else 0.

            # Statistical uncertainties plus a flat 25% term
            total_rel_err = np.sqrt(rel_err_sig**2. + rel_err_bkg**2. + 0.25**2.)

            if (eff_sig == 0) or (eff_bkg == 0):
                Z = 0.
                Z_err = 0.
            elif (err_sig / eff_sig > 0.75) or (err_bkg / eff_bkg > 0.75):
                # Statistical precision too poor: do not quote a significance
                Z = 0.
                Z_err = 0.
            else:
                Z = binomialExpZ(nSig, nBkg, total_rel_err)

                # Error on Z from shifting signal and background yields by
                # +-1 sigma, combined in quadrature
                Zplus_sig = binomialExpZ((eff_sig + err_sig) * s['nEvents'], eff_bkg * bkgTotal, total_rel_err)
                Zmins_sig = binomialExpZ((eff_sig - err_sig) * s['nEvents'], eff_bkg * bkgTotal, total_rel_err)
                Zplus_bkg = binomialExpZ(eff_sig * s['nEvents'], (eff_bkg + err_bkg) * bkgTotal, total_rel_err)
                Zmins_bkg = binomialExpZ(eff_sig * s['nEvents'], (eff_bkg - err_bkg) * bkgTotal, total_rel_err)

                Z_err_sig = abs(Zplus_sig - Zmins_sig) / 2
                Z_err_bkg = abs(Zplus_bkg - Zmins_bkg) / 2
                Z_err = np.sqrt(Z_err_sig**2 + Z_err_bkg**2)

            significance.append(Z)
            significance_err.append(Z_err)

        s['sig'] = np.array(significance)
        s['sig_max'] = s['sig'].max()
        s['sig_err'] = np.array(significance_err)
        print(s['sig'])
        # Best significance and the lower bin edge(s) at which it is reached
        print('%s %s' % (s['sig_max'], bins[np.where(s['sig'] == s['sig_max'])]))

    x = np.array([s['m_stop'] for s in Signal], dtype=float)
    y = np.array([s['m_X'] for s in Signal], dtype=float)
    z = np.array([s['sig_max'] for s in Signal], dtype=float)
    print('%s %s %s' % (x, y, z))

    # Set up a regular grid of interpolation points
    fig, ax1 = plt.subplots(figsize=(8, 6))
    xi, yi = np.meshgrid(np.linspace(x.min(), x.max(), 100),
                         np.linspace(y.min(), y.max(), 100))

    # Interpolate
    interpolator = scipy.interpolate.LinearNDInterpolator(points=np.array((x, y)).T, values=z)
    zi = interpolator(xi, yi)

    im = ax1.imshow(zi, vmin=0., vmax=5., origin='lower',
                    extent=[x.min(), x.max(), y.min(), y.max()])
    cbar = plt.colorbar(im)
    cbar.set_label('Significance')
    ax1.set_xlabel(r'$m_{\tilde{t}}$')
    ax1.set_xlim([x.min(), x.max()])
    ax1.set_ylabel(r'$m_{\chi}$')
    ax1.set_ylim([y.min(), y.max()])
    plt.scatter(x, y, c='black')
    plt.plot(x, x - 84., color='black')
    plt.plot(x, x - 175., color='black')

    AtlasStyle_mpl.ATLASLabel(ax1, 0.022, 0.925, 'Work in progress')
    AtlasStyle_mpl.LumiLabel(ax1, 0.022, 0.875, lumi=lumi * 0.001)

    #plt.show()

    # NOTE(review): the original layout was whitespace-mangled; everything from
    # here on (including plt.close()) is assumed to belong to `if save` - confirm.
    if save:
        if not os.path.exists('./plots/'):
            os.makedirs('./plots/')
            print('Creating folder plots')

        # Find the first running number that does not overwrite an old info file
        n = 1
        filepath = './plots/' + fileName + '_evaluated_grid_cuts_' + str(n) + '_infofile.txt'
        while os.path.isfile(filepath):
            n += 1
            filepath = './plots/' + fileName + '_evaluated_grid_cuts_' + str(n) + '_infofile.txt'

        print('Saving evaluation informations to ' + filepath)
        presels = ''
        for pre in preselection_evaluate:
            if pre['type'] == 'condition':
                presels += (pre['name'] + '-threshold: ' + str(pre['threshold'])
                            + ' type: ' + pre['type'] + ' variable: ' + pre['variable']
                            + ' lessthan: ' + str(pre['lessthan'])
                            + ' and morethan: ' + str(pre['morethan']) + '; ')
            else:
                presels += (pre['name'] + '-threshold: ' + str(pre['threshold'])
                            + ' type: ' + pre['type'] + '; ')

        with open(filepath, 'w') as infofile:
            infofile.write('Used preselection for evaluation: ' + presels)

        plt.savefig('plots/' + fileName + '_evaluated_grid_cuts_' + str(n) + '.pdf')
        plt.savefig('plots/' + fileName + '_evaluated_grid_cuts_' + str(n) + '.png')
        plt.close()
def evaluate_signalGrid(modelDir, resolution=np.array([50,0,1], dtype=float), save=False, fileName='Test'):
    """
    Evaluate a trained model on the hard-coded 'stop_bWN_*' signal grid using
    the preselection stored in the model's info file, plot the best
    significance per grid point and plot the number of events per sample.

    The training info file next to the model supplies the input variables
    (line 4), weights (line 5), preselection (line 6), luminosity (line 7) and
    background sample names (line 9).  Two figures are produced:

    1. the maximum ``BinomialExpZ`` significance of a lower cut on the output
       score, interpolated linearly over the (m_stop, m_X) plane;
    2. the number of loaded events per signal point, grouped by the mass
       difference m_stop - m_X (165, 150, 120, 90), with the background event
       counts and the number of trainable model parameters in the legend.

    Parameters
    ----------
    modelDir : str
        Path of the '.h5' model file.  '<model>_infofile.txt' and
        '<model>_scaler.pkl' are derived by replacing the '.h5' suffix.
    resolution : array-like of 3 numbers
        ``[number of bins, lower edge, upper edge]`` of the output-score
        histogram.
    save : bool
        If true, write the figures (pdf/png) and a text file with the event
        counts into './plots/' and close the figures.
    fileName : str
        Prefix of the written files.

    Returns
    -------
    0 if a signal point lies on none of the known mass-difference diagonals
    (an error message is printed), otherwise None.

    Raises
    ------
    KeyError
        If 'mc16d_ttbar', 'mc16d_singletop' or 'mc16d_Wjets' is not among the
        background samples listed in the info file.
    """
    print('Evaluating signal grid...')

    # The context manager closes the handle again (the file used to stay open).
    with open(modelDir.replace('.h5', '_infofile.txt')) as infofile:
        infos = infofile.readlines()

    # Parse strings for correct datatypes
    variables = infos[4].replace('Used variables for training: ', '').replace('\n', '').split()
    weights = infos[5].replace('Used weights: ', '').replace('\n', '').split()
    preselection_raw = infos[6].replace('Used preselection: ', '').replace('; \n', '').split(';')
    preselection = []
    for entry in preselection_raw:
        # Entry layout:
        #   <name>-threshold: <thr> type: <type>
        #   [variable: <var> lessthan: <lt> and morethan: <mt>]
        tokens = entry.split()
        xdict = {'name': tokens[0].split('-')[0],
                 'threshold': float(tokens[1]),
                 'type': tokens[3]}
        if xdict['type'] == 'condition':
            xdict['variable'] = tokens[5]
            xdict['lessthan'] = float(tokens[7])
            xdict['morethan'] = float(tokens[10])
        preselection.append(xdict)
    lumi = float(infos[7].replace('Used Lumi: ', '').replace('\n', ''))
    background = infos[9].replace('Used background files: ', '').replace('; \n', '').replace(' ', '').split(';')

    signal = ['stop_bWN_250_100', 'stop_bWN_250_130', 'stop_bWN_250_160',
              'stop_bWN_300_150', 'stop_bWN_300_180', 'stop_bWN_300_210',
              'stop_bWN_350_185', 'stop_bWN_350_200', 'stop_bWN_350_230', 'stop_bWN_350_260',
              'stop_bWN_400_235', 'stop_bWN_400_250', 'stop_bWN_400_280', 'stop_bWN_400_310',
              'stop_bWN_450_285', 'stop_bWN_450_300', 'stop_bWN_450_330', 'stop_bWN_450_360',
              'stop_bWN_500_335', 'stop_bWN_500_350', 'stop_bWN_500_380',
              'stop_bWN_550_385', 'stop_bWN_550_400', 'stop_bWN_550_430', 'stop_bWN_550_460',
              'stop_bWN_600_435', 'stop_bWN_600_450', 'stop_bWN_600_480', 'stop_bWN_600_510',
              'stop_bWN_650_485', 'stop_bWN_650_500', 'stop_bWN_650_530', 'stop_bWN_650_560']

    # Get scaler and model from modelDir
    model = load_model(modelDir)
    scaler = joblib.load(modelDir.replace('.h5', '_scaler.pkl'))

    # float() guards against integer division if integer bounds are passed
    db = float(resolution[2] - resolution[1]) / resolution[0]  # bin width in discriminator distribution
    bins = np.arange(resolution[1], resolution[2] + db, db)    # bin edges in discriminator distribution
    upperEdges = bins[1:]
    nBins = len(upperEdges)

    ###########################
    # Read and evaluate signals
    ###########################
    statInfoSig = {}  # number of loaded events per signal sample
    Signal = []
    for name in signal:
        m_stop, m_X = pickBenchmark(name)
        df, weight = loadDataFrame(os.path.join(inputDirSig, name + '/'), preselection, variables, weights, lumi)
        statInfoSig[name] = df.shape[0]
        y_hat = evaluate(model, df.values, scaler)
        # Bin index of the output score of each event; index i collects the
        # scores in [bins[i], bins[i+1]).
        bin_index = np.digitize(y_hat[:, 0], upperEdges)

        weightValues = weight.values
        outputWeighted = []
        outputWeightedVar = []
        outputMC = []
        outputMCVar = []
        for i in range(nBins):
            w = weightValues[bin_index == i]
            outputWeighted.append(w.sum())
            outputWeightedVar.append(np.sum(w**2.))
            outputMC.append(len(w))
            outputMCVar.append(np.sqrt(len(w)))

        Signal.append({'name': name, 'm_stop': m_stop, 'm_X': m_X, 'dataset': df, 'weight': weight,
                       'nEvents': weight.sum(), 'y_pred': y_hat,
                       'outputScore': np.array(outputWeighted), 'outputMC': np.array(outputMC),
                       'output_var': np.array(outputWeightedVar), 'outputMC_var': np.array(outputMCVar)})

    ###########################
    # Read and evaluate backgrounds
    ###########################
    statInfoBkg = {}  # number of loaded events per background sample
    Background = []
    for name in background:
        df, weight = loadDataFrame(os.path.join(inputDirBkg, name + '/'), preselection, variables, weights, lumi)
        statInfoBkg[name] = df.shape[0]
        y_hat = evaluate(model, df.values, scaler)
        bin_index = np.digitize(y_hat[:, 0], upperEdges)

        weightValues = weight.values
        outputWeighted = []
        outputWeightedVar = []
        outputMC = []
        outputMCVar = []
        for i in range(nBins):
            w = weightValues[bin_index == i]
            outputWeighted.append(w.sum())
            outputWeightedVar.append(np.sum(w**2.))
            outputMC.append(len(w))
            # NOTE(review): signal stores sqrt(N) here, background stores N.
            # Kept as found because 'outputMC_var' is not read in this function.
            outputMCVar.append(len(w))

        Background.append({'name': name, 'dataset': df, 'weight': weight,
                           'nEvents': weight.sum(), 'y_pred': y_hat,
                           'outputScore': np.array(outputWeighted), 'outputMC': np.array(outputMC),
                           'output_var': np.array(outputWeightedVar), 'outputMC_var': np.array(outputMCVar)})

    totalBkgOutput = np.array([b['outputScore'] for b in Background]).sum(axis=0)
    totalBkgVar = np.array([b['output_var'] for b in Background]).sum(axis=0)
    bkgTotal = totalBkgOutput.sum()

    binomialExpZ = ROOT.RooStats.NumberCountingUtils.BinomialExpZ

    for s in Signal:
        significance = []
        significance_err = []

        for i in range(nBins):
            # Events passing a lower cut at bin i are bins i..last.  The slice
            # used to be [i:-1], which dropped the highest-score bin from every
            # sum and left the last scan point empty.
            nSig = s['outputScore'][i:].sum()
            nBkg = totalBkgOutput[i:].sum()
            sigErrAbs = np.sqrt(np.sum(s['output_var'][i:]))
            bkgErrAbs = np.sqrt(np.sum(totalBkgVar[i:]))

            eff_sig = nSig / s['nEvents']
            eff_bkg = nBkg / bkgTotal
            err_sig = sigErrAbs / s['nEvents']
            err_bkg = bkgErrAbs / bkgTotal

            rel_err_bkg = bkgErrAbs / nBkg if nBkg > 0. else 0.
            rel_err_sig = sigErrAbs / nSig if nSig > 0. else 0.

            # Statistical uncertainties plus a flat 25% term
            total_rel_err = np.sqrt(rel_err_sig**2. + rel_err_bkg**2. + 0.25**2.)

            if (eff_sig == 0) or (eff_bkg == 0):
                Z = 0.
                Z_err = 0.
            elif (err_sig / eff_sig > 0.75) or (err_bkg / eff_bkg > 0.75):
                # Statistical precision too poor: do not quote a significance
                Z = 0.
                Z_err = 0.
            else:
                Z = binomialExpZ(nSig, nBkg, total_rel_err)

                # Error on Z from shifting signal and background yields by
                # +-1 sigma, combined in quadrature
                Zplus_sig = binomialExpZ((eff_sig + err_sig) * s['nEvents'], eff_bkg * bkgTotal, total_rel_err)
                Zmins_sig = binomialExpZ((eff_sig - err_sig) * s['nEvents'], eff_bkg * bkgTotal, total_rel_err)
                Zplus_bkg = binomialExpZ(eff_sig * s['nEvents'], (eff_bkg + err_bkg) * bkgTotal, total_rel_err)
                Zmins_bkg = binomialExpZ(eff_sig * s['nEvents'], (eff_bkg - err_bkg) * bkgTotal, total_rel_err)

                Z_err_sig = abs(Zplus_sig - Zmins_sig) / 2
                Z_err_bkg = abs(Zplus_bkg - Zmins_bkg) / 2
                Z_err = np.sqrt(Z_err_sig**2 + Z_err_bkg**2)

            significance.append(Z)
            significance_err.append(Z_err)

        s['sig'] = np.array(significance)
        s['sig_max'] = s['sig'].max()
        s['sig_err'] = np.array(significance_err)
        # Masses, best significance and the lower bin edge(s) where it is reached
        print('%s %s %s %s' % (s['m_stop'], s['m_X'], s['sig_max'],
                               bins[np.where(s['sig'] == s['sig_max'])]))

    x = np.array([s['m_stop'] for s in Signal], dtype=float)
    y = np.array([s['m_X'] for s in Signal], dtype=float)
    z = np.array([s['sig_max'] for s in Signal], dtype=float)

    # Set up a regular grid of interpolation points
    fig, ax1 = plt.subplots(figsize=(8, 6))
    xi, yi = np.meshgrid(np.linspace(x.min(), x.max(), 100),
                         np.linspace(y.min(), y.max(), 100))

    # Interpolate
    interpolator = scipy.interpolate.LinearNDInterpolator(points=np.array((x, y)).T, values=z)
    zi = interpolator(xi, yi)

    im = ax1.imshow(zi, vmin=0., vmax=5., origin='lower',
                    extent=[x.min(), x.max(), y.min(), y.max()])
    cbar = plt.colorbar(im)
    cbar.set_label('Significance')
    ax1.set_xlabel(r'$m_{\tilde{t}}$')
    ax1.set_xlim([x.min(), x.max()])
    ax1.set_ylabel(r'$m_{\chi}$')
    ax1.set_ylim([y.min(), y.max()])
    plt.scatter(x, y, c='black')
    plt.plot(x, x - 84., color='black')
    plt.plot(x, x - 175., color='black')

    AtlasStyle_mpl.ATLASLabel(ax1, 0.022, 0.925, 'Work in progress')
    AtlasStyle_mpl.LumiLabel(ax1, 0.022, 0.875, lumi=lumi * 0.001)

    #plt.show()

    # NOTE(review): the original layout was whitespace-mangled; savefig and
    # plt.close() are assumed to belong to `if save` - confirm.
    if save:
        if not os.path.exists('./plots/'):
            os.makedirs('./plots/')
            print('Creating folder plots')
        plt.savefig('plots/' + fileName + '_evaluated_grid.pdf')
        plt.savefig('plots/' + fileName + '_evaluated_grid.png')
        plt.close()

    # Group the per-sample event counts by mass difference m_stop - m_X
    diagonals = {165.0: {}, 150.0: {}, 120.0: {}, 90.0: {}}
    for key, value in statInfoSig.items():
        m_stop, m_X = pickBenchmark(key)
        deltaM = float(m_stop) - float(m_X)
        if deltaM not in diagonals:
            print('Error: Unknown diagonal in evaluate_signalGrid')
            return 0
        diagonals[deltaM][m_stop] = value

    sortedLabels165 = sorted(diagonals[165.0])
    sortedLabels150 = sorted(diagonals[150.0])
    sortedLabels120 = sorted(diagonals[120.0])
    sortedLabels90 = sorted(diagonals[90.0])

    values_165 = [diagonals[165.0][label] for label in sortedLabels165]
    values_150 = [diagonals[150.0][label] for label in sortedLabels150]
    values_120 = [diagonals[120.0][label] for label in sortedLabels120]
    values_90 = [diagonals[90.0][label] for label in sortedLabels90]

    csignal = sum(values_90) + sum(values_120) + sum(values_150) + sum(values_165)
    trainable_count = int(np.sum([K.count_params(p) for p in set(model.trainable_weights)]))

    # Invisible patches: used only to put extra text lines into the legend
    signalP = mpatches.Patch(color='None', label='signal: ' + str(csignal))
    ttbar = mpatches.Patch(color='None', label=r'$t\overline{t}$: ' + str(statInfoBkg['mc16d_ttbar']))
    singletop = mpatches.Patch(color='None', label='single top: ' + str(statInfoBkg['mc16d_singletop']))
    Wjets = mpatches.Patch(color='None', label=r'$W$ + jets: ' + str(statInfoBkg['mc16d_Wjets']))
    tps = mpatches.Patch(color='None', label='params(t): ' + str(trainable_count))  # trainable parameters

    plt.figure('statistic')
    d165 = plt.plot(sortedLabels165, values_165, 'b-x', label=r'$\Delta M = 165$ GeV')
    d150 = plt.plot(sortedLabels150, values_150, 'b-x', label=r'$\Delta M = 150$ GeV')
    d120 = plt.plot(sortedLabels120, values_120, 'r-x', label=r'$\Delta M = 120$ GeV')
    d90 = plt.plot(sortedLabels90, values_90, 'g-x', label=r'$\Delta M = 90$ GeV')
    plt.xlabel(r'$m_{\tilde{t}}$ [GeV]')
    plt.ylabel('Statistic')
    plt.title('Statistic of samples')
    plt.legend(loc='best', handles=[d165[0], d150[0], d120[0], d90[0], signalP, ttbar, singletop, Wjets, tps])

    # NOTE(review): the original layout was whitespace-mangled; the text file
    # below is assumed to be written only when saving - confirm.
    if save:
        if not os.path.exists('./plots/'):
            os.makedirs('./plots/')
            print('Creating folder plots')
        plt.savefig('plots/' + fileName + '_StatisticTraining.pdf')
        plt.savefig('plots/' + fileName + '_StatisticTraining.png')
        plt.close()

        filepath = 'plots/' + fileName + '_StatisticTrainingValues.txt'
        with open(filepath, 'w') as infofile:
            infofile.write('M165: ' + ';'.join(sortedLabels165) + ' ' + ';'.join([str(i) for i in values_165]) + '\n')
            infofile.write('M150: ' + ';'.join(sortedLabels150) + ' ' + ';'.join([str(i) for i in values_150]) + '\n')
            infofile.write('M120: ' + ';'.join(sortedLabels120) + ' ' + ';'.join([str(i) for i in values_120]) + '\n')
            infofile.write('M90: ' + ';'.join(sortedLabels90) + ' ' + ';'.join([str(i) for i in values_90]))
def main():
    """
    Evaluate the model configured at module level on the signal grid and plot
    the best significance per grid point.

    Everything is taken from module-level names: ``modelDir``, ``SCALING``,
    ``DatasetDir``, ``RESOLUTION`` ([number of bins, lower edge, upper edge]),
    ``SIGNAL``, ``BACKGROUND``, ``PRESELECTION``, ``VAR``, ``WEIGHTS``,
    ``LUMI``, ``inputDirSig``, ``inputDirBkg``, ``modelfile`` and, for
    recurrent models, ``COLLECTION`` and ``REMOVE_VAR``.  The model is treated
    as recurrent when the first line of its info file names the analysis
    method 'rnn' (case-insensitive).

    For every signal sample the ``BinomialExpZ`` significance and the Asimov
    significance (``asimovZ``) are computed for each lower cut on the output
    score and printed.  The maximum ``BinomialExpZ`` value per sample is
    interpolated over the (m_stop, m_X) plane, a contour is drawn at 3, and
    the figure is written to 'plots/<modelfile>_eval-Grid.pdf' and '.png'.
    """
    model = load_model(modelDir)
    scaler = joblib.load(SCALING)

    # The context manager closes the handle again (the file used to stay open).
    with open(modelDir.replace('.h5', '_infofile.txt')) as infofile:
        infos = infofile.readlines()

    analysis = infos[0].replace('Used analysis method: ', '').replace('\n', '')
    dataset = DatasetDir + infos[3].replace('Used dataset: ', '').replace('\n', '')

    recurrent = analysis.lower() == 'rnn'
    # Scaling file of the sequential inputs; only needed for recurrent models
    seq_scaler = dataset + '_scaling.json' if recurrent else None

    # float() guards against integer division if RESOLUTION holds integers
    db = float(RESOLUTION[2] - RESOLUTION[1]) / RESOLUTION[0]  # bin width in discriminator distribution
    bins = np.arange(RESOLUTION[1], RESOLUTION[2] + db, db)    # bin edges in discriminator distribution
    upperEdges = bins[1:]
    nBins = len(upperEdges)

    print('#----MODEL----#')
    print(modelDir)

    ###########################
    # Read and evaluate signals
    ###########################
    Signal = []
    for name in SIGNAL:
        print(name)
        m_stop, m_X = pickBenchmark(name)
        if recurrent:
            df, weight, collection = loadSequentialDataFrame(os.path.join(inputDirSig, name + '/'), PRESELECTION,
                                                             COLLECTION, REMOVE_VAR, VAR, WEIGHTS, LUMI)
            y_hat = evaluate(model, df.values, scaler, seq_scaler, rnn=True, col=collection)
        else:
            df, weight = loadDataFrame(os.path.join(inputDirSig, name + '/'), PRESELECTION, VAR, WEIGHTS, LUMI)
            y_hat = evaluate(model, df.values, scaler)

        # Bin index of the output score of each event; index i collects the
        # scores in [bins[i], bins[i+1]).
        bin_index = np.digitize(y_hat[:, 0], upperEdges)

        weightValues = weight.values
        outputWeighted = []
        outputWeightedVar = []
        outputMC = []
        outputMCVar = []
        for i in range(nBins):
            w = weightValues[bin_index == i]
            outputWeighted.append(w.sum())
            outputWeightedVar.append(np.sum(w**2.))
            outputMC.append(len(w))
            outputMCVar.append(np.sqrt(len(w)))

        Signal.append({'name': name, 'm_stop': m_stop, 'm_X': m_X, 'dataset': df, 'weight': weight,
                       'nEvents': weight.sum(), 'y_pred': y_hat,
                       'outputScore': np.array(outputWeighted), 'outputMC': np.array(outputMC),
                       'output_var': np.array(outputWeightedVar), 'outputMC_var': np.array(outputMCVar)})

    ###########################
    # Read and evaluate backgrounds
    ###########################
    Background = []
    for name in BACKGROUND:
        if recurrent:
            df, weight, collection = loadSequentialDataFrame(os.path.join(inputDirBkg, name + '/'), PRESELECTION,
                                                             COLLECTION, REMOVE_VAR, VAR, WEIGHTS, LUMI)
            y_hat = evaluate(model, df.values, scaler, seq_scaler, rnn=True, col=collection)
        else:
            df, weight = loadDataFrame(os.path.join(inputDirBkg, name + '/'), PRESELECTION, VAR, WEIGHTS, LUMI)
            y_hat = evaluate(model, df.values, scaler)

        bin_index = np.digitize(y_hat[:, 0], upperEdges)

        weightValues = weight.values
        outputWeighted = []
        outputWeightedVar = []
        outputMC = []
        outputMCVar = []
        for i in range(nBins):
            w = weightValues[bin_index == i]
            outputWeighted.append(w.sum())
            outputWeightedVar.append(np.sum(w**2.))
            outputMC.append(len(w))
            # NOTE(review): signal stores sqrt(N) here, background stores N.
            # Kept as found because 'outputMC_var' is not read in this function.
            outputMCVar.append(len(w))

        Background.append({'name': name, 'dataset': df, 'weight': weight,
                           'nEvents': weight.sum(), 'y_pred': y_hat,
                           'outputScore': np.array(outputWeighted), 'outputMC': np.array(outputMC),
                           'output_var': np.array(outputWeightedVar), 'outputMC_var': np.array(outputMCVar)})

    totalBkgOutput = np.array([b['outputScore'] for b in Background]).sum(axis=0)
    totalBkgVar = np.array([b['output_var'] for b in Background]).sum(axis=0)
    bkgTotal = totalBkgOutput.sum()

    binomialExpZ = ROOT.RooStats.NumberCountingUtils.BinomialExpZ

    for s in Signal:
        significance = []
        significance_err = []
        asimov = []

        for i in range(nBins):
            # Events passing a lower cut at bin i are bins i..last.  The
            # BinomialExpZ inputs used to be sliced with [i:-1], which dropped
            # the highest-score bin and disagreed with the [i:] slice already
            # used for the Asimov significance below.
            nSig = s['outputScore'][i:].sum()
            nBkg = totalBkgOutput[i:].sum()
            sigErrAbs = np.sqrt(np.sum(s['output_var'][i:]))
            bkgErrAbs = np.sqrt(np.sum(totalBkgVar[i:]))

            eff_sig = nSig / s['nEvents']
            eff_bkg = nBkg / bkgTotal
            err_sig = sigErrAbs / s['nEvents']
            err_bkg = bkgErrAbs / bkgTotal

            rel_err_bkg = bkgErrAbs / nBkg if nBkg > 0. else 0.

            # Background statistical uncertainty plus a flat 25% term
            total_rel_err = np.sqrt(rel_err_bkg**2. + 0.25**2.)

            if (eff_sig == 0) or (eff_bkg == 0):
                Z = 0.
                Z_err = 0.
                ams = 0.
            elif (err_sig / eff_sig > 0.75) or (err_bkg / eff_bkg > 0.75):
                # Statistical precision too poor: do not quote a significance
                Z = 0.
                Z_err = 0.
                ams = 0.
            else:
                Z = binomialExpZ(nSig, nBkg, total_rel_err)
                ams = asimovZ(nSig, nBkg, np.sqrt(totalBkgVar[i:].sum()))

                # Error on Z from shifting signal and background yields by
                # +-1 sigma, combined in quadrature
                Zplus_sig = binomialExpZ((eff_sig + err_sig) * s['nEvents'], eff_bkg * bkgTotal, total_rel_err)
                Zmins_sig = binomialExpZ((eff_sig - err_sig) * s['nEvents'], eff_bkg * bkgTotal, total_rel_err)
                Zplus_bkg = binomialExpZ(eff_sig * s['nEvents'], (eff_bkg + err_bkg) * bkgTotal, total_rel_err)
                Zmins_bkg = binomialExpZ(eff_sig * s['nEvents'], (eff_bkg - err_bkg) * bkgTotal, total_rel_err)

                Z_err_sig = abs(Zplus_sig - Zmins_sig) / 2
                Z_err_bkg = abs(Zplus_bkg - Zmins_bkg) / 2
                Z_err = np.sqrt(Z_err_sig**2 + Z_err_bkg**2)

            significance.append(Z)
            significance_err.append(Z_err)
            asimov.append(ams)

        s['sig'] = np.array(significance)
        s['sig_max'] = s['sig'].max()
        s['sig_err'] = np.array(significance_err)
        s['ams'] = np.array(asimov)
        print(s['sig'])
        print(s['ams'])
        # Masses, best significance and the lower bin edge(s) where it is reached
        print('%s %s %s %s' % (s['m_stop'], s['m_X'], s['sig_max'],
                               bins[np.where(s['sig'] == s['sig_max'])]))

    x = np.array([s['m_stop'] for s in Signal], dtype=float)
    y = np.array([s['m_X'] for s in Signal], dtype=float)
    z = np.array([s['sig_max'] for s in Signal], dtype=float)

    # Set up a regular grid of interpolation points
    fig, ax1 = plt.subplots(figsize=(8, 6))
    xi, yi = np.meshgrid(np.linspace(x.min(), x.max(), 100),
                         np.linspace(y.min(), y.max(), 100))

    # Interpolate
    interpolator = scipy.interpolate.LinearNDInterpolator(points=np.array((x, y)).T, values=z)
    zi = interpolator(xi, yi)

    im = ax1.imshow(zi, vmin=0., vmax=5., origin='lower',
                    extent=[x.min(), x.max(), y.min(), y.max()])
    plt.contour(xi, yi, zi, colors='black', levels=[3.])
    cbar = plt.colorbar(im)
    cbar.set_label('Significance')
    ax1.set_xlabel(r'$m_{\tilde{t}}$')
    ax1.set_xlim([x.min(), x.max()])
    ax1.set_ylabel(r'$m_{\chi}$')
    ax1.set_ylim([y.min(), y.max()])
    plt.scatter(x, y, c='black', s=[0.75] * len(x))
    plt.plot(x, x - 84., color='grey')
    plt.plot(x, x - 175., color='grey')

    AtlasStyle_mpl.ATLASLabel(ax1, 0.022, 0.925, 'Work in progress')
    AtlasStyle_mpl.LumiLabel(ax1, 0.022, 0.875, lumi=LUMI * 0.001)

    # savefig does not create missing directories
    if not os.path.exists('plots/'):
        os.makedirs('plots/')
    plt.savefig("plots/" + modelfile + "_eval-Grid.pdf")
    plt.savefig("plots/" + modelfile + "_eval-Grid.png")
    plt.close()