def _score_sample(name, model, scaler, recurrent, seq_scaler):
    """Load the sample directory *name* and return (dataframe, weights, scores)."""
    sample_dir = os.path.join(inputDir, name + '/')
    if not recurrent:
        df, weight = loadDataFrame(sample_dir, PRESELECTION, VAR, WEIGHTS,
                                   LUMI)
        y_hat = evaluate(model, df.values, scaler)
    else:
        df, weight, collection = loadSequentialDataFrame(
            sample_dir, PRESELECTION, COLLECTION, REMOVE_VAR, VAR, WEIGHTS,
            LUMI)
        y_hat = evaluate(model, df.values, scaler, seq_scaler, rnn=True,
                         col=collection)
    return df, weight, y_hat


def _binned_yields(scores, weights, bins):
    """Return per-bin (sum of weights, sum of squared weights, raw count)."""
    # Digitizing against the upper edges gives index i for bins[i] <= x < bins[i+1];
    # scores below bins[1] all land in bin 0, scores >= bins[-1] fall outside.
    bin_index = np.digitize(scores, bins[1:])
    yields = []
    variances = []
    counts = []
    for i in range(len(bins) - 1):
        w = weights[bin_index == i]
        yields.append(w.sum())
        variances.append(np.sum(w**2.))
        counts.append(len(w))
    return np.array(yields), np.array(variances), np.array(counts)


def main():
    """Evaluate the model on all signal and background samples and plot shapes.

    Reads the model (``modelDir``), the scaler (``SCALING``) and the model's
    ``_infofile.txt``, scores every sample in ``SIGNAL`` and ``BACKGROUND``,
    histograms the first output column in the binning given by ``RESOLUTION``
    (n_bins, lower, upper), computes a significance scan per signal point and
    finally saves a normalised signal-vs-background shape comparison (first
    signal sample only) as PDF and PNG under ``plots/``.

    All configuration is taken from module-level names; ``modelfile`` (used
    for the output file name) is not assigned in this function either.
    """
    model = load_model(modelDir)
    scaler = joblib.load(SCALING)

    # Close the info file deterministically instead of leaking the handle.
    with open(modelDir.replace('.h5', '_infofile.txt')) as infofile:
        infos = infofile.readlines()

    analysis = infos[0].replace('Used analysis method: ', '').replace('\n', '')
    dataset = DatasetDir + infos[3].replace('Used dataset: ', '').replace(
        '\n', '')

    recurrent = analysis.lower() == 'rnn'
    # The sequence scaler file is only needed for recurrent models.
    seq_scaler = dataset + '_scaling.json' if recurrent else None

    db = (RESOLUTION[2] -
          RESOLUTION[1]) / RESOLUTION[0]  # bin width in discriminator distribution
    bins = np.arange(RESOLUTION[1], RESOLUTION[2] + db,
                     db)  # bin edges in discriminator distribution
    center = (bins[:-1] + bins[1:]) / 2

    print('#----MODEL----#')
    print(modelDir)

    ###########################
    # Read and evaluate signals
    ###########################
    Signal = []
    for s in SIGNAL:
        x, y = pickBenchmark(s)
        df, weight, y_hat = _score_sample(s, model, scaler, recurrent,
                                          seq_scaler)
        yields, variances, counts = _binned_yields(y_hat[:, 0], weight.values,
                                                   bins)
        Signal.append({
            'name': s,
            'm_stop': x,
            'm_X': y,
            'dataset': df,
            'weight': weight,
            'nEvents': weight.sum(),
            'y_pred': y_hat,
            'outputScore': yields,
            'outputMC': counts,
            'output_var': variances,
            # NOTE(review): signal stores sqrt(N) here while the background
            # loop below stores N under the same key - confirm which is meant.
            'outputMC_var': np.sqrt(counts)
        })

    ###########################
    # Read and evaluate backgrounds
    ###########################
    Background = []
    for b in BACKGROUND:
        df, weight, y_hat = _score_sample(b, model, scaler, recurrent,
                                          seq_scaler)
        yields, variances, counts = _binned_yields(y_hat[:, 0], weight.values,
                                                   bins)
        Background.append({
            'name': b,
            'dataset': df,
            'weight': weight,
            'nEvents': weight.sum(),
            'y_pred': y_hat,
            'outputScore': yields,
            'outputMC': counts,
            'output_var': variances,
            'outputMC_var': counts.copy()
        })

    totalBkgOutput = np.array([b['outputScore']
                               for b in Background]).sum(axis=0)
    totalBkgVar = np.array([b['output_var'] for b in Background]).sum(axis=0)
    bkg_total = totalBkgOutput.sum()

    ###########################
    # Significance scan: cut at the lower edge of bin i, keep everything above
    ###########################
    for s in Signal:
        significance = []
        significance_err = []
        asimov = []
        for i in range(len(bins) - 1):
            sig_yield = s['outputScore'][i:].sum()
            bkg_yield = totalBkgOutput[i:].sum()
            bkg_var = np.sum(totalBkgVar[i:])

            eff_sig = sig_yield / s['nEvents']
            eff_bkg = bkg_yield / bkg_total
            err_sig = np.sqrt(np.sum(s['output_var'][i:])) / s['nEvents']
            err_bkg = np.sqrt(bkg_var) / bkg_total

            if bkg_yield > 0.:
                rel_err_bkg = np.sqrt(bkg_var) / bkg_yield
            else:
                rel_err_bkg = 0.
            # Statistical background uncertainty plus a flat 25% term.
            total_rel_err = np.sqrt(rel_err_bkg**2. + 0.25**2.)

            # Short-circuiting keeps the divisions safe when an efficiency is 0.
            if ((eff_sig == 0) or (eff_bkg == 0)
                    or (err_sig / eff_sig > 0.75)
                    or (err_bkg / eff_bkg > 0.75)):
                Z = 0.
                Z_err = 0.
                ams = 0.
            else:
                Z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(
                    sig_yield, bkg_yield, total_rel_err)
                ams = asimovZ(sig_yield, bkg_yield, np.sqrt(bkg_var))

                # Vary signal and background by +-1 sigma to estimate the
                # uncertainty on Z.
                Zplus_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(
                    (eff_sig + err_sig) * s['nEvents'], eff_bkg * bkg_total,
                    total_rel_err)
                Zmins_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(
                    (eff_sig - err_sig) * s['nEvents'], eff_bkg * bkg_total,
                    total_rel_err)
                Zplus_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(
                    eff_sig * s['nEvents'], (eff_bkg + err_bkg) * bkg_total,
                    total_rel_err)
                Zmins_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(
                    eff_sig * s['nEvents'], (eff_bkg - err_bkg) * bkg_total,
                    total_rel_err)

                Z_err_sig = abs(Zplus_sig - Zmins_sig) / 2
                Z_err_bkg = abs(Zplus_bkg - Zmins_bkg) / 2
                Z_err = np.sqrt(Z_err_sig**2 + Z_err_bkg**2)

            significance.append(Z)
            significance_err.append(Z_err)
            asimov.append(ams)

        s['sig'] = np.array(significance)
        s['sig_max'] = s['sig'].max()
        s['sig_err'] = np.array(significance_err)
        s['ams'] = np.array(asimov)

    ###########################
    # Shape comparison of the first signal against the summed background
    ###########################
    print('Plotting the output score...')
    plt.figure(figsize=(8, 6))
    ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=3)
    ax1.set_xlim((bins[0], bins[-1]))
    ax1.set_ylabel("Events", horizontalalignment='right', y=1.0)

    sig_total = Signal[0]['outputScore'].sum()
    sig_shape = Signal[0]['outputScore'] / sig_total
    sig_shape_err = np.sqrt(Signal[0]['output_var']) / sig_total
    bkg_shape = totalBkgOutput / bkg_total
    bkg_shape_err = np.sqrt(totalBkgVar) / bkg_total

    plt.bar(center,
            bkg_shape,
            width=db,
            yerr=bkg_shape_err,
            color='b',
            alpha=0.25,
            error_kw=dict(ecolor='b', lw=1.5),
            label=Background[0]['name'])
    plt.bar(center,
            sig_shape,
            width=db,
            yerr=sig_shape_err,
            label=Signal[0]['name'],
            color='r',
            alpha=0.25,
            error_kw=dict(ecolor='r', lw=1.5))

    ax1.set_ylim((0., np.max([np.max(bkg_shape),
                              np.max(sig_shape)]) * 1.3))
    plt.legend(loc="best", frameon=False)
    AtlasStyle_mpl.ATLASLabel(ax1, 0.02, 0.925, 'Work in progress')

    ax2 = plt.subplot2grid((4, 4), (3, 0), colspan=4, rowspan=1)
    getRatio(sig_shape, bins, sig_shape_err, bkg_shape, bins, bkg_shape_err,
             'r')
    ax2.set_xlabel('Output score', horizontalalignment='right', x=1.0)
    ax2.set_ylabel('Reco/Truth')
    ax2.set_xlim((0., 1.))
    ax2.set_ylim((0, 2))
    ax2.grid()
    ax2.tick_params(direction='in')
    ax2.xaxis.set_ticks_position('both')
    ax2.yaxis.set_ticks_position('both')

    plt.savefig("plots/" + modelfile + "_shapeComparison_outputScore.pdf")
    plt.savefig("plots/" + modelfile + "_shapeComparison_outputScore.png")
    plt.close()
def plot_TrainTest_score(sig_predicted_train,
                         sig_predicted_test,
                         sig_w_train,
                         sig_w_test,
                         bkg_predicted_train,
                         bkg_predicted_test,
                         bkg_w_train,
                         bkg_w_test,
                         binning,
                         fileName="KS_test",
                         normed=False,
                         save=False,
                         ratio=True):
    """Overlay weighted train/test output-score histograms for signal and background.

    Training distributions are drawn as filled histograms, testing
    distributions as markers, and the KS statistic between the binned
    train and test histograms is printed on the plot.

    NOTE(review): this module defines another function with the same name
    further down; that later definition is the one bound after import.

    Args:
        sig_predicted_train, sig_predicted_test: signal scores (``.ravel()``-able).
        sig_w_train, sig_w_test: per-event signal weights.
        bkg_predicted_train, bkg_predicted_test: background scores.
        bkg_w_train, bkg_w_test: per-event background weights.
        binning: ``(n_bins, lower, upper)``.
        fileName: output path without extension, used when ``save`` is true.
        normed: draw unit-area densities instead of event counts.
        save: write ``fileName + ".pdf"/".png"`` and close the figure.
        ratio: add a lower panel via ``getRatio``.
    """
    plt.figure(figsize=(8, 6))
    if ratio:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=3)
    else:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)

    ax1.tick_params(direction='in')
    ax1.set_xlim((binning[1], binning[2]))
    ax1.xaxis.set_ticks_position('both')
    ax1.yaxis.set_ticks_position('both')

    # `density` replaces the removed `normed` keyword of plt.hist/np.histogram
    # and matches the other plotting functions in this file.
    hist_range = (binning[1], binning[2])
    s_histTrain, s_binsTrain, _ = plt.hist(sig_predicted_train.ravel(),
                                           weights=sig_w_train,
                                           histtype='stepfilled',
                                           color='r',
                                           label='Signal (Training)',
                                           alpha=0.5,
                                           bins=binning[0],
                                           range=hist_range,
                                           density=normed)
    b_histTrain, _, _ = plt.hist(bkg_predicted_train.ravel(),
                                 weights=bkg_w_train,
                                 histtype='stepfilled',
                                 color='b',
                                 label='Background (Training)',
                                 alpha=0.5,
                                 bins=binning[0],
                                 range=hist_range,
                                 density=normed)

    s_histTest, s_binsTest = np.histogram(sig_predicted_test.ravel(),
                                          weights=sig_w_test,
                                          bins=binning[0],
                                          range=hist_range,
                                          density=normed)
    b_histTest, b_binsTest = np.histogram(bkg_predicted_test.ravel(),
                                          weights=bkg_w_test,
                                          bins=binning[0],
                                          range=hist_range,
                                          density=normed)

    center = (s_binsTrain[:-1] + s_binsTrain[1:]) / 2
    # TODO define yerr = sqrt( sum w^2 ) per bin!
    plt.errorbar(center, s_histTest, fmt='o', c='r', label='Signal (Testing)')
    plt.errorbar(center,
                 b_histTest,
                 fmt='o',
                 c='b',
                 label='Background (Testing)')

    # The KS test is run on the bin contents, not on the unbinned scores.
    ks_sig = ks_2samp(s_histTrain, s_histTest)[0]
    ks_bkg = ks_2samp(b_histTrain, b_histTest)[0]

    if normed:
        ax1.set(ylabel="a. u.")
    else:
        ax1.set(ylabel="Events")

    plt.legend(loc="best", frameon=False)
    ax1.text(0.65,
             0.7,
             "KS Test S (B): %.3f (%.3f)" % (ks_sig, ks_bkg),
             transform=ax1.transAxes)

    if ratio:
        ax2 = plt.subplot2grid((4, 4), (3, 0), colspan=4, rowspan=1)
        # NOTE(review): the other getRatio calls in this file also pass the
        # per-bin uncertainties (and usually a colour) - confirm this
        # 4-argument form is still supported.
        getRatio(s_histTest, s_binsTest, b_histTest, b_binsTest)
        ax2.set(xlabel='Output score', ylabel='S/B')
        ax2.set_xlim((binning[1], binning[2]))
        ax2.set_ylim((0, 2))
        ax2.grid()
        ax2.tick_params(direction='in')
        ax2.xaxis.set_ticks_position('both')
        ax2.yaxis.set_ticks_position('both')

    ax1.set(xlabel='Output score')

    if save:
        plt.savefig(fileName + ".pdf")
        plt.savefig(fileName + ".png")
        plt.close()
def plot_TrainTest_score(sig_predicted_train,
                         sig_predicted_test,
                         sig_w_train,
                         sig_w_test,
                         bkg_predicted_train,
                         bkg_predicted_test,
                         bkg_w_train,
                         bkg_w_test,
                         binning,
                         fileName='Test',
                         normed=False,
                         save=False,
                         ratio=True,
                         addStr=''):
    """Compare unweighted train and test output-score shapes for signal and background.

    Training scores are drawn as filled histograms, test scores as markers;
    the KS statistic between the binned train/test histograms is shown as an
    extra legend entry. The weights are only used for the uncertainties
    handed to ``getRatio`` in the optional lower panel.

    Args:
        sig_predicted_train, sig_predicted_test: signal scores.
        sig_w_train, sig_w_test: signal weights (train weights are unused).
        bkg_predicted_train, bkg_predicted_test: background scores.
        bkg_w_train, bkg_w_test: background weights (train weights are unused).
        binning: ``(n_bins, lower, upper)``.
        fileName: stem of the files written below ``plots/``.
        normed: plot densities and scale the uncertainties by the weight sum.
        save: write PDF and PNG to ``plots/`` and close the figure.
        ratio: add the S/B panel.
        addStr: suffix appended to the output file name.
    """
    print('Plotting the train/test score...')
    lower, upper = binning[1], binning[2]

    fig = plt.figure(figsize=(8, 6))
    top_rows = 3 if ratio else 4
    ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=top_rows)

    ax1.tick_params(direction='in')
    ax1.set_xlim((lower, upper))
    for axis in (ax1.xaxis, ax1.yaxis):
        axis.set_ticks_position('both')

    # Options shared by every histogram; note that no weights are applied.
    binned = dict(weights=None,
                  bins=binning[0],
                  range=(lower, upper),
                  density=normed)
    filled = dict(histtype='stepfilled', alpha=0.5)
    filled.update(binned)

    sig_train_hist, sig_train_edges, sig_train_patches = plt.hist(
        sig_predicted_train.ravel(),
        color='r',
        label='Signal (Training)',
        **filled)
    bkg_train_hist, bkg_train_edges, bkg_train_patches = plt.hist(
        bkg_predicted_train.ravel(),
        color='b',
        label='Background (Training)',
        **filled)

    sig_test_hist, sig_test_edges = np.histogram(sig_predicted_test.ravel(),
                                                 **binned)
    bkg_test_hist, bkg_test_edges = np.histogram(bkg_predicted_test.ravel(),
                                                 **binned)

    midpoints = (sig_train_edges[:-1] + sig_train_edges[1:]) / 2
    # TODO define yerr = sqrt( sum w^2 ) per bin!
    sig_markers = plt.errorbar(midpoints,
                               sig_test_hist,
                               fmt='o',
                               c='r',
                               label='Signal (Testing)')
    bkg_markers = plt.errorbar(midpoints,
                               bkg_test_hist,
                               fmt='o',
                               c='b',
                               label='Background (Testing)')

    ks_sig = ks_2samp(sig_train_hist, sig_test_hist)[0]
    ks_bkg = ks_2samp(bkg_train_hist, bkg_test_hist)[0]

    def _test_uncertainty(scores, weights):
        """Per-bin getSumW2 result, divided by the weight sum when normed."""
        sum_w2 = getSumW2(scores.ravel(), weights, binning)
        if normed:
            return sum_w2 / np.sum(weights)
        return sum_w2

    sig_test_unc = _test_uncertainty(sig_predicted_test, sig_w_test)
    bkg_test_unc = _test_uncertainty(bkg_predicted_test, bkg_w_test)

    # Invisible proxy artist so the KS result appears inside the legend.
    ks_entry = mpatches.Patch(color='None',
                              label='KS Test S (B): %.3f (%.3f)' %
                              (ks_sig, ks_bkg))

    y_title = 'a. u.' if normed else 'Events'
    ax1.set_ylabel(y_title, horizontalalignment='right', y=1.0)

    legend_handles = [
        sig_train_patches[0], bkg_train_patches[0], sig_markers, bkg_markers,
        ks_entry
    ]
    legend_box = plt.legend(loc='best', frameon=False, handles=legend_handles)
    legend_box.get_window_extent()

    if ratio:
        ax2 = plt.subplot2grid((4, 4), (3, 0), colspan=4, rowspan=1)
        getRatio(sig_test_hist, sig_test_edges, sig_test_unc, bkg_test_hist,
                 bkg_test_edges, bkg_test_unc, 'r')
        ax2.set_xlabel('EPD', horizontalalignment='right', x=1.0)
        ax2.set_ylabel('S/B')
        ax2.set_xlim((lower, upper))
        ax2.set_ylim((0, 2))
        ax2.grid()
        ax2.tick_params(direction='in')
        for axis in (ax2.xaxis, ax2.yaxis):
            axis.set_ticks_position('both')

    highest = np.maximum(sig_test_hist.max(), bkg_test_hist.max())
    ax1.set_ylim(0., 1.5 * highest)
    ax1.set_xlabel('EPD', horizontalalignment='right', x=1.0)
    AtlasStyle_mpl.ATLASLabel(ax1, 0.022, 0.925, 'Work in progress')

    if not save:
        return

    if not os.path.exists('./plots/'):
        os.makedirs('./plots/')
        print('Creating folder plots')
    out_stem = 'plots/' + fileName + '_TrainTestScore' + addStr
    for extension in ('.pdf', '.png'):
        plt.savefig(out_stem + extension)
    plt.close()
def plot_output_score_multiclass(sig_predicted,
                                 sig_w,
                                 bkg1_predicted,
                                 bkg1_w,
                                 bkg2_predicted,
                                 bkg2_w,
                                 bkg3_predicted,
                                 bkg3_w,
                                 bkg_predicted,
                                 bkg_w,
                                 binning,
                                 fileName="Test",
                                 title='Discriminating power',
                                 normed=False,
                                 save=False,
                                 ratio=False,
                                 log=False,
                                 sample=None,
                                 addStr=''):
    """Plot the signal score against a stack of three background classes.

    Args:
        sig_predicted, sig_w: signal scores and weights.
        bkg1_predicted, bkg1_w: scores/weights drawn as ttbar (top of stack).
        bkg2_predicted, bkg2_w: scores/weights drawn as single top.
        bkg3_predicted, bkg3_w: scores/weights drawn as W+jets (bottom).
        bkg_predicted, bkg_w: scores/weights used only for the background
            uncertainty in the ratio panel (presumably the combined
            background - confirm against the caller).
        binning: ``(n_bins, lower, upper)``.
        fileName: stem of the files written below ``plots/``.
        title: plot title; ``None`` disables it.
        normed: draw densities instead of event counts.
        save: write PDF and PNG to ``plots/`` and close the figure.
        ratio: add a lower panel via ``getRatio``.
        log: logarithmic y axis; also appends ``_log`` to the file name.
        sample: optional extra text entry for the legend.
        addStr: suffix appended to the output file name.

    Returns:
        ``(r, s_bins)`` where ``r`` is the ``getRatio`` result, or ``0`` when
        ``ratio`` is false, and ``s_bins`` are the histogram bin edges.
    """
    print('Plotting the multiclass output score...')
    plt.figure(figsize=(8, 6))
    if ratio:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=3)
        ax1.set_xlabel('', fontsize=0.)
        ax1.set_xticklabels(())
    else:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)

    ax1.tick_params(direction='in')
    ax1.set_xlim((binning[1], binning[2]))
    ax1.xaxis.set_ticks_position('both')
    ax1.yaxis.set_ticks_position('both')

    # Stack order is bottom to top.
    bkgs = [
        bkg3_predicted.ravel(),
        bkg2_predicted.ravel(),
        bkg1_predicted.ravel()
    ]
    bweights = [bkg3_w, bkg2_w, bkg1_w]
    labels = [r'$W$+jets', 'single top', r'$t\overline{t}$']
    colors = ['orange', 'g', 'b']

    s_hist, s_bins, s_patches = plt.hist(sig_predicted.ravel(),
                                         weights=sig_w,
                                         histtype='stepfilled',
                                         color='r',
                                         label='signal',
                                         alpha=0.5,
                                         bins=binning[0],
                                         range=(binning[1], binning[2]),
                                         density=normed)
    # With stacked=True every returned histogram includes the ones below it,
    # so the last entry is the total background.
    b_hist, b_bins, b_patches = plt.hist(bkgs,
                                         weights=bweights,
                                         histtype='stepfilled',
                                         color=colors,
                                         label=labels,
                                         alpha=0.5,
                                         bins=binning[0],
                                         range=(binning[1], binning[2]),
                                         density=normed,
                                         stacked=True)

    log_str = ''
    if log:
        plt.yscale('log', nonposy='clip')
        log_str = '_log'

    if normed:
        ax1.set_ylabel("a. u.", ha='left')
    else:
        ax1.set_ylabel("Events", ha='left')

    if log:
        ax1.set_ylim((0, b_hist[2].max() * (30)))
    else:
        ax1.set_ylim((0, b_hist[2].max() * (1 + 0.33)))

    if sample is not None:
        sample_patch = mpatches.Patch(color='None', label=sample)
        plt.legend(loc='best',
                   frameon=False,
                   handles=[
                       s_patches[0], b_patches[0][0], b_patches[1][0],
                       b_patches[2][0], sample_patch
                   ])
    else:
        plt.legend(loc='best', frameon=False)

    if title is not None:
        plt.title(title)

    AtlasStyle_mpl.ATLASLabel2(ax1, 0.02, 0.9, 'Work in progress')
    AtlasStyle_mpl.LumiLabel(ax1, 0.02, 0.8, lumi=140)

    r = 0
    if ratio:
        # The uncertainties were previously never computed, so this branch
        # always failed with a NameError.
        s_w = getSumW2(sig_predicted.ravel(), sig_w, binning)
        b_w = getSumW2(bkg_predicted.ravel(), bkg_w, binning)

        ax2 = plt.subplot2grid((4, 4), (3, 0), colspan=4, rowspan=1)
        # Compare the total (top of the stack) background with the signal.
        r = getRatio(b_hist[-1], b_bins, b_w, s_hist, s_bins, s_w, 'r')
        ax2.set_xlabel('Discriminant')
        ax2.set_ylabel('variation/nom.')
        ax2.set_xlim((binning[1], binning[2]))
        ax2.set_ylim((-0.5, 2.5))
        ax2.grid()
        ax2.tick_params(direction='in')
        ax2.xaxis.set_ticks_position('both')
        ax2.yaxis.set_ticks_position('both')

    ax1.set(xlabel='EPD')

    if save:
        if not os.path.exists("./plots/"):
            os.makedirs("./plots/")
            print("Creating folder plots")
        plt.savefig("plots/" + fileName + "_output_score_multiclass" +
                    addStr + log_str + ".pdf")
        plt.savefig("plots/" + fileName + "_output_score_multiclass" +
                    addStr + log_str + ".png")
        plt.close()

    if not ratio:
        # Same message and return value as before, without relying on a
        # NameError for control flow.
        print('ratio is set to False, r is not defined')
        return 0, s_bins
    return r, s_bins
def plotShape(var,
              samples,
              weights,
              color,
              binning,
              xTitle,
              yTitle="Events",
              lumi=100,
              unit=None,
              legend=None,
              log=False,
              ratio=False,
              ratioTitle='1/nominal',
              ratioLimit=(0, 2),
              normed=False,
              savePlot=False,
              fileName=None):
    """Draw the distribution of one variable for one or several samples.

    Args:
        var: column name looked up in each sample.
        samples: a list of column containers (one per sample, with the
            matching entries in ``weights``, ``color`` and ``legend``), or a
            single ``(columns, weights)`` tuple.
        weights: per-sample weight arrays; used only for list input.
        color: one colour per sample (a single colour for tuple input).
        binning: ``(n_bins, lower, upper)`` in the plotted unit.
        xTitle: x-axis label.
        yTitle: y-axis label used when ``normed`` is false.
        lumi: passed as a string to the luminosity label.
        unit: ``None`` or ``'MeV'`` leaves values untouched, ``'GeV'``
            multiplies them by 0.001 (case-insensitive).
        legend: one label per sample (a single label for tuple input).
        log: logarithmic y axis.
        ratio: add a lower panel with each sample divided by the first one.
        ratioTitle, ratioLimit: label and y range of the ratio panel.
        normed: draw densities instead of event counts.
        savePlot: write ``fileName + ".pdf"/".png"`` and close the figure.
        fileName: output path without extension; required with ``savePlot``.

    Returns:
        ``0`` if ``samples`` is neither a list nor a tuple, otherwise ``None``.

    Raises:
        ValueError: for an unknown ``unit`` or ``savePlot`` without ``fileName``.
    """
    # Validate before any figure is created so nothing is leaked on failure.
    if not isinstance(samples, (list, tuple)):
        print("Expected {} sample as tuple of variables and weights!".format(
            samples))
        return 0

    if (unit is None) or (unit.lower() == 'mev'):
        unit_fact = 1.
    elif unit.lower() == 'gev':
        unit_fact = 0.001
    else:
        raise ValueError("Unknown unit {!r}; expected None, 'MeV' or 'GeV'".format(unit))

    if savePlot and fileName is None:
        raise ValueError("fileName is required when savePlot is True")

    plt.figure(figsize=(8, 6))
    if ratio:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=3)
        ax1.set_xlabel('', fontsize=0.)
        ax1.set_xticklabels(())
    else:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)

    ax1.tick_params(direction='in')
    ax1.set_xlim((binning[1], binning[2]))
    ax1.xaxis.set_ticks_position('both')
    ax1.yaxis.set_ticks_position('both')

    # Normalise both input forms to (values, weights, colour, label) entries.
    if isinstance(samples, tuple):
        entries = [(samples[0][str(var)].ravel() * unit_fact,
                    samples[1].ravel(), color, legend)]
    else:
        entries = [(smp[str(var)].ravel() * unit_fact, weights[i], color[i],
                    legend[i] if legend is not None else None)
                   for i, smp in enumerate(samples)]

    sumW2 = []
    hists = []
    for values, sample_w, sample_color, sample_label in entries:
        # Uncertainties are computed on the unit-converted values so they
        # refer to the same bins as the histogram.
        errors = getSumW2(values, sample_w.ravel(), binning)
        hist, edges = np.histogram(values,
                                   weights=sample_w,
                                   bins=binning[0],
                                   range=(binning[1], binning[2]),
                                   density=normed)
        width = edges[1] - edges[0]
        center = (edges[:-1] + edges[1:]) / 2
        plt.errorbar(center,
                     hist,
                     xerr=[width / 2.] * binning[0],
                     yerr=errors.ravel(),
                     fmt='o',
                     color=sample_color,
                     label=sample_label)
        sumW2.append(errors)
        hists.append((hist, edges))

    _max = np.max([h[0].max() for h in hists])

    if normed:
        ax1.set_ylabel("a. u.", va='top')
    else:
        ax1.set_ylabel(yTitle, va='top')

    if log:
        ax1.set_yscale('log')
        ax1.set_ylim((0.01, _max * 100))
    elif normed:
        ax1.set_ylim((0, 1.5))
    else:
        ax1.set_ylim((0, _max * 1.4))

    plt.legend(loc='best', frameon=False)

    # NOTE(review): the other plotting functions in this file use
    # AtlasStyle_mpl for these labels - confirm AtlasLabel_mpl is importable.
    AtlasLabel_mpl.ATLASLabel(ax1, 0.02, 0.9, 'Work in progress')
    AtlasLabel_mpl.LumiLabel(ax1, 0.02, 0.8, lumi=str(lumi))

    if ratio:
        ax2 = plt.subplot2grid((4, 4), (3, 0), colspan=4, rowspan=1)
        # Every sample after the first is compared with the first one; with a
        # single sample the panel stays empty.
        for i in range(1, len(hists)):
            getRatio(hists[i][0], hists[i][1], sumW2[i], hists[0][0],
                     hists[0][1], sumW2[0], color[i])
        ax2.set_xlabel(xTitle, ha='right')
        ax2.set_ylabel(ratioTitle, va='top')
        ax2.set_xlim((binning[1], binning[2]))
        ax2.set_ylim(ratioLimit)
        ax2.grid()
        ax2.tick_params(direction='in')
        ax2.xaxis.set_ticks_position('both')
        ax2.yaxis.set_ticks_position('both')

    ax1.set(xlabel=xTitle)

    if savePlot:
        plt.savefig(fileName + ".pdf")
        plt.savefig(fileName + ".png")
        plt.close()
def plot_output_score(sig_predicted,
                      sig_w,
                      bkg_predicted,
                      bkg_w,
                      binning,
                      fileName='Test',
                      normed=False,
                      save=False,
                      addStr='',
                      ratio=True,
                      log=False,
                      sample=None):
    """Plot the weighted binary output score for signal and background.

    NOTE(review): this module defines another ``plot_output_score`` further
    down; that later definition is the one bound after import.

    Args:
        sig_predicted, sig_w: signal scores and per-event weights.
        bkg_predicted, bkg_w: background scores and per-event weights.
        binning: ``(n_bins, lower, upper)``.
        fileName: stem of the files written below ``plots/``.
        normed: draw densities instead of event counts.
        save: write PDF and PNG to ``plots/`` and close the figure.
        addStr: suffix appended to the output file name.
        ratio: add an S/B panel via ``getRatio``.
        log: logarithmic y axis; also appends ``_log`` to the file name.
        sample: optional extra text entry for the legend.

    Returns:
        ``(r, s_bins)`` where ``r`` is the ``getRatio`` result, or ``0`` when
        ``ratio`` is false, and ``s_bins`` are the histogram bin edges.
    """
    print('Plotting the binary output score...')
    plt.figure(figsize=(8, 6))
    if ratio:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=3)
        ax1.set_xlabel('', fontsize=0.)
        ax1.set_xticklabels(())
    else:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)

    ax1.tick_params(direction='in')
    ax1.set_xlim((binning[1], binning[2]))
    ax1.xaxis.set_ticks_position('both')
    ax1.yaxis.set_ticks_position('both')

    s_hist, s_bins, s_patches = plt.hist(sig_predicted.ravel(),
                                         weights=sig_w,
                                         histtype='stepfilled',
                                         color='r',
                                         label='Signal',
                                         alpha=0.5,
                                         bins=binning[0],
                                         range=(binning[1], binning[2]),
                                         density=normed)
    b_hist, b_bins, b_patches = plt.hist(bkg_predicted.ravel(),
                                         weights=bkg_w,
                                         histtype='stepfilled',
                                         color='b',
                                         label='Background',
                                         alpha=0.5,
                                         bins=binning[0],
                                         range=(binning[1], binning[2]),
                                         density=normed)

    log_str = ''
    if log:
        plt.yscale('log', nonposy='clip')
        log_str = '_log'

    s_w = getSumW2(sig_predicted.ravel(), sig_w, binning)
    b_w = getSumW2(bkg_predicted.ravel(), bkg_w, binning)

    # Both labels sit at the top of the axis; the normed branch used to pass
    # x=1.0 instead of y=1.0.
    if normed:
        ax1.set_ylabel('a. u.', horizontalalignment='right', y=1.0)
    else:
        ax1.set_ylabel('Events', horizontalalignment='right', y=1.0)

    if log:
        ax1.set_ylim((0, b_hist.max() * (30)))
    else:
        ax1.set_ylim((0, b_hist.max() * (1 + 0.33)))

    if sample is not None:
        sample_patch = mpatches.Patch(color='None', label=sample)
        plt.legend(loc='best',
                   frameon=False,
                   handles=[s_patches[0], b_patches[0], sample_patch])
    else:
        plt.legend(loc='best', frameon=False)

    AtlasStyle_mpl.ATLASLabel2(ax1, 0.02, 0.9, 'Work in progress')
    AtlasStyle_mpl.LumiLabel(ax1, 0.02, 0.8, lumi=140)

    # Default keeps the return statement valid when no ratio panel is drawn
    # (previously an UnboundLocalError); 0 matches the multiclass variant.
    r = 0
    if ratio:
        ax2 = plt.subplot2grid((4, 4), (3, 0), colspan=4, rowspan=1)
        r = getRatio(s_hist, s_bins, s_w, b_hist, b_bins, b_w, 'r')
        ax2.set_xlabel('EPD', horizontalalignment='right', x=1.0)
        ax2.set_ylabel('S/B')
        ax2.set_xlim((binning[1], binning[2]))
        ax2.set_ylim((-0.5, 2.5))
        ax2.grid()
        ax2.tick_params(direction='in')
        ax2.xaxis.set_ticks_position('both')
        ax2.yaxis.set_ticks_position('both')

    ax1.set_xlabel('EPD', horizontalalignment='right', x=1.0)

    if save:
        if not os.path.exists('./plots/'):
            os.makedirs('./plots/')
            print('Creating folder plots')
        plt.savefig('plots/' + fileName + '_output_score' + addStr + log_str +
                    '.pdf')
        plt.savefig('plots/' + fileName + '_output_score' + addStr + log_str +
                    '.png')
        plt.close()

    return r, s_bins
def plot_output_score(sig_predicted,
                      sig_w,
                      bkg_predicted,
                      bkg_w,
                      binning,
                      fileName=None,
                      normed=False,
                      save=False,
                      ratio=True):
    """Compare the output score of a nominal and a varied ttbar sample.

    The first sample is labelled 'ttbar nominal', the second
    'ttbar radiation low'; the optional lower panel shows variation/nominal.

    Args:
        sig_predicted, sig_w: scores and weights of the nominal sample.
        bkg_predicted, bkg_w: scores and weights of the varied sample.
        binning: ``(n_bins, lower, upper)``.
        fileName: output path without extension; required when ``save`` is true.
        normed: draw densities instead of event counts.
        save: write ``fileName + ".pdf"/".png"`` and close the figure.
        ratio: add the ratio panel via ``getRatio``.

    Returns:
        ``(r, s_bins)`` where ``r`` is the ``getRatio`` result, or ``0`` when
        ``ratio`` is false, and ``s_bins`` are the histogram bin edges.

    Raises:
        ValueError: if ``save`` is true but no ``fileName`` was given.
    """
    # Fail before drawing instead of with a TypeError at the very end.
    if save and fileName is None:
        raise ValueError("fileName is required when save is True")

    plt.figure(figsize=(8, 6))
    if ratio:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=3)
        ax1.set_xlabel('', fontsize=0.)
        ax1.set_xticklabels(())
    else:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)

    ax1.tick_params(direction='in')
    ax1.set_xlim((binning[1], binning[2]))
    ax1.xaxis.set_ticks_position('both')
    ax1.yaxis.set_ticks_position('both')

    s_hist, s_bins, _ = plt.hist(sig_predicted.ravel(),
                                 weights=sig_w,
                                 histtype='stepfilled',
                                 color='r',
                                 label='ttbar nominal',
                                 alpha=0.5,
                                 bins=binning[0],
                                 range=(binning[1], binning[2]),
                                 density=normed)
    b_hist, b_bins, _ = plt.hist(bkg_predicted.ravel(),
                                 weights=bkg_w,
                                 histtype='stepfilled',
                                 color='b',
                                 label='ttbar radiation low',
                                 alpha=0.5,
                                 bins=binning[0],
                                 range=(binning[1], binning[2]),
                                 density=normed)

    s_w = getSumW2(sig_predicted.ravel(), sig_w, binning)
    b_w = getSumW2(bkg_predicted.ravel(), bkg_w, binning)

    if normed:
        ax1.set_ylabel("a. u.", va='top')
    else:
        ax1.set_ylabel("Events", va='top')

    ax1.set_ylim((0, s_hist.max() * (1 + 0.33)))

    plt.legend(loc="best", frameon=False)

    # NOTE(review): other plotting functions in this file use AtlasStyle_mpl
    # for these labels - confirm AtlasLabel_mpl is importable.
    AtlasLabel_mpl.ATLASLabel(ax1, 0.02, 0.9, 'Work in progress')
    AtlasLabel_mpl.LumiLabel(ax1, 0.02, 0.8, lumi=100)

    # Default keeps the return statement valid when no ratio panel is drawn
    # (previously an UnboundLocalError).
    r = 0
    if ratio:
        ax2 = plt.subplot2grid((4, 4), (3, 0), colspan=4, rowspan=1)
        r = getRatio(b_hist, b_bins, b_w, s_hist, s_bins, s_w)
        ax2.set_xlabel('Discriminant', ha='right')
        ax2.set_ylabel('variation/nom.', va='top')
        ax2.set_xlim((binning[1], binning[2]))
        ax2.set_ylim((-0.5, 2.5))
        ax2.grid()
        ax2.tick_params(direction='in')
        ax2.xaxis.set_ticks_position('both')
        ax2.yaxis.set_ticks_position('both')

    ax1.set(xlabel='Output score')

    if save:
        plt.savefig(fileName + ".pdf")
        plt.savefig(fileName + ".png")
        plt.close()

    return r, s_bins