예제 #1
0
def main():
    """Evaluate a trained model on signal/background samples and plot the result.

    Everything is configured through module-level names (``modelDir``,
    ``SCALING``, ``DatasetDir``, ``RESOLUTION``, ``SIGNAL``, ``BACKGROUND``,
    ``inputDir``, ``PRESELECTION``, ``VAR``, ``WEIGHTS``, ``LUMI``,
    ``COLLECTION``, ``REMOVE_VAR``, ``modelfile``).

    Steps:

    1. Load the model, the feature scaler and the model's info file; the
       first info line selects the recurrent ('rnn') code path.
    2. Evaluate every signal and background sample and sum the event weights
       per output-score bin (bin width ``(RESOLUTION[2] - RESOLUTION[1]) /
       RESOLUTION[0]``).
    3. For every signal, scan a lower cut on the output score bin by bin and
       store the significance, its error and the Asimov value in the signal
       dictionary (keys ``'sig'``, ``'sig_max'``, ``'sig_err'``, ``'ams'``).
    4. Plot the unit-normalised output-score shape of the first signal and of
       the summed background plus a ratio panel, and save it as PDF and PNG
       under ``plots/``.
    """
    model = load_model(modelDir)

    scaler = joblib.load(SCALING)

    # Context manager so the info file handle is released right after reading.
    with open(modelDir.replace('.h5', '_infofile.txt')) as infofile:
        infos = infofile.readlines()
    analysis = infos[0].replace('Used analysis method: ', '').replace('\n', '')
    dataset = DatasetDir + infos[3].replace('Used dataset: ', '').replace(
        '\n', '')
    recurrent = False
    if analysis.lower() == 'rnn':
        recurrent = True
        seq_scaler = dataset + '_scaling.json'

    db = (RESOLUTION[2] - RESOLUTION[1]
          ) / RESOLUTION[0]  # bin width in discriminator distribution
    bins = np.arange(RESOLUTION[1], RESOLUTION[2] + db,
                     db)  # bin edges in discriminator distribution
    center = (bins[:-1] + bins[1:]) / 2
    n_bins = len(bins[1:])

    # Single-argument print(...) behaves the same under Python 2 and 3.
    print('#----MODEL----#')
    print(modelDir)

    ###########################
    # Read and evaluate signals
    ###########################

    Signal = []
    for s in SIGNAL:
        x, y = pickBenchmark(s)
        if not recurrent:
            df, weight = loadDataFrame(os.path.join(inputDir, s + '/'),
                                       PRESELECTION, VAR, WEIGHTS, LUMI)
            y_hat = evaluate(model, df.values, scaler)
        else:
            df, weight, collection = loadSequentialDataFrame(
                os.path.join(inputDir, s + '/'), PRESELECTION, COLLECTION,
                REMOVE_VAR, VAR, WEIGHTS, LUMI)
            y_hat = evaluate(model,
                             df.values,
                             scaler,
                             seq_scaler,
                             rnn=True,
                             col=collection)

        # Bin index of the output score of each event (upper edges are used
        # as thresholds, so the first bin gets index 0).
        bin_index = np.digitize(y_hat[:, 0], bins[1:])
        outputWeighted = []
        outputWeightedVar = []
        outputMC = []
        outputMCVar = []
        for i in range(n_bins):
            w = weight.values[np.where(bin_index == i)[0]]
            outputWeighted.append(w.sum())
            outputWeightedVar.append(np.sum(w**2.))  # sum of squared weights
            outputMC.append(len(w))
            outputMCVar.append(np.sqrt(len(w)))

        Signal.append({
            'name': s,
            'm_stop': x,
            'm_X': y,
            'dataset': df,
            'weight': weight,
            'nEvents': weight.sum(),
            'y_pred': y_hat,
            'outputScore': np.array(outputWeighted),
            'outputMC': np.array(outputMC),
            'output_var': np.array(outputWeightedVar),
            'outputMC_var': np.array(outputMCVar)
        })

    ###########################
    # Read and evaluate backgrounds
    ###########################

    totBkgEvents = 0.
    totBkgVar = 0.
    Background = []
    for b in BACKGROUND:
        if not recurrent:
            df, weight = loadDataFrame(os.path.join(inputDir, b + '/'),
                                       PRESELECTION, VAR, WEIGHTS, LUMI)
            y_hat = evaluate(model, df.values, scaler)
        else:
            df, weight, collection = loadSequentialDataFrame(
                os.path.join(inputDir, b + '/'), PRESELECTION, COLLECTION,
                REMOVE_VAR, VAR, WEIGHTS, LUMI)
            y_hat = evaluate(model,
                             df.values,
                             scaler,
                             seq_scaler,
                             rnn=True,
                             col=collection)

        bin_index = np.digitize(y_hat[:, 0], bins[1:])
        outputWeighted = []
        outputWeightedVar = []
        outputMC = []
        outputMCVar = []

        totBkgEvents += weight.sum()
        totBkgVar += np.sum(weight.values**2.)
        for i in range(n_bins):
            w = weight.values[np.where(bin_index == i)[0]]
            outputWeighted.append(w.sum())
            outputWeightedVar.append(np.sum(w**2.))
            outputMC.append(len(w))
            # NOTE(review): the signal loop stores sqrt(len(w)) under
            # 'outputMC_var' while this loop stores len(w); kept as-is
            # because the intended convention cannot be told from here.
            outputMCVar.append(len(w))

        Background.append({
            'name': b,
            'dataset': df,
            'weight': weight,
            'nEvents': weight.sum(),
            'y_pred': y_hat,
            'outputScore': np.array(outputWeighted),
            'outputMC': np.array(outputMC),
            'output_var': np.array(outputWeightedVar),
            'outputMC_var': np.array(outputMCVar)
        })

    # Per-bin background yield and sum of squared weights over all samples.
    totalBkgOutput = np.array([b['outputScore'] for b in Background])
    totalBkgOutput = totalBkgOutput.sum(axis=0)

    totalBkgVar = np.array([b['output_var'] for b in Background])
    totalBkgVar = totalBkgVar.sum(axis=0)

    bkg_total = totalBkgOutput.sum()

    ###########################
    # Significance scan
    ###########################

    for s in Signal:
        significance = []
        significance_err = []
        asimov = []
        for i in range(n_bins):
            # Yields are integrated from bin i upwards, i.e. a lower cut on
            # the output score at the lower edge of bin i.
            sig_yield = s['outputScore'][i:].sum()
            bkg_yield = totalBkgOutput[i:].sum()
            eff_sig = sig_yield / s['nEvents']
            eff_bkg = bkg_yield / bkg_total

            bkg_sigma = np.sqrt(np.sum(totalBkgVar[i:]))
            err_sig = np.sqrt(np.sum(s['output_var'][i:])) / s['nEvents']
            err_bkg = bkg_sigma / bkg_total

            if bkg_yield > 0.:
                rel_err_bkg = bkg_sigma / bkg_yield
            else:
                rel_err_bkg = 0.

            # Relative background uncertainty from the weights combined in
            # quadrature with a flat 25% term.
            total_rel_err = np.sqrt(rel_err_bkg**2. + 0.25**2.)

            # Short-circuit evaluation guards the divisions below. Bins with
            # an empty selection or with a relative uncertainty above 75%
            # are assigned zero significance.
            if ((eff_sig == 0) or (eff_bkg == 0)
                    or (err_sig / eff_sig > 0.75)
                    or (err_bkg / eff_bkg > 0.75)):
                Z = 0.
                Z_err = 0.
                ams = 0.
            else:
                binomial_z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ
                Z = binomial_z(sig_yield, bkg_yield, total_rel_err)
                ams = asimovZ(sig_yield, bkg_yield, bkg_sigma)

                # Vary signal and background by +-1 sigma to estimate the
                # uncertainty on Z.
                Zplus_sig = binomial_z((eff_sig + err_sig) * s['nEvents'],
                                       eff_bkg * bkg_total, total_rel_err)
                Zmins_sig = binomial_z((eff_sig - err_sig) * s['nEvents'],
                                       eff_bkg * bkg_total, total_rel_err)
                Zplus_bkg = binomial_z(eff_sig * s['nEvents'],
                                       (eff_bkg + err_bkg) * bkg_total,
                                       total_rel_err)
                Zmins_bkg = binomial_z(eff_sig * s['nEvents'],
                                       (eff_bkg - err_bkg) * bkg_total,
                                       total_rel_err)

                # Computed only here: previously this ran for every bin and
                # either reused the variations of an earlier bin or raised a
                # NameError when the first bin was rejected above.
                Z_err_sig = abs(Zplus_sig - Zmins_sig) / 2
                Z_err_bkg = abs(Zplus_bkg - Zmins_bkg) / 2
                Z_err = np.sqrt(Z_err_sig**2 + Z_err_bkg**2)

            significance.append(Z)
            significance_err.append(Z_err)
            asimov.append(ams)

        s['sig'] = np.array(significance)
        s['sig_max'] = s['sig'].max()
        s['sig_err'] = np.array(significance_err)
        s['ams'] = np.array(asimov)

    x = np.array([s['m_stop'] for s in Signal], dtype=float)
    y = np.array([s['m_X'] for s in Signal], dtype=float)
    z = np.array([s['sig_max'] for s in Signal], dtype=float)

    ###########################
    # Shape comparison plot
    ###########################

    print('Plotting the output score...')
    plt.figure(figsize=(8, 6))
    ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=3)
    ax1.set_xlim((bins[0], bins[-1]))
    ax1.set_ylabel("Events", horizontalalignment='right', y=1.0)

    # Both distributions are normalised to unit area; only the first signal
    # is drawn.
    first_sig = Signal[0]
    sig_total = first_sig['outputScore'].sum()
    sig_shape = first_sig['outputScore'] / sig_total
    sig_shape_err = np.sqrt(first_sig['output_var']) / sig_total
    bkg_shape = totalBkgOutput / bkg_total
    bkg_shape_err = np.sqrt(totalBkgVar) / bkg_total

    plt.bar(center,
            bkg_shape,
            width=db,
            yerr=bkg_shape_err,
            color='b',
            alpha=0.25,
            error_kw=dict(ecolor='b', lw=1.5),
            label=Background[0]['name'])
    plt.bar(center,
            sig_shape,
            width=db,
            yerr=sig_shape_err,
            label=first_sig['name'],
            color='r',
            alpha=0.25,
            error_kw=dict(ecolor='r', lw=1.5))

    ax1.set_ylim((0., np.max([np.max(bkg_shape), np.max(sig_shape)]) * 1.3))
    plt.legend(loc="best", frameon=False)

    AtlasStyle_mpl.ATLASLabel(ax1, 0.02, 0.925, 'Work in progress')

    ax2 = plt.subplot2grid((4, 4), (3, 0), colspan=4, rowspan=1)
    getRatio(sig_shape, bins, sig_shape_err, bkg_shape, bins, bkg_shape_err,
             'r')
    ax2.set_xlabel('Output score', horizontalalignment='right', x=1.0)
    ax2.set_ylabel('Reco/Truth')
    ax2.set_xlim((0., 1.))
    ax2.set_ylim((0, 2))
    ax2.grid()
    ax2.tick_params(direction='in')
    ax2.xaxis.set_ticks_position('both')
    ax2.yaxis.set_ticks_position('both')

    plt.savefig("plots/" + modelfile + "_shapeComparison_outputScore.pdf")
    plt.savefig("plots/" + modelfile + "_shapeComparison_outputScore.png")
    plt.close()
예제 #2
0
def plot_TrainTest_score(sig_predicted_train,
                         sig_predicted_test,
                         sig_w_train,
                         sig_w_test,
                         bkg_predicted_train,
                         bkg_predicted_test,
                         bkg_w_train,
                         bkg_w_test,
                         binning,
                         fileName="KS_test",
                         normed=False,
                         save=False,
                         ratio=True):
    """Overlay the training and testing output-score distributions.

    Training scores are drawn as filled, weighted histograms; testing scores
    are drawn as markers at the bin centres. ``ks_2samp`` is applied to the
    bin contents of the training and testing histograms and the two
    statistics are written onto the plot.

    Args:
        sig_predicted_train, sig_predicted_test: signal scores (flattened
            with ``ravel()`` before histogramming).
        sig_w_train, sig_w_test: per-event weights of the signal scores.
        bkg_predicted_train, bkg_predicted_test: background scores.
        bkg_w_train, bkg_w_test: per-event weights of the background scores.
        binning: indexable as ``(number of bins, lower edge, upper edge)``.
        fileName: output path without extension, used when ``save`` is true.
        normed: if true, histograms are drawn as densities.
        save: if true, write ``fileName + '.pdf'`` / ``'.png'`` and close
            the figure.
        ratio: if true, add a lower panel filled by ``getRatio``.
    """
    plt.figure(figsize=(8, 6))
    if ratio:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=3)
    else:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)
    ax1.tick_params(direction='in')
    ax1.set_xlim((binning[1], binning[2]))
    ax1.xaxis.set_ticks_position('both')
    ax1.yaxis.set_ticks_position('both')

    # `density` replaces the removed `normed` keyword of plt.hist and
    # np.histogram; for equal-width bins the result is the same.
    s_histTrain, s_binsTrain, _ = plt.hist(
        sig_predicted_train.ravel(),
        weights=sig_w_train,
        histtype='stepfilled',
        color='r',
        label='Signal (Training)',
        alpha=0.5,
        bins=binning[0],
        range=(binning[1], binning[2]),
        density=normed)
    b_histTrain, _, _ = plt.hist(
        bkg_predicted_train.ravel(),
        weights=bkg_w_train,
        histtype='stepfilled',
        color='b',
        label='Background (Training)',
        alpha=0.5,
        bins=binning[0],
        range=(binning[1], binning[2]),
        density=normed)

    s_histTest, s_binsTest = np.histogram(sig_predicted_test.ravel(),
                                          weights=sig_w_test,
                                          bins=binning[0],
                                          range=(binning[1], binning[2]),
                                          density=normed)
    b_histTest, b_binsTest = np.histogram(bkg_predicted_test.ravel(),
                                          weights=bkg_w_test,
                                          bins=binning[0],
                                          range=(binning[1], binning[2]),
                                          density=normed)

    center = (s_binsTrain[:-1] + s_binsTrain[1:]) / 2
    plt.errorbar(center, s_histTest, fmt='o', c='r', label='Signal (Testing)'
                 )  # TODO define yerr = sqrt( sum w^2 ) per bin!
    plt.errorbar(center,
                 b_histTest,
                 fmt='o',
                 c='b',
                 label='Background (Testing)'
                 )  # TODO define yerr = sqrt( sum w^2 ) per bin!

    # The KS statistic compares histogram bin contents, not the raw scores;
    # the p-values are not shown.
    ks_sig, _ = ks_2samp(s_histTrain, s_histTest)
    ks_bkg, _ = ks_2samp(b_histTrain, b_histTest)

    if normed:
        ax1.set(ylabel="a. u.")
    else:
        ax1.set(ylabel="Events")
    plt.legend(loc="best", frameon=False)
    ax1.text(0.65,
             0.7,
             "KS Test S (B): %.3f (%.3f)" % (ks_sig, ks_bkg),
             transform=ax1.transAxes)

    if ratio:
        ax2 = plt.subplot2grid((4, 4), (3, 0), colspan=4, rowspan=1)
        getRatio(s_histTest, s_binsTest, b_histTest, b_binsTest)
        ax2.set(xlabel='Output score', ylabel='S/B')
        ax2.set_xlim((binning[1], binning[2]))
        ax2.set_ylim((0, 2))
        ax2.grid()
        ax2.tick_params(direction='in')
        ax2.xaxis.set_ticks_position('both')
        ax2.yaxis.set_ticks_position('both')

    ax1.set(xlabel='Output score')

    if save:
        plt.savefig(fileName + ".pdf")
        plt.savefig(fileName + ".png")
        plt.close()
예제 #3
0
def plot_TrainTest_score(sig_predicted_train,
                         sig_predicted_test,
                         sig_w_train,
                         sig_w_test,
                         bkg_predicted_train,
                         bkg_predicted_test,
                         bkg_w_train,
                         bkg_w_test,
                         binning,
                         fileName='Test',
                         normed=False,
                         save=False,
                         ratio=True,
                         addStr=''):
    """Draw training vs. testing score distributions with a KS-test legend entry.

    Training scores appear as filled histograms, testing scores as markers
    at the bin centres. All four histograms are filled without event weights
    (``weights=None``); the test weights only enter through ``getSumW2``,
    whose result is handed to ``getRatio`` for the optional lower panel.
    ``sig_w_train`` and ``bkg_w_train`` are accepted but not used.

    Args:
        binning: indexable as ``(number of bins, lower edge, upper edge)``.
        fileName, addStr: when ``save`` is true the figure is written to
            ``plots/<fileName>_TrainTestScore<addStr>`` as PDF and PNG.
        normed: draw densities and divide the ``getSumW2`` results by the
            summed test weights.
        save: write the files (creating ``./plots/`` if missing) and close
            the figure.
        ratio: add the lower panel filled by ``getRatio``.
    """
    print('Plotting the train/test score...')
    n_bins = binning[0]
    score_range = (binning[1], binning[2])

    plt.figure(figsize=(8, 6))
    top_rowspan = 3 if ratio else 4
    ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=top_rowspan)
    ax1.tick_params(direction='in')
    ax1.set_xlim(score_range)
    ax1.xaxis.set_ticks_position('both')
    ax1.yaxis.set_ticks_position('both')

    # Keyword sets shared by the four histograms.
    hist_kw = dict(weights=None,
                   bins=n_bins,
                   range=score_range,
                   density=normed)
    filled_kw = dict(histtype='stepfilled', alpha=0.5, **hist_kw)

    s_histTrain, train_edges, s_patchesTrain = plt.hist(
        sig_predicted_train.ravel(),
        color='r',
        label='Signal (Training)',
        **filled_kw)
    b_histTrain, _, b_patchesTrain = plt.hist(
        bkg_predicted_train.ravel(),
        color='b',
        label='Background (Training)',
        **filled_kw)

    s_histTest, s_binsTest = np.histogram(sig_predicted_test.ravel(),
                                          **hist_kw)
    b_histTest, b_binsTest = np.histogram(bkg_predicted_test.ravel(),
                                          **hist_kw)

    # Testing distributions are drawn as points at the bin centres.
    # TODO define yerr = sqrt( sum w^2 ) per bin!
    midpoints = (train_edges[:-1] + train_edges[1:]) / 2
    s_error = plt.errorbar(midpoints,
                           s_histTest,
                           fmt='o',
                           c='r',
                           label='Signal (Testing)')
    b_error = plt.errorbar(midpoints,
                           b_histTest,
                           fmt='o',
                           c='b',
                           label='Background (Testing)')

    # KS statistics of the bin contents; the p-values are discarded.
    ks_sig, _ = ks_2samp(s_histTrain, s_histTest)
    ks_bkg, _ = ks_2samp(b_histTrain, b_histTest)

    s_w_test = getSumW2(sig_predicted_test.ravel(), sig_w_test, binning)
    if normed:
        s_w_test = s_w_test / np.sum(sig_w_test)
    b_w_test = getSumW2(bkg_predicted_test.ravel(), bkg_w_test, binning)
    if normed:
        b_w_test = b_w_test / np.sum(bkg_w_test)

    # Invisible proxy artist so the KS result shows up as a legend line.
    ks_patch = mpatches.Patch(color='None',
                              label='KS Test S (B): %.3f (%.3f)' %
                              (ks_sig, ks_bkg))

    ax1.set_ylabel('a. u.' if normed else 'Events',
                   horizontalalignment='right',
                   y=1.0)
    legend_handles = [
        s_patchesTrain[0], b_patchesTrain[0], s_error, b_error, ks_patch
    ]
    leg = plt.legend(loc='best', frameon=False, handles=legend_handles)
    leg.get_window_extent()

    if ratio:
        ax2 = plt.subplot2grid((4, 4), (3, 0), colspan=4, rowspan=1)
        getRatio(s_histTest, s_binsTest, s_w_test, b_histTest, b_binsTest,
                 b_w_test, 'r')
        ax2.set_xlabel('EPD', horizontalalignment='right', x=1.0)
        ax2.set_ylabel('S/B')
        ax2.set_xlim(score_range)
        ax2.set_ylim((0, 2))
        ax2.grid()
        ax2.tick_params(direction='in')
        ax2.xaxis.set_ticks_position('both')
        ax2.yaxis.set_ticks_position('both')

    ax1.set_ylim(0., 1.5 * np.maximum(s_histTest.max(), b_histTest.max()))
    ax1.set_xlabel('EPD', horizontalalignment='right', x=1.0)
    AtlasStyle_mpl.ATLASLabel(ax1, 0.022, 0.925, 'Work in progress')

    if not save:
        return

    if not os.path.exists('./plots/'):
        os.makedirs('./plots/')
        print('Creating folder plots')
    out_base = 'plots/' + fileName + '_TrainTestScore' + addStr
    plt.savefig(out_base + '.pdf')
    plt.savefig(out_base + '.png')
    plt.close()
예제 #4
0
def plot_output_score_multiclass(sig_predicted, sig_w, bkg1_predicted, bkg1_w, bkg2_predicted, bkg2_w, bkg3_predicted, bkg3_w, bkg_predicted, bkg_w, binning, fileName="Test", title='Discriminating power', normed=False, save=False, ratio=False,  log=False, sample=None, addStr=''):
    """Plot the signal score on top of a stack of three background scores.

    The backgrounds are stacked in the order bkg3, bkg2, bkg1 (labelled
    W+jets, single top, ttbar), so the last stacked histogram is the total
    background.

    Args:
        sig_predicted, sig_w: signal scores and their event weights.
        bkg1_predicted ... bkg3_w: scores and weights of the three stacked
            backgrounds.
        bkg_predicted, bkg_w: scores and weights passed to ``getSumW2`` for
            the background uncertainty of the ratio panel; presumably the
            combined background - confirm with the caller.
        binning: indexable as ``(number of bins, lower edge, upper edge)``.
        fileName, addStr: the figure is saved as
            ``plots/<fileName>_output_score_multiclass<addStr>[_log]``.
        title: plot title, skipped when ``None``.
        normed: draw densities instead of event counts.
        save: write PDF and PNG (creating ``./plots/`` if missing) and close
            the figure.
        ratio: add a lower panel filled by ``getRatio``.
        log: use a logarithmic y axis.
        sample: optional extra legend line.

    Returns:
        ``(r, s_bins)`` where ``r`` is the return value of ``getRatio`` when
        ``ratio`` is true and ``0`` otherwise, and ``s_bins`` are the bin
        edges of the signal histogram.
    """
    print('Plotting the multiclass output score...')
    plt.figure(figsize=(8, 6))
    if ratio:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=3)
        ax1.set_xlabel('', fontsize=0.)
        ax1.set_xticklabels(())
    else:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)
    ax1.tick_params(direction='in')
    ax1.set_xlim((binning[1], binning[2]))
    ax1.xaxis.set_ticks_position('both')
    ax1.yaxis.set_ticks_position('both')

    bkgs = [bkg3_predicted.ravel(), bkg2_predicted.ravel(), bkg1_predicted.ravel()]
    bweights = [bkg3_w, bkg2_w, bkg1_w]
    labels = [r'$W$+jets', 'single top', r'$t\overline{t}$']
    colors = ['orange', 'g', 'b']

    s_hist, s_bins, s_patches = plt.hist(sig_predicted.ravel(), weights=sig_w, histtype='stepfilled', color='r', label='signal', alpha=0.5, bins=binning[0], range=(binning[1], binning[2]), density=normed)
    b_hist, b_bins, b_patches = plt.hist(bkgs, weights=bweights, histtype='stepfilled', color=colors, label=labels, alpha=0.5, bins=binning[0], range=(binning[1], binning[2]), density=normed, stacked=True)

    # With stacked=True each entry of b_hist is cumulative, so the last one
    # is the total background.
    bkg_total_hist = b_hist[-1]

    log_str = ''
    if log:
        # NOTE(review): newer Matplotlib releases spell this keyword
        # `nonpositive`; kept unchanged for the version this file targets.
        plt.yscale('log', nonposy='clip')
        log_str = '_log'

    if normed:
        ax1.set_ylabel("a. u.", ha='left')
    else:
        ax1.set_ylabel("Events", ha='left')

    if log:
        ax1.set_ylim((0, bkg_total_hist.max() * (30)))
    else:
        ax1.set_ylim((0, bkg_total_hist.max() * (1 + 0.33)))

    if sample is not None:
        sample_patch = mpatches.Patch(color='None', label=sample)
        plt.legend(loc='best', frameon=False, handles=[s_patches[0], b_patches[0][0], b_patches[1][0], b_patches[2][0], sample_patch])
    else:
        plt.legend(loc='best', frameon=False)

    if title is not None:
        plt.title(title)

    AtlasStyle_mpl.ATLASLabel2(ax1, 0.02, 0.9, 'Work in progress')
    AtlasStyle_mpl.LumiLabel(ax1, 0.02, 0.8, lumi=140)

    r = 0
    if ratio:
        # These two were only defined in commented-out code, so ratio=True
        # used to fail with a NameError.
        # NOTE(review): not rescaled when `normed` is true - confirm whether
        # getRatio expects normalised uncertainties in that case.
        s_w = getSumW2(sig_predicted.ravel(), sig_w, binning)
        b_w = getSumW2(bkg_predicted.ravel(), bkg_w, binning)

        ax2 = plt.subplot2grid((4, 4), (3, 0), colspan=4, rowspan=1)
        # Pass the 1-D total background, not the list of stacked histograms.
        r = getRatio(bkg_total_hist, b_bins, b_w, s_hist, s_bins, s_w, 'r')
        ax2.set_xlabel('Discriminant')
        ax2.set_ylabel('variation/nom.')
        ax2.set_xlim((binning[1], binning[2]))
        ax2.set_ylim((-0.5, 2.5))
        ax2.grid()
        ax2.tick_params(direction='in')
        ax2.xaxis.set_ticks_position('both')
        ax2.yaxis.set_ticks_position('both')

    ax1.set(xlabel='EPD')

    if save:
        if not os.path.exists("./plots/"):
            os.makedirs("./plots/")
            print("Creating folder plots")
        plt.savefig("plots/" + fileName + "_output_score_multiclass" + addStr + log_str + ".pdf")
        plt.savefig("plots/" + fileName + "_output_score_multiclass" + addStr + log_str + ".png")
        plt.close()

    if not ratio:
        # Same message and fallback value as before, without relying on a
        # NameError for control flow.
        print('ratio is set to False, r is not defined')
    return r, s_bins
예제 #5
0
def plotShape(var,
              samples,
              weights,
              color,
              binning,
              xTitle,
              yTitle="Events",
              lumi=100,
              unit=None,
              legend=None,
              log=False,
              ratio=False,
              ratioTitle='1/nominal',
              ratioLimit=(0, 2),
              normed=False,
              savePlot=False,
              fileName=None):
    """Plot the distribution of one variable for one or several samples.

    Args:
        var: name of the variable; ``str(var)`` is used as the lookup key.
        samples: either a list of samples (each indexable by the variable
            name) or a single ``(variables, weights)`` tuple.
        weights: per-sample event weights, used only when ``samples`` is a
            list.
        color: marker colour (single sample) or one colour per sample.
        binning: indexable as ``(number of bins, lower edge, upper edge)``.
        xTitle: x-axis label.
        yTitle: currently unused; the y label is "Events" or "a. u."
            depending on ``normed``.
        lumi: value forwarded as a string to ``LumiLabel``.
        unit: ``None`` or 'MeV' keeps the values, 'GeV' multiplies them by
            0.001 (case-insensitive).
        legend: label (single sample) or one label per sample.
        log: use a logarithmic y axis.
        ratio: add a lower panel comparing every sample to the first one.
        ratioTitle, ratioLimit: y label and y range of the ratio panel.
        normed: draw densities instead of event counts.
        savePlot: write ``fileName + '.pdf'`` / ``'.png'`` and close the
            figure.
        fileName: output path without extension.

    Returns:
        ``0`` if ``samples`` is neither a list nor a tuple, otherwise
        ``None``.

    Raises:
        ValueError: if ``unit`` is not ``None``, 'MeV' or 'GeV'.
    """
    # Validate the inputs before a figure is created.
    if unit is None or unit.lower() == 'mev':
        unit_fact = 1.
    elif unit.lower() == 'gev':
        unit_fact = 0.001
    else:
        raise ValueError("Unknown unit {!r}, expected None, 'MeV' or 'GeV'".format(unit))

    multiple = isinstance(samples, list)
    if not multiple and not isinstance(samples, tuple):
        print("Expected {} sample as tuple of variables and weights!".format(
            samples))
        return 0

    plt.figure(figsize=(8, 6))

    if ratio:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=3)
        ax1.set_xlabel('', fontsize=0.)
        ax1.set_xticklabels(())
    else:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)
    ax1.tick_params(direction='in')
    ax1.set_xlim((binning[1], binning[2]))
    ax1.xaxis.set_ticks_position('both')
    ax1.yaxis.set_ticks_position('both')

    # One (bin contents, bin edges) pair and one getSumW2 result per sample.
    hists = []
    sumW2 = []

    if not multiple:
        values = samples[0][str(var)].ravel()
        sample_w = samples[1].ravel()

        # NOTE(review): getSumW2 receives the unscaled values while the
        # histogram uses values * unit_fact - confirm this is intended.
        sumW2.append(getSumW2(values, sample_w, binning))

        # np.histogram returns (contents, edges) only.
        hists.append(
            np.histogram(values * unit_fact,
                         weights=sample_w,
                         bins=binning[0],
                         range=(binning[1], binning[2]),
                         density=normed))
        hist, bins = hists[0]

        width = bins[1] - bins[0]
        center = (bins[:-1] + bins[1:]) / 2

        plt.errorbar(center,
                     hist,
                     xerr=[width / 2.] * binning[0],
                     yerr=sumW2[0].ravel(),
                     fmt='o',
                     color=color,
                     label=legend)

        _max = hist.max()

    else:
        for i, smp in enumerate(samples):
            sumW2.append(
                getSumW2(smp[str(var)].ravel(), weights[i].ravel(), binning))

            hists.append(
                np.histogram(smp[str(var)].ravel() * unit_fact,
                             weights=weights[i],
                             bins=binning[0],
                             range=(binning[1], binning[2]),
                             density=normed))

            hist, bins = hists[i]
            width = bins[1] - bins[0]
            center = (bins[:-1] + bins[1:]) / 2

            plt.errorbar(center,
                         hist,
                         xerr=[width / 2.] * binning[0],
                         yerr=sumW2[i].ravel(),
                         fmt='o',
                         color=color[i],
                         # legend defaults to None; do not index it then.
                         label=legend[i] if legend is not None else None)

        _max = np.max([h[0].max() for h in hists])

    if normed:
        ax1.set_ylabel("a. u.", va='top')
    else:
        ax1.set_ylabel("Events", va='top')

    if log:
        ax1.set_yscale('log')
        ax1.set_ylim((0.01, _max * 100))
    else:
        if normed:
            ax1.set_ylim((0, 1.5))
        else:
            ax1.set_ylim((0, _max * 1.4))

    plt.legend(loc='best', frameon=False)

    # NOTE(review): `AtlasLabel_mpl` is resolved at module level; verify the
    # module really goes by this name in this file.
    AtlasLabel_mpl.ATLASLabel(ax1, 0.02, 0.9, 'Work in progress')
    AtlasLabel_mpl.LumiLabel(ax1, 0.02, 0.8, lumi=str(lumi))

    if ratio:
        ax2 = plt.subplot2grid((4, 4), (3, 0), colspan=4, rowspan=1)

        # Every sample after the first is compared to the first one; with a
        # single sample the loop is empty and only the panel is drawn.
        for i in range(1, len(hists)):
            getRatio(hists[i][0], hists[i][1], sumW2[i], hists[0][0],
                     hists[0][1], sumW2[0], color[i])

        ax2.set_xlabel(xTitle, ha='right')
        ax2.set_ylabel(ratioTitle, va='top')
        ax2.set_xlim((binning[1], binning[2]))
        ax2.set_ylim(ratioLimit)
        ax2.grid()
        ax2.tick_params(direction='in')
        ax2.xaxis.set_ticks_position('both')
        ax2.yaxis.set_ticks_position('both')

    ax1.set(xlabel=xTitle)

    if savePlot:
        plt.savefig(fileName + ".pdf")
        plt.savefig(fileName + ".png")
        plt.close()
예제 #6
0
def plot_output_score(sig_predicted,
                      sig_w,
                      bkg_predicted,
                      bkg_w,
                      binning,
                      fileName='Test',
                      normed=False,
                      save=False,
                      addStr='',
                      ratio=True,
                      log=False,
                      sample=None):
    """Plot the signal and background output-score histograms.

    Both distributions are drawn as filled step histograms on a common
    axis; optionally a ratio panel produced by ``getRatio`` is added below.

    Args:
        sig_predicted: Signal scores; flattened with ``.ravel()``.
        sig_w: Per-event weights passed to the signal histogram.
        bkg_predicted: Background scores; flattened with ``.ravel()``.
        bkg_w: Per-event weights passed to the background histogram.
        binning: Indexable with three entries; ``binning[0]`` is passed as
            ``bins`` and ``(binning[1], binning[2])`` as the histogram range
            and x-axis limits.
        fileName: Prefix of the output file name (used only when ``save``).
        normed: Forwarded as ``density`` to the histograms; also selects
            the y-axis label.
        save: If true, write PDF and PNG files into ``plots/`` and close
            the figure.
        addStr: Extra string inserted into the output file name.
        ratio: If true, draw the lower ratio panel.
        log: If true, use a logarithmic y-axis and append ``_log`` to the
            output file name.
        sample: Optional text shown as an additional legend entry.

    Returns:
        Tuple ``(r, s_bins)``: ``r`` is the value returned by ``getRatio``
        (``None`` when ``ratio`` is false) and ``s_bins`` are the bin edges
        of the signal histogram.
    """
    print('Plotting the binary output score...')
    plt.figure(figsize=(8, 6))

    x_range = (binning[1], binning[2])

    if ratio:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=3)
        # The upper panel shares its x-axis with the ratio panel below, so
        # its own label and tick labels are suppressed.
        ax1.set_xlabel('', fontsize=0.)
        ax1.set_xticklabels(())
    else:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)
    ax1.tick_params(direction='in')
    ax1.set_xlim(x_range)
    ax1.xaxis.set_ticks_position('both')
    ax1.yaxis.set_ticks_position('both')

    # Settings common to both histograms.
    hist_style = dict(histtype='stepfilled',
                      alpha=0.5,
                      bins=binning[0],
                      range=x_range,
                      density=normed)

    s_hist, s_bins, s_patches = plt.hist(sig_predicted.ravel(),
                                         weights=sig_w,
                                         color='r',
                                         label='Signal',
                                         **hist_style)
    b_hist, b_bins, b_patches = plt.hist(bkg_predicted.ravel(),
                                         weights=bkg_w,
                                         color='b',
                                         label='Background',
                                         **hist_style)

    log_str = ''

    if log:
        # 'nonposy' was renamed to 'nonpositive' in Matplotlib 3.3 and the
        # old spelling was later removed. Try the original keyword first so
        # that older installations behave exactly as before.
        try:
            plt.yscale('log', nonposy='clip')
        except (TypeError, ValueError):
            plt.yscale('log', nonpositive='clip')
        log_str = '_log'

    s_w = getSumW2(sig_predicted.ravel(), sig_w, binning)
    b_w = getSumW2(bkg_predicted.ravel(), bkg_w, binning)

    # Place the y-axis label at the top of the axis in both cases (the
    # normalised branch previously passed x=1.0 instead of y=1.0).
    if normed:
        ax1.set_ylabel('a. u.', horizontalalignment='right', y=1.0)
    else:
        ax1.set_ylabel('Events', horizontalalignment='right', y=1.0)

    if log:
        # NOTE(review): a lower limit of 0 cannot be displayed on a log
        # axis; confirm the intended lower bound.
        ax1.set_ylim((0, b_hist.max() * (30)))
    else:
        ax1.set_ylim((0, b_hist.max() * (1 + 0.33)))

    if sample is not None:
        # Invisible patch so that the sample name appears as a legend line.
        sample_patch = mpatches.Patch(color='None', label=sample)
        plt.legend(loc='best',
                   frameon=False,
                   handles=[s_patches[0], b_patches[0], sample_patch])
    else:
        plt.legend(loc='best', frameon=False)

    AtlasStyle_mpl.ATLASLabel2(ax1, 0.02, 0.9, 'Work in progress')
    AtlasStyle_mpl.LumiLabel(ax1, 0.02, 0.8, lumi=140)

    # Defined up front so the return statement is valid without a ratio panel.
    r = None
    if ratio:
        ax2 = plt.subplot2grid((4, 4), (3, 0), colspan=4, rowspan=1)
        r = getRatio(s_hist, s_bins, s_w, b_hist, b_bins, b_w, 'r')
        ax2.set_xlabel('EPD', horizontalalignment='right', x=1.0)
        ax2.set_ylabel('S/B')
        ax2.set_xlim(x_range)
        ax2.set_ylim((-0.5, 2.5))
        ax2.grid()
        ax2.tick_params(direction='in')
        ax2.xaxis.set_ticks_position('both')
        ax2.yaxis.set_ticks_position('both')

    ax1.set_xlabel('EPD', horizontalalignment='right', x=1.0)

    if save:
        if not os.path.exists('./plots/'):
            os.makedirs('./plots/')
            print('Creating folder plots')
        out_base = 'plots/' + fileName + '_output_score' + addStr + log_str
        plt.savefig(out_base + '.pdf')
        plt.savefig(out_base + '.png')
        plt.close()
    return r, s_bins
예제 #7
0
def plot_output_score(sig_predicted,
                      sig_w,
                      bkg_predicted,
                      bkg_w,
                      binning,
                      fileName=None,
                      normed=False,
                      save=False,
                      ratio=True):
    """Plot the nominal and varied ttbar output-score histograms.

    The first sample is labelled 'ttbar nominal' and the second
    'ttbar radiation low'; optionally a ratio panel produced by
    ``getRatio`` (second sample over first) is added below.

    Args:
        sig_predicted: Scores of the nominal sample; flattened with
            ``.ravel()``.
        sig_w: Per-event weights passed to the nominal histogram.
        bkg_predicted: Scores of the variation sample; flattened with
            ``.ravel()``.
        bkg_w: Per-event weights passed to the variation histogram.
        binning: Indexable with three entries; ``binning[0]`` is passed as
            ``bins`` and ``(binning[1], binning[2])`` as the histogram range
            and x-axis limits.
        fileName: Output path without extension. Must be a string when
            ``save`` is true, since the extension is appended to it.
        normed: Forwarded as ``density`` to the histograms; also selects
            the y-axis label.
        save: If true, write ``fileName`` + '.pdf' / '.png' and close the
            figure.
        ratio: If true, draw the lower ratio panel.

    Returns:
        Tuple ``(r, s_bins)``: ``r`` is the value returned by ``getRatio``
        (``None`` when ``ratio`` is false) and ``s_bins`` are the bin edges
        of the nominal histogram.
    """
    plt.figure(figsize=(8, 6))

    x_range = (binning[1], binning[2])

    if ratio:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=3)
        # The upper panel shares its x-axis with the ratio panel below, so
        # its own label and tick labels are suppressed.
        ax1.set_xlabel('', fontsize=0.)
        ax1.set_xticklabels(())
    else:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)
    ax1.tick_params(direction='in')
    ax1.set_xlim(x_range)
    ax1.xaxis.set_ticks_position('both')
    ax1.yaxis.set_ticks_position('both')

    # Settings common to both histograms.
    hist_style = dict(histtype='stepfilled',
                      alpha=0.5,
                      bins=binning[0],
                      range=x_range,
                      density=normed)

    s_hist, s_bins, s_patches = plt.hist(sig_predicted.ravel(),
                                         weights=sig_w,
                                         color='r',
                                         label='ttbar nominal',
                                         **hist_style)
    b_hist, b_bins, b_patches = plt.hist(bkg_predicted.ravel(),
                                         weights=bkg_w,
                                         color='b',
                                         label='ttbar radiation low',
                                         **hist_style)

    s_w = getSumW2(sig_predicted.ravel(), sig_w, binning)
    b_w = getSumW2(bkg_predicted.ravel(), bkg_w, binning)

    if normed:
        ax1.set_ylabel("a. u.", va='top')
    else:
        ax1.set_ylabel("Events", va='top')

    # Leave headroom above the nominal histogram for legend and labels.
    ax1.set_ylim((0, s_hist.max() * (1 + 0.33)))
    plt.legend(loc="best", frameon=False)

    AtlasLabel_mpl.ATLASLabel(ax1, 0.02, 0.9, 'Work in progress')
    AtlasLabel_mpl.LumiLabel(ax1, 0.02, 0.8, lumi=100)

    # Defined up front so the return statement is valid without a ratio panel.
    r = None
    if ratio:
        ax2 = plt.subplot2grid((4, 4), (3, 0), colspan=4, rowspan=1)
        r = getRatio(b_hist, b_bins, b_w, s_hist, s_bins, s_w)
        ax2.set_xlabel('Discriminant', ha='right')
        ax2.set_ylabel('variation/nom.', va='top')
        ax2.set_xlim(x_range)
        ax2.set_ylim((-0.5, 2.5))
        ax2.grid()
        ax2.tick_params(direction='in')
        ax2.xaxis.set_ticks_position('both')
        ax2.yaxis.set_ticks_position('both')

    ax1.set(xlabel='Output score')

    if save:
        plt.savefig(fileName + ".pdf")
        plt.savefig(fileName + ".png")
        plt.close()
    return r, s_bins