Example #1
def plot():
  print('test')
  for s in signal:
    s['p'] = []
    for i in s['cls']:
      s['p'].append(normQuantileHack(i))

  print('Plotting p-value ...')
  fig = plt.figure(figsize=(8,6))
  ax1 = plt.subplot2grid((4,4), (0,0), colspan=4, rowspan=4)
  ax1.set_xlabel('Number of bins', horizontalalignment='right', x=1.0)
  ax1.set_ylabel('p', horizontalalignment='right', y=1.0)

  for s in signal:
    if logScale:
      ax1.set_yscale('log')
    plt.plot([1,2,3,4,5,6,7,8,9,10], s['p'], 'o-', color=s['color'], label=s['legend'], lw=2)
    ax1.set_xlim((1, 10))
    ax1.set_ylim((0, 2.5))

  leg = plt.legend(loc="upper right", frameon=False)

  #AtlasStyle_mpl.ATLASLabel(ax1, 0.02, 0.25, 'Work in progress')
  AtlasStyle_mpl.Text(ax1, 0.15, 0.83, 'Simulation')
  AtlasStyle_mpl.LumiLabel(ax1, 0.15, 0.77, lumi=LUMI*0.001)

  plt.savefig(SAVEDIR+FILENAME+'.pdf')
  plt.savefig(SAVEDIR+FILENAME+'.png')
  plt.close()
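The snippet above leans on module-level state that the listing does not show (plt, AtlasStyle_mpl, signal, logScale, LUMI, SAVEDIR, FILENAME, and the helper normQuantileHack). A minimal sketch of that assumed context; every name and value here is an assumption inferred from the call sites, not taken from the original module:

import matplotlib
matplotlib.use('Agg')  # assumed: figures are only written to disk
import matplotlib.pyplot as plt

import AtlasStyle_mpl  # assumed: local helper module providing the ATLAS-style labels

# Assumed globals; the values are placeholders.
SAVEDIR = 'plots/'
FILENAME = 'pvalue'
LUMI = 140000.      # pb^-1, since the label call scales by LUMI*0.001
logScale = False

# Each signal entry is assumed to carry per-bin inputs plus plot metadata;
# normQuantileHack (not sketched here) maps each entry of 'cls' to a p-value.
signal = [
    {'cls': [0.5] * 10, 'color': 'r', 'legend': 'bWN-500-380'},
]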
Example #2
def plot_confusion_matrix(y_test,
                          y_hat,
                          classes,
                          normalize=False,
                          title='Confusion matrix',
                          sample_weight=None,
                          fileName=None):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    cm = confusion_matrix(y_test, y_hat, sample_weight=sample_weight)
    np.set_printoptions(precision=3)

    cmap = plt.cm.Blues

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j,
                 i,
                 format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

    AtlasStyle_mpl.ATLASLabel(plt.gca(), 0.02, 0.9, 'Work in progress')

    if fileName:
        plt.savefig(fileName + ".pdf")
        plt.savefig(fileName + ".png")
        plt.close()
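A usage sketch for Example #2, assuming scikit-learn's confusion_matrix and the usual numpy/matplotlib/itertools imports at module level (the ATLAS label call additionally requires the AtlasStyle_mpl helper). The labels and predictions below are made up:

import itertools

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix  # used inside plot_confusion_matrix

# Hypothetical two-class example: 0 = background, 1 = signal.
y_test = np.array([0, 0, 1, 1, 1, 0])
y_hat = np.array([0, 1, 1, 1, 0, 0])

plot_confusion_matrix(y_test, y_hat,
                      classes=['background', 'signal'],
                      normalize=True,
                      fileName='plots/confusion')  # writes .pdf and .png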
Example #3
def plot_event_display(content, output_fname=None, vmin=1e-3, vmax=1, title=''):
  '''
  Function to help you visualize an event grid topology on a log scale
  Args:
  -----
      content : numpy array, first arg to imshow, 
          content of the image
          e.g.: images.mean(axis=0) --> the average image

      output_fname : string, name of the output file where the plot will be 
          saved. 

      vmin : (default = 1e-3) float, lower bound of the pixel intensity
          scale before saturation (only applied via the LogNorm call,
          currently commented out)

      vmax : (default = 1) float, upper bound of the pixel intensity
          scale before saturation (see vmin)

      title : (default = '') string, title of the plot, to be displayed 
          on top of the image
  '''
  fig, ax = plt.subplots(figsize=(8, 6))

  extent = (-3.2, 3.2, -3, 3)

  im = ax.imshow(content, interpolation='nearest',
                 origin='lower', extent=extent)
                 #norm=LogNorm(vmin=vmin, vmax=vmax), origin='lower', extent=extent)
                 #norm=LogNorm(vmin=vmin, vmax=vmax), extent=extent)

  cbar = plt.colorbar(im, fraction=0.05, pad=0.05)
  cbar.set_label(r'1/$m_{eff}$  [GeV]', y=0.85)
  plt.xlabel(r'Azimuthal Angle $(\phi)$')
  plt.ylabel(r'Pseudorapidity $(\eta)$')
  plt.title(title)
  AtlasStyle_mpl.ATLASLabel(ax, 0.02, 0.9, 'Work in progress')

  if output_fname is not None:
    plt.savefig(output_fname+'.pdf')
    plt.savefig(output_fname+'.png')
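A usage sketch for Example #3 with a random stand-in image; it assumes the module-level imports below plus the AtlasStyle_mpl helper. Note that vmin/vmax only take effect if the commented-out LogNorm argument is restored, which also needs the import shown here:

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm  # needed if the norm=LogNorm(...) line is uncommented

# Fake 60x64 image spanning the (phi, eta) extent hard-coded above.
rng = np.random.default_rng(0)
image = rng.exponential(scale=0.05, size=(60, 64))

plot_event_display(image, output_fname='plots/event_display',
                   vmin=1e-3, vmax=1, title='Average event image')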
Example #4
def main():

    infofile = open(modelDir.replace('.h5', '_infofile.txt'))
    infos = infofile.readlines()
    analysis = infos[0].replace('Used analysis method: ', '').replace('\n', '')
    dataset = DatasetDir + infos[3].replace('Used dataset: ', '').replace(
        '\n', '')
    nvar = infos[5].replace('Used variables for training: ',
                            '').replace('\n', '')
    nvar = nvar.split()

    model = load_model(modelDir)

    scaler = joblib.load(SCALING)

    recurrent = False
    if analysis.lower() == 'rnn':
        recurrent = True

    h5f = h5py.File(dataset + '.h5', 'r')
    X_train = h5f['X_train'][:]
    y = h5f['y_train'][:]

    y_train = deepcopy(y)
    y_train[y != 0] = 0.
    y_train[y == 0] = 1.

    collection = []
    if recurrent:
        for col in COLLECTION:
            collection.append(h5f['X_train_' + col][:])

    h5f.close()

    where_nan = np.isnan(X_train)
    X_train[where_nan] = -999.
    X_train = scaler.transform(
        X_train)  # collection already standardized in training

    print('#----MODEL----#')
    print(modelDir)
    model.summary()  # summary() prints itself; wrapping it in print would add a stray 'None'

    ######################################
    # Read in trained and tested dataset #
    ######################################

    if recurrent:
        y_hat = model.predict(collection + [X_train])
    else:
        y_hat = model.predict(X_train)

    importanceBySquaredWeight = getImportanceBySquaredWeight(
        model, nvar, recurrent)
    importanceByWeight = getImportanceByWeight(model, nvar, recurrent)
    importanceByGrad = getImportanceByGradient(model, nvar, X_train, collection,
                                               recurrent)

    # Re-shuffle for re-evaluate
    X_train_reshuffled = []
    for idx, var in enumerate(nvar):
        X = np.copy(X_train)
        print(X[:1])
        np.random.shuffle(X[:, idx])  # permute this variable's column in place
        print(X[:1], '\n')
        X_train_reshuffled.append(X)

    roc = []
    auc = []

    for i in range(len(X_train_reshuffled)):
        print(type(X_train_reshuffled[i]))
        if recurrent:
            y_predict = model.predict(collection + [X_train_reshuffled[i]])
        else:
            y_predict = model.predict(X_train_reshuffled[i])

        roc.append(roc_curve(y_train, y_predict[:, 0]))
        auc.append(roc_auc_score(y_train, y_predict[:, 0]))
        del y_predict

    roc.append(roc_curve(y_train, y_hat[:, 0]))
    auc.append(roc_auc_score(y_train, y_hat[:, 0]))
    print(auc, '\n', importanceBySquaredWeight, '\n', importanceByWeight, '\n', importanceByGrad, '\n')

    print(100 * '#')
    print('\n\t\t\tVariable ranking')
    print('\n sum of squared weights \t sum of absolute weights \t gradients \t AUC (after shuffle)')
    print(100 * '-')
    for i in range(len(nvar)):
        print('{}: {}\t{}: {}\t{}: {}\t{}: {}'.format(
            importanceBySquaredWeight[i][0], importanceBySquaredWeight[i][1],
            importanceByWeight[i][0], importanceByWeight[i][1],
            importanceByGrad[i][0], importanceByGrad[i][1], nvar[i], auc[i]))
    print(100 * '-')
    print(100 * '#')

    print('Plotting the ROC curves ...')
    fig = plt.figure(figsize=(8, 6))
    ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)
    ax1.set_xlim((0, 1))
    ax1.set_ylim((0, 1))
    ax1.set_xlabel(r'$\epsilon_{Sig.}$', horizontalalignment='right', x=1.0)
    ax1.set_ylabel("$r_{Bkg.}$", horizontalalignment='right', y=1.0)

    # The last roc/auc entry is the unshuffled baseline; the IndexError
    # fallback below labels it accordingly.
    for i in range(len(roc)):
        try:
            plt.plot(roc[i][1],
                     1 - roc[i][0],
                     '-',
                     label='w/o %s (AUC = %0.4f)' % (nvar[i], auc[i]))
        except IndexError:
            plt.plot(roc[i][1],
                     1 - roc[i][0],
                     '-',
                     label='Default (AUC = %0.4f)' % (auc[i]))

    plt.plot([0, 1], [1, 0], '--', color=(0.6, 0.6, 0.6), label='Luck')
    leg = plt.legend(loc="lower left", frameon=False)

    AtlasStyle_mpl.ATLASLabel(ax1, 0.13, 0.9, 'Work in progress')
    #AtlasStyle_mpl.LumiLabel(ax1, 0.02, 0.3, lumi=LUMI*0.001)

    plt.savefig("plots/" + modelfile + "_ROC_n-1.pdf")
    plt.savefig("plots/" + modelfile + "_ROC_n-1.png")
    plt.close()
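The shuffle-and-re-evaluate loop above is the standard permutation-importance recipe. The weight-based ranking helpers are external; here is one plausible sketch of getImportanceByWeight for the non-recurrent case, assuming a Keras model whose first layer is Dense with a kernel of shape (n_inputs, n_units); the real helper may differ:

import numpy as np

def getImportanceByWeight(model, nvar, recurrent=False):
    """Rank inputs by summed absolute first-layer weights (sketch only)."""
    kernel = model.layers[0].get_weights()[0]   # assumed shape: (n_inputs, n_units)
    scores = np.abs(kernel).sum(axis=1)         # one score per input variable
    # Return (name, score) pairs, largest first, matching how the ranking
    # table above indexes the result.
    return sorted(zip(nvar, scores), key=lambda p: p[1], reverse=True)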
Example #5
def plot_classification(y_true,
                        y_predict,
                        weights,
                        fileName="Test",
                        save=False,
                        weighted=False,
                        train=False,
                        sample=None,
                        addStr=''):
    print('Plotting the classification for true labels...')
    if weighted:
        addStr += '_weighted'
    if train:
        addStr += '_train'
    if train and weighted:
        print('For weighted events, the whole dataset has to be used')
        return 0
    y_predict_class = np.argmax(y_predict, axis=1)
    classes = [0, 1, 2, 3]  #Different classes
    assignal = []
    astt = []
    assinglet = []
    asWjets = []

    explain_patch = mpatches.Patch(color='None', label="predicted label")

    if weighted:
        for i in range(0, 4):
            assignal.append(
                np.sum(weights[np.logical_and(y_true == i,
                                              y_predict_class == 0)]))
            astt.append(
                np.sum(weights[np.logical_and(y_true == i,
                                              y_predict_class == 1)]))
            assinglet.append(
                np.sum(weights[np.logical_and(y_true == i,
                                              y_predict_class == 2)]))
            asWjets.append(
                np.sum(weights[np.logical_and(y_true == i,
                                              y_predict_class == 3)]))
    else:
        for i in range(0, 4):
            n = float(y_predict_class[y_true == i].shape[0])

            u, counts = np.unique(y_predict_class[y_true == i],
                                  return_counts=True)

            #print(u.tolist())
            #print(counts.tolist())

            try:
                assignal.append(counts[u.tolist().index(0)] / n)
            except ValueError:
                assignal.append(0)
            try:
                astt.append(counts[u.tolist().index(1)] / n)
            except ValueError:
                astt.append(0)
            try:
                assinglet.append(counts[u.tolist().index(2)] / n)
            except ValueError:
                assinglet.append(0)
            try:
                asWjets.append(counts[u.tolist().index(3)] / n)
            except ValueError:
                asWjets.append(0)

    width = 1.

    bar0 = plt.bar(classes, assignal, width, label=r'Signal', color='r')
    bar1 = plt.bar(classes,
                   astt,
                   width,
                   bottom=assignal,
                   label=r'$t\overline{t}$',
                   color='b')
    bar2 = plt.bar(classes,
                   assinglet,
                   width,
                   bottom=np.array(astt) + np.array(assignal),
                   label=r'Single Top',
                   color='g')
    bar3 = plt.bar(classes,
                   asWjets,
                   width,
                   bottom=np.array(assinglet) + np.array(astt) +
                   np.array(assignal),
                   label='$W$ + jets',
                   color='orange')

    plt.xlabel('true label')
    #plt.legend(loc='best',handles=[explain_patch, bar0, bar1, bar2, bar3])
    plt.xticks(np.arange(4),
               (r'Signal', r'$t\overline{t}$', r'Single Top', '$W$ + jets'))
    plt.title('Classification')

    if weighted:
        plt.ylim(
            0,
            max([
                assignal[i] + astt[i] + assinglet[i] + asWjets[i]
                for i in range(0, 4)
            ]) * (1 + 0.33))

    box = plt.gca().get_position()
    plt.gca().set_position([box.x0, box.y0, box.width * 0.8, box.height])

    if sample is not None:
        sample_patch1 = mpatches.Patch(color='None', label=sample[0])
        sample_patch2 = mpatches.Patch(color='None', label=sample[1])
        plt.gca().legend(loc='center left',
                         bbox_to_anchor=(1, 0.5),
                         handles=[
                             explain_patch, bar0, bar1, bar2, bar3,
                             sample_patch1, sample_patch2
                         ])
    else:
        plt.gca().legend(loc='center left',
                         bbox_to_anchor=(1, 0.5),
                         handles=[explain_patch, bar0, bar1, bar2, bar3])

    if weighted:
        ax1 = plt.gca()
        AtlasStyle_mpl.ATLASLabel(ax1, 0.02, 0.9, 'Work in progress')
        AtlasStyle_mpl.LumiLabel(ax1, 0.02, 0.8, lumi=140)

    #plt.gca().set_ylim([0,1.2])

    if save:
        if not os.path.exists("./plots/"):
            os.makedirs("./plots/")
            print("Creating folder plots")
        plt.savefig("plots/" + fileName + "_Classification" + addStr + ".pdf")
        plt.savefig("plots/" + fileName + "_Classification" + addStr + ".png")
        plt.close()
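A usage sketch for Example #5 with made-up labels, outputs, and weights; it assumes numpy, matplotlib.pyplot as plt, matplotlib.patches as mpatches, os, and AtlasStyle_mpl are available at module level. Class indices follow the tick labels above (0 = Signal, 1 = ttbar, 2 = single top, 3 = W+jets):

import numpy as np

rng = np.random.default_rng(1)
n = 1000
y_true = rng.integers(0, 4, size=n)     # true class per event
y_predict = rng.random(size=(n, 4))     # raw per-class network outputs
weights = rng.random(size=n)            # per-event MC weights

plot_classification(y_true, y_predict, weights,
                    fileName='Test', save=True, weighted=True,
                    sample=('MC16a', 'bWN-500-380'))  # sample strings are placeholders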
Example #6
def main():

    model = load_model(modelDir)

    scaler = joblib.load(SCALING)

    infofile = open(modelDir.replace('.h5', '_infofile.txt'))
    infos = infofile.readlines()
    analysis = infos[0].replace('Used analysis method: ', '').replace('\n', '')
    dataset = DatasetDir + infos[3].replace('Used dataset: ', '').replace(
        '\n', '')
    recurrent = False
    if analysis.lower() == 'rnn':
        recurrent = True
        seq_scaler = dataset + '_scaling.json'

    db = (RESOLUTION[2] - RESOLUTION[1]
          ) / RESOLUTION[0]  # bin width in discriminator distribution
    bins = np.arange(RESOLUTION[1], RESOLUTION[2] + db,
                     db)  # bin edges in discriminator distribution
    center = (bins[:-1] + bins[1:]) / 2

    print('#----MODEL----#')
    print(modelDir)

    ###########################
    # Read and evaluate signals
    ###########################

    Signal = []
    for s in SIGNAL:
        x, y = pickBenchmark(s)
        if not recurrent:
            df, weight = loadDataFrame(os.path.join(inputDir, s + '/'),
                                       PRESELECTION, VAR, WEIGHTS, LUMI)
            y_hat = evaluate(model, df.values, scaler)
        else:
            df, weight, collection = loadSequentialDataFrame(
                os.path.join(inputDir, s + '/'), PRESELECTION, COLLECTION,
                REMOVE_VAR, VAR, WEIGHTS, LUMI)
            y_hat = evaluate(model,
                             df.values,
                             scaler,
                             seq_scaler,
                             rnn=True,
                             col=collection)

        bin_index = np.digitize(
            y_hat[:, 0],
            bins[1:])  # get the bin index of the output score for each event
        outputWeighted = []
        outputWeightedVar = []
        outputMC = []
        outputMCVar = []
        for i in range(len(bins[1:])):
            w = weight.values[np.where(bin_index == i)[0]]
            sigma = np.sum(w**2.)
            outputWeighted.append(w.sum())
            outputWeightedVar.append(sigma)
            outputMC.append(len(w))
            outputMCVar.append(np.sqrt(len(w)))

        Signal.append({
            'name': s,
            'm_stop': x,
            'm_X': y,
            'dataset': df,
            'weight': weight,
            'nEvents': weight.sum(),
            'y_pred': y_hat,
            'outputScore': np.array(outputWeighted),
            'outputMC': np.array(outputMC),
            'output_var': np.array(outputWeightedVar),
            'outputMC_var': np.array(outputMCVar)
        })

        del df, weight, y_hat, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar

    ###########################
    # Read and evaluate backgrounds
    ###########################

    totBkgEvents = 0.
    totBkgVar = 0.
    Background = []
    for b in BACKGROUND:
        if not recurrent:
            df, weight = loadDataFrame(os.path.join(inputDir, b + '/'),
                                       PRESELECTION, VAR, WEIGHTS, LUMI)
            y_hat = evaluate(model, df.values, scaler)
        else:
            df, weight, collection = loadSequentialDataFrame(
                os.path.join(inputDir, b + '/'), PRESELECTION, COLLECTION,
                REMOVE_VAR, VAR, WEIGHTS, LUMI)
            y_hat = evaluate(model,
                             df.values,
                             scaler,
                             seq_scaler,
                             rnn=True,
                             col=collection)

        bin_index = np.digitize(y_hat[:, 0], bins[1:])
        outputWeighted = []
        outputWeightedVar = []
        outputMC = []
        outputMCVar = []

        totBkgEvents += weight.sum()
        totBkgVar += np.sum(weight.values**2.)
        for i in range(len(bins[1:])):
            w = weight.values[np.where(bin_index == i)[0]]
            sigma = np.sum(w**2.)
            outputWeighted.append(w.sum())
            outputWeightedVar.append(sigma)
            outputMC.append(len(w))
            outputMCVar.append(np.sqrt(len(w)))  # sqrt(N) Poisson error, matching the signal loop

        Background.append({
            'name': b,
            'dataset': df,
            'weight': weight,
            'nEvents': weight.sum(),
            'y_pred': y_hat,
            'outputScore': np.array(outputWeighted),
            'outputMC': np.array(outputMC),
            'output_var': np.array(outputWeightedVar),
            'outputMC_var': np.array(outputMCVar)
        })

        del df, weight, y_hat, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar

    totalBkgOutput = np.array([b['outputScore'] for b in Background])
    totalBkgOutput = totalBkgOutput.sum(axis=0)

    totalBkgVar = np.array([b['output_var'] for b in Background])
    totalBkgVar = totalBkgVar.sum(axis=0)

    for s in Signal:
        significance = []
        significance_err = []
        asimov = []
        tot_rel = np.sqrt(np.sum(s['output_var'])) / s['nEvents']
        for i in range(len(bins[1:])):
            #eff_sig = s['outputScore'][:i+1].sum() / s['nEvents']
            #eff_bkg = totalBkgOutput[:i+1].sum() / totalBkgOutput.sum()
            eff_sig = s['outputScore'][i:].sum() / s['nEvents']
            eff_bkg = totalBkgOutput[i:].sum() / totalBkgOutput.sum()

            #err_sig = np.sqrt(np.sum(s['output_var'][:i+1])) / s['nEvents']
            #err_bkg = np.sqrt(np.sum(totalBkgVar[:i+1])) / totalBkgOutput.sum()
            err_sig = np.sqrt(np.sum(s['output_var'][i:])) / s['nEvents']
            err_bkg = np.sqrt(np.sum(totalBkgVar[i:])) / totalBkgOutput.sum()

            #if totalBkgOutput[:i+1].sum() > 0.:
            #  rel_err_bkg = np.sqrt(np.sum(totalBkgVar[:i+1])) / totalBkgOutput[:i+1].sum()
            if totalBkgOutput[i:].sum() > 0.:
                rel_err_bkg = np.sqrt(np.sum(
                    totalBkgVar[i:])) / totalBkgOutput[i:].sum()
            else:
                rel_err_bkg = 0.
            #if s['outputScore'][:i+1].sum() > 0.:
            #  rel_err_sig = np.sqrt(np.sum(s['output_var'][:i+1])) / s['outputScore'][:i+1].sum()
            if s['outputScore'][i:].sum() > 0.:
                rel_err_sig = np.sqrt(np.sum(
                    s['output_var'][i:])) / s['outputScore'][i:].sum()
            else:
                rel_err_sig = 0.

            #total_rel_err = np.sqrt(rel_err_sig**2. + rel_err_bkg**2. + 0.25**2.)
            total_rel_err = np.sqrt(rel_err_bkg**2. + 0.25**2.)

            if (eff_sig == 0) or (eff_bkg == 0):
                Z = 0.
                Z_err = 0.
                ams = 0.
            elif (err_sig / eff_sig > 0.75) or (err_bkg / eff_bkg > 0.75):
                Z = 0.
                Z_err = 0.
                ams = 0.
            else:
                #Z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(s['outputScore'][:i+1].sum(), totalBkgOutput[:i+1].sum(), total_rel_err)
                Z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(
                    s['outputScore'][i:].sum(), totalBkgOutput[i:].sum(),
                    total_rel_err)
                ams = asimovZ(s['outputScore'][i:].sum(),
                              totalBkgOutput[i:].sum(),
                              np.sqrt(totalBkgVar[i:].sum()))

                Zplus_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(
                    (eff_sig + err_sig) * s['nEvents'],
                    eff_bkg * totalBkgOutput.sum(), total_rel_err)
                Zmins_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(
                    (eff_sig - err_sig) * s['nEvents'],
                    eff_bkg * totalBkgOutput.sum(), total_rel_err)
                Zplus_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(
                    eff_sig * s['nEvents'],
                    (eff_bkg + err_bkg) * totalBkgOutput.sum(), total_rel_err)
                Zmins_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(
                    eff_sig * s['nEvents'],
                    (eff_bkg - err_bkg) * totalBkgOutput.sum(), total_rel_err)

                # Inside the else branch so that Zplus_sig etc. are defined;
                # the zero branches above already set Z_err = 0.
                Z_err_sig = abs(Zplus_sig - Zmins_sig) / 2
                Z_err_bkg = abs(Zplus_bkg - Zmins_bkg) / 2
                Z_err = np.sqrt(Z_err_sig**2 + Z_err_bkg**2)

            significance.append(Z)
            significance_err.append(Z_err)
            asimov.append(ams)

        s['sig'] = np.array(significance)
        s['sig_max'] = s['sig'].max()
        s['sig_err'] = np.array(significance_err)
        s['ams'] = np.array(asimov)
        #print s['sig']
        #print s['ams']
        #sigMax_index = bins[np.where(s['sig'] == s['sig'].max())][0]
        #Z = asimovZ(Signal[0]['outputScore'][np.where(bins[:-1] == sigMax_index)], totalBkgOutput[np.where(bins[:-1] == sigMax_index)], np.sqrt(totalBkgVar[np.where(bins[:-1] == sigMax_index)]), syst=False)
        #Z_syst = asimovZ(Signal[0]['outputScore'][np.where(bins[:-1] == sigMax_index)], totalBkgOutput[np.where(bins[:-1] == sigMax_index)], np.sqrt(totalBkgVar[np.where(bins[:-1] == sigMax_index)]), syst=True)
        #print s['sig'].max(), sigMax_index, Z, Z_syst

    x = np.array([s['m_stop'] for s in Signal], dtype=float)
    y = np.array([s['m_X'] for s in Signal], dtype=float)
    z = np.array([s['sig_max'] for s in Signal], dtype=float)

    #print x, y, z

    #print Signal[0]['outputScore'][np.where(bins[:-1] >= sigMax_index)], Signal[0]['output_var'][np.where(bins[:-1] >= sigMax_index)]
    #print totalBkgOutput[np.where(bins[:-1] >= sigMax_index)], totalBkgVar[np.where(bins[:-1] >= sigMax_index)]

    #print Signal[0]['outputScore'], Signal[0]['output_var']
    #print totalBkgOutput, totalBkgVar
    # Set up a regular grid of interpolation points

    print('Plotting the output score...')
    fig = plt.figure(figsize=(8, 6))
    ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=3)
    ax1.set_xlim((bins[0], bins[-1]))
    ax1.set_ylabel("Events", horizontalalignment='right', y=1.0)

    sb_ratio = Signal[0]['outputScore'].sum() / totalBkgOutput.sum()
    #if sb_ratio < 0.2:
    #  #ATTENTION! Simplified error propagation (treated as uncorrelated)
    #  scaled = Signal[0]['outputScore'] / Signal[0]['outputScore'].sum() * totalBkgOutput.sum()
    #  scaled_var = scaled*scaled * ( (Signal[0]['output_var']/Signal[0]['outputScore'])**2 + (totalBkgVar.sum()/totalBkgOutput.sum())**2 + (Signal[0]['output_var'].sum()/Signal[0]['outputScore'].sum())**2 )
    #  scaled_label = 'Signal scaled to Bkg'
    #
    #else:
    scaled = Signal[0]['outputScore']
    scaled_var = Signal[0]['output_var']
    scaled_label = 'Signal'

    plt.bar(center,
            totalBkgOutput / totalBkgOutput.sum(),
            width=db,
            yerr=np.sqrt(totalBkgVar) / totalBkgOutput.sum(),
            color='b',
            alpha=0.25,
            error_kw=dict(ecolor='b', lw=1.5),
            label=Background[0]['name'])
    plt.bar(center,
            Signal[0]['outputScore'] / Signal[0]['outputScore'].sum(),
            width=db,
            yerr=np.sqrt(Signal[0]['output_var']) /
            Signal[0]['outputScore'].sum(),
            label=Signal[0]['name'],
            color='r',
            alpha=0.25,
            error_kw=dict(ecolor='r', lw=1.5))

    ax1.set_ylim(
        (0.,
         np.max([
             np.max(totalBkgOutput / totalBkgOutput.sum()),
             np.max(Signal[0]['outputScore'] / Signal[0]['outputScore'].sum())
         ]) * 1.3))
    #ax1.set_yscale('log')
    leg = plt.legend(loc="best", frameon=False)

    AtlasStyle_mpl.ATLASLabel(ax1, 0.02, 0.925, 'Work in progress')
    #AtlasStyle_mpl.LumiLabel(ax1, 0.02, 0.875, lumi=LUMI*0.001)

    ax2 = plt.subplot2grid((4, 4), (3, 0), colspan=4, rowspan=1)
    getRatio(Signal[0]['outputScore'] / Signal[0]['outputScore'].sum(), bins,
             np.sqrt(Signal[0]['output_var']) / Signal[0]['outputScore'].sum(),
             totalBkgOutput / totalBkgOutput.sum(), bins,
             np.sqrt(totalBkgVar) / totalBkgOutput.sum(), 'r')
    ax2.set_xlabel('Output score', horizontalalignment='right', x=1.0)
    ax2.set_ylabel('Reco/Truth')
    ax2.set_xlim((0., 1.))
    ax2.set_ylim((0, 2))
    ax2.grid()
    ax2.tick_params(direction='in')
    ax2.xaxis.set_ticks_position('both')
    ax2.yaxis.set_ticks_position('both')

    plt.savefig("plots/" + modelfile + "_shapeComparison_outputScore.pdf")
    plt.savefig("plots/" + modelfile + "_shapeComparison_outputScore.png")
    plt.close()
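asimovZ is called throughout but never defined in the listing. A sketch based on the standard Asimov median significance with a background uncertainty (Cowan, Cranmer, Gross, Vitells, arXiv:1007.1727); the syst flag, seen in Example #9, is assumed to add the same flat 25% background systematic used in total_rel_err above:

import numpy as np

def asimovZ(s, b, sigma_b, syst=False):
    """Median discovery significance for the Asimov dataset (sketch)."""
    if syst:
        sigma_b = np.sqrt(sigma_b**2 + (0.25 * b)**2)  # assumed flat 25% systematic
    var = sigma_b**2
    if b <= 0. or var <= 0.:
        return 0.
    first = (s + b) * np.log((s + b) * (b + var) / (b**2 + (s + b) * var))
    second = (b**2 / var) * np.log(1. + var * s / (b * (b + var)))
    return np.sqrt(2. * (first - second))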
Example #7
def plot_TrainTest_score(sig_predicted_train,
                         sig_predicted_test,
                         sig_w_train,
                         sig_w_test,
                         bkg_predicted_train,
                         bkg_predicted_test,
                         bkg_w_train,
                         bkg_w_test,
                         binning,
                         fileName='Test',
                         normed=False,
                         save=False,
                         ratio=True,
                         addStr=''):
    print('Plotting the train/test score...')
    fig = plt.figure(figsize=(8, 6))
    if ratio:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=3)
        #ax1.xaxis.set_ticks([])
    else:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)
    ax1.tick_params(direction='in')
    ax1.set_xlim((binning[1], binning[2]))
    ax1.xaxis.set_ticks_position('both')
    ax1.yaxis.set_ticks_position('both')

    #s_histTrain, s_binsTrain, s_patchesTrain = plt.hist(sig_predicted_train.ravel(), weights=sig_w_train, histtype='stepfilled', color='r', label='Signal (Training)', alpha=0.5, bins=binning[0], range=(binning[1], binning[2]), density=normed)
    s_histTrain, s_binsTrain, s_patchesTrain = plt.hist(
        sig_predicted_train.ravel(),
        weights=None,
        histtype='stepfilled',
        color='r',
        label='Signal (Training)',
        alpha=0.5,
        bins=binning[0],
        range=(binning[1], binning[2]),
        density=normed)
    #b_histTrain, b_binsTrain, b_patchesTrain = plt.hist(bkg_predicted_train.ravel(), weights=bkg_w_train, histtype='stepfilled', color='b', label='Background (Training)', alpha=0.5, bins=binning[0], range=(binning[1], binning[2]), density=normed)
    b_histTrain, b_binsTrain, b_patchesTrain = plt.hist(
        bkg_predicted_train.ravel(),
        weights=None,
        histtype='stepfilled',
        color='b',
        label='Background (Training)',
        alpha=0.5,
        bins=binning[0],
        range=(binning[1], binning[2]),
        density=normed)

    #s_histTest, s_binsTest = np.histogram(sig_predicted_test.ravel(), weights=sig_w_test, bins=binning[0], range=(binning[1], binning[2]), density=normed)
    s_histTest, s_binsTest = np.histogram(sig_predicted_test.ravel(),
                                          weights=None,
                                          bins=binning[0],
                                          range=(binning[1], binning[2]),
                                          density=normed)
    #b_histTest, b_binsTest = np.histogram(bkg_predicted_test.ravel(), weights=bkg_w_test, bins=binning[0], range=(binning[1], binning[2]), density=normed)
    b_histTest, b_binsTest = np.histogram(bkg_predicted_test.ravel(),
                                          weights=None,
                                          bins=binning[0],
                                          range=(binning[1], binning[2]),
                                          density=normed)

    width = (s_binsTrain[1] - s_binsTrain[0])
    center = (s_binsTrain[:-1] + s_binsTrain[1:]) / 2
    s_error = plt.errorbar(center,
                           s_histTest,
                           fmt='o',
                           c='r',
                           label='Signal (Testing)'
                           )  # TODO define yerr = sqrt( sum w^2 ) per bin!
    b_error = plt.errorbar(center,
                           b_histTest,
                           fmt='o',
                           c='b',
                           label='Background (Testing)'
                           )  # TODO define yerr = sqrt( sum w^2 ) per bin!

    # NB: ks_2samp is applied to the binned histograms rather than the raw
    # score samples, so these KS values are only approximate.
    ks_sig, ks_sig_p = ks_2samp(s_histTrain, s_histTest)
    ks_bkg, ks_bkg_p = ks_2samp(b_histTrain, b_histTest)
    #sep = getSeparation(s_histTest, s_binsTest, b_histTest, b_binsTest)

    if normed:
        s_w_test = getSumW2(sig_predicted_test.ravel(), sig_w_test,
                            binning) / np.sum(sig_w_test)
        b_w_test = getSumW2(bkg_predicted_test.ravel(), bkg_w_test,
                            binning) / np.sum(bkg_w_test)
    else:
        s_w_test = getSumW2(sig_predicted_test.ravel(), sig_w_test, binning)
        b_w_test = getSumW2(bkg_predicted_test.ravel(), bkg_w_test, binning)

    #Proxy artist for KS Test

    ks_patch = mpatches.Patch(color='None',
                              label='KS Test S (B): %.3f (%.3f)' %
                              (ks_sig, ks_bkg))

    #print sep
    if normed:
        ax1.set_ylabel('a. u.', horizontalalignment='right', y=1.0)
    else:
        ax1.set_ylabel('Events', horizontalalignment='right', y=1.0)
    leg = plt.legend(loc='best',
                     frameon=False,
                     handles=[
                         s_patchesTrain[0], b_patchesTrain[0], s_error,
                         b_error, ks_patch
                     ])
    p = leg.get_window_extent()

    #ax.annotate('KS Test S (B): %.3f (%.3f)'%(ks_sig, ks_bkg),(p.p0[0], p.p1[1]), (p.p0[0], p.p1[1]), xycoords='figure pixels', zorder=9)
    #ax1.text(0.65, 0.66, 'KS Test S (B): %.3f (%.3f)'%(ks_sig, ks_bkg), transform=ax1.transAxes) #Former y=0.7
    #ax1.text(0.65, 0.70, '$<S^2>$ = %.3f'%(sep), transform=ax1.transAxes)
    #ax.text(0.55, 0.7, 'KS p-value S (B): %.3f (%.3f)'%(ks_sig_p, ks_bkg_p), transform=ax.transAxes)

    if ratio:
        ax2 = plt.subplot2grid((4, 4), (3, 0), colspan=4, rowspan=1)
        getRatio(s_histTest, s_binsTest, s_w_test, b_histTest, b_binsTest,
                 b_w_test, 'r')
        ax2.set_xlabel('EPD', horizontalalignment='right', x=1.0)
        ax2.set_ylabel('S/B')
        ax2.set_xlim((binning[1], binning[2]))
        ax2.set_ylim((0, 2))
        ax2.grid()
        ax2.tick_params(direction='in')
        ax2.xaxis.set_ticks_position('both')
        ax2.yaxis.set_ticks_position('both')

    ax1.set_ylim(0., 1.5 * np.maximum(s_histTest.max(), b_histTest.max()))
    ax1.set_xlabel('EPD', horizontalalignment='right', x=1.0)
    AtlasStyle_mpl.ATLASLabel(ax1, 0.022, 0.925, 'Work in progress')

    if save:
        if not os.path.exists('./plots/'):
            os.makedirs('./plots/')
            print('Creating folder plots')
        plt.savefig('plots/' + fileName + '_TrainTestScore' + addStr + '.pdf')
        plt.savefig('plots/' + fileName + '_TrainTestScore' + addStr + '.png')
        plt.close()
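The TODO comments above ask for per-bin sqrt(sum w^2) errors, which is presumably what the external getSumW2 returns. A minimal sketch consistent with its call sites, assuming binning is the (nbins, low, high) triple used throughout:

import numpy as np

def getSumW2(values, weights, binning):
    """Per-bin statistical error sqrt(sum of squared weights) (sketch)."""
    nbins, low, high = binning  # assumed layout, matching binning[0..2] above
    sumw2, _ = np.histogram(values, bins=int(nbins), range=(low, high),
                            weights=np.asarray(weights)**2)
    return np.sqrt(sumw2)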
Example #8
def plot_output_score_multiclass(sig_predicted, sig_w,
                                 bkg1_predicted, bkg1_w,
                                 bkg2_predicted, bkg2_w,
                                 bkg3_predicted, bkg3_w,
                                 bkg_predicted, bkg_w,
                                 binning, fileName="Test",
                                 title='Discriminating power',
                                 normed=False, save=False, ratio=False,
                                 log=False, sample=None, addStr=''):
  print('Plotting the multiclass output score...')
  fig = plt.figure(figsize=(8,6))
  if ratio:
    ax1 = plt.subplot2grid((4,4), (0,0), colspan=4, rowspan=3)
    ax1.set_xlabel('', fontsize=0.)
    ax1.set_xticklabels(())
  else: 
    ax1 = plt.subplot2grid((4,4), (0,0), colspan=4, rowspan=4)
  ax1.tick_params(direction='in')
  ax1.set_xlim((binning[1], binning[2]))
  ax1.xaxis.set_ticks_position('both')
  ax1.yaxis.set_ticks_position('both')
  
  #b_hist, b_bins, b_patches = plt.hist(bkg_predicted.ravel(), weights=bkg_w, histtype='stepfilled', color='b', label='ttbar radiation low', alpha=0.5, bins=binning[0], range=(binning[1], binning[2]), density=normed)
  #plt.clf()

  #b1_hist, b1_bins, b1_patches = plt.hist(bkg1_predicted.ravel(), weights=bkg1_w, histtype='stepfilled', color='b', label='ttbar radiation low', alpha=0.5, bins=binning[0], range=(binning[1], binning[2]), density=normed)
  #b2_hist, b2_bins, b2_patches = plt.hist(bkg2_predicted.ravel(), weights=bkg2_w, histtype='stepfilled', color='g', label='single top', alpha=0.5, bins=binning[0], range=(binning[1], binning[2]), density=normed)
  #b3_hist, b3_bins, b3_patches = plt.hist(bkg3_predicted.ravel(), weights=bkg3_w, histtype='stepfilled', color='m', label='W+jets', alpha=0.5, bins=binning[0], range=(binning[1], binning[2]), density=normed)
  
  bkgs = [bkg3_predicted.ravel(),bkg2_predicted.ravel(),bkg1_predicted.ravel()]
  bweights = [bkg3_w,bkg2_w,bkg1_w]
  labels = [r'$W$+jets','single top',r'$t\overline{t}$']
  colors=['orange','g','b']
  
  s_hist, s_bins, s_patches = plt.hist(sig_predicted.ravel(), weights=sig_w, histtype='stepfilled', color='r', label='signal', alpha=0.5, bins=binning[0], range=(binning[1], binning[2]), density=normed) 
  b_hist, b_bins, b_patches = plt.hist(bkgs, weights=bweights, histtype='stepfilled', color=colors,label=labels, alpha=0.5, bins=binning[0], range=(binning[1], binning[2]), density=normed, stacked=True)
  
  log_str = ''
  
  if log:
      plt.yscale('log', nonpositive='clip')  # 'nonposy' was renamed in matplotlib 3.3
      log_str = '_log'
  
  # Per-bin sqrt(sum w^2) errors; s_w and b_w are needed by the ratio panel.
  s_w = getSumW2(sig_predicted.ravel(), sig_w, binning)
  b_w = getSumW2(bkg_predicted.ravel(), bkg_w, binning)
  #b1_w = getSumW2(bkg1_predicted.ravel(), bkg1_w, binning)
  #b2_w = getSumW2(bkg2_predicted.ravel(), bkg2_w, binning)
  #b3_w = getSumW2(bkg3_predicted.ravel(), bkg3_w, binning)

  #sep = getSeparation(s_histTest, s_binsTest, b_histTest, b_binsTest)

  #print sep

  if normed:
    ax1.set_ylabel("a. u.", ha='left')
  else:
    ax1.set_ylabel("Events", ha='left')
  
  #ax1.set_ylim((0, s_hist.max()*(1+0.33)))
  if log:
      ax1.set_ylim((0, b_hist[2].max()*(30)))
  else:
      ax1.set_ylim((0, b_hist[2].max()*(1+0.33)))
  
  if sample is not None:
    sample_patch = mpatches.Patch(color='None', label=sample)
    leg = plt.legend(loc='best', frameon=False, handles=[s_patches[0], b_patches[0][0], b_patches[1][0], b_patches[2][0], sample_patch])
  else:
    leg = plt.legend(loc='best', frameon=False)
  
  p = leg.get_window_extent()
  #ax.annotate('KS Test S (B): %.3f (%.3f)'%(ks_sig, ks_bkg),(p.p0[0], p.p1[1]), (p.p0[0], p.p1[1]), xycoords='figure pixels', zorder=9)
  #ax1.text(0.65, 0.7, "KS Test S (B): %.3f (%.3f)"%(ks_sig, ks_bkg), transform=ax1.transAxes)
  #ax1.text(0.65, 0.70, '$<S^2>$ = %.3f'%(sep), transform=ax1.transAxes)
  #ax.text(0.55, 0.7, "KS p-value S (B): %.3f (%.3f)"%(ks_sig_p, ks_bkg_p), transform=ax.transAxes)
  
  if title is not None:
      plt.title(title)

  AtlasStyle_mpl.ATLASLabel2(ax1, 0.02, 0.9, 'Work in progress')
  AtlasStyle_mpl.LumiLabel(ax1, 0.02, 0.8, lumi=140)
  if ratio:
    ax2 = plt.subplot2grid((4,4), (3,0), colspan=4, rowspan=1)
    r = getRatio(b_hist[2], b_bins, b_w, s_hist, s_bins, s_w, 'r')  # b_hist[2]: total of the stacked backgrounds
    ax2.set_xlabel('Discriminant')
    ax2.set_ylabel('variation/nom.')
    ax2.set_xlim((binning[1],binning[2]))
    ax2.set_ylim((-0.5,2.5))
    ax2.grid()
    ax2.tick_params(direction='in')
    ax2.xaxis.set_ticks_position('both')
    ax2.yaxis.set_ticks_position('both')

  ax1.set(xlabel='EPD')

  if save:
    if not os.path.exists("./plots/"):
        os.makedirs("./plots/")
        print("Creating folder plots")
    plt.savefig("plots/"+fileName+"_output_score_multiclass"+addStr+log_str+".pdf")
    plt.savefig("plots/"+fileName+"_output_score_multiclass"+addStr+log_str+".png")
    plt.close()
    
  try:
      return r, s_bins
  except NameError:
      print('ratio is set to False, r is not defined')
      return 0, s_bins
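getRatio, which fills the ratio panels, is also external. A compatible sketch, assuming it draws numerator/denominator per bin with uncorrelated error propagation on the current axes and returns the ratio array:

import numpy as np
import matplotlib.pyplot as plt

def getRatio(num, num_bins, num_err, den, den_bins, den_err, color):
    """Plot num/den with propagated errors on the current axes (sketch)."""
    num = np.asarray(num, dtype=float)
    den = np.asarray(den, dtype=float)
    center = (np.asarray(num_bins)[:-1] + np.asarray(num_bins)[1:]) / 2.
    with np.errstate(divide='ignore', invalid='ignore'):
        ratio = np.where(den != 0., num / den, 0.)
        rel = (np.where(num != 0., (np.asarray(num_err) / num)**2, 0.) +
               np.where(den != 0., (np.asarray(den_err) / den)**2, 0.))
        err = ratio * np.sqrt(rel)
    plt.errorbar(center, ratio, yerr=err, fmt='o', c=color)
    plt.axhline(1., color='grey', ls='--', lw=1.)
    return ratio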
Example #9
def main():

    # Check the number of arguments and act accordingly
    if len(sys.argv) == 2:
        modelfile = sys.argv[1]
    else:
        print('Usage: evaluate_signal.py <model> (omit directory and file suffix)')
        return

    print(modelfile, type(modelfile))

    Dir = 'TrainedModels/models/'
    DatasetDir = 'TrainedModels/datasets/'

    modelDir = Dir + modelfile + '.h5'

    if os.path.exists(os.path.join(Dir, modelfile + '_scaler.pkl')):
        scaler = joblib.load(os.path.join(Dir, modelfile + '_scaler.pkl'))
    else:
        scaler = None

    infofile = open(modelDir.replace('.h5', '_infofile.txt'))
    infos = infofile.readlines()
    analysis = infos[0].replace('Used analysis method: ', '').replace('\n', '')
    dataset = DatasetDir + infos[3].replace('Used dataset: ', '').replace(
        '\n', '')
    VAR = infos[5].replace('Used variables for training: ',
                           '').replace('\n', '').split()

    print(VAR)

    recurrent = False
    if analysis.lower() == 'rnn':
        recurrent = True
        seq_scaler = dataset + '_scaling.json'

    if 'nn' in analysis.lower():
        model = load_model(os.path.join(Dir, modelfile + '.h5'))
    elif 'bdt' in analysis.lower():
        model = joblib.load(os.path.join(Dir, modelfile + '.h5'))

    db = (RESOLUTION[2] - RESOLUTION[1]
          ) / RESOLUTION[0]  # bin width in discriminator distribution
    bins = np.arange(RESOLUTION[1], RESOLUTION[2] + db,
                     db)  # bin edges in discriminator distribution
    center = (bins[:-1] + bins[1:]) / 2

    print('#----MODEL----#')
    print('\t', modelDir)

    ###########################
    # Read and evaluate signals
    ###########################

    Signal = []
    for smp in SIGNAL:
        first = True
        for s in smp:
            print('Sample:\t', s)
            x, y = pickBenchmark(s)
            if not recurrent:
                _df, _weight = loadDataFrame(os.path.join(inputDir, s + '/'),
                                             PRESELECTION, VAR, WEIGHTS, LUMI)
                print(_df.shape, _weight.shape)
                if first:
                    df = _df.copy()
                    weight = _weight.copy()
                    first = False
                else:
                    df = pd.concat((df, _df), ignore_index=True)
                    weight = pd.concat((weight, _weight), ignore_index=True)
            else:
                _df, _weight, collection = loadSequentialDataFrame(
                    os.path.join(inputDir, s + '/'), PRESELECTION, COLLECTION,
                    REMOVE_VAR, VAR, WEIGHTS, LUMI)
                print(_df.shape, _weight.shape, collection[0]['df'].shape)
                if first:
                    df = _df.copy()
                    weight = _weight.copy()
                    seq = collection[0]['df'].copy()
                    first = False
                else:
                    df = pd.concat((df, _df), ignore_index=True)
                    weight = pd.concat((weight, _weight), ignore_index=True)
                    seq = pd.concat((seq, collection[0]['df']),
                                    ignore_index=True)

        if not recurrent:
            y_hat = evaluate(model, df.values, scaler, method=analysis)
            print(df.shape, weight.shape)
        else:
            collection[0]['df'] = seq
            print(df.shape, weight.shape, collection[0]['df'].shape)
            y_hat = evaluate(model,
                             df.values,
                             scaler,
                             seq_scaler,
                             method=analysis,
                             col=collection)

        bin_index = np.digitize(
            y_hat[:, 0],
            bins[1:])  # get the bin index of the output score for each event
        outputWeighted = []
        outputWeightedVar = []
        outputMC = []
        outputMCVar = []
        for i in range(len(bins[1:])):
            w = weight.values[np.where(bin_index == i)[0]]
            sigma = np.sum(w**2.)
            outputWeighted.append(w.sum())
            outputWeightedVar.append(sigma)
            outputMC.append(len(w))
            outputMCVar.append(np.sqrt(len(w)))

        Signal.append({
            'name': s[6:],
            'm_stop': x,
            'm_X': y,
            'dataset': df,
            'weight': weight,
            'nEvents': weight.sum(),
            'y_pred': y_hat,
            'outputScore': np.array(outputWeighted),
            'outputMC': np.array(outputMC),
            'output_var': np.array(outputWeightedVar),
            'outputMC_var': np.array(outputMCVar)
        })

        del df, weight, y_hat, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar

    ###########################
    # Read and evaluate backgrounds
    ###########################

    totBkgEvents = 0.
    totBkgVar = 0.
    Background = []
    for smp in BACKGROUND:
        first = True
        for b in smp:
            print('Sample:\t', b)
            if not recurrent:
                _df, _weight = loadDataFrame(os.path.join(inputDir, b + '/'),
                                             PRESELECTION, VAR, WEIGHTS, LUMI)
                print(_df.shape, _weight.shape)
                if first:
                    df = _df.copy()
                    weight = _weight.copy()
                    first = False
                else:
                    df = pd.concat((df, _df), ignore_index=True)
                    weight = pd.concat((weight, _weight), ignore_index=True)
            else:
                _df, _weight, collection = loadSequentialDataFrame(
                    os.path.join(inputDir, b + '/'), PRESELECTION, COLLECTION,
                    REMOVE_VAR, VAR, WEIGHTS, LUMI)
                print(_df.shape, _weight.shape, collection[0]['df'].shape)
                if first:
                    df = _df.copy()
                    weight = _weight.copy()
                    seq = collection[0]['df'].copy()
                    first = False
                else:
                    df = pd.concat((df, _df), ignore_index=True)
                    weight = pd.concat((weight, _weight), ignore_index=True)
                    seq = pd.concat((seq, collection[0]['df']),
                                    ignore_index=True)

        if not recurrent:
            print(df.shape, weight.shape)
            y_hat = evaluate(model, df.values, scaler, method=analysis)
        else:
            collection[0]['df'] = seq
            print(df.shape, weight.shape, collection[0]['df'].shape)
            y_hat = evaluate(model,
                             df.values,
                             scaler,
                             seq_scaler,
                             method=analysis,
                             col=collection)

        bin_index = np.digitize(y_hat[:, 0], bins[1:])
        outputWeighted = []
        outputWeightedVar = []
        outputMC = []
        outputMCVar = []

        totBkgEvents += weight.sum()
        totBkgVar += np.sum(weight.values**2.)
        for i in range(len(bins[1:])):
            w = weight.values[np.where(bin_index == i)[0]]
            sigma = np.sum(w**2.)
            outputWeighted.append(w.sum())
            outputWeightedVar.append(sigma)
            outputMC.append(len(w))
            outputMCVar.append(np.sqrt(len(w)))  # sqrt(N) Poisson error, matching the signal loop

        Background.append({
            'name': b,
            'dataset': df,
            'weight': weight,
            'nEvents': weight.sum(),
            'y_pred': y_hat,
            'outputScore': np.array(outputWeighted),
            'outputMC': np.array(outputMC),
            'output_var': np.array(outputWeightedVar),
            'outputMC_var': np.array(outputMCVar)
        })

        del df, weight, y_hat, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar

    totalBkgOutput = np.array([b['outputScore'] for b in Background])
    totalBkgOutput = totalBkgOutput.sum(axis=0)

    totalBkgVar = np.array([b['output_var'] for b in Background])
    totalBkgVar = totalBkgVar.sum(axis=0)

    print(len(Signal), len(Background),
          Signal[0]['outputScore'][:].sum(), totalBkgOutput)

    for s in Signal:
        significance = []
        significance_err = []
        asimov = []
        asimov_err = []
        roc = []
        roc_err = []

        tot_rel = np.sqrt(np.sum(s['output_var'])) / s['nEvents']
        for i in range(len(bins[1:])):
            #eff_sig = s['outputScore'][:i+1].sum() / s['nEvents']
            #eff_bkg = totalBkgOutput[:i+1].sum() / totalBkgOutput.sum()
            eff_sig = s['outputScore'][i:].sum() / s['nEvents']
            eff_bkg = totalBkgOutput[i:].sum() / totalBkgOutput.sum()

            #err_sig = np.sqrt(np.sum(s['output_var'][:i+1])) / s['nEvents']
            #err_bkg = np.sqrt(np.sum(totalBkgVar[:i+1])) / totalBkgOutput.sum()
            err_sig = np.sqrt(np.sum(s['output_var'][i:])) / s['nEvents']
            err_bkg = np.sqrt(np.sum(totalBkgVar[i:])) / totalBkgOutput.sum()

            #if totalBkgOutput[:i+1].sum() > 0.:
            #  rel_err_bkg = np.sqrt(np.sum(totalBkgVar[:i+1])) / totalBkgOutput[:i+1].sum()
            if totalBkgOutput[i:].sum() > 0.:
                rel_err_bkg = np.sqrt(np.sum(
                    totalBkgVar[i:])) / totalBkgOutput[i:].sum()
            else:
                rel_err_bkg = 0.
            #if s['outputScore'][:i+1].sum() > 0.:
            #  rel_err_sig = np.sqrt(np.sum(s['output_var'][:i+1])) / s['outputScore'][:i+1].sum()
            if s['outputScore'][i:].sum() > 0.:
                rel_err_sig = np.sqrt(np.sum(
                    s['output_var'][i:])) / s['outputScore'][i:].sum()
            else:
                rel_err_sig = 0.

            #total_rel_err = np.sqrt(rel_err_sig**2. + rel_err_bkg**2. + 0.25**2.)
            total_rel_err = np.sqrt(rel_err_bkg**2. + 0.25**2.)

            if (eff_sig == 0) or (eff_bkg == 0):
                Z = 0.
                Z_err = 0.
                ams = 0.
                ams_err = 0.
            elif (err_sig / eff_sig > 0.75) or (err_bkg / eff_bkg > 0.75):
                Z = 0.
                Z_err = 0.
                ams = 0.
                ams_err = 0.
            else:
                #Z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(s['outputScore'][:i+1].sum(), totalBkgOutput[:i+1].sum(), total_rel_err)
                Z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(
                    s['outputScore'][i:].sum(), totalBkgOutput[i:].sum(),
                    total_rel_err)
                ams = asimovZ(s['outputScore'][i:].sum(),
                              totalBkgOutput[i:].sum(),
                              np.sqrt(totalBkgVar[i:].sum()))
                roc.append((eff_sig, 1 - eff_bkg))

                ams_plus_sig = asimovZ((s['outputScore'][i:].sum() +
                                        np.sqrt(np.sum(s['output_var'][i:]))),
                                       totalBkgOutput[i:].sum(),
                                       np.sqrt(totalBkgVar[i:].sum()))
                ams_mins_sig = asimovZ((s['outputScore'][i:].sum() -
                                        np.sqrt(np.sum(s['output_var'][i:]))),
                                       totalBkgOutput[i:].sum(),
                                       np.sqrt(totalBkgVar[i:].sum()))
                ams_plus_bkg = asimovZ(s['outputScore'][i:].sum(),
                                       (totalBkgOutput[i:].sum() +
                                        np.sqrt(np.sum(totalBkgVar[i:]))),
                                       np.sqrt(totalBkgVar[i:].sum()))
                ams_mins_bkg = asimovZ(s['outputScore'][i:].sum(),
                                       (totalBkgOutput[i:].sum() -
                                        np.sqrt(np.sum(totalBkgVar[i:]))),
                                       np.sqrt(totalBkgVar[i:].sum()))

                Zplus_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(
                    (eff_sig + err_sig) * s['nEvents'],
                    eff_bkg * totalBkgOutput.sum(), total_rel_err)
                Zmins_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(
                    (eff_sig - err_sig) * s['nEvents'],
                    eff_bkg * totalBkgOutput.sum(), total_rel_err)
                Zplus_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(
                    eff_sig * s['nEvents'],
                    (eff_bkg + err_bkg) * totalBkgOutput.sum(), total_rel_err)
                Zmins_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(
                    eff_sig * s['nEvents'],
                    (eff_bkg - err_bkg) * totalBkgOutput.sum(), total_rel_err)

                Z_err_sig = abs(Zplus_sig - Zmins_sig) / 2
                Z_err_bkg = abs(Zplus_bkg - Zmins_bkg) / 2
                Z_err = np.sqrt(Z_err_sig**2 + Z_err_bkg**2)

                ams_err_sig = abs(ams_plus_sig - ams_mins_sig) / 2.
                ams_err_bkg = abs(ams_plus_bkg - ams_mins_bkg) / 2.
                ams_err = np.sqrt(ams_err_sig**2 + ams_err_bkg**2)

            significance.append(Z)
            significance_err.append(Z_err)
            asimov.append(ams)
            asimov_err.append(ams_err)

        s['sig'] = np.array(significance)
        s['sig_max'] = s['sig'].max()
        s['sig_err'] = np.array(significance_err)
        s['ams'] = np.array(asimov)
        s['ams_err'] = np.array(asimov_err)
        s['roc'] = np.array(roc)

        print(s['sig'])
        print(s['ams'])
        #print s['roc']
        sigMax_index = bins[np.where(s['sig'] == s['sig'].max())][0]
        amsMax_index = bins[np.where(s['ams'] == s['ams'].max())][0]
        Z = asimovZ(
            Signal[0]['outputScore'][np.where(bins[:-1] == sigMax_index)],
            totalBkgOutput[np.where(bins[:-1] == sigMax_index)],
            np.sqrt(totalBkgVar[np.where(bins[:-1] == sigMax_index)]),
            syst=False)
        Z_syst = asimovZ(
            Signal[0]['outputScore'][np.where(bins[:-1] == sigMax_index)],
            totalBkgOutput[np.where(bins[:-1] == sigMax_index)],
            np.sqrt(totalBkgVar[np.where(bins[:-1] == sigMax_index)]),
            syst=True)
        print('RooStats: ', s['sig'].max(), sigMax_index, Z, Z_syst)
        print('asimov  : ', s['ams'].max(), amsMax_index)

    x = np.array([s['m_stop'] for s in Signal], dtype=float)
    y = np.array([s['m_X'] for s in Signal], dtype=float)
    z = np.array([s['sig_max'] for s in Signal], dtype=float)

    #print x, y, z

    print(Signal[0]['outputScore'][np.where(bins[:-1] >= sigMax_index)],
          Signal[0]['output_var'][np.where(bins[:-1] >= sigMax_index)])
    print(totalBkgOutput[np.where(bins[:-1] >= sigMax_index)],
          totalBkgVar[np.where(bins[:-1] >= sigMax_index)])

    print(np.sum(Signal[0]['outputScore'][np.where(bins[:-1] >= sigMax_index)]),
          np.sqrt(np.sum(Signal[0]['output_var'][np.where(bins[:-1] >= sigMax_index)]**2)))
    print(np.sum(totalBkgOutput[np.where(bins[:-1] >= sigMax_index)]),
          np.sqrt(np.sum(totalBkgVar[np.where(bins[:-1] >= sigMax_index)]**2)))

    print(Signal[0]['outputScore'], Signal[0]['output_var'])
    print(totalBkgOutput, totalBkgVar)
    # Set up a regular grid of interpolation points

    print('Plotting the output score...')
    fig = plt.figure(figsize=(8, 6))
    ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)
    ax1.set_xlim((bins[0], bins[-1]))
    ax1.set_xlabel('Output score', horizontalalignment='right', x=1.0)
    ax1.set_ylabel("Events", horizontalalignment='right', y=1.0)

    sb_ratio = Signal[0]['outputScore'].sum() / totalBkgOutput.sum()
    #if sb_ratio < 0.2:
    #  #ATTENTION! Simplified error propagation (treated as uncorrelated)
    #  scaled = Signal[0]['outputScore'] / Signal[0]['outputScore'].sum() * totalBkgOutput.sum()
    #  scaled_var = scaled*scaled * ( (Signal[0]['output_var']/Signal[0]['outputScore'])**2 + (totalBkgVar.sum()/totalBkgOutput.sum())**2 + (Signal[0]['output_var'].sum()/Signal[0]['outputScore'].sum())**2 )
    #  scaled_label = 'Signal scaled to Bkg'
    #
    #else:
    scaled = Signal[0]['outputScore']
    scaled_var = Signal[0]['output_var']
    scaled_label = 'Signal'

    multib = plt.bar(center,
                     Background[4]['outputScore'],
                     width=db,
                     yerr=np.sqrt(Background[4]['output_var']),
                     color='seagreen',
                     alpha=0.5,
                     error_kw=dict(ecolor='seagreen', lw=1.5),
                     label='multiboson')
    ttV = plt.bar(center,
                  Background[3]['outputScore'],
                  width=db,
                  yerr=np.sqrt(Background[3]['output_var']),  # was Background[4], a copy-paste slip
                  color='lightcoral',
                  alpha=0.5,
                  error_kw=dict(ecolor='lightcoral', lw=1.5),
                  label='ttV',
                  bottom=Background[4]['outputScore'])
    w = plt.bar(center,
                Background[2]['outputScore'],
                width=db,
                yerr=np.sqrt(Background[2]['output_var']),
                color='gold',
                alpha=0.5,
                error_kw=dict(ecolor='gold', lw=1.5),
                label='W+jets',
                bottom=Background[4]['outputScore'] +
                Background[3]['outputScore'])
    st = plt.bar(center,
                 Background[1]['outputScore'],
                 width=db,
                 yerr=np.sqrt(Background[1]['output_var']),
                 color='limegreen',
                 alpha=0.5,
                 error_kw=dict(ecolor='limegreen', lw=1.5),
                 label='singletop',
                 bottom=Background[4]['outputScore'] +
                 Background[3]['outputScore'] + Background[2]['outputScore'])
    tt = plt.bar(center,
                 Background[0]['outputScore'],
                 width=db,
                 yerr=np.sqrt(Background[0]['output_var']),
                 color='dodgerblue',
                 alpha=0.5,
                 error_kw=dict(ecolor='dodgerblue', lw=1.5),
                 label='ttbar',
                 bottom=Background[4]['outputScore'] +
                 Background[3]['outputScore'] + Background[2]['outputScore'] +
                 Background[1]['outputScore'])
    plt.bar(center,
            Signal[0]['outputScore'],
            width=db,
            yerr=np.sqrt(Signal[0]['output_var']),
            label=Signal[0]['name'],
            color='r',
            alpha=0.5,
            error_kw=dict(ecolor='r', lw=1.5))
    #plt.step(center, Signal[0]['outputScore'], width=db, yerr= np.sqrt(Signal[0]['output_var']), label=Signal[0]['name'], color='r', error_kw=dict(ecolor='r', lw=1.5))

    ax1.set_ylim((0.1, totalBkgOutput.max() * (15.)))
    ax1.set_yscale('log')
    leg = plt.legend(loc="best", frameon=False)

    AtlasStyle_mpl.ATLASLabel(ax1, 0.14, 0.84, 'Work in progress')
    AtlasStyle_mpl.LumiLabel(ax1, 0.14, 0.79, lumi=LUMI * 0.001)

    plt.savefig("plots/" + modelfile + "_eval-bWN-500-380_outputScore.pdf")
    plt.savefig("plots/" + modelfile + "_eval-bWN-500-380_outputScore.png")
    plt.close()

    print('Plotting significance...')
    fig = plt.figure(figsize=(8, 6))
    ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)
    ax1.set_xlim((bins[0], bins[-1]))
    ax1.set_xlabel('Output score', horizontalalignment='right', x=1.0)
    ax1.set_ylabel("Z", horizontalalignment='right', y=1.0)

    plt.plot(center,
             Signal[0]['ams'],
             'k-',
             color='cornflowerblue',
             label='Asimov Z (max = %0.3f at %0.2f)' %
             (s['ams'].max(), amsMax_index))
    plt.fill_between(center,
                     Signal[0]['ams'] - Signal[0]['ams_err'],
                     Signal[0]['ams'] + Signal[0]['ams_err'],
                     alpha=0.2,
                     edgecolor='cornflowerblue',
                     facecolor='cornflowerblue',
                     linewidth=0)
    ax1.set_ylim((0., Signal[0]['ams'].max() * (1.5)))

    plt.plot(center,
             Signal[0]['sig'],
             'k-',
             color='darkred',
             label='Binomial Z (max = %0.3f at %0.2f)' %
             (Signal[0]['sig'].max(), sigMax_index))
    plt.fill_between(center,
                     Signal[0]['sig'] - Signal[0]['sig_err'],
                     Signal[0]['sig'] + Signal[0]['sig_err'],
                     alpha=0.2,
                     edgecolor='darkred',
                     facecolor='darkred',
                     linewidth=0)
    plt.plot(center, len(center) * [3.], '--', color='grey', alpha=0.5)
    plt.plot(center, len(center) * [5.], '--', color='red', alpha=0.5)
    leg = plt.legend(loc="best", frameon=False)

    AtlasStyle_mpl.ATLASLabel(ax1, 0.14, 0.84, 'Work in progress')
    AtlasStyle_mpl.LumiLabel(ax1, 0.14, 0.79, lumi=LUMI * 0.001)

    plt.savefig("plots/" + modelfile + "_Significance_bWN-500-380.pdf")
    plt.savefig("plots/" + modelfile + "_Significance_bWN-500-380.png")
    plt.close()

    print('Plotting ROC...')
    fig = plt.figure(figsize=(8, 6))
    ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)
    ax1.set_xlim((bins[0], bins[-1]))
    ax1.set_ylim((0, 1))
    ax1.set_xlabel(r'$\epsilon_{Sig.}$', horizontalalignment='right', x=1.0)
    ax1.set_ylabel(r'$r_{Bkg.}$', horizontalalignment='right', y=1.0)

    auc = np.trapz(Signal[0]['roc'][:, 0], Signal[0]['roc'][:, 1], dx=db)
    print 'Area under ROC: ', auc

    plt.plot(Signal[0]['roc'][:, 0],
             Signal[0]['roc'][:, 1],
             'k-',
             color='cornflowerblue',
             label='ROC (AUC = %0.4f)' % (auc))
    #plt.fill_between(center, Signal[0]['ams']-Signal[0]['ams_err'], Signal[0]['ams']+Signal[0]['ams_err'], alpha=0.2, edgecolor='cornflowerblue', facecolor='cornflowerblue', linewidth=0)
    plt.plot([0, 1], [1, 0], '--', color=(0.6, 0.6, 0.6), label='Luck')
    leg = plt.legend(loc="lower left", frameon=False)

    AtlasStyle_mpl.ATLASLabel(ax1, 0.14, 0.28, 'Work in progress')
    AtlasStyle_mpl.LumiLabel(ax1, 0.14, 0.23, lumi=LUMI * 0.001)

    plt.savefig("plots/" + modelfile + "_ROC_bWN-500-380.pdf")
    plt.savefig("plots/" + modelfile + "_ROC_bWN-500-380.png")
    plt.close()
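The Asimov significance plotted above is produced by an asimovZ helper, called in Beispiel #15 as asimovZ(s, b, sigma_b) but not defined in these excerpts. A minimal sketch, assuming it implements the median expected significance with background uncertainty from Cowan, Cranmer, Gross and Vitells, Eur. Phys. J. C 71 (2011) 1554; for sigma_b -> 0 it reduces to sqrt(2*((s+b)*ln(1+s/b) - s)):

import numpy as np

def asimovZ(s, b, sigma_b):
    # median expected discovery significance for signal s on background b
    # with absolute background uncertainty sigma_b (requires b, sigma_b > 0)
    var = sigma_b**2.
    n = s + b
    term1 = n * np.log(n * (b + var) / (b**2. + n * var))
    term2 = (b**2. / var) * np.log(1. + var * s / (b * (b + var)))
    return np.sqrt(2. * (term1 - term2))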
Beispiel #10
def plot_output_score(sig_predicted,
                      sig_w,
                      bkg_predicted,
                      bkg_w,
                      binning,
                      fileName='Test',
                      normed=False,
                      save=False,
                      addStr='',
                      ratio=True,
                      log=False,
                      sample=None):
    print('Plotting the binary output score...')
    fig = plt.figure(figsize=(8, 6))
    if ratio:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=3)
        ax1.set_xlabel('', fontsize=0.)
        ax1.set_xticklabels(())
    else:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)
    ax1.tick_params(direction='in')
    ax1.set_xlim((binning[1], binning[2]))
    ax1.xaxis.set_ticks_position('both')
    ax1.yaxis.set_ticks_position('both')

    s_hist, s_bins, s_patches = plt.hist(sig_predicted.ravel(),
                                         weights=sig_w,
                                         histtype='stepfilled',
                                         color='r',
                                         label='Signal',
                                         alpha=0.5,
                                         bins=binning[0],
                                         range=(binning[1], binning[2]),
                                         density=normed)
    b_hist, b_bins, b_patches = plt.hist(bkg_predicted.ravel(),
                                         weights=bkg_w,
                                         histtype='stepfilled',
                                         color='b',
                                         label='Background',
                                         alpha=0.5,
                                         bins=binning[0],
                                         range=(binning[1], binning[2]),
                                         density=normed)

    log_str = ''

    if log:
        plt.yscale('log', nonposy='clip')
        log_str = '_log'

    s_w = getSumW2(sig_predicted.ravel(), sig_w, binning)
    b_w = getSumW2(bkg_predicted.ravel(), bkg_w, binning)

    #sep = getSeparation(s_histTest, s_binsTest, b_histTest, b_binsTest)

    #print sep

    if normed:
        ax1.set_ylabel('a. u.', horizontalalignment='right', y=1.0)
    else:
        ax1.set_ylabel('Events', horizontalalignment='right', y=1.0)

    #ax1.set_ylim((0, s_hist.max()*(1+0.33)))

    if log:
        ax1.set_ylim((0.1, b_hist.max() * 30))  # positive lower bound for the log axis
    else:
        ax1.set_ylim((0, b_hist.max() * (1 + 0.33)))

    if sample is not None:
        sample_patch = mpatches.Patch(color='None', label=sample)
        leg = plt.legend(loc='best',
                         frameon=False,
                         handles=[s_patches[0], b_patches[0], sample_patch])
    else:
        leg = plt.legend(loc='best', frameon=False)
    p = leg.get_window_extent()
    #ax.annotate('KS Test S (B): %.3f (%.3f)'%(ks_sig, ks_bkg),(p.p0[0], p.p1[1]), (p.p0[0], p.p1[1]), xycoords='figure pixels', zorder=9)
    #ax1.text(0.65, 0.7, 'KS Test S (B): %.3f (%.3f)'%(ks_sig, ks_bkg), transform=ax1.transAxes)
    #ax1.text(0.65, 0.70, '$<S^2>$ = %.3f'%(sep), transform=ax1.transAxes)
    #ax.text(0.55, 0.7, 'KS p-value S (B): %.3f (%.3f)'%(ks_sig_p, ks_bkg_p), transform=ax.transAxes)

    AtlasStyle_mpl.ATLASLabel2(ax1, 0.02, 0.9, 'Work in progress')
    AtlasStyle_mpl.LumiLabel(ax1, 0.02, 0.8, lumi=140)
    r = None  # keeps the return value defined when ratio=False
    if ratio:
        ax2 = plt.subplot2grid((4, 4), (3, 0), colspan=4, rowspan=1)
        r = getRatio(s_hist, s_bins, s_w, b_hist, b_bins, b_w, 'r')
        ax2.set_xlabel('EPD', horizontalalignment='right', x=1.0)
        ax2.set_ylabel('S/B')
        ax2.set_xlim((binning[1], binning[2]))
        ax2.set_ylim((-0.5, 2.5))
        ax2.grid()
        ax2.tick_params(direction='in')
        ax2.xaxis.set_ticks_position('both')
        ax2.yaxis.set_ticks_position('both')

    if not ratio:
        ax1.set_xlabel('EPD', horizontalalignment='right', x=1.0)

    if save:
        if not os.path.exists('./plots/'):
            os.makedirs('./plots/')
            print('Creating folder plots')
        plt.savefig('plots/' + fileName + '_output_score' + addStr + log_str +
                    '.pdf')
        plt.savefig('plots/' + fileName + '_output_score' + addStr + log_str +
                    '.png')
        plt.close()
    return r, s_bins
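plot_output_score relies on two external helpers, getSumW2 and getRatio. A minimal sketch of getSumW2, assuming it returns the per-bin statistical error sqrt(sum of squared weights) that the calls above use directly as yerr, with binning = [nBins, xMin, xMax]:

import numpy as np

def getSumW2(values, weights, binning):
    # histogram the squared weights and take the square root per bin
    sumw2, _ = np.histogram(values,
                            bins=int(binning[0]),
                            range=(binning[1], binning[2]),
                            weights=np.asarray(weights)**2.)
    return np.sqrt(sumw2)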
Beispiel #11
def plotShape(var,
              samples,
              weights,
              color,
              binning,
              xTitle,
              yTitle="Events",
              lumi=100,
              unit=None,
              legend=None,
              log=False,
              ratio=False,
              ratioTitle='1/nominal',
              ratioLimit=(0, 2),
              normed=False,
              savePlot=False,
              fileName=None):

    fig = plt.figure(figsize=(8, 6))

    if ratio:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=3)
        ax1.set_xlabel('', fontsize=0.)
        ax1.set_xticklabels(())
    else:
        ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)
    ax1.tick_params(direction='in')
    ax1.set_xlim((binning[1], binning[2]))
    ax1.xaxis.set_ticks_position('both')
    ax1.yaxis.set_ticks_position('both')

    if (unit is None) or (unit.lower() == 'mev'):
        unit_fact = 1.
    elif unit.lower() == 'gev':
        unit_fact = 0.001
    else:
        print "Unknown unit '{}', assuming MeV".format(unit)
        unit_fact = 1.

    if not isinstance(samples, list):
        if not isinstance(samples, tuple):
            print "Expected {} sample as tuple of variables and weights!".format(
                samples)
            return 0

        sumW2 = getSumW2(samples[0][str(var)].ravel(), samples[1].ravel(),
                         binning)

        hist, bins = np.histogram(samples[0][str(var)].ravel() * unit_fact,
                                  weights=samples[1].ravel(),
                                  bins=binning[0],
                                  range=(binning[1], binning[2]),
                                  density=normed)

        width = bins[1] - bins[0]
        center = (bins[:-1] + bins[1:]) / 2

        plt.errorbar(center,
                     hist,
                     xerr=[width / 2.] * binning[0],
                     yerr=sumW2.ravel(),
                     fmt='o',
                     color=color,
                     label=legend)

        _max = hist.max()

    else:
        sumW2 = []
        hists = []

        for i, smp in enumerate(samples):
            #if not type(smp) == tuple:
            #  print "Expected {} sample as tuple of variables and weights!".format(smp)
            #  return 0

            sumW2.append(
                getSumW2(smp[str(var)].ravel(), weights[i].ravel(), binning))

            hists.append(
                np.histogram(smp[str(var)].ravel() * unit_fact,
                             weights=weights[i],
                             bins=binning[0],
                             range=(binning[1], binning[2]),
                             density=normed))

            width = hists[i][1][1] - hists[i][1][0]
            center = (hists[i][1][:-1] + hists[i][1][1:]) / 2

            plt.errorbar(center,
                         hists[i][0],
                         xerr=[width / 2.] * binning[0],
                         yerr=sumW2[i].ravel(),
                         fmt='o',
                         color=color[i],
                         label=legend[i])

        _max = np.max([h[0].max() for h in hists])

    if normed:
        ax1.set_ylabel("a. u.", ha='left')
    else:
        # use the yTitle argument instead of a hard-coded label
        ax1.set_ylabel(yTitle, ha='left')

    if log:
        ax1.set_yscale('log')
        ax1.set_ylim((0.01, _max * 100))
    else:
        if normed:
            ax1.set_ylim((0, 1.5))
        else:
            ax1.set_ylim((0, _max * 1.4))

    leg = plt.legend(loc='best', frameon=False)

    AtlasStyle_mpl.ATLASLabel(ax1, 0.02, 0.9, 'Work in progress')
    AtlasStyle_mpl.LumiLabel(ax1, 0.02, 0.8, lumi=str(lumi))

    if ratio:
        ax2 = plt.subplot2grid((4, 4), (3, 0), colspan=4, rowspan=1)

        for i in range(1, len(hists)):
            r = getRatio(hists[i][0], hists[i][1], sumW2[i], hists[0][0],
                         hists[0][1], sumW2[0], color[i])

        ax2.set_xlabel(xTitle)
        ax2.set_ylabel(ratioTitle)
        ax2.set_xlim((binning[1], binning[2]))
        ax2.set_ylim(ratioLimit)
        ax2.grid()
        ax2.tick_params(direction='in')
        ax2.xaxis.set_ticks_position('both')
        ax2.yaxis.set_ticks_position('both')

    if not ratio:
        ax1.set(xlabel=xTitle)

    if savePlot:
        plt.savefig(fileName + ".pdf")
        plt.savefig(fileName + ".png")
        plt.close()
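getRatio, which fills the ratio panels in the two functions above, is also external. A minimal sketch, assuming it draws the bin-by-bin ratio with naively propagated errors on the currently active (ratio) axes and returns the ratio values; the argument order follows the calls above:

import numpy as np
import matplotlib.pyplot as plt

def getRatio(num, bins, num_err, den, den_bins, den_err, color):
    # den_bins is kept only for signature compatibility with the calls above
    center = (bins[:-1] + bins[1:]) / 2.
    num_safe = np.where(num > 0., num, 1.)
    den_safe = np.where(den > 0., den, 1.)
    ratio = np.where(den > 0., num / den_safe, 0.)
    # relative errors added in quadrature for r = num / den
    rel_num = np.where(num > 0., num_err / num_safe, 0.)
    rel_den = np.where(den > 0., den_err / den_safe, 0.)
    err = ratio * np.sqrt(rel_num**2. + rel_den**2.)
    plt.errorbar(center, ratio, yerr=err, fmt='o', color=color)
    return ratio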
Beispiel #12
def main():

    for m in MODELS:

        modelDir = DIR + m['mdir'] + '.h5'
        DatasetDir = 'TrainedModels/datasets/'

        if os.path.exists(os.path.join(DIR, m['mdir'] + '_scaler.pkl')):
            m['scaler'] = joblib.load(
                os.path.join(DIR, m['mdir'] + '_scaler.pkl'))
        else:
            m['scaler'] = None

        infofile = open(modelDir.replace('.h5', '_infofile.txt'))
        infos = infofile.readlines()
        m['analysis'] = infos[0].replace('Used analysis method: ',
                                         '').replace('\n', '')
        m['dataset'] = DatasetDir + infos[3].replace('Used dataset: ',
                                                     '').replace('\n', '')
        m['VAR'] = infos[5].replace('Used variables for training: ',
                                    '').replace('\n', '').split()

        m['recurrent'] = False
        if m['analysis'].lower() == 'rnn':
            m['recurrent'] = True
            m['seq_scaler'] = m['dataset'] + '_scaling.json'

        if 'nn' in m['analysis'].lower():
            m['model'] = load_model(os.path.join(DIR, m['mdir'] + '.h5'))
        elif 'bdt' in m['analysis'].lower():
            m['model'] = joblib.load(os.path.join(DIR, m['mdir'] + '.h5'))

        print '#----MODEL----#'
        print '\t', m['mdir']

        ###########################
        # Read and evaluate signals
        ###########################

        m['Signal'] = []
        for smp in SIGNAL:
            first = True
            for s in smp:
                print 'Sample:\t', s
                x, y = pickBenchmark(s)
                if not m['recurrent']:
                    _df, _weight = loadDataFrame(
                        os.path.join(inputDir, s + '/'), PRESELECTION,
                        m['VAR'], WEIGHTS, LUMI)
                    print _df.shape, _weight.shape
                    if first:
                        df = _df.copy()
                        weight = _weight.copy()
                        first = False
                    else:
                        df = pd.concat((df, _df), ignore_index=True)
                        weight = pd.concat((weight, _weight),
                                           ignore_index=True)
                else:
                    _df, _weight, collection = loadSequentialDataFrame(
                        os.path.join(inputDir, s + '/'), PRESELECTION,
                        COLLECTION, REMOVE_VAR, m['VAR'], WEIGHTS, LUMI)
                    print _df.shape, _weight.shape, collection[0]['df'].shape
                    if first:
                        df = _df.copy()
                        weight = _weight.copy()
                        seq = collection[0]['df'].copy()
                        first = False
                    else:
                        df = pd.concat((df, _df), ignore_index=True)
                        weight = pd.concat((weight, _weight),
                                           ignore_index=True)
                        seq = pd.concat((seq, collection[0]['df']),
                                        ignore_index=True)

            if not m['recurrent']:
                m['y_pred_sig'] = evaluate(m['model'],
                                           df.values,
                                           m['scaler'],
                                           method=m['analysis'])
                m['y_sig'] = np.ones(m['y_pred_sig'].shape[0])
            else:
                collection[0]['df'] = seq.copy()
                m['y_pred_sig'] = evaluate(m['model'],
                                           df.values,
                                           m['scaler'],
                                           m['seq_scaler'],
                                           method=m['analysis'],
                                           col=collection)
                m['y_sig'] = np.ones(m['y_pred_sig'].shape[0])

            bin_index = np.digitize(
                m['y_pred_sig'][:, 0], bins[1:]
            )  # get the bin index of the output score for each event
            outputWeighted = []
            outputWeightedVar = []
            outputMC = []
            outputMCVar = []
            for i in range(len(bins[1:])):
                w = weight.values[np.where(bin_index == i)[0]]
                sigma = np.sum(w**2.)
                outputWeighted.append(w.sum())
                outputWeightedVar.append(sigma)
                outputMC.append(len(w))
                outputMCVar.append(np.sqrt(len(w)))

            m['Signal'].append({
                'name': s[6:],
                'm_stop': x,
                'm_X': y,
                'dataset': df,
                'weight': weight,
                'nEvents': weight.sum(),
                'outputScore': np.array(outputWeighted),
                'outputMC': np.array(outputMC),
                'output_var': np.array(outputWeightedVar),
                'outputMC_var': np.array(outputMCVar)
            })

            del df, weight, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar

        ###############################
        # Read and evaluate backgrounds
        ###############################

        m['totBkgEvents'] = 0.
        m['totBkgVar'] = 0.
        m['Background'] = []
        for smp in BACKGROUND:
            first = True
            for b in smp:
                print 'Sample:\t', b
                if not m['recurrent']:
                    _df, _weight = loadDataFrame(
                        os.path.join(inputDir, b + '/'), PRESELECTION,
                        m['VAR'], WEIGHTS, LUMI)
                    print _df.shape, _weight.shape
                    if first:
                        df = _df.copy()
                        weight = _weight.copy()
                        first = False
                    else:
                        df = pd.concat((df, _df), ignore_index=True)
                        weight = pd.concat((weight, _weight),
                                           ignore_index=True)
                else:
                    _df, _weight, collection = loadSequentialDataFrame(
                        os.path.join(inputDir, b + '/'), PRESELECTION,
                        COLLECTION, REMOVE_VAR, m['VAR'], WEIGHTS, LUMI)
                    print _df.shape, _weight.shape, collection[0]['df'].shape
                    if first:
                        df = _df.copy()
                        weight = _weight.copy()
                        seq = collection[0]['df'].copy()
                        first = False
                    else:
                        df = pd.concat((df, _df), ignore_index=True)
                        weight = pd.concat((weight, _weight),
                                           ignore_index=True)
                        seq = pd.concat((seq, collection[0]['df']),
                                        ignore_index=True)

            if not m['recurrent']:
                print df.shape, weight.shape
                m['_'.join(['y_pred', b])] = evaluate(m['model'],
                                                      df.values,
                                                      m['scaler'],
                                                      method=m['analysis'])
                m['_'.join(['y', b])] = np.zeros(m['_'.join(['y_pred',
                                                             b])].shape[0])
            else:
                collection[0]['df'] = seq
                print df.shape, weight.shape, collection[0]['df'].shape
                m['_'.join(['y_pred', b])] = evaluate(m['model'],
                                                      df.values,
                                                      m['scaler'],
                                                      m['seq_scaler'],
                                                      method=m['analysis'],
                                                      col=collection)
                m['_'.join(['y', b])] = np.zeros(m['_'.join(['y_pred',
                                                             b])].shape[0])

            bin_index = np.digitize(m['_'.join(['y_pred', b])][:, 0], bins[1:])
            outputWeighted = []
            outputWeightedVar = []
            outputMC = []
            outputMCVar = []

            m['totBkgEvents'] += weight.sum()
            m['totBkgVar'] += np.sum(weight.values**2.)
            for i in range(len(bins[1:])):
                w = weight.values[np.where(bin_index == i)[0]]
                sigma = np.sum(w**2.)
                outputWeighted.append(w.sum())
                outputWeightedVar.append(sigma)
                outputMC.append(len(w))
                outputMCVar.append(len(w))

            m['Background'].append({
                'name': b,
                'dataset': df,
                'weight': weight,
                'nEvents': weight.sum(),
                'outputScore': np.array(outputWeighted),
                'outputMC': np.array(outputMC),
                'output_var': np.array(outputWeightedVar),
                'outputMC_var': np.array(outputMCVar)
            })

            del df, weight, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar

        m['totalBkgOutput'] = np.array(
            [b['outputScore'] for b in m['Background']])
        m['totalBkgOutput'] = m['totalBkgOutput'].sum(axis=0)

        m['totalBkgVar'] = np.array([b['output_var'] for b in m['Background']])
        m['totalBkgVar'] = m['totalBkgVar'].sum(axis=0)

        for s in m['Signal']:
            m['roc'] = []
            m['roc_err'] = []

            m['tot_rel'] = np.sqrt(np.sum(s['output_var'])) / s['nEvents']
            for i in range(len(bins[1:])):
                eff_sig = s['outputScore'][i:].sum() / s['nEvents']
                eff_bkg = m['totalBkgOutput'][i:].sum(
                ) / m['totalBkgOutput'].sum()

                err_sig = np.sqrt(np.sum(s['output_var'][i:])) / s['nEvents']
                err_bkg = np.sqrt(np.sum(
                    m['totalBkgVar'][i:])) / m['totalBkgOutput'].sum()

                if m['totalBkgOutput'][i:].sum() > 0.:
                    rel_err_bkg = np.sqrt(np.sum(
                        m['totalBkgVar'][i:])) / m['totalBkgOutput'][i:].sum()
                else:
                    rel_err_bkg = 0.
                if s['outputScore'][i:].sum() > 0.:
                    rel_err_sig = np.sqrt(np.sum(
                        s['output_var'][i:])) / s['outputScore'][i:].sum()
                else:
                    rel_err_sig = 0.

                m['total_rel_err'] = np.sqrt(rel_err_bkg**2. + 0.25**2.)

                m['roc'].append((eff_sig, 1 - eff_bkg))

                roc_plus_sig = eff_sig + err_sig
                roc_mins_sig = eff_sig - err_sig
                roc_plus_bkg = 1 - (eff_bkg + err_bkg)
                roc_mins_bkg = 1 - (eff_bkg - err_bkg)

                #roc_err_sig = abs(roc_plus_sig - roc_mins_sig) / 2.
                roc_err_bkg = abs(roc_plus_bkg - roc_mins_bkg) / 2.
                m['roc_err'].append(roc_err_bkg)

            m['roc'] = np.array(m['roc'])
            m['roc_err'] = np.array(m['roc_err'])

        #m['y_bkg'] = np.empty(0)
        #m['y_pred_bkg'] = np.empty(0)

        #for b in BACKGROUND:
        #  m['y_bkg'] = np.concatenate((m['y_bkg'], m['_'.join(['y',b])]))
        #  m['y_pred_bkg'] = np.concatenate((m['y_pred_bkg'], m['_'.join(['y_pred',b])][:,0]))

        #m['y'] = np.concatenate((m['y_sig'], m['y_bkg']))
        #m['y_pred'] = np.concatenate((m['y_pred_sig'][:,0], m['y_pred_bkg']))

        #m['fpr'], m['tpr'], m['threshold'] = roc_curve(m['y'], m['y_pred'])
        #m['auc'] = roc_auc_score(m['y'], m['y_pred'])

    print('Plotting ROC curve ...')
    fig = plt.figure(figsize=(8, 6))
    ax1 = plt.subplot2grid((4, 4), (0, 0), colspan=4, rowspan=4)
    #ax1.set_xlim((bins[0], bins[-1]))
    #ax1.set_ylim((0, 1))
    ax1.set_xlabel(r'$\epsilon_{Sig.}$', horizontalalignment='right', x=1.0)
    ax1.set_ylabel(r'$r_{Bkg.}$', horizontalalignment='right', y=1.0)

    for m in MODELS:
        m['auc'] = np.trapz(m['roc'][:, 0], m['roc'][:, 1], dx=db)
        print 'Area under ROC:\t', m['auc']
        if logScale:
            ax1.set_yscale('log')
            plt.plot(m['roc'][:, 0],
                     1. / (1. - m['roc'][:, 1]),
                     'k-',
                     color=m['color'],
                     label='%s (AUC = %0.4f)' % (m['name'], m['auc']))
            plt.fill_between(m['roc'][:, 0],
                             1. / (1. - (m['roc'][:, 1] - m['roc_err'])),
                             1. / (1. - (m['roc'][:, 1] + m['roc_err'])),
                             alpha=0.2,
                             edgecolor=m['color'],
                             facecolor=m['color'],
                             linewidth=0)
            #plt.plot(m['tpr'], 1./m['fpr'], lw=2, label=m['name']+' (AUC = %0.3f)'%(m['auc']))
        else:
            plt.plot(m['roc'][:, 0],
                     m['roc'][:, 1],
                     'k-',
                     color=m['color'],
                     label='%s (AUC = %0.2f)' % (m['name'], m['auc']))
            plt.fill_between(m['roc'][:, 0], (m['roc'][:, 1] - m['roc_err']),
                             (m['roc'][:, 1] + m['roc_err']),
                             alpha=0.2,
                             edgecolor=m['color'],
                             facecolor=m['color'],
                             linewidth=0)
            #plt.plot(m['tpr'], 1.-m['fpr'], lw=2, label=m['name']+' (AUC = %0.3f)'%(m['auc']))
            ax1.set_xlim((0, 0.16))
            ax1.set_ylim((0.975, 1.0))

    #plt.plot([0, 1], [1, 0], '--', color=(0.6, 0.6, 0.6), label='Luck')

    for p in WP:
        p['eff_sig'] = p['sig'] / BWN_PRESEL_SIG
        p['eff_bkg'] = p['bkg'] / BWN_PRESEL_BKG
        if p['legend']:
            plt.plot([p['eff_sig']], [1 - p['eff_bkg']],
                     '.',
                     color=p['color'],
                     label=p['name'])
        else:
            plt.plot([p['eff_sig']], [1 - p['eff_bkg']], '.', color=p['color'])

    leg = plt.legend(loc="lower left", frameon=False)

    #AtlasStyle_mpl.ATLASLabel(ax1, 0.02, 0.25, 'Work in progress')
    AtlasStyle_mpl.Text(ax1, 0.14, 0.52, 'Simulation')
    AtlasStyle_mpl.LumiLabel(ax1, 0.14, 0.46, lumi=LUMI * 0.001)

    plt.savefig(SAVEDIR + FILENAME + '.pdf')
    plt.savefig(SAVEDIR + FILENAME + '.png')
    plt.close()
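pickBenchmark, used to tag each signal point, is not defined in these excerpts either; given sample names like 'stop_bWN_500_380' and that its two return values end up as m_stop and m_X, a minimal sketch:

def pickBenchmark(sample):
    # 'stop_bWN_<m_stop>_<m_X>' -> ('<m_stop>', '<m_X>'); the strings are
    # converted to float by the callers where needed
    parts = sample.split('_')
    return parts[-2], parts[-1]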
Beispiel #13
def evaluate_signalGridCuts(modelDir, resolution=np.array([50,0,1], dtype=float), save=False, fileName='Test'):
  print('Evaluating signal grid...')
  
  if fileName=='Grid_test':
      fileName=modelDir.replace('TrainedModels/models/','').replace('.h5','')
    
  infofile = open(modelDir.replace('.h5','_infofile.txt'))
  infos = infofile.readlines()
  
  #Parse Strings for correct datatypes
  
  variables=infos[4].replace('Used variables for training: ','').replace('\n','').split()
  weights=infos[5].replace('Used weights: ', '').replace('\n','').split()
  lumi=float(infos[7].replace('Used Lumi: ','').replace('\n',''))
  background=infos[9].replace('Used background files: ','').replace('; \n','').replace(' ','').split(';')
  
  preselection = preselection_evaluate
  
  print 'Using the following preselection to evaluate:', preselection
  
  signal = ['stop_bWN_250_100', 'stop_bWN_250_130', 'stop_bWN_250_160', 'stop_bWN_300_150', 'stop_bWN_300_180', 'stop_bWN_300_210', 'stop_bWN_350_185', 'stop_bWN_350_200', 'stop_bWN_350_230', 'stop_bWN_350_260', 'stop_bWN_400_235', 'stop_bWN_400_250', 'stop_bWN_400_280', 'stop_bWN_400_310', 'stop_bWN_450_285', 'stop_bWN_450_300', 'stop_bWN_450_330', 'stop_bWN_450_360', 'stop_bWN_500_335', 'stop_bWN_500_350', 'stop_bWN_500_380', 'stop_bWN_550_385', 'stop_bWN_550_400', 'stop_bWN_550_430', 'stop_bWN_550_460', 'stop_bWN_600_435', 'stop_bWN_600_450', 'stop_bWN_600_480', 'stop_bWN_600_510', 'stop_bWN_650_485', 'stop_bWN_650_500', 'stop_bWN_650_530', 'stop_bWN_650_560']
  
  #Get Scaler and model from modelDir
   
  model = load_model(modelDir)
  
  scalerDir=modelDir.replace('.h5','_scaler.pkl')
  scaler=joblib.load(scalerDir)
    
  #Evaluate

  db = (resolution[2] - resolution[1]) / resolution[0]    # bin width in discriminator distribution
  bins = np.arange(resolution[1], resolution[2]+db, db)   # bin edges in discriminator distribution

  ###########################
  # Read and evaluate signals
  ###########################

  Signal = []
  for s in signal:
    x, y = pickBenchmark(s)
    df, weight = loadDataFrame(os.path.join(inputDirSig, s+'/'), preselection, variables, weights, lumi)
    y_hat = evaluate(model, df.values, scaler)
    bin_index = np.digitize(y_hat[:,0], bins[1:])   # get the bin index of the output score for each event 
    outputWeighted = []
    outputWeightedVar = []
    outputMC = []
    outputMCVar = []
    for i in range(len(bins[1:])):
      w = weight.values[np.where(bin_index==i)[0]]
      sigma = np.sum(w**2.)
      outputWeighted.append(w.sum())
      outputWeightedVar.append(sigma)
      outputMC.append(len(w))
      outputMCVar.append(np.sqrt(len(w)))
    
    Signal.append({'name':s, 'm_stop':x, 'm_X':y, 'dataset':df, 'weight':weight, 'nEvents':weight.sum(), 'y_pred':y_hat, 'outputScore':np.array(outputWeighted), 'outputMC':np.array(outputMC), 'output_var':np.array(outputWeightedVar), 'outputMC_var':np.array(outputMCVar)})

    del df, weight, y_hat, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar

  ###########################
  # Read and evaluate backgrounds 
  ###########################
  
  totBkgEvents = 0.
  totBkgVar = 0.
  Background = []
  for b in background:
    df, weight = loadDataFrame(os.path.join(inputDirBkg, b+'/'), preselection, variables, weights, lumi)
    y_hat = evaluate(model, df.values, scaler)
    bin_index = np.digitize(y_hat[:,0], bins[1:])
    outputWeighted = []
    outputWeightedVar = []
    outputMC = []
    outputMCVar = []

    totBkgEvents += weight.sum()
    totBkgVar += np.sum(weight.values**2.)
    for i in range(len(bins[1:])):
      w = weight.values[np.where(bin_index==i)[0]]
      sigma = np.sum(w**2.)
      outputWeighted.append(w.sum())
      outputWeightedVar.append(sigma)
      outputMC.append(len(w))
      outputMCVar.append(len(w))

    Background.append({'name':b, 'dataset':df, 'weight':weight, 'nEvents':weight.sum(), 'y_pred':y_hat, 'outputScore':np.array(outputWeighted), 'outputMC':np.array(outputMC), 'output_var':np.array(outputWeightedVar), 'outputMC_var':np.array(outputMCVar)})

    del df, weight, y_hat, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar
  
  totalBkgOutput = np.array([b['outputScore'] for b in Background]) 
  totalBkgOutput = totalBkgOutput.sum(axis=0)
  
  totalBkgVar = np.array([b['output_var'] for b in Background])
  totalBkgVar = totalBkgVar.sum(axis=0)
   
  for s in Signal:
    significance = []
    significance_err = []
    tot_rel = np.sqrt(np.sum(s['output_var'])) / s['nEvents']
    for i in range(len(bins[1:])):
      #eff_sig = s['outputScore'][:i+1].sum() / s['nEvents']
      #eff_bkg = totalBkgOutput[:i+1].sum() / totalBkgOutput.sum()
      eff_sig = s['outputScore'][i:-1].sum() / s['nEvents']
      eff_bkg = totalBkgOutput[i:-1].sum() / totalBkgOutput.sum()
 
      #err_sig = np.sqrt(np.sum(s['output_var'][:i+1])) / s['nEvents']
      #err_bkg = np.sqrt(np.sum(totalBkgVar[:i+1])) / totalBkgOutput.sum()
      err_sig = np.sqrt(np.sum(s['output_var'][i:-1])) / s['nEvents']
      err_bkg = np.sqrt(np.sum(totalBkgVar[i:-1])) / totalBkgOutput.sum()

      #if totalBkgOutput[:i+1].sum() > 0.:
      #  rel_err_bkg = np.sqrt(np.sum(totalBkgVar[:i+1])) / totalBkgOutput[:i+1].sum()
      if totalBkgOutput[i:-1].sum() > 0.:
        rel_err_bkg = np.sqrt(np.sum(totalBkgVar[i:-1])) / totalBkgOutput[i:-1].sum()
      else:
        rel_err_bkg = 0.
      #if s['outputScore'][:i+1].sum() > 0.:
      #  rel_err_sig = np.sqrt(np.sum(s['output_var'][:i+1])) / s['outputScore'][:i+1].sum()
      if s['outputScore'][i:-1].sum() > 0.:
        rel_err_sig = np.sqrt(np.sum(s['output_var'][i:-1])) / s['outputScore'][i:-1].sum()
      else:
        rel_err_sig = 0.
      
      total_rel_err = np.sqrt(rel_err_sig**2. + rel_err_bkg**2. + 0.25**2.)

      if (eff_sig == 0) or (eff_bkg == 0):
        Z = 0.
        Z_err = 0.
      elif (err_sig / eff_sig > 0.75) or (err_bkg / eff_bkg > 0.75):
        Z = 0
        Z_err = 0
      else:
        #Z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(s['outputScore'][:i+1].sum(), totalBkgOutput[:i+1].sum(), total_rel_err)
        Z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(s['outputScore'][i:-1].sum(), totalBkgOutput[i:-1].sum(), total_rel_err)

        Zplus_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ((eff_sig + err_sig) * s['nEvents'], eff_bkg * totalBkgOutput.sum(), total_rel_err)
        Zmins_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ((eff_sig - err_sig) * s['nEvents'], eff_bkg * totalBkgOutput.sum(), total_rel_err)
        Zplus_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(eff_sig * s['nEvents'], (eff_bkg + err_bkg) * totalBkgOutput.sum(), total_rel_err)
        Zmins_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(eff_sig * s['nEvents'], (eff_bkg - err_bkg) * totalBkgOutput.sum(), total_rel_err)

        # Zplus_*/Zmins_* exist only in this branch, so the error on Z is
        # computed here as well
        Z_err_sig = abs(Zplus_sig - Zmins_sig) / 2
        Z_err_bkg = abs(Zplus_bkg - Zmins_bkg) / 2
        Z_err = np.sqrt(Z_err_sig**2 + Z_err_bkg**2)

      significance.append(Z)
      significance_err.append(Z_err)

    s['sig'] = np.array(significance)
    s['sig_max'] = s['sig'].max()
    s['sig_err'] = np.array(significance_err)
    print s['sig']
    print s['sig'].max(), bins[np.where(s['sig'] == s['sig'].max())]

  x = np.array([s['m_stop'] for s in Signal], dtype=float)
  y = np.array([s['m_X'] for s in Signal], dtype=float)
  z = np.array([s['sig_max'] for s in Signal],dtype=float)

  print x, y, z
  # Set up a regular grid of interpolation points
  fig, ax1 = plt.subplots(figsize=(8,6))
  xi, yi = np.linspace(x.min(), x.max(), 100), np.linspace(y.min(), y.max(), 100)
  xi, yi = np.meshgrid(xi, yi)

  # Interpolate
  rbf = scipy.interpolate.LinearNDInterpolator(points=np.array((x, y)).T, values=z)
  zi = rbf(xi, yi)

  im = ax1.imshow(zi, vmin=0., vmax=5., origin='lower',
             extent=[x.min(), x.max(), y.min(), y.max()])
  cbar = plt.colorbar(im)
  cbar.set_label('Significance')
  ax1.set_xlabel(r'$m_{\tilde{t}}$')
  ax1.set_xlim([x.min(), x.max()])
  ax1.set_ylabel(r'$m_{\chi}$')
  ax1.set_ylim([y.min(), y.max()])
  plt.scatter(x, y, c='black')
  plt.plot(x, x-84., color='black')
  plt.plot(x, x-175., color='black')
  AtlasStyle_mpl.ATLASLabel(ax1, 0.022, 0.925, 'Work in progress')
  AtlasStyle_mpl.LumiLabel(ax1, 0.022, 0.875, lumi=lumi*0.001)
  #plt.show()
  
  if save:
        if not os.path.exists('./plots/'):
            os.makedirs('./plots/')
            print('Creating folder plots')
        isFile = True
        n = 1
        while isFile:
            filepath = './plots/' + fileName + '_evaluated_grid_cuts_' + str(n) + '_infofile.txt'
            if os.path.isfile(filepath):
                n += 1
                isFile=True
            else: 
                isFile=False
                infofile = open(filepath, 'w')
                print('Saving evaluation information to', filepath)
                presels = ''
                for pre in preselection_evaluate:
                    if pre['type'] == 'condition':
                        presels += pre['name'] + '-threshold: ' + str(pre['threshold']) + ' type: ' + pre['type'] + ' variable: ' + pre['variable'] + ' lessthan: ' + str(pre['lessthan']) + ' and morethan: ' +  str(pre['morethan']) + '; '
                    else:
                        presels += pre['name'] + '-threshold: ' + str(pre['threshold']) + ' type: ' + pre['type'] + '; '
                infofile.write('Used preselection for evaluation: ' + presels)
                infofile.close()            
        plt.savefig('plots/'+fileName+'_evaluated_grid_cuts_' + str(n) + '.pdf')
        plt.savefig('plots/'+fileName+'_evaluated_grid_cuts_' + str(n) + '.png')
        plt.close()
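All of these evaluate functions share the same binning scheme: resolution = [nBins, xMin, xMax] is turned into a bin width db and an edge array bins, and np.digitize(score, bins[1:]) maps each output score to a bin index in 0..nBins-1. A short worked example:

import numpy as np

resolution = np.array([50, 0, 1], dtype=float)
db = (resolution[2] - resolution[1]) / resolution[0]     # bin width: 0.02
bins = np.arange(resolution[1], resolution[2] + db, db)  # 51 edges: 0.0 .. 1.0

scores = np.array([0.01, 0.51, 0.99])
print(np.digitize(scores, bins[1:]))  # [ 0 25 49]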
Beispiel #14
def evaluate_signalGrid(modelDir, resolution=np.array([50,0,1], dtype=float), save=False, fileName='Test'):
  print('Evaluating signal grid...')  
  
  infofile = open(modelDir.replace('.h5','_infofile.txt'))
  infos = infofile.readlines()
  
  #Parse Strings for correct datatypes
  
  variables=infos[4].replace('Used variables for training: ','').replace('\n','').split()
  weights=infos[5].replace('Used weights: ', '').replace('\n','').split()
  preselection_raw=infos[6].replace('Used preselection: ', '').replace('; \n', '').split(';')
  preselection=[]
  for x in preselection_raw:
      xdict = {}
      xdict['name']= x.split()[0].split('-')[0]
      xdict['threshold']= float(x.split()[1])
      xdict['type'] = x.split()[3]
      if xdict['type'] == 'condition':
          xdict['variable'] = x.split()[5]
          xdict['lessthan'] = float(x.split()[7])
          xdict['morethan'] = float(x.split()[10])
      preselection.append(xdict)
  lumi=float(infos[7].replace('Used Lumi: ','').replace('\n',''))
  background=infos[9].replace('Used background files: ','').replace('; \n','').replace(' ','').split(';')
  #signal=infos[8].replace('Used signal files: ','').replace('; \n','').replace(' ','').split(';')
  
  signal = ['stop_bWN_250_100', 'stop_bWN_250_130', 'stop_bWN_250_160', 'stop_bWN_300_150', 'stop_bWN_300_180', 'stop_bWN_300_210', 'stop_bWN_350_185', 'stop_bWN_350_200', 'stop_bWN_350_230', 'stop_bWN_350_260', 'stop_bWN_400_235', 'stop_bWN_400_250', 'stop_bWN_400_280', 'stop_bWN_400_310', 'stop_bWN_450_285', 'stop_bWN_450_300', 'stop_bWN_450_330', 'stop_bWN_450_360', 'stop_bWN_500_335', 'stop_bWN_500_350', 'stop_bWN_500_380', 'stop_bWN_550_385', 'stop_bWN_550_400', 'stop_bWN_550_430', 'stop_bWN_550_460', 'stop_bWN_600_435', 'stop_bWN_600_450', 'stop_bWN_600_480', 'stop_bWN_600_510', 'stop_bWN_650_485', 'stop_bWN_650_500', 'stop_bWN_650_530', 'stop_bWN_650_560']
  
   
  #For Debugging
  #print variables, type(variables)
  #print weights, type(variables)
  #print preselection, type(preselection[1])
  #print lumi, type(lumi)
  #print signal, type(signal)
  #print background, type(background)
   
  #Get Scaler and model from modelDir
   
  model = load_model(modelDir)
  
  scalerDir=modelDir.replace('.h5','_scaler.pkl')
  scaler=joblib.load(scalerDir)
    
  #Evaluate

  db = (resolution[2] - resolution[1]) / resolution[0]    # bin width in discriminator distribution
  bins = np.arange(resolution[1], resolution[2]+db, db)   # bin edges in discriminator distribution

  ###########################
  # Read and evaluate signals
  ###########################
  
  statInfoSig = {}
  #Infos about statistic

  Signal = []
  for s in signal:
    x, y = pickBenchmark(s)
    df, weight = loadDataFrame(os.path.join(inputDirSig, s+'/'), preselection, variables, weights, lumi)
    statInfoSig[s]=df.shape[0]
    y_hat = evaluate(model, df.values, scaler)
    bin_index = np.digitize(y_hat[:,0], bins[1:])   # get the bin index of the output score for each event 
    outputWeighted = []
    outputWeightedVar = []
    outputMC = []
    outputMCVar = []
    for i in range(len(bins[1:])):
      w = weight.values[np.where(bin_index==i)[0]]
      sigma = np.sum(w**2.)
      outputWeighted.append(w.sum())
      outputWeightedVar.append(sigma)
      outputMC.append(len(w))
      outputMCVar.append(np.sqrt(len(w)))
    
    Signal.append({'name':s, 'm_stop':x, 'm_X':y, 'dataset':df, 'weight':weight, 'nEvents':weight.sum(), 'y_pred':y_hat, 'outputScore':np.array(outputWeighted), 'outputMC':np.array(outputMC), 'output_var':np.array(outputWeightedVar), 'outputMC_var':np.array(outputMCVar)})

    del df, weight, y_hat, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar

  ###########################
  # Read and evaluate backgrounds 
  ###########################
  
  statInfoBkg = {}
  #Infos about statistic
  
  totBkgEvents = 0.
  totBkgVar = 0.
  Background = []
  for b in background:
    df, weight = loadDataFrame(os.path.join(inputDirBkg, b+'/'), preselection, variables, weights, lumi)
    statInfoBkg[b]=df.shape[0]
    y_hat = evaluate(model, df.values, scaler)
    bin_index = np.digitize(y_hat[:,0], bins[1:])
    outputWeighted = []
    outputWeightedVar = []
    outputMC = []
    outputMCVar = []

    totBkgEvents += weight.sum()
    totBkgVar += np.sum(weight.values**2.)
    for i in range(len(bins[1:])):
      w = weight.values[np.where(bin_index==i)[0]]
      sigma = np.sum(w**2.)
      outputWeighted.append(w.sum())
      outputWeightedVar.append(sigma)
      outputMC.append(len(w))
      outputMCVar.append(len(w))

    Background.append({'name':b, 'dataset':df, 'weight':weight, 'nEvents':weight.sum(), 'y_pred':y_hat, 'outputScore':np.array(outputWeighted), 'outputMC':np.array(outputMC), 'output_var':np.array(outputWeightedVar), 'outputMC_var':np.array(outputMCVar)})

    del df, weight, y_hat, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar
  
  totalBkgOutput = np.array([b['outputScore'] for b in Background]) 
  totalBkgOutput = totalBkgOutput.sum(axis=0)
  
  totalBkgVar = np.array([b['output_var'] for b in Background])
  totalBkgVar = totalBkgVar.sum(axis=0)
   
  for s in Signal:
    significance = []
    significance_err = []
    tot_rel = np.sqrt(np.sum(s['output_var'])) / s['nEvents']
    for i in range(len(bins[1:])):
      #eff_sig = s['outputScore'][:i+1].sum() / s['nEvents']
      #eff_bkg = totalBkgOutput[:i+1].sum() / totalBkgOutput.sum()
      eff_sig = s['outputScore'][i:-1].sum() / s['nEvents']
      eff_bkg = totalBkgOutput[i:-1].sum() / totalBkgOutput.sum()
 
      #err_sig = np.sqrt(np.sum(s['output_var'][:i+1])) / s['nEvents']
      #err_bkg = np.sqrt(np.sum(totalBkgVar[:i+1])) / totalBkgOutput.sum()
      err_sig = np.sqrt(np.sum(s['output_var'][i:-1])) / s['nEvents']
      err_bkg = np.sqrt(np.sum(totalBkgVar[i:-1])) / totalBkgOutput.sum()

      #if totalBkgOutput[:i+1].sum() > 0.:
      #  rel_err_bkg = np.sqrt(np.sum(totalBkgVar[:i+1])) / totalBkgOutput[:i+1].sum()
      if totalBkgOutput[i:-1].sum() > 0.:
        rel_err_bkg = np.sqrt(np.sum(totalBkgVar[i:-1])) / totalBkgOutput[i:-1].sum()
      else:
        rel_err_bkg = 0.
      #if s['outputScore'][:i+1].sum() > 0.:
      #  rel_err_sig = np.sqrt(np.sum(s['output_var'][:i+1])) / s['outputScore'][:i+1].sum()
      if s['outputScore'][i:-1].sum() > 0.:
        rel_err_sig = np.sqrt(np.sum(s['output_var'][i:-1])) / s['outputScore'][i:-1].sum()
      else:
        rel_err_sig = 0.
      
      total_rel_err = np.sqrt(rel_err_sig**2. + rel_err_bkg**2. + 0.25**2.)

      if (eff_sig == 0) or (eff_bkg == 0):
        Z = 0.
        Z_err = 0.
      elif (err_sig / eff_sig > 0.75) or (err_bkg / eff_bkg > 0.75):
        Z = 0
        Z_err = 0
      else:
        #Z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(s['outputScore'][:i+1].sum(), totalBkgOutput[:i+1].sum(), total_rel_err)
        Z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(s['outputScore'][i:-1].sum(), totalBkgOutput[i:-1].sum(), total_rel_err)

        Zplus_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ((eff_sig + err_sig) * s['nEvents'], eff_bkg * totalBkgOutput.sum(), total_rel_err)
        Zmins_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ((eff_sig - err_sig) * s['nEvents'], eff_bkg * totalBkgOutput.sum(), total_rel_err)
        Zplus_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(eff_sig * s['nEvents'], (eff_bkg + err_bkg) * totalBkgOutput.sum(), total_rel_err)
        Zmins_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(eff_sig * s['nEvents'], (eff_bkg - err_bkg) * totalBkgOutput.sum(), total_rel_err)

        # Zplus_*/Zmins_* exist only in this branch, so the error on Z is
        # computed here as well
        Z_err_sig = abs(Zplus_sig - Zmins_sig) / 2
        Z_err_bkg = abs(Zplus_bkg - Zmins_bkg) / 2
        Z_err = np.sqrt(Z_err_sig**2 + Z_err_bkg**2)

      significance.append(Z)
      significance_err.append(Z_err)

    s['sig'] = np.array(significance)
    s['sig_max'] = s['sig'].max()
    s['sig_err'] = np.array(significance_err)
    #print s['sig']
    print s['m_stop'], s['m_X'], s['sig'].max(), bins[np.where(s['sig'] == s['sig'].max())]

  x = np.array([s['m_stop'] for s in Signal], dtype=float)
  y = np.array([s['m_X'] for s in Signal], dtype=float)
  z = np.array([s['sig_max'] for s in Signal],dtype=float)

  #print x, y, z
  # Set up a regular grid of interpolation points
  fig, ax1 = plt.subplots(figsize=(8,6))
  xi, yi = np.linspace(x.min(), x.max(), 100), np.linspace(y.min(), y.max(), 100)
  xi, yi = np.meshgrid(xi, yi)

  # Interpolate
  rbf = scipy.interpolate.LinearNDInterpolator(points=np.array((x, y)).T, values=z)
  zi = rbf(xi, yi)

  im = ax1.imshow(zi, vmin=0., vmax=5., origin='lower',
             extent=[x.min(), x.max(), y.min(), y.max()])
  cbar = plt.colorbar(im)
  cbar.set_label('Significance')
  ax1.set_xlabel(r'$m_{\tilde{t}}$')
  ax1.set_xlim([x.min(), x.max()])
  ax1.set_ylabel(r'$m_{\chi}$')
  ax1.set_ylim([y.min(), y.max()])
  plt.scatter(x, y, c='black')
  plt.plot(x, x-84., color='black')
  plt.plot(x, x-175., color='black')
  AtlasStyle_mpl.ATLASLabel(ax1, 0.022, 0.925, 'Work in progress')
  AtlasStyle_mpl.LumiLabel(ax1, 0.022, 0.875, lumi=lumi*0.001)
  #plt.show()
  
  if save:
        if not os.path.exists('./plots/'):
            os.makedirs('./plots/')
            print('Creating folder plots')
        plt.savefig('plots/'+fileName+'_evaluated_grid.pdf')
        plt.savefig('plots/'+fileName+'_evaluated_grid.png')
        plt.close()
  
  diag_165 = {}
  diag_150 = {}
  diag_120 = {}
  diag_90 = {}
  
  for key, value in statInfoSig.iteritems():
      x, y = pickBenchmark(key)
      deltaM = float(x)-float(y)
      if deltaM==165.0:
          diag_165[x]=value
      elif deltaM==150.0:
          diag_150[x]=value
      elif deltaM==120.0:
          diag_120[x]=value
      elif deltaM==90.0:
          diag_90[x]=value
      else:
          print 'Error: Unknown diagonal in evaluate_signalGrid'
          return 0 
  
  sortedLabels165 = sorted(diag_165)
  sortedLabels150 = sorted(diag_150)
  sortedLabels120 = sorted(diag_120)
  sortedLabels90 = sorted(diag_90)
  
  values_165 = []
  values_150 = []
  values_120 = []
  values_90 = []
  
  for label in sortedLabels165:
      values_165.append(diag_165[label])

  for label in sortedLabels150:
      values_150.append(diag_150[label])
      
  for label in sortedLabels120:
      values_120.append(diag_120[label])
      
  for label in sortedLabels90:
      values_90.append(diag_90[label])
      
  csignal = sum(values_90)+sum(values_120)+sum(values_150)+sum(values_165)
  trainable_count = int(np.sum([K.count_params(p) for p in set(model.trainable_weights)]))
      
  signalP = mpatches.Patch(color='None', label='signal: ' + str(csignal))
  ttbar = mpatches.Patch(color='None', label=r'$t\overline{t}$: ' + str(statInfoBkg['mc16d_ttbar']))
  singletop = mpatches.Patch(color='None', label= 'single top: '+ str(statInfoBkg['mc16d_singletop']))
  Wjets = mpatches.Patch(color='None', label= r'$W$ + jets: '+ str(statInfoBkg['mc16d_Wjets']))
  tps = mpatches.Patch(color='None', label='params(t): ' + str(trainable_count)) #Trainable parameters
  
  #print sortedLabels90, sortedLabels120, sortedLabels150
  #print values_90, values_120, values_150
  
  plt.figure('statistic')
  d165 = plt.plot(sortedLabels165, values_165, 'm-x', label=r'$\Delta M = 165$ GeV')
  d150 = plt.plot(sortedLabels150, values_150, 'b-x', label=r'$\Delta M = 150$ GeV')
  d120 = plt.plot(sortedLabels120, values_120, 'r-x', label=r'$\Delta M = 120$ GeV')
  d90 = plt.plot(sortedLabels90, values_90, 'g-x', label=r'$\Delta M = 90$ GeV')
  plt.xlabel(r'$m_{\tilde{t}}$ [GeV]')
  plt.ylabel('Statistic')
  plt.title('Statistic of samples')
  plt.legend(loc='best', handles=[d165[0],d150[0],d120[0],d90[0],signalP,ttbar,singletop,Wjets,tps])
  
  if save:
        if not os.path.exists('./plots/'):
            os.makedirs('./plots/')
            print('Creating folder plots')
        plt.savefig('plots/'+fileName+'_StatisticTraining.pdf')
        plt.savefig('plots/'+fileName+'_StatisticTraining.png')
        plt.close()
        
        filepath = 'plots/' + fileName + '_StatisticTrainingValues.txt'
        infofile = open(filepath, 'w')
        infofile.write('M165: ' + ';'.join(sortedLabels165) + ' ' +';'.join([str(i) for i in values_165])+'\n')
        infofile.write('M150: ' + ';'.join(sortedLabels150) + ' ' +';'.join([str(i) for i in values_150])+'\n')
        infofile.write('M120: ' + ';'.join(sortedLabels120) + ' ' + ';'.join([str(i) for i in values_120])+'\n')
        infofile.write('M90: ' + ';'.join(sortedLabels90) + ' '+ ';'.join([str(i) for i in values_90]))
        infofile.close()
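The index-based infofile parsing in these examples assumes a fixed '*_infofile.txt' layout written at training time; the indices even differ between examples (Beispiel #12 reads the variable list from infos[5], #13 and #14 from infos[4]), so the layout apparently changed between model versions. A reconstruction of the lines that are actually parsed, with the indices as used here:

# infos[0]: 'Used analysis method: ...'
# infos[3]: 'Used dataset: ...'
# infos[4]: 'Used variables for training: ...'   (infos[5] in Beispiel #12)
# infos[5]: 'Used weights: ...'
# infos[6]: 'Used preselection: ...'
# infos[7]: 'Used Lumi: ...'
# infos[8]: 'Used signal files: ...'
# infos[9]: 'Used background files: ...'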
Beispiel #15
def main():
  
  model = load_model(modelDir)

  scaler = joblib.load(SCALING)

  infofile = open(modelDir.replace('.h5','_infofile.txt'))
  infos = infofile.readlines()
  analysis=infos[0].replace('Used analysis method: ','').replace('\n','')
  dataset = DatasetDir + infos[3].replace('Used dataset: ', '').replace('\n','')
  recurrent = False
  if analysis.lower() == 'rnn':
    recurrent = True
    seq_scaler = dataset+'_scaling.json'


  db = (RESOLUTION[2] - RESOLUTION[1]) / RESOLUTION[0]    # bin width in discriminator distribution
  bins = np.arange(RESOLUTION[1], RESOLUTION[2]+db, db)   # bin edges in discriminator distribution
  center = (bins[:-1] + bins[1:]) / 2

  print '#----MODEL----#'
  print modelDir


  ###########################
  # Read and evaluate signals
  ###########################

  Signal = []
  for s in SIGNAL:
    print s
    x, y = pickBenchmark(s)
    if not recurrent:
      df, weight = loadDataFrame(os.path.join(inputDirSig, s+'/'), PRESELECTION, VAR, WEIGHTS, LUMI)
      y_hat = evaluate(model, df.values, scaler)
    else:
      df, weight, collection = loadSequentialDataFrame(os.path.join(inputDirSig, s+'/'), PRESELECTION, COLLECTION, REMOVE_VAR, VAR, WEIGHTS, LUMI)
      y_hat = evaluate(model, df.values, scaler, seq_scaler, rnn=True, col=collection)

    bin_index = np.digitize(y_hat[:,0], bins[1:])   # get the bin index of the output score for each event 
    outputWeighted = []
    outputWeightedVar = []
    outputMC = []
    outputMCVar = []
    for i in range(len(bins[1:])):
      w = weight.values[np.where(bin_index==i)[0]]
      sigma = np.sum(w**2.)
      outputWeighted.append(w.sum())
      outputWeightedVar.append(sigma)
      outputMC.append(len(w))
      outputMCVar.append(np.sqrt(len(w)))
    
    Signal.append({'name':s, 'm_stop':x, 'm_X':y, 'dataset':df, 'weight':weight, 'nEvents':weight.sum(), 'y_pred':y_hat, 'outputScore':np.array(outputWeighted), 'outputMC':np.array(outputMC), 'output_var':np.array(outputWeightedVar), 'outputMC_var':np.array(outputMCVar)})

    del df, weight, y_hat, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar

  ###########################
  # Read and evaluate backgrounds 
  ###########################
  
  totBkgEvents = 0.
  totBkgVar = 0.
  Background = []
  for b in BACKGROUND:
    if not recurrent:
      df, weight = loadDataFrame(os.path.join(inputDirBkg, b+'/'), PRESELECTION, VAR, WEIGHTS, LUMI)
      y_hat = evaluate(model, df.values, scaler)
    else:
      df, weight, collection = loadSequentialDataFrame(os.path.join(inputDirBkg, b+'/'), PRESELECTION, COLLECTION, REMOVE_VAR, VAR, WEIGHTS, LUMI)
      y_hat = evaluate(model, df.values, scaler, seq_scaler, rnn=True, col=collection)
     
    bin_index = np.digitize(y_hat[:,0], bins[1:])
    outputWeighted = []
    outputWeightedVar = []
    outputMC = []
    outputMCVar = []

    totBkgEvents += weight.sum()
    totBkgVar += np.sum(weight.values**2.)
    for i in range(len(bins[1:])):
      w = weight.values[np.where(bin_index==i)[0]]
      sigma = np.sum(w**2.)
      outputWeighted.append(w.sum())
      outputWeightedVar.append(sigma)
      outputMC.append(len(w))
      outputMCVar.append(len(w))

    Background.append({'name':b, 'dataset':df, 'weight':weight, 'nEvents':weight.sum(), 'y_pred':y_hat, 'outputScore':np.array(outputWeighted), 'outputMC':np.array(outputMC), 'output_var':np.array(outputWeightedVar), 'outputMC_var':np.array(outputMCVar)})

    del df, weight, y_hat, bin_index, outputWeighted, outputWeightedVar, outputMC, outputMCVar
  
  totalBkgOutput = np.array([b['outputScore'] for b in Background]) 
  totalBkgOutput = totalBkgOutput.sum(axis=0)
  
  totalBkgVar = np.array([b['output_var'] for b in Background])
  totalBkgVar = totalBkgVar.sum(axis=0)
   
  for s in Signal:
    significance = []
    significance_err = []
    asimov = []
    tot_rel = np.sqrt(np.sum(s['output_var'])) / s['nEvents']
    for i in range(len(bins[1:])):
      #eff_sig = s['outputScore'][:i+1].sum() / s['nEvents']
      #eff_bkg = totalBkgOutput[:i+1].sum() / totalBkgOutput.sum()
      eff_sig = s['outputScore'][i:-1].sum() / s['nEvents']
      eff_bkg = totalBkgOutput[i:-1].sum() / totalBkgOutput.sum()
 
      #err_sig = np.sqrt(np.sum(s['output_var'][:i+1])) / s['nEvents']
      #err_bkg = np.sqrt(np.sum(totalBkgVar[:i+1])) / totalBkgOutput.sum()
      err_sig = np.sqrt(np.sum(s['output_var'][i:-1])) / s['nEvents']
      err_bkg = np.sqrt(np.sum(totalBkgVar[i:-1])) / totalBkgOutput.sum()

      #if totalBkgOutput[:i+1].sum() > 0.:
      #  rel_err_bkg = np.sqrt(np.sum(totalBkgVar[:i+1])) / totalBkgOutput[:i+1].sum()
      if totalBkgOutput[i:-1].sum() > 0.:
        rel_err_bkg = np.sqrt(np.sum(totalBkgVar[i:-1])) / totalBkgOutput[i:-1].sum()
      else:
        rel_err_bkg = 0.
      #if s['outputScore'][:i+1].sum() > 0.:
      #  rel_err_sig = np.sqrt(np.sum(s['output_var'][:i+1])) / s['outputScore'][:i+1].sum()
      if s['outputScore'][i:-1].sum() > 0.:
        rel_err_sig = np.sqrt(np.sum(s['output_var'][i:-1])) / s['outputScore'][i:-1].sum()
      else:
        rel_err_sig = 0.
      
      #total_rel_err = np.sqrt(rel_err_sig**2. + rel_err_bkg**2. + 0.25**2.)
      total_rel_err = np.sqrt(rel_err_bkg**2. + 0.25**2.)

      if (eff_sig == 0) or (eff_bkg == 0):
        Z = 0.
        Z_err = 0.
        ams = 0.
      elif (err_sig / eff_sig > 0.75) or (err_bkg / eff_bkg > 0.75):
        Z = 0.
        Z_err = 0.
        ams = 0.
      else:
        #Z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(s['outputScore'][:i+1].sum(), totalBkgOutput[:i+1].sum(), total_rel_err)
        Z = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(s['outputScore'][i:-1].sum(), totalBkgOutput[i:-1].sum(), total_rel_err)
        ams = asimovZ(s['outputScore'][i:].sum(), totalBkgOutput[i:].sum(), np.sqrt(totalBkgVar[i:].sum()))

        Zplus_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ((eff_sig + err_sig) * s['nEvents'], eff_bkg * totalBkgOutput.sum(), total_rel_err)
        Zmins_sig = ROOT.RooStats.NumberCountingUtils.BinomialExpZ((eff_sig - err_sig) * s['nEvents'], eff_bkg * totalBkgOutput.sum(), total_rel_err)
        Zplus_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(eff_sig * s['nEvents'], (eff_bkg + err_bkg) * totalBkgOutput.sum(), total_rel_err)
        Zmins_bkg = ROOT.RooStats.NumberCountingUtils.BinomialExpZ(eff_sig * s['nEvents'], (eff_bkg - err_bkg) * totalBkgOutput.sum(), total_rel_err)

        # Zplus_*/Zmins_* exist only in this branch, so the error on Z is
        # computed here as well
        Z_err_sig = abs(Zplus_sig - Zmins_sig) / 2
        Z_err_bkg = abs(Zplus_bkg - Zmins_bkg) / 2
        Z_err = np.sqrt(Z_err_sig**2 + Z_err_bkg**2)

      significance.append(Z)
      significance_err.append(Z_err)
      asimov.append(ams)

    s['sig'] = np.array(significance)
    s['sig_max'] = s['sig'].max()
    s['sig_err'] = np.array(significance_err)
    s['ams'] = np.array(asimov)
    print s['sig']
    print s['ams']
    print s['m_stop'], s['m_X'], s['sig'].max(), bins[np.where(s['sig'] == s['sig'].max())]

  x = np.array([s['m_stop'] for s in Signal], dtype=float)
  y = np.array([s['m_X'] for s in Signal], dtype=float)
  z = np.array([s['sig_max'] for s in Signal],dtype=float)

  #print x, y, z
  # Set up a regular grid of interpolation points
  fig, ax1 = plt.subplots(figsize=(8,6))
  xi, yi = np.linspace(x.min(), x.max(), 100), np.linspace(y.min(), y.max(), 100)
  xi, yi = np.meshgrid(xi, yi)

  # Interpolate
  rbf = scipy.interpolate.LinearNDInterpolator(points=np.array((x, y)).T, values=z)
  zi = rbf(xi, yi)

  im = ax1.imshow(zi, vmin=0., vmax=5., origin='lower',
             extent=[x.min(), x.max(), y.min(), y.max()])
  
  contours = plt.contour(xi, yi, zi, colors='black', levels=[3.])
  cbar = plt.colorbar(im)
  cbar.set_label('Significance')
  ax1.set_xlabel(r'$m_{\tilde{t}}$')
  ax1.set_xlim([x.min(), x.max()])
  ax1.set_ylabel(r'$m_{\chi}$')
  ax1.set_ylim([y.min(), y.max()])
  plt.scatter(x, y, c='black', s=[0.75]*len(x))
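  # guide lines for the kinematic diagonals of the bWN grid; the offsets are
  # assumed to mark Delta m = 84 GeV (~ m_W) and Delta m = 175 GeV (~ m_t)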
  plt.plot(x, x-84., color='grey')
  plt.plot(x, x-175., color='grey')

  AtlasStyle_mpl.ATLASLabel(ax1, 0.022, 0.925, 'Work in progress')
  AtlasStyle_mpl.LumiLabel(ax1, 0.022, 0.875, lumi=LUMI*0.001)

  plt.savefig("plots/"+modelfile+"_eval-Grid.pdf")
  plt.savefig("plots/"+modelfile+"_eval-Grid.png")
  plt.close()
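The evaluate helper that every example calls is likewise not defined in these excerpts, and its signature varies (a plain scaler for feed-forward models; seq_scaler, col and method for the recurrent and BDT cases). A minimal sketch of the non-recurrent neural-network path, assuming a scikit-learn scaler and a Keras model:

def evaluate(model, values, scaler):
    # standardize the inputs with the scaler fitted at training time,
    # then return the network output; callers use y_hat[:, 0]
    if scaler is not None:
        values = scaler.transform(values)
    return model.predict(values)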