Example #1
0
# Build the samples from the ROOT tree: background events for training and
# testing, plus a signal sample (labels as given in the last argument).
# NOTE(review): the two integer arguments look like a start index and an
# event count - confirm against buildArraysFromROOT.
X_train = buildArraysFromROOT(
    tree, susyFeaturesNtup, cutBackground,
    0, nBackgroundEvents,
    "TRAINING SAMPLE (background only)")
W_train = buildArraysFromROOT(
    tree, susyWeightsNtup, cutBackground,
    0, nBackgroundEvents,
    "TRAINING SAMPLE WEIGHTS")
# Flatten the weights to a 1-D array with one entry per training row.
W_train = W_train.reshape(X_train.shape[0])
X_test = buildArraysFromROOT(
    tree, susyFeaturesNtup, cutBackground,
    nBackgroundEvents, nBackgroundEvents,
    "TESTING SAMPLE - background")
X_signal = buildArraysFromROOT(
    tree, susyFeaturesNtup, cutSignal,
    0, nSignalEvents,
    "TESTING SAMPLE - signal")

# Feature scaling: the scaler is fitted on the training sample only and the
# same transformation is then applied to the two testing samples.
min_max_scaler = preprocessing.MinMaxScaler()
X_train = min_max_scaler.fit(X_train).transform(X_train)
X_test, X_signal = (
    min_max_scaler.transform(X_test),
    min_max_scaler.transform(X_signal),
)

# Per-feature mean of the scaled training sample (mean along axis 0)
means = np.mean(X_train, axis=0)

# Reconstruction error of each sample with respect to the training means
rec_errors_same, rec_errors_diff, rec_errors_signal = (
    reconstructionError(X_train, means),
    reconstructionError(X_test, means),
    reconstructionError(X_signal, means),
)

# Plotting - reconstruction errors
# Three stacked panels: the training sample, the background testing sample,
# and a normalised overlay of the background testing sample and the signal.
fig, axs = plt.subplots(3, 1)
ax1, ax2, ax3 = axs.ravel()
for ax in ax1, ax2, ax3:
    ax.set_ylabel("Events")
    ax.set_xlabel("log10(Reconstruction error)")
ax1.hist(rec_errors_same, 250, facecolor='blue', alpha=0.4, histtype='stepfilled')
ax2.hist(rec_errors_diff, 250, facecolor='green', alpha=0.4, histtype='stepfilled')
# `density=True` replaces the `normed=True` keyword, which matplotlib
# deprecated in 2.1 and removed in 3.1. Both histograms are normalised so
# that samples of different size can be compared on the same axes.
ax3.hist(rec_errors_diff, 250, facecolor='green', alpha=0.4, histtype='stepfilled', density=True)
ax3.hist(rec_errors_signal, 250, facecolor='red', alpha=0.4, histtype='stepfilled', density=True)

# Plotting - raw variables
                       batch_size = 100,
                       #learning_rule = "momentum",
                       n_iter=100)
                       #valid_size=0.25)
        # Training
        nn.fit(X_train_bg,Y_train)
        # Write the trained network through a context manager so the file is
        # flushed and closed even if pickling fails part-way.
        with open('autoencoder.pkl', 'wb') as model_file:
            pickle.dump(nn, model_file)
    if not runTraining:
        # Reuse the network saved by an earlier training run. The context
        # manager closes the file handle instead of leaking it.
        # NOTE(review): pickle.load can execute arbitrary code - only load
        # an autoencoder.pkl that this script produced itself.
        with open('autoencoder.pkl', 'rb') as model_file:
            nn = pickle.load(model_file)

    # Testing
    predicted_diff = nn.predict(X_test_bg)
    predicted_signal = nn.predict(X_test_sig)

    # Reconstruction error
    rec_errors_diff = reconstructionError(X_test_bg,predicted_diff)
    rec_errors_sig = reconstructionError(X_test_sig,predicted_signal)

    # Reconstruction errors by variable
    rec_errors_varwise_diff = reconstructionErrorByFeature(X_test_bg,predicted_diff)
    rec_errors_varwise_sig = reconstructionErrorByFeature(X_test_sig,predicted_signal)

    ## Plotting - performance curves
    ## ROC
    true_positive,false_positive,precisions,recalls,f1s = makeMetrics(2000,rec_errors_sig,rec_errors_diff)
    auc = areaUnderROC(true_positive,false_positive)
    print "Area under ROC = ",auc
    print ""
    print ""
    print ""
    axesROC[axesCounter].plot(false_positive, true_positive, label='ROC curve')
        ae.add(Dense(units=20, input_dim=71))
        ae.add(Activation("relu"))
        ae.add(Dense(units=71, input_dim=20))
        ae.add(Activation("linear"))
        ae.compile(loss="mean_squared_error", optimizer="sgd")
        ae.fit(X_train_ref_ae, X_train_ref_ae, epochs=100, batch_size=100)
        ae.save("autoencoder.h5")
    if not runTraining:
        print "Reading pre-trained autoencoder"
        ae = load_model("autoencoder.h5")
    # Testing
    predicted_same_ae = ae.predict_proba(X_train_ref_ae)
    predicted_diff_ae = ae.predict_proba(X_test_ref_ae)
    predicted_new_ae = ae.predict_proba(X_test_new_ae)
    # Reconstruction error
    rec_errors_same_ae = reconstructionError(X_train_ref_ae, predicted_same_ae)
    rec_errors_diff_ae = reconstructionError(X_test_ref_ae, predicted_diff_ae)
    rec_errors_new_ae = reconstructionError(X_test_new_ae, predicted_new_ae)

#############################
# Plotting
#############################
# One integer position per entry of trackingFeatures.
# NOTE(review): presumably used as bar positions in a per-feature plot - confirm.
y_pos = np.arange(len(trackingFeatures))
# 250 evenly spaced values from -0.5 to 1.5.
bins = np.linspace(-0.5, 1.5, 250)

# BDT plots are only drawn when runBDT is set.
if runBDT:
    # Outputs
    # Two stacked panels, both labelled "Events" on the y axis.
    figBDT, axsBDT = plt.subplots(2, 1)
    axBDT1, axBDT2 = axsBDT.ravel()
    for ax in axBDT1, axBDT2:
        ax.set_ylabel("Events")
        #learning_rule = "momentum",
        n_iter=2000,
        valid_size=0.25)
    # Training
    nn.fit(X_train, Y_train)
    # Write the trained network through a context manager so the file is
    # flushed and closed even if pickling fails part-way.
    with open('autoencoder.pkl', 'wb') as model_file:
        pickle.dump(nn, model_file)
if not runTraining:
    # Reuse the network saved by an earlier training run. The context
    # manager closes the file handle instead of leaking it.
    # NOTE(review): pickle.load can execute arbitrary code - only load an
    # autoencoder.pkl that this script produced itself.
    with open('autoencoder.pkl', 'rb') as model_file:
        nn = pickle.load(model_file)

# Testing: run the training, testing and signal samples through the network
predicted_same, predicted_diff, predicted_signal = (
    nn.predict(X_train),
    nn.predict(X_test),
    nn.predict(X_signal),
)

# Reconstruction error of each sample against its own prediction
rec_errors_same, rec_errors_diff, rec_errors_sig = (
    reconstructionError(X_train, predicted_same),
    reconstructionError(X_test, predicted_diff),
    reconstructionError(X_signal, predicted_signal),
)

# The same comparison, broken down by variable
rec_errors_varwise_same, rec_errors_varwise_diff, rec_errors_varwise_sig = (
    reconstructionErrorByFeature(X_train, predicted_same),
    reconstructionErrorByFeature(X_test, predicted_diff),
    reconstructionErrorByFeature(X_signal, predicted_signal),
)

# Plotting - reconstruction errors
# Three stacked panels sharing the same axis labels.
fig, axs = plt.subplots(nrows=3, ncols=1)
ax1, ax2, ax3 = axs.ravel()
for ax in (ax1, ax2, ax3):
    ax.set_ylabel("Events")
    ax.set_xlabel("log10(Reconstruction error)")
bestCLScore = -1.0

for fn in os.listdir('trained/'):
    print "Processing ",fn
    splitname = fn.split("_")
    type = splitname[0]
    nUnits = splitname[1]
    learningRate = splitname[2]
    nCycles = (splitname[3]).split('.')[0]
    inputFile = 'trained/'+fn
    
    if (type=='ae'):
        nn = pickle.load(open(inputFile, 'rb'))
        reconstucted_background = nn.predict(X_test_background)
        reconstucted_signal = nn.predict(X_test_signal)
        errors_background = reconstructionError(X_test_background,reconstucted_background)
        errors_signal = reconstructionError(X_test_signal,reconstucted_signal)
        true_positive,false_positive,precisions,recalls,f1s = makeMetrics(500,errors_signal,errors_background)
        tmpAUC = areaUnderROC(true_positive,false_positive)
        outputTextReport.write(type+' '+nUnits+' '+learningRate+' '+nCycles+' '+str(tmpAUC)+'\n')
        if tmpAUC > bestAEScore:
            bestAEScore = tmpAUC
            bestAE = [tmpAUC,type,nUnits,learningRate,nCycles]

    if (type=='cl'):
        nn = pickle.load(open(inputFile, 'rb'))
        predicted = nn.predict(X_test)
        probabilities = nn.predict_proba(X_test)
        fpr, tpr, thresholds = roc_curve(Y, probabilities[:,1], pos_label=1)
        tmpAUC = roc_auc_score(Y, probabilities[:,1])
        outputTextReport.write(type+' '+nUnits+' '+learningRate+' '+nCycles+' '+str(tmpAUC)+'\n')
bestCLScore = -1.0

for fn in os.listdir('trained/'):
    print "Processing ", fn
    splitname = fn.split("_")
    type = splitname[0]
    nUnits = splitname[1]
    learningRate = splitname[2]
    nCycles = (splitname[3]).split('.')[0]
    inputFile = 'trained/' + fn

    if (type == 'ae'):
        nn = pickle.load(open(inputFile, 'rb'))
        reconstucted_background = nn.predict(X_test_background)
        reconstucted_signal = nn.predict(X_test_signal)
        errors_background = reconstructionError(X_test_background,
                                                reconstucted_background)
        errors_signal = reconstructionError(X_test_signal, reconstucted_signal)
        true_positive, false_positive, precisions, recalls, f1s = makeMetrics(
            500, errors_signal, errors_background)
        tmpAUC = areaUnderROC(true_positive, false_positive)
        outputTextReport.write(type + ' ' + nUnits + ' ' + learningRate + ' ' +
                               nCycles + ' ' + str(tmpAUC) + '\n')
        if tmpAUC > bestAEScore:
            bestAEScore = tmpAUC
            bestAE = [tmpAUC, type, nUnits, learningRate, nCycles]

    if (type == 'cl'):
        nn = pickle.load(open(inputFile, 'rb'))
        predicted = nn.predict(X_test)
        probabilities = nn.predict_proba(X_test)
        fpr, tpr, thresholds = roc_curve(Y, probabilities[:, 1], pos_label=1)
        ae.add(Dense(units=20, input_dim=71))
        ae.add(Activation("relu"))
        ae.add(Dense(units=71, input_dim=20))
        ae.add(Activation("linear"))
        ae.compile(loss="mean_squared_error",optimizer="sgd")
        ae.fit(X_train_ref_ae,X_train_ref_ae,epochs=100,batch_size=100)
        ae.save("autoencoder.h5")
    if not runTraining:
        print "Reading pre-trained autoencoder"
        ae = load_model("autoencoder.h5")
    # Testing
    predicted_same_ae = ae.predict_proba(X_train_ref_ae)
    predicted_diff_ae = ae.predict_proba(X_test_ref_ae)
    predicted_new_ae = ae.predict_proba(X_test_new_ae)
    # Reconstruction error
    rec_errors_same_ae = reconstructionError(X_train_ref_ae,predicted_same_ae)
    rec_errors_diff_ae = reconstructionError(X_test_ref_ae,predicted_diff_ae)
    rec_errors_new_ae = reconstructionError(X_test_new_ae,predicted_new_ae)


#############################
# Plotting
#############################
y_pos = np.arange(len(trackingFeatures))
bins = np.linspace(-0.5, 1.5, 250)

if runBDT:
    # Outputs
    figBDT, axsBDT = plt.subplots(2, 1)
    axBDT1, axBDT2 = axsBDT.ravel()
    for ax in axBDT1, axBDT2:
Example #8
0
    model.add(Activation("linear"))
    model.compile(loss="mean_squared_error",optimizer="sgd")
    model.fit(X_train,Y_train,nb_epoch=100,batch_size=100)
    model.save("autoencoder.h5")

if not runTraining:
    print "Reading pre-trained autoencoder"
    model = load_model("autoencoder.h5")

# Testing
predicted_same = model.predict_proba(X_train)
predicted_diff = model.predict_proba(X_test)
predicted_new = model.predict_proba(X_new)

# Reconstruction error
rec_errors_same = reconstructionError(X_train,predicted_same)
rec_errors_diff = reconstructionError(X_test,predicted_diff)
rec_errors_new = reconstructionError(X_new,predicted_new)

# Reconstruction errors by variable
rec_errors_varwise_same = relativeErrorByFeature(X_train,predicted_same)
rec_errors_varwise_diff = relativeErrorByFeature(X_test,predicted_diff)
rec_errors_varwise_new = relativeErrorByFeature(X_new,predicted_new)

# Plotting - reconstruction errors
# Three stacked panels sharing the same axis labels.
fig, axs = plt.subplots(nrows=3, ncols=1)
ax1, ax2, ax3 = axs.ravel()
for ax in (ax1, ax2, ax3):
    ax.set_ylabel("Events")
    ax.set_xlabel("log10(Reconstruction error)")
# 250 evenly spaced values from -2.0 to 5.0
bins = np.linspace(-2.0, 5.0, num=250)