# Build the training sample, its weights, a second background sample for
# testing, and the signal sample.
# NOTE(review): the two integers passed to buildArraysFromROOT look like
# (start, count); if they are (start, stop) the background test sample
# below would be empty -- confirm against the helper's definition.
X_train = buildArraysFromROOT(
    tree, susyFeaturesNtup, cutBackground,
    0, nBackgroundEvents,
    "TRAINING SAMPLE (background only)")
W_train = buildArraysFromROOT(
    tree, susyWeightsNtup, cutBackground,
    0, nBackgroundEvents,
    "TRAINING SAMPLE WEIGHTS")
# One weight per training row.
W_train = W_train.reshape(X_train.shape[0])
X_test = buildArraysFromROOT(
    tree, susyFeaturesNtup, cutBackground,
    nBackgroundEvents, nBackgroundEvents,
    "TESTING SAMPLE - background")
X_signal = buildArraysFromROOT(
    tree, susyFeaturesNtup, cutSignal,
    0, nSignalEvents,
    "TESTING SAMPLE - signal")

# Feature scaling: the scaler is fitted on the training sample only and
# then applied unchanged to the two test samples.
min_max_scaler = preprocessing.MinMaxScaler()
X_train = min_max_scaler.fit_transform(X_train)
X_test = min_max_scaler.transform(X_test)
X_signal = min_max_scaler.transform(X_signal)

# Calculate feature means (one value per column of the scaled training set).
means = np.mean(X_train, axis=0)

# Reconstruction error, using the training-set feature means as the
# "reconstruction" of every event.
rec_errors_same = reconstructionError(X_train, means)
rec_errors_diff = reconstructionError(X_test, means)
rec_errors_signal = reconstructionError(X_signal, means)

# Plotting - reconstruction errors
fig, axs = plt.subplots(3, 1)
ax1, ax2, ax3 = axs.ravel()
for ax in (ax1, ax2, ax3):
    ax.set_ylabel("Events")
    ax.set_xlabel("log10(Reconstruction error)")

# Keyword arguments shared by every histogram drawn below.
histStyle = dict(alpha=0.4, histtype='stepfilled')
ax1.hist(rec_errors_same, 250, facecolor='blue', **histStyle)
ax2.hist(rec_errors_diff, 250, facecolor='green', **histStyle)
# Bottom panel overlays background and signal, each normalised.
# NOTE(review): `normed` is deprecated in newer Matplotlib releases in
# favour of `density`; kept as-is for the Matplotlib version this script
# was written against.
ax3.hist(rec_errors_diff, 250, facecolor='green', normed=True, **histStyle)
ax3.hist(rec_errors_signal, 250, facecolor='red', normed=True, **histStyle)

# Plotting - raw variables
# NOTE(review): the next line is the tail of a call whose opening lies
# outside this excerpt; it is left exactly as found rather than guessed at.
batch_size = 100, #learning_rule = "momentum", n_iter=100) #valid_size=0.25)

# Training
# NOTE(review): presumably this fit/dump pair belongs to an
# `if runTraining:` branch whose header is not visible here -- confirm.
nn.fit(X_train_bg, Y_train)
# Use a context manager so the pickle file is flushed and closed before it
# can be re-opened for reading.
with open('autoencoder.pkl', 'wb') as pklFile:
    pickle.dump(nn, pklFile)

if not runTraining:
    # NOTE: unpickling executes arbitrary code; only load a trusted file.
    with open('autoencoder.pkl', 'rb') as pklFile:
        nn = pickle.load(pklFile)

# Testing
predicted_diff = nn.predict(X_test_bg)
predicted_signal = nn.predict(X_test_sig)

# Reconstruction error
rec_errors_diff = reconstructionError(X_test_bg, predicted_diff)
rec_errors_sig = reconstructionError(X_test_sig, predicted_signal)

# Reconstruction errors by variable
rec_errors_varwise_diff = reconstructionErrorByFeature(X_test_bg, predicted_diff)
rec_errors_varwise_sig = reconstructionErrorByFeature(X_test_sig, predicted_signal)

## Plotting - performance curves
## ROC
true_positive, false_positive, precisions, recalls, f1s = makeMetrics(
    2000, rec_errors_sig, rec_errors_diff)
auc = areaUnderROC(true_positive, false_positive)
# Single-argument print calls give the same output under Python 2 and 3;
# the join reproduces the separating space of the original print statement.
print(" ".join(["Area under ROC = ", str(auc)]))
print("")
print("")
print("")
axesROC[axesCounter].plot(false_positive, true_positive, label='ROC curve')
# Autoencoder layers: 71 inputs -> 20 ReLU units -> 71 linear outputs,
# trained to reproduce its own input with a mean-squared-error loss.
# NOTE(review): the creation of `ae` is outside this excerpt, and these
# training statements presumably sit inside an `if runTraining:` branch
# whose header is not visible -- confirm the nesting.
ae.add(Dense(units=20, input_dim=71))
ae.add(Activation("relu"))
ae.add(Dense(units=71, input_dim=20))
ae.add(Activation("linear"))
ae.compile(loss="mean_squared_error", optimizer="sgd")
ae.fit(X_train_ref_ae, X_train_ref_ae, epochs=100, batch_size=100)
ae.save("autoencoder.h5")

if not runTraining:
    print("Reading pre-trained autoencoder")
    ae = load_model("autoencoder.h5")

# Testing
# The network is a regressor with a linear output layer, so use predict();
# predict_proba() is meant for probability outputs and is not available in
# recent Keras releases.
predicted_same_ae = ae.predict(X_train_ref_ae)
predicted_diff_ae = ae.predict(X_test_ref_ae)
predicted_new_ae = ae.predict(X_test_new_ae)

# Reconstruction error
rec_errors_same_ae = reconstructionError(X_train_ref_ae, predicted_same_ae)
rec_errors_diff_ae = reconstructionError(X_test_ref_ae, predicted_diff_ae)
rec_errors_new_ae = reconstructionError(X_test_new_ae, predicted_new_ae)

#############################
# Plotting
#############################

y_pos = np.arange(len(trackingFeatures))
bins = np.linspace(-0.5, 1.5, 250)

if runBDT:
    # Outputs
    figBDT, axsBDT = plt.subplots(2, 1)
    axBDT1, axBDT2 = axsBDT.ravel()
    for ax in axBDT1, axBDT2:
        ax.set_ylabel("Events")
# NOTE(review): the next line is the tail of a call whose opening lies
# outside this excerpt; it is left exactly as found rather than guessed at.
#learning_rule = "momentum", n_iter=2000, valid_size=0.25)

# Training
# NOTE(review): presumably this fit/dump pair belongs to an
# `if runTraining:` branch whose header is not visible here -- confirm.
nn.fit(X_train, Y_train)
# Use a context manager so the pickle file is flushed and closed before it
# can be re-opened for reading.
with open('autoencoder.pkl', 'wb') as pklFile:
    pickle.dump(nn, pklFile)

if not runTraining:
    # NOTE: unpickling executes arbitrary code; only load a trusted file.
    with open('autoencoder.pkl', 'rb') as pklFile:
        nn = pickle.load(pklFile)

# Testing
predicted_same = nn.predict(X_train)
predicted_diff = nn.predict(X_test)
predicted_signal = nn.predict(X_signal)

# Reconstruction error
rec_errors_same = reconstructionError(X_train, predicted_same)
rec_errors_diff = reconstructionError(X_test, predicted_diff)
rec_errors_sig = reconstructionError(X_signal, predicted_signal)

# Reconstruction errors by variable
rec_errors_varwise_same = reconstructionErrorByFeature(X_train, predicted_same)
rec_errors_varwise_diff = reconstructionErrorByFeature(X_test, predicted_diff)
rec_errors_varwise_sig = reconstructionErrorByFeature(X_signal, predicted_signal)

# Plotting - reconstruction errors
fig, axs = plt.subplots(3, 1)
ax1, ax2, ax3 = axs.ravel()
for ax in (ax1, ax2, ax3):
    ax.set_ylabel("Events")
    ax.set_xlabel("log10(Reconstruction error)")
bestCLScore = -1.0

# Score every pickled model found in trained/. File names are expected to
# look like <type>_<nUnits>_<learningRate>_<nCycles>.<ext>, where <type> is
# 'ae' (autoencoder) or 'cl' (classifier).
for fn in os.listdir('trained/'):
    # Single-argument print call gives the same output under Python 2 and 3;
    # the join reproduces the separating space of the original statement.
    print(" ".join(["Processing ", fn]))
    splitname = fn.split("_")
    type = splitname[0]
    nUnits = splitname[1]
    learningRate = splitname[2]
    nCycles = splitname[3].split('.')[0]
    inputFile = 'trained/' + fn

    if type == 'ae':
        # Context manager closes the file handle as soon as the model is
        # loaded. NOTE: unpickling executes arbitrary code; only keep
        # trusted files in trained/.
        with open(inputFile, 'rb') as pklFile:
            nn = pickle.load(pklFile)
        reconstucted_background = nn.predict(X_test_background)
        reconstucted_signal = nn.predict(X_test_signal)
        errors_background = reconstructionError(
            X_test_background, reconstucted_background)
        errors_signal = reconstructionError(
            X_test_signal, reconstucted_signal)
        true_positive, false_positive, precisions, recalls, f1s = makeMetrics(
            500, errors_signal, errors_background)
        tmpAUC = areaUnderROC(true_positive, false_positive)
        outputTextReport.write(
            ' '.join([type, nUnits, learningRate, nCycles, str(tmpAUC)]) + '\n')
        # Remember the autoencoder with the largest area under the ROC curve.
        if tmpAUC > bestAEScore:
            bestAEScore = tmpAUC
            bestAE = [tmpAUC, type, nUnits, learningRate, nCycles]

    if type == 'cl':
        with open(inputFile, 'rb') as pklFile:
            nn = pickle.load(pklFile)
        predicted = nn.predict(X_test)
        probabilities = nn.predict_proba(X_test)
        # Column 1 of the probabilities is used as the score for label 1.
        fpr, tpr, thresholds = roc_curve(Y, probabilities[:, 1], pos_label=1)
        tmpAUC = roc_auc_score(Y, probabilities[:, 1])
        outputTextReport.write(
            ' '.join([type, nUnits, learningRate, nCycles, str(tmpAUC)]) + '\n')
bestCLScore = -1.0

# Score every pickled model found in trained/. File names are expected to
# look like <type>_<nUnits>_<learningRate>_<nCycles>.<ext>, where <type> is
# 'ae' (autoencoder) or 'cl' (classifier).
for fn in os.listdir('trained/'):
    # Single-argument print call gives the same output under Python 2 and 3;
    # the join reproduces the separating space of the original statement.
    print(" ".join(["Processing ", fn]))
    splitname = fn.split("_")
    type = splitname[0]
    nUnits = splitname[1]
    learningRate = splitname[2]
    nCycles = splitname[3].split('.')[0]
    inputFile = 'trained/' + fn

    if type == 'ae':
        # Context manager closes the file handle as soon as the model is
        # loaded. NOTE: unpickling executes arbitrary code; only keep
        # trusted files in trained/.
        with open(inputFile, 'rb') as pklFile:
            nn = pickle.load(pklFile)
        reconstucted_background = nn.predict(X_test_background)
        reconstucted_signal = nn.predict(X_test_signal)
        errors_background = reconstructionError(
            X_test_background, reconstucted_background)
        errors_signal = reconstructionError(
            X_test_signal, reconstucted_signal)
        true_positive, false_positive, precisions, recalls, f1s = makeMetrics(
            500, errors_signal, errors_background)
        tmpAUC = areaUnderROC(true_positive, false_positive)
        outputTextReport.write(
            ' '.join([type, nUnits, learningRate, nCycles, str(tmpAUC)]) + '\n')
        # Remember the autoencoder with the largest area under the ROC curve.
        if tmpAUC > bestAEScore:
            bestAEScore = tmpAUC
            bestAE = [tmpAUC, type, nUnits, learningRate, nCycles]

    if type == 'cl':
        with open(inputFile, 'rb') as pklFile:
            nn = pickle.load(pklFile)
        predicted = nn.predict(X_test)
        probabilities = nn.predict_proba(X_test)
        # Column 1 of the probabilities is used as the score for label 1.
        fpr, tpr, thresholds = roc_curve(Y, probabilities[:, 1], pos_label=1)
# Autoencoder layers: 71 inputs -> 20 ReLU units -> 71 linear outputs,
# trained to reproduce its own input with a mean-squared-error loss.
# NOTE(review): the creation of `ae` is outside this excerpt, and these
# training statements presumably sit inside an `if runTraining:` branch
# whose header is not visible -- confirm the nesting.
ae.add(Dense(units=20, input_dim=71))
ae.add(Activation("relu"))
ae.add(Dense(units=71, input_dim=20))
ae.add(Activation("linear"))
ae.compile(loss="mean_squared_error", optimizer="sgd")
ae.fit(X_train_ref_ae, X_train_ref_ae, epochs=100, batch_size=100)
ae.save("autoencoder.h5")

if not runTraining:
    print("Reading pre-trained autoencoder")
    ae = load_model("autoencoder.h5")

# Testing
# The network is a regressor with a linear output layer, so use predict();
# predict_proba() is meant for probability outputs and is not available in
# recent Keras releases.
predicted_same_ae = ae.predict(X_train_ref_ae)
predicted_diff_ae = ae.predict(X_test_ref_ae)
predicted_new_ae = ae.predict(X_test_new_ae)

# Reconstruction error
rec_errors_same_ae = reconstructionError(X_train_ref_ae, predicted_same_ae)
rec_errors_diff_ae = reconstructionError(X_test_ref_ae, predicted_diff_ae)
rec_errors_new_ae = reconstructionError(X_test_new_ae, predicted_new_ae)

#############################
# Plotting
#############################

y_pos = np.arange(len(trackingFeatures))
bins = np.linspace(-0.5, 1.5, 250)

if runBDT:
    # Outputs
    figBDT, axsBDT = plt.subplots(2, 1)
    axBDT1, axBDT2 = axsBDT.ravel()
    for ax in axBDT1, axBDT2:
        # NOTE(review): the loop body is cut off in this excerpt; `pass` is
        # only a no-op placeholder that keeps the header syntactically valid.
        pass
# Final (linear) output layer, then compile and train with a
# mean-squared-error loss and plain SGD.
# NOTE(review): the earlier layers are outside this excerpt, and these
# training statements presumably sit inside an `if runTraining:` branch
# whose header is not visible -- confirm the nesting.
model.add(Activation("linear"))
model.compile(loss="mean_squared_error", optimizer="sgd")
# NOTE(review): `nb_epoch` is the Keras 1 spelling (later renamed `epochs`);
# left unchanged to match the Keras version this script targets.
model.fit(X_train, Y_train, nb_epoch=100, batch_size=100)
model.save("autoencoder.h5")

if not runTraining:
    print("Reading pre-trained autoencoder")
    model = load_model("autoencoder.h5")

# Testing
# The network has a linear output layer, so use predict(); predict_proba()
# is meant for probability outputs and is not available in recent Keras
# releases.
predicted_same = model.predict(X_train)
predicted_diff = model.predict(X_test)
predicted_new = model.predict(X_new)

# Reconstruction error
rec_errors_same = reconstructionError(X_train, predicted_same)
rec_errors_diff = reconstructionError(X_test, predicted_diff)
rec_errors_new = reconstructionError(X_new, predicted_new)

# Reconstruction errors by variable
rec_errors_varwise_same = relativeErrorByFeature(X_train, predicted_same)
rec_errors_varwise_diff = relativeErrorByFeature(X_test, predicted_diff)
rec_errors_varwise_new = relativeErrorByFeature(X_new, predicted_new)

# Plotting - reconstruction errors
fig, axs = plt.subplots(3, 1)
ax1, ax2, ax3 = axs.ravel()
for ax in (ax1, ax2, ax3):
    ax.set_ylabel("Events")
    ax.set_xlabel("log10(Reconstruction error)")
# 250 evenly spaced bin edges between -2 and 5.
bins = np.linspace(-2.0, 5.0, 250)