def PFN_AUC_calculation(jet_array_1, jet_array_2, train_size, test_size):
    """Train a Particle Flow Network to separate two jet samples; return ROC AUC.

    Parameters
    ----------
    jet_array_1, jet_array_2 : np.ndarray
        Arrays of shape (n_jets, n_particles, >=4); only the first four
        per-particle features are used, assumed to be (pt, y, phi, pid)
        -- TODO confirm feature order against the data producer.
        Sample 1 is labelled 1, sample 2 is labelled 0.
    train_size, test_size : int
        Number of jets in the training and test splits; everything left
        over becomes the validation split.

    Returns
    -------
    float
        Area under the ROC curve on the held-out test set.
    """
    # Stack both samples and keep only the first four per-particle features.
    X = np.concatenate([jet_array_1, jet_array_2])[:, :, :4]
    y = np.concatenate([np.ones(len(jet_array_1)), np.zeros(len(jet_array_2))])

    # ------------------------------ settings ------------------------------
    # data controls: whatever is not train/test becomes validation
    val_size = X.shape[0] - train_size - test_size
    use_pids = True

    # network architecture parameters
    Phi_sizes, F_sizes = (100, 100, 128), (100, 100, 100)

    # network training parameters
    num_epoch = 10
    batch_size = 500
    # -----------------------------------------------------------------------

    # convert labels to one-hot categorical (2 classes)
    Y = to_categorical(y, num_classes=2)

    # preprocess in place: center each jet in (y, phi), weighted by pt,
    # and normalize the pt column so each jet's pts sum to 1
    for x in X:
        mask = x[:, 0] > 0
        yphi_avg = np.average(x[mask, 1:3], weights=x[mask, 0], axis=0)
        x[mask, 1:3] -= yphi_avg
        x[mask, 0] /= x[:, 0].sum()

    # handle the particle-id channel: remap PIDs to small floats, or drop it
    if use_pids:
        remap_pids(X, pid_i=3)
    else:
        X = X[:, :, :3]

    # train/val/test split
    (X_train, X_val, X_test,
     Y_train, Y_val, Y_test) = data_split(X, Y, val=val_size, test=test_size)

    # build the architecture (suppress model-construction chatter);
    # the original initialized `pfn = 0` first -- a dead placeholder, removed
    with suppress_stdout():
        pfn = PFN(input_dim=X.shape[-1], Phi_sizes=Phi_sizes, F_sizes=F_sizes)

    # train the model
    pfn.fit(X_train, Y_train,
            epochs=num_epoch,
            batch_size=batch_size,
            validation_data=(X_val, Y_val),
            verbose=0)

    # get predictions on the held-out test data
    preds = pfn.predict(X_test, batch_size=1000)

    # area under the ROC curve for the signal (class-1) score
    return roc_auc_score(Y_test[:, 1], preds[:, 1])
x[mask, 3] = map_func(x[mask, 3]) return X if __name__ == '__main__': phi_sizes = (16, 32, 64, 128) f_sizes = (128, 64, 32, 16) X, Y = load_data(2000000, 'final_efn_train') X = preprocess(X) Y = ef.utils.to_categorical(Y) X_train, X_val, X_test, Y_train, Y_val, Y_test = split_data( X, Y, test_prop=1.0 / 5, val_prop=1.0 / 5) adam = optimizers.Adam(lr=.0006) pfn = PFN(input_dim=X_train.shape[-1], Phi_sizes=phi_sizes, F_sizes=f_sizes, optimizer=adam) pfn.fit(X_train, Y_train, epochs=NUM_EPOCHS, batch_size=250, validation_data=(X_val, Y_val), verbose=1) preds = pfn.predict(X_test, batch_size=1000) fpr, tpr, thresholds = roc_curve(Y_test[:, 1], preds[:, 1]) print('AUC: ' + str(auc(fpr, tpr)))
# Shuffle loaded datasets and begin inds = range(len(X_train)) np.random.shuffle(inds) X_epoch, Y_epoch, W_epoch, MVA_epoch = X_train[inds], Y[inds], W_train[ inds], MVA_epoch[inds] if (np.min(W_train) == np.nan): continue if (np.min(W_train) == np.inf): continue model.fit(X_epoch, Y_epoch, epochs=1, batch_size=4 * 512, verbose=1) pd.DataFrame(X_epoch).to_csv("X_example.csv", index=False) pd.DataFrame(Y_epoch).to_csv("Y_example.csv", index=False) pd.DataFrame(MVA_epoch).to_csv("MVA_example.csv", index=False) if (epoch % 10 == 0): model.save( '/beegfs/desy/user/dydukhle/TauId/models/keras__deep_set__%i.model' % (epoch)) preds = model.predict(X_epoch, batch_size=1 * 2048) mva = MVA_train df_preds = pd.DataFrame({ "pred": [i[0] for i in preds], "labels": [y for y in Y_epoch] }) #, 'mva':[m[0] for m in mva] }) df_preds.to_csv( "./DeepSets_res/labels_and_pred_e_{0}.csv".format(epoch)) df_mva = pd.DataFrame(mva) df_mva.to_csv("./DeepSets_res/mva_{0}.csv".format(epoch)) # Get next batch from background loader res X_train, Y, W_train, MVA_train = utils.BuildBatch( ) #res.get(timeout=180)