def PFN_AUC_calculation(jet_array_1, jet_array_2, train_size, test_size):
    """Train a Particle Flow Network to separate two jet samples and
    return the ROC AUC of the classifier on held-out test data.

    Parameters
    ----------
    jet_array_1 : ndarray, shape (n_jets_1, n_particles, >=4)
        Jets labelled 1; per-particle channels are (pt, y, phi, pid).
        TODO confirm channel ordering against the caller.
    jet_array_2 : ndarray, shape (n_jets_2, n_particles, >=4)
        Jets labelled 0, same layout.
    train_size, test_size : int
        Number of jets used for training / testing; every remaining jet
        goes to the validation split.

    Returns
    -------
    float
        Area under the ROC curve on the test split.
    """
    # keep only the first four channels and build the binary labels
    X = np.concatenate([jet_array_1, jet_array_2])[:, :, :4]
    y = np.concatenate([np.ones(len(jet_array_1)), np.zeros(len(jet_array_2))])

    # ---------------------------- settings ----------------------------
    # data controls: whatever is not train/test becomes validation
    val = X.shape[0] - train_size - test_size
    test = test_size
    use_pids = True

    # network architecture parameters
    Phi_sizes, F_sizes = (100, 100, 128), (100, 100, 100)

    # network training parameters
    num_epoch = 10
    batch_size = 500
    # -------------------------------------------------------------------

    # convert labels to two-column categorical (one-hot)
    Y = to_categorical(y, num_classes=2)

    # preprocess in place: center each jet in (y, phi) with pt weights
    # and normalize the pt column to unit sum
    for x in X:
        mask = x[:, 0] > 0
        yphi_avg = np.average(x[mask, 1:3], weights=x[mask, 0], axis=0)
        x[mask, 1:3] -= yphi_avg
        x[mask, 0] /= x[:, 0].sum()

    # handle particle id channel: remap raw pids to small floats, or drop it
    if use_pids:
        remap_pids(X, pid_i=3)
    else:
        X = X[:, :, :3]

    # do train/val/test split
    (X_train, X_val, X_test,
     Y_train, Y_val, Y_test) = data_split(X, Y, val=val, test=test)

    # build architecture; suppress_stdout hides the model-summary printout
    with suppress_stdout():
        pfn = PFN(input_dim=X.shape[-1], Phi_sizes=Phi_sizes, F_sizes=F_sizes)

    # train model
    pfn.fit(X_train, Y_train,
            epochs=num_epoch,
            batch_size=batch_size,
            validation_data=(X_val, Y_val),
            verbose=0)

    # get predictions on test data
    preds = pfn.predict(X_test, batch_size=1000)

    # area under the ROC curve, scored on the signal (column 1) outputs
    auc = roc_auc_score(Y_test[:, 1], preds[:, 1])
    return auc
# one-hot encode the binary labels for the two-class problem
Y = to_categorical(y, num_classes=2)
print('Loaded quark and gluon jets')

# per-jet preprocessing: center each jet in (y, phi) using pt weights,
# then rescale the pt column so it sums to one
for jet in X:
    has_pt = jet[:, 0] > 0
    centroid = np.average(jet[has_pt, 1:3], weights=jet[has_pt, 0], axis=0)
    jet[has_pt, 1:3] -= centroid
    jet[has_pt, 0] /= jet[:, 0].sum()
print('Finished preprocessing')

# split the pt column (z) and the angular columns (p) into train/val/test
(z_train, z_val, z_test,
 p_train, p_val, p_test,
 Y_train, Y_val, Y_test) = data_split(X[:, :, 0], X[:, :, 1:], Y,
                                      val=val, test=test)
print('Done train/val/test split')

print('Model summary:')
# build the Energy Flow Network architecture
efn = EFN(input_dim=2, Phi_sizes=Phi_sizes, F_sizes=F_sizes)

# fit the network on the (z, p) input pair
efn.fit([z_train, p_train], Y_train,
        epochs=num_epoch,
        batch_size=batch_size,
        validation_data=([z_val, p_val], Y_val),
        verbose=1)
print()
print('Loaded quark and gluon jets')

# center every jet in (y, phi) weighted by pt, and normalize its pts
for jet in X:
    nonzero = jet[:, 0] > 0
    center = np.average(jet[nonzero, 1:3], weights=jet[nonzero, 0], axis=0)
    jet[nonzero, 1:3] -= center
    jet[nonzero, 0] /= jet[:, 0].sum()
print('Finished preprocessing')

# split the pt column and the angular columns consistently into
# train/val/test pieces
(z_train, z_val, z_test,
 p_train, p_val, p_test,
 Y_train, Y_val, Y_test) = data_split(X[:, :, 0], X[:, :, 1:], Y,
                                      val=val_frac, test=test_frac)
print('Done train/val/test split')

print('Model summary:')
# build the EFN (this chunk uses the older energyflow keyword names
# ppm_sizes/dense_sizes rather than Phi_sizes/F_sizes)
efn = EFN(input_dim=2, ppm_sizes=ppm_sizes, dense_sizes=dense_sizes)

# train the network on the (z, p) input pair
efn.fit([z_train, p_train], Y_train,
        epochs=num_epoch,
        batch_size=batch_size,
        validation_data=([z_val, p_val], Y_val),
        verbose=1)
# NOTE(review): this chunk's line breaks and indentation were collapsed onto a
# single physical line, and its opening statements (mask / yphi_avg / ...) are
# the body of a per-jet preprocessing loop whose `for` header lies before this
# view — restore the original formatting before editing. Left byte-identical.
# Chunk contents (as written): finish the jet centering / pt-normalization
# loop, optionally remap the pid channel via remap_pids (else drop it),
# train/val/test split via data_split, build a PFN, and train it with
# verbose=1 so the model summary and progress are printed.
mask = x[:, 0] > 0 yphi_avg = np.average(x[mask, 1:3], weights=x[mask, 0], axis=0) x[mask, 1:3] -= yphi_avg x[mask, 0] /= x[:, 0].sum() # handle particle id channel if use_pids: remap_pids(X, pid_i=3) else: X = X[:, :, :3] print('Finished preprocessing') # do train/val/test split (X_train, X_val, X_test, Y_train, Y_val, Y_test) = data_split(X, Y, val=val, test=test) print('Done train/val/test split') print('Model summary:') # build architecture pfn = PFN(input_dim=X.shape[-1], Phi_sizes=Phi_sizes, F_sizes=F_sizes) # train model pfn.fit(X_train, Y_train, epochs=num_epoch, batch_size=batch_size, validation_data=(X_val, Y_val), verbose=1)
# NOTE(review): collapsed formatting; the trailing `hps = {` dict literal is
# cut off mid-statement at the end of this chunk (its remaining keys lie
# outside this view), so the code is left byte-identical here.
# Chunk contents (as written): pixelate each jet into an image array, split
# images into train/val/test via data_split, zero-center and standardize the
# three splits together, then begin assembling the CNN hyperparameter dict
# (input_shape / filter_sizes / num_filters, ...).
# make jet images images = np.asarray([ pixelate(x, npix=npix, img_width=img_width, nb_chan=nb_chan, charged_counts_only=True, norm=norm) for x in X ]) print('Done making jet images') # do train/val/test split (X_train, X_val, X_test, Y_train, Y_val, Y_test) = data_split(images, Y, val=val_frac, test=test_frac) print('Done train/val/test split') # preprocess by zero centering images and standardizing each pixel X_train, X_val, X_test = standardize(*zero_center(X_train, X_val, X_test)) print('Finished preprocessing') print('Model summary:') # build architecture hps = { 'input_shape': input_shape, 'filter_sizes': filter_sizes, 'num_filters': num_filters,
# NOTE(review): collapsed formatting; the `for i, num_nsub ...` training loop
# body is cut off at the end of this chunk (it ends on a dangling comment,
# and `preds` is not yet consumed), so the code is left byte-identical.
# Chunk contents (as written): one-hot encode the labels, then for each
# nsub count build a DNN (summary printed only for the first model), split
# X[:, :num_nsub] into train/val/test, train, and predict on the test set;
# presumably ROC computation follows downstream — TODO confirm.
# convert labels to categorical Y = to_categorical(y, num_classes=2) print('Loaded quark and gluon jets') print('Model summary:') # train models with different numbers of nsubs as input rocs = [] for i, num_nsub in enumerate(num_nsubs): # build architecture dnn = DNN(input_dim=num_nsub, dense_sizes=dense_sizes, summary=(i == 0)) # do train/val/test split (X_train, X_val, X_test, Y_train, Y_val, Y_test) = data_split(X[:, :num_nsub], Y, val=val_frac, test=test_frac) print('Done train/val/test split') # train model dnn.fit(X_train, Y_train, epochs=num_epoch, batch_size=batch_size, validation_data=(X_val, Y_val), verbose=1) # get predictions on test data preds = dnn.predict(X_test, batch_size=1000) # get ROC curve if we have sklearn
# NOTE(review): collapsed formatting; the `for d in range(1, dmax + 1)` loop
# body appears to continue beyond this chunk (`auc` is computed but not yet
# used), so the code is left byte-identical here.
# Chunk contents (as written): batch-compute EFPs for all masked jets, then
# for each degree d <= dmax fit an LDA LinearClassifier on the EFP columns
# selected by efpset.sel(('d<=', d)), split with val=0 (train/test only),
# predict on the test split, append the ROC curve when sklearn's roc_curve
# is available, and compute the ROC AUC on the signal column.
X = efpset.batch_compute(masked_X) print('Done') # train models with different numbers of EFPs as input rocs = [] for d in range(1, dmax + 1): # build architecture model = LinearClassifier(linclass_type='lda') # select EFPs with degree <= d X_d = X[:, efpset.sel(('d<=', d))] # do train/val/test split (X_train, X_test, y_train, y_test) = data_split(X_d, y, val=0, test=test_frac) print('Done train/val/test split') # train model model.fit(X_train, y_train) # get predictions on test data preds = model.predict(X_test) # get ROC curve if we have sklearn if roc_curve: rocs.append(roc_curve(y_test, preds[:, 1])) # get area under the ROC curve auc = roc_auc_score(y_test, preds[:, 1])
# apply the background selection, then stack the signal-rich and
# background-rich samples into one dataset
X_bkg_rich = X_bkg_rich[bkg_mask]
Y_bkg_rich = Y_bkg_rich[bkg_mask]
X_cat = np.concatenate((X_sig_rich, X_bkg_rich))
Y_cat = np.concatenate((Y_sig_rich, Y_bkg_rich))

# region labels: 1 for events from the signal-rich sample, 0 otherwise
labels = np.concatenate((np.ones((X_sig_rich.shape[0]), dtype=np.float32),
                         np.zeros((X_bkg_rich.shape[0]), dtype=np.float32)))

# shuffle features, true labels, and region labels together (fixed seed
# so the split is reproducible)
X_new, Y_new_true, Y_new = sk_shuffle(X_cat, Y_cat, labels, random_state=123)
print_signal_fractions(Y_new_true, Y_new)

# consistent train/val/test split; shuffling already happened above,
# hence shuffle=False here
(X_train, X_val, X_test,
 Y_true_train, Y_true_val, Y_true_test,
 Y_train, Y_val, Y_test) = data_split(X_new, Y_new_true, Y_new,
                                      val=val_frac, test=test_frac,
                                      shuffle=False)

# unit per-event weights by default
evt_weights = np.ones(X_train.shape[0])

myoptimizer = keras.optimizers.Adam(lr=0.001, beta_1=0.8, beta_2=0.99,
                                    epsilon=1e-08, decay=0.0005)

# choose the architecture: a flat dense net on feature vectors, or a CNN
# on per-event image-shaped inputs
my_model = dense_net(X_train.shape[1]) if options.use_dense else CNN(X_train[0].shape)
my_model.summary()
# NOTE(review): collapsed formatting; the `if(options.reweight):` block is cut
# off at the end of this chunk (the pt histogram/reweighting code continues
# outside this view), so the code is left byte-identical here.
# Chunk contents (as written): keep events labelled background (Y_lab < 0.1)
# plus signal-labelled events inside the mjj window, apply that mask to
# X / Y_lab / Y_true / jet_pts, print the new signal fractions, do a shuffled
# train/val/test split carrying jet pts and both label arrays, set unit event
# weights, and begin optional jet-pt-based reweighting of the signal-region
# vs background-region populations.
keep_events = (Y_lab < 0.1) | (mjj_window_sig & (Y_lab > 0.9)) X = X[keep_events] Y_lab = Y_lab[keep_events] Y_true = Y_true[keep_events] jet_pts = jet_pts[keep_events] print("New sig fracs are: ") print_signal_fractions(Y_true, Y_lab) (X_train, X_val, X_test, jet_pts_train, jet_pts_val, jet_pts_test, Y_true_train, Y_true_val, Y_true_test, Y_lab_train, Y_lab_val, Y_lab_test) = data_split(X, jet_pts, Y_true, Y_lab, val=val_frac, test=test_frac, shuffle = True) evt_weights = np.ones(X_train.shape[0]) print(Y_lab_train) if(options.reweight): print("Doing reweighting based on jet pt") sr_pts = jet_pts_train[Y_lab_train[:,0] > 0.9] br_pts = jet_pts_train[Y_lab_train[:,0] < 0.1] labels = ['Signal', 'Background'] colors = ['b', 'r'] n_pt_bins = 20
# NOTE(review): this chunk begins mid-expression (`(options.mjj_high + ...`,
# whose statement head lies before this view) and ends on a dangling `else:`
# branch, so the code is left byte-identical here; restore the original line
# breaks before editing.
# Chunk contents (as written): keep sampled background events or events
# inside the mjj window, mask X / Y / Y_mjj_window accordingly, do a shuffled
# train/val/test split of (X, Y_mjj_window, Y), print the training signal
# fractions, set unit event weights, build an Adam optimizer, and start
# choosing between a dense net and a CNN based on options.use_dense.
(options.mjj_high + window_size)))) keep_event = bkg_sample | (Y_mjj_window == 1) #print(window_size) #print(mjj[:20]) #print(keep_event[:20]) #print(X.shape, Y_mjj_window.shape) X = X[keep_event] Y = Y[keep_event] Y_mjj_window = Y_mjj_window[keep_event] #print(X.shape, Y_mjj_window.shape) (X_train, X_val, X_test, Y_train, Y_val, Y_test, Y_train_true, Y_val_true, Y_test_true) = data_split(X, Y_mjj_window, Y, val=val_frac, test=test_frac, shuffle=True) print_signal_fractions(Y_train_true, Y_train) evt_weights = np.ones(X_train.shape[0]) myoptimizer = tf.keras.optimizers.Adam(lr=0.001, beta_1=0.8, beta_2=0.99, epsilon=1e-08, decay=0.0005) if (options.use_dense): my_model = dense_net(X_train.shape[1]) else: