import numpy as np
from energyflow.archs import PFN
from energyflow.utils import data_split, remap_pids, to_categorical
from sklearn.metrics import roc_auc_score


def PFN_AUC_calculation(jet_array_1, jet_array_2, train_size, test_size):
    # stack the two samples and keep the (pt, y, phi, pid) columns
    X = np.concatenate([jet_array_1, jet_array_2])[:, :, :4]
    y = np.concatenate([np.ones(len(jet_array_1)), np.zeros(len(jet_array_2))])

    ################################### SETTINGS ###################################

    # data controls
    train, val, test = train_size, X.shape[0] - train_size - test_size, test_size
    use_pids = True

    # network architecture parameters
    Phi_sizes, F_sizes = (100, 100, 128), (100, 100, 100)

    # network training parameters
    num_epoch = 10
    batch_size = 500

    ################################################################################

    # convert labels to categorical
    Y = to_categorical(y, num_classes=2)

    # preprocess by centering jets and normalizing pts
    for x in X:
        mask = x[:, 0] > 0
        yphi_avg = np.average(x[mask, 1:3], weights=x[mask, 0], axis=0)
        x[mask, 1:3] -= yphi_avg
        x[mask, 0] /= x[:, 0].sum()

    # handle particle id channel
    if use_pids:
        remap_pids(X, pid_i=3)
    else:
        X = X[:, :, :3]

    # do train/val/test split
    (X_train, X_val, X_test,
     Y_train, Y_val, Y_test) = data_split(X, Y, val=val, test=test)

    # build architecture (suppress_stdout is a stdout-silencing context manager;
    # a stand-in definition is sketched below)
    with suppress_stdout():
        pfn = PFN(input_dim=X.shape[-1], Phi_sizes=Phi_sizes, F_sizes=F_sizes)

    # train model
    pfn.fit(X_train, Y_train,
            epochs=num_epoch,
            batch_size=batch_size,
            validation_data=(X_val, Y_val),
            verbose=0)

    # get predictions on test data
    preds = pfn.predict(X_test, batch_size=1000)

    # get area under the ROC curve
    auc = roc_auc_score(Y_test[:, 1], preds[:, 1])
    return auc
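# The suppress_stdout helper used above is not part of this snippet; a minimal
# stand-in built from the standard library (an assumption, not the original
# helper) could look like this:
import os
from contextlib import contextmanager, redirect_stdout


@contextmanager
def suppress_stdout():
    # silence anything printed while the block runs (e.g. model summaries)
    with open(os.devnull, 'w') as devnull, redirect_stdout(devnull):
        yield


# Smoke test on synthetic jets (shapes and the PDG id are placeholder
# assumptions, not from the original source); random inputs should score
# an AUC near 0.5.
if __name__ == '__main__':
    rng = np.random.default_rng(0)
    sig_jets = rng.random((1000, 50, 4))
    bkg_jets = rng.random((1000, 50, 4))
    sig_jets[:, :, 3] = bkg_jets[:, :, 3] = 211  # pi+ so remap_pids can map it
    print('AUC:', PFN_AUC_calculation(sig_jets, bkg_jets,
                                      train_size=1200, test_size=400))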
from energyflow.archs import PFN


def model_build(nParticles=60, nFeatures=47, Phi_sizes=(50, 50, 12), F_sizes=(50, 50, 50)):
    """Build a PFN binary classifier.

    :param nParticles: maximum number of particles per event (kept for the
        caller's bookkeeping; the PFN itself only needs the feature dimension)
    :param nFeatures: number of features per particle
    :param Phi_sizes: hidden layer sizes of the per-particle Phi network
    :param F_sizes: hidden layer sizes of the F network after summation
    :return: a compiled energyflow PFN model with a sigmoid output
    """
    model = PFN(input_dim=nFeatures,
                Phi_sizes=Phi_sizes,
                F_sizes=F_sizes,
                output_dim=1,
                output_act='sigmoid',
                loss='binary_crossentropy')
    return model
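# Minimal usage sketch on synthetic data; the array shapes follow the defaults
# above, and the data itself is a placeholder, not from the original source.
if __name__ == '__main__':
    import numpy as np
    X_demo = np.random.rand(256, 60, 47)        # (events, particles, features)
    y_demo = np.random.randint(0, 2, size=256)  # binary labels
    clf = model_build()
    clf.fit(X_demo, y_demo, epochs=1, batch_size=64, verbose=0)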
        x[mask, 3] = map_func(x[mask, 3])
    return X


if __name__ == '__main__':
    phi_sizes = (16, 32, 64, 128)
    f_sizes = (128, 64, 32, 16)

    X, Y = load_data(2000000, 'final_efn_train')
    X = preprocess(X)
    Y = ef.utils.to_categorical(Y)
    X_train, X_val, X_test, Y_train, Y_val, Y_test = split_data(
        X, Y, test_prop=1.0 / 5, val_prop=1.0 / 5)

    adam = optimizers.Adam(learning_rate=0.0006)
    pfn = PFN(input_dim=X_train.shape[-1],
              Phi_sizes=phi_sizes,
              F_sizes=f_sizes,
              optimizer=adam)
    pfn.fit(X_train, Y_train,
            epochs=NUM_EPOCHS,
            batch_size=250,
            validation_data=(X_val, Y_val),
            verbose=1)

    preds = pfn.predict(X_test, batch_size=1000)
    fpr, tpr, thresholds = roc_curve(Y_test[:, 1], preds[:, 1])
    print('AUC: ' + str(auc(fpr, tpr)))
def build_gaussianAnsatz_PFN(x_dim, y_dim, Phi_layers, F_layers, acts, opt=None,
                             l2_reg=0.0, d_l1_reg=0.0, d_multiplier=1.0,
                             loadfile=None):
    """Helper function to build a basic gIFN PFN in one line

    Args:
        x_dim (int): X-dimension
        y_dim (int): Y-dimension
        Phi_layers (int array): Hidden Phi layer sizes. All 4 networks use the same sizes.
        F_layers (int array): Hidden F layer sizes. All 4 networks use the same sizes.
        acts: Activation(s) applied to the Phi and F hidden layers of all 4 networks.
        opt (Keras optimizer, optional): If provided, compiles the network. Defaults to None.
        l2_reg (float, optional): L2 regularization to apply to all weights in all 4 networks. Defaults to 0.0.
        d_l1_reg (float, optional): L1 regularization to apply to the D-network output. Defaults to 0.0.
        d_multiplier (float, optional): Multiplier for the D-network term in the ansatz. Defaults to 1.0.
        loadfile (string, optional): If provided, loads in weights from a file. Defaults to None.

    Returns:
        GaussianAnsatz: the gaussian-ansatz IFN assembled from the four PFN sub-networks
    """
    model_A = PFN(input_dim=x_dim, Phi_sizes=Phi_layers, F_sizes=F_layers,
                  Phi_acts=acts, F_acts=acts, output_act='linear', output_dim=1,
                  Phi_l2_regs=l2_reg, F_l2_regs=l2_reg, name_layers=False).model
    model_B = PFN(input_dim=x_dim, Phi_sizes=Phi_layers, F_sizes=F_layers,
                  Phi_acts=acts, F_acts=acts, output_act='linear', output_dim=y_dim,
                  Phi_l2_regs=l2_reg, F_l2_regs=l2_reg, name_layers=False).model
    model_D = PFN(input_dim=x_dim, Phi_sizes=Phi_layers, F_sizes=F_layers,
                  Phi_acts=acts, F_acts=acts, output_act='linear', output_dim=y_dim,
                  Phi_l2_regs=l2_reg, F_l2_regs=l2_reg, name_layers=False).model
    model_C = PFN(input_dim=x_dim, Phi_sizes=Phi_layers, F_sizes=F_layers,
                  Phi_acts=acts, F_acts=acts, output_act='linear',
                  output_dim=y_dim * y_dim, num_global_features=y_dim,
                  Phi_l2_regs=l2_reg, F_l2_regs=l2_reg, name_layers=False).model

    ifn = GaussianAnsatz(model_A, model_B, model_C, model_D,
                         d_multiplier=d_multiplier, y_dim=y_dim, d_l1_reg=d_l1_reg)

    # Compile
    if opt is not None:
        ifn.compile(loss=mine_loss, optimizer=opt, metrics=[MI, joint, marginal])

    # Load a previous model, or pretrain
    if loadfile is not None:
        ifn.built = True
        ifn.load_weights(loadfile)

    return ifn
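# Hypothetical usage (the dimensions, layer sizes, and optimizer below are
# illustrative placeholders, not from the source); relies on the same
# module-level names as the function above (PFN, GaussianAnsatz, mine_loss,
# MI, joint, marginal).
import tensorflow as tf

ifn = build_gaussianAnsatz_PFN(x_dim=4, y_dim=1,
                               Phi_layers=(64, 64, 64), F_layers=(64, 64, 64),
                               acts='relu',
                               opt=tf.keras.optimizers.Adam(learning_rate=1e-4))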
else:
    X = X[:, :, :3]
print('Finished preprocessing')

# do train/val/test split
(X_train, X_val, X_test,
 Y_train, Y_val, Y_test) = data_split(X, Y, val=val, test=test)
print('Done train/val/test split')
print('Model summary:')

# build architecture
pfn = PFN(input_dim=X.shape[-1], Phi_sizes=Phi_sizes, F_sizes=F_sizes)

# train model
pfn.fit(X_train, Y_train,
        epochs=num_epoch,
        batch_size=batch_size,
        validation_data=(X_val, Y_val),
        verbose=1)

# get predictions on test data
preds = pfn.predict(X_test, batch_size=1000)

# get ROC curve if we have sklearn
if roc_curve:
    pfn_fp, pfn_tp, threshs = roc_curve(Y_test[:, 1], preds[:, 1])
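    # Follow-on sketch (not part of the original fragment): reduce the ROC
    # curve to a single number with sklearn's trapezoidal auc helper.
    from sklearn.metrics import auc as sk_auc
    print('PFN AUC: {:.4f}'.format(sk_auc(pfn_fp, pfn_tp)))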
        kwargs.update({'kernel_regularizer': l2(l2_reg),
                       'bias_regularizer': l2(l2_reg)})

        # a new dense layer acting on the previous layer in the list
        new_layer = _apply_act(act, Dense(s, **kwargs)(dense_layers[-1]))

        # apply dropout (skipped if dropout is zero)
        if dropout > 0.:
            new_layer = Dropout(dropout)(new_layer)

        # append the new layer to the list
        dense_layers.append(new_layer)

    return dense_layers


# get two PFNs, one for muons and one for electrons
muon_pfn = PFN(input_dim=5, Phi_sizes=[100, 100], F_sizes=[50],
               compile=False, name_layers=False)
electron_pfn = PFN(input_dim=5, Phi_sizes=[100, 100], F_sizes=[50],
                   compile=False, name_layers=False)

# make some dense layers (including an input layer) for the jet-variables DNN
jet_vars_dnn = make_dense_layers([100, 100], input_shape=(10,))

# a list of the input layers
inputs = muon_pfn.inputs + electron_pfn.inputs + [jet_vars_dnn[0]]

# the concatenated layer
concat_layer = concatenate([muon_pfn.F[-1], electron_pfn.F[-1], jet_vars_dnn[-1]])

# a DNN to combine things on the backend
combo_dnn = make_dense_layers([100, 100], concat_layer)

# a binary-classification-like output
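# One plausible completion of the truncated example (an assumption, not the
# original code): a two-class softmax head on the combined DNN, then a single
# Keras Model tying the three input branches together.
output = Dense(2, activation='softmax')(combo_dnn[-1])
model = Model(inputs=inputs, outputs=output)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])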
test = 0.2
Phi_sizes, F_sizes = (200, 200, 256), (200, 200, 200)
num_epoch = 1000

(X_train, X_val, X_test,
 Y_train, Y_val, Y_test) = data_split(X, Y, val=val, test=test)

# stop early once the validation AUC has not improved for 20 epochs
es = EarlyStopping(monitor='val_auc', mode='max', verbose=1, patience=20,
                   restore_best_weights=True)
# mc = ModelCheckpoint('best_model.h5', monitor='val_auc', mode='max', verbose=1,
#                      save_best_only=True)

pfn = PFN(input_dim=3, Phi_sizes=Phi_sizes, F_sizes=F_sizes,
          metrics=['acc', auc], latent_dropout=0.2, F_dropouts=0.2)

history = pfn.fit(X_train, Y_train,
                  epochs=num_epoch,
                  batch_size=batch_size,
                  validation_data=(X_val, Y_val),
                  verbose=1,
                  callbacks=[es])

# dependencies = {
#     'auc': tf.keras.metrics.AUC(name="auc")
# }
# saved_model = load_model('best_model.h5', custom_objects=dependencies)
# preds = saved_model.predict([z_test, p_test], batch_size=1000)
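# Neither `auc` nor `batch_size` is defined in this fragment. Judging from the
# commented-out `dependencies` dict above, `auc` is plausibly a Keras AUC
# metric, which is also what exposes 'val_auc' to the EarlyStopping monitor;
# it would need to be defined before the PFN is built, e.g.:
# import tensorflow as tf
# auc = tf.keras.metrics.AUC(name='auc')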
import numpy as np

from energyflow.archs import PFN
from energyflow.utils import data_split, remap_pids, to_categorical
from glob import glob
from keras.utils import plot_model

MODEL_DIR = "./DeepSets/"

if __name__ == "__main__":
    # Specify number of particles to use and number of features
    nParticles = 60
    # nFeatures = 51
    nFeatures = 47

    Phi_sizes, F_sizes = (50, 50, 12), (50, 50, 50)
    model = PFN(input_dim=nFeatures,
                Phi_sizes=Phi_sizes,
                F_sizes=F_sizes,
                output_dim=1,
                output_act='sigmoid',
                loss='binary_crossentropy')
    # plot the underlying Keras model (the PFN wrapper exposes it as .model)
    plot_model(model.model, to_file='deepset.png')

    # Utilities is a project-specific helper class, assumed defined elsewhere
    utils = Utilities(nParticles)

    # Build the first training dataset
    X_train, Y, W_train, MVA_train = utils.BuildBatch()
    print(MVA_train.shape)

    for epoch in range(10000):
        # Shuffle loaded datasets and begin (a range cannot be shuffled in
        # place in Python 3, so draw a random permutation of indices instead)
        inds = np.random.permutation(len(X_train))
        X_epoch, Y_epoch, W_epoch, MVA_epoch = X_train[inds], Y[inds], W_train[inds], MVA_train[inds]
def mk_PFN(Phi_sizes=(128, 128), F_sizes=(128, 128), use_EFN=False,
           center_jets=True, latent_dropout=0., randomize_az=False):
    # set up either an Energy Flow Network or a Particle Flow Network
    # from the energyflow package
    if use_EFN:
        efn_core = EFN(input_dim=3, Phi_sizes=Phi_sizes, F_sizes=F_sizes,
                       loss='binary_crossentropy', output_dim=1,
                       output_act='sigmoid', latent_dropout=latent_dropout)
    else:
        pfn_core = PFN(input_dim=4, Phi_sizes=Phi_sizes, F_sizes=F_sizes,
                       loss='binary_crossentropy', output_dim=1,
                       output_act='sigmoid', latent_dropout=latent_dropout)

    # input: constituents' pt/eta/phi
    pfn_in = layers.Input((defs.N_CONST, 3))
    x = pfn_in

    # optionally, center the constituents about the jet axis,
    # then apply a random azimuthal rotation about that axis
    if center_jets:
        x = util.CenterJet()(x)
        if randomize_az:
            x = util.RandomizeAz()(x)

    # format the centered constituents by masking empty items and
    # converting phi -> (sin(phi), cos(phi)).
    # This is done to prevent adversarial perturbations causing phi
    # to either wrap around or go out of range.
    x = layers.Lambda(_format_constituents, name='phi_format')(x)

    if use_EFN:
        # if we are using the EFN model, we have to split up
        # the pT and angular parts of the constituents
        def getpt(x):
            # return just the pT for each constituent
            xpt, _, _, _ = tf.split(x, 4, axis=-1)
            return xpt

        def getangle(x):
            # return the eta, sin(phi), cos(phi) for each constituent
            _, xeta, xphi_s, xphi_c = tf.split(x, 4, axis=-1)
            return tf.concat([xeta, xphi_s, xphi_c], axis=-1)

        xpt = layers.Lambda(getpt)(x)
        xangle = layers.Lambda(getangle)(x)

        # apply the EFN model to the pT and angular inputs
        pfn_out = efn_core.model([xpt, xangle])

        # the EFN model also comes with an extra tensor dimension,
        # which we need to remove:
        pfn_out = layers.Lambda(lambda x: tf.squeeze(x, axis=-1))(pfn_out)
        print(pfn_out.shape)
    else:
        pfn_out = pfn_core.model(x)
        print(pfn_out.shape)

    pfn = Model(pfn_in, pfn_out)
    pfn.compile(optimizer='adam', loss='binary_crossentropy')
    return pfn
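# Hypothetical usage of mk_PFN (the array names are placeholders); the function
# depends on module-level names (defs.N_CONST, util.CenterJet, util.RandomizeAz,
# _format_constituents), so it only runs inside that module.
# model = mk_PFN(Phi_sizes=(128, 128), F_sizes=(128, 128), center_jets=True)
# model.fit(X_train, y_train, validation_data=(X_val, y_val),
#           epochs=10, batch_size=256)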
def DeepSet(nParticles, nFeatures, Phi_sizes=(50, 50, 12), F_sizes=(50, 50, 50)):
    # PFN / Deep Sets classifier: a per-particle Phi network, a summation over
    # particles, and an F network on the summed latent representation
    model = PFN(input_dim=nFeatures,
                Phi_sizes=Phi_sizes,
                F_sizes=F_sizes,
                output_dim=1,
                output_act='sigmoid',
                loss='binary_crossentropy')
    # plot the underlying Keras model (the PFN wrapper exposes it as .model)
    plot_model(model.model, to_file='deepset.png')
    return model