import os
import os.path as osp
from time import strftime

from energyflow.archs import EFN
from energyflow.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint


def main():
    # `load` is a project-local helper that reads the merged .npz files
    X_train, y_train, inpz_train = load('data/train/merged.npz')
    X_test, y_test, inpz_test = load('data/test/merged.npz')

    # one-hot encode the labels
    Y_train = to_categorical(y_train, 2)
    Y_test = to_categorical(y_test, 2)

    # build the EFN and warm-start from an earlier checkpoint
    Phi_sizes, F_sizes = (100, 100, 128), (100, 100, 100)
    efn = EFN(input_dim=2, Phi_sizes=Phi_sizes, F_sizes=F_sizes)
    efn.load_weights('ckpts_Apr06_134930/ckpt-40-val_acc-0.73.hdf5')

    # timestamped checkpoint directory
    ckpt_dir = strftime('ckpts_%b%d_%H%M%S')
    if not osp.isdir(ckpt_dir):
        os.makedirs(ckpt_dir)

    # save a checkpoint every epoch, plus a separate best-so-far file
    checkpoint_callback = ModelCheckpoint(
        ckpt_dir + '/ckpt-{epoch:02d}-val_acc-{val_acc:.3f}.hdf5',
        monitor='val_acc', verbose=1, save_best_only=False, mode='max')
    best_checkpoint_callback = ModelCheckpoint(
        ckpt_dir + '/ckpt-best.hdf5',
        monitor='val_acc', verbose=1, save_best_only=True, mode='max')

    # EFN takes the pT weights and the angular coordinates as separate inputs
    efn.fit([X_train[:, :, 0], X_train[:, :, 1:]], Y_train,
            epochs=40, batch_size=64,
            validation_data=([X_test[:, :, 0], X_test[:, :, 1:]], Y_test),
            verbose=1,
            callbacks=[checkpoint_callback, best_checkpoint_callback])
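# The `load` helper above is project-local and not shown; below is a minimal
# sketch, assuming the merged .npz files store the particle arrays, labels,
# and input z-values under the keys 'X', 'y', and 'inpz' (the key names are
# assumptions, not from the source).
import numpy as np

def load(path):
    # hypothetical reader for the merged .npz files
    f = np.load(path)
    return f['X'], f['y'], f['inpz']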
# preprocess each jet: center in (y, phi) and normalize the pT weights
for x in X:
    mask = x[:, 0] > 0
    yphi_avg = np.average(x[mask, 1:3], weights=x[mask, 0], axis=0)
    x[mask, 1:3] -= yphi_avg
    x[mask, 0] /= x[:, 0].sum()
print('Finished preprocessing')

# do train/val/test split
(z_train, z_val, z_test,
 p_train, p_val, p_test,
 Y_train, Y_val, Y_test) = data_split(X[:, :, 0], X[:, :, 1:], Y,
                                      val=val, test=test)
print('Done train/val/test split')
print('Model summary:')

# build architecture
efn = EFN(input_dim=2, Phi_sizes=Phi_sizes, F_sizes=F_sizes)

# train model
efn.fit([z_train, p_train], Y_train,
        epochs=num_epoch,
        batch_size=batch_size,
        validation_data=([z_val, p_val], Y_val),
        verbose=1)

# get predictions on test data
preds = efn.predict([z_test, p_test], batch_size=1000)

# get ROC curve
efn_fp, efn_tp, threshs = roc_curve(Y_test[:, 1], preds[:, 1])
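# A short follow-on sketch, assuming sklearn.metrics and matplotlib are
# available: summarize the ROC curve above with its AUC and plot signal
# efficiency against background rejection, mirroring the plotting snippet
# further below.
from sklearn.metrics import auc
import matplotlib.pyplot as plt

print('EFN AUC:', auc(efn_fp, efn_tp))
plt.plot(efn_tp, 1 - efn_fp, '-', label='EFN')
plt.xlabel('signal efficiency')
plt.ylabel('background rejection')
plt.legend()
plt.show()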
# preprocess each jet: center in (y, phi) and normalize the pT weights
for x in X:
    mask = x[:, 0] > 0
    yphi_avg = np.average(x[mask, 1:3], weights=x[mask, 0], axis=0)
    x[mask, 1:3] -= yphi_avg
    x[mask, 0] /= x[:, 0].sum()
print('Finished preprocessing')

# do train/val/test split
(z_train, z_val, z_test,
 p_train, p_val, p_test,
 Y_train, Y_val, Y_test) = data_split(X[:, :, 0], X[:, :, 1:], Y,
                                      val=val_frac, test=test_frac)
print('Done train/val/test split')
print('Model summary:')

# build architecture (pre-1.0 energyflow API: ppm_sizes/dense_sizes are the
# older names for Phi_sizes/F_sizes)
efn = EFN(input_dim=2, ppm_sizes=ppm_sizes, dense_sizes=dense_sizes)

# train model
efn.fit([z_train, p_train], Y_train,
        epochs=num_epoch,
        batch_size=batch_size,
        validation_data=([z_val, p_val], Y_val),
        verbose=1)

# get predictions on test data
preds = efn.predict([z_test, p_test], batch_size=1000)

# get ROC curve if we have sklearn
if roc_curve:
    efn_fp, efn_tp, threshs = roc_curve(Y_test[:, 1], preds[:, 1])
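# The `if roc_curve:` guard above assumes an import pattern like the one in
# the energyflow examples, where roc_curve is set falsy if sklearn is absent:
try:
    from sklearn.metrics import roc_curve
except ImportError:
    print('please install scikit-learn in order to make ROC curves')
    roc_curve = False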
def build_gaussianAnsatz_EFN(x_dim, y_dim, Phi_layers, F_layers, acts, pad,
                             opt=None, l2_reg=0.0, d_l1_reg=0.0,
                             d_multiplier=1.0, loadfile=None):
    """Helper function to build a basic gIFN EFN in one line.

    Args:
        x_dim (int): X-dimension
        y_dim (int): Y-dimension
        Phi_layers (int array): Hidden Phi layer sizes. All 4 networks use the same sizes.
        F_layers (int array): Hidden F layer sizes. All 4 networks use the same sizes.
        acts: Activation(s) used for the Phi and F layers of all 4 networks.
        pad (int): Padded length of the per-event particle arrays.
        opt (Keras optimizer, optional): If provided, compiles the network. Defaults to None.
        l2_reg (float, optional): L2 regularization to apply to all weights in all 4 networks. Defaults to 0.0.
        d_l1_reg (float, optional): L1 regularization to apply to the D-network output. Defaults to 0.0.
        d_multiplier (float, optional): Multiplier applied to the D-network term. Defaults to 1.0.
        loadfile (string, optional): If provided, loads in weights from a file. Defaults to None.

    Returns:
        GaussianAnsatz: the assembled gIFN, compiled if opt is given.
    """
    model_A = EFN(input_dim=x_dim - 1, Phi_sizes=Phi_layers, F_sizes=F_layers,
                  Phi_acts=acts, F_acts=acts, output_act='linear', output_dim=1,
                  Phi_l2_regs=l2_reg, F_l2_regs=l2_reg, name_layers=False).model
    model_B = EFN(input_dim=x_dim - 1, Phi_sizes=Phi_layers, F_sizes=F_layers,
                  Phi_acts=acts, F_acts=acts, output_act='linear', output_dim=y_dim,
                  Phi_l2_regs=l2_reg, F_l2_regs=l2_reg, name_layers=False).model
    model_D = EFN(input_dim=x_dim - 1, Phi_sizes=Phi_layers, F_sizes=F_layers,
                  Phi_acts=acts, F_acts=acts, output_act='linear', output_dim=y_dim,
                  Phi_l2_regs=l2_reg, F_l2_regs=l2_reg, name_layers=False).model
    # model_C additionally receives y as a global feature and outputs a
    # y_dim x y_dim matrix
    model_C = EFN(input_dim=x_dim - 1, Phi_sizes=Phi_layers, F_sizes=F_layers,
                  Phi_acts=acts, F_acts=acts, output_act='linear',
                  output_dim=y_dim * y_dim, num_global_features=y_dim,
                  Phi_l2_regs=l2_reg, F_l2_regs=l2_reg, name_layers=False).model

    # EFN converter: wrap the (z, p) EFN inputs as a single padded-array input
    model_A = efn_input_converter(model_A, shape=(pad, x_dim))
    model_B = efn_input_converter(model_B, shape=(pad, x_dim))
    model_C = efn_input_converter(model_C, shape=(pad, x_dim), num_global_features=y_dim)
    model_D = efn_input_converter(model_D, shape=(pad, x_dim))

    ifn = GaussianAnsatz(model_A, model_B, model_C, model_D,
                         d_multiplier=d_multiplier, y_dim=y_dim, d_l1_reg=d_l1_reg)

    # compile
    if opt is not None:
        ifn.compile(loss=mine_loss, optimizer=opt, metrics=[MI, joint, marginal])

    # load a previous model, or pretrain
    if loadfile is not None:
        ifn.built = True
        ifn.load_weights(loadfile)

    return ifn
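# A hypothetical usage sketch: build and compile a gIFN whose inputs are
# padded (pt, y, phi) particle arrays. The layer sizes, pad length, and Adam
# learning rate here are illustrative values, not from the source.
import tensorflow as tf

ifn = build_gaussianAnsatz_EFN(x_dim=3, y_dim=1,
                               Phi_layers=(100, 100, 128),
                               F_layers=(100, 100, 100),
                               acts='relu', pad=150,
                               opt=tf.keras.optimizers.Adam(1e-4))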
# F network layer sizes (phi_sizes and NUM_EPOCHS are defined elsewhere)
f_sizes = (100, 100, 100)

X, Y = load_data(2000000, 'final_efn_train')
X = preprocess(X)
Y = ef.utils.to_categorical(Y)

(Z_train, Z_val, Z_test,
 P_train, P_val, P_test,
 Y_train, Y_val, Y_test) = split_data(X[:, :, 0], X[:, :, [1, 2]], Y,
                                      test_prop=1.0 / 5, val_prop=1.0 / 5)

# adam = optimizers.Adam(lr=.005)
efn = EFN(input_dim=P_train.shape[-1], Phi_sizes=phi_sizes, F_sizes=f_sizes)
efn.fit([Z_train, P_train], Y_train,
        epochs=NUM_EPOCHS, batch_size=500,
        validation_data=([Z_val, P_val], Y_val),
        verbose=1)

preds = efn.predict([Z_test, P_test], batch_size=1000)
fpr, tpr, thresholds = roc_curve(Y_test[:, 1], preds[:, 1])
print('AUC: ' + str(auc(fpr, tpr)))

plt.plot(tpr, 1 - fpr, '-', color='black', label='EFN')
plt.show()
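# If the commented-out Adam optimizer is wanted, energyflow's EFN accepts a
# Keras optimizer through its `optimizer` keyword; a sketch using the
# learning rate from the comment above (lr= is deprecated in recent Keras in
# favor of learning_rate=):
from tensorflow.keras import optimizers

adam = optimizers.Adam(learning_rate=0.005)
efn = EFN(input_dim=P_train.shape[-1], Phi_sizes=phi_sizes, F_sizes=f_sizes,
          optimizer=adam)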
# compute a per-jet observable, the log of the pT-weighted mean squared
# (y, phi) distance, then standardize it
obs = np.log10(np.asarray([np.sum(x[:, 0] * (x[:, 1:3]**2).sum(1)) / x[:, 0].sum()
                           for x in X]))
obs_mean, obs_std = np.mean(obs), np.std(obs)
obs -= obs_mean
obs /= obs_std
print('Finished computing observables')

# do train/val/test split
(z_train, z_val, z_test,
 p_train, p_val, p_test,
 y_train, y_val, y_test) = ef.utils.data_split(X[:, :, 0], X[:, :, 1:], obs,
                                               val=val, test=test)
print('Done train/val/test split')

# build architecture
efn = EFN(input_dim=2, Phi_sizes=Phi_sizes, F_sizes=F_sizes,
          output_act=output_act, output_dim=output_dim,
          loss=loss, metrics=[])

# train model
efn.fit([z_train, p_train], y_train,
        epochs=num_epoch,
        batch_size=batch_size,
        validation_data=([z_val, p_val], y_val),
        verbose=1)

# get predictions on test data, undoing the standardization
preds = efn.predict([z_test, p_test], batch_size=1000)[:, 0] * obs_std + obs_mean

######################### Observable Distributions Plot #########################

# some nicer plot settings
plt.rcParams['font.family'] = 'serif'
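# A minimal sketch of the distributions plot the header above introduces,
# assuming the true values are un-standardized the same way as the
# predictions (np and plt as imported above):
truths = y_test * obs_std + obs_mean
bins = np.linspace(min(truths.min(), preds.min()),
                   max(truths.max(), preds.max()), 50)
plt.hist(truths, bins=bins, histtype='step', label='true')
plt.hist(preds, bins=bins, histtype='step', label='EFN')
plt.xlabel(r'$\log_{10}$ observable')
plt.ylabel('jets')
plt.legend()
plt.show()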
def mk_PFN(Phi_sizes=(128, 128), F_sizes=(128, 128), use_EFN=False,
           center_jets=True, latent_dropout=0., randomize_az=False):
    # set up either an Energy Flow Network (EFN) or a Particle Flow
    # Network (PFN) from the energyflow package
    if use_EFN:
        efn_core = EFN(input_dim=3, Phi_sizes=Phi_sizes, F_sizes=F_sizes,
                       loss='binary_crossentropy', output_dim=1,
                       output_act='sigmoid', latent_dropout=latent_dropout)
    else:
        pfn_core = PFN(input_dim=4, Phi_sizes=Phi_sizes, F_sizes=F_sizes,
                       loss='binary_crossentropy', output_dim=1,
                       output_act='sigmoid', latent_dropout=latent_dropout)

    # input: constituents' pt/eta/phi
    pfn_in = layers.Input((defs.N_CONST, 3))
    x = pfn_in

    # optionally, center the constituents about the jet axis,
    # then apply a random azimuthal rotation about that axis
    if center_jets:
        x = util.CenterJet()(x)
        if randomize_az:
            x = util.RandomizeAz()(x)

    # format the centered constituents by masking empty items and
    # converting phi -> sin(phi), cos(phi). This is done to prevent
    # adversarial perturbations causing phi to either wrap around or
    # go out of range.
    x = layers.Lambda(_format_constituents, name='phi_format')(x)

    if use_EFN:
        # if we are using the EFN model, we have to split up
        # the pT and angular parts of the constituents
        def getpt(x):
            # return just the pT for each constituent
            xpt, _, _, _ = tf.split(x, 4, axis=-1)
            return xpt

        def getangle(x):
            # return the eta, sin(phi), cos(phi) for each constituent
            _, xeta, xphi_s, xphi_c = tf.split(x, 4, axis=-1)
            return tf.concat([xeta, xphi_s, xphi_c], axis=-1)

        xpt = layers.Lambda(getpt)(x)
        xangle = layers.Lambda(getangle)(x)

        # apply the EFN model to the pT and angular inputs
        pfn_out = efn_core.model([xpt, xangle])

        # the EFN model's output also comes with an extra tensor dimension,
        # which we need to remove
        pfn_out = layers.Lambda(lambda x: tf.squeeze(x, axis=-1))(pfn_out)
        print(pfn_out.shape)
    else:
        pfn_out = pfn_core.model(x)
        print(pfn_out.shape)

    pfn = Model(pfn_in, pfn_out)
    pfn.compile(optimizer='adam', loss='binary_crossentropy')
    return pfn
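# `_format_constituents` is referenced above but not shown; a minimal sketch
# of what it plausibly does, assuming (pt, eta, phi) inputs where empty
# padded slots have pt == 0 (this reconstruction is an assumption, not the
# source's implementation):
import tensorflow as tf

def _format_constituents(x):
    # split into pt, eta, phi and build a mask of real (pt > 0) constituents
    xpt, xeta, xphi = tf.split(x, 3, axis=-1)
    mask = tf.cast(xpt > 0, tf.float32)
    # represent phi as (sin, cos) so small perturbations cannot wrap it,
    # and zero out the padded slots (otherwise cos(0) = 1 would leak in)
    out = tf.concat([xpt, xeta, tf.sin(xphi), tf.cos(xphi)], axis=-1)
    return out * mask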
batch_size = 500
val = 0.2
test = 0.2
Phi_sizes, F_sizes = (500, 500, 256), (500, 500, 300)
num_epoch = 1000

(z_train, z_val, z_test,
 p_train, p_val, p_test,
 Y_train, Y_val, Y_test) = data_split(X[:, :, 0], X[:, :, 1:], Y,
                                      val=val, test=test)

# stop when val_auc has not improved for 20 epochs, restoring the best weights
es = EarlyStopping(monitor='val_auc', mode='max', verbose=1, patience=20,
                   restore_best_weights=True)
# mc = ModelCheckpoint('best_model.h5', monitor='val_auc', mode='max',
#                      verbose=1, save_best_only=True)

efn = EFN(input_dim=2, Phi_sizes=Phi_sizes, F_sizes=F_sizes,
          metrics=['acc', auc], Phi_l2_regs=5e-05, F_l2_regs=5e-05)

history = efn.fit([z_train, p_train], Y_train,
                  epochs=num_epoch,
                  batch_size=batch_size,
                  validation_data=([z_val, p_val], Y_val),
                  verbose=1,
                  callbacks=[es])

# dependencies = {
#     'auc': tf.keras.metrics.AUC(name="auc")
# }
# saved_model = load_model('best_model.h5', custom_objects=dependencies)
# preds = saved_model.predict([z_test, p_test], batch_size=1000)
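# The `auc` object in the metrics list above must be defined before the EFN
# is built; a one-line sketch consistent with the commented-out
# custom_objects dict at the end of the snippet:
import tensorflow as tf

auc = tf.keras.metrics.AUC(name='auc')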