Example #1
0
def main():
    """Resume EFN training from a stored checkpoint and keep checkpointing.

    Loads the merged train and test ``.npz`` files, one-hot encodes the
    labels into two classes, builds an EFN with two-dimensional particle
    features, restores the weights saved in
    ``ckpts_Apr06_134930/ckpt-40-val_acc-0.73.hdf5`` and trains for a further
    40 epochs.  The test split is what gets passed as ``validation_data``.

    Checkpoints are written to a new ``ckpts_<Mon><day>_<HHMMSS>`` directory
    through two ``ModelCheckpoint`` callbacks (presumably the Keras callback;
    confirm against the file's imports): one configured with
    ``save_best_only=False`` and an epoch-stamped file name, and one with
    ``save_best_only=True`` writing ``ckpt-best.hdf5``.  Both monitor
    ``val_acc`` in ``max`` mode.
    """
    # The third value returned by load() is not needed here.
    X_train, y_train, _ = load('data/train/merged.npz')
    X_test, y_test, _ = load('data/test/merged.npz')

    # One-hot encoded
    Y_train = to_categorical(y_train, 2)
    Y_test = to_categorical(y_test, 2)

    Phi_sizes, F_sizes = (100, 100, 128), (100, 100, 100)
    efn = EFN(input_dim=2, Phi_sizes=Phi_sizes, F_sizes=F_sizes)

    # Warm-start from the weights of an earlier run.
    efn.load_weights('ckpts_Apr06_134930/ckpt-40-val_acc-0.73.hdf5')

    ckpt_dir = strftime('ckpts_%b%d_%H%M%S')
    # exist_ok=True avoids the check-then-create race of a separate isdir()
    # test; a non-directory at that path still raises.
    os.makedirs(ckpt_dir, exist_ok=True)

    checkpoint_callback = ModelCheckpoint(
        ckpt_dir + '/ckpt-{epoch:02d}-val_acc-{val_acc:.3f}.hdf5',
        monitor='val_acc',
        verbose=1,
        save_best_only=False,
        mode='max')
    best_checkpoint_callback = ModelCheckpoint(ckpt_dir + '/ckpt-best.hdf5',
                                               monitor='val_acc',
                                               verbose=1,
                                               save_best_only=True,
                                               mode='max')

    # Feature 0 along the last axis is fed as the first EFN input, the
    # remaining features as the second.
    efn.fit([X_train[:, :, 0], X_train[:, :, 1:]],
            Y_train,
            epochs=40,
            batch_size=64,
            validation_data=([X_test[:, :, 0], X_test[:, :, 1:]], Y_test),
            verbose=1,
            callbacks=[checkpoint_callback, best_checkpoint_callback])
Example #2
0
    # Body of a loop whose header is outside this excerpt; `x` is modified
    # in place.
    # Select the rows whose column 0 is strictly positive (presumably the
    # real, non-padded particles, with column 0 as their weight -- confirm).
    mask = x[:, 0] > 0
    # Column-0-weighted mean of columns 1:3 over the selected rows (the name
    # suggests rapidity/phi -- TODO confirm), then subtract it to centre them.
    yphi_avg = np.average(x[mask, 1:3], weights=x[mask, 0], axis=0)
    x[mask, 1:3] -= yphi_avg
    # Divide the selected column-0 entries by the sum of the whole column.
    x[mask, 0] /= x[:, 0].sum()

print('Finished preprocessing')

# Partition feature 0, the remaining per-particle features and the labels
# into train / validation / test subsets.
_splits = data_split(X[:, :, 0], X[:, :, 1:], Y, val=val, test=test)
(z_train, z_val, z_test,
 p_train, p_val, p_test,
 Y_train, Y_val, Y_test) = _splits

print('Done train/val/test split')
print('Model summary:')

# Instantiate the network (the message above suggests the constructor prints
# a summary -- confirm against the EFN implementation).
efn = EFN(input_dim=2, Phi_sizes=Phi_sizes, F_sizes=F_sizes)

# Fit on the training subset, passing the validation subset to fit().
_train_inputs = [z_train, p_train]
_val_inputs = [z_val, p_val]
efn.fit(_train_inputs,
        Y_train,
        epochs=num_epoch,
        batch_size=batch_size,
        validation_data=(_val_inputs, Y_val),
        verbose=1)

# Score the held-out test subset.
preds = efn.predict([z_test, p_test], batch_size=1000)

# ROC curve built from column 1 of the labels and of the predictions.
_truth, _score = Y_test[:, 1], preds[:, 1]
efn_fp, efn_tp, threshs = roc_curve(_truth, _score)
Example #3
0
    # Body of a loop whose header is not part of this excerpt; `x` is updated
    # in place.
    # Keep only rows with a strictly positive column 0 (presumably real
    # particles as opposed to zero padding -- confirm against the loader).
    mask = x[:, 0] > 0
    # Centre columns 1:3 of those rows on their column-0-weighted average
    # (named as if rapidity/phi -- TODO confirm).
    yphi_avg = np.average(x[mask, 1:3], weights=x[mask, 0], axis=0)
    x[mask, 1:3] -= yphi_avg
    # Scale the selected column-0 values by the total of column 0.
    x[mask, 0] /= x[:, 0].sum()

print('Finished preprocessing')

# Split feature 0, the other per-particle features and the labels into
# train / validation / test parts.
_parts = data_split(X[:, :, 0], X[:, :, 1:], Y, val=val_frac, test=test_frac)
z_train, z_val, z_test = _parts[0:3]
p_train, p_val, p_test = _parts[3:6]
Y_train, Y_val, Y_test = _parts[6:9]

print('Done train/val/test split')
print('Model summary:')

# Construct the network with the configured layer sizes.
efn = EFN(input_dim=2, ppm_sizes=ppm_sizes, dense_sizes=dense_sizes)

# Train, handing the validation part to fit().
_validation = ([z_val, p_val], Y_val)
efn.fit([z_train, p_train],
        Y_train,
        epochs=num_epoch,
        batch_size=batch_size,
        validation_data=_validation,
        verbose=1)

# Predictions for the test part.
preds = efn.predict([z_test, p_test], batch_size=1000)

# Only build the ROC curve when `roc_curve` is truthy (presumably it is set
# to a falsy value when sklearn is unavailable -- confirm at the import site).
if roc_curve:
    _truth, _score = Y_test[:, 1], preds[:, 1]
    efn_fp, efn_tp, threshs = roc_curve(_truth, _score)
Example #4
0
def build_gaussianAnsatz_EFN(x_dim,
                             y_dim,
                             Phi_layers,
                             F_layers,
                             acts,
                             pad,
                             opt=None,
                             l2_reg=0.0,
                             d_l1_reg=0.0,
                             d_multiplier=1.0,
                             loadfile=None):
    """Assemble a GaussianAnsatz out of four EFN sub-networks in one call.

    Four EFNs (A, B, C, D) are created with identical hidden layers,
    activations and L2 regularisation, a linear output activation and
    ``input_dim = x_dim - 1``.  They differ only in their output size
    (A: 1, B and D: ``y_dim``, C: ``y_dim * y_dim``) and in C additionally
    receiving ``num_global_features=y_dim``.  Each underlying model is passed
    through ``efn_input_converter`` and the results are combined in a
    ``GaussianAnsatz``.

    Args:
        x_dim (int): X-dimension. Each EFN is built with ``input_dim`` equal
            to ``x_dim - 1``.
        y_dim (int): Y-dimension.
        Phi_layers (int array): Hidden Phi layer sizes, shared by all four
            networks.
        F_layers (int array): Hidden F layer sizes, shared by all four
            networks.
        acts: Activation specification used for both ``Phi_acts`` and
            ``F_acts`` of every network.
        pad: First entry of the ``shape=(pad, x_dim)`` handed to
            ``efn_input_converter`` (presumably the padded number of
            particles per event -- confirm against the converter).
        opt (Keras optimizer, optional): If given, the ansatz is compiled
            with ``mine_loss`` and the ``MI``, ``joint`` and ``marginal``
            metrics. Defaults to None.
        l2_reg (float, optional): L2 regularisation applied to the Phi and F
            layers of all four networks. Defaults to 0.0.
        d_l1_reg (float, optional): Forwarded to ``GaussianAnsatz`` as
            ``d_l1_reg``. Defaults to 0.0.
        d_multiplier (float, optional): Forwarded to ``GaussianAnsatz`` as
            ``d_multiplier``. Defaults to 1.0.
        loadfile (string, optional): If given, weights are loaded from this
            file. Defaults to None.

    Returns:
        The ``GaussianAnsatz`` instance, compiled when ``opt`` was supplied
        and with weights restored when ``loadfile`` was supplied.
    """
    # Settings common to every sub-network.
    shared = dict(
        input_dim=x_dim - 1,
        Phi_sizes=Phi_layers,
        F_sizes=F_layers,
        Phi_acts=acts,
        F_acts=acts,
        output_act='linear',
        Phi_l2_regs=l2_reg,
        F_l2_regs=l2_reg,
        name_layers=False,
    )

    # Creation order (A, B, D, C) is kept as in the original implementation.
    model_A = EFN(output_dim=1, **shared).model
    model_B = EFN(output_dim=y_dim, **shared).model
    model_D = EFN(output_dim=y_dim, **shared).model
    model_C = EFN(output_dim=y_dim * y_dim,
                  num_global_features=y_dim,
                  **shared).model

    # Wrap each model with the input converter; only C carries global
    # features.
    converter_shape = (pad, x_dim)
    model_A = efn_input_converter(model_A, shape=converter_shape)
    model_B = efn_input_converter(model_B, shape=converter_shape)
    model_C = efn_input_converter(model_C,
                                  shape=converter_shape,
                                  num_global_features=y_dim)
    model_D = efn_input_converter(model_D, shape=converter_shape)

    ansatz = GaussianAnsatz(model_A,
                            model_B,
                            model_C,
                            model_D,
                            d_multiplier=d_multiplier,
                            y_dim=y_dim,
                            d_l1_reg=d_l1_reg)

    # Compile only when an optimizer was provided.
    if opt is not None:
        ansatz.compile(loss=mine_loss,
                       optimizer=opt,
                       metrics=[MI, joint, marginal])

    # Restore previously saved weights; the model is flagged as built first.
    if loadfile is not None:
        ansatz.built = True
        ansatz.load_weights(loadfile)

    return ansatz
Example #5
0
    # Interior of a function whose header (and the definition of `phi_sizes`)
    # lies outside this excerpt.
    f_sizes = (100, 100, 100)

    # Load the 'final_efn_train' sample (the 2000000 is presumably an event
    # cap -- confirm against load_data), preprocess the features and one-hot
    # encode the labels.
    X, Y = load_data(2000000, 'final_efn_train')
    X = preprocess(X)
    Y = ef.utils.to_categorical(Y)

    # Split feature 0 (Z), features 1 and 2 (P) and the labels, reserving one
    # fifth each for the test and validation subsets.
    (Z_train, Z_val, Z_test, P_train, P_val, P_test, Y_train, Y_val,
     Y_test) = split_data(X[:, :, 0],
                          X[:, :, [1, 2]],
                          Y,
                          test_prop=1.0 / 5,
                          val_prop=1.0 / 5)

    #adam = optimizers.Adam(lr=.005)
    # input_dim is taken from the last axis of the P array.
    efn = EFN(input_dim=P_train.shape[-1],
              Phi_sizes=phi_sizes,
              F_sizes=f_sizes)
    efn.fit([Z_train, P_train],
            Y_train,
            epochs=NUM_EPOCHS,
            batch_size=500,
            validation_data=([Z_val, P_val], Y_val),
            verbose=1)
    # Score the test subset.
    preds = efn.predict([Z_test, P_test], batch_size=1000)

    # ROC curve and its area, using column 1 of the labels and predictions.
    fpr, tpr, thresholds = roc_curve(Y_test[:, 1], preds[:, 1])
    print('AUC: ' + str(auc(fpr, tpr)))

    # Plot `tpr` on the x axis against `1 - fpr` on the y axis.
    plt.plot(tpr, 1 - fpr, '-', color='black', label='EFN')
    plt.show()
Example #6
0
# Per-event observable: log10 of the column-0-weighted mean of the summed
# squares of columns 1:3.
obs = np.log10(np.asarray([
    np.sum(x[:, 0] * (x[:, 1:3]**2).sum(1)) / x[:, 0].sum()
    for x in X
]))

# Standardise in place; the mean and std are kept so predictions can be
# mapped back to the original scale later.
obs_mean = np.mean(obs)
obs_std = np.std(obs)
obs -= obs_mean
obs /= obs_std

print('Finished computing observables')

# Split feature 0, the remaining features and the standardised observable
# into train / validation / test subsets.
_split = ef.utils.data_split(X[:, :, 0], X[:, :, 1:], obs, val=val, test=test)
z_train, z_val, z_test = _split[0:3]
p_train, p_val, p_test = _split[3:6]
y_train, y_val, y_test = _split[6:9]

print('Done train/val/test split')

# Build the network with the configured output activation, size and loss,
# and no extra metrics.
efn = EFN(input_dim=2,
          Phi_sizes=Phi_sizes,
          F_sizes=F_sizes,
          output_act=output_act,
          output_dim=output_dim,
          loss=loss,
          metrics=[])

# Train, passing the validation subset to fit().
efn.fit([z_train, p_train],
        y_train,
        epochs=num_epoch,
        batch_size=batch_size,
        validation_data=([z_val, p_val], y_val),
        verbose=1)

# Predict on the test subset and undo the standardisation on output column 0.
_raw_preds = efn.predict([z_test, p_test], batch_size=1000)
preds = _raw_preds[:, 0] * obs_std + obs_mean

######################### Observable Distributions Plot #########################

# Use a serif font for the plots.
plt.rcParams['font.family'] = 'serif'
Example #7
0
def mk_PFN(Phi_sizes=(128, 128),
           F_sizes=(128, 128),
           use_EFN=False,
           center_jets=True,
           latent_dropout=0.,
           randomize_az=False):
    """Build and compile a single-output classifier around an EFN or PFN core.

    The returned model takes a ``(defs.N_CONST, 3)`` input (constituent
    pt/eta/phi according to the original author's notes), optionally centres
    it with ``util.CenterJet`` and randomises it with ``util.RandomizeAz``,
    reformats it through ``_format_constituents`` and feeds the result to the
    core network.

    Args:
        Phi_sizes: Layer sizes forwarded to the core as ``Phi_sizes``.
        F_sizes: Layer sizes forwarded to the core as ``F_sizes``.
        use_EFN: When true the core is an ``EFN`` with ``input_dim=3`` fed
            with two inputs (first feature, remaining three features);
            otherwise it is a ``PFN`` with ``input_dim=4`` fed with all four
            features at once.
        center_jets: Apply ``util.CenterJet`` to the input.
        latent_dropout: Forwarded to the core as ``latent_dropout``.
        randomize_az: Apply ``util.RandomizeAz`` after centring. Has no
            effect unless ``center_jets`` is also true.

    Returns:
        The wrapping ``Model``, compiled with the ``'adam'`` optimizer and
        ``'binary_crossentropy'`` loss.
    """
    # Hyper-parameters shared by both core architectures.
    core_kwargs = dict(Phi_sizes=Phi_sizes,
                       F_sizes=F_sizes,
                       loss='binary_crossentropy',
                       output_dim=1,
                       output_act='sigmoid',
                       latent_dropout=latent_dropout)

    # The core is created before any wrapper layer, as in the original.
    if use_EFN:
        core = EFN(input_dim=3, **core_kwargs)
    else:
        core = PFN(input_dim=4, **core_kwargs)

    # Network input and optional centring / azimuthal randomisation.
    constituents_in = layers.Input((defs.N_CONST, 3))
    feats = constituents_in
    if center_jets:
        feats = util.CenterJet()(feats)
        if randomize_az:
            feats = util.RandomizeAz()(feats)

    # Per the original author's notes this step masks empty entries and
    # replaces phi by sin(phi), cos(phi), so that adversarial perturbations
    # cannot make phi wrap around or leave its range.
    feats = layers.Lambda(_format_constituents, name='phi_format')(feats)

    if not use_EFN:
        score = core.model(feats)
    else:
        # The EFN takes the first feature and the remaining three features
        # as separate inputs.
        def getpt(x):
            # first of the four slices along the last axis
            return tf.split(x, 4, axis=-1)[0]

        def getangle(x):
            # last three of the four slices, concatenated again
            return tf.concat(tf.split(x, 4, axis=-1)[1:], axis=-1)

        pt_part = layers.Lambda(getpt)(feats)
        angle_part = layers.Lambda(getangle)(feats)
        score = core.model([pt_part, angle_part])

        # The EFN output carries an extra trailing dimension; drop it.
        score = layers.Lambda(lambda x: tf.squeeze(x, axis=-1))(score)
    print(score.shape)

    wrapped = Model(constituents_in, score)
    wrapped.compile(optimizer='adam', loss='binary_crossentropy')

    return wrapped
Example #8
0
# Training configuration.
batch_size, num_epoch = 500, 1000
val, test = 0.2, 0.2
Phi_sizes = (500, 500, 256)
F_sizes = (500, 500, 300)

# Split feature 0, the remaining features and the labels into
# train / validation / test subsets.
_splits = data_split(X[:, :, 0], X[:, :, 1:], Y, val=val, test=test)
(z_train, z_val, z_test,
 p_train, p_val, p_test,
 Y_train, Y_val, Y_test) = _splits

# Early stopping on 'val_auc' (max mode) with a patience of 20 epochs and
# restore_best_weights enabled.
es = EarlyStopping(
    monitor='val_auc',
    mode='max',
    verbose=1,
    patience=20,
    restore_best_weights=True,
)
#mc = ModelCheckpoint('best_model.h5', monitor='val_auc', mode='max', verbose=1, save_best_only=True)

# EFN tracking accuracy and `auc`, with the same L2 strength on the Phi and
# F layers.
_l2_strength = 5e-05
efn = EFN(
    input_dim=2,
    Phi_sizes=Phi_sizes,
    F_sizes=F_sizes,
    metrics=['acc', auc],
    Phi_l2_regs=_l2_strength,
    F_l2_regs=_l2_strength,
)

# Train with the early-stopping callback and keep the value returned by fit().
_train_inputs = [z_train, p_train]
_validation = ([z_val, p_val], Y_val)
history = efn.fit(
    _train_inputs,
    Y_train,
    epochs=num_epoch,
    batch_size=batch_size,
    validation_data=_validation,
    verbose=1,
    callbacks=[es],
)

#dependencies = {
#  'auc': tf.keras.metrics.AUC(name="auc")
#}
#saved_model = load_model('best_model.h5', custom_objects=dependencies)
#preds = saved_model.predict([z_test, p_test], batch_size=1000)