Example #1
0
def PFN_AUC_calculation(jet_array_1, jet_array_2, train_size, test_size,
                        Phi_sizes=(100, 100, 128), F_sizes=(100, 100, 100),
                        num_epoch=10, batch_size=500, use_pids=True):
    """Train a Particle Flow Network to separate two jet samples and
    return the ROC AUC on a held-out test set.

    Parameters
    ----------
    jet_array_1, jet_array_2 : ndarray, shape (n_jets, n_particles, >=4)
        Per-jet particle arrays; only the first four feature channels
        are used (channel 0 is treated as pt, 1:3 as (y, phi), 3 as a
        particle-id code).  Jets from ``jet_array_1`` get label 1,
        jets from ``jet_array_2`` get label 0.
    train_size, test_size : int
        Number of jets in the training and test splits; everything
        left over goes to validation.
    Phi_sizes, F_sizes : tuple of int, optional
        Layer widths of the per-particle (Phi) and jet-level (F)
        subnetworks.
    num_epoch, batch_size : int, optional
        Training schedule.
    use_pids : bool, optional
        If True, remap the particle-id channel in place via
        ``remap_pids``; otherwise drop it and train on the first three
        channels only.

    Returns
    -------
    float
        Area under the ROC curve on the test split.

    Notes
    -----
    The input arrays are preprocessed *in place* (jet centering and pt
    normalization mutate ``X``, a concatenated view of the inputs'
    first four channels only when a copy is made by ``concatenate``).
    """
    X = np.concatenate([jet_array_1, jet_array_2])[:, :, :4]
    y = np.concatenate([np.ones(len(jet_array_1)), np.zeros(len(jet_array_2))])

    # validation gets whatever is left after train/test are carved out
    val = X.shape[0] - train_size - test_size

    # convert labels to one-hot for the two-class softmax output
    Y = to_categorical(y, num_classes=2)

    # preprocess: center each jet in (y, phi) and normalize the pts
    for x in X:
        mask = x[:, 0] > 0  # real particles; zero-padded rows excluded
        yphi_avg = np.average(x[mask, 1:3], weights=x[mask, 0], axis=0)
        x[mask, 1:3] -= yphi_avg
        # NOTE(review): divides by the sum over *all* rows, not only the
        # masked ones — identical when padding pts are exactly 0; verify
        # if inputs can contain negative pt entries.
        x[mask, 0] /= x[:, 0].sum()

    # handle the particle-id channel
    if use_pids:
        remap_pids(X, pid_i=3)
    else:
        X = X[:, :, :3]

    # do the train/val/test split
    (X_train, X_val, X_test,
     Y_train, Y_val, Y_test) = data_split(X, Y, val=val, test=test_size)

    # build the architecture; suppress its model-summary printout
    with suppress_stdout():
        pfn = PFN(input_dim=X.shape[-1], Phi_sizes=Phi_sizes, F_sizes=F_sizes)

    # train silently
    pfn.fit(X_train, Y_train,
            epochs=num_epoch,
            batch_size=batch_size,
            validation_data=(X_val, Y_val),
            verbose=0)

    # class-1 scores on the test split
    preds = pfn.predict(X_test, batch_size=1000)

    # area under the ROC curve
    return roc_auc_score(Y_test[:, 1], preds[:, 1])
Example #2
0
        x[mask, 3] = map_func(x[mask, 3])
    return X


if __name__ == '__main__':
    # Layer widths for the per-particle (Phi) and jet-level (F) subnetworks.
    phi_sizes = (16, 32, 64, 128)
    f_sizes = (128, 64, 32, 16)

    # Load 2M jets; 'final_efn_train' presumably names the dataset/split —
    # TODO confirm against load_data's definition.
    X, Y = load_data(2000000, 'final_efn_train')
    X = preprocess(X)
    # One-hot labels for the two-class softmax output.
    Y = ef.utils.to_categorical(Y)

    # Carve off 1/5 each for test and validation; the rest is training data.
    X_train, X_val, X_test, Y_train, Y_val, Y_test = split_data(
        X, Y, test_prop=1.0 / 5, val_prop=1.0 / 5)

    # NOTE(review): `lr` is the legacy Keras argument name; recent Keras
    # versions expect `learning_rate` — confirm the installed version.
    adam = optimizers.Adam(lr=.0006)
    pfn = PFN(input_dim=X_train.shape[-1],
              Phi_sizes=phi_sizes,
              F_sizes=f_sizes,
              optimizer=adam)
    pfn.fit(X_train,
            Y_train,
            epochs=NUM_EPOCHS,
            batch_size=250,
            validation_data=(X_val, Y_val),
            verbose=1)
    preds = pfn.predict(X_test, batch_size=1000)

    # ROC curve on the class-1 score; report the area under it.
    fpr, tpr, thresholds = roc_curve(Y_test[:, 1], preds[:, 1])
    print('AUC: ' + str(auc(fpr, tpr)))
Example #3
0
# Split into train/val/test; `val` and `test` sizes are defined earlier
# in the script (outside this excerpt).
(X_train, X_val, X_test, Y_train, Y_val, Y_test) = data_split(X,
                                                              Y,
                                                              val=val,
                                                              test=test)

print('Done train/val/test split')
print('Model summary:')

# Build the PFN; Phi_sizes/F_sizes are the widths of the per-particle
# and jet-level subnetworks, defined earlier in the script.
pfn = PFN(input_dim=X.shape[-1], Phi_sizes=Phi_sizes, F_sizes=F_sizes)

# Train the model, monitoring validation performance each epoch.
pfn.fit(X_train,
        Y_train,
        epochs=num_epoch,
        batch_size=batch_size,
        validation_data=(X_val, Y_val),
        verbose=1)

# Class scores on the held-out test set.
preds = pfn.predict(X_test, batch_size=1000)

# `roc_curve` is falsy (e.g. None) when sklearn is unavailable — only
# compute ROC metrics if it imported successfully.
if roc_curve:
    pfn_fp, pfn_tp, threshs = roc_curve(Y_test[:, 1], preds[:, 1])

    # Area under the ROC curve on the class-1 score.
    auc = roc_auc_score(Y_test[:, 1], preds[:, 1])
    print()
    print('PFN AUC:', auc)
    print()
Example #4
0
# Stop when validation AUC has not improved for 20 epochs and roll the
# model back to the best weights seen.
es = EarlyStopping(monitor='val_auc',
                   mode='max',
                   verbose=1,
                   patience=20,
                   restore_best_weights=True)
#mc = ModelCheckpoint('best_model.h5', monitor='val_auc', mode='max', verbose=1, save_best_only=True)
# PFN over 3 input features with dropout on the latent and F layers;
# `auc` here is presumably a metric callable defined earlier — verify.
pfn = PFN(input_dim=3,
          Phi_sizes=Phi_sizes,
          F_sizes=F_sizes,
          metrics=['acc', auc],
          latent_dropout=0.2,
          F_dropouts=0.2)
history = pfn.fit(X_train,
                  Y_train,
                  epochs=num_epoch,
                  batch_size=batch_size,
                  validation_data=(X_val, Y_val),
                  verbose=1,
                  callbacks=[es])

#dependencies = {
#  'auc': tf.keras.metrics.AUC(name="auc")
#}
#saved_model = load_model('best_model.h5', custom_objects=dependencies)
#preds = saved_model.predict([z_test, p_test], batch_size=1000)
preds = pfn.predict(X_test, batch_size=1000)
# NOTE(review): this rebinds `auc` from the metric callable above to a
# float score — rename one of them if the metric is needed later on.
auc = roc_auc_score(Y_test[:, 1], preds[:, 1])
print('PFN AUC:', auc)

#save plots
name = "{0}Jet_{1}Tag".format(args.nJets, args.nTags)
Example #5
0
    utils = Utilities(nParticles)

    # Build the first training dataset
    X_train, Y, W_train, MVA_train = utils.BuildBatch()
    print(MVA_train.shape)

    for epoch in range(10000):
        # Shuffle loaded datasets and begin
        inds = range(len(X_train))
        np.random.shuffle(inds)
        X_epoch, Y_epoch, W_epoch, MVA_epoch = X_train[inds], Y[inds], W_train[
            inds], MVA_epoch[inds]
        if (np.min(W_train) == np.nan): continue
        if (np.min(W_train) == np.inf): continue

        model.fit(X_epoch, Y_epoch, epochs=1, batch_size=4 * 512, verbose=1)
        pd.DataFrame(X_epoch).to_csv("X_example.csv", index=False)
        pd.DataFrame(Y_epoch).to_csv("Y_example.csv", index=False)
        pd.DataFrame(MVA_epoch).to_csv("MVA_example.csv", index=False)
        if (epoch % 10 == 0):

            model.save(
                '/beegfs/desy/user/dydukhle/TauId/models/keras__deep_set__%i.model'
                % (epoch))
            preds = model.predict(X_epoch, batch_size=1 * 2048)
            mva = MVA_train
            df_preds = pd.DataFrame({
                "pred": [i[0] for i in preds],
                "labels": [y for y in Y_epoch]
            })  #, 'mva':[m[0] for m in mva] })
            df_preds.to_csv(