Example 1
def PFN_AUC_calculation(jet_array_1, jet_array_2, train_size, test_size):
    X = np.concatenate([jet_array_1, jet_array_2])[:,:,:4]
    y = np.concatenate([np.ones(len(jet_array_1)), np.zeros(len(jet_array_2))])

    ################################### SETTINGS ###################################

    # data controls
    train, val, test = train_size, X.shape[0]-train_size-test_size, test_size
    use_pids = True

    # network architecture parameters
    Phi_sizes, F_sizes = (100, 100, 128), (100, 100, 100)

    # network training parameters
    num_epoch = 10
    batch_size = 500

    ################################################################################

    # convert labels to categorical
    Y = to_categorical(y, num_classes=2)

    # preprocess by centering jets and normalizing pts
    for x in X:
        mask = x[:,0] > 0
        yphi_avg = np.average(x[mask,1:3], weights=x[mask,0], axis=0)
        x[mask,1:3] -= yphi_avg
        x[mask,0] /= x[:,0].sum()

    # handle particle id channel
    if use_pids:
        remap_pids(X, pid_i=3)
    else:
        X = X[:,:,:3]

    # do train/val/test split 
    (X_train, X_val, X_test,
     Y_train, Y_val, Y_test) = data_split(X, Y, val=val, test=test)

    # build architecture
    with suppress_stdout():
        pfn = PFN(input_dim=X.shape[-1], Phi_sizes=Phi_sizes, F_sizes=F_sizes)

    # train model
    pfn.fit(X_train, Y_train,
              epochs=num_epoch,
              batch_size=batch_size,
              validation_data=(X_val, Y_val),
              verbose=0)

    # get predictions on test data
    preds = pfn.predict(X_test, batch_size=1000)

    # get area under the ROC curve
    auc = roc_auc_score(Y_test[:,1], preds[:,1])
    
    return auc
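
A minimal usage sketch for the helper above, assuming the quark/gluon jet dataset shipped with energyflow and that the imports used inside the function (numpy, energyflow, sklearn) are in scope; the dataset size and split sizes below are illustrative choices, not taken from the source.

# Hypothetical call of PFN_AUC_calculation; names and sizes are illustrative.
# qg_jets.load returns padded (pt, y, phi, pid) arrays and 0/1 labels.
import numpy as np
from energyflow.datasets import qg_jets

X_all, y_all = qg_jets.load(20000)
quark_jets = X_all[y_all == 1]   # treated here as "signal" jets
gluon_jets = X_all[y_all == 0]   # treated here as "background" jets

auc = PFN_AUC_calculation(quark_jets, gluon_jets, train_size=15000, test_size=2000)
print('PFN quark/gluon AUC:', auc)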
Example 2
Y = to_categorical(y, num_classes=2)

print('Loaded quark and gluon jets')

# preprocess by centering jets and normalizing pts
for x in X:
    mask = x[:, 0] > 0
    yphi_avg = np.average(x[mask, 1:3], weights=x[mask, 0], axis=0)
    x[mask, 1:3] -= yphi_avg
    x[mask, 0] /= x[:, 0].sum()

print('Finished preprocessing')

# do train/val/test split
(z_train, z_val, z_test, p_train, p_val, p_test, Y_train, Y_val,
 Y_test) = data_split(X[:, :, 0], X[:, :, 1:], Y, val=val, test=test)

print('Done train/val/test split')
print('Model summary:')

# build architecture
efn = EFN(input_dim=2, Phi_sizes=Phi_sizes, F_sizes=F_sizes)

# train model
efn.fit([z_train, p_train],
        Y_train,
        epochs=num_epoch,
        batch_size=batch_size,
        validation_data=([z_val, p_val], Y_val),
        verbose=1)
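
The snippet stops right after training. A short continuation sketch, mirroring the AUC step in Example 1, is shown below; it assumes roc_auc_score is available from sklearn.metrics and is not part of the original snippet.

# Continuation sketch (not from the source): evaluate the trained EFN on the
# held-out test split and compute the area under the ROC curve.
from sklearn.metrics import roc_auc_score

preds = efn.predict([z_test, p_test], batch_size=1000)
auc = roc_auc_score(Y_test[:, 1], preds[:, 1])
print('EFN AUC:', auc)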
Example 3
print()
print('Loaded quark and gluon jets')

# preprocess by centering jets and normalizing pts
for x in X:
    mask = x[:, 0] > 0
    yphi_avg = np.average(x[mask, 1:3], weights=x[mask, 0], axis=0)
    x[mask, 1:3] -= yphi_avg
    x[mask, 0] /= x[:, 0].sum()

print('Finished preprocessing')

# do train/val/test split
(z_train, z_val, z_test, p_train, p_val, p_test, Y_train, Y_val,
 Y_test) = data_split(X[:, :, 0], X[:, :, 1:], Y, val=val_frac, test=test_frac)

print('Done train/val/test split')
print('Model summary:')

# build architecture
efn = EFN(input_dim=2, ppm_sizes=ppm_sizes, dense_sizes=dense_sizes)

# train model
efn.fit([z_train, p_train],
        Y_train,
        epochs=num_epoch,
        batch_size=batch_size,
        validation_data=([z_val, p_val], Y_val),
        verbose=1)
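
Beyond the AUC, a common follow-up with EFN models is to inspect the learned latent filters. The sketch below is illustrative only; it assumes the eval_filters helper of energyflow's EFN is available in the installed version and that matplotlib is installed, and the patch radius R and grid size n are arbitrary choices.

# Illustrative only: evaluate the learned filters on an n x n (y, phi) grid of
# half-width R and draw the first four of them.
import matplotlib.pyplot as plt

R, n = 0.4, 100
grid_y, grid_phi, filters = efn.eval_filters(R, n=n)

for i, z in enumerate(filters[:4]):
    plt.subplot(2, 2, i + 1)
    plt.contourf(grid_y, grid_phi, z, cmap='Blues')
plt.show()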
Example 4
# preprocess by centering jets and normalizing pts
for x in X:
    mask = x[:, 0] > 0
    yphi_avg = np.average(x[mask, 1:3], weights=x[mask, 0], axis=0)
    x[mask, 1:3] -= yphi_avg
    x[mask, 0] /= x[:, 0].sum()

# handle particle id channel
if use_pids:
    remap_pids(X, pid_i=3)
else:
    X = X[:, :, :3]

print('Finished preprocessing')

# do train/val/test split
(X_train, X_val, X_test, Y_train, Y_val, Y_test) = data_split(X,
                                                              Y,
                                                              val=val,
                                                              test=test)

print('Done train/val/test split')
print('Model summary:')

# build architecture
pfn = PFN(input_dim=X.shape[-1], Phi_sizes=Phi_sizes, F_sizes=F_sizes)

# train model
pfn.fit(X_train,
        Y_train,
        epochs=num_epoch,
        batch_size=batch_size,
        validation_data=(X_val, Y_val),
        verbose=1)
Example 5
# make jet images
images = np.asarray([
    pixelate(x,
             npix=npix,
             img_width=img_width,
             nb_chan=nb_chan,
             charged_counts_only=True,
             norm=norm) for x in X
])

print('Done making jet images')

# do train/val/test split
(X_train, X_val, X_test, Y_train, Y_val, Y_test) = data_split(images,
                                                              Y,
                                                              val=val_frac,
                                                              test=test_frac)

print('Done train/val/test split')

# preprocess by zero centering images and standardizing each pixel
X_train, X_val, X_test = standardize(*zero_center(X_train, X_val, X_test))

print('Finished preprocessing')
print('Model summary:')

# build architecture
hps = {
    'input_shape': input_shape,
    'filter_sizes': filter_sizes,
    'num_filters': num_filters,
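
The snippet is cut off in the middle of the hps dictionary. Purely for orientation, here is a sketch of how such a dictionary is typically closed and fed to energyflow's CNN; the dense_sizes entry and the training parameters are assumptions, not recovered from the truncated source.

# Assumed completion for illustration only; requires
# from energyflow.archs import CNN, and dense_sizes / num_epoch / batch_size
# are hypothetical here.
hps = {
    'input_shape': input_shape,
    'filter_sizes': filter_sizes,
    'num_filters': num_filters,
    'dense_sizes': dense_sizes,
}
cnn = CNN(hps)

cnn.fit(X_train, Y_train,
        epochs=num_epoch,
        batch_size=batch_size,
        validation_data=(X_val, Y_val),
        verbose=1)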
Example 6
# convert labels to categorical
Y = to_categorical(y, num_classes=2)

print('Loaded quark and gluon jets')
print('Model summary:')

# train models with different numbers of nsubs as input
rocs = []
for i, num_nsub in enumerate(num_nsubs):

    # build architecture
    dnn = DNN(input_dim=num_nsub, dense_sizes=dense_sizes, summary=(i == 0))

    # do train/val/test split
    (X_train, X_val, X_test, Y_train, Y_val,
     Y_test) = data_split(X[:, :num_nsub], Y, val=val_frac, test=test_frac)

    print('Done train/val/test split')

    # train model
    dnn.fit(X_train,
            Y_train,
            epochs=num_epoch,
            batch_size=batch_size,
            validation_data=(X_val, Y_val),
            verbose=1)

    # get predictions on test data
    preds = dnn.predict(X_test, batch_size=1000)

    # get ROC curve if we have sklearn
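
The snippet ends at the ROC-curve comment. A continuation consistent with the pattern used in Example 7 might look as follows; the sklearn imports and the final print are assumptions, not recovered from the source.

    # Assumed continuation (same pattern as Example 7); requires
    # from sklearn.metrics import roc_curve, roc_auc_score
    if roc_curve:
        rocs.append(roc_curve(Y_test[:, 1], preds[:, 1]))

        # get area under the ROC curve
        auc = roc_auc_score(Y_test[:, 1], preds[:, 1])
        print('DNN with {} N-subjettiness inputs: AUC = {:.4f}'.format(num_nsub, auc))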
Example 7
X = efpset.batch_compute(masked_X)
print('Done')

# train models with different numbers of EFPs as input
rocs = []
for d in range(1, dmax + 1):

    # build architecture
    model = LinearClassifier(linclass_type='lda')

    # select EFPs with degree <= d
    X_d = X[:, efpset.sel(('d<=', d))]

    # do train/val/test split
    (X_train, X_test, y_train, y_test) = data_split(X_d,
                                                    y,
                                                    val=0,
                                                    test=test_frac)
    print('Done train/val/test split')

    # train model
    model.fit(X_train, y_train)

    # get predictions on test data
    preds = model.predict(X_test)

    # get ROC curve if we have sklearn
    if roc_curve:
        rocs.append(roc_curve(y_test, preds[:, 1]))

        # get area under the ROC curve
        auc = roc_auc_score(y_test, preds[:, 1])
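
Once the loop has filled rocs, the curves for the different maximum degrees can be compared directly. The plotting sketch below is illustrative; the labels and styling are not part of the source snippet.

# Illustrative only: overlay the ROC curves collected for each maximum degree d.
import matplotlib.pyplot as plt

for d, (fpr, tpr, threshs) in enumerate(rocs, start=1):
    plt.plot(tpr, 1 - fpr, label='EFPs with d <= {}'.format(d))
plt.xlabel('Signal efficiency')
plt.ylabel('Background rejection')
plt.legend()
plt.show()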
Example 8
X_bkg_rich = X_bkg_rich[bkg_mask]
Y_bkg_rich = Y_bkg_rich[bkg_mask]

X_cat = np.concatenate((X_sig_rich, X_bkg_rich))
Y_cat = np.concatenate((Y_sig_rich, Y_bkg_rich))
labels = np.concatenate((np.ones((X_sig_rich.shape[0]), dtype=np.float32),
                         np.zeros((X_bkg_rich.shape[0]), dtype=np.float32)))

X_new, Y_new_true, Y_new = sk_shuffle(X_cat, Y_cat, labels, random_state=123)

print_signal_fractions(Y_new_true, Y_new)

(X_train, X_val, X_test, Y_true_train, Y_true_val, Y_true_test, Y_train, Y_val,
 Y_test) = data_split(X_new,
                      Y_new_true,
                      Y_new,
                      val=val_frac,
                      test=test_frac,
                      shuffle=False)

evt_weights = np.ones(X_train.shape[0])

myoptimizer = keras.optimizers.Adam(lr=0.001,
                                    beta_1=0.8,
                                    beta_2=0.99,
                                    epsilon=1e-08,
                                    decay=0.0005)
if (options.use_dense):
    my_model = dense_net(X_train.shape[1])
else:
    my_model = CNN(X_train[0].shape)
my_model.summary()
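
The snippet stops after the model summary. Below is a hedged sketch of how the model could be compiled and trained with the event weights defined above; the loss choice, num_epoch, and batch_size are assumptions, not taken from the source, and the model may already be compiled inside dense_net or CNN.

# Assumed training step for illustration; 'binary_crossentropy' matches the
# 0/1 labels built above but is not confirmed by the snippet.
my_model.compile(optimizer=myoptimizer, loss='binary_crossentropy', metrics=['accuracy'])
my_model.fit(X_train, Y_train,
             sample_weight=evt_weights,
             epochs=num_epoch,          # hypothetical
             batch_size=batch_size,     # hypothetical
             validation_data=(X_val, Y_val),
             verbose=1)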
Example 9
        keep_events = (Y_lab < 0.1) | (mjj_window_sig & (Y_lab > 0.9))


    X = X[keep_events]
    Y_lab = Y_lab[keep_events]
    Y_true = Y_true[keep_events]
    jet_pts = jet_pts[keep_events]


    print("New sig fracs are:  ")
    print_signal_fractions(Y_true, Y_lab)

(X_train, X_val, X_test,
 jet_pts_train, jet_pts_val, jet_pts_test,
 Y_true_train, Y_true_val, Y_true_test,
 Y_lab_train, Y_lab_val, Y_lab_test) = data_split(X, jet_pts, Y_true, Y_lab,
                                                  val=val_frac, test=test_frac,
                                                  shuffle=True)

evt_weights = np.ones(X_train.shape[0])

print(Y_lab_train)

if options.reweight:
    print("Doing reweighting based on jet pt")
    sr_pts = jet_pts_train[Y_lab_train[:, 0] > 0.9]
    br_pts = jet_pts_train[Y_lab_train[:, 0] < 0.1]
    labels = ['Signal', 'Background']
    colors = ['b', 'r']
    n_pt_bins = 20
keep_event = bkg_sample | (Y_mjj_window == 1)

#print(window_size)
#print(mjj[:20])
#print(keep_event[:20])
#print(X.shape, Y_mjj_window.shape)
X = X[keep_event]
Y = Y[keep_event]
Y_mjj_window = Y_mjj_window[keep_event]
#print(X.shape, Y_mjj_window.shape)

(X_train, X_val, X_test, Y_train, Y_val, Y_test, Y_train_true, Y_val_true,
 Y_test_true) = data_split(X,
                           Y_mjj_window,
                           Y,
                           val=val_frac,
                           test=test_frac,
                           shuffle=True)

print_signal_fractions(Y_train_true, Y_train)

evt_weights = np.ones(X_train.shape[0])

myoptimizer = tf.keras.optimizers.Adam(lr=0.001,
                                       beta_1=0.8,
                                       beta_2=0.99,
                                       epsilon=1e-08,
                                       decay=0.0005)
if (options.use_dense):
    my_model = dense_net(X_train.shape[1])
else: