Example no. 1
import numpy as np
from sklearn.metrics import roc_auc_score

from energyflow.archs import PFN
from energyflow.utils import data_split, remap_pids, to_categorical


def PFN_AUC_calculation(jet_array_1, jet_array_2, train_size, test_size):
    X = np.concatenate([jet_array_1, jet_array_2])[:,:,:4]
    y = np.concatenate([np.ones(len(jet_array_1)), np.zeros(len(jet_array_2))])

    ################################### SETTINGS ###################################

    # data controls
    train, val, test = train_size, X.shape[0]-train_size-test_size, test_size
    use_pids = True

    # network architecture parameters
    Phi_sizes, F_sizes = (100, 100, 128), (100, 100, 100)

    # network training parameters
    num_epoch = 10
    batch_size = 500

    ################################################################################

    # convert labels to categorical
    Y = to_categorical(y, num_classes=2)

    # preprocess by centering jets and normalizing pts
    for x in X:
        mask = x[:,0] > 0
        yphi_avg = np.average(x[mask,1:3], weights=x[mask,0], axis=0)
        x[mask,1:3] -= yphi_avg
        x[mask,0] /= x[:,0].sum()

    # handle particle id channel
    if use_pids:
        remap_pids(X, pid_i=3)
    else:
        X = X[:,:,:3]

    # do train/val/test split 
    (X_train, X_val, X_test,
     Y_train, Y_val, Y_test) = data_split(X, Y, val=val, test=test)

    # build architecture (`suppress_stdout` is assumed to be a user-provided
    # context manager that silences Keras model-construction output)
    with suppress_stdout():
        pfn = PFN(input_dim=X.shape[-1], Phi_sizes=Phi_sizes, F_sizes=F_sizes)

    # train model
    pfn.fit(X_train, Y_train,
            epochs=num_epoch,
            batch_size=batch_size,
            validation_data=(X_val, Y_val),
            verbose=0)

    # get predictions on test data
    preds = pfn.predict(X_test, batch_size=1000)

    # get area under the ROC curve
    auc = roc_auc_score(Y_test[:,1], preds[:,1])
    
    return auc
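
A minimal usage sketch for the function above, assuming the two jet collections come from EnergyFlow's bundled quark/gluon dataset (ef.qg_jets.load, which returns padded (pt, y, phi, pid) arrays); the dataset size and split sizes are illustrative only.

import energyflow as ef

# load 20k padded quark/gluon jets and separate them by truth label
X_qg, y_qg = ef.qg_jets.load(num_data=20000)
quark_jets, gluon_jets = X_qg[y_qg == 1], X_qg[y_qg == 0]

# 10k jets for training, 5k for testing; the remaining 5k become the validation set
auc = PFN_AUC_calculation(quark_jets, gluon_jets, train_size=10000, test_size=5000)
print('PFN AUC: %.3f' % auc)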
Example no. 2
# convert labels to categorical
Y = to_categorical(y, num_classes=2)

print('Loaded quark and gluon jets')

# preprocess by centering jets and normalizing pts
for x in X:
    mask = x[:, 0] > 0
    yphi_avg = np.average(x[mask, 1:3], weights=x[mask, 0], axis=0)
    x[mask, 1:3] -= yphi_avg
    x[mask, 0] /= x[:, 0].sum()

# handle particle id channel
if use_pids:
    remap_pids(X, pid_i=3)
else:
    X = X[:, :, :3]

print('Finished preprocessing')

# do train/val/test split
(X_train, X_val, X_test, Y_train, Y_val, Y_test) = data_split(X,
                                                              Y,
                                                              val=val,
                                                              test=test)

print('Done train/val/test split')
print('Model summary:')

# build architecture
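# (What follows is a minimal sketch of how this snippet would likely continue,
#  reusing the architecture and training settings of Example no. 1; Phi_sizes,
#  F_sizes, num_epoch and batch_size are assumed to be defined earlier in the
#  script, and PFN to be imported from energyflow.archs.)
pfn = PFN(input_dim=X.shape[-1], Phi_sizes=Phi_sizes, F_sizes=F_sizes)

# train model
pfn.fit(X_train, Y_train,
        epochs=num_epoch,
        batch_size=batch_size,
        validation_data=(X_val, Y_val),
        verbose=1)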
Example no. 3
import numpy as np

import energyflow as ef
from energyflow.utils import remap_pids


def load_data(cache_dir,
              pt_lower,
              pt_upper,
              eta,
              quality,
              pad,
              x_dim=3,
              momentum_scale=250,
              n=100000,
              amount=1,
              max_particle_select=None,
              frac=1.0,
              return_pfcs=True):

    # Load data
    specs = [
        f'{pt_lower} <= gen_jet_pts <= {pt_upper}', f'abs_jet_eta < {eta}',
        f'quality >= {quality}'
    ]
    sim = ef.mod.load(*specs,
                      cache_dir=cache_dir,
                      dataset='sim',
                      amount=amount)

    # Gen_pt for Y
    Y1 = sim.jets_f[:, sim.gen_jet_pt]
    Y = np.zeros((Y1.shape[0], 1), dtype=np.float32)
    Y[:, 0] = Y1 / momentum_scale

    # Sim_pt for X
    X = np.zeros((Y1.shape[0], 3), dtype=np.float32)
    X[:, 0] = sim.jets_f[:, sim.jet_pt] / momentum_scale
    X[:, 1] = sim.jets_f[:, sim.jet_eta]
    X[:, 2] = sim.jets_f[:, sim.jet_phi]

    # CMS JEC's
    C = sim.jets_f[:, sim.jec]

    # PFC's
    pfcs = sim.particles

    # Shuffle and trim
    shuffle_indices = np.random.choice(np.arange(pfcs.shape[0]),
                                       size=int(pfcs.shape[0] * frac),
                                       replace=False)
    pfcs = pfcs[shuffle_indices]
    Y = Y[shuffle_indices]
    X = X[shuffle_indices]
    C = C[shuffle_indices]

    pfcs = pfcs[:n]
    Y = Y[:n]
    X = X[:n]
    C = C[:n]

    # PFC's: pad/truncate each jet to `pad` particles, highest-pt first
    dataset = np.zeros((pfcs.shape[0], pad, x_dim), dtype=np.float32)
    # particle counts are needed even when return_pfcs is False (for the
    # max_particle_select cut below), so collect them for every jet up front
    particle_counts = [jet.shape[0] for jet in pfcs]
    if return_pfcs:
        for (i, jet) in enumerate(pfcs):
            size = min(jet.shape[0], pad)
            indices = (-jet[:, 0]).argsort()
            dataset[i, :size, 0] = jet[indices[:size], 0] / momentum_scale
            dataset[i, :size, 1] = jet[indices[:size], 1]
            dataset[i, :size, 2] = jet[indices[:size], 2]
            if x_dim == 4:
                dataset[i, :size, 3] = jet[indices[:size], 4]  # PID
        if x_dim == 4:
            remap_pids(dataset, pid_i=3, error_on_unknown=False)

        for x in dataset:
            mask = x[:, 0] > 0
            yphi_avg = np.average(x[mask, 1:3], weights=x[mask, 0], axis=0)
            x[mask, 1:3] -= yphi_avg

    particle_counts = np.array(particle_counts)

    # Trim and shuffle
    if max_particle_select is not None:
        dataset = dataset[particle_counts < max_particle_select]
        Y = Y[particle_counts < max_particle_select]
        X = X[particle_counts < max_particle_select]
        C = C[particle_counts < max_particle_select]
        particle_counts = particle_counts[
            particle_counts < max_particle_select]

    # reshuffle (and, if frac < 1, further subsample) the trimmed arrays
    shuffle_indices = np.random.choice(np.arange(dataset.shape[0]),
                                       size=int(dataset.shape[0] * frac),
                                       replace=False)
    dataset = dataset[shuffle_indices]
    Y = Y[shuffle_indices]
    X = X[shuffle_indices]
    C = C[shuffle_indices]
    particle_counts = particle_counts[shuffle_indices]

    print("X: ", X.shape, X.dtype)
    print("Y: ", Y.shape, Y.dtype)
    print("PFCs: ", dataset.shape, dataset.dtype)

    if not return_pfcs:
        return X, Y, C, particle_counts

    print("Max # of particles: %d" % max(particle_counts))
    return X, dataset, Y, C, particle_counts
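
A hedged usage sketch for load_data. The cuts, pad length, and cache directory below are illustrative; the call downloads (or reads from cache) the CMS2011AJets 'sim' collection through energyflow.mod.load.

# illustrative selection: 375-425 GeV jets, |eta| < 1.9, quality >= 2,
# each jet padded/truncated to 150 particles with (pt, y, phi, pid) features
X, pfcs, Y, C, particle_counts = load_data(cache_dir='~/.energyflow',
                                           pt_lower=375,
                                           pt_upper=425,
                                           eta=1.9,
                                           quality=2,
                                           pad=150,
                                           x_dim=4,
                                           n=50000)
print('Jets kept:', X.shape[0])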