Beispiel #1
0
def test_coordinate_transforms(nevents, nparticles):
    """Round-trip random massive events through hadronic coordinates.

    Events are generated as Cartesian four-vectors, converted with
    ``ef.ptyphims_from_p4s`` and converted back with
    ``ef.p4s_from_ptyphims``; the result is compared against the input
    with ``epsilon_diff`` using a tolerance argument of ``1e-11``.
    """
    events = ef.gen_random_events(nevents, nparticles, dim=4, mass='random')
    # Force an explicit (event, particle, component) layout.
    events = events.reshape(nevents, nparticles, 4)

    # Cartesian -> (pt, y, phi, m) -> Cartesian.
    roundtrip = ef.p4s_from_ptyphims(ef.ptyphims_from_p4s(events))

    assert epsilon_diff(events, roundtrip, 1e-11)
Beispiel #2
0
    def fit_pfn(self, model_settings):
        """Train a Particle Flow Network and record ROC curves.

        Reads ``self.X_particles`` (per-jet particle arrays), ``self.y``
        (labels), ``self.n_val`` and ``self.n_test`` (split sizes), and writes
        the entries ``'PFN'``, ``'Jet_mass'`` and ``'Multiplicity'`` of
        ``self.roc_curve_dict``.

        Args:
            model_settings: mapping providing ``'use_pids'``, ``'Phi_sizes'``,
                ``'F_sizes'``, ``'epochs'`` and ``'batch_size'``.

        Notes:
            * ``X_PFN`` aliases ``self.X_particles``, so the centering and
              pt normalization below modify ``self.X_particles`` in place.
            * NOTE(review): the PFN ROC curve is evaluated on the test split
              while the single-observable curves use the training split;
              confirm that this is intended.
        """

        # convert labels to categorical
        Y_PFN = energyflow.utils.to_categorical(self.y, num_classes=2)

        # preprocess by centering jets and normalizing pts
        X_PFN = self.X_particles
        for x_PFN in X_PFN:
            mask = x_PFN[:,0] > 0
            if not mask.any():
                # No entry with positive pt: np.average would raise because
                # the weights sum to zero, so leave this jet untouched.
                continue
            yphi_avg = np.average(x_PFN[mask,1:3], weights=x_PFN[mask,0], axis=0)
            x_PFN[mask,1:3] -= yphi_avg
            x_PFN[mask,0] /= x_PFN[:,0].sum()

        # handle particle id channel
        if model_settings['use_pids']:
            self.my_remap_pids(X_PFN)
        else:
            X_PFN = X_PFN[:,:,:3]

        # Split data into train, val and test sets
        (X_PFN_train, X_PFN_val, X_PFN_test,
         Y_PFN_train, Y_PFN_val, Y_PFN_test) = energyflow.utils.data_split(
            X_PFN, Y_PFN, val=self.n_val, test=self.n_test)

        # build architecture
        pfn = energyflow.archs.PFN(input_dim=X_PFN.shape[-1],
                                   Phi_sizes=model_settings['Phi_sizes'],
                                   F_sizes=model_settings['F_sizes'])

        # train model
        pfn.fit(X_PFN_train, Y_PFN_train,
                epochs=model_settings['epochs'],
                batch_size=model_settings['batch_size'],
                validation_data=(X_PFN_val, Y_PFN_val),
                verbose=1)

        # get predictions on test data
        preds_PFN = pfn.predict(X_PFN_test, batch_size=1000)

        # Get AUC and ROC curve + make plot
        auc_PFN = sklearn.metrics.roc_auc_score(Y_PFN_test[:,1], preds_PFN[:,1])
        print('Particle Flow Networks/Deep Sets: AUC = {} (test set)'.format(auc_PFN))

        self.roc_curve_dict['PFN'] = sklearn.metrics.roc_curve(Y_PFN_test[:,1], preds_PFN[:,1])

        # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

        # Now we compare the PFN ROC curve to single observables

        # 1. Jet mass, computed from (pt, y, phi) only.
        #    When use_pids is set the arrays keep a fourth column (the PID
        #    channel); p4s_from_ptyphims would read a fourth column as the
        #    particle mass, so it is sliced away here.
        #    (Note: X_PFN_train is centered and normalized .. should be ok)
        masses = np.asarray([
            energyflow.ms_from_p4s(energyflow.p4s_from_ptyphims(x[:,:3]).sum(axis=0))
            for x in X_PFN_train
        ])
        self.roc_curve_dict['Jet_mass'] = sklearn.metrics.roc_curve(Y_PFN_train[:,1], -masses)

        # 2. Multiplicity (Is this a useful observable for pp vs AA?)
        mults = np.asarray([np.count_nonzero(x[:,0]) for x in X_PFN_train])
        self.roc_curve_dict['Multiplicity'] = sklearn.metrics.roc_curve(Y_PFN_train[:,1], -mults)
Beispiel #3
0
def test_phis_from_p4s(nevents, nparticles, phi_ref):
    """Check ``ef.phis_from_p4s`` for the supported ``phi_ref`` modes.

    ``phi_ref`` may be ``None``, the string ``'array'`` (replaced by one
    random reference angle per event), the string ``'hardest'``, or any
    other value that is passed straight through. With ``None`` the recovered
    angles must match the generated ones; otherwise every recovered angle
    must lie within ``pi`` of its reference.
    """
    shape = (nevents, nparticles, 1)

    # Draw order (phi, y, pt, m) is kept fixed so a seeded RNG yields the
    # same events.
    phis = 2 * np.pi * np.random.rand(*shape)
    ys = 6 * np.random.rand(*shape) - 3
    pts = 100 * np.random.rand(*shape)
    ms = np.random.rand(*shape)

    ptyphims = np.concatenate((pts, ys, phis, ms), axis=-1)
    p4s = ef.p4s_from_ptyphims(ptyphims)

    # The isinstance check avoids comparing an array-valued phi_ref to a str.
    if isinstance(phi_ref, str) and phi_ref == 'array':
        phi_ref = 2 * np.pi * np.random.rand(nevents)

    new_phis = ef.phis_from_p4s(p4s, phi_ref=phi_ref)

    if phi_ref is None:
        assert epsilon_diff(new_phis, phis[..., 0])
        return

    if isinstance(phi_ref, str) and phi_ref == 'hardest':
        # Reference angle is the phi of the highest-pt particle of each event.
        phi_ref = np.asarray(
            [ev_phis[np.argmax(ev_pts[:, 0]), 0]
             for ev_pts, ev_phis in zip(pts, phis)])

    # Transpose so the per-event reference broadcasts along the last axis.
    assert np.all(np.abs(new_phis.T - phi_ref) <= np.pi)
Beispiel #4
0
def test_sum_ptyphims(nparticles, scheme):
    """Check ``ef.sum_ptyphims`` against a direct computation.

    For ``'escheme'`` the summed hadronic vector, converted back to a
    four-vector, is compared with the plain sum of the event's four-vectors.
    For ``'ptscheme'`` the result is compared with the total pt and the
    pt-weighted averages of y and phi. Any other ``scheme`` value performs
    no check.
    """
    tol = 10**-12
    p4s = ef.gen_random_events(10, nparticles, dim=4, mass='random')
    ptyphims = ef.ptyphims_from_p4s(p4s)

    if scheme == 'escheme':
        for event_p4s, event in zip(p4s, ptyphims):
            summed = ef.sum_ptyphims(event, scheme=scheme)
            recombined = ef.p4s_from_ptyphims(summed)
            expected = event_p4s.sum(axis=0)

            assert epsilon_diff(recombined, expected, tol)

    elif scheme == 'ptscheme':
        for event in ptyphims:
            tot = ef.sum_ptyphims(event, scheme=scheme)

            pts, ys, phis = event[:, 0], event[:, 1], event[:, 2]
            pt = pts.sum()
            y = np.sum(pts * ys) / pt
            phi = np.sum(pts * phis) / pt

            assert epsilon_diff(tot, np.array([pt, y, phi]), tol)
Beispiel #5
0
# area under the EFN ROC curve
auc = roc_auc_score(Y_test[:, 1], preds[:, 1])
print()
print('EFN AUC:', auc)
print()

# some nicer plot settings
plt.rcParams.update({'font.family': 'serif', 'figure.autolayout': True})

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))

######################### ROC Curve Plot #########################

# single-observable baselines: jet mass and constituent multiplicity
masses = np.asarray([
    ef.ms_from_p4s(np.sum(ef.p4s_from_ptyphims(jet), axis=0)) for jet in X
])
mults = np.asarray([np.count_nonzero(jet[:, 0]) for jet in X])

# scores are negated before being handed to roc_curve
mass_fp, mass_tp, threshs = roc_curve(Y[:, 1], -masses)
mult_fp, mult_tp, threshs = roc_curve(Y[:, 1], -mults)

# plot the ROC curves (true-positive rate against 1 - false-positive rate)
for _tp, _rejection, _color, _label in (
        (efn_tp, 1 - efn_fp, 'black', 'EFN'),
        (mass_tp, 1 - mass_fp, 'blue', 'Jet Mass'),
        (mult_tp, 1 - mult_fp, 'red', 'Multiplicity')):
    axes[0].plot(_tp, _rejection, '-', color=_color, label=_label)

# axes labels
axes[0].set_xlabel('Quark Jet Efficiency')
axes[0].set_ylabel('Gluon Jet Rejection')

# axes limits
axes[0].set_xlim(0, 1)
    def __init__(self, sim):
        """Group the jets of ``sim`` by event number and build per-jet 4-vectors.

        For every distinct value in ``sim.evns`` the jets whose entry in
        column ``sim.evn`` of ``sim.jets_i`` equals that value are selected,
        and the matching slices of ``sim.particles``, ``sim.hard_pids``,
        ``sim.jet_pts``, ``sim.jet_etas``, ``sim.jet_phis`` and
        ``sim.jet_ms`` are stored, one array per event, in
        ``self.event_list``, ``self.event_jet_labels``, ``self.event_pts``,
        ``self.event_etas``, ``self.event_phis`` and ``self.event_ms``.

        ``self.event_stats`` then holds, per event, a list with one
        ``ef.p4s_from_ptyphims`` result (converted to a Python list) per jet.
        The progress messages call this step "mass calculation", but the
        values stored are 4-vectors.

        Events are visited in the iteration order of ``set(sim.evns)``.

        Args:
            sim: object exposing the attributes listed above.
        """
        sim_numbers = set(sim.evns)
        t1_start = process_time()

        self.event_list = []
        self.event_jet_labels = []

        self.event_pts = []
        self.event_etas = []
        self.event_phis = []
        self.event_ms = []

        print("Starting event processing")

        # The event-number column is the same for every iteration: slice once.
        jet_event_numbers = sim.jets_i[:, sim.evn]

        for i, evn_num in enumerate(sim_numbers, start=1):
            if i % 1000 == 0:
                print("Working on event " + str(i))

            # One boolean mask per event, shared by all six selections
            # (previously the same comparison was recomputed six times).
            in_event = jet_event_numbers == evn_num

            self.event_list.append(np.asarray(sim.particles[in_event]))
            self.event_jet_labels.append(np.asarray(sim.hard_pids[in_event]))

            self.event_pts.append(np.asarray(sim.jet_pts[in_event]))
            self.event_etas.append(np.asarray(sim.jet_etas[in_event]))
            self.event_phis.append(np.asarray(sim.jet_phis[in_event]))
            self.event_ms.append(np.asarray(sim.jet_ms[in_event]))

            if i % 1000 == 0:
                print(str(i) + " events processed")

        print()

        print("Starting mass calculation")

        self.event_stats = []

        per_event = zip(self.event_pts, self.event_etas,
                        self.event_phis, self.event_ms)
        for n_done, (pts, etas, phis, ms) in enumerate(per_event, start=1):
            # NOTE(review): jet_etas is passed in the slot p4s_from_ptyphims
            # treats as rapidity -- confirm the stored values are rapidities.
            self.event_stats.append([
                ef.p4s_from_ptyphims(np.array([pt, eta, phi, m])).tolist()
                for pt, eta, phi, m in zip(pts, etas, phis, ms)
            ])

            # Count completed events (1-based) so the message is not emitted
            # with "0" after the very first event.
            if n_done % 1000 == 0:
                print(str(n_done) + " event masses calculated")

        t1_stop = process_time()

        print("Elapsed time during the whole program in seconds:",
              t1_stop - t1_start)
Beispiel #7
0
# get ROC curve if we have sklearn
if roc_curve:
    # false/true positive rates of the CNN on the test split
    cnn_fp, cnn_tp, threshs = roc_curve(Y_test[:, 1], preds[:, 1])

    # get area under the ROC curve
    auc = roc_auc_score(Y_test[:, 1], preds[:, 1])
    print()
    print('CNN AUC:', auc)
    print()

    # make ROC curve plot if we have matplotlib
    if plt:

        # single-observable baselines: jet mass and constituent multiplicity
        masses = np.asarray([
            ef.ms_from_p4s(np.sum(ef.p4s_from_ptyphims(jet), axis=0))
            for jet in X
        ])
        mults = np.asarray([np.count_nonzero(jet[:, 0]) for jet in X])

        # scores are negated before being handed to roc_curve
        mass_fp, mass_tp, threshs = roc_curve(Y[:, 1], -masses)
        mult_fp, mult_tp, threshs = roc_curve(Y[:, 1], -mults)

        # some nicer plot settings
        plt.rcParams.update({
            'figure.figsize': (4, 4),
            'font.family': 'serif',
            'figure.autolayout': True,
        })

        # plot the ROC curves (true-positive rate against 1 - false-positive rate)
        for _tp, _rejection, _color, _label in (
                (cnn_tp, 1 - cnn_fp, 'black', 'CNN'),
                (mass_tp, 1 - mass_fp, 'blue', 'Jet Mass'),
                (mult_tp, 1 - mult_fp, 'red', 'Multiplicity')):
            plt.plot(_tp, _rejection, '-', color=_color, label=_label)

        # axes labels
    def __init__(self, sim):
        """Group the jets of ``sim`` by event number and build per-jet 4-vectors.

        ``sim.particles`` is walked once; entry ``i`` is assigned to the event
        ``sim.evns[i]`` together with ``sim.hard_pids[i]``, ``sim.jet_pts[i]``,
        ``sim.jet_etas[i]``, ``sim.jet_phis[i]`` and ``sim.jet_ms[i]``. The
        grouped values are stored as lists of lists, one inner list per event
        in order of first appearance, in ``self.event_list``,
        ``self.event_jet_labels``, ``self.event_pts``, ``self.event_etas``,
        ``self.event_phis`` and ``self.event_ms``.

        ``self.event_stats`` then holds, per event, a list with one
        ``ef.p4s_from_ptyphims`` result (converted to a Python list) per jet.

        Args:
            sim: object exposing the attributes listed above.
        """
        t1_start = process_time()

        print("Starting event processing")

        # Dicts keep insertion order, so events stay in first-seen order.
        event_list = {}
        event_jet_labels = {}

        event_pts = {}
        event_etas = {}
        event_phis = {}
        event_ms = {}

        for i, jet in enumerate(sim.particles):
            evn_num = sim.evns[i]

            # setdefault creates the per-event list on first sight of evn_num.
            event_list.setdefault(evn_num, []).append(jet)
            event_jet_labels.setdefault(evn_num, []).append(sim.hard_pids[i])
            event_pts.setdefault(evn_num, []).append(sim.jet_pts[i])
            event_etas.setdefault(evn_num, []).append(sim.jet_etas[i])
            event_phis.setdefault(evn_num, []).append(sim.jet_phis[i])
            event_ms.setdefault(evn_num, []).append(sim.jet_ms[i])

        self.event_list = list(event_list.values())
        self.event_jet_labels = list(event_jet_labels.values())

        self.event_pts = list(event_pts.values())
        self.event_etas = list(event_etas.values())
        self.event_phis = list(event_phis.values())
        self.event_ms = list(event_ms.values())

        print("Event processing finished")

        print("Starting 4-vector conversion")

        # NOTE(review): jet_etas is passed in the slot p4s_from_ptyphims
        # treats as rapidity -- confirm the stored values are rapidities.
        self.event_stats = [
            [ef.p4s_from_ptyphims(np.array([pt, eta, phi, m])).tolist()
             for pt, eta, phi, m in zip(pts, etas, phis, ms)]
            for pts, etas, phis, ms in zip(self.event_pts, self.event_etas,
                                           self.event_phis, self.event_ms)
        ]

        t1_stop = process_time()

        print("4-vector conversion finished")

        print("Elapsed time during event and 4-vector parsing in seconds:",
              t1_stop - t1_start)