def test_coordinate_transforms(nevents, nparticles):
    """Round-trip random four-vectors through (pt, y, phi, m) and back.

    Events are generated with random masses, converted with
    ``ef.ptyphims_from_p4s``, converted back with ``ef.p4s_from_ptyphims``,
    and the result is compared to the input with a tolerance of 1e-11.
    """
    generated = ef.gen_random_events(nevents, nparticles, dim=4, mass='random')
    original = generated.reshape(nevents, nparticles, 4)

    # forward and inverse transform in one chain
    round_tripped = ef.p4s_from_ptyphims(ef.ptyphims_from_p4s(original))

    assert epsilon_diff(original, round_tripped, 1e-11)
def fit_pfn(self, model_settings):
    """Train a Particle Flow Network and store ROC curves for it and two baselines.

    Reads ``self.y``, ``self.X_particles``, ``self.n_val`` and ``self.n_test``;
    writes the entries ``'PFN'``, ``'Jet_mass'`` and ``'Multiplicity'`` of
    ``self.roc_curve_dict``.

    Note that the preprocessing loop modifies ``self.X_particles`` in place.

    Args:
        model_settings: mapping providing the keys ``'use_pids'``,
            ``'Phi_sizes'``, ``'F_sizes'``, ``'epochs'`` and ``'batch_size'``.
    """
    # convert labels to categorical
    Y_PFN = energyflow.utils.to_categorical(self.y, num_classes=2)

    # preprocess by centering jets and normalizing pts
    X_PFN = self.X_particles
    for x_PFN in X_PFN:
        mask = x_PFN[:, 0] > 0
        if not mask.any():
            # No particle with positive pt: np.average would raise
            # ZeroDivisionError on zero total weight, and there is
            # nothing to center or normalize anyway.
            continue
        yphi_avg = np.average(x_PFN[mask, 1:3], weights=x_PFN[mask, 0], axis=0)
        x_PFN[mask, 1:3] -= yphi_avg
        x_PFN[mask, 0] /= x_PFN[:, 0].sum()

    # handle particle id channel
    if model_settings['use_pids']:
        self.my_remap_pids(X_PFN)
    else:
        X_PFN = X_PFN[:, :, :3]

    # Split data into train, val and test sets
    (X_PFN_train, X_PFN_val, X_PFN_test,
     Y_PFN_train, Y_PFN_val, Y_PFN_test) = energyflow.utils.data_split(
        X_PFN, Y_PFN, val=self.n_val, test=self.n_test)

    # build architecture
    pfn = energyflow.archs.PFN(input_dim=X_PFN.shape[-1],
                               Phi_sizes=model_settings['Phi_sizes'],
                               F_sizes=model_settings['F_sizes'])

    # train model
    pfn.fit(X_PFN_train, Y_PFN_train,
            epochs=model_settings['epochs'],
            batch_size=model_settings['batch_size'],
            validation_data=(X_PFN_val, Y_PFN_val),
            verbose=1)

    # get predictions on test data
    preds_PFN = pfn.predict(X_PFN_test, batch_size=1000)

    # Get AUC and ROC curve + make plot
    auc_PFN = sklearn.metrics.roc_auc_score(Y_PFN_test[:, 1], preds_PFN[:, 1])
    print('Particle Flow Networks/Deep Sets: AUC = {} (test set)'.format(auc_PFN))
    self.roc_curve_dict['PFN'] = sklearn.metrics.roc_curve(Y_PFN_test[:, 1], preds_PFN[:, 1])

    # ------------------------------------------------------------------
    # Now we compare the PFN ROC curve to single observables
    # (these baselines are evaluated on the training split)

    # 1. Jet mass: convert (pt, y, phi) to 4-vectors, sum them, take the mass.
    #    Only the first three columns are passed on purpose: with
    #    'use_pids' enabled the array carries an extra PID column, and
    #    p4s_from_ptyphims would interpret a fourth column as particle mass.
    #    (Note: X_PFN_train is centered and normalized .. should be ok)
    masses = np.asarray([
        energyflow.ms_from_p4s(
            energyflow.p4s_from_ptyphims(x[:, :3]).sum(axis=0))
        for x in X_PFN_train
    ])
    self.roc_curve_dict['Jet_mass'] = sklearn.metrics.roc_curve(Y_PFN_train[:, 1], -masses)

    # 2. Multiplicity (Is this a useful observable for pp vs AA?)
    mults = np.asarray([np.count_nonzero(x[:, 0]) for x in X_PFN_train])
    self.roc_curve_dict['Multiplicity'] = sklearn.metrics.roc_curve(Y_PFN_train[:, 1], -mults)
def test_phis_from_p4s(nevents, nparticles, phi_ref):
    """Check ``ef.phis_from_p4s`` on randomly generated particles.

    ``phi_ref`` may be ``None``, the string ``'array'`` (replaced here by one
    random reference angle per event), the string ``'hardest'``, or any other
    value that is forwarded unchanged to ``ef.phis_from_p4s``.

    With no reference the recovered angles must match the generated ones;
    otherwise every recovered angle must lie within pi of the reference.
    """
    shape = (nevents, nparticles, 1)

    # draw order (phi, y, pt, m) is kept so seeded runs stay reproducible
    phis = 2 * np.pi * np.random.rand(*shape)
    ys = 6 * np.random.rand(*shape) - 3
    pts = 100 * np.random.rand(*shape)
    ms = np.random.rand(*shape)

    ptyphims = np.concatenate((pts, ys, phis, ms), axis=-1)
    p4s = ef.p4s_from_ptyphims(ptyphims)

    # isinstance guard avoids an elementwise comparison if phi_ref is an array
    if isinstance(phi_ref, str) and phi_ref == 'array':
        phi_ref = 2 * np.pi * np.random.rand(nevents)

    new_phis = ef.phis_from_p4s(p4s, phi_ref=phi_ref)

    if phi_ref is None:
        assert epsilon_diff(new_phis, phis[..., 0])
        return

    if isinstance(phi_ref, str) and phi_ref == 'hardest':
        # phi of the highest-pt particle in each event
        hardest = np.argmax(pts[:, :, 0], axis=1)
        phi_ref = phis[np.arange(nevents), hardest, 0]

    assert np.all(np.abs(new_phis.T - phi_ref) <= np.pi)
def test_sum_ptyphims(nparticles, scheme):
    """Check ``ef.sum_ptyphims`` for the 'escheme' and 'ptscheme' options.

    For 'escheme' the summed (pt, y, phi, m), converted back to a four-vector,
    must equal the plain sum of the event's four-vectors.  For 'ptscheme' the
    result must equal the total pt together with the pt-weighted mean of y
    and phi.  Any other ``scheme`` value performs no checks.
    """
    tol = 10**-12
    p4s = ef.gen_random_events(10, nparticles, dim=4, mass='random')
    ptyphims = ef.ptyphims_from_p4s(p4s)

    if scheme == 'escheme':
        for event_p4s, event_ptyphims in zip(p4s, ptyphims):
            summed = ef.sum_ptyphims(event_ptyphims, scheme=scheme)
            tot = ef.p4s_from_ptyphims(summed)
            expected = event_p4s.sum(axis=0)
            assert epsilon_diff(tot, expected, tol)
        return

    if scheme == 'ptscheme':
        for event_ptyphims in ptyphims:
            tot = ef.sum_ptyphims(event_ptyphims, scheme=scheme)

            weights = event_ptyphims[:, 0]
            pt = weights.sum()
            y = np.sum(weights * event_ptyphims[:, 1]) / pt
            phi = np.sum(weights * event_ptyphims[:, 2]) / pt

            assert epsilon_diff(tot, np.array([pt, y, phi]), tol)
# area under the ROC curve for the EFN predictions
auc = roc_auc_score(Y_test[:, 1], preds[:, 1])
print()
print('EFN AUC:', auc)
print()

# some nicer plot settings
plt.rcParams.update({
    'font.family': 'serif',
    'figure.autolayout': True,
})
fig, axes = plt.subplots(1, 2, figsize=(8, 4))

######################### ROC Curve Plot #########################

# get multiplicity and mass for comparison
masses = np.asarray([
    ef.ms_from_p4s(ef.p4s_from_ptyphims(x).sum(axis=0))
    for x in X
])
mults = np.asarray([np.count_nonzero(x[:, 0]) for x in X])
mass_fp, mass_tp, threshs = roc_curve(Y[:, 1], -masses)
mult_fp, mult_tp, threshs = roc_curve(Y[:, 1], -mults)

# plot the ROC curves: (true-positive rate, false-positive rate, color, label)
for _tp, _fp, _color, _label in (
        (efn_tp, efn_fp, 'black', 'EFN'),
        (mass_tp, mass_fp, 'blue', 'Jet Mass'),
        (mult_tp, mult_fp, 'red', 'Multiplicity'),
):
    axes[0].plot(_tp, 1 - _fp, '-', color=_color, label=_label)

# axes labels
axes[0].set_xlabel('Quark Jet Efficiency')
axes[0].set_ylabel('Gluon Jet Rejection')

# axes limits
axes[0].set_xlim(0, 1)
def __init__(self, sim):
    """Group the jets of ``sim`` by event number and convert each jet to a four-vector.

    For every distinct value in ``sim.evns`` the rows of ``sim.particles``,
    ``sim.hard_pids``, ``sim.jet_pts``, ``sim.jet_etas``, ``sim.jet_phis`` and
    ``sim.jet_ms`` whose ``sim.jets_i[:, sim.evn]`` entry equals that value
    are collected into the per-event lists ``event_list``,
    ``event_jet_labels``, ``event_pts``, ``event_etas``, ``event_phis`` and
    ``event_ms``.  ``event_stats[i][j]`` then holds, as a plain list, the
    result of ``ef.p4s_from_ptyphims`` for jet ``j`` of event ``i``.

    Progress and the elapsed CPU time are printed to stdout.

    Args:
        sim: object exposing the attributes listed above.
    """
    sim_numbers = set(sim.evns)
    t1_start = process_time()

    self.event_list = []
    self.event_jet_labels = []
    self.event_pts = []
    self.event_etas = []
    self.event_phis = []
    self.event_ms = []

    # Event-number column of the per-jet table, extracted once rather than
    # on every iteration.
    jet_event_numbers = sim.jets_i[:, sim.evn]

    print("Starting event processing")
    for i, evn_num in enumerate(sim_numbers, start=1):
        report = i % 1000 == 0
        if report:
            print("Working on event " + str(i))

        # One boolean mask per event, shared by all six selections.
        in_event = jet_event_numbers == evn_num
        self.event_list.append(np.asarray(sim.particles[in_event]))
        self.event_jet_labels.append(np.asarray(sim.hard_pids[in_event]))
        self.event_pts.append(np.asarray(sim.jet_pts[in_event]))
        self.event_etas.append(np.asarray(sim.jet_etas[in_event]))
        self.event_phis.append(np.asarray(sim.jet_phis[in_event]))
        self.event_ms.append(np.asarray(sim.jet_ms[in_event]))

        if report:
            print(str(i) + " events processed")
    print()

    print("Starting mass calculation")
    self.event_stats = []
    per_event = zip(self.event_pts, self.event_etas,
                    self.event_phis, self.event_ms)
    for i, (pts, etas, phis, ms) in enumerate(per_event):
        # NOTE(review): p4s_from_ptyphims documents its second component as
        # rapidity; confirm that passing jet_etas here is intended.
        event_p4s = [
            ef.p4s_from_ptyphims(np.array([pt, eta, phi, m])).tolist()
            for pt, eta, phi, m in zip(pts, etas, phis, ms)
        ]
        self.event_stats.append(event_p4s)
        if i % 1000 == 0:
            print(str(i) + " event masses calculated")

    t1_stop = process_time()
    print("Elapsed time during the whole program in seconds:",
          t1_stop - t1_start)
# get ROC curve if we have sklearn
if roc_curve:
    cnn_fp, cnn_tp, threshs = roc_curve(Y_test[:, 1], preds[:, 1])

    # get area under the ROC curve
    auc = roc_auc_score(Y_test[:, 1], preds[:, 1])
    print()
    print('CNN AUC:', auc)
    print()

    # make ROC curve plot if we have matplotlib
    if plt:

        # get multiplicity and mass for comparison
        masses = np.asarray([
            ef.ms_from_p4s(ef.p4s_from_ptyphims(x).sum(axis=0))
            for x in X
        ])
        mults = np.asarray([np.count_nonzero(x[:, 0]) for x in X])
        mass_fp, mass_tp, threshs = roc_curve(Y[:, 1], -masses)
        mult_fp, mult_tp, threshs = roc_curve(Y[:, 1], -mults)

        # some nicer plot settings
        plt.rcParams.update({
            'figure.figsize': (4, 4),
            'font.family': 'serif',
            'figure.autolayout': True,
        })

        # plot the ROC curves: (true-positive rate, false-positive rate, color, label)
        for _tp, _fp, _color, _label in (
                (cnn_tp, cnn_fp, 'black', 'CNN'),
                (mass_tp, mass_fp, 'blue', 'Jet Mass'),
                (mult_tp, mult_fp, 'red', 'Multiplicity'),
        ):
            plt.plot(_tp, 1 - _fp, '-', color=_color, label=_label)

        # axes labels
def __init__(self, sim):
    """Bucket the jets of ``sim`` by event number, then convert each jet to a four-vector.

    Walks ``sim.particles`` once; entry ``k`` is filed under the event number
    ``sim.evns[k]`` together with the matching entries of ``sim.hard_pids``,
    ``sim.jet_pts``, ``sim.jet_etas``, ``sim.jet_phis`` and ``sim.jet_ms``.
    Events appear in order of first occurrence.  ``event_stats[i][j]`` holds,
    as a plain list, the result of ``ef.p4s_from_ptyphims`` for jet ``j`` of
    event ``i``.

    Progress and the elapsed CPU time are printed to stdout.
    """
    t1_start = process_time()
    print("Starting event processing")

    jets_by_event = {}
    labels_by_event = {}
    pts_by_event = {}
    etas_by_event = {}
    phis_by_event = {}
    ms_by_event = {}

    for idx, jet in enumerate(sim.particles):
        key = sim.evns[idx]
        # setdefault creates the bucket the first time an event number is seen
        jets_by_event.setdefault(key, []).append(jet)
        labels_by_event.setdefault(key, []).append(sim.hard_pids[idx])
        pts_by_event.setdefault(key, []).append(sim.jet_pts[idx])
        etas_by_event.setdefault(key, []).append(sim.jet_etas[idx])
        phis_by_event.setdefault(key, []).append(sim.jet_phis[idx])
        ms_by_event.setdefault(key, []).append(sim.jet_ms[idx])

    self.event_list = list(jets_by_event.values())
    self.event_jet_labels = list(labels_by_event.values())
    self.event_pts = list(pts_by_event.values())
    self.event_etas = list(etas_by_event.values())
    self.event_phis = list(phis_by_event.values())
    self.event_ms = list(ms_by_event.values())
    print("Event processing finished")

    print("Starting 4-vector conversion")
    self.event_stats = []
    grouped = zip(self.event_pts, self.event_etas,
                  self.event_phis, self.event_ms)
    for pts, etas, phis, ms in grouped:
        converted = []
        for components in zip(pts, etas, phis, ms):
            # one (pt, eta, phi, m) quadruple per jet
            p4s = ef.p4s_from_ptyphims(np.array(list(components)))
            converted.append(p4s.tolist())
        self.event_stats.append(converted)

    t1_stop = process_time()
    print("4-vector conversion finished")
    print("Elapsed time during event and 4-vector parsing in seconds:",
          t1_stop - t1_start)