def convert_vector(yaml_config, parameters, labels, parameters_df, labels_df, parameters_val, labels_val):
    """
    Flatten the particles of every jet into one fixed-length row.

    Return:
        Numpy array (Events, Features), labels_val (passed through untouched)
    Input:
        yaml_config    | Dictionary of config options ('MaxParticles', 'Shuffle')
        parameters     | List of parameters
        labels         | List of labels (not used by this function)
        parameters_df  | Parameters dataframe (needs a 'jet_index' column)
        labels_df      | Labels dataframe (needs a 'jet_index' column)
        parameters_val | Parameters array
        labels_val     | Labels array
    """
    print('Converting data to vector...')

    max_particles = yaml_config['MaxParticles']
    # The last column of parameters_val is dropped for every jet below,
    # so each particle contributes len(parameters) - 1 features.
    num_features = len(parameters) - 1
    vec_length = max_particles * num_features
    total_num = len(labels_df)

    # Allocate space
    parameters_vec = np.zeros((total_num, vec_length))

    for i in range(total_num):
        jet_id = labels_df['jet_index'].iloc[i]
        parameters_df_i = parameters_df[parameters_df['jet_index'] == jet_id]

        # NOTE(review): the dataframe index labels are used as row positions
        # in parameters_val -- this assumes both share the same row order.
        index_values = parameters_df_i.index.values
        parameters_val_i = parameters_val[index_values, :-1]

        # Truncate or zero-pad so every jet has exactly max_particles rows.
        num_particles = len(parameters_val_i)
        if num_particles > max_particles:
            parameters_val_i = parameters_val_i[0:max_particles, :]
        else:
            padding = np.zeros((max_particles - num_particles, num_features))
            parameters_val_i = np.concatenate([parameters_val_i, padding])

        # Shuffling happens after padding, so zero rows are mixed in too.
        if yaml_config['Shuffle']:
            np.random.shuffle(parameters_val_i)

        # Row-major flatten: particle 0 features, particle 1 features, ...
        parameters_vec[i, :] = parameters_val_i.reshape(-1)

        progress.update_progress(float(i + 1) / total_num)

    return parameters_vec, labels_val
def convert_image(yaml_config, parameters, labels, parameters_df, labels_df, parameters_val, labels_val):
    """
    Bin the particles of every jet into a 2D histogram per feature.

    Return:
        Numpy array (Events, Eta, Phi, Features), labels_val (passed through untouched)
    Input:
        yaml_config    | Dictionary of config options
                       | ('BinsX', 'BinsY', 'MinX', 'MaxX', 'MinY', 'MaxY')
        parameters     | List of parameters; MODIFIED IN PLACE: 'eta', 'phi',
                       | 'jet_eta', 'jet_phi' and 'jet_index' are removed
        labels         | List of labels (not used by this function)
        parameters_df  | Parameters dataframe
        labels_df      | Labels dataframe
        parameters_val | Parameters array (not read; only the name is reused)
        labels_val     | Labels array
    """
    print('Converting data to image...')

    bins_x = yaml_config['BinsX']
    bins_y = yaml_config['BinsY']
    xbins = np.linspace(yaml_config['MinX'], yaml_config['MaxX'], bins_x + 1)
    ybins = np.linspace(yaml_config['MinY'], yaml_config['MaxY'], bins_y + 1)

    # Coordinates and bookkeeping columns are not image channels. The
    # caller's list is still edited in place (existing behaviour), but only
    # names that are present are removed, so calling this function a second
    # time with the same list no longer raises ValueError.
    for name in ('eta', 'phi', 'jet_eta', 'jet_phi', 'jet_index'):
        if name in parameters:
            parameters.remove(name)

    total_num = len(labels_df)

    # Allocate space
    parameters_image = np.zeros((total_num, bins_x, bins_y, len(parameters)))

    for i in range(total_num):
        jet_id = labels_df['jet_index'].iloc[i]
        parameters_df_i = parameters_df[parameters_df['jet_index'] == jet_id]

        # Histogram coordinates: jet axis minus the particle columns.
        scaled_eta = np.asarray(parameters_df_i['jet_eta']) - np.asarray(parameters_df_i['eta'])
        scaled_phi = np.asarray(parameters_df_i['jet_phi']) - np.asarray(parameters_df_i['phi'])

        for param_idx, param in enumerate(parameters):
            w = np.asarray(parameters_df_i[param])
            hist, _, _ = np.histogram2d(scaled_eta, scaled_phi, weights=w, bins=(xbins, ybins))
            # hist is (bins_x, bins_y); store it as one channel of the image.
            parameters_image[i, :, :, param_idx] = hist

        progress.update_progress(float(i + 1) / total_num)

    return parameters_image, labels_val
def convert_data(tree, number=None, regression=False, puppi=False):
    """
    Particle flow inputs => Numpy structured array

    Return:
        Structured array with one row per accepted jet constituent
    Input:
        tree       | Iterable of events (GetEntries() is used when number is falsy)
        number     | Stop after this many events (None/0 => all entries)
        regression | Keep only tau-matched jets and add tau_pt/tau_eta/tau_phi
        puppi      | Forwarded to create_jets
    """
    # One-hot rows in the order
    # (photon, electron, muon, neutral hadron, charged hadron).
    # NOTE(review): 11 and 13 are accepted with either sign but only +211 is
    # listed -- confirm that ID == -211 is really meant to be skipped.
    one_hot_names = ('photon_ID', 'electron_ID', 'muon_ID',
                     'neutral_hadron_ID', 'charged_hadron_ID')
    one_hot = {
        22: (1, 0, 0, 0, 0),
        11: (0, 1, 0, 0, 0),
        -11: (0, 1, 0, 0, 0),
        13: (0, 0, 1, 0, 0),
        -13: (0, 0, 1, 0, 0),
        130: (0, 0, 0, 1, 0),
        211: (0, 0, 0, 0, 1),
    }

    fields = [('pt', 'f8'), ('eta', 'f8'), ('phi', 'f8'), ('energy', 'f8'),
              ('photon_ID', 'i4'), ('electron_ID', 'i4'), ('muon_ID', 'i4'),
              ('neutral_hadron_ID', 'i4'), ('charged_hadron_ID', 'i4'),
              ('jet_pt', 'f8'), ('jet_eta', 'f8'), ('jet_phi', 'f8'),
              ('jet_index', 'i8'), ('classification', 'i4')]
    if regression:
        fields.extend([('tau_pt', 'f8'), ('tau_eta', 'f8'), ('tau_phi', 'f8')])

    # One growing list per output field.
    columns = {name: [] for name, _ in fields}

    event_num = 0.  # Event counter (float, so the progress ratio is fractional)
    jet_num = 0     # Jet counter
    total_num = number if number else int(tree.GetEntries())

    for event in tree:
        if event_num == total_num:
            break

        jets = match_taus(create_jets(event, puppi=puppi), create_taus(event))

        for jet, seed, tau in jets:
            # Regression needs a target, so unmatched jets are dropped.
            if regression and (not tau):
                continue

            for part, ID in jet:
                flags = one_hot.get(ID)
                if flags is None:
                    # Not one of the listed particle types
                    continue
                for name, flag in zip(one_hot_names, flags):
                    columns[name].append(flag)

                # particle parameters (eta/phi are relative to the seed)
                columns['pt'].append(part.Pt())
                columns['eta'].append(seed.Eta() - part.Eta())
                columns['phi'].append(delta_phi(seed.Phi(), part.Phi()))
                columns['energy'].append(part.E())

                # jet parameters
                columns['jet_pt'].append(seed.Pt())
                columns['jet_eta'].append(seed.Eta())
                columns['jet_phi'].append(seed.Phi())
                columns['jet_index'].append(jet_num)

                # tau parameters
                if tau and regression:
                    columns['tau_pt'].append(tau.Pt())
                    columns['tau_eta'].append(tau.Eta())
                    columns['tau_phi'].append(tau.Phi())
                columns['classification'].append(1 if tau else 0)

            jet_num += 1

        event_num += 1
        progress.update_progress(event_num / total_num)

    # Every accepted particle appended exactly one value to each column.
    data = np.zeros(len(columns['pt']), dtype=fields)
    for name, _ in fields:
        data[name] = columns[name]
    return data
def plot_data(tree, number=None):
    """
    Plot matching efficiencies

    For every event each jet seed is matched to the first still-unused tau
    candidate within DeltaR < 0.4. Matched taus fill the "1" histograms, all
    tau candidates fill the "2" histograms, and each pair is handed to
    plot_hist. Separate pairs are kept for all taus, the highest-pt tau and
    the second-highest-pt tau of each event.

    Return:
        None
    Input:
        tree   | Iterable of events (GetEntries() is used when number is falsy)
        number | Stop after this many events (None/0 => all entries)
    """
    event_num = 0.  # Event counter (float, so the progress ratio is fractional)
    total_num = number if number else int(tree.GetEntries())
    taus_matched = 0
    taus_reconstructed = 0

    # Initialize histograms: (matched, all) pairs
    hist_pt1, hist_pt2 = initialize_hist(25, 0, 500)
    hist_eta1, hist_eta2 = initialize_hist(15, -2.5, 2.5)
    hist_lpt1, hist_lpt2 = initialize_hist(25, 0, 500)
    hist_leta1, hist_leta2 = initialize_hist(15, -2.5, 2.5)
    hist_slpt1, hist_slpt2 = initialize_hist(25, 0, 500)
    hist_sleta1, hist_sleta2 = initialize_hist(15, -2.5, 2.5)

    # (matched histograms, all histograms) for the leading and subleading tau
    ranked_hists = [
        ((hist_lpt1, hist_leta1), (hist_lpt2, hist_leta2)),
        ((hist_slpt1, hist_sleta1), (hist_slpt2, hist_sleta2)),
    ]

    for event in tree:
        if event_num == total_num:
            break

        jet_candidates = create_jets(event)
        tau_candidates = create_taus(event)

        # Greedy matching: each seed claims the first free tau close to it.
        used_candidates = []
        for seed, _jet in jet_candidates:
            for vec in tau_candidates:
                if (seed.DeltaR(vec) < 0.4) and (vec not in used_candidates):
                    used_candidates.append(vec)
                    break

        # Fill histograms
        taus_matched += len(used_candidates)
        taus_reconstructed += len(tau_candidates)

        for vec in used_candidates:
            hist_pt1.Fill(vec.Pt())
            hist_eta1.Fill(vec.Eta())
        for vec in tau_candidates:
            hist_pt2.Fill(vec.Pt())
            hist_eta2.Fill(vec.Eta())

        # Descending pt; sort-then-reverse keeps the original tie ordering.
        tau_candidates = sorted(tau_candidates, key=lambda x: x.Pt())[::-1]
        for vec, (matched, everything) in zip(tau_candidates, ranked_hists):
            everything[0].Fill(vec.Pt())
            everything[1].Fill(vec.Eta())
            if vec in used_candidates:
                matched[0].Fill(vec.Pt())
                matched[1].Fill(vec.Eta())

        event_num += 1
        progress.update_progress(event_num / total_num)

    # Plot histograms
    # Single-argument print(...) gives the same text on Python 2 and 3
    # (the old statement form put two spaces before the number).
    print('Taus matched:  {}'.format(taus_matched))
    print('Taus reconstructed:  {}'.format(taus_reconstructed))

    plot_hist(hist_pt1, hist_pt2, 'matching_efficiency_pt', 'pt', 'efficiency')
    plot_hist(hist_lpt1, hist_lpt2, 'matching_efficiency_leadingpt', 'pt', 'efficiency')
    plot_hist(hist_slpt1, hist_slpt2, 'matching_efficiency_subleadingpt', 'pt', 'efficiency')
    plot_hist(hist_eta1, hist_eta2, 'matching_efficiency_eta', 'eta', 'efficiency')
    plot_hist(hist_leta1, hist_leta2, 'matching_efficiency_leadingeta', 'eta', 'efficiency')
    plot_hist(hist_sleta1, hist_sleta2, 'matching_efficiency_subleadingeta', 'eta', 'efficiency')
    return None
def convert_data(tree, number=None):
    """
    Convert data for binary classification

    Return:
        Structured array with one row per accepted jet particle;
        'classification' is 1 when abs(jet id) >= 4 and 0 otherwise
    Input:
        tree   | Iterable of events (GetEntries() is used when number is falsy)
        number | Stop after this many events (None/0 => all entries)
    """
    fields = [('pt', 'f8'), ('eta', 'f8'), ('phi', 'f8'), ('et', 'f8'),
              ('photon_ID', 'i4'), ('electron_ID', 'i4'), ('hadron_ID', 'i4'),
              ('jet_pt', 'f8'), ('jet_eta', 'f8'), ('jet_phi', 'f8'),
              ('jet_et', 'f8'), ('jet_index', 'i8'), ('classification', 'i4')]

    # One growing list per output field.
    columns = {name: [] for name, _ in fields}

    event_num = 0.  # float, so the progress ratio is fractional
    jet_num = 0
    total_num = number if number else int(tree.GetEntries())

    for event in tree:
        if event_num == total_num:
            break

        for jet_idx, jet_id in enumerate(event.genjetid):  # iterate through jet
            is_signal = 1 if abs(jet_id) >= 4 else 0

            for k, owner in enumerate(event.genindex):  # iterate through jet particles
                if owner != jet_idx:
                    continue

                # (photon, electron, hadron) one-hot; anything else is skipped
                pdg = event.genid[k]
                if pdg == 22:
                    flags = (1, 0, 0)
                elif abs(pdg) == 11:
                    flags = (0, 1, 0)
                elif abs(pdg) > 40:
                    flags = (0, 0, 1)
                else:
                    continue
                columns['photon_ID'].append(flags[0])
                columns['electron_ID'].append(flags[1])
                columns['hadron_ID'].append(flags[2])

                # particle parameters (eta/phi are relative to the jet axis)
                columns['pt'].append(event.genpt[k])
                columns['eta'].append(event.genjeteta[jet_idx] - event.geneta[k])
                columns['phi'].append(delta_phi(event.genjetphi[jet_idx], event.genphi[k]))
                columns['et'].append(event.genet[k])

                # jet parameters
                columns['jet_pt'].append(event.genjetpt[jet_idx])
                columns['jet_eta'].append(event.genjeteta[jet_idx])
                columns['jet_phi'].append(event.genjetphi[jet_idx])
                columns['jet_et'].append(event.genjetet[jet_idx])
                columns['jet_index'].append(jet_num)

                columns['classification'].append(is_signal)

            jet_num += 1

        event_num += 1.
        progress.update_progress(event_num / total_num)

    # Every accepted particle appended exactly one value to each column.
    data = np.zeros(len(columns['pt']), dtype=fields)
    for name, _ in fields:
        data[name] = columns[name]
    return data
def convert_regression_data(tree, number=None):
    """
    Convert data for regression

    Only jets with abs(jet id) >= 4 are considered. For each of them the
    status-1 electrons, photons and hadrons that trace back to a tau
    (|id| == 15) are summed into one four-vector; its pt/eta/phi/energy is
    stored as the regression target on every particle row of the jet. Jets
    without any such particle are skipped.

    Return:
        Structured array with one row per accepted jet particle
    Input:
        tree   | Iterable of events (GetEntries() is used when number is falsy)
        number | Stop after this many events (None/0 => all entries)
    """
    fields = [('pt', 'f8'), ('eta', 'f8'), ('phi', 'f8'), ('et', 'f8'),
              ('photon_ID', 'i4'), ('electron_ID', 'i4'), ('hadron_ID', 'i4'),
              ('jet_pt', 'f8'), ('jet_eta', 'f8'), ('jet_phi', 'f8'),
              ('jet_et', 'f8'), ('jet_index', 'i8'), ('tau_pt', 'f8'),
              ('tau_eta', 'f8'), ('tau_phi', 'f8'), ('tau_energy', 'f8')]

    # One growing list per output field.
    columns = {name: [] for name, _ in fields}

    event_num = 0.  # float, so the progress ratio is fractional
    jet_num = 0     # advances only for jets that are written out
    total_num = number if number else int(tree.GetEntries())

    for event in tree:
        if event_num == total_num:
            break

        for jet_idx, jet_id in enumerate(event.genjetid):  # iterate through jet
            if abs(jet_id) < 4:  # consider tau jets only
                continue

            # --- Pass 1: collect the tau decay products inside this jet ---
            particle_vec = []
            for k, owner in enumerate(event.genindex):  # iterate through jet particles
                if owner != jet_idx:
                    continue
                pdg = event.genid[k]
                if event.genstatus[k] != 1:
                    continue
                if not ((pdg in [11, -11, 22]) or (abs(pdg) > 40)):
                    continue

                # Walk up the parent chain until a tau of either sign is
                # found or the chain ends (parent == -2).
                # Fix: the original wrote abs(genid != 15), i.e. abs() of a
                # boolean, so the walk never stopped at id -15.
                index = k
                while (event.genparent[index] != -2) and (abs(event.genid[index]) != 15):
                    index = event.genparent[index]
                if abs(event.genid[index]) != 15:
                    continue

                vec_eta = event.geneta[k]
                vec = TLorentzVector()
                # Energy is built from the stored et: E = et * cosh(eta)
                vec.SetPtEtaPhiE(event.genpt[k], vec_eta, event.genphi[k],
                                 event.genet[k] * np.cosh(vec_eta))
                particle_vec.append(vec)

            if not particle_vec:
                continue

            # Tau target = four-vector sum of the collected particles
            vec_sum = TLorentzVector()
            vec_sum.SetPtEtaPhiE(0., 0., 0., 0.)
            for vec in particle_vec:
                vec_sum += vec
            tau_pt_val = vec_sum.Pt()
            tau_energy_val = vec_sum.E()
            tau_eta_val = vec_sum.Eta()
            tau_phi_val = vec_sum.Phi()

            # --- Pass 2: write out the jet's particles ---
            for k, owner in enumerate(event.genindex):  # iterate through jet particles
                if owner != jet_idx:
                    continue

                # (photon, electron, hadron) one-hot; anything else is skipped
                pdg = event.genid[k]
                if pdg == 22:
                    flags = (1, 0, 0)
                elif abs(pdg) == 11:
                    flags = (0, 1, 0)
                elif abs(pdg) > 40:
                    flags = (0, 0, 1)
                else:
                    continue
                columns['photon_ID'].append(flags[0])
                columns['electron_ID'].append(flags[1])
                columns['hadron_ID'].append(flags[2])

                # particle parameters (eta/phi are relative to the jet axis)
                columns['pt'].append(event.genpt[k])
                columns['eta'].append(event.genjeteta[jet_idx] - event.geneta[k])
                columns['phi'].append(delta_phi(event.genjetphi[jet_idx], event.genphi[k]))
                columns['et'].append(event.genet[k])

                # jet parameters
                columns['jet_pt'].append(event.genjetpt[jet_idx])
                columns['jet_eta'].append(event.genjeteta[jet_idx])
                columns['jet_phi'].append(event.genjetphi[jet_idx])
                columns['jet_et'].append(event.genjetet[jet_idx])
                columns['jet_index'].append(jet_num)

                # tau parameters
                columns['tau_pt'].append(tau_pt_val)
                columns['tau_eta'].append(tau_eta_val)
                columns['tau_phi'].append(tau_phi_val)
                columns['tau_energy'].append(tau_energy_val)

            jet_num += 1

        event_num += 1.
        progress.update_progress(event_num / total_num)

    # Every accepted particle appended exactly one value to each column.
    data = np.zeros(len(columns['pt']), dtype=fields)
    for name, _ in fields:
        data[name] = columns[name]
    return data