def test_corrected_jets_factory():
    """Build corrected jets and MET from the sample file and cross-check the jet pt.

    The test opens ``tests/samples/nano_dy.root``, assembles a ``JECStack``
    from the module-level ``evaluator``, builds corrected jets with
    ``CorrectedJetsFactory`` and then recomputes the correction and smearing
    "by hand" for a small sample of jets.  The hand-computed pt must agree
    with the factory output to within 1e-6.  Finally the corrected MET and all
    of its systematic variations are built and printed.

    Timing and profiler output are printed purely as diagnostics.
    """
    import os

    from coffea.jetmet_tools import (
        CorrectedJetsFactory,
        CorrectedMETFactory,
        FactorizedJetCorrector,
        JECStack,
        JetResolution,
        JetResolutionScaleFactor,
    )
    from coffea.nanoevents import NanoEventsFactory

    def evaluator_subset(names):
        # Map each requested correction name to its entry in the evaluator.
        return {name: evaluator[name] for name in names}

    def sample_jets(array):
        # All but the last two jets of the first event, followed by all but
        # the last jet of the last event.  The original comment describes
        # this as filtering out the non-deterministic (no gen pt) jets.
        return ak.concatenate([array[0, :-2], array[-1, :-1]])

    def smear_factor(jetPt, pt_gen, jersf):
        # 1 + (sf - 1) * (pt - pt_gen) / pt, using column 0 of the scale factor.
        unity = ak.full_like(jetPt, 1.0)
        return unity + (jersf[:, 0] - ak.full_like(jetPt, 1.0)) * (jetPt - pt_gen) / jetPt

    factory = NanoEventsFactory.from_root(
        os.path.abspath("tests/samples/nano_dy.root"))
    events = factory.events()

    # The first four entries are the JEC levels, entry 4 is the resolution
    # and entry 5 the resolution scale factor; the slices below rely on this.
    jec_stack_names = [
        "Summer16_23Sep2016V3_MC_L1FastJet_AK4PFPuppi",
        "Summer16_23Sep2016V3_MC_L2Relative_AK4PFPuppi",
        "Summer16_23Sep2016V3_MC_L2L3Residual_AK4PFPuppi",
        "Summer16_23Sep2016V3_MC_L3Absolute_AK4PFPuppi",
        "Spring16_25nsV10_MC_PtResolution_AK4PFPuppi",
        "Spring16_25nsV10_MC_SF_AK4PFPuppi",
    ]
    jec_stack_names.extend(
        key for key in evaluator.keys()
        if "Summer16_23Sep2016V3_MC_UncertaintySources_AK4PFPuppi" in key)

    jec_stack = JECStack(evaluator_subset(jec_stack_names))

    name_map = jec_stack.blank_name_map
    name_map["JetPt"] = "pt"
    name_map["JetMass"] = "mass"
    name_map["JetEta"] = "eta"
    name_map["JetA"] = "area"

    jets = events.Jet
    raw_fraction = 1 - jets["rawFactor"]
    jets["pt_raw"] = raw_fraction * jets["pt"]
    jets["mass_raw"] = raw_fraction * jets["mass"]
    # Jets without a matched gen jet get pt_gen = 0.
    jets["pt_gen"] = ak.values_astype(ak.fill_none(jets.matched_gen.pt, 0),
                                      np.float32)
    # Broadcast the per-event rho to one value per jet.
    jets["rho"] = ak.broadcast_arrays(events.fixedGridRhoFastjetAll,
                                      jets.pt)[0]

    name_map["ptGenJet"] = "pt_gen"
    name_map["ptRaw"] = "pt_raw"
    name_map["massRaw"] = "mass_raw"
    name_map["Rho"] = "rho"

    jec_cache = cachetools.Cache(np.inf)

    print(name_map)

    tic = time.time()
    jet_factory = CorrectedJetsFactory(name_map, jec_stack)
    toc = time.time()
    print("setup corrected jets time =", toc - tic)

    tic = time.time()
    prof = pyinstrument.Profiler()
    prof.start()
    corrected_jets = jet_factory.build(jets, lazy_cache=jec_cache)
    prof.stop()
    toc = time.time()
    print("corrected_jets build time =", toc - tic)
    print(prof.output_text(unicode=True, color=True, show_all=True))

    tic = time.time()
    print("Generated jet pt:", corrected_jets.pt_gen)
    print("Original jet pt:", corrected_jets.pt_orig)
    print("Raw jet pt:", jets.pt_raw)
    print("Corrected jet pt:", corrected_jets.pt)
    print("Original jet mass:", corrected_jets.mass_orig)
    print("Raw jet mass:", jets["mass_raw"])
    print("Corrected jet mass:", corrected_jets.mass)
    print("jet eta:", jets.eta)
    for unc in jet_factory.uncertainties():
        print(unc)
        print(corrected_jets[unc].up.pt)
        print(corrected_jets[unc].down.pt)
    toc = time.time()
    print("build all jet variations =", toc - tic)

    # Test that the corrections were applied correctly
    scalar_form = ak.without_parameters(jets["pt_raw"]).layout.form
    corrector = FactorizedJetCorrector(**evaluator_subset(jec_stack_names[0:4]))
    corrs = corrector.getCorrection(JetEta=jets["eta"],
                                    Rho=jets["rho"],
                                    JetPt=jets["pt_raw"],
                                    JetA=jets["area"])
    reso = JetResolution(**evaluator_subset(jec_stack_names[4:5]))
    jets["jet_energy_resolution"] = reso.getResolution(
        JetEta=jets["eta"],
        Rho=jets["rho"],
        JetPt=jets["pt_raw"],
        form=scalar_form,
        lazy_cache=jec_cache,
    )
    resosf = JetResolutionScaleFactor(**evaluator_subset(jec_stack_names[5:6]))
    jets["jet_energy_resolution_scale_factor"] = resosf.getScaleFactor(
        JetEta=jets["eta"], lazy_cache=jec_cache)

    test_gen_pt = sample_jets(corrected_jets.pt_gen)
    test_raw_pt = sample_jets(jets.pt_raw)
    test_corrected_pt = sample_jets(corrected_jets.pt)
    # NOTE(review): the "Jet pt (nano)" line below prints the *corrected* pt,
    # exactly as before; confirm whether pt_orig was intended for that label.
    test_pt = test_corrected_pt
    test_eta = sample_jets(jets.eta)
    test_jer = sample_jets(jets.jet_energy_resolution)
    test_jer_sf = sample_jets(jets.jet_energy_resolution_scale_factor)
    test_jec = sample_jets(corrs)

    test_corr_pt = test_raw_pt * test_jec
    test_pt_smear_corr = test_corr_pt * smear_factor(test_corr_pt,
                                                     test_gen_pt,
                                                     test_jer_sf)

    # Print the results of the "by-hand" calculations and confirm that the
    # values match the expected values
    print("\nConfirm the CorrectedJetsFactory values:")
    print("Jet pt (gen)", test_gen_pt.tolist())
    print("Jet pt (raw)", test_raw_pt.tolist())
    print("Jet pt (nano):", test_pt.tolist())
    print("Jet eta:", test_eta.tolist())
    print("Jet energy resolution:", test_jer.tolist())
    print("Jet energy resolution sf:", test_jer_sf.tolist())
    print("Jet energy correction:", test_jec.tolist())
    print("Corrected jet pt (ref)", test_corr_pt.tolist())
    print("Corrected & smeared jet pt (ref):", test_pt_smear_corr.tolist())
    print("Corrected & smeared jet pt:", test_corrected_pt.tolist(), "\n")
    assert ak.all(np.abs(test_pt_smear_corr - test_corrected_pt) < 1e-6)

    name_map["METpt"] = "pt"
    name_map["METphi"] = "phi"
    name_map["JetPhi"] = "phi"
    name_map["UnClusteredEnergyDeltaX"] = "MetUnclustEnUpDeltaX"
    name_map["UnClusteredEnergyDeltaY"] = "MetUnclustEnUpDeltaY"

    tic = time.time()
    met_factory = CorrectedMETFactory(name_map)
    toc = time.time()
    print("setup corrected MET time =", toc - tic)

    met = events.MET
    tic = time.time()
    corrected_met = met_factory.build(met,
                                      corrected_jets,
                                      lazy_cache=jec_cache)
    toc = time.time()
    print("corrected_met build time =", toc - tic)

    tic = time.time()
    print(corrected_met.pt_orig)
    print(corrected_met.pt)
    prof = pyinstrument.Profiler()
    prof.start()
    for unc in jet_factory.uncertainties() + met_factory.uncertainties():
        print(unc)
        print(corrected_met[unc].up.pt)
        print(corrected_met[unc].down.pt)
    prof.stop()
    toc = time.time()
    print("build all met variations =", toc - tic)
    print(prof.output_text(unicode=True, color=True, show_all=True))
def main(args, return_df=False, return_wfsim_instructions=False, strax=False):
    """Call this function from the run_epix script.

    Loads the interactions described by ``args``, clusters them, keeps only
    clusters inside the configured detector volumes, assigns an electric
    field, generates quanta, distributes the events in time and converts the
    result into WFSim row-style instructions.

    :param args: Mapping with the epix configuration.  Keys read here:
        ``debug``, ``path``, ``file_name``, ``outer_cylinder``,
        ``entry_start``, ``entry_stop``, ``micro_separation``,
        ``micro_separation_time``, ``tag_cluster_by``, ``detector_config``,
        ``max_delay``, ``source_rate``, ``job_number`` (fixed-rate mode only),
        ``output_path`` (only when ``return_df`` is set) and, optionally,
        ``cut_by_eventid`` and ``nr_only``.
    :param return_df: If true, the instructions are written as CSV to
        ``<output_path>/<file_name without its last 5 characters>_wfsim_instructions.csv``.
    :param return_wfsim_instructions: If true, the instruction array is
        returned; otherwise the function returns ``None``.
    :param strax: Not used inside this function.
    :return: The instruction array, or ``None`` (see above).
    """
    debug = args['debug']
    if debug:
        print("epix configuration: ", args)
        # TODO: also add memory information (see straxer) and change this to debug
        # Getting time information:
        starttime = time.time()
        tnow = starttime

    # Loading data:
    epix_file_loader = epix.file_loader(
        args['path'],
        args['file_name'],
        args['debug'],
        outer_cylinder=args['outer_cylinder'],
        kwargs={
            'entry_start': args['entry_start'],
            'entry_stop': args['entry_stop']
        },
        cut_by_eventid=args.get('cut_by_eventid', False),
        cut_nr_only=args.get('nr_only', False),
    )
    inter, n_simulated_events = epix_file_loader.load_file()

    if debug:
        tnow = monitor_time(tnow, 'load data.')
        print(
            f"Finding clusters of interactions with a dr = {args['micro_separation']} mm"
            f" and dt = {args['micro_separation_time']} ns")

    # Cluster finding and clustering (convert micro_separation mm -> cm):
    inter = epix.find_cluster(inter, args['micro_separation'] / 10,
                              args['micro_separation_time'])

    if debug:
        tnow = monitor_time(tnow, 'find clusters.')

    result = epix.cluster(inter, args['tag_cluster_by'] == 'energy')

    if debug:
        tnow = monitor_time(tnow, 'merge clusters.')

    # Add eventid and the primary position (x_pri, y_pri, z_pri) again; each
    # is taken from the first interaction of an event and broadcast to the
    # clusters of that event.
    for per_event_field in ('evtid', 'x_pri', 'y_pri', 'z_pri'):
        result[per_event_field] = ak.broadcast_arrays(
            inter[per_event_field][:, 0], result['ed'])[0]

    # Sort detector volumes and keep interactions in selected ones:
    if debug:
        print('Removing clusters not in volumes:',
              *[v.name for v in args['detector_config']])
        print(f'Number of clusters before: {np.sum(ak_num(result["ed"]))}')

    # Returns all interactions which are inside in one of the volumes,
    # Checks for volume overlap, assigns Xe density and create_S2 to
    # interactions. EField comes later since interpolated maps cannot be
    # called inside numba functions.
    res_det = epix.in_sensitive_volume(result, args['detector_config'])

    # Adding new fields to result:
    for field in res_det.fields:
        result[field] = res_det[field]
    m = result['vol_id'] > 0  # All volumes have an id larger zero
    result = result[m]

    # Removing now empty events as a result of the selection above:
    m = ak_num(result['ed']) > 0
    result = result[m]

    if debug:
        print(f'Number of clusters after: {np.sum(ak_num(result["ed"]))}')
        print('Assigning electric field to clusters')

    # Add electric field to array:
    efields = np.zeros(np.sum(ak_num(result)), np.float32)
    # The flattened volume ids do not change inside the loop, compute once.
    flat_vol_ids = epix.awkward_to_flat_numpy(result['vol_id'])
    # Loop over volume and assign values:
    for volume in args['detector_config']:
        if isinstance(volume.electric_field, (float, int)):
            efields[flat_vol_ids == volume.volume_id] = volume.electric_field
        else:
            # NOTE(review): this replaces the field of *all* clusters with the
            # map value, so constant fields assigned by earlier volumes are
            # lost -- the outcome depends on the order of detector_config.
            # Confirm whether only this volume's clusters should be updated.
            efields = volume.electric_field(
                epix.awkward_to_flat_numpy(result.x),
                epix.awkward_to_flat_numpy(result.y),
                epix.awkward_to_flat_numpy(result.z))

    result['e_field'] = epix.reshape_awkward(efields, ak_num(result))

    # Sort entries (in an event) by in time, then chop all delayed
    # events which are too far away from the rest.
    # (This is a requirement of WFSim)
    result = result[ak.argsort(result['t'])]
    dt = calc_dt(result)
    result = result[dt <= args['max_delay']]

    if debug:
        print('Generating photons and electrons for events')
    # Generate quanta:
    if len(result) > 0:
        photons, electrons, excitons = epix.quanta_from_NEST(
            epix.awkward_to_flat_numpy(result['ed']),
            epix.awkward_to_flat_numpy(result['nestid']),
            epix.awkward_to_flat_numpy(result['e_field']),
            epix.awkward_to_flat_numpy(result['A']),
            epix.awkward_to_flat_numpy(result['Z']),
            epix.awkward_to_flat_numpy(result['create_S2']),
            density=epix.awkward_to_flat_numpy(result['xe_density']))
        clusters_per_event = ak_num(result['ed'])
        result['photons'] = epix.reshape_awkward(photons, clusters_per_event)
        result['electrons'] = epix.reshape_awkward(electrons,
                                                   clusters_per_event)
        result['excitons'] = epix.reshape_awkward(excitons,
                                                  clusters_per_event)
    else:
        result['photons'] = np.empty(0)
        result['electrons'] = np.empty(0)
        result['excitons'] = np.empty(0)
    if debug:
        monitor_time(tnow, 'get quanta.')

    # Separate events in time
    number_of_events = len(result["t"])
    if args['source_rate'] == -1:
        # Only needed for a clean separation:
        result['t'] = calc_dt(result)

        dt = epix.times_for_clean_separation(number_of_events,
                                             args['max_delay'])
        if debug:
            print('Clean event separation')
    elif args['source_rate'] == 0:
        # In case no delay should be applied we just add zeros
        dt = np.zeros(number_of_events)
    else:
        # Rate offset computed based on the specified rate and job_id.
        # Assumes all jobs were started with the same number of events.
        offset = (args['job_number'] * n_simulated_events) / args['source_rate']
        dt = epix.times_from_fixed_rate(args['source_rate'],
                                        number_of_events,
                                        n_simulated_events, offset)
        if debug:
            print(f"Fixed event rate of {args['source_rate']} Hz")

    result['t'] = apply_time_offset(result, dt)

    # Reshape instructions:
    # Logical ``and`` (not bitwise ``&``) so that any truthy debug value works.
    if debug and len(result) == 0:
        warnings.warn('No interactions left, return empty DataFrame.')
    instructions = epix.awkward_to_wfsim_row_style(result)
    if args['source_rate'] != 0:
        # Only sort by time again if source rates were applied, otherwise
        # things are already sorted within the events and should stay this way.
        instructions = np.sort(instructions, order='time')

    if return_df:
        # The DataFrame is only needed for the CSV export.
        ins_df = pd.DataFrame(instructions)

        if args['output_path']:
            # exist_ok avoids the check-then-create race of a separate isdir().
            os.makedirs(args['output_path'], exist_ok=True)

        # Strips the last five characters of the file name (a ".root"
        # extension, presumably) before appending the CSV suffix.
        output_path_and_name = os.path.join(
            args['output_path'],
            args['file_name'][:-5] + "_wfsim_instructions.csv")
        if os.path.isfile(output_path_and_name):
            warnings.warn("Output file already exists - Overwriting")
        ins_df.to_csv(output_path_and_name, index=False)

        print('Done')
        print('Instructions saved to ', output_path_and_name)
        if debug:
            monitor_time(starttime, 'run epix.')

    if return_wfsim_instructions:
        return instructions
def applyJEC(self, jets, fixedGridRhoFastjetAll, events_cache, typeJet, isData, JECversion):
    '''Apply jet energy corrections to ``jets`` and return the corrected jets.

    Based on https://coffeateam.github.io/coffea/notebooks/applying_corrections.html#Applying-energy-scale-transformations-to-Jets

    The correction text files are read from ``self.corrJECfolder``; one file
    named ``<JECversion>_<level>_<typeJet>.txt`` is loaded per correction
    level.  The columns ``pt_raw``, ``mass_raw``, ``rho`` (and ``pt_gen``
    when ``isData`` is false) are added to ``jets`` in place.  Diagnostic
    information is printed along the way.

    :param jets: Jet collection providing ``pt``, ``mass``, ``rawFactor``
        and, for simulation, ``matched_gen``.
    :param fixedGridRhoFastjetAll: Per-event rho, broadcast to the jets.
    :param events_cache: Passed as ``lazy_cache`` to the jet factory.
    :param typeJet: Jet type suffix used in the correction file names.
    :param isData: When true no generator-level pt is attached.
    :param JECversion: Prefix of the correction file names.
    :return: The output of ``CorrectedJetsFactory.build``.
    '''
    jec_levels = ('L1FastJet', 'L2Relative', 'L2Residual', 'L3Absolute',
                  'L2L3Residual')
    jec_stack_names = [f'{JECversion}_{level}_{typeJet}' for level in jec_levels]

    ext = lookup_tools.extractor()
    ext.add_weight_sets([
        '* * ' + self.corrJECfolder + '/' + name + '.txt'
        for name in jec_stack_names
    ])
    ext.finalize()
    evaluator = ext.make_evaluator()

    print("available evaluator keys:")
    for key in evaluator.keys():
        print("\t", key)

    jec_inputs = {name: evaluator[name] for name in jec_stack_names}
    # The stand-alone FactorizedJetCorrector that used to be built here was
    # never used; the stack below is all the factory needs.
    for name in jec_inputs:
        print(name, '\n', evaluator[name])
    print(dir(evaluator))
    print()

    jec_stack = JECStack(jec_inputs)

    # Raw (uncorrected) kinematics and the per-jet rho needed by the stack.
    jets['pt_raw'] = (1 - jets['rawFactor']) * jets['pt']
    jets['mass_raw'] = (1 - jets['rawFactor']) * jets['mass']
    jets['rho'] = ak.broadcast_arrays(fixedGridRhoFastjetAll, jets.pt)[0]

    name_map = jec_stack.blank_name_map
    name_map.update({
        'JetPt': 'pt',
        'JetMass': 'mass',
        'JetEta': 'eta',
        'JetA': 'area',
        'ptRaw': 'pt_raw',
        'massRaw': 'mass_raw',
        'Rho': 'rho',
    })
    if not isData:
        # Jets without a matched gen jet get pt_gen = 0.
        jets['pt_gen'] = ak.values_astype(
            ak.fill_none(jets.matched_gen.pt, 0), np.float32)
        name_map['ptGenJet'] = 'pt_gen'

    jet_factory = CorrectedJetsFactory(name_map, jec_stack)
    corrected_jets = jet_factory.build(jets, lazy_cache=events_cache)

    print()
    print('starting columns:', ak.fields(jets))
    print()
    print('untransformed pt ratios', jets.pt / jets.pt_raw)
    print('untransformed mass ratios', jets.mass / jets.mass_raw)
    print('transformed pt ratios', corrected_jets.pt / corrected_jets.pt_raw)
    print('transformed mass ratios',
          corrected_jets.mass / corrected_jets.mass_raw)
    print()
    print('transformed columns:', ak.fields(corrected_jets))
    return corrected_jets
def mass(self):
    """Return an all-zero mass with the same structure as ``self.pt``."""
    # Broadcasting the scalar 0.0 against pt yields one zero per pt entry.
    _, zero_mass = awkward.broadcast_arrays(self.pt, 0.0)
    return zero_mass
def test_corrected_jets_factory():
    """Smoke-test the corrected jet and MET factories on the sample file.

    Opens ``tests/samples/nano_dy.root``, assembles a ``JECStack`` from the
    module-level ``evaluator``, builds the corrected jets and the corrected
    MET and prints the nominal values plus every systematic variation.  The
    first cache of the events is used as the lazy cache.  There are no value
    assertions; the test passes when everything can be built and accessed.
    """
    import os

    from coffea.jetmet_tools import CorrectedJetsFactory, CorrectedMETFactory, JECStack
    from coffea.nanoevents import NanoEventsFactory

    factory = NanoEventsFactory.from_root(
        os.path.abspath('tests/samples/nano_dy.root'))
    events = factory.events()

    jec_stack_names = [
        'Summer16_23Sep2016V3_MC_L1FastJet_AK4PFPuppi',
        'Summer16_23Sep2016V3_MC_L2Relative_AK4PFPuppi',
        'Summer16_23Sep2016V3_MC_L2L3Residual_AK4PFPuppi',
        'Summer16_23Sep2016V3_MC_L3Absolute_AK4PFPuppi',
        'Spring16_25nsV10_MC_PtResolution_AK4PFPuppi',
        'Spring16_25nsV10_MC_SF_AK4PFPuppi'
    ]
    # Add every uncertainty source known to the evaluator.
    jec_stack_names.extend(
        key for key in evaluator.keys()
        if 'Summer16_23Sep2016V3_MC_UncertaintySources_AK4PFPuppi' in key)

    jec_inputs = {name: evaluator[name] for name in jec_stack_names}
    jec_stack = JECStack(jec_inputs)

    jets = events.Jet
    raw_fraction = 1 - jets['rawFactor']
    jets['pt_raw'] = raw_fraction * jets['pt']
    jets['mass_raw'] = raw_fraction * jets['mass']
    # Jets without a matched gen jet get pt_gen = 0.
    jets['pt_gen'] = ak.values_astype(ak.fill_none(jets.matched_gen.pt, 0),
                                      np.float32)
    # Broadcast the per-event rho to one value per jet.
    jets['rho'] = ak.broadcast_arrays(events.fixedGridRhoFastjetAll,
                                      jets.pt)[0]

    name_map = jec_stack.blank_name_map
    name_map.update({
        'JetPt': 'pt',
        'JetMass': 'mass',
        'JetEta': 'eta',
        'JetA': 'area',
        'ptGenJet': 'pt_gen',
        'ptRaw': 'pt_raw',
        'massRaw': 'mass_raw',
        'Rho': 'rho',
    })

    events_cache = events.caches[0]

    print(name_map)

    tic = time.time()
    jet_factory = CorrectedJetsFactory(name_map, jec_stack)
    toc = time.time()
    print('setup corrected jets time =', toc - tic)

    tic = time.time()
    corrected_jets = jet_factory.build(jets, lazy_cache=events_cache)
    toc = time.time()
    print('corrected_jets build time =', toc - tic)

    tic = time.time()
    print(corrected_jets.pt_orig)
    print(corrected_jets.pt)
    for unc in jet_factory.uncertainties():
        print(unc)
        print(corrected_jets[unc].up.pt)
        print(corrected_jets[unc].down.pt)
    toc = time.time()
    print('build all jet variations =', toc - tic)

    name_map.update({
        'METpt': 'pt',
        'METphi': 'phi',
        'METx': 'x',
        'METy': 'y',
        'JETx': 'x',
        'JETy': 'y',
        'xMETRaw': 'x_raw',
        'yMETRaw': 'y_raw',
        'UnClusteredEnergyDeltaX': 'MetUnclustEnUpDeltaX',
        'UnClusteredEnergyDeltaY': 'MetUnclustEnUpDeltaY',
    })

    tic = time.time()
    met_factory = CorrectedMETFactory(name_map)
    toc = time.time()
    print('setup corrected MET time =', toc - tic)

    met = events.MET
    tic = time.time()
    corrected_met = met_factory.build(met,
                                      corrected_jets,
                                      lazy_cache=events_cache)
    toc = time.time()
    print('corrected_met build time =', toc - tic)

    tic = time.time()
    print(corrected_met.pt_orig)
    print(corrected_met.pt)
    for unc in jet_factory.uncertainties() + met_factory.uncertainties():
        print(unc)
        print(corrected_met[unc].up.pt)
        print(corrected_met[unc].down.pt)
    toc = time.time()
    print('build all met variations =', toc - tic)
def awkwardReshape(akArray, npArray):
    """Give ``npArray`` the jagged structure of ``akArray.pt``.

    An empty ``akArray`` yields an empty awkward array.  Otherwise an array
    of ones shaped like ``akArray.pt`` is multiplied by ``npArray``.
    """
    if len(akArray) == 0:
        return ak.Array([])

    # Broadcasting the scalar 1.0 against pt yields one 1.0 per pt entry.
    _, ones_like_pt = ak.broadcast_arrays(akArray.pt, 1.0)
    return ones_like_pt * npArray
def process_jets(events, year, corrections=None):
    """Prepare the jets of ``events`` and optionally apply jet/MET corrections.

    Adds ``pt_raw``, ``mass_raw`` and ``rho`` to the jets (plus ``pt_gen``
    unless the dataset name starts with ``"data_Single"``) and one boolean
    column per b-tag working point found in ``btag_values[year]``.  When
    ``jet_pars["applyJER"] == 1`` and ``corrections`` is given, the matching
    jet and MET factories are used to build corrected objects; otherwise the
    jets and ``events["MET"]`` are returned as they are.

    :param events: Events with ``Jet``, ``MET``, ``fixedGridRhoFastjetAll``
        and a ``metadata["dataset"]`` name.
    :param year: Key into ``btag_values``; also used to find the data era.
    :param corrections: Mapping with ``"DATA"`` (per era) and ``"MC"``
        entries, each holding ``"JetsFactory"`` and ``"METFactory"``.
    :return: Tuple ``(corrected_jets, corrected_met)``.
    :raises ValueError: If no unique era can be determined for a data set.
    """
    dataset = events.metadata["dataset"]
    is_data = dataset.startswith("data_Single")

    jets = events["Jet"]
    jets["pt_raw"] = (1 - jets["rawFactor"]) * jets["pt"]
    jets["mass_raw"] = (1 - jets["rawFactor"]) * jets["mass"]
    # Broadcast the per-event rho to one value per jet.
    jets["rho"] = ak.broadcast_arrays(events.fixedGridRhoFastjetAll,
                                      jets.pt)[0]
    if not is_data:
        # Jets without a matched gen jet get pt_gen = 0.
        jets["pt_gen"] = ak.values_astype(
            ak.fill_none(jets.matched_gen.pt, 0), np.float32)

    ## add btag wps
    for bdiscr, working_points in btag_values[year].items():
        for wp, threshold in working_points.items():
            jets[wp] = jets[bdiscr] > threshold

    ## apply jet corrections
    if jet_pars["applyJER"] != 1 or corrections is None:
        return jets, events["MET"]

    if is_data:
        # Default: correction eras whose key contains whatever follows the
        # year in the dataset name.
        era_suffix = dataset.split(year)[-1]
        era = [key for key in corrections["DATA"].keys() if era_suffix in key]

        # NOTE(review): the checks below look for single letters anywhere in
        # the dataset name, so e.g. "E" also matches "SingleElectron" --
        # confirm the dataset naming makes this safe.
        if year == "2016APV":
            if any(tag in dataset for tag in ("Bv2", "C", "D")):
                era = ["BCD"]
            elif any(tag in dataset for tag in ("E", "F")):
                era = ["EF"]
            else:
                raise ValueError("Era not found for 2016APV dataset.")
        elif year == "2016":
            if any(tag in dataset for tag in ("F", "G", "H")):
                era = ["FGH"]
            else:
                raise ValueError("Era not found for 2016 dataset.")

        if len(era) != 1:
            raise ValueError("Only one era should be used for %s" % dataset)
        factories = corrections["DATA"][era[0]]
    else:
        factories = corrections["MC"]

    jet_factory = factories["JetsFactory"]
    met_factory = factories["METFactory"]

    # Cache entries are weighted by their size in bytes.
    cache = LRUCache(int(1e10), lambda a: a.nbytes)
    corrected_jets = jet_factory.build(jets, lazy_cache=cache)
    corrected_met = met_factory.build(events["MET"],
                                      corrected_jets,
                                      lazy_cache=cache)
    return corrected_jets, corrected_met