def process(self, df):
    """Fill generator-level boson pT histograms for one chunk of events.

    For every tag in ``tags`` the method fills three histograms of
    ``df['gen_v_pt_<tag>']``: inclusive, after the VBF selection and after
    the monojet selection. It also adds the chunk's ``genEventSumw`` and
    ``genEventSumw2`` to the per-dataset weight sums.

    Args:
        df: Event data frame for one chunk; read and written by key.

    Returns:
        The filled accumulator obtained from ``self.accumulator.identity()``.
    """
    output = self.accumulator.identity()
    dataset = df['dataset']

    genjets = setup_lhe_cleaned_genjets(df)

    # Dilepton
    gen = setup_gen_candidates(df)
    tags = ['stat1', 'lhe']
    if (is_lo_w(dataset) or is_nlo_w(dataset) or is_lo_z(dataset)
            or is_nlo_z(dataset)):
        dressed = setup_dressed_gen_candidates(df)
        fill_gen_v_info(df, gen, dressed)
        tags.append('dress')
    elif (is_lo_g(dataset) or is_nlo_g(dataset) or is_lo_g_ewk(dataset)
            or is_nlo_g_ewk(dataset)):
        photons = gen[(gen.status == 1) & (gen.pdg == 22)]
        df['gen_v_pt_stat1'] = photons.pt.max()
        df['gen_v_phi_stat1'] = photons[photons.pt.argmax()].phi.max()
        df['gen_v_pt_lhe'] = df['LHE_Vpt']
        df['gen_v_phi_lhe'] = np.zeros(df.size)
    # NOTE(review): for any other dataset neither branch runs, so the
    # 'gen_v_pt_*' / 'gen_v_phi_*' columns for 'stat1' and 'lhe' must already
    # exist in df -- confirm against the callers.

    # The quantities below do not depend on the tag, so they are computed
    # once instead of once per loop iteration.
    # Dijet for VBF
    dijet = genjets[:, :2].distincts()
    nominal = df['Generator_weight']
    lead_jet_pt = genjets.pt.max()
    mjj = dijet.mass.max()

    for tag in tags:
        v_pt = df[f'gen_v_pt_{tag}']
        v_phi = df[f'gen_v_phi_{tag}']

        # Selection
        vbf_sel = vbf_selection(v_phi, dijet, genjets)
        monojet_sel = monojet_selection(v_phi, genjets)

        output[f'gen_vpt_inclusive_{tag}'].fill(dataset=dataset,
                                                vpt=v_pt,
                                                weight=nominal)

        # Require every cut registered in the VBF selection
        mask_vbf = vbf_sel.all(*vbf_sel.names)
        output[f'gen_vpt_vbf_{tag}'].fill(dataset=dataset,
                                          vpt=v_pt[mask_vbf],
                                          jpt=lead_jet_pt[mask_vbf],
                                          mjj=mjj[mask_vbf],
                                          weight=nominal[mask_vbf])

        # Require every cut registered in the monojet selection
        mask_monojet = monojet_sel.all(*monojet_sel.names)
        output[f'gen_vpt_monojet_{tag}'].fill(dataset=dataset,
                                              vpt=v_pt[mask_monojet],
                                              jpt=lead_jet_pt[mask_monojet],
                                              weight=nominal[mask_monojet])

    # Keep track of weight sum
    output['sumw'][dataset] += df['genEventSumw']
    output['sumw2'][dataset] += df['genEventSumw2']
    return output
def process(self, df):
    """Run the VBF event selection on one chunk and fill all histograms.

    Steps, in order:

    1. Return an empty accumulator if the chunk has no events.
    2. Configure the processor and store dataset-type flags in ``df``.
    3. Determine the generator-level boson pT (``gen_v_pt``) and the
       generator-level dijet mass where applicable.
    4. Build the reconstructed candidates and derived per-event columns.
    5. Register all cuts in a ``PackedSelection``.
    6. Build the event weights (simulation only).
    7. For each region from ``vbfhinv_regions(cfg)``, add region-specific
       weights, then fill cutflows, optional trees and histograms.

    Args:
        df: Event data frame for one chunk; read and written by key.

    Returns:
        The filled accumulator obtained from ``self.accumulator.identity()``.
    """
    if not df.size:
        return self.accumulator.identity()
    self._configure(df)
    dataset = df['dataset']
    df['is_lo_w'] = is_lo_w(dataset)
    df['is_lo_z'] = is_lo_z(dataset)
    df['is_lo_znunu'] = is_lo_znunu(dataset)
    df['is_lo_w_ewk'] = is_lo_w_ewk(dataset)
    df['is_lo_z_ewk'] = is_lo_z_ewk(dataset)
    df['is_lo_g'] = is_lo_g(dataset)
    df['is_nlo_z'] = is_nlo_z(dataset)
    df['is_nlo_w'] = is_nlo_w(dataset)
    df['has_lhe_v_pt'] = df['is_lo_w'] | df['is_lo_z'] | df[
        'is_nlo_z'] | df['is_nlo_w'] | df['is_lo_g'] | df[
            'is_lo_w_ewk'] | df['is_lo_z_ewk']
    df['is_data'] = is_data(dataset)

    # gen_v_pt stays None for every dataset that matches neither branch
    # below; later code checks for None before using it.
    gen_v_pt = None
    if df['is_lo_w'] or df['is_lo_z'] or df['is_nlo_z'] or df[
            'is_nlo_w'] or df['is_lo_z_ewk'] or df['is_lo_w_ewk']:
        gen = setup_gen_candidates(df)
        dressed = setup_dressed_gen_candidates(df)
        fill_gen_v_info(df, gen, dressed)
        gen_v_pt = df['gen_v_pt_combined']
    elif df['is_lo_g']:
        gen = setup_gen_candidates(df)
        all_gen_photons = gen[(gen.pdg == 22)]
        prompt_mask = (all_gen_photons.status == 1) & (all_gen_photons.flag & 1 == 1)
        stat1_mask = (all_gen_photons.status == 1)
        # Use photons passing prompt_mask; in events without any such
        # photon fall back to all status-1 photons.
        gen_photons = all_gen_photons[prompt_mask | (~prompt_mask.any()) & stat1_mask]
        gen_photon = gen_photons[gen_photons.pt.argmax()]
        gen_v_pt = gen_photon.pt.max()

    # Generator-level leading dijet mass
    if df['has_lhe_v_pt']:
        genjets = setup_lhe_cleaned_genjets(df)
        digenjet = genjets[:, :2].distincts()
        df['mjj_gen'] = digenjet.mass.max()
        # Replace non-positive values by 0
        df['mjj_gen'] = np.where(df['mjj_gen'] > 0, df['mjj_gen'], 0)

    # Candidates
    # Already pre-filtered!
    # All leptons are at least loose
    # Check out setup_candidates for filtering details
    met_pt, met_phi, ak4, bjets, _, muons, electrons, taus, photons = setup_candidates(
        df, cfg)

    # Remove jets in accordance with the noise recipe
    if df['year'] == 2017:
        ak4 = ak4[(ak4.ptraw > 50) | (ak4.abseta < 2.65) | (ak4.abseta > 3.139)]
        bjets = bjets[(bjets.ptraw > 50) | (bjets.abseta < 2.65) | (bjets.abseta > 3.139)]

    # Filtering ak4 jets according to pileup ID
    ak4 = ak4[ak4.puid]

    # Muons
    df['is_tight_muon'] = muons.tightId \
        & (muons.iso < cfg.MUON.CUTS.TIGHT.ISO) \
        & (muons.pt > cfg.MUON.CUTS.TIGHT.PT) \
        & (muons.abseta < cfg.MUON.CUTS.TIGHT.ETA)

    dimuons = muons.distincts()
    dimuon_charge = dimuons.i0['charge'] + dimuons.i1['charge']

    # The (counts == 1) factor zeroes the value in events that do not have
    # exactly one muon.
    df['MT_mu'] = ((muons.counts == 1) * mt(muons.pt, muons.phi, met_pt, met_phi)).max()

    # Electrons
    df['is_tight_electron'] = electrons.tightId \
        & (electrons.pt > cfg.ELECTRON.CUTS.TIGHT.PT) \
        & (electrons.absetasc < cfg.ELECTRON.CUTS.TIGHT.ETA)

    dielectrons = electrons.distincts()
    dielectron_charge = dielectrons.i0['charge'] + dielectrons.i1['charge']

    df['MT_el'] = ((electrons.counts == 1) * mt(electrons.pt, electrons.phi, met_pt, met_phi)).max()

    # ak4
    leadak4_index = ak4.pt.argmax()

    elejet_pairs = ak4[:, :1].cross(electrons)
    df['dREleJet'] = np.hypot(
        elejet_pairs.i0.eta - elejet_pairs.i1.eta,
        dphi(elejet_pairs.i0.phi, elejet_pairs.i1.phi)).min()
    muonjet_pairs = ak4[:, :1].cross(muons)
    df['dRMuonJet'] = np.hypot(
        muonjet_pairs.i0.eta - muonjet_pairs.i1.eta,
        dphi(muonjet_pairs.i0.phi, muonjet_pairs.i1.phi)).min()

    # Recoil
    df['recoil_pt'], df['recoil_phi'] = recoil(met_pt, met_phi, electrons, muons, photons)

    df["dPFCaloSR"] = (met_pt - df["CaloMET_pt"]) / met_pt
    df["dPFCaloCR"] = (met_pt - df["CaloMET_pt"]) / df["recoil_pt"]

    df["dPFTkSR"] = (met_pt - df["TkMET_pt"]) / met_pt

    df["minDPhiJetRecoil"] = min_dphi_jet_met(ak4, df['recoil_phi'], njet=4, ptmin=30, etamax=5.0)
    df["minDPhiJetMet"] = min_dphi_jet_met(ak4, met_phi, njet=4, ptmin=30, etamax=5.0)

    selection = processor.PackedSelection()

    # Triggers
    pass_all = np.ones(df.size) == 1
    selection.add('inclusive', pass_all)
    selection = trigger_selection(selection, df, cfg)

    selection.add('mu_pt_trig_safe', muons.pt.max() > 30)

    # Common selection
    selection.add('veto_ele', electrons.counts == 0)
    selection.add('veto_muo', muons.counts == 0)
    selection.add('veto_photon', photons.counts == 0)
    selection.add('veto_tau', taus.counts == 0)
    selection.add('at_least_one_tau', taus.counts > 0)
    selection.add('veto_b', bjets.counts == 0)
    selection.add('mindphijr', df['minDPhiJetRecoil'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
    # NOTE(review): 'mindphijm' reuses the MINDPHIJR threshold -- confirm
    # this is intended.
    selection.add('mindphijm', df['minDPhiJetMet'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
    selection.add('dpfcalo_sr', np.abs(df['dPFCaloSR']) < cfg.SELECTION.SIGNAL.DPFCALO)
    selection.add('dpfcalo_cr', np.abs(df['dPFCaloCR']) < cfg.SELECTION.SIGNAL.DPFCALO)
    selection.add('recoil', df['recoil_pt'] > cfg.SELECTION.SIGNAL.RECOIL)
    selection.add('met_sr', met_pt > cfg.SELECTION.SIGNAL.RECOIL)

    # AK4 dijet
    diak4 = ak4[:, :2].distincts()
    leadak4_pt_eta = (diak4.i0.pt > cfg.SELECTION.SIGNAL.LEADAK4.PT) & (
        np.abs(diak4.i0.eta) < cfg.SELECTION.SIGNAL.LEADAK4.ETA)
    trailak4_pt_eta = (diak4.i1.pt > cfg.SELECTION.SIGNAL.TRAILAK4.PT) & (
        np.abs(diak4.i1.eta) < cfg.SELECTION.SIGNAL.TRAILAK4.ETA)
    hemisphere = (diak4.i0.eta * diak4.i1.eta < 0).any()
    has_track0 = np.abs(diak4.i0.eta) <= 2.5
    has_track1 = np.abs(diak4.i1.eta) <= 2.5

    # The chf/nhf requirements are only applied to jets with |eta| <= 2.5;
    # jets outside that range pass this part automatically.
    leadak4_id = diak4.i0.tightId & (has_track0 * (
        (diak4.i0.chf > cfg.SELECTION.SIGNAL.LEADAK4.CHF) &
        (diak4.i0.nhf < cfg.SELECTION.SIGNAL.LEADAK4.NHF)) + ~has_track0)
    trailak4_id = has_track1 * (
        (diak4.i1.chf > cfg.SELECTION.SIGNAL.TRAILAK4.CHF) &
        (diak4.i1.nhf < cfg.SELECTION.SIGNAL.TRAILAK4.NHF)) + ~has_track1

    df['mjj'] = diak4.mass.max()
    df['dphijj'] = dphi(diak4.i0.phi.min(), diak4.i1.phi.max())
    df['detajj'] = np.abs(diak4.i0.eta - diak4.i1.eta).max()

    leading_jet_in_horn = ((diak4.i0.abseta < 3.2) & (diak4.i0.abseta > 2.8)).any()
    trailing_jet_in_horn = ((diak4.i1.abseta < 3.2) & (diak4.i1.abseta > 2.8)).any()

    selection.add('hornveto', (df['dPFTkSR'] < 0.8) | ~(leading_jet_in_horn | trailing_jet_in_horn))

    if df['year'] == 2018:
        if df['is_data']:
            metphihem_mask = ~((met_phi > -1.8) & (met_phi < -0.6) & (df['run'] > 319077))
        else:
            metphihem_mask = pass_all
        selection.add("metphihemextveto", metphihem_mask)
        selection.add('no_el_in_hem', electrons[electrons_in_hem(electrons)].counts == 0)
    else:
        selection.add("metphihemextveto", pass_all)
        selection.add('no_el_in_hem', pass_all)

    selection.add('two_jets', diak4.counts > 0)
    selection.add('leadak4_pt_eta', leadak4_pt_eta.any())
    selection.add('trailak4_pt_eta', trailak4_pt_eta.any())
    selection.add('hemisphere', hemisphere)
    selection.add('leadak4_id', leadak4_id.any())
    selection.add('trailak4_id', trailak4_id.any())
    selection.add('mjj', df['mjj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.MASS)
    selection.add('dphijj', df['dphijj'] < cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DPHI)
    selection.add('detajj', df['detajj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DETA)

    # Cleaning cuts for signal region
    max_neEmEF = np.maximum(diak4.i0.nef, diak4.i1.nef)
    selection.add('max_neEmEF', (max_neEmEF < 0.7).any())

    vec_b = calculate_vecB(ak4, met_pt, met_phi)
    vec_dphi = calculate_vecDPhi(ak4, met_pt, met_phi, df['TkMET_phi'])

    no_jet_in_trk = (diak4.i0.abseta > 2.5).any() & (diak4.i1.abseta > 2.5).any()
    no_jet_in_hf = (diak4.i0.abseta < 3.0).any() & (diak4.i1.abseta < 3.0).any()

    at_least_one_jet_in_hf = (diak4.i0.abseta > 3.0).any() | (diak4.i1.abseta > 3.0).any()
    at_least_one_jet_in_trk = (diak4.i0.abseta < 2.5).any() | (diak4.i1.abseta < 2.5).any()

    # Categorized cleaning cuts
    eemitigation = ((no_jet_in_hf | at_least_one_jet_in_trk) & (vec_dphi < 1.0)) | (
        (no_jet_in_trk & at_least_one_jet_in_hf) & (vec_b < 0.2))

    selection.add('eemitigation', eemitigation)

    # HF-HF veto in SR
    both_jets_in_hf = (diak4.i0.abseta > 3.0) & (diak4.i1.abseta > 3.0)
    selection.add('veto_hfhf', ~both_jets_in_hf.any())

    # Divide into three categories for trigger study
    if cfg.RUN.TRIGGER_STUDY:
        two_central_jets = (np.abs(diak4.i0.eta) <= 2.4) & (np.abs(
            diak4.i1.eta) <= 2.4)
        two_forward_jets = (np.abs(diak4.i0.eta) > 2.4) & (np.abs(
            diak4.i1.eta) > 2.4)
        one_jet_forward_one_jet_central = (~two_central_jets) & (
            ~two_forward_jets)
        selection.add('two_central_jets', two_central_jets.any())
        selection.add('two_forward_jets', two_forward_jets.any())
        selection.add('one_jet_forward_one_jet_central',
                      one_jet_forward_one_jet_central.any())

    # Dimuon CR
    leadmuon_index = muons.pt.argmax()

    selection.add('at_least_one_tight_mu', df['is_tight_muon'].any())
    selection.add('dimuon_mass', ((dimuons.mass > cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MIN)
                                  & (dimuons.mass < cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MAX)).any())
    selection.add('dimuon_charge', (dimuon_charge == 0).any())
    selection.add('two_muons', muons.counts == 2)

    # Single muon CR
    selection.add('one_muon', muons.counts == 1)
    selection.add('mt_mu', df['MT_mu'] < cfg.SELECTION.CONTROL.SINGLEMU.MT)

    # Diele CR
    leadelectron_index = electrons.pt.argmax()

    selection.add('one_electron', electrons.counts == 1)
    selection.add('two_electrons', electrons.counts == 2)
    selection.add('at_least_one_tight_el', df['is_tight_electron'].any())

    selection.add('dielectron_mass', ((dielectrons.mass > cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MIN)
                                      & (dielectrons.mass < cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MAX)).any())
    selection.add('dielectron_charge', (dielectron_charge == 0).any())

    # Single Ele CR
    selection.add('met_el', met_pt > cfg.SELECTION.CONTROL.SINGLEEL.MET)
    selection.add('mt_el', df['MT_el'] < cfg.SELECTION.CONTROL.SINGLEEL.MT)

    # Photon CR
    leadphoton_index = photons.pt.argmax()

    df['is_tight_photon'] = photons.mediumId & photons.barrel

    selection.add('one_photon', photons.counts == 1)
    selection.add('at_least_one_tight_photon', df['is_tight_photon'].any())
    selection.add('photon_pt', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PT)
    selection.add('photon_pt_trig', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PTTRIG)

    # Fill histograms
    output = self.accumulator.identity()

    # Gen
    if df['has_lhe_v_pt']:
        output['genvpt_check'].fill(vpt=gen_v_pt, type="Nano", dataset=dataset)

    if 'LHE_Njets' in df:
        output['lhe_njets'].fill(dataset=dataset,
                                 multiplicity=df['LHE_Njets'])
    if 'LHE_HT' in df:
        output['lhe_ht'].fill(dataset=dataset, ht=df['LHE_HT'])
    if 'LHE_HTIncoming' in df:
        output['lhe_htinc'].fill(dataset=dataset, ht=df['LHE_HTIncoming'])

    # Weights
    evaluator = evaluator_from_config(cfg)

    weights = processor.Weights(size=df.size, storeIndividual=True)
    if not df['is_data']:
        weights.add('gen', df['Generator_weight'])

        # Fall back to unit weights when the chunk has no PrefireWeight
        try:
            weights.add('prefire', df['PrefireWeight'])
        except KeyError:
            weights.add('prefire', np.ones(df.size))

        weights = candidate_weights(weights, df, evaluator, muons, electrons, photons, cfg)
        weights = pileup_weights(weights, df, evaluator, cfg)
        weights = ak4_em_frac_weights(weights, diak4, evaluator)
        if gen_v_pt is not None:
            weights = theory_weights_vbf(weights, df, evaluator, gen_v_pt, df['mjj_gen'])

    # Save per-event values for synchronization
    if cfg.RUN.KINEMATICS.SAVE:
        for event in cfg.RUN.KINEMATICS.EVENTS:
            mask = df['event'] == event
            if not mask.any():
                continue
            output['kinematics']['event'] += [event]
            output['kinematics']['met'] += [met_pt[mask]]
            output['kinematics']['met_phi'] += [met_phi[mask]]
            output['kinematics']['recoil'] += [df['recoil_pt'][mask]]
            output['kinematics']['recoil_phi'] += [df['recoil_phi'][mask]]

            output['kinematics']['ak4pt0'] += [ak4[leadak4_index][mask].pt]
            output['kinematics']['ak4eta0'] += [ak4[leadak4_index][mask].eta]
            # Apply the mask to the array and then wrap it in a list, like
            # every other entry here. The previous form indexed the
            # one-element list itself with the mask, which raises TypeError.
            output['kinematics']['leadbtag'] += [(ak4.pt.max() < 0)[mask]]

            output['kinematics']['nLooseMu'] += [muons.counts[mask]]
            output['kinematics']['nTightMu'] += [muons[df['is_tight_muon']].counts[mask]]
            output['kinematics']['mupt0'] += [muons[leadmuon_index][mask].pt]
            output['kinematics']['mueta0'] += [muons[leadmuon_index][mask].eta]

            output['kinematics']['nLooseEl'] += [electrons.counts[mask]]
            output['kinematics']['nTightEl'] += [electrons[df['is_tight_electron']].counts[mask]]
            output['kinematics']['elpt0'] += [electrons[leadelectron_index][mask].pt]
            output['kinematics']['eleta0'] += [electrons[leadelectron_index][mask].eta]

            output['kinematics']['nLooseGam'] += [photons.counts[mask]]
            output['kinematics']['nTightGam'] += [photons[df['is_tight_photon']].counts[mask]]
            output['kinematics']['gpt0'] += [photons[leadphoton_index][mask].pt]
            output['kinematics']['geta0'] += [photons[leadphoton_index][mask].eta]

    # Sum of all weights to use for normalization
    # TODO: Deal with systematic variations
    output['nevents'][dataset] += df.size
    if not df['is_data']:
        output['sumw'][dataset] += df['genEventSumw']
        output['sumw2'][dataset] += df['genEventSumw2']
        output['sumw_pileup'][dataset] += weights._weights['pileup'].sum()

    regions = vbfhinv_regions(cfg)

    # Get veto weights (only for MC)
    if not df['is_data']:
        veto_weights = get_veto_weights(df, cfg, evaluator, electrons, muons, taus)

    for region, cuts in regions.items():
        exclude = [None]
        # Work on a copy so region-specific weights do not leak into the
        # next region.
        region_weights = copy.deepcopy(weights)

        if not df['is_data']:
            ### Trigger weights
            if re.match(r'cr_(\d+)e.*', region):
                p_pass_data = 1 - (1 - evaluator["trigger_electron_eff_data"](electrons.etasc, electrons.pt)).prod()
                p_pass_mc = 1 - (1 - evaluator["trigger_electron_eff_mc"](electrons.etasc, electrons.pt)).prod()
                trigger_weight = p_pass_data / p_pass_mc
                trigger_weight[np.isnan(trigger_weight)] = 1
                region_weights.add('trigger', trigger_weight)
            elif re.match(r'cr_(\d+)m.*', region) or re.match('sr_.*', region):
                region_weights.add('trigger_met', evaluator["trigger_met"](df['recoil_pt']))
            elif re.match(r'cr_g.*', region):
                photon_trigger_sf(region_weights, photons, df)

            # Veto weights
            if re.match('.*no_veto.*', region):
                exclude = [
                    "muon_id_iso_tight",
                    "muon_id_tight",
                    "muon_iso_tight",
                    "muon_id_loose",
                    "muon_iso_loose",
                    "ele_reco",
                    "ele_id_tight",
                    "ele_id_loose",
                    "tau_id"
                ]
                region_weights.add("veto", veto_weights.partial_weight(include=["nominal"]))

            # HEM-veto weights for signal region MC
            if re.match('^sr_vbf.*', region) and df['year'] == 2018:
                # Events that lie in the HEM-veto region
                events_to_weight_mask = (met_phi > -1.8) & (met_phi < -0.6)
                # Weight is the "good lumi fraction" for 2018
                weight = 21.1 / 59.7
                hem_weight = np.where(events_to_weight_mask, weight, 1.0)

                region_weights.add("hem_weight", hem_weight)

        # This is the default weight for this region
        rweight = region_weights.partial_weight(exclude=exclude)

        # Blinding
        if (self._blind and df['is_data'] and region.startswith('sr')):
            continue

        # Cutflow plot for signal and control regions
        if any(x in region for x in ["sr", "cr", "tr"]):
            output['cutflow_' + region][dataset]['all'] += df.size
            for icut, cutname in enumerate(cuts):
                output['cutflow_' + region][dataset][cutname] += selection.all(*cuts[:icut + 1]).sum()

        mask = selection.all(*cuts)

        if cfg.RUN.SAVE.TREE:
            if region in ['cr_1e_vbf', 'cr_1m_vbf']:
                # NOTE(review): gen_v_pt is None and df['mjj_gen'] is not
                # set for datasets without a gen-level boson; this branch
                # would fail for them -- confirm tree saving is only
                # enabled for suitable datasets.
                output['tree_int64'][region]["event"] += processor.column_accumulator(df["event"][mask])
                output['tree_float16'][region]["gen_v_pt"] += processor.column_accumulator(np.float16(gen_v_pt[mask]))
                output['tree_float16'][region]["gen_mjj"] += processor.column_accumulator(np.float16(df['mjj_gen'][mask]))
                output['tree_float16'][region]["recoil_pt"] += processor.column_accumulator(np.float16(df["recoil_pt"][mask]))
                output['tree_float16'][region]["recoil_phi"] += processor.column_accumulator(np.float16(df["recoil_phi"][mask]))
                output['tree_float16'][region]["mjj"] += processor.column_accumulator(np.float16(df["mjj"][mask]))

                output['tree_float16'][region]["leadak4_pt"] += processor.column_accumulator(np.float16(diak4.i0.pt[mask]))
                output['tree_float16'][region]["leadak4_eta"] += processor.column_accumulator(np.float16(diak4.i0.eta[mask]))
                output['tree_float16'][region]["leadak4_phi"] += processor.column_accumulator(np.float16(diak4.i0.phi[mask]))

                output['tree_float16'][region]["trailak4_pt"] += processor.column_accumulator(np.float16(diak4.i1.pt[mask]))
                output['tree_float16'][region]["trailak4_eta"] += processor.column_accumulator(np.float16(diak4.i1.eta[mask]))
                output['tree_float16'][region]["trailak4_phi"] += processor.column_accumulator(np.float16(diak4.i1.phi[mask]))

                output['tree_float16'][region]["minDPhiJetRecoil"] += processor.column_accumulator(np.float16(df["minDPhiJetRecoil"][mask]))
                if '_1e_' in region:
                    output['tree_float16'][region]["leadlep_pt"] += processor.column_accumulator(np.float16(electrons.pt.max()[mask]))
                    output['tree_float16'][region]["leadlep_eta"] += processor.column_accumulator(np.float16(electrons[electrons.pt.argmax()].eta.max()[mask]))
                    output['tree_float16'][region]["leadlep_phi"] += processor.column_accumulator(np.float16(electrons[electrons.pt.argmax()].phi.max()[mask]))
                elif '_1m_' in region:
                    output['tree_float16'][region]["leadlep_pt"] += processor.column_accumulator(np.float16(muons.pt.max()[mask]))
                    output['tree_float16'][region]["leadlep_eta"] += processor.column_accumulator(np.float16(muons[muons.pt.argmax()].eta.max()[mask]))
                    output['tree_float16'][region]["leadlep_phi"] += processor.column_accumulator(np.float16(muons[muons.pt.argmax()].phi.max()[mask]))

                for name, w in region_weights._weights.items():
                    output['tree_float16'][region][f"weight_{name}"] += processor.column_accumulator(np.float16(w[mask]))
                output['tree_float16'][region]["weight_total"] += processor.column_accumulator(np.float16(rweight[mask]))
            if region == 'inclusive':
                output['tree_int64'][region]["event"] += processor.column_accumulator(df["event"][mask])
                for name in selection.names:
                    output['tree_bool'][region][name] += processor.column_accumulator(np.bool_(selection.all(*[name])[mask]))

        # Save the event numbers of events passing this selection
        if cfg.RUN.SAVE.PASSING:
            output['selected_events'][region] += list(df['event'][mask])

        # Multiplicities
        def fill_mult(name, candidates):
            output[name].fill(dataset=dataset,
                              region=region,
                              multiplicity=candidates[mask].counts,
                              weight=rweight[mask])

        fill_mult('ak4_mult', ak4[ak4.pt > 30])
        fill_mult('bjet_mult', bjets)
        fill_mult('loose_ele_mult', electrons)
        fill_mult('tight_ele_mult', electrons[df['is_tight_electron']])
        fill_mult('loose_muo_mult', muons)
        fill_mult('tight_muo_mult', muons[df['is_tight_muon']])
        fill_mult('tau_mult', taus)
        fill_mult('photon_mult', photons)

        def ezfill(name, **kwargs):
            """Helper function to make filling easier."""
            output[name].fill(dataset=dataset, region=region, **kwargs)

        # Monitor weights
        for wname, wvalue in region_weights._weights.items():
            ezfill("weights", weight_type=wname, weight_value=wvalue[mask])
            ezfill("weights_wide", weight_type=wname, weight_value=wvalue[mask])

        # All ak4
        # This is a workaround to create a weight array of the right dimension
        w_alljets = weight_shape(ak4[mask].eta, rweight[mask])
        w_alljets_nopref = weight_shape(
            ak4[mask].eta,
            region_weights.partial_weight(exclude=exclude + ['prefire'])[mask])

        ezfill('ak4_eta', jeteta=ak4[mask].eta.flatten(), weight=w_alljets)
        ezfill('ak4_phi', jetphi=ak4[mask].phi.flatten(), weight=w_alljets)
        ezfill('ak4_pt', jetpt=ak4[mask].pt.flatten(), weight=w_alljets)

        ezfill('ak4_eta_nopref', jeteta=ak4[mask].eta.flatten(), weight=w_alljets_nopref)
        ezfill('ak4_phi_nopref', jetphi=ak4[mask].phi.flatten(), weight=w_alljets_nopref)
        ezfill('ak4_pt_nopref', jetpt=ak4[mask].pt.flatten(), weight=w_alljets_nopref)

        # Leading ak4
        w_diak4 = weight_shape(diak4.pt[mask], rweight[mask])
        ezfill('ak4_eta0', jeteta=diak4.i0.eta[mask].flatten(), weight=w_diak4)
        ezfill('ak4_phi0', jetphi=diak4.i0.phi[mask].flatten(), weight=w_diak4)
        ezfill('ak4_pt0', jetpt=diak4.i0.pt[mask].flatten(), weight=w_diak4)
        ezfill('ak4_ptraw0', jetpt=diak4.i0.ptraw[mask].flatten(), weight=w_diak4)
        ezfill('ak4_chf0', frac=diak4.i0.chf[mask].flatten(), weight=w_diak4)
        ezfill('ak4_nhf0', frac=diak4.i0.nhf[mask].flatten(), weight=w_diak4)
        ezfill('ak4_nconst0', nconst=diak4.i0.nconst[mask].flatten(), weight=w_diak4)

        # Trailing ak4
        ezfill('ak4_eta1', jeteta=diak4.i1.eta[mask].flatten(), weight=w_diak4)
        ezfill('ak4_phi1', jetphi=diak4.i1.phi[mask].flatten(), weight=w_diak4)
        ezfill('ak4_pt1', jetpt=diak4.i1.pt[mask].flatten(), weight=w_diak4)
        ezfill('ak4_ptraw1', jetpt=diak4.i1.ptraw[mask].flatten(), weight=w_diak4)
        ezfill('ak4_chf1', frac=diak4.i1.chf[mask].flatten(), weight=w_diak4)
        ezfill('ak4_nhf1', frac=diak4.i1.nhf[mask].flatten(), weight=w_diak4)
        ezfill('ak4_nconst1', nconst=diak4.i1.nconst[mask].flatten(), weight=w_diak4)

        # B tag discriminator
        btag = getattr(ak4, cfg.BTAG.ALGO)
        w_btag = weight_shape(btag[mask], rweight[mask])
        ezfill('ak4_btag', btag=btag[mask].flatten(), weight=w_btag)

        # MET
        ezfill('dpfcalo_cr', dpfcalo=df["dPFCaloCR"][mask], weight=rweight[mask])
        ezfill('dpfcalo_sr', dpfcalo=df["dPFCaloSR"][mask], weight=rweight[mask])
        ezfill('met', met=met_pt[mask], weight=rweight[mask])
        ezfill('met_phi', phi=met_phi[mask], weight=rweight[mask])
        ezfill('recoil', recoil=df["recoil_pt"][mask], weight=rweight[mask])
        ezfill('recoil_phi', phi=df["recoil_phi"][mask], weight=rweight[mask])
        ezfill('dphijm', dphi=df["minDPhiJetMet"][mask], weight=rweight[mask])
        ezfill('dphijr', dphi=df["minDPhiJetRecoil"][mask], weight=rweight[mask])

        ezfill('dphijj', dphi=df["dphijj"][mask], weight=rweight[mask])
        ezfill('detajj', deta=df["detajj"][mask], weight=rweight[mask])
        ezfill('mjj', mjj=df["mjj"][mask], weight=rweight[mask])

        if gen_v_pt is not None:
            ezfill('gen_vpt', vpt=gen_v_pt[mask], weight=df['Generator_weight'][mask])
            ezfill('gen_mjj', mjj=df['mjj_gen'][mask], weight=df['Generator_weight'][mask])

        # Photon CR data-driven QCD estimate
        if df['is_data'] and re.match("cr_g.*", region) and re.match(
                "(SinglePhoton|EGamma).*", dataset):
            w_imp = photon_impurity_weights(
                photons[leadphoton_index].pt.max()[mask], df["year"])

            output['mjj'].fill(dataset=data_driven_qcd_dataset(dataset),
                               region=region,
                               mjj=df["mjj"][mask],
                               weight=rweight[mask] * w_imp)
            output['recoil'].fill(dataset=data_driven_qcd_dataset(dataset),
                                  region=region,
                                  recoil=df["recoil_pt"][mask],
                                  weight=rweight[mask] * w_imp)

        # Uncertainty variations
        if df['is_lo_z'] or df['is_nlo_z'] or df['is_lo_z_ewk']:
            theory_uncs = [x for x in cfg.SF.keys() if x.startswith('unc')]
            for unc in theory_uncs:
                reweight = evaluator[unc](gen_v_pt)
                w = (region_weights.weight() * reweight)[mask]
                ezfill('mjj_unc',
                       mjj=df['mjj'][mask],
                       uncertainty=unc,
                       weight=w)

        # Two dimensional
        ezfill('recoil_mjj',
               recoil=df["recoil_pt"][mask],
               mjj=df["mjj"][mask],
               weight=rweight[mask])

        # Muons
        if '_1m_' in region or '_2m_' in region or 'no_veto' in region:
            w_allmu = weight_shape(muons.pt[mask], rweight[mask])
            ezfill('muon_pt', pt=muons.pt[mask].flatten(), weight=w_allmu)
            ezfill('muon_pt_abseta',
                   pt=muons.pt[mask].flatten(),
                   abseta=muons.eta[mask].flatten(),
                   weight=w_allmu)
            ezfill('muon_mt', mt=df['MT_mu'][mask], weight=rweight[mask])
            ezfill('muon_eta', eta=muons.eta[mask].flatten(), weight=w_allmu)
            ezfill('muon_phi', phi=muons.phi[mask].flatten(), weight=w_allmu)

        # Dimuon
        if '_2m_' in region:
            w_dimu = weight_shape(dimuons.pt[mask], rweight[mask])
            ezfill('muon_pt0', pt=dimuons.i0.pt[mask].flatten(), weight=w_dimu)
            ezfill('muon_pt1', pt=dimuons.i1.pt[mask].flatten(), weight=w_dimu)
            ezfill('muon_eta0', eta=dimuons.i0.eta[mask].flatten(), weight=w_dimu)
            ezfill('muon_eta1', eta=dimuons.i1.eta[mask].flatten(), weight=w_dimu)
            ezfill('muon_phi0', phi=dimuons.i0.phi[mask].flatten(), weight=w_dimu)
            ezfill('muon_phi1', phi=dimuons.i1.phi[mask].flatten(), weight=w_dimu)
            ezfill('dimuon_pt', pt=dimuons.pt[mask].flatten(), weight=w_dimu)
            ezfill('dimuon_eta', eta=dimuons.eta[mask].flatten(), weight=w_dimu)
            ezfill('dimuon_mass',
                   dilepton_mass=dimuons.mass[mask].flatten(),
                   weight=w_dimu)

        # Electrons
        if '_1e_' in region or '_2e_' in region or 'no_veto' in region:
            w_allel = weight_shape(electrons.pt[mask], rweight[mask])
            ezfill('electron_pt', pt=electrons.pt[mask].flatten(), weight=w_allel)
            ezfill('electron_pt_eta',
                   pt=electrons.pt[mask].flatten(),
                   eta=electrons.eta[mask].flatten(),
                   weight=w_allel)
            ezfill('electron_mt', mt=df['MT_el'][mask], weight=rweight[mask])
            ezfill('electron_eta', eta=electrons.eta[mask].flatten(), weight=w_allel)
            ezfill('electron_phi', phi=electrons.phi[mask].flatten(), weight=w_allel)

        # Dielectron
        if '_2e_' in region:
            w_diel = weight_shape(dielectrons.pt[mask], rweight[mask])
            ezfill('electron_pt0', pt=dielectrons.i0.pt[mask].flatten(), weight=w_diel)
            ezfill('electron_pt1', pt=dielectrons.i1.pt[mask].flatten(), weight=w_diel)
            ezfill('electron_eta0', eta=dielectrons.i0.eta[mask].flatten(), weight=w_diel)
            ezfill('electron_eta1', eta=dielectrons.i1.eta[mask].flatten(), weight=w_diel)
            ezfill('electron_phi0', phi=dielectrons.i0.phi[mask].flatten(), weight=w_diel)
            ezfill('electron_phi1', phi=dielectrons.i1.phi[mask].flatten(), weight=w_diel)
            ezfill('dielectron_pt', pt=dielectrons.pt[mask].flatten(), weight=w_diel)
            ezfill('dielectron_eta', eta=dielectrons.eta[mask].flatten(), weight=w_diel)
            ezfill('dielectron_mass',
                   dilepton_mass=dielectrons.mass[mask].flatten(),
                   weight=w_diel)

        # Photon
        if '_g_' in region:
            w_leading_photon = weight_shape(
                photons[leadphoton_index].pt[mask], rweight[mask])
            ezfill('photon_pt0',
                   pt=photons[leadphoton_index].pt[mask].flatten(),
                   weight=w_leading_photon)
            ezfill('photon_eta0',
                   eta=photons[leadphoton_index].eta[mask].flatten(),
                   weight=w_leading_photon)
            ezfill('photon_phi0',
                   phi=photons[leadphoton_index].phi[mask].flatten(),
                   weight=w_leading_photon)
            ezfill('photon_pt0_recoil',
                   pt=photons[leadphoton_index].pt[mask].flatten(),
                   recoil=df['recoil_pt'][mask & (leadphoton_index.counts > 0)],
                   weight=w_leading_photon)
            ezfill('photon_eta_phi',
                   eta=photons[leadphoton_index].eta[mask].flatten(),
                   phi=photons[leadphoton_index].phi[mask].flatten(),
                   weight=w_leading_photon)

            # w_drphoton_jet = weight_shape(df['dRPhotonJet'][mask], rweight[mask])

        # Tau
        if 'no_veto' in region:
            w_all_taus = weight_shape(taus.pt[mask], rweight[mask])
            ezfill("tau_pt", pt=taus.pt[mask].flatten(), weight=w_all_taus)

        # PV
        ezfill('npv', nvtx=df['PV_npvs'][mask], weight=rweight[mask])
        ezfill('npvgood', nvtx=df['PV_npvsGood'][mask], weight=rweight[mask])

        ezfill('npv_nopu',
               nvtx=df['PV_npvs'][mask],
               weight=region_weights.partial_weight(exclude=exclude + ['pileup'])[mask])
        ezfill('npvgood_nopu',
               nvtx=df['PV_npvsGood'][mask],
               weight=region_weights.partial_weight(exclude=exclude + ['pileup'])[mask])

        ezfill('rho_all',
               rho=df['fixedGridRhoFastjetAll'][mask],
               weight=region_weights.partial_weight(exclude=exclude)[mask])
        ezfill('rho_central',
               rho=df['fixedGridRhoFastjetCentral'][mask],
               weight=region_weights.partial_weight(exclude=exclude)[mask])
        ezfill('rho_all_nopu',
               rho=df['fixedGridRhoFastjetAll'][mask],
               weight=region_weights.partial_weight(exclude=exclude + ['pileup'])[mask])
        ezfill('rho_central_nopu',
               rho=df['fixedGridRhoFastjetCentral'][mask],
               weight=region_weights.partial_weight(exclude=exclude + ['pileup'])[mask])
    return output
def process(self, df):
    """Run the VBF selection on one chunk of events and fill histograms.

    The method proceeds in stages:

    1. Flag the dataset type (LO/NLO W, Z, photon, EWK, data) on ``df``.
    2. For samples flagged ``has_lhe_v_pt``, compute the generator-level
       boson pt and the generator-level leading dijet mass.
    3. Build reconstructed candidates and derived per-event quantities
       (transverse masses, recoil, dijet kinematics, ...).
    4. Register every individual cut in a ``PackedSelection``.
    5. Build the event weights (simulation only).
    6. For each region returned by ``vbfhinv_regions(cfg)``, fill the
       cutflow and all histograms for events passing the region's cuts.

    Args:
        df: Event chunk. It is read like a mapping from column name to
            array, must expose ``.size``, and is modified in place (derived
            columns are written back into it).
            NOTE(review): presumably a coffea dataframe -- confirm.

    Returns:
        The filled accumulator. If ``df.size`` is falsy, an empty
        ``self.accumulator.identity()`` is returned immediately.
    """
    if not df.size:
        return self.accumulator.identity()

    self._configure(df)
    dataset = df['dataset']

    # Dataset-type flags, derived from the dataset name.
    df['is_lo_w'] = is_lo_w(dataset)
    df['is_lo_z'] = is_lo_z(dataset)
    df['is_lo_w_ewk'] = is_lo_w_ewk(dataset)
    df['is_lo_z_ewk'] = is_lo_z_ewk(dataset)
    df['is_lo_g'] = is_lo_g(dataset)
    df['is_nlo_z'] = is_nlo_z(dataset)
    df['is_nlo_w'] = is_nlo_w(dataset)
    df['has_lhe_v_pt'] = (df['is_lo_w'] | df['is_lo_z'] | df['is_nlo_z']
                          | df['is_nlo_w'] | df['is_lo_g']
                          | df['is_lo_w_ewk'] | df['is_lo_z_ewk'])
    df['is_data'] = is_data(dataset)

    # Generator-level boson pt; stays None for samples without a gen boson.
    gen_v_pt = None
    if (df['is_lo_w'] or df['is_lo_z'] or df['is_nlo_z'] or df['is_nlo_w']
            or df['is_lo_z_ewk'] or df['is_lo_w_ewk']):
        gen = setup_gen_candidates(df)
        dressed = setup_dressed_gen_candidates(df)
        fill_gen_v_info(df, gen, dressed)
        gen_v_pt = df['gen_v_pt_dress']
    elif df['is_lo_g']:
        gen = setup_gen_candidates(df)
        gen_v_pt = gen[(gen.pdg == 22) & (gen.status == 1)].pt.max()

    # Generator-level leading dijet mass
    if df['has_lhe_v_pt']:
        genjets = setup_lhe_cleaned_genjets(df)
        digenjet = genjets[:, :2].distincts()
        df['mjj_gen'] = digenjet.mass.max()

    # Candidates
    # Already pre-filtered!
    # All leptons are at least loose
    # Check out setup_candidates for filtering details
    met_pt, met_phi, ak4, bjets, _, muons, electrons, taus, photons = \
        setup_candidates(df, cfg)

    # Filtering ak4 jets according to pileup ID
    ak4 = ak4[ak4.puid]
    bjets = bjets[bjets.puid]

    # Muons
    df['is_tight_muon'] = muons.tightId \
        & (muons.iso < cfg.MUON.CUTS.TIGHT.ISO) \
        & (muons.pt > cfg.MUON.CUTS.TIGHT.PT) \
        & (muons.abseta < cfg.MUON.CUTS.TIGHT.ETA)
    dimuons = muons.distincts()
    dimuon_charge = dimuons.i0['charge'] + dimuons.i1['charge']
    # The (counts == 1) factor zeroes MT for events without exactly one muon.
    df['MT_mu'] = ((muons.counts == 1)
                   * mt(muons.pt, muons.phi, met_pt, met_phi)).max()

    # Electrons
    df['is_tight_electron'] = electrons.tightId \
        & (electrons.pt > cfg.ELECTRON.CUTS.TIGHT.PT) \
        & (electrons.abseta < cfg.ELECTRON.CUTS.TIGHT.ETA)
    dielectrons = electrons.distincts()
    dielectron_charge = dielectrons.i0['charge'] + dielectrons.i1['charge']
    df['MT_el'] = ((electrons.counts == 1)
                   * mt(electrons.pt, electrons.phi, met_pt, met_phi)).max()

    # ak4
    leadak4_index = ak4.pt.argmax()

    elejet_pairs = ak4[:, :1].cross(electrons)
    df['dREleJet'] = np.hypot(
        elejet_pairs.i0.eta - elejet_pairs.i1.eta,
        dphi(elejet_pairs.i0.phi, elejet_pairs.i1.phi)).min()
    muonjet_pairs = ak4[:, :1].cross(muons)
    df['dRMuonJet'] = np.hypot(
        muonjet_pairs.i0.eta - muonjet_pairs.i1.eta,
        dphi(muonjet_pairs.i0.phi, muonjet_pairs.i1.phi)).min()

    # Recoil
    df['recoil_pt'], df['recoil_phi'] = recoil(met_pt, met_phi, electrons,
                                               muons, photons)
    df["dPFCalo"] = (met_pt - df["CaloMET_pt"]) / df["recoil_pt"]
    df["minDPhiJetRecoil"] = min_dphi_jet_met(ak4, df['recoil_phi'],
                                              njet=4, ptmin=30, etamax=4.7)
    df["minDPhiJetMet"] = min_dphi_jet_met(ak4, met_phi,
                                           njet=4, ptmin=30, etamax=4.7)

    selection = processor.PackedSelection()

    # Triggers
    pass_all = np.ones(df.size) == 1
    selection.add('inclusive', pass_all)
    selection = trigger_selection(selection, df, cfg)
    selection.add('mu_pt_trig_safe', muons.pt.max() > 30)

    # Common selection
    selection.add('veto_ele', electrons.counts == 0)
    selection.add('veto_muo', muons.counts == 0)
    selection.add('veto_photon', photons.counts == 0)
    selection.add('veto_tau', taus.counts == 0)
    selection.add('veto_b', bjets.counts == 0)
    selection.add('mindphijr',
                  df['minDPhiJetRecoil'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
    selection.add('dpfcalo',
                  np.abs(df['dPFCalo']) < cfg.SELECTION.SIGNAL.DPFCALO)
    selection.add('recoil', df['recoil_pt'] > cfg.SELECTION.SIGNAL.RECOIL)

    # The HEM veto is only applied for 2018 datasets, when enabled in the
    # configuration and not running in sync mode; otherwise it passes all.
    if (cfg.MITIGATION.HEM and extract_year(df['dataset']) == 2018
            and not cfg.RUN.SYNC):
        selection.add('hemveto', df['hemveto'])
    else:
        selection.add('hemveto', np.ones(df.size) == 1)

    # AK4 dijet
    diak4 = ak4[:, :2].distincts()
    leadak4_pt_eta = (diak4.i0.pt > cfg.SELECTION.SIGNAL.LEADAK4.PT) & (
        np.abs(diak4.i0.eta) < cfg.SELECTION.SIGNAL.LEADAK4.ETA)
    trailak4_pt_eta = (diak4.i1.pt > cfg.SELECTION.SIGNAL.TRAILAK4.PT) & (
        np.abs(diak4.i1.eta) < cfg.SELECTION.SIGNAL.TRAILAK4.ETA)
    hemisphere = (diak4.i0.eta * diak4.i1.eta < 0).any()

    # Energy-fraction cuts are applied only to jets with |eta| <= 2.5;
    # jets outside that range pass the fraction requirement automatically.
    has_track0 = np.abs(diak4.i0.eta) <= 2.5
    has_track1 = np.abs(diak4.i1.eta) <= 2.5
    leadak4_id = diak4.i0.tightId & (has_track0 * (
        (diak4.i0.chf > cfg.SELECTION.SIGNAL.LEADAK4.CHF)
        & (diak4.i0.nhf < cfg.SELECTION.SIGNAL.LEADAK4.NHF)) + ~has_track0)
    # NOTE(review): unlike the leading jet, no tightId requirement is
    # applied to the trailing jet here -- confirm this is intentional.
    trailak4_id = has_track1 * (
        (diak4.i1.chf > cfg.SELECTION.SIGNAL.TRAILAK4.CHF)
        & (diak4.i1.nhf < cfg.SELECTION.SIGNAL.TRAILAK4.NHF)) + ~has_track1

    df['mjj'] = diak4.mass.max()
    df['dphijj'] = dphi(diak4.i0.phi.min(), diak4.i1.phi.max())
    df['detajj'] = np.abs(diak4.i0.eta - diak4.i1.eta).max()

    selection.add('two_jets', diak4.counts > 0)
    selection.add('leadak4_pt_eta', leadak4_pt_eta.any())
    selection.add('trailak4_pt_eta', trailak4_pt_eta.any())
    selection.add('hemisphere', hemisphere)
    selection.add('leadak4_id', leadak4_id.any())
    selection.add('trailak4_id', trailak4_id.any())
    selection.add('mjj',
                  df['mjj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.MASS)
    selection.add('dphijj',
                  df['dphijj'] < cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DPHI)
    selection.add('detajj',
                  df['detajj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DETA)

    # Divide into three categories for trigger study
    if cfg.RUN.TRIGGER_STUDY:
        two_central_jets = (np.abs(diak4.i0.eta) <= 2.4) & (
            np.abs(diak4.i1.eta) <= 2.4)
        two_forward_jets = (np.abs(diak4.i0.eta) > 2.4) & (
            np.abs(diak4.i1.eta) > 2.4)
        one_jet_forward_one_jet_central = (~two_central_jets) & (
            ~two_forward_jets)
        selection.add('two_central_jets', two_central_jets.any())
        selection.add('two_forward_jets', two_forward_jets.any())
        selection.add('one_jet_forward_one_jet_central',
                      one_jet_forward_one_jet_central.any())

    # Dimuon CR
    leadmuon_index = muons.pt.argmax()
    selection.add('at_least_one_tight_mu', df['is_tight_muon'].any())
    selection.add(
        'dimuon_mass',
        ((dimuons.mass > cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MIN)
         & (dimuons.mass < cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MAX)).any())
    selection.add('dimuon_charge', (dimuon_charge == 0).any())
    selection.add('two_muons', muons.counts == 2)

    # Single muon CR
    selection.add('one_muon', muons.counts == 1)
    selection.add('mt_mu', df['MT_mu'] < cfg.SELECTION.CONTROL.SINGLEMU.MT)

    # Diele CR
    leadelectron_index = electrons.pt.argmax()
    selection.add('one_electron', electrons.counts == 1)
    selection.add('two_electrons', electrons.counts == 2)
    selection.add('at_least_one_tight_el', df['is_tight_electron'].any())
    selection.add(
        'dielectron_mass',
        ((dielectrons.mass > cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MIN)
         & (dielectrons.mass < cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MAX)).any())
    selection.add('dielectron_charge', (dielectron_charge == 0).any())

    # Single Ele CR
    selection.add('met_el', met_pt > cfg.SELECTION.CONTROL.SINGLEEL.MET)
    selection.add('mt_el', df['MT_el'] < cfg.SELECTION.CONTROL.SINGLEEL.MT)

    # Photon CR
    leadphoton_index = photons.pt.argmax()
    df['is_tight_photon'] = photons.mediumId \
        & (photons.abseta < cfg.PHOTON.CUTS.TIGHT.ETA)
    selection.add('one_photon', photons.counts == 1)
    selection.add('at_least_one_tight_photon', df['is_tight_photon'].any())
    selection.add('photon_pt', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PT)
    selection.add('photon_pt_trig',
                  photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PTTRIG)

    # Fill histograms
    output = self.accumulator.identity()

    # Gen
    if df['has_lhe_v_pt']:
        output['genvpt_check'].fill(vpt=gen_v_pt, type="Nano",
                                    dataset=dataset)
    if 'LHE_Njets' in df:
        output['lhe_njets'].fill(dataset=dataset,
                                 multiplicity=df['LHE_Njets'])
    if 'LHE_HT' in df:
        output['lhe_ht'].fill(dataset=dataset, ht=df['LHE_HT'])
    if 'LHE_HTIncoming' in df:
        output['lhe_htinc'].fill(dataset=dataset, ht=df['LHE_HTIncoming'])

    # Weights
    evaluator = evaluator_from_config(cfg)
    weights = processor.Weights(size=df.size, storeIndividual=True)
    if not df['is_data']:
        weights.add('gen', df['Generator_weight'])
        # Not every input provides a prefiring weight; fall back to unity.
        try:
            weights.add('prefire', df['PrefireWeight'])
        except KeyError:
            weights.add('prefire', np.ones(df.size))
        weights = candidate_weights(weights, df, evaluator, muons,
                                    electrons, photons)
        weights = pileup_weights(weights, df, evaluator, cfg)
        if gen_v_pt is not None:
            weights = theory_weights_vbf(weights, df, evaluator, gen_v_pt,
                                         df['mjj_gen'])

    # Save per-event values for synchronization
    if cfg.RUN.KINEMATICS.SAVE:
        kinematics = output['kinematics']
        for event in cfg.RUN.KINEMATICS.EVENTS:
            event_mask = df['event'] == event
            if not event_mask.any():
                continue
            kinematics['event'] += [event]
            kinematics['met'] += [met_pt[event_mask]]
            kinematics['met_phi'] += [met_phi[event_mask]]
            kinematics['recoil'] += [df['recoil_pt'][event_mask]]
            kinematics['recoil_phi'] += [df['recoil_phi'][event_mask]]
            kinematics['ak4pt0'] += [ak4[leadak4_index][event_mask].pt]
            kinematics['ak4eta0'] += [ak4[leadak4_index][event_mask].eta]
            # Mask the array first, then wrap it in a list: indexing the
            # one-element list itself with a boolean array raises TypeError.
            # NOTE(review): `pt.max() < 0` looks like a placeholder value
            # for the b-tag entry -- confirm what should be stored here.
            kinematics['leadbtag'] += [(ak4.pt.max() < 0)[event_mask]]
            kinematics['nLooseMu'] += [muons.counts[event_mask]]
            kinematics['nTightMu'] += [
                muons[df['is_tight_muon']].counts[event_mask]
            ]
            kinematics['mupt0'] += [muons[leadmuon_index][event_mask].pt]
            kinematics['mueta0'] += [muons[leadmuon_index][event_mask].eta]
            kinematics['nLooseEl'] += [electrons.counts[event_mask]]
            kinematics['nTightEl'] += [
                electrons[df['is_tight_electron']].counts[event_mask]
            ]
            kinematics['elpt0'] += [
                electrons[leadelectron_index][event_mask].pt
            ]
            kinematics['eleta0'] += [
                electrons[leadelectron_index][event_mask].eta
            ]
            kinematics['nLooseGam'] += [photons.counts[event_mask]]
            kinematics['nTightGam'] += [
                photons[df['is_tight_photon']].counts[event_mask]
            ]
            kinematics['gpt0'] += [photons[leadphoton_index][event_mask].pt]
            kinematics['geta0'] += [
                photons[leadphoton_index][event_mask].eta
            ]

    # Sum of all weights to use for normalization
    # TODO: Deal with systematic variations
    output['nevents'][dataset] += df.size
    if not df['is_data']:
        output['sumw'][dataset] += df['genEventSumw']
        output['sumw2'][dataset] += df['genEventSumw2']
        output['sumw_pileup'][dataset] += weights._weights['pileup'].sum()

    # The weights are not modified inside the region loop, so the full and
    # partial weight products are computed once and only masked per region.
    nominal_weight = weights.weight()
    weight_nopref = weights.partial_weight(exclude=['prefire'])
    weight_nopu = weights.partial_weight(exclude=['pileup'])

    regions = vbfhinv_regions(cfg)
    for region, cuts in regions.items():
        # Blinding
        if (self._blind and df['is_data'] and region.startswith('sr')):
            continue

        # Cutflow plot for signal and control regions
        if any(x in region for x in ["sr", "cr", "tr"]):
            output['cutflow_' + region]['all'] += df.size
            for icut, cutname in enumerate(cuts):
                output['cutflow_' + region][cutname] += selection.all(
                    *cuts[:icut + 1]).sum()

        mask = selection.all(*cuts)
        rweight = nominal_weight[mask]
        rweight_nopu = weight_nopu[mask]

        # Save the event numbers of events passing this selection
        if cfg.RUN.SAVE.PASSING:
            output['selected_events'][region] += list(df['event'][mask])

        # Multiplicities
        def fill_mult(name, candidates):
            """Fill the per-event multiplicity of ``candidates``."""
            output[name].fill(dataset=dataset,
                              region=region,
                              multiplicity=candidates[mask].counts,
                              weight=rweight)

        fill_mult('ak4_mult', ak4)
        fill_mult('bjet_mult', bjets)
        fill_mult('loose_ele_mult', electrons)
        fill_mult('tight_ele_mult', electrons[df['is_tight_electron']])
        fill_mult('loose_muo_mult', muons)
        fill_mult('tight_muo_mult', muons[df['is_tight_muon']])
        fill_mult('tau_mult', taus)
        fill_mult('photon_mult', photons)

        def ezfill(name, **kwargs):
            """Helper function to make filling easier."""
            output[name].fill(dataset=dataset, region=region, **kwargs)

        # Monitor weights
        for wname, wvalue in weights._weights.items():
            ezfill("weights", weight_type=wname, weight_value=wvalue[mask])
            ezfill("weights_wide", weight_type=wname,
                   weight_value=wvalue[mask])

        # All ak4
        # This is a workaround to create a weight array of the right dimension
        w_alljets = weight_shape(ak4[mask].eta, rweight)
        w_alljets_nopref = weight_shape(ak4[mask].eta, weight_nopref[mask])

        ezfill('ak4_eta', jeteta=ak4[mask].eta.flatten(), weight=w_alljets)
        ezfill('ak4_phi', jetphi=ak4[mask].phi.flatten(), weight=w_alljets)
        ezfill('ak4_pt', jetpt=ak4[mask].pt.flatten(), weight=w_alljets)

        ezfill('ak4_eta_nopref', jeteta=ak4[mask].eta.flatten(),
               weight=w_alljets_nopref)
        ezfill('ak4_phi_nopref', jetphi=ak4[mask].phi.flatten(),
               weight=w_alljets_nopref)
        ezfill('ak4_pt_nopref', jetpt=ak4[mask].pt.flatten(),
               weight=w_alljets_nopref)

        # Leading ak4
        w_diak4 = weight_shape(diak4.pt[mask], rweight)
        ezfill('ak4_eta0', jeteta=diak4.i0.eta[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_phi0', jetphi=diak4.i0.phi[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_pt0', jetpt=diak4.i0.pt[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_ptraw0', jetpt=diak4.i0.ptraw[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_chf0', frac=diak4.i0.chf[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_nhf0', frac=diak4.i0.nhf[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_nconst0', nconst=diak4.i0.nconst[mask].flatten(),
               weight=w_diak4)

        # Trailing ak4
        ezfill('ak4_eta1', jeteta=diak4.i1.eta[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_phi1', jetphi=diak4.i1.phi[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_pt1', jetpt=diak4.i1.pt[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_ptraw1', jetpt=diak4.i1.ptraw[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_chf1', frac=diak4.i1.chf[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_nhf1', frac=diak4.i1.nhf[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_nconst1', nconst=diak4.i1.nconst[mask].flatten(),
               weight=w_diak4)

        # B tag discriminator
        btag = getattr(ak4, cfg.BTAG.ALGO)
        w_btag = weight_shape(btag[mask], rweight)
        ezfill('ak4_btag', btag=btag[mask].flatten(), weight=w_btag)

        # MET
        ezfill('dpfcalo', dpfcalo=df["dPFCalo"][mask], weight=rweight)
        ezfill('met', met=met_pt[mask], weight=rweight)
        ezfill('met_phi', phi=met_phi[mask], weight=rweight)
        ezfill('recoil', recoil=df["recoil_pt"][mask], weight=rweight)
        ezfill('recoil_phi', phi=df["recoil_phi"][mask], weight=rweight)
        ezfill('dphijm', dphi=df["minDPhiJetMet"][mask], weight=rweight)
        ezfill('dphijr', dphi=df["minDPhiJetRecoil"][mask], weight=rweight)
        ezfill('dphijj', dphi=df["dphijj"][mask], weight=rweight)
        ezfill('detajj', deta=df["detajj"][mask], weight=rweight)
        ezfill('mjj', mjj=df["mjj"][mask], weight=rweight)

        # Two dimensional
        ezfill('recoil_mjj', recoil=df["recoil_pt"][mask],
               mjj=df["mjj"][mask], weight=rweight)

        # Muons
        if '_1m_' in region or '_2m_' in region:
            w_allmu = weight_shape(muons.pt[mask], rweight)
            ezfill('muon_pt', pt=muons.pt[mask].flatten(), weight=w_allmu)
            ezfill('muon_mt', mt=df['MT_mu'][mask], weight=rweight)
            ezfill('muon_eta', eta=muons.eta[mask].flatten(),
                   weight=w_allmu)
            ezfill('muon_phi', phi=muons.phi[mask].flatten(),
                   weight=w_allmu)

        # Dimuon
        if '_2m_' in region:
            w_dimu = weight_shape(dimuons.pt[mask], rweight)
            ezfill('muon_pt0', pt=dimuons.i0.pt[mask].flatten(),
                   weight=w_dimu)
            ezfill('muon_pt1', pt=dimuons.i1.pt[mask].flatten(),
                   weight=w_dimu)
            ezfill('muon_eta0', eta=dimuons.i0.eta[mask].flatten(),
                   weight=w_dimu)
            ezfill('muon_eta1', eta=dimuons.i1.eta[mask].flatten(),
                   weight=w_dimu)
            ezfill('muon_phi0', phi=dimuons.i0.phi[mask].flatten(),
                   weight=w_dimu)
            ezfill('muon_phi1', phi=dimuons.i1.phi[mask].flatten(),
                   weight=w_dimu)
            ezfill('dimuon_pt', pt=dimuons.pt[mask].flatten(),
                   weight=w_dimu)
            ezfill('dimuon_eta', eta=dimuons.eta[mask].flatten(),
                   weight=w_dimu)
            ezfill('dimuon_mass',
                   dilepton_mass=dimuons.mass[mask].flatten(),
                   weight=w_dimu)

        # Electrons
        if '_1e_' in region or '_2e_' in region:
            w_allel = weight_shape(electrons.pt[mask], rweight)
            ezfill('electron_pt', pt=electrons.pt[mask].flatten(),
                   weight=w_allel)
            ezfill('electron_mt', mt=df['MT_el'][mask], weight=rweight)
            ezfill('electron_eta', eta=electrons.eta[mask].flatten(),
                   weight=w_allel)
            ezfill('electron_phi', phi=electrons.phi[mask].flatten(),
                   weight=w_allel)

        # Dielectron
        if '_2e_' in region:
            w_diel = weight_shape(dielectrons.pt[mask], rweight)
            ezfill('electron_pt0', pt=dielectrons.i0.pt[mask].flatten(),
                   weight=w_diel)
            ezfill('electron_pt1', pt=dielectrons.i1.pt[mask].flatten(),
                   weight=w_diel)
            ezfill('electron_eta0', eta=dielectrons.i0.eta[mask].flatten(),
                   weight=w_diel)
            ezfill('electron_eta1', eta=dielectrons.i1.eta[mask].flatten(),
                   weight=w_diel)
            ezfill('electron_phi0', phi=dielectrons.i0.phi[mask].flatten(),
                   weight=w_diel)
            ezfill('electron_phi1', phi=dielectrons.i1.phi[mask].flatten(),
                   weight=w_diel)
            ezfill('dielectron_pt', pt=dielectrons.pt[mask].flatten(),
                   weight=w_diel)
            ezfill('dielectron_eta', eta=dielectrons.eta[mask].flatten(),
                   weight=w_diel)
            ezfill('dielectron_mass',
                   dilepton_mass=dielectrons.mass[mask].flatten(),
                   weight=w_diel)

        # Photon
        if '_g_' in region:
            w_leading_photon = weight_shape(
                photons[leadphoton_index].pt[mask], rweight)
            ezfill('photon_pt0',
                   pt=photons[leadphoton_index].pt[mask].flatten(),
                   weight=w_leading_photon)
            ezfill('photon_eta0',
                   eta=photons[leadphoton_index].eta[mask].flatten(),
                   weight=w_leading_photon)
            ezfill('photon_phi0',
                   phi=photons[leadphoton_index].phi[mask].flatten(),
                   weight=w_leading_photon)
            # The recoil is restricted to events that have a leading photon
            # so that it lines up with the flattened photon pt array.
            ezfill('photon_pt0_recoil',
                   pt=photons[leadphoton_index].pt[mask].flatten(),
                   recoil=df['recoil_pt'][mask
                                          & (leadphoton_index.counts > 0)],
                   weight=w_leading_photon)
            ezfill('photon_eta_phi',
                   eta=photons[leadphoton_index].eta[mask].flatten(),
                   phi=photons[leadphoton_index].phi[mask].flatten(),
                   weight=w_leading_photon)

        # PV
        ezfill('npv', nvtx=df['PV_npvs'][mask], weight=rweight)
        ezfill('npvgood', nvtx=df['PV_npvsGood'][mask], weight=rweight)
        ezfill('npv_nopu', nvtx=df['PV_npvs'][mask], weight=rweight_nopu)
        ezfill('npvgood_nopu', nvtx=df['PV_npvsGood'][mask],
               weight=rweight_nopu)

        ezfill('rho_all', rho=df['fixedGridRhoFastjetAll'][mask],
               weight=rweight)
        ezfill('rho_central', rho=df['fixedGridRhoFastjetCentral'][mask],
               weight=rweight)
        ezfill('rho_all_nopu', rho=df['fixedGridRhoFastjetAll'][mask],
               weight=rweight_nopu)
        ezfill('rho_central_nopu',
               rho=df['fixedGridRhoFastjetCentral'][mask],
               weight=rweight_nopu)

    return output
def process(self, df): if not df.size: return self.accumulator.identity() self._configure(df) dataset = df['dataset'] df['is_lo_w'] = is_lo_w(dataset) df['is_lo_z'] = is_lo_z(dataset) df['is_lo_g'] = is_lo_g(dataset) df['is_nlo_z'] = is_nlo_z(dataset) df['is_nlo_w'] = is_nlo_w(dataset) df['has_v_jet'] = has_v_jet(dataset) df['has_lhe_v_pt'] = df['is_lo_w'] | df['is_lo_z'] | df['is_nlo_z'] | df['is_nlo_w'] | df['is_lo_g'] df['is_data'] = is_data(dataset) gen_v_pt = None if not df['is_data']: gen = setup_gen_candidates(df) if df['is_lo_w'] or df['is_lo_z'] or df['is_nlo_z'] or df['is_nlo_w']: dressed = setup_dressed_gen_candidates(df) fill_gen_v_info(df, gen, dressed) gen_v_pt = df['gen_v_pt_combined'] elif df['is_lo_g']: gen_v_pt = gen[(gen.pdg==22) & (gen.status==1)].pt.max() # Candidates # Already pre-filtered! # All leptons are at least loose # Check out setup_candidates for filtering details met_pt, met_phi, ak4, bjets, ak8, muons, electrons, taus, photons = setup_candidates(df, cfg) # Muons df['is_tight_muon'] = muons.tightId \ & (muons.iso < cfg.MUON.CUTS.TIGHT.ISO) \ & (muons.pt>cfg.MUON.CUTS.TIGHT.PT) \ & (muons.abseta<cfg.MUON.CUTS.TIGHT.ETA) dimuons = muons.distincts() dimuon_charge = dimuons.i0['charge'] + dimuons.i1['charge'] df['MT_mu'] = ((muons.counts==1) * mt(muons.pt, muons.phi, met_pt, met_phi)).max() # Electrons df['is_tight_electron'] = electrons.tightId \ & (electrons.pt > cfg.ELECTRON.CUTS.TIGHT.PT) \ & (electrons.abseta < cfg.ELECTRON.CUTS.TIGHT.ETA) dielectrons = electrons.distincts() dielectron_charge = dielectrons.i0['charge'] + dielectrons.i1['charge'] df['MT_el'] = ((electrons.counts==1) * mt(electrons.pt, electrons.phi, met_pt, met_phi)).max() # ak4 leadak4_index=ak4.pt.argmax() elejet_pairs = ak4[:,:1].cross(electrons) df['dREleJet'] = np.hypot(elejet_pairs.i0.eta-elejet_pairs.i1.eta , dphi(elejet_pairs.i0.phi,elejet_pairs.i1.phi)).min() muonjet_pairs = ak4[:,:1].cross(muons) df['dRMuonJet'] = 
np.hypot(muonjet_pairs.i0.eta-muonjet_pairs.i1.eta , dphi(muonjet_pairs.i0.phi,muonjet_pairs.i1.phi)).min() # Photons # Angular distance leading photon - leading jet phojet_pairs = ak4[:,:1].cross(photons[:,:1]) df['dRPhotonJet'] = np.hypot(phojet_pairs.i0.eta-phojet_pairs.i1.eta , dphi(phojet_pairs.i0.phi,phojet_pairs.i1.phi)).min() # Recoil df['recoil_pt'], df['recoil_phi'] = recoil(met_pt,met_phi, electrons, muons, photons) df["dPFCalo"] = (met_pt - df["CaloMET_pt"]) / df["recoil_pt"] df["minDPhiJetRecoil"] = min_dphi_jet_met(ak4, df['recoil_phi'], njet=4, ptmin=30, etamax=2.4) df["minDPhiJetMet"] = min_dphi_jet_met(ak4, met_phi, njet=4, ptmin=30, etamax=2.4) selection = processor.PackedSelection() # Triggers pass_all = np.ones(df.size)==1 selection.add('inclusive', pass_all) selection = trigger_selection(selection, df, cfg) selection.add('mu_pt_trig_safe', muons.pt.max() > 30) # Common selection selection.add('veto_ele', electrons.counts==0) selection.add('veto_muo', muons.counts==0) selection.add('veto_photon', photons.counts==0) selection.add('veto_tau', taus.counts==0) selection.add('veto_b', bjets.counts==0) selection.add('mindphijr',df['minDPhiJetRecoil'] > cfg.SELECTION.SIGNAL.MINDPHIJR) selection.add('mindphijm',df['minDPhiJetMet'] > cfg.SELECTION.SIGNAL.MINDPHIJR) selection.add('dpfcalo',np.abs(df['dPFCalo']) < cfg.SELECTION.SIGNAL.DPFCALO) selection.add('recoil', df['recoil_pt']>cfg.SELECTION.SIGNAL.RECOIL) if(cfg.MITIGATION.HEM and extract_year(df['dataset']) == 2018 and not cfg.RUN.SYNC): selection.add('hemveto', df['hemveto']) else: selection.add('hemveto', np.ones(df.size)==1) # AK4 Jet leadak4_pt_eta = (ak4.pt.max() > cfg.SELECTION.SIGNAL.leadak4.PT) \ & (ak4.abseta[leadak4_index] < cfg.SELECTION.SIGNAL.leadak4.ETA).any() selection.add('leadak4_pt_eta', leadak4_pt_eta) selection.add('leadak4_id',(ak4.tightId[leadak4_index] \ & (ak4.chf[leadak4_index] >cfg.SELECTION.SIGNAL.leadak4.CHF) \ & 
(ak4.nhf[leadak4_index]<cfg.SELECTION.SIGNAL.leadak4.NHF)).any()) # AK8 Jet leadak8_index=ak8.pt.argmax() leadak8_pt_eta = (ak8.pt.max() > cfg.SELECTION.SIGNAL.leadak8.PT) \ & (ak8.abseta[leadak8_index] < cfg.SELECTION.SIGNAL.leadak8.ETA).any() selection.add('leadak8_pt_eta', leadak8_pt_eta) selection.add('leadak8_id',(ak8.tightId[leadak8_index]).any()) # Mono-V selection selection.add('leadak8_tau21', ((ak8.tau2[leadak8_index] / ak8.tau1[leadak8_index]) < cfg.SELECTION.SIGNAL.LEADAK8.TAU21).any()) selection.add('leadak8_mass', ((ak8.mass[leadak8_index] > cfg.SELECTION.SIGNAL.LEADAK8.MASS.MIN) \ & (ak8.mass[leadak8_index] < cfg.SELECTION.SIGNAL.LEADAK8.MASS.MAX)).any()) selection.add('leadak8_wvsqcd_loosemd', ((ak8.wvsqcdmd[leadak8_index] > cfg.WTAG.LOOSEMD) & (ak8.wvsqcdmd[leadak8_index] < cfg.WTAG.TIGHTMD)).any()) selection.add('leadak8_wvsqcd_tightmd', ((ak8.wvsqcdmd[leadak8_index] > cfg.WTAG.TIGHTMD)).any()) selection.add('leadak8_wvsqcd_loose', ((ak8.wvsqcd[leadak8_index] > cfg.WTAG.LOOSE) & (ak8.wvsqcd[leadak8_index] < cfg.WTAG.TIGHT)).any()) selection.add('leadak8_wvsqcd_tight', ((ak8.wvsqcd[leadak8_index] > cfg.WTAG.TIGHT)).any()) selection.add('veto_vtag', ~selection.all("leadak8_pt_eta", "leadak8_id", "leadak8_tau21", "leadak8_mass")) selection.add('only_one_ak8', ak8.counts==1) # Dimuon CR leadmuon_index=muons.pt.argmax() selection.add('at_least_one_tight_mu', df['is_tight_muon'].any()) selection.add('dimuon_mass', ((dimuons.mass > cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MIN) \ & (dimuons.mass < cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MAX)).any()) selection.add('dimuon_charge', (dimuon_charge==0).any()) selection.add('two_muons', muons.counts==2) # Single muon CR selection.add('one_muon', muons.counts==1) selection.add('mt_mu', df['MT_mu'] < cfg.SELECTION.CONTROL.SINGLEMU.MT) # Diele CR leadelectron_index=electrons.pt.argmax() selection.add('one_electron', electrons.counts==1) selection.add('two_electrons', electrons.counts==2) 
selection.add('at_least_one_tight_el', df['is_tight_electron'].any()) selection.add('dielectron_mass', ((dielectrons.mass > cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MIN) \ & (dielectrons.mass < cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MAX)).any()) selection.add('dielectron_charge', (dielectron_charge==0).any()) selection.add('two_electrons', electrons.counts==2) # Single Ele CR selection.add('met_el', met_pt > cfg.SELECTION.CONTROL.SINGLEEL.MET) selection.add('mt_el', df['MT_el'] < cfg.SELECTION.CONTROL.SINGLEEL.MT) # Photon CR leadphoton_index=photons.pt.argmax() df['is_tight_photon'] = photons.mediumId \ & (photons.abseta < cfg.PHOTON.CUTS.TIGHT.ETA) selection.add('one_photon', photons.counts==1) selection.add('at_least_one_tight_photon', df['is_tight_photon'].any()) selection.add('photon_pt', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PT) selection.add('photon_pt_trig', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PTTRIG) # Fill histograms output = self.accumulator.identity() # Gen if gen_v_pt is not None: output['genvpt_check'].fill(vpt=gen_v_pt,type="Nano", dataset=dataset, weight=df['Generator_weight']) if 'LHE_HT' in df: output['lhe_ht'].fill(dataset=dataset, ht=df['LHE_HT']) # Weights evaluator = evaluator_from_config(cfg) weights = processor.Weights(size=df.size, storeIndividual=True) if not df['is_data']: weights.add('gen', df['Generator_weight']) try: weights.add('prefire', df['PrefireWeight']) except KeyError: weights.add('prefire', np.ones(df.size)) weights = candidate_weights(weights, df, evaluator, muons, electrons, photons) weights = pileup_weights(weights, df, evaluator, cfg) if not (gen_v_pt is None): weights = theory_weights_monojet(weights, df, evaluator, gen_v_pt) # Save per-event values for synchronization if cfg.RUN.KINEMATICS.SAVE: for event in cfg.RUN.KINEMATICS.EVENTS: mask = df['event'] == event if not mask.any(): continue output['kinematics']['event'] += [event] output['kinematics']['met'] += [met_pt[mask].flatten()] output['kinematics']['met_phi'] += 
[met_phi[mask].flatten()] output['kinematics']['recoil'] += [df['recoil_pt'][mask].flatten()] output['kinematics']['recoil_phi'] += [df['recoil_phi'][mask].flatten()] output['kinematics']['ak4pt0'] += [ak4[leadak4_index][mask].pt.flatten()] output['kinematics']['ak4eta0'] += [ak4[leadak4_index][mask].eta.flatten()] output['kinematics']['leadbtag'] += [ak4.pt.max()<0][mask] output['kinematics']['nLooseMu'] += [muons.counts[mask]] output['kinematics']['nTightMu'] += [muons[df['is_tight_muon']].counts[mask].flatten()] output['kinematics']['mupt0'] += [muons[leadmuon_index][mask].pt.flatten()] output['kinematics']['mueta0'] += [muons[leadmuon_index][mask].eta.flatten()] output['kinematics']['muphi0'] += [muons[leadmuon_index][mask].phi.flatten()] output['kinematics']['nLooseEl'] += [electrons.counts[mask]] output['kinematics']['nTightEl'] += [electrons[df['is_tight_electron']].counts[mask].flatten()] output['kinematics']['elpt0'] += [electrons[leadelectron_index][mask].pt.flatten()] output['kinematics']['eleta0'] += [electrons[leadelectron_index][mask].eta.flatten()] output['kinematics']['nLooseGam'] += [photons.counts[mask]] output['kinematics']['nTightGam'] += [photons[df['is_tight_photon']].counts[mask].flatten()] output['kinematics']['gpt0'] += [photons[leadphoton_index][mask].pt.flatten()] output['kinematics']['geta0'] += [photons[leadphoton_index][mask].eta.flatten()] # Sum of all weights to use for normalization # TODO: Deal with systematic variations output['nevents'][dataset] += df.size if not df['is_data']: output['sumw'][dataset] += df['genEventSumw'] output['sumw2'][dataset] += df['genEventSumw2'] output['sumw_pileup'][dataset] += weights.partial_weight(include=['pileup']).sum() regions = monojet_regions(cfg) for region, cuts in regions.items(): region_weights = copy.deepcopy(weights) if not df['is_data']: if re.match(r'cr_(\d+)e.*', region): region_weights.add('trigger', np.ones(df.size)) elif re.match(r'cr_(\d+)m.*', region) or re.match('sr_.*', region): 
region_weights.add('trigger', evaluator["trigger_met"](df['recoil_pt'])) elif re.match(r'cr_g.*', region): region_weights.add('trigger', np.ones(df.size)) if not df['is_data']: genVs = gen[((gen.pdg==23) | (gen.pdg==24) | (gen.pdg==-24)) & (gen.pt>10)] leadak8 = ak8[ak8.pt.argmax()] leadak8_matched_mask = leadak8.match(genVs, deltaRCut=0.8) matched_leadak8 = leadak8[leadak8_matched_mask] unmatched_leadak8 = leadak8[~leadak8_matched_mask] for wp in ['loose','loosemd','tight','tightmd']: if re.match(r'.*_{wp}_v.*', region): if (wp == 'tight') or ('nomistag' in region): # no mistag SF available for tight cut matched_weights = evaluator[f'wtag_{wp}'](matched_leadak8.pt).prod() else: matched_weights = evaluator[f'wtag_{wp}'](matched_leadak8.pt).prod() \ * evaluator[f'wtag_mistag_{wp}'](unmatched_leadak8.pt).prod() region_weights.add('wtag_{wp}', matched_weights) # Blinding if(self._blind and df['is_data'] and region.startswith('sr')): continue # Cutflow plot for signal and control regions if any(x in region for x in ["sr", "cr", "tr"]): output['cutflow_' + region]['all']+=df.size for icut, cutname in enumerate(cuts): output['cutflow_' + region][cutname] += selection.all(*cuts[:icut+1]).sum() mask = selection.all(*cuts) if cfg.RUN.SAVE.TREE: def fill_tree(variable, values): treeacc = processor.column_accumulator(values) name = f'tree_{region}_{variable}' if dataset in output[name].keys(): output[name][dataset] += treeacc else: output[name][dataset] = treeacc if region in ['cr_2m_j','cr_1m_j','cr_2e_j','cr_1e_j','cr_g_j']: fill_tree('recoil',df['recoil_pt'][mask].flatten()) fill_tree('weight',region_weights.weight()[mask].flatten()) if gen_v_pt is not None: fill_tree('gen_v_pt',gen_v_pt[mask].flatten()) else: fill_tree('gen_v_pt', -1 * np.ones(sum(mask))) # Save the event numbers of events passing this selection if cfg.RUN.SAVE.PASSING: output['selected_events'][region] += list(df['event'][mask]) # Multiplicities def fill_mult(name, candidates): output[name].fill( 
dataset=dataset, region=region, multiplicity=candidates[mask].counts, weight=region_weights.weight()[mask] ) fill_mult('ak8_mult', ak8) fill_mult('ak4_mult', ak4) fill_mult('bjet_mult',bjets) fill_mult('loose_ele_mult',electrons) fill_mult('tight_ele_mult',electrons[df['is_tight_electron']]) fill_mult('loose_muo_mult',muons) fill_mult('tight_muo_mult',muons[df['is_tight_muon']]) fill_mult('tau_mult',taus) fill_mult('photon_mult',photons) def ezfill(name, **kwargs): """Helper function to make filling easier.""" output[name].fill( dataset=dataset, region=region, **kwargs ) # Monitor weights for wname, wvalue in region_weights._weights.items(): ezfill("weights", weight_type=wname, weight_value=wvalue[mask]) # All ak4 # This is a workaround to create a weight array of the right dimension w_alljets = weight_shape(ak4[mask].eta, region_weights.weight()[mask]) ezfill('ak4_eta', jeteta=ak4[mask].eta.flatten(), weight=w_alljets) ezfill('ak4_phi', jetphi=ak4[mask].phi.flatten(), weight=w_alljets) ezfill('ak4_eta_phi', phi=ak4[mask].phi.flatten(),eta=ak4[mask].eta.flatten(), weight=w_alljets) ezfill('ak4_pt', jetpt=ak4[mask].pt.flatten(), weight=w_alljets) # Leading ak4 w_leadak4 = weight_shape(ak4[leadak4_index].eta[mask], region_weights.weight()[mask]) ezfill('ak4_eta0', jeteta=ak4[leadak4_index].eta[mask].flatten(), weight=w_leadak4) ezfill('ak4_phi0', jetphi=ak4[leadak4_index].phi[mask].flatten(), weight=w_leadak4) ezfill('ak4_pt0', jetpt=ak4[leadak4_index].pt[mask].flatten(), weight=w_leadak4) ezfill('ak4_ptraw0', jetpt=ak4[leadak4_index].ptraw[mask].flatten(), weight=w_leadak4) ezfill('ak4_chf0', frac=ak4[leadak4_index].chf[mask].flatten(), weight=w_leadak4) ezfill('ak4_nhf0', frac=ak4[leadak4_index].nhf[mask].flatten(), weight=w_leadak4) ezfill('drelejet', dr=df['dREleJet'][mask], weight=region_weights.weight()[mask]) ezfill('drmuonjet', dr=df['dRMuonJet'][mask], weight=region_weights.weight()[mask]) ezfill('drphotonjet', dr=df['dRPhotonJet'][mask], 
weight=region_weights.weight()[mask]) # AK8 jets if region=='inclusive' or region.endswith('v'): # All w_allak8 = weight_shape(ak8.eta[mask], region_weights.weight()[mask]) ezfill('ak8_eta', jeteta=ak8[mask].eta.flatten(), weight=w_allak8) ezfill('ak8_phi', jetphi=ak8[mask].phi.flatten(), weight=w_allak8) ezfill('ak8_pt', jetpt=ak8[mask].pt.flatten(), weight=w_allak8) ezfill('ak8_mass', mass=ak8[mask].mass.flatten(), weight=w_allak8) # Leading w_leadak8 = weight_shape(ak8[leadak8_index].eta[mask], region_weights.weight()[mask]) ezfill('ak8_eta0', jeteta=ak8[leadak8_index].eta[mask].flatten(), weight=w_leadak8) ezfill('ak8_phi0', jetphi=ak8[leadak8_index].phi[mask].flatten(), weight=w_leadak8) ezfill('ak8_pt0', jetpt=ak8[leadak8_index].pt[mask].flatten(), weight=w_leadak8 ) ezfill('ak8_mass0', mass=ak8[leadak8_index].mass[mask].flatten(), weight=w_leadak8) ezfill('ak8_tau210', tau21=ak8[leadak8_index].tau21[mask].flatten(), weight=w_leadak8) ezfill('ak8_wvsqcd0', tagger=ak8[leadak8_index].wvsqcd[mask].flatten(), weight=w_leadak8) ezfill('ak8_wvsqcdmd0', tagger=ak8[leadak8_index].wvsqcdmd[mask].flatten(), weight=w_leadak8) ezfill('ak8_zvsqcd0', tagger=ak8[leadak8_index].zvsqcd[mask].flatten(), weight=w_leadak8) ezfill('ak8_zvsqcdmd0', tagger=ak8[leadak8_index].zvsqcdmd[mask].flatten(), weight=w_leadak8) # histogram with only gen-matched lead ak8 pt if not df['is_data']: w_matchedleadak8 = weight_shape(matched_leadak8.eta[mask], region_weights.weight()[mask]) ezfill('ak8_Vmatched_pt0', jetpt=matched_leadak8.pt[mask].flatten(), weight=w_matchedleadak8 ) # Dimuon specifically for deepak8 mistag rate measurement if 'inclusive_v' in region: ezfill('ak8_passloose_pt0', wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.LOOSE, jetpt=ak8[leadak8_index].pt[mask].max(), weight=w_leadak8 ) ezfill('ak8_passtight_pt0', wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.TIGHT, jetpt=ak8[leadak8_index].pt[mask].max(), weight=w_leadak8 ) ezfill('ak8_passloosemd_pt0', 
wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.LOOSEMD, jetpt=ak8[leadak8_index].pt[mask].max(), weight=w_leadak8 ) ezfill('ak8_passtightmd_pt0', wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.TIGHTMD, jetpt=ak8[leadak8_index].pt[mask].max(), weight=w_leadak8 ) ezfill('ak8_passloose_mass0', wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.LOOSE, mass=ak8[leadak8_index].mass[mask].max(), weight=w_leadak8 ) ezfill('ak8_passtight_mass0', wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.TIGHT, mass=ak8[leadak8_index].mass[mask].max(), weight=w_leadak8 ) ezfill('ak8_passloosemd_mass0', wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.LOOSEMD, mass=ak8[leadak8_index].mass[mask].max(), weight=w_leadak8 ) ezfill('ak8_passtightmd_mass0', wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.TIGHTMD, mass=ak8[leadak8_index].mass[mask].max(), weight=w_leadak8 ) # MET ezfill('dpfcalo', dpfcalo=df["dPFCalo"][mask], weight=region_weights.weight()[mask] ) ezfill('met', met=met_pt[mask], weight=region_weights.weight()[mask] ) ezfill('met_phi', phi=met_phi[mask], weight=region_weights.weight()[mask] ) ezfill('recoil', recoil=df["recoil_pt"][mask], weight=region_weights.weight()[mask] ) ezfill('recoil_phi', phi=df["recoil_phi"][mask], weight=region_weights.weight()[mask] ) ezfill('recoil_nopog', recoil=df["recoil_pt"][mask], weight=region_weights.partial_weight(include=['pileup','theory','gen','prefire'])[mask]) ezfill('recoil_nopref', recoil=df["recoil_pt"][mask], weight=region_weights.partial_weight(exclude=['prefire'])[mask]) ezfill('recoil_nopu', recoil=df["recoil_pt"][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask]) ezfill('recoil_notrg', recoil=df["recoil_pt"][mask], weight=region_weights.partial_weight(exclude=['trigger'])[mask]) ezfill('ak4_pt0_over_recoil', ratio=ak4.pt.max()[mask]/df["recoil_pt"][mask], weight=region_weights.weight()[mask]) ezfill('dphijm', dphi=df["minDPhiJetMet"][mask], 
weight=region_weights.weight()[mask] ) ezfill('dphijr', dphi=df["minDPhiJetRecoil"][mask], weight=region_weights.weight()[mask] ) if 'noveto' in region: continue # Muons if '_1m_' in region or '_2m_' in region: w_allmu = weight_shape(muons.pt[mask], region_weights.weight()[mask]) ezfill('muon_pt', pt=muons.pt[mask].flatten(), weight=w_allmu ) ezfill('muon_mt', mt=df['MT_mu'][mask], weight=region_weights.weight()[mask]) ezfill('muon_eta', eta=muons.eta[mask].flatten(), weight=w_allmu) ezfill('muon_eta_phi', phi=muons.phi[mask].flatten(),eta=muons.eta[mask].flatten(), weight=w_allmu) ezfill('muon_phi', phi=muons.phi[mask].flatten(), weight=w_allmu) ezfill('muon_dxy', dxy=muons.dxy[mask].flatten(), weight=w_allmu) ezfill('muon_dz', dz=muons.dz[mask].flatten(), weight=w_allmu) # Leading muon w_leadmu = weight_shape(muons[leadmuon_index].pt[mask], region_weights.weight()[mask]) ezfill('muon_pt0', pt=muons[leadmuon_index].pt[mask].flatten(), weight=w_leadmu ) ezfill('muon_eta0', eta=muons[leadmuon_index].eta[mask].flatten(), weight=w_leadmu) ezfill('muon_phi0', phi=muons[leadmuon_index].phi[mask].flatten(), weight=w_leadmu) ezfill('muon_dxy0', dxy=muons[leadmuon_index].dxy[mask].flatten(), weight=w_leadmu) ezfill('muon_dz0', dz=muons[leadmuon_index].dz[mask].flatten(), weight=w_leadmu) # Dimuon if '_2m_' in region: w_dimu = weight_shape(dimuons.pt[mask], region_weights.weight()[mask]) ezfill('dimuon_pt', pt=dimuons.pt[mask].flatten(), weight=w_dimu) ezfill('dimuon_eta', eta=dimuons.eta[mask].flatten(), weight=w_dimu) ezfill('dimuon_mass', dilepton_mass=dimuons.mass[mask].flatten(), weight=w_dimu ) ezfill('dimuon_dr', dr=dimuons.i0.p4.delta_r(dimuons.i1.p4)[mask].flatten(), weight=w_dimu ) ezfill('muon_pt1', pt=muons[~leadmuon_index].pt[mask].flatten(), weight=w_leadmu ) ezfill('muon_eta1', eta=muons[~leadmuon_index].eta[mask].flatten(), weight=w_leadmu) ezfill('muon_phi1', phi=muons[~leadmuon_index].phi[mask].flatten(), weight=w_leadmu) # Electrons if '_1e_' in region or 
'_2e_' in region: w_allel = weight_shape(electrons.pt[mask], region_weights.weight()[mask]) ezfill('electron_pt', pt=electrons.pt[mask].flatten(), weight=w_allel) ezfill('electron_mt', mt=df['MT_el'][mask], weight=region_weights.weight()[mask]) ezfill('electron_eta', eta=electrons.eta[mask].flatten(), weight=w_allel) ezfill('electron_phi', phi=electrons.phi[mask].flatten(), weight=w_allel) ezfill('electron_eta_phi', phi=electrons.phi[mask].flatten(),eta=electrons.eta[mask].flatten(), weight=w_allel) ezfill('electron_dz', dz=electrons.dz[mask].flatten(), weight=w_allel) ezfill('electron_dxy', dxy=electrons.dxy[mask].flatten(), weight=w_allel) w_leadel = weight_shape(electrons[leadelectron_index].pt[mask], region_weights.weight()[mask]) ezfill('electron_pt0', pt=electrons[leadelectron_index].pt[mask].flatten(), weight=w_leadel) ezfill('electron_eta0', eta=electrons[leadelectron_index].eta[mask].flatten(), weight=w_leadel) ezfill('electron_phi0', phi=electrons[leadelectron_index].phi[mask].flatten(), weight=w_leadel) w_trailel = weight_shape(electrons[~leadelectron_index].pt[mask], region_weights.weight()[mask]) ezfill('electron_tightid1', id=electrons[~leadelectron_index].tightId[mask].flatten(), weight=w_trailel) # Dielectron if '_2e_' in region: w_diel = weight_shape(dielectrons.pt[mask], region_weights.weight()[mask]) ezfill('dielectron_pt', pt=dielectrons.pt[mask].flatten(), weight=w_diel) ezfill('dielectron_eta', eta=dielectrons.eta[mask].flatten(), weight=w_diel) ezfill('dielectron_mass', dilepton_mass=dielectrons.mass[mask].flatten(), weight=w_diel) ezfill('dielectron_dr', dr=dielectrons.i0.p4.delta_r(dielectrons.i1.p4)[mask].flatten(), weight=w_diel ) ezfill('electron_pt1', pt=electrons[~leadelectron_index].pt[mask].flatten(), weight=w_leadel) ezfill('electron_eta1', eta=electrons[~leadelectron_index].eta[mask].flatten(), weight=w_leadel) ezfill('electron_phi1', phi=electrons[~leadelectron_index].phi[mask].flatten(), weight=w_leadel) # Photon if '_g_' in 
region: w_leading_photon = weight_shape(photons[leadphoton_index].pt[mask],region_weights.weight()[mask]); ezfill('photon_pt0', pt=photons[leadphoton_index].pt[mask].flatten(), weight=w_leading_photon) ezfill('photon_eta0', eta=photons[leadphoton_index].eta[mask].flatten(), weight=w_leading_photon) ezfill('photon_phi0', phi=photons[leadphoton_index].phi[mask].flatten(), weight=w_leading_photon) ezfill('photon_eta_phi', phi=photons[leadphoton_index].phi[mask].flatten(),eta=photons[leadphoton_index].eta[mask].flatten(), weight=w_leading_photon) # w_drphoton_jet = weight_shape(df['dRPhotonJet'][mask], region_weights.weight()[mask]) # PV ezfill('npv', nvtx=df['PV_npvs'][mask], weight=region_weights.weight()[mask]) ezfill('npvgood', nvtx=df['PV_npvsGood'][mask], weight=region_weights.weight()[mask]) ezfill('npv_nopu', nvtx=df['PV_npvs'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask]) ezfill('npvgood_nopu', nvtx=df['PV_npvsGood'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask]) ezfill('rho_all', rho=df['fixedGridRhoFastjetAll'][mask], weight=region_weights.weight()[mask]) ezfill('rho_central', rho=df['fixedGridRhoFastjetCentral'][mask], weight=region_weights.weight()[mask]) ezfill('rho_all_nopu', rho=df['fixedGridRhoFastjetAll'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask]) ezfill('rho_central_nopu', rho=df['fixedGridRhoFastjetCentral'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask]) return output
def process(self, df):
    """Fill generator-level boson pT histograms for one chunk of events.

    Steps performed:

    1. Build gen-jet candidates from the ``GenJet_*`` branches of ``df``.
    2. For W/Z samples (LO or NLO), call ``fill_gen_v_info`` and keep only
       gen jets that are not matched (deltaR < 0.4) to dressed leptons or
       to gen leptons. For photon samples, store the boson pT/phi columns
       directly and keep only gen jets not matched to the highest-pT
       status-1 photon.
    3. For every tag in ``tags``, fill the inclusive, VBF and monojet
       ``gen_vpt_*_{tag}`` histograms, weighted by ``Generator_weight``.
    4. Add ``genEventSumw`` / ``genEventSumw2`` to the per-dataset
       ``sumw`` / ``sumw2`` accumulators.

    Args:
        df: Event container for one chunk; read like a mapping of branch
            name to array and also written to (``gen_v_*`` columns).

    Returns:
        The accumulator created from ``self.accumulator.identity()`` with
        the histograms and weight sums filled.

    Raises:
        ValueError: If the dataset is neither a W/Z nor a photon sample,
            in which case no cleaned gen-jet collection can be built.
    """
    output = self.accumulator.identity()
    dataset = df['dataset']

    # All generator-level jets, before any lepton/photon overlap removal.
    genjets_all = JaggedCandidateArray.candidatesfromcounts(
        df['nGenJet'],
        pt=df['GenJet_pt'],
        eta=df['GenJet_eta'],
        abseta=np.abs(df['GenJet_eta']),
        phi=df['GenJet_phi'],
        mass=df['GenJet_mass'])

    gen = setup_gen_candidates(df)
    tags = ['stat1', 'lhe']

    # The fourth test used to repeat is_lo_w, so NLO Z samples fell through
    # both branches and crashed on the unbound `genjets` further down.
    if (is_lo_w(dataset) or is_nlo_w(dataset)
            or is_lo_z(dataset) or is_nlo_z(dataset)):
        dressed = setup_dressed_gen_candidates(df)
        # NOTE(review): presumably this writes the gen_v_{pt,phi}_{tag}
        # columns for all three tags used below -- confirm in the helper.
        fill_gen_v_info(df, gen, dressed)
        tags.append('dress')

        # Select jets not overlapping with leptons
        genjets = genjets_all[
            (~genjets_all.match(dressed, deltaRCut=0.4))
            & (~genjets_all.match(gen[islep(gen)], deltaRCut=0.4))
        ]
    elif is_lo_g(dataset) or is_nlo_g(dataset):
        photons = gen[(gen.status == 1) & (gen.pdg == 22)]
        # Highest-pT status-1 photon; used for both phi and jet cleaning.
        lead_photon = photons[photons.pt.argmax()]

        df['gen_v_pt_stat1'] = photons.pt.max()
        df['gen_v_phi_stat1'] = lead_photon.phi.max()
        df['gen_v_pt_lhe'] = df['LHE_Vpt']
        # No LHE-level phi is read here; a zero placeholder is stored.
        df['gen_v_phi_lhe'] = np.zeros(df.size)

        # Select jets not overlapping with photon
        genjets = genjets_all[~genjets_all.match(lead_photon, deltaRCut=0.4)]
    else:
        # Previously this case surfaced as an UnboundLocalError on `genjets`.
        raise ValueError(
            f"Dataset '{dataset}' is neither a W/Z nor a photon sample; "
            "cannot build generator-level boson quantities.")

    # Quantities that do not depend on the tag are computed once.
    # Dijet for VBF: pairs built from the first two gen jets of each event.
    dijet = genjets[:, :2].distincts()
    nominal = df['Generator_weight']
    lead_jet_pt = genjets.pt.max()
    mjj = dijet.mass.max()

    for tag in tags:
        v_pt = df[f'gen_v_pt_{tag}']
        v_phi = df[f'gen_v_phi_{tag}']

        # Selection
        vbf_sel = vbf_selection(v_phi, dijet, genjets)
        monojet_sel = monojet_selection(v_phi, genjets)

        output[f'gen_vpt_inclusive_{tag}'].fill(
            dataset=dataset,
            vpt=v_pt,
            jpt=lead_jet_pt,
            weight=nominal)

        mask_vbf = vbf_sel.all(*vbf_sel.names)
        output[f'gen_vpt_vbf_{tag}'].fill(
            dataset=dataset,
            vpt=v_pt[mask_vbf],
            jpt=lead_jet_pt[mask_vbf],
            mjj=mjj[mask_vbf],
            weight=nominal[mask_vbf])

        mask_monojet = monojet_sel.all(*monojet_sel.names)
        output[f'gen_vpt_monojet_{tag}'].fill(
            dataset=dataset,
            vpt=v_pt[mask_monojet],
            jpt=lead_jet_pt[mask_monojet],
            weight=nominal[mask_monojet])

    # Keep track of weight sum
    output['sumw'][dataset] += df['genEventSumw']
    output['sumw2'][dataset] += df['genEventSumw2']
    return output