def __init__(self, year='2018', corrections=None):
    self._year = year
    # avoid a mutable default argument
    self._corrections = corrections if corrections is not None else {}
    self._rochester = lookup_tools.rochester_lookup.rochester_lookup(
        self._corrections['rochester_data'])

    dataset_axis = hist.Cat("dataset", "Primary dataset")
    channel_axis = hist.Cat("channel", "Channel")
    zmass_axis = hist.Bin("mass", r"$m_{2\ell}$ [GeV]", 240, 0, 120)
    met_axis = hist.Bin("met", r"$E_{T}^{miss}$ [GeV]", 3000, 0, 3000)
    npvs_axis = hist.Bin("npvs", "Number of Vertices", 120, 0, 120)

    self._selections = ['massWindow']

    hist.Hist.DEFAULT_DTYPE = 'f'  # save some space by keeping float bin counts instead of double
    self._accumulator = processor.dict_accumulator()
    for sel in self._selections:
        self._accumulator[sel + '_zmass'] = hist.Hist(
            "Counts", dataset_axis, channel_axis, zmass_axis)
        self._accumulator[sel + '_met'] = hist.Hist(
            "Counts", dataset_axis, channel_axis, met_axis)
        self._accumulator[sel + '_pileup'] = hist.Hist(
            "Counts", dataset_axis, channel_axis, npvs_axis)

    self._accumulator['cutflow'] = processor.defaultdict_accumulator(int)
    self._accumulator['sumw'] = processor.defaultdict_accumulator(int)
def __init__(self, year, ids, xsec, common):
    self._year = year
    self._lumi = 1000. * float(PhotonPurity.lumis[year])
    self._xsec = xsec

    self._accumulator = processor.dict_accumulator({
        'sumw': hist.Hist(
            'sumw',
            hist.Cat('dataset', 'Dataset'),
            hist.Bin('sumw', 'Weight value', [0.])),
        'count': hist.Hist(
            'Events',
            hist.Cat('dataset', 'Dataset'),
            hist.Cat('cat', 'Cat'),
            hist.Bin('pt', 'Photon pT', 50, 200, 1200),
            hist.Bin('sieie', 'sieie', 100, 0, 0.02))
    })

    self._singlephoton_triggers = {
        '2016': ['Photon175', 'Photon165_HE10'],
        '2017': ['Photon200'],
        '2018': ['Photon200']
    }

    self._ids = ids
    self._common = common
def __init__(self, corrections=None):
    ## make binning for hists
    self.dataset_axis = hist.Cat("dataset", "Event Process")
    self.jetmult_axis = hist.Cat("jmult", "nJets")
    self.leptype_axis = hist.Cat("leptype", "Lepton Type")
    self.lepcat_axis = hist.Cat("lepcat", "Lepton Category")
    self.btag_axis = hist.Cat("btag", "btagging Category")
    self.lepIso_axis = hist.Bin("iso", "pfRelIso", 2000, 0., 20.)
    self.mtt_axis = hist.Bin("mtt", r"m($t\overline{t}$) [GeV]", 180, 200, 2000)
    self.ctstar_axis = hist.Bin("ctstar", r"cos($\theta^{*}$)", 200, -1., 1.)

    ## make dictionary of hists
    histo_dict = {}
    ## make jet hists
    hists = self.make_hists()
    histo_dict.update(hists)
    histo_dict['cutflow'] = processor.defaultdict_accumulator(int)

    self._accumulator = processor.dict_accumulator(histo_dict)
    self.sample_name = ''
    self.corrections = corrections  # corrections must be passed in explicitly
    self.isData = True
def __init__(self, isMC):
    self._isMC = isMC

    # Histograms
    dataset_axis = hist.Cat("dataset", "Primary dataset")
    selection_axis = hist.Cat("selection", "Selection name")

    self._accumulator = processor.dict_accumulator()
    self._accumulator["total_events"] = processor.defaultdict_accumulator(int)

    # Define histograms here
    self._accumulator["mjjj"] = hist.Hist(
        "Events",
        dataset_axis,
        selection_axis,
        hist.Bin("mjjj", r"$M_{jjj}$ [GeV]", dijet_binning),
    )

    for pair in [(0, 1), (1, 2), (2, 0)]:
        self._accumulator[f"m{pair[0]}{pair[1]}"] = hist.Hist(
            "Events", dataset_axis, selection_axis,
            hist.Bin(f"m{pair[0]}{pair[1]}",
                     f"$m_{{{pair[0]}{pair[1]}}}$ [GeV]", dijet_binning))
        # Delta R and Delta eta are dimensionless, so no [GeV] on these labels
        self._accumulator[f"dR{pair[0]}{pair[1]}"] = hist.Hist(
            "Events", dataset_axis, selection_axis,
            hist.Bin(f"dR{pair[0]}{pair[1]}",
                     f"$\\Delta R_{{{pair[0]}{pair[1]}}}$", 100, 0., 4))
        self._accumulator[f"dEta{pair[0]}{pair[1]}"] = hist.Hist(
            "Events", dataset_axis, selection_axis,
            hist.Bin(f"dEta{pair[0]}{pair[1]}",
                     f"$\\Delta \\eta_{{{pair[0]}{pair[1]}}}$", 100, 0., 2))
        self._accumulator[f"m{pair[0]}{pair[1]}overM"] = hist.Hist(
            "Events", dataset_axis, selection_axis,
            hist.Bin(f"m{pair[0]}{pair[1]}overM",
                     f"$m_{{{pair[0]}{pair[1]}}}/M_{{jjj}}$", 100, 0, 1))

    for jet in [0, 1, 2]:
        self._accumulator[f"pt{jet}"] = hist.Hist(
            "Events", dataset_axis, selection_axis,
            hist.Bin(f"pt{jet}", f"$p^{{T}}_{{{jet}}}$ [GeV]", dijet_binning))
        self._accumulator[f"eta{jet}"] = hist.Hist(
            "Events", dataset_axis, selection_axis,
            hist.Bin(f"eta{jet}", f"$\\eta_{jet}$", 100, -3, 3))
        self._accumulator[f"ptoverM{jet}"] = hist.Hist(
            "Events", dataset_axis, selection_axis,
            hist.Bin(f"ptoverM{jet}",
                     f"$p^{{T}}_{{{jet}}}/M_{{jjj}}$", 100, 0, 2.5))
def __init__(self, year):
    self._year = year
    self._trigger = {
        2016: {
            "e": [
                "Ele27_WPTight_Gsf",
                "Ele45_WPLoose_Gsf",
                "Ele25_eta2p1_WPTight_Gsf",
                "Ele115_CaloIdVT_GsfTrkIdT",
                "Ele15_IsoVVL_PFHT350",
                "Ele15_IsoVVVL_PFHT400",
                "Ele45_CaloIdVT_GsfTrkIdT_PFJet200_PFJet50",
                "Ele50_CaloIdVT_GsfTrkIdT_PFJet165",
            ],
            "mu": [
                "IsoMu24",
                "IsoTkMu24",
                "Mu50",
                "TkMu50",
                "Mu15_IsoVVVL_PFHT400",
                "Mu15_IsoVVVL_PFHT350",
            ],
        }
    }
    self._trigger = self._trigger[int(self._year)]

    self._accumulator = processor.dict_accumulator({
        'sumw': processor.defaultdict_accumulator(float),
        'cutflow': hist.Hist(
            'Events',
            hist.Cat('dataset', 'Dataset'),
            hist.Cat('channel', 'Channel'),
            hist.Bin('cut', 'Cut index', 9, 0, 9),
        ),
    })
def __init__(self, era=2018):
    datasets_axis = hist.Cat("dataset", "Signal Model")
    category_axis = hist.Cat("region", "Lepton category")
    sys_axis = hist.Cat("syst", "systematic variation")
    MT1_axis = hist.Bin("MT1", r"$M_{T,1}$ [GeV]", 500, 0, 2000)
    MT2_axis = hist.Bin("MT2", r"$M_{T,2}$ [GeV]", 500, 0, 2000)
    MT3_axis = hist.Bin("MT3", r"$M_{T,3}$ [GeV]", 500, 0, 2000)
    ST1_axis = hist.Bin("ST1", r"$S_{T,1}$ [GeV]", 500, 0, 2000)
    MET_axis = hist.Bin("MET", r"$E_{T}^{miss}$ [GeV]", 500, 0, 2000)
    RT1_axis = hist.Bin("RT1", r"$R_{T}$", 500, 0, 200)

    self._accumulator = processor.dict_accumulator({
        'MET': hist.Hist("Events", datasets_axis, category_axis, MET_axis),
        'MT1': hist.Hist("Events", datasets_axis, category_axis, MT1_axis),
        'MT2': hist.Hist("Events", datasets_axis, category_axis, MT2_axis),
        'MT3': hist.Hist("Events", datasets_axis, category_axis, MT3_axis),
        'RT1': hist.Hist("Events", datasets_axis, category_axis, RT1_axis),
        'cutflow': processor.defaultdict_accumulator(int),
    })

    with open(f"{os.path.dirname(__file__)}/xsections_{era}.yaml") as stream:
        self.xsections = yaml.safe_load(stream)

    self.lumi = {2016: 35.9, 2017: 41.5, 2018: 60.0}[era]
def __init__(self):
    # Histograms
    dataset_axis = hist.Cat("dataset", "Primary dataset")
    selection_axis = hist.Cat("selection", "Selection name")

    self._accumulator = processor.dict_accumulator()
    self._accumulator["nevents"] = processor.defaultdict_accumulator(int)
    self._accumulator["run_counter"] = processor.defaultdict_accumulator(
        partial(processor.defaultdict_accumulator, int))
def GroupProcesses(self, prdic=None):
    ''' Move from grouping in samples to grouping in processes '''
    if prdic:
        self.SetProcessDic(prdic)
    for k in self.hists.keys():
        if len(self.hists[k].identifiers('sample')) == 0:
            continue
        self.hists[k] = self.hists[k].group(
            hist.Cat(self.sampleLabel, self.sampleLabel),
            hist.Cat(self.processLabel, self.processLabel), self.prDic)
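# A minimal, self-contained sketch of the sample -> process regrouping that
# GroupProcesses performs, using coffea.hist directly. The sample names and
# the process map below are invented for illustration.
import numpy as np
from coffea import hist

h = hist.Hist(
    "Events",
    hist.Cat("sample", "sample"),
    hist.Bin("pt", "pt [GeV]", 10, 0, 100),
)
h.fill(sample="TTTo2L2Nu", pt=np.array([15.0, 42.0]))
h.fill(sample="TTToSemiLeptonic", pt=np.array([33.0]))

# {new process: [old samples]} -- plays the role of self.prDic above
prDic = {"ttbar": ["TTTo2L2Nu", "TTToSemiLeptonic"]}
h = h.group(hist.Cat("sample", "sample"),
            hist.Cat("process", "process"), prDic)
print([x.name for x in h.identifiers("process")])  # ['ttbar']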
def __init__(self, year='2017'):
    self._year = year
    self._triggers = {
        '2017': [
            'PFHT1050',
            'AK8PFJet400_TrimMass30',
            'AK8PFJet420_TrimMass30',
            'AK8PFHT800_TrimMass50',
            'PFJet500',
            'AK8PFJet500',
            'AK8PFJet550',
            'CaloJet500_NoJetID',
            'CaloJet550_NoJetID',
        ]
    }
    self._muontriggers = {
        '2017': [
            'Mu50',
            #'TkMu50',
        ]
    }
    self._accumulator = processor.dict_accumulator({
        'sumw': processor.defaultdict_accumulator(float),
        'templates': hist.Hist(
            'Events',
            hist.Cat('dataset', 'Dataset'),
            hist.Cat('region', 'Region'),
            #hist.Cat('systematic', 'Systematic'),
            hist.Bin('pt', r'Jet $p_{T}$ [GeV]', 25, 500, 1000),  #[525,575,625,700,800,1500]),#np.arange(525,2000,50)),
            hist.Bin('msd', r'Jet $m_{sd}$', 23, 40, 300),
            #hist.Bin('gru', 'GRU value', 20, 0., 1.),
            #hist.Bin('gruddt', 'GRU$^{DDT}$ value', [-1, 0, 1]),
            #hist.Bin('rho', 'jet rho', 20, -5.5, -2.),#[-5.5,-5.,-4.5,-4.,-3.5,-3.,-2.5,-2.]),
            #hist.Bin('n2', 'N$_2$ value', 20, 0., 0.5),
            #hist.Bin('n2ddt', 'N$_2^{DDT}$ value', 21, -0.3, 0.3),
            #hist.Bin('Vmatch', 'Matched to V', [-1, 0, 1]),
            hist.Bin('in_v3_ddt', 'IN$^{DDT}$ value', 20, -1, 0.5),
            hist.Bin('mu_pt', 'Leading muon p_{T}', 20, 50., 700.),
            hist.Bin('mu_pfRelIso04_all', 'Muon pfRelIso04 isolation', 20, 0., 1.),
            #hist.Bin('nPFConstituents', 'Number of PF candidates', 41, 20, 60),
            #hist.Bin('nJet', 'Number of fat jets', 10, 0, 9),
        ),
        #'gruddt': hist.Hist(
        #    hist.Cat('dataset', 'Dataset'),
        #    hist.Cat('region', 'Region'),
        #'cutflow': hist.Hist(
        #    'Events',
        #    hist.Cat('dataset', 'Dataset'),
        #    hist.Cat('region', 'Region'),
        #    hist.Bin('cut', 'Cut index', 11, 0, 11),
        #),
        'cutflow_signal': processor.defaultdict_accumulator(
            partial(processor.defaultdict_accumulator, float)),
        'cutflow_ttbar_muoncontrol': processor.defaultdict_accumulator(
            partial(processor.defaultdict_accumulator, float)),
    })
def __init__(self, year='2017'):
    self._year = year
    self._accumulator = hist.Hist(
        'Events',
        hist.Cat('dataset', 'Dataset'),
        hist.Cat('tagger', 'Tagger'),
        hist.Cat('btag', 'BTag WP pass/fail'),
        hist.Bin('flavor', 'Jet hadronFlavour', [0, 4, 5, 6]),
        hist.Bin('pt', 'Jet pT', [20, 30, 50, 70, 100, 140, 200, 300, 600, 1000]),
        hist.Bin('abseta', 'Jet abseta', [0, 1.4, 2.0, 2.5]),
    )
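# Hedged usage sketch for the tagging-efficiency histogram above: pass/fail is
# encoded in the 'btag' category, so an efficiency map is the 'pass' slice
# divided by pass+fail. Dataset/tagger names and jet values are placeholders.
import numpy as np
from coffea import hist

h = hist.Hist(
    'Events',
    hist.Cat('dataset', 'Dataset'),
    hist.Cat('tagger', 'Tagger'),
    hist.Cat('btag', 'BTag WP pass/fail'),
    hist.Bin('flavor', 'Jet hadronFlavour', [0, 4, 5, 6]),
    hist.Bin('pt', 'Jet pT', [20, 30, 50, 70, 100, 140, 200, 300, 600, 1000]),
    hist.Bin('abseta', 'Jet abseta', [0, 1.4, 2.0, 2.5]),
)
h.fill(dataset='QCD', tagger='deepcsv', btag='pass',
       flavor=np.array([5]), pt=np.array([75.0]), abseta=np.array([0.3]))
h.fill(dataset='QCD', tagger='deepcsv', btag='fail',
       flavor=np.array([5, 0]), pt=np.array([45.0, 120.0]),
       abseta=np.array([1.7, 0.1]))

# numerator: jets passing the WP; denominator: all jets
num = h.integrate('btag', 'pass').sum('dataset', 'tagger')
den = h.sum('btag', 'dataset', 'tagger')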
def __init__(self):
    dataset_axis = hist.Cat('dataset', 'dataset')
    lxy_axis = hist.Bin('lxy', 'lxy [cm]', 100, 0, 250)
    reco_axis = hist.Cat('reco', 'reco type')
    self._accumulator = processor.dict_accumulator({
        'lxy': hist.Hist('Counts', dataset_axis, lxy_axis, reco_axis),
        'lxy-el': hist.Hist('Counts', dataset_axis, lxy_axis, reco_axis),
        'lxy-pho': hist.Hist('Counts', dataset_axis, lxy_axis, reco_axis),
    })
def __init__(self, year, xsec, corrections):
    self._year = year
    self._lumi = 1000. * float(AnalysisProcessor.lumis[year])
    self._xsec = xsec
    self._corrections = corrections

    self._accumulator = processor.dict_accumulator({
        'sumw': hist.Hist(
            'sumw',
            hist.Cat('dataset', 'Dataset'),
            hist.Bin('sumw', 'Weight value', [0.])),
        'yields': hist.Hist(
            'Events',
            hist.Cat('dataset', 'Dataset'),
            hist.Bin('yields', 'Yield', [0, 1])),
    })
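# Sketch of the normalisation arithmetic implied by self._lumi above:
# lumis[year] is in /fb, so multiplying by 1000 gives /pb to match cross
# sections quoted in pb. The per-dataset scale applied downstream is then
# xsec * lumi / sum-of-weights; all numbers below are placeholders.
lumi_pb = 1000. * 59.7      # assumed 2018-like luminosity, /fb -> /pb
xsec_pb = 6025.2            # placeholder cross section in pb
sumw = 1.2e8                # placeholder generator sum of weights
event_weight = xsec_pb * lumi_pb / sumw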
def __init__(self): dataset_axis = hist.Cat("dataset", "Dataset") dataset_axis = hist.Cat("dataset", "Dataset") jetPt_axis = hist.Bin('jetPt', 'jetPt', btagEff_ptBins) jetEta_axis = hist.Bin('jetEta', 'jetEta', btagEff_etaBins) jetFlav_axis = hist.Bin('jetFlav', 'jetFlav', [0, 4, 5, 6]) self._accumulator = processor.dict_accumulator({ 'hJets': hist.Hist("Counts", dataset_axis, jetPt_axis, jetEta_axis, jetFlav_axis), 'hBJets': hist.Hist("Counts", dataset_axis, jetPt_axis, jetEta_axis, jetFlav_axis), })
def __init__(self, samples, objects, selection, corrections, functions, columns):
    self._samples = samples
    self._columns = columns
    self._objects = objects
    self._selection = selection
    self._corrections = corrections
    self._functions = functions

    # Object variables
    self._e = {}
    self._mu = {}
    self._jet = {}
    self._e['id'] = 'Electron_cutBased'
    self._e['dxy'] = 'Electron_dxy'
    self._e['dz'] = 'Electron_dz'
    self._mu['tight_id'] = 'Muon_tightId'
    self._mu['mediumId'] = 'Muon_mediumId'
    self._mu['dxy'] = 'Muon_dxy'
    self._mu['dz'] = 'Muon_dz'
    self._mu['iso'] = 'Muon_pfRelIso04_all'
    self._jet['id'] = 'Jet_jetId'

    # Create the histograms
    # 'name': hist.Hist("Ytitle", hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("level", "level"), hist.Cat("syst", "syst"), hist.Bin("name", "X axis (GeV)", 20, 0, 100)),
    self._accumulator = processor.dict_accumulator({
        'dummy': hist.Hist(
            "Dummy", hist.Cat("sample", "sample"),
            hist.Bin("dummy", "Number of events", 1, 0, 1)),
        'lep0pt': hist.Hist(
            "Events", hist.Cat("sample", "sample"),
            hist.Cat("channel", "channel"), hist.Cat("level", "level"),
            hist.Bin("lep0pt", r"Leading lepton $p_{T}$ (GeV)", 20, 0, 200)),
        'lep0eta': hist.Hist(
            "Events", hist.Cat("sample", "sample"),
            hist.Cat("channel", "channel"), hist.Cat("level", "level"),
            hist.Bin("lep0eta", r"Leading lepton $\eta$", 15, -2.5, 2.50)),
        'invmass': hist.Hist(
            "Events", hist.Cat("sample", "sample"),
            hist.Cat("channel", "channel"), hist.Cat("level", "level"),
            hist.Bin("invmass", r"$m_{\ell\ell}$ (GeV)", 20, 0, 200)),
    })
def postprocess(self, accumulator):
    origidentity = list(accumulator)
    for k in origidentity:
        if self.data_type == 'bkg':
            accumulator[k].scale(bkgSCALE, axis='dataset')
            accumulator[k + '_cat'] = accumulator[k].group(
                "dataset",
                hist.Cat("cat", "datasets", sorting='integral'),
                bkgMAP)
        if self.data_type == 'data':
            accumulator[k + '_cat'] = accumulator[k].group(
                "dataset",
                hist.Cat("cat", "datasets"),
                dataMAP)
    return accumulator
def scale_and_merge(histogram, samples, fileset, nano_mapping, lumi=60):
    """
    Scale NanoAOD samples to a physical cross section.
    Merge NanoAOD samples into categories, e.g. several ttZ samples into one ttZ category.

    histogram    -- coffea histogram
    samples      -- samples dictionary that contains the x-sec and sumWeight
    fileset      -- fileset dictionary used in the coffea processor
    nano_mapping -- dictionary to map NanoAOD samples into categories
    lumi         -- integrated luminosity in 1/fb
    """
    temp = histogram.copy()

    # scale according to cross sections
    scales = {
        sample: lumi * 1000 * samples[sample]['xsec'] / samples[sample]['sumWeight']
        for sample in samples if sample in fileset
    }
    temp.scale(scales, axis='dataset')

    # merge categorical axes (example from coffea tutorial):
    #mapping = {
    #    'all samples': ['sample 1', 'sample 2'],
    #    'just sample 1': ['sample 1'],
    #}
    temp = temp.group("dataset", hist.Cat("dataset", "new grouped dataset"),
                      nano_mapping)  # this is not in place

    return temp
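# Hedged usage sketch for scale_and_merge; every sample name, cross section,
# sum of weights, and file path below is a placeholder.
import numpy as np
from coffea import hist

histogram = hist.Hist(
    "Events",
    hist.Cat("dataset", "dataset"),
    hist.Bin("pt", "pt [GeV]", 10, 0, 100),
)
histogram.fill(dataset="ttZ_part1", pt=np.array([25.0, 60.0]))
histogram.fill(dataset="ttZ_part2", pt=np.array([80.0]))

samples = {
    "ttZ_part1": {"xsec": 0.25, "sumWeight": 1e6},
    "ttZ_part2": {"xsec": 0.25, "sumWeight": 2e6},
}
fileset = {"ttZ_part1": ["f1.root"], "ttZ_part2": ["f2.root"]}
nano_mapping = {"ttZ": ["ttZ_part1", "ttZ_part2"]}

merged = scale_and_merge(histogram, samples, fileset, nano_mapping, lumi=60)
print([x.name for x in merged.identifiers("dataset")])  # ['ttZ']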
def cmerge(output_file, input_files, force=False):
    print("cmerge(output_file={}, input_files={})".format(output_file, input_files))
    if os.path.isfile(output_file) and not force:
        raise ValueError(
            "Output file {} already exists. Use option force to overwrite."
            .format(output_file))

    output = None
    for input_file in input_files:
        this_content = util.load(input_file)

        # Merge datasets to save space
        keys = list(this_content.keys())
        for key in keys:
            if "Bcands" in key or "cutflow" in key:
                continue
            if type(this_content[key]).__name__ == "Hist":
                if "dataset" in [x.name for x in this_content[key].axes()]:
                    subjobs = this_content[key].axis("dataset").identifiers()
                    mapping = {}
                    for subjob in subjobs:
                        runp = re_subjob.search(subjob.name).group()
                        if runp not in mapping:
                            mapping[runp] = []
                        mapping[runp].append(subjob.name)
                    this_content[key] = this_content[key].group(
                        "dataset", hist.Cat("dataset", "Primary dataset"),
                        mapping)

        if not output:
            output = this_content
        else:
            output.add(this_content)

    print(f"Saving output to {output_file}")
    util.save(output, output_file)
def test_issue_394():
    dummy = hist.Hist(
        "Dummy",
        hist.Cat("sample", "sample"),
        hist.Bin("dummy", "Number of events", 1, 0, 1),
    )
    dummy.fill(sample="test", dummy=1, weight=0.5)
def reduce(folder, _dataset=None, variable=None):
    lists = {}
    for filename in os.listdir(folder):
        if '.futures' not in filename:
            continue
        if filename.split("____")[0] not in lists:
            lists[filename.split("____")[0]] = []
        lists[filename.split("____")[0]].append(folder + '/' + filename)

    for pdi in lists.keys():
        if _dataset is not None and _dataset not in pdi:
            continue
        tmp = {}
        for filename in lists[pdi]:
            print('Opening:', filename)
            hin = load(filename)
            for k in hin.keys():
                if variable is not None and k != variable:
                    continue
                print('Considering variable', k)
                if k not in tmp:
                    tmp[k] = [hin[k]]
                else:
                    tmp[k].append(hin[k])
            del hin
        for k in tmp:
            tmp_arr = futuresum(np.array(tmp[k]))
            hists = {}
            hists[k] = tmp_arr[0]
            dataset = hist.Cat("dataset", "dataset", sorting='placement')
            dataset_cats = ("dataset",)
            dataset_map = OrderedDict()
            for d in hists[k].identifiers('dataset'):
                if d.name.split("____")[0] not in dataset_map:
                    dataset_map[d.name.split("____")[0]] = (d.name.split("____")[0] + "*",)
            hists[k] = hists[k].group(dataset_cats, dataset, dataset_map)
            print(hists)
            save(hists, folder + '/' + k + '--' + pdi + '.reduced')
def __init__(self, category='00'):
    self.category = category
    dataset_axis = hist.Cat('dataset', 'dataset')
    self._accumulator = processor.dict_accumulator({
        'dphi': processor.column_accumulator(np.zeros(shape=(0,))),
    })
def __init__(self, region='SR', data_type='bkg'):
    self.region = region
    self.data_type = data_type

    dataset_axis = hist.Cat('dataset', 'dataset')
    pt_axis = hist.Bin('pt', '$p_T$ [GeV]', 100, 0, 200)
    invm_axis = hist.Bin('invm', 'mass [GeV]', 100, 0, 200)
    mass_axis = hist.Bin('mass', 'mass [GeV]', 100, 0, 200)
    channel_axis = hist.Bin('channel', 'channel', 3, 0, 3)

    self._accumulator = processor.dict_accumulator({
        'pt0': hist.Hist('Counts', dataset_axis, pt_axis, channel_axis),
        'pt1': hist.Hist('Counts', dataset_axis, pt_axis, channel_axis),
        'ptegm': hist.Hist('Counts', dataset_axis, pt_axis, channel_axis),  # leading EGM-type for 2mu2e channel
        'ptmu': hist.Hist('Counts', dataset_axis, pt_axis, channel_axis),  # leading mu-type for 2mu2e channel
        'invm': hist.Hist('Counts', dataset_axis, invm_axis, channel_axis),
        'massmu': hist.Hist('Counts', dataset_axis, mass_axis, channel_axis),  # mass of mu-type leptonjet
    })

    self.pucorrs = get_pu_weights_function()
    ## NOT applied for now
    self.nlo_w = get_nlo_weight_function('w')
    self.nlo_z = get_nlo_weight_function('z')
def __init__(self, data_type='bkg', bothNeutral=True):
    dataset_axis = hist.Cat('dataset', 'dataset')
    sumpt_axis = hist.Bin('sumpt', r'$\sum p_T$ [GeV]', 50, 0, 50)
    iso_axis = hist.Bin('iso', 'Isolation', np.arange(0, 1, 0.04))
    channel_axis = hist.Bin('channel', 'channel', 3, 0, 3)

    self._accumulator = processor.dict_accumulator({
        'sumpt': hist.Hist('Counts', dataset_axis, sumpt_axis, channel_axis),
        'pfiso': hist.Hist('Counts', dataset_axis, iso_axis, channel_axis),
        'isodbeta': hist.Hist('Counts', dataset_axis, iso_axis, channel_axis),
        'minpfiso': hist.Hist('Counts', dataset_axis, iso_axis, channel_axis),
        'maxpfiso': hist.Hist('Counts', dataset_axis, iso_axis, channel_axis),
        'lj0pfiso': hist.Hist('Counts', dataset_axis, iso_axis, channel_axis),
    })

    self.pucorrs = get_pu_weights_function()
    ## NOT applied for now
    self.nlo_w = get_nlo_weight_function('w')
    self.nlo_z = get_nlo_weight_function('z')

    self.data_type = data_type
    self.bothNeutral = bothNeutral
def __init__(self):
    ## load b-tag SFs
    #self.btag_sf = BTagScaleFactor(os.path.expandvars("$TWHOME/data/DeepCSV_102XSF_V1.btag.csv.gz"), "reshape")

    # we can use a large number of bins and rebin later
    dataset_axis = hist.Cat("dataset", "Primary dataset")
    pt_axis = hist.Bin("pt", r"$p_{T}$ (GeV)", 1000, 0, 1000)

    self._accumulator = processor.dict_accumulator({
        'diboson': processor.defaultdict_accumulator(int),
        'ttbar': processor.defaultdict_accumulator(int),
        'TTW': processor.defaultdict_accumulator(int),
        'TTZ': processor.defaultdict_accumulator(int),
        'TTH': processor.defaultdict_accumulator(int),
        'TTTT': processor.defaultdict_accumulator(int),
        'tW_scattering': processor.defaultdict_accumulator(int),
        'DY': processor.defaultdict_accumulator(int),
        'totalEvents': processor.defaultdict_accumulator(int),
        'passedEvents': processor.defaultdict_accumulator(int),
    })
def __init__(self):
    # we can use a large number of bins and rebin later
    dataset_axis = hist.Cat("dataset", "Primary dataset")
    pt_axis = hist.Bin("pt", r"$p_{T}$ (GeV)", 600, 0, 1000)
    eta_axis = hist.Bin("eta", r"$\eta$", 60, -5.5, 5.5)
    multiplicity_axis = hist.Bin("multiplicity", r"N", 20, -0.5, 19.5)

    self._accumulator = processor.dict_accumulator({
        "MET_pt": hist.Hist("Counts", dataset_axis, pt_axis),
        "Jet_pt": hist.Hist("Counts", dataset_axis, pt_axis),
        "Jet_pt_fwd": hist.Hist("Counts", dataset_axis, pt_axis),
        "Jet_eta": hist.Hist("Counts", dataset_axis, eta_axis),
        "GenJet_pt_fwd": hist.Hist("Counts", dataset_axis, pt_axis),
        "Spectator_pt": hist.Hist("Counts", dataset_axis, pt_axis),
        "Spectator_eta": hist.Hist("Counts", dataset_axis, eta_axis),
        "W_pt_notFromTop": hist.Hist("Counts", dataset_axis, pt_axis),
        "Top_pt": hist.Hist("Counts", dataset_axis, pt_axis),
        "Top_eta": hist.Hist("Counts", dataset_axis, eta_axis),
        "Antitop_pt": hist.Hist("Counts", dataset_axis, pt_axis),
        "Antitop_eta": hist.Hist("Counts", dataset_axis, eta_axis),
        "W_pt": hist.Hist("Counts", dataset_axis, pt_axis),
        "W_eta": hist.Hist("Counts", dataset_axis, eta_axis),
        "N_b": hist.Hist("Counts", dataset_axis, multiplicity_axis),
        "N_jet": hist.Hist("Counts", dataset_axis, multiplicity_axis),
        'cutflow_bkg': processor.defaultdict_accumulator(int),
        'cutflow_signal': processor.defaultdict_accumulator(int),
    })
def __init__(self, dphi_control=False, data_type='sig'):
    self.dphi_control = dphi_control
    self.data_type = data_type

    dataset_axis = hist.Cat('dataset', 'dataset')
    self._accumulator = processor.dict_accumulator({
        'all05': processor.column_accumulator(np.zeros(shape=(0,))),
        'nopu05': processor.column_accumulator(np.zeros(shape=(0,))),
        'dbeta': processor.column_accumulator(np.zeros(shape=(0,))),
        'all05w': processor.column_accumulator(np.zeros(shape=(0,))),
        'nopu05w': processor.column_accumulator(np.zeros(shape=(0,))),
        'dbetaw': processor.column_accumulator(np.zeros(shape=(0,))),
        'pt': processor.column_accumulator(np.zeros(shape=(0,))),
        'eta': processor.column_accumulator(np.zeros(shape=(0,))),
        'wgt': processor.column_accumulator(np.zeros(shape=(0,))),
        'ljtype': processor.column_accumulator(np.zeros(shape=(0,))),
        'channel': processor.column_accumulator(np.zeros(shape=(0,))),
    })

    self.pucorrs = get_pu_weights_function()
    ## NOT applied for now
    self.nlo_w = get_nlo_weight_function('w')
    self.nlo_z = get_nlo_weight_function('z')
def __init__(self):
    ## make binning for hists
    self.dataset_axis = hist.Cat("dataset", "Event Process")
    self.pu_nTrueInt_axis = hist.Bin("pu_nTrueInt", "nTrueInt", 100, 0, 100)
    self.pu_nPU_axis = hist.Bin("pu_nPU", "nPU", 100, 0, 100)

    ## make dictionary of hists
    histo_dict = {}
    histo_dict['PU_nTrueInt'] = hist.Hist("PU_nTrueInt", self.dataset_axis, self.pu_nTrueInt_axis)
    histo_dict['PU_nPU'] = hist.Hist("PU_nPU", self.dataset_axis, self.pu_nPU_axis)

    #set_trace()
    ## construct dictionary of dictionaries to hold meta info for each sample
    for sample in fileset.keys():
        if 'Int' in sample:
            histo_dict['%s_pos' % sample] = processor.defaultdict_accumulator(int)
            histo_dict['%s_pos_runs_to_lumis' % sample] = processor.value_accumulator(list)
            histo_dict['%s_neg' % sample] = processor.defaultdict_accumulator(int)
            histo_dict['%s_neg_runs_to_lumis' % sample] = processor.value_accumulator(list)
        else:
            histo_dict[sample] = processor.defaultdict_accumulator(int)
            histo_dict['%s_runs_to_lumis' % sample] = processor.value_accumulator(list)

    self._accumulator = processor.dict_accumulator(histo_dict)
    self.sample_name = ''
def scale_file(file):
    print('Loading file:', file)
    hists = load(file)

    scalez = False
    if '2016' in file:
        scalez = True

    pd = []
    for d in hists['sumw'].identifiers('dataset'):
        dataset = d.name
        if dataset.split("____")[0] not in pd:
            pd.append(dataset.split("____")[0])
    print('List of primary datasets:', pd)

    ##
    # Aggregate all the histograms that belong to a single dataset
    ##
    dataset = hist.Cat("dataset", "dataset", sorting='placement')
    dataset_cats = ("dataset",)
    dataset_map = OrderedDict()
    for pdi in pd:
        dataset_map[pdi] = (pdi + "*",)
    for key in hists.keys():
        hists[key] = hists[key].group(dataset_cats, dataset, dataset_map)
    print('Datasets aggregated')

    return scale(hists, scalez)
def __init__(self):
    # Create the histograms
    self._accumulator = processor.dict_accumulator({
        'dummy': hist.Hist(
            "Dummy", hist.Cat("sample", "sample"),
            hist.Bin("dummy", "Number of events", 1, 0, 1)),
    })
def main():
    raw = False
    if len(sys.argv) < 2:
        print("Enter year")
        return
    year = sys.argv[1]

    with open('xsec.json') as f:
        xs = json.load(f)

    with open('pmap.json') as f:
        pmap = json.load(f)

    indir = "outfiles/"
    infiles = subprocess.getoutput("ls " + indir + year + "*.coffea").split()
    outsum = processor.dict_accumulator()

    # Check if pickle exists, remove it if it does
    picklename = str(year) + '/templates.pkl'
    if os.path.isfile(picklename):
        os.remove(picklename)

    started = 0
    for filename in infiles:
        print("Loading " + filename)
        if os.path.isfile(filename):
            out = util.load(filename)
            if started == 0:
                outsum['templates'] = out['templates']
                outsum['sumw'] = out['sumw']
                started += 1
            else:
                outsum['templates'].add(out['templates'])
                outsum['sumw'].add(out['sumw'])
            del out

    scale_lumi = {k: xs[k] * 1000 * lumis[year] / w for k, w in outsum['sumw'].items()}
    outsum['templates'].scale(scale_lumi, 'dataset')
    templates = outsum['templates'].group('dataset', hist.Cat('process', 'Process'), pmap)
    del outsum

    with open(picklename, 'wb') as outfile:
        pickle.dump(templates, outfile, protocol=-1)
    return
def __init__(self):
    dataset_axis = hist.Cat('dataset', 'dataset')
    lxy_axis = hist.Bin('lxy', 'lxy [cm]', 100, 0, 700)
    reso_axis = hist.Bin('reso', '($p_T$(reco)-$p_T$(gen))/$p_T$(gen)', 100, -1, 2)
    reco_axis = hist.Cat('reco', 'reco type')
    self._accumulator = processor.dict_accumulator({
        'lxy': hist.Hist('Counts', dataset_axis, lxy_axis, reco_axis),
        'lxy-pf': hist.Hist('Counts', dataset_axis, lxy_axis, reco_axis),
        'lxy-dsa': hist.Hist('Counts', dataset_axis, lxy_axis, reco_axis),
        'reso': hist.Hist('Norm. Frequency/0.03', dataset_axis, reso_axis, reco_axis),
    })