Example 1
    def __init__(self, year='2018', corrections=None):
        self._year = year

        # avoid a shared mutable default argument
        self._corrections = corrections if corrections is not None else {}
        self._rochester = lookup_tools.rochester_lookup.rochester_lookup(
            self._corrections['rochester_data'])

        dataset_axis = hist.Cat("dataset", "Primary dataset")
        channel_axis = hist.Cat("channel", "Channel")
        zmass_axis = hist.Bin("mass", r"$m_{2\ell}$ [GeV]", 240, 0, 120)
        met_axis = hist.Bin("met", r"$E_{T}^{miss}$ [GeV]", 3000, 0, 3000)
        npvs_axis = hist.Bin("npvs", "Number of Vertices", 120, 0, 120)

        self._selections = ['massWindow']

        hist.Hist.DEFAULT_DTYPE = 'f'  # save some space by keeping float bin counts instead of double
        self._accumulator = processor.dict_accumulator()
        for sel in self._selections:
            self._accumulator[sel + '_zmass'] = hist.Hist(
                "Counts", dataset_axis, channel_axis, zmass_axis)
            self._accumulator[sel + '_met'] = hist.Hist(
                "Counts", dataset_axis, channel_axis, met_axis)
            self._accumulator[sel + '_pileup'] = hist.Hist(
                "Counts", dataset_axis, channel_axis, npvs_axis)

        self._accumulator['cutflow'] = processor.defaultdict_accumulator(int)
        self._accumulator['sumw'] = processor.defaultdict_accumulator(int)
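
A minimal, self-contained sketch of how one of the histograms defined above is filled and read back; the dataset name and the toy mass values are assumptions for illustration, not from the original processor:

import numpy as np
from coffea import hist

zmass_axis = hist.Bin("mass", r"$m_{2\ell}$ [GeV]", 240, 0, 120)
h = hist.Hist("Counts", hist.Cat("dataset", "Primary dataset"),
              hist.Cat("channel", "Channel"), zmass_axis)
# toy fill: 1000 dimuon masses around the Z peak (assumed values)
h.fill(dataset="DYJetsToLL", channel="mm", mass=np.random.normal(91.2, 3.0, 1000))
print(h.values())  # {('DYJetsToLL', 'mm'): array of 240 bin counts}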
Example 2
    def __init__(self, year, ids, xsec, common):
        self._year = year

        self._lumi = 1000. * float(PhotonPurity.lumis[year])
        self._xsec = xsec

        self._accumulator = processor.dict_accumulator({
            'sumw':
            hist.Hist('sumw', hist.Cat('dataset', 'Dataset'),
                      hist.Bin('sumw', 'Weight value', [0.])),
            'count':
            hist.Hist('Events', hist.Cat('dataset', 'Dataset'),
                      hist.Cat('cat', 'Cat'),
                      hist.Bin('pt', 'Photon pT', 50, 200, 1200),
                      hist.Bin('sieie', 'sieie', 100, 0, 0.02))
        })

        self._singlephoton_triggers = {
            '2016': ['Photon175', 'Photon165_HE10'],
            '2017': ['Photon200'],
            '2018': ['Photon200']
        }

        self._ids = ids
        self._common = common
Example 3
    def __init__(self, corrections):

        ## make binning for hists
        self.dataset_axis = hist.Cat("dataset", "Event Process")
        self.jetmult_axis = hist.Cat("jmult", "nJets")
        self.leptype_axis = hist.Cat("leptype", "Lepton Type")
        self.lepcat_axis = hist.Cat("lepcat", "Lepton Category")
        self.btag_axis = hist.Cat("btag", "btagging Category")
        self.lepIso_axis = hist.Bin("iso", "pfRelIso", 2000, 0., 20.)
        self.mtt_axis = hist.Bin("mtt", r"m($t\overline{t}$) [GeV]", 180, 200,
                                 2000)
        self.ctstar_axis = hist.Bin("ctstar", "cos($\\theta^{*}$)", 200, -1.,
                                    1.)

        ## make dictionary of hists
        histo_dict = {}
        ## make jet hists
        hists = self.make_hists()
        histo_dict.update(hists)

        histo_dict['cutflow'] = processor.defaultdict_accumulator(int)

        self._accumulator = processor.dict_accumulator(histo_dict)
        self.sample_name = ''
        self.corrections = corrections
        self.isData = True
Example 4
    def __init__(self, isMC):
        self._isMC = isMC

        # Histograms
        dataset_axis = hist.Cat("dataset", "Primary dataset")
        selection_axis = hist.Cat("selection", "Selection name")

        self._accumulator = processor.dict_accumulator()
        self._accumulator["total_events"] = processor.defaultdict_accumulator(
            int)

        # Define histograms here
        self._accumulator["mjjj"] = hist.Hist(
            "Events",
            dataset_axis,
            selection_axis,
            hist.Bin("mjjj", r"$M_{jjj}$ [GeV]", dijet_binning),
        )

        for pair in [(0, 1), (1, 2), (2, 0)]:
            self._accumulator[f"m{pair[0]}{pair[1]}"] = hist.Hist(
                "Events", dataset_axis, selection_axis,
                hist.Bin(f"m{pair[0]}{pair[1]}",
                         f"$m_{{{pair[0]}{pair[1]}}}$ [GeV]", dijet_binning))
            # Delta R and Delta eta are dimensionless, so no [GeV] in the labels
            self._accumulator[f"dR{pair[0]}{pair[1]}"] = hist.Hist(
                "Events", dataset_axis, selection_axis,
                hist.Bin(f"dR{pair[0]}{pair[1]}",
                         f"$\\Delta R_{{{pair[0]}{pair[1]}}}$", 100, 0., 4))
            self._accumulator[f"dEta{pair[0]}{pair[1]}"] = hist.Hist(
                "Events", dataset_axis, selection_axis,
                hist.Bin(f"dEta{pair[0]}{pair[1]}",
                         f"$\\Delta \\eta_{{{pair[0]}{pair[1]}}}$", 100, 0., 2))
            self._accumulator[f"m{pair[0]}{pair[1]}overM"] = hist.Hist(
                "Events", dataset_axis, selection_axis,
                hist.Bin(f"m{pair[0]}{pair[1]}overM",
                         f"$m_{{{pair[0]}{pair[1]}}}/M_{{jjj}}$", 100, 0, 1))

        for jet in [0, 1, 2]:
            self._accumulator[f"pt{jet}"] = hist.Hist(
                "Events", dataset_axis, selection_axis,
                hist.Bin(f"pt{jet}", f"$p^{{T}}_{{{jet}}}$ [GeV]",
                         dijet_binning))
            self._accumulator[f"eta{jet}"] = hist.Hist(
                "Events", dataset_axis, selection_axis,
                hist.Bin(f"eta{jet}", f"$\\eta_{{{jet}}}$", 100, -3, 3))
            self._accumulator[f"ptoverM{jet}"] = hist.Hist(
                "Events", dataset_axis, selection_axis,
                hist.Bin(f"ptoverM{jet}", f"$p^{{T}}_{{{jet}}}/M_{{jjj}}$",
                         100, 0, 2.5))
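
Because the bin names above are built with f-strings, filling them requires a matching keyword name; a hedged sketch (the binning and the mass values are assumptions):

import numpy as np
from coffea import hist

dijet_binning = list(range(0, 2050, 50))  # assumed binning, not from the original
pair = (0, 1)
name = f"m{pair[0]}{pair[1]}"
h = hist.Hist("Events", hist.Cat("dataset", "Primary dataset"),
              hist.Bin(name, f"$m_{{{pair[0]}{pair[1]}}}$ [GeV]", dijet_binning))
# the fill keyword must match the Bin name, hence the **-expansion
h.fill(dataset="QCD", **{name: np.array([250., 400., 1200.])})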
Example 5
    def __init__(self, year):
        self._year = year
        self._trigger = {
            2016: {
                "e": [
                    "Ele27_WPTight_Gsf",
                    "Ele45_WPLoose_Gsf",
                    "Ele25_eta2p1_WPTight_Gsf",
                    "Ele115_CaloIdVT_GsfTrkIdT",
                    "Ele15_IsoVVL_PFHT350",
                    "Ele15_IsoVVVL_PFHT400",
                    "Ele45_CaloIdVT_GsfTrkIdT_PFJet200_PFJet50",
                    "Ele50_CaloIdVT_GsfTrkIdT_PFJet165",
                    ],
                "mu": [
                    "IsoMu24",
                    "IsoTkMu24",
                    "Mu50",
                    "TkMu50",
                    "Mu15_IsoVVVL_PFHT400",
                    "Mu15_IsoVVVL_PFHT350",
                ],
            }
        }
        self._trigger = self._trigger[int(self._year)]

        self._accumulator = processor.dict_accumulator({
            'sumw': processor.defaultdict_accumulator(float),
            'cutflow': hist.Hist(
                'Events',
                hist.Cat('dataset', 'Dataset'),
                hist.Cat('channel', 'Channel'),
                hist.Bin('cut', 'Cut index', 9, 0, 9),
            ),
            })
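
A hedged sketch of how a per-channel trigger list like this is typically reduced to an event mask inside process(); the events.HLT access pattern is an assumption about the NanoEvents input:

import numpy as np

def trigger_mask(events, paths):
    """OR of all listed HLT paths; paths absent from the file are skipped."""
    mask = np.zeros(len(events), dtype=bool)
    for path in paths:
        if path in events.HLT.fields:
            mask = mask | np.asarray(events.HLT[path])
    return mask

# usage inside process(): mask = trigger_mask(events, self._trigger['mu'])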
Example 6
    def __init__(self, era=2018):
        datasets_axis = hist.Cat("dataset", "Signal Model")
        category_axis = hist.Cat("region", "Lepton category")
        sys_axis = hist.Cat("syst", "systematic variation")
        MT1_axis = hist.Bin("MT1", r"$M_{T,1}$ [GeV]", 500, 0, 2000)
        MT2_axis = hist.Bin("MT2", r"$M_{T,2}$ [GeV]", 500, 0, 2000)
        MT3_axis = hist.Bin("MT3", r"$M_{T,3}$ [GeV]", 500, 0, 2000)
        ST1_axis = hist.Bin("ST1", r"$S_{T,1}$ [GeV]", 500, 0, 2000)
        MET_axis = hist.Bin("MET", r"$E_{T}^{miss}$ [GeV]", 500, 0, 2000)
        RT1_axis = hist.Bin("RT1", r"$R_{T}$", 500, 0, 200)

        self._accumulator = processor.dict_accumulator({
            'MET': hist.Hist("Events", datasets_axis, category_axis, MET_axis),
            'MT1': hist.Hist("Events", datasets_axis, category_axis, MT1_axis),
            'MT2': hist.Hist("Events", datasets_axis, category_axis, MT2_axis),
            'MT3': hist.Hist("Events", datasets_axis, category_axis, MT3_axis),
            'RT1': hist.Hist("Events", datasets_axis, category_axis, RT1_axis),
            'cutflow': processor.defaultdict_accumulator(int),
        })

        with open(f"{os.path.dirname(__file__)}/xsections_{era}.yaml") as stream:
            self.xsections = yaml.safe_load(stream)
        self.lumi = {
            2016: 35.9,
            2017: 41.5,
            2018: 60.0
        }[era]
Example 7
  def __init__(self):
    # Histograms
    dataset_axis = hist.Cat("dataset", "Primary dataset")
    selection_axis = hist.Cat("selection", "Selection name")

    self._accumulator = processor.dict_accumulator()
    self._accumulator["nevents"] = processor.defaultdict_accumulator(int)
    self._accumulator["run_counter"] = processor.defaultdict_accumulator(partial(processor.defaultdict_accumulator, int))
Example 8
 def GroupProcesses(self, prdic=None):
     ''' Move from grouping in samples to grouping in processes '''
     if prdic: self.SetProcessDic(prdic)
     for k in self.hists.keys():
         if len(self.hists[k].identifiers('sample')) == 0: continue
         self.hists[k] = self.hists[k].group(
             hist.Cat(self.sampleLabel, self.sampleLabel),
             hist.Cat(self.processLabel, self.processLabel), self.prDic)
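
A hedged usage sketch, where plotter stands for an instance of the class above and the process dictionary maps new process names to lists of sample names, the mapping shape coffea's group() expects:

# hypothetical process dictionary; names are illustrative only
prdic = {
    'ttbar': ['TTTo2L2Nu', 'TTToSemiLeptonic'],
    'DY':    ['DYJetsToLL_M-50'],
}
plotter.GroupProcesses(prdic)  # every stored histogram now carries a process axis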
Example 9
    def __init__(self, year='2017'):
        self._year = year
        self._triggers = {
            '2017': [
                'PFHT1050',
                'AK8PFJet400_TrimMass30',
                'AK8PFJet420_TrimMass30',
                'AK8PFHT800_TrimMass50',
                'PFJet500',
                'AK8PFJet500',
                'AK8PFJet550',
                'CaloJet500_NoJetID',
                'CaloJet550_NoJetID', 
                 ]
        }
        self._muontriggers = {
            '2017': [
                'Mu50', 
                #'TkMu50',
                 ]
        }
        self._accumulator = processor.dict_accumulator({
            'sumw': processor.defaultdict_accumulator(float),
            'templates': hist.Hist(
                'Events',
                hist.Cat('dataset', 'Dataset'),
                hist.Cat('region', 'Region'),
                #hist.Cat('systematic', 'Systematic'),
                hist.Bin('pt', r'Jet $p_{T}$ [GeV]', 25,500,1000),#[525,575,625,700,800,1500]),#np.arange(525,2000,50)),
                hist.Bin('msd', r'Jet $m_{sd}$', 23, 40, 300),
                #hist.Bin('gru', 'GRU value',20,0.,1.),
                #hist.Bin('gruddt', 'GRU$^{DDT}$ value',[-1,0,1]),
                #hist.Bin('rho', 'jet rho', 20,-5.5,-2.),#[-5.5,-5.,-4.5,-4.,-3.5,-3.,-2.5,-2.]),
                #hist.Bin('n2', 'N$_2$ value', 20, 0., 0.5),
                #hist.Bin('n2ddt', 'N$_2^{DDT}$ value', 21, -0.3, 0.3),
                #hist.Bin('Vmatch', 'Matched to V', [-1,0,1]),
                hist.Bin('in_v3_ddt', 'IN$^{DDT}$  value', 20, -1, 0.5),
                hist.Bin('mu_pt', r'Leading muon $p_{T}$', 20, 50., 700.),
                hist.Bin('mu_pfRelIso04_all', 'Muon pfRelIso04 isolation', 20, 0., 1.),
                #hist.Bin('nPFConstituents', 'Number of PF candidates',41,20,60),
                #hist.Bin('nJet', 'Number of fat jets', 10,0,9), 
            ),
            #'gruddt' : hist.Hist(
            #    hist.Cat('dataset', 'Dataset'),
            #    hist.Cat('region', 'Region'),
            #'cutflow': hist.Hist(
            #    'Events',
            #    hist.Cat('dataset', 'Dataset'),
            #    hist.Cat('region', 'Region'),
            #    hist.Bin('cut', 'Cut index', 11, 0, 11),
            #),
            'cutflow_signal' : processor.defaultdict_accumulator(partial(processor.defaultdict_accumulator, float)),
            'cutflow_ttbar_muoncontrol' : processor.defaultdict_accumulator(partial(processor.defaultdict_accumulator, float)),

        })
Example 10
 def __init__(self, year='2017'):
     self._year = year
     self._accumulator = hist.Hist(
         'Events',
         hist.Cat('dataset', 'Dataset'),
         hist.Cat('tagger', 'Tagger'),
         hist.Cat('btag', 'BTag WP pass/fail'),
         hist.Bin('flavor', 'Jet hadronFlavour', [0, 4, 5, 6]),
         hist.Bin('pt', 'Jet pT',
                  [20, 30, 50, 70, 100, 140, 200, 300, 600, 1000]),
         hist.Bin('abseta', 'Jet abseta', [0, 1.4, 2.0, 2.5]),
     )
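
Since the accumulator is a single histogram with a pass/fail btag category, an efficiency map can be projected out after processing; a hedged sketch, assuming h is the accumulated histogram and the category was filled with the labels 'pass' and 'fail':

import numpy as np

num = h.integrate('btag', 'pass').values()   # passing counts per (dataset, tagger)
den = h.sum('btag').values()                 # total counts
eff = {key: num[key] / np.maximum(den[key], 1) for key in den}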
Example 11
 def __init__(self):
     dataset_axis = hist.Cat('dataset', 'dataset')
     lxy_axis = hist.Bin('lxy', 'lxy [cm]', 100, 0, 250)
     reco_axis = hist.Cat('reco', 'reco type')
     self._accumulator = processor.dict_accumulator({
         'lxy':
         hist.Hist('Counts', dataset_axis, lxy_axis, reco_axis),
         'lxy-el':
         hist.Hist('Counts', dataset_axis, lxy_axis, reco_axis),
         'lxy-pho':
         hist.Hist('Counts', dataset_axis, lxy_axis, reco_axis),
     })
Example 12
 def __init__(self, year, xsec, corrections):
     self._year = year
     self._lumi = 1000. * float(AnalysisProcessor.lumis[year])
     self._xsec = xsec
     self._corrections = corrections
     self._accumulator = processor.dict_accumulator({
         'sumw':
         hist.Hist('sumw', hist.Cat('dataset', 'Dataset'),
                   hist.Bin('sumw', 'Weight value', [0.])),
         'yields':
         hist.Hist('Events', hist.Cat('dataset', 'Dataset'),
                   hist.Bin('yields', 'Yield', [0, 1])),
     })
Example 13
 def __init__(self):
     dataset_axis = hist.Cat("dataset", "Dataset")
     jetPt_axis = hist.Bin('jetPt', 'jetPt', btagEff_ptBins)
     jetEta_axis = hist.Bin('jetEta', 'jetEta', btagEff_etaBins)
     jetFlav_axis = hist.Bin('jetFlav', 'jetFlav', [0, 4, 5, 6])
     self._accumulator = processor.dict_accumulator({
         'hJets':
         hist.Hist("Counts", dataset_axis, jetPt_axis, jetEta_axis,
                   jetFlav_axis),
         'hBJets':
         hist.Hist("Counts", dataset_axis, jetPt_axis, jetEta_axis,
                   jetFlav_axis),
     })
Example 14
    def __init__(self, samples, objects, selection, corrections, functions,
                 columns):
        self._samples = samples
        self._columns = columns
        self._objects = objects
        self._selection = selection
        self._corrections = corrections
        self._functions = functions

        # Object variables
        self._e = {}
        self._mu = {}
        self._jet = {}

        self._e['id'] = 'Electron_cutBased'
        self._e['dxy'] = 'Electron_dxy'
        self._e['dz'] = 'Electron_dz'

        self._mu['tight_id'] = 'Muon_tightId'
        self._mu['mediumId'] = 'Muon_mediumId'
        self._mu['dxy'] = 'Muon_dxy'
        self._mu['dz'] = 'Muon_dz'
        self._mu['iso'] = 'Muon_pfRelIso04_all'

        self._jet['id'] = 'Jet_jetId'

        # Create the histograms
        # 'name' : hist.Hist("Ytitle", hist.Cat("sample", "sample"), hist.Cat("channel", "channel"), hist.Cat("level", "level"), hist.Cat("syst", "syst"), hist.Bin("name", "X axis (GeV)", 20, 0, 100)),
        self._accumulator = processor.dict_accumulator({
            'dummy':
            hist.Hist("Dummy", hist.Cat("sample", "sample"),
                      hist.Bin("dummy", "Number of events", 1, 0, 1)),
            'lep0pt':
            hist.Hist(
                "Events", hist.Cat("sample", "sample"),
                hist.Cat("channel", "channel"), hist.Cat("level", "level"),
                hist.Bin("lep0pt", "Leading lepton $p_{T}$ (GeV)", 20, 0,
                         200)),
            'lep0eta':
            hist.Hist(
                "Events", hist.Cat("sample", "sample"),
                hist.Cat("channel", "channel"), hist.Cat("level", "level"),
                hist.Bin("lep0eta", "Leading lepton $\eta$ ", 15, -2.5, 2.50)),
            'invmass':
            hist.Hist("Events", hist.Cat("sample", "sample"),
                      hist.Cat("channel", "channel"),
                      hist.Cat("level", "level"),
                      hist.Bin("invmass", "$m_{\ell\ell}$ (GeV) ", 20, 0,
                               200)),
        })
Example 15
 def postprocess(self, accumulator):
     origidentity = list(accumulator)
     for k in origidentity:
         if self.data_type == 'bkg':
             accumulator[k].scale(bkgSCALE, axis='dataset')
             accumulator[k + '_cat'] = accumulator[k].group(
                 "dataset", hist.Cat("cat", "datasets", sorting='integral'),
                 bkgMAP)
         if self.data_type == 'data':
             accumulator[k + '_cat'] = accumulator[k].group(
                 "dataset", hist.Cat(
                     "cat",
                     "datasets",
                 ), dataMAP)
     return accumulator
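
For reference, a hedged sketch of the shapes the module-level globals used above would need; the names and numbers are assumptions, mirroring the scale()/group() signatures:

# per-dataset scale factors for hist.scale(..., axis='dataset')
bkgSCALE = {'QCD_HT500to700': 0.12, 'WJetsToLNu': 0.034}
# dataset -> category mappings for hist.group()
bkgMAP  = {'QCD': ['QCD_HT500to700'], 'V+jets': ['WJetsToLNu']}
dataMAP = {'data': ['SingleMuon_2018A', 'SingleMuon_2018B']}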
Example 16
def scale_and_merge(histogram, samples, fileset, nano_mapping, lumi=60):
    """
    Scale NanoAOD samples to a physical cross section.
    Merge NanoAOD samples into categories, e.g. several ttZ samples into one ttZ category.
    
    histogram -- coffea histogram
    samples -- samples dictionary that contains the x-sec and sumWeight
    fileset -- fileset dictionary used in the coffea processor
    nano_mapping -- dictionary to map NanoAOD samples into categories
    lumi -- integrated luminosity in 1/fb
    """
    temp = histogram.copy()

    # scale according to cross sections
    scales = {
        sample:
        lumi * 1000 * samples[sample]['xsec'] / samples[sample]['sumWeight']
        for sample in samples if sample in fileset
    }
    temp.scale(scales, axis='dataset')

    # merge according to categories:
    # merge categorical axes (example from coffea tutorial)
    #mapping = {
    #    'all samples': ['sample 1', 'sample 2'],
    #    'just sample 1': ['sample 1'],
    #}
    temp = temp.group("dataset", hist.Cat("dataset", "new grouped dataset"),
                      nano_mapping)  # this is not in place

    return temp
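
A hedged usage example with toy inputs; the sample names, cross sections, and sum-of-weights values are assumptions:

samples = {
    'ttZ_ll': {'xsec': 0.253, 'sumWeight': 1.2e6},
    'ttZ_qq': {'xsec': 0.530, 'sumWeight': 0.9e6},
}
fileset = {'ttZ_ll': ['ttZ_ll.root'], 'ttZ_qq': ['ttZ_qq.root']}
nano_mapping = {'ttZ': ['ttZ_ll', 'ttZ_qq']}
merged = scale_and_merge(histogram, samples, fileset, nano_mapping, lumi=60)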
Example 17
def cmerge(output_file, input_files, force=False):
    print("cmerge(output_file={}, input_files={}".format(
        output_file, input_files))
    if os.path.isfile(output_file) and not force:
        raise ValueError(
            "Output file {} already exists. Use option force to overwrite.".
            format(output_file))
    output = None
    for input_file in input_files:
        this_content = util.load(input_file)
        # Merge datasets to save space
        keys = list(this_content.keys())
        for key in keys:
            if "Bcands" in key or "cutflow" in key:
                continue
            if type(this_content[key]).__name__ == "Hist":
                if "dataset" in [x.name for x in this_content[key].axes()]:
                    subjobs = this_content[key].axis("dataset").identifiers()
                    mapping = {}
                    for subjob in subjobs:
                        runp = re_subjob.search(subjob.name).group()
                        if runp not in mapping:
                            mapping[runp] = []
                        mapping[runp].append(subjob.name)
                    this_content[key] = this_content[key].group(
                        "dataset", hist.Cat("dataset", "Primary dataset"),
                        mapping)

        if not output:
            output = this_content
        else:
            output.add(this_content)
    print(f"Saving output to {output_file}")
    util.save(output, output_file)
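
cmerge relies on a module-level re_subjob regex whose definition is not shown; a hedged guess, consistent with how .search().group() extracts a run-period key above:

import re

# assumed pattern: pulls a run-period tag such as 'Run2018A' out of a
# subjob name; the real pattern in the original source may differ
re_subjob = re.compile(r"Run20[0-9]{2}[A-H]")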
Example 18
def test_issue_394():
    dummy = hist.Hist(
        "Dummy",
        hist.Cat("sample", "sample"),
        hist.Bin("dummy", "Number of events", 1, 0, 1),
    )
    dummy.fill(sample="test", dummy=1, weight=0.5)
Example 19
def reduce(folder, _dataset=None, variable=None):

     lists = {}
     for filename in os.listdir(folder):
          if '.futures' not in filename: continue
          if filename.split("____")[0] not in lists: lists[filename.split("____")[0]] = []
          lists[filename.split("____")[0]].append(folder+'/'+filename)
          
     for pdi in lists.keys():
          if _dataset is not None and _dataset not in pdi: continue
          tmp={}
          for filename in lists[pdi]:
               print('Opening:',filename)
               hin = load(filename)
               for k in hin.keys():
                    if variable is not None and k!=variable: continue
                    print('Considering variable',k)
                    if k not in tmp: tmp[k]=[hin[k]]
                    else: tmp[k].append(hin[k])
               del hin
          for k in tmp:
               tmp_arr=futuresum(np.array(tmp[k]))
               hists = {}
               hists[k]=tmp_arr[0]
               dataset = hist.Cat("dataset", "dataset", sorting='placement')
               dataset_cats = ("dataset",)
               dataset_map = OrderedDict()
               for d in hists[k].identifiers('dataset'):
                    if d.name.split("____")[0] not in dataset_map: dataset_map[d.name.split("____")[0]] = (d.name.split("____")[0]+"*",)
               hists[k] = hists[k].group(dataset_cats, dataset, dataset_map)
               print(hists)
               save(hists, folder+'/'+k+'--'+pdi+'.reduced')
Example 20
 def __init__(self, category='00'):
     self.category = category
     dataset_axis = hist.Cat('dataset', 'dataset')
     self._accumulator = processor.dict_accumulator({
         'dphi':
         processor.column_accumulator(np.zeros(shape=(0, ))),
     })
Example 21
    def __init__(self, region='SR', data_type='bkg'):
        self.region = region
        self.data_type = data_type

        dataset_axis = hist.Cat('dataset', 'dataset')
        pt_axis = hist.Bin('pt', '$p_T$ [GeV]', 100, 0, 200)
        invm_axis = hist.Bin('invm', 'mass [GeV]', 100, 0, 200)
        mass_axis = hist.Bin('mass', 'mass [GeV]', 100, 0, 200)
        channel_axis = hist.Bin('channel', 'channel', 3, 0, 3)

        self._accumulator = processor.dict_accumulator({
            'pt0':
            hist.Hist('Counts', dataset_axis, pt_axis, channel_axis),
            'pt1':
            hist.Hist('Counts', dataset_axis, pt_axis, channel_axis),
            'ptegm':
            hist.Hist('Counts', dataset_axis, pt_axis,
                      channel_axis),  # leading EGM-type for 2mu2e channel
            'ptmu':
            hist.Hist('Counts', dataset_axis, pt_axis,
                      channel_axis),  # leading mu-type for 2mu2e channel
            'invm':
            hist.Hist('Counts', dataset_axis, invm_axis, channel_axis),
            'massmu':
            hist.Hist('Counts', dataset_axis, mass_axis,
                      channel_axis),  # mass of mu-type leptonjet
        })

        self.pucorrs = get_pu_weights_function()
        ## NOT applied for now
        self.nlo_w = get_nlo_weight_function('w')
        self.nlo_z = get_nlo_weight_function('z')
Example 22
    def __init__(self, data_type='bkg', bothNeutral=True):
        dataset_axis = hist.Cat('dataset', 'dataset')
        sumpt_axis = hist.Bin('sumpt', r'$\sum p_T$ [GeV]', 50, 0, 50)
        iso_axis = hist.Bin('iso', 'Isolation', np.arange(0, 1, 0.04))
        channel_axis = hist.Bin('channel', 'channel', 3, 0, 3)
        self._accumulator = processor.dict_accumulator({
            'sumpt':
            hist.Hist('Counts', dataset_axis, sumpt_axis, channel_axis),
            'pfiso':
            hist.Hist('Counts', dataset_axis, iso_axis, channel_axis),
            'isodbeta':
            hist.Hist('Counts', dataset_axis, iso_axis, channel_axis),
            'minpfiso':
            hist.Hist('Counts', dataset_axis, iso_axis, channel_axis),
            'maxpfiso':
            hist.Hist('Counts', dataset_axis, iso_axis, channel_axis),
            'lj0pfiso':
            hist.Hist('Counts', dataset_axis, iso_axis, channel_axis),
        })

        self.pucorrs = get_pu_weights_function()
        ## NOT applied for now
        self.nlo_w = get_nlo_weight_function('w')
        self.nlo_z = get_nlo_weight_function('z')

        self.data_type = data_type
        self.bothNeutral = bothNeutral
Example 23
    def __init__(self):

        ## load b-tag SFs
        #self.btag_sf = BTagScaleFactor(os.path.expandvars("$TWHOME/data/DeepCSV_102XSF_V1.btag.csv.gz"), "reshape")

        # we can use a large number of bins and rebin later
        dataset_axis = hist.Cat("dataset", "Primary dataset")
        pt_axis = hist.Bin("pt", r"$p_{T}$ (GeV)", 1000, 0, 1000)

        self._accumulator = processor.dict_accumulator({
            'diboson':
            processor.defaultdict_accumulator(int),
            'ttbar':
            processor.defaultdict_accumulator(int),
            'TTW':
            processor.defaultdict_accumulator(int),
            'TTZ':
            processor.defaultdict_accumulator(int),
            'TTH':
            processor.defaultdict_accumulator(int),
            'TTTT':
            processor.defaultdict_accumulator(int),
            'tW_scattering':
            processor.defaultdict_accumulator(int),
            'DY':
            processor.defaultdict_accumulator(int),
            'totalEvents':
            processor.defaultdict_accumulator(int),
            'passedEvents':
            processor.defaultdict_accumulator(int),
        })
Example 24
    def __init__(self):

        # we can use a large number of bins and rebin later
        dataset_axis        = hist.Cat("dataset",   "Primary dataset")
        pt_axis             = hist.Bin("pt",        r"$p_{T}$ (GeV)", 600, 0, 1000)
        eta_axis            = hist.Bin("eta",       r"$\eta$", 60, -5.5, 5.5)
        multiplicity_axis   = hist.Bin("multiplicity",         r"N", 20, -0.5, 19.5)

        self._accumulator = processor.dict_accumulator({
            "MET_pt" :          hist.Hist("Counts", dataset_axis, pt_axis),
            "Jet_pt" :          hist.Hist("Counts", dataset_axis, pt_axis),
            "Jet_pt_fwd" :      hist.Hist("Counts", dataset_axis, pt_axis),
            "Jet_eta" :         hist.Hist("Counts", dataset_axis, eta_axis),
            "GenJet_pt_fwd" :   hist.Hist("Counts", dataset_axis, pt_axis),
            "Spectator_pt" :    hist.Hist("Counts", dataset_axis, pt_axis),
            "Spectator_eta" :   hist.Hist("Counts", dataset_axis, eta_axis),
            "W_pt_notFromTop" : hist.Hist("Counts", dataset_axis, pt_axis),
            "Top_pt" :          hist.Hist("Counts", dataset_axis, pt_axis),
            "Top_eta" :         hist.Hist("Counts", dataset_axis, eta_axis),
            "Antitop_pt" :      hist.Hist("Counts", dataset_axis, pt_axis),
            "Antitop_eta" :     hist.Hist("Counts", dataset_axis, eta_axis),
            "W_pt" :            hist.Hist("Counts", dataset_axis, pt_axis),
            "W_eta" :           hist.Hist("Counts", dataset_axis, eta_axis),
            "N_b" :             hist.Hist("Counts", dataset_axis, multiplicity_axis),
            "N_jet" :           hist.Hist("Counts", dataset_axis, multiplicity_axis),
            'cutflow_bkg':      processor.defaultdict_accumulator(int),
            'cutflow_signal':   processor.defaultdict_accumulator(int),
        })
Example 25
    def __init__(self, dphi_control=False, data_type='sig'):
        self.dphi_control = dphi_control
        self.data_type = data_type

        dataset_axis = hist.Cat('dataset', 'dataset')
        self._accumulator = processor.dict_accumulator({
            'all05':
            processor.column_accumulator(np.zeros(shape=(0, ))),
            'nopu05':
            processor.column_accumulator(np.zeros(shape=(0, ))),
            'dbeta':
            processor.column_accumulator(np.zeros(shape=(0, ))),
            'all05w':
            processor.column_accumulator(np.zeros(shape=(0, ))),
            'nopu05w':
            processor.column_accumulator(np.zeros(shape=(0, ))),
            'dbetaw':
            processor.column_accumulator(np.zeros(shape=(0, ))),
            'pt':
            processor.column_accumulator(np.zeros(shape=(0, ))),
            'eta':
            processor.column_accumulator(np.zeros(shape=(0, ))),
            'wgt':
            processor.column_accumulator(np.zeros(shape=(0, ))),
            'ljtype':
            processor.column_accumulator(np.zeros(shape=(0, ))),
            'channel':
            processor.column_accumulator(np.zeros(shape=(0, ))),
        })

        self.pucorrs = get_pu_weights_function()
        ## NOT applied for now
        self.nlo_w = get_nlo_weight_function('w')
        self.nlo_z = get_nlo_weight_function('z')
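
After the job runs, each column_accumulator exposes its concatenated per-chunk arrays through .value; a hedged readback sketch (the runner call is assumed):

import numpy as np

# output = processor.run_uproot_job(...)  # assumed runner invocation
pt = output['pt'].value    # 1-D numpy array of all accumulated values
wgt = output['wgt'].value
print(np.average(pt, weights=wgt))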
Example 26
    def __init__(self):

        ## make binning for hists
        self.dataset_axis = hist.Cat("dataset", "Event Process")
        self.pu_nTrueInt_axis = hist.Bin("pu_nTrueInt", "nTrueInt", 100, 0,
                                         100)
        self.pu_nPU_axis = hist.Bin("pu_nPU", "nPU", 100, 0, 100)

        ## make dictionary of hists
        histo_dict = {}
        histo_dict['PU_nTrueInt'] = hist.Hist("PU_nTrueInt", self.dataset_axis,
                                              self.pu_nTrueInt_axis)
        histo_dict['PU_nPU'] = hist.Hist("PU_nPU", self.dataset_axis,
                                         self.pu_nPU_axis)

        #set_trace()
        ## construct dictionary of dictionaries to hold meta info for each sample
        for sample in fileset.keys():
            if 'Int' in sample:
                histo_dict['%s_pos' %
                           sample] = processor.defaultdict_accumulator(int)
                histo_dict['%s_pos_runs_to_lumis' %
                           sample] = processor.value_accumulator(list)
                histo_dict['%s_neg' %
                           sample] = processor.defaultdict_accumulator(int)
                histo_dict['%s_neg_runs_to_lumis' %
                           sample] = processor.value_accumulator(list)
            else:
                histo_dict[sample] = processor.defaultdict_accumulator(int)
                histo_dict['%s_runs_to_lumis' %
                           sample] = processor.value_accumulator(list)

        self._accumulator = processor.dict_accumulator(histo_dict)
        self.sample_name = ''
Example 27
def scale_file(file):

    print('Loading file:', file)
    hists = load(file)
    scalez = False
    if '2016' in file:
        scalez = True

    pd = []
    for d in hists['sumw'].identifiers('dataset'):
        dataset = d.name
        if dataset.split("____")[0] not in pd: pd.append(dataset.split("____")[0])
    print('List of primary datasets:',pd)

    ##
    # Aggregate all the histograms that belong to a single dataset
    ##

    dataset = hist.Cat("dataset", "dataset", sorting='placement')
    dataset_cats = ("dataset",)
    dataset_map = OrderedDict()
    for pdi in pd:
        dataset_map[pdi] = (pdi+"*",)
    for key in hists.keys():
        hists[key] = hists[key].group(dataset_cats, dataset, dataset_map)
    print('Datasets aggregated')

    return scale(hists,scalez)
Example 28
 def __init__(self):
     # Create the histograms
     self._accumulator = processor.dict_accumulator({
         'dummy':
         hist.Hist("Dummy", hist.Cat("sample", "sample"),
                   hist.Bin("dummy", "Number of events", 1, 0, 1)),
     })
Example 29
def main():

    raw = False

    if len(sys.argv) < 2:
        print("Enter year")
        return

    year = sys.argv[1]

    with open('xsec.json') as f:
        xs = json.load(f)

    with open('pmap.json') as f:
        pmap = json.load(f)

    indir = "outfiles/"
    infiles = subprocess.getoutput("ls " + indir + year + "*.coffea").split()
    outsum = processor.dict_accumulator()

    # Check if pickle exists, remove it if it does
    picklename = str(year) + '/templates.pkl'
    if os.path.isfile(picklename):
        os.remove(picklename)

    started = 0
    for filename in infiles:

        print("Loading " + filename)

        if os.path.isfile(filename):
            out = util.load(filename)

            if started == 0:
                outsum['templates'] = out['templates']
                outsum['sumw'] = out['sumw']
                started += 1
            else:
                outsum['templates'].add(out['templates'])
                outsum['sumw'].add(out['sumw'])

            del out

    scale_lumi = {
        k: xs[k] * 1000 * lumis[year] / w
        for k, w in outsum['sumw'].items()
    }

    outsum['templates'].scale(scale_lumi, 'dataset')
    templates = outsum['templates'].group('dataset',
                                          hist.Cat('process', 'Process'), pmap)

    del outsum

    with open(picklename, 'wb') as outfile:
        pickle.dump(templates, outfile, protocol=-1)

    return
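
A hedged sketch of reading the pickled templates back, e.g. for plotting; it assumes the same per-year directory layout used above:

import pickle

with open(str(year) + '/templates.pkl', 'rb') as f:
    templates = pickle.load(f)
# after the grouping above, the histogram carries a 'process' axis
print(templates.identifiers('process'))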
Example 30
 def __init__(self):
     dataset_axis = hist.Cat('dataset', 'dataset')
     lxy_axis = hist.Bin('lxy', 'lxy [cm]', 100, 0, 700)
     reso_axis = hist.Bin('reso', '($p_T$(reco)-$p_T$(gen))/$p_T$(gen)',
                          100, -1, 2)
     reco_axis = hist.Cat('reco', 'reco type')
     self._accumulator = processor.dict_accumulator({
         'lxy':
         hist.Hist('Counts', dataset_axis, lxy_axis, reco_axis),
         'lxy-pf':
         hist.Hist('Counts', dataset_axis, lxy_axis, reco_axis),
         'lxy-dsa':
         hist.Hist('Counts', dataset_axis, lxy_axis, reco_axis),
         'reso':
         hist.Hist('Norm. Frequency/0.03', dataset_axis, reso_axis,
                   reco_axis),
     })