Esempio n. 1
0
def np2root(data, column_names, outname="output.root", tname="tree", dtype=float):
    """
    Convert a 2D numpy array into a ROOT TTree stored in a ROOT file.

    :param data: the 2D array containing M variables for N events
    :param column_names: names of the M variables, in column order
    :param outname: name of the output root file (opened with "RECREATE")
    :param tname: name of the tree created in the output file
    :param dtype: float or int (applied to every column), or a list (one
        entry per column, in column order), or a dictionary keyed by column
        name. In the list and dictionary forms every entry that is not
        ``float`` is stored as an integer column.
    :return: None
    """
    # Map every column name to a fresh rootpy column object.
    if isinstance(dtype, dict):
        branches = {
            col: (FloatCol if dtype[col] == float else IntCol)()
            for col in column_names
        }
    elif isinstance(dtype, list):
        branches = {
            col: (FloatCol if dtype[i] == float else IntCol)()
            for i, col in enumerate(column_names)
        }
    else:
        assert dtype in [float, int], "dtype not understood"
        mtype = IntCol if dtype == int else FloatCol
        branches = {col: mtype() for col in column_names}

    fOut = root_open(outname, "RECREATE")
    try:
        tree = Tree(tname)
        tree.create_branches(branches)
        rows, cols = shape(data)
        for i in range(rows):
            for j in range(cols):
                # setattr hands the value over unchanged; the previous
                # exec-on-a-formatted-string approach went through str(),
                # which fails on nan/inf and can drop precision.
                setattr(tree, column_names[j], data[i, j])
            tree.Fill()
        fOut.Write()
    finally:
        # Close the file even when filling fails part-way through.
        fOut.Close()
    print('wrote ROOT file {name}'.format(name=outname))
def copy_in_trigger_signal(in_files_name,
                           out_name,
                           tree_name,
                           prefix,
                           cdc_events,
                           cth_events,
                           rand_t=None):
    """
    Copy a chain of input trees into a tree in ``out_name``, adding tag branches.

    For every entry, ``<prefix>GoodTrack`` and ``<prefix>GoodTrig`` are set to
    whether the entry's ``<prefix>EventNumber`` is in ``cdc_events`` and
    ``cth_events`` respectively. When ``rand_t`` is given,
    ``<prefix>SmearTime`` is set to ``rand_t[event_number]``.

    :param in_files_name: input file(s) passed to ``TreeChain``
    :param out_name: output file, opened with mode "r+"
        (NOTE(review): presumably this requires the file to exist already;
        confirm against rootpy's ``root_open``)
    :param tree_name: name of the input trees and of the output tree
    :param prefix: prefix of the branch names read and written
    :param cdc_events: event numbers flagged as ``GoodTrack``
    :param cth_events: event numbers flagged as ``GoodTrig``
    :param rand_t: optional container indexed by event number
    :return: None
    """
    # Sets give constant-time membership tests inside the event loop.
    set_cdc_events = set(cdc_events)
    set_cth_events = set(cth_events)

    # Define the chain of input trees
    in_chain = TreeChain(name=tree_name, files=in_files_name)
    # Open the file that will hold the new tree
    out_file = root_open(out_name, "r+")
    try:
        # Add the time shift if we want it in the tree
        ExtraBranches = Tagged
        if rand_t is not None:
            ExtraBranches += Smeared

        # Get the new tree with its extra branches
        out_tree = Tree(tree_name, model=ExtraBranches.prefix(prefix))

        # This creates all the same branches in the new tree but
        # their addresses point to the same memory used by the original tree.
        out_tree.create_branches(in_chain._buffer)
        out_tree.update_buffer(in_chain._buffer)

        # Now loop over the original tree(s) and fill the new tree
        for entry in in_chain:
            this_event_number = entry[prefix + "EventNumber"].value
            setattr(out_tree, prefix + "GoodTrack",
                    this_event_number in set_cdc_events)
            setattr(out_tree, prefix + "GoodTrig",
                    this_event_number in set_cth_events)
            if rand_t is not None:
                try:
                    setattr(out_tree, prefix + "SmearTime",
                            rand_t[this_event_number])
                except Exception:
                    # Best effort, as before: dump the offending entry and
                    # keep going. Unlike a bare ``except:`` this no longer
                    # swallows KeyboardInterrupt / SystemExit.
                    for key, item in entry.items():
                        print("%s %s" % (key, item))
            # Fill, noting that most of the buffer is shared between the
            # chain and the output tree
            out_tree.Fill()
        out_tree.Write()
    finally:
        # Close the output file even if the loop fails.
        out_file.Close()
Esempio n. 3
0
# Now we want to copy the tree above into a new file while overwriting a branch
# First create a new file to save the new tree in:
f_copy = root_open("test_copy.root", "recreate")
try:
    # You may not know the entire model of the original tree but only the
    # branches you intend to overwrite, so I am not specifying the model=Event
    # below as an example of how to deal with this in general:
    tree_copy = Tree("test_copy")

    # If the original tree was not handed to you through rootpy don't forget to:
    # >>> from rootpy import asrootpy
    # >>> tree = asrootpy(tree)

    # Here we specify the buffer for the new tree to use. We use the same
    # buffer as the original tree. This creates all the same branches in the
    # new tree but their addresses point to the same memory used by the
    # original tree.
    tree_copy.set_buffer(chain._buffer, create_branches=True)

    # Now loop over the original tree and fill the new tree
    for entry in chain:
        # Overwrite a branch value. This changes the value that will be
        # written to the new tree but leaves the value unchanged in the
        # original tree on disk.
        entry.x = 3.141
        # "entry" is actually the buffer, which is shared between both trees.
        tree_copy.Fill()

    # tree_copy is now a copy of tree where the "x" branch has been
    # overwritten with new values
    tree_copy.Write()
finally:
    # Close the output file even if the copy loop raises.
    f_copy.Close()
Esempio n. 4
0
                tokens = [i.strip('{}') for i in tokens]
                n_bin_vars = int(tokens[0])
                bin_vars = tokens[1:n_bin_vars + 1]
                n_fcn_vars = int(tokens[n_bin_vars + 1])
                fcn_vars = tokens[n_bin_vars + 2:n_bin_vars + 2 + n_fcn_vars]
                formula = tokens[n_bin_vars + 2 + n_fcn_vars]
                if n_fcn_vars == 1:
                    fcn = ROOT.TF1('fcn', formula)
                    corr_tree.Branch('fcn', 'TF1', fcn)
                elif n_fcn_vars == 2:
                    fcn = ROOT.TF2('fcn', formula)
                    corr_tree.Branch('fcn', 'TF2', fcn)
                elif n_fcn_vars == 3:
                    fcn = ROOT.TF3('fcn', formula)
                    corr_tree.Branch('fcn', 'TF3', fcn)
            else:
                corr_tree.min = float(tokens[0])
                corr_tree.max = float(tokens[1])
                var_ranges = tokens[3:n_fcn_vars * 2 + 3]
                fcn_pars = tokens[n_fcn_vars * 2 + 3:]
                for i, par_value in enumerate(fcn_pars):
                    fcn.SetParameter(i, float(par_value))
                ranges = [
                    float(j) for i, j in enumerate(var_ranges) if i % 2 == 0
                ]
                ranges.extend(
                    [float(j) for i, j in enumerate(var_ranges) if i % 2 == 1])
                fcn.SetRange(*ranges)
                corr_tree.Fill()
        corr_tree.Write()
Esempio n. 5
0
class sampleContainer:
    """
    Evaluate a model on every event of an 'EcalVeto' chain and save the result.

    Construction does all the work: it reads the input files, builds one
    feature vector per event, stores the model prediction in the
    'discValue_gabrielle' branch of a copy of the tree written to ``outn``,
    and finally copies ``outn`` to ``outd``. The feature vectors are kept in
    ``self.events``.
    """

    # Branches read from each event, in the exact order in which the feature
    # vector handed to the model is built.
    _FEATURE_BRANCHES = (
        'nReadoutHits',
        'summedDet',
        'summedTightIso',
        'maxCellDep',
        'showerRMS',
        'xStd',
        'yStd',
        'avgLayerHit',
        'deepestLayerHit',
        'stdLayerHit',
        # new features
        'ele68ContEnergy',
        'ele68x2ContEnergy',
        'ele68x3ContEnergy',
        'ele68x4ContEnergy',
        'ele68x5ContEnergy',
        'photon68ContEnergy',
        'photon68x2ContEnergy',
        'photon68x3ContEnergy',
        'photon68x4ContEnergy',
        'photon68x5ContEnergy',
        'outside68ContEnergy',
        'outside68x2ContEnergy',
        'outside68x3ContEnergy',
        'outside68x4ContEnergy',
        'outside68x5ContEnergy',
        'outside68ContNHits',
        'outside68x2ContNHits',
        'outside68x3ContNHits',
        'outside68x4ContNHits',
        'outside68x5ContNHits',
        'outside68ContXstd',
        'outside68x2ContXstd',
        'outside68x3ContXstd',
        'outside68x4ContXstd',
        'outside68x5ContXstd',
        'outside68ContYstd',
        'outside68x2ContYstd',
        'outside68x3ContYstd',
        'outside68x4ContYstd',
        'outside68x5ContYstd',
        'ecalBackEnergy',
    )

    def __init__(self, fns, outn, outd, model):
        """
        :param fns: input files handed to ``TreeChain('EcalVeto', ...)``
        :param outn: name of the output ROOT file (opened with 'RECREATE')
        :param outd: destination the output file is copied to afterwards
        :param model: object whose ``predict`` accepts an ``xgb.DMatrix``
        """
        import shutil

        print("Initializing Container!")
        self.tin = TreeChain('EcalVeto', fns)
        self.tin.create_branches({'discValue_gabrielle': 'F'})
        self.model = model

        self.outdir = outd
        self.outname = outn
        self.outfile = root_open(outn, 'RECREATE')
        self.tout = Tree('EcalVeto')
        # The output tree shares the chain's buffer, so values assigned on
        # ``event`` below are what ``self.tout.Fill()`` stores.
        self.tout.set_buffer(self.tin._buffer, create_branches=True)

        self.events = []
        try:
            for event in self.tin:
                evt = [getattr(event, name) for name in self._FEATURE_BRANCHES]

                evtarray = np.array([evt])
                pred = float(model.predict(xgb.DMatrix(evtarray))[0])
                event.discValue_gabrielle = pred
                self.tout.Fill()
                self.events.append(evt)

                if (len(self.events) % 10000 == 0 and len(self.events) > 0):
                    print('The shape of events =  %s' %
                          (np.shape(self.events),))

            self.outfile.cd()
            self.tout.Write()
        finally:
            # Close the output file even when the event loop fails.
            self.outfile.Close()

        print('cp %s %s' % (self.outname, self.outdir))
        # Copy without a shell: paths containing spaces or shell
        # metacharacters are handled correctly and a failed copy raises
        # instead of being silently ignored.
        shutil.copy(self.outname, self.outdir)
            t.trueTrackInt = events[event_num].track.get_intercept()
            t.fittedTrackGrad = popt[2 * event_num]
            t.fittedTrackInt = popt[(2 * event_num) + 1]

            # Record true and fitted hit distances
            t.trueHitDistance = true_hit_rads[j]
            t.fittedHitDistance = fitted_hit_rads[j]

            # Record numbers indexing position of struck wire
            t.moduleNum = events[event_num].wire_hits[j].module_num
            t.planeNum = events[event_num].wire_hits[j].plane_num
            t.layerNum = events[event_num].wire_hits[j].layer_num
            t.wireNum = events[event_num].wire_hits[j].wire_num

            # Fill tree
            t.Fill()

    # Stop carrying out fits, if close to time limit
    if (time.time() - start_wall_time) > (wall_time_limit - time_buffer):
        fit_count = k + 1
        break
    if (time.clock() - start_cpu_time) > (cpu_time_limit - time_buffer):
        fit_count = k + 1
        break

# Write tree to file, then close
f.Write()
f.Close()

print ""
print("Closed TFile")
Esempio n. 7
0
# Copy per-event quantities from ``chain`` into ``outtree``.
# NOTE(review): this snippet appears to be truncated and partly garbled
# (see the notes below); confirm against the complete script.
define_objects(chain)

for event in chain:

    outtree.runnumber = event.RunNumber
    outtree.evtnumber = event.EventNumber
    outtree.weight = event.mc_event_weight
    # sort taus and jets in decreasing order by pT
    event.taus.sort(key=lambda tau: tau.decay.fourvect_vis.Pt(), reverse=True)
    event.jets.sort(key=lambda jet: jet.pt, reverse=True)

    # Set variables describing the two taus
    # and the ditau system
    # The unpacking below requires exactly two taus in the event.
    tau1, tau2 = event.taus
    FourMomentum.set(outtree.higgs, event.higgs[0])
    # NOTE(review): the tree is filled here, before the jet variables below
    # are assigned -- confirm this ordering is intended.
    outtree.Fill(reset=-1)

    # MET = tau1.decay.fourvect_missing + tau2.decay.fourvect_missing
    jets = list(event.jets)
    outtree.numJets = len(jets)
    if len(jets) >= 2:
        jet1, jet2 = jets[:2]
        TrueTauBlock.set(outtree, tau1, tau2, jet1, jet2)
        TrueJetBlock.set(outtree, jet1, jet2)

        # NOTE(review): the next three lines repeat the assignment with only
        # jet1 for TrueTauBlock; they look like they belong to the
        # single-jet branch -- confirm.
        jet1 = jets[0]
        TrueTauBlock.set(outtree, tau1, tau2, jet1)
        TrueJetBlock.set(outtree, jet1, jet2)

    elif len(jets) == 1:
        jet1 = jets[0]
# Fixed seed so the generated sample is reproducible.
random.seed(0)


class Sample(TreeModel):
    # two float features and a boolean class label
    a = FloatCol()
    b = FloatCol()
    label = BoolCol()


# (mean of a, mean of b, label) selected by the entry index modulo 4
_SAMPLE_CLUSTERS = (
    (1, 1, True),
    (1, -1.5, False),
    (-1.5, -1, True),
    (-1, 1, False),
)

with root_open('sample.root', 'recreate'):
    tree = Tree('sample', model=Sample)
    for i in xrange(10000):
        _mean_a, _mean_b, _label = _SAMPLE_CLUSTERS[i % 4]
        # draw a before b so the random sequence is consumed in the same order
        tree.a = gauss(_mean_a, 1)
        tree.b = gauss(_mean_b, 1)
        tree.label = _label
        tree.Fill()
    tree.write()
Esempio n. 9
0
def processNtuple(infile_name,
                  outfile_name,
                  variables,
                  sample,
                  flav_weight=False,
                  pteta_weight=False,
                  cat_weight=False,
                  tag=''):
    """
    Convert one input ntuple into a flat training tree, optionally weighted.

    The category and flavour are parsed from ``infile_name``, which must look
    like ``<anything>_<category>_<FLAVOR>.root``. The tree named after the
    category is read from the input file and, for every entry, each variable
    in ``variables`` is evaluated and stored in the output tree ``tree``.

    :param infile_name: input ROOT file
    :param outfile_name: output ROOT file (opened with 'recreate')
    :param variables: mapping of output branch name to an info dictionary
        with keys 'type' and 'default' and either 'var' (attribute of the
        entry, optionally with 'idx' to pick one element) or 'fcn' (name of
        a module-level function, called with the entry and ``*info['args']``)
    :param sample: sample name; weights are read from
        ``data/<sample>_weights.root``
    :param flav_weight: store 'flavour_weight' and include it in 'total_weight'
    :param pteta_weight: store 'kinematic_weight' and include it in 'total_weight'
    :param cat_weight: store 'slcategory_weight' and include it in 'total_weight'
    :param tag: label used only in the final log message
    :raises ValueError: if ``infile_name`` does not match the expected pattern
    """
    log.debug("processing %s --> %s" % (infile_name, outfile_name))
    type_dict = {'i': int, 'l': long, 'f': float}

    # Raw string: the pattern is unchanged, but "\/" and "\." are no longer
    # invalid string escapes.
    fname_regex = re.compile(
        r'[a-zA-Z_0-9\/]*\/?[a-zA-Z_0-9]+_(?P<category>[a-zA-Z]+)_(?P<flavor>[A-Z]+)\.root'
    )
    match = fname_regex.match(infile_name)

    if not match:
        raise ValueError("Could not match the regex to the file %s" %
                         infile_name)
    flavor = match.group('flavor')
    full_category = match.group('category')

    any_weight = pteta_weight or flav_weight or cat_weight
    weight_tfile = None
    try:
        flav_dir = None
        tfile_category = ''
        if any_weight:
            weight_tfile = io.root_open('data/%s_weights.root' % sample)
            flav_dir = weight_tfile.Get(flavor)
            categories = [i.name for i in flav_dir.keys()]
            # match existing categories to this one, which might be a subset
            # of the general category stored in the root file
            tfile_category = [i for i in categories if i in full_category][0]

        weights = None
        if pteta_weight:
            weights = flav_dir.Get('%s/kin' % tfile_category)

        flavor_weight = 1.
        if flav_weight:
            flavor_weight = prettyjson.loads(
                weight_tfile.flavour_weights.String().Data())[flavor]

        # put bias weights
        category_weights = None
        if cat_weight:
            category_weights = flav_dir.Get('%s/bias' % tfile_category)

        with io.root_open(outfile_name, 'recreate') as outfile:
            outtree = Tree('tree', title='c-tagging training tree')
            branches_def = dict(
                (name, info['type']) for name, info in variables.items())
            if pteta_weight:
                branches_def['kinematic_weight'] = 'F'
            if flav_weight:
                branches_def['flavour_weight'] = 'F'
            if cat_weight:
                branches_def['slcategory_weight'] = 'F'
            if any_weight:
                branches_def['total_weight'] = 'F'

            outtree.create_branches(branches_def)
            with io.root_open(infile_name) as infile:
                intree = infile.Get(full_category)
                for e_idx, entry in enumerate(intree):
                    if e_idx % 1000 == 0:
                        log.debug("processing entry: %i" % e_idx)
                    for name, info in variables.items():
                        value = info['default']
                        try:
                            if 'var' in info and hasattr(entry, info['var']):
                                var = getattr(entry, info['var'])
                                vtype = type_dict[info['type'].lower()]
                                if 'idx' in info:
                                    # keep the default when the vector is
                                    # too short for the requested index
                                    if var.size() > info['idx']:
                                        value = vtype(var[info['idx']])
                                else:
                                    value = vtype(var)
                            elif 'fcn' in info:
                                vtype = type_dict[info['type'].lower()]
                                fcn = globals()[info['fcn']]
                                value = vtype(fcn(entry, *info['args']))
                        except Exception:
                            # Report which variable failed and propagate the
                            # error instead of dropping into a debugger,
                            # which hangs or aborts non-interactive jobs.
                            log.error(
                                "something went wrong processing variable %s"
                                % name)
                            raise

                        # if value is nan, then set to default (maybe better
                        # if you skip the whole jet)
                        if math.isnan(value):
                            value = info['default']
                        setattr(outtree, name, value)

                    total_weight = 1.
                    if pteta_weight:
                        bin_idx = weights.FindFixBin(entry.jetPt,
                                                     abs(entry.jetEta))
                        kin_weight = weights[bin_idx].value
                        outtree.kinematic_weight = kin_weight
                        total_weight *= kin_weight
                    if flav_weight:
                        outtree.flavour_weight = flavor_weight
                        total_weight *= flavor_weight
                    if cat_weight:
                        bin_idx = category_weights.FindFixBin(
                            entry.jetPt, abs(entry.jetEta))
                        bias_weight = category_weights[bin_idx].value
                        outtree.slcategory_weight = bias_weight
                        total_weight *= bias_weight
                    if 'total_weight' in branches_def:
                        outtree.total_weight = total_weight
                    outtree.Fill()

            # Write the tree explicitly: closing the file does not write
            # in-memory objects. cd() first because opening the input file
            # changed the current directory.
            outfile.cd()
            outtree.Write()
    finally:
        # The weights file was previously never closed.
        if weight_tfile is not None:
            weight_tfile.Close()
    log.info("processing done [%s]" % tag)
Esempio n. 10
0
def _root_scalar_type(value):
    """Return 'double', 'float' or 'int' for a scalar, or None if unrecognised.

    The decision is made on the textual type name, so e.g. ``numpy.float64``
    gives 'double' while a plain Python ``float`` gives 'float'.
    """
    type_name = str(type(value))
    if 'float' in type_name:
        return 'double' if '64' in type_name else 'float'
    if 'int' in type_name:
        return 'int'
    return None


def add_branch(arr,
               filename,
               tree='bTag_AntiKt2PV0TrackJets',
               branchname='jet_mv2c20_new'):
    '''
    writes the newly evaluated mv2 scores into a branch in a Friend Tree
    # --------------------------------------------------------------------------------
    # -- *WARNING*: Make sure the file you are trying to modify is *NOT* already open!
    #               Otherwise, instead of adding a branch to that file, you will
    #               corrupt the file!
    # --------------------------------------------------------------------------------
    Args:
    -----
        arr:        array containing newly evaluated mv2 scores; either one
                    scalar per event or one iterable of scalars per event.
                    The branch type is inferred from the first element.
        filename:   .root file where new branch will be added
        tree:       (optional) name of TTree that will get a new Friend
        branchname: (optional) name of the new branch

    Raises:
    -------
        TypeError:  if the element type of `arr` is not float-like, int-like
                    or an iterable of those
    '''
    # -- Check if file already exists:
    if not os.path.exists(filename):
        print('[WARNING] file not found, creating new file')

    # -- Open file:
    f = root_open(filename, "update")
    try:
        # -- Extract TTree
        T = f[tree]

        # -- Need to figure out dtype in order to save the branch correctly
        # -- If dtype is wrong, ROOT returns garbage, so this below is important!
        first = arr[0]
        element_type = _root_scalar_type(first)
        is_vector = False
        if element_type is not None:
            # -- Case of branch being event level values
            dtype = element_type
        elif hasattr(first, '__iter__'):
            # -- Case of branch being jet level list
            element_type = _root_scalar_type(first[0])
            if element_type is None:
                raise TypeError('Nested type `{}` not supported'.format(
                    str(type(first[0]))))
            dtype = 'vector<{}>'.format(element_type)
            is_vector = True
        else:
            raise TypeError('Type `{}` not supported'.format(str(type(first))))
        sys.stdout.write('Detected dtype: {}\n'.format(dtype))
        sys.stdout.flush()

        # -- Create friend:
        T_friend = Tree(tree + '_Friend')

        # -- Add new branch to friend tree:
        T_friend.create_branches({branchname: dtype})

        # -- Fill the branch:
        sys.stdout.write('Filling branch "{}" ... \n'.format(branchname))
        sys.stdout.flush()

        # The vector type does not change between events; look it up once.
        vector_type = stl.vector(element_type) if is_vector else None
        for branch4event in arr:
            if is_vector:
                buf = vector_type()
                for e in branch4event:
                    buf.push_back(e)
                setattr(T_friend, branchname, buf)
            else:
                setattr(T_friend, branchname, branch4event)
            T_friend.Fill()

        # -- Write out the tree and close the file:
        sys.stdout.write('Finalizing and closing file "{}" \n'.format(filename))
        sys.stdout.flush()

        T.AddFriend(T_friend, tree + '_Friend')
        T_friend.Write()
        f.Write()
    finally:
        # Close the file even on failure so it is not left open in update mode.
        f.Close()
Esempio n. 11
0
    def work(self):
        """
        Run the event loop and fill the 'higgstautauhh' output tree.

        Builds a TreeChain over ``self.files``, applies the event filters,
        keeps events with exactly one selected true tau whose associated
        reconstructed tau is in the tau collection, and writes the tau
        variables, MET and event weights to the output tree together with a
        two-bin cutflow histogram.

        Raises ValueError when no pileup configuration exists for ``YEAR``.
        """

        # trigger config tool to read trigger info in the ntuples
        trigger_config = get_trigger_config()

        OutputModel = (RecoTauBlock + EventVariables + SkimExtraModel +
            TrueTauBlock)

        onfilechange = []
        # update the trigger config maps on every file change
        onfilechange.append((update_trigger_config, (trigger_config,)))

        cutflow = Hist(2, 0, 2, name='cutflow', type='D')

        # initialize the TreeChain of all input files (each containing one tree named self.metadata.treename)
        chain = TreeChain(self.metadata.treename,
                         files=self.files,
                         events=self.events,
                         cache=True,
                         cache_size=10000000,
                         learn_entries=30,
                         onfilechange=onfilechange)

        # create output tree
        # (cd into the output first so the new tree is created there)
        self.output.cd()
        tree = Tree(name='higgstautauhh', model=OutputModel)

        # branches copied from the input chain into the output tree
        copied_variables = ['actualIntPerXing',
                            'averageIntPerXing',
                            'RunNumber',
                            'EventNumber',
                            'lbn']

        # share the chain's buffer for the copied branches only
        tree.set_buffer(
                chain.buffer,
                branches=copied_variables,
                create_branches=True,
                visible=False)
        chain.always_read(copied_variables)

        # set the event filters
        event_filters = EventFilterList([
            #Triggers(
            #    datatype=self.metadata.datatype,
            #    year=YEAR,
            #    skim=False),
            PriVertex(),
            LArError(),
            LArHole(datatype=self.metadata.datatype),
            JetCleaning(
                datatype=self.metadata.datatype,
                year=YEAR),
            TauAuthor(1),
            TauHasTrack(1),
            TauPT(1, thresh=25 * GeV),
            TauEta(1),
            TauCrack(1),
            TauLArHole(1),
            #TauTriggerMatch(
            #    config=trigger_config,
            #    year=YEAR,
            #    datatype=self.metadata.datatype,
            #    skim=False,
            #    tree=tree,
            #    min_taus=1),
        ])

        self.filters['event'] = event_filters
        chain.filters += event_filters

        # define tree collections
        chain.define_collection(name="taus", prefix="tau_", size="tau_n", mix=TauFourMomentum)
        chain.define_collection(name="taus_EF", prefix="trig_EF_tau_",
                                size="trig_EF_tau_n", mix=TauFourMomentum)

        # jet_* etc. is AntiKt4LCTopo_* in tau-perf D3PDs
        chain.define_collection(name="jets", prefix="jet_", size="jet_n", mix=FourMomentum)
        chain.define_collection(name="truetaus", prefix="trueTau_", size="trueTau_n", mix=MCTauFourMomentum)
        chain.define_collection(name="mc", prefix="mc_", size="mc_n", mix=MCParticle)
        chain.define_collection(name="muons", prefix="mu_staco_", size="mu_staco_n")
        chain.define_collection(name="electrons", prefix="el_", size="el_n")
        chain.define_collection(name="vertices", prefix="vxp_", size="vxp_n")

        from externaltools import PileupReweighting
        from ROOT import Root
        # Initialize the pileup reweighting tool
        # (config and lumi-calc files depend on the data-taking year)
        pileup_tool = Root.TPileupReweighting()
        if YEAR == 2011:
            pileup_tool.AddConfigFile(PileupReweighting.get_resource('mc11b_defaults.prw.root'))
            pileup_tool.AddLumiCalcFile('lumi/2011/hadhad/ilumicalc_histograms_None_178044-191933.root')
        elif YEAR == 2012:
            pileup_tool.AddConfigFile(PileupReweighting.get_resource('mc12a_defaults.prw.root'))
            pileup_tool.SetDataScaleFactors(1./1.11)
            pileup_tool.AddLumiCalcFile('lumi/2012/hadhad/ilumicalc_histograms_None_200841-205113.root')
        else:
            raise ValueError('No pileup reweighting defined for year %d' %
                    YEAR)
        # discard unrepresented data (with mu not simulated in MC)
        pileup_tool.SetUnrepresentedDataAction(2)
        pileup_tool.Initialize()

        # entering the main event loop...
        for event in chain:
            tree.reset()

            event.vertices.select(vertex_selection)
            tree.number_of_good_vertices = len(event.vertices)

            # match only with visible true taus
            event.truetaus.select(lambda tau: tau.vis_Et > 10 * GeV and abs(tau.vis_eta) < 2.5)

            # keep only events with exactly one selected true tau
            if len(event.truetaus) == 1:
                true_tau = event.truetaus[0]
                TrueTauBlock.set(tree, 1, true_tau)
            else:
                continue

            # Truth-matching
            # The reco tau is looked up through the index stored on the true
            # tau; the event is skipped when that tau is not in event.taus.
            matched_reco = None
            reco_index = true_tau.tauAssoc_index
            tau = event.taus.getitem(reco_index)
            if tau in event.taus:
                matched_reco = tau
            else:
                continue

            tree.MET = event.MET_RefFinal_BDTMedium_et

            # fill tau block
            RecoTauBlock.set(event, tree, matched_reco, None)

            # set the event weight
            tree.pileup_weight = pileup_tool.GetCombinedWeight(event.RunNumber,
                                                               event.mc_channel_number,
                                                               event.averageIntPerXing)
            tree.mc_weight = event.mc_event_weight
            tree.Fill(reset=True)

        self.output.cd()
        tree.FlushBaskets()
        tree.Write()
        # Both cutflow entries receive the total of the first event filter.
        # NOTE(review): confirm whether the second entry was meant to hold a
        # different count (e.g. events passing the selection).
        total_events = event_filters[0].total
        cutflow[0] = total_events
        cutflow[1] = total_events
        cutflow.Write()
Esempio n. 12
0
    def load_analysis(self, inputs, res_T):
        """
        Select events from one input file and store them as ROOT and CSV.

        Events need at least two muons and at least two b-tagged jets, with
        the invariant mass of the two leading b-jets strictly between 95 and
        135. For every selected event the kinematic variables of the dimuon
        (Z), di-b-jet (H) and combined (ZH) systems are written to a tree
        named ``cHW_<cHW>_tcHW_<tcHW>``. The tree is then converted to a
        space-separated CSV file and both files are moved into
        ``500GeV_res/roots`` and ``500GeV_res/csv``.

        ``PT_j1`` / ``PT_j2`` hold the pT of the leading / sub-leading
        non-b-tagged jet, or -999 when the event has no such jet.

        :param inputs: path of the input file added to the "Delphes" chain
        :param res_T: mapping from the input file's base name to a sequence
            whose first three items are cHW, tcHW and xsec
        """

        def to_lorentz(particle):
            """Copy the four-momentum of ``particle`` into a new TLorentzVector."""
            p4 = particle.P4()
            vec = ROOT.TLorentzVector()
            vec.SetPtEtaPhiE(p4.Pt(), p4.Eta(), p4.Phi(), p4.E())
            return vec

        def by_pt(particle):
            """Sort key: transverse momentum of ``particle``."""
            return particle.P4().Pt()

        params = res_T[inputs.split('/')[-1]]
        cHW = params[0]
        tcHW = params[1]
        xsec = params[2]

        # Create chain of root trees
        chain1 = ROOT.TChain("Delphes")
        chain1.Add(inputs)

        # Create object of class ExRootTreeReader
        treeReader = ROOT.ExRootTreeReader(chain1)
        numberOfEntries = treeReader.GetEntries()

        # create new root file
        tree_name = "cHW_{}_tcHW_{}".format(cHW, tcHW)
        root_name = 'cHW_{}_tcHW_{}.root'.format(cHW, tcHW)
        csv_name = 'cHW_{}_tcHW_{}.csv'.format(cHW, tcHW)
        f = root_open(root_name, "recreate")
        try:
            tree = Tree(tree_name)
            tree.create_branches({
                'PT_l1': 'F',
                'PT_l2': 'F',
                'PT_ll': 'F',
                'Cos_lZ': 'F',
                'DPHI_ll': 'F',
                'PT_j1': 'F',
                'PT_j2': 'F',
                'PT_b1': 'F',
                'PT_b2': 'F',
                'Eta_H': 'F',
                'phi_H': 'F',
                'M_H': 'F',
                'Cos_Hb1': 'F',
                'PT_H': 'F',
                'PT_ZH': 'F',
                'M_Z': 'F',
                'M_ZH': 'F',
                'cHW': 'F',
                'tcHW': 'F',
                'xsec': 'F'
            })

            # Get pointers to branches used in this analysis. The electron,
            # photon and MET branches are still requested from the reader as
            # before, although their contents are not used below.
            branchJet = treeReader.UseBranch("Jet")
            branchElectron = treeReader.UseBranch("Electron")
            branchMuon = treeReader.UseBranch("Muon")
            branchPhoton = treeReader.UseBranch("Photon")
            branchMET = treeReader.UseBranch("MissingET")

            # Loop over all events
            for entry in range(0, numberOfEntries):
                # Load selected branches with data from specified event
                treeReader.ReadEntry(entry)

                muons = [branchMuon.At(n)
                         for n in range(branchMuon.GetEntries())]
                if len(muons) < 2:
                    continue
                muons.sort(key=by_pt, reverse=True)
                Muon1 = to_lorentz(muons[0])
                Muon2 = to_lorentz(muons[1])

                # Split jets into b-tagged and light jets
                bjets, ljets = [], []
                for n in range(branchJet.GetEntries()):
                    jet = branchJet.At(n)
                    if jet.BTag == 1:
                        bjets.append(jet)
                    else:
                        ljets.append(jet)
                if len(bjets) < 2:
                    continue
                bjets.sort(key=by_pt, reverse=True)
                ljets.sort(key=by_pt, reverse=True)

                # -999 marks a missing light jet. The sentinel used to be
                # assigned to the tree right away and was then overwritten
                # by the pT of an empty four-vector (0.0) before Fill().
                pt_j1 = to_lorentz(ljets[0]).Pt() if len(ljets) > 0 else -999
                pt_j2 = to_lorentz(ljets[1]).Pt() if len(ljets) > 1 else -999

                bjato1 = to_lorentz(bjets[0])
                bjato2 = to_lorentz(bjets[1])

                # Systems built before any boost is applied
                H = bjato1 + bjato2
                if not 95 < H.M() < 135:
                    continue
                Z = Muon1 + Muon2
                ZH = Z + H

                tree.PT_l1 = Muon1.Pt()
                tree.PT_l2 = Muon2.Pt()
                tree.PT_ll = Z.Pt()
                tree.PT_b1 = bjato1.Pt()
                tree.PT_b2 = bjato2.Pt()
                tree.PT_j1 = pt_j1
                tree.PT_j2 = pt_j2
                tree.phi_H = H.Phi()
                tree.PT_ZH = ZH.Pt()
                tree.M_ZH = ZH.M()
                tree.PT_H = H.Pt()
                tree.Eta_H = H.Eta()
                tree.M_H = H.M()
                tree.M_Z = Z.M()
                tree.DPHI_ll = np.abs(Muon1.DeltaPhi(Muon2))

                # Boosted objects: angle between the leading muon, boosted
                # by minus the Z boost vector, and the Z boost direction;
                # likewise for the leading b-jet and H.
                Zboost = Z.BoostVector()
                v = Zboost.Unit()
                Muon1.Boost(-Zboost)
                Hboost = H.BoostVector()
                ang = Hboost.Unit()
                bjato1.Boost(-Hboost)
                tree.Cos_Hb1 = np.cos(bjato1.Angle(ang))
                tree.Cos_lZ = np.cos(Muon1.Angle(v))

                tree.cHW = cHW
                tree.tcHW = tcHW
                tree.xsec = xsec
                tree.Fill()

            tree.write()
        finally:
            # Close the output file even if the event loop fails.
            f.close()

        # create the csv output
        to_convert = root2array(root_name, tree_name)
        df_conv = pd.DataFrame(to_convert)
        df_conv.to_csv(csv_name,
                       index=False,
                       header=df_conv.keys(),
                       mode='w',
                       sep=' ')

        # move everything
        # Create each destination directory on its own: when only the parent
        # existed, shutil.move would rename the file to 'roots' / 'csv'
        # instead of moving it into a directory.
        for subdir in ('500GeV_res/roots', '500GeV_res/csv'):
            if not os.path.isdir(subdir):
                os.makedirs(subdir)

        shutil.move(root_name, '500GeV_res/roots')
        shutil.move(csv_name, '500GeV_res/csv')
Esempio n. 13
0
    def work(self):
        """
        Fill the "ditaumass" tree with true tau decays and their reco matches.

        For every event of the input chain the resonance (PDG id 25 when
        ``self.args.higgs`` is set, 23 otherwise) is looked up in the MC
        record and its tau and photon children are collected.  Events that
        lack a resonance, do not have exactly two tau decays, or contain an
        invalid or unhandled decay are skipped.  For the remaining events
        the resonance, the summed radiated photons, the true taus, the
        matched reco taus/electrons/muons, the MET and a few event-level
        quantities are written to the tree.

        If anything raises while an event is processed, the event index,
        the event number and the current file name are printed and the
        exception is re-raised.
        """
        args = self.args
        year = self.metadata.year
        verbose = args.verbose
        draw_decays = args.draw_decays

        # input chain over all files; only the branches used below are enabled
        wanted_branches = [
            'tau_*',
            'mc_*',
            'el_*',
            'mu_staco_*',
            'MET_RefFinal_BDTMedium_*',
            'MET_RefFinal_STVF_*',
            'EventNumber',
            'RunNumber',
            'averageIntPerXing',
        ]
        chain = TreeChain(
            self.metadata.treename,
            files=self.files,
            branches=wanted_branches,
            events=self.events,
            read_branches_on_demand=True,
            cache=True,
            verbose=True)

        define_objects(chain, year)

        self.output.cd()

        # output tree holding the true tau decay information
        # (possible input to the optimization of a missing mass calculator)
        tree = Tree(name="ditaumass", model=DTMEvent)

        tree.define_object(name='resonance', prefix='resonance_')
        tree.define_object(name='radiative', prefix='radiative_')

        # one output object of each kind per tau decay (two decays per event)
        truetaus = [tree.define_object(name='truetau1', prefix='truetau1_'),
                    tree.define_object(name='truetau2', prefix='truetau2_')]
        taus = [tree.define_object(name='tau1', prefix='tau1_'),
                tree.define_object(name='tau2', prefix='tau2_')]
        electrons = [tree.define_object(name='ele1', prefix='ele1_'),
                     tree.define_object(name='ele2', prefix='ele2_')]
        muons = [tree.define_object(name='muon1', prefix='muon1_'),
                 tree.define_object(name='muon2', prefix='muon2_')]

        # PDG id of the resonance to look for: Higgs (25) or Z (23)
        resonance_pdgid = 25 if args.higgs else 23

        # value stored in the collision_energy branch, taken from the
        # dataset name
        collision_energy = 7 if '7TeV' in self.metadata.name else 8

        for event_index, event in enumerate(chain):

            try:
                tree.reset_branch_values()

                # locate the Z or Higgs in the MC record
                candidates = tautools.get_particles(
                    event, resonance_pdgid, num_expected=1)
                if not candidates:
                    print("could not find resonance")
                    continue

                # use the resonance as it is just before the decay
                resonance = candidates[0].last_self

                if draw_decays:
                    resonance.export_graphvis(
                        'resonance_%d.dot' % event.EventNumber)

                FourVectModel.set(tree.resonance, resonance)

                # sort the resonance children into tau decays and photons
                tau_decays = []
                mc_photons = []
                for child in resonance.iter_children():
                    if abs(child.pdgId) == pdg.tau_minus:
                        # status 3 taus are ignored in 2012 (something
                        # strange in the MC record...)
                        if year == 2012 and child.status == 3:
                            continue
                        tau_decays.append(tautools.TauDecay(child))
                    elif child.pdgId == pdg.gamma:
                        mc_photons.append(child)
                    else:
                        raise TypeError(
                            'unexpected particle after resonance:\n%s' % child)

                # exactly two taus are required, otherwise skip the event
                n_decays = len(tau_decays)
                if n_decays != 2:
                    print("found %i tau decays in MC record" % n_decays)
                    for decay in tau_decays:
                        print(decay)
                    continue

                # skip the event at the first incomplete tau decay
                broken = next((d for d in tau_decays if not d.valid), None)
                if broken is not None:
                    print("invalid tau decay:")
                    print(broken)
                    if draw_decays:
                        broken.init.export_graphvis(
                            'decay_invalid_%d.dot' % event.EventNumber)
                    continue

                # four-vector sum of all photons radiated by the resonance
                radiated = LorentzVector()
                for photon in mc_photons:
                    radiated += photon.fourvect

                # self-reference so the sum exposes a .fourvect attribute
                # NOTE(review): presumably what FourVectModel.set reads;
                # confirm against its implementation
                radiated.fourvect = radiated
                FourVectModel.set(tree.radiative, radiated)
                tree.radiative_ngamma = len(mc_photons)
                tree.radiative_ngamma_5 = sum(
                    1 for ph in mc_photons if ph.pt > 5)
                tree.radiative_ngamma_10 = sum(
                    1 for ph in mc_photons if ph.pt > 10)
                tree.radiative_et_scalarsum = sum(
                    ph.pt for ph in mc_photons)

                every_decay_matched = True
                matched_objects = []
                unhandled_decay = False

                slots = zip(tau_decays, truetaus, taus, electrons, muons)
                for i, (decay, truetau, tau, electron, muon) in enumerate(
                        slots):

                    if draw_decays:
                        decay.init.export_graphvis(
                            'decay%d_%d.dot' % (i, event.EventNumber))

                    TrueTau.set(truetau, decay, verbose=verbose)

                    # match the visible decay products to a reco tau,
                    # electron or muon depending on the decay mode
                    if decay.hadronic:
                        reco, dr = closest_reco_object(
                            event.taus, decay.fourvect_visible, dR=0.2)
                        if reco is None:
                            every_decay_matched = False
                        else:
                            matched_objects.append(reco)
                            reco.matched = True
                            reco.matched_dr = dr
                            RecoTau.set(tau, reco, verbose=verbose)
                    elif decay.leptonic_electron:
                        reco, dr = closest_reco_object(
                            event.electrons, decay.fourvect_visible, dR=0.2)
                        if reco is None:
                            every_decay_matched = False
                        else:
                            matched_objects.append(reco)
                            reco.matched = True
                            reco.matched_dr = dr
                            RecoElectron.set(electron, reco)
                    elif decay.leptonic_muon:
                        reco, dr = closest_reco_object(
                            event.muons, decay.fourvect_visible, dR=0.2)
                        if reco is None:
                            every_decay_matched = False
                        else:
                            matched_objects.append(reco)
                            reco.matched = True
                            reco.matched_dr = dr
                            RecoMuon.set(muon, reco)
                    else:
                        print("unhandled invalid tau decay:")
                        print(decay)
                        # with draw_decays set, this decay's graph was
                        # already exported at the top of the loop body
                        if not draw_decays:
                            decay.init.export_graphvis(
                                'decay%d_%d.dot' % (i, event.EventNumber))
                        unhandled_decay = True
                        break

                if unhandled_decay:
                    # skip this event
                    continue

                # did both decays match a reco object?
                tree.matched = every_decay_matched

                # match collision: both decays matched the same reco object
                if every_decay_matched:
                    tree.match_collision = (
                        matched_objects[0] == matched_objects[1])

                # MET
                missing_et = event.MET
                tree.met_x = missing_et.etx
                tree.met_y = missing_et.ety
                tree.met_phi = missing_et.phi
                tree.met = missing_et.et
                tree.sum_et = missing_et.sumet

                # extra event-level variables
                tree.channel = event.mc_channel_number
                tree.event = event.EventNumber
                tree.run = event.RunNumber
                tree.mu = event.averageIntPerXing
                tree.collision_energy = collision_energy

                tree.Fill()
            except:
                # report where the failure happened, then propagate it
                print("event index: %d" % event_index)
                print("event number: %d" % event.EventNumber)
                print("file: %s" % chain.file.GetName())
                raise

        self.output.cd()
        tree.FlushBaskets()
        tree.Write()
Esempio n. 14
0
    def work(self):
        # get argument values
        local = self.args.local
        syst_terms = self.args.syst_terms
        datatype = self.metadata.datatype
        year = self.metadata.year
        verbose = self.args.student_verbose
        very_verbose = self.args.student_very_verbose
        redo_selection = self.args.redo_selection
        nominal_values = self.args.nominal_values

        # get the dataset name
        dsname = os.getenv('INPUT_DATASET_NAME', None)
        if dsname is None:
            # attempt to guess dsname from dirname
            if self.files:
                dsname = os.path.basename(os.path.dirname(self.files[0]))

        # is this a signal sample?
        # if so we will also keep some truth information in the output below
        is_signal = datatype == datasets.MC and (
            '_VBFH' in dsname or '_ggH' in dsname or '_ZH' in dsname
            or '_WH' in dsname or '_ttH' in dsname)
        log.info("DATASET: {0}".format(dsname))
        log.info("IS SIGNAL: {0}".format(is_signal))

        # is this an inclusive signal sample for overlap studies?
        is_inclusive_signal = is_signal and '_inclusive' in dsname

        # is this a BCH-fixed sample? (temporary)
        is_bch_sample = 'r5470_r4540_p1344' in dsname
        if is_bch_sample:
            log.warning("this is a BCH-fixed r5470 sample")

        # onfilechange will contain a list of functions to be called as the
        # chain rolls over to each new file
        onfilechange = []
        count_funcs = {}

        if datatype != datasets.DATA:
            # count the weighted number of events
            if local:

                def mc_weight_count(event):
                    return event.hh_mc_weight
            else:

                def mc_weight_count(event):
                    return event.TruthEvent[0].weights()[0]

            count_funcs = {
                'mc_weight': mc_weight_count,
            }

        if local:
            # local means running on the skims, the output of this script
            # running on the grid
            if datatype == datasets.DATA:
                # merge the GRL fragments
                merged_grl = goodruns.GRL()

                def update_grl(student, grl, name, file, tree):
                    grl |= str(
                        file.Get('Lumi/%s' %
                                 student.metadata.treename).GetString())

                onfilechange.append((update_grl, (
                    self,
                    merged_grl,
                )))

            if datatype == datasets.DATA:
                merged_cutflow = Hist(1, 0, 1, name='cutflow', type='D')
            else:
                merged_cutflow = Hist(2, 0, 2, name='cutflow', type='D')

            def update_cutflow(student, cutflow, name, file, tree):
                # record a cut-flow
                year = student.metadata.year
                datatype = student.metadata.datatype
                cutflow[1].value += file.cutflow_event[1].value
                if datatype != datasets.DATA:
                    cutflow[2].value += file.cutflow_event_mc_weight[1].value

            onfilechange.append((update_cutflow, (
                self,
                merged_cutflow,
            )))

        else:

            # NEED TO BE CONVERTED TO XAOD
            # if datatype not in (datasets.EMBED, datasets.MCEMBED):
            #     # merge TrigConfTrees
            #     metadirname = '%sMeta' % self.metadata.treename
            #     trigconfchain = ROOT.TChain('%s/TrigConfTree' % metadirname)
            #     map(trigconfchain.Add, self.files)
            #     metadir = self.output.mkdir(metadirname)
            #     metadir.cd()
            #     trigconfchain.Merge(self.output, -1, 'fast keep')
            #     self.output.cd()

            if datatype == datasets.DATA:
                # merge GRL XML strings
                merged_grl = goodruns.GRL()
            #     for fname in self.files:
            #         with root_open(fname) as f:
            #             for key in f.Lumi.keys():
            #                 merged_grl |= goodruns.GRL(
            #                     str(key.ReadObj().GetString()),
            #                     from_string=True)
            #     lumi_dir = self.output.mkdir('Lumi')
            #     lumi_dir.cd()
            #     xml_string= ROOT.TObjString(merged_grl.str())
            #     xml_string.Write(self.metadata.treename)
            #     self.output.cd()

        self.output.cd()

        # create the output tree
        model = get_model(datatype,
                          dsname,
                          prefix=None if local else 'hh_',
                          is_inclusive_signal=is_inclusive_signal)
        log.info("Output Model:\n\n{0}\n\n".format(model))
        outtree = Tree(name=self.metadata.treename, model=model)

        if local:
            tree = outtree
        else:
            tree = outtree.define_object(name='tree', prefix='hh_')

        #tree.define_object(name='tau', prefix='tau_')
        tree.define_object(name='tau1', prefix='tau1_')
        tree.define_object(name='tau2', prefix='tau2_')
        tree.define_object(name='truetau1', prefix='truetau1_')
        tree.define_object(name='truetau2', prefix='truetau2_')
        tree.define_object(name='jet1', prefix='jet1_')
        tree.define_object(name='jet2', prefix='jet2_')
        tree.define_object(name='jet3', prefix='jet3_')

        mmc_objects = [
            tree.define_object(name='mmc0', prefix='mmc0_'),
            tree.define_object(name='mmc1', prefix='mmc1_'),
            tree.define_object(name='mmc2', prefix='mmc2_'),
        ]

        for mmc_obj in mmc_objects:
            mmc_obj.define_object(name='resonance', prefix='resonance_')

        # NEED TO BE CONVERTED TO XAOD
        # trigger_emulation = TauTriggerEmulation(
        #     year=year,
        #     passthrough=local or datatype != datasets.MC or year > 2011,
        #     count_funcs=count_funcs)

        # if not trigger_emulation.passthrough:
        #     onfilechange.append(
        #         (update_trigger_trees, (self, trigger_emulation,)))

        # trigger_config = None

        # if datatype not in (datasets.EMBED, datasets.MCEMBED):
        #     # trigger config tool to read trigger info in the ntuples
        #     trigger_config = get_trigger_config()
        #     # update the trigger config maps on every file change
        #     onfilechange.append((update_trigger_config, (trigger_config,)))

        # define the list of event filters
        if local and syst_terms is None and not redo_selection:
            event_filters = None
        else:
            tau_ntrack_recounted_use_ntup = False
            if year > 2011:
                # peek at first tree to determine if the extended number of
                # tracks is already stored
                with root_open(self.files[0]) as test_file:
                    test_tree = test_file.Get(self.metadata.treename)
                    tau_ntrack_recounted_use_ntup = ('tau_out_track_n_extended'
                                                     in test_tree)

            log.info(self.grl)
            event_filters = EventFilterList([
                GRLFilter(self.grl,
                          passthrough=(local
                                       or (datatype not in (datasets.DATA,
                                                            datasets.EMBED))),
                          count_funcs=count_funcs),
                CoreFlags(passthrough=local, count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # EmbeddingPileupPatch(
                #     passthrough=(
                #         local or year > 2011 or datatype != datasets.EMBED),
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD (not a priority)
                # PileupTemplates(
                #     year=year,
                #     passthrough=(
                #         local or is_bch_sample or datatype not in (
                #             datasets.MC, datasets.MCEMBED)),
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # RandomSeed(
                #     datatype=datatype,
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # BCHSampleRunNumber(
                #     passthrough=not is_bch_sample,
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # RandomRunNumber(
                #     tree=tree,
                #     datatype=datatype,
                #     pileup_tool=pileup_tool,
                #     passthrough=local,
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # trigger_emulation,
                # NEED TO BE CONVERTED TO XAOD
                # Triggers(
                #     year=year,
                #     tree=tree,
                #     datatype=datatype,
                #     passthrough=datatype in (datasets.EMBED, datasets.MCEMBED),
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                PileupReweight_xAOD(
                    tree=tree,
                    passthrough=(local
                                 or (datatype
                                     not in (datasets.MC, datasets.MCEMBED))),
                    count_funcs=count_funcs),
                PriVertex(passthrough=local, count_funcs=count_funcs),
                LArError(passthrough=local, count_funcs=count_funcs),
                TileError(passthrough=local, count_funcs=count_funcs),
                TileTrips(passthrough=(local or datatype
                                       in (datasets.MC, datasets.MCEMBED)),
                          count_funcs=count_funcs),
                JetCalibration(datatype=datatype,
                               passthrough=local,
                               count_funcs=count_funcs),
                JetResolution(
                    passthrough=(local
                                 or (datatype
                                     not in (datasets.MC, datasets.MCEMBED))),
                    count_funcs=count_funcs),
                TauCalibration(datatype,
                               passthrough=local,
                               count_funcs=count_funcs),
                # # truth matching must come before systematics due to
                # # TES_TRUE/FAKE
                # NEED TO BE CONVERTED TO XAOD
                TrueTauSelection(passthrough=datatype == datasets.DATA,
                                 count_funcs=count_funcs),
                TruthMatching(passthrough=datatype == datasets.DATA,
                              count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                NvtxJets(tree=tree, count_funcs=count_funcs),
                # # PUT THE SYSTEMATICS "FILTER" BEFORE
                # # ANY FILTERS THAT REFER TO OBJECTS
                # # BUT AFTER CALIBRATIONS
                # # Systematics must also come before anything that refers to
                # # thing.fourvect since fourvect is cached!
                # NEED TO BE CONVERTED TO XAOD
                # Systematics(
                #     terms=syst_terms,
                #     year=year,
                #     datatype=datatype,
                #     tree=tree,
                #     verbose=verbose,
                #     passthrough=not syst_terms,
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # JetIsPileup(
                #     passthrough=(
                #         local or year < 2012 or
                #         datatype not in (datasets.MC, datasets.MCEMBED)),
                #     count_funcs=count_funcs),
                JetCleaning(datatype=datatype,
                            year=year,
                            count_funcs=count_funcs),
                ElectronVeto(el_sel='Medium', count_funcs=count_funcs),
                MuonVeto(count_funcs=count_funcs),
                TauPT(2, thresh=20 * GeV, count_funcs=count_funcs),
                TauHasTrack(2, count_funcs=count_funcs),
                TauEta(2, count_funcs=count_funcs),
                TauElectronVeto(2, count_funcs=count_funcs),
                TauMuonVeto(2, count_funcs=count_funcs),
                TauCrack(2, count_funcs=count_funcs),
                # # before selecting the leading and subleading taus
                # # be sure to only consider good candidates
                TauIDMedium(2, count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # but not used by default
                # #TauTriggerMatchIndex(
                # #    config=trigger_config,
                # #    year=year,
                # #    datatype=datatype,
                # #    passthrough=datatype == datasets.EMBED,
                # #    count_funcs=count_funcs),
                # Select two leading taus at this point
                # 25 and 35 for data
                # 20 and 30 for MC to leave room for TES uncertainty
                TauLeadSublead(lead=(35 * GeV if datatype == datasets.DATA
                                     or local else 30 * GeV),
                               sublead=(25 * GeV if datatype == datasets.DATA
                                        or local else 20 * GeV),
                               count_funcs=count_funcs),
                # taus are sorted (in decreasing order) by pT from here on
                TauIDSelection(count_funcs=count_funcs),
                TaudR(3.2, count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # but not used by default
                # #TauTriggerMatchThreshold(
                # #    datatype=datatype,
                # #    tree=tree,
                # #    count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # TauTriggerEfficiency(
                #     year=year,
                #     datatype=datatype,
                #     tree=tree,
                #     tes_systematic=self.args.syst_terms and (
                #         Systematics.TES_TERMS & self.args.syst_terms),
                #     passthrough=datatype == datasets.DATA,
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                PileupScale(tree=tree,
                            year=year,
                            datatype=datatype,
                            passthrough=local,
                            count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                TauIDScaleFactors(year=year,
                                  passthrough=datatype == datasets.DATA,
                                  count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # TauFakeRateScaleFactors(
                #     year=year,
                #     datatype=datatype,
                #     tree=tree,
                #     tes_up=(self.args.syst_terms is not None and
                #         (Systematics.TES_FAKE_TOTAL_UP in self.args.syst_terms or
                #          Systematics.TES_FAKE_FINAL_UP in self.args.syst_terms)),
                #     tes_down=(self.args.syst_terms is not None and
                #         (Systematics.TES_FAKE_TOTAL_DOWN in self.args.syst_terms or
                #          Systematics.TES_FAKE_FINAL_DOWN in self.args.syst_terms)),
                #     passthrough=datatype in (datasets.DATA, datasets.EMBED),
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                HiggsPT(year=year,
                        tree=tree,
                        passthrough=not is_signal or local,
                        count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # TauTrackRecounting(
                #     year=year,
                #     use_ntup_value=tau_ntrack_recounted_use_ntup,
                #     passthrough=local,
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # MCWeight(
                #     datatype=datatype,
                #     tree=tree,
                #     passthrough=local or datatype == datasets.DATA,
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # EmbeddingIsolation(
                #     tree=tree,
                #     passthrough=(
                #         local or year < 2012 or
                #         datatype not in (datasets.EMBED, datasets.MCEMBED)),
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # EmbeddingCorrections(
                #     tree=tree,
                #     year=year,
                #     passthrough=(
                #         local or
                #         datatype not in (datasets.EMBED, datasets.MCEMBED)),
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # EmbeddingTauSpinner(
                #     year=year,
                #     tree=tree,
                #     passthrough=(
                #         local or datatype not in (
                #             datasets.EMBED, datasets.MCEMBED)),
                #     count_funcs=count_funcs),
                # # put MET recalculation after tau selection but before tau-jet
                # # overlap removal and jet selection because of the RefAntiTau
                # # MET correction
                # NEED TO BE CONVERTED TO XAOD
                # METRecalculation(
                #     terms=syst_terms,
                #     year=year,
                #     tree=tree,
                #     refantitau=not nominal_values,
                #     verbose=verbose,
                #     very_verbose=very_verbose,
                #     count_funcs=count_funcs),
                TauJetOverlapRemoval(count_funcs=count_funcs),
                JetPreselection(count_funcs=count_funcs),
                NonIsolatedJet(tree=tree, count_funcs=count_funcs),
                JetSelection(year=year, count_funcs=count_funcs),
                RecoJetTrueTauMatching(passthrough=datatype == datasets.DATA
                                       or local,
                                       count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                # BCHCleaning(
                #     tree=tree,
                #     passthrough=year == 2011 or local,
                #     datatype=datatype,
                #     count_funcs=count_funcs),
                # NEED TO BE CONVERTED TO XAOD
                ClassifyInclusiveHiggsSample(
                    tree=tree,
                    passthrough=not is_inclusive_signal,
                    count_funcs=count_funcs),
            ])

            # set the event filters
            self.filters['event'] = event_filters

        hh_buffer = TreeBuffer()
        if local:
            chain = TreeChain(
                self.metadata.treename,
                files=self.files,
                # ignore_branches=ignore_branches,
                events=self.events,
                onfilechange=onfilechange,
                filters=event_filters,
                cache=True,
                cache_size=50000000,
                learn_entries=100)
            buffer = TreeBuffer()
            for name, value in chain._buffer.items():
                if name.startswith('hh_'):
                    hh_buffer[name[3:]] = value
                elif name in copied:
                    buffer[name] = value
            outtree.set_buffer(hh_buffer, create_branches=False, visible=True)
            outtree.set_buffer(buffer, create_branches=True, visible=False)

        else:

            root_chain = ROOT.TChain(self.metadata.treename)
            for f in self.files:
                log.info(f)
                root_chain.Add(f)

            # if len(self.files) != 1:
            #     raise RuntimeError('lenght of files has to be 1 for now (no xAOD chaining available)')
            # self.files = self.files[0]
            # root_chain = ROOT.TFile(self.files)

            chain = xAODTree(root_chain,
                             filters=event_filters,
                             events=self.events)
            define_objects(chain, datatype=datatype)
            outtree.set_buffer(hh_buffer, create_branches=True, visible=False)

            # create the MMC
            mmc = mass.MMC(year=year)

        # report which packages have been loaded
        # externaltools.report()

        self.output.cd()

        # The main event loop
        # the event filters above are automatically run for each event and only
        # the surviving events are looped on
        for event in chain:

            if local and syst_terms is None and not redo_selection:
                outtree.Fill()
                continue

            # sort taus and jets in decreasing order by pT
            event.taus.sort(key=lambda tau: tau.pt(), reverse=True)
            event.jets.sort(key=lambda jet: jet.pt(), reverse=True)

            # tau1 is the leading tau
            # tau2 is the subleading tau
            tau1, tau2 = event.taus
            tau1.fourvect = asrootpy(tau1.p4())
            tau2.fourvect = asrootpy(tau2.p4())

            beta_taus = (tau1.fourvect + tau2.fourvect).BoostVector()
            tau1.fourvect_boosted = LorentzVector()
            tau1.fourvect_boosted.copy_from(tau1.fourvect)
            tau1.fourvect_boosted.Boost(beta_taus * -1)

            tau2.fourvect_boosted = LorentzVector()
            tau2.fourvect_boosted.copy_from(tau2.fourvect)
            tau2.fourvect_boosted.Boost(beta_taus * -1)

            jets = list(event.jets)
            for jet in jets:
                jet.fourvect = asrootpy(jet.p4())

            jet1, jet2, jet3 = None, None, None
            beta = None
            if len(jets) >= 2:
                jet1, jet2 = jets[:2]

                # determine boost of system
                # determine jet CoM frame
                beta = (jet1.fourvect + jet2.fourvect).BoostVector()
                tree.jet_beta.copy_from(beta)

                jet1.fourvect_boosted = LorentzVector()
                jet1.fourvect_boosted.copy_from(jet1.fourvect)
                jet1.fourvect_boosted.Boost(beta * -1)

                jet2.fourvect_boosted = LorentzVector()
                jet2.fourvect_boosted.copy_from(jet2.fourvect)
                jet2.fourvect_boosted.Boost(beta * -1)

                tau1.min_dr_jet = min(tau1.fourvect.DeltaR(jet1.fourvect),
                                      tau1.fourvect.DeltaR(jet2.fourvect))
                tau2.min_dr_jet = min(tau2.fourvect.DeltaR(jet1.fourvect),
                                      tau2.fourvect.DeltaR(jet2.fourvect))

                # tau centrality (degree to which they are between the two jets)
                tau1.centrality = eventshapes.eta_centrality(
                    tau1.fourvect.Eta(), jet1.fourvect.Eta(),
                    jet2.fourvect.Eta())

                tau2.centrality = eventshapes.eta_centrality(
                    tau2.fourvect.Eta(), jet1.fourvect.Eta(),
                    jet2.fourvect.Eta())

                # boosted tau centrality
                tau1.centrality_boosted = eventshapes.eta_centrality(
                    tau1.fourvect_boosted.Eta(), jet1.fourvect_boosted.Eta(),
                    jet2.fourvect_boosted.Eta())

                tau2.centrality_boosted = eventshapes.eta_centrality(
                    tau2.fourvect_boosted.Eta(), jet1.fourvect_boosted.Eta(),
                    jet2.fourvect_boosted.Eta())

                # 3rd leading jet
                if len(jets) >= 3:
                    jet3 = jets[2]
                    jet3.fourvect_boosted = LorentzVector()
                    jet3.fourvect_boosted.copy_from(jet3.fourvect)
                    jet3.fourvect_boosted.Boost(beta * -1)

            elif len(jets) == 1:
                jet1 = jets[0]

                tau1.min_dr_jet = tau1.fourvect.DeltaR(jet1.fourvect)
                tau2.min_dr_jet = tau2.fourvect.DeltaR(jet1.fourvect)

            RecoJetBlock.set(tree, jet1, jet2, jet3, local=local)

            # mass of ditau + leading jet system
            if jet1 is not None:
                tree.mass_tau1_tau2_jet1 = (tau1.fourvect + tau2.fourvect +
                                            jet1.fourvect).M()

            #####################################
            # number of tracks from PV minus taus
            #####################################
            ntrack_pv = 0
            ntrack_nontau_pv = 0
            for vxp in event.vertices:
                # primary vertex
                if vxp.vertexType() == 1:
                    ntrack_pv = vxp.nTrackParticles()
                    ntrack_nontau_pv = ntrack_pv - tau1.nTracks(
                    ) - tau2.nTracks()
                    break
            tree.ntrack_pv = ntrack_pv
            tree.ntrack_nontau_pv = ntrack_nontau_pv

            #########################
            # MET variables
            #########################
            MET = event.MET[0]
            METx = MET.mpx()
            METy = MET.mpy()
            METet = MET.met()
            MET_vect = Vector2(METx, METy)
            MET_4vect = LorentzVector()
            MET_4vect.SetPxPyPzE(METx, METy, 0., METet)
            MET_4vect_boosted = LorentzVector()
            MET_4vect_boosted.copy_from(MET_4vect)
            if beta is not None:
                MET_4vect_boosted.Boost(beta * -1)

            tree.MET_et = METet
            tree.MET_etx = METx
            tree.MET_ety = METy
            tree.MET_phi = MET.phi()
            dPhi_tau1_tau2 = abs(tau1.fourvect.DeltaPhi(tau2.fourvect))
            dPhi_tau1_MET = abs(tau1.fourvect.DeltaPhi(MET_4vect))
            dPhi_tau2_MET = abs(tau2.fourvect.DeltaPhi(MET_4vect))
            tree.dPhi_tau1_tau2 = dPhi_tau1_tau2
            tree.dPhi_tau1_MET = dPhi_tau1_MET
            tree.dPhi_tau2_MET = dPhi_tau2_MET
            tree.dPhi_min_tau_MET = min(dPhi_tau1_MET, dPhi_tau2_MET)
            tree.MET_bisecting = is_MET_bisecting(dPhi_tau1_tau2,
                                                  dPhi_tau1_MET, dPhi_tau2_MET)

            sumET = MET.sumet()
            tree.MET_sumet = sumET
            if sumET != 0:
                tree.MET_sig = ((2. * METet / GeV) /
                                (utils.sign(sumET) * sqrt(abs(sumET / GeV))))
            else:
                tree.MET_sig = -1.

            tree.MET_centrality = eventshapes.phi_centrality(
                tau1.fourvect, tau2.fourvect, MET_vect)
            tree.MET_centrality_boosted = eventshapes.phi_centrality(
                tau1.fourvect_boosted, tau2.fourvect_boosted,
                MET_4vect_boosted)

            tree.number_of_good_vertices = len(event.vertices)

            ##########################
            # Jet and sum pt variables
            ##########################
            tree.numJets = len(event.jets)

            # sum pT with only the two leading jets
            tree.sum_pt = sum([tau1.pt(), tau2.pt()] +
                              [jet.pt() for jet in jets[:2]])

            # sum pT with all selected jets
            tree.sum_pt_full = sum([tau1.pt(), tau2.pt()] +
                                   [jet.pt() for jet in jets])

            # vector sum pT with two leading jets and MET
            tree.vector_sum_pt = sum([tau1.fourvect, tau2.fourvect] +
                                     [jet.fourvect
                                      for jet in jets[:2]] + [MET_4vect]).Pt()

            # vector sum pT with all selected jets and MET
            tree.vector_sum_pt_full = sum([tau1.fourvect, tau2.fourvect] +
                                          [jet.fourvect for jet in jets] +
                                          [MET_4vect]).Pt()

            # resonance pT
            tree.resonance_pt = sum([tau1.fourvect, tau2.fourvect,
                                     MET_4vect]).Pt()

            # #############################
            # # tau <-> vertex association
            # #############################
            tree.tau_same_vertex = (tau1.vertex() == tau2.vertex())

            tau1.vertex_prob = ROOT.TMath.Prob(tau1.vertex().chiSquared(),
                                               int(tau1.vertex().numberDoF()))

            tau2.vertex_prob = ROOT.TMath.Prob(tau2.vertex().chiSquared(),
                                               int(tau2.vertex().numberDoF()))

            # ##########################
            # # MMC Mass
            # ##########################
            mmc_result = mmc.mass(tau1,
                                  tau2,
                                  METx,
                                  METy,
                                  sumET,
                                  njets=len(event.jets))

            for mmc_method, mmc_object in enumerate(mmc_objects):
                mmc_mass, mmc_resonance, mmc_met = mmc_result[mmc_method]
                if verbose:
                    log.info("MMC (method %d): %f" % (mmc_method, mmc_mass))

                mmc_object.mass = mmc_mass
                mmc_object.MET_et = mmc_met.Mod()
                mmc_object.MET_etx = mmc_met.X()
                mmc_object.MET_ety = mmc_met.Y()
                mmc_object.MET_phi = math.pi - mmc_met.Phi()
                if mmc_mass > 0:
                    FourMomentum.set(mmc_object.resonance, mmc_resonance)

            # ############################
            # # collinear and visible mass
            # ############################
            vis_mass, collin_mass, tau1_x, tau2_x = mass.collinearmass(
                tau1, tau2, METx, METy)

            tree.mass_vis_tau1_tau2 = vis_mass
            tree.mass_collinear_tau1_tau2 = collin_mass
            tau1.collinear_momentum_fraction = tau1_x
            tau2.collinear_momentum_fraction = tau2_x

            # # Fill the tau block
            # # This must come after the RecoJetBlock is filled since
            # # that sets the jet_beta for boosting the taus
            RecoTauBlock.set(event, tree, datatype, tau1, tau2, local=local)

            # NEED TO BE CONVERTED TO XAOD
            if datatype != datasets.DATA:
                TrueTauBlock.set(tree, tau1, tau2)
            # fill the output tree
            outtree.Fill(reset=True)

        # externaltools.report()

        # flush any baskets remaining in memory to disk
        self.output.cd()
        outtree.FlushBaskets()
        outtree.Write()

        if local:
            if datatype == datasets.DATA:
                xml_string = ROOT.TObjString(merged_grl.str())
                xml_string.Write('lumi')
            merged_cutflow.Write()
Esempio n. 15
0
                    outTree.corrPhotonMVA *= evt.photonMVAWeight

                if 'muon' in dataset or 'electron' in dataset:
                    this_evt_mingyan = mingyan_data.query(
                        'run == {0} and lumi == {1} and evt == {2}'.format(
                            evt.runNumber, evt.lumiSection, evt.evtNumber))
                    if this_evt_mingyan.shape[0] > 0:
                        this_mass = this_evt_mingyan['CMS_hzg_mass'].values[0]
                        outTree.llgMKinMY = this_mass
                        outTree.isMingYanData = 1
                    else:
                        this_mass_mingyan = mingyan_data.query(
                            'run == {0} and lumi == {1} and abs(llgMRaw - {2}) <= 0.001'
                            .format(evt.runNumber, evt.lumiSection, evt.llgM))
                        if this_mass_mingyan.shape[0] > 0:
                            this_mass = this_mass_mingyan[
                                'CMS_hzg_mass'].values[0]
                            outTree.llgMKinMY = this_mass
                            outTree.evtNumber = this_mass_mingyan[
                                'evt'].values[0]
                            outTree.isMingYanData = 1

                outTree.Fill()

            outTree.Write()
            hist.Write()
        outputFile.Close()
        inputFile.Close()

    pickle.dump(sf_dict, open('data/mc_sfs/mc_sfs.pkl', 'wb'))
Esempio n. 16
0
    def work(self):
        # get argument values
        local = self.args.local
        syst_terms = self.args.syst_terms
        datatype = self.metadata.datatype
        year = self.metadata.year
        verbose = self.args.student_verbose
        very_verbose = self.args.student_very_verbose
        redo_selection = self.args.redo_selection
        nominal_values = self.args.nominal_values

        # get the dataset name
        dsname = os.getenv('INPUT_DATASET_NAME', None)
        if dsname is None:
            # attempt to guess dsname from dirname
            if self.files:
                dsname = os.path.basename(os.path.dirname(self.files[0]))

        # is this a signal sample?
        # if so we will also keep some truth information in the output below
        is_signal = datatype == datasets.MC and (
            '_VBFH' in dsname or '_ggH' in dsname or '_ZH' in dsname
            or '_WH' in dsname or '_ttH' in dsname)
        log.info("DATASET: {0}".format(dsname))
        log.info("IS SIGNAL: {0}".format(is_signal))

        # is this an inclusive signal sample for overlap studies?
        is_inclusive_signal = is_signal and '_inclusive' in dsname

        # is this a BCH-fixed sample? (temporary)
        is_bch_sample = 'r5470_r4540_p1344' in dsname
        if is_bch_sample:
            log.warning("this is a BCH-fixed r5470 sample")

        # onfilechange will contain a list of functions to be called as the
        # chain rolls over to each new file
        onfilechange = []
        count_funcs = {}

        if datatype != datasets.DATA:
            # count the weighted number of events
            if local:

                def mc_weight_count(event):
                    """Return the ``hh_mc_weight`` attribute read from *event*.

                    :param event: object exposing an ``hh_mc_weight`` attribute
                    :return: the value of ``event.hh_mc_weight``
                    """
                    weight = event.hh_mc_weight
                    return weight
            else:

                def mc_weight_count(event):
                    """Return the ``mc_event_weight`` attribute read from *event*.

                    :param event: object exposing an ``mc_event_weight`` attribute
                    :return: the value of ``event.mc_event_weight``
                    """
                    weight = event.mc_event_weight
                    return weight

            count_funcs = {
                'mc_weight': mc_weight_count,
            }

        # three instances of the pileup reweighting tool are created to write
        # out the nominal, high and low pileup weights
        pileup_tool = None
        pileup_tool_high = None
        pileup_tool_low = None

        if local:
            # local means running on the skims, the output of this script
            # running on the grid
            if datatype == datasets.DATA:
                # merge the GRL fragments
                merged_grl = goodruns.GRL()

                def update_grl(student, grl, name, file, tree):
                    """Merge the lumi string stored in *file* into *grl*.

                    Looks up ``'Lumi/<treename>'`` in *file*, where the tree
                    name comes from ``student.metadata.treename``, converts the
                    result of ``GetString()`` to ``str`` and applies it to
                    *grl* with ``|=``.

                    :param student: object providing ``metadata.treename``
                    :param grl: accumulator updated with ``|=``
                    :param name: not used here
                    :param file: object providing ``Get(key)``
                    :param tree: not used here
                    """
                    lumi_key = 'Lumi/%s' % student.metadata.treename
                    lumi_object = file.Get(lumi_key)
                    grl |= str(lumi_object.GetString())

                onfilechange.append((update_grl, (
                    self,
                    merged_grl,
                )))

            if datatype == datasets.DATA:
                merged_cutflow = Hist(1, 0, 1, name='cutflow', type='D')
            else:
                merged_cutflow = Hist(2, 0, 2, name='cutflow', type='D')

            def update_cutflow(student, cutflow, name, file, tree):
                """Add the cut-flow counts stored in *file* to *cutflow*.

                ``cutflow[1]`` is always incremented by
                ``file.cutflow_event[1]``. When ``student.metadata.datatype``
                is not ``datasets.DATA``, ``cutflow[2]`` is additionally
                incremented by ``file.cutflow_event_mc_weight[1]``.

                :param student: object providing ``metadata.datatype``
                :param cutflow: merged cut-flow histogram, updated in place
                :param name: not used here
                :param file: object providing ``cutflow_event`` and, for
                    non-data samples, ``cutflow_event_mc_weight``
                :param tree: not used here
                """
                # record a cut-flow
                datatype = student.metadata.datatype
                cutflow[1].value += file.cutflow_event[1].value
                if datatype != datasets.DATA:
                    cutflow[2].value += file.cutflow_event_mc_weight[1].value

            onfilechange.append((update_cutflow, (
                self,
                merged_cutflow,
            )))

        else:
            # get pileup reweighting tool
            pileup_tool = get_pileup_reweighting_tool(year=year,
                                                      use_defaults=True)
            pileup_tool_high = get_pileup_reweighting_tool(year=year,
                                                           use_defaults=True,
                                                           systematic='high')
            pileup_tool_low = get_pileup_reweighting_tool(year=year,
                                                          use_defaults=True,
                                                          systematic='low')

            if datatype not in (datasets.EMBED, datasets.MCEMBED):
                # merge TrigConfTrees
                metadirname = '%sMeta' % self.metadata.treename
                trigconfchain = ROOT.TChain('%s/TrigConfTree' % metadirname)
                map(trigconfchain.Add, self.files)
                metadir = self.output.mkdir(metadirname)
                metadir.cd()
                trigconfchain.Merge(self.output, -1, 'fast keep')
                self.output.cd()

            if datatype == datasets.DATA:
                # merge GRL XML strings
                merged_grl = goodruns.GRL()
                for fname in self.files:
                    with root_open(fname) as f:
                        for key in f.Lumi.keys():
                            merged_grl |= goodruns.GRL(str(
                                key.ReadObj().GetString()),
                                                       from_string=True)
                lumi_dir = self.output.mkdir('Lumi')
                lumi_dir.cd()
                xml_string = ROOT.TObjString(merged_grl.str())
                xml_string.Write(self.metadata.treename)
                self.output.cd()

        self.output.cd()

        # create the output tree
        model = get_model(datatype,
                          dsname,
                          prefix=None if local else 'hh_',
                          is_inclusive_signal=is_inclusive_signal)
        log.info("Output Model:\n\n{0}\n\n".format(model))
        outtree = Tree(name=self.metadata.treename, model=model)

        if local:
            tree = outtree
        else:
            tree = outtree.define_object(name='tree', prefix='hh_')

        tree.define_object(name='tau', prefix='tau_')
        tree.define_object(name='tau1', prefix='tau1_')
        tree.define_object(name='tau2', prefix='tau2_')
        tree.define_object(name='truetau1', prefix='truetau1_')
        tree.define_object(name='truetau2', prefix='truetau2_')
        tree.define_object(name='jet1', prefix='jet1_')
        tree.define_object(name='jet2', prefix='jet2_')
        tree.define_object(name='jet3', prefix='jet3_')

        mmc_objects = [
            tree.define_object(name='mmc0', prefix='mmc0_'),
            tree.define_object(name='mmc1', prefix='mmc1_'),
            tree.define_object(name='mmc2', prefix='mmc2_'),
        ]

        for mmc_obj in mmc_objects:
            mmc_obj.define_object(name='resonance', prefix='resonance_')

        trigger_emulation = TauTriggerEmulation(year=year,
                                                passthrough=local
                                                or datatype != datasets.MC
                                                or year > 2011,
                                                count_funcs=count_funcs)

        if not trigger_emulation.passthrough:
            onfilechange.append((update_trigger_trees, (
                self,
                trigger_emulation,
            )))

        trigger_config = None

        if datatype not in (datasets.EMBED, datasets.MCEMBED):
            # trigger config tool to read trigger info in the ntuples
            trigger_config = get_trigger_config()
            # update the trigger config maps on every file change
            onfilechange.append((update_trigger_config, (trigger_config, )))

        # define the list of event filters
        if local and syst_terms is None and not redo_selection:
            event_filters = None
        else:
            tau_ntrack_recounted_use_ntup = False
            if year > 2011:
                # peek at first tree to determine if the extended number of
                # tracks is already stored
                with root_open(self.files[0]) as test_file:
                    test_tree = test_file.Get(self.metadata.treename)
                    tau_ntrack_recounted_use_ntup = ('tau_out_track_n_extended'
                                                     in test_tree)

            event_filters = EventFilterList([
                averageIntPerXingPatch(
                    passthrough=(local or year < 2012
                                 or datatype != datasets.MC),
                    count_funcs=count_funcs),
                PileupTemplates(
                    year=year,
                    passthrough=(local or is_bch_sample or datatype
                                 not in (datasets.MC, datasets.MCEMBED)),
                    count_funcs=count_funcs),
                RandomSeed(datatype=datatype, count_funcs=count_funcs),
                RandomRunNumber(tree=tree,
                                datatype=datatype,
                                pileup_tool=pileup_tool,
                                passthrough=local,
                                count_funcs=count_funcs),
                PileupReweight(
                    year=year,
                    tool=pileup_tool,
                    tool_high=pileup_tool_high,
                    tool_low=pileup_tool_low,
                    tree=tree,
                    passthrough=(local
                                 or (datatype
                                     not in (datasets.MC, datasets.MCEMBED))),
                    count_funcs=count_funcs),
                TruthMatching(passthrough=datatype == datasets.DATA,
                              count_funcs=count_funcs),
                JetIsPileup(
                    passthrough=(local or year < 2012 or datatype
                                 not in (datasets.MC, datasets.MCEMBED)),
                    count_funcs=count_funcs),
                HiggsPT(year=year,
                        tree=tree,
                        passthrough=not is_signal or local,
                        count_funcs=count_funcs),
                MCWeight(datatype=datatype,
                         tree=tree,
                         passthrough=local or datatype == datasets.DATA,
                         count_funcs=count_funcs),
                ClassifyInclusiveHiggsSample(
                    tree=tree,
                    passthrough=not is_inclusive_signal,
                    count_funcs=count_funcs),
            ])

            # set the event filters
            self.filters['event'] = event_filters

        # peek at first tree to determine which branches to exclude
        with root_open(self.files[0]) as test_file:
            test_tree = test_file.Get(self.metadata.treename)
            ignore_branches = test_tree.glob(hhbranches.REMOVE,
                                             exclude=hhbranches.KEEP)
            ignore_branches_output = test_tree.glob(
                hhbranches.REMOVE_OUTPUT, exclude=hhbranches.KEEP_OUTPUT)

        # initialize the TreeChain of all input files
        chain = TreeChain(self.metadata.treename,
                          files=self.files,
                          ignore_branches=ignore_branches,
                          events=self.events,
                          onfilechange=onfilechange,
                          filters=event_filters,
                          cache=True,
                          cache_size=50000000,
                          learn_entries=100)

        if local:
            copied = [
                'EventNumber',
            ]

            hh_buffer = TreeBuffer()
            buffer = TreeBuffer()
            for name, value in chain._buffer.items():
                if name.startswith('hh_'):
                    hh_buffer[name[3:]] = value
                elif name in copied:
                    buffer[name] = value
            outtree.set_buffer(hh_buffer, create_branches=False, visible=True)
            outtree.set_buffer(buffer, create_branches=True, visible=False)

        else:
            # additional decorations on existing objects
            if year > 2011 and datatype in (datasets.MC, datasets.MCEMBED):

                class Decorations(TreeModel):
                    # Additional ``jet_ispileup`` column declared as a vector
                    # of bool; an instance of this model is handed to
                    # ``chain.set_buffer(..., create_branches=True)`` right
                    # after the class definition.
                    # NOTE(review): presumably one entry per jet -- confirm.
                    jet_ispileup = stl.vector('bool')

                chain.set_buffer(Decorations(), create_branches=True)

            # include the branches in the input chain in the output tree
            # set branches to be removed in ignore_branches
            outtree.set_buffer(chain._buffer,
                               ignore_branches=ignore_branches +
                               ignore_branches_output,
                               create_branches=True,
                               ignore_duplicates=True,
                               transfer_objects=True,
                               visible=False)

        # define tree objects
        define_objects(chain, year)

        # create the MMC
        mmc = mass.MMC(year=year)

        # report which packages have been loaded
        externaltools.report()

        self.output.cd()

        # The main event loop
        # the event filters above are automatically run for each event and only
        # the surviving events are looped on
        for event in chain:

            if local and syst_terms is None and not redo_selection:
                outtree.Fill()
                continue

            # sort taus and jets in decreasing order by pT
            event.taus.sort(key=lambda tau: tau.pt, reverse=True)
            event.jets.sort(key=lambda jet: jet.pt, reverse=True)

            # tau1 is the leading tau
            # tau2 is the subleading tau
            taus = list(event.taus)
            if len(taus) >= 2:
                tau1, tau2 = taus[0], taus[1]
                jets = list(event.jets)
                jet1, jet2, jet3 = None, None, None
                beta = None

                if len(jets) >= 2:
                    jet1, jet2 = jets[:2]

                    # determine boost of system
                    # determine jet CoM frame
                    beta = (jet1.fourvect + jet2.fourvect).BoostVector()
                    tree.jet_beta.copy_from(beta)

                    jet1.fourvect_boosted.copy_from(jet1.fourvect)
                    jet2.fourvect_boosted.copy_from(jet2.fourvect)
                    jet1.fourvect_boosted.Boost(beta * -1)
                    jet2.fourvect_boosted.Boost(beta * -1)

                    tau1.fourvect_boosted.copy_from(tau1.fourvect)
                    tau2.fourvect_boosted.copy_from(tau2.fourvect)
                    tau1.fourvect_boosted.Boost(beta * -1)
                    tau2.fourvect_boosted.Boost(beta * -1)

                    tau1.min_dr_jet = min(tau1.fourvect.DeltaR(jet1.fourvect),
                                          tau1.fourvect.DeltaR(jet2.fourvect))
                    tau2.min_dr_jet = min(tau2.fourvect.DeltaR(jet1.fourvect),
                                          tau2.fourvect.DeltaR(jet2.fourvect))

                    # sphericity, aplanarity = eventshapes.sphericity_aplanarity(
                    #    [tau1.fourvect,
                    #     tau2.fourvect,
                    #     jet1.fourvect,
                    #     jet2.fourvect])

                    # sphericity
                    # tree.sphericity = sphericity
                    # aplanarity
                    # tree.aplanarity = aplanarity

                    # sphericity_boosted, aplanarity_boosted = eventshapes.sphericity_aplanarity(
                    #    [tau1.fourvect_boosted,
                    #     tau2.fourvect_boosted,
                    #     jet1.fourvect_boosted,
                    #     jet2.fourvect_boosted])

                    # sphericity
                    # tree.sphericity_boosted = sphericity_boosted
                    # aplanarity
                    # tree.aplanarity_boosted = aplanarity_boosted

                    # tau centrality (degree to which they are between the two jets)
                    tau1.centrality = eventshapes.eta_centrality(
                        tau1.fourvect.Eta(), jet1.fourvect.Eta(),
                        jet2.fourvect.Eta())

                    tau2.centrality = eventshapes.eta_centrality(
                        tau2.fourvect.Eta(), jet1.fourvect.Eta(),
                        jet2.fourvect.Eta())

                    # boosted tau centrality
                    tau1.centrality_boosted = eventshapes.eta_centrality(
                        tau1.fourvect_boosted.Eta(),
                        jet1.fourvect_boosted.Eta(),
                        jet2.fourvect_boosted.Eta())

                    tau2.centrality_boosted = eventshapes.eta_centrality(
                        tau2.fourvect_boosted.Eta(),
                        jet1.fourvect_boosted.Eta(),
                        jet2.fourvect_boosted.Eta())

                    # 3rd leading jet
                    if len(jets) >= 3:
                        jet3 = jets[2]
                        jet3.fourvect_boosted.copy_from(jet3.fourvect)
                        jet3.fourvect_boosted.Boost(beta * -1)

                elif len(jets) == 1:
                    jet1 = jets[0]

                    tau1.min_dr_jet = tau1.fourvect.DeltaR(jet1.fourvect)
                    tau2.min_dr_jet = tau2.fourvect.DeltaR(jet1.fourvect)

                    # sphericity, aplanarity = eventshapes.sphericity_aplanarity(
                    #    [tau1.fourvect,
                    #     tau2.fourvect,
                    #     jet1.fourvect])

                    # sphericity
                    # tree.sphericity = sphericity
                    # aplanarity
                    #tree.aplanarity = aplanarity

                    RecoJetBlock.set(tree, jet1, jet2, jet3, local=local)

                # mass of ditau + leading jet system
                if jet1 is not None:
                    tree.mass_tau1_tau2_jet1 = (tau1.fourvect + tau2.fourvect +
                                                jet1.fourvect).M()

                # full sphericity and aplanarity
                # sphericity_full, aplanarity_full = eventshapes.sphericity_aplanarity(
                #    [tau1.fourvect, tau2.fourvect] + [jet.fourvect for jet in jets])

                # tree.sphericity_full = sphericity_full
                # tree.aplanarity_full = aplanarity_full

                # ####################################
                # number of tracks from PV minus taus
                # ####################################
                ntrack_pv = 0
                ntrack_nontau_pv = 0
                for vxp in event.vertices:
                    # primary vertex
                    if vxp.type == 1:
                        ntrack_pv = vxp.nTracks
                        ntrack_nontau_pv = ntrack_pv - tau1.numTrack - tau2.numTrack
                        break
                tree.ntrack_pv = ntrack_pv
                tree.ntrack_nontau_pv = ntrack_nontau_pv

                # ########################
                # MET variables
                # ########################
                METx = event.MET.etx
                METy = event.MET.ety
                MET = event.MET.et
                MET_vect = Vector2(METx, METy)
                MET_4vect = LorentzVector()
                MET_4vect.SetPxPyPzE(METx, METy, 0., MET)
                MET_4vect_boosted = LorentzVector()
                MET_4vect_boosted.copy_from(MET_4vect)
                if beta is not None:
                    MET_4vect_boosted.Boost(beta * -1)

                tree.MET_et = MET
                tree.MET_etx = METx
                tree.MET_ety = METy
                tree.MET_phi = event.MET.phi
                dPhi_tau1_tau2 = abs(tau1.fourvect.DeltaPhi(tau2.fourvect))
                dPhi_tau1_MET = abs(tau1.fourvect.DeltaPhi(MET_4vect))
                dPhi_tau2_MET = abs(tau2.fourvect.DeltaPhi(MET_4vect))
                tree.dPhi_tau1_tau2 = dPhi_tau1_tau2
                tree.dPhi_tau1_MET = dPhi_tau1_MET
                tree.dPhi_tau2_MET = dPhi_tau2_MET
                tree.dPhi_min_tau_MET = min(dPhi_tau1_MET, dPhi_tau2_MET)
                tree.MET_bisecting = is_MET_bisecting(dPhi_tau1_tau2,
                                                      dPhi_tau1_MET,
                                                      dPhi_tau2_MET)

                sumET = event.MET.sumet
                tree.MET_sumet = sumET
                if sumET != 0:
                    tree.MET_sig = (
                        (2. * MET / GeV) /
                        (utils.sign(sumET) * sqrt(abs(sumET / GeV))))
                else:
                    tree.MET_sig = -1.

                tree.MET_centrality = eventshapes.phi_centrality(
                    tau1.fourvect, tau2.fourvect, MET_vect)
                tree.MET_centrality_boosted = eventshapes.phi_centrality(
                    tau1.fourvect_boosted, tau2.fourvect_boosted,
                    MET_4vect_boosted)

                tree.number_of_good_vertices = len(event.vertices)

                # #########################
                # Jet and sum pt variables
                # #########################
                tree.numJets = len(event.jets)

                # sum pT with only the two leading jets
                tree.sum_pt = sum([tau1.pt, tau2.pt] +
                                  [jet.pt for jet in jets[:2]])

                # sum pT with all selected jets
                tree.sum_pt_full = sum([tau1.pt, tau2.pt] +
                                       [jet.pt for jet in jets])

                # vector sum pT with two leading jets and MET
                tree.vector_sum_pt = sum([tau1.fourvect, tau2.fourvect] +
                                         [jet.fourvect for jet in jets[:2]] +
                                         [MET_4vect]).Pt()

                # vector sum pT with all selected jets and MET
                tree.vector_sum_pt_full = sum([tau1.fourvect, tau2.fourvect] +
                                              [jet.fourvect for jet in jets] +
                                              [MET_4vect]).Pt()

                # resonance pT
                tree.resonance_pt = sum(
                    [tau1.fourvect, tau2.fourvect, MET_4vect]).Pt()

                # ############################
                # tau <-> vertex association
                # ############################
                tree.tau_same_vertex = (tau1.privtx_x == tau2.privtx_x
                                        and tau1.privtx_y == tau2.privtx_y
                                        and tau1.privtx_z == tau2.privtx_z)

                tau1.vertex_prob = ROOT.TMath.Prob(tau1.privtx_chiSquared,
                                                   int(tau1.privtx_numberDoF))

                tau2.vertex_prob = ROOT.TMath.Prob(tau2.privtx_chiSquared,
                                                   int(tau2.privtx_numberDoF))

                # #########################
                # MMC Mass
                # #########################
                mmc_result = mmc.mass(tau1,
                                      tau2,
                                      METx,
                                      METy,
                                      sumET,
                                      njets=len(event.jets))

                for mmc_method, mmc_object in enumerate(mmc_objects):
                    mmc_mass, mmc_resonance, mmc_met = mmc_result[mmc_method]
                    if verbose:
                        log.info("MMC (method %d): %f" %
                                 (mmc_method, mmc_mass))

                    mmc_object.mass = mmc_mass
                    mmc_object.MET_et = mmc_met.Mod()
                    mmc_object.MET_etx = mmc_met.X()
                    mmc_object.MET_ety = mmc_met.Y()
                    mmc_object.MET_phi = math.pi - mmc_met.Phi()
                    if mmc_mass > 0:
                        FourMomentum.set(mmc_object.resonance, mmc_resonance)

                # ###########################
                # collinear and visible mass
                # ###########################
                vis_mass, collin_mass, tau1_x, tau2_x = mass.collinearmass(
                    tau1, tau2, METx, METy)

                tree.mass_vis_tau1_tau2 = vis_mass
                tree.mass_collinear_tau1_tau2 = collin_mass
                tau1.collinear_momentum_fraction = tau1_x
                tau2.collinear_momentum_fraction = tau2_x

                ###########################
                # Match jets to VBF partons
                ###########################
                #if datatype == datasets.MC and 'VBF' in dsname and year == 2011:
                #    # get partons (already sorted by eta in hepmc) FIXME!!!
                #    parton1, parton2 = hepmc.get_VBF_partons(event)
                #    tree.mass_true_quark1_quark2 = (parton1.fourvect + parton2.fourvect).M()
                #    # order here needs to be revised since jets are no longer
                #    # sorted by eta but instead by pT
                #    PartonBlock.set(tree, parton1, parton2)
                #    if len(jets) >= 2:
                #        jet1, jet2 = jets[:2]
                #        for i, jet in zip((1, 2), (jet1, jet2)):
                #            for parton in (parton1, parton2):
                #                if utils.dR(jet.eta, jet.phi, parton.eta, parton.phi) < .8:
                #                    setattr(tree, 'jet%i_matched' % i, True)

                # Fill the tau block
                # This must come after the RecoJetBlock is filled since
                # that sets the jet_beta for boosting the taus
                RecoTauBlock.set(event,
                                 tree,
                                 datatype,
                                 tau1,
                                 tau2,
                                 local=local)
                if datatype != datasets.DATA:
                    TrueTauBlock.set(tree, tau1, tau2)

            # fill the output tree
            outtree.Fill(reset=True)

        externaltools.report()

        # flush any baskets remaining in memory to disk
        self.output.cd()
        outtree.FlushBaskets()
        outtree.Write()

        if local:
            if datatype == datasets.DATA:
                xml_string = ROOT.TObjString(merged_grl.str())
                xml_string.Write('lumi')
            merged_cutflow.Write()
Esempio n. 17
0
    def work(self):
        """
        This is the one function that all "ATLASStudent"s must implement.

        Builds a TreeChain over the input files, registers the event filters
        on it, and loops over the events to fill the 'higgstautauhh' output
        tree with MET, MMC, truth (MC only) and reco tau quantities.
        After the loop the output tree and the merged cutflow histogram are
        written to ``self.output``; for data the merged GRL is additionally
        written as a 'lumi' string object.
        """
        datatype = self.metadata.datatype
        year = self.metadata.year
        verbose = self.args.verbose

        OutputModel = C3POEvent

        if datatype == datasets.MC:
            # only create truth branches for MC
            OutputModel += (
                    FourVectModel.prefix('resonance_') +
                    TrueTau.prefix('truetau1_') +
                    TrueTau.prefix('truetau2_'))

        # callbacks (function, extra-args) handed to the TreeChain below
        onfilechange = []
        count_funcs = {}

        if datatype in (datasets.MC, datasets.EMBED):

            def mc_weight_count(event):
                return event.mc_event_weight

            count_funcs = {
                'mc_weight': mc_weight_count,
            }

        trigger_config = None

        if datatype != datasets.EMBED:
            # trigger config tool to read trigger info in the ntuples
            trigger_config = get_trigger_config()

            # update the trigger config maps on every file change
            onfilechange.append((update_trigger_config, (trigger_config,)))

        if datatype == datasets.DATA:
            merged_grl = GRL()

            def update_grl(student, grl, name, file, tree):
                # merge the lumi string stored in this input file into grl
                grl |= str(file.Get('Lumi/%s' % student.metadata.treename).GetString())

            onfilechange.append((update_grl, (self, merged_grl,)))

        # one bin (event count) for data, two bins otherwise
        # (event count and MC-weighted count)
        if datatype == datasets.DATA:
            merged_cutflow = Hist(1, 0, 1, name='cutflow', type='D')
        else:
            merged_cutflow = Hist(2, 0, 2, name='cutflow', type='D')

        def update_cutflow(student, cutflow, name, file, tree):
            # accumulate the per-file cutflow counters into the merged hist;
            # the weighted counter is only read for MC
            datatype = student.metadata.datatype
            if datatype == datasets.MC:
                cutflow[0] += file.cutflow_event[0]
                cutflow[1] += file.cutflow_event_mc_weight[0]
            else:
                cutflow[0] += file.cutflow_event[0]

        onfilechange.append((update_cutflow, (self, merged_cutflow,)))

        # initialize the TreeChain of all input files
        # (each containing one tree named self.metadata.treename)
        chain = TreeChain(
                self.metadata.treename,
                files=self.files,
                events=self.events,
                read_branches_on_demand=True,
                cache=True,
                onfilechange=onfilechange)

        # create output tree
        self.output.cd()
        tree = Tree(name='higgstautauhh', model=OutputModel)

        # branches passed straight from the input chain to the output tree
        copied_variables = [
                'actualIntPerXing',
                'averageIntPerXing',
                'RunNumber',
                'EventNumber',
                'lbn']

        tree.set_buffer(
                chain._buffer,
                branches=copied_variables,
                create_branches=True,
                visible=False)

        chain.always_read(copied_variables)

        # set the event filters
        event_filters = EventFilterList([
            CoreFlags(
                count_funcs=count_funcs),
            TauSelected(2,
                count_funcs=count_funcs),
            TruthMatching(
                passthrough=datatype != datasets.MC,
                count_funcs=count_funcs),
            MCWeight(
                datatype=datatype,
                tree=tree,
                passthrough=datatype != datasets.MC,
                count_funcs=count_funcs)
        ])

        self.filters['event'] = event_filters

        chain._filters += event_filters

        define_objects(chain, year, skim=False)

        # define tree objects
        taus = [
            tree.define_object(name='tau1', prefix='tau1_'),
            tree.define_object(name='tau2', prefix='tau2_')]

        if datatype == datasets.MC:
            truetaus = [
                tree.define_object(name='truetau1', prefix='truetau1_'),
                tree.define_object(name='truetau2', prefix='truetau2_')]

            tree.define_object(name='resonance', prefix='resonance_')

        # entering the main event loop...
        for event in chain:

            # sort taus in decreasing order by pT
            event.taus.sort(key=lambda tau: tau.pt, reverse=True)

            # unpacking requires exactly two taus per event
            tau1, tau2 = event.taus

            # MET
            METx = event.MET.etx
            METy = event.MET.ety
            MET = event.MET.et
            MET_phi = event.MET.phi

            tree.MET = MET
            tree.MET_x = METx
            tree.MET_y = METy
            tree.MET_phi = MET_phi

            sumET = event.MET.sumet
            tree.sumET = sumET
            if sumET != 0:
                tree.MET_sig = ((2. * MET / GeV) /
                        (utils.sign(sumET) * sqrt(abs(sumET / GeV))))
            else:
                # sentinel: significance is undefined for zero sumET
                tree.MET_sig = -1.

            # use MMC values from skim
            mmc_mass = event.tau_MMC_mass
            mmc_resonance = event.tau_MMC_resonance
            mmc_met = Vector2(event.tau_MMC_MET_x, event.tau_MMC_MET_y)

            tree.mass_mmc_tau1_tau2 = mmc_mass
            tree.mmc_resonance.copy_from(mmc_resonance)
            if mmc_mass > 0:
                tree.mmc_resonance_pt = mmc_resonance.Pt()
            tree.MET_mmc = mmc_met.Mod()
            tree.MET_mmc_x = mmc_met.X()
            tree.MET_mmc_y = mmc_met.Y()
            tree.MET_mmc_phi = math.pi - mmc_met.Phi()

            # truth matching
            if datatype == datasets.MC:

                resonance, tau_decays = get_taus(event)

                if resonance is not None:

                    FourVectModel.set(tree.resonance, resonance)

                    # indices of reco taus that found a truth partner
                    matched_taus = []
                    # work on a copy: matched decays are popped from it
                    decays = tau_decays[:]
                    for itau, tau in enumerate(event.taus):
                        for idecay, tau_decay in enumerate(decays):
                            if tau.matches_vect(tau_decay.fourvect_visible):
                                tau_decay.matched = True
                                tau_decay.matched_object = tau
                                tau.matched = True
                                tau.matched_object = tau_decay
                                TrueTau.set(truetaus[itau], tau_decay,
                                        verbose=verbose)
                                # safe: we leave the inner loop right away
                                decays.pop(idecay)
                                matched_taus.append(itau)
                                break

                    # Store each decay left unmatched in its own free truetau
                    # slot. Each leftover decay is written exactly once, and
                    # surplus decays (no free slot left) are dropped instead
                    # of overwriting a slot that holds a matched decay.
                    free_slots = [
                        islot for islot in range(len(truetaus))
                        if islot not in matched_taus]
                    for islot, decay in zip(free_slots, decays):
                        TrueTau.set(truetaus[islot], decay,
                                verbose=verbose)

                    if len(tau_decays) == 2:
                        # write truth met
                        fourvect_missing = (tau_decays[0].fourvect_missing +
                                            tau_decays[1].fourvect_missing)

                        tree.MET_true = fourvect_missing.Pt()
                        tree.MET_phi_true = fourvect_missing.Phi()
                        tree.MET_x_true = tree.MET_true * math.cos(tree.MET_phi_true)
                        tree.MET_y_true = tree.MET_true * math.sin(tree.MET_phi_true)
                        tree.MET_phi_diff = Vector2.Phi_mpi_pi(tree.MET_phi_true - MET_phi)

            # tau - vertex association
            tree.tau_same_vertex = (
                    tau1.privtx_x == tau2.privtx_x and
                    tau1.privtx_y == tau2.privtx_y and
                    tau1.privtx_z == tau2.privtx_z)

            # fill tau block
            for outtau, intau in zip(taus, event.taus):
                RecoTau.set(outtau, intau, verbose=verbose)

            # fill output ntuple
            tree.Fill(reset=True)

        # flush any baskets remaining in memory to disk
        self.output.cd()
        tree.FlushBaskets()
        tree.Write()

        if datatype == datasets.DATA:
            # merged_grl only exists for data (see above)
            xml_string = ROOT.TObjString(merged_grl.str())
            xml_string.Write('lumi')
        merged_cutflow.Write()