Esempio n. 1
0
def copy_directory(newdir, olddir, condition=None):
    """Copy the objects stored in olddir into newdir.

    Sub-directories are recreated in newdir and copied recursively, trees
    are cloned with ``CloneTree(-1, 'fast')`` and every other object is read
    from its key and written again under the key name.

    newdir, olddir: Directories (inheriting from TDirectory).
    condition: Optional callable that receives each key object of olddir
        and returns whether that entry should be kept. It is applied to the
        keys of olddir only; it is not forwarded to nested sub-directories.

    Keys whose name starts with 'ProcessID' are never copied, and keys whose
    class cannot be resolved through gROOT are skipped.
    """
    for key in olddir.GetListOfKeys():
        name = key.GetName()
        if condition is not None and not condition(key):
            continue
        # The 'ProcessID' filter must not depend on a condition being given.
        if name.startswith('ProcessID'):
            continue
        cl = gROOT.GetClass(key.GetClassName())
        if not cl:
            continue
        if cl.InheritsFrom(TDirectory.Class()):
            newsub = newdir.mkdir(name)
            oldsub = olddir.GetDirectory(name)
            copy_directory(newsub, oldsub)
        elif cl.InheritsFrom(TTree.Class()):
            oldtree = olddir.Get(name)
            # cd into the destination first so the clone is created there.
            newdir.cd()
            newtree = oldtree.CloneTree(-1, 'fast')
            newtree.Write()
        else:
            olddir.cd()
            obj = key.ReadObj()
            newdir.cd()
            obj.Write(name)
            del obj
Esempio n. 2
0
def merge_root_file(target, source_list):
    """
    Merge the content of all source files into the target directory.

    The keys of the first source file (at the directory level matching the
    target path) drive the merge: histograms are summed over all sources,
    trees are chained and merged into the target file, and sub-directories
    are created in the target and merged recursively.

    :param TFile target: the result ROOT file (or a directory inside it
        when called recursively)
    :param TList source_list: list of input files to merge
    """
    logger = get_logger()
    raw_path = target.GetPath()
    # strip the "<file name>:" prefix to get the path inside the file
    path = raw_path[raw_path.find(":") + 1:]

    first_source = source_list.First()
    first_source.cd(path)
    # grab the key list right away: gDirectory changes while merging
    source_keys = gDirectory.GetListOfKeys()

    # gain time, do not add the objects in the list in memory
    status = TH1.AddDirectoryStatus()
    TH1.AddDirectory(False)

    try:
        # A name stored with several cycles appears once per cycle in the
        # key list; merge each name only once so nothing is added twice.
        # NOTE(review): relies on ROOT listing the highest cycle first for
        # a given name, as the reference hadd macro does -- confirm.
        merged_names = set()

        for key in source_keys:
            key_name = key.GetName()
            if key_name in merged_names:
                continue
            merged_names.add(key_name)

            # read object from first source file
            first_source.cd(path)
            obj = key.ReadObj()

            if obj.IsA().InheritsFrom(TH1.Class()):
                # descendant of TH1 -> merge it
                logger.info("Merging histogram %s", obj.GetName())

                # Accumulate directly into obj (no TH1(...) construction):
                # obj is what gets written to the target further down.
                next_source = source_list.After(first_source)
                while next_source:
                    # make sure we are at the correct directory level
                    next_source.cd(path)
                    key2 = gDirectory.GetListOfKeys().FindObject(obj.GetName())
                    if key2:
                        obj.Add(key2.ReadObj())
                    next_source = source_list.After(next_source)

            elif obj.IsA().InheritsFrom(TTree.Class()):
                logger.info("Merging tree %s", obj.GetName())
                # chain the tree of this name over all source files
                global_chain = TChain(obj.GetName())
                global_chain.Add(first_source.GetName())
                next_source = source_list.After(first_source)
                while next_source:
                    global_chain.Add(next_source.GetName())
                    next_source = source_list.After(next_source)

            elif obj.IsA().InheritsFrom(TDirectory.Class()):
                logger.info("Found subdirectory %s", obj.GetName())
                # create a new subdir of same name and title in the target
                target.cd()
                new_dir = target.mkdir(obj.GetName(), obj.GetTitle())
                # new_dir knows its depth within the target file via
                # GetPath(), which is how the recursion finds its level
                merge_root_file(new_dir, source_list)

            else:
                logger.info("Unknown object type, name: %s, title: %s",
                            obj.GetName(), obj.GetTitle())

            # Store the merged object at the current level of the target;
            # it only becomes persistent with the target.Write() below.
            if obj is not None:
                target.cd()
                # if the object is a tree, it is stored in global_chain...
                if obj.IsA().InheritsFrom(TTree.Class()):
                    global_chain.Merge(target.GetFile(), 0, "keep")
                else:
                    obj.Write(key_name)

        # save modifications to target file
        target.SaveSelf(True)
    finally:
        # always restore the global histogram/directory association flag
        TH1.AddDirectory(status)
    target.Write()
Esempio n. 3
0
def skim_tree(fname_patts,
              branches_to_keep,
              treename="t",
              fname_out="skim.root",
              cut_str=""):
    """Write a skimmed copy of a tree to a new ROOT file.

    fname_patts: iterable of file names/patterns added to a TChain.
    branches_to_keep: names of the branches to copy; empty strings and
        names absent from the tree of the first file are ignored. If
        nothing is left and a cut is given, all branches are kept.
    treename: name of the tree to skim.
    fname_out: output file, recreated on every call.
    cut_str: selection passed to CopyTree; "" keeps every entry.

    Non-tree objects of the FIRST input file are copied to the output too.
    Progress and size statistics are printed. Returns None; the function
    returns early (without writing anything) when no input file is found
    or when neither branches nor a cut are requested.
    """
    # This stuff is super necessary or else we all die
    from ROOT import TChain, TFile, gSystem, gROOT, TTree
    gSystem.Load("libFWCoreFWLite.so")
    gSystem.Load("libDataFormatsFWLite.so")
    gROOT.ProcessLine("FWLiteEnabler::enable()")

    ch = TChain(treename)
    for patt in fname_patts:
        ch.Add(patt)
    nevents = ch.GetEntries()
    branches_to_keep = [b for b in branches_to_keep
                        if b]  # remove empty strings

    if len(cut_str) > 0:
        print(">>> [!] You specified a cut string of: %s" % cut_str)
        print(">>> [!] Make sure that you are opting to keep all branches used in that cut string.")

    filenames = [f.GetTitle() for f in ch.GetListOfFiles()]
    if not filenames:
        # nothing was added to the chain: filenames[0] below would fail
        print(">>> [!] No input files found for %s, nothing to skim." % (fname_patts,))
        return

    f1 = TFile(filenames[0])
    tree = f1.Get(treename)
    tree.SetMakeClass(1)
    branches = [b.GetName() for b in tree.GetListOfBranches()]

    # see if the user specified any branches to keep that aren't in the chain
    # and subtract them out to avoid a segfault
    branches_not_in_chain = set(branches_to_keep) - set(branches)
    if branches_not_in_chain:
        print(">>> [!] You dummy! I am going to neglect these branches which are not even in the TTree: %s" % ",".join(
            branches_not_in_chain))

    branches_to_keep = list(set(branches_to_keep) - branches_not_in_chain)

    if len(branches_to_keep) == 0:
        if len(cut_str) == 0:
            print(">>> [!] You dummy! You want me to skim 0 branches without any cut? That's pointless.")
            return
        else:
            print(">>> [!] You specified 0 branches to keep, but you gave me a cut string, so keeping ALL branches.")
            branches_to_keep = branches[:]
    else:

        # whitelist the ones to copy
        ch.SetBranchStatus("*", 0)
        for bname in branches_to_keep:
            ch.SetBranchStatus(bname, 1)

        # need this to actually copy over any 4vectors.
        # https://root.cern.ch/phpBB3/viewtopic.php?t=10725
        ch.SetBranchStatus("fCoordinates*", 1)

    # actually do the skim and save the file
    t0 = time.time()
    # keep a reference so the output file stays open while it is filled
    new_file = TFile(fname_out, "RECREATE")

    # copy over all the histograms too - note that this only takes the first
    # file (TODO: actually add multiples, not a use case right now)
    for key in f1.GetListOfKeys():
        if key.ReadObj().InheritsFrom(TTree.Class()):
            continue
        name = key.GetName()
        print(name)
        f1.Get(name).Write()

    print(">>> Started skimming tree %s with %i events: %i --> %i branches" % (
        treename, nevents, len(branches), len(branches_to_keep)))
    ch_new = ch.CopyTree(cut_str)
    print(">>> Finished skim in %.2f seconds" % (time.time() - t0))
    ch_new.GetCurrentFile().Write()
    ch_new.GetCurrentFile().Close()

    # wow the user with incredible reduction stats
    size_before = get_filesizes(filenames)
    size_after = get_filesizes([fname_out])
    # force float division (Python 2 would truncate integer sizes) and do
    # not blow up on an empty output file
    if size_after:
        factor = 1.0 * size_before / size_after
    else:
        factor = float("inf")
    print(">>> Size reduction: %s --> %s (factor of %.1f)" % (readable_size(
        size_before), readable_size(size_after), factor))
    print(">>> Your output file is %s." % fname_out)
Esempio n. 4
0
def skim_tree(fname_patts,
              branches_to_keep=None,
              treename="t",
              fname_out="skim.root",
              cut_str="",
              flip_branches=False):
    """Write a skimmed copy of a tree to a new ROOT file.

    fname_patts: iterable of file names/patterns added to a TChain.
    branches_to_keep: branch names to copy (default: none). Empty strings
        are ignored. Unless flip_branches is set, names absent from the
        tree of the first file are ignored as well, and if nothing is left
        while a cut is given, all branches are kept.
    treename: name of the tree to skim.
    fname_out: output file, recreated on every call.
    cut_str: selection passed to CopyTree; "" keeps every entry.
    flip_branches: when True the meaning of branches_to_keep is inverted:
        every branch is enabled and the listed ones are switched off.

    Non-tree objects of the FIRST input file are copied to the output too.
    Progress and size statistics are printed. Returns None; the function
    returns early (without writing anything) when no input file is found
    or when neither branches nor a cut are requested.
    """
    # This stuff is super necessary or else we all die
    from ROOT import TChain, TFile, gSystem, gROOT, TTree
    import ROOT as r
    r.PyConfig.IgnoreCommandLineOptions = True  # https://root-forum.cern.ch/t/pyroot-crashes-when-in-arguments/25379/3
    r.v5.TFormula.SetMaxima(100000000)

    gSystem.Load("libFWCoreFWLite.so")
    gSystem.Load("libDataFormatsFWLite.so")
    gROOT.ProcessLine("FWLiteEnabler::enable()")

    ch = r.TChain(treename)
    for patt in fname_patts:
        ch.Add(patt)
    nevents = ch.GetEntries()
    # None stands for "no branches" (avoids a mutable default argument)
    branches_to_keep = [b for b in (branches_to_keep or [])
                        if b]  # remove empty strings

    # https://root-forum.cern.ch/t/pyroot-crashes-when-in-arguments/25379/3
    # explains why a "$" in the cut string used to crash: ROOT hijacked
    # the script arguments (hence IgnoreCommandLineOptions above).

    if len(cut_str) > 0:
        print(">>> [!] You specified a cut string of: %s" % cut_str)
        print(">>> [!] Make sure that you are opting to keep all branches used in that cut string.")

    filenames = [f.GetTitle() for f in ch.GetListOfFiles()]
    if not filenames:
        # nothing was added to the chain: filenames[0] below would fail
        print(">>> [!] No input files found for %s, nothing to skim." % (fname_patts,))
        return

    f1 = r.TFile(filenames[0])
    tree = f1.Get(treename)
    tree.SetMakeClass(1)
    branches = [b.GetName() for b in tree.GetListOfBranches()]

    # see if the user specified any branches to keep that aren't in the chain
    # and subtract them out to avoid a segfault
    if not flip_branches:
        branches_not_in_chain = set(branches_to_keep) - set(branches)
        if branches_not_in_chain:
            print(">>> [!] You dummy! I am going to neglect these branches which are not even in the TTree: %s" % ",".join(
                branches_not_in_chain))
        branches_to_keep = list(set(branches_to_keep) - branches_not_in_chain)

    if len(branches_to_keep) == 0 and not flip_branches:
        if len(cut_str) == 0:
            print(">>> [!] You dummy! You want me to skim 0 branches without any cut? That's pointless.")
            return
        else:
            print(">>> [!] You specified 0 branches to keep, but you gave me a cut string, so keeping ALL branches.")
            branches_to_keep = branches[:]
    else:

        # whitelist the ones to copy
        # or reverse if we have flip_branches
        if not flip_branches:
            ch.SetBranchStatus("*", 0)
            for bname in branches_to_keep:
                ch.SetBranchStatus(bname, 1)
        else:
            ch.SetBranchStatus("*", 1)
            for bname in branches_to_keep:
                ch.SetBranchStatus(bname, 0)

        # need this to actually copy over any 4vectors.
        # https://root.cern.ch/phpBB3/viewtopic.php?t=10725
        ch.SetBranchStatus("fCoordinates*", 1)

    # actually do the skim and save the file
    t0 = time.time()
    # keep a reference so the output file stays open while it is filled
    new_file = r.TFile(fname_out, "RECREATE")

    # copy over all the histograms too - note that this only takes the first
    # file (TODO: actually add multiples, not a use case right now)
    for key in f1.GetListOfKeys():
        if key.ReadObj().InheritsFrom(TTree.Class()):
            continue
        name = key.GetName()
        f1.Get(name).Write()
    # NOTE(review): with flip_branches the last number is the count of
    # switched-off entries, not of kept branches -- confirm intended output.
    print(">>> Started skimming tree %s with %i events: %i --> %i branches" % (
        treename, nevents, len(branches), len(branches_to_keep)))
    ch_new = ch.CopyTree(cut_str)
    print(">>> Finished skim in %.2f seconds" % (time.time() - t0))
    ch_new.GetCurrentFile().Write()
    ch_new.GetCurrentFile().Close()

    # wow the user with incredible reduction stats
    size_before = get_filesizes(filenames)
    size_after = get_filesizes([fname_out])
    # do not blow up on an empty output file
    if size_after:
        factor = 1.0 * size_before / size_after
    else:
        factor = float("inf")
    print(">>> Size reduction: %s --> %s (factor of %.1f)" % (
        readable_size(size_before), readable_size(size_after), factor))
    print(">>> Your output file is %s" % fname_out)