Example #1
def readDataFile_hits(filename,event_start_no,event_stride):
    '''
    DESCRIPTION:
        This function will read the root file which contains the simulated
        data of particles and the corresponding recorded hits in the detector.
        The recorded hits in the detector will later be used for energy
        interpolation to the square cells.
        This code is similar to the starter code in the repo.
    USAGE:
        INPUT:
            filename        : the name of the root file
            event_start_no  : the starting event number from which to
                                process the minibatch.
            event_stride    : the size of the minibatch to process in one
                                go (increasing this value reduces the time
                                cost but increases the memory cost)
        OUTPUT:
            df          : the pandas dataframe of the data in the root file,
                            restricted to the recorded hits of the required
                            batch of events, to be converted to images
    '''
    print('>>> Reading the root file to get hits dataframe')
    tree=uproot.open(filename)['ana/hgc']
    branches=[]
    #Just extracting the required attributes to create image
    branches += ["rechit_detid","rechit_energy"]
    #Adding the branches for logical Error check (Optional)
    #branches +=["rechit_z","rechit_cluster2d","cluster2d_multicluster"]
    #branches +=["rechit_cluster2d","cluster2d_multicluster"]

    cache={}
    df=tree.pandas.df(branches,cache=cache,executor=executor)

    #Renaming the attribute in short form
    col_names={name:name.replace('rechit_','') for name in branches}
    df.rename(col_names,inplace=True,axis=1)

    #Extracting out the minibatch of event to process at a time
    if event_stride=='upto_end':
        df=df.iloc[event_start_no:]
    else:
        df=df.iloc[event_start_no:event_start_no+event_stride]

    #Do the Filtering here only no need to do it each time for each event

    #Printing for sanity check
    #print(df.head())
    print('Shape of dataframe: ', df.shape)
    # print(all_event_hits.loc[0,'energy'])
    # print(type(all_event_hits.loc[0,'energy']))
    # print(all_event_hits.loc[0,'energy'].shape)

    return df
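
# Usage sketch (not part of the original example): assumes uproot 3 and
# supplies the module-level `executor` that readDataFile_hits references;
# the file name below is a placeholder.
import concurrent.futures
import uproot

executor = concurrent.futures.ThreadPoolExecutor(4)
hits_df = readDataFile_hits("hgcal_ntuple.root", event_start_no=0, event_stride=100)
print(hits_df.head())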
Example #2
def thist_to_np_xy_cache(infile,key='reconstructedProfileHisto'):
    assert(infile.endswith(".root"))
    npzfile = infile+"."+str(key)+".npz"
    if os.path.isfile(npzfile):
        cached = np.load(npzfile)
        return [cached['x'].tolist(),cached['y'].tolist()]
    else:
        f=uproot.open(infile)
        outdata_x=binedges_to_centers(f[key].edges)
        outdata_y=f[key].values
        np.savez(npzfile, x=np.array(outdata_x), y=np.array(outdata_y))
        return [outdata_x,outdata_y]
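
# binedges_to_centers is not defined in this example; a minimal sketch
# consistent with how it is called (N+1 bin edges -> N bin centers):
import numpy as np

def binedges_to_centers(edges):
    edges = np.asarray(edges)
    # midpoint of each pair of adjacent bin edges
    return (0.5 * (edges[:-1] + edges[1:])).tolist()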
Example #3
def load_ttree(filename, treename, branchnames=None):
    """
    Loads a root ttree into a numpy record array
    If branchnames is None all branches are read.
    """
    import numpy as np
    tree = uproot.open(filename)[treename]

    if not branchnames:
        branchnames = tree.keys()
    array_dict = tree.arrays(branchnames)
    return np.rec.fromarrays(array_dict.values(),
                             names=[x.decode() for x in array_dict.keys()])
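
# Usage sketch (file, tree, and branch names are placeholders):
rec = load_ttree("events.root", "Events", branchnames=["pt", "eta"])
print(rec.dtype.names)  # one record-array field per branch
print(rec.pt[:10])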
Example #4
def readDataFile_genpart(filename,event_start_no,event_stride):
    '''
    DESCRIPTION:
        This function is similar to readDataFile_hits, but it reads
        the genpart branches of the same events read by the above
        function, to generate the target labels for the corresponding
        image files.
    USAGE:
        INPUT:
            filename        : the name of the root file containing the
                                events
            event_start_no  : starting event number in this file to
                                extract the events from. This will
                                be controlled manually while generating the
                                data for the training set.
            event_stride    : the number of events to be processed in
                                one go. (Consider the memory cost here
                                rather than the time cost.)
        OUTPUT:
            df              : returns the dataframe containing the particles
                                whose properties we will need to predict from
                                the corresponding hit images of the events
    '''
    #Reading the root file to a dataframe
    print('>>> Reading the rootfile to get genpart dataframe')
    tree=uproot.open(filename)['ana/hgc']

    branches =["genpart_energy","genpart_phi","genpart_eta",
                "genpart_gen","genpart_pid","genpart_reachedEE",
                "genpart_posx","genpart_posy","genpart_posz"]
    cache={}
    df=tree.pandas.df(branches,cache=cache,executor=executor)

    #Renaming the attributes in short form
    col_names={name:name.replace('genpart_','') for name in branches}
    df.rename(col_names,inplace=True,axis=1)

    #Extracting the dataframe for the required events
    if event_stride=='upto_end':
        df=df.iloc[event_start_no:]
    else:
        df=df.iloc[event_start_no:event_start_no+event_stride]

    print('>>> Extraction completed with current shape: ', df.shape)
    # print(df.dtypes)
    # print(type(df.loc[0,'posx']))
    # print(df.loc[0,'posx'])
    # sys.exit(1)

    return df
Example #5
def readDataFile_hits(filename,event_id,layer_num):
    tree=uproot.open(filename)['ana/hgc']
    branches=[]
    branches += ["rechit_energy","rechit_detid",
                "rechit_x","rechit_y","rechit_z","rechit_layer"]
    cache={}
    df=tree.pandas.df(branches,cache=cache,executor=executor)

    #Projecting the dataframe for the required attributes
    print('>>> Selecting a single event')
    all_hits = pd.DataFrame({name.replace('rechit_',''):df.loc[event_id,name]
                            for name in branches if 'rechit_' in name })

    all_hits=all_hits[all_hits['layer']==layer_num]
    print(all_hits.head())
    print(all_hits.shape)
    return all_hits
Example #6
 def Reader(self):
     '''
     Read the content of a TTree in a ROOT File.
     Note the use of the uproot package.
     The variables should be a list of the "variable" to read.
     '''
     logger.debug("Reading {}".format(self.file_name))
     import uproot
     for key in self.variables:
         self.data.update({str(key): []})
     tree = uproot.open(self.file_name)[self.tree_name]
     for data in tree.iterate(self.variables):
         for key, value in data.items():
             varName = key.decode("utf-8")
             self.data.update(
                 {str(varName): self.data[str(varName)] + value.tolist()})
     return True
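
# Reader relies on self.file_name, self.tree_name, self.variables, and
# self.data being set elsewhere on the class; a minimal host class,
# sketched under that assumption (all names here are illustrative):
class TreeReader:
    def __init__(self, file_name, tree_name, variables):
        self.file_name = file_name   # path to the ROOT file
        self.tree_name = tree_name   # name of the TTree inside the file
        self.variables = variables   # branch names to read
        self.data = {}               # filled by Reader() with branch -> list of values

    # paste the Reader method from the example above here

# usage: reader = TreeReader("events.root", "tree", ["pt", "eta"]); reader.Reader()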
Example #7
def open_compwa_plot_data(input_file_path):
    from pycompwa.plotting import PlotData
    pd = PlotData()

    # open file
    file = uproot.open(input_file_path)
    trees = file.keys()

    file = file.get("final_state_id_to_name_mapping")
    for k, v in file.items():
        pd.particle_id_to_name_mapping[v] = k.decode()[:k.decode().find(';')]

    if "data" in [x.decode()[:x.decode().find(';')] for x in trees]:
        pd.data = load_ttree(input_file_path, "data")
    if "intensity_weighted_phspdata" in [x.decode()[:x.decode().find(';')]
                                         for x in trees]:
        pd.fit_result_data = load_ttree(
            input_file_path, "intensity_weighted_phspdata")

    return pd
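
# Usage sketch: open_compwa_plot_data depends on the load_ttree helper
# from Example #3 being importable in the same module; the path below is
# a placeholder.
plot_data = open_compwa_plot_data("compwa_fit_output.root")
print(plot_data.particle_id_to_name_mapping)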
Example #8
def readDataFile(filename):
    '''
    DESCRIPTION:
        This function will read the root file which contains the simulated
        data of particles and the corresponding recorded hits in the detector.
        The recorded hits in the detector will later be used for energy
        interpolation to the square cells.
        This code is similar to the starter code in the repo.
    USAGE:
        INPUT:
            filename    : the name of root file
        OUTPUT:
            df          : the pandas dataframe of the data in root file
    '''
    tree=uproot.open(filename)['ana/hgc']
    branches=[]
    branches += ["rechit_detid","rechit_z", "rechit_energy",
                'rechit_cluster2d','cluster2d_multicluster']
    cache={}
    df=tree.pandas.df(branches,cache=cache,executor=executor)

    return df
Example #9
from matplotlib import colors
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import scipy.stats as stats
from scipy.stats import norm
import matplotlib.mlab as mlab
import pickle
import math
import random
import collections
import more_itertools as mit
from pandas.core.common import flatten
import uproot
import pandas as pd

filee = "/home/amrutha/muraves/simulation/MuravesSim_geo_updated/build/z_layer_5p4/MuravesSim.root"
file = uproot.open(filee)
lis = file.keys()
tree = file[lis[0]]
lis1 = tree.keys()
eventid = tree.arrays()
data = pd.DataFrame(eventid)
muon_data = data.loc[data.iloc[:, 1] == 1]
muon_data = muon_data.loc[muon_data.iloc[:, 7] != 0]
for i in range(0, len(muon_data.iloc[:, 3])):
    muon_data.iloc[i, 3] = muon_data.iloc[i, 3][0]
    muon_data.iloc[i, 4] = muon_data.iloc[i, 4][0]

    print(i)
muon_data.iloc[:, 4] = muon_data.iloc[:, 4] / 1000
muon_data.iloc[:, 10] = muon_data.iloc[:, 10] / 10
muon_data.iloc[:, 11] = muon_data.iloc[:, 11] / 10
Example #10
def count_entries_in_phasespace(treefilename):
    assert(treefilename.endswith(".root"))
    f=uproot.open(treefilename)
    return f['PhaseSpace'].numentries
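
# Usage sketch (the file name is a placeholder):
n = count_entries_in_phasespace("dose_PhaseSpace.root")
print("PhaseSpace entries:", n)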
Example #11
import uproot, uproot_methods
from Builder import Initialize

file = uproot.open("nano_5.root")
tree = file["Events"]

e = Initialize({
    'pt': tree.array("Electron_pt"),
    'eta': tree.array("Electron_eta"),
    'phi': tree.array("Electron_phi"),
    'mass': tree.array("Electron_mass"),
    'iso': tree.array('Electron_pfRelIso03_all'),
    'dxy': tree.array('Electron_dxy'),
    'dz': tree.array('Electron_dz'),
    'id': tree.array('Electron_mvaSpring16GP_WP90')
})

mu = Initialize({
    'pt': tree.array("Muon_pt"),
    'eta': tree.array("Muon_eta"),
    'phi': tree.array("Muon_phi"),
    'mass': tree.array("Muon_mass"),
    'iso': tree.array('Muon_pfRelIso04_all'),
    'dxy': tree.array('Muon_dxy'),
    'dz': tree.array('Muon_dz')
})

tau = Initialize({
    'pt': tree.array('Tau_pt'),
    'eta': tree.array('Tau_eta'),
    'phi': tree.array('Tau_phi'),
Example #12
from matplotlib import pyplot as plt
import uproot
import numpy as np
import pandas as pd

#my_file = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.ZprimeToTT_2016v3.root")                      
#my_file_mc = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.TT_TuneCUETP8M2T4_2016v3.root")            
my_file_bkg1 = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.WJetsToLNu_HT-100To200_2018.root")
my_file_bkg2 = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.WJetsToLNu_HT-1200To2500_2018.root")
my_file_bkg3 = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.WJetsToLNu_HT-200To400_2018.root")
my_file_bkg4 = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.WJetsToLNu_HT-2500ToInf_2018.root")
my_file_bkg5 = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.WJetsToLNu_HT-400To600_2018.root")
my_file_bkg6 = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.WJetsToLNu_HT-600To800_2018.root")
#my_file_bkg7 = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.WJetsToLNu_HT-70To100_2018.root")        
my_file_bkg8 = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.WJetsToLNu_HT-800To1200_2018.root")

#my_file_bkg = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.WJetsToLNu_2016v3.root")                  

my_file = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.ZprimeToTT_2018.root")
my_file_mc = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.TTToSemiLeptonic_2018.root")
#my_file_bkg = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.WJetsToLNu_2018.root")                    
tree = my_file['AnalysisTree']
tree_mc = my_file_mc['AnalysisTree']
#tree_bkg = my_file_bkg['AnalysisTree']                                                                                                        
#tree_bkg1 = my_file_bkg1['AnalysisTree']                                                                                                      
tree_bkg2 = my_file_bkg2['AnalysisTree']
tree_bkg3 = my_file_bkg3['AnalysisTree']
tree_bkg4 = my_file_bkg4['AnalysisTree']
tree_bkg5 = my_file_bkg5['AnalysisTree']
tree_bkg6 = my_file_bkg6['AnalysisTree']
#tree_bkg7 = my_file_bkg7['AnalysisTree']                                                                                                      
Example #13
def _work_function(item,
                   processor_instance,
                   flatten=False,
                   savemetrics=False,
                   mmap=False,
                   nano=False,
                   cachestrategy=None,
                   skipbadfiles=False,
                   retries=0,
                   xrootdtimeout=None):
    if processor_instance == 'heavy':
        item, processor_instance = item
    if not isinstance(processor_instance, ProcessorABC):
        processor_instance = cloudpickle.loads(
            lz4f.decompress(processor_instance))
    if mmap:
        localsource = {}
    else:
        opts = dict(uproot.FileSource.defaults)
        opts.update({'parallel': None})

        def localsource(path):
            return uproot.FileSource(path, **opts)

    import warnings
    out = processor_instance.accumulator.identity()
    retry_count = 0
    while retry_count <= retries:
        try:
            from uproot.source.xrootd import XRootDSource
            xrootdsource = XRootDSource.defaults
            xrootdsource['timeout'] = xrootdtimeout
            file = uproot.open(item.filename,
                               localsource=localsource,
                               xrootdsource=xrootdsource)
            if nano:
                cache = None
                if cachestrategy == 'dask-worker':
                    from distributed import get_worker
                    from .dask import ColumnCache
                    worker = get_worker()
                    try:
                        cache = worker.plugins[ColumnCache.name]
                    except KeyError:
                        # emit warning if not found?
                        pass
                df = NanoEvents.from_file(
                    file=file,
                    treename=item.treename,
                    entrystart=item.entrystart,
                    entrystop=item.entrystop,
                    metadata={'dataset': item.dataset},
                    cache=cache,
                )
            else:
                tree = file[item.treename]
                df = LazyDataFrame(tree,
                                   item.entrystart,
                                   item.entrystop,
                                   flatten=flatten)
                df['dataset'] = item.dataset
            tic = time.time()
            out = processor_instance.process(df)
            toc = time.time()
            metrics = dict_accumulator()
            if savemetrics:
                if isinstance(file.source, uproot.source.xrootd.XRootDSource):
                    metrics['bytesread'] = value_accumulator(
                        int, file.source.bytesread)
                    metrics['dataservers'] = set_accumulator(
                        {file.source._source.get_property('DataServer')})
                metrics['columns'] = set_accumulator(df.materialized)
                metrics['entries'] = value_accumulator(int, df.size)
                metrics['processtime'] = value_accumulator(float, toc - tic)
            wrapped_out = dict_accumulator({'out': out, 'metrics': metrics})
            file.source.close()
            break
        # catch xrootd errors and optionally skip
        # or retry to read the file
        except OSError as e:
            if not skipbadfiles:
                raise e
            else:
                w_str = 'Bad file source %s.' % item.filename
                if retries:
                    w_str += ' Attempt %d of %d.' % (retry_count + 1,
                                                     retries + 1)
                    if retry_count + 1 < retries:
                        w_str += ' Will retry.'
                    else:
                        w_str += ' Skipping.'
                else:
                    w_str += ' Skipping.'
                warnings.warn(w_str)
            metrics = dict_accumulator()
            if savemetrics:
                metrics['bytesread'] = value_accumulator(int, 0)
                metrics['dataservers'] = set_accumulator({})
                metrics['columns'] = set_accumulator({})
                metrics['entries'] = value_accumulator(int, 0)
                metrics['processtime'] = value_accumulator(float, 0)
                metrics['skippedbadfiles'] = value_accumulator(int, 1)
            wrapped_out = dict_accumulator({'out': out, 'metrics': metrics})
        except Exception as e:
            if retries == retry_count:
                raise e
            w_str = 'Attempt %d of %d. Will retry.' % (retry_count + 1,
                                                       retries + 1)
            warnings.warn(w_str)
        retry_count += 1

    return wrapped_out
Example #14

import ROOT
import numpy as np
import pandas as pd
import uproot


def h1_invmass(counts, bins=34, name=''):
    th1 = ROOT.TH1D(f'{name}', f'{name}_x', int(bins), 2.96, 3.04)

    for index in range(0, len(counts)):
        th1.SetBinContent(index + 1, counts[index])
        # th1.SetBinError(index + 1, np.sqrt(counts[index]))

    th1.SetDirectory(0)

    return th1
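
# Usage sketch: fill the histogram from numpy counts binned over the same
# window the function hard-codes (34 bins between 2.96 and 3.04); the
# sample data here are made up for illustration.
sample = np.random.normal(2.992, 0.002, size=10000)
counts, _ = np.histogram(sample, bins=34, range=(2.96, 3.04))
th1_toy = h1_invmass(counts, bins=34, name='toy')
print(th1_toy.GetEntries(), th1_toy.Integral())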


## COMPUTE PRESELECTION EFFICIENCY
df_rec = uproot.open("../Tables/SignalTable_pp13TeV_mtexp.root")[
    "SignalTable"].pandas.df().query("pt>0 and rej_accept>0")
df_sim = uproot.open(
    "../Tables/SignalTable_pp13TeV_mtexp.root")["SignalTable"].pandas.df()

presel_eff = len(df_rec) / len(df_sim.query("abs(gY)<0.5 and rej_accept>0"))
print("-------------------------------------")
print("Pre-selection efficiency: ", presel_eff)

## FIT INVARIANT MASS SPECTRA
df = pd.read_parquet(
    "../Utils/ReducedDataFrames/selected_df_data_pp.parquet.gzip")
df_ls = pd.read_parquet(
    "../Utils/ReducedDataFrames/selected_df_ls_pp.parquet.gzip")
df_em = pd.read_parquet(
    "../Utils/ReducedDataFrames/selected_df_em_pp.parquet.gzip")
#!/usr/bin/env python3 

"""
Doesn't work, forget it
"""

import numpy as np
import uproot

f = uproot.open("Lumi_MC_100000.root")
events = f["pndsim"]
# Track IDs
lmdPoints = events['LMDPoint'][b'LMDPoint.fTrackID']

# event IDs (each assignment overwrites the previous lmdPoints)
lmdPoints = events['LMDPoint'][b'LMDPoint.fEventId']

# x positions
lmdPoints = events['LMDPoint'][b'LMDPoint.fX'].array()

# event IDs
# lmdPoints = events['LMDPoint'][b'LMDPoint.fEventId']

print(lmdPoints)
# print(lmdPoints.array())
Example #16
def file(name,
         filepath,
         treepath,
         location_prefix=None,
         localsource=uproot.MemmapSource.defaults,
         xrootdsource=uproot.XRootDSource.defaults,
         httpsource=uproot.HTTPSource.defaults,
         **options):
    fullfilepath = filepath if location_prefix is None else location_prefix + filepath
    uprootfile = uproot.open(fullfilepath,
                             localsource=localsource,
                             xrootdsource=xrootdsource,
                             httpsource=httpsource,
                             **options)

    numentries = 0
    colnames = []
    columns = []
    branches = []
    for branchname, uprootbranch in uprootfile[treepath].iteritems(
            recursive=True):
        if uprootbranch.numbaskets != uprootbranch._numgoodbaskets:
            raise NotImplementedError(
                "branch recovery not handled by uproot-skyhook yet")
        if numpy.uint8(uprootbranch._tree_iofeatures) & numpy.uint8(
                uproot.const.kGenerateOffsetMap) != 0:
            raise NotImplementedError(
                "branch feature kGenerateOffsetMap not handled by uproot-skyhook yet"
            )

        local_offsets = uprootbranch._fBasketEntry[:uprootbranch.numbaskets +
                                                   1]
        page_seeks = numpy.empty(uprootbranch.numbaskets, dtype="<u8")
        compression = None
        iscompressed = numpy.empty(uprootbranch.numbaskets, dtype=numpy.bool_)
        compressedbytes = numpy.empty(uprootbranch.numbaskets, dtype="<u4")
        uncompressedbytes = numpy.empty(uprootbranch.numbaskets, dtype="<u4")
        basket_page_offsets = numpy.empty(uprootbranch.numbaskets + 1,
                                          dtype="<u4")
        basket_page_offsets[0] = 0
        basket_keylens = numpy.zeros(uprootbranch.numbaskets, dtype="<u4")
        basket_data_borders = numpy.zeros(uprootbranch.numbaskets, dtype="<u4")

        for i in range(uprootbranch.numbaskets):
            source = uprootbranch._source.threadlocal()
            key = uprootbranch._basketkey(source, i, True)
            cursor = uproot.source.cursor.Cursor(key._fSeekKey + key._fKeylen)

            basket_compressedbytes = key._fNbytes - key._fKeylen
            basket_uncompressedbytes = key._fObjlen

            if basket_compressedbytes == basket_uncompressedbytes:
                pagei = basket_page_offsets[i]
                page_seeks[pagei] = cursor.index
                iscompressed[pagei] = False
                compressedbytes[pagei] = basket_compressedbytes
                uncompressedbytes[pagei] = basket_uncompressedbytes
                pagei += 1
                basket_page_offsets[i + 1] = pagei

            else:
                pagei = basket_page_offsets[i]
                start = cursor.index
                total_compressedbytes = 0
                while cursor.index - start < basket_compressedbytes:
                    algo, method, c1, c2, c3, u1, u2, u3 = cursor.fields(
                        source.parent(),
                        uproot.source.compressed.CompressedSource._header)
                    page_compressedbytes = c1 + (c2 << 8) + (c3 << 16)
                    page_uncompressedbytes = u1 + (u2 << 8) + (u3 << 16)
                    total_compressedbytes += 9 + page_compressedbytes
                    if algo == b"ZL":
                        if compression is not None and compression != uproot_skyhook.layout.zlib:
                            raise ValueError(
                                "different compression used by different baskets"
                            )
                        compression = uproot_skyhook.layout.zlib
                    elif algo == b"XZ":
                        if compression is not None and compression != uproot_skyhook.layout.lzma:
                            raise ValueError(
                                "different compression used by different baskets"
                            )
                        compression = uproot_skyhook.layout.lzma
                    elif algo == b"L4":
                        if compression is not None and compression != uproot_skyhook.layout.lz4:
                            raise ValueError(
                                "different compression used by different baskets"
                            )
                        compression = uproot_skyhook.layout.lz4
                        cursor.skip(8)
                        page_compressedbytes -= 8
                    elif algo == b"CS":
                        raise ValueError(
                            "unsupported compression algorithm: 'old' (according to ROOT comments, hasn't been used in 20+ years!)"
                        )

                    # extremely rare, though possible, for numpages > numbaskets
                    if pagei >= len(page_seeks):
                        page_seeks = numpy.resize(page_seeks,
                                                  int(len(page_seeks) * 1.2))
                        iscompressed = numpy.resize(
                            iscompressed, int(len(iscompressed) * 1.2))
                        compressedbytes = numpy.resize(
                            compressedbytes, int(len(compressedbytes) * 1.2))
                        uncompressedbytes = numpy.resize(
                            uncompressedbytes,
                            int(len(uncompressedbytes) * 1.2))

                    page_seeks[pagei] = cursor.index
                    iscompressed[pagei] = True
                    compressedbytes[pagei] = page_compressedbytes
                    uncompressedbytes[pagei] = page_uncompressedbytes
                    pagei += 1

                    cursor.skip(page_compressedbytes)

                if total_compressedbytes != basket_compressedbytes:
                    raise ValueError(
                        "total compressedbytes of all compressed pages ({0}) is not equal to the compressedbytes in the basket key ({1})"
                        .format(total_compressedbytes, basket_compressedbytes))

                basket_page_offsets[i + 1] = pagei

            basket_keylens[i] = key._fKeylen
            basket_data_borders[
                i] = 0 if key._fObjlen == key.border else key.border

        if len(page_seeks) > basket_page_offsets[-1]:
            page_seeks = page_seeks[:basket_page_offsets[-1]].copy()
            iscompressed = iscompressed[:basket_page_offsets[-1]].copy()
            compressedbytes = compressedbytes[:basket_page_offsets[-1]].copy()
            uncompressedbytes = uncompressedbytes[:basket_page_offsets[
                -1]].copy()

        if (basket_data_borders == 0).all():
            basket_keylens = None
            basket_data_borders = None

        if compression is None:
            compression = uproot_skyhook.layout.none
            iscompressed = None
            compressedbytes = None

        colnames.append(branchname.decode("utf-8"))
        columns.append(
            uproot_skyhook.layout.Column(
                uprootbranch.interpretation,
                None if uprootbranch.title == b"" or uprootbranch.title is None
                else uprootbranch.title.decode("utf-8")))
        branches.append(
            uproot_skyhook.layout.Branch(local_offsets, page_seeks,
                                         compression, iscompressed,
                                         compressedbytes, uncompressedbytes,
                                         basket_page_offsets, basket_keylens,
                                         basket_data_borders))
        numentries = max(numentries, branches[-1].local_offsets[-1])

    file = uproot_skyhook.layout.File(filepath,
                                      uprootfile._context.tfile["_fUUID"],
                                      branches)
    return uproot_skyhook.layout.Dataset(name,
                                         treepath,
                                         colnames,
                                         columns, [file], [0, numentries],
                                         location_prefix=location_prefix)
Example #17
    REPLAYPATH = "/home/%s/Work/JLab/hallc_replay_lt" % USER[1]

# Add more path setting as needed in a similar manner
OUTPATH = "%s/UTIL_KAONLT/scripts/demo/OUTPUT" % REPLAYPATH
CUTPATH = "%s/UTIL_KAONLT/DB/CUTS" % REPLAYPATH
sys.path.insert(0, '%s/UTIL_KAONLT/bin/python/' % REPLAYPATH)
import kaonlt as klt  # Import kaonlt module, need the path setting line above prior to importing this

print("Running as %s on %s, hallc_replay_lt path assumed as %s" %
      (USER[1], HOST[1], REPLAYPATH))
# Construct the name of the rootfile based upon the info we provided
rootName = "%s/UTIL_KAONLT/ROOTfiles/%s_%s_%s.root" % (REPLAYPATH, ROOTPrefix,
                                                       runNum, MaxEvent)

# Read stuff from the main event tree, here we're just going to get some quantities for the acceptance for the HMS/SHMS
e_tree = up.open(rootName)["T"]
# HMS info
H_gtr_beta = e_tree.array("H.gtr.beta")
H_gtr_xp = e_tree.array("H.gtr.th")  # xpfp -> Theta
H_gtr_yp = e_tree.array("H.gtr.ph")  # ypfp -> Phi
H_gtr_dp = e_tree.array("H.gtr.dp")
# SHMS info
P_gtr_beta = e_tree.array("P.gtr.beta")
P_gtr_xp = e_tree.array("P.gtr.th")  # xpfp -> Theta
P_gtr_yp = e_tree.array("P.gtr.ph")  # ypfp -> Phi
P_gtr_p = e_tree.array("P.gtr.p")
P_gtr_dp = e_tree.array("P.gtr.dp")

r = klt.pyRoot()
# Specify the file which contains the cuts we want to use
fout = '%s/UTIL_KAONLT/DB/CUTS/run_type/demo.cuts' % REPLAYPATH
Example #18
            'title': r'$\pi$'
        },
        'mu': {
            'title': r'$\mu$'
        },
    },
}

plotTitleAddOn = {
    'Dst': r'$D^{*}$ tree',
    'D0': r'$D^{0}$ tree',
}

for ntpName in ntpsIn:
    hep.style.use('LHCb2')
    ntp = uproot.open(ntpName)

    for treeId, scheme in plotScheme.items():
        if treeId in ntpName:
            for part in scheme:
                brP, brEta, brWt = read_branches(
                    ntp, 'tree', [f'{part}_p', f'{part}_eta', f'wtrk_{part}'])

                effRatio = f', {plotTitleAddOn[treeId]}, tracking eff: {brWt.sum() / brWt.size:.2f}'

                plotPEta(brP,
                         brEta,
                         f'{treeId}_{part}_p_eta.png',
                         binning=plotRange,
                         title=scheme[part]['title'] + effRatio)
            break
Example #19
 def test_vector_of_vector_of_numbers(self):
     branch = uproot.open("tests/samples/vectorVectorDouble.root")["t"]["x"]
     assert branch.array().tolist() == [[], [[], []],
                                        [[10.0], [], [10.0, 20.0]],
                                        [[20.0, -21.0, -22.0]],
                                        [[200.0], [-201.0], [202.0]]]
Example #20
 def test_array(self):
     tree = uproot.open(
         "tests/samples/small-evnt-tree-fullsplit.root")["tree"]
     assert tree.array("ArrayI16[10]").tolist() == [[i] * 10
                                                    for i in range(100)]
Example #21
 def test_strings3(self):
     tree = uproot.open(
         "tests/samples/small-evnt-tree-fullsplit.root")["tree"]
     assert tree.array("StlVecStr").tolist() == [
         [], [b'vec-001'], [b'vec-002', b'vec-002'],
         [b'vec-003', b'vec-003', b'vec-003'],
         [b'vec-004', b'vec-004', b'vec-004', b'vec-004'],
         [b'vec-005', b'vec-005', b'vec-005', b'vec-005', b'vec-005'],
         [
             b'vec-006', b'vec-006', b'vec-006', b'vec-006', b'vec-006',
             b'vec-006'
         ],
         [
             b'vec-007', b'vec-007', b'vec-007', b'vec-007', b'vec-007',
             b'vec-007', b'vec-007'
         ],
         [
             b'vec-008', b'vec-008', b'vec-008', b'vec-008', b'vec-008',
             b'vec-008', b'vec-008', b'vec-008'
         ],
         [
             b'vec-009', b'vec-009', b'vec-009', b'vec-009', b'vec-009',
             b'vec-009', b'vec-009', b'vec-009', b'vec-009'
         ], [], [b'vec-011'], [b'vec-012', b'vec-012'],
         [b'vec-013', b'vec-013', b'vec-013'],
         [b'vec-014', b'vec-014', b'vec-014', b'vec-014'],
         [b'vec-015', b'vec-015', b'vec-015', b'vec-015', b'vec-015'],
         [
             b'vec-016', b'vec-016', b'vec-016', b'vec-016', b'vec-016',
             b'vec-016'
         ],
         [
             b'vec-017', b'vec-017', b'vec-017', b'vec-017', b'vec-017',
             b'vec-017', b'vec-017'
         ],
         [
             b'vec-018', b'vec-018', b'vec-018', b'vec-018', b'vec-018',
             b'vec-018', b'vec-018', b'vec-018'
         ],
         [
             b'vec-019', b'vec-019', b'vec-019', b'vec-019', b'vec-019',
             b'vec-019', b'vec-019', b'vec-019', b'vec-019'
         ], [], [b'vec-021'], [b'vec-022', b'vec-022'],
         [b'vec-023', b'vec-023', b'vec-023'],
         [b'vec-024', b'vec-024', b'vec-024', b'vec-024'],
         [b'vec-025', b'vec-025', b'vec-025', b'vec-025', b'vec-025'],
         [
             b'vec-026', b'vec-026', b'vec-026', b'vec-026', b'vec-026',
             b'vec-026'
         ],
         [
             b'vec-027', b'vec-027', b'vec-027', b'vec-027', b'vec-027',
             b'vec-027', b'vec-027'
         ],
         [
             b'vec-028', b'vec-028', b'vec-028', b'vec-028', b'vec-028',
             b'vec-028', b'vec-028', b'vec-028'
         ],
         [
             b'vec-029', b'vec-029', b'vec-029', b'vec-029', b'vec-029',
             b'vec-029', b'vec-029', b'vec-029', b'vec-029'
         ], [], [b'vec-031'], [b'vec-032', b'vec-032'],
         [b'vec-033', b'vec-033', b'vec-033'],
         [b'vec-034', b'vec-034', b'vec-034', b'vec-034'],
         [b'vec-035', b'vec-035', b'vec-035', b'vec-035', b'vec-035'],
         [
             b'vec-036', b'vec-036', b'vec-036', b'vec-036', b'vec-036',
             b'vec-036'
         ],
         [
             b'vec-037', b'vec-037', b'vec-037', b'vec-037', b'vec-037',
             b'vec-037', b'vec-037'
         ],
         [
             b'vec-038', b'vec-038', b'vec-038', b'vec-038', b'vec-038',
             b'vec-038', b'vec-038', b'vec-038'
         ],
         [
             b'vec-039', b'vec-039', b'vec-039', b'vec-039', b'vec-039',
             b'vec-039', b'vec-039', b'vec-039', b'vec-039'
         ], [], [b'vec-041'], [b'vec-042', b'vec-042'],
         [b'vec-043', b'vec-043', b'vec-043'],
         [b'vec-044', b'vec-044', b'vec-044', b'vec-044'],
         [b'vec-045', b'vec-045', b'vec-045', b'vec-045', b'vec-045'],
         [
             b'vec-046', b'vec-046', b'vec-046', b'vec-046', b'vec-046',
             b'vec-046'
         ],
         [
             b'vec-047', b'vec-047', b'vec-047', b'vec-047', b'vec-047',
             b'vec-047', b'vec-047'
         ],
         [
             b'vec-048', b'vec-048', b'vec-048', b'vec-048', b'vec-048',
             b'vec-048', b'vec-048', b'vec-048'
         ],
         [
             b'vec-049', b'vec-049', b'vec-049', b'vec-049', b'vec-049',
             b'vec-049', b'vec-049', b'vec-049', b'vec-049'
         ], [], [b'vec-051'], [b'vec-052', b'vec-052'],
         [b'vec-053', b'vec-053', b'vec-053'],
         [b'vec-054', b'vec-054', b'vec-054', b'vec-054'],
         [b'vec-055', b'vec-055', b'vec-055', b'vec-055', b'vec-055'],
         [
             b'vec-056', b'vec-056', b'vec-056', b'vec-056', b'vec-056',
             b'vec-056'
         ],
         [
             b'vec-057', b'vec-057', b'vec-057', b'vec-057', b'vec-057',
             b'vec-057', b'vec-057'
         ],
         [
             b'vec-058', b'vec-058', b'vec-058', b'vec-058', b'vec-058',
             b'vec-058', b'vec-058', b'vec-058'
         ],
         [
             b'vec-059', b'vec-059', b'vec-059', b'vec-059', b'vec-059',
             b'vec-059', b'vec-059', b'vec-059', b'vec-059'
         ], [], [b'vec-061'], [b'vec-062', b'vec-062'],
         [b'vec-063', b'vec-063', b'vec-063'],
         [b'vec-064', b'vec-064', b'vec-064', b'vec-064'],
         [b'vec-065', b'vec-065', b'vec-065', b'vec-065', b'vec-065'],
         [
             b'vec-066', b'vec-066', b'vec-066', b'vec-066', b'vec-066',
             b'vec-066'
         ],
         [
             b'vec-067', b'vec-067', b'vec-067', b'vec-067', b'vec-067',
             b'vec-067', b'vec-067'
         ],
         [
             b'vec-068', b'vec-068', b'vec-068', b'vec-068', b'vec-068',
             b'vec-068', b'vec-068', b'vec-068'
         ],
         [
             b'vec-069', b'vec-069', b'vec-069', b'vec-069', b'vec-069',
             b'vec-069', b'vec-069', b'vec-069', b'vec-069'
         ], [], [b'vec-071'], [b'vec-072', b'vec-072'],
         [b'vec-073', b'vec-073', b'vec-073'],
         [b'vec-074', b'vec-074', b'vec-074', b'vec-074'],
         [b'vec-075', b'vec-075', b'vec-075', b'vec-075', b'vec-075'],
         [
             b'vec-076', b'vec-076', b'vec-076', b'vec-076', b'vec-076',
             b'vec-076'
         ],
         [
             b'vec-077', b'vec-077', b'vec-077', b'vec-077', b'vec-077',
             b'vec-077', b'vec-077'
         ],
         [
             b'vec-078', b'vec-078', b'vec-078', b'vec-078', b'vec-078',
             b'vec-078', b'vec-078', b'vec-078'
         ],
         [
             b'vec-079', b'vec-079', b'vec-079', b'vec-079', b'vec-079',
             b'vec-079', b'vec-079', b'vec-079', b'vec-079'
         ], [], [b'vec-081'], [b'vec-082', b'vec-082'],
         [b'vec-083', b'vec-083', b'vec-083'],
         [b'vec-084', b'vec-084', b'vec-084', b'vec-084'],
         [b'vec-085', b'vec-085', b'vec-085', b'vec-085', b'vec-085'],
         [
             b'vec-086', b'vec-086', b'vec-086', b'vec-086', b'vec-086',
             b'vec-086'
         ],
         [
             b'vec-087', b'vec-087', b'vec-087', b'vec-087', b'vec-087',
             b'vec-087', b'vec-087'
         ],
         [
             b'vec-088', b'vec-088', b'vec-088', b'vec-088', b'vec-088',
             b'vec-088', b'vec-088', b'vec-088'
         ],
         [
             b'vec-089', b'vec-089', b'vec-089', b'vec-089', b'vec-089',
             b'vec-089', b'vec-089', b'vec-089', b'vec-089'
         ], [], [b'vec-091'], [b'vec-092', b'vec-092'],
         [b'vec-093', b'vec-093', b'vec-093'],
         [b'vec-094', b'vec-094', b'vec-094', b'vec-094'],
         [b'vec-095', b'vec-095', b'vec-095', b'vec-095', b'vec-095'],
         [
             b'vec-096', b'vec-096', b'vec-096', b'vec-096', b'vec-096',
             b'vec-096'
         ],
         [
             b'vec-097', b'vec-097', b'vec-097', b'vec-097', b'vec-097',
             b'vec-097', b'vec-097'
         ],
         [
             b'vec-098', b'vec-098', b'vec-098', b'vec-098', b'vec-098',
             b'vec-098', b'vec-098', b'vec-098'
         ],
         [
             b'vec-099', b'vec-099', b'vec-099', b'vec-099', b'vec-099',
             b'vec-099', b'vec-099', b'vec-099', b'vec-099'
         ]
     ]
Example #22
import uproot
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.optimize as scp
import numpy as np
import sys

sns.set(font_scale=2)

NBINS = 1200
#f = uproot.open("./RPTest_100.root")
f = uproot.open("./MCRecoEventTest.root")
#f = uproot.open("./FullComb_preTrigFix.root")
ftree = f.get("phaseII")
ftree.items()
digitT = ftree.get("digitT")
TrueVtxTime = ftree.get("trueVtxTime")
TrueVtxX = ftree.get("trueVtxX")
TrueVtxY = ftree.get("trueVtxY")
TrueVtxZ = ftree.get("trueVtxZ")
DeltaT = ftree.get("deltaVtxT")
digitX = ftree.get("digitX")
digitY = ftree.get("digitY")
digitType = ftree.get("digitType")
digitZ = ftree.get("digitZ")
evn = ftree.get("eventNumber")
evnums = evn.array()
diT = digitT.array()
diX = digitX.array()
diY = digitY.array()
diZ = digitZ.array()
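
# Sketch of a possible next step (assumes uproot 3 jagged arrays, which
# expose .flatten()): histogram all digit times across events.
plt.hist(diT.flatten(), bins=NBINS)
plt.xlabel("digitT")
plt.ylabel("hits")
plt.show()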
Example #23
def main(args):
    fname = args.fname
    file = uproot.open(fname)
    all_ttrees = dict(
        file.allitems(
            filterclass=lambda cls: issubclass(cls, uproot.tree.TTreeMethods)))
    tracks = all_ttrees[b'PWGHF_TreeCreator/tree_Particle;1']
    pds_trks = tracks.pandas.df()  # entrystop=10)
    events = all_ttrees[b'PWGHF_TreeCreator/tree_event_char;1']
    pds_evs = events.pandas.df()

    # print the banner first
    fj.ClusterSequence.print_banner()

    # signal jet definition
    maxrap = 0.9
    jet_R0 = args.jetR
    jet_def = fj.JetDefinition(fj.antikt_algorithm, jet_R0)
    jet_selector = fj.SelectorPtMin(0.0) & fj.SelectorPtMax(
        1000.0) & fj.SelectorAbsEtaMax(1)
    jet_area_def = fj.AreaDefinition(fj.active_area,
                                     fj.GhostedAreaSpec(maxrap))
    print(jet_def)

    # background estimation
    grid_spacing = maxrap / 10.
    gmbge = fj.GridMedianBackgroundEstimator(maxrap, grid_spacing)

    print()

    output_columns = ['evid', 'pt', 'eta', 'phi', 'area', 'ptsub']
    e_jets = pd.DataFrame(columns=output_columns)

    for i, e in pds_evs.iterrows():
        iev_id = int(e['ev_id'])
        _ts = pds_trks.loc[pds_trks['ev_id'] == iev_id]

        start = time.time()
        _tpsj = fj_parts_from_tracks_numpy(_ts)
        end = time.time()
        dt_swig = end - start

        start = time.time()
        _tpsj_for = fj_parts_from_tracks(_ts)
        end = time.time()
        dt_for = end - start

        # print ('len {} =?= {}'.format(len(_tpsj_for), len(_tpsj)))
        print(
            '[i] timing (ntracks={}): dt_for: {} dt_swig: {} ratio: {}'.format(
                len(_tpsj), dt_for, dt_swig, dt_for / dt_swig))

        # print('maximum particle rapidity:', max([psj.rap() for psj in _tpsj]))
        _cs = fj.ClusterSequenceArea(_tpsj, jet_def, jet_area_def)
        _jets = jet_selector(fj.sorted_by_pt(_cs.inclusive_jets()))
        gmbge.set_particles(_tpsj)
        # print("rho   = ", gmbge.rho())
        # print("sigma = ", gmbge.sigma())

        # _jets = jet_selector(jet_def(_tpsj))
        # _jets_a = [[iev_id, j.perp(), j.eta(), j.phi()] for j in _jets]
        # _jets_a = pd.DataFrame(np.array([[iev_id, j.perp(), j.eta(), j.phi()] for j in _jets]), columns=['evid', 'pt', 'eta', 'phi'])
        _jets_a = pd.DataFrame([[
            iev_id,
            j.perp(),
            j.eta(),
            j.phi(),
            j.area(),
            j.perp() - gmbge.rho() * j.area()
        ] for j in _jets],
                               columns=output_columns)
        # , columns=['evid, pt, eta, phi']
        e_jets = e_jets.append(_jets_a, ignore_index=True)
        # print('event', i, 'number of parts', len(_tpsj), 'number of jets', len(_jets))
        # print(_jets_a.describe())
        if args.fjsubtract:
            fj_example_02_area(_tpsj)

    # print(e_jets.describe())
    joblib.dump(e_jets, args.output)
Example #24
   NUM_EVENTS_PROCESSED = 0

   INPUT_FILES = glob.glob('/pnfs/desy.de/cms/tier2/store/user/missirol/jme_trigger/jmeTriggerNtuples/pfMET/v02/191103/Data_Run2018B_EGamma/*/*/*/*/*.root')

   output = 'test'

   for i_inpf in INPUT_FILES:

       if VERBOSE: print('\033[1m'+'\033[92m'+'[input]'+'\033[0m', i_inpf)

       stop_exe = False
   
       if UPROOT:
          import uproot

          i_ttree = uproot.open(i_inpf)['JMETriggerNTuple/Events']

          i_firstEvent = 0
          i_lastEvent = min(num_maxEvents - NUM_EVENTS_PROCESSED, i_ttree.numentries) if (num_maxEvents >= 0) else i_ttree.numentries

          hltPuppiMET_pt = i_ttree.arrays('*', entrystart=i_firstEvent, entrystop=i_lastEvent)

          for i_ent in range(i_firstEvent, i_lastEvent):

              a = hltPuppiMET_pt['hltPuppiMET_pt'][i_ent]

              if (num_maxEvents >= 0) and (NUM_EVENTS_PROCESSED >= num_maxEvents):
                 stop_exe = True
                 break

#              analyze_event(event=i_evt, th1s=th1s, th2s=th2s)
Example #25
from vectorized import vectorize
import uproot
import numpy as np
from itertools import combinations
import matplotlib.pyplot as plt
import time
import functional

columnar_events = uproot.open(
    "http://scikit-hep.org/uproot/examples/HZZ.root")["events"]
columns = columnar_events.arrays(["*Muon*"])

Muon_E = columns["Muon_E"].content
Muon_Px = columns["Muon_Px"].content
Muon_Py = columns["Muon_Py"].content
Muon_Pz = columns["Muon_Pz"].content

starts = columns["Muon_Px"].starts
stops = columns["Muon_Px"].stops


# ======================================================================
# Examples
# ======================================================================
def totalp(index, Muon_Px, Muon_Py, Muon_Pz, Muon_P):
    px2 = Muon_Px[index]**2
    py2 = Muon_Py[index]**2
    pz2 = Muon_Pz[index]**2
    Muon_P[index] = np.sqrt(px2 + py2 + pz2)
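
# Plain-Python cross-check of the kernel above, independent of whatever
# the `vectorize` decorator from the `vectorized` package does (its API
# is not shown here): drive the kernel one index at a time.
Muon_P = np.empty_like(Muon_E)
for i in range(len(Muon_P)):
    totalp(i, Muon_Px, Muon_Py, Muon_Pz, Muon_P)  # fills Muon_P[i] in place
print(Muon_P[:5])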

Example #26
import uproot
import pandas
import numpy as np
import xgboost

file_train = uproot.open("file_train.root")
file_test = uproot.open("file_test.root")
file_train_and_test = uproot.open("file_train_and_test.root")

tree_train = file_train["tree_name"]
tree_test = file_test["tree_name"]
tree_train_and_test = file_train_and_test["tree_name"]


# function here just so I don't repeat this four times in the code
def getit(tf):
    # select which branches not to import
    df = tf.pandas.df(lambda branch: branch.name != b'true_mass' and branch.
                      name[:3] != b'dir')
    # create new branches based on other branches
    df['sum_nhits_1'] = df.loc[:, ['nhits1_p0', 'nhits1_p1', 'nhits1_p2']].sum(
        axis=1)
    df['sum_nhits_2'] = df.loc[:, ['nhits2_p0', 'nhits2_p1', 'nhits2_p2']].sum(
        axis=1)
    df['sum_nhits'] = df.loc[:, ['sum_nhits_1', 'sum_nhits_2']].sum(axis=1)
    return df


data_train_sig = getit(tree_train)

data_test_sig = getit(tree_test)
Example #27
    help=
    'training k-fold. use 0,1,2 for offset in selecting the third of data to reserve for validation'
)
args = parser.parse_args()

lr_init = 1e-2
epochs = 20

train_batch_size = 2**10
infer_batch_size = 2**15
num_workers = 4

kfold = int(args.kfold)
train_modulus = 3
train_portion = 2
t = uproot.open(args.train)['Tree']

pt, eta, phi, w = t.arrays(['jetPt', 'jetEta', 'jetPhi', 'weight'],
                           outputtype=tuple)  # mass set to zero in toyTrees
lv = uproot_methods.TLorentzVectorArray.from_ptetaphim(pt, eta, phi, 0)
lv, w = torch.FloatTensor([pt, eta, phi, lv.mass]), torch.FloatTensor(w)
# lv is [feature, event, jet], want [event, feature, jet]
lv = lv.transpose(0, 1)

print("Split into training and validation sets")
n = lv.shape[0]
idx = np.arange(n)
is_train = (idx + kfold) % train_modulus < train_portion
is_valid = ~is_train

dataset_train = TensorDataset(lv[is_train], w[is_train])
Example #28
if __name__ == '__main__':

    #recomb = "06417"
    recomb = "0715"
    energyList = [1,2,3,4,5,6,7] 
    variables = ['dEdx','aarondEdx','energy','hitCorrection','mcDepCorrection','missedHitsCorrection','noHitsCorrection','energyCorrected','mcInitEnergy','recoMinusTrueOverTrue','mcIDEdiscrep']
        
#    rfile2 = up.open( "PDSPProd2_1GeV.root" )
#    df2    = rfile2["pdAnaTree/AnaTree"].pandas.df( variables )
#    
    fig = plt.figure(figsize=(26,14))
    plotCount = 1

    for energy in energyList:
        print("{} GeV".format(str(energy)))
        rfile      = up.open( "singlePositron_{}GeV_sceON_keepOFF_recomb{}.root".format( energy, recomb ) )
        rfile_phot = up.open( "singlePhoton_{}GeV_sceON_keepOFF_recomb{}.root".format( energy, recomb )   )
        
        df      = rfile["pdAnaTree/AnaTree"].pandas.df( variables )
        df_phot = rfile_phot["pdAnaTree/AnaTree"].pandas.df( variables )
        

        peakVal      = peakValue( np.histogram(df.mcDepCorrection/df.mcInitEnergy           *100, range=(0,5), bins=50) )
        peakVal_phot = peakValue( np.histogram(df_phot.mcDepCorrection/df_phot.mcInitEnergy *100, range=(0,5), bins=50) )

        plt.subplot(len(energyList),6,plotCount)
        plotCount += 1
        plt.hist( df.mcDepCorrection/df.mcInitEnergy           *100, range=(0,5), bins=50, density=True, histtype='stepfilled', edgecolor='r', fc=(1,0,0,0.1), label=r'$e^-$: Peak={}'.format(round(peakVal,3))      )
        plt.hist( df_phot.mcDepCorrection/df_phot.mcInitEnergy *100, range=(0,5), bins=50, density=True, histtype='stepfilled', edgecolor='b', fc=(0,0,1,0.1), label=r'$\gamma$: Peak={}'.format(round(peakVal_phot,3)) )
        plt.axvline(peakVal,      c='r', ls='--', lw=0.5)
        plt.axvline(peakVal_phot, c='b', ls='--', lw=0.5)
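
# peakValue is not defined in this snippet; judging from how it is called
# on the output of np.histogram, a plausible sketch (an assumption, not
# the original implementation) returns the center of the most populated bin:
def peakValue(hist):
    counts, edges = hist
    centers = 0.5 * (edges[:-1] + edges[1:])
    return centers[np.argmax(counts)]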
Example #29
def Recon(filename, output, mode, offset, types, initial, MC, method, verbose):
    '''
    Reconstruction.

    filename: ROOT reference file to convert to .h5
    output: output .h5 file
    '''
    # Create the output file and the group
    print(filename)  # filename
    # Create the output file and the group
    h5file = tables.open_file(output,
                              mode="w",
                              title="OneTonDetector",
                              filters=tables.Filters(complevel=9))
    group = "/"
    # Create tables
    ReconTable = h5file.create_table(group, "Recon", pub.ReconData, "Recon")
    recondata = ReconTable.row

    # Loop for event
    f = uproot.open(filename)
    data = f['SimTriggerInfo']
    if types == 'Sim_root':
        PMTId = data['PEList.PMTId'].array()
        Time = data['PEList.HitPosInWindow'].array()
        Charge = data['PEList.Charge'].array()
        SegmentId = ak.to_numpy(ak.flatten(
            data['truthList.SegmentId'].array()))
        VertexId = ak.to_numpy(ak.flatten(data['truthList.VertexId'].array()))
        x = ak.to_numpy(ak.flatten(data['truthList.x'].array()))
        y = ak.to_numpy(ak.flatten(data['truthList.y'].array()))
        z = ak.to_numpy(ak.flatten(data['truthList.z'].array()))
        E = ak.to_numpy(ak.flatten(data['truthList.EkMerged'].array()))

        for pmt, time_array, pe_array, sid, vid, xt, yt, zt, Et in zip(
                PMTId, Time, Charge, SegmentId, VertexId, x, y, z, E):
            recondata['x_truth'] = xt
            recondata['y_truth'] = yt
            recondata['z_truth'] = zt
            recondata['E_truth'] = Et
            recondata['EventID'] = sid
            fired_PMT = ak.to_numpy(pmt)
            time_array = ak.to_numpy(time_array)

            # PMT order: 0-29
            # PE /= Gain
            # pe_array, cid = np.histogram(pmt, bins=np.arange(31)-0.5, weights=PE)

            # For hit info
            pe_array, cid = np.histogram(fired_PMT, bins=np.arange(31))
            # For very rough estimate
            # pe_array = np.round(pe_array)

            if np.sum(pe_array) == 0:
                continue

            if initial == 'WA':
                x0_in = pub.Initial.ChargeWeighted(pe_array, PMT_pos,
                                                   time_array)
            elif initial == 'fit':
                x0_in = pub.Initial.FitGrid(pe_array, mesh, tpl, time_array)
            elif initial == 'MC':
                x0_in = pub.Initial.MCGrid(pe_array, mesh, tpl, time_array)

            x0_in = x0_in[1:]
            result_in = minimize(pub.Likelihood_Truth.Likelihood,
                                 x0_in,
                                 method='SLSQP',
                                 bounds=((0, 1), (None, None), (None, None),
                                         (None, None)),
                                 args=(coeff_time, coeff_pe, PMT_pos,
                                       fired_PMT, time_array, pe_array,
                                       cut_time, cut_pe))
            z, x = pub.Likelihood_Truth.Calc_basis(result_in.x, PMT_pos,
                                                   cut_pe)
            L, E_in = pub.Likelihood_Truth.Likelihood_PE(
                coeff_pe, z, x, pe_array, cut_pe)

            # xyz coordinate
            in2 = pub.r2c(result_in.x[:3]) * shell
            recondata['x_sph_in'] = in2[0]
            recondata['y_sph_in'] = in2[1]
            recondata['z_sph_in'] = in2[2]
            recondata['success_in'] = result_in.success
            recondata['Likelihood_in'] = result_in.fun
            # outer recon
            if initial == 'WA':
                x0_out = result_in.x.copy()
                x0_out[0] = 0.92
            else:
                x0_out = pub.Initial.FitGrid(pe_array, mesh_out, tpl_out,
                                             time_array)
                x0_out = x0_out[1:]
            result_out = minimize(pub.Likelihood_Truth.Likelihood,
                                  x0_out,
                                  method='SLSQP',
                                  bounds=((0, 1), (None, None), (None, None),
                                          (None, None)),
                                  args=(coeff_time, coeff_pe, PMT_pos,
                                        fired_PMT, time_array, pe_array,
                                        cut_time, cut_pe))
            z, x = pub.Likelihood_Truth.Calc_basis(result_out.x, PMT_pos,
                                                   cut_pe)
            L, E_out = pub.Likelihood_Truth.Likelihood_PE(
                coeff_pe, z, x, pe_array, cut_pe)

            out2 = pub.r2c(result_out.x[:3]) * shell
            recondata['x_sph_out'] = out2[0]
            recondata['y_sph_out'] = out2[1]
            recondata['z_sph_out'] = out2[2]
            recondata['success_out'] = result_out.success
            recondata['Likelihood_out'] = result_out.fun

            # 0-th order (Energy intercept)
            base_in = LG.legval(result_in.x[1], coeff_pe.T)
            base_out = LG.legval(result_out.x[1], coeff_pe.T)
            recondata['E_sph_in'] = np.exp(E_in - base_in[0] + np.log(2))
            recondata['E_sph_out'] = np.exp(E_out - base_out[0] + np.log(2))

            if (verbose):
                print('-' * 60)
                print(f'inner: {np.exp(E_in - base_in[0] + np.log(2))}')
                print(f'outer: {np.exp(E_out - base_out[0] + np.log(2))}')

                print('inner')
                print(f'Template likelihood: {-np.max(L)}')
                print(
                    '%d vertex: [%+.2f, %+.2f, %+.2f] radius: %+.2f, Likelihood: %+.6f'
                    % (sid, in2[0], in2[1], in2[2], norm(in2), result_in.fun))
                print('outer')
                print(
                    '%d vertex: [%+.2f, %+.2f, %+.2f] radius: %+.2f, Likelihood: %+.6f'
                    % (sid, out2[0], out2[1], out2[2], norm(out2),
                       result_out.fun))
            recondata.append()
    elif types == 'h5':
        pass

    # Flush into the output file
    ReconTable.flush()
    h5file.close()
Example #30
        "correctedTerEigenValLength",
        "pandoraPriProjectionLength",
        "pandoraSecProjectionLength",
        "pandoraTerProjectionLength",
        "correctedPriProjectionLength",
        "correctedSecProjectionLength",
        "correctedTerProjectionLength",
        #                    "nonCorrectedHit3DX", "nonCorrectedHit3DY", "nonCorrectedHit3DZ",
        #                    "correctedHit3DX", "correctedHit3DY", "correctedHit3DZ",
        #                    "dEdt",
        #                     "hitCharge", "hitTrueEnergy",
    ]

    print("Open File 1")
    rfile1 = up.open(
        "./data/singleParticles/electrons/singleElectron_1GeV_trimmed_cnn.root"
    )
    print("Open File 2")
    rfile2 = up.open(
        "./data/singleParticles/electrons/singleElectron_2GeV_trimmed_cnn.root"
    )
    print("Open File 3")
    rfile3 = up.open(
        "./data/singleParticles/electrons/singleElectron_3GeV_trimmed_cnn.root"
    )
    print("Open File 4")
    rfile4 = up.open(
        "./data/singleParticles/electrons/singleElectron_4GeV_trimmed_cnn.root"
    )
    print("Open File 5")
    rfile5 = up.open(
Example #31
from preprocessing import *

# for PointNet
preprocessing_algo = make_graph_noedge

# for EdgeNet
#preprocessing_algo = make_graph_etaphi
#grouping_algo = 'knn' #or 'kdtree'
#preprocessing_args= dict(k=4)
#preprocessing_args= dict(r = 0.07) #if algo == 'kdtree'
#layer_norm = 150 #only used for etaphi, no effect for other preprocessors

fname = '../data/ntup/partGun_PDGid15_x1000_Pt3.0To100.0_NTUP_1.root'

test = uproot.open(fname)['ana']['hgc']

#example of generating a binary ground-truth adjacency matrix
#for both endcaps in all events for all clusters
#truth is now that hits in adjacent layers are connected
#and so are hits in the same layer within delta-R < 2
arrays = test.arrays([b'simcluster_hits_indices'])
rechit = test.arrays([
    b'rechit_x', b'rechit_y', b'rechit_z', b'rechit_eta', b'rechit_phi',
    b'rechit_layer', b'rechit_time', b'rechit_energy'
])
NEvents = rechit[b'rechit_z'].shape[0]
rechit[b'rechit_x'].content[rechit[b'rechit_z'].content < 0] *= -1
sim_indices = awkward.fromiter(arrays[b'simcluster_hits_indices'])
valid_sim_indices = sim_indices[sim_indices > -1]
Example #32
import uproot
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import os.path
'''Takes in a root file, extracts the 2D histogram data with uproot, and saves it as a numpy array (optionally viewing the image with matplotlib).'''

Sample = 'TTbar'
BASE_PATH = "/storage2/ec6821/P2JetsSums/MSciProjects/2020/Histograms_{sample}".format(
    sample=Sample)
for subdir, dirs, files in os.walk(BASE_PATH):
    i = 0
    for filename in files:
        filepath = subdir + os.sep + filename
        if filepath.endswith(".root"):
            f = uproot.open(filepath)
            a = f["caloGrid"].values
            # plt.imshow(a)
            # plt.show()
            outPathBase = '{sample}_numpy'.format(sample=Sample.upper())
            if not os.path.isdir(outPathBase):
                os.mkdir(outPathBase)
            outFile = outPathBase + '/' + filename.replace('.root', '')
            np.save(outFile, a)
            # break
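            # The saved grid can be reloaded later with np.load(outFile + '.npy')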
Example #33
0
def preprocess(args):
    """ Preprocess the flat ROOT files in input

    The input ROOT files are organized as follows:

    - run
    - event
    - trackster_id
    - [layers   of the layerClusters belonging to this trackster]
    - [etas     of the layerClusters belonging to this trackster]
    - [phis     of the layerClusters belonging to this trackster]
    - [energies of the layerClusters belonging to this trackster]
    - energy of the caloParticle linked to this trackster
    - pdgId of the caloParticle linked to this trackster
    - raw_energy of the trackster

    FLAT PROTOCOL BUFFER PADDED FORMAT

    This is the format that will be used in input to the training. It should be structured as follows:

    - trackster number
    - energy of the caloParticle
    - pdgId of the caloParticle
    - raw_energy of the trackster
    - sigma1 of the trackster (along the "major" PCA component)
    - sigma2 of the trackster
    - sigma3 of the trackster
    - layer of a layerCluster
    - energy of a layerCluster
    - eta of a layerCluster
    - phi of a layerCluster

    This structure is padded in the sense that, for every trackster, there will be 10 layerClusters for each layer, for all 50 layers of HGCAL. The target final size of the DataFrame is:

    rows of target DataFrame = number of tracksters in input ROOT files * 50 * 10

    The number of input tracksters in the input ROOT files has to be deduced from the ROOT file itself, with something along the lines of:

    df.shape[0]

    done on the input ROOT file.
    """
    if not isinstance(args, argparse.Namespace):
        args = SimpleNamespace(**args)

    if args.debug:
        pd.set_option('display.max_rows', 200)

    rootfiles_path = os.path.join(args.inputDir, args.suffix + '*.root')
    rootfiles = glob(rootfiles_path)
    if len(rootfiles) == 0:
        print('Input directory: {} Input suffix: {}'.format(
            args.inputDir, args.suffix))
        raise ValueError(
            '[preprocessing_pb.py]: No input files found in {}.'.format(
                rootfiles_path))

    max_perlayer = 10
    number_layers = 50

    keepVars = [
        'ts_energy', 'ts_sigma1', 'ts_sigma2', 'ts_sigma3',
        'cp_missingEnergyFraction'
    ]
    keepVars += [
        'cp_energy', 'cp_pdgid', 'lc_energy', 'lc_eta', 'lc_phi', 'lc_layer'
    ]

    for rootfile in tqdm(rootfiles):
        start_time = time()
        filename = os.path.basename(rootfile).replace(".root", "")
        if args.debug:
            print('File: ', rootfile, filename)
        try:
            with uproot.open(rootfile) as open_file:
                directory = open_file[args.dir]
                tree = directory[args.tree]
                df = tree.arrays(filter_name=keepVars, library='pd')
                df = df[df['cp_missingEnergyFraction'] <
                        args.maxMissingEnergyFraction]
                df = df.drop(['cp_missingEnergyFraction'], axis=1)
        except Exception:
            print('File {} had a problem.'.format(rootfile))
            raise

        unique_entries = df.index.get_level_values('entry').unique().to_list()
        events = len(unique_entries)
        if args.debug:
            print(df)

        checkDirAndCreate(args.outputDir)
        checkDirAndCreate(os.path.join(args.outputDir, 'padded'))

        name = os.path.join(args.outputDir, 'padded', filename + '_padded.pb')
        writer = tf.io.TFRecordWriter(name)
        with open(args.targetsFile, 'a') as f:
            f.write(name + '\n')

        for entry in unique_entries:
            # take every row of this event (values of the MultiIndex level 'entry')
            example = make_example_pid_and_pad(df.loc[[entry]], number_layers,
                                               max_perlayer, args.debug)
            writer.write(example.SerializeToString())
            if args.debug:
                print(example)
        writer.close()

        end_time = time()
        name = name.replace('.pb', '.log')
        with open(name, 'w') as logfile:
            logfile.write('Events: {} Time: {} Rate: {}'.format(
                events, (end_time - start_time),
                events / float(end_time - start_time)))
        with open(args.targetsFile, 'a') as f:
            f.write(name + '\n')

    print("Done!")
Example #34
0
import uproot
import ROOT as R
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, roc_auc_score

def scale_list(factor, items):
    for item in items:
        item.Scale(factor)

# Import trees
loc_yeB = "/data/bfys/jrol/RapidSim/{0}2tau2pipipi_filtered.root"
loc_noB = "/data/bfys/jrol/RapidSim/{0}2tau2pipipi_noBTracking.root"

f_Dplus_noB  = uproot.open(loc_noB.format("Dplus"));  Dplus_tree_noB  = f_Dplus_noB["DecayTree"]
f_Dsplus_noB = uproot.open(loc_noB.format("Dsplus")); Dsplus_tree_noB = f_Dsplus_noB["DecayTree"]
f_Bplus_noB  = uproot.open(loc_noB.format("Bplus"));  Bplus_tree_noB  = f_Bplus_noB["DecayTree"]
f_Bcplus_noB = uproot.open(loc_noB.format("Bcplus")); Bcplus_tree_noB = f_Bcplus_noB["DecayTree"]

f_Dplus_yeB  = uproot.open(loc_yeB.format("Dplus"));  Dplus_tree_yeB  = f_Dplus_yeB["DecayTree"]
f_Dsplus_yeB = uproot.open(loc_yeB.format("Dsplus")); Dsplus_tree_yeB = f_Dsplus_yeB["DecayTree"]
f_Bplus_yeB  = uproot.open(loc_yeB.format("Bplus"));  Bplus_tree_yeB  = f_Bplus_yeB["DecayTree"]
f_Bcplus_yeB = uproot.open(loc_yeB.format("Bcplus")); Bcplus_tree_yeB = f_Bcplus_yeB["DecayTree"]

Dp_df_noB  = Dplus_tree_noB.pandas.df();  Dp_df_yeB  = Dplus_tree_yeB.pandas.df()
Dsp_df_noB = Dsplus_tree_noB.pandas.df(); Dsp_df_yeB = Dsplus_tree_yeB.pandas.df()
Bp_df_noB  = Bplus_tree_noB.pandas.df();  Bp_df_yeB  = Bplus_tree_yeB.pandas.df()
Bcp_df_noB = Bcplus_tree_noB.pandas.df(); Bcp_df_yeB = Bcplus_tree_yeB.pandas.df()

h2d_sig = R.TH2F("2dh_sig", "Signal events", 30, 0, 7.0, 30, 0, 4)
Example #35
0
def __init__(self, name):
    self.mc_file = uproot.open(name)      # open the input ROOT file
    self.t_pts = self.mc_file['npts']     # the 'npts' tree
    self.n_event = self.t_pts.numentries  # total number of events
    self.n_pts_prev = np.zeros((4, 2), dtype=np.uint32)
Example #36
0
def thist_to_np_xy(infile,key='reconstructedProfileHisto'):
    assert(infile.endswith(".root"))
    f=uproot.open(infile)
    outdata_x=binedges_to_centers(f[key].edges)
    outdata_y=f[key].values
    return [outdata_x,outdata_y]
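
# Usage sketch (the file name here is hypothetical; key defaults to 'reconstructedProfileHisto'):
# x, y = thist_to_np_xy('dose_profile.root')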
Example #37
0
import numpy as np
import uproot
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import metrics
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import utils_endcap
import time, pickle
from tqdm import tqdm
from scipy import stats

# for sklearn, see

np.random.seed(1337)

fin = uproot.open(
    "/home/prasant/Files/Lowmass_ntuple/Out_Singlephoton_Lowmass_photonIDMVA_woShowershape_LMTrain_18pT18_RunIIFall17_3_1_0_03122018.root"
)
print(fin.keys())
prompt = fin['promptPhotons']
fake = fin['fakePhotons']
print(fin['promptPhotons'].keys())
print(fin['fakePhotons'].keys())

## for endcap

geometry_selection = lambda tree: np.logical_and(
    abs(tree.array('scEta')) > 1.566,
    abs(tree.array('scEta')) < 2.5)

input_values, target_values, orig_weights, train_weights, pt, scEta, input_vars = utils_endcap.load_file(
    fin, geometry_selection)
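
# The example is cut off before any training happens; a minimal continuation
# (a sketch, assuming input_values/target_values/train_weights are the feature
# matrix, binary labels and per-candidate weights returned by utils_endcap.load_file):
X_train, X_test, y_train, y_test, w_train, w_test = train_test_split(
    input_values, target_values, train_weights, test_size=0.25, random_state=1337)

clf = GradientBoostingClassifier(n_estimators=200, max_depth=3)
clf.fit(X_train, y_train, sample_weight=w_train)

scores = clf.predict_proba(X_test)[:, 1]
print('AUC:', metrics.roc_auc_score(y_test, scores, sample_weight=w_test))
fpr, tpr, _ = metrics.roc_curve(y_test, scores, sample_weight=w_test)
plt.plot(fpr, tpr)
plt.xlabel('false positive rate')
plt.ylabel('true positive rate')
plt.show()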
Example #38
0
import uproot, uproot_methods
from Builder import Initialize

file = uproot.open("nano_5.root")
tree = file["Events"]

e = Initialize({'pt':tree.array("Electron_pt"),
                'eta':tree.array("Electron_eta"),
                'phi':tree.array("Electron_phi"),
                'mass':tree.array("Electron_mass"),
                'iso':tree.array('Electron_pfRelIso03_all'),
                'dxy':tree.array('Electron_dxy'),
                'dz':tree.array('Electron_dz'),
                'id':tree.array('Electron_mvaSpring16GP_WP90')})

mu = Initialize({'pt':tree.array("Muon_pt"),
                 'eta':tree.array("Muon_eta"),
                 'phi':tree.array("Muon_phi"),
                 'mass':tree.array("Muon_mass"),
                 'iso':tree.array('Muon_pfRelIso04_all'),
                 'dxy':tree.array('Muon_dxy'),
                 'dz':tree.array('Muon_dz')})

tau = Initialize({'pt':tree.array('Tau_pt'),
                  'eta':tree.array('Tau_eta'),
                  'phi':tree.array('Tau_phi'),
                  'mass':tree.array('Tau_mass'),
                  'decayMode':tree.array('Tau_idDecayMode'),
                  'decayModeNew':tree.array('Tau_idDecayModeNewDMs'),
                  'id':tree.array('Tau_idMVAnew')})
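
# A possible next step (a sketch; assumes the objects built by Initialize support
# numpy-style boolean masks, as jagged candidate arrays do):
# tight_e = e[(e.pt > 20) & (abs(e.eta) < 2.5) & (e.id > 0)]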
Example #39
0
args = parser.parse_args()

outdir=args.o
if len(outdir) < 1:
    exit()

os.system('mkdir -p '+outdir)

def convertAndWrite(infile):
    # name the output after the input, replacing the '.root' suffix
    thisofile = outdir + '/' + os.path.basename(infile)[:-5] + '_skim.root'
    os.system('skim ' + infile + ' ' + thisofile)

allfiles = []
with open(args.inputFile) as f:
    for l in f:
        l = l.rstrip('\n').rstrip(' ')
        if len(l) and os.path.isfile(l):
            try:
                tree = uproot.open(l)["B4"]
                nevents = tree.numentries  # read the entry count to verify the file is usable
                allfiles.append(l)
            except Exception:
                # skip files that cannot be opened or lack the B4 tree
                pass
        
print(allfiles)
p = Pool()
p.map(convertAndWrite, allfiles)
Example #40
0
ntree_limit = 800
_model = xgb.Booster({'nthread': 6})

#mpath='../mva/model_' + args.model + '/xgb_fulldata_None.model'
mpath = '/work/ytakahas/work/analysis/CMSSW_10_2_10/src/rJpsi/mva/model_' + args.model + '/xgb_fulldata_None.model'
os.system('ls -lart ' + mpath)
print('model path = ', mpath)
#_model.load_model('/work/ytakahas/work/analysis/CMSSW_10_2_10/src/BcJpsiTauNu/mva/model_' + args.model + '/xgb_fulldata_None.model')
_model.load_model(mpath)

#_model.load_model('/work/ytakahas/work/mva/BcJpsiTau/model/xgb_fulldata_None.model')

print(args.file)
os.system('ls -lart ' + args.file)

events = uproot.open(args.file)['tree']

#print 'setup'
#ofile = TFile('Myroot.root', 'recreate')
#otree = TTree('tree', 'tree')

#mass = np.zeros(1, dtype=float)
#xgbs = np.zeros(1, dtype=float)
#otree.Branch('mass', mass, 'mass/F')
#otree.Branch('xgbs', xgbs, 'xgbs/F')

for i, params in enumerate(
        events.iterate(outputtype=pd.DataFrame, entrysteps=1000000)):
    print()
    print(
        i, 'making ... ' + fdir + '/Myroot_' + args.prefix + '_' + str(i) +
Example #41
0
def GetTree(file_name, add_cuts="", write_tracks=False):
    """Retrieves the events in the TTree with uproot and returns them as
    a pandas DataFrame."""
    if debug:
        t0_jets = dt.datetime.now()
        print('Start GetTree')
    var_list = list(mapping.keys())
    tree = up.open(file_name)[tree_name]

    if write_tracks:
        tracks_ndarray = GetTracks(tree)
    if debug:
        print('Getting tracks ndarray took a total of: {}'.format(
            dt.datetime.now() - t0_jets))
        t0_jets = dt.datetime.now()

    df = tree.pandas.df(var_list)
    if debug:
        print('Getting df with uproot took: {}'.format(dt.datetime.now() -
                                                       t0_jets))
        t0_jets = dt.datetime.now()

    df['jet_bH_pt'] = df.apply(lambda row: max(row['jet_bH_pt'])[0], axis=1)
    df['jet_bH_pt'] = df['jet_bH_pt'].mask(
        df['jet_bH_pt'].lt(0), 0)  # Set all negative bH pt values to 0
    df['jetPtRank'] = df.groupby(level=0)['jet_pt'].rank(
        ascending=False)  # Add jet pT rank
    # If jet_jf_dR is larger than 15, it was set to the "default" value of std::hypot(-11,-11), so set this to its actual default of -1
    df['jet_jf_dR'] = df['jet_jf_dR'].mask(df['jet_jf_dR'].gt(15),
                                           default_values2['jf_dR'][0])

    # Apply jet quality cuts
    df.query('jet_pt>20e3 & abs(jet_eta)<2.5 & (abs(jet_eta)>2.4 |\
                jet_pt>60e3 | jet_JVT>0.5) & (jet_aliveAfterOR ==True)',
             inplace=True)

    if add_cuts != "":
        df.query(add_cuts, inplace=True)

    if debug:
        print('Querying jets df took: {}'.format(dt.datetime.now() - t0_jets))
        t0_jets = dt.datetime.now()

    df.rename(index=str, columns=mapping, inplace=True)
    # changing eta to absolute eta
    df['absEta_btagJes'] = df['eta_btagJes'].abs()
    # Replacing default values with this syntax:
    # df.replace({'A': {0: 100, 4: 400}})
    rep_dict = {}
    for key, val in default_values2.items():
        if key in list(var_conv_oldDl1.keys()):
            replacer = {}
            for elem in val:
                replacer[elem] = np.nan
            rep_dict[var_conv_oldDl1[key]] = replacer
    df.replace(rep_dict, inplace=True)

    # Generating default flags
    df['JetFitter_isDefaults'] = FindCheck(df['JetFitter_mass'].values)
    df['SV1_isDefaults'] = FindCheck(df['SV1_masssvx'].values)
    df['IP2D_isDefaults'] = FindCheck(df['IP2D_bu'].values)
    df['IP3D_isDefaults'] = FindCheck(df['IP3D_bu'].values)
    df['JetFitterSecondaryVertex_isDefaults'] = FindCheck(
        df['JetFitterSecondaryVertex_nTracks'].values)
    # rnnip default flag not necessary anymore
    df['rnnip_isDefaults'] = FindCheck(df['rnnip_pu'].values)

    if debug:
        print('Remaining jets columns took: {}'.format(dt.datetime.now() -
                                                       t0_jets))
        t0_jets = dt.datetime.now()

    if write_tracks:
        return df, tracks_ndarray
    else:
        return df
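
# Usage sketch (the file name is hypothetical; tree_name, mapping and the
# default-value dictionaries are module-level globals defined elsewhere):
# jets_df = GetTree('flavtag_ntuple.root', add_cuts='jet_pt < 250e3')
# jets_df, tracks = GetTree('flavtag_ntuple.root', write_tracks=True)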