Esempio n. 1
0
def test_list_trees():
    """list_trees must report the expected tree names for each fixture file."""
    # (fixture file, expected names): a TTree first, then a TNtuple
    cases = (
        ('vary1.root', ['tree']),
        ('ntuple.root', ['ntuple']),
    )
    for fixture, expected in cases:
        assert_equal(rnp.list_trees(load(fixture)), expected)
Esempio n. 2
0
def test_list_trees():
    """Verify the tree names found in the TTree and TNtuple fixtures."""
    # TTree fixture
    ttree_path = load('vary1.root')
    assert_equal(rnp.list_trees(ttree_path), ['tree'])
    # TNtuple fixture
    ntuple_path = load('ntuple.root')
    assert_equal(rnp.list_trees(ntuple_path), ['ntuple'])
def get_any_tree(tfilepath):
    """Return the name of the only tree in ``tfilepath``.

    Raises ValueError when ``list_trees`` reports zero or several names.
    """
    names = list_trees(tfilepath)  # list of tree names
    if len(names) != 1:
        raise ValueError('More/less than one tree found in {}\nPossible trees: {}'.format(tfilepath, names))
    return names[0]
Esempio n. 4
0
File: tmva.py Progetto: 0x0all/rep
    def _run_tmva_training(self, info):
        """
        Run subprocess to train tmva factory

        The estimator itself and ``info`` are pickled, in that order, onto the
        child's stdin. Once the child has exited, the weights XML file is read
        into ``self.formula_xml``.

        :param info: class with additional information; this method reads its
            ``directory``, ``tmva_root`` and ``tmva_job`` attributes
        :raises AssertionError: if the child exits with a non-zero code or the
            result file does not contain ``TrainTree``
        """
        # An argument list plus ``cwd`` replaces the former shell string
        # ('cd <dir>; <python> -c "..."'), so a directory or interpreter path
        # containing spaces or shell metacharacters can no longer break the command.
        tmva_process = subprocess.Popen(
            [sys.executable, '-c',
             'from rep.estimators import _tmvaFactory; _tmvaFactory.main()'],
            cwd=info.directory,
            stdin=PIPE, stdout=PIPE, stderr=subprocess.STDOUT)

        cPickle.dump(self, tmva_process.stdin)
        cPickle.dump(info, tmva_process.stdin)
        # stderr is redirected into stdout above, so ``stderr`` is always None here.
        stdout, stderr = tmva_process.communicate()
        assert tmva_process.returncode == 0, \
            'ERROR: TMVA process is incorrect finished \n LOG: %s \n %s' % (stderr, stdout)

        assert 'TrainTree' in root_numpy.list_trees(os.path.join(info.directory, info.tmva_root)), \
            'ERROR: Result file has not TrainTree'

        xml_filename = os.path.join(info.directory, 'weights',
                                    '{job}_{name}.weights.xml'.format(job=info.tmva_job, name=self._method_name))
        with open(xml_filename, 'r') as xml_file:
            self.formula_xml = xml_file.read()
Esempio n. 5
0
File: tmva.py Progetto: spolakh/rep
    def _run_tmva_training(self, info):
        """
        Run subprocess to train tmva factory

        ``self`` and then ``info`` are pickled onto the child's stdin. After the
        child exits, the generated weights XML is loaded into ``self.formula_xml``.

        :param info: class with additional information; its ``directory``,
            ``tmva_root`` and ``tmva_job`` attributes are read here
        :raises AssertionError: if the child exits with a non-zero code or the
            result file does not contain ``TrainTree``
        """
        # Run the interpreter directly with ``cwd`` set instead of going through
        # a shell ('cd <dir>; <python> -c "..."'): paths with spaces or shell
        # metacharacters would otherwise corrupt the command line.
        command = [
            sys.executable,
            '-c',
            'from rep.estimators import _tmvaFactory; _tmvaFactory.main()',
        ]
        tmva_process = subprocess.Popen(
            command,
            cwd=info.directory,
            stdin=PIPE,
            stdout=PIPE,
            stderr=subprocess.STDOUT)

        cPickle.dump(self, tmva_process.stdin)
        cPickle.dump(info, tmva_process.stdin)
        # stderr is merged into stdout by the Popen call, so ``stderr`` is None.
        stdout, stderr = tmva_process.communicate()
        assert tmva_process.returncode == 0, \
            'ERROR: TMVA process is incorrect finished \n LOG: %s \n %s' % (stderr, stdout)

        assert 'TrainTree' in root_numpy.list_trees(os.path.join(info.directory, info.tmva_root)), \
            'ERROR: Result file has not TrainTree'

        xml_filename = os.path.join(
            info.directory, 'weights',
            '{job}_{name}.weights.xml'.format(job=info.tmva_job,
                                              name=self._method_name))
        with open(xml_filename, 'r') as xml_file:
            self.formula_xml = xml_file.read()
Esempio n. 6
0
def get_any_tree(tfilepath):
    """Return the name of the single tree stored in ``tfilepath``.

    :param tfilepath: path handed to ``rn.list_trees``
    :return: the only tree name found
    :raises ValueError: if the file holds no tree, or more than one tree
    """
    trees = rn.list_trees(tfilepath)  # list of tree names
    if len(trees) == 1:
        return trees[0]
    # Report the two failure modes separately: "more than one" was misleading
    # for a file that contains no tree at all.
    if not trees:
        raise ValueError('No tree found in {}'.format(tfilepath))
    raise ValueError('More than one tree found in {}\nPossible trees: {}'.format(tfilepath, trees))
Esempio n. 7
0
    def from_file(files, **options):
        """
        Load a dataset from a file or collection of files.

        files: string file name, glob pattern or iterable of string file names.
        options:
            treename: str, name of the TTree object in the collection of files.
                      Optional only if all input files contain a single TTree, with the same name.

        Returns a ROOTDataset wrapping a TChain over the input files.

        Raises ValueError if no tree name is given and it cannot be inferred
        unambiguously (no input files, a file without trees, a file with several
        trees, or differing names across files), or if the given tree name is
        missing from any input file.
        """
        from root_numpy import list_trees  #TODO: avoid the dependency

        if isinstance(files, str):
            files = glob.glob(files)
        else:
            # The names are walked twice (validation, then chaining); materialize
            # them so a one-shot iterable is not exhausted by the first pass.
            files = list(files)

        treename = options.get("treename")
        if treename is None:
            if not files:
                raise ValueError("No input files found; cannot infer a tree name.")
            without_trees = []
            with_several = []
            treenames = set()
            for fname in files:
                trees = list_trees(fname)
                if not trees:
                    without_trees.append(fname)
                elif len(trees) > 1:
                    with_several.append(fname)
                else:
                    treenames.add(trees[0])
            if without_trees:
                raise ValueError(
                    "No trees found in file(s) {0}!\nPlease check your inputs."
                    .format(','.join(without_trees)))
            if with_several:
                raise ValueError(
                    "Multiple trees found in file(s) {0}!\nPlease specify a tree name."
                    .format(','.join(with_several)))
            if len(treenames) > 1:
                raise ValueError(
                    "Different tree names found!\nPlease specify the desired tree name."
                )
            treename = treenames.pop()
        else:
            # Membership test: the requested tree need not be the first one listed,
            # and a file without any tree simply counts as missing it.
            missing = [fname for fname in files
                       if treename not in list_trees(fname)]
            if missing:
                raise ValueError(
                    "Specified tree name '{0}' not found in file(s) {1}!\nPlease check your inputs."
                    .format(treename, ','.join(missing)))

        chain = ROOT.TChain(treename)
        for name in files:
            chain.Add(name)

        return ROOTDataset(
            chain, FileOrigin([f.GetTitle() for f in chain.GetListOfFiles()]))
Esempio n. 8
0
File: root.py Progetto: stefco/gwpy
def table_from_root(source, treename=None, include_names=None, **kwargs):
    """Read one tree of a ROOT file into a Table.

    ``source`` is a file name, or an object whose ``name`` attribute is used
    instead. When ``treename`` is omitted the file must contain exactly one
    tree. ``include_names`` is passed to root_numpy as ``branches``; the
    deprecated ``columns`` keyword is still accepted, with a
    DeprecationWarning. A ``selection`` keyword is parsed into column filters:
    those whose operator is found in ``OPERATORS`` are handed to root_numpy,
    the rest are applied to the table after reading. All other keywords are
    forwarded to ``root_numpy.root2array``.

    Raises ValueError if ``treename`` is omitted and the file holds no tree
    or more than one.
    """
    import root_numpy

    # fall back on the deprecated ``columns`` keyword
    if include_names is None and 'columns' in kwargs:
        include_names = kwargs.pop('columns')
        warnings.warn("Keyword argument `columns` has been renamed to "
                      "`include_names` to better match default "
                      "astropy.table.Table.read kwargs, please update "
                      "your call.", DeprecationWarning)

    # split the requested filters into those root_numpy can evaluate while
    # reading and those that must be applied to the finished table
    post_filters = None
    if 'selection' in kwargs:
        selection = kwargs.pop('selection')
        root_terms = []
        post_filters = []
        for column, operator_, operand in parse_column_filters(selection):
            try:
                symbol = [key for key in OPERATORS
                          if OPERATORS[key] is operator_][0]
            except (IndexError, KeyError):  # no root_numpy equivalent
                post_filters.append((column, operator_, operand))
            else:
                root_terms.append(
                    '{0} {1} {2!r}'.format(column, symbol, operand))
        kwargs['selection'] = ' && '.join(root_terms)

    # root_numpy wants a file name, not a file object
    if not isinstance(source, string_types):
        source = source.name

    # pick the only tree when none was named
    if treename is None:
        available = root_numpy.list_trees(source)
        if not available:
            raise ValueError("No trees found in %s" % source)
        if len(available) > 1:
            raise ValueError("Multiple trees found in %s, please select on "
                             "via the `treename` keyword argument, e.g. "
                             "`treename='events'`. Available trees are: %s."
                             % (source, ', '.join(map(repr, available))))
        treename = available[0]

    # read, then apply whatever root_numpy could not filter
    records = root_numpy.root2array(source, treename,
                                    branches=include_names, **kwargs)
    table = Table(records)
    if post_filters:
        table = filter_table(table, *post_filters)
    return table
Esempio n. 9
0
def find_tree(tree_to_look_for, filename):
    """Return the first tree name in ``filename`` that contains ``tree_to_look_for``.

    The comparison is a case-insensitive substring test against each name
    reported by ``rn.list_trees``. Raises ValueError when nothing matches.
    """
    for candidate in rn.list_trees(filename):
        if tree_to_look_for.lower() in candidate.lower():
            return candidate
    raise ValueError("No tree with name \"" + tree_to_look_for + "\" found in file")
Esempio n. 10
0
def Read_Root_file(Path_to_tree,
                   Tree_name_array,
                   Branches,
                   Tree_selection=None):
    """Read branches from one or more .root files into a single array.

    The tree that is read is the first one listed in the first file; the
    other files are assumed to contain a tree of the same name.
    The reading itself is done by root_numpy's ``root2array``.

    Parameters:
    Path_to_tree -- The complete path to the folder of the .root files
    Tree_name_array -- The file names inside the folder, without the .root
                       extension. For example: ['bla1', 'bla2']
    Branches -- The branches to read, for example ['Brho', 'D'], or the
                string 'All' to use every branch listed for the first file
    Tree_selection -- Optional selection expression forwarded to root2array,
                      for example 'ThetaLdeg > 0.0 && D == 0.0'

    Exceptions:
    ValueError -- if Tree_name_array is empty or the first file has no tree

    Return:
    array_data -- The array returned by root2array for the chosen branches

    """
    path_filename = [os.path.join(Path_to_tree, name) + '.root'
                     for name in Tree_name_array]  #The file paths
    if not path_filename:
        raise ValueError('Tree_name_array is empty: there is no .root file to read')

    path_treename = rn.list_trees(path_filename[0])  #The tree names in the first file
    if not path_treename:
        raise ValueError('No tree found in {}'.format(path_filename[0]))

    print('We are reading this root files:')
    print(Tree_name_array)

    if Branches == 'All':
        # Assumption: all the files use the same branch names
        Tree_branches = rn.list_branches(path_filename[0])
        print('All branches are chosen')
    else:
        Tree_branches = Branches
        print('These branches ')
        print(Branches)
        print('are selected')

    if Tree_selection is not None:
        print('The pre-selection over trees is ', Tree_selection)
    else:
        print('No pre-selection applied')

    # selection=None is root2array's default, so a single call covers both cases.
    return rn.root2array(filenames=path_filename,
                         treename=path_treename[0],
                         branches=Tree_branches,
                         selection=Tree_selection)
Esempio n. 11
0
def test_list_trees():
    """Check list_trees on fixture files and on a file written several times."""
    # TTree
    assert_equal(rnp.list_trees(load('vary1.root')), ['tree'])
    # TNtuple
    assert_equal(rnp.list_trees(load('ntuple.root')), ['ntuple'])
    # Writing the file repeatedly creates several key cycles of the same tree;
    # the name must still be listed only once
    with temp() as root_file:
        ttree = ROOT.TTree('tree', 'tree')
        root_file.Write()
        assert_equal(len(rnp.list_trees(root_file.GetName())), 1)
        root_file.Write()
        assert_equal(len(rnp.list_trees(root_file.GetName())), 1)
        # a second tree of the same name inside a sub-directory
        subdir = root_file.mkdir('dir')
        subdir.cd()
        ttree = ROOT.TTree('tree', 'tree')
        root_file.Write()
        assert_equal(set(rnp.list_trees(root_file.GetName())),
                     {'tree', 'dir/tree'})
Esempio n. 12
0
def check_truncate_impute(filename):
    """Exercise the tuple (truncate/impute) form of ``branches`` on a fixture.

    The fixture must contain both one-dimensional and other object-typed
    columns. Every check is run through ``root2array`` and ``tree2array``.
    """
    filename = load(filename)
    # convert everything once to discover which columns hold objects
    full = rnp.root2array(filename)
    assert_true(len(full))
    object_fields = []
    for name in full.dtype.names:
        if full.dtype[name] == 'O':
            object_fields.append(name)
    fields_1d = []
    for name in object_fields:
        first_entry = full[name][0]
        if first_entry.dtype != 'O' and len(first_entry.shape) == 1:
            fields_1d.append(name)
    fields_md = list(set(object_fields) - set(fields_1d))
    assert_true(fields_1d)
    assert_true(fields_md)
    fields_1d.sort()
    fields_md.sort()

    rfile = ROOT.TFile.Open(filename)
    tree = rfile.Get(rnp.list_trees(filename)[0])

    # run the same checks through both entry points
    readers = [(rnp.root2array, filename), (rnp.tree2array, tree)]
    for func, arg in readers:

        baseline = func(arg, branches=[(f, 0) for f in fields_1d])
        assert_true(len(baseline))
        assert_equal(set(baseline.dtype.names), set(fields_1d))
        # an explicit length of 1 gives the same output
        same = func(arg, branches=[(f, 0, 1) for f in fields_1d])
        assert_array_equal(baseline, same)
        # a fill value of 1 instead of 0 must change the output
        different = func(arg, branches=[(f, 1, 1) for f in fields_1d])
        assert_raises(AssertionError, assert_array_equal, baseline, different)
        # a length of 3 shows up in each field's dtype shape
        widened = func(arg, branches=[(f, 0, 3) for f in fields_1d])
        for field in fields_1d:
            assert_equal(widened.dtype[field].shape, (3, ))

        # length must be at least 1
        assert_raises(ValueError, func, arg, branches=[(fields_1d[0], 0, 0)])
        # tuple is not of length 2 or 3
        assert_raises(ValueError, func, arg,
                      branches=[(fields_1d[0], 1, 1, 1)])
        assert_raises(ValueError, func, arg, branches=(fields_1d[0], 1, 1, 1))
        # can only truncate 1d arrays
        assert_raises(TypeError, func, arg, branches=(fields_md[0], 0))

        # expressions
        expression = '{0}==0'.format(fields_1d[0])
        as_objects = func(arg, branches=expression)
        assert_equal(as_objects.dtype, 'O')
        truncated = func(arg, branches=(expression, 0))
        assert_equal(truncated.dtype, as_objects[0].dtype)
Esempio n. 13
0
def test_list_trees():
    """list_trees: fixture files, repeated writes, and a tree in a sub-directory."""
    # TTree
    found = rnp.list_trees(load('vary1.root'))
    assert_equal(found, ['tree'])
    # TNtuple
    found = rnp.list_trees(load('ntuple.root'))
    assert_equal(found, ['ntuple'])
    # Multiple key cycles of the same tree
    with temp() as rfile:
        tree = ROOT.TTree('tree', 'tree')
        rfile.Write()
        found = rnp.list_trees(rfile.GetName())
        assert_equal(len(found), 1)
        rfile.Write()
        found = rnp.list_trees(rfile.GetName())
        assert_equal(len(found), 1)
        rdir = rfile.mkdir('dir')
        rdir.cd()
        tree = ROOT.TTree('tree', 'tree')
        rfile.Write()
        found = rnp.list_trees(rfile.GetName())
        expected = set(['tree', 'dir/tree'])
        assert_equal(set(found), expected)
Esempio n. 14
0
 def export_root_data_contents(self, rfile):
     """
     Uses the root_numpy functions to list trees and output them.

     For every tree the tree name, its branch list and its contents (viewed
     as a numpy record array) are printed.

     :param rfile: Input ROOT file
     :return: a dictionary defining the underlying data in the file link. The key is the name of each tree,
              the value is the array read from that tree.
     """
     contents = {}
     for tree in list_trees(rfile):
         # Single-argument print(...) behaves the same on Python 2 and 3.
         print('Processing tree ' + tree)
         print(list_branches(rfile, tree))
         arr = root2array(rfile, treename=tree)
         print(str(arr.view(numpy.recarray)))
         contents[tree] = arr
     return contents
Esempio n. 15
0
def Branches_to_Arrays(Path_to_tree, Tree_names, Tree_name_inside, Branches, Tree_selection = None):
    """Read branches from one or more .root files into a single array.

    The tree that is read is always the first one listed in the first file;
    a warning is printed when that differs from Tree_name_inside.
    The reading itself is done by root_numpy's ``root2array``.

    Parameters:
    Path_to_tree -- The complete path to the folder of the .root files
    Tree_names -- The file names inside the folder, without the .root
                  extension. For example: ['bla1', 'bla2']
    Tree_name_inside -- The tree name expected inside the files, ex: 'GM', 'AD'
    Branches -- The branches to read, for example ['Brho', 'D'], or 'All' if
                one wants all the branches
    Tree_selection -- Optional selection expression forwarded to root2array,
                      for example 'ThetaLdeg > 0.0 && D == 0.0'

    Exceptions:
    ValueError -- if Tree_names is empty or the first file has no tree

    Return:
    array_data -- The array returned by root2array for the chosen branches

    """
    print('You are working with this samples: ')
    print(str(Tree_names))
    print('\n')
    path_filename = [os.path.join(Path_to_tree, name) + '.root'
                     for name in Tree_names]  #The file paths
    if not path_filename:
        raise ValueError('Tree_names is empty: there is no .root file to read')

    path_treename = rn.list_trees(path_filename[0])  #The tree names inside the first root file
    print('The trees inside are: ', path_treename)
    print('\n')
    if not path_treename:
        raise ValueError('No tree found in {}'.format(path_filename[0]))

    if path_treename[0] != Tree_name_inside:
        # Only a warning: the first tree is still the one that gets read.
        print('Wrong tree election!')
        print('\n')

    if Branches == 'All':
        Tree_branches = None  #by default that means take all in root_numpy
        print('All branches are chosen')
        print('\n')
    else:
        Tree_branches = Branches
        print('This branches ')
        print(Branches)
        print('are selected')
        print('\n')

    if Tree_selection is not None:
        print('The pre-selection over trees is ', Tree_selection)
    else:
        print('No pre-selection applied')

    # selection=None is root2array's default, so a single call covers both cases.
    return rn.root2array(filenames = path_filename, treename = path_treename[0], branches = Tree_branches, selection = Tree_selection)
Esempio n. 16
0
def load_root_tree(files,
                   tree=None,
                   columns=None,
                   ignore=None,
                   *kargs,
                   **kwargs):
    """Read a tree from one or several ROOT files into a pandas DataFrame.

    files: a file name or a list of file names; the first file is used to
        determine the tree and its branches.
    tree: name of the tree to read. If omitted, the first file must contain
        exactly one tree.
    columns: patterns selecting the branches to read (matched through
        ``get_matching_variables``); every branch is read when empty.
    ignore: pattern or list of patterns for branches to leave out.
    Extra positional and keyword arguments are passed to ``root2array``.

    If the result has a field called ``index`` it becomes the DataFrame index.

    Raises ValueError if ``tree`` is omitted and the first file contains no
    tree or more than one.
    """

    from pandas import DataFrame
    from root_numpy import root2array, list_trees, list_branches

    # check if we get a list or a single file
    if not isinstance(files, list):
        files = [files]
    # use the first file to define tree & branches
    init_file = files[0]

    # check to see if there is a specified tree, if not,
    # look for a single tree. If the choice is ambiguous,
    # raise an error and exit
    if tree is None:
        trees = list_trees(init_file)
        if len(trees) == 1:
            tree = trees[0]
        elif len(trees) == 0:
            raise ValueError('Error: no trees found in {}'.format(init_file))
        else:
            raise ValueError(
                'Ambiguous call: more than one tree found in {}'.format(
                    init_file))

    branches = list_branches(init_file, tree)

    # match existing branches to branches asked for by user
    if not columns:
        all_vars = branches
    else:
        all_vars = get_matching_variables(branches, columns)

    # handle branches that are asked to be ignored
    if ignore:
        if isinstance(ignore, string_types):
            ignore = [ignore]
        # NOTE(review): the argument order here is the reverse of the
        # `columns` call above (branches first) -- confirm against the
        # signature of get_matching_variables.
        ignored = set(get_matching_variables(ignore, branches))
        # Filter instead of list.remove(): an ignored name that was not
        # selected in the first place must not raise ValueError.
        all_vars = [var for var in all_vars if var not in ignored]

    arr = root2array(files, tree, all_vars, *kargs, **kwargs)

    if 'index' in arr.dtype.names:
        df = DataFrame.from_records(arr, index='index')
    else:
        df = DataFrame.from_records(arr)
    return df
Esempio n. 17
0
def table_from_root(source, treename=None, columns=None, **kwargs):
    """Read one tree of a ROOT file into a Table.

    ``source`` is a file name, or an object whose ``name`` attribute is used
    instead. When ``treename`` is omitted the file must contain exactly one
    tree. ``columns`` is passed to root_numpy as ``branches``. A ``selection``
    keyword is parsed into column filters: those whose operator is found in
    ``OPERATORS`` are evaluated by root_numpy, the others are applied to the
    table afterwards. Remaining keywords go to ``root_numpy.root2array``.

    Raises ValueError if ``treename`` is omitted and the file holds no tree
    or more than one.
    """
    import root_numpy

    # Filters that root_numpy cannot evaluate are kept aside and applied to
    # the table once it has been read.
    has_selection = 'selection' in kwargs
    filters = [] if has_selection else None
    if has_selection:
        selection = kwargs.pop('selection')
        root_selection = []
        for name, func, operand in parse_column_filters(selection):
            try:
                symbol = [key for key in OPERATORS
                          if OPERATORS[key] is func][0]
            except (IndexError, KeyError):  # no root_numpy equivalent
                filters.append((name, func, operand))
                continue
            root_selection.append(
                '{0} {1} {2!r}'.format(name, symbol, operand))
        kwargs['selection'] = ' && '.join(root_selection)

    # root_numpy expects a file name
    if not isinstance(source, string_types):
        source = source.name

    # use the only tree in the file when none was requested
    if treename is None:
        found = root_numpy.list_trees(source)
        n_found = len(found)
        if n_found == 1:
            treename = found[0]
        elif n_found == 0:
            raise ValueError("No trees found in %s" % source)
        else:
            raise ValueError("Multiple trees found in %s, please select on "
                             "via the `treename` keyword argument, e.g. "
                             "`treename='events'`. Available trees are: %s."
                             % (source, ', '.join(map(repr, found))))

    # read, filter, and return
    array = root_numpy.root2array(source, treename, branches=columns,
                                  **kwargs)
    table = Table(array)
    return filter_table(table, *filters) if filters else table
Esempio n. 18
0
def read_trees(input_dir, selection):
    """Read every recognised tree under ``input_dir`` into one DataFrame.

    Files are found with the glob pattern ``input_dir + "*.root"``. A tree
    name is split on "__" into a process (the text after an optional
    "Ttree_" prefix), an optional systematic and an optional "plus"/"minus"
    marker. Trees whose process is not a key of ``process_groups``, or whose
    marker is anything else, are skipped, as are trees that read back empty.
    Each kept frame gets ``Group`` and ``Category`` columns; both are
    categorical in the concatenated result, whose index is reset.
    """
    frames = []

    for root_file in glob.iglob(input_dir + r"*.root"):
        for tree in list_trees(root_file):
            # Parse tree name
            parts = tree.split("__")
            process = parts[0].split("Ttree_", 1)[-1]
            if process not in process_groups.keys():
                continue

            if len(parts) < 2:
                systematic = ""
            else:
                systematic = "__" + parts[1]
                if len(parts) > 2:
                    direction = parts[2]
                    if direction == "plus":
                        systematic += "Up"
                    elif direction == "minus":
                        systematic += "Down"
                    else:
                        continue

            with warnings.catch_warnings():
                warnings.simplefilter('ignore', RuntimeWarning)
                frame = read_tree(root_file, tree, where=selection)

            label = process + systematic
            if frame.empty:
                print(label, "skipped (empty)")
                continue
            print(label)

            # Label $PROCESS__$SYSTEMATIC
            frame = frame.assign(Group=process_groups[process] + systematic)
            frame = frame.assign(Category=label)

            frames.append(frame)

    combined = pd.concat(frames)
    combined.Category = combined.Category.astype('category')
    combined.Group = combined.Group.astype('category')

    return combined.reset_index(drop=True)
Esempio n. 19
0
def export_root_to_csv(filename, branches=None):
    """Export every tree of a ROOT file to its own CSV file.

    Each tree is written to ``<filename without extension>_<tree name>.csv``;
    any '/' in the tree name is replaced by '_'.

    :param filename: path of the ROOT file
    :param branches: branches to export, passed to ``root_numpy.root2array``
        (None exports all of them)
    :return: list with the names of the CSV files written

    Requires the root_numpy and pandas modules.
    """
    import root_numpy
    import os
    trees = root_numpy.list_trees(filename)
    # The names listed here are trees, not branches.
    print("The following trees are found:\n %s" % trees)
    base_name = os.path.splitext(filename)[0]
    result = []
    for tree_name in trees:
        x = root_numpy.root2array(filename, treename=tree_name, branches=branches)
        # A tree stored in a sub-directory is named like 'dir/tree'; flatten
        # the separator so the CSV does not point into a missing directory.
        flat_name = tree_name.replace('/', '_')
        new_file_name = base_name + '_' + flat_name + '.csv'
        pandas.DataFrame(x).to_csv(new_file_name)
        result.append(new_file_name)
    print("Successfully converted")
    return result
Esempio n. 20
0
def check_truncate_impute(filename):
    """Check truncation/imputation of object columns for one fixture file.

    The same assertions are made with ``root2array`` (given the file name)
    and ``tree2array`` (given the first tree of the file).
    """
    filename = load(filename)
    # first pass: read everything and classify the object-typed columns
    everything = rnp.root2array(filename)
    assert_true(len(everything))
    dtype = everything.dtype
    object_fields = [name for name in dtype.names if dtype[name] == 'O']
    fields_1d = [name for name in object_fields
                 if everything[name][0].dtype != 'O'
                 and len(everything[name][0].shape) == 1]
    fields_md = list(set(object_fields) - set(fields_1d))
    assert_true(fields_1d)
    assert_true(fields_md)
    fields_1d.sort()
    fields_md.sort()

    def spec(*rest):
        # one (field, *rest) tuple per one-dimensional field
        return [(name,) + rest for name in fields_1d]

    rfile = ROOT.TFile.Open(filename)
    tree = rfile.Get(rnp.list_trees(filename)[0])

    # test both root2array and tree2array
    for func, arg in [(rnp.root2array, filename), (rnp.tree2array, tree)]:

        arr1 = func(arg, branches=spec(0))
        assert_true(len(arr1))
        assert_equal(set(arr1.dtype.names), set(fields_1d))
        # Giving length of 1 will result in the same output
        arr2 = func(arg, branches=spec(0, 1))
        assert_array_equal(arr1, arr2)
        # fill_value of 1 instead of 0 should change output array
        arr2 = func(arg, branches=spec(1, 1))
        assert_raises(AssertionError, assert_array_equal, arr1, arr2)
        # check dtype shape
        arr3 = func(arg, branches=spec(0, 3))
        for name in fields_1d:
            assert_equal(arr3.dtype[name].shape, (3,))

        head = fields_1d[0]
        # length must be at least 1
        assert_raises(ValueError, func, arg, branches=[(head, 0, 0)])
        # tuple is not of length 2 or 3
        assert_raises(ValueError, func, arg, branches=[(head, 1, 1, 1)])
        assert_raises(ValueError, func, arg, branches=(head, 1, 1, 1))
        # can only truncate 1d arrays
        assert_raises(TypeError, func, arg, branches=(fields_md[0], 0))

        # expressions
        arr1 = func(arg, branches='{0}==0'.format(head))
        assert_equal(arr1.dtype, 'O')
        arr2 = func(arg, branches=('{0}==0'.format(head), 0))
        assert_equal(arr2.dtype, arr1[0].dtype)
Esempio n. 21
0
def run(input_fns, output_fn, h1, h2, h3):
    """Select three-body candidates from ``input_fns`` and write them to ``output_fn``.

    The first input file must contain exactly one tree, which is read from
    all input files. The H1/H2/H3 columns are reordered row by row using
    their ``ProbK`` values, energies and the combined ``B_*`` four-vector
    and mass are computed, then the muon veto, IPChi2 and PID cuts are
    applied. The result is written under the key ``B2<h1><h2><h3>``.

    :param input_fns: sequence of input ROOT file names
    :param output_fn: name of the output ROOT file (overwritten)
    :param h1, h2, h3: keys of ``mass_dict`` naming the three children; they
        must already be in sorted order
    :raises AssertionError: if the first file does not hold exactly one tree
        or the children are not sorted
    """
    keys = list_trees(input_fns[0])
    assert len(keys) == 1, keys
    df = read_root(input_fns, keys[0])

    # Use the builtin ``bool``: the ``np.bool`` alias was deprecated in
    # NumPy 1.20 and is not available in NumPy 1.24-1.26.
    for flag in ('H1_isMuon', 'H2_isMuon', 'H3_isMuon'):
        df[flag] = df[flag].astype(bool)

    # Sort the columns so that the first is the most kaon-like
    assert sorted([h1, h2, h3]) == [h1, h2, h3], \
        'Children are ranked from kaon-like to pion-like'
    order = np.argsort(df[['H3_ProbK', 'H2_ProbK', 'H1_ProbK']], axis=1)
    for col in [c for c in df.columns if c.startswith('H1_')]:
        col = col[len('H1_'):]
        cols = [f'H1_{col}', f'H2_{col}', f'H3_{col}']
        # apply the per-row permutation to the three sibling columns at once
        df[cols] = df[cols].values[np.arange(order.shape[0])[:, None], order]

    # Compute the PE and mass of all particles
    for head, mass in [('H1', mass_dict[h1]), ('H2', mass_dict[h2]),
                       ('H3', mass_dict[h3])]:
        df.eval(f'{head}_P = sqrt({head}_PX**2 + {head}_PY**2 + {head}_PZ**2)',
                inplace=True)
        df.eval(f'{head}_PE = sqrt({mass}**2 + {head}_P**2)', inplace=True)
    for component in ['PE', 'PX', 'PY', 'PZ']:
        df.eval(
            f'B_{component} = H1_{component} + H2_{component} + H3_{component}',
            inplace=True)
    df.eval('B_M = sqrt(B_PE**2 - B_PX**2 - B_PY**2 - B_PZ**2)', inplace=True)

    # Apply ignore muons
    df.query('~(H1_isMuon | H2_isMuon | H3_isMuon)', inplace=True)
    # Apply an additional selection
    df.query('(H1_IPChi2 > 25) & (H2_IPChi2 > 25) & (H3_IPChi2 > 25)',
             inplace=True)
    # Apply a PID selection
    df.query(
        f'(H1_Prob{h1} > {pid_cut}) & (H2_Prob{h2} > {pid_cut}) & (H3_Prob{h3} > {pid_cut})',
        inplace=True)

    to_root(df, output_fn, key=f'B2{h1}{h2}{h3}', mode='w', store_index=False)
Esempio n. 22
0
def table_from_root(f, treename=None, include_names=None, **kwargs):
    """Read a tree from the ROOT file(s) described by ``f`` into a Table.

    ``f`` is expanded with ``file_list``. When ``treename`` is omitted the
    first file must contain exactly one tree. ``include_names`` is passed to
    root_numpy as ``branches``; the deprecated ``columns`` keyword is still
    accepted, with a DeprecationWarning. A ``selection`` given as a list or
    tuple is joined with ' && '. Remaining keywords are forwarded to
    ``root_numpy.root2array``.

    Raises ValueError if ``treename`` is omitted and the first file holds no
    tree or more than one.
    """
    import root_numpy

    # fall back on the deprecated ``columns`` keyword
    if include_names is None and 'columns' in kwargs:
        include_names = kwargs.pop('columns')
        warnings.warn("Keyword argument `columns` has been renamed to "
                      "`include_names` to better match default "
                      "astropy.table.Table.read kwargs, please update "
                      "your call.", DeprecationWarning)

    # a sequence of selection terms becomes one ``selection`` string
    if 'selection' in kwargs:
        terms = kwargs['selection']
        if isinstance(terms, (list, tuple)):
            kwargs['selection'] = ' && '.join(terms)

    files = file_list(f)

    # use the only tree of the first file when none was requested
    if treename is None:
        first = files[0]
        available = root_numpy.list_trees(first)
        if len(available) == 0:
            raise ValueError("No trees found in %s" % first)
        if len(available) > 1:
            raise ValueError("Multiple trees found in %s, please select on "
                             "via the `treename` keyword argument, e.g. "
                             "`treename='events'`. Available trees are: %s."
                             % (first, ', '.join(map(repr, available))))
        treename = available[0]

    # read and return
    records = root_numpy.root2array(files, treename, branches=include_names,
                                    **kwargs)
    return Table(records)
Esempio n. 23
0
    def __init__(self, files, chunksize, n_files=100000, key=None, **kwargs):
        """
        Creates a DataChunk object building the sample combining several root files.

        Every file is opened once to count the entries of its tree; the file
        name, tree name and entry count are recorded and the total is summed.

        Arguments
          files - list of strings
            List of file names to be read

          chunksize - int
            Total number of rows to be picked from the various files

          n_files - int
            Maximal number of randomly selected files to pick from in a single chunk.
            Default: 100000

          key - string or None
            Name of the TTree to be loaded. Can be None (default) if a single TTree
            is defined per TFile; the first tree listed in each file is then used.

          Other arguments are passed to root_numpy.root2array complementing the
          arguments: `file`, `treename`, `start`, `stop` defined by DataChunks.

        Raises
          IOError if a file cannot be opened, contains no tree when `key` is
          not given, or does not contain the requested tree.
        """
        self._files = list()
        self._n_files = n_files
        self._chunksize = chunksize
        self._ntot = 0
        self._kwargs = kwargs
        for f in files:
            key_ = key if key else None
            if not key_:
                trees = rnp.list_trees(f)
                if not trees:
                    raise IOError("No TTree found in file %s" % f)
                key_ = trees[0]
            root_file = ROOT.TFile.Open(f)
            if not root_file:
                raise IOError("File %s could not be opened" % f)
            try:
                root_tree = root_file.Get(key_)
                # Check the tree itself, not the file a second time.
                if not root_tree:
                    raise IOError(
                        "Tree %s could not be found in file %s" % (key_, f))
                entries = root_tree.GetEntries()
            finally:
                # Only the entry count is kept, so release the file right away
                # instead of leaving one open handle per input file.
                root_file.Close()
            self._files.append((f, key_, entries))
            self._ntot += entries
Esempio n. 24
0
def _LoadRoot(filepath):
    """Load the 'optics' tree (or, failing that, the 'orbit' tree) of a ROOT file.

    Returns a BDSAsciiData instance with one property per branch and one
    appended row per tree entry. Properties are only declared when the tree
    has at least one entry.

    Raises IOError if root_numpy is unavailable or neither tree is present.
    """
    if not useRootNumpy:
        raise IOError("root_numpy not available - can't load ROOT file")
    data = BDSAsciiData()
    trees = _rnp.list_trees(filepath)

    # 'optics' takes precedence over 'orbit'
    if 'optics' in trees:
        treename = 'optics'
    elif 'orbit' in trees:
        treename = 'orbit'
    else:
        raise IOError("This file doesn't have the required tree 'optics'.")
    branches = _rnp.list_branches(filepath, treename)
    treedata = _rnp.root2array(filepath, treename)

    n_rows = len(treedata[branches[0]])
    for row in range(n_rows):
        if row == 0:
            # declare the columns once, just before the first row is stored
            for branch in branches:
                data._AddProperty(branch)
        data.append([treedata[branch][row] for branch in branches])
    return data
Esempio n. 25
0
def read_root(fname,
              tree_name=None,
              variables=None,
              ignore=None,
              chunksize=None,
              *kargs,
              **kwargs):
    """
    Read a ROOT file into a pandas DataFrame.
    Further *kargs and **kwargs are passed to root_numpy's root2array.
    If the root file contains a branch called index, it will become the DataFrame's index.

    Parameters
    ----------
    fname: string
        The filename of the root file
    tree_name: string
        The name of the tree to load. May be omitted when the file contains exactly one tree.
    variables: sequence
        A sequence of shell-patterns. Matching variables are read. The sequence is not modified.
    ignore: sequence
        A sequence of shell-patterns. All matching variables are ignored (overriding the variables argument)
    chunksize: int
        If this parameter is specified, an iterator is returned that yields DataFrames with chunksize rows

    Returns
    -------
        DataFrame from the ROOT file, or a generator of DataFrames when `chunksize` is given

    Raises
    ------
    ValueError
        If `tree_name` is omitted and the file does not contain exactly one tree,
        or if `ignore` matches the index variable.

    Notes
    -----

        >>> df = read_root('test.root', 'MyTree', variables=['x_*', 'y_*'], selection='x_1 > 100')

    """
    if not tree_name:
        trees = list_trees(fname)
        if len(trees) == 1:
            tree_name = trees[0]
        elif not trees:
            raise ValueError('No trees found in {}'.format(fname))
        else:
            raise ValueError('More than one tree found in {}'.format(fname))

    if not variables:
        all_vars = None
    else:
        # index is always loaded if it exists; build a new list so that the
        # caller's sequence is left untouched.
        variables = list(variables) + ['index']
        all_vars = get_matching_variables(fname, tree_name, variables)

    if ignore:
        if not all_vars:
            all_vars = get_matching_variables(fname, tree_name, ['*'])

        ignored = get_matching_variables(fname, tree_name, ignore)
        if 'index' in ignored:
            raise ValueError('index variable is being ignored!')
        # Filter instead of list.remove(): an ignored branch that was never
        # selected by `variables` must not raise.
        all_vars = [var for var in all_vars if var not in ignored]

    if chunksize:
        f = ROOT.TFile(fname)
        try:
            n_entries = f.Get(tree_name).GetEntries()
        finally:
            # Release the file handle even if the tree lookup fails.
            f.Close()

        def genchunks():
            for chunk in range(int(ceil(float(n_entries) / chunksize))):
                arr = root2array(fname,
                                 tree_name,
                                 all_vars,
                                 start=chunk * chunksize,
                                 stop=(chunk + 1) * chunksize,
                                 *kargs,
                                 **kwargs)
                yield convert_to_dataframe(arr)

        return genchunks()

    arr = root2array(fname, tree_name, all_vars, *kargs, **kwargs)
    return convert_to_dataframe(arr)
#branch_names = 'absCosThetaStar_CS,absCosTheta_bb,absCosTheta_gg,PhoJetMinDr,customLeadingPhotonIDMVA,customSubLeadingPhotonIDMVA,leadingJet_DeepCSV,subleadingJet_DeepCSV,leadingPhotonSigOverE,subleadingPhotonSigOverE,sigmaMOverM,diphotonCandidatePtOverdiHiggsM,dijetCandidatePtOverdiHiggsM,leadingJet_bRegNNResolution,subleadingJet_bRegNNResolution,noexpand:sigmaMJets/Mjj,noexpand:leadingPhoton_pt/CMS_hgg_mass,noexpand:subleadingPhoton_pt/CMS_hgg_mass,noexpand:leadingJet_pt/Mjj,noexpand:subleadingJet_pt/Mjj,PhoJetOtherDr'.split(",")
#DeepJet
# Comma-separated list of input branches, split into a list with surrounding
# whitespace removed from every entry.
branch_names = [
    name.strip() for name in
    'absCosThetaStar_CS,absCosTheta_bb,absCosTheta_gg,PhoJetMinDr,customLeadingPhotonIDMVA,customSubLeadingPhotonIDMVA,leadingJet_DeepFlavour,subleadingJet_DeepFlavour,leadingPhotonSigOverE,subleadingPhotonSigOverE,sigmaMOverM,diphotonCandidatePtOverdiHiggsM,dijetCandidatePtOverdiHiggsM,leadingJet_bRegNNResolution,subleadingJet_bRegNNResolution,sigmaMJets,noexpand:leadingPhoton_pt/CMS_hgg_mass,noexpand:subleadingPhoton_pt/CMS_hgg_mass,noexpand:leadingJet_pt/Mjj,noexpand:subleadingJet_pt/Mjj,PhoJetOtherDr,rho'.split(
        ",")
]
#DeepJet + Mjj
#branch_names = 'absCosThetaStar_CS,absCosTheta_bb,absCosTheta_gg,PhoJetMinDr,customLeadingPhotonIDMVA,customSubLeadingPhotonIDMVA,leadingJet_DeepFlavour,subleadingJet_DeepFlavour,leadingPhotonSigOverE,subleadingPhotonSigOverE,sigmaMOverM,diphotonCandidatePtOverdiHiggsM,dijetCandidatePtOverdiHiggsM,leadingJet_bRegNNResolution,subleadingJet_bRegNNResolution,sigmaMJets,Mjj,rho'.split(",")

# Parenthesised single-argument form prints the same under Python 2 and 3.
print(branch_names)

import pandas as pd
import root_pandas as rpd
from root_numpy import root2array, list_trees

# Show which trees each background input file provides.
for background_file in utils.IO.backgroundName:
    print(list_trees(background_file))

preprocessing.set_signals_and_backgrounds("bbggSelectionTree", branch_names)
X_bkg, y_bkg, weights_bkg, X_sig, y_sig, weights_sig = preprocessing.set_variables(
    branch_names)

#relative weighting between components of one class is kept, all classes normalized to the same
#weights_sig=preprocessing.weight_signal_with_resolution(weights_sig,y_sig)  #!!!! reweight 28112019
weights_bkg, weights_sig = preprocessing.normalize_process_weights(
    weights_bkg, y_bkg, weights_sig, y_sig)

X_bkg, y_bkg, weights_bkg = preprocessing.randomize(X_bkg, y_bkg, weights_bkg)
X_sig, y_sig, weights_sig = preprocessing.randomize(X_sig, y_sig, weights_sig)

# Parenthesised single-argument prints behave the same under Python 2 and 3.
print(X_bkg.shape)
print(y_bkg.shape)
Esempio n. 27
0
# plt.rcParams.update({'font.size': 18})

# Command line: one or more input ROOT files and an output directory.
parser = argparse.ArgumentParser(prog='./Efficiencies')
parser.add_argument('-i', '--input', nargs='+', help='specify input root file')
parser.add_argument('-o', '--output', default='./', help='specify output dir')
args = parser.parse_args()

inputs, output = args.input, args.output

# Create the output directory if it is not there yet.
if not os.path.isdir(output):
    os.mkdir(output)

# Always work with a list of inputs; tree names are read from the first file.
if not isinstance(inputs, list):
    inputs = [inputs]
treeName = list_trees(inputs[0])

# The script cannot choose between several trees on its own.
if len(treeName) > 1:
    print("more then one tree in file ... specify, which tree to use")
    exit()

# acceptance selection
selection = 'z_genMass > 56 ' \
            '& z_genMass < 116 ' \
            '& muon_genPt > 30 ' \
            '& antiMuon_genPt > 30 ' \
Esempio n. 28
0
def read_root(paths, key=None, columns=None, ignore=None, chunksize=None, where=None, flatten=False, *args, **kwargs):
    """
    Read a ROOT file, or list of ROOT files, into a pandas DataFrame.
    Further *args and **kwargs are passed to root_numpy's root2array.
    If the root file contains a branch matching __index__*, it will become the DataFrame's index.

    Parameters
    ----------
    paths: string or list
        The path(s) to the root file(s)
    key: string
        The key of the tree to load. May be omitted when the first file contains exactly one tree.
    columns: str or sequence of str
        A sequence of shell-patterns (can contain *, ?, [] or {}). Matching columns are read.
        The columns beginning with `noexpand:` are not interpreted as shell-patterns,
        allowing formula columns such as `noexpand:2*x`. The column in the returned DataFrame
        will not have the `noexpand:` prefix.
    ignore: str or sequence of str
        A sequence of shell-patterns (can contain *, ?, [] or {}). All matching columns are ignored (overriding the columns argument).
    chunksize: int
        If this parameter is specified, an iterator is returned that yields DataFrames with `chunksize` rows.
    where: str
        Only rows that match the expression will be read.
    flatten: sequence of str
        A sequence of column names. Will use root_numpy.stretch to flatten arrays in the specified columns into
        individual entries. All arrays specified in the columns must have the same length for this to work.
        Be careful if you combine this with chunksize, as chunksize will refer to the number of unflattened entries,
        so you will be iterating over a number of entries that is potentially larger than chunksize.
        The index of each element within its former array will be saved in the __array_index column.

    Returns
    -------
        DataFrame created from matching data in the specified TTree, or a generator of
        DataFrames when `chunksize` is given

    Raises
    ------
    ValueError
        If `key` is omitted and the first file does not contain exactly one tree,
        or if `ignore` matches an `__index__*` branch.

    Notes
    -----

        >>> df = read_root('test.root', 'MyTree', columns=['A{B,C}*', 'D'], where='ABB > 100')

    """

    if not isinstance(paths, list):
        paths = [paths]
    # Use a single file to search for trees and branches
    seed_path = paths[0]

    if not key:
        trees = list_trees(seed_path)
        if len(trees) == 1:
            key = trees[0]
        elif len(trees) == 0:
            raise ValueError('No trees found in {}'.format(seed_path))
        else:
            raise ValueError('More than one tree found in {}'.format(seed_path))

    branches = list_branches(seed_path, key)

    if not columns:
        all_vars = branches
    else:
        if isinstance(columns, string_types):
            columns = [columns]
        # __index__* is always loaded if it exists
        # XXX Figure out what should happen with multi-dimensional indices
        index_branches = list(filter(lambda x: x.startswith('__index__'), branches))
        if index_branches:
            columns = columns[:]
            columns.append(index_branches[0])
        columns, noexpand = filter_noexpand_columns(columns)
        columns = list(itertools.chain.from_iterable(list(map(expand_braces, columns))))
        all_vars = get_matching_variables(branches, columns) + noexpand

    if ignore:
        if isinstance(ignore, string_types):
            ignore = [ignore]
        # Brace patterns have to be expanded *before* matching (as is done for
        # `columns`); expanding the already-matched names never lets a
        # pattern such as 'A{B,C}*' match anything.
        ignore = list(itertools.chain.from_iterable(map(expand_braces, ignore)))
        ignored = get_matching_variables(branches, ignore, fail=False)
        if any(var.startswith('__index__') for var in ignored):
            raise ValueError('__index__* branch is being ignored!')
        # Filter instead of list.remove(): an ignored branch that was not
        # selected by `columns` must not raise.
        all_vars = [var for var in all_vars if var not in ignored]

    def do_flatten(arr, flatten):
        # Stretch array-valued columns into one row per element and record the
        # element position in the '__array_index' column.
        if flatten is True:
            warnings.warn(" The option flatten=True is deprecated. Please specify the branches you would like "
                          "to flatten in a list: flatten=['foo', 'bar']", FutureWarning)
            arr_, idx = stretch(arr, return_indices=True)
        else:
            nonscalar = get_nonscalar_columns(arr)
            fields = [x for x in arr.dtype.names if (x not in nonscalar or x in flatten)]
            will_drop = [x for x in arr.dtype.names if x not in fields]
            if will_drop:
                warnings.warn("Ignored the following non-scalar branches: {bad_names}"
                      .format(bad_names=", ".join(will_drop)), UserWarning)
            arr_, idx = stretch(arr, fields=fields, return_indices=True)
        arr = append_fields(arr_, '__array_index', idx, usemask=False, asrecarray=True)
        return arr

    if chunksize:
        # A TChain over all files gives the total number of entries to split.
        tchain = ROOT.TChain(key)
        for path in paths:
            tchain.Add(path)
        n_entries = tchain.GetEntries()
        # XXX could explicitly clean up the opened TFiles with TChain::Reset

        def genchunks():
            for chunk in range(int(ceil(float(n_entries) / chunksize))):
                arr = root2array(paths, key, all_vars, start=chunk * chunksize, stop=(chunk+1) * chunksize, selection=where, *args, **kwargs)
                if flatten:
                    arr = do_flatten(arr, flatten)
                yield convert_to_dataframe(arr)
        return genchunks()

    arr = root2array(paths, key, all_vars, selection=where, *args, **kwargs)
    if flatten:
        arr = do_flatten(arr, flatten)
    return convert_to_dataframe(arr)
Esempio n. 29
0
import numpy as np
import math
#from matplotlib import pylab as plt
from ROOT import TTree, TFile, TH2D, TCanvas, TH1F, gROOT
from root_numpy import array2hist, hist2array, fill_hist, tree2array, root2array, list_trees

# Open the ROOT file and fetch the object stored under the key "PhaseSpace".
filename = "../output/ClusterAllOne_main_phs1_gamma.root"
inputFile = TFile(filename)
tree = inputFile.Get("PhaseSpace")
# NOTE(review): the result of this call is discarded, and an open TFile is
# passed where root_numpy presumably expects a file name - confirm.
list_trees(inputFile)
# Energy = hist2array(inputFile)
# NOTE(review): a TTree object is passed as the first argument together with
# treename="dX"; presumably the intent was to read the "dX" branch of the
# "PhaseSpace" tree - confirm against the root_numpy API.
E_dep = root2array(tree, treename="dX")

# NOTE(review): neither `plt` nor `E_kinetic` is defined in the visible script
# (the pylab import is commented out and only `E_dep` is assigned), so the
# following lines look like they would raise NameError - confirm.
plt.hist(E_kinetic)
plt.pause(0.01)
#print(E_kinetic)
#np.save('pythonArr', E_kinetic)
# Wait for user input before the script ends.
input("press enter to exit")
Esempio n. 30
0
def test_list_trees():
    """list_trees reports the single tree named 'tree' in vary1.root."""
    expected = ['tree']
    found = rnp.list_trees(load('vary1.root'))
    assert_equal(found, expected)
Esempio n. 31
0
    # '17E': glob.glob(storage+"_V09_UL2017E/201214_134634/0000/output_0_*"),
    # '17F': glob.glob(storage+"_V09_UL2017F/201214_141544/0000/output_0_*"),
    '17H': glob.glob(storage + "_V11_UL2017H/210420_072656/0000/output_0_*"),

    # '18A': glob.glob(storage+"_V11_UL2018A/210420_072218/0000/output_0_*"),
    # '18B': glob.glob(storage+"_V11_UL2018B/210420_072153/0000/output_0_*"),
    # '18C': glob.glob(storage+"_V11_UL2018C/210420_072235/0000/output_0_*"),
    # '18D': glob.glob(storage+"_V11_UL2018D/210420_072249/000?/output_0_*"),
    # # Missing runs from 2018
    # '18D': glob.glob(storage+"_V11_UL2018D_v3/210506_184017/0000/output_0_*")
}

for era, input in inputs.iteritems():
    print("##### era {0}".format(era))

    # The tree name is read from the first file of the era and reused for all
    # of its files.
    treeName = rn.list_trees(input[0])[0]
    print(">>> Load Events from {0} files".format(len(input)))
    _df = []
    for i in input:
        print("> file " + i)
        tfile = ROOT.TFile.Open(i)
        # Only the selected-entry count is needed from this handle; close it
        # right away so file handles do not pile up over many input files.
        n_selected = tfile.Get(treeName).GetEntries(selection)
        tfile.Close()
        if n_selected == 0:
            print("> no events in this file found! continue with next file")
            continue
        _df.append(
            tree_to_df(
                rn.root2array(i,
                              treeName,
                              selection=selection,
                              branches=branches)))
    print(">>> Concatenate")
Esempio n. 32
0
    if len(options.data) > 2:
        raise SystemExit("ERROR: To many arguments for the data file with -d. Use: -d filename.root -d tree.")


    if options.bins == None:
        logging.info("No binning with -n specified, use the default value 100")
        bins = 100
    else:
        bins = options.bins


    #Reference MC
    referenceMC = options.montecarlo[0]
    #if only one argument to -m or -d is given it is assumed that there is only one tree
    if len(options.montecarlo) == 1:
        trees = list_trees(referenceMC)
        if len(trees) == 1:
            referenceMC_tree = trees[0]
        else:
            raise SystemExit('No tree or more than one found in ', referenceMC )
    else:
        referenceMC_tree = options.montecarlo[1]

    #Reference Data
    referenceData = options.data[0]
    #if only one argument to -m or -d is given it is assumed that there is only one tree
    if len(options.data) == 1:
        trees = list_trees(referenceData)
        if len(trees) == 1:
            referenceData_tree = trees[0]
        else:
Esempio n. 33
0
 def get_info(self):
     """Print every tree name of self.file (capitalised, followed by ':') and the list of its branches."""
     source = self.file
     for tree_name in rn.list_trees(source):
         heading = str.capitalize(tree_name) + ":"
         print(heading)
         print(rn.list_branches(source, treename=tree_name))
Esempio n. 34
0
def read_root(paths,
              key=None,
              columns=None,
              ignore=None,
              chunksize=None,
              where=None,
              flatten=False,
              *args,
              **kwargs):
    """
    Read a ROOT file, or list of ROOT files, into a pandas DataFrame.
    Further *args and **kwargs are passed to root_numpy's root2array.
    If the root file contains a branch matching __index__*, it will become the DataFrame's index.

    Parameters
    ----------
    paths: string or list
        The path(s) to the root file(s)
    key: string
        The key of the tree to load. May be omitted when the first file contains exactly one tree.
    columns: str or sequence of str
        A sequence of shell-patterns (can contain *, ?, [] or {}). Matching columns are read.
        The columns beginning with `noexpand:` are not interpreted as shell-patterns,
        allowing formula columns such as `noexpand:2*x`. The column in the returned DataFrame
        will not have the `noexpand:` prefix.
    ignore: str or sequence of str
        A sequence of shell-patterns (can contain *, ?, [] or {}). All matching columns are ignored (overriding the columns argument).
    chunksize: int
        If this parameter is specified, an iterator is returned that yields DataFrames with `chunksize` rows.
    where: str
        Only rows that match the expression will be read.
    flatten: sequence of str
        A sequence of column names. Will use root_numpy.stretch to flatten arrays in the specified columns into
        individual entries. All arrays specified in the columns must have the same length for this to work.
        Be careful if you combine this with chunksize, as chunksize will refer to the number of unflattened entries,
        so you will be iterating over a number of entries that is potentially larger than chunksize.
        The index of each element within its former array will be saved in the __array_index column.

    Returns
    -------
        DataFrame created from matching data in the specified TTree, or a generator of
        DataFrames when `chunksize` is given

    Raises
    ------
    ValueError
        If `key` is omitted and the first file does not contain exactly one tree,
        or if `ignore` matches an `__index__*` branch.

    Notes
    -----

        >>> df = read_root('test.root', 'MyTree', columns=['A{B,C}*', 'D'], where='ABB > 100')

    """

    if not isinstance(paths, list):
        paths = [paths]
    # Use a single file to search for trees and branches
    seed_path = paths[0]

    if not key:
        trees = list_trees(seed_path)
        if len(trees) == 1:
            key = trees[0]
        elif len(trees) == 0:
            raise ValueError('No trees found in {}'.format(seed_path))
        else:
            raise ValueError(
                'More than one tree found in {}'.format(seed_path))

    branches = list_branches(seed_path, key)

    if not columns:
        all_vars = branches
    else:
        if isinstance(columns, string_types):
            columns = [columns]
        # __index__* is always loaded if it exists
        # XXX Figure out what should happen with multi-dimensional indices
        index_branches = list(
            filter(lambda x: x.startswith('__index__'), branches))
        if index_branches:
            columns = columns[:]
            columns.append(index_branches[0])
        columns, noexpand = filter_noexpand_columns(columns)
        columns = list(
            itertools.chain.from_iterable(list(map(expand_braces, columns))))
        all_vars = get_matching_variables(branches, columns) + noexpand

    if ignore:
        if isinstance(ignore, string_types):
            ignore = [ignore]
        # Brace patterns have to be expanded *before* matching (as is done for
        # `columns`); expanding the already-matched names never lets a
        # pattern such as 'A{B,C}*' match anything.
        ignore = list(
            itertools.chain.from_iterable(map(expand_braces, ignore)))
        ignored = get_matching_variables(branches, ignore, fail=False)
        if any(var.startswith('__index__') for var in ignored):
            raise ValueError('__index__* branch is being ignored!')
        # Filter instead of list.remove(): an ignored branch that was not
        # selected by `columns` must not raise.
        all_vars = [var for var in all_vars if var not in ignored]

    def do_flatten(arr, flatten):
        # Stretch array-valued columns into one row per element and record the
        # element position in the '__array_index' column.
        if flatten is True:
            warnings.warn(
                " The option flatten=True is deprecated. Please specify the branches you would like "
                "to flatten in a list: flatten=['foo', 'bar']", FutureWarning)
            arr_, idx = stretch(arr, return_indices=True)
        else:
            nonscalar = get_nonscalar_columns(arr)
            fields = [
                x for x in arr.dtype.names
                if (x not in nonscalar or x in flatten)
            ]
            will_drop = [x for x in arr.dtype.names if x not in fields]
            if will_drop:
                warnings.warn(
                    "Ignored the following non-scalar branches: {bad_names}".
                    format(bad_names=", ".join(will_drop)), UserWarning)
            arr_, idx = stretch(arr, fields=fields, return_indices=True)
        arr = append_fields(arr_,
                            '__array_index',
                            idx,
                            usemask=False,
                            asrecarray=True)
        return arr

    if chunksize:
        # A TChain over all files gives the total number of entries to split.
        tchain = ROOT.TChain(key)
        for path in paths:
            tchain.Add(path)
        n_entries = tchain.GetEntries()

        # XXX could explicitly clean up the opened TFiles with TChain::Reset

        def genchunks():
            for chunk in range(int(ceil(float(n_entries) / chunksize))):
                arr = root2array(paths,
                                 key,
                                 all_vars,
                                 start=chunk * chunksize,
                                 stop=(chunk + 1) * chunksize,
                                 selection=where,
                                 *args,
                                 **kwargs)
                if flatten:
                    arr = do_flatten(arr, flatten)
                yield convert_to_dataframe(arr)

        return genchunks()

    arr = root2array(paths, key, all_vars, selection=where, *args, **kwargs)
    if flatten:
        arr = do_flatten(arr, flatten)
    return convert_to_dataframe(arr)
Esempio n. 35
0
def read_root(paths, key=None, columns=None, ignore=None, chunksize=None, where=None, flatten=False, *args, **kwargs):
    """
    Read a ROOT file, or list of ROOT files, into a pandas DataFrame.
    Further *args and **kwargs are passed to root_numpy's root2array.
    If the root file contains a branch matching __index__*, it will become the DataFrame's index.

    Parameters
    ----------
    paths: string or list
        The path(s) to the root file(s)
    key: string
        The key of the tree to load. May be omitted when the first file contains exactly one tree.
    columns: str or sequence of str
        A sequence of shell-patterns (can contain *, ?, [] or {}). Matching columns are read.
        The columns beginning with `noexpand:` are not interpreted as shell-patterns,
        allowing formula columns such as `noexpand:2*x`. The column in the returned DataFrame
        will not have the `noexpand:` prefix.
    ignore: str or sequence of str
        A sequence of shell-patterns (can contain *, ?, [] or {}). All matching columns are ignored (overriding the columns argument).
    chunksize: int
        If this parameter is specified, an iterable is returned that yields DataFrames with `chunksize` rows.
    where: str
        Only rows that match the expression will be read.
    flatten: sequence of str
        A sequence of column names. Will use root_numpy.stretch to flatten arrays in the specified columns into
        individual entries. All arrays specified in the columns must have the same length for this to work.
        Be careful if you combine this with chunksize, as chunksize will refer to the number of unflattened entries,
        so you will be iterating over a number of entries that is potentially larger than chunksize.
        The index of each element within its former array will be saved in the __array_index column.

    Returns
    -------
        DataFrame created from matching data in the specified TTree. When `chunksize` is
        given, an object supporting `len()` (number of chunks) and iteration over DataFrames.

    Raises
    ------
    OSError
        If `key` is given but found in none of the paths, or if `paths` is empty.
    ValueError
        If `key` is omitted and the first file does not contain exactly one tree,
        or if `ignore` matches an `__index__*` branch.

    Notes
    -----

        >>> df = read_root('test.root', 'MyTree', columns=['A{B,C}*', 'D'], where='ABB > 100')

    """

    if not isinstance(paths, list):
        paths = [paths]
    # Use a single file to search for trees and branches, ensuring the key exists
    for seed_path in paths:
        trees = list_trees(seed_path)
        if key and key not in trees:
            continue
        break
    else:
        if key:
            raise OSError('{} not found in any of the given paths'.format(key))
        else:
            raise OSError('No trees found in any of the given paths')

    if not key:
        if len(trees) == 1:
            key = trees[0]
        elif len(trees) == 0:
            raise ValueError('No trees found in {}'.format(seed_path))
        else:
            raise ValueError('More than one tree found in {}'.format(seed_path))

    branches = list_branches(seed_path, key)

    if not columns:
        all_vars = branches
    else:
        if isinstance(columns, string_types):
            columns = [columns]
        # __index__* is always loaded if it exists
        # XXX Figure out what should happen with multi-dimensional indices
        index_branches = list(filter(lambda x: x.startswith('__index__'), branches))
        if index_branches:
            columns = columns[:]
            columns.append(index_branches[0])
        columns, noexpand = filter_noexpand_columns(columns)
        columns = list(itertools.chain.from_iterable(list(map(expand_braces, columns))))
        all_vars = get_matching_variables(branches, columns) + noexpand

    if ignore:
        if isinstance(ignore, string_types):
            ignore = [ignore]
        # Brace patterns have to be expanded *before* matching (as is done for
        # `columns`); expanding the already-matched names never lets a
        # pattern such as 'A{B,C}*' match anything.
        ignore = list(itertools.chain.from_iterable(map(expand_braces, ignore)))
        ignored = get_matching_variables(branches, ignore, fail=False)
        if any(var.startswith('__index__') for var in ignored):
            raise ValueError('__index__* branch is being ignored!')
        # Filter instead of list.remove(): an ignored branch that was not
        # selected by `columns` must not raise.
        all_vars = [var for var in all_vars if var not in ignored]

    if chunksize:
        # A TChain over all files gives the total number of entries to split.
        tchain = ROOT.TChain(key)
        for path in paths:
            tchain.Add(path)
        n_entries = tchain.GetEntries()
        n_chunks = int(ceil(float(n_entries) / chunksize))
        # XXX could explicitly clean up the opened TFiles with TChain::Reset

        class genchunk(object):
            def __len__(self):
                return n_chunks

            def __iter__(self):
                # Running row offset handed to convert_to_dataframe as the
                # start index of each chunk.
                current_index = 0
                for chunk in range(n_chunks):
                    arr = root2array(paths, key, all_vars, start=chunk * chunksize, stop=(chunk+1) * chunksize, selection=where, *args, **kwargs)
                    # Chunks left empty (e.g. by the selection) are skipped.
                    if len(arr) == 0:
                        continue
                    if flatten:
                        arr = do_flatten(arr, flatten)
                    yield convert_to_dataframe(arr, start_index=current_index)
                    current_index += len(arr)

        return genchunk()

    arr = root2array(paths, key, all_vars, selection=where, *args, **kwargs)
    if flatten:
        arr = do_flatten(arr, flatten)
    return convert_to_dataframe(arr)
Esempio n. 36
0
def read_root(path, tree_key=None, columns=None, ignore=None, chunksize=None, where=None, *kargs, **kwargs):
    """
    Read a ROOT file into a pandas DataFrame.
    Further *kargs and **kwargs are passed to root_numpy's root2array.
    If the root file contains a branch called index, it will become the DataFrame's index.

    Parameters
    ----------
    path: string
        The path to the root file
    tree_key: string
        The key of the tree to load. May be omitted when the file contains exactly one tree.
    columns: str or sequence of str
        A sequence of shell-patterns (can contain *, ?, [] or {}). Matching columns are read.
    ignore: str or sequence of str
        A sequence of shell-patterns (can contain *, ?, [] or {}). All matching columns are ignored (overriding the columns argument)
    chunksize: int
        If this parameter is specified, an iterator is returned that yields DataFrames with `chunksize` rows
    where: str
        Only rows that match the expression will be read

    Returns
    -------
        DataFrame created from matching data in the specified TTree, or a generator of
        DataFrames when `chunksize` is given

    Raises
    ------
    ValueError
        If `tree_key` is omitted and the file does not contain exactly one tree,
        or if `ignore` matches the index variable.

    Notes
    -----

        >>> df = read_root('test.root', 'MyTree', columns=['A{B,C}*', 'D'], where='ABB > 100')

    """
    if not tree_key:
        trees = list_trees(path)
        if len(trees) == 1:
            tree_key = trees[0]
        elif not trees:
            raise ValueError('No trees found in {}'.format(path))
        else:
            raise ValueError('More than one tree found in {}'.format(path))

    branches = list_branches(path, tree_key)

    if not columns:
        all_vars = branches
    else:
        # index is always loaded if it exists
        if isinstance(columns, string_types):
            columns = [columns]
        if 'index' in branches:
            columns = columns[:]
            columns.append('index')
        columns = list(itertools.chain.from_iterable(list(map(expand_braces, columns))))
        all_vars = get_matching_variables(branches, columns)

    if ignore:
        if isinstance(ignore, string_types):
            ignore = [ignore]
        # Brace patterns have to be expanded *before* matching (as is done for
        # `columns`); expanding the already-matched names never lets a
        # pattern such as 'A{B,C}*' match anything.
        ignore = list(itertools.chain.from_iterable(map(expand_braces, ignore)))
        ignored = get_matching_variables(branches, ignore, fail=False)
        if 'index' in ignored:
            raise ValueError('index variable is being ignored!')
        # Filter instead of list.remove(): an ignored branch that was not
        # selected by `columns` must not raise.
        all_vars = [var for var in all_vars if var not in ignored]

    if chunksize:
        f = ROOT.TFile(path)
        try:
            n_entries = f.Get(tree_key).GetEntries()
        finally:
            # Release the file handle even if the tree lookup fails.
            f.Close()

        def genchunks():
            for chunk in range(int(ceil(float(n_entries) / chunksize))):
                arr = root2array(path, tree_key, all_vars, start=chunk * chunksize, stop=(chunk+1) * chunksize, selection=where, *kargs, **kwargs)
                yield convert_to_dataframe(arr)
        return genchunks()

    arr = root2array(path, tree_key, all_vars, selection=where, *kargs, **kwargs)
    return convert_to_dataframe(arr)
Esempio n. 37
0
from root_numpy import root2array, tree2array
import root_numpy
import numpy as np
import argparse

# Command line: path of the input file and name of the output archive.
parser = argparse.ArgumentParser(
    description='Convert ".tree" file to compressed numpy ".npz"')
parser.add_argument('--path',
                    metavar='p',
                    type=str,
                    help='Path to ".tree" file')
parser.add_argument('--output', metavar='o', type=str, help='Output name')

args = parser.parse_args()

filename = args.path
trees = root_numpy.list_trees(filename)

# Read every tree of the file into its own array.
array = [root2array(filename, tree) for tree in trees]

# Write the archive once, after all trees are loaded, instead of rewriting it
# after every tree. Nothing is written when the file holds no trees.
# NOTE(review): np.savez does not compress (np.savez_compressed does), and on
# some NumPy versions `allow_pickle` is stored as an extra array instead of
# being treated as an option - confirm the intended call.
if array:
    np.savez(args.output, array, allow_pickle=True)
Esempio n. 38
0
import numpy as np
import matplotlib
matplotlib.use('pdf')
import matplotlib.pyplot as plt


# Command line: a single, mandatory input ROOT file.
parser = argparse.ArgumentParser(prog='./ZMuMUEfficiency')
parser.add_argument('-i', '--input', required=True, help='specify input root file')
args = parser.parse_args()

input = args.input

# The script cannot choose between several trees on its own.
treeName = list_trees(input)
if len(treeName) > 1:
    print("more then one tree in file ... specify, which tree to use")
    exit()

# Acceptance selection, assembled from adjacent string literals.
selection = (
    'ZStableMass > 66 '
    '& ZStableMass < 116 '
    '& (ZDecayMode == 13 | ZDecayMode == 151313) '
    '& ZLeptonPt > 27 '
    '& ZAntiLeptonPt > 27 '
    '& abs(ZLeptonEta) < 2.4 '
    '& abs(ZAntiLeptonEta) < 2.4 '
)
# Branches to load.
branches = ['MuonProbeCategory', 'nPV', 'eventWeight']
Esempio n. 39
0
        data_loaded = yaml.load(stream)

    channels_models = data_loaded["models"]
    files = data_loaded["files"]
    if index_files != None:
        files = [files[index] for index in index_files]
    full_output = data_loaded["full output"]
    output_folder = data_loaded["output_folder"]
    n_processes = data_loaded["n_processes"]
    mode = data_loaded["mode"]

    args = []
    managers = {}
    locks = {}
    for index, f in enumerate(files):
        trees = list_trees(f)
        for tree in trees:
            if tree in channels_models:
                model_path = channels_models[tree][1]
                channel = channels_models[tree][0]
            else:
                continue
            foldername, treename = tree.split("/")
            output_filename = get_output_filename(f, output_folder)
            if not output_filename in managers:
                managers[output_filename] = Manager()
                locks[output_filename] = managers[output_filename].Lock()
            lock = locks[output_filename]
            args.append([
                f, treename, foldername, output_filename, model_path,
                full_output, channel, mode, lock