Example #1
0
 def _getcolumn(self, key):
     """Return the column stored under ``key``, or a placeholder that fails on read.

     The collection name and its column mapping are taken from ``self._args``.
     When ``key`` is absent, a ``VirtualArray`` is returned whose generator
     raises ``RuntimeError`` naming the missing ``<name>_<key>`` array.
     """
     collection, _, available, _ = self._args
     if key in available:
         return available[key]

     # Intended for use from methods' _finalize() while every column is still
     # virtual. A missing array points at an incompatible file or at missing
     # preloaded columns, so the error is deferred until the placeholder is
     # actually read.
     def _raise_on_read():
         raise RuntimeError("There was an attempt to read the nonexistent array: %s_%s" % (collection, key))

     return awkward.VirtualArray(_raise_on_read)
Example #2
0
    def from_arrays(cls, arrays, methods=None, metadata=None):
        '''Build NanoEvents from a dictionary of arrays

        Parameters
        ----------
            arrays : dict
                A mapping from branch name to flat numpy array or awkward VirtualArray
            methods : dict, optional
                A mapping from collection name to class deriving from `awkward.array.objects.Methods`
                that implements additional mixins
            metadata : dict, optional
                Arbitrary metadata to embed in this NanoEvents table

        Returns a NanoEvents object

        Raises
        ------
            ValueError
                If a value in ``arrays`` is neither a numpy array nor an awkward VirtualArray
        '''
        # Work on a shallow copy so the caller's mapping is never modified.
        arrays = dict(arrays)
        for k, value in list(arrays.items()):
            if isinstance(value, awkward.VirtualArray):
                continue
            if not isinstance(value, numpy.ndarray):
                raise ValueError("The array %s : %r is not a valid type" %
                                 (k, value))
            # Bind the array as a default argument: a plain ``lambda: value``
            # would look ``value`` up when the virtual array is materialized,
            # i.e. after the loop has finished, and every wrapped column would
            # then yield the *last* numpy array of the mapping.
            arrays[k] = awkward.VirtualArray(lambda value=value: value,
                                             type=awkward.type.ArrayType(
                                                 len(value),
                                                 value.dtype))

        events = cls.named('event')
        # The text before the first underscore of a branch name is its collection.
        collections = {k.split('_')[0] for k in arrays}
        # Drop "nX" whenever a collection "X" exists (presumably the counts
        # branch of X rather than a collection of its own).
        collections -= {
            k
            for k in collections if k.startswith('n') and k[1:] in collections
        }

        # Default mixins first, so that user-supplied ones override them.
        allmethods = {}
        allmethods.update(collection_methods)
        if methods is not None:
            allmethods.update(methods)
        for name in collections:
            events.contents[name] = NanoCollection.from_arrays(
                arrays, name, allmethods.get(name, None))

        for name in events.columns:
            # soft hasattr via type, to prevent materialization
            if hasattr(type(events[name]), '_finalize'):
                events.contents[name]._finalize(name, events)

        events.metadata = metadata if metadata is not None else {}
        return events
Example #3
0
    def from_file(cls, file, treename=b'Events', entrystart=None, entrystop=None, cache=None, methods=None, metadata=None):
        '''Build NanoEvents directly from a ROOT file

        Every branch of the tree is wrapped in a lazy ``awkward.VirtualArray``
        and the resulting mapping is handed to ``from_arrays``.

        Parameters
        ----------
            file : str or uproot.rootio.ROOTDirectory
                The filename, or a file that was already opened with e.g. ``uproot.open()``
            treename : str, optional
                Name of the tree to read in the file, defaults to ``Events``
            entrystart : int, optional
                Start at this entry offset in the tree (default 0)
            entrystop : int, optional
                Stop at this entry offset in the tree (default end of tree)
            cache : dict, optional
                A dict-like interface to a cache object, in which any materialized virtual arrays will be kept
            methods : dict, optional
                A mapping from collection name to class deriving from `awkward.array.objects.Methods`
                that implements custom additional mixins beyond the defaults provided.
            metadata : dict, optional
                Arbitrary metadata to embed in this NanoEvents table

        Returns a NanoEvents object
        '''
        cache = {} if cache is None else cache
        if not isinstance(file, uproot.rootio.ROOTDirectory):
            file = uproot.open(file)
        tree = file[treename]
        entrystart, entrystop = uproot.tree._normalize_entrystartstop(tree.numentries, entrystart, entrystop)

        arrays = {}
        for bname in tree.keys():
            branch = tree[bname]
            interpretation = uproot.interpret(branch)
            # Jagged branches are read flattened, so their length is not known
            # up front and is declared as infinite.
            if isinstance(interpretation, uproot.asjagged):
                length, itemtype = float('inf'), interpretation.content.type
            else:
                length, itemtype = entrystop - entrystart, interpretation.type

            # The cache key combines file uuid, tree name, entry range and branch name.
            keyparts = (_hex(file._context.uuid), _ascii(treename), entrystart, entrystop, _ascii(bname))
            lazy = awkward.VirtualArray(
                branch.array,
                (),
                dict(entrystart=entrystart, entrystop=entrystop, flatten=True),
                type=awkward.type.ArrayType(length, itemtype),
                persistentkey=';'.join(map(str, keyparts)),
                cache=cache,
            )
            lazy.__doc__ = branch.title.decode('ascii')
            arrays[bname.decode('ascii')] = lazy

        events = cls.from_arrays(arrays, methods=methods, metadata=metadata)
        events._cache = cache
        return events
Example #4
0
def lazyConcat1(arrays,
                methods=None,
                name=None,
                cols_ignore=(),
                ref_col=None,
                check_types=True):
    """
    Lazy concatenation on axis=1 for jagged arrays of tables.

    Only columns that are present in every input, are not listed in
    ``cols_ignore`` and have a numpy dtype in the first array's table type
    are kept. All of them except the reference column are wrapped in
    ``awkward.VirtualArray`` objects that concatenate on first access.

    arrays: iterable of ``awkward.JaggedArray`` whose content is an ``awkward.Table``

    methods: passed to ``NanoCollection._get_mixin`` to build the output
        JaggedArray and Table classes (optional)

    name: if given, the output table is created with ``Table.named(name)``

    cols_ignore: column names to leave out of the result

    ref_col: is used for output starts & stops calculation and is concatenated
        eagerly; can be explicitly given to avoid materializing an unneeded
        column (is optional). Defaults to the alphabetically first common
        column, so that the choice is reproducible between runs.

    check_types: assert the same type in all concatenateable arrays.
        Skip (check_types=False) if you know what you do...

    Raises IndexError if ``arrays`` is empty, or if ``ref_col`` is not given
    and the inputs have no usable column in common.
    """
    arrays = tuple(arrays)
    assert all(isinstance(a, awkward.JaggedArray) for a in arrays), \
        "lazyConcat1 expects awkward.JaggedArray inputs"
    assert all(isinstance(a.content, awkward.Table) for a in arrays), \
        "lazyConcat1 expects JaggedArrays whose content is an awkward.Table"

    JaggedArray = NanoCollection._get_mixin(methods, awkward.JaggedArray)
    Table = NanoCollection._get_mixin(methods, awkward.Table)
    data = Table() if name is None else Table.named(name)
    types = arrays[0].content.type.to

    common = set.intersection(*(set(a.columns) for a in arrays)) - set(cols_ignore)
    # Sorted so that both the default reference column and the column order of
    # the output do not depend on set iteration order (string hashing is
    # randomized per process).
    cols = sorted(col for col in common if isinstance(types[col], np.dtype))

    if ref_col is None:
        if not cols:
            # Same exception type as the former ``list(cols)[0]`` failure,
            # but with an explanation.
            raise IndexError(
                "lazyConcat1: the input arrays share no concatenateable column "
                "to use as reference")
        ref_col = cols[0]
    ref = awkward.concatenate([a[ref_col] for a in arrays], axis=1)
    tot = ref.stops.max()

    def _do(col):
        # Runs only when the virtual column is materialized.
        res = awkward.concatenate([a[col] for a in arrays], axis=1)
        # Every column must share the jagged structure of the reference.
        assert (ref.starts == res.starts).all() and (ref.stops
                                                     == res.stops).all()
        return res.content

    for col in cols:
        if check_types:
            assert len(set(a[col].type for a in arrays)) == 1
        if col == ref_col:
            # Already concatenated above: reuse it instead of wrapping it lazily.
            data.contents[col] = ref.content
            continue
        data.contents[col] = awkward.VirtualArray(_do,
                                                  args=(col, ),
                                                  type=awkward.type.ArrayType(
                                                      tot, types[col]))
    # Also covers an explicitly given ref_col that is not among ``cols``.
    data.contents[ref_col] = ref.content

    return JaggedArray(ref.starts, ref.stops, data)
Example #5
0
 def get(item):
     """Wrap branch ``item`` of ``df`` in a lazy ``ak.VirtualArray``.

     The array is produced by ``df.__getitem__`` on first access. Its declared
     type is built from the branch interpretation and either ``df.size`` or,
     for jagged branches, the memoized sum of the matching counts branch.
     """
     branch = df._tree[item]
     # "n" + the text before the first underscore is taken as the counts
     # branch: pretty safe for NanoAOD
     counter = 'n' + item.partition('_')[0]
     if counter not in df or item == counter:
         # Flat branch (or the counts branch itself): one entry per event.
         length, itemtype = df.size, branch.interpretation.type
     else:
         # Jagged branch: the total is computed once and stored back in df.
         total_key = '_sum_' + counter
         if total_key not in df:
             df[total_key] = df[counter].sum()
         length, itemtype = df[total_key], branch.interpretation.content.type
     return ak.VirtualArray(
         df.__getitem__,
         item,
         type=ak.type.ArrayType(length, itemtype),
     )