def _getcolumn(self, key):
    """Return the column registered under ``key``, or a failing placeholder.

    The collection name and the column mapping are taken from the first and
    third entries of ``self._args``.  When ``key`` is present its column is
    returned unchanged.  Otherwise an ``awkward.VirtualArray`` is returned
    whose generator raises ``RuntimeError``.
    """
    name, _, columns, _ = self._args
    if key in columns:
        return columns[key]

    # This function is only meant for use in methods' _finalize() while
    # all columns are still virtual. Missing arrays are a sign of an
    # incompatible file or missing preloaded columns. The error below
    # triggers only if the missing column is actually accessed.
    def raise_missing():
        raise RuntimeError("There was an attempt to read the nonexistent array: %s_%s" % (name, key))

    return awkward.VirtualArray(raise_missing)
def from_arrays(cls, arrays, methods=None, metadata=None):
    '''Build NanoEvents from a dictionary of arrays

    Parameters
    ----------
        arrays : dict
            A mapping from branch name to flat numpy array or awkward VirtualArray
        methods : dict, optional
            A mapping from collection name to class deriving from
            `awkward.array.objects.Methods` that implements additional mixins
        metadata : dict, optional
            Arbitrary metadata to embed in this NanoEvents table

    Returns a NanoEvents object

    Raises
    ------
        ValueError
            If any value in ``arrays`` is neither an ``awkward.VirtualArray``
            nor a ``numpy.ndarray``.
    '''
    # Work on a shallow copy so the caller's mapping is never modified.
    arrays = dict(arrays)
    for k, value in list(arrays.items()):
        if isinstance(value, awkward.VirtualArray):
            continue
        if not isinstance(value, numpy.ndarray):
            raise ValueError("The array %s : %r is not a valid type" % (k, value))
        # Bind ``value`` as a default argument: a plain ``lambda: value``
        # would look the name up when called, so every wrapper created in
        # this loop would hand back the *last* ndarray processed.
        arrays[k] = awkward.VirtualArray(
            lambda value=value: value,
            type=awkward.type.ArrayType(len(value), value.dtype),
        )

    events = cls.named('event')

    # Collection name is the part of the branch name before the first '_'.
    collections = {k.split('_')[0] for k in arrays}
    # Drop 'nX' entries when a collection 'X' exists alongside them.
    collections -= {
        k for k in collections
        if k.startswith('n') and k[1:] in collections
    }

    # Module-level defaults first, then caller overrides on top.
    allmethods = dict(collection_methods)
    if methods is not None:
        allmethods.update(methods)

    for name in collections:
        events.contents[name] = NanoCollection.from_arrays(
            arrays, name, allmethods.get(name, None))

    for name in events.columns:
        # soft hasattr via type, to prevent materialization
        if hasattr(type(events[name]), '_finalize'):
            events.contents[name]._finalize(name, events)

    events.metadata = metadata if metadata is not None else {}
    return events
def from_file(cls, file, treename=b'Events', entrystart=None, entrystop=None, cache=None, methods=None, metadata=None):
    '''Build NanoEvents directly from ROOT file

    Every branch of the tree is wrapped in an ``awkward.VirtualArray`` that
    calls the branch's ``array`` method with the normalized entry range and
    ``flatten=True``; the resulting mapping is passed to ``cls.from_arrays``.

    Parameters
    ----------
        file : str or uproot.rootio.ROOTDirectory
            The filename or already opened file using e.g. ``uproot.open()``
        treename : str, optional
            Name of the tree to read in the file, defaults to ``Events``
        entrystart : int, optional
            Start at this entry offset in the tree (default 0)
        entrystop : int, optional
            Stop at this entry offset in the tree (default end of tree)
        cache : dict, optional
            A dict-like interface to a cache object, in which any materialized
            virtual arrays will be kept
        methods : dict, optional
            A mapping from collection name to class deriving from
            `awkward.array.objects.Methods` that implements custom additional
            mixins beyond the defaults provided.
        metadata : dict, optional
            Arbitrary metadata to embed in this NanoEvents table

    Returns a NanoEvents object
    '''
    cache = {} if cache is None else cache
    if not isinstance(file, uproot.rootio.ROOTDirectory):
        file = uproot.open(file)
    tree = file[treename]
    entrystart, entrystop = uproot.tree._normalize_entrystartstop(tree.numentries, entrystart, entrystop)
    read_options = {'entrystart': entrystart, 'entrystop': entrystop, 'flatten': True}

    arrays = {}
    for bname in tree.keys():
        branch = tree[bname]
        interpretation = uproot.interpret(branch)
        if isinstance(interpretation, uproot.asjagged):
            # Flattened length of a jagged branch is not known up front.
            length, itemtype = float('inf'), interpretation.content.type
        else:
            length, itemtype = entrystop - entrystart, interpretation.type

        # Cache key identifies file, tree, entry range and branch.
        key_parts = (
            _hex(file._context.uuid),
            _ascii(treename),
            entrystart,
            entrystop,
            _ascii(bname),
        )
        lazy = awkward.VirtualArray(
            branch.array,
            (),
            dict(read_options),
            type=awkward.type.ArrayType(length, itemtype),
            persistentkey=';'.join(map(str, key_parts)),
            cache=cache,
        )
        lazy.__doc__ = branch.title.decode('ascii')
        arrays[bname.decode('ascii')] = lazy

    out = cls.from_arrays(arrays, methods=methods, metadata=metadata)
    out._cache = cache
    return out
def lazyConcat1(arrays, methods=None, name=None, cols_ignore=(), ref_col=None, check_types=True):
    """
    Lazy concatenation on axis=1 for jagged arrays of tables.

    Only columns that are present in every input, are not listed in
    ``cols_ignore`` and whose type (taken from the first input) is an
    ``np.dtype`` are carried over.  Each of them becomes a VirtualArray that
    concatenates the inputs on first access, except the reference column,
    which is concatenated eagerly.

    ref_col: used for the output starts & stops calculation; can be given
        explicitly to avoid materializing an unneeded column (optional,
        defaults to an arbitrary one of the retained columns)
    check_types: assert the same type in all concatenateable arrays.
        Skip (check_types=False) if you know what you do...
    """
    arrays = tuple(arrays)
    assert all(isinstance(a, awkward.JaggedArray) for a in arrays)
    assert all(isinstance(a.content, awkward.Table) for a in arrays)

    JaggedArray = NanoCollection._get_mixin(methods, awkward.JaggedArray)
    Table = NanoCollection._get_mixin(methods, awkward.Table)
    if name is None:
        data = Table()
    else:
        data = Table.named(name)

    types = arrays[0].content.type.to
    shared = reduce(and_, (set(a.columns) for a in arrays))
    candidates = shared - set(cols_ignore)
    cols = {c for c in candidates if isinstance(types[c], np.dtype)}

    if ref_col is None:
        ref_col = list(cols)[0]
    ref = awkward.concatenate([a[ref_col] for a in arrays], axis=1)
    tot = ref.stops.max()

    def _concat_content(column):
        # Runs only when the virtual column is materialized; the result must
        # share the jagged structure of the reference column.
        joined = awkward.concatenate([a[column] for a in arrays], axis=1)
        assert (ref.starts == joined.starts).all() and (ref.stops == joined.stops).all()
        return joined.content

    # data.type.takes = tot
    for col in cols:
        if check_types:
            seen_types = set(a[col].type for a in arrays)
            assert len(seen_types) == 1
        lazy_type = awkward.type.ArrayType(tot, types[col])
        data.contents[col] = awkward.VirtualArray(_concat_content, args=(col, ), type=lazy_type)

    # avoid recreating
    data.contents[ref_col] = ref.content

    return JaggedArray(ref.starts, ref.stops, data)
def get(item):
    """Wrap column ``item`` of ``df`` in an ``ak.VirtualArray``.

    The virtual array calls ``df.__getitem__`` with ``item`` when it is
    materialized.  Its declared type uses the branch interpretation from
    ``df._tree[item]`` and a length that depends on whether the column is
    treated as jagged: a column is treated as jagged when a counts column
    named ``'n' + <prefix before the first '_'>`` exists in ``df`` and is not
    the column itself.  For jagged columns the length is the sum of the
    counts column, memoized in ``df`` under ``'_sum_' + <counts name>``;
    otherwise it is ``df.size``.
    """
    branch = df._tree[item]
    # pretty safe for NanoAOD
    prefix = item.partition('_')[0]
    counts_branch = 'n' + prefix

    if counts_branch not in df or item == counts_branch:
        # Flat column: one value per entry.
        size = df.size
        interp = branch.interpretation.type
    else:
        memo_sum = '_sum_' + counts_branch
        if memo_sum not in df:
            # Compute the total number of items once and keep it in df.
            df[memo_sum] = df[counts_branch].sum()
        size = df[memo_sum]
        interp = branch.interpretation.content.type

    virtual_type = ak.type.ArrayType(size, interp)
    return ak.VirtualArray(df.__getitem__, item, type=virtual_type)