Exemple #1
0
 def evaluate(self, ast_node):
     """Evaluate ``ast_node`` by generating and running a throwaway script.

     The node is visited with ``ast_visitor`` (presumably this populates the
     ``rep`` attributes read below -- TODO confirm).  A script ``temp.py`` is
     then written which loads the dataset, evaluates ``ast_node.rep`` and
     saves the result to ``output.awkd``.  The script is run with ``python``
     and the saved output is loaded and returned.

     ``self.dataset_source`` is either a path to a ``.awkd`` / ``.root`` file
     or an in-memory object, which is saved to ``temp.awkd`` first.

     All temporary files (``temp.py``, ``output.awkd`` and, for in-memory
     sources, ``temp.awkd``) are removed even when a step fails.

     Raises:
         NotImplementedError: if the dataset path ends in neither ``.awkd``
             nor ``.root``.
     """
     qv = ast_visitor()
     qv.visit(ast_node)

     source_is_path = isinstance(self.dataset_source, str)
     data_pathname = self.dataset_source if source_is_path else 'temp.awkd'

     # Reject unsupported inputs before any temporary file is created, so a
     # bad path never leaves a half-written script behind.
     if not data_pathname.endswith(('.awkd', '.root')):
         raise NotImplementedError('unimplemented file type: ' + data_pathname)

     # Follow the ``source`` chain to the innermost node; its ``rep`` is the
     # name the dataset is bound to inside the generated script.
     source = ast_node.source
     while hasattr(source, 'source'):
         source = source.source

     # ``{!r}`` emits a valid Python string literal even when the path
     # contains quotes or backslashes.
     script_lines = ['import awkward\n']
     if data_pathname.endswith('.awkd'):
         script_lines.append(
             '{} = awkward.load({!r})\n'.format(source.rep, data_pathname))
     else:
         script_lines.append('import uproot\n')
         script_lines.append(
             'input_file = uproot.open({!r})\n'.format(data_pathname))
         script_lines.append(
             source.rep +
             " = input_file[input_file.keys()[0]].lazyarrays(namedecode='utf-8')\n"
         )
     script_lines.append('output_array = awkward.fromiter(' + ast_node.rep + ')\n')
     script_lines.append("awkward.save('output.awkd', output_array)\n")

     temp_files = ['temp.py', 'output.awkd']
     if not source_is_path:
         temp_files.append(data_pathname)

     try:
         if not source_is_path:
             awkward.save(data_pathname, self.dataset_source)
         with open('temp.py', 'w') as f:
             f.writelines(script_lines)
         os.system('python temp.py')
         return awkward.load('output.awkd')
     finally:
         # Best-effort cleanup: a file may be missing if an earlier step failed.
         for pathname in temp_files:
             if os.path.exists(pathname):
                 os.remove(pathname)
Exemple #2
0
def _read_awkd(filepath, branches, partial_load=None):
    """Read the requested branches from an ``.awkd`` file.

    Args:
        filepath: path handed to ``awkward.load``.
        branches: sequence of keys to read.  The length of the first branch
            is used to turn the fractions in ``partial_load`` into indices.
        partial_load: optional ``(start, stop)`` pair of fractions of the
            total number of entries.  ``None`` or ``(0, 1)`` keeps everything.

    Returns:
        dict mapping each branch name to its (possibly sliced) array.
    """
    import awkward
    with awkward.load(filepath) as f:
        outputs = {k: f[k] for k in branches}
    if partial_load is not None and tuple(partial_load) != (0, 1):
        n_entries = len(outputs[branches[0]])
        # Slice bounds must be integers; float bounds (as returned by
        # np.trunc) are rejected by NumPy-backed arrays.  int() truncates
        # towards zero, matching the previous np.trunc rounding.
        start, stop = (int(fraction * n_entries) for fraction in partial_load)
        outputs = {k: v[start:stop] for k, v in outputs.items()}
    return outputs
Exemple #3
0
 def __init__(self, array, *args, **kwargs):
     """Build the dataset from an indexable ``array`` or a path to one.

     A string ``array`` is treated as a path and loaded with ``ak.load``.
     Element 0 is stored as ``y``; elements 1-4 and 5-8 are passed, in
     order, to ``FourVectorArray`` to build ``constituents`` and ``jets``.
     Remaining positional and keyword arguments go to the parent class.
     """
     if isinstance(array, str):
         array = ak.load(array)
     super(DRNDataset, self).__init__('whatever', *args, **kwargs)

     self.y = array[0]
     self.constituents = FourVectorArray(*(array[i] for i in range(1, 5)))
     self.jets = FourVectorArray(*(array[i] for i in range(5, 9)))

     # Replace the constituents' phi/eta with their offsets relative to the
     # main jet.
     constituents, jets = self.constituents, self.jets
     constituents.phi = calc_dphi(constituents.phi, jets.phi)
     constituents.eta = constituents.eta - jets.eta
Exemple #4
0
def get_chunking(filelist,
                 chunksize,
                 treename="Events",
                 workers=12,
                 skip_bad_files=False):
    """
    Return 2-tuple of
    - chunks: triplets of (filename,entrystart,entrystop) calculated with input `chunksize` and `filelist`
    - total_nevents: total event count over `filelist`

    Every chunk is non-empty: files without entries contribute no chunk, and
    a file whose entry count is an exact multiple of `chunksize` does not get
    an empty trailing chunk. An empty `filelist` yields `([], 0)`.

    With `skip_bad_files`, files are counted one by one and those raising
    IndexError/ValueError are reported and skipped. Otherwise, if the first
    file name ends in ".awkd" the entry count is the length of the "run"
    column of each file; any other list is counted with `uproot.numentries`,
    using up to `workers` threads when there are at least 5 files.

    Raises ValueError if `chunksize` is not positive.
    """
    chunksize = int(chunksize)
    if chunksize <= 0:
        raise ValueError("chunksize must be a positive integer, got %d" %
                         chunksize)

    chunks = []
    nevents = 0

    def add_chunks(fname, nentries):
        # Stepping by chunksize covers [0, nentries) exactly once and never
        # produces a zero-length (start == stop) chunk.
        for start in range(0, nentries, chunksize):
            chunks.append((fname, start, min(start + chunksize, nentries)))

    if skip_bad_files:
        import uproot
        from tqdm.auto import tqdm
        # slightly slower (serial loop), but can skip bad files
        for fname in tqdm(filelist):
            try:
                items = uproot.numentries(fname, treename, total=False).items()
            except (IndexError, ValueError):
                print("Skipping bad file", fname)
                continue
            for fn, nentries in items:
                nevents += nentries
                add_chunks(fn, nentries)
    elif len(filelist) == 0:
        # Nothing to count; also avoids indexing filelist[0] below.
        pass
    elif filelist[0].endswith(".awkd"):
        import awkward
        from tqdm.auto import tqdm
        for fname in tqdm(filelist):
            f = awkward.load(fname,
                             whitelist=awkward.persist.whitelist +
                             [['blosc', 'decompress']])
            nentries = len(f["run"])
            nevents += nentries
            add_chunks(fname, nentries)
    else:
        import uproot
        import concurrent.futures
        executor = None
        if len(filelist) >= 5:
            executor = concurrent.futures.ThreadPoolExecutor(
                min(workers, len(filelist)))
        try:
            counts = uproot.numentries(filelist,
                                       treename,
                                       total=False,
                                       executor=executor)
        finally:
            # Release the worker threads once the counts are in.
            if executor is not None:
                executor.shutdown()
        for fn, nentries in counts.items():
            nevents += nentries
            add_chunks(fn, nentries)
    return chunks, nevents
Exemple #5
0
def _read_awkd(filepath, branches, load_range=None):
    """Read ``branches`` from an ``.awkd`` file, optionally keeping a sub-range.

    ``load_range`` is a pair of fractions of the entry count of the first
    branch.  Both are truncated to integer indices, and the stop index is
    forced to be at least ``start + 1``.  ``None`` keeps everything.

    Returns a dict mapping each branch name to its array.
    """
    import awkward
    with awkward.load(filepath) as archive:
        outputs = {name: archive[name] for name in branches}

    if load_range is None:
        return outputs

    n_entries = len(outputs[branches[0]])
    start = math.trunc(load_range[0] * n_entries)
    stop = max(start + 1, math.trunc(load_range[1] * n_entries))
    return {name: column[start:stop] for name, column in outputs.items()}
Exemple #6
0
 def _load(self):
     """Load the label and the padded feature arrays from ``self.filepath``.

     The entry at ``self.label`` is stored in ``self._label``.  For every
     key of ``self.feature_dict`` the listed column(s) are padded with
     ``pad_array(..., self.pad_len)`` and stacked along ``self.stack_axis``
     into ``self._values[key]``.  Every column must have the same ``counts``
     as the first column read; this is checked with an ``assert``.
     """
     logging.info('Start loading file %s' % self.filepath)
     reference_counts = None
     with awkward.load(self.filepath) as a:
         self._label = a[self.label]
         for group, columns in self.feature_dict.items():
             # A single column name is treated as a one-element list.
             if not isinstance(columns, (list, tuple)):
                 columns = [columns]
             padded = []
             for column in columns:
                 if reference_counts is not None:
                     assert np.array_equal(reference_counts, a[column].counts)
                 else:
                     reference_counts = a[column].counts
                 padded.append(pad_array(a[column], self.pad_len))
             self._values[group] = np.stack(padded, axis=self.stack_axis)
     logging.info('Finished loading file %s' % self.filepath)
Exemple #7
0
    def __init__(self, array, n_constituents=200):
        """Set up labels, four-vectors and derived constituent features.

        A string ``array`` is treated as a path and loaded with ``ak.load``.
        Element 0 is stored as ``y``; elements 1-4 and 5-8 are passed, in
        order, to ``FourVectorArray`` to build ``constituents`` and ``jets``.
        ``n_constituents`` is stored unchanged on the instance.
        """
        if isinstance(array, str):
            array = ak.load(array)
        self.n_constituents = n_constituents
        self.y = array[0]
        self.constituents = FourVectorArray(*(array[i] for i in range(1, 5)))
        self.jets = FourVectorArray(*(array[i] for i in range(5, 9)))
        cons, jets = self.constituents, self.jets

        # Replace the constituents' phi/eta with their offsets relative to
        # the main jet.
        cons.phi = calc_dphi(cons.phi, jets.phi)
        cons.eta = cons.eta - jets.eta

        # Derived features: logs of pt and energy (absolute and relative to
        # the jet) and sqrt(eta^2 + phi^2) of the offsets computed above.
        cons.logpt = np.log(cons.pt)
        cons.loge = np.log(cons.energy)
        cons.logpt_ptjet = np.log(cons.pt / jets.pt)
        cons.loge_ejet = np.log(cons.energy / jets.energy)
        cons.dr = np.sqrt(cons.eta**2 + cons.phi**2)
Exemple #8
0
                    top_4 = np.argsort(pts)[-4:]
                    num_found = len(top_4)
                    for var_num, var_name in enumerate(kinematics):
                        wanted = j_name + "_Total_" + var_name
                        vals = getattr(ew, wanted)[top_4]
                        all_kinematics[j_class][order, var_num, j, event_n, :num_found] = vals
                    energy, px, py, pz = all_kinematics[j_class][order, 1:, j, event_n, :num_found]
                    if num_found > 1:
                        shape_vals = ShapeVariables.shape(energy, px, py, pz)[1]
                        for var_num, var_name in enumerate(shapes):
                            all_shapes[j_class][order, var_num, j, event_n] = shape_vals[var_name]
                        
    content = {"shape_names": shapes, "kinematic_names": kinematics, "orders": ["nlo", "lo"], "jet_names": [spectral_names, traditional_names, iterative_names], "kinematics" : awkward.fromiter(all_kinematics), "shapes": awkward.fromiter(all_shapes)}
    awkward.save("../megaIgnore/IRC_shapes.awkd", content)
else:
    data = awkward.load("../megaIgnore/IRC_shapes.awkd")
    shapes = data["shape_names"]
    kinematics = data["kinematic_names"]
    spectral_names, traditional_names, iterative_names = data["jet_names"]
    all_kinematics = data["kinematics"]
    all_shapes = data["shapes"]


def plot_jet_name(name, variable, bounds=None, ax=None):
    colours = ['blue', 'purple', 'orange']
    line_styles = ['--', '-', '-.']
    if variable in kinematics:
        table = all_kinematics
        v_index = kinematics.index(variable)
    elif variable in shapes:
        table = all_shapes
Exemple #9
0
    else:
        import awkward
        major, minor, _ = awkward.version.version_info
        major = int(major)
        minor = int(minor)
        if major == 1:
            raise ImportError("Need awkward 0.12.X, you have %s" %
                              awkward.__version__)
        elif minor > 14:
            raise ImportError("Need awkward 0.12 / 0.13 / 0.14, you have %s" %
                              awkward.__version__)
        elif minor < 12:
            raise ImportError("Need awkward 0.12 / 0.13 / 0.14, you have %s" %
                              awkward.__version__)

        tree_data1 = awkward.load(args.filename)
        print(len(tree_data1.columns), "hists in main file")

    is_hdf5_2 = False
    if args.compareTo:
        is_hdf5_2 = "hdf5" in os.path.splitext(args.compareTo)[1]
        if not is_hdf5_2 and "awkd" not in os.path.splitext(args.compareTo)[1]:
            raise IOError("--compareTo input must be .hdf5 or .awkd")

        if is_hdf5_2:
            if not is_hdf5_1:
                import h5py
            tree_data2 = h5py.File(args.compareTo)
            print(len(tree_data2.keys()), "hists in compareTo file")
        else:
            if is_hdf5_1:
Exemple #10
0
                ac[0]["pair"] = (fcomp, ("lz4.frame", "decompress"))
            if label.startswith("blosc"):
                ac[0]["pair"] = (fcomp, ("blosc", "decompress"))
            if label.startswith("lzma"):
                ac[0]["pair"] = (fcomp, ("backports.lzma", "decompress"))

            fname = "tables/table_{}.awkd".format(label)

            t0 = time.time()
            awkward.save(fname, table, compression=ac, mode="w")
            t1 = time.time()
            info["t_compress_ms"] = 1e3 * (t1 - t0)

            t0 = time.time()
            tmp = awkward.load(fname,
                               whitelist=awkward.persist.whitelist + [
                                   ['lz4.frame', 'decompress'],
                                   ['lz4.block', 'decompress'],
                                   ['blosc', 'decompress'],
                                   ['backports.lzma', 'decompress'],
                               ])
            t1 = time.time()
            info["t_decompress_ms"] = 1e3 * (t1 - t0)

            info["uncompressed_bytes"] = table.nbytes
            info["compressed_bytes"] = int(os.stat(fname).st_size)

            data.append(info)

        pd.DataFrame(data).to_json("jsons/data_{}.json".format(i))
Exemple #11
0
    start = time.time()
    nn = ParticleNetJetTagsProducer(args.model, args.preprocess)
    diff = time.time() - start
    print('--- Setup model: %f s total' % (diff,))

    start = time.time()
    outputs = nn.predict(taginfo, eval_flags)
    diff = time.time() - start
    print('--- Run prediction: %f s total, %f s per jet ---' % (diff, diff / outputs['probQCDbb'].counts.sum()))
#     print(outputs)
#     for k in outputs:
#         print(k, outputs[k].content.mean())

    if 'FatJet_ParticleNetMD_probXbb' in table:
        print('Compare w/ stored values')
        print('Stored values:\n ...', table['FatJet_ParticleNetMD_probXbb'][:5])
        print('Computed values:\n ...', outputs['probXbb'][:5])
        print('Diff (50%, 95%, 99%, 100%) = ', np.percentile(
            np.abs(outputs['probXbb'] - table['FatJet_ParticleNetMD_probXbb']).content, [50, 95, 99, 100]))

#     assert(np.array_equal(jetmass.counts, outputs['probQCDbb'].counts))
    alloutputs = awkward.JaggedArray.zip(outputs)
    if args.make_baseline:
        with open('baseline.awkd', 'wb') as fout:
            awkward.save(fout, alloutputs)
    else:
        if os.path.exists('baseline.awkd'):
            with open('baseline.awkd', 'rb') as fin:
                baseline = awkward.load(fin)
            print("Comparison to baseline:", (alloutputs == baseline).all().all())