def load_dataset(numpy_lib, num_iter=1):
    """Download the NanoAOD test file if needed and load it as a ``Dataset``.

    The file list handed to ``Dataset`` contains the same local ROOT file
    ``num_iter`` times. After loading, the per-file arrays are merged in
    place, a short size summary is printed, and the data is handed to
    ``move_to_device``.

    Args:
        numpy_lib: Array library object forwarded unchanged to
            ``Dataset.move_to_device``.
        num_iter: How many times the test file is repeated in the file list.

    Returns:
        The ``Dataset`` instance after loading, merging and device transfer.
    """
    print("loading dataset")
    download_if_not_exists(
        "data/nanoaod_test.root",
        "https://jpata.web.cern.ch/jpata/opendata_files/DY2JetsToLL-merged/1.root",
    )

    # (branch name, dtype) pairs to read, grouped by collection.
    muon_branches = [
        ("Muon_pt", "float32"),
        ("Muon_eta", "float32"),
        ("Muon_phi", "float32"),
        ("Muon_mass", "float32"),
        ("Muon_charge", "int32"),
        ("Muon_pfRelIso03_all", "float32"),
        ("Muon_tightId", "bool"),
    ]
    electron_branches = [
        ("Electron_pt", "float32"),
        ("Electron_eta", "float32"),
        ("Electron_phi", "float32"),
        ("Electron_mass", "float32"),
        ("Electron_charge", "int32"),
        ("Electron_pfRelIso03_all", "float32"),
        ("Electron_pfId", "bool"),
    ]
    jet_branches = [
        ("Jet_pt", "float32"),
        ("Jet_eta", "float32"),
        ("Jet_phi", "float32"),
        ("Jet_mass", "float32"),
        ("Jet_btag", "float32"),
        ("Jet_puId", "bool"),
    ]
    event_branches = [
        ("HLT_IsoMu24", "bool"),
        ("MET_pt", "float32"),
        ("MET_phi", "float32"),
        ("MET_sumet", "float32"),
        ("MET_significance", "float32"),
        ("MET_CovXX", "float32"),
        ("MET_CovXY", "float32"),
        ("MET_CovYY", "float32"),
    ]
    datastructures = {
        "Muon": muon_branches,
        "Electron": electron_branches,
        "Jet": jet_branches,
        "EventVariables": event_branches,
    }

    # Repeating the same path num_iter times makes the file list longer
    # without needing additional input files.
    filenames = num_iter * ["./data/nanoaod_test.root"]
    dataset = Dataset(
        "nanoaod", filenames, datastructures, treename="Events", datapath=""
    )

    dataset.load_root(verbose=True)
    dataset.merge_inplace(verbose=True)

    # Report the size before moving the arrays to the target device.
    event_count = dataset.numevents()
    size_mb = dataset.memsize() / 1000 / 1000
    print("dataset has {0} events, {1:.2f} MB".format(event_count, size_mb))

    dataset.move_to_device(numpy_lib, verbose=True)
    return dataset
Example #2
0
        "Jet": [("Jet_Px", "float32"), ("Jet_Py", "float32"),
                ("Jet_Pz", "float32"), ("Jet_E", "float32"),
                ("Jet_btag", "float32"), ("Jet_ID", "bool")],
        "EventVariables": [("NPrimaryVertices", "int32"),
                           ("triggerIsoMu24", "bool"),
                           ("EventWeight", "float32")]
    }

    #Define a dataset, given the data structure and a list of filenames
    dataset = Dataset("HZZ", [filename], datastructures, treename="events")

    #Load the ROOT files
    dataset.load_root(verbose=True)

    #merge arrays across files into one big array
    dataset.merge_inplace(verbose=True)

    #move to GPU if CUDA was specified
    dataset.move_to_device(nplib, verbose=True)

    #process data, save output as a json file
    results = dataset.analyze(analyze_data_function,
                              verbose=True,
                              parameters={"muons_ptcut": 30.0})
    results.save_json("out.json")

    #Make a simple PDF plot as an example
    hist = results["hist_leading_muon_pt"]
    fig = plt.figure(figsize=(5, 5))
    plt.errorbar(hist.edges[:-1], hist.contents, np.sqrt(hist.contents_w2))
    plt.savefig("hist.png", bbox_inches="tight")