Example #1
import pytest
import skhep_testdata
import uproot4


def test_function_iterate_pandas_2():
    pandas = pytest.importorskip("pandas")
    files = (skhep_testdata.data_path("uproot-HZZ.root").replace(
        "HZZ", "HZZ-{uncompressed,zlib,lz4}") + ":events")
    expect = 0
    for arrays, report in uproot4.iterate(files,
                                          "Muon_Px",
                                          report=True,
                                          library="pd"):
        assert arrays["Muon_Px"].index.values[0] == (expect, 0)
        expect += report.tree.num_entries
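As the path string above shows, `uproot4.iterate` expands `{a,b,c}` brace lists (and, as in the next example, `*` wildcards) in file names, with the tree name appended after a colon. A minimal sketch of the same pattern, with hypothetical file names:

# Hypothetical file names; the brace list expands to one file per entry.
for arrays in uproot4.iterate("data-{2017,2018}.root:events", "Muon_Px"):
    print(arrays["Muon_Px"])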
Example #2
def test_function_iterate():
    files = skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace(
        "6.20.04", "*"
    )
    expect = 0
    for arrays, report in uproot4.iterate(
        {files: "sample"}, "i8", report=True, library="np"
    ):
        assert arrays["i8"][:5].tolist() == [-15, -14, -13, -12, -11]
        assert report.global_entry_start == expect
        assert report.global_entry_stop == expect + len(arrays["i8"])
        expect += len(arrays["i8"])
Example #3
def test_iterate():
    with pytest.raises(ValueError):
        for arrays in uproot4.iterate(skhep_testdata.data_path("uproot-issue63.root")):
            pass

    assert (
        len(
            list(
                uproot4.iterate(
                    {skhep_testdata.data_path("uproot-issue63.root"): "blah"},
                    allow_missing=True,
                )
            )
        )
        == 0
    )

    files = skhep_testdata.data_path("uproot-sample-6.16.00-uncompressed.root").replace(
        "6.16.00", "*"
    )

    for arrays in uproot4.iterate(files, "Ai8"):
        pass
    for arrays in uproot4.iterate({files: "sample"}, "Ai8"):
        pass
    for arrays in uproot4.iterate([files], "Ai8"):
        pass
    for arrays in uproot4.iterate([{files: "sample"}], "Ai8"):
        pass
Example #4
def test_function_iterate_pandas():
    pandas = pytest.importorskip("pandas")
    files = skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root").replace(
        "6.20.04", "*"
    )
    expect = 0
    for arrays, report in uproot4.iterate(
        {files: "sample"}, "i8", report=True, library="pd"
    ):
        assert arrays["i8"].values[:5].tolist() == [-15, -14, -13, -12, -11]
        assert arrays.index.values[0] == expect
        assert report.global_entry_start == expect
        assert report.global_entry_stop == expect + len(arrays["i8"])
        expect += len(arrays["i8"])
Example #5
import concurrent.futures
import time

import uproot4
from tqdm import tqdm


def iter_chunks(
    path,
    treename="t",
    progress=True,
    step_size="50MB",
    columns=None,
    nthreads=4,
):
    """
    Loop over specified ROOT files in `path` in chunks, returning dataframes.
    Tree name is specified via `treename`.
    Iterates over the files in chunks of `step_size` (as per `uproot4.iterate`),
    reading only the requested columns.

    columns: list of columns ("branches") to read (default of `None` reads all)
    """
    if ":" not in path:
        path = f"{path}:{treename}"

    iterable = uproot4.iterate(
        path,
        filter_name=columns,
        step_size=step_size,
        decompression_executor=concurrent.futures.ThreadPoolExecutor(nthreads),
    )

    if progress:
        iterable = tqdm(iterable)

    nevents = 0
    t0 = time.time()
    for arrays in iterable:
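        # awkward1_arrays_to_dataframe is assumed to be defined elsewhere in
        # this module; it converts the chunk of awkward arrays to a DataFrame.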
        df = awkward1_arrays_to_dataframe(arrays)
        nevents += len(df)
        yield df
    t1 = time.time()
    if progress:
        print(
            f"Processed {nevents} in {t1-t0:.2f}s ({1e-6*nevents/(t1-t0):.2f}MHz)"
        )
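A usage sketch for `iter_chunks` (file, tree, and column names are hypothetical):

# Hypothetical usage: stream a large ntuple in 50 MB chunks.
for df in iter_chunks("ntuple.root", treename="t", columns=["pt", "eta"]):
    print(len(df))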
Example #6
def test_branch_pluralization():
    awkward1 = pytest.importorskip("awkward1")
    with uproot4.open(skhep_testdata.data_path(
            "uproot-Zmumu.root"))["events/px1"] as px1:
        assert px1.array(library="np")[:5].tolist() == [
            -41.1952876442,
            35.1180497674,
            35.1180497674,
            34.1444372454,
            22.7835819537,
        ]
        assert px1.arrays(library="np")["px1"][:5].tolist() == [
            -41.1952876442,
            35.1180497674,
            35.1180497674,
            34.1444372454,
            22.7835819537,
        ]

        for i, arrays in enumerate(px1.iterate(library="np", step_size=1000)):
            if i == 0:
                assert arrays["px1"][:5].tolist() == [
                    -41.1952876442,
                    35.1180497674,
                    35.1180497674,
                    34.1444372454,
                    22.7835819537,
                ]
            elif i == 1:
                assert arrays["px1"][:5].tolist() == [
                    26.043758785,
                    26.043758785,
                    25.9962042016,
                    -44.4626620943,
                    28.2794901505,
                ]
            elif i == 2:
                assert arrays["px1"][:5].tolist() == [
                    -43.3783782352,
                    -43.3783782352,
                    -43.2444221651,
                    -20.2126675303,
                    43.7131175076,
                ]
            else:
                assert False

    for i, arrays in enumerate(
            uproot4.iterate(
                {skhep_testdata.data_path("uproot-Zmumu.root"):
                 "events/px1"})):
        if i == 0:
            assert arrays["px1"][:5].tolist() == [
                -41.1952876442,
                35.1180497674,
                35.1180497674,
                34.1444372454,
                22.7835819537,
            ]
        elif i == 1:
            assert arrays["px1"][:5].tolist() == [
                26.043758785,
                26.043758785,
                25.9962042016,
                -44.4626620943,
                28.2794901505,
            ]
        elif i == 2:
            assert arrays["px1"][:5].tolist() == [
                -43.3783782352,
                -43.3783782352,
                -43.2444221651,
                -20.2126675303,
                43.7131175076,
            ]
        else:
            assert False
Example #7
import pathlib
from typing import List, Optional, Tuple

import awkward as ak
import numpy as np
import uproot


def from_uproot(
    ntuple_paths: List[pathlib.Path],
    pos_in_file: str,
    variable: str,
    bins: np.ndarray,
    weight: Optional[str] = None,
    selection_filter: Optional[str] = None,
) -> Tuple[np.ndarray, np.ndarray]:
    """Reads an ntuple with uproot, and fills a histogram with the observable.

    The paths may contain wildcards.

    Args:
        ntuple_paths (List[pathlib.Path]): list of paths to ntuples
        pos_in_file (str): name of tree within ntuple
        variable (str): variable to bin histogram in
        bins (numpy.ndarray): bin edges for histogram
        weight (Optional[str], optional): event weight to extract, defaults to None (no
            weights applied)
        selection_filter (Optional[str], optional): filter to be applied on events,
            defaults to None (no filter)

    Returns:
        Tuple[np.ndarray, np.ndarray]:
            - yield per bin
            - stat. uncertainty per bin
    """
    # concatenate the path to file and location within file with ":"
    paths_with_trees = [str(path) + ":" + pos_in_file for path in ntuple_paths]

    # determine whether the weight is a float or an expression
    # (for which a branch needs to be read)
    if weight is not None:
        try:
            float(weight)
            weight_is_expression = False
        except ValueError:
            # weight is not a float, need to evaluate the expression
            weight_is_expression = True
    else:
        # no weight specified, all weights are 1.0
        weight_is_expression = False
        weight = "1.0"

    if weight_is_expression:
        # need to read observables and weights
        array_generator = uproot.iterate(
            paths_with_trees,
            expressions=[variable, weight],
            cut=selection_filter,
        )
        obs_list = []
        weight_list = []
        for arr in array_generator:
            obs_list.append(ak.to_numpy(arr[variable]))
            weight_list.append(ak.to_numpy(arr[weight]))
        observables = np.concatenate(obs_list)
        weights = np.concatenate(weight_list)

    else:
        # only need to read the observables
        array_generator = uproot.iterate(
            paths_with_trees,
            expressions=[variable],
            cut=selection_filter,
        )
        obs_list = []
        for arr in array_generator:
            obs_list.append(ak.to_numpy(arr[variable]))
        observables = np.concatenate(obs_list)
        weights = np.ones_like(observables) * float(weight)

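    # _bin_data is assumed to be a helper defined elsewhere in this module; it
    # histograms the observables with the given weights and returns the yield
    # and the statistical uncertainty per bin.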
    yields, stdev = _bin_data(observables, weights, bins)
    return yields, stdev
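A usage sketch for `from_uproot` (paths, tree, and branch names are hypothetical):

yields, stdev = from_uproot(
    [pathlib.Path("ntuples/sample_*.root")],  # wildcards are allowed
    pos_in_file="nominal",
    variable="jet_pt",
    bins=np.linspace(0, 500, 26),
    weight="weight_mc",
    selection_filter="jet_pt > 25",
)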
Example #8
import numpy as np
import uproot3
import uproot4
from ROOT import TH1, TFile, kFALSE

# LorentzVector, Vector, return_phasespace and storeeff2D are assumed to be
# provided by the surrounding module.


def AddFFcorr(infname,
              intreename,
              outfname,
              outtreename,
              Lcstate,
              leptname,
              q2True_branchname,
              costhlTrue_branchname,
              nentries_to_read=1000000000,
              chunksize=10000):

    TH1.AddDirectory(kFALSE)

    perfname = None
    q2factor = None
    if Lcstate == 'Lc':
        perfname = './CorrectionTables/LcFFratios.root'
        q2factor = 1.
    elif (Lcstate == 'Lc2595' or Lcstate == 'Lc2625'):
        perfname = './CorrectionTables/LcstFFratios.root'
        q2factor = 1e-6
    else:
        raise Exception('Lc state not recognised', Lcstate)

    if leptname != 'mu' and leptname != 'tau':
        raise Exception('Lepton name not recognised', leptname)

    print('Using the histname', Lcstate + leptname + "_ratio")

    #variables to get from file
    varsdf = ['runNumber', 'eventNumber']
    varsdf += ['Lb_TRUEP_X', 'Lb_TRUEP_Y', 'Lb_TRUEP_Z', 'Lb_TRUEP_E']
    varsdf += ['Lc_TRUEP_X', 'Lc_TRUEP_Y', 'Lc_TRUEP_Z', 'Lc_TRUEP_E']
    varsdf += [
        'Lb_True' + leptname.capitalize() + '_PX',
        'Lb_True' + leptname.capitalize() + '_PY',
        'Lb_True' + leptname.capitalize() + '_PZ',
        'Lb_True' + leptname.capitalize() + '_PE'
    ]
    varsdf += [
        'Lb_TrueNeutrino_PX', 'Lb_TrueNeutrino_PY', 'Lb_TrueNeutrino_PZ',
        'Lb_TrueNeutrino_PE'
    ]

    File = TFile.Open(perfname, "read")
    Histg = File.Get(Lcstate + leptname + "_ratio")
    perfHist = Histg.Clone(Lcstate + leptname + "_rationew")
    File.Close()
    Xmin = perfHist.GetXaxis().GetXmin()
    Xmax = perfHist.GetXaxis().GetXmax()
    Ymin = perfHist.GetYaxis().GetXmin()
    Ymax = perfHist.GetYaxis().GetXmax()
    Limits = (Xmin, Xmax, Ymin, Ymax)
    print(Limits, perfHist.Integral())

    #variables to store in the new ttree
    varstoStore = {
        'runNumber': np.int64,  #np.int is removed in modern NumPy
        'eventNumber': np.int64,
        'Event_FFcorr': np.float64,
        costhlTrue_branchname: np.float64,
        q2True_branchname: np.float64
    }

    aliases = {}
    #create a new rootfile
    with uproot3.recreate(outfname) as f:
        f[outtreename] = uproot3.newtree(varstoStore)

        #loop over the old rootfile chunkwise
        events_read = 0
        if chunksize >= nentries_to_read:
            chunksize = nentries_to_read
        for df_data in uproot4.iterate(infname + ':' + intreename,
                                       varsdf,
                                       aliases=aliases,
                                       cut=None,
                                       library="pd",
                                       step_size=chunksize):
            if events_read >= nentries_to_read:
                break

            #Compute q2 and cosThetaL
            pxl = df_data['Lb_True' + leptname.capitalize() + '_PX']
            pxnu = df_data['Lb_TrueNeutrino_PX']
            pyl = df_data['Lb_True' + leptname.capitalize() + '_PY']
            pynu = df_data['Lb_TrueNeutrino_PY']
            pzl = df_data['Lb_True' + leptname.capitalize() + '_PZ']
            pznu = df_data['Lb_TrueNeutrino_PZ']
            pel = df_data['Lb_True' + leptname.capitalize() + '_PE']
            penu = df_data['Lb_TrueNeutrino_PE']
            if (Lcstate == 'Lc2595' or Lcstate == 'Lc2625'):
                #this should be the Lc* momentum: Lb minus lepton minus neutrino
                pxlc = df_data['Lb_TRUEP_X'] - pxl - pxnu
                pylc = df_data['Lb_TRUEP_Y'] - pyl - pynu
                pzlc = df_data['Lb_TRUEP_Z'] - pzl - pznu
                pelc = df_data['Lb_TRUEP_E'] - pel - penu
            elif Lcstate == 'Lc':
                pxlc = df_data['Lc_TRUEP_X']
                pylc = df_data['Lc_TRUEP_Y']
                pzlc = df_data['Lc_TRUEP_Z']
                pelc = df_data['Lc_TRUEP_E']

            PLc_lab = LorentzVector(
                Vector(pxlc, pylc,
                       pzlc), pelc)  #Format of LorentzVector(Vector(X,Y,Z), E)
            Pl_lab = LorentzVector(Vector(pxl, pyl, pzl), pel)
            PNu_lab = LorentzVector(Vector(pxnu, pynu, pznu), penu)
            PLb_lab = PLc_lab + Pl_lab + PNu_lab
            qsq, cthl = return_phasespace(PLb_lab, PLc_lab, Pl_lab)
            #print(qsq,cthl)
            df_data[q2True_branchname] = qsq
            df_data[costhlTrue_branchname] = cthl

            #get the corrections
            #order has to match the histogram axes: (q2, costhl)
            applyvars = [q2True_branchname, costhlTrue_branchname]
            df_data['Event_FFcorr'] = df_data[applyvars].apply(
                storeeff2D, args=[perfHist, Limits, q2factor], axis=1)

            #get only the things that need to be stored and write them to the file
            branch_dict = {
                vartostore: df_data[vartostore].to_numpy()
                for vartostore in list(varstoStore.keys())
            }
            f[outtreename].extend(branch_dict)
            events_read += df_data.shape[0]
            print('Events read', events_read)
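A usage sketch for `AddFFcorr` (file and branch names are hypothetical; the
correction tables under ./CorrectionTables/ must be present):

AddFFcorr(infname="LcMuNu_gen.root",
          intreename="DecayTree",
          outfname="LcMuNu_ffcorr.root",
          outtreename="DecayTree",
          Lcstate="Lc",
          leptname="mu",
          q2True_branchname="q2_True",
          costhlTrue_branchname="costhl_True")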