コード例 #1
0
    def test_initialization_method(self):
        """
        Check initialization method in Spark backend.

        The user-defined init function declares an integer variable
        `userValue` in the ROOT interpreter, set to the value passed from
        the python side.
        """
        def init(value):
            # Imported inside the function: init runs on remote Spark
            # workers, where driver-side imports are not in scope.
            import ROOT
            cpp_code = '''int userValue = %s ;''' % value
            ROOT.gInterpreter.ProcessLine(cpp_code)

        PyRDF.initialize(init, 123)
        PyRDF.current_backend = Spark()
        # Spark backend has a limited list of supported methods, so we use
        # Histo1D which is a supported action.
        # The code below creates an RDataFrame instance with one single entry
        # and defines a column 'u' whose value is taken from the variable
        # 'userValue'.
        # This variable is only declared inside the ROOT interpreter, however
        # the value of the variable is passed by the user from the python side.
        # If the init function defined by the user is properly propagated to the
        # Spark backend, each worker will run the init function as a first step
        # and hence the variable 'userValue' will be defined at runtime.
        # As a result the define operation should read the variable 'userValue'
        # and assign it to the entries of the column 'u' (only one entry).
        # Finally, Histo1D returns a histogram filled with one value. The mean
        # of this single value has to be the value itself, independently of
        # the number of spawned workers.
        df = PyRDF.RDataFrame(1).Define("u", "userValue").Histo1D("u")
        h = df.GetValue()
        self.assertEqual(h.GetMean(), 123)
コード例 #2
0
    def test_snapshot_nrows(self):
        """Test support for `Snapshot` in local backend"""
        def make_snapshot(tree, path):
            # Write a 100-entry dataframe with a single branch to disk and
            # return the dataframe produced by the Snapshot action.
            frame = PyRDF.RDataFrame(100)
            return frame.Define("b1", "rdfentry_").Snapshot(tree, path)

        tree_name = "snapTree"
        file_name = "snapFile.root"
        snapdf = make_snapshot(tree_name, file_name)

        # Re-read the snapshotted tree from disk and count even entries:
        # half of 0..99 should survive the filter.
        from_file = PyRDF.RDataFrame(tree_name, file_name)
        even_count = from_file.Filter("b1 % 2 == 0").Count()
        self.assertEqual(even_count.GetValue(), 50)

        # The dataframe returned by Snapshot must give the same result.
        snap_count = snapdf.Filter("b1 % 2 == 0").Count()
        self.assertEqual(snap_count.GetValue(), 50)

        # Clean up the temporary ROOT file.
        os.remove(file_name)
コード例 #3
0
ファイル: test_inv_mass.py プロジェクト: yeckang/PyRDF
    def build_pyrdf_graph(self):
        """
        Create a PyRDF graph with a fixed set of operations and return it.

        Returns a tuple of four lazy histogram proxies:
        (pt1_h, pt2_h, invMass_h, phis_h).
        """
        treename = "data"
        files = [
            'https://root.cern/files/teaching/CMS_Open_Dataset.root',
        ]
        rdf = PyRDF.RDataFrame(treename, files)

        # Define the analysis cuts
        chargeCutStr = "C1 != C2"
        etaCutStr = "fabs(eta1) < 2.3 && fabs(eta2) < 2.3"
        ptCutStr = "pt1 > 2 && pt2 > 2"
        rdf_f = rdf.Filter(chargeCutStr, "Opposite Charge") \
                   .Filter(etaCutStr, "Central Muons") \
                   .Filter(ptCutStr, "Sane Pt")

        # Create the invariant mass column
        invMassFormulaStr = ("sqrt(pow(E1+E2, 2) - (pow(px1+px2, 2) +"
                             "pow(py1+py2, 2) + pow(pz1+pz2, 2)))")
        rdf_fd = rdf_f.Define("invMass", invMassFormulaStr)

        # Create the histograms.
        # NOTE(review): pt1_h/pt2_h are booked on the *unfiltered* rdf,
        # not on rdf_f -- presumably intentional (pt spectra before cuts),
        # but worth confirming.
        pt1_h = rdf.Histo1D(("", "", 128, 1, 1200), "pt1")
        pt2_h = rdf.Histo1D(("", "", 128, 1, 1200), "pt2")
        model = ("invMass", "CMS Opendata;#mu#mu mass[GeV];Events", 512, 5,
                 110)
        invMass_h = rdf_fd.Histo1D(model, "invMass")
        import ROOT
        pi = ROOT.TMath.Pi()
        # 2D histogram of the two muon azimuthal angles; 'model' is reused
        # here with new binning for Histo2D.
        model = ("", "", 64, -pi, pi, 64, -pi, pi)
        phis_h = rdf_fd.Histo2D(model, "phi1", "phi2")

        return pt1_h, pt2_h, invMass_h, phis_h
コード例 #4
0
def main(basepath):
    """
    Skim the datasets and create histograms.

    :param basepath: path (or URL prefix) of the directory holding the input
        samples; each sample is read as ``<basepath><sample>_<i>.root`` for
        i in 1..15.

    Relies on the module-level ``samplesandlabels`` sequence and the
    ``skim``/``histos`` helpers defined elsewhere in this file.
    """

    # Create output file.
    # NOTE(review): the file is kept open for the whole loop, so histos()
    # presumably writes into it -- confirm against histos' implementation.
    tfile = ROOT.TFile("histograms.root", "RECREATE")

    for sample, label in samplesandlabels:

        # 15 partial files per sample.
        filenames = [
            basepath + sample + "_{}.root".format(i) for i in range(1, 16)
        ]
        # Uncomment to use only one file per sample
        # sample_file = eosbasepath + sample + "_1.root"

        # Create RDataFrame
        # Uncomment to use only one file per sample
        # df = PyRDF.RDataFrame("Events", sample_file)
        df = PyRDF.RDataFrame("Events", filenames)

        # Skim events
        skimdf = skim(df, sample)

        # Create histograms
        histos(skimdf, label)

    tfile.Close()
コード例 #5
0
    def test_distributed_sum(self):
        """Test support for `Sum` operation in distributed backend"""
        # Sum of the entry numbers 0..9 must be 45.
        frame = PyRDF.RDataFrame(10).Define("x", "rdfentry_")
        total = frame.Sum("x")
        self.assertAlmostEqual(total.GetValue(), 45.0)
コード例 #6
0
ファイル: test_headers_include.py プロジェクト: yeckang/PyRDF
    def test_include_dir_and_headers(self):
        """
        Check that the filter operation is able to use C++ functions included
        from a list with a directory and a single header file.
        """
        PyRDF.include_headers([
            "tests/integration/local/test_headers/headers_folder",
            "tests/integration/local/test_headers/header1.hxx"
        ])
        # RDataFrame with 10 integers [0...9]
        frame = PyRDF.RDataFrame(10)

        # Each pair is (filter expression using a header-declared function,
        # expected number of surviving entries).
        checks = [
            ("check_number_less_than_5(tdfentry_)", 5),
            ("check_number_greater_than_5(tdfentry_)", 4),
            ("check_number_less_than_10(tdfentry_)", 10),
        ]

        # Book all counts first, then trigger and verify them.
        counts = [frame.Filter(expression).Count() for expression, _ in checks]
        for count, (_, expected) in zip(counts, checks):
            self.assertEqual(count.GetValue(), expected)
コード例 #7
0
    def test_distributed_snapshot(self):
        """Test support for `Snapshot` in distributed backend"""
        # A simple dataframe with ten sequential numbers from 0 to 9
        df = PyRDF.RDataFrame(10).Define("x", "rdfentry_")

        # Count rows in the dataframe
        nrows = df.Count()

        # Snapshot writes the data out and returns a new PyRDF.RDataFrame
        # reading it back
        snapdf = df.Snapshot("snapTree", "snapFile.root")

        # Count the rows in the snapshotted dataframe
        snapcount = snapdf.Count()

        self.assertEqual(nrows.GetValue(), 10)
        self.assertEqual(snapcount.GetValue(), 10)

        # Retrieve list of file from the snapshotted PyRDF.RDataFrame
        input_files = snapdf.proxied_node.get_inputfiles()
        # Expected intermediary file names. Assumes the distributed backend
        # split the 10 entries into exactly two partitions ([0,4], [5,9]),
        # one output file per partition -- confirm against backend config.
        tmp_files = ["snapFile_0_4.root", "snapFile_5_9.root"]
        # Check that the two lists are the same
        self.assertListEqual(input_files, tmp_files)
        # Check that the intermediary .root files were created with the right
        # names, then remove them because they are not necessary
        for filename in tmp_files:
            self.assertTrue(os.path.exists(filename))
            os.remove(filename)
コード例 #8
0
    def test_asnumpy_return_arrays(self):
        """Test support for `AsNumpy` pythonization in local backend"""
        import numpy

        # Ten rows with an int column 'x' and a float column 'y'.
        df = PyRDF.RDataFrame(10).Define("x", "(int)rdfentry_")\
                                 .Define("y", "1.f/(1.f+rdfentry_)")

        # AsNumpy must hand back a dict keyed by column name.
        arrays = df.AsNumpy()
        self.assertIsInstance(arrays, dict)

        # For each column: a numpy array of the original length whose dtype
        # matches the C++ column type (int -> int32, float -> float32).
        for column, expected_dtype in (("x", numpy.dtype("int32")),
                                       ("y", numpy.dtype("float32"))):
            values = arrays[column]
            self.assertIsInstance(values, numpy.ndarray)
            self.assertEqual(len(values), 10)
            self.assertEqual(values.dtype, expected_dtype)
コード例 #9
0
ファイル: test_include_headers.py プロジェクト: yeckang/PyRDF
    def test_extend_ROOT_include_path(self):
        """
        Check that the include path of ROOT is extended with the directories
        specified in `PyRDF.include_headers()` so references between headers
        are correctly solved.
        """
        import ROOT

        header_folder = "tests/integration/local/test_headers/headers_folder"

        # Use the distributed backend so headers must also be propagated
        PyRDF.use("spark")
        PyRDF.include_headers(header_folder)

        # Get list of include paths seen by ROOT
        ROOT_include_path = ROOT.gInterpreter.GetIncludePath().split(" ")

        # Token as it appears in ROOT's include path: -I"<dir>"
        new_folder_include = "-I\"{}\"".format(header_folder)

        # Check that new folder is in ROOT include paths
        self.assertTrue(new_folder_include in ROOT_include_path)

        # Create an RDataFrame with 100 integers from 0 to 99
        rdf = PyRDF.RDataFrame(100)

        # Keep entries below 10 using a header-declared function, then
        # histogram the surviving entry numbers
        rdf_less_than_10 = rdf.Filter("check_number_less_than_10(tdfentry_)")
        histo1 = rdf_less_than_10.Histo1D("tdfentry_")

        # Check that histogram has 10 entries and mean 4.5 (mean of 0..9)
        self.assertEqual(histo1.GetEntries(), 10)
        self.assertAlmostEqual(histo1.GetMean(), 4.5)
コード例 #10
0
def main():
    """
    Main function of the skimming step of the analysis
    The function loops over all required samples, reduces the content to the
    interesting events and writes them to new files.

    Relies on module-level names: `sampleNames`, `samplesBasePath`, the
    selection helpers (MinimalSelection ... AddEventWeight) and
    `final_variables_vec`.
    """
    # Run distributedly on Spark; skim.h carries the C++ helpers used by
    # the selection steps below.
    PyRDF.use("spark")
    PyRDF.include_headers("skim.h")

    for sample in sampleNames:
        print(">>> Process sample {}:\n".format(sample))

        df = PyRDF.RDataFrame("Events", samplesBasePath + sample + ".root")

        # Chain of selection/definition steps; each returns a new dataframe.
        df2 = MinimalSelection(df)
        df3 = FindGoodMuons(df2)
        df4 = FindGoodTaus(df3)
        df5 = FilterGoodEvents(df4)
        df6 = FindMuonTauPair(df5)
        df7 = DeclareVariables(df6)
        df8 = CheckGeneratorTaus(df7, sample)
        df9 = AddEventWeight(df8, sample)

        # Write only the final set of variables to the skimmed output file.
        out_file = sample + "Skim.root"
        df9.Snapshot("Events", out_file, final_variables_vec)
コード例 #11
0
    def test_includes_function_with_filter_and_histo(self):
        """
        An integration test to check that the filter operation is able to
        use C++ functions that were included using header files.
        """
        PyRDF.include("tests/integration/local/test_headers/header1.hxx")
        PyRDF.use("spark")

        rdf = PyRDF.RDataFrame(10)

        # Keep only entries 0..4 via the header-declared function, then
        # histogram the surviving entry numbers.
        histo = rdf.Filter("check_number_less_than_5(tdfentry_)")\
                   .Histo1D("tdfentry_")

        # Expected statistics of the surviving numbers {0, 1, 2, 3, 4}.
        survivors = range(5)
        size = len(survivors)
        mean = sum(survivors) / float(size)
        std_dev = math.sqrt(
            sum((v - mean) ** 2 for v in survivors) / size)

        # Entries, mean and standard deviation must all match exactly.
        self.assertEqual(histo.GetEntries(), float(size))
        self.assertEqual(histo.GetMean(), mean)
        self.assertEqual(histo.GetStdDev(), std_dev)
コード例 #12
0
 def test_lazy_define(self):
     """Check that a Define placed after a never-passing Filter never runs."""
     df = PyRDF.RDataFrame(10)
     # C++-side counter: the 'xx' Define increments it if (wrongly) executed.
     ROOT.gInterpreter.ProcessLine('int myCount = 0;')
     # 'a' takes values 0..9, so the Filter 'a > 100' rejects every entry
     # and the downstream Define('xx') must never be evaluated.
     h = df.Define('a', 'static int i = 0; return i++;')\
           .Filter('a > 100')\
           .Define('xx', ' cout << "This should not be triggered!!" << endl; myCount++; return 1;')\
           .Histo1D('xx') # this is to check if the define is triggered!
     h.GetMean()
     self.assertEqual(0, ROOT.myCount)
コード例 #13
0
ファイル: test_dist.py プロジェクト: yeckang/PyRDF
    def test_empty_rdataframe_with_number_of_entries(self):
        """
        An RDataFrame instantiated with a number of entries leads to balanced
        ranges.
        """
        frame = PyRDF.RDataFrame(10)

        # Ten entries split across two partitions -> two equal ranges.
        expected = [(0, 5), (5, 10)]
        self.assertListEqual(self.get_ranges_from_rdataframe(frame), expected)
コード例 #14
0
def main():
    """
    Produce histograms of all configured variables for every skimmed dataset.

    For each (dataset, label) pair: apply the baseline selection, book
    histograms for the signal region (opposite-sign pairs) and for the
    same-sign control region used to estimate the QCD contribution, and
    write everything to histograms.root.

    Relies on module-level names: `ranges`, `filterGenMatch`,
    `bookHistogram` and `writeHistogram`.
    """
    PyRDF.use("spark")

    # Create output file
    tfile = ROOT.TFile("histograms.root", "RECREATE")
    variables = ranges.keys()

    # Loop through skimmed datasets and produce histograms of variables.
    # NOTE: DYJetsToLL appears twice on purpose, once per label (ZLL/ZTT);
    # filterGenMatch presumably separates the two contributions -- confirm.
    for name, label in [
        ("GluGluToHToTauTau", "ggH"),
        ("VBF_HToTauTau", "qqH"),
        ("W1JetsToLNu", "W1J"),
        ("W2JetsToLNu", "W2J"),
        ("W3JetsToLNu", "W3J"),
        ("TTbar", "TT"),
        ("DYJetsToLL", "ZLL"),
        ("DYJetsToLL", "ZTT"),
        ("Run2012B_TauPlusX", "dataRunB"),
        ("Run2012C_TauPlusX", "dataRunC"),
    ]:
        print(">>> Process skim {}".format(name))

        # All files in the working directory whose name contains the dataset
        filenames = [filename for filename in os.listdir() if name in filename]
        # Load skimmed dataset and apply baseline selection
        df = PyRDF.RDataFrame("Events", filenames).Filter(
            "mt_1<30",
            "Muon transverse mass cut for W+jets suppression")\
            .Filter("iso_1<0.1", "Require isolated muon for signal region")

        # Book histograms for the signal region
        df1 = df.Filter("q_1*q_2<0",
                        "Require opposited charge for signal region")
        df1 = filterGenMatch(df1, label)
        hists = {}
        for variable in variables:
            hists[variable] = bookHistogram(df1, variable, ranges[variable])

        # Book histograms for the control region used to estimate the QCD
        # contribution
        df2 = df.Filter("q_1*q_2>0", "Control region for QCD estimation")
        df2 = filterGenMatch(df2, label)
        hists_cr = {}
        for variable in variables:
            hists_cr[variable] = bookHistogram(df2, variable, ranges[variable])

        # Write histograms to output file
        for variable in variables:
            writeHistogram(hists[variable], "{}_{}".format(label, variable))
        for variable in variables:
            writeHistogram(hists_cr[variable],
                           "{}_{}_cr".format(label, variable))

    tfile.Close()
コード例 #15
0
    def test_histo1d_merge(self):
        """Check the working of Histo1D merge operation in the reducer."""
        # Build the same 10-entry histogram through PyRDF and through
        # plain PyROOT.
        histo_py = PyRDF.RDataFrame(10).Histo1D("rdfentry_")
        histo_cpp = ROOT.ROOT.RDataFrame(10).Histo1D("rdfentry_")

        # The merged distributed result must match the local reference.
        self.assertHistoOrProfile(histo_py, histo_cpp)
コード例 #16
0
ファイル: test_dist.py プロジェクト: yeckang/PyRDF
    def test_rdataframe_with_treename_and_list_of_one_file(self):
        """
        Check clustered ranges produced when the input dataset is a list of a
        single ROOT file.
        """
        frame = PyRDF.RDataFrame("myTree",
                                 ["tests/unit/backend/2clusters.root"])

        # 2clusters.root holds 1000 entries in two clusters, so the produced
        # ranges must follow the cluster boundaries.
        expected = [(0, 777), (777, 1000)]
        self.assertListEqual(self.get_ranges_from_rdataframe(frame), expected)
コード例 #17
0
    def test_profile1d_merge(self):
        """Check the working of Profile1D merge operation in the reducer."""
        model = ("", "", 64, -4, 4)

        # Build the same profile through PyRDF and through plain PyROOT.
        profile_py = self.define_two_columns(
            PyRDF.RDataFrame(10)).Profile1D(model, "x", "y")
        profile_cpp = self.define_two_columns(
            ROOT.ROOT.RDataFrame(10)).Profile1D(model, "x", "y")

        # The merged distributed result must match the local reference.
        self.assertHistoOrProfile(profile_py, profile_cpp)
コード例 #18
0
    def test_includes_shared_lib_with_filter_op(self):
        """
        Check that the filter operation is able to use C++ functions that
        were included via a compiled shared library and its header file.
        """
        # Paths to the cpp file that has to be compiled into a shared library
        # and the path with the output name of the library.
        # Both are relative to the current directory of this file
        cpp_path = "tests/integration/local/test_shared_libraries/a.cpp"

        library_path = "tests/integration/local/test_shared_libraries/liba.so"

        # Shell command compiling a.cpp into liba.so with ROOT's compiler
        # flags (backticks are expanded by the shell)
        library_code = ("`root-config --cxx` "
                        "`root-config --cflags --libs` "
                        "-fPIC -shared {cpp}"
                        " -o {lib}").format(cpp=cpp_path, lib=library_path)
        # This creates the shared library
        subprocess.call(library_code, shell=True)

        # Path to the shared library relative to the main PyRDF directory.
        so_path = ("tests/integration/local/" "test_shared_libraries/liba.so")

        PyRDF.include_shared_libraries(so_path)

        # The user can include directly the header related to the library
        # or choose to declare functions or objects later
        header_path = ("tests/integration/local/" "test_shared_libraries/a.h")
        PyRDF.include_headers(header_path)

        # Creates an RDataFrame with 5 integers [0...4]
        rdf = PyRDF.RDataFrame(5)

        # This keeps only the entries less than 3
        filter1 = rdf.Filter("tdfentry_ < 3")

        # This defines a new variable x = f(entry) -- assumed to square its
        # argument, see a.cpp -- then keeps only x values less than 3
        filter2 = rdf.Define("x", "f(tdfentry_)").Filter("x < 3")

        count1 = filter1.Count().GetValue()
        count2 = filter2.Count().GetValue()

        # The final answers should be the number of integers less than 3,
        # which is 3 (0, 1, 2), and the number of squared integers less
        # than 3, which is 2 (0, 1).
        self.assertEqual(count1, 3)
        self.assertEqual(count2, 2)

        # Remove unnecessary files at the end
        os.remove(so_path)
コード例 #19
0
ファイル: test_dist.py プロジェクト: yeckang/PyRDF
    def test_rdataframe_with_treename_and_filename_with_globbing(self):
        """
        Check clustered ranges produced when the input dataset is a single ROOT
        file with globbing.
        """
        # The glob pattern resolves to 2clusters.root only.
        frame = PyRDF.RDataFrame("myTree", "tests/unit/backend/2cluste*.root")

        # 1000 entries in two clusters -> two cluster-aligned ranges.
        expected = [(0, 777), (777, 1000)]
        self.assertListEqual(self.get_ranges_from_rdataframe(frame), expected)
コード例 #20
0
    def test_histo3d_merge(self):
        """Check the working of Histo3D merge operation in the reducer."""
        model = ("", "", 64, -4, 4, 64, -4, 4, 64, -4, 4)

        # Build the same 3D histogram through PyRDF and through plain PyROOT.
        histo_py = self.define_three_columns(
            PyRDF.RDataFrame(10)).Histo3D(model, "x", "y", "z")
        histo_cpp = self.define_three_columns(
            ROOT.ROOT.RDataFrame(10)).Histo3D(model, "x", "y", "z")

        # The merged distributed result must match the local reference.
        self.assertHistoOrProfile(histo_py, histo_cpp)
コード例 #21
0
    def test_histo_from_empty_root_file(self):
        """
        Check that when performing operations with the distributed backend on
        an RDataFrame without entries, PyRDF falls back to using the local
        backend and outputs the correct (empty) result.
        """
        PyRDF.use("spark")

        # Create an RDataFrame from a tree that contains no entries
        rdf = PyRDF.RDataFrame("NOMINAL", "tests/unit/backend/emptytree.root")
        histo = rdf.Histo1D("mybranch")

        # Get entries in the histogram, should be zero
        entries = histo.GetEntries()

        # The backend must have been switched to Local for the empty dataset
        self.assertIsInstance(PyRDF.current_backend, Local)
        self.assertEqual(entries, 0)
コード例 #22
0
    def test_histo1D(self):
        """Compare a PyRDF histogram with one built directly in C++."""
        # Same RNG seed on both sides so generated values are identical.
        ROOT.gRandom.SetSeed(1)
        frame = PyRDF.RDataFrame(64)
        gauss = frame.Define("r", "gRandom->Gaus(0,1)")
        h1 = gauss.Histo1D(("h1", "h1", 64, -2., 2.), "r").GetValue()

        # Reference: the identical pipeline executed in the ROOT interpreter.
        cppCode = ('gRandom->SetSeed(1);'
                   'ROOT::RDataFrame tdf(64);'
                   'auto g = tdf.Define("r","gRandom->Gaus(0,1)");'
                   'auto h2Proxy = g.Histo1D({"h1","h1",64, -2., 2.},"r");')
        ROOT.gInterpreter.ProcessLine(cppCode)
        h2 = ROOT.h2Proxy.GetValue()

        # Entries, mean and standard deviation must agree exactly.
        self.assertEqual(h1.GetEntries(), h2.GetEntries())
        self.assertEqual(h1.GetMean(), h2.GetMean())
        self.assertEqual(h1.GetStdDev(), h2.GetStdDev())
コード例 #23
0
ファイル: test_local.py プロジェクト: yeckang/PyRDF
    def test_initialization_method(self):
        """
        Check initialization method in Local backend.

        The init function declares a lambda `getUserValue` in the ROOT
        interpreter which returns the value defined by the user on the
        python side.
        """
        def init(value):
            cpp_code = '''auto getUserValue = [](){return %s ;};''' % value
            ROOT.gInterpreter.Declare(cpp_code)

        PyRDF.initialize(init, 123)
        PyRDF.current_backend = Local()
        # One entry whose 'userValue' column calls the declared lambda;
        # summing it must give back exactly the initialization value.
        df = PyRDF.RDataFrame(1)
        s = df.Define("userValue", "getUserValue()").Sum("userValue")
        self.assertEqual(s.GetValue(), 123)
コード例 #24
0
ファイル: test_headers_include.py プロジェクト: sly2j/PyRDF
    def test_includes_function_with_filter_op(self):
        """
        An integration test to check that the filter operation is able to
        use C++ functions that were included using header files.
        """
        PyRDF.include("tests/integration/local/test_headers/header1.hxx")

        # Entries 0..9; the header-declared function keeps only those
        # below 5, so exactly five entries survive.
        surviving = PyRDF.RDataFrame(10)\
                         .Filter("check_number_less_than_5(tdfentry_)")\
                         .Count()

        self.assertEqual(surviving.GetValue(), 5)
コード例 #25
0
def LoadH4RecoData(files=None):
    """
    Load H4 reco trees (and their friend trees) into a PyRDF RDataFrame.

    :param files: iterable of ROOT file paths to chain together. At least one
        file containing an 'h4' tree is required, otherwise no chain can be
        built and a KeyError is raised (same behavior as before).
    :return: a PyRDF.RDataFrame wrapping the 'h4' TChain, with one friend
        chain attached per friend tree of the first valid file.
    """
    # Avoid the mutable-default-argument pitfall: never use [] as a default.
    if files is None:
        files = []

    chains = {}
    for fname in files:
        f = ROOT.TFile.Open(fname)
        try:
            if f and f.GetListOfKeys().FindObject('h4'):
                if not chains:
                    # First valid file: create the main chain plus one chain
                    # per friend tree, wiring each friend to 'h4'.
                    chains['h4'] = ROOT.TChain('h4')
                    for ft in f.Get('h4').GetListOfFriends():
                        chains[ft.GetName()] = ROOT.TChain(ft.GetName())
                        chains['h4'].AddFriend(chains[ft.GetName()])
                # Register this file with every chain (main and friends).
                for t in chains.values():
                    t.Add(fname)
        finally:
            # Close the handle even for files without an 'h4' tree; the
            # original code leaked those.
            if f:
                f.Close()

    return PyRDF.RDataFrame(chains['h4'])
コード例 #26
0
ファイル: test_friend_trees.py プロジェクト: yeckang/PyRDF
    def test_friend_tree_histo(self):
        """
        Tests that the computational graph can be issued both on the
        parent tree and the friend tree.
        """
        # Helpers defined on the test class write the two .root files read
        # below (parent with mean ~10, friend with mean ~20, per the
        # assertions further down).
        self.create_parent_tree()
        self.create_friend_tree()

        # Parent Tree
        baseTree = ROOT.TChain("T")
        baseTree.Add("treeparent.root")

        # Friend Tree
        friendTree = ROOT.TChain("TF")
        friendTree.Add("treefriend.root")

        # Add friendTree to the parent
        baseTree.AddFriend(friendTree)

        # Create a PyRDF RDataFrame with the parent and the friend trees
        PyRDF.use("spark")
        df = PyRDF.RDataFrame(baseTree)

        # Create histograms: 'x' from the parent, 'TF.x' from the friend
        h_parent = df.Histo1D("x")
        h_friend = df.Histo1D("TF.x")

        # Both trees have the same number of entries, i.e. 10000
        self.assertEqual(h_parent.GetEntries(), 10000)
        self.assertEqual(h_friend.GetEntries(), 10000)

        # Check the mean of the distribution for each tree
        self.assertAlmostEqual(h_parent.GetMean(), 10, delta=0.01)
        self.assertAlmostEqual(h_friend.GetMean(), 20, delta=0.01)

        # Check the standard deviation of the distribution for each tree
        self.assertAlmostEqual(h_parent.GetStdDev(), 1, delta=0.01)
        self.assertAlmostEqual(h_friend.GetStdDev(), 1, delta=0.01)

        # Remove unnecessary .root files
        os.remove("treeparent.root")
        os.remove("treefriend.root")
コード例 #27
0
    def test_change_attribute_when_npartitions_greater_than_clusters(self):
        """
        Check that the `npartitions` class attribute is changed when it is
        greater than the number of clusters in the ROOT file.
        """
        PyRDF.use("spark", {"npartitions": 10})

        from PyRDF import current_backend

        # Before any event loop runs the requested value is stored as-is
        self.assertEqual(current_backend.npartitions, 10)

        treename = "TotemNtuple"
        filelist = ["tests/unit/backend/Slimmed_ntuple.root"]
        df = PyRDF.RDataFrame(treename, filelist)

        # Triggering an action runs the event loop and (re)partitions
        histo = df.Histo1D("track_rp_3.x")
        nentries = histo.GetEntries()

        self.assertEqual(nentries, 10)
        # After the event loop the backend reduced npartitions to 1
        self.assertEqual(current_backend.npartitions, 1)
コード例 #28
0
    def test_tgraph_merge(self):
        """Check the working of TGraph merge operation in the reducer."""
        # Build the same graph through PyRDF and through plain PyROOT.
        graph_py = self.define_two_columns(
            PyRDF.RDataFrame(10)).Graph("x", "y")
        graph_cpp = self.define_two_columns(
            ROOT.ROOT.RDataFrame(10)).Graph("x", "y")

        # Sorting gives both graphs the same point ordering before comparing.
        graph_py.Sort()
        graph_cpp.Sort()

        # Point-by-point comparison of the X and then the Y coordinates.
        self.assertListEqual(list(graph_py.GetX()), list(graph_cpp.GetX()))
        self.assertListEqual(list(graph_py.GetY()), list(graph_cpp.GetY()))
コード例 #29
0
ファイル: test_histo_write.py プロジェクト: yeckang/PyRDF
    def test_write_histo(self):
        """
        Tests that an histogram is correctly written to a .root file created
        before the execution of the event loop.
        """
        # Helper on the test class writes tree_gaus.root and sets
        # self.nentries / self.gaus_mean / self.gaus_stdev
        self.create_tree_with_data()

        # Create a new file where the histogram will be written
        outfile = ROOT.TFile("out_file.root", "recreate")

        # Create a PyRDF RDataFrame reading the tree written above
        PyRDF.use("spark")
        df = PyRDF.RDataFrame("Events", "tree_gaus.root")

        # Create histogram
        histo = df.Histo1D(("x", "x", 100, 0, 20), "x")

        # Write histogram to out_file.root and close the file
        # (out_file.root is the currently open file at this point)
        histo.Write()
        outfile.Close()

        # Reopen file to check that histogram was correctly stored
        reopen_file = ROOT.TFile("out_file.root", "read")
        reopen_histo = reopen_file.Get("x")

        # Check histogram statistics against the generation parameters,
        # within the configured tolerance
        self.assertEqual(reopen_histo.GetEntries(), self.nentries)
        self.assertAlmostEqual(reopen_histo.GetMean(),
                               self.gaus_mean,
                               delta=self.delta_equal)
        self.assertAlmostEqual(reopen_histo.GetStdDev(),
                               self.gaus_stdev,
                               delta=self.delta_equal)

        # Remove unnecessary .root files
        os.remove("tree_gaus.root")
        os.remove("out_file.root")
コード例 #30
0
ファイル: test_dist.py プロジェクト: yeckang/PyRDF
    def test_rdataframe_with_treename_and_list_of_files(self):
        """
        Check clustered ranges produced when the dataset is a list of a multiple
        ROOT files.

        Explanation about required ranges:
        - 2clusters.root contains 1000 entries split into 2 clusters
            ([0, 776], [777, 999]) being 776 and 999 inclusive entries
        - 4clusters.root contains 1000 entries split into 4 clusters
            ([0, 249], [250, 499], [500, 749], [750, 999]) being 249, 499, 749
            and 999 inclusive entries

        Current mechanism to create clustered ranges takes only into account the
        the number of clusters, it is assumed that clusters inside a ROOT file
        are properly distributed and balanced with respect to the number of
        entries.

        Thus, if a dataset is composed by two ROOT files which are poorly
        balanced in terms of clusters and entries, the resultant ranges will
        still respect the cluster boundaries but each one may contain a
        different number of entries.

        Since this case should not be common, ranges required on this test are
        considered the expected result.
        """
        treename = "myTree"
        filelist = [
            "tests/unit/backend/2clusters.root",
            "tests/unit/backend/4clusters.root"
        ]

        rdf = PyRDF.RDataFrame(treename, filelist)

        ranges = self.get_ranges_from_rdataframe(rdf)
        # Unbalanced ranges are expected here (see docstring): boundaries
        # follow clusters, not an even split of the 2000 total entries.
        ranges_reqd = [(0, 1250), (250, 1000)]

        self.assertListEqual(ranges, ranges_reqd)