def test_rdataframe_with_treename_and_filename_with_globbing(self):
    """
    Verify that a glob pattern given as file name is resolved to the
    concrete file before the ranges are built.

    The head node is created from the pattern ``backend/2cluste*.root``;
    the ranges computed from its ``inputfiles`` attribute are expected to
    reference ``backend/2clusters.root``.
    """
    tree = "myTree"
    glob_pattern = "backend/2cluste*.root"
    nparts = 2
    resolved_files = ["backend/2clusters.root"]

    headnode = get_headnode(None, nparts, tree, glob_pattern)

    # Build the ranges from whatever file list the head node extracted
    perc_ranges = Ranges.get_percentage_ranges(
        [tree], headnode.inputfiles, nparts, friendinfo=None)

    # Only the first element returned for each percentage range is kept
    clustered = []
    for perc_range in perc_ranges:
        clustered.append(Ranges.get_clustered_range_from_percs(perc_range)[0])

    actual = treeranges_to_tuples(clustered)
    expected = [
        (0, 777, [0], [777], resolved_files),
        (777, 1000, [777], [1000], resolved_files),
    ]

    self.assertListEqual(actual, expected)
def test_buildranges_with_balanced_ranges(self):
    """
    Verify that ``_build_ranges`` yields balanced ranges when no clusters
    are involved.

    50 entries are split over 16 partitions: the expected result has the
    first two ranges spanning 4 entries and the remaining ones 3 entries.
    """
    nparts = 16
    total_entries = 50

    node = get_headnode(None, nparts, total_entries)
    actual = emptysourceranges_to_tuples(node._build_ranges())

    expected = [
        (0, 4), (4, 8),
        (8, 11), (11, 14), (14, 17), (17, 20), (20, 23),
        (23, 26), (26, 29), (29, 32), (32, 35), (35, 38),
        (38, 41), (41, 44), (44, 47), (47, 50),
    ]

    self.assertListEqual(actual, expected)
def test_rdataframe_with_notreename_and_chain_with_subtrees(self):
    """
    Check proper handling of a TChain with different subnames.

    Two single-tree files (10 entries each, with different tree names) are
    written to the current working directory and added to a nameless
    ``TChain``. The ranges built from the sub-tree names and file names
    extracted by the head node are expected to cover one file each.

    The dummy files are removed in a ``finally`` clause so that they do
    not linger in the working directory when any step of the test raises.
    """
    treename1, filename1 = "entries_1", "entries_1.root"
    treename2, filename2 = "entries_2", "entries_2.root"
    npartitions = 2

    try:
        # Create two dummy files
        ROOT.RDataFrame(10).Define("x", "rdfentry_").Snapshot(treename1, filename1)
        ROOT.RDataFrame(10).Define("x", "rdfentry_").Snapshot(treename2, filename2)

        # "<file>?#<tree>" selects a specific tree inside each file
        chain = ROOT.TChain()
        chain.Add(filename1 + "?#" + treename1)
        chain.Add(filename2 + "?#" + treename2)

        rdf = get_headnode(None, npartitions, chain)
        extracted_subtreenames = rdf.subtreenames
        extracted_filenames = rdf.inputfiles

        percranges = Ranges.get_percentage_ranges(
            extracted_subtreenames, extracted_filenames, npartitions,
            friendinfo=None)
        # Only the first element returned for each percentage range is kept
        clusteredranges = [
            Ranges.get_clustered_range_from_percs(percrange)[0]
            for percrange in percranges
        ]

        ranges = treeranges_to_tuples(clusteredranges)
        ranges_reqd = [
            (0, 10, [0], [10], [filename1]),
            (0, 10, [0], [10], [filename2]),
        ]

        self.assertListEqual(ranges, ranges_reqd)
    finally:
        # Guard on existence: a failed Snapshot may not have produced the
        # file, and a removal error must not mask the original exception.
        for dummy_file in (filename1, filename2):
            if os.path.exists(dummy_file):
                os.remove(dummy_file)
def create_dummy_headnode(*args):
    """
    Build the dummy head node instance used by the tests.

    ``None`` is prepended to the caller's positional arguments before they
    are forwarded to ``get_headnode``.
    """
    # The original comment describes the leading None as `npartitions`, a
    # member the tests then set according to their needs.
    # NOTE(review): other call sites in this file pass `npartitions` as the
    # second argument of `get_headnode` - confirm which parameter this None
    # actually fills.
    forwarded = (None,) + args
    return get_headnode(*forwarded)