def test_incorrect_args(self):
    """Constructor with incorrect arguments"""
    with self.assertRaises(TypeError):
        # Incorrect first argument in 2-argument case
        Node.HeadNode(10, "file.root")

    with self.assertRaises(TypeError):
        # Incorrect third argument in 3-argument case
        Node.HeadNode("treename", "file.root", "column1")

    with self.assertRaises(TypeError):
        # No argument case
        Node.HeadNode()
def test_kwargs_read(self):
    """Named arguments are read accurately."""
    hn = Node.HeadNode(1)
    hn.backend = TestBackend()
    node = Proxy.TransformationProxy(hn)
    newNode = node.Define(1, "b", a="1", b=2)
    self.assertEqual(newNode.operation.kwargs, {"a": "1", "b": 2})
def test_node_pickle(self):
    """
    Test cases to check that nodes can be accurately
    pickled and un-pickled.
    """
    import pickle

    # Node definitions
    # Head node
    hn = Node.HeadNode(1)
    hn.backend = TestBackend()
    node = Proxy.TransformationProxy(hn)
    n1 = node.Define("a", b="c")  # First child node
    n2 = n1.Count()  # noqa: avoid PEP8 F841
    n3 = node.Filter("b")
    n4 = n3.Count()  # noqa: avoid PEP8 F841

    # Pickled representation of nodes
    pickled_node = pickle.dumps(node.proxied_node)
    # n3 is of class Proxy.TransformationProxy, so the proxied node must be
    # accessed before pickling.
    pickled_n3_node = pickle.dumps(n3.proxied_node)

    # Un-pickled node objects
    unpickled_node = pickle.loads(pickled_node)
    unpickled_n3_node = pickle.loads(pickled_n3_node)

    self.assertIsInstance(unpickled_node, type(node.proxied_node))
    self.assertIsInstance(unpickled_n3_node, type(n3.proxied_node))
    self.assertGraphs(node, unpickled_node)
    self.assertGraphs(n3.proxied_node, unpickled_n3_node)
def test_args_read(self):
    """Arguments (unnamed) are read accurately."""
    hn = Node.HeadNode(1)
    hn.backend = TestBackend()
    node = Proxy.TransformationProxy(hn)
    newNode = node.Define(1, "b", a="1", b=2)
    self.assertEqual(newNode.operation.args, [1, "b"])
def test_mapper_from_graph(self):
    """A simple test case to check the working of the mapper."""
    # A mock RDF object
    t = ComputationGraphGeneratorTest.Temp()

    # Head node
    hn = Node.HeadNode(1)
    hn.backend = ComputationGraphGeneratorTest.TestBackend()
    node = Proxy.TransformationProxy(hn)

    # Set of operations to build the graph
    n1 = node.Define()
    n2 = node.Filter().Filter()
    n4 = n2.Count()
    n5 = n1.Count()
    n6 = node.Filter()  # noqa: avoid PEP8 F841

    # Generate and execute the mapper
    generator = ComputationGraphGenerator.ComputationGraphGenerator(
        node.proxied_node)
    mapper_func = generator.get_callable()
    values = mapper_func(t)
    nodes = generator.get_action_nodes()

    reqd_order = [1, 3, 2, 2, 3, 2]

    self.assertEqual(t.ord_list, reqd_order)
    self.assertListEqual(nodes, [n5.proxied_node, n4.proxied_node])
    self.assertListEqual(values, [t, t])
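# A minimal sketch of the kind of mock RDF object (`Temp` above) the mapper
# tests are assumed to rely on, inferred from the expected orders such as
# [1, 3, 2, 2, 3, 2]; the actual helper in the test suite may differ.
class _MockRDF(object):
    """Hypothetical mock RDF object that records the order of operations."""

    def __init__(self):
        self.ord_list = []  # Operation codes in the order they were applied

    def Define(self):
        self.ord_list.append(1)  # A Define call is recorded as 1
        return self

    def Filter(self):
        self.ord_list.append(2)  # A Filter call is recorded as 2
        return self

    def Count(self):
        self.ord_list.append(3)  # A Count call is recorded as 3
        return self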
def test_get_state(self):
    """
    Test cases to check the working of the __getstate__ method on the
    Node class.
    """
    hn = Node.HeadNode(1)
    hn.backend = TestBackend()
    node = Proxy.TransformationProxy(hn)
    n1 = node.Define("a", b="c")  # First child node

    # Required dictionaries
    node_dict = {"children": [n1.proxied_node]}
    n1_dict = {
        "operation_name": "Define",
        "operation_args": ["a"],
        "operation_kwargs": {"b": "c"},
        "children": []
    }

    # Nodes are wrapped by TransformationProxies, so the proxied nodes
    # must be accessed in order to extract their dictionaries.
    self.assertDictEqual(node.proxied_node.__getstate__(), node_dict)
    self.assertDictEqual(n1.proxied_node.__getstate__(), n1_dict)
def test_attr_read(self):
    """Function names are read accurately."""
    hn = Node.HeadNode(1)
    hn.backend = TestBackend()
    node = Proxy.TransformationProxy(hn)
    func = node.Define  # noqa: avoid PEP8 F841
    self.assertEqual(node._new_op_name, "Define")
def test_dfs_graph_with_computed_values_pruning(self):
    """
    Test case to check that action nodes with already computed values
    get pruned.
    """
    # Head node
    hn = Node.HeadNode(1)
    hn.backend = TestBackend()
    node = Proxy.TransformationProxy(hn)

    # Graph nodes
    n1 = node.Define()
    n2 = node.Filter()
    n3 = n2.Filter()
    n4 = n3.Count()  # noqa: avoid PEP8 F841
    n5 = n1.Filter()
    n6 = n5.Count()
    n7 = node.Filter()

    # This is to make sure action nodes with
    # already computed values are pruned.
    n6.proxied_node.value = 1

    # This is to make sure that transformation
    # leaf nodes with a value (possibly set intentionally)
    # don't get pruned.
    n7.value = 1  # noqa: avoid PEP8 F841

    obtained_order = DfsTest.traverse(node=node.get_head())

    # The node 'n6' will be pruned. Hence,
    # there's only one '3' in this list.
    reqd_order = [1, 2, 2, 2, 3, 2]

    self.assertEqual(obtained_order, reqd_order)
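# The DFS pruning tests in this section compare against lists such as
# [1, 2, 2, 2, 3, 2]. A minimal sketch of the traversal helper they are
# assumed to use (`DfsTest.traverse`), collecting operation codes
# (Define -> 1, Filter -> 2, Count -> 3) in depth-first pre-order starting
# from the head node; the actual helper may differ.
def _traverse_preorder(node, order=None):
    """Hypothetical DFS pre-order walk collecting operation codes."""
    codes = {"Define": 1, "Filter": 2, "Count": 3}
    if order is None:
        order = []
    if getattr(node, "operation", None) is not None:
        order.append(codes[node.operation.name])
    for child in node.children:
        _traverse_preorder(child, order)
    return order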
def test_dfs_graph_with_parent_pruning(self):
    """
    Test case to check that parent nodes with no user references but
    with children don't get pruned.
    """
    # Head node
    hn = Node.HeadNode(1)
    hn.backend = TestBackend()
    node = Proxy.TransformationProxy(hn)

    # Graph nodes
    n1 = node.Define()
    n2 = node.Filter()
    n3 = n2.Filter()
    n4 = n3.Count()  # noqa: avoid PEP8 F841
    n5 = n1.Filter()  # noqa: avoid PEP8 F841
    n6 = node.Filter()  # noqa: avoid PEP8 F841

    # Remove references from n2 (which shouldn't affect the graph)
    n2 = None

    obtained_order = DfsTest.traverse(node=node.get_head())

    reqd_order = [1, 2, 2, 2, 3, 2]

    # Removing references from n2 will not prune any node
    # because n2 still has children
    self.assertEqual(obtained_order, reqd_order)
def test_dfs_graph_with_recursive_pruning(self):
    """
    Test case to check that nodes in a DistRDF graph with no user
    references and no children get pruned recursively.
    """
    # Head node
    hn = Node.HeadNode(1)
    hn.backend = TestBackend()
    node = Proxy.TransformationProxy(hn)

    # Graph nodes
    n1 = node.Define()
    n2 = node.Filter()
    n3 = n2.Filter()
    n4 = n3.Count()  # noqa: avoid PEP8 F841
    n5 = n1.Filter()  # noqa: avoid PEP8 F841
    n6 = node.Filter()  # noqa: avoid PEP8 F841

    # Remove references from n4 and its parent nodes
    n4 = n3 = n2 = None  # noqa: avoid PEP8 F841

    obtained_order = DfsTest.traverse(node=node.get_head())

    reqd_order = [1, 2, 2]

    self.assertEqual(obtained_order, reqd_order)
def test_dfs_graph_with_pruning_transformations(self):
    """
    Test case to check that transformation nodes with no children and
    no user references get pruned.
    """
    # Head node
    hn = Node.HeadNode(1)
    hn.backend = TestBackend()
    node = Proxy.TransformationProxy(hn)

    # Graph nodes
    n1 = node.Define()
    n2 = node.Filter()
    n3 = n2.Filter()
    n4 = n3.Count()  # noqa: avoid PEP8 F841
    n5 = n1.Filter()  # noqa: avoid PEP8 F841
    n6 = node.Filter()  # noqa: avoid PEP8 F841

    # Transformation pruning, n5 was earlier a transformation node
    n5 = n1.Count()  # noqa: avoid PEP8 F841

    obtained_order = DfsTest.traverse(node=node.get_head())

    reqd_order = [1, 3, 2, 2, 3, 2]

    self.assertEqual(obtained_order, reqd_order)
def test_transformation_proxy_return(self):
    """Proxy objects wrapping Node objects are returned for transformations."""
    hn = Node.HeadNode(1)
    hn.backend = TestBackend()
    node = Proxy.TransformationProxy(hn)
    newNode = node.Define(1)
    self.assertIsInstance(newNode, Proxy.TransformationProxy)
    self.assertIsInstance(newNode.proxied_node, Node.Node)
def __init__(self, *args):
    """Initialize the head node and its transformation proxy."""
    self.headnode = Node.HeadNode(*args)
    self.headnode.backend = DistRDataFrameInterface.TestBackend()
    self.headproxy = Proxy.TransformationProxy(self.headnode)
def test_three_args_with_single_file(self):
    """Constructor with TTree, one input file and selected branches"""
    rdf_branches = ["branch1", "branch2"]

    # Convert RDF branches list to ROOT CPP Vector
    reqd_vec = ROOT.std.vector("string")()
    for elem in rdf_branches:
        reqd_vec.push_back(elem)

    # RDataFrame constructor with 3rd argument as Python list
    hn_1 = Node.HeadNode("treename", "file.root", rdf_branches)

    # RDataFrame constructor with 3rd argument as ROOT CPP Vector
    hn_2 = Node.HeadNode("treename", "file.root", reqd_vec)

    self.assertArgs(hn_1.args, ["treename", "file.root", rdf_branches])
    self.assertArgs(hn_2.args, ["treename", "file.root", reqd_vec])
def test_num_entries_two_args_case(self):
    """
    Ensure that the number of entries recorded is correct in the case
    of two arguments to the RDataFrame constructor.
    """
    self.fill_tree(1111)  # Create the input dataset with 1111 entries
    files_vec = ROOT.std.vector("string")()
    files_vec.push_back("data.root")

    # Create RDataFrame instances
    hn = Node.HeadNode("tree", "data.root")
    hn_1 = Node.HeadNode("tree", ["data.root"])
    hn_2 = Node.HeadNode("tree", files_vec)

    self.assertEqual(hn.get_num_entries(), 1111)
    self.assertEqual(hn_1.get_num_entries(), 1111)
    self.assertEqual(hn_2.get_num_entries(), 1111)
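# The num_entries tests use a fill_tree(nentries) fixture that is not shown
# here. A minimal sketch of what it is assumed to do, based on how the tests
# read the data back (a TTree called "tree" inside "data.root"); the real
# helper may differ.
def _fill_tree_sketch(nentries, filename="data.root", treename="tree"):
    """Hypothetical helper writing a TTree with `nentries` entries to a file."""
    from array import array
    import ROOT
    f = ROOT.TFile(filename, "RECREATE")
    tree = ROOT.TTree(treename, "Test tree")
    x = array("i", [0])
    tree.Branch("x", x, "x/I")
    for i in range(nentries):
        x[0] = i
        tree.Fill()
    f.Write()
    f.Close()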
def test_num_entries_single_arg_case(self):
    """
    Ensure that the number of entries recorded is correct in the case
    of a single integer argument to RDataFrame.
    """
    hn = Node.HeadNode(123)  # Create a HeadNode instance
    self.assertEqual(hn.get_num_entries(), 123)
def test_two_args(self):
    """Constructor with list of input files"""
    rdf_2_files = ["file1.root", "file2.root"]

    # Convert RDF files list to ROOT CPP vector
    reqd_vec = ROOT.std.vector("string")()
    for elem in rdf_2_files:
        reqd_vec.push_back(elem)

    # RDataFrame constructor with 2nd argument as string
    hn_1 = Node.HeadNode("treename", "file.root")

    # RDataFrame constructor with 2nd argument as Python list
    hn_2 = Node.HeadNode("treename", rdf_2_files)

    # RDataFrame constructor with 2nd argument as ROOT CPP Vector
    hn_3 = Node.HeadNode("treename", reqd_vec)

    self.assertArgs(hn_1.args, ["treename", "file.root"])
    self.assertArgs(hn_2.args, ["treename", rdf_2_files])
    self.assertArgs(hn_3.args, ["treename", reqd_vec])
def test_num_entries_three_args_case(self):
    """
    Ensure that the number of entries recorded is correct in the case
    of three arguments to the RDataFrame constructor.
    """
    self.fill_tree(1234)  # Create the input dataset with 1234 entries
    branches_vec_1 = ROOT.std.vector("string")()
    branches_vec_2 = ROOT.std.vector("string")()
    branches_vec_1.push_back("b1")
    branches_vec_2.push_back("b2")

    # Create RDataFrame instances
    hn = Node.HeadNode("tree", "data.root", ["b1"])
    hn_1 = Node.HeadNode("tree", "data.root", ["b2"])
    hn_2 = Node.HeadNode("tree", "data.root", branches_vec_1)
    hn_3 = Node.HeadNode("tree", "data.root", branches_vec_2)

    self.assertEqual(hn.get_num_entries(), 1234)
    self.assertEqual(hn_1.get_num_entries(), 1234)
    self.assertEqual(hn_2.get_num_entries(), 1234)
    self.assertEqual(hn_3.get_num_entries(), 1234)
def test_inmemory_tree(self):
    """Constructor with an in-memory-only tree is not supported"""
    tree = ROOT.TTree("tree", "Tree in memory")
    x = array("i", [0])
    tree.Branch("x", x, "x/I")
    for i in range(100):
        x[0] = i
        tree.Fill()

    headnode = Node.HeadNode(tree)

    with self.assertRaises(RuntimeError):
        # Trees with no associated files are not supported
        headnode.get_inputfiles()
def test_three_args_with_multiple_files(self):
    """Constructor with TTree, list of input files and selected branches"""
    rdf_branches = ["branch1", "branch2"]
    rdf_files = ["file1.root", "file2.root"]

    # Convert RDF files list to ROOT CPP Vector
    reqd_files_vec = ROOT.std.vector("string")()
    for elem in rdf_files:
        reqd_files_vec.push_back(elem)

    # Convert RDF branches list to ROOT CPP Vector
    reqd_branches_vec = ROOT.std.vector("string")()
    for elem in rdf_branches:
        reqd_branches_vec.push_back(elem)

    # RDataFrame constructor with 2nd argument as Python List
    # and 3rd argument as Python List
    hn_1 = Node.HeadNode("treename", rdf_files, rdf_branches)

    # RDataFrame constructor with 2nd argument as Python List
    # and 3rd argument as ROOT CPP Vector
    hn_2 = Node.HeadNode("treename", rdf_files, reqd_branches_vec)

    # RDataFrame constructor with 2nd argument as ROOT CPP Vector
    # and 3rd argument as Python List
    hn_3 = Node.HeadNode("treename", reqd_files_vec, rdf_branches)

    # RDataFrame constructor with 2nd and 3rd arguments as ROOT
    # CPP Vectors
    hn_4 = Node.HeadNode("treename", reqd_files_vec, reqd_branches_vec)

    self.assertArgs(hn_1.args, ["treename", rdf_files, rdf_branches])
    self.assertArgs(hn_2.args, ["treename", rdf_files, reqd_branches_vec])
    self.assertArgs(hn_3.args, ["treename", reqd_files_vec, rdf_branches])
    self.assertArgs(
        hn_4.args, ["treename", reqd_files_vec, reqd_branches_vec])
def test_num_entries_with_ttree_arg(self):
    """
    Ensure that the number of entries recorded is correct in the case
    of the RDataFrame constructor with a TTree.
    """
    tree = ROOT.TTree("tree", "test")  # Create tree
    v = ROOT.std.vector("int")(4)  # Create a vector of 0s of size 4
    tree.Branch("vectorb", v)  # Create branch to hold the vector

    for i in range(4):
        v[i] = 1  # Change the vector element to 1
        tree.Fill()  # Fill the tree with that element

    hn = Node.HeadNode(tree)

    self.assertEqual(hn.get_num_entries(), 4)
def test_set_state(self):
    """
    Test cases to check the working of the __setstate__ method on the
    Node class.
    """
    # Head node
    hn = Node.HeadNode(1)
    hn.backend = TestBackend()
    node = Proxy.TransformationProxy(hn)
    nn1 = Node.Node(None, None)
    nn1.backend = TestBackend()
    n1 = Proxy.TransformationProxy(nn1)

    # State dictionaries
    node_dict = {"children": [n1]}
    n1_dict = {
        "operation_name": "Define",
        "operation_args": ["a"],
        "operation_kwargs": {"b": "c"},
        "children": []
    }

    # Set node objects with state dicts
    node.proxied_node.__setstate__(node_dict)
    n1.proxied_node.__setstate__(n1_dict)

    self.assertListEqual(
        [node.operation, node.children],
        [None, node_dict["children"]])
    self.assertListEqual(
        [n1.operation.name, n1.operation.args,
         n1.operation.kwargs, n1.children],
        [n1_dict["operation_name"], n1_dict["operation_args"],
         n1_dict["operation_kwargs"], n1_dict["children"]])
def test_dfs_graph_without_pruning(self):
    """
    Test case to check that node pruning does not occur if every node
    either has children or some user references.
    """
    # Head node
    hn = Node.HeadNode(1)
    hn.backend = TestBackend()
    node = Proxy.TransformationProxy(hn)

    # Graph nodes
    n1 = node.Define()
    n2 = node.Filter()
    n3 = n2.Filter()
    n4 = n3.Count()  # noqa: avoid PEP8 F841
    n5 = n1.Count()  # noqa: avoid PEP8 F841
    n6 = node.Filter()  # noqa: avoid PEP8 F841

    obtained_order = DfsTest.traverse(node=node.get_head())

    reqd_order = [1, 3, 2, 2, 3, 2]

    self.assertEqual(obtained_order, reqd_order)
def test_mapper_with_pruning(self):
    """
    A test case to check that the mapper works even in the case of
    pruning.
    """
    # A mock RDF object
    t = ComputationGraphGeneratorTest.Temp()

    # Head node
    hn = Node.HeadNode(1)
    hn.backend = ComputationGraphGeneratorTest.TestBackend()
    node = Proxy.TransformationProxy(hn)

    # Set of operations to build the graph
    n1 = node.Define()
    n2 = node.Filter().Filter()
    n4 = n2.Count()
    n5 = n1.Count()
    n6 = node.Filter()  # noqa: avoid PEP8 F841

    # Reason for pruning (change of reference)
    n5 = n1.Filter()  # noqa: avoid PEP8 F841

    # Generate and execute the mapper
    generator = ComputationGraphGenerator.ComputationGraphGenerator(
        node.proxied_node)
    mapper_func = generator.get_callable()
    values = mapper_func(t)
    nodes = generator.get_action_nodes()

    reqd_order = [1, 2, 2, 2, 3, 2]

    self.assertEqual(t.ord_list, reqd_order)
    self.assertListEqual(nodes, [n4.proxied_node])
    self.assertListEqual(values, [t])
def make_dataframe(self, *args, **kwargs):
    """Create an RDataFrame instance wired to this backend."""
    headnode = Node.HeadNode(*args)
    return DataFrame.RDataFrame(headnode, self, **kwargs)
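# A short usage sketch for the make_dataframe helper above. The backend class
# name `SomeBackend` is illustrative only; any backend exposing this method
# would be used the same way:
#
#     backend = SomeBackend()
#     df = backend.make_dataframe("treename", "file.root")
#     # `df` is a DataFrame.RDataFrame whose head node wraps the constructor
#     # arguments and keeps a reference to the backend that created it.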
def test_integer_arg(self):
    """Constructor with number of entries"""
    hn = Node.HeadNode(10)
    self.assertListEqual(hn.args, [10])