def test_mapper_from_graph(self):
    """Check that the generated mapper replays the graph on a mock RDF."""
    # Mock RDF object; the operations applied to it end up in `ord_list`
    mock_rdf = ComputationGraphGeneratorTest.Temp()

    # Head node wrapped in a transformation proxy
    headnode = create_dummy_headnode(1)
    headnode.backend = ComputationGraphGeneratorTest.TestBackend()
    head_proxy = Proxy.TransformationProxy(headnode)

    # Build the graph. Creation order and the references kept alive here
    # are part of the scenario, so they must not be rearranged.
    define_branch = head_proxy.Define()
    filter_branch = head_proxy.Filter().Filter()
    filter_count = filter_branch.Count()
    define_count = define_branch.Count()
    leaf_filter = head_proxy.Filter()  # noqa: avoid PEP8 F841

    # Generate and execute the mapper
    generator = ComputationGraphGenerator.ComputationGraphGenerator(
        head_proxy.proxied_node)
    triggerables = generator.generate_computation_graph(mock_rdf, 0)
    action_nodes = generator.get_action_nodes()

    expected_order = [1, 3, 2, 2, 3, 2]
    self.assertEqual(mock_rdf.ord_list, expected_order)
    self.assertListEqual(
        action_nodes,
        [define_count.proxied_node, filter_count.proxied_node])
    # One triggerable is expected per action node
    self.assertListEqual(triggerables, [mock_rdf, mock_rdf])
def test_dfs_graph_with_parent_pruning(self):
    """
    A parent node that lost its user reference must survive pruning as
    long as it still has children.
    """
    # Head node
    headnode = create_dummy_headnode(1)
    headnode.backend = TestBackend()
    head_proxy = Proxy.TransformationProxy(headnode)

    # Graph nodes
    define_node = head_proxy.Define()
    outer_filter = head_proxy.Filter()
    inner_filter = outer_filter.Filter()
    count_node = inner_filter.Count()  # noqa: avoid PEP8 F841
    define_child = define_node.Filter()  # noqa: avoid PEP8 F841
    leaf_filter = head_proxy.Filter()  # noqa: avoid PEP8 F841

    # Drop the user reference to the outer filter; it still has a child,
    # so the traversal below is expected to be unaffected.
    outer_filter = None

    obtained_order = DfsTest.traverse(node=head_proxy.get_head())

    expected_order = [1, 2, 2, 2, 3, 2]
    self.assertEqual(obtained_order, expected_order)
def test_dfs_graph_with_computed_values_pruning(self):
    """
    Action nodes whose value has already been computed must be pruned.
    """
    # Head node
    headnode = create_dummy_headnode(1)
    headnode.backend = TestBackend()
    head_proxy = Proxy.TransformationProxy(headnode)

    # Graph nodes
    define_node = head_proxy.Define()
    outer_filter = head_proxy.Filter()
    inner_filter = outer_filter.Filter()
    pending_count = inner_filter.Count()  # noqa: avoid PEP8 F841
    define_child = define_node.Filter()
    computed_count = define_child.Count()
    leaf_filter = head_proxy.Filter()

    # Mark this action node as already computed so that it gets pruned.
    computed_count.proxied_node.value = 1

    # A transformation leaf carrying a value (possibly set on purpose)
    # must not be pruned.
    # NOTE(review): the assignment targets the proxy, not `proxied_node`;
    # confirm it is meant to reach the underlying node.
    leaf_filter.value = 1  # noqa: avoid PEP8 F841

    obtained_order = DfsTest.traverse(node=head_proxy.get_head())

    # `computed_count` is pruned, hence a single '3' in the expected list.
    expected_order = [1, 2, 2, 2, 3, 2]
    self.assertEqual(obtained_order, expected_order)
def test_attr_read(self):
    """The name of the operation looked up on the proxy is read back."""
    headnode = create_dummy_headnode(1)
    headnode.backend = TestBackend()
    head_proxy = Proxy.TransformationProxy(headnode)

    # Only the attribute lookup is exercised; the operation is not called.
    define_op = head_proxy.Define  # noqa: avoid PEP8 F841

    self.assertEqual(head_proxy._new_op_name, "Define")
def test_get_state(self):
    """
    Check the dictionaries returned by `__getstate__` for a head node and
    for its first child.
    """
    headnode = create_dummy_headnode(1)
    headnode.backend = TestBackend()
    head_proxy = Proxy.TransformationProxy(headnode)
    child_proxy = head_proxy.Define("a", b="c")  # First child node

    # Expected state dictionaries
    expected_head_state = {"children": [child_proxy.proxied_node]}
    expected_child_state = {
        "operation_name": "Define",
        "operation_args": ["a"],
        "operation_kwargs": {"b": "c"},
        "children": [],
    }

    # The nodes are wrapped by TransformationProxies, so the state has to
    # be requested from the proxied nodes themselves.
    head_state = head_proxy.proxied_node.__getstate__()
    self.assertDictEqual(head_state, expected_head_state)

    child_state = child_proxy.proxied_node.__getstate__()
    self.assertDictEqual(child_state, expected_child_state)
def test_args_read(self):
    """Positional arguments of an operation are stored as given."""
    headnode = create_dummy_headnode(1)
    headnode.backend = TestBackend()
    head_proxy = Proxy.TransformationProxy(headnode)

    define_proxy = head_proxy.Define(1, "b", a="1", b=2)

    expected_args = [1, "b"]
    self.assertEqual(define_proxy.operation.args, expected_args)
def test_node_pickle(self):
    """
    Check that nodes can be pickled and un-pickled without losing their
    type or the structure of the graph below them.

    Both comparisons are made between proxied nodes and their un-pickled
    copies, so that `assertGraphs` always receives two objects of the same
    kind (previously the head was passed as a proxy while `n3` was passed
    as a proxied node).
    """
    import pickle

    # Node definitions
    # Head node
    hn = create_dummy_headnode(1)
    hn.backend = TestBackend()
    node = Proxy.TransformationProxy(hn)

    n1 = node.Define("a", b="c")  # First child node
    n2 = n1.Count()  # noqa: avoid PEP8 F841
    n3 = node.Filter("b")
    n4 = n3.Count()  # noqa: avoid PEP8 F841

    # `node` and `n3` are TransformationProxies, so the proxied nodes must
    # be accessed before pickling.
    head_node = node.proxied_node
    filter_node = n3.proxied_node

    # Pickled representation of nodes
    pickled_node = pickle.dumps(head_node)
    pickled_n3_node = pickle.dumps(filter_node)

    # Un-pickled node objects
    unpickled_node = pickle.loads(pickled_node)
    unpickled_n3_node = pickle.loads(pickled_n3_node)

    self.assertIsInstance(unpickled_node, type(head_node))
    self.assertIsInstance(unpickled_n3_node, type(filter_node))

    # Compare node against node in both cases
    self.assertGraphs(head_node, unpickled_node)
    self.assertGraphs(filter_node, unpickled_n3_node)
def test_kwargs_read(self):
    """Keyword arguments of an operation are stored as given."""
    headnode = create_dummy_headnode(1)
    headnode.backend = TestBackend()
    head_proxy = Proxy.TransformationProxy(headnode)

    define_proxy = head_proxy.Define(1, "b", a="1", b=2)

    expected_kwargs = {"a": "1", "b": 2}
    self.assertEqual(define_proxy.operation.kwargs, expected_kwargs)
def test_dfs_graph_without_pruning(self):
    """
    No pruning must happen when every node either has children or is
    still referenced by the user.
    """
    # Mock RDF object; the operations applied to it end up in `ord_list`
    mock_rdf = ComputationGraphGeneratorTest.Temp()

    # Head node
    headnode = create_dummy_headnode(1)
    headnode.backend = ComputationGraphGeneratorTest.TestBackend()
    head_proxy = Proxy.TransformationProxy(headnode)

    # Graph nodes (all references are kept alive on purpose)
    define_node = head_proxy.Define()
    outer_filter = head_proxy.Filter()
    inner_filter = outer_filter.Filter()
    filter_count = inner_filter.Count()  # noqa: avoid PEP8 F841
    define_count = define_node.Count()  # noqa: avoid PEP8 F841
    leaf_filter = head_proxy.Filter()  # noqa: avoid PEP8 F841

    # Generate and execute the mapper
    graph_dict = headnode._generate_graph_dict()
    triggerables = ComputationGraphGenerator.generate_computation_graph(
        graph_dict, mock_rdf, 0)
    action_nodes = headnode._get_action_nodes()

    expected_order = [1, 2, 2, 3, 3, 2]
    self.assertEqual(mock_rdf.ord_list, expected_order)
    self.assertListEqual(
        action_nodes,
        [filter_count.proxied_node, define_count.proxied_node])
    # One occurrence of the mock per action node
    self.assertListEqual(triggerables, [mock_rdf, mock_rdf])
def test_mapper_from_graph(self):
    """Check that the mapper built from the graph dictionary works."""
    # Mock RDF object; the operations applied to it end up in `ord_list`
    mock_rdf = ComputationGraphGeneratorTest.Temp()

    # Head node
    headnode = create_dummy_headnode(1)
    headnode.backend = ComputationGraphGeneratorTest.TestBackend()
    head_proxy = Proxy.TransformationProxy(headnode)

    # Operations that build the graph. Creation order and the references
    # kept alive here are part of the scenario.
    define_branch = head_proxy.Define()
    filter_branch = head_proxy.Filter().Filter()
    filter_count = filter_branch.Count()
    define_count = define_branch.Count()
    leaf_filter = head_proxy.Filter()  # noqa: avoid PEP8 F841

    # Generate and execute the mapper
    graph_dict = headnode._generate_graph_dict()
    triggerables = ComputationGraphGenerator.generate_computation_graph(
        graph_dict, mock_rdf, 0)
    action_nodes = headnode._get_action_nodes()

    # Expected order of the recorded values (the nodes are stored in DFS
    # order the first time they are appended to the graph)
    expected_order = [1, 2, 2, 3, 3, 2]
    self.assertEqual(mock_rdf.ord_list, expected_order)
    self.assertListEqual(
        action_nodes,
        [filter_count.proxied_node, define_count.proxied_node])
    self.assertListEqual(triggerables, [mock_rdf, mock_rdf])
def test_dfs_graph_with_pruning_transformations(self):
    """
    Transformation nodes that have neither children nor user references
    must be pruned.
    """
    # Head node
    headnode = create_dummy_headnode(1)
    headnode.backend = TestBackend()
    head_proxy = Proxy.TransformationProxy(headnode)

    # Graph nodes
    define_node = head_proxy.Define()
    outer_filter = head_proxy.Filter()
    inner_filter = outer_filter.Filter()
    filter_count = inner_filter.Count()  # noqa: avoid PEP8 F841
    define_child = define_node.Filter()  # noqa: avoid PEP8 F841
    leaf_filter = head_proxy.Filter()  # noqa: avoid PEP8 F841

    # Rebinding the same name drops the only user reference to the
    # Filter created above, which is what makes it prunable.
    define_child = define_node.Count()  # noqa: avoid PEP8 F841

    obtained_order = DfsTest.traverse(node=head_proxy.get_head())

    expected_order = [1, 3, 2, 2, 3, 2]
    self.assertEqual(obtained_order, expected_order)
def test_dfs_graph_with_recursive_pruning(self):
    """
    Nodes of a DistRDF graph that have neither user references nor
    children must be pruned recursively.
    """
    # Head node
    headnode = create_dummy_headnode(1)
    headnode.backend = TestBackend()
    head_proxy = Proxy.TransformationProxy(headnode)

    # Graph nodes
    define_node = head_proxy.Define()
    outer_filter = head_proxy.Filter()
    inner_filter = outer_filter.Filter()
    count_node = inner_filter.Count()  # noqa: avoid PEP8 F841
    define_child = define_node.Filter()  # noqa: avoid PEP8 F841
    leaf_filter = head_proxy.Filter()  # noqa: avoid PEP8 F841

    # Remove the references to the action node and to its parent nodes,
    # leaf first, in the same order as a chained assignment would.
    count_node = None  # noqa: avoid PEP8 F841
    inner_filter = None  # noqa: avoid PEP8 F841
    outer_filter = None  # noqa: avoid PEP8 F841

    obtained_order = DfsTest.traverse(node=head_proxy.get_head())

    expected_order = [1, 2, 2]
    self.assertEqual(obtained_order, expected_order)
def __init__(self, headnode, backend):
    """
    Store the head node, attach the backend to it and wrap it in a
    transformation proxy.

    Args:
        headnode: Head node of the computation graph.
        backend: Backend object assigned to `headnode.backend`.
    """
    self._headnode = headnode
    # The head node carries the backend, so it is attached to the node
    # rather than stored on this object.
    headnode.backend = backend
    self._headproxy = Proxy.TransformationProxy(headnode)
def test_action_proxy_return(self):
    """Calling an action returns an ActionProxy that wraps a Node."""
    headnode = Node.HeadNode(1)
    headnode.backend = TestBackend()
    head_proxy = Proxy.TransformationProxy(headnode)

    count_proxy = head_proxy.Count()

    self.assertIsInstance(count_proxy, Proxy.ActionProxy)
    self.assertIsInstance(count_proxy.proxied_node, Node.Node)
def __init__(self, *args):
    """
    Build a head node from `args`, give it a test backend and wrap it in
    a transformation proxy.

    Args:
        *args: Forwarded unchanged to `Node.HeadNode`.
    """
    headnode = Node.HeadNode(*args)
    self.headnode = headnode
    headnode.backend = DistRDataFrameInterface.TestBackend()
    self.headproxy = Proxy.TransformationProxy(headnode)
def test_transformation_proxy_return(self):
    """Calling a transformation returns a TransformationProxy on a Node."""
    headnode = create_dummy_headnode(1)
    headnode.backend = TestBackend()
    head_proxy = Proxy.TransformationProxy(headnode)

    define_proxy = head_proxy.Define(1)

    self.assertIsInstance(define_proxy, Proxy.TransformationProxy)
    self.assertIsInstance(define_proxy.proxied_node, Node.Node)
def __init__(self, *args):
    """
    Build a head node from `args`, give it a test backend and wrap it in
    a transformation proxy.

    Args:
        *args: Forwarded to `HeadNode.get_headnode` after the leading
            `npartitions` argument.
    """
    # `npartitions` is passed as None; the tests will change it as needed.
    headnode = HeadNode.get_headnode(None, *args)
    self.headnode = headnode
    headnode.backend = DistRDataFrameInterface.TestBackend()
    self.headproxy = Proxy.TransformationProxy(headnode)
def test_undefined_attr_transformation(self):
    """
    Reading an attribute that the Node class does not define through a
    TransformationProxy must raise an AttributeError.
    """
    plain_node = Node.Node(None, None)
    plain_node.backend = None
    node_proxy = Proxy.TransformationProxy(plain_node)

    with self.assertRaises(AttributeError):
        # Plain attribute read, spelled with getattr so that it does not
        # look like a stray expression statement.
        getattr(node_proxy, "attribute")
def test_type_return_transformation(self):
    """
    A TransformationProxy is an instance of `Proxy.TransformationProxy`
    and exposes the wrapped node as `proxied_node`.
    """
    plain_node = Node.Node(None, None)
    plain_node.backend = None

    node_proxy = Proxy.TransformationProxy(plain_node)

    self.assertIsInstance(node_proxy, Proxy.TransformationProxy)
    self.assertIsInstance(node_proxy.proxied_node, Node.Node)
def __init__(self, headnode, backend, **kwargs):
    """
    Store the head node, attach the backend and the number of partitions
    to it, and wrap it in a transformation proxy.

    Args:
        headnode: Head node of the computation graph.
        backend: Backend object assigned to `headnode.backend`.
        **kwargs: Only `npartitions` is read here; it defaults to 2 when
            not supplied.
    """
    self._headnode = headnode
    headnode.backend = backend
    headnode.npartitions = kwargs.get("npartitions", 2)
    self._headproxy = Proxy.TransformationProxy(headnode)
def test_proxied_node_has_user_references(self):
    """
    The user reference on a node must hold for as long as its proxy is
    alive.

    Once the last reference to the proxy goes away, the proxy's `__del__`
    is expected to flip the node attribute `has_user_references` from
    `True` to `False`.
    """
    plain_node = Node.Node(None, None)
    plain_node.backend = None

    node_proxy = Proxy.TransformationProxy(plain_node)
    self.assertTrue(plain_node.has_user_references)

    # Rebinding the only name that refers to the proxy releases it.
    node_proxy = None  # noqa: avoid PEP8 F841
    self.assertFalse(plain_node.has_user_references)
def test_set_state(self):
    """
    Check that `__setstate__` restores the operation and the children of
    a head node and of a regular node.
    """
    # Head node
    headnode = create_dummy_headnode(1)
    headnode.backend = TestBackend()
    head_proxy = Proxy.TransformationProxy(headnode)

    # Regular node that receives the "Define" state below
    plain_node = Node.Node(None, None)
    plain_node.backend = TestBackend()
    child_proxy = Proxy.TransformationProxy(plain_node)

    # State dictionaries
    head_state = {"children": [child_proxy]}
    child_state = {
        "operation_name": "Define",
        "operation_args": ["a"],
        "operation_kwargs": {"b": "c"},
        "children": [],
    }

    # Apply the state dictionaries to the proxied nodes
    head_proxy.proxied_node.__setstate__(head_state)
    child_proxy.proxied_node.__setstate__(child_state)

    # The head node has no operation, only children
    restored_head = [head_proxy.operation, head_proxy.children]
    self.assertListEqual(restored_head, [None, head_state["children"]])

    restored_child = [
        child_proxy.operation.name,
        child_proxy.operation.args,
        child_proxy.operation.kwargs,
        child_proxy.children,
    ]
    expected_child = [
        child_state["operation_name"],
        child_state["operation_args"],
        child_state["operation_kwargs"],
        child_state["children"],
    ]
    self.assertListEqual(restored_child, expected_child)
def test_nodes_gt_python_recursion_limit(self):
    """
    Check that graphs with more nodes than the default Python limit on
    recursive function calls (1000) can be handled.
    """
    nodes_per_branch = 1000

    # Mock RDF object; the operations applied to it end up in `ord_list`
    mock_rdf = ComputationGraphGeneratorTest.Temp()

    # Head node
    headnode = create_dummy_headnode(1)
    headnode.backend = ComputationGraphGeneratorTest.TestBackend()
    head_proxy = Proxy.TransformationProxy(headnode)

    # Create two branches, one of Defines and one of Filters
    first_branch = head_proxy.Define()
    second_branch = head_proxy.Filter()
    # Append `nodes_per_branch` nodes to each branch, alternating between
    # the two so that the creation order is interleaved
    for _ in range(nodes_per_branch):
        first_branch = first_branch.Define()
        second_branch = second_branch.Filter()

    # Generate and execute the mapper
    mapper_func = ComputationGraphGenerator.generate_computation_graph
    mapper_func(headnode._generate_graph_dict(), mock_rdf, 0)

    # Expected order of the recorded values (the nodes are stored in DFS
    # order the first time they are appended to the graph):
    # one initial node plus `nodes_per_branch` nodes for each branch
    expected_order = [1, 2] * (1 + nodes_per_branch)
    self.assertEqual(mock_rdf.ord_list, expected_order)

    # Overwrite the branches so that the old ones lose their user
    # references and pruning is triggered later
    first_branch = head_proxy.Filter()
    second_branch = head_proxy.Define()
    for _ in range(nodes_per_branch):
        first_branch = first_branch.Filter()
        second_branch = second_branch.Define()

    # Reset the mock list so that values of the old nodes are not kept
    mock_rdf.ord_list = []

    # Generate and execute the mapper again
    mapper_func(headnode._generate_graph_dict(), mock_rdf, 0)

    # Same reasoning as above, with the two operations swapped
    expected_order = [2, 1] * (1 + nodes_per_branch)
    self.assertEqual(mock_rdf.ord_list, expected_order)
def __init__(self, headnode, backend, **kwargs):
    """
    Store the head node, attach the backend and the number of partitions
    to it, and wrap it in a transformation proxy.

    Args:
        headnode: Head node of the computation graph.
        backend: Backend object assigned to `headnode.backend`. Its
            `optimize_npartitions` method is called only when the caller
            does not supply `npartitions`.
        **kwargs: Only `npartitions` is read here.
    """
    self._headnode = headnode
    self._headnode.backend = backend

    # Number of partitions for this dataset, one of the following:
    # 1. The user-supplied `npartitions` optional argument, stored as
    #    given (an explicit None included).
    # 2. Otherwise, whatever the backend's `optimize_npartitions` returns
    #    for `RDataFrame.MIN_NPARTITIONS`.
    #    NOTE(review): presumably this falls back to the minimum when the
    #    backend has no better guess; confirm against the backend.
    # The backend is queried lazily: a default passed to `dict.get` would
    # be evaluated even when the user supplied a value.
    if "npartitions" in kwargs:
        npartitions = kwargs["npartitions"]
    else:
        npartitions = backend.optimize_npartitions(
            RDataFrame.MIN_NPARTITIONS)
    self._headnode.npartitions = npartitions

    self._headproxy = Proxy.TransformationProxy(self._headnode)
def test_node_attr_transformation(self):
    """
    Reading a node attribute through a TransformationProxy must give the
    same result as reading it from the proxied node directly.
    """
    headnode = create_dummy_headnode(1)
    headnode.backend = AttrReadTest.TestBackend()
    head_proxy = Proxy.TransformationProxy(headnode)

    forwarded_attributes = (
        "get_head",
        "operation",
        "nchildren",
        "_new_op_name",
        "value",
        "rdf_node",
        "has_user_references",
    )

    for attr_name in forwarded_attributes:
        via_proxy = getattr(head_proxy, attr_name)
        via_node = getattr(head_proxy.proxied_node, attr_name)
        self.assertEqual(via_proxy, via_node)
def test_mapper_with_pruning(self):
    """
    Check that the mapper still works when part of the graph is pruned.
    """
    # Mock RDF object; the operations applied to it end up in `ord_list`
    mock_rdf = ComputationGraphGeneratorTest.Temp()

    # Head node
    headnode = create_dummy_headnode(1)
    headnode.backend = ComputationGraphGeneratorTest.TestBackend()
    head_proxy = Proxy.TransformationProxy(headnode)

    # Operations that build the graph
    define_branch = head_proxy.Define()
    filter_branch = head_proxy.Filter().Filter()
    filter_count = filter_branch.Count()
    define_child = define_branch.Count()
    leaf_filter = head_proxy.Filter()  # noqa: avoid PEP8 F841

    # At this point the graph would be:
    # [1, 2, 2, 3, 3, 2]

    # Cause of the pruning: the name is rebound, so the Count created
    # above loses its user reference
    define_child = define_branch.Filter()  # noqa: avoid PEP8 F841

    # After the rebinding the graph becomes
    # [1, 2, 2, 3, 2, 2]
    # i.e. the new Filter is appended at the end of the list, which is
    # fine because it holds a reference to the ID of its father.

    # Generate and execute the mapper
    graph_dict = headnode._generate_graph_dict()
    triggerables = ComputationGraphGenerator.generate_computation_graph(
        graph_dict, mock_rdf, 0)
    action_nodes = headnode._get_action_nodes()

    expected_order = [1, 2, 2, 3, 2, 2]
    self.assertEqual(mock_rdf.ord_list, expected_order)
    self.assertListEqual(action_nodes, [filter_count.proxied_node])
    # One occurrence of the mock per action node
    self.assertListEqual(triggerables, [mock_rdf])
def test_supported_transformation(self):
    """
    Calling a supported transformation on a TransformationProxy records
    the operation name and arguments and returns a new proxy.
    """
    plain_node = Node.Node(None, None)
    plain_node.backend = AttrReadTest.TestBackend()
    node_proxy = Proxy.TransformationProxy(plain_node)

    # Transformation name -> positional arguments used to call it
    transformations = {
        "Define": ["x", "tdfentry_"],
        "Filter": ["tdfentry_ > 0"],
    }

    for op_name, op_args in transformations.items():
        transformation = getattr(node_proxy, op_name)
        new_proxy = transformation(*op_args)

        # The parent remembers the name of the last requested operation
        self.assertEqual(node_proxy.proxied_node._new_op_name, op_name)

        # The result wraps a node carrying the requested operation
        self.assertIsInstance(new_proxy, Proxy.TransformationProxy)
        new_operation_name = new_proxy.proxied_node.operation.name
        self.assertEqual(new_operation_name, op_name)
        self.assertEqual(new_proxy.proxied_node.operation.args, op_args)
def test_dfs_graph_with_computed_values_pruning(self):
    """
    Action nodes whose value has already been computed must be pruned.
    """
    # Mock RDF object; the operations applied to it end up in `ord_list`
    mock_rdf = ComputationGraphGeneratorTest.Temp()

    # Head node
    headnode = create_dummy_headnode(1)
    headnode.backend = ComputationGraphGeneratorTest.TestBackend()
    head_proxy = Proxy.TransformationProxy(headnode)

    # Graph nodes
    define_node = head_proxy.Define()
    outer_filter = head_proxy.Filter()
    inner_filter = outer_filter.Filter()
    pending_count = inner_filter.Count()  # noqa: avoid PEP8 F841
    define_child = define_node.Filter()
    computed_count = define_child.Count()
    leaf_filter = head_proxy.Filter()

    # Mark this action node as already computed so that it gets pruned.
    computed_count.proxied_node.value = 1

    # Generate and execute the mapper
    graph_dict = headnode._generate_graph_dict()
    triggerables = ComputationGraphGenerator.generate_computation_graph(
        graph_dict, mock_rdf, 0)
    action_nodes = headnode._get_action_nodes()

    # `computed_count` is pruned, hence a single '3' in the expected list.
    expected_order = [1, 2, 2, 3, 2, 2]
    self.assertEqual(mock_rdf.ord_list, expected_order)
    self.assertListEqual(action_nodes, [pending_count.proxied_node])
    # One occurrence of the mock per action node
    self.assertListEqual(triggerables, [mock_rdf])
def test_dfs_graph_with_parent_pruning(self):
    """
    A parent node that lost its user reference must survive pruning as
    long as it still has children.
    """
    # Mock RDF object; the operations applied to it end up in `ord_list`
    mock_rdf = ComputationGraphGeneratorTest.Temp()

    # Head node
    headnode = create_dummy_headnode(1)
    headnode.backend = ComputationGraphGeneratorTest.TestBackend()
    head_proxy = Proxy.TransformationProxy(headnode)

    # Graph nodes
    define_node = head_proxy.Define()
    outer_filter = head_proxy.Filter()
    inner_filter = outer_filter.Filter()
    count_node = inner_filter.Count()  # noqa: avoid PEP8 F841
    define_child = define_node.Filter()  # noqa: avoid PEP8 F841
    leaf_filter = head_proxy.Filter()  # noqa: avoid PEP8 F841

    # Drop the user reference to the outer filter (this should not
    # affect the graph)
    outer_filter = None

    # Generate and execute the mapper
    graph_dict = headnode._generate_graph_dict()
    triggerables = ComputationGraphGenerator.generate_computation_graph(
        graph_dict, mock_rdf, 0)
    action_nodes = headnode._get_action_nodes()

    # Nothing is pruned: the dereferenced filter still has children
    expected_order = [1, 2, 2, 3, 2, 2]
    self.assertEqual(mock_rdf.ord_list, expected_order)
    self.assertListEqual(action_nodes, [count_node.proxied_node])
    # One occurrence of the mock per action node
    self.assertListEqual(triggerables, [mock_rdf])
def test_mapper_with_pruning(self):
    """
    Check that the mapper still works when part of the graph is pruned.
    """
    # Mock RDF object; the operations applied to it end up in `ord_list`
    mock_rdf = ComputationGraphGeneratorTest.Temp()

    # Head node
    headnode = create_dummy_headnode(1)
    headnode.backend = ComputationGraphGeneratorTest.TestBackend()
    head_proxy = Proxy.TransformationProxy(headnode)

    # Operations that build the graph
    define_branch = head_proxy.Define()
    filter_branch = head_proxy.Filter().Filter()
    filter_count = filter_branch.Count()
    define_child = define_branch.Count()
    leaf_filter = head_proxy.Filter()  # noqa: avoid PEP8 F841

    # Cause of the pruning: the name is rebound, so the Count created
    # above loses its user reference
    define_child = define_branch.Filter()  # noqa: avoid PEP8 F841

    # Generate and execute the mapper
    generator = ComputationGraphGenerator.ComputationGraphGenerator(
        head_proxy.proxied_node)
    # The graph has to be pruned explicitly before generating the mapper
    generator.headnode.graph_prune()
    triggerables = generator.generate_computation_graph(mock_rdf, 0)
    action_nodes = generator.get_action_nodes()

    expected_order = [1, 2, 2, 2, 3, 2]
    self.assertEqual(mock_rdf.ord_list, expected_order)
    self.assertListEqual(action_nodes, [filter_count.proxied_node])
    self.assertListEqual(triggerables, [mock_rdf])