def _create_new_op(self, *args, **kwargs):
    """
    Build the child node for an operation call made on the proxied node.

    The name of the operation is read from
    ``self.proxied_node._new_op_name``; the positional and keyword
    arguments of the call are forwarded to the new `Operation`.

    Args:
        *args: Positional arguments of the operation call.
        **kwargs: Keyword arguments of the operation call.

    Returns:
        An `ActionProxy` wrapping the new node when the operation is an
        action; the ``value`` of the new node, read after the head node's
        backend has executed the graph, when the operation is "AsNumpy"
        or "Snapshot"; a `TransformationProxy` wrapping the new node in
        every other case.
    """
    parent = self.proxied_node

    # Wrap the incoming call in an `Operation` and give it a `Node` of its
    # own that shares the head-node getter of the parent
    operation = Operation(parent._new_op_name, *args, **kwargs)
    child = Node(operation=operation, get_head=parent.get_head)

    # Logger debug statements
    logger.debug("Created new {} node".format(operation.name))

    # Hook the new node into the graph, right below the current node
    parent.children.append(child)

    if operation.is_action():
        return ActionProxy(child)

    if operation.name not in ("AsNumpy", "Snapshot"):
        return TransformationProxy(child)

    # For these two operations the graph is executed right away through the
    # backend of the head node and the result is handed back directly
    head = parent.get_head()
    head.backend.execute(ComputationGraphGenerator(head))
    return child.value
def __setstate__(self, state):
    """
    Rebuild the instance variables of the node from a state dictionary.

    Only ``children`` and ``operation`` are set here.

    Args:
        state (dict): The state dictionary to turn back into a `Node`.
            It must hold a 'children' entry; when it holds a truthy
            'operation_name' it must also hold 'operation_args' and
            'operation_kwargs'.
    """
    self.children = state['children']

    operation_name = state.get('operation_name')
    if not operation_name:
        # No operation was stored for this node
        self.operation = None
        return

    self.operation = Operation(
        operation_name,
        *state['operation_args'],
        **state["operation_kwargs"]
    )
class Node(object):
    """
    A Class that represents a node in RDataFrame operations graph. A Node
    houses an operation and has references to children nodes.
    The types of operations supported can be listed as follows:

    Example::

        import DistRDF
        DistRDF.use(...) # Choose your backend
        print(DistRDF.current_backend.supported_operations)

    Attributes:
        get_head (function): A lambda function that returns the head node of
            the current graph.

        operation: The operation that this Node represents.
            This could be :obj:`None`.

        children (list): A list of :obj:`DistRDF.Node` objects which represent
            the children nodes connected to the current node.

        _new_op_name (str): The name of the new incoming operation of the next
            child, which is the last child node among the current node's
            children.

        value: The computed value after executing the operation in the current
            node for a particular DistRDF graph. This is permanently
            :obj:`None` for transformation nodes and the action nodes get a
            :obj:`ROOT.RResultPtr` after event-loop execution.

        pyroot_node: Reference to the PyROOT object that implements the
            functionality of this node on the cpp side.

        has_user_references (bool): A flag to check whether the node has
            direct user references, that is if it is assigned to a variable.
            Default value is :obj:`True`, turns to :obj:`False` if the proxy
            that wraps the node gets garbage collected by Python.
    """

    def __init__(self, get_head, operation, *args):
        """
        Creates a new node based on the operation passed as argument.

        Args:
            get_head (function): A lambda function that returns the head node
                of the current graph. This value could be `None`, in which
                case the new node is its own head.

            operation (DistRDF.Operation.Operation): The operation that this
                Node represents. This could be :obj:`None`.

            *args: Accepted for compatibility with callers; not used by
                this constructor.
        """
        if get_head is None:
            # Function to get 'head' Node: this node is the head itself
            self.get_head = lambda: self
        else:
            self.get_head = get_head

        self.operation = operation
        self.children = []
        self._new_op_name = ""
        self.value = None
        self.pyroot_node = None
        self.has_user_references = True

    def __getstate__(self):
        """
        Converts the state of the current node to a Python dictionary.

        Only the children and, when present, the name and arguments of the
        operation are stored; the other instance variables are left out.

        Returns:
            dictionary: A dictionary with a 'children' entry and, if the
            node has an operation, the 'operation_name', 'operation_args'
            and 'operation_kwargs' entries.
        """
        state_dict = {'children': self.children}
        if self.operation:
            state_dict['operation_name'] = self.operation.name
            state_dict['operation_args'] = self.operation.args
            state_dict['operation_kwargs'] = self.operation.kwargs

        return state_dict

    def __setstate__(self, state):
        """
        Retrieves the state dictionary of the current node and sets the
        instance variables.

        Only ``children`` and ``operation`` are set; the remaining
        attributes assigned in the constructor are not restored here.

        Args:
            state (dict): This is the state dictionary that needs to be
                converted to a `Node` object.
        """
        self.children = state['children']
        if state.get('operation_name'):
            self.operation = Operation(state['operation_name'],
                                       *state['operation_args'],
                                       **state["operation_kwargs"])
        else:
            self.operation = None

    def is_prunable(self):
        """
        Checks whether the current node can be pruned from the computational
        graph.

        Returns:
            bool: True if the node has no children and either has no user
            references or is an action node whose value is truthy, False
            otherwise.
        """
        if not self.children:
            # Every pruning condition is written on a separate line
            if not self.has_user_references or \
                    (self.operation and self.operation.is_action() and
                     self.value):

                # ***** Condition 1 *****
                # If the node is wrapped by a proxy which is not directly
                # assigned to a variable, then it will be flagged for pruning

                # ***** Condition 2 *****
                # If the current node's value was already
                # computed, it should get pruned only if it's
                # an Action node.

                # Logger debug statements
                # Condition 1 can hold for a node without an operation, so
                # the operation must be checked before reading its name
                if self.operation:
                    logger.debug("{} node can be pruned".format(
                        self.operation.name
                    ))
                else:
                    logger.debug("Node without operation can be pruned")

                return True

        # Logger debug statements
        if self.operation:  # Node has an operation
            logger.debug("{} node shouldn't be pruned".format(
                self.operation.name
            ))
        else:  # Node is the RDataFrame
            logger.debug("Graph pruning completed")
        return False

    def graph_prune(self):
        """
        Prunes nodes from the current DistRDF graph under certain conditions.
        The current node will be pruned if it has no children and the user
        application does not hold any reference to it. The children of the
        current node will get recursively pruned.

        Returns:
            bool: True if the current node has to be pruned, False otherwise.
        """
        # Logger debug statements
        if self.operation:
            logger.debug("Checking {} node for pruning".format(
                self.operation.name
            ))
        else:
            logger.debug("Starting computational graph pruning")

        # Recurse first, so that a node whose children all get pruned is
        # itself seen as childless by `is_prunable` below; keep only the
        # children that report they must stay
        self.children = [
            child for child in self.children if not child.graph_prune()
        ]
        return self.is_prunable()
def test_action(self):
    """A ``Count`` operation must be typed as an action."""
    expected_type = Operation.Types.ACTION
    count_op = Operation("Count")
    self.assertEqual(count_op.op_type, expected_type)
def test_without_args_and_kwargs(self):
    """An operation built from its name alone has empty args and kwargs."""
    bare_op = Operation("Define")

    no_args, no_kwargs = [], {}
    self.assertEqual(bare_op.args, no_args)
    self.assertEqual(bare_op.kwargs, no_kwargs)
def test_with_args_and_kwargs(self):
    """Positional and keyword arguments both end up on the operation."""
    full_op = Operation("Define", 2, "p", a=1, b="b")

    expected_args = [2, "p"]
    expected_kwargs = {"a": 1, "b": "b"}
    self.assertEqual(full_op.args, expected_args)
    self.assertEqual(full_op.kwargs, expected_kwargs)
def test_without_kwargs(self):
    """Positional arguments are stored and kwargs stay empty."""
    positional_op = Operation("Define", 1, "b")

    expected_args = [1, "b"]
    self.assertEqual(positional_op.args, expected_args)
    self.assertEqual(positional_op.kwargs, {})
def test_none(self):
    """Building an operation from an unknown name raises an Exception."""
    self.assertRaises(Exception, Operation, "random")
def test_transformation(self):
    """A ``Define`` operation must be typed as a transformation."""
    expected_type = Operation.Types.TRANSFORMATION
    define_op = Operation("Define", "c1")
    self.assertEqual(define_op.op_type, expected_type)
def test_instant_action(self):
    """A ``Snapshot`` operation must be typed as an instant action."""
    expected_type = Operation.Types.INSTANT_ACTION
    snapshot_op = Operation("Snapshot")
    self.assertEqual(snapshot_op.op_type, expected_type)