def test_gpu_send_and_recv():
    """
    Check that a scalar survives a hop between the numpy and gpu devices.

    The same two-step graph (x + 1, then + 1 again) is built twice: first with
    the first step on numpy and the second on gpu, then the other way around.
    """
    # Without a gpu transformer there is nothing to exercise, so skip the test.
    if 'gpu' not in transformer_choices():
        pytest.skip("GPUTransformer not available")

    # Each pair is (device that computes x + 1, device that computes x + 2).
    device_pairs = (('numpy', 'gpu'), ('gpu', 'numpy'))
    for first_device, second_device in device_pairs:
        with ng.metadata(device=first_device):
            x = ng.placeholder(())
            x_plus_one = x + 1
        with ng.metadata(device=second_device):
            x_plus_two = x_plus_one + 1

        check_result_values(input_vector=[10, 20, 30],
                            result_expected=[(12), (22), (32)],
                            placeholder=x,
                            ops=OrderedSet([x_plus_two]))
def __init__(self, **kwargs):
    """
    Set up the bookkeeping containers and the list of HeTr graph passes.

    Arguments:
        **kwargs: Forwarded unchanged to the parent class initializer.
    """
    super(HetrTransformer, self).__init__(**kwargs)

    # Process / lifecycle bookkeeping.
    self.my_pid = os.getpid()
    self.is_closed = False

    # Child transformer bookkeeping.
    self.child_transformers = {}
    self.transformer_list = []
    self.transformers = set()

    # Containers shared with (and filled in by) the passes below.
    self.send_nodes = OrderedSet()
    self.scatter_shared_queues = []
    self.gather_shared_queues = []

    device_assign = DeviceAssignPass(default_device='numpy',
                                     default_device_id=0,
                                     transformers=self.transformers)
    communication = CommunicationPass(self.send_nodes,
                                      self.scatter_shared_queues,
                                      self.gather_shared_queues)
    distributed = DistributedPass(self.send_nodes,
                                  self.scatter_shared_queues,
                                  self.gather_shared_queues)
    child_transformer = ChildTransformerPass(self.transformer_list)
    self.hetr_passes = [device_assign, communication, distributed, child_transformer]

    self.vizpass = None
    self.inits = OrderedSet()

    # Class-wide instance counter; it must stay within [0, 1] after the bump.
    HetrTransformer.hetr_counter += 1
    assert HetrTransformer.hetr_counter <= 1
    assert HetrTransformer.hetr_counter >= 0
def add_initialization_ops(self, ops):
    """
    Capture the initializations needed by the state that ops read or write.

    Ops already recorded in init_checked_ops are skipped. Newly found
    initializations are added to state_initialization_ops and are themselves
    scanned recursively.

    Args:
        ops: Collection of ops.

    Returns:
        True if new initializations were added for any op in ops.
    """
    added_any = False
    for op in ops:
        if op not in self.init_checked_ops:
            self.init_checked_ops.add(op)

            fresh = self.state_initializations(op.states_read)
            fresh.update(self.state_initializations(op.states_written))
            if len(fresh) == 0:
                continue

            added_any = True
            self.state_initialization_ops.update(fresh)
            # The initializers may touch further state of their own.
            self.add_initialization_ops(Op.ordered_ops(fresh))

    # Replace every recorded initialization by the op it forwards to.
    forwarded_inits = OrderedSet(op.forwarded for op in self.state_initialization_ops)
    self.state_initialization_ops = forwarded_inits
    return added_any
def __init__(self, **kwargs):
    """
    Create a transformer with no computations and nothing allocated yet.

    Arguments:
        **kwargs: Forwarded unchanged to the parent class initializer.
    """
    super(Transformer, self).__init__(**kwargs)

    # Lifecycle flags.
    self.finalized = False
    self.allocated = False
    self.initialized = False

    # Requested computations and the storage handles backing them.
    self.computations = OrderedSet()
    self.device_buffers = OrderedSet()
    self.cpu_initializations = []
    self.init_computation = None
    self.graph_passes = None

    # State-initialization tracking: ops already checked, states already
    # seen, and the initialization ops collected for them.
    self.init_checked_ops = OrderedSet()
    self.init_states = OrderedSet()
    self.state_initialization_ops = OrderedSet()
def __init__(self, fusion=None, **kwargs):
    """
    Create a transformer with no computations and the default graph passes.

    Arguments:
        fusion: Stored on the instance as self.fusion.
        **kwargs: Forwarded unchanged to the parent class initializer.
    """
    super(Transformer, self).__init__(**kwargs)

    # Lifecycle flags.
    self.finalized = False
    self.allocated = False
    self.initialized = False

    # Requested computations and the root ops they need.
    self.computations = OrderedSet()
    self.all_results = OrderedSet()
    self.opids = {}
    self.fusion = fusion

    # Storage and initialization bookkeeping.
    self.device_buffers = OrderedSet()
    self.cpu_initializations = []
    self.init_computation = None

    # Passes run over every graph, in this order.
    self.graph_passes = [SimplePrune(), RequiredTensorShaping()]
def test_hetr_graph_passes():
    """
    Run the HeTr passes one at a time on x + y and verify each pass's effect.

    Only x is created under device_id '1'; y and the sum use the defaults.
    """
    # Build the graph
    with ng.metadata(device_id='1'):
        x = ng.placeholder(())
    y = ng.placeholder(())
    x_plus_y = x + y

    # Expected [device, device_id] per op once devices have been assigned
    graph_ops = OrderedSet([x_plus_y, x, y])
    graph_op_metadata = {
        x_plus_y: ["numpy", '0'],
        x: ["numpy", '1'],
        y: ["numpy", '0'],
    }
    expected_transformers = ["numpy1", "numpy0"]

    # Device assignment, then communication (a recv is expected to feed x_plus_y)
    check_device_assign_pass("numpy", "0", graph_op_metadata, graph_ops)
    check_communication_pass(ops_to_transform=graph_ops,
                             expected_recv_nodes=[x_plus_y])

    # The child-transformer pass must report exactly the expected transformers
    child_pass = ChildTransformerPass([])
    hetr = ngt.make_transformer_factory('hetr')()
    child_pass.do_pass(graph_ops, hetr)
    hetr.close()

    assert set(expected_transformers) == set(child_pass.transformer_list)
def _transform_computations(self): """ Transform computation graphs to a form that can be run. """ # with Op.saved_user_deps(): # Run passes on the computation graphs self.run_registered_graph_passes(self.all_results) # Collect up all ops from the graph and obtain the init graph all_ops = OrderedSet(Op.ordered_ops(self.all_results)) init_op = doall(self.ordered_initializers(all_ops)) # Run passes on the initialization graphs self.run_registered_graph_passes([init_op]) # Union the init and computation graphs self.inits = Op.ordered_ops([init_op]) all_ops.update(self.inits) # create computation which initializes values (called once per session) init_op.update_forwards() self.init_computation = self.computation(init_op, name="init") # Give ids for op in all_ops: if op not in self.opids: self.opids[op] = len(self.opids) self.dataflow, self.memory = assign_buffers(self, all_ops, self.fusion) # Initialize tensor descriptions for op in all_ops: self.initialize_tensor_descriptions(op) self.ops = self.dataflow.instructions self.start_transform_allocate() for device_buffer in self.device_buffers: device_buffer.transform_allocate() self.finish_transform_allocate() # Compile the computations now that we know their storage for computation in self.computations: computation.transform() self.finish_transform() self.finalized = True
def state_initializations(self, states):
    """
    Collect the initializers of every state that has not been seen before.

    Each unseen state is recorded in init_states, so asking again for the
    same state yields nothing new.

    Args:
        states: A collection of states.

    Returns:
        An OrderedSet with the initializers of the newly seen states.
    """
    found = OrderedSet()
    for state in states:
        if state in self.init_states:
            continue
        self.init_states.add(state)
        found.update(state.initializers)
    return found
def test_simple_graph():
    """
    Evaluate one and then three simple results of a placeholder through hetr.
    """
    # NOTE(review): only the placeholder is assumed to be created under
    # device_id '1' (as in test_hetr_graph_passes) -- confirm.
    with ng.metadata(device_id='1'):
        x = ng.placeholder(())

    # Single result: one expected tuple covering all three inputs.
    single_result = x + 1
    check_result_values(input_vector=[10, 20, 30],
                        result_expected=[(11, 21, 31)],
                        placeholder=x,
                        ops=OrderedSet([single_result]))

    # Three results: one expected tuple per input, in the order of the ops.
    x_plus_one = x + 1
    x_plus_two = x + 2
    x_mul_three = x * 3
    three_results = OrderedSet([x_plus_one, x_plus_two, x_mul_three])
    check_result_values(input_vector=[10, 20, 30],
                        result_expected=[(11, 12, 30), (21, 22, 60), (31, 32, 90)],
                        placeholder=x,
                        ops=three_results)
def _transform_computations(self): """ Transform computation graphs to a form that can be run. """ # Run passes on the computation graphs all_results = [] for comp in self.computations: all_results.append(comp.computation) all_ops = self.run_registered_graph_passes(all_results) self.init_computation = \ self.add_computation(computation(doall(self.state_initialization_ops)).named('init')) all_ops.append(self.init_computation.computation) # Collect up all ops from the graph and obtain the init graph all_ops = OrderedSet(Op.ordered_ops(all_ops)) def init_tensor_description(tensor_description): if tensor_description.buffer is None: tensor_description.buffer = self.device_buffer_storage( tensor_description.base.tensor_size, tensor_description.dtype, tensor_description.name ) self.device_buffers.add(tensor_description.buffer) tensor_description.value = \ tensor_description.buffer.device_tensor(tensor_description) for state in self.init_states: init_tensor_description(state.tensor_description()) self.ops = Op.ordered_ops(all_ops) for op in self.ops: if op.is_tensor_op: init_tensor_description(op.tensor_description()) self.start_transform_allocate() for device_buffer in self.device_buffers: device_buffer.transform_allocate() self.finish_transform_allocate() # Compile the computations now that we know their storage for comp in self.computations: comp.computation_name = \ self.transform_ordered_ops(Op.ordered_ops([comp.computation]), name=comp.name) self.finish_transform() self.finalized = True
def test_distributed_graph():
    """
    Build a graph whose x + y op is spread over devices '1' and '2' along W.

    The test is currently marked as an expected failure: pytest.xfail() is
    called imperatively, so the check_result_values call after it is never
    reached. The per-pass checks are kept below as commented-out code.
    """
    # Build the graph
    H = ng.make_axis(length=4, name='height')
    W = ng.make_axis(length=6, name='width')
    x = ng.placeholder(axes=[H, W])
    y = ng.placeholder(())
    z = ng.placeholder(())
    # Only x + y is placed on the device pair, parallelized along W
    with ng.metadata(device_id=('1', '2'), parallel=W):
        x_plus_y = x + y

    x_plus_y_plus_z = x_plus_y + z

    # # Build the graph metadata
    # graph_ops = OrderedSet([x_plus_y_plus_z, x_plus_y, x, y, z])
    #
    # graph_op_metadata = {op: list() for op in graph_ops}
    # graph_op_metadata[x] = ["numpy", '0']
    # graph_op_metadata[y] = ["numpy", '0']
    # graph_op_metadata[z] = ["numpy", '0']
    # graph_op_metadata[x_plus_y] = ["numpy", ('1', '2')]
    # graph_op_metadata[x_plus_y_plus_z] = ["numpy", '0']
    #
    # transformer_list = ["numpy2", "numpy1", "numpy0"]
    #
    # # Run the hetr passes one by one, and verify they did the expected things to the graph
    # check_device_assign_pass("numpy", "0", graph_op_metadata, graph_ops)
    # check_communication_pass(
    #     ops_to_transform=graph_ops,
    #     expected_recv_nodes=[
    #         x_plus_y,
    #         x_plus_y,
    #         x_plus_y_plus_z])
    #
    # # Check if the hetr pass (ChildTransformer pass) generates the expected transformer list
    # obj = ChildTransformerPass([])
    # transformer = ngt.make_transformer_factory('hetr')()
    # obj.do_pass(graph_ops, transformer)
    # transformer.close()
    #
    # assert set(transformer_list) == set(obj.transformer_list)

    # Raises immediately; nothing below this call runs.
    pytest.xfail(
        "Some problems due to latest changes from master, fixes in later PR")

    check_result_values(input_vector=[(10, 20, 30), (1, 2, 3)],
                        result_expected=[(60, ), (6, )],
                        placeholder=(x, y, z),
                        ops=OrderedSet([x_plus_y_plus_z]))
def check_result_values(input_vector, result_expected, placeholder, ops=None, *args):
    """
    Check the values returned by a hetr computation against expected values.

    When more than one expected entry is given, the obtained results must
    equal result_expected element for element (including order). Otherwise
    all obtained results, taken together, are compared with result_expected[0].

    The hetr transformer created here is always closed, even when building the
    computation, running it, or one of the assertions fails.

    :param: input_vector: list specifying the different values to be passed to
            the placeholder; a tuple entry is unpacked into several arguments
    :param: result_expected: list of tuples specifying the expected result values
            from the hetr computation object
    :param: placeholder: placeholder, or tuple of placeholders, to be passed to
            the hetr computation
    :param: ops: result ops to be passed to the hetr computation; defaults to
            a new empty OrderedSet
    :param: args: unused
    """
    # A fresh set per call: a shared default instance could be modified by the
    # computation it is handed to and leak into later calls.
    if ops is None:
        ops = OrderedSet()

    # Select the transformer
    transformer = ngt.make_transformer_factory('hetr')()
    try:
        # Build the hetr computation object
        if isinstance(placeholder, tuple):
            computation = transformer.computation(ops, *placeholder)
        else:
            computation = transformer.computation(ops, placeholder)

        # Evaluate once per input entry
        result_obtained = [
            computation(*value) if isinstance(value, tuple) else computation(value)
            for value in input_vector
        ]

        if len(result_expected) > 1:
            # One expected entry per evaluation
            np.testing.assert_array_equal(result_expected, result_obtained)
        else:
            # A single expected entry covering all evaluations
            assert (np.array(tuple(result_obtained)) == np.array(
                result_expected[0])).all()
    finally:
        transformer.close()
def allocate(self):
    """
    Allocate device storage and initialize constants, once.

    Transforms the computations first if the transformer has not been
    finalized yet. Calling this again after a successful allocation is a no-op.
    """
    if not self.allocated:
        if not self.finalized:
            self._transform_computations()

        self.allocate_storage()

        unique_ops = OrderedSet(self.ops)
        for op in unique_ops:
            self.initialize_constant(op)

        self.allocated = True
def check_communication_pass(ops_to_transform, expected_recv_nodes):
    """
    The communication pass should insert send/recv nodes wherever the
    metadata[transformer] differs between nodes. This checks that the recv
    nodes are inserted in the right place, and counts that the expected number
    of send nodes are found.

    The hetr transformer created here is always closed, even when the pass or
    one of the assertions fails.

    :param ops_to_transform: list of ops to do the graph traversal
    :param expected_recv_nodes: list of ops where receive nodes are expected
           to be inserted after the communication pass
    """
    transformer = ngt.make_transformer_factory('hetr')()
    try:
        send_nodes = OrderedSet()
        scatter_shared_queues = list()
        gather_shared_queues = list()
        obj = CommunicationPass(send_nodes, scatter_shared_queues, gather_shared_queues)
        obj.do_pass(ops_to_transform, transformer)

        # Count if the communication pass inserted the expected number of send nodes
        num_expected_sendnodes = len(expected_recv_nodes)
        assert num_expected_sendnodes == len(send_nodes)

        # Verify that each expected op is fed by at least one receive node.
        # The match is on the exact type of the argument, not on isinstance.
        for op in expected_recv_nodes:
            arg_types = [type(each_arg) for each_arg in op.args]
            has_recv_arg = (
                ng.op_graph.communication.Recv in arg_types or
                ng.op_graph.communication.Gather_Recv in arg_types or
                ng.op_graph.communication.Scatter_Recv in arg_types)
            assert has_recv_arg, "no receive node feeds {}".format(op)
    finally:
        transformer.close()
def allocate(self):
    """
    Allocate storage and then initializes constants.

    Will finalize if not already done. Does nothing when storage has already
    been allocated.
    """
    if self.allocated:
        return

    with Op.saved_user_deps():
        # Disable user_deps during transformations

        if not self.finalized:
            self._transform_computations()

        self.allocate_storage()

        # Initialization ops and computation ops, each visited once
        for op in OrderedSet(self.inits + self.ops):
            self.initialize_constant(op)

    self.allocated = True
def sort_ops_by_comm_deps(ops):
    """
    Add control dependencies so the subgraphs in ops run in a deadlock-free order.

    For every op in ops, look at the Receivers that any *other* op in ops
    depends on. If the Sender paired with such a Receiver depends on op, and
    the Receiver lives on the same transformer as op, then op is added to the
    Receiver's control_deps.

    Ex.
    Whole Graph:
        X -> Send0
        Recv0 -> Y -> Send1
        Recv1 -> Z

    ops to be sorted:
        Send0, Z

    Deadlock would occur if Z ran before Send0, but there are no explicit edges
    connecting them. With the added dependency, the subgraph for this
    transformer looks like:

    X -> Send0 ====control_dep====> Recv1 -> Z

    A child transformer that orders nodes by control_deps can then produce a
    correct order if one is possible.

    Nothing is done, and None is returned, when ops has at most one element.
    """
    if len(ops) <= 1:
        return

    for op in OrderedSet(ops):
        # Every other returned op is a candidate source of a communication dependency.
        for other in set(ops) - set([op]):
            for recv in find_recvs(fro=other):
                sender_depends_on_op = comm_path_exists(fro=recv.send_node(), to=op)
                if sender_depends_on_op and \
                        recv.metadata['transformer'] == op.metadata['transformer']:
                    recv.control_deps.add(op)
def do_pass(self, ops, transformer):
    """
    Clone the subgraph feeding every 'gather' argument once per extra device.

    Walks the ops from the top of the graph to the bottom. For each argument
    whose metadata 'marker' equals 'gather', the axes of the nodes feeding the
    argument's send node are replaced by the result of calculate_new_axes, and
    those nodes are cloned for every device in arg.from_id after the first.

    Arguments:
        ops: Ops to process; each is replaced by the op it forwards to.
        transformer: Not used by this pass.
    """
    ops = OrderedSet(op.forwarded for op in ops)

    def set_new_axes(root, num_devices):
        # Compute the new axes for root and assign them to every node returned
        # by do_traversal(root) that has an 'axes' attribute.
        visit = self.do_traversal(root)
        self.new_axes = calculate_new_axes(root.axes, self.parallel_axis, num_devices, False)

        while visit:
            node = visit.pop()
            if hasattr(node, 'axes'):
                node._TensorOp__axes = self.new_axes

    # Start traversal from the top to the bottom
    for op in reversed(Op.ordered_ops(ops)):
        args = list()
        for arg in op.args:
            if 'marker' in arg.metadata:
                # Compare by value: an identity test ('is') against a string
                # literal only succeeds when both strings happen to be the
                # same interned object.
                if arg.metadata['marker'] == 'gather':
                    self.parallel_axis = arg.metadata['parallel']

                    set_new_axes(arg.send_node(), len(arg.from_id))

                    # Device 0 keeps the original nodes; clone for the others.
                    for d in range(1, len(arg.from_id)):
                        if d == (len(arg.from_id) - 1):
                            # The axes are recomputed with the final flag set
                            # to True for the last device.
                            self.new_axes = calculate_new_axes(
                                arg.axes, self.parallel_axis, len(arg.from_id), True)

                        nodes = self.do_traversal(arg.send_node())
                        self.clone_nodes(nodes, arg.from_id[d], self.new_axes,
                                         self.scatter_shared_queues[d],
                                         self.gather_shared_queues[d])
            args.append(arg)

        if isinstance(op.args, tuple):
            op._Op__args = tuple(args)
        else:
            # NOTE(review): this calls op.args as a function; kept as in the
            # original -- confirm that a non-tuple args is really callable.
            op.args(args)
def check_device_assign_pass(default_device, default_device_id,
                             graph_op_metadata, graph_op=None, *args):
    """
    The Device assign pass should inject the metadata{device_id, device} as
    specified by the user for each op; if not specified then the default
    {device_id:0, device:numpy} should be inserted for each op.

    The hetr transformer created here is always closed, even when the pass or
    one of the assertions fails.

    :param: default_device: string, the default device for each op,
            if not specified by user ex: "numpy"
    :param: default_device_id: string, the default device number for each op,
            if not specified by user ex: "0"
    :param: graph_op_metadata: dict, dictionary of list specifying the expected
            metadata [device, device_id] for each op
    :param: graph_op: list of ops to do the graph traversal; defaults to a new
            empty OrderedSet
    :param: args: unused
    """
    # A fresh set per call instead of one default instance shared by all calls.
    if graph_op is None:
        graph_op = OrderedSet()

    transformer = ngt.make_transformer_factory('hetr')()
    try:
        transformers = set()
        expected_transformers = set()
        obj = DeviceAssignPass(default_device, default_device_id, transformers)

        obj.do_pass(graph_op, transformer)

        for op, expected in graph_op_metadata.items():
            assert op.metadata['device'] == expected[0]
            assert op.metadata['device_id'] == expected[1]
            # The transformer name is the device followed by the device id
            assert op.metadata['transformer'] == expected[0] + str(expected[1])

            expected_transformers.add(op.metadata['transformer'])
        # The pass must have recorded exactly the transformers seen on the ops
        assert transformers == expected_transformers
    finally:
        transformer.close()
def comm_path_exists(fro, to):
    """
    Find a path from fro to to, including paths through the non-explicit
    edges that lead from a Receiver to its Sender.

    Note- this is a non-standard traversal, as most traversals stop at a Receiver.

    Every node is expanded at most once, so shared subgraphs are not walked
    repeatedly and the search terminates even if the combined graph has a cycle.

    Arguments:
        fro: Node whose args are the starting points of the search.
        to: Node to look for.

    Returns:
        True if to is reachable from the args of fro, False otherwise.
    """
    # TODO: does this correctly handle traversing multiple send-recv junctions
    # from fro to to?

    visit = OrderedSet(fro.args)
    expanded = set()
    while visit:
        v = visit.pop()
        if v == to:
            return True
        if v in expanded:
            continue
        expanded.add(v)
        if isinstance(v, Receiver):
            # Follow the implicit edge to the paired sender
            visit.add(v.send_node())
        else:
            visit.update(v.args)

    return False
def __init__(self):
    """Start with an empty ordered set of state initialization ops."""
    self.state_initialization_ops = OrderedSet()
def do_traversal(self, root):
    """
    Order root and the nodes reachable from it through control_deps.

    A node is emitted only after all of its control_deps have been emitted,
    so root comes last in the returned list.

    Arguments:
        root: Node to start from.

    Returns:
        List of nodes, dependencies first.
    """
    # Note: This is almost identical to Op's visit_input_closure.
    available = OrderedSet()  # discovered nodes still to be examined
    counts = dict()  # node -> number of its dependencies not yet emitted
    parents = collections.defaultdict(list)  # dependency -> nodes waiting on it
    ready = OrderedSet()  # nodes whose dependencies have all been emitted
    nodes = list()

    # Phase 1: discover the graph and count the dependencies of every node
    available.add(root)
    while available:
        node = available.pop()
        node.update_forwards()

        if node in counts:
            # Already examined
            continue

        children = [child.forwarded for child in node.control_deps]
        if children:
            counts[node] = len(children)
            for child in children:
                parents[child].append(node)
            available.update(children)
        else:
            ready.add(node)

    # Phase 2: emit ready nodes, releasing parents as their counts reach zero
    while ready:
        node = ready.pop()
        nodes.append(node)
        for p in parents.get(node, []):
            count = counts[p] - 1
            if count == 0:
                ready.add(p)
                del counts[p]
            else:
                counts[p] = count

    return nodes
def __init__(self, hetr, results, *parameters, **kwargs):
    """
    Split a computation across the child transformers of a hetr transformer.

    Runs the hetr passes over the results and parameters, groups the send
    nodes, result ops and placeholders by the transformer named in their
    metadata, and builds one child computation per transformer.

    Arguments:
        hetr: The hetr transformer; its transformer_list, send_nodes,
            hetr_passes, vizpass and transformer() are used.
        results: An op, a list of ops, an OrderedSet of ops, or None.
        *parameters: Placeholder ops of the computation.
        **kwargs: Not used.
    """
    # super(HetrComputation, self).__init__(hetr, results, *parameters, **kwargs)
    self.child_computations = dict()
    self.child_results_map = dict()
    self.transformer = hetr
    self.transformer_name_list = hetr.transformer_list
    self.send_nodes = hetr.send_nodes
    self.hetr_passes = hetr.hetr_passes
    self.num_results = 0
    self.num_send_nodes = dict()
    self.is_distributed = False
    self.parameters = parameters

    # Normalize results to an OrderedSet; remember what the caller passed
    orig_results = results
    if not isinstance(results, OrderedSet):
        if not isinstance(results, list):
            results = [results] if results else []
        results = OrderedSet(results)
    # A result placed on several devices is marked as split and replaced by a
    # ResultOp wrapping it.
    # NOTE(review): results is modified while it is being iterated -- confirm
    # that OrderedSet tolerates this.
    for op in results:
        if 'device_id' in op.metadata and \
                isinstance(op.metadata['device_id'], (list, tuple)):
            op.metadata['is_split_op'] = True
            new_result = ResultOp(device_id=0, args=op)
            results.remove(op)
            results.append(new_result)

    all_results = OrderedSet(results)
    all_results.update(parameters)
    # all res empty; hetr as no computations. where do these get assigned?
    # previously, we used t.all_results, which went away. when was that created?
    #   - computation object used to update all_results of transformer
    #   - transformer transform_ops used to use all_results but not update it,
    #     and return a new copy

    if orig_results is not None:
        # Do Hetr passes
        # The send nodes collected so far are included before every pass
        for graph_pass in self.hetr_passes:
            all_results = all_results + hetr.send_nodes
            graph_pass.do_pass(all_results, self.transformer)

        # TODO replicate placeholders for nodes which got replicated;
        # update the placeholder mapping below, so at __call__ time we know
        # which transformers to pass copies of the provided placeholder value to

        if hetr.vizpass:
            vis_results = all_results + hetr.send_nodes
            hetr.vizpass.do_pass(vis_results, self)

    self.transformer_to_node = {
        t: list() for t in self.transformer_name_list
    }

    # Distributed means at least one gather/scatter send node exists
    self.is_distributed = any(
        'Gather_Send' in s.name or 'Scatter_Send' in s.name
        for s in self.send_nodes)

    # update the transformer to send node mappings
    for s in self.send_nodes:
        tname = s.metadata['transformer']
        self.transformer_to_node[tname].append(s)
        self.num_send_nodes[tname] = self.num_send_nodes.get(tname, 0) + 1

    self.num_results = len(results)

    if orig_results is not None:
        for pos, op in enumerate(results):
            tname = op.metadata['transformer']
            if self.is_distributed is True:
                if tname in self.num_send_nodes:
                    # One None entry per send node of this transformer is
                    # placed in front of the result position
                    for i in range(self.num_send_nodes[tname]):
                        self.child_results_map.setdefault(tname, []).append(None)
            if 'ResultOp' in op.name:
                # Hand the wrapped op, not the ResultOp, to the child
                self.transformer_to_node[tname].append(op.args[0])
            else:
                self.transformer_to_node[tname].append(op)
            self.child_results_map.setdefault(tname, []).append(pos)

    # Group the placeholders, and their positions among the parameters, by
    # transformer
    self.placeholders = {t: list() for t in self.transformer_name_list}
    self.placeholders_pos = {t: list() for t in self.transformer_name_list}
    for i, p in enumerate(parameters):
        tname = p.metadata['transformer']
        assert isinstance(
            tname, list
        ) is False, "Fatal: multiple transformers cannot be handled!"
        self.placeholders[tname].append(p)
        self.placeholders_pos[tname].append(i)

    self.child_computations = dict()
    for tname in self.transformer_name_list:
        # request asynctransformer from HT
        # use it to build AsyncComputation
        async_trans = hetr.transformer(tname)
        async_comp = async_trans.computation(
            self.transformer_to_node[tname],
            tuple(self.placeholders[tname]))
        self.child_computations[tname] = async_comp
def find_recvs(fro):
    """
    Find all the Receivers fro depends on.

    The search follows args and, at every Receiver, continues through the
    paired send node. Every node is expanded at most once, so shared subgraphs
    are not walked repeatedly and the search terminates even if the combined
    graph has a cycle.

    Arguments:
        fro: Node to start from (it is included in the search).

    Returns:
        OrderedSet of the Receivers found.
    """
    visit = OrderedSet()
    recvs = OrderedSet()
    expanded = set()
    visit.add(fro)
    while visit:
        v = visit.pop()
        if v in expanded:
            continue
        expanded.add(v)
        if isinstance(v, Receiver):
            recvs.add(v)
            # Keep going on the sending side of the junction
            visit.add(v.send_node())
        elif hasattr(v, 'args'):
            visit.update(v.args)

    return recvs
class Computation(NameableValue):
    """
    A handle for a computation function.

    Arguments:
        transformer (obj:`Transformer`): The associated transformer.
        returns: If an Op, return the value of the Op, if sequence of Ops, return
            the sequence of values, if a set return a map, if None, return None.
        *args: AllocationOps marked input will be arguments to the function.
        **kwargs: Args for related classes.
    """

    def __init__(self, transformer, returns, *args, **kwargs):
        """
        Collect the ops the computation needs and register them as results.

        Raises:
            ValueError: If returns is not a set, sequence, Op or None, if an
                argument does not have input=True, or if an argument is not
                an Op.
        """
        super(Computation, self).__init__(**kwargs)
        self.transformer = transformer
        self.computation_name = None

        def wrap_op(op):
            # Tensor results are read back through a ResultHandle
            if isinstance(op, TensorOp):
                return ResultHandle(op)
            else:
                return op

        def wrap_ops(ops):
            return [wrap_op(op) for op in ops]

        self.ops = OrderedSet()
        if isinstance(returns, collections.Set):
            returns = set(wrap_ops(returns))
            self.ops.update(returns)
        elif isinstance(returns, collections.Sequence):
            returns = wrap_ops(returns)
            self.ops.update(returns)
        elif isinstance(returns, Op):
            returns = wrap_op(returns)
            self.ops.add(returns)
        elif returns is not None:
            raise ValueError()
        self.returns = returns

        self.parameters = []
        for arg in args:
            if arg.input:
                self.parameters.append(arg)
            else:
                raise ValueError((
                    'The arguments to a computation must all have property '
                    'input=True, but the op passed had input=False. In most '
                    'cases you want to pass placeholder ops in as arguments. '
                    '{op} was passed in, of type {op_type}.'
                ).format(
                    op=arg,
                    op_type=arg.__class__.__name__,
                ))

            if isinstance(arg, Op):
                self.ops.add(arg)
            else:
                raise ValueError()

        # Gather the control dependencies: user_deps of the requested ops plus
        # other_deps of everything reachable through args and other_deps.
        control_ops = OrderedSet()
        for op in self.ops:
            control_ops.update(op.user_deps)
        processed_ops = set()
        pending_ops = OrderedSet(self.ops)
        while pending_ops:
            op = pending_ops.pop()
            if op in processed_ops:
                continue
            control_ops.update(op.other_deps)
            pending_ops.update(op.other_deps)
            pending_ops.update(op.args)
            processed_ops.add(op)

        self.ops.update(control_ops)
        self.transformer.all_results.update(self.ops)
        self.executor = None

    def transform(self):
        """
        Transforms the computation so that it can be run.
        """
        self.ops = {op.forwarded for op in self.ops}
        ordered_ops = self.transformer.dataflow.can_reach(self.ops,
                                                          order=self.transformer.ops)
        self.computation_name = self.transformer.transform_ordered_ops(ordered_ops,
                                                                       name=self.name)

    def __call__(self, *args):
        """
        Executes the computation passing args in to the function.

        Returns:
            The value of the op if returns was an Op, a dict from op to value
            if it was a set, a tuple of values if it was a sequence, and None
            otherwise.

        Raises:
            ValueError: If the number of args differs from the number of
                parameters.
        """
        if len(args) != len(self.parameters):
            raise ValueError((
                'Computation was expecting {expected} arguments, but was '
                'called with {called}.'
            ).format(
                expected=len(self.parameters),
                called=len(args),
            ))

        # TODO Should this be automatic?
        self.transformer.initialize()

        # Get the parameters to the device
        for param, arg in zip(self.parameters, args):
            param.value[()] = arg
        self.executor()

        # TODO Should copy this out of the device to a destination when it is not scalar
        def value(op):
            """
            Returns the computed value of op, or None if it has no value.

            :param op:
            :return: Return value for op.
            """
            if isinstance(op, TensorOp):
                return op.value.get(None)
            else:
                return None

        if isinstance(self.returns, Op):
            return value(self.returns)
        elif isinstance(self.returns, collections.Set):
            # Fill the result mapping; assigning into the builtin `dict` type
            # itself would raise a TypeError.
            result = dict()
            for op in self.returns:
                result[op] = value(op)
            return result

        elif isinstance(self.returns, collections.Sequence):
            return tuple(value(op) for op in self.returns)

        else:
            return None
class Transformer(with_metaclass(Transformer_ABC_Meta, object)):
    """
    Produce an executable version of op-graphs.

    Computations are subsets of Ops to compute.  The transformer determines storage
    allocation and transforms the computations and allocations into functions.

    Arguments:
        fusion (bool): Whether to combine sequences of operations into one operation.
        **kwargs: Args for related classes.

    Attributes:
        computations (:obj:`set` of :class:`Computation`): The set of requested computations.
        all_results (:obj:`set` of :class:`ngraph.op_graph.op_graph.Op`):  A root set of Ops that
            need to be computed.
        finalized (bool): True when transformation has been performed.
        initialized (bool): True when variables have been initialized/restored.
        opids (dict): Maps each op to a sequential integer id, assigned when the
            computations are transformed.
        fusion (bool): True when fusion was enabled.
        device_buffers (set): Set of handles for storage allocations.
        cpu_initializations (list): Initializations to be performed from the CPU after
            allocation.
        init_computation (Computation): The computation that performs initialization
            after allocation.  This happens once per training session, not once per-minibatch.
        graph_passes (list): Passes run, in order, by run_registered_graph_passes.
    """

    def __init__(self, fusion=None, **kwargs):
        super(Transformer, self).__init__(**kwargs)
        self.computations = OrderedSet()
        self.all_results = OrderedSet()
        self.finalized = False
        self.allocated = False
        self.initialized = False
        self.opids = dict()
        self.fusion = fusion
        self.device_buffers = OrderedSet()
        self.cpu_initializations = []
        self.init_computation = None
        self.graph_passes = [SimplePrune(), RequiredTensorShaping()]

    def register_graph_pass(self, graph_pass):
        """
        Append graph_pass to the passes run by run_registered_graph_passes.

        Arguments:
            graph_pass: Object with a do_pass(ops) method.
        """
        self.graph_passes.append(graph_pass)

    def run_registered_graph_passes(self, ops):
        """
        Run every registered graph pass on ops, in registration order.

        Arguments:
            ops: The ops handed to each pass.
        """
        for graph_pass in self.graph_passes:
            graph_pass.do_pass(ops)

    def _transform_computations(self):
        """
        Transform computation graphs to a form that can be run.
        """

        # with Op.saved_user_deps():
        # Run passes on the computation graphs
        self.run_registered_graph_passes(self.all_results)

        # Collect up all ops from the graph and obtain the init graph
        all_ops = OrderedSet(Op.ordered_ops(self.all_results))
        init_op = doall(self.ordered_initializers(all_ops))

        # Run passes on the initialization graphs
        self.run_registered_graph_passes([init_op])

        # Union the init and computation graphs
        self.inits = Op.ordered_ops([init_op])
        all_ops.update(self.inits)

        # create computation which initializes values (called once per session)
        init_op.update_forwards()
        self.init_computation = self.computation(init_op, name="init")

        # Give ids: each op not seen before gets the next sequential integer
        for op in all_ops:
            if op not in self.opids:
                self.opids[op] = len(self.opids)

        self.dataflow, self.memory = assign_buffers(self, all_ops, self.fusion)

        # Initialize tensor descriptions
        for op in all_ops:
            self.initialize_tensor_descriptions(op)

        self.ops = self.dataflow.instructions

        self.start_transform_allocate()
        for device_buffer in self.device_buffers:
            device_buffer.transform_allocate()
        self.finish_transform_allocate()

        # Compile the computations now that we know their storage
        for computation in self.computations:
            computation.transform()
        self.finish_transform()
        self.finalized = True

    @generic_method(Op)
    def initialize_tensor_descriptions(self, op):
        """
        Ensures that tensor descriptions associated with op are initialized.

        Arguments:
            op (class:`ngraph.op_graph.op_graph.Op`): Initialize the tensor description for op.
        """
        # op
        tensor_description = op.tensor_description()
        if tensor_description is not None and tensor_description.transformer is None:
            tensor_description.initialize(self)

        # Call info for op
        for tensor_description in op.call_info():
            if tensor_description.transformer is None:
                tensor_description.initialize(self)

    @initialize_tensor_descriptions.on_type(Function)
    def initialize_tensor_descriptions(self, op):
        """
        For Function, recurse into instructions

        Arguments:
            op: The function.
        :return:
        """
        for inst in op.instructions:
            self.initialize_tensor_descriptions(inst)

    @abc.abstractmethod
    def start_transform_allocate(self):
        """
        Called just before allocation code is transformed.
        """

    @abc.abstractmethod
    def finish_transform_allocate(self):
        """
        Called after last allocation is transformed.
        """

    # NOTE(review): Computation.transform calls this with an extra name=...
    # keyword that this abstract signature does not declare -- confirm that
    # the concrete transformers accept it.
    @abc.abstractmethod
    def transform_ordered_ops(self, ordered_ops):
        """
        Generate code to compute ordered_ops.

        Arguments:
            ordered_ops: Ops to compute

        Returns: Handle for generated code
        """

    @abc.abstractmethod
    def finish_transform(self):
        """
        Finish generating the model.
        """

    @abc.abstractmethod
    def allocate_storage(self):
        """
        Allocate storage on the device.
        """

    @generic_method(Op)
    def initialize_constant(self, op):
        """
        Default case: an op that needs no constant initialization.
        """
        pass

    @initialize_constant.on_type(InitTensorOp)
    def initialize_constant(self, op):
        """
        Store the value produced by op.valfun in the tensor of op's argument.
        """
        tensor_description, = tensor_descriptions(op.args)
        value = op.valfun(tensor_description)
        tensor_description.value[()] = value

    def ordered_initializers(self, ordered_ops):
        """
        Collect the initializers needed by ordered_ops and order them.

        Initializers are gathered transitively (initializers of initializers
        are included) and then ordered by a depth-first walk over initializers
        and args.

        Arguments:
            ordered_ops: Ops whose initializers are wanted.

        Returns:
            List of ops in the order they were reached by the walk.
        """
        # Breadth-first closure over op.initializers
        initializers = OrderedSet()
        todo = OrderedSet(ordered_ops)
        while todo:
            these_ops = todo
            todo = OrderedSet()
            for op in these_ops:
                op = op.forwarded
                op.update_forwards()
                initializers.update(op.initializers)
                todo.update(op.initializers)

        ordered_initializer_ops = []
        visited = set()  # nodes already appended to the result
        inits = OrderedSet()  # nodes whose initializers have been entered

        def visit(node):
            node = node.forwarded
            node.update_forwards()
            if node not in visited:
                if node.initializers:
                    if node in inits:
                        # Reached again before being appended: append now
                        if node not in visited:
                            ordered_initializer_ops.append(node)
                            visited.add(node)
                    else:
                        inits.add(node)
                        for n in node.initializers:
                            visit(n)
                else:
                    for n in node.args:
                        visit(n)
                # The recursive visits above may already have appended node
                if node not in visited:
                    ordered_initializer_ops.append(node)
                    visited.add(node)

        for node in initializers:
            visit(node)

        return ordered_initializer_ops

    @abc.abstractmethod
    def device_buffer_storage(self, bytes, dtype, name):
        """
        Make a DeviceBuffer.

        Arguments:
            bytes: Size of buffer.
            dtype: dtype of buffer.
            name: Name of the storage variable

        returns: A DeviceBuffer.
        """

    @abc.abstractmethod
    def device_buffer_reference(self):
        """
        Make a DeviceBufferReference.

        Returns: A DeviceBufferReference.
        """

    # User API follows
    def computation(self, results, *parameters, **kwargs):
        """
        Adds a computation to the transformer.

        Arguments:
            results: Values to be computed
            *parameters: Values to be set as arguments to evaluate
            name: Name for function.  Defaults to None.

        Returns:
            The new Computation, which has also been added to self.computations.

        Raises:
            ValueError: If the transformer has already been finalized.
        """
        if self.finalized:
            raise ValueError(
                'Cannot create computations from a finalized transformer'
            )

        result = Computation(self, results, *parameters, **kwargs)
        self.computations.add(result)
        return result

    def allocate(self):
        """
        Allocate storage and then initializes constants.

        Will finalize if not already done.
        """
        if self.allocated:
            return

        with Op.saved_user_deps():
            # Disable user_deps during transformations

            if not self.finalized:
                self._transform_computations()

            self.allocate_storage()

            # Initialization ops and computation ops, each visited once
            for op in OrderedSet(self.inits + self.ops):
                self.initialize_constant(op)

        self.allocated = True

    def initialize(self):
        """
        Initialize storage.  Will allocate if not already performed.
        """
        if self.initialized:
            return
        self.allocate()

        # Need to set initialized before we are done because the init computation will
        # try to initialize.
        self.initialized = True
        self.init_computation()
def __init__(self, transformer, returns, *args, **kwargs):
    """
    Collect the ops a computation needs and register them with the transformer.

    Arguments:
        transformer: The associated transformer; its all_results is extended
            with the ops collected here.
        returns: An Op, a sequence of Ops, a set of Ops, or None.
        *args: Ops with input=True that become the computation's parameters.
        **kwargs: Forwarded unchanged to the parent class initializer.

    Raises:
        ValueError: If returns is not a set, sequence, Op or None, if an
            argument does not have input=True, or if an argument is not an Op.
    """
    super(Computation, self).__init__(**kwargs)
    self.transformer = transformer
    self.computation_name = None

    def as_handle(op):
        # Tensor results are wrapped in a ResultHandle; anything else is kept
        return ResultHandle(op) if isinstance(op, TensorOp) else op

    self.ops = OrderedSet()
    if isinstance(returns, collections.Set):
        returns = set([as_handle(op) for op in returns])
        self.ops.update(returns)
    elif isinstance(returns, collections.Sequence):
        returns = [as_handle(op) for op in returns]
        self.ops.update(returns)
    elif isinstance(returns, Op):
        returns = as_handle(returns)
        self.ops.add(returns)
    elif returns is not None:
        raise ValueError()
    self.returns = returns

    self.parameters = []
    for arg in args:
        if not arg.input:
            raise ValueError((
                'The arguments to a computation must all have property '
                'input=True, but the op passed had input=False. In most '
                'cases you want to pass placeholder ops in as arguments. '
                '{op} was passed in, of type {op_type}.'
            ).format(
                op=arg,
                op_type=arg.__class__.__name__,
            ))
        self.parameters.append(arg)

        if not isinstance(arg, Op):
            raise ValueError()
        self.ops.add(arg)

    # Control dependencies: user_deps of the requested ops ...
    control_ops = OrderedSet()
    for requested in self.ops:
        control_ops.update(requested.user_deps)

    # ... plus other_deps of everything reachable through args and other_deps.
    done = set()
    frontier = OrderedSet(self.ops)
    while frontier:
        current = frontier.pop()
        if current not in done:
            control_ops.update(current.other_deps)
            frontier.update(current.other_deps)
            frontier.update(current.args)
            done.add(current)

    self.ops.update(control_ops)
    self.transformer.all_results.update(self.ops)
    self.executor = None
def ordered_initializers(self, ordered_ops):
    """
    Collect the initializers reachable from ``ordered_ops`` and order them.

    First, the ``initializers`` of every op (after following ``forwarded``)
    are gathered round by round, including initializers of initializers.
    Then each gathered initializer is visited depth-first, and nodes are
    appended to the result after the nodes they depend on:

    * a node with ``initializers`` has those initializers visited first;
    * a node without ``initializers`` has its ``args`` visited first.

    Arguments:
        ordered_ops: Iterable of ops whose initializers are wanted.

    Returns:
        A list of ops (initializers and the ops they depend on), each
        appearing once.
    """
    initializers = OrderedSet()
    todo = OrderedSet(ordered_ops)
    # Round-based expansion: each round processes the initializers that the
    # previous round discovered.
    while todo:
        these_ops = todo
        todo = OrderedSet()
        for op in these_ops:
            op = op.forwarded
            op.update_forwards()
            initializers.update(op.initializers)
            todo.update(op.initializers)

    ordered_initializer_ops = []
    visited = set()  # nodes already appended to the result
    inits = OrderedSet()  # nodes whose initializers have started being visited

    def visit(node):
        node = node.forwarded
        node.update_forwards()
        if node not in visited:
            if node.initializers:
                if node in inits:
                    # Reached again while its own initializers are still
                    # being visited: emit it now instead of recursing.
                    if node not in visited:
                        ordered_initializer_ops.append(node)
                        visited.add(node)
                else:
                    inits.add(node)
                    for n in node.initializers:
                        visit(n)
            else:
                for n in node.args:
                    visit(n)
            # Emit the node after its dependencies, unless the recursion
            # above already emitted it.
            if node not in visited:
                ordered_initializer_ops.append(node)
                visited.add(node)

    for node in initializers:
        visit(node)

    return ordered_initializer_ops
class Transformer(with_metaclass(Transformer_ABC_Meta, object)):
    """
    Produce an executable version of op-graphs.

    Computations are subsets of Ops to compute.  The transformer determines storage
    allocation and transforms the computations and allocations into functions.

    Arguments:
        fusion (bool): Whether to combine sequences of operations into one operation.
        **kwargs: Args for related classes.

    Attributes:
        computations (:obj:`set` of :class:`Computation`): The set of requested computations.
        all_results (:obj:`set` of :class:`ngraph.op_graph.op_graph.Op`):  A root set of Ops that
            need to be computed.  (Not assigned by this class's ``__init__``.)
        finalized (bool): True when transformation has been performed.
        allocated (bool): True when storage has been allocated.
        initialized (bool): True when variables have been initialized/restored.
        fusion (bool): True when fusion was enabled.  (Not assigned by this class's
            ``__init__``.)
        device_buffers (set): Set of handles for storage allocations.
        cpu_initializations (list): Initializations to be performed from the CPU after
            allocation.
        init_computation (Computation): The computation that performs initialization
            after allocation.  This happens once per training session, not once
            per-minibatch.
        graph_passes (list): Passes run over the computation graphs before
            transformation.  ``None`` until set (presumably by a subclass) or until
            the first ``register_graph_pass`` call.
        init_checked_ops: All ops processed.
        init_states: All states seen.
        state_initialization_ops: Initializations
        ops: Ordered ops of all computations; set by ``_transform_computations``.
    """

    def __init__(self, **kwargs):
        super(Transformer, self).__init__(**kwargs)
        self.computations = OrderedSet()
        self.finalized = False
        self.allocated = False
        self.initialized = False
        self.device_buffers = OrderedSet()
        self.cpu_initializations = []
        self.init_computation = None
        self.graph_passes = None
        self.init_checked_ops = OrderedSet()
        self.init_states = OrderedSet()
        self.state_initialization_ops = OrderedSet()

    def add_initialization_ops(self, ops):
        """
        Ensure initializations have been captured for state in ops.

        Ops already in ``init_checked_ops`` are skipped.  Newly found
        initializations are themselves scanned recursively.

        Args:
            ops: Collection of ops.

        Returns:
            True if new initializations were added.
        """
        did_work = False
        for op in ops:
            if op in self.init_checked_ops:
                continue
            self.init_checked_ops.add(op)
            new_inits = self.state_initializations(op.states_read)
            new_inits.update(self.state_initializations(op.states_written))
            if len(new_inits) > 0:
                did_work = True
                self.state_initialization_ops.update(new_inits)
                self.add_initialization_ops(Op.ordered_ops(new_inits))
        # Replace every recorded initialization by what it forwards to.
        self.state_initialization_ops = \
            OrderedSet(op.forwarded for op in self.state_initialization_ops)
        return did_work

    def state_initializations(self, states):
        """
        Find new initializations associated with states.

        States not seen before are recorded in ``init_states``.

        Args:
            states: A collection of states.

        Returns:
            New initializations.
        """
        new_inits = OrderedSet()
        for state in states:
            if state not in self.init_states:
                self.init_states.add(state)
                new_inits.update(state.initializers)
        return new_inits

    def register_graph_pass(self, graph_pass):
        """
        Append a pass to the list run by ``run_registered_graph_passes``.

        Arguments:
            graph_pass: Object providing ``do_pass(ops, transformer)``.
        """
        # ``__init__`` leaves graph_passes as None; create the list on first
        # use so that registering does not fail with an AttributeError.
        if self.graph_passes is None:
            self.graph_passes = []
        self.graph_passes.append(graph_pass)

    def run_registered_graph_passes(self, ops):
        """
        Run every registered graph pass, in order, on ``ops``.

        Arguments:
            ops: The ops handed to each pass's ``do_pass``.

        Returns:
            The same ``ops`` object that was passed in.
        """
        for graph_pass in self.graph_passes:
            graph_pass.do_pass(ops, self)
        return ops

    def _transform_computations(self):
        """
        Transform computation graphs to a form that can be run.

        Runs the graph passes, creates the ``init`` computation from
        ``state_initialization_ops``, assigns device buffers to every state
        and tensor op, transforms the allocations and then each
        computation, and finally marks the transformer as finalized.
        """
        # Run passes on the computation graphs
        all_results = [comp.computation for comp in self.computations]
        all_ops = self.run_registered_graph_passes(all_results)

        # Must happen before ``finalized`` is set: add_computation refuses
        # to add to a finalized transformer.
        self.init_computation = \
            self.add_computation(computation(doall(self.state_initialization_ops)).named('init'))
        all_ops.append(self.init_computation.computation)

        # Collect up all ops from the graph and obtain the init graph
        all_ops = OrderedSet(Op.ordered_ops(all_ops))

        def init_tensor_description(tensor_description):
            # Create a device buffer on first sight, then bind the tensor
            # description to a device tensor in its buffer.
            if tensor_description.buffer is None:
                tensor_description.buffer = self.device_buffer_storage(
                    tensor_description.base.tensor_size,
                    tensor_description.dtype,
                    tensor_description.name
                )
                self.device_buffers.add(tensor_description.buffer)
            tensor_description.value = \
                tensor_description.buffer.device_tensor(tensor_description)

        for state in self.init_states:
            init_tensor_description(state.tensor_description())
        self.ops = Op.ordered_ops(all_ops)
        for op in self.ops:
            if op.is_tensor_op:
                init_tensor_description(op.tensor_description())

        self.start_transform_allocate()
        for device_buffer in self.device_buffers:
            device_buffer.transform_allocate()
        self.finish_transform_allocate()

        # Compile the computations now that we know their storage
        for comp in self.computations:
            comp.computation_name = \
                self.transform_ordered_ops(Op.ordered_ops([comp.computation]),
                                           name=comp.name)
        self.finish_transform()
        self.finalized = True

    @abc.abstractmethod
    def start_transform_allocate(self):
        """
        Called just before allocation code is transformed.
        """

    @abc.abstractmethod
    def finish_transform_allocate(self):
        """
        Called after last allocation is transformed.
        """

    @abc.abstractmethod
    def transform_ordered_ops(self, ordered_ops, name=None):
        """
        Generate code to compute ordered_ops.

        Arguments:
            ordered_ops: Ops to compute
            name: Name of the computation being transformed;
                ``_transform_computations`` passes ``comp.name``.

        Returns:
            Handle for generated code
        """

    @abc.abstractmethod
    def finish_transform(self):
        """
        Finish generating the model.
        """

    @abc.abstractmethod
    def allocate_storage(self):
        """
        Allocate storage on the device.
        """

    @generic_method(Op)
    def initialize_constant(self, op):
        """
        Initialize storage for a constant op.

        The default for an arbitrary Op does nothing; the handler below
        covers ``InitTensorOp``.

        Arguments:
            op: The op to initialize.
        """
        pass

    @initialize_constant.on_type(InitTensorOp)
    def initialize_constant(self, op):
        # Compute the value with the op's ``valfun`` and store it into the
        # whole device tensor of the op's target tensor.
        tensor_description = op.tensor.tensor_description()
        value = op.valfun(tensor_description)
        tensor_description.value[()] = value

    @abc.abstractmethod
    def device_buffer_storage(self, bytes, dtype, name):
        """
        Make a DeviceBuffer.

        Arguments:
            bytes: Size of buffer.
            dtype: dtype of buffer.
            name: Name of the storage variable

        returns: A DeviceBuffer.
        """

    @abc.abstractmethod
    def device_buffer_reference(self):
        """
        Make a DeviceBufferReference.

        Returns:
            A DeviceBufferReference.
        """

    # Old interface
    def computation(self, results, *parameters):
        """
        Adds a computation to the transformer.

        Builds a computation op from the arguments and hands it to
        ``add_computation``.

        Arguments:
            results: Values to be computed
            *parameters: Values to be set as arguments to evaluate

        Returns:
            Callable.

        Raises:
            ValueError: If the transformer has already been finalized.
        """
        # Inside a method body ``computation`` resolves to the module-level
        # function, not to this method.
        return self.add_computation(computation(results, *parameters))

    def add_computation(self, computation):
        """
        Adds a computation to the transformer.

        Arguments:
            computation: A computation Op.

        Returns:
            Callable.  This is the ``Computation`` wrapper that was created
            and added to ``self.computations``.

        Raises:
            ValueError: If the transformer has already been finalized.
        """
        if self.finalized:
            raise ValueError(
                'Cannot create computations from a finalized transformer'
            )

        result = Computation(self, computation)
        self.computations.add(result)
        return result

    def allocate(self):
        """
        Allocate storage and then initializes constants.

        Will finalize if not already done.  Does nothing when already
        allocated.
        """
        if self.allocated:
            return

        if not self.finalized:
            self._transform_computations()

        self.allocate_storage()

        for op in OrderedSet(self.ops):
            self.initialize_constant(op)

        self.allocated = True

    def initialize(self):
        """
        Initialize storage.  Will allocate if not already performed.

        Does nothing when already initialized.
        """
        if self.initialized:
            return
        self.allocate()

        # Need to set initialized before we are done because the init computation will
        # try to initialize.
        self.initialized = True
        self.init_computation()

    def close(self):
        """
        Release resources held by the transformer.  The base class holds none.
        """
        pass

    def __del__(self):
        self.close()