def ordered_initializers(self, ordered_ops):
    """
    Collect the initializers needed by ordered_ops, including the
    initializers of those initializers, and order them so that each
    initializer runs after everything it depends on.

    Arguments:
        ordered_ops: The ops whose initializers are needed.

    Returns:
        A dependency-ordered list of initialization ops.
    """
    # First, gather the transitive closure of initializers.
    initializers = OrderedSet()
    todo = OrderedSet(ordered_ops)
    while todo:
        these_ops = todo
        todo = OrderedSet()
        for op in these_ops:
            op = op.forwarded
            op.update_forwards()
            initializers.update(op.initializers)
            todo.update(op.initializers)

    # Then order them with a post-order walk over args and initializers.
    ordered_initializer_ops = []
    visited = set()
    inits = OrderedSet()

    def visit(node):
        node = node.forwarded
        node.update_forwards()
        if node not in visited:
            if node.initializers:
                if node in inits:
                    # Already on the current path; emit without recursing
                    # again to avoid an infinite loop.
                    if node not in visited:
                        ordered_initializer_ops.append(node)
                        visited.add(node)
                else:
                    inits.add(node)
                    for n in node.initializers:
                        visit(n)
            else:
                for n in node.args:
                    visit(n)
            if node not in visited:
                ordered_initializer_ops.append(node)
                visited.add(node)

    for node in initializers:
        visit(node)

    return ordered_initializer_ops
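# A minimal sketch (toy Node class, not real ngraph Ops) of the ordering
# property ordered_initializers aims for: an initializer that reads another
# tensor must run after that tensor's own initializer. This is a simplified
# post-order walk; the real function also guards against initializer cycles.
class ToyNode(object):
    def __init__(self, name, args=(), initializers=()):
        self.name = name
        self.args = list(args)
        self.initializers = list(initializers)

def toy_ordered_initializers(roots):
    # Emit a node only after everything it depends on has been emitted.
    ordered, visited = [], set()

    def visit(node):
        if node in visited:
            return
        visited.add(node)
        for n in node.initializers + node.args:
            visit(n)
        ordered.append(node)

    for root in roots:
        for init in root.initializers:
            visit(init)
    return ordered

w = ToyNode('w')
fill_w = ToyNode('fill_w', args=[w])
v = ToyNode('v', initializers=[fill_w])
copy_w_to_v = ToyNode('copy_w_to_v', args=[w, v])
out = ToyNode('out', args=[v], initializers=[copy_w_to_v])
print([n.name for n in toy_ordered_initializers([out])])
# ['w', 'fill_w', 'v', 'copy_w_to_v'] -- fill_w runs before copy_w_to_v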
def find_recvs(fro):
    # Find all the Receivers that fro depends on, crossing the implicit
    # edge from each Receiver back to its send node. (A toy illustration of
    # this Receiver-hopping traversal follows comm_path_exists below.)
    visit = OrderedSet()
    recvs = OrderedSet()
    visit.add(fro)
    while visit:
        v = visit.pop()
        if isinstance(v, Receiver):
            recvs.add(v)
            visit.add(v.send_node())
        else:
            if hasattr(v, 'args'):
                visit.update(v.args)
    return recvs
def _transform_computations(self):
    """
    Transform computation graphs to a form that can be run.
    """
    # with Op.saved_user_deps():
    # Run passes on the computation graphs
    self.run_registered_graph_passes(self.all_results)

    # Collect up all ops from the graph and obtain the init graph
    all_ops = OrderedSet(Op.ordered_ops(self.all_results))
    init_op = doall(self.ordered_initializers(all_ops))

    # Run passes on the initialization graphs
    self.run_registered_graph_passes([init_op])

    # Union the init and computation graphs
    self.inits = Op.ordered_ops([init_op])
    all_ops.update(self.inits)

    # Create the computation which initializes values (called once per session)
    init_op.update_forwards()
    self.init_computation = self.computation(init_op, name="init")

    # Give ids
    for op in all_ops:
        if op not in self.opids:
            self.opids[op] = len(self.opids)

    self.dataflow, self.memory = assign_buffers(self, all_ops, self.fusion)

    # Initialize tensor descriptions
    for op in all_ops:
        self.initialize_tensor_descriptions(op)

    self.ops = self.dataflow.instructions

    self.start_transform_allocate()
    for device_buffer in self.device_buffers:
        device_buffer.transform_allocate()
    self.finish_transform_allocate()

    # Compile the computations now that we know their storage
    for computation in self.computations:
        computation.transform()
    self.finish_transform()
    self.finalized = True
def state_initializations(self, states):
    """
    Find new initializations associated with states.

    Args:
        states: A collection of states.

    Returns:
        New initializations.
    """
    new_inits = OrderedSet()
    for state in states:
        if state not in self.init_states:
            self.init_states.add(state)
            new_inits.update(state.initializers)
    return new_inits
def comm_path_exists(fro, to):
    """
    Find a path from fro to to, including paths through the non-explicit
    edges from a Receiver to its Sender.

    Note: this is a non-standard traversal, as most traversals stop at a
    Receiver.
    """
    # TODO: does this correctly handle traversing multiple send-recv junctions
    # from fro to to?
    visit = OrderedSet(fro.args)
    while visit:
        v = visit.pop()
        if v == to:
            return True
        if isinstance(v, Receiver):
            visit.add(v.send_node())
        else:
            visit.update(v.args)
    return False
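# A minimal sketch (hypothetical stand-in classes, not the real ngraph Op or
# Receiver types) of the traversal pattern shared by find_recvs and
# comm_path_exists: an ordinary walk stops at a Receiver, but these hop the
# implicit edge from a Receiver back to its send node.
from collections import deque

class ToyOp(object):
    def __init__(self, args=()):
        self.args = list(args)

class ToyReceiver(ToyOp):
    def __init__(self, send):
        ToyOp.__init__(self)
        self._send = send

    def send_node(self):
        return self._send

def toy_path_exists(fro, to):
    # Same shape as comm_path_exists, with a seen-set instead of OrderedSet.
    visit, seen = deque(fro.args), set()
    while visit:
        v = visit.pop()
        if v is to:
            return True
        if v in seen:
            continue
        seen.add(v)
        if isinstance(v, ToyReceiver):
            visit.append(v.send_node())  # cross the send/recv junction
        else:
            visit.extend(v.args)
    return False

x = ToyOp()
send = ToyOp([x])            # the sender's computation depends on x
recv = ToyReceiver(send)     # the matching receiver on the other device
y = ToyOp([recv])
assert toy_path_exists(y, x)  # found only because we hop recv -> send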
def do_traversal(self, root):
    # Note: This is almost identical to Op's visit_input_closure.
    # Kahn-style topological sort over control_deps: count each node's
    # children, then repeatedly emit nodes whose pending count reaches zero,
    # so children always appear before their parents.
    available = OrderedSet()
    counts = dict()
    parents = collections.defaultdict(list)
    ready = OrderedSet()
    nodes = list()

    available.add(root)
    while available:
        node = available.pop()
        node.update_forwards()
        if node in counts:
            continue

        children = [child.forwarded for child in node.control_deps]
        if children:
            counts[node] = len(children)
            for child in children:
                parents[child].append(node)
            available.update(children)
        else:
            ready.add(node)

    while ready:
        node = ready.pop()
        nodes.append(node)
        for p in parents.get(node, []):
            count = counts[p] - 1
            if count == 0:
                ready.add(p)
                del counts[p]
            else:
                counts[p] = count

    return nodes
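# A minimal sketch of the same Kahn-style ordering on a plain dict graph
# (toy string nodes, not ngraph Ops): dependencies are emitted before the
# nodes that depend on them, mirroring how do_traversal orders control_deps.
from collections import defaultdict

def toy_traversal(root, deps):
    counts, parents, ready, order = {}, defaultdict(list), [], []
    stack = [root]
    while stack:
        node = stack.pop()
        if node in counts:
            continue
        children = deps.get(node, [])
        if children:
            counts[node] = len(children)
            for c in children:
                parents[c].append(node)
            stack.extend(children)
        else:
            counts[node] = 0
            ready.append(node)
    while ready:
        node = ready.pop()
        order.append(node)
        for p in parents[node]:
            counts[p] -= 1
            if counts[p] == 0:
                ready.append(p)
    return order

# 'top' depends on 'a' and 'b'; 'a' depends on 'b'.
print(toy_traversal('top', {'top': ['a', 'b'], 'a': ['b']}))
# ['b', 'a', 'top'] -- every node follows its dependencies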
class Transformer(with_metaclass(Transformer_ABC_Meta, object)):
    """
    Produce an executable version of op-graphs.

    Computations are subsets of Ops to compute. The transformer determines
    storage allocation and transforms the computations and allocations into
    functions.

    Arguments:
        fusion (bool): Whether to combine sequences of operations into one
            operation.
        **kwargs: Args for related classes.

    Attributes:
        computations (:obj:`set` of :class:`Computation`): The set of
            requested computations.
        all_results (:obj:`set` of :class:`ngraph.op_graph.op_graph.Op`): A
            root set of Ops that need to be computed.
        finalized (bool): True when transformation has been performed.
        initialized (bool): True when variables have been
            initialized/restored.
        fusion (bool): True when fusion was enabled.
        device_buffers (set): Set of handles for storage allocations.
        cpu_initializations (list): Initializations to be performed from the
            CPU after allocation.
        init_computation (Computation): The computation that performs
            initialization after allocation. This happens once per training
            session, not once per-minibatch.
        init_checked_ops: All ops that have been checked for initializers.
        init_states: All states seen so far.
        state_initialization_ops: Ops that initialize the states seen so far.
    """

    def __init__(self, **kwargs):
        super(Transformer, self).__init__(**kwargs)
        self.computations = OrderedSet()
        self.finalized = False
        self.allocated = False
        self.initialized = False
        self.device_buffers = OrderedSet()
        self.cpu_initializations = []
        self.init_computation = None
        self.graph_passes = None
        self.init_checked_ops = OrderedSet()
        self.init_states = OrderedSet()
        self.state_initialization_ops = OrderedSet()

    def add_initialization_ops(self, ops):
        """
        Ensure initializations have been captured for state in ops.

        Args:
            ops: Collection of ops.

        Returns:
            True if new initializations were added.
        """
        did_work = False
        for op in ops:
            if op in self.init_checked_ops:
                continue
            self.init_checked_ops.add(op)
            new_inits = self.state_initializations(op.states_read)
            new_inits.update(self.state_initializations(op.states_written))
            if len(new_inits) > 0:
                did_work = True
                self.state_initialization_ops.update(new_inits)
                self.add_initialization_ops(Op.ordered_ops(new_inits))

        self.state_initialization_ops = \
            OrderedSet(op.forwarded for op in self.state_initialization_ops)
        return did_work

    def state_initializations(self, states):
        """
        Find new initializations associated with states.

        Args:
            states: A collection of states.

        Returns:
            New initializations.
        """
        new_inits = OrderedSet()
        for state in states:
            if state not in self.init_states:
                self.init_states.add(state)
                new_inits.update(state.initializers)
        return new_inits

    def register_graph_pass(self, graph_pass):
        self.graph_passes.append(graph_pass)

    def run_registered_graph_passes(self, ops):
        for graph_pass in self.graph_passes:
            graph_pass.do_pass(ops, self)
        return ops

    def _transform_computations(self):
        """
        Transform computation graphs to a form that can be run.
""" # Run passes on the computation graphs all_results = [] for comp in self.computations: all_results.append(comp.computation) all_ops = self.run_registered_graph_passes(all_results) self.init_computation = \ self.add_computation(computation(doall(self.state_initialization_ops)).named('init')) all_ops.append(self.init_computation.computation) # Collect up all ops from the graph and obtain the init graph all_ops = OrderedSet(Op.ordered_ops(all_ops)) def init_tensor_description(tensor_description): if tensor_description.buffer is None: tensor_description.buffer = self.device_buffer_storage( tensor_description.base.tensor_size, tensor_description.dtype, tensor_description.name ) self.device_buffers.add(tensor_description.buffer) tensor_description.value = \ tensor_description.buffer.device_tensor(tensor_description) for state in self.init_states: init_tensor_description(state.tensor_description()) self.ops = Op.ordered_ops(all_ops) for op in self.ops: if op.is_tensor_op: init_tensor_description(op.tensor_description()) self.start_transform_allocate() for device_buffer in self.device_buffers: device_buffer.transform_allocate() self.finish_transform_allocate() # Compile the computations now that we know their storage for comp in self.computations: comp.computation_name = \ self.transform_ordered_ops(Op.ordered_ops([comp.computation]), name=comp.name) self.finish_transform() self.finalized = True @abc.abstractmethod def start_transform_allocate(self): """ Called just before allocation code is transformed. """ @abc.abstractmethod def finish_transform_allocate(self): """ Called after last allocation is transformed. """ @abc.abstractmethod def transform_ordered_ops(self, ordered_ops): """ Generate code to compute ordered_ops. Arguments: ordered_ops: Ops to compute Returns: Handle for generated code """ @abc.abstractmethod def finish_transform(self): """ Finish generating the model. """ @abc.abstractmethod def allocate_storage(self): """ Allocate storage on the device. """ @generic_method(Op) def initialize_constant(self, op): pass @initialize_constant.on_type(InitTensorOp) def initialize_constant(self, op): tensor_description = op.tensor.tensor_description() value = op.valfun(tensor_description) tensor_description.value[()] = value @abc.abstractmethod def device_buffer_storage(self, bytes, dtype, name): """ Make a DeviceBuffer. Arguments: bytes: Size of buffer. dtype: dtype of buffer. name: Name of the storage variable returns: A DeviceBuffer. """ @abc.abstractmethod def device_buffer_reference(self): """ Make a DeviceBufferReference. Returns: A DeviceBufferReference. """ # Old interface def computation(self, results, *parameters): """ Adds a computation to the transformer. Arguments: results: Values to be computed *parameters: Values to be set as arguments to evaluate Returns: Callable. """ return self.add_computation(computation(results, *parameters)) def add_computation(self, computation): """ Adds a computation to the transformer. Arguments: computation: A computation Op. Returns: Callable. """ if self.finalized: raise ValueError( 'Cannot create computations from a finalized transformer' ) result = Computation(self, computation) self.computations.add(result) return result def allocate(self): """ Allocate storage and then initializes constants. Will finalize if not already done. 
""" if self.allocated: return if not self.finalized: self._transform_computations() self.allocate_storage() for op in OrderedSet(self.ops): self.initialize_constant(op) self.allocated = True def initialize(self): """ Initialize storage. Will allocate if not already performed. """ if self.initialized: return self.allocate() # Need to set initialized before we are done because the init computation will # try to initialize. self.initialized = True self.init_computation() def close(self): pass def __del__(self): self.close()
def __init__(self, transformer, returns, *args, **kwargs):
    super(Computation, self).__init__(**kwargs)
    self.transformer = transformer
    self.computation_name = None

    def wrap_op(op):
        if isinstance(op, TensorOp):
            return ResultHandle(op)
        else:
            return op

    def wrap_ops(ops):
        return [wrap_op(op) for op in ops]

    self.ops = OrderedSet()
    if isinstance(returns, collections.Set):
        returns = set(wrap_ops(returns))
        self.ops.update(returns)
    elif isinstance(returns, collections.Sequence):
        returns = wrap_ops(returns)
        self.ops.update(returns)
    elif isinstance(returns, Op):
        returns = wrap_op(returns)
        self.ops.add(returns)
    elif returns is not None:
        raise ValueError()
    self.returns = returns

    self.parameters = []
    for arg in args:
        if arg.input:
            self.parameters.append(arg)
        else:
            raise ValueError((
                'The arguments to a computation must all have property '
                'input=True, but the op passed had input=False. In most '
                'cases you want to pass placeholder ops in as arguments. '
                '{op} was passed in, of type {op_type}.'
            ).format(
                op=arg,
                op_type=arg.__class__.__name__,
            ))
        if isinstance(arg, Op):
            self.ops.add(arg)
        else:
            raise ValueError()

    control_ops = OrderedSet()
    for op in self.ops:
        control_ops.update(op.user_deps)
    processed_ops = set()
    pending_ops = OrderedSet(self.ops)
    while pending_ops:
        op = pending_ops.pop()
        if op in processed_ops:
            continue
        control_ops.update(op.other_deps)
        pending_ops.update(op.other_deps)
        pending_ops.update(op.args)
        processed_ops.add(op)
    self.ops.update(control_ops)

    self.transformer.all_results.update(self.ops)
    self.executor = None
class Computation(NameableValue):
    """
    A handle for a computation function.

    Arguments:
        transformer (obj:`Transformer`): The associated transformer.
        returns: If an Op, return the value of the Op; if a sequence of Ops,
            return the sequence of values; if a set, return a map; if None,
            return None.
        *args: AllocationOps marked input will be arguments to the function.
        **kwargs: Args for related classes.
    """

    def __init__(self, transformer, returns, *args, **kwargs):
        super(Computation, self).__init__(**kwargs)
        self.transformer = transformer
        self.computation_name = None

        def wrap_op(op):
            if isinstance(op, TensorOp):
                return ResultHandle(op)
            else:
                return op

        def wrap_ops(ops):
            return [wrap_op(op) for op in ops]

        self.ops = OrderedSet()
        if isinstance(returns, collections.Set):
            returns = set(wrap_ops(returns))
            self.ops.update(returns)
        elif isinstance(returns, collections.Sequence):
            returns = wrap_ops(returns)
            self.ops.update(returns)
        elif isinstance(returns, Op):
            returns = wrap_op(returns)
            self.ops.add(returns)
        elif returns is not None:
            raise ValueError()
        self.returns = returns

        self.parameters = []
        for arg in args:
            if arg.input:
                self.parameters.append(arg)
            else:
                raise ValueError((
                    'The arguments to a computation must all have property '
                    'input=True, but the op passed had input=False. In most '
                    'cases you want to pass placeholder ops in as arguments. '
                    '{op} was passed in, of type {op_type}.'
                ).format(
                    op=arg,
                    op_type=arg.__class__.__name__,
                ))
            if isinstance(arg, Op):
                self.ops.add(arg)
            else:
                raise ValueError()

        control_ops = OrderedSet()
        for op in self.ops:
            control_ops.update(op.user_deps)
        processed_ops = set()
        pending_ops = OrderedSet(self.ops)
        while pending_ops:
            op = pending_ops.pop()
            if op in processed_ops:
                continue
            control_ops.update(op.other_deps)
            pending_ops.update(op.other_deps)
            pending_ops.update(op.args)
            processed_ops.add(op)
        self.ops.update(control_ops)

        self.transformer.all_results.update(self.ops)
        self.executor = None

    def transform(self):
        """
        Transforms the computation so that it can be run.
        """
        self.ops = {op.forwarded for op in self.ops}
        ordered_ops = self.transformer.dataflow.can_reach(
            self.ops, order=self.transformer.ops)
        self.computation_name = self.transformer.transform_ordered_ops(
            ordered_ops, name=self.name)

    def __call__(self, *args):
        """
        Executes the computation, passing args in to the function.
        """
        if len(args) != len(self.parameters):
            raise ValueError((
                'Computation was expecting {expected} arguments, but was '
                'called with {called}.'
            ).format(
                expected=len(self.parameters),
                called=len(args),
            ))

        # TODO Should this be automatic?
        self.transformer.initialize()

        # Get the parameters to the device
        for param, arg in zip(self.parameters, args):
            param.value[()] = arg

        self.executor()

        # TODO Should copy this out of the device to a destination when it
        # is not scalar
        def value(op):
            """
            Returns the computed value of op, or None if it has no value.

            Arguments:
                op: The op whose value is wanted.

            Returns:
                Return value for op.
            """
            if isinstance(op, TensorOp):
                if op.value is None:
                    return None
                return op.value.get(None)
            else:
                return None

        if isinstance(self.returns, Op):
            return value(self.returns)
        elif isinstance(self.returns, collections.Set):
            result = dict()
            for op in self.returns:
                result[op] = value(op)
            return result
        elif isinstance(self.returns, collections.Sequence):
            return tuple(value(op) for op in self.returns)
        else:
            return None
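# A short sketch of how the forms of `returns` map onto __call__ results,
# under the same assumed ngraph-python front end as above: a single Op gives
# a single value, a sequence gives a tuple, and a set gives an op-to-value
# dict.
import ngraph as ng
import ngraph.transformers as ngt

x = ng.placeholder(())
t = ngt.make_transformer()
single = t.computation(x + 1, x)         # Op       -> one value
pair = t.computation([x + 1, x * 2], x)  # Sequence -> tuple of values
print(single(3.0))  # 4.0
print(pair(3.0))    # (4.0, 6.0)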
def __init__(self, hetr, results, *parameters, **kwargs):
    # super(HetrComputation, self).__init__(hetr, results, *parameters, **kwargs)
    self.child_computations = dict()
    self.child_results_map = dict()
    self.transformer = hetr
    self.transformer_name_list = hetr.transformer_list
    self.send_nodes = hetr.send_nodes
    self.hetr_passes = hetr.hetr_passes
    self.num_results = 0
    self.num_send_nodes = dict()
    self.is_distributed = False
    self.parameters = parameters

    orig_results = results
    if not isinstance(results, OrderedSet):
        if not isinstance(results, list):
            results = [results] if results else []
        results = OrderedSet(results)

    for op in results:
        if 'device_id' in op.metadata and \
                isinstance(op.metadata['device_id'], (list, tuple)):
            op.metadata['is_split_op'] = True
            new_result = ResultOp(device_id=0, args=op)
            results.remove(op)
            results.append(new_result)

    all_results = OrderedSet(results)
    all_results.update(parameters)

    # all_results is empty; hetr has no computations. Where do these get
    # assigned?
    # Previously, we used t.all_results, which went away. When was that
    # created?
    #   - the computation object used to update all_results of the transformer
    #   - the transformer's transform_ops used to use all_results but not
    #     update it, and returned a new copy

    if orig_results is not None:
        # Do Hetr passes
        for graph_pass in self.hetr_passes:
            all_results = all_results + hetr.send_nodes
            graph_pass.do_pass(all_results, self.transformer)

        # TODO replicate placeholders for nodes which got replicated;
        # update the placeholder mapping below, so at __call__ time we know
        # which transformers to pass copies of the provided placeholder
        # value to

        if hetr.vizpass:
            vis_results = all_results + hetr.send_nodes
            hetr.vizpass.do_pass(vis_results, self)

    self.transformer_to_node = {t: list() for t in self.transformer_name_list}

    self.is_distributed = any(
        'Gather_Send' in s.name or 'Scatter_Send' in s.name
        for s in self.send_nodes)

    # Update the transformer-to-send-node mappings
    for s in self.send_nodes:
        tname = s.metadata['transformer']
        self.transformer_to_node[tname].append(s)
        self.num_send_nodes[tname] = self.num_send_nodes.get(tname, 0) + 1

    self.num_results = len(results)
    if orig_results is not None:
        for pos, op in enumerate(results):
            tname = op.metadata['transformer']
            if self.is_distributed is True:
                if tname in self.num_send_nodes:
                    for i in range(self.num_send_nodes[tname]):
                        self.child_results_map.setdefault(tname, []).append(None)
            if 'ResultOp' in op.name:
                self.transformer_to_node[tname].append(op.args[0])
            else:
                self.transformer_to_node[tname].append(op)
            self.child_results_map.setdefault(tname, []).append(pos)

    self.placeholders = {t: list() for t in self.transformer_name_list}
    self.placeholders_pos = {t: list() for t in self.transformer_name_list}
    for i, p in enumerate(parameters):
        tname = p.metadata['transformer']
        assert isinstance(tname, list) is False, \
            "Fatal: multiple transformers cannot be handled!"
        self.placeholders[tname].append(p)
        self.placeholders_pos[tname].append(i)

    self.child_computations = dict()
    for tname in self.transformer_name_list:
        # Request an AsyncTransformer from the HetrTransformer and use it
        # to build an AsyncComputation.
        async_trans = hetr.transformer(tname)
        async_comp = async_trans.computation(
            self.transformer_to_node[tname],
            tuple(self.placeholders[tname]))
        self.child_computations[tname] = async_comp
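# A toy sketch (plain dicts and strings, none of the hetr machinery) of the
# bookkeeping above: each result op is routed to the transformer named in its
# metadata, and child_results_map remembers the result's original position so
# the outputs of the per-transformer child computations can be reassembled in
# the caller's order.
results = [('op_a', 'cpu0'), ('op_b', 'cpu1'), ('op_c', 'cpu0')]

transformer_to_node = {'cpu0': [], 'cpu1': []}
child_results_map = {'cpu0': [], 'cpu1': []}
for pos, (op, tname) in enumerate(results):
    transformer_to_node[tname].append(op)
    child_results_map[tname].append(pos)

# Pretend each child computation just echoes its ops as outputs.
child_outputs = {t: [op.upper() for op in ops]
                 for t, ops in transformer_to_node.items()}

merged = [None] * len(results)
for tname, positions in child_results_map.items():
    for out, pos in zip(child_outputs[tname], positions):
        merged[pos] = out
print(merged)  # ['OP_A', 'OP_B', 'OP_C'], back in the caller's order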