def general_toposort(r_out, deps, debug_print=False):
    """Topologically sort the nodes reachable from `r_out`, following the
    dependencies returned by `deps`.

    :note: deps(i) should behave like a pure function (no funny business
        with internal state)
    :note: deps(i) will be cached by this function (to be fast)
    :note: The order of the return value list is determined by the order
        of nodes returned by the deps() function.
    """
    deps_cache = {}

    def _deps(io):
        if io not in deps_cache:
            d = deps(io)
            if d:
                if not isinstance(d, (list, OrderedSet)):
                    raise TypeError("Non-deterministic collections here make"
                                    " toposort non-deterministic.")
                deps_cache[io] = list(d)
            else:
                deps_cache[io] = d
            return d
        else:
            return deps_cache[io]

    assert isinstance(r_out, (tuple, list, deque))

    reachable, clients = stack_search(deque(r_out), _deps, 'dfs', True)
    sources = deque([r for r in reachable if not deps_cache.get(r, None)])

    rset = set()
    rlist = []
    while sources:
        node = sources.popleft()
        if node not in rset:
            rlist.append(node)
            rset.add(node)
            for client in clients.get(node, []):
                deps_cache[client] = [a for a in deps_cache[client]
                                      if a is not node]
                if not deps_cache[client]:
                    sources.append(client)

    if len(rlist) != len(reachable):
        if debug_print:
            print('')
            print(reachable)
            print(rlist)
        raise ValueError('graph contains cycles')

    return rlist
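# A minimal, self-contained sketch of the same two-phase scheme used by
# general_toposort (collect the reachable nodes, then repeatedly peel off
# dependency-free "sources"): Kahn's algorithm over a plain dependency
# dict. The dict, the node names, and the toposort_demo helper are
# illustrative stand-ins, not part of this module.
from collections import deque

def toposort_demo(nodes, deps):
    # deps: dict mapping node -> list of nodes it depends on
    remaining = dict((n, list(deps.get(n, []))) for n in nodes)
    clients = {}
    for n, ds in remaining.items():
        for d in ds:
            clients.setdefault(d, []).append(n)
    sources = deque(n for n in nodes if not remaining[n])
    order = []
    while sources:
        node = sources.popleft()
        order.append(node)
        for client in clients.get(node, []):
            remaining[client].remove(node)
            if not remaining[client]:
                sources.append(client)
    if len(order) != len(nodes):
        raise ValueError('graph contains cycles')
    return order

# 'c' depends on 'a' and 'b', so both are listed before it.
assert toposort_demo(['a', 'b', 'c'], {'c': ['a', 'b']}) == ['a', 'b', 'c']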
def apply(self, env, start_from=None):
    if start_from is None:
        start_from = env.outputs
    q = deque(graph.io_toposort(env.inputs, start_from))

    # current_node must be bound before the callbacks below can fire,
    # otherwise an early import/prune event would raise a NameError
    current_node = None

    def importer(node):
        if node is not current_node:
            q.append(node)

    def pruner(node):
        if node is not current_node:
            try:
                q.remove(node)
            except ValueError:
                pass

    u = self.attach_updater(env, importer, pruner)
    try:
        while q:
            if self.order == 'out_to_in':
                node = q.pop()
            else:
                node = q.popleft()
            current_node = node
            self.process_node(env, node)
    except Exception:
        self.detach_updater(env, u)
        raise
    self.detach_updater(env, u)
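# The importer/pruner pair above is a worklist pattern: while the queue
# is being drained, graph changes feed new nodes in and drop dead ones
# out. A hedged toy version of the same control flow, using integers in
# place of apply nodes (the "processing" below is simulated, not
# Theano's):
from collections import deque

q = deque([1, 2, 3])
current = None

def importer(item):
    # a replacement created a new node: schedule it
    if item is not current:
        q.append(item)

def pruner(item):
    # a node was removed from the graph: unschedule it
    if item is not current:
        try:
            q.remove(item)
        except ValueError:
            pass  # it was never scheduled; nothing to do

while q:
    current = q.pop()  # 'out_to_in' order; use popleft() for 'in_to_out'
    if current == 3:
        importer(4)    # pretend processing node 3 created node 4
        pruner(2)      # ... and removed node 2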
def list_of_nodes(inputs, outputs):
    """Return the apply nodes of the graph between inputs and outputs."""
    return stack_search(
        deque([o.owner for o in outputs]),
        lambda o: [inp.owner for inp in o.inputs
                   if inp.owner
                   and not any(i in inp.owner.outputs for i in inputs)])
def variables_and_orphans(i, o):
    """Return the variables reachable from the outputs `o` without crossing
    the inputs `i`, together with the orphans among them (variables with no
    owner that are not in `i`).
    """
    def expand(r):
        if r.owner and r not in i:
            l = list(r.owner.inputs) + list(r.owner.outputs)
            l.reverse()
            return l
    variables = stack_search(deque(o), expand, 'dfs')
    orphans = [r for r in variables if r.owner is None and r not in i]
    return variables, orphans
def distribute(self, value, indices, containers):
    # master stream seeded with `value`; each draw is a seed for one child
    rg = partial(numpy.random.RandomState(int(value)).randint, 2 ** 30)
    elems = deque(zip(indices, containers))
    i = 0
    while elems:
        index, container = elems.popleft()
        # advance the master stream until the seed for this index,
        # then give the container a RandomState seeded with it
        while i <= index:
            curr = rg()
            i += 1
        rs = numpy.random.RandomState(int(curr))
        container.data = rs
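# distribute derives one child RandomState per index from a single
# master seed, so the same (value, index) pair always reproduces the
# same stream. A standalone sketch of that derivation; seed_for_index
# is an illustrative name, not an existing helper:
from functools import partial
import numpy

def seed_for_index(master_seed, index):
    # draw child seeds from the master stream up to the requested index
    rg = partial(numpy.random.RandomState(int(master_seed)).randint, 2 ** 30)
    for _ in range(index + 1):
        curr = rg()
    return numpy.random.RandomState(int(curr))

# deterministic: the same (seed, index) pair yields the same draws
assert (seed_for_index(42, 3).randint(100) ==
        seed_for_index(42, 3).randint(100))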
def ancestors(variable_list, blockers=None):
    """Return the variables that contribute to those in variable_list
    (inclusive).

    :type variable_list: list of `Variable` instances
    :param variable_list: output `Variable` instances from which to search
        backward through owners
    :rtype: list of `Variable` instances
    :returns:
        all input nodes, in the order found by a left-recursive depth-first
        search started at the nodes in `variable_list`.
    """
    def expand(r):
        if r.owner and (not blockers or r not in blockers):
            return reversed(r.owner.inputs)
    dfs_variables = stack_search(deque(variable_list), expand, 'dfs')
    return dfs_variables
def ancestors(variable_list, blockers=None):
    """Return the variables that contribute to those in variable_list
    (inclusive).

    :type variable_list: list of `Variable` instances
    :param variable_list: output `Variable` instances from which to search
        backward through owners
    :rtype: list of `Variable` instances
    :returns:
        all input nodes, in the order found by a left-recursive depth-first
        search started at the nodes in `variable_list`.
    """
    def expand(r):
        if r.owner and (not blockers or r not in blockers):
            l = list(r.owner.inputs)
            l.reverse()
            return l
    dfs_variables = stack_search(deque(variable_list), expand, 'dfs')
    return dfs_variables
def ancestors(variable_list, blockers=None):
    """Return the variables that contribute to those in variable_list.

    :type variable_list: list of `Variable`, `In` or `Out` instances
    :param variable_list: `Variable` instances from which to search backward
        through owners
    :type blockers: same as variable_list
    :param blockers: stop the graph traversal at these Variables.
    :rtype: list of `Variable` instances
    :returns:
        all nodes, in the order found by a left-recursive depth-first search
        started at the nodes in `variable_list` (including them).
    """
    if blockers is None:
        blockers = ()
    else:
        bb = []
        for b in blockers:
            if isinstance(b, (theano.In, theano.Out)):
                bb.append(b.variable)
            else:
                bb.append(b)
        blockers = bb
    var_list = []
    for var in variable_list:
        if isinstance(var, (theano.In, theano.Out)):
            var_list.append(var.variable)
        else:
            var_list.append(var)

    def expand(r):
        if r.owner and (not blockers or r not in blockers):
            return reversed(r.owner.inputs)
    dfs_variables = stack_search(deque(var_list), expand, 'dfs')
    return dfs_variables
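# To make the traversal order concrete, here is a self-contained mock of
# the Variable/Apply ownership structure and a plain DFS equivalent to
# the expand + stack_search combination above. Var and Node are
# illustrative stand-ins, not Theano's classes.
from collections import deque

class Node(object):
    def __init__(self, inputs):
        self.inputs = inputs

class Var(object):
    def __init__(self, name, owner=None):
        self.name, self.owner = name, owner

x, y = Var('x'), Var('y')
z = Var('z', owner=Node([x, y]))      # z = f(x, y)
out = Var('out', owner=Node([z, y]))  # out = g(z, y)

def ancestors_demo(variable_list, blockers=()):
    start, seen, order = deque(variable_list), set(), []
    while start:
        v = start.pop()  # stack discipline -> depth-first
        if v not in seen:
            seen.add(v)
            order.append(v)
            if v.owner and v not in blockers:
                # reversed() so the leftmost input is visited first
                start.extend(reversed(v.owner.inputs))
    return order

assert [v.name for v in ancestors_demo([out])] == ['out', 'z', 'x', 'y']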
def _contains_cycle(fgraph, orderings):
    """
    fgraph - the FunctionGraph to check for cycles

    orderings - dictionary specifying extra dependencies besides those
        encoded in Variable.owner / Apply.inputs

        If orderings[my_apply] == dependencies, then my_apply is an Apply
        instance, dependencies is a set of Apply instances, and every
        member of dependencies must be executed before my_apply.

        The dependencies are typically used to prevent inplace apply
        nodes from destroying their input before other apply nodes with
        the same input access it.

    Returns True if the graph contains a cycle, False otherwise.
    """
    # These are lists of Variable instances
    inputs = fgraph.inputs
    outputs = fgraph.outputs

    # this is a hard-coded reimplementation of functions from graph.py
    # reason: go faster, prepare for port to C.
    # specifically, it could be replaced with a wrapper
    # around graph.io_toposort that returns True iff io_toposort raises
    # a ValueError containing the substring 'cycle'.
    # This implementation is optimized for the destroyhandler and runs
    # slightly faster than io_toposort.

    # this is performance-critical code. it is the largest single-function
    # bottleneck when compiling large graphs.
    assert isinstance(outputs, (tuple, list, deque))

    # TODO: For more speed - use a defaultdict for the orderings
    # (defaultdict runs faster than dict in the case where the key
    # is not in the dictionary, at least in CPython)

    iset = set(inputs)

    # IG: I tried converting parent_counts to use an id for the key,
    # so that the dict would do reference counting on its keys.
    # This caused a slowdown.
    # Separate benchmark tests showed that calling id is about
    # half as expensive as a dictionary access, and that the
    # dictionary also runs slower when storing ids than when
    # storing objects.

    # dict mapping an Apply or Variable instance to the number
    # of its parents (including parents imposed by orderings)
    # that haven't been visited yet
    parent_counts = {}
    # dict mapping an Apply or Variable instance to its children
    node_to_children = {}

    # visitable: A container holding all Variable and Apply instances
    # that can currently be visited according to the graph topology
    # (ie, whose parents have already been visited)
    # TODO: visitable is a fifo_queue. could this run faster if we
    # implement it as a stack rather than a deque?
    # TODO: visitable need not be a fifo_queue, any kind of container
    # that we can throw things into and take things out of quickly will
    # work. is there another kind of container that could run faster?
    # we don't care about the traversal order here as much as we do
    # in io_toposort because we aren't trying to generate an ordering
    # on the nodes
    visitable = deque()

    # IG: visitable could in principle be initialized to fgraph.inputs
    #     + fgraph.orphans... if there were an fgraph.orphans structure.
    #     I tried making one, and maintaining it caused a huge slowdown.
    #     This may be because I made it a list, so it would have a
    #     deterministic iteration order, in hopes of using it to speed
    #     up toposort as well.
    #     I think since we need to scan through all variables and nodes
    #     to make parent_counts anyway, it's cheap enough to always
    #     detect orphans at cycle detection / toposort time.

    # Pass through all the nodes to build visitable, parent_counts, and
    # node_to_children
    for var in fgraph.variables:

        # this is faster than calling get_parents
        owner = var.owner
        if owner:
            parents = [owner]
        else:
            parents = []

        # variables don't appear in orderings, so we don't need to worry
        # about that here

        if parents:
            for parent in parents:
                # insert node in node_to_children[r]
                # (if r is not already in node_to_children,
                # initialize it to [])
                node_to_children.setdefault(parent, []).append(var)
            parent_counts[var] = len(parents)
        else:
            visitable.append(var)
            parent_counts[var] = 0

    for a_n in fgraph.apply_nodes:
        parents = list(a_n.inputs)
        # This is faster than conditionally extending
        # IG: I tried using a shared empty_list = [] constructed
        # outside of the for loop to avoid constructing multiple
        # lists, but this was not any faster.
        parents.extend(orderings.get(a_n, []))

        if parents:
            for parent in parents:
                # insert node in node_to_children[r]
                # (if r is not already in node_to_children,
                # initialize it to [])
                node_to_children.setdefault(parent, []).append(a_n)
            parent_counts[a_n] = len(parents)
        else:
            # an Apply with no inputs would be a weird case, but I'm
            # not sure we forbid it
            visitable.append(a_n)
            parent_counts[a_n] = 0

    # at this point,
    # parent_counts.keys() == fgraph.apply_nodes + fgraph.variables

    # Now we actually check for cycles
    # As long as there are nodes that can be visited while respecting
    # the topology, we keep visiting nodes
    # If we run out of visitable nodes and we haven't visited all nodes,
    # then there was a cycle. It blocked the traversal because some
    # node couldn't be visited until one of its descendants had been
    # visited too.
    # This is a standard cycle detection algorithm.
    visited = 0
    while visitable:
        # Since each node is inserted into the visitable queue exactly
        # once, it comes out of the queue exactly once.
        # That means we can decrement its children's unvisited parent
        # count and increment the visited node count without
        # double-counting.
        node = visitable.popleft()
        visited += 1
        for client in node_to_children.get(node, []):
            parent_counts[client] -= 1
            # If all of a node's parents have been visited,
            # it may now be visited too
            if not parent_counts[client]:
                visitable.append(client)

    return visited != len(parent_counts)
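# The counting argument above is Kahn's algorithm: every node enters the
# visitable queue exactly once, after all of its parents, so if the
# visit count falls short of the node count, a dependency cycle blocked
# the traversal. A self-contained check on plain adjacency dicts
# (contains_cycle_demo is an illustrative name, not an existing helper):
from collections import deque

def contains_cycle_demo(parents):
    # parents: dict mapping node -> list of nodes that must come first
    parent_counts = dict((n, len(ps)) for n, ps in parents.items())
    children = {}
    for n, ps in parents.items():
        for p in ps:
            children.setdefault(p, []).append(n)
    visitable = deque(n for n, c in parent_counts.items() if c == 0)
    visited = 0
    while visitable:
        node = visitable.popleft()
        visited += 1
        for child in children.get(node, []):
            parent_counts[child] -= 1
            if not parent_counts[child]:
                visitable.append(child)
    return visited != len(parent_counts)

assert not contains_cycle_demo({'a': [], 'b': ['a'], 'c': ['b']})
assert contains_cycle_demo({'a': ['c'], 'b': ['a'], 'c': ['b']})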
def _dfs_toposort(i, r_out, orderings):
    """
    i - list of inputs
    o - list of outputs
    orderings - dict of additions to the normal inputs and outputs

    Returns nothing. Raises an exception for a graph with cycles.
    """
    # this is a hard-coded reimplementation of functions from graph.py
    # reason: go faster, prepare for port to C.
    assert isinstance(r_out, (tuple, list, deque))

    # TODO: For more speed - use a defaultdict for the orderings
    iset = set(i)

    if 0:
        # earlier expand-callback formulation, kept for reference
        def expand(obj):
            rval = []
            if obj not in iset:
                if isinstance(obj, graph.Variable):
                    if obj.owner:
                        rval = [obj.owner]
                if isinstance(obj, graph.Apply):
                    rval = list(obj.inputs)
                rval.extend(orderings.get(obj, []))
            else:
                assert not orderings.get(obj, [])
            return rval

    expand_cache = {}
    # reachable, clients = stack_search(deque(r_out), deps, 'dfs', True)
    start = deque(r_out)
    rval_set = set()
    rval_set.add(id(None))  # sentinel: a None entry is never expanded
    rval_list = list()
    expand_inv = {}
    sources = deque()
    while start:
        l = start.pop()  # this makes the search dfs
        if id(l) not in rval_set:
            rval_list.append(l)
            rval_set.add(id(l))
            if l in iset:
                assert not orderings.get(l, [])
                expand_l = []
            else:
                try:
                    if l.owner:
                        expand_l = [l.owner]
                    else:
                        expand_l = []
                except AttributeError:
                    # no .owner: l is an Apply node, so expand to its
                    # inputs plus any extra dependencies from orderings
                    expand_l = list(l.inputs)
                    expand_l.extend(orderings.get(l, []))
            if expand_l:
                for r in expand_l:
                    expand_inv.setdefault(r, []).append(l)
                start.extend(expand_l)
            else:
                sources.append(l)
            expand_cache[l] = expand_l
    assert len(rval_list) == len(rval_set) - 1  # -1 for the id(None) sentinel

    rset = set()
    rlist = []
    while sources:
        node = sources.popleft()
        if node not in rset:
            rlist.append(node)
            rset.add(node)
            for client in expand_inv.get(node, []):
                expand_cache[client] = [a for a in expand_cache[client]
                                        if a is not node]
                if not expand_cache[client]:
                    sources.append(client)

    if len(rlist) != len(rval_list):
        raise ValueError('graph contains cycles')
def apply(self, env, start_from=None):
    if start_from is None:
        start_from = env.outputs
    changed = True
    max_use_abort = False
    opt_name = None
    process_count = {}

    while changed and not max_use_abort:
        changed = False

        # apply global optimizers
        env.change_tracker.reset()
        for gopt in self.global_optimizers:
            gopt.apply(env)
        if env.change_tracker.changed:
            changed = True

        # apply local optimizers
        for node in start_from:
            assert node in env.outputs

        q = deque(graph.io_toposort(env.inputs, start_from))

        max_use = len(q) * self.max_use_ratio

        # current_node must be bound before the callbacks below can fire
        current_node = None

        def importer(node):
            if node is not current_node:
                q.append(node)

        def pruner(node):
            if node is not current_node:
                try:
                    q.remove(node)
                except ValueError:
                    pass

        u = self.attach_updater(env, importer, pruner)
        try:
            while q:
                node = q.pop()
                current_node = node
                for lopt in self.local_optimizers:
                    process_count.setdefault(lopt, 0)
                    if process_count[lopt] > max_use:
                        max_use_abort = True
                        opt_name = (getattr(lopt, "name", None) or
                                    getattr(lopt, "__name__", None) or "")
                    else:
                        lopt_change = self.process_node(env, node, lopt)
                        if lopt_change:
                            process_count[lopt] += 1
                            changed = True
                            if node not in env.nodes:
                                break  # go to next node
        finally:
            self.detach_updater(env, u)

    if max_use_abort:
        _logger.error("EquilibriumOptimizer max'ed out by '%s'" % opt_name +
                      ". You can safely raise the current threshold of " +
                      "%f with the theano flag 'optdb.max_use_ratio'." %
                      config.optdb.max_use_ratio)
def apply(self, env, start_from=None):
    if start_from is None:
        start_from = env.outputs
    changed = True
    max_use_abort = False
    opt_name = None
    process_count = {}
    max_nb_nodes = 0

    while changed and not max_use_abort:
        changed = False

        # apply global optimizers
        env.change_tracker.reset()
        for gopt in self.global_optimizers:
            gopt.apply(env)
        if env.change_tracker.changed:
            changed = True

        # apply local optimizers
        for node in start_from:
            assert node in env.outputs

        q = deque(graph.io_toposort(env.inputs, start_from))

        max_nb_nodes = max(max_nb_nodes, len(q))
        max_use = max_nb_nodes * self.max_use_ratio

        # current_node must be bound before the callbacks below can fire
        current_node = None

        def importer(node):
            if node is not current_node:
                q.append(node)

        def pruner(node):
            if node is not current_node:
                try:
                    q.remove(node)
                except ValueError:
                    pass

        u = self.attach_updater(env, importer, pruner)
        try:
            while q:
                node = q.pop()
                current_node = node
                for lopt in self.local_optimizers:
                    process_count.setdefault(lopt, 0)
                    lopt_change = self.process_node(env, node, lopt)
                    if lopt_change:
                        process_count[lopt] += 1
                        changed = True
                        if process_count[lopt] > max_use:
                            max_use_abort = True
                            opt_name = (getattr(lopt, "name", None) or
                                        getattr(lopt, "__name__", ""))
                        if node not in env.nodes:
                            break  # go to next node
        finally:
            self.detach_updater(env, u)

    if max_use_abort:
        _logger.error("EquilibriumOptimizer max'ed out by '%s'" % opt_name +
                      ". You can safely raise the current threshold of " +
                      "%f with the theano flag 'optdb.max_use_ratio'." %
                      config.optdb.max_use_ratio)
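# The loop above runs global and local rewrites until nothing changes,
# aborting if any single local optimizer fires more often than its
# budget allows. A stripped-down sketch of that fixpoint-with-budget
# control flow; the tuple terms and the fold_add_zero rule are made up
# for illustration:
def equilibrium(term, rules, max_use=5):
    # max_use plays the role of max_use_ratio * graph size above
    process_count = dict((r.__name__, 0) for r in rules)
    changed = True
    while changed:
        changed = False
        for rule in rules:
            new = rule(term)
            if new != term:
                process_count[rule.__name__] += 1
                if process_count[rule.__name__] > max_use:
                    raise RuntimeError("maxed out by %r" % rule.__name__)
                term, changed = new, True
    return term

def fold_add_zero(t):
    # rewrite ('add', x, 0) -> x, one layer per pass
    return t[1] if isinstance(t, tuple) and t[0] == 'add' and t[2] == 0 else t

assert equilibrium(('add', ('add', 'x', 0), 0), [fold_add_zero]) == 'x'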
def _dfs_toposort(i, r_out, orderings):
    """
    i - list of inputs
    o - list of outputs
    orderings - dict of additions to the normal inputs and outputs

    Returns nothing. Raises an exception for a graph with cycles.
    """
    # this is a hard-coded reimplementation of functions from graph.py
    # reason: go faster, prepare for port to C.
    assert isinstance(r_out, (tuple, list, deque))

    # TODO: For more speed - use a defaultdict for the orderings
    iset = set(i)

    if 0:
        # earlier expand-callback formulation, kept for reference
        def expand(obj):
            rval = []
            if obj not in iset:
                if isinstance(obj, graph.Variable):
                    if obj.owner:
                        rval = [obj.owner]
                if isinstance(obj, graph.Apply):
                    rval = list(obj.inputs)
                rval.extend(orderings.get(obj, []))
            else:
                assert not orderings.get(obj, [])
            return rval

    expand_cache = {}
    # reachable, clients = stack_search(deque(r_out), deps, 'dfs', True)
    start = deque(r_out)
    rval_set = set()
    rval_set.add(id(None))  # sentinel: a None entry is never expanded
    rval_list = list()
    expand_inv = {}
    sources = deque()
    while start:
        l = start.pop()  # this makes the search dfs
        if id(l) not in rval_set:
            rval_list.append(l)
            rval_set.add(id(l))
            if l in iset:
                assert not orderings.get(l, [])
                expand_l = []
            else:
                try:
                    if l.owner:
                        expand_l = [l.owner]
                    else:
                        expand_l = []
                except AttributeError:
                    # no .owner: l is an Apply node, so expand to its
                    # inputs plus any extra dependencies from orderings
                    expand_l = list(l.inputs)
                    expand_l.extend(orderings.get(l, []))
            if expand_l:
                for r in expand_l:
                    expand_inv.setdefault(r, []).append(l)
                start.extend(expand_l)
            else:
                sources.append(l)
            expand_cache[l] = expand_l
    assert len(rval_list) == len(rval_set) - 1  # -1 for the id(None) sentinel

    rset = set()
    rlist = []
    while sources:
        node = sources.popleft()
        if node not in rset:
            rlist.append(node)
            rset.add(node)
            for client in expand_inv.get(node, []):
                expand_cache[client] = [a for a in expand_cache[client]
                                        if a is not node]
                if not expand_cache[client]:
                    sources.append(client)

    if len(rlist) != len(rval_list):
        raise ValueError('graph contains cycles')
def _contains_cycle(fgraph, orderings):
    """
    fgraph - the FunctionGraph to check for cycles

    orderings - dictionary specifying extra dependencies besides those
        encoded in Variable.owner / Apply.inputs

        If orderings[my_apply] == dependencies, then my_apply is an Apply
        instance, dependencies is a set of Apply instances, and every
        member of dependencies must be executed before my_apply.

        The dependencies are typically used to prevent inplace apply
        nodes from destroying their input before other apply nodes with
        the same input access it.

    Returns True if the graph contains a cycle, False otherwise.
    """
    # These are lists of Variable instances
    inputs = fgraph.inputs
    outputs = fgraph.outputs

    # this is a hard-coded reimplementation of functions from graph.py
    # reason: go faster, prepare for port to C.
    # specifically, it could be replaced with a wrapper
    # around graph.io_toposort that returns True iff io_toposort raises
    # a ValueError containing the substring 'cycle'.
    # This implementation is optimized for the destroyhandler and runs
    # slightly faster than io_toposort.

    # this is performance-critical code. it is the largest single-function
    # bottleneck when compiling large graphs.
    assert isinstance(outputs, (tuple, list, deque))

    # TODO: For more speed - use a defaultdict for the orderings
    # (defaultdict runs faster than dict in the case where the key
    # is not in the dictionary, at least in CPython)

    iset = set(inputs)

    # IG: I tried converting parent_counts to use an id for the key,
    # so that the dict would do reference counting on its keys.
    # This caused a slowdown.
    # Separate benchmark tests showed that calling id is about
    # half as expensive as a dictionary access, and that the
    # dictionary also runs slower when storing ids than when
    # storing objects.

    # dict mapping an Apply or Variable instance to the number
    # of its parents (including parents imposed by orderings)
    # that haven't been visited yet
    parent_counts = {}
    # dict mapping an Apply or Variable instance to its children
    node_to_children = {}

    # visitable: A container holding all Variable and Apply instances
    # that can currently be visited according to the graph topology
    # (ie, whose parents have already been visited)
    # TODO: visitable is a fifo_queue. could this run faster if we
    # implement it as a stack rather than a deque?
    # TODO: visitable need not be a fifo_queue, any kind of container
    # that we can throw things into and take things out of quickly will
    # work. is there another kind of container that could run faster?
    # we don't care about the traversal order here as much as we do
    # in io_toposort because we aren't trying to generate an ordering
    # on the nodes
    visitable = deque()

    # IG: visitable could in principle be initialized to fgraph.inputs
    #     + fgraph.orphans... if there were an fgraph.orphans structure.
    #     I tried making one, and maintaining it caused a huge slowdown.
    #     This may be because I made it a list, so it would have a
    #     deterministic iteration order, in hopes of using it to speed
    #     up toposort as well.
    #     I think since we need to scan through all variables and nodes
    #     to make parent_counts anyway, it's cheap enough to always
    #     detect orphans at cycle detection / toposort time.

    # Pass through all the nodes to build visitable, parent_counts, and
    # node_to_children
    for var in fgraph.variables:

        # this is faster than calling get_parents
        owner = var.owner
        if owner:
            parents = [owner]
        else:
            parents = []

        # variables don't appear in orderings, so we don't need to worry
        # about that here

        if parents:
            for parent in parents:
                # insert node in node_to_children[r]
                # (if r is not already in node_to_children,
                # initialize it to [])
                node_to_children.setdefault(parent, []).append(var)
            parent_counts[var] = len(parents)
        else:
            visitable.append(var)
            parent_counts[var] = 0

    for a_n in fgraph.apply_nodes:
        parents = list(a_n.inputs)
        # This is faster than conditionally extending
        # IG: I tried using a shared empty_list = [] constructed
        # outside of the for loop to avoid constructing multiple
        # lists, but this was not any faster.
        parents.extend(orderings.get(a_n, []))

        if parents:
            for parent in parents:
                # insert node in node_to_children[r]
                # (if r is not already in node_to_children,
                # initialize it to [])
                node_to_children.setdefault(parent, []).append(a_n)
            parent_counts[a_n] = len(parents)
        else:
            # an Apply with no inputs would be a weird case, but I'm
            # not sure we forbid it
            visitable.append(a_n)
            parent_counts[a_n] = 0

    # at this point,
    # parent_counts.keys() == fgraph.apply_nodes + fgraph.variables

    # Now we actually check for cycles
    # As long as there are nodes that can be visited while respecting
    # the topology, we keep visiting nodes
    # If we run out of visitable nodes and we haven't visited all nodes,
    # then there was a cycle. It blocked the traversal because some
    # node couldn't be visited until one of its descendants had been
    # visited too.
    # This is a standard cycle detection algorithm.
    visited = 0
    while visitable:
        # Since each node is inserted into the visitable queue exactly
        # once, it comes out of the queue exactly once.
        # That means we can decrement its children's unvisited parent
        # count and increment the visited node count without
        # double-counting.
        node = visitable.popleft()
        visited += 1
        for client in node_to_children.get(node, []):
            parent_counts[client] -= 1
            # If all of a node's parents have been visited,
            # it may now be visited too
            if not parent_counts[client]:
                visitable.append(client)

    return visited != len(parent_counts)