def __call__(self):
    """Run every thunk once, in order, with eager storage clearing.

    Containers in ``self.pre_call_clear`` are emptied before the run;
    each node's ``post_thunk_clear`` containers are emptied as soon as
    its thunk finishes (this frees intermediate results early).  When
    ``self.time_thunks`` is true, per-node call counts and wall-clock
    durations are accumulated into ``self.call_counts`` and
    ``self.call_times``.  On error, re-raises with the failing
    node/thunk attached via ``link.raise_with_op``.
    """
    # The clear loop is identical in both branches, so do it once up
    # front (the original duplicated it).
    for cont in self.pre_call_clear:
        cont[0] = None
    if self.time_thunks:
        try:
            # enumerate() replaces the original hand-maintained
            # ``i = 0 ... i += 1`` counter.
            for i, (thunk, node, old_storage) in enumerate(
                    zip(self.thunks, self.nodes, self.post_thunk_clear)):
                t0 = time.time()
                thunk()
                t1 = time.time()
                self.call_counts[i] += 1
                self.call_times[i] += t1 - t0
                # Free storage that is no longer needed after this node.
                for old_s in old_storage:
                    old_s[0] = None
        except:
            # Bare except kept deliberately: annotate and re-raise
            # whatever escaped the thunk, including non-Exception.
            link.raise_with_op(node, thunk)
    else:
        try:
            for thunk, node, old_storage in zip(
                    self.thunks, self.nodes, self.post_thunk_clear):
                thunk()
                for old_s in old_storage:
                    old_s[0] = None
        except:
            link.raise_with_op(node, thunk)
def __call__(self):
    """Execute all thunks once, in topological order.

    First empties every container listed in ``self.pre_call_clear``.
    When ``self.time_thunks`` is set, records per-node call counts and
    durations in ``self.call_counts`` / ``self.call_times``.  Any
    exception is re-raised with the failing node and thunk attached
    via ``link.raise_with_op``.
    """
    if self.time_thunks:
        for container in self.pre_call_clear:
            container[0] = None
        try:
            for idx, (thunk, node) in enumerate(
                    zip(self.thunks, self.nodes)):
                start = time.time()
                thunk()
                stop = time.time()
                self.call_counts[idx] += 1
                self.call_times[idx] += stop - start
        except:
            # Annotate the failure with the node that raised it.
            link.raise_with_op(node, thunk)
    else:
        for container in self.pre_call_clear:
            container[0] = None
        try:
            for thunk, node in zip(self.thunks, self.nodes):
                thunk()
        except:
            link.raise_with_op(node, thunk)
def __call__(self):
    """Run the graph by evaluating apply nodes from a work stack.

    Non-lazy thunks run once all their inputs (and destroy
    dependencies) are computed; lazy thunks are re-queued together
    with whichever of their inputs they report as still required.
    When ``self.allow_gc`` is set, input storage is freed as soon as
    no remaining node depends on it, and a final coarse pass frees
    everything that is neither a graph output nor already collected.
    Profiling data (call counts/times, variable shapes and strides)
    is recorded when ``config.profile`` is enabled.
    """
    storage_map = self.storage_map
    compute_map = self.compute_map
    thunks = self.thunks
    dependencies = self.dependencies
    self.node_executed_order = []
    self.node_cleared_order = []

    # Graph inputs (variables without an owner) start out computed.
    for k in self.storage_map:
        compute_map[k][0] = (k.owner is None)

    # apply_stack contains nodes
    apply_stack = list(self.base_apply_stack)
    last_apply_stack_len = -1

    # Record shape/stride info for all function inputs, shared
    # variables and constants that already hold a value.
    # (.items() instead of the Python-2-only .iteritems().)
    for var, data in self.storage_map.items():
        if data[0] is None:
            continue
        if hasattr(var.type, 'get_shape_info'):
            sh = var.type.get_shape_info(data[0])
        else:
            sh = 'input no shape'
        self.variable_shape[var] = sh
        st = getattr(data[0], 'strides', 'input no strides')
        if getattr(data[0], 'flags', False) and data[0].flags.c_contiguous:
            st = 'c'
        elif (hasattr(data[0], 'is_c_contiguous') and
              data[0].is_c_contiguous()):
            st = "c"
        self.variable_strides[var] = st

    while apply_stack:
        # Make sure something happened last time round.  This is
        # just a safety check to make sure the op is written
        # correctly: apply_stack should either decrease in length
        # by one (a thunk successfully applied), or increase in
        # length (added dependencies over and above the original).
        # NB: this doesn't catch cycles (would be too
        # expensive/slow), just stalls.
        apply_stack_len = len(apply_stack)
        assert apply_stack_len != last_apply_stack_len
        last_apply_stack_len = apply_stack_len

        current_apply = apply_stack.pop()
        current_inputs = current_apply.inputs
        current_outputs = current_apply.outputs
        current_deps = current_inputs + current_apply.destroy_dependencies

        computed_ins = all(compute_map[v][0] for v in current_deps)
        computed_outs = all(compute_map[v][0] for v in current_outputs)

        if not thunks[self.node_idx[current_apply]].lazy:
            #
            # stack loop: Normal Non-Lazy Case
            # ================================
            #
            # Check if all inputs are in place.  If so, compute the
            # thunk and remove it from the apply_stack.  If not,
            # leave it in and push the nodes that will produce the
            # missing inputs.
            if computed_ins and not computed_outs:
                # -- Non-lazy case: have inputs, time to compute outputs
                try:
                    _, dt = self.run_thunk_of_node(current_apply)
                    del _
                    if config.profile:
                        current_idx = self.node_idx[current_apply]
                        self.call_counts[current_idx] += 1
                        self.call_times[current_idx] += dt
                        # Computing the memory footprint of the op.
                        # ?? What about inplace?  If the op is
                        # inplace you don't actually ask for more
                        # memory!
                        for (idx, o) in enumerate(
                                thunks[self.node_idx[
                                    current_apply]].outputs):
                            var = self.nodes[current_idx].outputs[idx]
                            if hasattr(var.type, 'get_shape_info'):
                                sh = var.type.get_shape_info(o[0])
                            else:
                                sh = 'input no shape'
                            self.variable_shape[var] = sh
                            st = getattr(o[0], 'strides',
                                         'input no strides')
                            if (getattr(o[0], 'flags', False) and
                                    o[0].flags.c_contiguous):
                                st = 'c'
                            # BUG FIX: this branch previously tested
                            # data[0] (a stale variable left over
                            # from the input loop above) instead of
                            # the output o[0].
                            elif (hasattr(o[0], 'is_c_contiguous') and
                                  o[0].is_c_contiguous()):
                                st = "c"
                            self.variable_strides[var] = st
                except Exception:
                    link.raise_with_op(
                        current_apply,
                        self.thunks[self.node_idx[current_apply]],
                        storage_map=storage_map)
                for o in current_apply.outputs:
                    compute_map[o][0] = 1

                # A list storing the index of freed input variables.
                input_index = []
                if self.allow_gc:
                    for i in current_apply.inputs:
                        # Garbage Collection -> check if anybody
                        # else uses this input.
                        if (dependencies[i] and i.owner and
                                i not in self.outputs):
                            if all(compute_map[v][0]
                                   for v in dependencies[i]):
                                storage_map[i][0] = None
                                input_index.append(
                                    current_apply.inputs.index(i))

                                # DO NOT set compute_map to 0.  If
                                # values become False and the
                                # current_apply is still in the
                                # stack, this will cause it to be
                                # recomputed!  This can cause wrong
                                # values with some combinations of
                                # inplace ops.
                                compute_map[i][0] = 2
                                if (config.warn.vm_gc_bug and
                                        current_apply in apply_stack and
                                        getattr(current_apply.op,
                                                'destroy_map',
                                                False)):
                                    warnings.warn(
                                        "There was a bug that existed in the default Theano configuration,"
                                        " only in the development version between July 5th 2012"
                                        " and July 30th 2012. This was not in a released version."
                                        " The bug was affecting this script.",
                                        # The stack level is not good when
                                        # inside a Scan.
                                        stacklevel=3)
                self.node_cleared_order.append(input_index)

            elif not computed_ins:
                # -- Non-lazy case, need inputs
                apply_stack.append(current_apply)
                apply_stack.extend(inp.owner
                                   for inp in current_deps
                                   if inp.owner)

        elif not computed_outs:
            #
            # stack loop: Lazy Evaluation Case
            # ================================
            #
            # Lazy evaluation protocol is to run the thunk with the
            # current storage_map and compute_map accessed via
            # closure, and the thunk will return a list of variables
            # from its input list that it requires.
            try:
                requires, dt = self.run_thunk_of_node(current_apply)
                current_idx = self.node_idx[current_apply]
                self.call_counts[current_idx] += 1
                self.call_times[current_idx] += dt
            except Exception:
                link.raise_with_op(
                    current_apply,
                    self.thunks[self.node_idx[current_apply]],
                    storage_map=storage_map)

            if requires:
                for r in requires:
                    # We are not done with this op: push it back and
                    # schedule the inputs we are missing.
                    apply_stack.append(current_apply)
                    if current_apply.inputs[r].owner:
                        apply_stack.append(current_apply.inputs[r].owner)
            else:
                if config.profile:
                    for (idx, o) in enumerate(thunks[
                            self.node_idx[current_apply]].outputs):
                        var = self.nodes[
                            self.node_idx[current_apply]].outputs[idx]
                        if hasattr(var.type, 'get_shape_info'):
                            sh = var.type.get_shape_info(o[0])
                        else:
                            sh = 'input no shape'
                        self.variable_shape[var] = sh
                        st = getattr(o[0], 'strides',
                                     'input no strides')
                        if (getattr(o[0], 'flags', False) and
                                o[0].flags.c_contiguous):
                            st = 'c'
                        # BUG FIX: previously tested data[0] instead
                        # of the output o[0].
                        elif (hasattr(o[0], 'is_c_contiguous') and
                              o[0].is_c_contiguous()):
                            st = "c"
                        self.variable_strides[var] = st

                input_index = []
                if self.allow_gc:
                    for i in current_apply.inputs:
                        if (dependencies[i] and i.owner and
                                i not in self.outputs):
                            empty_storage_map = True
                            for x in dependencies[i]:
                                if not compute_map[x][0]:
                                    empty_storage_map = False
                                    break
                            if empty_storage_map:
                                storage_map[i][0] = None
                                input_index.append(
                                    current_apply.inputs.index(i))
                                # See the non-lazy gc code above for
                                # explanations of the compute_map
                                # change.
                                compute_map[i][0] = 2
                self.node_cleared_order.append(input_index)

    # Hacky coarse gc final pass.  This is required until we have a
    # proper gc algorithm for graphs with lazy evaluation.  See
    # discussion on theano-dev June 19 2012.
    final_index = []
    if self.allow_gc:
        for v in storage_map:
            if v.owner and v not in self.outputs:
                if compute_map[v][0] == 2:
                    continue
                else:
                    storage_map[v][0] = None
                    final_index.append(v)
                    compute_map[v][0] = 2
    self.node_cleared_order.append(final_index)
def __call__(self):
    """Run the graph by evaluating apply nodes from a work stack.

    Non-lazy thunks run once all their inputs (and destroy
    dependencies) are computed; lazy thunks are re-queued together
    with whichever of their inputs they report as still required.
    When ``self.allow_gc`` is set, input storage is freed as soon as
    no remaining node depends on it, and a final coarse pass frees
    everything that is neither a graph output nor already collected.
    Profiling data (call counts/times, variable shapes and strides)
    is recorded when ``config.profile`` is enabled.
    """
    storage_map = self.storage_map
    compute_map = self.compute_map
    thunks = self.thunks
    dependencies = self.dependencies
    self.node_executed_order = []
    self.node_cleared_order = []

    # Graph inputs (variables without an owner) start out computed.
    for k in self.storage_map:
        compute_map[k][0] = (k.owner is None)

    # apply_stack contains nodes
    apply_stack = list(self.base_apply_stack)
    last_apply_stack_len = -1

    # Record shape/stride info for all function inputs, shared
    # variables and constants that already hold a value.
    # (.items() instead of the Python-2-only .iteritems().)
    for var, data in self.storage_map.items():
        if data[0] is None:
            continue
        if hasattr(var.type, 'get_shape_info'):
            sh = var.type.get_shape_info(data[0])
        else:
            sh = 'input no shape'
        self.variable_shape[var] = sh
        st = getattr(data[0], 'strides', 'input no strides')
        if getattr(data[0], 'flags', False) and data[0].flags.c_contiguous:
            st = 'c'
        elif (hasattr(data[0], 'is_c_contiguous') and
              data[0].is_c_contiguous()):
            st = "c"
        self.variable_strides[var] = st

    while apply_stack:
        # Make sure something happened last time round.  This is
        # just a safety check to make sure the op is written
        # correctly: apply_stack should either decrease in length
        # by one (a thunk successfully applied), or increase in
        # length (added dependencies over and above the original).
        # NB: this doesn't catch cycles (would be too
        # expensive/slow), just stalls.
        apply_stack_len = len(apply_stack)
        assert apply_stack_len != last_apply_stack_len
        last_apply_stack_len = apply_stack_len

        current_apply = apply_stack.pop()
        current_inputs = current_apply.inputs
        current_outputs = current_apply.outputs
        current_deps = current_inputs + current_apply.destroy_dependencies

        computed_ins = all(compute_map[v][0] for v in current_deps)
        computed_outs = all(compute_map[v][0] for v in current_outputs)

        if not thunks[self.node_idx[current_apply]].lazy:
            #
            # stack loop: Normal Non-Lazy Case
            # ================================
            #
            # Check if all inputs are in place.  If so, compute the
            # thunk and remove it from the apply_stack.  If not,
            # leave it in and push the nodes that will produce the
            # missing inputs.
            if computed_ins and not computed_outs:
                # -- Non-lazy case: have inputs, time to compute outputs
                try:
                    _, dt = self.run_thunk_of_node(current_apply)
                    del _
                    if config.profile:
                        current_idx = self.node_idx[current_apply]
                        self.call_counts[current_idx] += 1
                        self.call_times[current_idx] += dt
                        # Computing the memory footprint of the op.
                        # ?? What about inplace?  If the op is
                        # inplace you don't actually ask for more
                        # memory!
                        for (idx, o) in enumerate(thunks[
                                self.node_idx[current_apply]].outputs):
                            var = self.nodes[current_idx].outputs[idx]
                            if hasattr(var.type, 'get_shape_info'):
                                sh = var.type.get_shape_info(o[0])
                            else:
                                sh = 'input no shape'
                            self.variable_shape[var] = sh
                            st = getattr(o[0], 'strides',
                                         'input no strides')
                            if (getattr(o[0], 'flags', False) and
                                    o[0].flags.c_contiguous):
                                st = 'c'
                            # BUG FIX: this branch previously tested
                            # data[0] (a stale variable left over
                            # from the input loop above) instead of
                            # the output o[0].
                            elif (hasattr(o[0], 'is_c_contiguous') and
                                  o[0].is_c_contiguous()):
                                st = "c"
                            self.variable_strides[var] = st
                except Exception:
                    link.raise_with_op(
                        current_apply,
                        self.thunks[self.node_idx[current_apply]],
                        storage_map=storage_map)
                for o in current_apply.outputs:
                    compute_map[o][0] = 1

                # A list storing the index of freed input variables.
                input_index = []
                if self.allow_gc:
                    for i in current_apply.inputs:
                        # Garbage Collection -> check if anybody
                        # else uses this input.
                        if (dependencies[i] and i.owner and
                                i not in self.outputs):
                            if all(compute_map[v][0]
                                   for v in dependencies[i]):
                                storage_map[i][0] = None
                                input_index.append(
                                    current_apply.inputs.index(i))

                                # DO NOT set compute_map to 0.  If
                                # values become False and the
                                # current_apply is still in the
                                # stack, this will cause it to be
                                # recomputed!  This can cause wrong
                                # values with some combinations of
                                # inplace ops.
                                compute_map[i][0] = 2
                                if (config.warn.vm_gc_bug and
                                        current_apply in apply_stack and
                                        getattr(current_apply.op,
                                                'destroy_map',
                                                False)):
                                    warnings.warn(
                                        "There was a bug that existed in the default Theano configuration,"
                                        " only in the development version between July 5th 2012"
                                        " and July 30th 2012. This was not in a released version."
                                        " The bug was affecting this script.",
                                        # The stack level is not good when
                                        # inside a Scan.
                                        stacklevel=3)
                self.node_cleared_order.append(input_index)

            elif not computed_ins:
                # -- Non-lazy case, need inputs
                apply_stack.append(current_apply)
                apply_stack.extend(inp.owner
                                   for inp in current_deps
                                   if inp.owner)

        elif not computed_outs:
            #
            # stack loop: Lazy Evaluation Case
            # ================================
            #
            # Lazy evaluation protocol is to run the thunk with the
            # current storage_map and compute_map accessed via
            # closure, and the thunk will return a list of variables
            # from its input list that it requires.
            try:
                requires, dt = self.run_thunk_of_node(current_apply)
                current_idx = self.node_idx[current_apply]
                self.call_counts[current_idx] += 1
                self.call_times[current_idx] += dt
            except Exception:
                link.raise_with_op(
                    current_apply,
                    self.thunks[self.node_idx[current_apply]],
                    storage_map=storage_map)

            if requires:
                for r in requires:
                    # We are not done with this op: push it back and
                    # schedule the inputs we are missing.
                    apply_stack.append(current_apply)
                    if current_apply.inputs[r].owner:
                        apply_stack.append(current_apply.inputs[r].owner)
            else:
                if config.profile:
                    for (idx, o) in enumerate(
                            thunks[self.node_idx[current_apply]].outputs):
                        var = self.nodes[
                            self.node_idx[current_apply]].outputs[idx]
                        if hasattr(var.type, 'get_shape_info'):
                            sh = var.type.get_shape_info(o[0])
                        else:
                            sh = 'input no shape'
                        self.variable_shape[var] = sh
                        st = getattr(o[0], 'strides',
                                     'input no strides')
                        if (getattr(o[0], 'flags', False) and
                                o[0].flags.c_contiguous):
                            st = 'c'
                        # BUG FIX: previously tested data[0] instead
                        # of the output o[0].
                        elif (hasattr(o[0], 'is_c_contiguous') and
                              o[0].is_c_contiguous()):
                            st = "c"
                        self.variable_strides[var] = st

                input_index = []
                if self.allow_gc:
                    for i in current_apply.inputs:
                        if (dependencies[i] and i.owner and
                                i not in self.outputs):
                            empty_storage_map = True
                            for x in dependencies[i]:
                                if not compute_map[x][0]:
                                    empty_storage_map = False
                                    break
                            if empty_storage_map:
                                storage_map[i][0] = None
                                input_index.append(
                                    current_apply.inputs.index(i))
                                # See the non-lazy gc code above for
                                # explanations of the compute_map
                                # change.
                                compute_map[i][0] = 2
                self.node_cleared_order.append(input_index)

    # Hacky coarse gc final pass.  This is required until we have a
    # proper gc algorithm for graphs with lazy evaluation.  See
    # discussion on theano-dev June 19 2012.
    final_index = []
    if self.allow_gc:
        for v in storage_map:
            if v.owner and v not in self.outputs:
                if compute_map[v][0] == 2:
                    continue
                else:
                    storage_map[v][0] = None
                    final_index.append(v)
                    compute_map[v][0] = 2
    self.node_cleared_order.append(final_index)