def create(self, input_storage=None, trustme=False):
    """Compile via the parent maker, then attach per-function profiling.

    Registers a fresh ProfileStats for the new function, zeroes its
    per-node timers, and swaps the linker fn for a closure that routes
    the mode-level accumulators into this function's stats while running.
    """
    ret = super(Profile_Maker, self).create(input_storage, trustme)

    # Profiling asynchronous GPU launches is meaningless — require the
    # driver to synchronize each kernel before proceeding.
    cuda_active = (hasattr(theano, 'sandbox') and
                   hasattr(theano.sandbox, 'cuda') and
                   theano.sandbox.cuda.cuda_enabled)
    if cuda_active and os.environ.get('CUDA_LAUNCH_BLOCKING', '0') != '1':
        raise Exception(
            "You are running the Theano profiler with CUDA enabled."
            " Theano GPU ops execution is asynchronous by default."
            " So by default, the profile is useless."
            " You must set the environment variable"
            " CUDA_LAUNCH_BLOCKING to 1 to tell the CUDA driver to"
            " synchronize the execution to get a meaningful profile.")

    # One ProfileStats per compiled function, keyed by the function itself.
    stats = ProfileStats(atexit_print=False)
    self.mode.profile_stats[ret] = stats
    ret.profile = stats

    # Zero the per-node timers and record which thunks have a C impl.
    for pos, apply_node in enumerate(ret.maker.fgraph.toposort()):
        stats.apply_time[apply_node] = 0.0
        # a thunk_group is a list of the thunks from each linker
        # corresponding to this position in the toposort; exactly one
        # thunk per slot is assumed.
        group = ret.fn.thunk_groups[pos]
        assert len(group) == 1
        stats.apply_cimpl[apply_node] = hasattr(group[0], 'cthunk')

    # WrapLinker generates functions and is not function-specific; this
    # wrapper makes it cooperate with the function-specific ProfileStats.
    # Capture the old fn in a closure before replacing it on ret.
    inner_fn = ret.fn

    def new_fn():
        self.mode.apply_time = self.mode.profile_stats[ret].apply_time
        self.mode.variable_shape = \
            self.mode.profile_stats[ret].variable_shape
        inner_fn()
        # Remove the aliases afterwards so stale code that still reads
        # mode.apply_time fails loudly instead of seeming to work.
        del self.mode.apply_time
        del self.mode.variable_shape

    ret.fn = new_fn

    global run_cthunk
    if run_cthunk is None and any(stats.apply_cimpl.values()):
        # Lazy import to avoid compilation when importing theano.
        from theano.gof.cutils import run_cthunk  # noqa
    warnings.warn("DEPRECATION WARNING: The ProfileMode is deprecated. "
                  "Use the Theano flags/parameter to theano.function "
                  "'profile=True' instead of 'mode=ProfileMode'")
    return ret
def create(self, input_storage=None, trustme=False):
    """Build the compiled function through the parent maker, then wire
    in profiling: a dedicated ProfileStats plus a replacement linker fn
    that points the mode's accumulators at it for each call."""
    ret = super(Profile_Maker, self).create(input_storage, trustme)

    # GPU execution is asynchronous by default, which would make the
    # timings useless — insist on synchronous kernel launches.
    if (hasattr(theano, 'sandbox') and hasattr(theano.sandbox, 'cuda') and
            theano.sandbox.cuda.cuda_enabled):
        if os.environ.get('CUDA_LAUNCH_BLOCKING', '0') != '1':
            raise Exception(
                "You are running the Theano profiler with CUDA enabled."
                " Theano GPU ops execution is asynchronous by default."
                " So by default, the profile is useless."
                " You must set the environment variable"
                " CUDA_LAUNCH_BLOCKING to 1 to tell the CUDA driver to"
                " synchronize the execution to get a meaningful profile.")

    # Function-specific storage container for profiling info.
    prof = ProfileStats(atexit_print=False)
    ret.profile = prof
    self.mode.profile_stats[ret] = prof

    # Initialize timers and flag nodes backed by a C implementation.
    for idx, node in enumerate(ret.maker.fgraph.toposort()):
        prof.apply_time[node] = 0.0
        # thunk_groups[idx] holds the thunks from each linker for the
        # idx'th toposort position; a single thunk is expected.
        group = ret.fn.thunk_groups[idx]
        assert len(group) == 1
        prof.apply_cimpl[node] = hasattr(group[0], 'cthunk')

    # Replace the linker function. This ugliness makes WrapLinker (which
    # *generates* functions and is not function-specific) work with the
    # function-specific ProfileStats. The old fn must be captured in a
    # closure before being overwritten below.
    wrapped_fn = ret.fn

    def profiled_call():
        stats = self.mode.profile_stats[ret]
        self.mode.apply_time = stats.apply_time
        self.mode.variable_shape = stats.variable_shape
        wrapped_fn()
        # Drop the aliases: they no longer mean the same thing once the
        # call is over, and keeping them would let old code limp along.
        del self.mode.apply_time
        del self.mode.variable_shape

    ret.fn = profiled_call

    global run_cthunk
    if run_cthunk is None and any(prof.apply_cimpl.values()):
        # Lazy import to avoid compilation when importing theano.
        from theano.gof.cutils import run_cthunk
    return ret
def create(self, input_storage=None, trustme=False): ret = super(Profile_Maker, self).create(input_storage, trustme) # create a function-specific storage container for profiling info profile = ProfileStats(atexit_print=False) self.mode.profile_stats[ret] = profile ret.profile = profile #initialize the timers for i, node in enumerate(ret.maker.fgraph.toposort()): profile.apply_time[node] = 0.0 profile.outputs_size[node] = [0.0] * len(node.outputs) # a thunk_group is a list of the thunks from each linker # corresponding to the i'th position in the toposort. assert len(ret.fn.thunk_groups[i]) == 1 profile.apply_cimpl[node] = hasattr( ret.fn.thunk_groups[i][0], 'cthunk') # Here we replace the linker function. # This ugliness makes WrapLinker (an object that *generates* # functions and is not function-specific) work with ProfileStats # objects which are function-specific. #capture old fn in closure. This is important since new_fn is about to #take its place as ret.fn. ret_fn = ret.fn def new_fn(): self.mode.apply_time = self.mode.profile_stats[ret].apply_time self.mode.outputs_size = self.mode.profile_stats[ret].outputs_size ret_fn() # delete the old apply_time variable # because it doesn't mean the same thing anymore. # This prevents old code from looking like it still works. del self.mode.apply_time del self.mode.outputs_size ret.fn = new_fn global run_cthunk if run_cthunk is None and any(profile.apply_cimpl.values()): # Lazy import to avoid compilation when importing theano. from theano.gof.cutils import run_cthunk return ret
def create(self, input_storage=None, trustme=False): ret = super(Profile_Maker, self).create(input_storage, trustme) # create a function-specific storage container for profiling info profile = ProfileStats(atexit_print=False) self.mode.profile_stats[ret] = profile ret.profile = profile #initialize the timers for i, node in enumerate(ret.maker.env.toposort()): profile.apply_time[node] = 0.0 profile.outputs_size[node] = [0.0] * len(node.outputs) # a thunk_group is a list of the thunks from each linker # corresponding to the i'th position in the toposort. assert len(ret.fn.thunk_groups[i]) == 1 profile.apply_cimpl[node] = hasattr( ret.fn.thunk_groups[i][0], 'cthunk') # Here we replace the linker function. # This ugliness makes WrapLinker (an object that *generates* # functions and is not function-specific) work with ProfileStats # objects which are function-specific. #capture old fn in closure. This is important since new_fn is about to #take its place as ret.fn. ret_fn = ret.fn def new_fn(): self.mode.apply_time = self.mode.profile_stats[ret].apply_time self.mode.outputs_size = self.mode.profile_stats[ret].outputs_size ret_fn() # delete the old apply_time variable # because it doesn't mean the same thing anymore. # This prevents old code from looking like it still works. del self.mode.apply_time del self.mode.outputs_size ret.fn = new_fn global run_cthunk if run_cthunk is None and any(profile.apply_cimpl.values()): # Lazy import to avoid compilation when importing theano. from theano.gof.cutils import run_cthunk return ret
def pfunc(params, outputs=None, mode=None, updates=None, givens=None,
          no_default_updates=False, accept_inplace=False, name=None,
          rebuild_strict=True, allow_input_downcast=None,
          profile=None, on_unused_input=None):
    """Function-constructor for graphs with shared variables.

    :type params: list of either Variable or Param instances.
    :param params: function parameters, these are not allowed to be
        shared variables

    :type outputs: list of Variables or Out instances
    :param outputs: expressions to compute

    :type mode: string or `theano.compile.Mode` instance.
    :param mode: compilation mode

    :type updates: iterable over pairs (shared_variable, new_expression).
        List, tuple or dict.
    :param updates: update the values for SharedVariable inputs according
        to these expressions

    :type givens: iterable over pairs (Var1, Var2) of Variables.
        List, tuple or dict. The Var1 and Var2 in each pair must have
        the same Type.
    :param givens: specific substitutions to make in the computation graph
        (Var2 replaces Var1).

    :type no_default_updates: either bool or list of Variables
    :param no_default_updates: if True, do not perform any automatic
        update on Variables. If False (default), perform them all.
        Else, perform automatic updates on all Variables that are
        neither in "updates" nor in "no_default_updates".

    :type name: None or string
    :param name: attaches a name to the Profiling result of this function
        when using ProfileMode (will be deprecated).

    :type rebuild_strict: bool
    :param rebuild_strict: forwarded to rebuild_collect_shared; when True
        (default), cloned replacements must have the same Type as the
        variables they replace.

    :type allow_input_downcast: Boolean
    :param allow_input_downcast: True means that the values passed as
        inputs when calling the function can be silently downcasted to
        fit the dtype of the corresponding Variable, which may lose
        precision. False means that it will only be cast to a more
        general, or precise, type. None (default) is almost like False,
        but allows downcasting of Python float scalars to floatX.

    :type profile: None, True, str, or ProfileStats instance
    :param profile: accumulate profiling information into a given
        ProfileStats instance. None is the default, and means to use the
        value of config.profile. If argument is `True` then a new
        ProfileStats instance will be used. If argument is a string, a
        new ProfileStats instance will be created with that string as its
        `message` attribute. This profiling object will be available via
        self.profile.

    :type on_unused_input: str
    :param on_unused_input: What to do if a variable in the 'inputs'
        list is not used in the graph. Possible values are 'raise',
        'warn', 'ignore' and None.

    :rtype: theano.compile.Function
    :returns: a callable object that will compute the outputs (given the
        inputs) and update the implicit function arguments according to
        the `updates`.

    :note: Regarding givens: Be careful to make sure that these
        substitutions are independent--behaviour when Var1 of one pair
        appears in the graph leading to Var2 in another expression is
        undefined. Replacements specified with givens are different from
        optimizations in that Var2 is not expected to be equivalent to
        Var1.
    """
    #
    # This function works by cloning the graph (except for the inputs),
    # and then shipping it off to compile.function (there it will be
    # cloned again, unnecessarily, because it doesn't know that we
    # already cloned it).
    #
    # First, it clones the replacements named in the givens argument,
    # and points each Var1 to the clone of Var2.  Then it sets the
    # inputs in the clone dictionary.  After these steps, we assume the
    # clone dictionary contains all the inputs to the computation graph.
    #
    # Then it clones the outputs and the update expressions.  This
    # rebuilds a computation graph from the inputs and the givens.
    #
    if updates is None:
        updates = []
    if givens is None:
        givens = []
    if profile is None:
        profile = config.profile
    # profile -> True or False
    # BUGFIX: use identity, not equality, so an integer profile=1 is not
    # mistaken for the True flag (previously `profile == True`).
    if profile is True:
        profile = ProfileStats(message=name)
    # profile -> object
    if isinstance(profile, str):
        profile = ProfileStats(message=profile)
    # profile is typically either False or an object at this point.
    # No need to block other objects being passed through though. It might
    # be useful.

    if not isinstance(params, (list, tuple)):
        raise Exception("in pfunc() the first argument must be a list or "
                        "a tuple")

    if not isinstance(no_default_updates, (bool, list)):
        raise TypeError("no_default_update should be either a boolean or "
                        "a list")

    # transform params into theano.compile.In objects.
    inputs = [_pfunc_param_to_in(p, allow_downcast=allow_input_downcast)
              for p in params]

    # Check if some variable is present more than once in inputs
    in_variables = [input.variable for input in inputs]
    for i, v in enumerate(in_variables):
        if v in in_variables[(i + 1):]:
            dup_v_i = in_variables.index(v, (i + 1))
            raise UnusedInputError(
                ("Variable %s is used twice in inputs to theano.function, "
                 "at indices %i and %i.  This would result in values "
                 "provided for it being ignored. Please do not duplicate "
                 "variables in the inputs list." % (v, i, dup_v_i)))

    # Clone the graph: outputs, updates and givens, sharing the inputs.
    # BUGFIX: forward the caller's rebuild_strict instead of hardcoding
    # True, which silently ignored the parameter.
    output_vars = rebuild_collect_shared(outputs,
                                         in_variables,
                                         replace=givens,
                                         updates=updates,
                                         rebuild_strict=rebuild_strict,
                                         copy_inputs_over=True,
                                         no_default_updates=no_default_updates)
    # extracting the arguments
    input_variables, cloned_outputs, other_stuff = output_vars
    clone_d, update_d, update_expr, shared_inputs = other_stuff

    # Re-point each In object at its cloned variable.
    for i, iv in zip(inputs, input_variables):
        i.variable = iv

    # Wrap every implicit shared input as an In object.  The value lives
    # in the shared variable's own container, so it never needs to be
    # re-fed; shared variables with an update expression are mutable.
    for sv in shared_inputs:
        if sv in update_d:
            si = In(variable=sv, value=sv.container, mutable=True,
                    borrow=True, update=update_d[sv], shared=True)
        else:
            si = In(variable=sv, value=sv.container, mutable=False,
                    borrow=True, shared=True)
        inputs.append(si)

    return orig_function(inputs, cloned_outputs, mode,
                         accept_inplace=accept_inplace, name=name,
                         profile=profile, on_unused_input=on_unused_input)