def __init__(self, pyfn, context=None, borrowable=None, ignore=None,
             escape_on_error=False):
    """
    Arguments
    ---------

    borrowable : tuple of objects
        If an object in this tuple is encountered while tracing the
        function, then its symbolic representation will alias that
        object's memory location. This means that *inplace* operations on
        the Python (likely NumPy) object will affect the symbolic
        function.
    """
    if context is None:
        context = Context(borrowable=utils.as_seq(borrowable, tuple),
                          ignore=utils.as_seq(ignore, tuple),
                          escape_on_error=escape_on_error)
    assert isinstance(context, Context)
    self.context = context

    if isinstance(pyfn, Symbolic):
        pyfn = pyfn.pyfn

    self._pyfn = pyfn
    self._symfn = self.context.recompile(self.pyfn)

def get_theano_vars(self, inputs=None, outputs=None):
    """
    Returns a tuple (theano_inputs, theano_outputs, graph) corresponding
    to the Theano version of the pyfn.
    """
    sym_inputs = tuple(self.get_symbolic(i) for i in utils.as_seq(inputs))
    sym_outputs = tuple(self.get_symbolic(o) for o in utils.as_seq(outputs))

    # get symbolic inputs corresponding to shared inputs in s_inputs;
    # this dict maps each shared variable to its (non-shared) type.
    s_memo = OrderedDict((var, var.type())
                         for var in utils.flatten(sym_inputs))
    theano_inputs = tuple(s_memo.values())

    # get new graph, replacing shared inputs with symbolic ones.
    # graph is a dict mapping "old" variables to "new" ones, where "old"
    # is the chain including shared variables, and "new" is the chain
    # with the non-shared replacements.
    graph = theano.gof.graph.clone_get_equiv(
        inputs=theano.gof.graph.inputs(sym_outputs),
        outputs=sym_outputs,
        memo=s_memo.copy())

    # get symbolic outputs
    theano_outputs = tuple([graph[o] for o in sym_outputs])

    return theano_inputs, theano_outputs, graph

def get_gradient_compile_args(self, inputs, outputs, graph, wrt=None,
                              reduction=None):
    """
    Helper function: given the symbolic inputs and outputs, as well as
    a theano graph and wrt/reduction info, return the appropriate
    arguments for theano.function to compile a gradient.
    """
    wrt = utils.as_seq(wrt)

    if reduction in ['sum', 'max', 'mean', 'min', 'prod', 'std', 'var']:
        reduction = getattr(theano.tensor, reduction)

    if callable(reduction):
        if 'numpy' in reduction.__module__:
            reduction = getattr(theano.tensor, reduction.__name__)
        outputs = [reduction(o) if o.ndim > 0 else o for o in outputs]

    if np.any([o.ndim != 0 for o in outputs]):
        raise TypeError('Gradient requires either scalar outputs or a '
                        'reduction that returns a scalar.')

    # get wrt variables. If none were specified, use inputs.
    if len(wrt) == 0:
        wrt = [i for i in inputs]
    else:
        wrt = [graph[self.get_symbolic(w)] for w in wrt]

    grads = utils.flatten([T.grad(o, wrt=wrt) for o in outputs])

    return dict(inputs=inputs, outputs=utils.as_seq(grads, tuple))

def fmin_cg(fn, init_args=None, init_kwargs=None, **scipy_kwargs):
    """
    Minimize a scalar valued function using SciPy's nonlinear conjugate
    gradient algorithm. The initial parameter guess is 'init_args'.
    """
    init_args = utils.as_seq(init_args, tuple)
    init_kwargs = utils.as_seq(init_kwargs, dict)

    f = VectorArg(fn, init_args=init_args, init_kwargs=init_kwargs,
                  compile_fn=True)
    fprime = VectorArg(fn, init_args=init_args, init_kwargs=init_kwargs,
                       compile_grad=True)
    x0 = f.vector_from_args(init_args, init_kwargs)

    x_opt = scipy.optimize.fmin_cg(f=f,
                                   x0=x0,
                                   fprime=fprime,
                                   full_output=False,
                                   **scipy_kwargs)

    x_reshaped = f.args_from_vector(x_opt)
    if len(x_reshaped) == 1:
        x_reshaped = x_reshaped[0]

    return x_reshaped

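# Usage sketch for fmin_cg above (not from the source): a made-up quadratic
# objective whose minimum is at x == 3. Assumes fmin_cg and its dependencies
# are in scope.
import numpy as np

def _quadratic_loss(x):
    return ((x - 3.0) ** 2).sum()

x_min = fmin_cg(_quadratic_loss, init_args=(np.zeros(5),))
print(x_min)  # expected to be close to [3. 3. 3. 3. 3.]
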
def get_theano_variables(self, inputs=None, outputs=None):
    """
    Returns a tuple (theano_inputs, theano_outputs, graph) corresponding
    to the Theano version of the pyfn.
    """
    inputs = utils.as_seq(inputs, tuple)
    sym_inputs = [self.get_symbolic(x) for x in inputs]

    outputs = utils.as_seq(outputs, tuple)
    sym_outputs = [self.get_symbolic(x) for x in outputs]

    # get symbolic inputs corresponding to shared inputs in s_inputs
    s_memo = OrderedDict((arg, arg.type())
                         for arg in utils.flat_from_doc(sym_inputs))
    theano_inputs = tuple(s_memo.values())

    # get new graph, replacing shared inputs with symbolic ones
    graph = theano.gof.graph.clone_get_equiv(
        theano.gof.graph.inputs(sym_outputs),
        sym_outputs,
        memo=s_memo.copy())

    # get symbolic outputs
    theano_outputs = tuple([graph[o] for o in sym_outputs])

    return theano_inputs, theano_outputs, graph

def _checkfn(context, f, var_ndim=None, *args, **kwargs):
    context.reset()

    override = kwargs.pop('override', None)

    var_ndim = utils.as_seq(var_ndim)
    dim = [[4] * nd for nd in var_ndim]
    values = tuple([np.random.random(d) for d in dim])

    # make shallow copies to avoid inplace corruption
    sym_values = copy.copy(values)
    sym_args = copy.copy(args)
    sym_kwargs = copy.copy(kwargs)

    F = context.recompile(f)
    sym_vars = F(*(sym_values + sym_args), **sym_kwargs)
    sym_result = [v.eval() if utils.isvar(v) else v
                  for v in utils.as_seq(sym_vars)]
    if len(sym_result) == 0:
        sym_result = None

    py_result = override or f(*(values + args), **kwargs)

    if sym_result is None:
        return sym_result is None and py_result is None
    else:
        return np.allclose(py_result, sym_result)

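# Usage sketch for the test helper above (not from the source): it traces a
# made-up function on random length-4 vectors (var_ndim=[1]) and checks the
# symbolic result against the plain Python result. The Context import path is
# an assumption.
import numpy as np
from autodiff.context import Context  # assumed import path

def _double(x):
    return x * 2

print(_checkfn(Context(), _double, var_ndim=[1]))  # True if the results agree
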
def __init__(self, pyfn, context=None, force_floatX=False, borrowable=None,
             ignore=None, infer_updates=False, escape_on_error=False):
    """
    Arguments
    ---------

    borrowable : tuple of objects
        If an object in this tuple is encountered while tracing the
        function, then its symbolic representation will alias that
        object's memory location. This means that *inplace* operations on
        the Python (likely NumPy) object will affect the symbolic
        function.
    """
    if context is None:
        context = Context(borrowable=utils.as_seq(borrowable, tuple),
                          ignore=utils.as_seq(ignore, tuple),
                          force_floatX=force_floatX,
                          infer_updates=infer_updates,
                          escape_on_error=escape_on_error)
    assert isinstance(context, Context)
    self.context = context

    if isinstance(pyfn, Symbolic):
        pyfn = pyfn.pyfn

    self._pyfn = pyfn
    self._symfn = self.context.recompile(self.pyfn)

def get_gradient_compile_args(self, inputs, outputs, wrt=None,
                              reduction=None):
    """
    Helper function: given the symbolic inputs and outputs, as well as
    wrt/reduction info, return the appropriate arguments for
    theano.function to compile a gradient.
    """
    wrt = utils.as_seq(wrt)

    if reduction in ['sum', 'max', 'mean', 'min', 'prod', 'std', 'var']:
        reduction = getattr(theano.tensor, reduction)

    if isinstance(reduction, collections.Callable):
        if 'numpy' in reduction.__module__:
            reduction = getattr(theano.tensor, reduction.__name__)
        outputs = [reduction(o) if o.ndim > 0 else o for o in outputs]

    if any([o.ndim != 0 for o in outputs]):
        raise TypeError('Gradient requires either scalar outputs or a '
                        'reduction that returns a scalar.')

    # get wrt variables. If none were specified, use inputs.
    if len(wrt) == 0:
        wrt = [i for i in inputs]
    else:
        wrt = [self.get_symbolic(w) for w in wrt]

    grads = utils.flatten([T.grad(o, wrt=wrt) for o in outputs])

    return dict(inputs=inputs, outputs=utils.as_seq(grads, tuple))

def get_theano_variables(self, inputs=None, outputs=None):
    """
    Returns the inputs, outputs and graph corresponding to the Theano
    version of the pyfn. This version of the function collapses all
    inputs into a single symbolic vector ('theta').
    """
    inputs = utils.as_seq(inputs, tuple)
    outputs = utils.as_seq(outputs, tuple)

    if inputs:
        sym_inputs = [self.get_symbolic(x) for x in inputs]
    else:
        sym_inputs = self.s_inputs.values()

    if outputs:
        sym_outputs = [self.get_symbolic(x) for x in outputs]
    else:
        sym_outputs = self.s_outputs.values()

    if len(sym_outputs) > 1:
        raise ValueError(
            'VectorArg functions should return a single output.')

    # get symbolic inputs corresponding to shared inputs in s_inputs
    s_memo = OrderedDict()
    sym_args = utils.flat_from_doc(sym_inputs)
    real_args = utils.flat_from_doc(self.all_init_args)

    # create a symbolic vector, then split it up into symbolic input args
    inputs_dtype = self.vector_from_args(self.all_init_args).dtype
    theano_input = tt.vector(name='theta', dtype=inputs_dtype)

    i = 0
    for sa, ra in zip(sym_args, real_args):
        if sa.ndim > 0:
            vector_arg = theano_input[i:i + ra.size].reshape(ra.shape)
        else:
            vector_arg = theano_input[i]
        s_memo[sa] = tt.patternbroadcast(
            vector_arg.astype(str(sa.dtype)),
            broadcastable=sa.broadcastable)
        i += ra.size

    # get new graph, replacing shared inputs with symbolic ones
    graph = theano.gof.graph.clone_get_equiv(
        theano.gof.graph.inputs(sym_outputs),
        sym_outputs,
        memo=s_memo.copy())

    # get symbolic outputs
    theano_outputs = graph[sym_outputs[0]]

    f_in, f_out = self.finalize(theano_input, theano_outputs, graph)

    return f_in, f_out, graph

def __init__(self, borrowable=None, ignore=None, escape_on_error=False):
    self.sym_vars = dict()
    self.tags = dict()

    # FIXME do we need to hold on to all of these intermediates?
    # ensure these ids do not get recycled by garbage collection
    self._nogc = []
    self._top_def = None

    self.borrowable = [id(b) for b in utils.as_seq(borrowable)]
    self.ignore = utils.as_seq(ignore, tuple)
    self.ignore += (utils.orderedcallargs,)
    self.escape_on_error = escape_on_error

    self.shadowed_containers = dict()

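# Small sketch (not from the source) of how `borrowable` is recorded: the
# Context stores object ids so that, during tracing, the symbolic shadows of
# those objects alias the originals. The import path is an assumption.
import numpy as np
from autodiff.context import Context  # assumed import path

x = np.ones(3)
ctx = Context(borrowable=(x,))
print(id(x) in ctx.borrowable)  # True
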
def __init__(self, pyfn, init_args=None, init_kwargs=None, context=None,
             force_floatX=False, borrowable=None, ignore=None,
             infer_updates=False, escape_on_error=False, function=False,
             gradient=False, hessian_vector=False):

    if isinstance(pyfn, Symbolic):
        pyfn = pyfn.pyfn
    self.pyfn = pyfn

    init_args = utils.as_seq(init_args, tuple)
    init_kwargs = utils.as_seq(init_kwargs, dict)
    self.init_args = utils.expandedcallargs(pyfn, *init_args, **init_kwargs)

    def wrapped_function(vector):
        return pyfn(*escaped_call(self.args_from_vector, vector))

    def wrapper(*args, **kwargs):
        vector = self.vector_from_args(args, kwargs)
        v_args = self.args_from_vector(vector)
        return vector, pyfn(*v_args)

    symbolic = Symbolic(pyfn=wrapper,
                        context=context,
                        force_floatX=force_floatX,
                        infer_updates=infer_updates,
                        borrowable=borrowable,
                        ignore=ignore,
                        escape_on_error=escape_on_error)

    _, (sym_vector, result) = symbolic.trace(*init_args, **init_kwargs)

    fn = symbolic.compile(function=function,
                          gradient=gradient,
                          hessian_vector=hessian_vector,
                          inputs=sym_vector,
                          outputs=result)

    self.fn = fn

def fmin_l_bfgs_b(fn, init_args=None, init_kwargs=None, scalar_bounds=None,
                  return_info=False, **scipy_kwargs):
    """
    Minimize a scalar valued function using SciPy's L-BFGS-B algorithm.
    The initial parameter guess is 'init_args'.
    """
    init_args = utils.as_seq(init_args, tuple)
    init_kwargs = utils.as_seq(init_kwargs, dict)

    f_df = VectorArg(fn, init_args=init_args, init_kwargs=init_kwargs,
                     function=True, gradient=True)
    x0 = f_df.vector_from_args(init_args, init_kwargs)

    if 'approx_grad' in scipy_kwargs:
        raise TypeError('duplicate argument: approx_grad')

    if scalar_bounds is not None:
        lb, ub = scalar_bounds
        bounds = np.empty((len(x0), 2))
        bounds[:, 0] = lb
        bounds[:, 1] = ub
        if 'bounds' in scipy_kwargs:
            raise TypeError('duplicate argument: bounds')
        scipy_kwargs['bounds'] = bounds

    x_opt, f_opt, info = scipy.optimize.fmin_l_bfgs_b(func=f_df,
                                                      x0=x0,
                                                      approx_grad=False,
                                                      **scipy_kwargs)

    x_reshaped = f_df.args_from_vector(x_opt)
    if len(x_reshaped) == 1:
        x_reshaped = x_reshaped[0]

    if return_info:
        return x_reshaped, {'f_opt': f_opt, 'info': info}
    else:
        return x_reshaped

def fmin_l_bfgs_b(fn, init_args=None, init_kwargs=None, scalar_bounds=None,
                  return_info=False, **scipy_kwargs):
    """
    Minimize a scalar valued function using SciPy's L-BFGS-B algorithm.
    The initial parameter guess is 'init_args'.
    """
    init_args = utils.as_seq(init_args, tuple)
    init_kwargs = utils.as_seq(init_kwargs, dict)

    f_df = VectorArg(fn, init_args=init_args, init_kwargs=init_kwargs,
                     compile_fn=True, compile_grad=True)
    x0 = f_df.vector_from_args(init_args, init_kwargs)

    if 'approx_grad' in scipy_kwargs:
        raise TypeError('duplicate argument: approx_grad')

    if scalar_bounds is not None:
        lb, ub = scalar_bounds
        bounds = np.empty((len(x0), 2))
        bounds[:, 0] = lb
        bounds[:, 1] = ub
        if 'bounds' in scipy_kwargs:
            raise TypeError('duplicate argument: bounds')
        scipy_kwargs['bounds'] = bounds

    x_opt, f_opt, info = scipy.optimize.fmin_l_bfgs_b(func=f_df,
                                                      x0=x0,
                                                      approx_grad=False,
                                                      **scipy_kwargs)

    x_reshaped = f_df.args_from_vector(x_opt)
    if len(x_reshaped) == 1:
        x_reshaped = x_reshaped[0]

    if return_info:
        return x_reshaped, {'f_opt': f_opt, 'info': info}
    else:
        return x_reshaped

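# Usage sketch for fmin_l_bfgs_b above (not from the source): the objective
# is made up; `scalar_bounds` clamps every element of the flattened parameter
# vector to [0, 2], so the optimum lands on the upper bound.
import numpy as np

def _bounded_loss(x):
    return ((x - 3.0) ** 2).sum()

x_min = fmin_l_bfgs_b(_bounded_loss, init_args=(np.zeros(5),),
                      scalar_bounds=(0.0, 2.0))
print(x_min)  # expected to be close to [2. 2. 2. 2. 2.]
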
def call(self, *args, **kwargs):
    if '_vectors' in kwargs:
        vectors = kwargs.pop('_vectors')
    else:
        raise ValueError(
            'Vectors must be passed via the keyword \'_vectors\'.')
    vectors = utils.as_seq(vectors, tuple)

    all_args = utils.expandedcallargs(self.pyfn, *args, **kwargs)

    # avoid 'self' and 'cls' bound arguments
    if (inspect.ismethod(self.pyfn)
            or (len(all_args) > 0 and type(all_args[0]) is type)):
        all_args = all_args[1:]

    cache_key = tuple(np.asarray(a).ndim for a in all_args)
    if cache_key not in self.cache or not self.use_cache:
        self.cache[cache_key] = self.get_theano_fn(args, kwargs)
    fn = self.cache[cache_key]

    if len(self.wrt) > 0 and len(vectors) != len(self.wrt):
        raise ValueError('Expected {0} items in _vectors; received '
                         '{1}.'.format(len(self.wrt), len(vectors)))
    elif len(self.wrt) == 0 and len(vectors) != len(self.s_inputs):
        raise ValueError('Expected {0} items in _vectors; received '
                         '{1}.'.format(len(self.s_inputs), len(vectors)))

    return fn(*(all_args + vectors))

def get_theano_fn(self, args, kwargs):
    self.trace(*args, **kwargs)
    fn_inputs, fn_outputs, graph = self.get_theano_variables(self.s_inputs,
                                                             self.s_outputs)

    if np.any([o.ndim != 0 for o in fn_outputs]):
        raise TypeError('HessianVector requires scalar outputs.')

    # get wrt variables. If none were specified, use inputs.
    wrt = utils.as_seq(self.wrt)
    if len(wrt) == 0:
        wrt = [i for i in fn_inputs]
    else:
        wrt = [graph[self.get_symbolic(w)] for w in wrt]

    grads = utils.flat_from_doc([tt.grad(o, wrt=wrt) for o in fn_outputs])

    sym_vecs = tuple(tt.TensorType(dtype=w.dtype,
                                   broadcastable=[False] * w.ndim)()
                     for w in wrt)
    hess_vec = tt.Rop(grads, wrt, sym_vecs)

    if len(hess_vec) == 1:
        hess_vec = hess_vec[0]

    # compile function
    fn = theano.function(inputs=fn_inputs + sym_vecs,
                         outputs=hess_vec,
                         on_unused_input='ignore')

    return fn

def __call__(self, *args, **kwargs):
    if 'vectors' in kwargs:
        vectors = kwargs.pop('vectors')
    else:
        raise ValueError(
            'HessianVector must be called with the keyword \'vectors\'.')
    vectors = utils.as_seq(vectors, tuple)

    all_args = utils.expandedcallargs(self.symfn, *args, **kwargs)

    key = tuple(np.asarray(a).ndim for a in all_args)
    if key not in self.cache or not self.use_cache:
        self.context.reset()
        inputs, outputs = self.trace(*args, **kwargs)
        # cache the number of traced inputs alongside the compiled function
        # so the argument check below also works on cache hits
        self.cache[key] = (self.get_theano_function(inputs, outputs),
                           len(inputs))
    fn, n_inputs = self.cache[key]

    if len(self.wrt) > 0 and len(vectors) != len(self.wrt):
        raise ValueError('Expected {0} items in `vectors`; received '
                         '{1}.'.format(len(self.wrt), len(vectors)))
    elif len(self.wrt) == 0 and len(vectors) != n_inputs:
        raise ValueError('Expected {0} items in `vectors`; received '
                         '{1}.'.format(n_inputs, len(vectors)))

    return fn(*(all_args + vectors))

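# Usage sketch (not from the source) for the call above: a Hessian-vector
# product of a made-up scalar loss. The import path and constructor call are
# assumptions based on the surrounding code.
import numpy as np
from autodiff import HessianVector  # assumed import path

def _sq_loss(x):
    return (x ** 2).sum()

hv = HessianVector(_sq_loss)
# The Hessian of sum(x ** 2) is 2 * I, so the product with v is simply 2 * v.
print(hv(np.arange(3.0), vectors=np.ones(3)))  # expected ~[2. 2. 2.]
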
def __init__(self, borrow=None, force_floatX=False, context=None):
    """
    Arguments
    ---------

    borrow : tuple of objects
        If an object in this tuple is encountered while tracing the
        function, then its symbolic representation will alias that
        object's memory location. This means that *inplace* operations on
        the Python (likely NumPy) object will affect the symbolic
        function.

    force_floatX : bool
        If True, floats and float NumPy ndarrays will be cast to the
        dtype specified at theano.config.floatX when forming symbolic
        shared variables, if they do not have it already. Objects in
        `borrow` are never cast.
    """
    if context is None:
        self.context = Context(borrowable=utils.as_seq(borrow, tuple),
                               force_floatX=force_floatX)
    elif isinstance(context, Context):
        self.context = context
    else:
        raise TypeError(
            'Received unrecognized Context: {0}'.format(context))

def isvar_ast(name):
    """
    Wraps a Name node in a call to utils.isvar.
    """
    isvar = simple_Call(args=utils.as_seq(name),
                        func=Attribute(attr='isvar',
                                       ctx=Load(),
                                       value=Name(ctx=Load(),
                                                  id='_utils__')))
    return isvar

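# Illustrative sketch (not from the source): the node built above represents
# the source `_utils__.isvar(<name>)`. The AST node classes used above may
# come from a third-party AST package; the stdlib snippet below only shows
# the equivalent structure for comparison.
import ast

equivalent = ast.parse("_utils__.isvar(x)", mode='eval').body
print(ast.dump(equivalent))
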
def __init__(self, pyfn, wrt=None, borrow=None, force_floatX=False,
             context=None):
    super(Gradient, self).__init__(pyfn=pyfn,
                                   borrow=borrow,
                                   force_floatX=force_floatX,
                                   context=context)
    self.wrt = utils.as_seq(wrt, tuple)

def simple_Call(func, args=None):
    """
    Simple alias for building Call nodes that doesn't require
    specification of keywords, kwargs or starargs.
    """
    args = utils.as_seq(args)
    call = Call(args=args,
                func=func,
                keywords=[],
                kwargs=None,
                starargs=None)
    return call

def fmin_ncg(fn, init_args=None, init_kwargs=None, **scipy_kwargs):
    """
    Minimize a scalar valued function using SciPy's Newton-CG algorithm.
    The initial parameter guess is 'init_args'.
    """
    init_args = utils.as_seq(init_args, tuple)
    init_kwargs = utils.as_seq(init_kwargs, dict)

    f = VectorArg(fn, init_args=init_args, init_kwargs=init_kwargs,
                  function=True)
    fprime = VectorArg(fn, init_args=init_args, init_kwargs=init_kwargs,
                       gradient=True)
    fhess_p = VectorArg(fn, init_args=init_args, init_kwargs=init_kwargs,
                        hessian_vector=True)
    x0 = f.vector_from_args(init_args, init_kwargs)

    x_opt = scipy.optimize.fmin_ncg(f=f,
                                    x0=x0,
                                    fprime=fprime,
                                    fhess_p=fhess_p,
                                    full_output=False,
                                    **scipy_kwargs)

    x_reshaped = f.args_from_vector(x_opt)
    if len(x_reshaped) == 1:
        x_reshaped = x_reshaped[0]

    return x_reshaped

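# Usage sketch for fmin_ncg above (not from the source): a made-up objective
# with two parameter blocks, showing that the optimizer flattens them into a
# single vector and returns them reshaped to their original forms.
import numpy as np

def _two_block_loss(a, b):
    return ((a - 1.0) ** 2).sum() + ((b + 2.0) ** 2).sum()

a_min, b_min = fmin_ncg(_two_block_loss,
                        init_args=(np.zeros(3), np.zeros((2, 2))))
print(a_min)  # expected ~[1. 1. 1.]
print(b_min)  # expected ~[[-2. -2.] [-2. -2.]]
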
def get_hessian_vector_compile_args(self, inputs, outputs, wrt=None,
                                    reduction=None):
    """
    Helper function: given the symbolic inputs and outputs, as well as
    wrt/reduction/vectors info, return the appropriate arguments for
    theano.function to compile a Hessian-vector product.
    """
    wrt = utils.as_seq(wrt)

    if reduction in ['sum', 'max', 'mean', 'min', 'prod', 'std', 'var']:
        reduction = getattr(theano.tensor, reduction)

    if isinstance(reduction, collections.Callable):
        if 'numpy' in reduction.__module__:
            reduction = getattr(theano.tensor, reduction.__name__)
        outputs = [reduction(o) if o.ndim > 0 else o for o in outputs]

    if any([o.ndim != 0 for o in outputs]):
        raise TypeError('Gradient requires either scalar outputs or a '
                        'reduction that returns a scalar.')

    # get wrt variables. If none were specified, use inputs.
    if len(wrt) == 0:
        wrt = [i for i in inputs]
    else:
        wrt = [self.get_symbolic(w) for w in wrt]

    grads = utils.flatten([T.grad(o, wrt=wrt) for o in outputs])

    sym_vectors = tuple(T.TensorType(dtype=w.dtype,
                                     broadcastable=[False] * w.ndim)()
                        for w in wrt)
    hessian_vectors = utils.as_seq(T.Rop(grads, wrt, sym_vectors), tuple)

    return dict(inputs=inputs + sym_vectors, outputs=hessian_vectors)

def __init__(self, pyfn, wrt=None, reduction=None, borrowable=None,
             ignore=None, escape_on_error=False, context=None,
             use_cache=True):
    super(Gradient, self).__init__(pyfn=pyfn,
                                   borrowable=borrowable,
                                   ignore=ignore,
                                   context=context,
                                   escape_on_error=escape_on_error,
                                   use_cache=use_cache)
    self.wrt = utils.as_seq(wrt, tuple)
    self.reduction = reduction

def __init__(self, pyfn, wrt=None, reduction=None, force_floatX=False,
             infer_updates=False, borrowable=None, ignore=None,
             escape_on_error=False, context=None, use_cache=True):
    super(Gradient, self).__init__(pyfn=pyfn,
                                   force_floatX=force_floatX,
                                   borrowable=borrowable,
                                   ignore=ignore,
                                   infer_updates=infer_updates,
                                   context=context,
                                   escape_on_error=escape_on_error,
                                   use_cache=use_cache)
    self.wrt = utils.as_seq(wrt, tuple)
    self.reduction = reduction

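# Usage sketch (not from the source) for the constructor above: a gradient
# with respect to a single named argument, reducing a vector output to a
# scalar with `reduction='sum'`. The import path, the call syntax, and
# passing the argument name as `wrt` are assumptions.
import numpy as np
from autodiff import Gradient  # assumed import path

def _weighted(w, x):
    return w * x  # elementwise product, reduced to a scalar by 'sum'

dloss_dw = Gradient(_weighted, wrt='w', reduction='sum')
print(dloss_dw(np.ones(3), np.arange(3.0)))  # expected ~[0. 1. 2.]
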
def trace(self, *args, **kwargs):
    """
    Given args and kwargs, call the Python function and get its
    symbolic representation.

    A dictionary of shadowed symbolic variables is maintained:

        self.s_vars : {id(obj) : sym_var}
            Contains all symbolic variables traced during function
            execution, indexed by the id of the corresponding Python
            object.

    Additionally, self.s_inputs and self.s_outputs are tuples of symbolic
    arguments and results, respectively.
    """
    # clean args and kwargs
    c_args, c_kwargs = clean_int_args(*args, **kwargs)

    # call the Context
    results = self.context.call(self.pyfn, c_args, c_kwargs)

    # get a tuple of the symbolic inputs,
    # but avoid 'self' and 'cls' bound arguments
    callargs = utils.orderedcallargs(self.pyfn, *c_args, **c_kwargs)
    all_args = utils.flat_from_doc(callargs)
    if (inspect.ismethod(self.pyfn)
            or (len(all_args) > 0 and type(all_args[0]) is type)):
        all_args = all_args[1:]
    self.s_inputs = tuple([self.s_vars[id(a)] for a in all_args])

    # get a tuple of the symbolic outputs
    self.s_outputs = tuple(
        [self.s_vars[id(r)] for r in utils.as_seq(results)])

    # update variable names where possible
    for name, arg in callargs.iteritems():
        if self.s_vars.get(id(arg), None) in self.s_inputs:
            self.s_vars[name] = self.s_vars[id(arg)]

    return results

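# Usage sketch (not from the source) for trace() above: in this version the
# traced Python result is returned while the shadowed symbolic variables are
# stored on the instance. The class name and import path are assumptions.
import numpy as np
from autodiff import Symbolic  # assumed import path

def _dot_sum(x, y):
    return (x * y).sum()

sym = Symbolic(_dot_sum)
sym.trace(np.ones(3), np.arange(3.0))
print(sym.s_inputs)   # two symbolic inputs, one shadowing each argument
print(sym.s_outputs)  # one symbolic output, shadowing the returned sum
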
def compile_gradient(self, inputs=None, outputs=None, wrt=None,
                     reduction=None):
    fn_inputs, fn_outputs, graph = self.get_theano_variables(inputs,
                                                             outputs)
    wrt = utils.as_seq(wrt)

    if reduction in ['sum', 'max', 'mean', 'min', 'prod', 'std', 'var']:
        reduction = getattr(theano.tensor, reduction)

    if callable(reduction):
        if 'numpy' in reduction.__module__:
            reduction = getattr(theano.tensor, reduction.__name__)
        fn_outputs = [reduction(o) for o in fn_outputs]

    if np.any([o.ndim != 0 for o in fn_outputs]):
        raise TypeError('Gradient requires either scalar outputs or a '
                        'reduction that returns a scalar.')

    # get wrt variables. If none were specified, use inputs.
    if len(wrt) == 0:
        wrt = [i for i in fn_inputs]
    else:
        wrt = [graph[self.get_symbolic(w)] for w in wrt]

    grads = utils.flat_from_doc([tt.grad(o, wrt=wrt) for o in fn_outputs])
    if len(grads) == 1:
        grads = grads[0]

    # compile function
    fn = theano.function(inputs=fn_inputs,
                         outputs=grads,
                         on_unused_input='ignore')

    return fn

def compile(self, function=False, gradient=False, hessian_vector=False,
            inputs=None, outputs=None, wrt=None, reduction=None,
            allow_input_downcast=True):

    assert isinstance(function, bool)
    assert isinstance(gradient, bool)
    assert isinstance(hessian_vector, bool)

    if not (function or gradient or hessian_vector):
        raise ValueError(
            'At least one of `function`, `gradient`, or `hessian_vector` '
            'must be True when calling `compile()`.')

    sym_inputs = tuple(self.get_symbolic(i) for i in utils.as_seq(inputs))
    sym_outputs = tuple(self.get_symbolic(o) for o in utils.as_seq(outputs))

    fn_inputs = sym_inputs
    fn_outputs = ()

    if function:
        fn_args = self.get_function_compile_args(inputs=sym_inputs,
                                                 outputs=sym_outputs)
        fn_outputs += fn_args['outputs']

    if gradient:
        g_args = self.get_gradient_compile_args(inputs=sym_inputs,
                                                outputs=sym_outputs,
                                                wrt=wrt,
                                                reduction=reduction)
        fn_outputs += g_args['outputs']

    if hessian_vector:
        hv_args = self.get_hessian_vector_compile_args(inputs=sym_inputs,
                                                       outputs=sym_outputs,
                                                       wrt=wrt,
                                                       reduction=reduction)
        fn_inputs = hv_args['inputs']
        fn_outputs += hv_args['outputs']

    if len(fn_outputs) == 1:
        fn_outputs = fn_outputs[0]

    new_inputs = tuple(i.type() for i in fn_inputs)
    givens = dict(zip(fn_inputs, new_inputs))

    if self.context.infer_updates:
        updates = self.context.updates
    else:
        updates = collections.OrderedDict()

    fn = theano.function(inputs=new_inputs,
                         outputs=fn_outputs,
                         givens=givens,
                         updates=updates,
                         on_unused_input='ignore',
                         allow_input_downcast=allow_input_downcast)

    return fn

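# Usage sketch (not from the source) for compile() above: trace a made-up
# function, then compile a single Theano function that returns both the value
# and its gradient. The import path, and the assumption that trace() returns
# an (inputs, outputs) pair as used elsewhere in this file, are not verified.
import numpy as np
from autodiff import Symbolic  # assumed import path

def _sum_sq(x):
    return (x ** 2).sum()

sym = Symbolic(_sum_sq)
x0 = np.arange(3.0)
inputs, outputs = sym.trace(x0)

fg = sym.compile(function=True, gradient=True, inputs=inputs, outputs=outputs)
value, grad = fg(x0)
print(value)  # expected ~5.0
print(grad)   # expected ~[0. 2. 4.]
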