def update_opt(self, f, target, inputs, reg_coeff):
    """Build the exact Hessian-vector-product function for ``f``.

    :param f: Symbolic (Theano) expression to differentiate twice.
    :param target: Parameterized object providing ``get_params(trainable=True)``.
    :param inputs: List of symbolic input variables that ``f`` depends on.
    :param reg_coeff: Regularization coefficient, stored for use by the caller.
    """
    self.target = target
    self.reg_coeff = reg_coeff
    params = target.get_params(trainable=True)
    constraint_grads = theano.grad(
        f, wrt=params, disconnected_inputs='warn')
    # One fresh tensor per parameter: the vector pieces to multiply by the Hessian.
    xs = tuple([ext.new_tensor_like("%s x" % p.name, p) for p in params])

    def Hx_plain():
        # Pearlmutter trick: grad of (grad(f) . x) w.r.t. params equals H x,
        # avoiding explicit construction of the Hessian.
        Hx_plain_splits = TT.grad(
            TT.sum([TT.sum(g * x)
                    for g, x in zip(constraint_grads, xs)]),
            wrt=params,
            disconnected_inputs='warn'
        )
        # Flatten the per-parameter pieces into one long vector.
        return TT.concatenate([TT.flatten(s) for s in Hx_plain_splits])

    self.opt_fun = ext.lazydict(
        f_Hx_plain=lambda: ext.compile_function(
            inputs=inputs + xs,
            outputs=Hx_plain(),
            log_name="f_Hx_plain",
        ),
    )
def update_opt(self, loss, target, inputs, extra_inputs=None, **kwargs):
    """
    Set up lazily-compiled loss and update-step functions.

    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over (Parameterized
        interface with ``get_params``).
    :param inputs: A list of symbolic variables as inputs.
    :param extra_inputs: Optional additional symbolic inputs.
    :return: No return value.
    """
    self._target = target
    raw_updates = self._update_method(loss, target.get_params(trainable=True))
    # Cast each update expression to the dtype of the variable it assigns to.
    updates = OrderedDict(
        (var, expr.astype(var.dtype)) for var, expr in raw_updates.items())
    if extra_inputs is None:
        extra_inputs = []
    joint_inputs = inputs + extra_inputs
    self._opt_fun = ext.lazydict(
        f_loss=lambda: ext.compile_function(joint_inputs, loss),
        f_opt=lambda: ext.compile_function(
            inputs=joint_inputs,
            outputs=loss,
            updates=updates,
        ),
    )
def update_opt(self, loss, target, inputs, extra_inputs=None, *args, **kwargs):
    """
    Set up lazily-compiled loss and (loss, flat-gradient) functions.

    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over (Parameterized
        interface with ``get_params``).
    :param inputs: A list of symbolic variables as inputs.
    :param extra_inputs: Optional additional symbolic inputs.
    :return: No return value.
    """
    self._target = target
    if extra_inputs is None:
        extra_inputs = []
    joint_inputs = inputs + extra_inputs

    def _opt_outputs():
        # Loss plus the flattened gradient, both cast to float64.
        grad_list = tf.gradients(loss, target.get_params(trainable=True))
        flat = tensor_utils.flatten_tensor_variables(grad_list)
        return [tf.cast(loss, tf.float64), tf.cast(flat, tf.float64)]

    self._opt_fun = ext.lazydict(
        f_loss=lambda: tensor_utils.compile_function(joint_inputs, loss),
        f_opt=lambda: tensor_utils.compile_function(
            inputs=joint_inputs,
            outputs=_opt_outputs(),
        ),
    )
def update_opt(self, loss, target, inputs, extra_inputs=None, gradients=None, *args, **kwargs):
    """
    Set up lazily-compiled loss and (loss, flat-gradient) functions.

    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over (Parameterized
        interface with ``get_params``).
    :param inputs: A list of symbolic variables as inputs.
    :param extra_inputs: Optional additional symbolic inputs.
    :param gradients: Optional symbolic gradients of the trainable parameters;
        computed with theano.grad when omitted.
    :return: No return value.
    """
    self._target = target
    if extra_inputs is None:
        extra_inputs = []

    def _loss_and_flat_grad(grad_exprs):
        # Default to differentiating the loss w.r.t. the trainable params.
        if grad_exprs is None:
            grad_exprs = theano.grad(loss, target.get_params(trainable=True))
        flat = flatten_tensor_variables(grad_exprs)
        return [loss.astype('float64'), flat.astype('float64')]

    self._opt_fun = lazydict(
        f_loss=lambda: compile_function(inputs + extra_inputs, loss),
        f_opt=lambda: compile_function(
            inputs=inputs + extra_inputs,
            outputs=_loss_and_flat_grad(gradients),
        ))
def update_opt(self, f, target, inputs, reg_coeff):
    """Build the exact Hessian-vector-product function for ``f`` (TensorFlow).

    :param f: Symbolic (TensorFlow) expression to differentiate twice.
    :param target: Parameterized object providing ``get_params(trainable=True)``.
    :param inputs: List of symbolic input variables that ``f`` depends on.
    :param reg_coeff: Regularization coefficient, stored for use by the caller.
    """
    self.target = target
    self.reg_coeff = reg_coeff
    params = target.get_params(trainable=True)
    constraint_grads = tf.gradients(f, xs=params)
    # tf.gradients yields None for parameters f does not depend on; substitute
    # zeros so the per-parameter layout stays fixed.
    for idx, (grad, param) in enumerate(zip(constraint_grads, params)):
        if grad is None:
            constraint_grads[idx] = tf.zeros_like(param)
    # One fresh tensor per parameter: the vector pieces multiplied by the Hessian.
    # p.name looks like "scope/var:0"; strip the ":0" suffix to get a legal name.
    xs = tuple([tensor_utils.new_tensor_like(p.name.split(":")[0], p) for p in params])

    def Hx_plain():
        # Pearlmutter trick: grad of (grad(f) . x) w.r.t. params equals H x.
        Hx_plain_splits = tf.gradients(
            tf.reduce_sum(
                tf.stack([tf.reduce_sum(g * x) for g, x in zip(constraint_grads, xs)])
            ),
            params
        )
        # Again replace None (unconnected) gradients with zeros.
        for idx, (Hx, param) in enumerate(zip(Hx_plain_splits, params)):
            if Hx is None:
                Hx_plain_splits[idx] = tf.zeros_like(param)
        return tensor_utils.flatten_tensor_variables(Hx_plain_splits)

    self.opt_fun = ext.lazydict(
        f_Hx_plain=lambda: tensor_utils.compile_function(
            inputs=inputs + xs,
            outputs=Hx_plain(),
            log_name="f_Hx_plain",
        ),
    )
def update_opt(self, loss, target, inputs, extra_inputs=None, *args, **kwargs):
    """
    Set up lazily-compiled loss and (loss, flat-gradient) functions.

    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over (Parameterized
        interface with ``get_params``).
    :param inputs: A list of symbolic variables as inputs.
    :param extra_inputs: Optional additional symbolic inputs.
    :return: No return value.
    """
    # Optional interactive debugging hook, gated by a global config flag.
    if config.TF_NN_SETTRACE:
        ipdb.set_trace()
    self._target = target
    if extra_inputs is None:
        extra_inputs = []
    joint_inputs = inputs + extra_inputs

    def _opt_outputs():
        # Loss plus the flattened gradient, both cast to float64.
        grad_list = tf.gradients(loss, target.get_params(trainable=True))
        flat = tensor_utils.flatten_tensor_variables(grad_list)
        return [tf.cast(loss, tf.float64), tf.cast(flat, tf.float64)]

    self._opt_fun = ext.lazydict(
        f_loss=lambda: tensor_utils.compile_function(joint_inputs, loss),
        f_opt=lambda: tensor_utils.compile_function(
            inputs=joint_inputs,
            outputs=_opt_outputs(),
        ))
def update_opt(self, loss, target, inputs, extra_inputs=None, **kwargs):
    """
    Set up lazily-compiled loss and update-step functions.

    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over (Parameterized
        interface with ``get_params``).
    :param inputs: A list of symbolic variables as inputs.
    :param extra_inputs: Optional additional symbolic inputs.
    :return: No return value.
    """
    self._target = target
    updates = self._update_method(loss, target.get_params(trainable=True))
    # FIX: dict.iteritems() is Python-2-only and raises AttributeError on
    # Python 3; use items(), matching the sibling first-order optimizer.
    # Cast each update expression to the dtype of the variable it assigns to.
    updates = OrderedDict([(k, v.astype(k.dtype))
                           for k, v in updates.items()])
    if extra_inputs is None:
        extra_inputs = list()
    self._opt_fun = ext.lazydict(
        f_loss=lambda: ext.compile_function(inputs + extra_inputs, loss),
        f_opt=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=loss,
            updates=updates,
        ))
def update_opt(self, f, target, inputs, reg_coeff):
    """Build the exact Hessian-vector-product function for ``f`` (TensorFlow).

    :param f: Symbolic (TensorFlow) expression to differentiate twice.
    :param target: Parameterized object providing ``get_params(trainable=True)``.
    :param inputs: List of symbolic input variables that ``f`` depends on.
    :param reg_coeff: Regularization coefficient, stored for use by the caller.
    """
    self.target = target
    self.reg_coeff = reg_coeff
    params = target.get_params(trainable=True)
    constraint_grads = tf.gradients(f, xs=params)
    # tf.gradients yields None for parameters f does not depend on; substitute
    # zeros so the per-parameter layout stays fixed.
    for idx, (grad, param) in enumerate(zip(constraint_grads, params)):
        if grad is None:
            constraint_grads[idx] = tf.zeros_like(param)
    # One fresh tensor per parameter: the vector pieces multiplied by the Hessian.
    # p.name looks like "scope/var:0"; strip the ":0" suffix to get a legal name.
    xs = tuple([
        tensor_utils.new_tensor_like(p.name.split(":")[0], p) for p in params
    ])

    def Hx_plain():
        # Pearlmutter trick: grad of (grad(f) . x) w.r.t. params equals H x.
        Hx_plain_splits = tf.gradients(
            tf.reduce_sum(
                tf.stack([
                    tf.reduce_sum(g * x)
                    for g, x in zip(constraint_grads, xs)
                ])),
            params)
        # Again replace None (unconnected) gradients with zeros.
        for idx, (Hx, param) in enumerate(zip(Hx_plain_splits, params)):
            if Hx is None:
                Hx_plain_splits[idx] = tf.zeros_like(param)
        return tensor_utils.flatten_tensor_variables(Hx_plain_splits)

    self.opt_fun = ext.lazydict(
        f_Hx_plain=lambda: tensor_utils.compile_function(
            inputs=inputs + xs,
            outputs=Hx_plain(),
            log_name="f_Hx_plain",
        ),
    )
def update_opt(self, loss, target, leq_constraint, inputs, extra_inputs=None,
               constraint_name="constraint", *args, **kwargs):
    """
    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should implement methods of the
     :class:`rllab.core.paramerized.Parameterized` class.
    :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
    :param inputs: A list of symbolic variables as inputs, which could be subsampled if needed. It is assumed
     that the first dimension of these inputs should correspond to the number of data points
    :param extra_inputs: A list of symbolic variables as extra inputs which should not be subsampled
    :return: No return value.
    """
    inputs = tuple(inputs)
    if extra_inputs is None:
        extra_inputs = tuple()
    else:
        extra_inputs = tuple(extra_inputs)
    constraint_term, constraint_value = leq_constraint
    params = target.get_params(trainable=True)
    grads = theano.grad(loss, wrt=params)
    flat_grad = ext.flatten_tensor_variables(grads)
    constraint_grads = theano.grad(constraint_term, wrt=params)
    # One fresh tensor per parameter: the vector pieces multiplied by the Hessian.
    xs = tuple([ext.new_tensor_like("%s x" % p.name, p) for p in params])
    # FIX: itertools.izip does not exist in Python 3; the builtin zip is the
    # direct replacement (and is already lazy on Python 3).
    # Pearlmutter trick: grad of (grad(constraint) . x) w.r.t. params equals H x.
    Hx_plain_splits = TT.grad(
        TT.sum([TT.sum(g * x) for g, x in zip(constraint_grads, xs)]),
        wrt=params)
    Hx_plain = TT.concatenate([TT.flatten(s) for s in Hx_plain_splits])
    self._target = target
    self._max_constraint_val = constraint_value
    self._constraint_name = constraint_name
    if self._debug_nan:
        # Compile in NaN-guard mode so NaN/Inf values raise immediately.
        from theano.compile.nanguardmode import NanGuardMode
        mode = NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
    else:
        mode = None
    self._opt_fun = ext.lazydict(
        f_loss=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=loss,
            log_name="f_loss",
            mode=mode
        ),
        f_grad=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=flat_grad,
            log_name="f_grad",
            mode=mode
        ),
        f_Hx_plain=lambda: ext.compile_function(
            inputs=inputs + extra_inputs + xs,
            outputs=Hx_plain,
            log_name="f_Hx_plain",
            mode=mode
        ),
        f_constraint=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=constraint_term,
            log_name="constraint",
            mode=mode
        ),
        f_loss_constraint=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=[loss, constraint_term],
            log_name="f_loss_constraint",
            mode=mode
        ),
    )
def update_opt(self, loss, target, logstd, inputs, extra_inputs=None, **kwargs):
    """
    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should implement methods of the
     :class:`rllab.core.paramerized.Parameterized` class, plus get_mean_network()
     and get_std_network() accessors for its two sub-networks.
    :param logstd: Symbolic log-standard-deviation tensor; its mean is used as
     an entropy-like bonus weighted by self._alpha.
    :param inputs: A list of symbolic variables as inputs
    :param extra_inputs: Optional additional symbolic inputs.
    :return: No return value.
    """
    self._target = target
    self._log_std = tf.reduce_mean(logstd)
    if extra_inputs is None:
        extra_inputs = list()
    self._input_vars = inputs + extra_inputs
    # Gradient of the loss w.r.t. the mean-network parameters:
    # \partial{log \pi} / \partial{\phi} A
    # \phi is the mean_network parameters
    # pdb.set_trace()
    mean_w = target.get_mean_network().get_params(trainable=True)
    grads = tf.gradients(
        loss, xs=target.get_mean_network().get_params(trainable=True))
    # tf.gradients returns None for unconnected params; replace with zeros.
    for idx, (g, param) in enumerate(zip(grads, mean_w)):
        if g is None:
            grads[idx] = tf.zeros_like(param)
    flat_grad = tensor_utils.flatten_tensor_variables(grads)
    # Gradient w.r.t. the std-network parameters of the loss minus the
    # alpha-weighted mean log-std:
    # \sum_d \partial{logstd^d} / \partial{\phi}
    # \phi is the std_network parameters
    var_grads = tf.gradients(
        loss - self._alpha * self._log_std,
        xs=target.get_std_network().get_params(trainable=True))
    var_w = target.get_std_network().get_params(trainable=True)
    for idx, (g, param) in enumerate(zip(var_grads, var_w)):
        if g is None:
            var_grads[idx] = tf.zeros_like(param)
    flat_var_grad = tensor_utils.flatten_tensor_variables(var_grads)
    self._opt_fun = ext.lazydict(
        f_loss=lambda: tensor_utils.compile_function(
            inputs + extra_inputs, loss),
        f_grad=lambda: tensor_utils.compile_function(
            inputs=inputs + extra_inputs,
            outputs=flat_grad,
        ),
        f_var_grad=lambda: tensor_utils.compile_function(
            inputs=inputs + extra_inputs,
            outputs=flat_var_grad,
        ),
    )
def update_opt(self, loss, target, leq_constraint, inputs, extra_inputs=None,
               constraint_name="constraint", *args, **kwargs):
    """
    Build lazily-compiled loss/gradient/constraint functions and hand the
    constraint term to the Hessian-vector-product approximation.

    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over (Parameterized
        interface with ``get_params``).
    :param leq_constraint: A tuple (f, epsilon) encoding f(*inputs) <= epsilon.
    :param inputs: A list of symbolic variables as inputs; assumed subsamplable
        along the first dimension.
    :param extra_inputs: Optional symbolic inputs that are never subsampled.
    :return: No return value.
    """
    inputs = tuple(inputs)
    extra_inputs = tuple() if extra_inputs is None else tuple(extra_inputs)
    constraint_term, constraint_value = leq_constraint

    params = target.get_params(trainable=True)
    grads = tf.gradients(loss, xs=params)
    # tf.gradients returns None for unconnected params; replace with zeros.
    for i, (g, p) in enumerate(zip(grads, params)):
        if g is None:
            grads[i] = tf.zeros_like(p)
    flat_grad = tensor_utils.flatten_tensor_variables(grads)

    self._hvp_approach.update_opt(
        f=constraint_term,
        target=target,
        inputs=inputs + extra_inputs,
        reg_coeff=self._reg_coeff)

    self._target = target
    self._max_constraint_val = constraint_value
    self._constraint_name = constraint_name

    joint_inputs = inputs + extra_inputs
    self._opt_fun = ext.lazydict(
        f_loss=lambda: tensor_utils.compile_function(
            inputs=joint_inputs,
            outputs=loss,
            log_name="f_loss",
        ),
        f_grad=lambda: tensor_utils.compile_function(
            inputs=joint_inputs,
            outputs=flat_grad,
            log_name="f_grad",
        ),
        f_constraint=lambda: tensor_utils.compile_function(
            inputs=joint_inputs,
            outputs=constraint_term,
            log_name="constraint",
        ),
        f_loss_constraint=lambda: tensor_utils.compile_function(
            inputs=joint_inputs,
            outputs=[loss, constraint_term],
            log_name="f_loss_constraint",
        ),
    )
def update_opt(self, loss, target, leq_constraint, inputs, extra_inputs=None, **kwargs):
    """
    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over (Parameterized
        interface with ``get_params``); the policy network to optimize.
    :param leq_constraint: A constraint provided as a tuple (f, epsilon), of
        the form f(*inputs) <= epsilon.
    :param inputs: A list of symbolic variables as inputs.
    :param extra_inputs: Optional additional symbolic inputs.
    :return: No return value.
    """
    if extra_inputs is None:
        extra_inputs = list()
    self._input_vars = inputs + extra_inputs
    self._target = target
    constraint_term, constraint_value = leq_constraint
    self._max_constraint_val = constraint_value
    w = target.get_params(trainable=True)
    grads = tf.gradients(loss, xs=w)
    # tf.gradients returns None for unconnected params; replace with zeros.
    for idx, (g, param) in enumerate(zip(grads, w)):
        if g is None:
            grads[idx] = tf.zeros_like(param)
    flat_grad = tensor_utils.flatten_tensor_variables(grads)
    self._opt_fun = ext.lazydict(
        f_loss=lambda: tensor_utils.compile_function(
            inputs=inputs + extra_inputs,
            outputs=loss,
        ),
        f_grad=lambda: tensor_utils.compile_function(
            inputs=inputs + extra_inputs,
            outputs=flat_grad,
        ),
        f_loss_constraint=lambda: tensor_utils.compile_function(
            inputs=inputs + extra_inputs,
            outputs=[loss, constraint_term],
        ),
    )
    # FIX: extra_inputs was already normalized to a list above, so the original
    # re-check "if extra_inputs is None" here was dead code. Just convert both
    # to tuples for the HVP approach, as it expects.
    inputs = tuple(inputs)
    extra_inputs = tuple(extra_inputs)
    self._hvp_approach.update_opt(f=constraint_term, target=target,
                                  inputs=inputs + extra_inputs,
                                  reg_coeff=self._reg_coeff)
def update_opt(self, loss, target, leq_constraint, inputs, constraint_name="constraint", *args, **kwargs):
    """
    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should implement methods of the
     :class:`rllab.core.paramerized.Parameterized` class.
    :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
    :param inputs: A list of symbolic variables as inputs
    :return: No return value.
    """
    # Optional interactive debugging hook, gated by a global config flag.
    if config.TF_NN_SETTRACE:
        ipdb.set_trace()
    constraint_term, constraint_value = leq_constraint
    with tf.variable_scope(self._name):
        penalty_var = tf.placeholder(tf.float32, tuple(), name="penalty")
    # Penalty-method objective: loss + penalty coefficient * constraint term.
    penalized_loss = loss + penalty_var * constraint_term
    self._target = target
    self._max_constraint_val = constraint_value
    self._constraint_name = constraint_name

    def get_opt_output():
        # Penalized loss and its flattened gradient, both cast to float64.
        params = target.get_params(trainable=True)
        grads = tf.gradients(penalized_loss, params)
        # tf.gradients returns None for unconnected params; replace with zeros.
        for idx, (grad, param) in enumerate(zip(grads, params)):
            if grad is None:
                grads[idx] = tf.zeros_like(param)
        flat_grad = tensor_utils.flatten_tensor_variables(grads)
        return [
            tf.cast(penalized_loss, tf.float64),
            tf.cast(flat_grad, tf.float64),
        ]

    self._opt_fun = ext.lazydict(
        f_loss=lambda: tensor_utils.compile_function(
            inputs, loss, log_name="f_loss"),
        f_constraint=lambda: tensor_utils.compile_function(
            inputs, constraint_term, log_name="f_constraint"),
        f_penalized_loss=lambda: tensor_utils.compile_function(
            inputs=inputs + [penalty_var],
            outputs=[penalized_loss, loss, constraint_term],
            log_name="f_penalized_loss",
        ),
        f_opt=lambda: tensor_utils.compile_function(
            inputs=inputs + [penalty_var],
            outputs=get_opt_output(),
        ))
def update_opt(self, loss, target, leq_constraint, inputs, constraint_name="constraint", *args, **kwargs):
    """
    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over (Parameterized
        interface with ``get_params``).
    :param leq_constraint: A list of (f, epsilon) pairs, each encoding
        f(*inputs) <= epsilon.
    :param inputs: A list of symbolic variables as inputs.
    :return: No return value.
    """
    constraint_terms = [term for term, _ in leq_constraint]
    constraint_values = [val for _, val in leq_constraint]
    penalty_var = TT.scalar("penalty")
    # A single scalar penalty multiplies the sum of all constraint terms.
    penalty_loss = sum(constraint_terms[1:], constraint_terms[0])
    penalized_loss = loss + penalty_var * penalty_loss

    self._target = target
    self._max_constraint_vals = np.array(constraint_values)
    self._constraint_name = constraint_name

    def _opt_outputs():
        # Penalized loss and its flattened gradient, both cast to float64.
        grad_flat = flatten_tensor_variables(
            theano.grad(penalized_loss,
                        target.get_params(trainable=True),
                        disconnected_inputs='ignore'))
        return [
            penalized_loss.astype('float64'),
            grad_flat.astype('float64')
        ]

    self._opt_fun = lazydict(
        f_loss=lambda: compile_function(inputs, loss, log_name="f_loss"),
        f_constraint=lambda: compile_function(
            inputs, penalty_loss, log_name="f_constraint"),
        f_penalized_loss=lambda: compile_function(
            inputs=inputs + [penalty_var],
            outputs=[penalized_loss, loss] + constraint_terms,
            log_name="f_penalized_loss",
        ),
        f_opt=lambda: compile_function(inputs=inputs + [penalty_var],
                                       outputs=_opt_outputs(),
                                       log_name="f_opt"))
def update_opt(self, loss, target, inputs, extra_inputs=None, **kwargs):
    """
    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should implement methods of the
     :class:`rllab.core.paramerized.Parameterized` class, and expose a
     ``layers`` attribute (layers may carry a ``bayesreg`` regularizer).
    :param inputs: A list of symbolic variables as inputs
    :param extra_inputs: Optional additional symbolic inputs.
    :return: No return value.
    """
    # Optional interactive debugging hook, gated by a global config flag.
    if config.TF_NN_SETTRACE:
        ipdb.set_trace()
    self._target = target
    self._train_op = self._tf_optimizer.minimize(
        loss, var_list=target.get_params(trainable=True))
    # Operations for updating the prior of each Bayesian-regularized layer
    # from its current posterior parameters (mu directly; sigma via softplus
    # of rho). Each op is paired with the layer's 'empirical' hyperparameter.
    update_mus = [(l.bayesreg.hyperparams['empirical'],
                   l.bayesreg.w_mu.assign(l.W_mu))
                  for l in target.layers if hasattr(l, 'bayesreg')]
    update_rhos = [(l.bayesreg.hyperparams['empirical'],
                    l.bayesreg.w_sig.assign(tf.log(1.0 + tf.exp(l.W_rho))))
                   for l in target.layers if hasattr(l, 'bayesreg')]
    self._update_priors_ops = update_mus + update_rhos
    if extra_inputs is None:
        extra_inputs = list()
    self._input_vars = inputs + extra_inputs
    self._opt_fun = ext.lazydict(
        f_loss=lambda: tensor_utils.compile_function(
            inputs + extra_inputs, loss),
    )
    # FIX: dict.has_key() was removed in Python 3; use the `in` operator.
    if 'like_loss' in kwargs:
        def l_loss():
            return tensor_utils.compile_function(inputs + extra_inputs,
                                                 kwargs['like_loss'])
        self._opt_fun.set('l_loss', l_loss)
    if 'cmpx_loss' in kwargs:
        def c_loss():
            return tensor_utils.compile_function(inputs + extra_inputs,
                                                 kwargs['cmpx_loss'])
        self._opt_fun.set('c_loss', c_loss)
def update_opt(self, loss, target, inputs, extra_inputs=None, vars_to_optimize=None, **kwargs):
    """
    Set up the TensorFlow training op(s) and the lazily-compiled loss function.

    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over (Parameterized
        interface with ``get_params``).
    :param inputs: A list of symbolic variables as inputs.
    :param extra_inputs: Optional additional symbolic inputs.
    :param vars_to_optimize: Optional explicit variable list; defaults to the
        target's trainable parameters.
    :return: No return value.
    """
    self._target = target
    if vars_to_optimize is None:
        vars_to_optimize = target.get_params(trainable=True)

    def _build_train_ops():
        # Main train op, plus an optional warm-up op from the init optimizer.
        self._train_op = self._tf_optimizer.minimize(
            loss, var_list=vars_to_optimize)
        if self._init_tf_optimizer is not None:
            self._init_train_op = self._init_tf_optimizer.minimize(
                loss, var_list=vars_to_optimize)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    if update_ops:
        # Batch-norm update ops must run before each training step.
        with tf.control_dependencies([tf.group(*update_ops)]):
            _build_train_ops()
    else:
        _build_train_ops()

    if extra_inputs is None:
        extra_inputs = []
    self._input_vars = inputs + extra_inputs
    self._opt_fun = ext.lazydict(
        f_loss=lambda: tensor_utils.compile_function(
            inputs + extra_inputs, loss),
    )
    self.debug_loss = loss
    self.debug_vars = target.get_params(trainable=True)
    self.debug_target = target
def update_opt(self, f, target, inputs, reg_coeff):
    """Set up a finite-difference Hessian-vector-product approximation for ``f``.

    :param f: Symbolic (TensorFlow) expression whose Hessian-vector products
        are approximated by finite differences of its gradient.
    :param target: Parameterized object providing get/set of parameter values.
    :param inputs: List of symbolic input variables that ``f`` depends on.
    :param reg_coeff: Regularization coefficient, stored for use by the caller.
    """
    # Optional interactive debugging hook, gated by a global config flag.
    if config.TF_NN_SETTRACE:
        ipdb.set_trace()
    self.target = target
    self.reg_coeff = reg_coeff
    params = target.get_params(trainable=True)
    constraint_grads = tf.gradients(f, xs=params)
    # tf.gradients returns None for unconnected params; replace with zeros.
    for idx, (grad, param) in enumerate(zip(constraint_grads, params)):
        if grad is None:
            constraint_grads[idx] = tf.zeros_like(param)
    flat_grad = tensor_utils.flatten_tensor_variables(constraint_grads)

    def f_Hx_plain(*args):
        # args = (*function inputs, *vector pieces to multiply by the Hessian).
        inputs_ = args[:len(inputs)]
        xs = args[len(inputs):]
        flat_xs = np.concatenate([np.reshape(x, (-1, )) for x in xs])
        param_val = self.target.get_param_values(trainable=True)
        # Step size scaled inversely to the parameter norm, keeping the
        # relative perturbation roughly constant.
        eps = np.cast['float32'](self.base_eps /
                                 (np.linalg.norm(param_val) + 1e-8))
        self.target.set_param_values(param_val + eps * flat_xs,
                                     trainable=True)
        flat_grad_dvplus = self.opt_fun["f_grad"](*inputs_)
        # Restore the original parameters before the second gradient evaluation.
        self.target.set_param_values(param_val, trainable=True)
        if self.symmetric:
            # Central difference: (g(p + eps v) - g(p - eps v)) / (2 eps).
            self.target.set_param_values(param_val - eps * flat_xs,
                                         trainable=True)
            flat_grad_dvminus = self.opt_fun["f_grad"](*inputs_)
            hx = (flat_grad_dvplus - flat_grad_dvminus) / (2 * eps)
            self.target.set_param_values(param_val, trainable=True)
        else:
            # Forward difference: (g(p + eps v) - g(p)) / eps.
            # NOTE: this rebinds `flat_grad` locally only; the symbolic
            # flat_grad captured by the f_grad lambda below is unaffected.
            flat_grad = self.opt_fun["f_grad"](*inputs_)
            hx = (flat_grad_dvplus - flat_grad) / eps
        return hx

    self.opt_fun = ext.lazydict(
        f_grad=lambda: tensor_utils.compile_function(
            inputs=inputs,
            outputs=flat_grad,
            log_name="f_grad",
        ),
        f_Hx_plain=lambda: f_Hx_plain,
    )
def update_opt(self, loss, target, leq_constraint, inputs, extra_inputs=None,
               constraint_name="constraint", *args, **kwargs):
    """
    Build lazily-compiled loss/gradient/constraint functions and hand the
    constraint term to the Hessian-vector-product approximation.

    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over (Parameterized
        interface with ``get_params``).
    :param leq_constraint: A tuple (f, epsilon) encoding f(*inputs) <= epsilon.
    :param inputs: A list of symbolic variables as inputs.
    :param extra_inputs: Optional additional symbolic inputs.
    :return: No return value.
    """
    inputs = tuple(inputs)
    extra_inputs = tuple() if extra_inputs is None else tuple(extra_inputs)
    constraint_term, constraint_value = leq_constraint

    params = target.get_params(trainable=True)
    loss_grads = theano.grad(loss, wrt=params, disconnected_inputs='warn')
    flat_grad = ext.flatten_tensor_variables(loss_grads)

    self._hvp_approach.update_opt(
        f=constraint_term,
        target=target,
        inputs=inputs + extra_inputs,
        reg_coeff=self._reg_coeff)

    self._target = target
    self._max_constraint_val = constraint_value
    self._constraint_name = constraint_name

    joint_inputs = inputs + extra_inputs
    self._opt_fun = ext.lazydict(
        f_loss=lambda: ext.compile_function(
            inputs=joint_inputs,
            outputs=loss,
            log_name="f_loss",
        ),
        f_grad=lambda: ext.compile_function(
            inputs=joint_inputs,
            outputs=flat_grad,
            log_name="f_grad",
        ),
        f_constraint=lambda: ext.compile_function(
            inputs=joint_inputs,
            outputs=constraint_term,
            log_name="constraint",
        ),
        f_loss_constraint=lambda: ext.compile_function(
            inputs=joint_inputs,
            outputs=[loss, constraint_term],
            log_name="f_loss_constraint",
        ),
    )
def update_opt(self, loss, target, leq_constraint, inputs, constraint_name="constraint", *args, **kwargs):
    """
    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over (Parameterized
        interface with ``get_params``).
    :param leq_constraint: A tuple (f, epsilon) encoding f(*inputs) <= epsilon.
    :param inputs: A list of symbolic variables as inputs.
    :return: No return value.
    """
    constraint_term, constraint_value = leq_constraint
    with tf.variable_scope(self._name):
        penalty_var = tf.placeholder(tf.float32, tuple(), name="penalty")
    # Penalty-method objective: loss + penalty coefficient * constraint term.
    penalized_loss = loss + penalty_var * constraint_term

    self._target = target
    self._max_constraint_val = constraint_value
    self._constraint_name = constraint_name

    def _penalized_outputs():
        # Penalized loss and its flattened gradient, both cast to float64;
        # None (unconnected) gradients become zeros.
        tvars = target.get_params(trainable=True)
        grad_list = tf.gradients(penalized_loss, tvars)
        for i, (g, v) in enumerate(zip(grad_list, tvars)):
            if g is None:
                grad_list[i] = tf.zeros_like(v)
        flat = tensor_utils.flatten_tensor_variables(grad_list)
        return [
            tf.cast(penalized_loss, tf.float64),
            tf.cast(flat, tf.float64),
        ]

    self._opt_fun = ext.lazydict(
        f_loss=lambda: tensor_utils.compile_function(
            inputs, loss, log_name="f_loss"),
        f_constraint=lambda: tensor_utils.compile_function(
            inputs, constraint_term, log_name="f_constraint"),
        f_penalized_loss=lambda: tensor_utils.compile_function(
            inputs=inputs + [penalty_var],
            outputs=[penalized_loss, loss, constraint_term],
            log_name="f_penalized_loss",
        ),
        f_opt=lambda: tensor_utils.compile_function(
            inputs=inputs + [penalty_var],
            outputs=_penalized_outputs(),
        )
    )
def update_opt(self, f, target, inputs, reg_coeff):
    """Set up a finite-difference Hessian-vector-product approximation for ``f``.

    :param f: Symbolic (Theano) expression whose Hessian-vector products are
        approximated by finite differences of its gradient.
    :param target: Parameterized object providing get/set of parameter values.
    :param inputs: List of symbolic input variables that ``f`` depends on.
    :param reg_coeff: Regularization coefficient, stored for use by the caller.
    """
    self.target = target
    self.reg_coeff = reg_coeff
    params = target.get_params(trainable=True)
    constraint_grads = theano.grad(f, wrt=params, disconnected_inputs='warn')
    flat_grad = ext.flatten_tensor_variables(constraint_grads)

    def f_Hx_plain(*args):
        # args = (*function inputs, *vector pieces to multiply by the Hessian).
        inputs_ = args[:len(inputs)]
        xs = args[len(inputs):]
        flat_xs = np.concatenate([np.reshape(x, (-1, )) for x in xs])
        param_val = self.target.get_param_values(trainable=True)
        # Step size scaled inversely to the parameter norm, keeping the
        # relative perturbation roughly constant.
        eps = np.cast['float32'](self.base_eps / (np.linalg.norm(param_val) + 1e-8))
        self.target.set_param_values(param_val + eps * flat_xs, trainable=True)
        flat_grad_dvplus = self.opt_fun["f_grad"](*inputs_)
        if self.symmetric:
            # Central difference: (g(p + eps v) - g(p - eps v)) / (2 eps).
            self.target.set_param_values(param_val - eps * flat_xs, trainable=True)
            flat_grad_dvminus = self.opt_fun["f_grad"](*inputs_)
            hx = (flat_grad_dvplus - flat_grad_dvminus) / (2 * eps)
            self.target.set_param_values(param_val, trainable=True)
        else:
            # Forward difference: (g(p + eps v) - g(p)) / eps, with params
            # restored before the base gradient is evaluated.
            # NOTE: this rebinds `flat_grad` locally only; the symbolic
            # flat_grad captured by the f_grad lambda below is unaffected.
            self.target.set_param_values(param_val, trainable=True)
            flat_grad = self.opt_fun["f_grad"](*inputs_)
            hx = (flat_grad_dvplus - flat_grad) / eps
        return hx

    self.opt_fun = ext.lazydict(
        f_grad=lambda: ext.compile_function(
            inputs=inputs,
            outputs=flat_grad,
            log_name="f_grad",
        ),
        f_Hx_plain=lambda: f_Hx_plain,
    )
def update_opt(self, f, target, inputs, reg_coeff):
    """Set up a finite-difference Hessian-vector-product approximation for ``f``.

    :param f: Symbolic (Theano) expression whose Hessian-vector products are
        approximated by finite differences of its gradient.
    :param target: Parameterized object providing get/set of parameter values.
    :param inputs: List of symbolic input variables that ``f`` depends on.
    :param reg_coeff: Regularization coefficient, stored for use by the caller.
    """
    self.target = target
    self.reg_coeff = reg_coeff
    params = target.get_params(trainable=True)
    grad_exprs = theano.grad(f, wrt=params, disconnected_inputs='warn')
    flat_grad = ext.flatten_tensor_variables(grad_exprs)

    def f_Hx_plain(*args):
        # Split the call arguments into function inputs and vector pieces.
        fn_inputs = args[:len(inputs)]
        vec_parts = args[len(inputs):]
        flat_vec = np.concatenate([np.reshape(v, (-1,)) for v in vec_parts])
        theta = self.target.get_param_values(trainable=True)
        # Step size scaled inversely to the parameter norm, keeping the
        # relative perturbation roughly constant.
        eps = np.cast['float32'](
            self.base_eps / (np.linalg.norm(theta) + 1e-8))
        self.target.set_param_values(theta + eps * flat_vec, trainable=True)
        grad_plus = self.opt_fun["f_grad"](*fn_inputs)
        if self.symmetric:
            # Central difference: (g(theta+eps v) - g(theta-eps v)) / (2 eps).
            self.target.set_param_values(theta - eps * flat_vec, trainable=True)
            grad_minus = self.opt_fun["f_grad"](*fn_inputs)
            hx = (grad_plus - grad_minus) / (2 * eps)
            self.target.set_param_values(theta, trainable=True)
        else:
            # Forward difference: (g(theta+eps v) - g(theta)) / eps, with
            # parameters restored before the base gradient is evaluated.
            self.target.set_param_values(theta, trainable=True)
            grad_base = self.opt_fun["f_grad"](*fn_inputs)
            hx = (grad_plus - grad_base) / eps
        return hx

    self.opt_fun = ext.lazydict(
        f_grad=lambda: ext.compile_function(
            inputs=inputs,
            outputs=flat_grad,
            log_name="f_grad",
        ),
        f_Hx_plain=lambda: f_Hx_plain,
    )
def update_opt(self, f, target, inputs, reg_coeff):
    """Set up a finite-difference Hessian-vector-product approximation for ``f``.

    :param f: Symbolic (TensorFlow) expression whose Hessian-vector products
        are approximated by finite differences of its gradient.
    :param target: Parameterized object providing get/set of parameter values.
    :param inputs: List of symbolic input variables that ``f`` depends on.
    :param reg_coeff: Regularization coefficient, stored for use by the caller.
    """
    self.target = target
    self.reg_coeff = reg_coeff
    params = target.get_params(trainable=True)
    constraint_grads = tf.gradients(f, xs=params)
    # tf.gradients returns None for unconnected params; replace with zeros.
    for idx, (grad, param) in enumerate(zip(constraint_grads, params)):
        if grad is None:
            constraint_grads[idx] = tf.zeros_like(param)
    flat_grad = tensor_utils.flatten_tensor_variables(constraint_grads)

    def f_Hx_plain(*args):
        # args = (*function inputs, *vector pieces to multiply by the Hessian).
        inputs_ = args[:len(inputs)]
        xs = args[len(inputs):]
        flat_xs = np.concatenate([np.reshape(x, (-1,)) for x in xs])
        param_val = self.target.get_param_values(trainable=True)
        # Step size scaled inversely to the parameter norm, keeping the
        # relative perturbation roughly constant.
        eps = np.cast['float32'](self.base_eps / (np.linalg.norm(param_val) + 1e-8))
        self.target.set_param_values(param_val + eps * flat_xs, trainable=True)
        flat_grad_dvplus = self.opt_fun["f_grad"](*inputs_)
        # Restore the original parameters before the second gradient evaluation.
        self.target.set_param_values(param_val, trainable=True)
        if self.symmetric:
            # Central difference: (g(p + eps v) - g(p - eps v)) / (2 eps).
            self.target.set_param_values(param_val - eps * flat_xs, trainable=True)
            flat_grad_dvminus = self.opt_fun["f_grad"](*inputs_)
            hx = (flat_grad_dvplus - flat_grad_dvminus) / (2 * eps)
            self.target.set_param_values(param_val, trainable=True)
        else:
            # Forward difference: (g(p + eps v) - g(p)) / eps.
            # NOTE: this rebinds `flat_grad` locally only; the symbolic
            # flat_grad captured by the f_grad lambda below is unaffected.
            flat_grad = self.opt_fun["f_grad"](*inputs_)
            hx = (flat_grad_dvplus - flat_grad) / eps
        return hx

    self.opt_fun = ext.lazydict(
        f_grad=lambda: tensor_utils.compile_function(
            inputs=inputs,
            outputs=flat_grad,
            log_name="f_grad",
        ),
        f_Hx_plain=lambda: f_Hx_plain,
    )
def update_opt(self, loss, target, leq_constraint, inputs, constraint_name="constraint", *args, **kwargs):
    """
    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over (Parameterized
        interface with ``get_params``).
    :param leq_constraint: A tuple (f, epsilon) encoding f(*inputs) <= epsilon.
    :param inputs: A list of symbolic variables as inputs.
    :return: No return value.
    """
    constraint_term, constraint_value = leq_constraint
    penalty_var = TT.scalar("penalty")
    # Penalty-method objective: loss + penalty coefficient * constraint term.
    penalized_loss = loss + penalty_var * constraint_term

    self._target = target
    self._max_constraint_val = constraint_value
    self._constraint_name = constraint_name

    def _opt_outputs():
        # Penalized loss and its flattened gradient, both cast to float64.
        grad_flat = flatten_tensor_variables(theano.grad(
            penalized_loss,
            target.get_params(trainable=True),
            disconnected_inputs='ignore'
        ))
        return [
            penalized_loss.astype('float64'),
            grad_flat.astype('float64'),
        ]

    self._opt_fun = lazydict(
        f_loss=lambda: compile_function(inputs, loss, log_name="f_loss"),
        f_constraint=lambda: compile_function(
            inputs, constraint_term, log_name="f_constraint"),
        f_penalized_loss=lambda: compile_function(
            inputs=inputs + [penalty_var],
            outputs=[penalized_loss, loss, constraint_term],
            log_name="f_penalized_loss",
        ),
        f_opt=lambda: compile_function(
            inputs=inputs + [penalty_var],
            outputs=_opt_outputs(),
            log_name="f_opt"
        )
    )
def update_opt(self, loss, target, inputs, extra_inputs=None, **kwargs):
    """
    Set up the TensorFlow training op(s) and the lazily-compiled loss function.

    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over (Parameterized
        interface with ``get_params``).
    :param inputs: A list of symbolic variables as inputs.
    :param extra_inputs: Optional additional symbolic inputs.
    :return: No return value.
    """
    self._target = target

    def _build_train_ops():
        # Main train op, plus an optional warm-up op from the init optimizer.
        self._train_op = self._tf_optimizer.minimize(
            loss, var_list=target.get_params(trainable=True))
        if self._init_tf_optimizer is not None:
            self._init_train_op = self._init_tf_optimizer.minimize(
                loss, var_list=target.get_params(trainable=True))

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    if update_ops:
        # Batch-norm update ops must run before each training step.
        with tf.control_dependencies([tf.group(*update_ops)]):
            _build_train_ops()
    else:
        _build_train_ops()

    if extra_inputs is None:
        extra_inputs = []
    self._input_vars = inputs + extra_inputs
    self._opt_fun = ext.lazydict(
        f_loss=lambda: tensor_utils.compile_function(
            inputs + extra_inputs, loss),
    )
    self.debug_loss = loss
    self.debug_vars = target.get_params(trainable=True)
    self.debug_target = target
def update_opt(self, loss, target, inputs, extra_inputs=None, gradients=None, *args, **kwargs):
    """
    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should implement methods of the
     :class:`rllab.core.paramerized.Parameterized` class.
    :param inputs: A list of symbolic variables as inputs
    :param gradients: symbolic expressions for the gradients of trainable parameters of the
     target. By default this will be computed by calling theano.grad
    :return: No return value.
    """
    self._target = target
    extra_inputs = list() if extra_inputs is None else extra_inputs
    all_inputs = inputs + extra_inputs

    def opt_outputs(grad_exprs):
        # Fall back to theano-derived gradients when none were supplied by the caller.
        if grad_exprs is None:
            grad_exprs = theano.grad(loss, target.get_params(trainable=True))
        # float64 outputs: the scipy optimizer backend expects double precision.
        return [loss.astype('float64'),
                flatten_tensor_variables(grad_exprs).astype('float64')]

    # lazydict defers compilation (and hence the theano.grad call in f_opt)
    # until the function is first requested.
    self._opt_fun = lazydict(
        f_loss=lambda: compile_function(all_inputs, loss),
        f_opt=lambda: compile_function(
            inputs=all_inputs,
            outputs=opt_outputs(gradients),
        ))
def update_opt(self, loss, target, inputs, extra_inputs=None):
    """
    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should implement methods of the
     :class:`rllab.core.paramerized.Parameterized` class.
    :param inputs: A list of symbolic variables as inputs
    :param extra_inputs: A list of symbolic variables as inputs
    :return: No return value.
    """
    self._target = target

    # Compute raw gradients, clip them, then build the apply op.
    raw_grads = self._tf_optimizer.compute_gradients(
        loss=loss, var_list=target.get_params(trainable=True))
    clipped_grads = utils.clip_grads(raw_grads, self.clip_grads)
    self._train_op = self._tf_optimizer.apply_gradients(clipped_grads)

    extra_inputs = list() if extra_inputs is None else extra_inputs
    all_inputs = inputs + extra_inputs
    self._input_vars = all_inputs
    self._opt_fun = ext.lazydict(
        f_loss=lambda: tensor_utils.compile_function(all_inputs, loss),
    )
def update_opt(self, loss, target, inputs, extra_inputs=None, gradients=None, **kwargs):
    """
    Prepare lazily compiled loss and flat-gradient functions for `target`.

    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over.
    :param inputs: A list of symbolic variables as inputs
    :param extra_inputs: A list of symbolic variables as extra inputs
    :param gradients: optional symbolic gradient expressions; computed via
     theano.grad when not provided.
    :return: No return value.
    """
    self._target = target

    # Default gradients: ignore params disconnected from the loss graph.
    if gradients is None:
        gradients = theano.grad(
            loss, target.get_params(trainable=True), disconnected_inputs='ignore')
    grad_vector = ext.flatten_tensor_variables(gradients)

    extra_inputs = list() if extra_inputs is None else extra_inputs
    all_inputs = inputs + extra_inputs

    self._opt_fun = ext.lazydict(
        f_loss=lambda: ext.compile_function(
            all_inputs, loss, log_name=self._name + "_f_loss"),
        f_grad=lambda: ext.compile_function(
            inputs=all_inputs,
            outputs=grad_vector,
            log_name=self._name + "_f_grad"),
    )
def update_opt(self, loss, target, inputs, network_outputs, extra_inputs=None):
    """
    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should implement methods of the
     :class:`rllab.core.paramerized.Parameterized` class.
    :param inputs: A list of symbolic variables as inputs
    :return: No return value.
    """
    self._target = target

    extra_inputs = list() if extra_inputs is None else extra_inputs
    all_inputs = inputs + extra_inputs

    # Hand the symbolic pieces to the Hessian-free optimizer:
    # params, inputs, network outputs, and the cost(s) to minimize.
    self._hf_optimizer = hf_optimizer(
        _p=target.get_params(trainable=True),
        inputs=all_inputs,
        s=network_outputs,
        costs=[loss],
    )
    self._opt_fun = lazydict(
        f_loss=lambda: compile_function(all_inputs, loss),
    )
def update_opt(self, loss, target, quad_leq_constraint, lin_leq_constraint, inputs,
               extra_inputs=None, constraint_name_1="quad_constraint",
               constraint_name_2="lin_constraint", using_surrogate=False,
               true_linear_leq_constraint=None, precompute=False,
               attempt_feasible_recovery=False, attempt_infeasible_recovery=False,
               revert_to_last_safe_point=False, *args, **kwargs):
    """
    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should implement methods of the
     :class:`rllab.core.paramerized.Parameterized` class.
    :param lin_leq_constraint: A constraint provided as a tuple (f, epsilon), of the form
     f(*inputs) <= epsilon. This constraint will be linearized.
    :param quad_leq_constraint: A constraint provided as a tuple (f, epsilon), of the form
     f(*inputs) <= epsilon. This constraint will be quadratified.
    :param inputs: A list of symbolic variables as inputs, which could be subsampled if needed.
     It is assumed that the first dimension of these inputs should correspond to the number of
     data points
    :param extra_inputs: A list of symbolic variables as extra inputs which should not be
     subsampled
    :return: No return value.

    All right, on the business of this "using_surrogate" and "true_linear_leq_constraint"
    stuff...

    In rllab, when we optimize a policy, we minimize a "surrogate loss" function (or, if you
    prefer, maximize a surrogate return). The surrogate loss function we optimize is
    mean( lr * advantage ), where 'lr' is the likelihood ratio of the new policy with respect
    to the old policy, lr(s,a) = pi_new(a|s) / pi_old(a|s). We choose this surrogate loss
    function because its gradient is equal to the gradient of the true objective function
    when pi_new = pi_old.

    However, the real thing we want to optimize is J(pi) = E_{tau ~ pi} [R(tau)]. If we wanted
    to measure J(pi_old), it would not suffice to calculate the surrogate loss function at
    pi_old. Usually this is not an issue because we don't actually need to compute J(pi_old)
    at all, because we have no need for it. But in our optimization procedure here, we need to
    calculate a directly analogous property - the expected safety return - because its value
    matters for constraint enforcement in our linear approximation.

    So, "using_surrogate" and "true_linear_leq_constraint" are here to handle the cases where
    the "lin_leq_constraint" argument submitted by the user is really a SURROGATE
    leq_constraint, which we can get a good gradient from, but when we need a different
    symbolic expression to actually evaluate the linear_leq_constraint. "use_surrogate" is the
    flag indicating that the lin_leq_constraint argument is in fact a surrogate, and then
    "true_linear_leq_constraint" is for the actual value.

    :param precompute: Use an 'input' for the linearization constant instead of
     true_linear_leq_constraint. If present, overrides surrogate. When using precompute, the
     last input is the precomputed linearization constant
    :param attempt_(in)feasible_recovery: deals with cases where x=0 is infeasible point but
     problem still feasible (where optimization problem is entirely infeasible)
    :param revert_to_last_safe_point: Behavior protocol for situation when optimization
     problem is entirely infeasible. Specifies that we should just reset the parameters to the
     last point that satisfied constraint.
    """
    # Recovery/behavior flags for the constrained optimization procedure.
    self.precompute = precompute
    self.attempt_feasible_recovery = attempt_feasible_recovery
    self.attempt_infeasible_recovery = attempt_infeasible_recovery
    self.revert_to_last_safe_point = revert_to_last_safe_point

    # Normalize to tuples so they can be concatenated below.
    inputs = tuple(inputs)
    if extra_inputs is None:
        extra_inputs = tuple()
    else:
        extra_inputs = tuple(extra_inputs)

    # Each constraint is a (symbolic term, scalar bound) pair.
    constraint_term_1, constraint_value_1 = quad_leq_constraint
    constraint_term_2, constraint_value_2 = lin_leq_constraint

    params = target.get_params(trainable=True)
    grads = theano.grad(loss, wrt=params, disconnected_inputs='warn')
    flat_grad = ext.flatten_tensor_variables(grads)

    # Gradient of the (possibly surrogate) linear constraint, taken BEFORE any
    # swap to the true constraint expression below — the surrogate is what
    # provides the useful gradient (see docstring).
    lin_constraint_grads = theano.grad(constraint_term_2, wrt=params, disconnected_inputs='warn')
    flat_lin_constraint_grad = ext.flatten_tensor_variables(
        lin_constraint_grads)

    # When the provided linear constraint is only a surrogate (and we are not
    # precomputing the linearization constant), evaluate the true expression.
    if using_surrogate and not (precompute):
        constraint_term_2 = true_linear_leq_constraint

    # The quadratic constraint term feeds the Hessian-vector-product machinery.
    self._hvp_approach.update_opt(f=constraint_term_1, target=target,
                                  inputs=inputs + extra_inputs, reg_coeff=self._reg_coeff)

    self._target = target
    self._max_quad_constraint_val = constraint_value_1
    self._max_lin_constraint_val = constraint_value_2
    self._constraint_name_1 = constraint_name_1
    self._constraint_name_2 = constraint_name_2

    # Lazily compiled functions over the full (non-subsampled) input set.
    self._opt_fun = ext.lazydict(
        f_loss=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=loss,
            log_name="f_loss",
        ),
        f_grad=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=flat_grad,
            log_name="f_grad",
        ),
        f_quad_constraint=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=constraint_term_1,
            log_name="quad_constraint",
        ),
        f_lin_constraint=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=constraint_term_2,
            log_name="lin_constraint",
        ),
        f_lin_constraint_grad=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=flat_lin_constraint_grad,
            log_name="lin_constraint_grad",
        ),
        f_loss_constraint=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=[loss, constraint_term_1, constraint_term_2],
            log_name="f_loss_constraint",
        ),
    )

    # State used by the recovery protocol across optimization steps.
    self.last_safe_point = None
    self._last_lin_pred_S = 0
    self._last_surr_pred_S = 0
def update_opt(self, loss, target, leq_constraint, inputs, extra_inputs=None, constraint_name="constraint", *args, **kwargs):
    """
    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should implement methods of the
     :class:`rllab.core.paramerized.Parameterized` class.
    :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form
     f(*inputs) <= epsilon.
    :param inputs: A list of symbolic variables as inputs, which could be subsampled if
     needed. It is assumed that the first dimension of these inputs should correspond to the
     number of data points
    :param extra_inputs: A list of symbolic variables as extra inputs which should not be
     subsampled
    :return: No return value.
    """
    # Normalize to tuples so they can be concatenated below.
    inputs = tuple(inputs)
    if extra_inputs is None:
        extra_inputs = tuple()
    else:
        extra_inputs = tuple(extra_inputs)

    constraint_term, constraint_value = leq_constraint

    params = target.get_params(trainable=True)
    grads = theano.grad(loss, wrt=params)
    flat_grad = ext.flatten_tensor_variables(grads)

    constraint_grads = theano.grad(constraint_term, wrt=params)
    # One vector input per parameter tensor, used as the "x" in H*x.
    xs = tuple([ext.new_tensor_like("%s x" % p.name, p) for p in params])

    # Hessian-vector product via the Pearlmutter trick: grad of (grad(c) . x).
    # Fix: itertools.izip is Python 2 only (removed in Python 3); use the
    # builtin zip, consistent with the other Hx_plain construction in this file.
    Hx_plain_splits = TT.grad(
        TT.sum([TT.sum(g * x) for g, x in zip(constraint_grads, xs)]),
        wrt=params,
    )
    Hx_plain = TT.concatenate([TT.flatten(s) for s in Hx_plain_splits])

    self._target = target
    self._max_constraint_val = constraint_value
    self._constraint_name = constraint_name

    if self._debug_nan:
        # Compile with NaN/Inf guarding when debugging numerical issues.
        from theano.compile.nanguardmode import NanGuardMode
        mode = NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
    else:
        mode = None

    self._opt_fun = ext.lazydict(
        f_loss=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=loss,
            log_name="f_loss",
            mode=mode,
        ),
        f_grad=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=flat_grad,
            log_name="f_grad",
            mode=mode,
        ),
        f_Hx_plain=lambda: ext.compile_function(
            inputs=inputs + extra_inputs + xs,
            outputs=Hx_plain,
            log_name="f_Hx_plain",
            mode=mode,
        ),
        f_constraint=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=constraint_term,
            log_name="constraint",
            mode=mode,
        ),
        f_loss_constraint=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=[loss, constraint_term],
            log_name="f_loss_constraint",
            mode=mode,
        ),
    )
def update_opt(self, loss, target, quad_leq_constraint, lin_leq_constraint, inputs,
               extra_inputs=None, constraint_name_1="quad_constraint",
               constraint_name_2="lin_constraint", using_surrogate=False,
               true_linear_leq_constraint=None, precompute=False,
               attempt_feasible_recovery=False, attempt_infeasible_recovery=False,
               revert_to_last_safe_point=False, *args, **kwargs):
    """
    Set up the doubly-constrained (quadratified + linearized) optimization problem.

    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over.
    :param quad_leq_constraint: tuple (f, epsilon) with f(*inputs) <= epsilon; quadratified.
    :param lin_leq_constraint: tuple (f, epsilon) with f(*inputs) <= epsilon; linearized.
    :param inputs: Symbolic input variables (may be subsampled); first dimension is assumed
     to index data points.
    :param extra_inputs: Symbolic inputs that should never be subsampled.
    :param using_surrogate: lin_leq_constraint is a surrogate usable for gradients only;
     true_linear_leq_constraint then supplies the expression for evaluation.
    :param precompute: Use an input for the linearization constant instead of
     true_linear_leq_constraint; overrides the surrogate handling.
    :param attempt_feasible_recovery / attempt_infeasible_recovery /
     revert_to_last_safe_point: recovery behavior when the subproblem is infeasible.
    :return: No return value.
    """
    # Recovery/behavior flags for the constrained optimization procedure.
    self.precompute = precompute
    self.attempt_feasible_recovery = attempt_feasible_recovery
    self.attempt_infeasible_recovery = attempt_infeasible_recovery
    self.revert_to_last_safe_point = revert_to_last_safe_point

    # Normalize to tuples so they can be concatenated below.
    inputs = tuple(inputs)
    if extra_inputs is None:
        extra_inputs = tuple()
    else:
        extra_inputs = tuple(extra_inputs)

    # Each constraint is a (symbolic term, scalar bound) pair.
    constraint_term_1, constraint_value_1 = quad_leq_constraint
    constraint_term_2, constraint_value_2 = lin_leq_constraint

    params = target.get_params(trainable=True)
    grads = theano.grad(loss, wrt=params, disconnected_inputs='warn')
    flat_grad = ext.flatten_tensor_variables(grads)

    # Gradient of the (possibly surrogate) linear constraint, taken BEFORE any
    # swap to the true constraint expression below — the surrogate provides the
    # useful gradient.
    lin_constraint_grads = theano.grad(constraint_term_2, wrt=params, disconnected_inputs='warn')
    flat_lin_constraint_grad = ext.flatten_tensor_variables(lin_constraint_grads)

    # When the provided linear constraint is only a surrogate (and we are not
    # precomputing the linearization constant), evaluate the true expression.
    if using_surrogate and not(precompute):
        constraint_term_2 = true_linear_leq_constraint

    # The quadratic constraint term feeds the Hessian-vector-product machinery.
    self._hvp_approach.update_opt(f=constraint_term_1, target=target,
                                  inputs=inputs + extra_inputs, reg_coeff=self._reg_coeff)

    self._target = target
    self._max_quad_constraint_val = constraint_value_1
    self._max_lin_constraint_val = constraint_value_2
    self._constraint_name_1 = constraint_name_1
    self._constraint_name_2 = constraint_name_2

    # Lazily compiled functions over the full (non-subsampled) input set.
    self._opt_fun = ext.lazydict(
        f_loss=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=loss,
            log_name="f_loss",
        ),
        f_grad=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=flat_grad,
            log_name="f_grad",
        ),
        f_quad_constraint=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=constraint_term_1,
            log_name="quad_constraint",
        ),
        f_lin_constraint=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=constraint_term_2,
            log_name="lin_constraint",
        ),
        f_lin_constraint_grad=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=flat_lin_constraint_grad,
            log_name="lin_constraint_grad",
        ),
        f_loss_constraint=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=[loss, constraint_term_1, constraint_term_2],
            log_name="f_loss_constraint",
        ),
    )

    # State used by the recovery protocol across optimization steps.
    self.last_safe_point = None
    self._last_lin_pred_S = 0
    self._last_surr_pred_S = 0
def update_opt(self, loss, target, leq_constraint, inputs, constraint_name="constraint", dummy_loss=None, *args, **kwargs):
    """
    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should implement methods of the
     :class:`rllab.core.paramerized.Parameterized` class.
    :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form
     f(*inputs) <= epsilon.
    :param inputs: A list of symbolic variables as inputs
    :return: No return value.
    """
    constraint_term, constraint_value = leq_constraint
    with tf.variable_scope(self._name):
        # Scalar penalty coefficient fed at call time.
        penalty_var = tf.placeholder(tf.float32, tuple(), name="penalty")
        penalized_loss = loss + penalty_var * constraint_term
    self._target = target
    self._max_constraint_val = constraint_value
    self._constraint_name = constraint_name
    self._inputs = inputs
    self._loss = loss
    self._dummy_loss = dummy_loss
    if self._use_momentum_optimizer:
        self._adam = tf.train.MomentumOptimizer(learning_rate=0.0000001, momentum=0.997, name=self._name)
        # Momentum path is intentionally disabled.
        assert False, "not supported at the moment"
    else:
        self._adam = tf.train.AdamOptimizer(
            name=self._name)  #learning_rate=0.001
    # Initializers for only this optimizer's variables, found by name-scope match.
    self._optimizer_vars_initializers = [
        var.initializer for var in tf.global_variables()
        if self._name in var.name
    ]
    # Optional per-variable additive gradient correction, passed via kwargs.
    if "correction_term" in kwargs:
        self._correction_term = kwargs["correction_term"]
    else:
        self._correction_term = None
    # NOTE(review): var_list=self._target passes the Parameterized object itself,
    # not target.get_params(trainable=True) as the sibling optimizers do —
    # confirm the project's optimizer wrapper accepts this.
    self._gradients = self._adam.compute_gradients(loss=self._loss,
                                                   var_list=self._target)
    if self._correction_term is None:
        self._train_step = self._adam.apply_gradients(self._gradients)
    else:
        # Add the correction term to the gradient of each variable that has one.
        self.new_gradients = []
        for grad, var in self._gradients:
            if var in self._correction_term.keys():
                self.new_gradients.append(
                    (grad + self._correction_term[var], var))
            else:
                self.new_gradients.append((grad, var))
        self._train_step = self._adam.apply_gradients(self.new_gradients)

    # initialize Adam variables
    uninit_vars = []
    sess = tf.get_default_session()
    if sess is None:
        sess = tf.Session()
    for var in tf.global_variables():
        # note - this is hacky, may be better way to do this in newer TF.
        # Running the variable raises FailedPreconditionError iff uninitialized.
        try:
            sess.run(var)
        except tf.errors.FailedPreconditionError:
            uninit_vars.append(var)
    sess.run(tf.variables_initializer(uninit_vars))

    def get_opt_output():
        # Flat gradient of the penalized objective; None gradients (disconnected
        # params) are replaced with zeros so the flattened vector keeps its shape.
        params = target.get_params(trainable=True)
        grads = tf.gradients(penalized_loss, params)
        for idx, (grad, param) in enumerate(zip(grads, params)):
            if grad is None:
                grads[idx] = tf.zeros_like(param)
        flat_grad = tensor_utils.flatten_tensor_variables(grads)
        return [
            tf.cast(penalized_loss, tf.float32),
            tf.cast(flat_grad, tf.float32),
        ]

    # Lazily compiled functions; penalty-dependent ones take penalty_var last.
    self._opt_fun = ext.lazydict(
        f_loss=lambda: tensor_utils.compile_function(
            inputs, loss, log_name="f_loss"),
        f_constraint=lambda: tensor_utils.compile_function(
            inputs, constraint_term, log_name="f_constraint"),
        f_penalized_loss=lambda: tensor_utils.compile_function(
            inputs=inputs + [penalty_var],
            outputs=[penalized_loss, loss, constraint_term],
            log_name="f_penalized_loss",
        ),
        f_opt=lambda: tensor_utils.compile_function(
            inputs=inputs + [penalty_var],
            outputs=get_opt_output(),
        ))