def update_opt(self, loss, target, leq_constraint, inputs, extra_inputs=None,
               constraint_name="constraint", *args, **kwargs):
    """
    Set up the lazily built functions for a constrained optimization problem.

    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should implement methods of the
     :class:`rllab.core.paramerized.Parameterized` class.
    :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
    :param inputs: A list of symbolic variables as inputs, which could be subsampled if needed. It is assumed
     that the first dimension of these inputs should correspond to the number of data points.
    :param extra_inputs: A list of symbolic variables as extra inputs which should not be subsampled.
    :param constraint_name: Label stored for the constraint.
    :return: No return value.
    """
    inputs = tuple(inputs)
    extra_inputs = tuple() if extra_inputs is None else tuple(extra_inputs)

    constraint_term, constraint_value = leq_constraint

    params = target.get_params(trainable=True)
    grads = theano.grad(loss, wrt=params)
    flat_grad = ext.flatten_tensor_variables(grads)

    constraint_grads = theano.grad(constraint_term, wrt=params)
    # One symbolic placeholder per parameter; together they make up the vector x.
    xs = tuple([ext.new_tensor_like("%s x" % p.name, p) for p in params])
    # Differentiating sum_i <g_i, x_i> with respect to the parameters yields the
    # second-derivative-times-x product of the constraint without building the matrix.
    # Builtin zip is used because itertools.izip does not exist on Python 3.
    Hx_plain_splits = TT.grad(
        TT.sum([TT.sum(g * x) for g, x in zip(constraint_grads, xs)]),
        wrt=params,
    )
    Hx_plain = TT.concatenate([TT.flatten(s) for s in Hx_plain_splits])

    self._target = target
    self._max_constraint_val = constraint_value
    self._constraint_name = constraint_name

    if self._debug_nan:
        from theano.compile.nanguardmode import NanGuardMode
        mode = NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
    else:
        mode = None

    # Each entry is a zero-argument callable handed to ext.lazydict.
    self._opt_fun = ext.lazydict(
        f_loss=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=loss,
            log_name="f_loss",
            mode=mode,
        ),
        f_grad=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=flat_grad,
            log_name="f_grad",
            mode=mode,
        ),
        f_Hx_plain=lambda: ext.compile_function(
            inputs=inputs + extra_inputs + xs,
            outputs=Hx_plain,
            log_name="f_Hx_plain",
            mode=mode,
        ),
        f_constraint=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=constraint_term,
            log_name="constraint",
            mode=mode,
        ),
        f_loss_constraint=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=[loss, constraint_term],
            log_name="f_loss_constraint",
            mode=mode,
        ),
    )
def get_opt_output():
    """Return [penalized_loss, flattened gradient], both cast to float64."""
    trainable_params = target.get_params(trainable=True)
    param_grads = theano.grad(
        penalized_loss,
        trainable_params,
        disconnected_inputs='ignore',
    )
    flat_grad = flatten_tensor_variables(param_grads)
    return [expr.astype('float64') for expr in (penalized_loss, flat_grad)]
def update_opt(self, loss, target, leq_constraint, inputs, extra_inputs=None,
               constraint_name="constraint", *args, **kwargs):
    """
    Register the loss, the constraint and the lazily built functions for them.

    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should implement methods of the
     :class:`rllab.core.paramerized.Parameterized` class.
    :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
    :param inputs: A list of symbolic variables as inputs, which could be subsampled if needed. It is assumed
     that the first dimension of these inputs should correspond to the number of data points.
    :param extra_inputs: A list of symbolic variables as extra inputs which should not be subsampled.
    :param constraint_name: Label stored for the constraint.
    :return: No return value.
    """
    inputs = tuple(inputs)
    extra_inputs = tuple(extra_inputs) if extra_inputs is not None else tuple()
    all_inputs = inputs + extra_inputs

    constraint_term, constraint_value = leq_constraint

    trainable_params = target.get_params(trainable=True)
    loss_grads = theano.grad(loss, wrt=trainable_params, disconnected_inputs='warn')
    flat_grad = ext.flatten_tensor_variables(loss_grads)

    # The Hessian-vector-product helper is configured on the constraint, not the loss.
    self._hvp_approach.update_opt(
        f=constraint_term,
        target=target,
        inputs=all_inputs,
        reg_coeff=self._reg_coeff,
    )

    self._target = target
    self._max_constraint_val = constraint_value
    self._constraint_name = constraint_name

    def deferred(outputs, log_name):
        # Zero-argument callable for ext.lazydict; nothing is compiled here.
        return lambda: ext.compile_function(
            inputs=all_inputs,
            outputs=outputs,
            log_name=log_name,
        )

    self._opt_fun = ext.lazydict(
        f_loss=deferred(loss, "f_loss"),
        f_grad=deferred(flat_grad, "f_grad"),
        f_constraint=deferred(constraint_term, "constraint"),
        f_loss_constraint=deferred([loss, constraint_term], "f_loss_constraint"),
    )
def update_opt(self, loss, target, leq_constraint, inputs, extra_inputs=None,
               constraint_name="constraint", *args, **kwargs):
    """
    Store the optimization problem and prepare its lazily built functions.

    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should implement methods of the
     :class:`rllab.core.paramerized.Parameterized` class.
    :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
    :param inputs: A list of symbolic variables as inputs, which could be subsampled if needed. It is assumed
     that the first dimension of these inputs should correspond to the number of data points.
    :param extra_inputs: A list of symbolic variables as extra inputs which should not be subsampled.
    :param constraint_name: Label stored for the constraint.
    :return: No return value.
    """
    inputs = tuple(inputs)
    if extra_inputs is not None:
        extra_inputs = tuple(extra_inputs)
    else:
        extra_inputs = tuple()
    fn_inputs = inputs + extra_inputs

    constraint_term, constraint_value = leq_constraint

    flat_grad = ext.flatten_tensor_variables(
        theano.grad(
            loss,
            wrt=target.get_params(trainable=True),
            disconnected_inputs='warn',
        )
    )

    # Curvature information is taken from the constraint expression.
    self._hvp_approach.update_opt(
        f=constraint_term,
        target=target,
        inputs=fn_inputs,
        reg_coeff=self._reg_coeff,
    )

    self._target = target
    self._max_constraint_val = constraint_value
    self._constraint_name = constraint_name

    # (outputs, log_name) for every function exposed through self._opt_fun.
    specs = dict(
        f_loss=(loss, "f_loss"),
        f_grad=(flat_grad, "f_grad"),
        f_constraint=(constraint_term, "constraint"),
        f_loss_constraint=([loss, constraint_term], "f_loss_constraint"),
    )

    def make_builder(outputs, log_name):
        # Bind outputs/log_name per entry so each builder compiles its own function.
        return lambda: ext.compile_function(
            inputs=fn_inputs,
            outputs=outputs,
            log_name=log_name,
        )

    self._opt_fun = ext.lazydict(
        **{key: make_builder(outputs, log_name)
           for key, (outputs, log_name) in specs.items()}
    )
def update_opt(self, loss, target, leq_constraint, inputs, extra_inputs=None,
               constraint_name="constraint", *args, **kwargs):
    """
    Record the loss/constraint pair and set up the functions that evaluate them.

    :param loss: Symbolic expression for the loss function.
    :param target: Object whose trainable parameters (``get_params(trainable=True)``) are optimized.
    :param leq_constraint: Pair ``(constraint_term, constraint_value)``; the value is stored as the
     maximum allowed constraint value.
    :param inputs: Symbolic input variables.
    :param extra_inputs: Optional additional symbolic inputs, appended after ``inputs``.
    :param constraint_name: Label stored for the constraint.
    :return: No return value.
    """
    inputs = tuple(inputs)
    extra_inputs = tuple() if extra_inputs is None else tuple(extra_inputs)
    combined_inputs = inputs + extra_inputs

    constraint_term, constraint_value = leq_constraint

    params = target.get_params(trainable=True)
    flat_grad = ext.flatten_tensor_variables(
        theano.grad(loss, wrt=params, disconnected_inputs='warn')
    )

    self._hvp_approach.update_opt(
        f=constraint_term,
        target=target,
        inputs=combined_inputs,
        reg_coeff=self._reg_coeff,
    )

    self._target = target
    self._max_constraint_val = constraint_value
    self._constraint_name = constraint_name

    def build_loss():
        return ext.compile_function(
            inputs=combined_inputs, outputs=loss, log_name="f_loss")

    def build_grad():
        return ext.compile_function(
            inputs=combined_inputs, outputs=flat_grad, log_name="f_grad")

    def build_constraint():
        return ext.compile_function(
            inputs=combined_inputs, outputs=constraint_term, log_name="constraint")

    def build_loss_constraint():
        return ext.compile_function(
            inputs=combined_inputs,
            outputs=[loss, constraint_term],
            log_name="f_loss_constraint",
        )

    # The builders are passed uncalled; ext.lazydict decides when to invoke them.
    self._opt_fun = ext.lazydict(
        f_loss=build_loss,
        f_grad=build_grad,
        f_constraint=build_constraint,
        f_loss_constraint=build_loss_constraint,
    )
def update_opt(self, f, target, inputs, reg_coeff):
    """
    Prepare a finite-difference approximation of the Hessian-vector product of ``f``.

    :param f: Symbolic expression whose gradient with respect to the trainable parameters is
     differenced.
    :param target: Parameterized object; its parameter values are temporarily perturbed while the
     product is evaluated.
    :param inputs: Symbolic inputs of the compiled gradient function.
    :param reg_coeff: Stored on ``self.reg_coeff``; not otherwise used here.
    :return: No return value. ``self.opt_fun`` is populated with ``f_grad`` and ``f_Hx_plain``.
    """
    self.target = target
    self.reg_coeff = reg_coeff

    params = target.get_params(trainable=True)
    constraint_grads = theano.grad(f, wrt=params, disconnected_inputs='warn')
    flat_grad = ext.flatten_tensor_variables(constraint_grads)

    def f_Hx_plain(*args):
        # args is the values for `inputs`, followed by one array per parameter
        # holding the direction x.
        n_inputs = len(inputs)
        inputs_ = args[:n_inputs]
        xs = args[n_inputs:]
        flat_xs = np.concatenate([np.reshape(x, (-1,)) for x in xs])

        param_val = self.target.get_param_values(trainable=True)
        # Step size shrinks with the parameter norm; 1e-8 avoids division by zero.
        # np.float32 replaces np.cast['float32'], which was removed in NumPy 2.0.
        eps = np.float32(self.base_eps / (np.linalg.norm(param_val) + 1e-8))

        f_grad = self.opt_fun["f_grad"]
        try:
            self.target.set_param_values(param_val + eps * flat_xs, trainable=True)
            grad_plus = f_grad(*inputs_)
            if self.symmetric:
                # Central difference: (g(p + eps*x) - g(p - eps*x)) / (2*eps).
                self.target.set_param_values(param_val - eps * flat_xs, trainable=True)
                grad_minus = f_grad(*inputs_)
                hx = (grad_plus - grad_minus) / (2 * eps)
            else:
                # Forward difference: (g(p + eps*x) - g(p)) / eps.
                self.target.set_param_values(param_val, trainable=True)
                grad_base = f_grad(*inputs_)
                hx = (grad_plus - grad_base) / eps
        finally:
            # Always restore the original parameters, even if a gradient call raised.
            self.target.set_param_values(param_val, trainable=True)
        return hx

    self.opt_fun = ext.lazydict(
        f_grad=lambda: ext.compile_function(
            inputs=inputs,
            outputs=flat_grad,
            log_name="f_grad",
        ),
        f_Hx_plain=lambda: f_Hx_plain,
    )
def update_opt(self, f, target, inputs, reg_coeff):
    """
    Set up a finite-difference Hessian-vector product for the expression ``f``.

    :param f: Symbolic expression; its gradient with respect to the trainable parameters of
     ``target`` is evaluated at perturbed parameter values.
    :param target: Parameterized object whose values are read, perturbed and restored.
    :param inputs: Symbolic inputs of the compiled gradient function.
    :param reg_coeff: Stored on ``self.reg_coeff``; not otherwise used here.
    :return: No return value. ``self.opt_fun`` is populated with ``f_grad`` and ``f_Hx_plain``.
    """
    self.target = target
    self.reg_coeff = reg_coeff

    params = target.get_params(trainable=True)
    constraint_grads = theano.grad(
        f, wrt=params, disconnected_inputs='warn')
    flat_grad = ext.flatten_tensor_variables(constraint_grads)

    def f_Hx_plain(*args):
        # The leading len(inputs) arguments feed the gradient function; the
        # remaining ones are the per-parameter pieces of the direction x.
        split = len(inputs)
        inputs_, xs = args[:split], args[split:]
        flat_xs = np.concatenate([np.reshape(x, (-1,)) for x in xs])

        param_val = self.target.get_param_values(trainable=True)
        # np.cast was removed in NumPy 2.0; np.float32 performs the same cast.
        eps = np.float32(
            self.base_eps / (np.linalg.norm(param_val) + 1e-8))

        eval_grad = self.opt_fun["f_grad"]
        try:
            self.target.set_param_values(
                param_val + eps * flat_xs, trainable=True)
            grad_forward = eval_grad(*inputs_)
            if self.symmetric:
                # Two-sided difference around the current parameters.
                self.target.set_param_values(
                    param_val - eps * flat_xs, trainable=True)
                grad_backward = eval_grad(*inputs_)
                hx = (grad_forward - grad_backward) / (2 * eps)
            else:
                # One-sided difference against the unperturbed gradient.
                self.target.set_param_values(param_val, trainable=True)
                grad_center = eval_grad(*inputs_)
                hx = (grad_forward - grad_center) / eps
        finally:
            # Put the original parameters back even when evaluation fails.
            self.target.set_param_values(param_val, trainable=True)
        return hx

    self.opt_fun = ext.lazydict(
        f_grad=lambda: ext.compile_function(
            inputs=inputs,
            outputs=flat_grad,
            log_name="f_grad",
        ),
        f_Hx_plain=lambda: f_Hx_plain,
    )
def init_opt(self):
    """
    Compile the function that returns the flattened gradient of the policy's
    action log-probability, and allocate the ``gS`` accumulator.

    :return: An empty dict.
    """
    env = self.env
    obv_var = env.observation_space.new_tensor_variable('obv', extra_dims=0)
    act_var = env.action_space.new_tensor_variable('act', extra_dims=0)

    self.policyParametes = self.policy.get_params(trainable=True)

    log_prob = self.policy.action_log_prob_sym(obv_var, act_var)
    po_log_grad = theano.grad(
        log_prob,
        self.policyParametes,
        disconnected_inputs='ignore',
    )
    flat_pol_log_grad = ext.flatten_tensor_variables(po_log_grad)

    self.polLogGradFunc = theano.function(
        inputs=[obv_var, act_var],
        outputs=flat_pol_log_grad,
        allow_input_downcast=True,
    )

    # Add gS to support RMSProp: one zero entry per policy parameter value.
    n_param_values = len(self.policy.get_param_values())
    self.gS = np.zeros(n_param_values)
    return dict()
def update_opt(self, loss, target, inputs, extra_inputs=None, gradients=None, **kwargs):
    """
    Store the target and set up lazily built loss and gradient functions.

    :param loss: Symbolic expression for the loss function.
    :param target: Parameterized object; its trainable parameters are differentiated against when
     ``gradients`` is not supplied.
    :param inputs: Sequence (list or tuple) of symbolic input variables.
    :param extra_inputs: Optional sequence of additional symbolic inputs, appended after ``inputs``.
    :param gradients: Optional precomputed symbolic gradients. When None they are derived from
     ``loss`` with ``disconnected_inputs='ignore'``.
    :return: No return value.
    """
    self._target = target

    if gradients is None:
        gradients = theano.grad(
            loss,
            target.get_params(trainable=True),
            disconnected_inputs='ignore',
        )
    flat_grad = ext.flatten_tensor_variables(gradients)

    # Normalise both to lists before concatenating: the default for extra_inputs
    # is a list, so `inputs + extra_inputs` raised TypeError for tuple inputs.
    all_inputs = list(inputs) + ([] if extra_inputs is None else list(extra_inputs))

    # self._name is read inside the lambdas, i.e. only when a function is built.
    self._opt_fun = ext.lazydict(
        f_loss=lambda: ext.compile_function(
            all_inputs,
            loss,
            log_name=self._name + "_f_loss",
        ),
        f_grad=lambda: ext.compile_function(
            inputs=all_inputs,
            outputs=flat_grad,
            log_name=self._name + "_f_grad",
        ),
    )
def get_opt_output():
    """Return [loss, flattened gradient of loss], both cast to float64."""
    trainable_params = target.get_params(trainable=True)
    param_grads = theano.grad(loss, trainable_params)
    flat_grad = flatten_tensor_variables(param_grads)
    return [expr.astype('float64') for expr in (loss, flat_grad)]
def update_opt(self, loss, target, quad_leq_constraint, lin_leq_constraint, inputs,
               extra_inputs=None,
               constraint_name_1="quad_constraint",
               constraint_name_2="lin_constraint",
               using_surrogate=False,
               true_linear_leq_constraint=None,
               precompute=False,
               attempt_feasible_recovery=False,
               attempt_infeasible_recovery=False,
               revert_to_last_safe_point=False,
               *args, **kwargs):
    """
    Register a loss with one quadratified and one linearized constraint.

    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should implement methods of the
     :class:`rllab.core.paramerized.Parameterized` class.
    :param quad_leq_constraint: A constraint provided as a tuple (f, epsilon), of the form
     f(*inputs) <= epsilon. This constraint will be quadratified.
    :param lin_leq_constraint: A constraint provided as a tuple (f, epsilon), of the form
     f(*inputs) <= epsilon. This constraint will be linearized.
    :param inputs: A list of symbolic variables as inputs, which could be subsampled if needed. It is assumed
     that the first dimension of these inputs should correspond to the number of data points.
    :param extra_inputs: A list of symbolic variables as extra inputs which should not be subsampled.
    :param constraint_name_1: Label stored for the quadratic constraint.
    :param constraint_name_2: Label stored for the linear constraint.
    :param using_surrogate: Flag saying that ``lin_leq_constraint`` is really a SURROGATE constraint
     (see below).
    :param true_linear_leq_constraint: Symbolic expression used to actually evaluate the linear
     constraint when ``using_surrogate`` is set (see below).
    :param precompute: Use an 'input' for the linearization constant instead of
     true_linear_leq_constraint. If present, overrides the surrogate. When using precompute, the
     last input is the precomputed linearization constant.
    :param attempt_feasible_recovery: Deals with cases where x=0 is an infeasible point but the
     problem is still feasible.
    :param attempt_infeasible_recovery: Deals with cases where the optimization problem is
     entirely infeasible.
    :param revert_to_last_safe_point: Behavior protocol for the situation when the optimization
     problem is entirely infeasible. Specifies that we should just reset the parameters to the
     last point that satisfied the constraint.
    :return: No return value.

    On "using_surrogate" and "true_linear_leq_constraint":

    In rllab, when we optimize a policy, we minimize a "surrogate loss" function (or, if you
    prefer, maximize a surrogate return). The surrogate loss is mean( lr * advantage ), where
    'lr' is the likelihood ratio of the new policy with respect to the old policy,
    lr(s,a) = pi_new(a|s) / pi_old(a|s). It is chosen because its gradient equals the gradient of
    the true objective when pi_new = pi_old.

    The real quantity of interest, however, is J(pi) = E_{tau ~ pi} [R(tau)], and evaluating the
    surrogate loss at pi_old does not measure J(pi_old). Usually that does not matter, because
    J(pi_old) is never needed. Here a directly analogous quantity -- the expected safety return --
    is needed, because its value matters for constraint enforcement in the linear approximation.

    So when the user-supplied ``lin_leq_constraint`` is a surrogate (good for gradients, wrong for
    values), ``using_surrogate`` signals that fact and ``true_linear_leq_constraint`` supplies the
    expression whose value is evaluated instead.
    """
    self.precompute = precompute
    self.attempt_feasible_recovery = attempt_feasible_recovery
    self.attempt_infeasible_recovery = attempt_infeasible_recovery
    self.revert_to_last_safe_point = revert_to_last_safe_point

    inputs = tuple(inputs)
    extra_inputs = tuple() if extra_inputs is None else tuple(extra_inputs)
    all_inputs = inputs + extra_inputs

    constraint_term_1, constraint_value_1 = quad_leq_constraint
    constraint_term_2, constraint_value_2 = lin_leq_constraint

    params = target.get_params(trainable=True)
    flat_grad = ext.flatten_tensor_variables(
        theano.grad(loss, wrt=params, disconnected_inputs='warn')
    )

    # The gradient is taken from the constraint as supplied (possibly a surrogate),
    # before the value expression is swapped below.
    flat_lin_constraint_grad = ext.flatten_tensor_variables(
        theano.grad(constraint_term_2, wrt=params, disconnected_inputs='warn')
    )

    if using_surrogate and not precompute:
        constraint_term_2 = true_linear_leq_constraint

    self._hvp_approach.update_opt(
        f=constraint_term_1,
        target=target,
        inputs=all_inputs,
        reg_coeff=self._reg_coeff,
    )

    self._target = target
    self._max_quad_constraint_val = constraint_value_1
    self._max_lin_constraint_val = constraint_value_2
    self._constraint_name_1 = constraint_name_1
    self._constraint_name_2 = constraint_name_2

    def deferred(outputs, log_name):
        # Zero-argument callable for ext.lazydict; nothing is compiled here.
        return lambda: ext.compile_function(
            inputs=all_inputs,
            outputs=outputs,
            log_name=log_name,
        )

    self._opt_fun = ext.lazydict(
        f_loss=deferred(loss, "f_loss"),
        f_grad=deferred(flat_grad, "f_grad"),
        f_quad_constraint=deferred(constraint_term_1, "quad_constraint"),
        f_lin_constraint=deferred(constraint_term_2, "lin_constraint"),
        f_lin_constraint_grad=deferred(flat_lin_constraint_grad, "lin_constraint_grad"),
        f_loss_constraint=deferred(
            [loss, constraint_term_1, constraint_term_2],
            "f_loss_constraint",
        ),
    )

    self.last_safe_point = None
    self._last_lin_pred_S = 0
    self._last_surr_pred_S = 0
def get_opt_output(gradients):
    """
    Return [loss, flattened gradients], both cast to float64.

    :param gradients: Symbolic gradients to flatten; when None they are computed from ``loss``
     with respect to the trainable parameters of ``target``.
    """
    grads = gradients
    if grads is None:
        trainable_params = target.get_params(trainable=True)
        grads = theano.grad(loss, trainable_params)
    flat_grad = flatten_tensor_variables(grads)
    loss_out = loss.astype('float64')
    grad_out = flat_grad.astype('float64')
    return [loss_out, grad_out]
def get_opt_output():
    """Return the penalized loss and its flattened gradient as float64 expressions."""
    params = target.get_params(trainable=True)
    grads = theano.grad(penalized_loss, params, disconnected_inputs='ignore')
    flat_grad = flatten_tensor_variables(grads)
    loss_out = penalized_loss.astype('float64')
    grad_out = flat_grad.astype('float64')
    return [loss_out, grad_out]
def update_opt(self, loss, target, quad_leq_constraint, lin_leq_constraint, inputs,
               extra_inputs=None,
               constraint_name_1="quad_constraint",
               constraint_name_2="lin_constraint",
               using_surrogate=False,
               true_linear_leq_constraint=None,
               precompute=False,
               attempt_feasible_recovery=False,
               attempt_infeasible_recovery=False,
               revert_to_last_safe_point=False,
               *args, **kwargs):
    """
    Register a loss together with a quadratic and a linear constraint.

    :param loss: Symbolic expression for the loss function.
    :param target: Object whose trainable parameters (``get_params(trainable=True)``) are optimized.
    :param quad_leq_constraint: Pair ``(term, value)``; the term is handed to the
     Hessian-vector-product helper and the value is stored as the quadratic limit.
    :param lin_leq_constraint: Pair ``(term, value)``; the term is differentiated and the value is
     stored as the linear limit.
    :param inputs: Symbolic input variables.
    :param extra_inputs: Optional additional symbolic inputs, appended after ``inputs``.
    :param constraint_name_1: Label stored for the quadratic constraint.
    :param constraint_name_2: Label stored for the linear constraint.
    :param using_surrogate: When true (and ``precompute`` is false), the linear constraint is
     evaluated through ``true_linear_leq_constraint`` while its gradient still comes from
     ``lin_leq_constraint``.
    :param true_linear_leq_constraint: Replacement expression used as described above.
    :param precompute: Stored on ``self``; when true the replacement above is skipped.
    :param attempt_feasible_recovery: Stored on ``self``.
    :param attempt_infeasible_recovery: Stored on ``self``.
    :param revert_to_last_safe_point: Stored on ``self``.
    :return: No return value.
    """
    self.precompute = precompute
    self.attempt_feasible_recovery = attempt_feasible_recovery
    self.attempt_infeasible_recovery = attempt_infeasible_recovery
    self.revert_to_last_safe_point = revert_to_last_safe_point

    inputs = tuple(inputs)
    if extra_inputs is not None:
        extra_inputs = tuple(extra_inputs)
    else:
        extra_inputs = tuple()
    fn_inputs = inputs + extra_inputs

    quad_term, quad_limit = quad_leq_constraint
    lin_term, lin_limit = lin_leq_constraint

    params = target.get_params(trainable=True)
    loss_grads = theano.grad(loss, wrt=params, disconnected_inputs='warn')
    flat_grad = ext.flatten_tensor_variables(loss_grads)

    # Differentiate the constraint as supplied before possibly swapping the
    # expression that is evaluated for its value.
    lin_grads = theano.grad(lin_term, wrt=params, disconnected_inputs='warn')
    flat_lin_constraint_grad = ext.flatten_tensor_variables(lin_grads)

    if using_surrogate and not precompute:
        lin_term = true_linear_leq_constraint

    self._hvp_approach.update_opt(
        f=quad_term,
        target=target,
        inputs=fn_inputs,
        reg_coeff=self._reg_coeff,
    )

    self._target = target
    self._max_quad_constraint_val = quad_limit
    self._max_lin_constraint_val = lin_limit
    self._constraint_name_1 = constraint_name_1
    self._constraint_name_2 = constraint_name_2

    def make_builder(outputs, log_name):
        # Zero-argument callable for ext.lazydict; nothing is compiled here.
        return lambda: ext.compile_function(
            inputs=fn_inputs,
            outputs=outputs,
            log_name=log_name,
        )

    self._opt_fun = ext.lazydict(
        f_loss=make_builder(loss, "f_loss"),
        f_grad=make_builder(flat_grad, "f_grad"),
        f_quad_constraint=make_builder(quad_term, "quad_constraint"),
        f_lin_constraint=make_builder(lin_term, "lin_constraint"),
        f_lin_constraint_grad=make_builder(flat_lin_constraint_grad, "lin_constraint_grad"),
        f_loss_constraint=make_builder([loss, quad_term, lin_term], "f_loss_constraint"),
    )

    self.last_safe_point = None
    self._last_lin_pred_S = 0
    self._last_surr_pred_S = 0
def get_opt_output():
    """Return the loss and its flattened gradient as float64 expressions."""
    params = target.get_params(trainable=True)
    grads = theano.grad(loss, params)
    flat_grad = flatten_tensor_variables(grads)
    loss_out = loss.astype('float64')
    grad_out = flat_grad.astype('float64')
    return [loss_out, grad_out]
def update_opt(self, loss, target, leq_constraint, inputs, extra_inputs=None,
               constraint_name="constraint", *args, **kwargs):
    """
    Register the loss and constraint, and prepare the functions that evaluate them.

    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should implement methods of the
     :class:`rllab.core.paramerized.Parameterized` class.
    :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
    :param inputs: A list of symbolic variables as inputs, which could be subsampled if needed. It is assumed
     that the first dimension of these inputs should correspond to the number of data points.
    :param extra_inputs: A list of symbolic variables as extra inputs which should not be subsampled.
    :param constraint_name: Label stored for the constraint.
    :return: No return value.
    """
    inputs = tuple(inputs)
    if extra_inputs is None:
        extra_inputs = tuple()
    else:
        extra_inputs = tuple(extra_inputs)

    constraint_term, constraint_value = leq_constraint

    params = target.get_params(trainable=True)
    grads = theano.grad(loss, wrt=params)
    flat_grad = ext.flatten_tensor_variables(grads)

    constraint_grads = theano.grad(constraint_term, wrt=params)
    # Symbolic stand-ins for the vector x, one per parameter.
    xs = tuple([ext.new_tensor_like("%s x" % p.name, p) for p in params])
    # The inner product of the constraint gradient with x, differentiated again,
    # gives the second-derivative-times-x product without forming the matrix.
    # itertools.izip is Python 2 only; builtin zip works on both major versions.
    grad_dot_x = TT.sum([TT.sum(g * x) for g, x in zip(constraint_grads, xs)])
    Hx_plain_splits = TT.grad(grad_dot_x, wrt=params)
    Hx_plain = TT.concatenate([TT.flatten(s) for s in Hx_plain_splits])

    self._target = target
    self._max_constraint_val = constraint_value
    self._constraint_name = constraint_name

    if self._debug_nan:
        from theano.compile.nanguardmode import NanGuardMode
        mode = NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
    else:
        mode = None

    # Each entry is a zero-argument callable handed to ext.lazydict.
    self._opt_fun = ext.lazydict(
        f_loss=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=loss,
            log_name="f_loss",
            mode=mode,
        ),
        f_grad=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=flat_grad,
            log_name="f_grad",
            mode=mode,
        ),
        f_Hx_plain=lambda: ext.compile_function(
            inputs=inputs + extra_inputs + xs,
            outputs=Hx_plain,
            log_name="f_Hx_plain",
            mode=mode,
        ),
        f_constraint=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=constraint_term,
            log_name="constraint",
            mode=mode,
        ),
        f_loss_constraint=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=[loss, constraint_term],
            log_name="f_loss_constraint",
            mode=mode,
        ),
    )
def get_opt_output(gradients):
    """
    Build the symbolic outputs [loss, flattened gradients], each cast to float64.

    :param gradients: Symbolic gradients; if None, they are derived from ``loss`` with respect to
     the trainable parameters of ``target``.
    """
    if gradients is not None:
        grads = gradients
    else:
        grads = theano.grad(loss, target.get_params(trainable=True))
    flat_grad = flatten_tensor_variables(grads)
    return [expr.astype('float64') for expr in (loss, flat_grad)]