def update_opt(self, loss, target, inputs, extra_inputs=None, *args, **kwargs):
    """
    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should implement methods of the
        :class:`rllab_maml.core.parameterized.Parameterized` class.
    :param inputs: A list of symbolic variables as inputs
    :return: No return value.
    """
    self._target = target

    def get_opt_output():
        flat_grad = tensor_utils.flatten_tensor_variables(
            tf.gradients(loss, target.get_params(trainable=True)))
        return [tf.cast(loss, tf.float64), tf.cast(flat_grad, tf.float64)]

    if extra_inputs is None:
        extra_inputs = list()

    self._opt_fun = ext.lazydict(
        f_loss=lambda: tensor_utils.compile_function(
            inputs + extra_inputs, loss),
        f_opt=lambda: tensor_utils.compile_function(
            inputs=inputs + extra_inputs,
            outputs=get_opt_output(),
        ))
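
# ---------------------------------------------------------------------------
# Hedged usage sketch (not part of the class above): f_opt returns the loss
# and the flattened gradient cast to float64, which matches the interface
# expected by scipy.optimize.fmin_l_bfgs_b. The helper below is illustrative
# only; `opt_fun` and `target` are assumed to follow the rllab Parameterized
# conventions (flat get/set of trainable parameters).
import numpy as np
import scipy.optimize


def lbfgs_optimize(opt_fun, target, input_vals, max_opt_itr=20):
    f_opt = opt_fun["f_opt"]

    def f_opt_wrapper(flat_params):
        # evaluate loss and gradient at the proposed flat parameter vector
        target.set_param_values(flat_params, trainable=True)
        return f_opt(*input_vals)

    x0 = target.get_param_values(trainable=True).astype(np.float64)
    opt_params, _, _ = scipy.optimize.fmin_l_bfgs_b(
        func=f_opt_wrapper, x0=x0, maxiter=max_opt_itr)
    target.set_param_values(opt_params, trainable=True)
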
def update_opt(self, f, target, inputs, reg_coeff):
    self.target = target
    self.reg_coeff = reg_coeff
    params = target.get_params(trainable=True)

    constraint_grads = tf.gradients(f, xs=params)
    for idx, (grad, param) in enumerate(zip(constraint_grads, params)):
        if grad is None:
            constraint_grads[idx] = tf.zeros_like(param)

    xs = tuple([
        tensor_utils.new_tensor_like(p.name.split(":")[0], p) for p in params
    ])

    def Hx_plain():
        Hx_plain_splits = tf.gradients(
            tf.reduce_sum(
                tf.stack([
                    tf.reduce_sum(g * x)
                    for g, x in zip(constraint_grads, xs)
                ])), params)
        for idx, (Hx, param) in enumerate(zip(Hx_plain_splits, params)):
            if Hx is None:
                Hx_plain_splits[idx] = tf.zeros_like(param)
        return tensor_utils.flatten_tensor_variables(Hx_plain_splits)

    self.opt_fun = ext.lazydict(
        f_Hx_plain=lambda: tensor_utils.compile_function(
            inputs=inputs + xs,
            outputs=Hx_plain(),
            log_name="f_Hx_plain",
        ),
    )
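
# ---------------------------------------------------------------------------
# Hedged sketch of how the compiled f_Hx_plain is normally consumed: it gets
# wrapped into a damped Hessian-vector product, e.g.
#     hvp = lambda v: f_Hx_plain(*(list(input_vals) + unflatten(v))) + reg_coeff * v
# and handed to a conjugate-gradient solver to approximately solve H x = g.
# The CG routine below is a generic textbook implementation for illustration;
# the actual code base may provide its own solver, and `unflatten` (splitting
# the flat vector back into per-parameter arrays) is an assumed helper.
import numpy as np


def conjugate_gradient(hvp, g, cg_iters=10, residual_tol=1e-10):
    x = np.zeros_like(g)
    r = g.copy()
    p = g.copy()
    rdotr = r.dot(r)
    for _ in range(cg_iters):
        z = hvp(p)
        alpha = rdotr / (p.dot(z) + 1e-8)
        x += alpha * p
        r -= alpha * z
        new_rdotr = r.dot(r)
        if new_rdotr < residual_tol:
            break
        p = r + (new_rdotr / rdotr) * p
        rdotr = new_rdotr
    return x
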
def update_opt(self, loss, target, inputs, inner_kl, outer_kl, extra_inputs=None,
               meta_batch_size=1, num_grad_updates=1, **kwargs):
    """
    :param inner_kl: Symbolic expression for inner kl
    :param outer_kl: Symbolic expression for outer kl
    :param meta_batch_size: number of MAML tasks, for batcher
    """
    super().update_opt(loss, target, inputs, extra_inputs, **kwargs)

    if extra_inputs is None:
        extra_inputs = list()

    self._opt_fun = ext.lazydict(
        f_loss=lambda: tensor_utils.compile_function(inputs + extra_inputs, loss),
        f_inner_kl=lambda: tensor_utils.compile_function(inputs + extra_inputs, inner_kl),
        f_outer_kl=lambda: tensor_utils.compile_function(inputs + extra_inputs, outer_kl),
    )

    if self.multi_adam > 1:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if update_ops:
            # for batch norm
            updates = tf.group(*update_ops)
            with tf.control_dependencies([updates]):
                self._train_ops = [optimizer.minimize(loss, var_list=target.get_params(trainable=True))
                                   for optimizer in self._tf_optimizers]
        else:
            self._train_ops = [optimizer.minimize(loss, var_list=target.get_params(trainable=True))
                               for optimizer in self._tf_optimizers]

    self.meta_batch_size = meta_batch_size
    self.num_grad_updates = num_grad_updates
def update_opt(self, f, target, inputs, reg_coeff):
    self.target = target
    self.reg_coeff = reg_coeff
    params = target.get_params(trainable=True)

    constraint_grads = tf.gradients(f, xs=params)
    for idx, (grad, param) in enumerate(zip(constraint_grads, params)):
        if grad is None:
            constraint_grads[idx] = tf.zeros_like(param)
    flat_grad = tensor_utils.flatten_tensor_variables(constraint_grads)

    def f_Hx_plain(*args):
        # receives inputs and xs (flattened inputs) as arguments
        inputs_ = args[:len(inputs)]
        xs = args[len(inputs):]
        flat_xs = np.concatenate([np.reshape(x, (-1,)) for x in xs])
        param_val = self.target.get_param_values(trainable=True)
        eps = np.cast['float32'](
            self.base_eps / (np.linalg.norm(param_val) + 1e-8))
        self.target.set_param_values(param_val + eps * flat_xs, trainable=True)
        flat_grad_dvplus = self.opt_fun["f_grad"](*inputs_)
        self.target.set_param_values(param_val, trainable=True)
        if self.symmetric:
            self.target.set_param_values(param_val - eps * flat_xs, trainable=True)
            flat_grad_dvminus = self.opt_fun["f_grad"](*inputs_)
            hx = (flat_grad_dvplus - flat_grad_dvminus) / (2 * eps)
            self.target.set_param_values(param_val, trainable=True)
        else:
            flat_grad = self.opt_fun["f_grad"](*inputs_)
            hx = (flat_grad_dvplus - flat_grad) / eps
        return hx

    self.opt_fun = ext.lazydict(
        f_grad=lambda: tensor_utils.compile_function(
            inputs=inputs,
            outputs=flat_grad,
            log_name="f_grad",
        ),
        f_Hx_plain=lambda: f_Hx_plain,
    )
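
# ---------------------------------------------------------------------------
# Hedged numerical check (illustration only) of the finite-difference idea
# behind f_Hx_plain above: for f(theta) = 0.5 * theta^T A theta the gradient
# is A @ theta, so a central difference of the gradient along direction v,
# (grad(theta + eps*v) - grad(theta - eps*v)) / (2*eps), recovers the
# Hessian-vector product A @ v up to floating-point error. All names here are
# local to this sketch and not part of the optimizer class.
import numpy as np

A = np.array([[2.0, 0.5], [0.5, 1.0]])
grad = lambda theta: A @ theta          # analytic gradient of the quadratic
theta0 = np.array([1.0, -2.0])
v = np.array([0.3, 0.7])
eps = 1e-4

hv_fd = (grad(theta0 + eps * v) - grad(theta0 - eps * v)) / (2 * eps)
assert np.allclose(hv_fd, A @ v, atol=1e-6)
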
def update_opt(self, loss, target, leq_constraint, inputs, constraint_name="constraint", *args, **kwargs): """ :param loss: Symbolic expression for the loss function. :param target: A parameterized object to optimize over. It should implement methods of the :class:`rllab_maml.core.paramerized.Parameterized` class. :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon. :param inputs: A list of symbolic variables as inputs :return: No return value. """ constraint_term, constraint_value = leq_constraint with tf.variable_scope(self._name): penalty_var = tf.placeholder(tf.float32, tuple(), name="penalty") penalized_loss = loss + penalty_var * constraint_term self._target = target self._max_constraint_val = constraint_value self._constraint_name = constraint_name def get_opt_output(): params = target.get_params(trainable=True) grads = tf.gradients(penalized_loss, params) for idx, (grad, param) in enumerate(zip(grads, params)): if grad is None: grads[idx] = tf.zeros_like(param) flat_grad = tensor_utils.flatten_tensor_variables(grads) return [ tf.cast(penalized_loss, tf.float64), tf.cast(flat_grad, tf.float64), ] self._opt_fun = ext.lazydict( f_loss=lambda: tensor_utils.compile_function(inputs, loss, log_name="f_loss"), f_constraint=lambda: tensor_utils.compile_function(inputs, constraint_term, log_name="f_constraint"), f_penalized_loss=lambda: tensor_utils.compile_function( inputs=inputs + [penalty_var], outputs=[penalized_loss, loss, constraint_term], log_name="f_penalized_loss", ), f_opt=lambda: tensor_utils.compile_function( inputs=inputs + [penalty_var], outputs=get_opt_output(), ) )
def update_opt(self, loss, target, inputs, extra_inputs=None, **kwargs):
    # Initializes the update opt used in the optimization
    """
    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should implement methods of the
        :class:`rllab_maml.core.parameterized.Parameterized` class.
    :param inputs: A list of symbolic variables as inputs
    :return: No return value.
    """
    self._target = target

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    if update_ops:
        # for batch norm
        updates = tf.group(*update_ops)
        with tf.control_dependencies([updates]):
            self._train_op = self._tf_optimizer.minimize(
                loss, var_list=target.get_params(trainable=True))
            if self._init_tf_optimizer is not None:
                self._init_train_op = self._init_tf_optimizer.minimize(
                    loss, var_list=target.get_params(trainable=True))
    else:
        self._train_op = self._tf_optimizer.minimize(
            loss, var_list=target.get_params(trainable=True))
        if self._init_tf_optimizer is not None:
            self._init_train_op = self._init_tf_optimizer.minimize(
                loss, var_list=target.get_params(trainable=True))

    if extra_inputs is None:
        extra_inputs = list()
    self._input_vars = inputs + extra_inputs
    self._opt_fun = ext.lazydict(
        f_loss=lambda: tensor_utils.compile_function(
            inputs + extra_inputs, loss),
    )

    self.debug_loss = loss
    self.debug_vars = target.get_params(trainable=True)
    self.debug_target = target
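
# ---------------------------------------------------------------------------
# Hedged sketch of how the _train_op built above is normally driven in
# TensorFlow 1.x: feed the placeholders stored in _input_vars and run the op
# in a session loop. The feed construction and session handling below are
# illustrative assumptions, not the class's actual optimize() method.
import tensorflow as tf


def run_train_op(optimizer, input_vals, n_steps=100):
    feed_dict = dict(zip(optimizer._input_vars, input_vals))
    sess = tf.get_default_session()
    for _ in range(n_steps):
        sess.run(optimizer._train_op, feed_dict=feed_dict)
    return optimizer._opt_fun["f_loss"](*input_vals)
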
def update_opt(self, loss, target, inputs, kl, extra_inputs=None, **kwargs):
    """
    :param kl: Symbolic expression for the KL divergence
    """
    super(PPOOptimizer, self).update_opt(loss, target, inputs, extra_inputs, **kwargs)

    if extra_inputs is None:
        extra_inputs = list()

    self._opt_fun = ext.lazydict(
        f_loss=lambda: tensor_utils.compile_function(
            inputs + extra_inputs, loss),
        f_kl=lambda: tensor_utils.compile_function(inputs + extra_inputs, kl),
    )
def update_opt(self, loss, target, inputs, inner_kl, extra_inputs=None,
               meta_batch_size=1, num_grad_updates=1, **kwargs):
    """
    :param inner_kl: Symbolic expression for inner kl
    :param meta_batch_size: number of MAML tasks, for batcher
    """
    super().update_opt(loss, target, inputs, extra_inputs, **kwargs)

    if extra_inputs is None:
        extra_inputs = list()

    self._opt_fun = ext.lazydict(
        f_loss=lambda: tensor_utils.compile_function(
            inputs + extra_inputs, loss),
        f_inner_kl=lambda: tensor_utils.compile_function(
            inputs + extra_inputs, inner_kl))

    self.meta_batch_size = meta_batch_size
    self.num_grad_updates = num_grad_updates
def update_opt(self, loss, target, inputs, extra_inputs=None, gradients=None, **kwargs):
    """
    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should implement methods of the
        :class:`rllab_maml.core.parameterized.Parameterized` class.
    :param inputs: A list of symbolic variables as inputs
    :param gradients: symbolic expressions for the gradients of trainable parameters of the target.
        By default this will be computed by calling theano.grad
    :return: No return value.
    """
    self._target = target

    if gradients is None:
        gradients = theano.grad(loss, target.get_params(trainable=True), disconnected_inputs='ignore')
    updates = self._update_method(gradients, target.get_params(trainable=True))
    updates = OrderedDict([(k, v.astype(k.dtype)) for k, v in updates.items()])

    if extra_inputs is None:
        extra_inputs = list()

    self._opt_fun = ext.lazydict(
        f_loss=lambda: ext.compile_function(inputs + extra_inputs, loss),
        f_opt=lambda: ext.compile_function(
            inputs=inputs + extra_inputs,
            outputs=loss,
            updates=updates,
        ))
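
# ---------------------------------------------------------------------------
# Hedged sketch of how the compiled functions above are usually driven: every
# call to f_opt both evaluates the loss and applies the parameter `updates`
# produced by self._update_method, so a first-order optimizer just loops over
# minibatches. The batching helper below is illustrative only and assumes all
# input arrays share their first (data) dimension.
import numpy as np


def optimize_first_order(opt_fun, input_vals, extra_input_vals=(),
                         n_epochs=10, batch_size=64):
    f_opt = opt_fun["f_opt"]
    n = len(input_vals[0])
    for _ in range(n_epochs):
        idx = np.random.permutation(n)
        for start in range(0, n, batch_size):
            batch = [v[idx[start:start + batch_size]] for v in input_vals]
            f_opt(*(batch + list(extra_input_vals)))
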
def update_opt(self, loss, target, inputs, extra_inputs=None, gradients=None, *args, **kwargs):
    """
    :param loss: Symbolic expression for the loss function.
    :param target: A parameterized object to optimize over. It should implement methods of the
        :class:`rllab_maml.core.parameterized.Parameterized` class.
    :param inputs: A list of symbolic variables as inputs
    :param gradients: symbolic expressions for the gradients of trainable parameters of the target.
        By default this will be computed by calling theano.grad
    :return: No return value.
    """
    self._target = target

    def get_opt_output(gradients):
        if gradients is None:
            gradients = theano.grad(loss, target.get_params(trainable=True))
        flat_grad = flatten_tensor_variables(gradients)
        return [loss.astype('float64'), flat_grad.astype('float64')]

    if extra_inputs is None:
        extra_inputs = list()

    self._opt_fun = lazydict(
        f_loss=lambda: compile_function(inputs + extra_inputs, loss),
        f_opt=lambda: compile_function(
            inputs=inputs + extra_inputs,
            outputs=get_opt_output(gradients),
        ))
def update_opt(self, loss, target, leq_constraint, inputs, constraint_name="constraint", *args, **kwargs): """ :param loss: Symbolic expression for the loss function. :param target: A parameterized object to optimize over. It should implement methods of the :class:`rllab_maml.core.paramerized.Parameterized` class. :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon. :param inputs: A list of symbolic variables as inputs :return: No return value. """ constraint_term, constraint_value = leq_constraint penalty_var = TT.scalar("penalty") penalized_loss = loss + penalty_var * constraint_term self._target = target self._max_constraint_val = constraint_value self._constraint_name = constraint_name def get_opt_output(): flat_grad = flatten_tensor_variables(theano.grad( penalized_loss, target.get_params(trainable=True), disconnected_inputs='ignore' )) return [penalized_loss.astype('float64'), flat_grad.astype('float64')] self._opt_fun = lazydict( f_loss=lambda: compile_function(inputs, loss, log_name="f_loss"), f_constraint=lambda: compile_function(inputs, constraint_term, log_name="f_constraint"), f_penalized_loss=lambda: compile_function( inputs=inputs + [penalty_var], outputs=[penalized_loss, loss, constraint_term], log_name="f_penalized_loss", ), f_opt=lambda: compile_function( inputs=inputs + [penalty_var], outputs=get_opt_output(), log_name="f_opt" ) )
def update_opt(self, loss, target, leq_constraint, inputs, extra_inputs=None, constraint_name="constraint", *args, **kwargs): """ :param loss: Symbolic expression for the loss function. :param target: A parameterized object to optimize over. It should implement methods of the :class:`rllab_maml.core.paramerized.Parameterized` class. :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon. :param inputs: A list of symbolic variables as inputs, which could be subsampled if needed. It is assumed that the first dimension of these inputs should correspond to the number of data points :param extra_inputs: A list of symbolic variables as extra inputs which should not be subsampled :return: No return value. """ inputs = tuple(inputs) if extra_inputs is None: extra_inputs = tuple() else: extra_inputs = tuple(extra_inputs) constraint_term, constraint_value = leq_constraint params = target.get_params(trainable=True) grads = tf.gradients(loss, xs=params) for idx, (grad, param) in enumerate(zip(grads, params)): if grad is None: grads[idx] = tf.zeros_like(param) flat_grad = tensor_utils.flatten_tensor_variables(grads) # f=KL-divergence and target is policy self._hvp_approach.update_opt(f=constraint_term, target=target, inputs=inputs + extra_inputs, reg_coeff=self._reg_coeff) self._target = target self._max_constraint_val = constraint_value self._constraint_name = constraint_name self._opt_fun = ext.lazydict( f_loss=lambda: tensor_utils.compile_function( inputs=inputs + extra_inputs, outputs=loss, log_name="f_loss", ), f_grad=lambda: tensor_utils.compile_function( inputs=inputs + extra_inputs, outputs=flat_grad, log_name="f_grad", ), f_constraint=lambda: tensor_utils.compile_function( inputs=inputs + extra_inputs, outputs=constraint_term, log_name="constraint", ), f_loss_constraint=lambda: tensor_utils.compile_function( inputs=inputs + extra_inputs, outputs=[loss, constraint_term], log_name="f_loss_constraint", ), )