def update_opt(self,
                   loss,
                   target,
                   inputs,
                   extra_inputs=None,
                   *args,
                   **kwargs):
        """
        :param loss: Symbolic expression for the loss function.
        :param target: A parameterized object to optimize over. It should implement methods of the
        :class:`rllab_maml.core.paramerized.Parameterized` class.
        :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
        :param inputs: A list of symbolic variables as inputs
        :return: No return value.
        """

        self._target = target

        def get_opt_output():
            flat_grad = tensor_utils.flatten_tensor_variables(
                tf.gradients(loss, target.get_params(trainable=True)))
            return [tf.cast(loss, tf.float64), tf.cast(flat_grad, tf.float64)]

        if extra_inputs is None:
            extra_inputs = list()

        self._opt_fun = ext.lazydict(
            f_loss=lambda: tensor_utils.compile_function(
                inputs + extra_inputs, loss),
            f_opt=lambda: tensor_utils.compile_function(
                inputs=inputs + extra_inputs,
                outputs=get_opt_output(),
            ))
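The compiled f_opt above returns the loss and the flattened gradient cast to float64, which matches the calling convention that scipy.optimize.fmin_l_bfgs_b expects from its objective; rllab-style L-BFGS optimizers presumably feed f_opt to such a routine. A minimal sketch of that pattern on a toy quadratic (plain NumPy/SciPy, not the rllab_maml code):

import numpy as np
from scipy.optimize import fmin_l_bfgs_b

def f_opt(x):
    # toy stand-in for the compiled function: returns (loss, flat gradient)
    # as float64, the format fmin_l_bfgs_b consumes directly
    loss = 0.5 * np.sum((x - 1.0) ** 2)
    grad = x - 1.0
    return np.float64(loss), grad.astype(np.float64)

x_star, final_loss, info = fmin_l_bfgs_b(f_opt, x0=np.zeros(3))
print(x_star, final_loss)   # converges to [1, 1, 1], loss ~ 0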
Example 2
    def update_opt(self, f, target, inputs, reg_coeff):
        self.target = target
        self.reg_coeff = reg_coeff
        params = target.get_params(trainable=True)

        constraint_grads = tf.gradients(f, xs=params)
        for idx, (grad, param) in enumerate(zip(constraint_grads, params)):
            if grad is None:
                constraint_grads[idx] = tf.zeros_like(param)

        xs = tuple([
            tensor_utils.new_tensor_like(p.name.split(":")[0], p)
            for p in params
        ])

        def Hx_plain():
            Hx_plain_splits = tf.gradients(
                tf.reduce_sum(
                    tf.stack([
                        tf.reduce_sum(g * x)
                        for g, x in zip(constraint_grads, xs)
                    ])), params)
            for idx, (Hx, param) in enumerate(zip(Hx_plain_splits, params)):
                if Hx is None:
                    Hx_plain_splits[idx] = tf.zeros_like(param)
            return tensor_utils.flatten_tensor_variables(Hx_plain_splits)

        self.opt_fun = ext.lazydict(
            f_Hx_plain=lambda: tensor_utils.compile_function(
                inputs=inputs + xs,
                outputs=Hx_plain(),
                log_name="f_Hx_plain",
            ), )
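The compiled f_Hx_plain evaluates a Hessian-vector product of f (typically a KL divergence) against the direction tensors xs without ever forming the Hessian. In rllab-style conjugate-gradient optimizers such a callable, regularized by reg_coeff, is handed to a CG solver to solve Hx = g. A minimal NumPy sketch of that consumer, with a toy explicit matrix standing in for the Hessian (not the rllab_maml implementation):

import numpy as np

def conjugate_gradient(hvp, g, iters=10, reg_coeff=1e-5):
    # solve (H + reg_coeff * I) x = g using only Hessian-vector products
    x = np.zeros_like(g)
    r = g.copy()                      # residual for x = 0
    p = r.copy()
    rdotr = r @ r
    for _ in range(iters):
        Hp = hvp(p) + reg_coeff * p
        alpha = rdotr / (p @ Hp)
        x += alpha * p
        r -= alpha * Hp
        new_rdotr = r @ r
        if new_rdotr < 1e-10:         # residual small enough: stop
            break
        p = r + (new_rdotr / rdotr) * p
        rdotr = new_rdotr
    return x

H = np.array([[2.0, 0.3], [0.3, 1.0]])   # toy positive-definite "Hessian"
g = np.array([1.0, -0.5])
print(conjugate_gradient(lambda v: H @ v, g))
print(np.linalg.solve(H, g))             # should roughly agree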
Example 3
 def update_opt(self, loss, target, inputs, inner_kl, outer_kl, extra_inputs=None, meta_batch_size=1, num_grad_updates=1, **kwargs):
     """
     :param inner_kl: Symbolic expression for inner kl
     :param outer_kl: Symbolic expression for outer kl
     :param meta_batch_size: number of MAML tasks, for batcher
     """
     super().update_opt(loss, target, inputs, extra_inputs, **kwargs)
     if extra_inputs is None:
         extra_inputs = list()
     self._opt_fun = ext.lazydict(
         f_loss=lambda: tensor_utils.compile_function(inputs + extra_inputs, loss),
         f_inner_kl=lambda: tensor_utils.compile_function(inputs + extra_inputs, inner_kl),
         f_outer_kl=lambda: tensor_utils.compile_function(inputs + extra_inputs, outer_kl),
     )
     if self.multi_adam > 1:
         update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
         if update_ops:
             # for batch norm
             updates = tf.group(*update_ops)
             with tf.control_dependencies([updates]):
                 self._train_ops = [optimizer.minimize(loss, var_list=target.get_params(trainable=True)) for optimizer in self._tf_optimizers]
         else:
             self._train_ops = [optimizer.minimize(loss, var_list=target.get_params(trainable=True)) for optimizer in self._tf_optimizers]
     self.meta_batch_size = meta_batch_size
     self.num_grad_updates = num_grad_updates
Example 4
    def update_opt(self, f, target, inputs, reg_coeff):
        self.target = target
        self.reg_coeff = reg_coeff

        params = target.get_params(trainable=True)

        constraint_grads = tf.gradients(f, xs=params)
        for idx, (grad, param) in enumerate(zip(constraint_grads, params)):
            if grad is None:
                constraint_grads[idx] = tf.zeros_like(param)

        flat_grad = tensor_utils.flatten_tensor_variables(constraint_grads)

        def f_Hx_plain(*args):
            # receives the inputs followed by xs (per-parameter vectors, flattened below)
            inputs_ = args[:len(inputs)]
            xs = args[len(inputs):]
            flat_xs = np.concatenate([np.reshape(x, (-1, )) for x in xs])
            param_val = self.target.get_param_values(trainable=True)
            eps = np.cast['float32'](self.base_eps /
                                     (np.linalg.norm(param_val) + 1e-8))
            self.target.set_param_values(param_val + eps * flat_xs,
                                         trainable=True)
            flat_grad_dvplus = self.opt_fun["f_grad"](*inputs_)
            self.target.set_param_values(param_val, trainable=True)
            if self.symmetric:
                self.target.set_param_values(param_val - eps * flat_xs,
                                             trainable=True)
                flat_grad_dvminus = self.opt_fun["f_grad"](*inputs_)
                hx = (flat_grad_dvplus - flat_grad_dvminus) / (2 * eps)
                self.target.set_param_values(param_val, trainable=True)
            else:
                flat_grad = self.opt_fun["f_grad"](*inputs_)
                hx = (flat_grad_dvplus - flat_grad) / eps
            return hx

        self.opt_fun = ext.lazydict(
            f_grad=lambda: tensor_utils.compile_function(
                inputs=inputs,
                outputs=flat_grad,
                log_name="f_grad",
            ),
            f_Hx_plain=lambda: f_Hx_plain,
        )
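This variant approximates the Hessian-vector product by finite differences of the gradient: perturb the flat parameters by eps * x, re-evaluate f_grad, and take the difference (a central difference when symmetric is set). The same idea on a toy quadratic, where the result can be checked exactly (plain NumPy sketch, not the rllab_maml code):

import numpy as np

A = np.array([[3.0, 1.0], [1.0, 2.0]])

def grad(theta):
    # gradient of the quadratic 0.5 * theta^T A theta, so the true HVP is A @ x
    return A @ theta

def fd_hvp(theta, x, base_eps=1e-5, symmetric=True):
    eps = base_eps / (np.linalg.norm(theta) + 1e-8)
    if symmetric:
        # central difference, mirroring the symmetric branch above
        return (grad(theta + eps * x) - grad(theta - eps * x)) / (2 * eps)
    return (grad(theta + eps * x) - grad(theta)) / eps

theta = np.array([0.5, -1.0])
x = np.array([1.0, 2.0])
print(fd_hvp(theta, x), A @ x)   # the two should nearly match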
Example 5
    def update_opt(self, loss, target, leq_constraint, inputs, constraint_name="constraint", *args, **kwargs):
        """
        :param loss: Symbolic expression for the loss function.
        :param target: A parameterized object to optimize over. It should implement methods of the
        :class:`rllab_maml.core.parameterized.Parameterized` class.
        :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
        :param inputs: A list of symbolic variables as inputs
        :return: No return value.
        """
        constraint_term, constraint_value = leq_constraint
        with tf.variable_scope(self._name):
            penalty_var = tf.placeholder(tf.float32, tuple(), name="penalty")
        penalized_loss = loss + penalty_var * constraint_term

        self._target = target
        self._max_constraint_val = constraint_value
        self._constraint_name = constraint_name

        def get_opt_output():
            params = target.get_params(trainable=True)
            grads = tf.gradients(penalized_loss, params)
            for idx, (grad, param) in enumerate(zip(grads, params)):
                if grad is None:
                    grads[idx] = tf.zeros_like(param)
            flat_grad = tensor_utils.flatten_tensor_variables(grads)
            return [
                tf.cast(penalized_loss, tf.float64),
                tf.cast(flat_grad, tf.float64),
            ]

        self._opt_fun = ext.lazydict(
            f_loss=lambda: tensor_utils.compile_function(inputs, loss, log_name="f_loss"),
            f_constraint=lambda: tensor_utils.compile_function(inputs, constraint_term, log_name="f_constraint"),
            f_penalized_loss=lambda: tensor_utils.compile_function(
                inputs=inputs + [penalty_var],
                outputs=[penalized_loss, loss, constraint_term],
                log_name="f_penalized_loss",
            ),
            f_opt=lambda: tensor_utils.compile_function(
                inputs=inputs + [penalty_var],
                outputs=get_opt_output(),
            )
        )
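Here the constraint is folded into the objective as loss + penalty * constraint_term, with the penalty fed in through an extra placeholder; the surrounding optimizer then searches over the penalty coefficient until the constraint value at the penalized optimum drops below epsilon. A toy sketch of that outer penalty search on a one-dimensional problem (SciPy, not the rllab_maml search logic):

import numpy as np
from scipy.optimize import minimize

def loss(x):
    return (x[0] - 3.0) ** 2

def constraint(x):
    return x[0] ** 2          # require x^2 <= epsilon

epsilon, penalty = 1.0, 1.0
for _ in range(20):
    res = minimize(lambda x: loss(x) + penalty * constraint(x), x0=np.zeros(1))
    if constraint(res.x) <= epsilon:
        break
    penalty *= 2.0            # constraint still violated: penalize harder
print(res.x, constraint(res.x), penalty)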
Example 6
    def update_opt(self, loss, target, inputs, extra_inputs=None, **kwargs):
        # Initializes the update opt used in the optimization
        """
        :param loss: Symbolic expression for the loss function.
        :param target: A parameterized object to optimize over. It should implement methods of the
        :class:`rllab_maml.core.paramerized.Parameterized` class.
        :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
        :param inputs: A list of symbolic variables as inputs
        :return: No return value.
        """

        self._target = target

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if update_ops:
            # for batch norm
            updates = tf.group(*update_ops)
            with tf.control_dependencies([updates]):
                self._train_op = self._tf_optimizer.minimize(
                    loss, var_list=target.get_params(trainable=True))
                if self._init_tf_optimizer is not None:
                    self._init_train_op = self._init_tf_optimizer.minimize(
                        loss, var_list=target.get_params(trainable=True))
        else:
            self._train_op = self._tf_optimizer.minimize(
                loss, var_list=target.get_params(trainable=True))
            if self._init_tf_optimizer is not None:
                self._init_train_op = self._init_tf_optimizer.minimize(
                    loss, var_list=target.get_params(trainable=True))

        if extra_inputs is None:
            extra_inputs = list()
        self._input_vars = inputs + extra_inputs
        self._opt_fun = ext.lazydict(
            f_loss=lambda: tensor_utils.compile_function(
                inputs + extra_inputs, loss), )

        self.debug_loss = loss
        self.debug_vars = target.get_params(trainable=True)
        self.debug_target = target
Example 7
 def update_opt(self,
                loss,
                target,
                inputs,
                kl,
                extra_inputs=None,
                **kwargs):
     """
     :param inner_kl: Symbolic expression for inner kl
     :param outer_kl: Symbolic expression for outer kl
     :param meta_batch_size: number of MAML tasks, for batcher
     """
     super(PPOOptimizer, self).update_opt(loss, target, inputs,
                                          extra_inputs, **kwargs)
     if extra_inputs is None:
         extra_inputs = list()
     self._opt_fun = ext.lazydict(
         f_loss=lambda: tensor_utils.compile_function(
             inputs + extra_inputs, loss),
         f_kl=lambda: tensor_utils.compile_function(inputs + extra_inputs,
                                                    kl),
     )
Example 8
 def update_opt(self,
                loss,
                target,
                inputs,
                inner_kl,
                extra_inputs=None,
                meta_batch_size=1,
                num_grad_updates=1,
                **kwargs):
     """
     :param inner_kl: Symbolic expression for inner kl
     :param meta_batch_size: number of MAML tasks, for batcher
     """
     super().update_opt(loss, target, inputs, extra_inputs, **kwargs)
     if extra_inputs is None:
         extra_inputs = list()
     self._opt_fun = ext.lazydict(
         f_loss=lambda: tensor_utils.compile_function(
             inputs + extra_inputs, loss),
         f_inner_kl=lambda: tensor_utils.compile_function(
             inputs + extra_inputs, inner_kl))
     self.meta_batch_size = meta_batch_size
     self.num_grad_updates = num_grad_updates
Example 9
    def update_opt(self,
                   loss,
                   target,
                   inputs,
                   extra_inputs=None,
                   gradients=None,
                   **kwargs):
        """
        :param loss: Symbolic expression for the loss function.
        :param target: A parameterized object to optimize over. It should implement methods of the
        :class:`rllab_maml.core.paramerized.Parameterized` class.
        :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
        :param inputs: A list of symbolic variables as inputs
        :return: No return value.
        """

        self._target = target

        if gradients is None:
            gradients = theano.grad(loss,
                                    target.get_params(trainable=True),
                                    disconnected_inputs='ignore')
        updates = self._update_method(gradients,
                                      target.get_params(trainable=True))
        updates = OrderedDict([(k, v.astype(k.dtype))
                               for k, v in updates.items()])

        if extra_inputs is None:
            extra_inputs = list()

        self._opt_fun = ext.lazydict(
            f_loss=lambda: ext.compile_function(inputs + extra_inputs, loss),
            f_opt=lambda: ext.compile_function(
                inputs=inputs + extra_inputs,
                outputs=loss,
                updates=updates,
            ))
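The Theano variant above delegates to an update method (self._update_method, e.g. something like lasagne.updates.adam or sgd) that maps gradients and parameters to an ordered mapping of parameter updates, which is then baked into the compiled f_opt; the .astype(k.dtype) cast simply keeps each update in its parameter's dtype. A numerical sketch of the shape of such a mapping (plain NumPy, hypothetical names, not the rllab_maml code):

import numpy as np
from collections import OrderedDict

def sgd_updates(gradients, params, learning_rate=0.01):
    # one entry per parameter: new value = old value - lr * gradient,
    # cast back to the parameter's dtype as in the optimizer above
    return OrderedDict(
        (name, (params[name] - learning_rate * grad).astype(params[name].dtype))
        for name, grad in gradients.items()
    )

params = {"w": np.array([1.0, -2.0], dtype=np.float32), "b": np.array([0.5], dtype=np.float32)}
grads = {"w": np.array([0.2, 0.1]), "b": np.array([-0.3])}
print(sgd_updates(grads, params))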
Example 10
    def update_opt(self,
                   loss,
                   target,
                   inputs,
                   extra_inputs=None,
                   gradients=None,
                   *args,
                   **kwargs):
        """
        :param loss: Symbolic expression for the loss function.
        :param target: A parameterized object to optimize over. It should implement methods of the
        :class:`rllab_maml.core.paramerized.Parameterized` class.
        :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
        :param inputs: A list of symbolic variables as inputs
        :param gradients: symbolic expressions for the gradients of trainable parameters of the target. By default
        this will be computed by calling theano.grad
        :return: No return value.
        """

        self._target = target

        def get_opt_output(gradients):
            if gradients is None:
                gradients = theano.grad(loss,
                                        target.get_params(trainable=True))
            flat_grad = flatten_tensor_variables(gradients)
            return [loss.astype('float64'), flat_grad.astype('float64')]

        if extra_inputs is None:
            extra_inputs = list()

        self._opt_fun = lazydict(
            f_loss=lambda: compile_function(inputs + extra_inputs, loss),
            f_opt=lambda: compile_function(
                inputs=inputs + extra_inputs,
                outputs=get_opt_output(gradients),
            ))
Example 11
    def update_opt(self, loss, target, leq_constraint, inputs, constraint_name="constraint", *args, **kwargs):
        """
        :param loss: Symbolic expression for the loss function.
        :param target: A parameterized object to optimize over. It should implement methods of the
        :class:`rllab_maml.core.parameterized.Parameterized` class.
        :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
        :param inputs: A list of symbolic variables as inputs
        :return: No return value.
        """
        constraint_term, constraint_value = leq_constraint
        penalty_var = TT.scalar("penalty")
        penalized_loss = loss + penalty_var * constraint_term

        self._target = target
        self._max_constraint_val = constraint_value
        self._constraint_name = constraint_name

        def get_opt_output():
            flat_grad = flatten_tensor_variables(theano.grad(
                penalized_loss, target.get_params(trainable=True), disconnected_inputs='ignore'
            ))
            return [penalized_loss.astype('float64'), flat_grad.astype('float64')]

        self._opt_fun = lazydict(
            f_loss=lambda: compile_function(inputs, loss, log_name="f_loss"),
            f_constraint=lambda: compile_function(inputs, constraint_term, log_name="f_constraint"),
            f_penalized_loss=lambda: compile_function(
                inputs=inputs + [penalty_var],
                outputs=[penalized_loss, loss, constraint_term],
                log_name="f_penalized_loss",
            ),
            f_opt=lambda: compile_function(
                inputs=inputs + [penalty_var],
                outputs=get_opt_output(),
                log_name="f_opt"
            )
        )
Example 12
    def update_opt(self,
                   loss,
                   target,
                   leq_constraint,
                   inputs,
                   extra_inputs=None,
                   constraint_name="constraint",
                   *args,
                   **kwargs):
        """
        :param loss: Symbolic expression for the loss function.
        :param target: A parameterized object to optimize over. It should implement methods of the
        :class:`rllab_maml.core.parameterized.Parameterized` class.
        :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
        :param inputs: A list of symbolic variables as inputs, which may be subsampled if needed. It is assumed
        that the first dimension of these inputs corresponds to the number of data points.
        :param extra_inputs: A list of symbolic variables as extra inputs which should not be subsampled
        :return: No return value.
        """

        inputs = tuple(inputs)
        if extra_inputs is None:
            extra_inputs = tuple()
        else:
            extra_inputs = tuple(extra_inputs)

        constraint_term, constraint_value = leq_constraint

        params = target.get_params(trainable=True)
        grads = tf.gradients(loss, xs=params)
        for idx, (grad, param) in enumerate(zip(grads, params)):
            if grad is None:
                grads[idx] = tf.zeros_like(param)
        flat_grad = tensor_utils.flatten_tensor_variables(grads)

        # f=KL-divergence and target is policy
        self._hvp_approach.update_opt(f=constraint_term,
                                      target=target,
                                      inputs=inputs + extra_inputs,
                                      reg_coeff=self._reg_coeff)

        self._target = target
        self._max_constraint_val = constraint_value
        self._constraint_name = constraint_name

        self._opt_fun = ext.lazydict(
            f_loss=lambda: tensor_utils.compile_function(
                inputs=inputs + extra_inputs,
                outputs=loss,
                log_name="f_loss",
            ),
            f_grad=lambda: tensor_utils.compile_function(
                inputs=inputs + extra_inputs,
                outputs=flat_grad,
                log_name="f_grad",
            ),
            f_constraint=lambda: tensor_utils.compile_function(
                inputs=inputs + extra_inputs,
                outputs=constraint_term,
                log_name="constraint",
            ),
            f_loss_constraint=lambda: tensor_utils.compile_function(
                inputs=inputs + extra_inputs,
                outputs=[loss, constraint_term],
                log_name="f_loss_constraint",
            ),
        )
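This last variant is the conjugate-gradient (TRPO-style) optimizer: it compiles the loss, its flat gradient, and the constraint term (the KL divergence), and hands the constraint to the Hessian-vector-product helper above. At optimization time the search direction is obtained by solving Hx = g with CG and then scaled so that the quadratic model of the KL hits the trust-region radius epsilon from leq_constraint. A toy NumPy sketch of that step scaling (stand-in matrices, not the rllab_maml optimize step):

import numpy as np

H = np.array([[2.0, 0.2], [0.2, 1.5]])   # stand-in for the KL Hessian
g = np.array([0.7, -1.3])                # stand-in for the flat loss gradient
delta = 0.01                             # max_constraint_val (epsilon)

direction = np.linalg.solve(H, g)        # what the CG solver would return
step_size = np.sqrt(2.0 * delta / (direction @ H @ direction))
step = step_size * direction

print(step, 0.5 * step @ H @ step)       # second value should equal ~delta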