Python flatten_tensor_variables Exemples, rllab.misc.ext.flatten_tensor_variables Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : conjugate_gradient_optimizer.py Projet : yenchenlin/rllab

    def update_opt(
        self, loss, target, leq_constraint, inputs, extra_inputs=None, constraint_name="constraint", *args, **kwargs
    ):
        """
        Build the lazily compiled functions needed to optimize ``loss`` under a single constraint.

        :param loss: Symbolic expression for the loss function.
        :param target: A parameterized object to optimize over; ``get_params(trainable=True)`` is called on it.
        :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
        :param inputs: A list of symbolic variables as inputs, which could be subsampled if needed. It is assumed
        that the first dimension of these inputs should correspond to the number of data points
        :param extra_inputs: A list of symbolic variables as extra inputs which should not be subsampled
        :param constraint_name: Name stored on the optimizer for the constraint.
        :return: No return value.
        """

        inputs = tuple(inputs)
        extra_inputs = tuple() if extra_inputs is None else tuple(extra_inputs)

        constraint_term, constraint_value = leq_constraint

        params = target.get_params(trainable=True)
        grads = theano.grad(loss, wrt=params)
        flat_grad = ext.flatten_tensor_variables(grads)

        # Hessian-vector product of the constraint: differentiate sum_i <g_i, x_i> with respect to the
        # parameters, where g_i are the constraint gradients and x_i are placeholder tensors shaped like
        # the parameters.
        constraint_grads = theano.grad(constraint_term, wrt=params)
        xs = tuple([ext.new_tensor_like("%s x" % p.name, p) for p in params])
        # The built-in zip works on both Python 2 and Python 3; itertools.izip exists only on Python 2.
        Hx_plain_splits = TT.grad(TT.sum([TT.sum(g * x) for g, x in zip(constraint_grads, xs)]), wrt=params)
        Hx_plain = TT.concatenate([TT.flatten(s) for s in Hx_plain_splits])

        self._target = target
        self._max_constraint_val = constraint_value
        self._constraint_name = constraint_name

        if self._debug_nan:
            # Imported only when needed so the debugging mode is not a hard dependency.
            from theano.compile.nanguardmode import NanGuardMode

            mode = NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
        else:
            mode = None

        # Each entry is a thunk; presumably ext.lazydict only calls it when the key is first used.
        self._opt_fun = ext.lazydict(
            f_loss=lambda: ext.compile_function(
                inputs=inputs + extra_inputs, outputs=loss, log_name="f_loss", mode=mode
            ),
            f_grad=lambda: ext.compile_function(
                inputs=inputs + extra_inputs, outputs=flat_grad, log_name="f_grad", mode=mode
            ),
            f_Hx_plain=lambda: ext.compile_function(
                inputs=inputs + extra_inputs + xs, outputs=Hx_plain, log_name="f_Hx_plain", mode=mode
            ),
            f_constraint=lambda: ext.compile_function(
                inputs=inputs + extra_inputs, outputs=constraint_term, log_name="constraint", mode=mode
            ),
            f_loss_constraint=lambda: ext.compile_function(
                inputs=inputs + extra_inputs, outputs=[loss, constraint_term], log_name="f_loss_constraint", mode=mode
            ),
        )

Exemple #2

0

Afficher le fichier

Fichier : penalty_lbfgs_optimizer.py Projet : harjatinsingh/mujoco-env

 def get_opt_output():
     """Return [penalized loss, flattened gradient], both cast to float64.

     ``penalized_loss`` and ``target`` are taken from the enclosing scope.
     """
     trainable_params = target.get_params(trainable=True)
     param_grads = theano.grad(penalized_loss,
                               trainable_params,
                               disconnected_inputs='ignore')
     flat_grad = flatten_tensor_variables(param_grads)
     return [expr.astype('float64') for expr in (penalized_loss, flat_grad)]

Exemple #3

0

Afficher le fichier

Fichier : conjugate_gradient_optimizer.py Projet : ZikangXiong/cpo

    def update_opt(self, loss, target, leq_constraint, inputs, extra_inputs=None, constraint_name="constraint", *args,
                   **kwargs):
        """
        Register the loss, the constraint and the inputs, and prepare the lazily compiled functions.

        :param loss: Symbolic expression for the loss function.
        :param target: A parameterized object to optimize over; ``get_params(trainable=True)`` is called on it.
        :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
        :param inputs: A list of symbolic variables as inputs, which could be subsampled if needed. It is assumed
        that the first dimension of these inputs should correspond to the number of data points
        :param extra_inputs: A list of symbolic variables as extra inputs which should not be subsampled
        :param constraint_name: Name stored on the optimizer for the constraint.
        :return: No return value.
        """

        inputs = tuple(inputs)
        extra_inputs = tuple() if extra_inputs is None else tuple(extra_inputs)
        all_inputs = inputs + extra_inputs

        constraint_term, constraint_value = leq_constraint

        trainable_params = target.get_params(trainable=True)
        flat_grad = ext.flatten_tensor_variables(
            theano.grad(loss, wrt=trainable_params, disconnected_inputs='warn'))

        # The Hessian-vector-product helper is configured on the constraint expression.
        self._hvp_approach.update_opt(
            f=constraint_term,
            target=target,
            inputs=all_inputs,
            reg_coeff=self._reg_coeff,
        )

        self._target = target
        self._max_constraint_val = constraint_value
        self._constraint_name = constraint_name

        def deferred(outputs, log_name):
            # Wrap the compile call in a thunk; presumably ext.lazydict calls it on first use.
            return lambda: ext.compile_function(
                inputs=all_inputs,
                outputs=outputs,
                log_name=log_name,
            )

        self._opt_fun = ext.lazydict(
            f_loss=deferred(loss, "f_loss"),
            f_grad=deferred(flat_grad, "f_grad"),
            f_constraint=deferred(constraint_term, "constraint"),
            f_loss_constraint=deferred([loss, constraint_term], "f_loss_constraint"),
        )

Exemple #4

0

Afficher le fichier

Fichier : conjugate_gradient_optimizer.py Projet : QuantCollective/maml_rl

    def update_opt(self, loss, target, leq_constraint, inputs, extra_inputs=None, constraint_name="constraint", *args,
                   **kwargs):
        """
        Store the optimization problem and set up the table of lazily compiled functions.

        :param loss: Symbolic expression for the loss function.
        :param target: A parameterized object to optimize over; ``get_params(trainable=True)`` is called on it.
        :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
        :param inputs: A list of symbolic variables as inputs, which could be subsampled if needed. It is assumed
        that the first dimension of these inputs should correspond to the number of data points
        :param extra_inputs: A list of symbolic variables as extra inputs which should not be subsampled
        :param constraint_name: Name stored on the optimizer for the constraint.
        :return: No return value.
        """

        inputs = tuple(inputs)
        if extra_inputs is not None:
            extra_inputs = tuple(extra_inputs)
        else:
            extra_inputs = tuple()
        combined_inputs = inputs + extra_inputs

        constraint_term, constraint_value = leq_constraint

        params = target.get_params(trainable=True)
        loss_grads = theano.grad(loss, wrt=params, disconnected_inputs='warn')
        flat_grad = ext.flatten_tensor_variables(loss_grads)

        self._hvp_approach.update_opt(f=constraint_term, target=target, inputs=combined_inputs,
                                      reg_coeff=self._reg_coeff)

        self._target = target
        self._max_constraint_val = constraint_value
        self._constraint_name = constraint_name

        def make_thunk(outputs, log_name):
            # Binding the arguments here avoids late-binding surprises in the loop below.
            return lambda: ext.compile_function(
                inputs=combined_inputs,
                outputs=outputs,
                log_name=log_name,
            )

        # (dictionary key, symbolic outputs, log name) for every function to expose.
        compile_specs = (
            ("f_loss", loss, "f_loss"),
            ("f_grad", flat_grad, "f_grad"),
            ("f_constraint", constraint_term, "constraint"),
            ("f_loss_constraint", [loss, constraint_term], "f_loss_constraint"),
        )
        thunks = dict(
            (key, make_thunk(outputs, log_name))
            for key, outputs, log_name in compile_specs
        )
        self._opt_fun = ext.lazydict(**thunks)

Exemple #5

0

Afficher le fichier

Fichier : conjugate_gradient_optimizer.py Projet : tkasarla/cpo

    def update_opt(self, loss, target, leq_constraint, inputs, extra_inputs=None, constraint_name="constraint", *args,
                   **kwargs):
        """
        Set up the loss, gradient and constraint functions for a constrained optimization problem.

        :param loss: Symbolic expression for the loss.
        :param target: Object to optimize over; ``get_params(trainable=True)`` is called on it.
        :param leq_constraint: Pair (constraint expression, maximum allowed value).
        :param inputs: Iterable of symbolic inputs; converted to a tuple.
        :param extra_inputs: Optional iterable of further symbolic inputs, appended after ``inputs``.
        :param constraint_name: Name stored on the optimizer for the constraint.
        :return: No return value.
        """

        inputs = tuple(inputs)
        extra_inputs = tuple(extra_inputs) if extra_inputs is not None else tuple()
        fn_inputs = inputs + extra_inputs

        constraint_term, constraint_value = leq_constraint

        flat_grad = ext.flatten_tensor_variables(
            theano.grad(
                loss,
                wrt=target.get_params(trainable=True),
                disconnected_inputs='warn',
            )
        )

        self._hvp_approach.update_opt(
            f=constraint_term, target=target, inputs=fn_inputs, reg_coeff=self._reg_coeff)

        self._target = target
        self._max_constraint_val = constraint_value
        self._constraint_name = constraint_name

        def lazily_compiled(outputs, log_name):
            # Returns a zero-argument callable that performs the compilation when invoked.
            def build():
                return ext.compile_function(
                    inputs=fn_inputs,
                    outputs=outputs,
                    log_name=log_name,
                )
            return build

        self._opt_fun = ext.lazydict(
            f_loss=lazily_compiled(loss, "f_loss"),
            f_grad=lazily_compiled(flat_grad, "f_grad"),
            f_constraint=lazily_compiled(constraint_term, "constraint"),
            f_loss_constraint=lazily_compiled([loss, constraint_term], "f_loss_constraint"),
        )

Exemple #6

0

Afficher le fichier

    def update_opt(self, f, target, inputs, reg_coeff):
        """
        Set up a finite-difference approximation of Hessian-vector products of ``f``.

        :param f: Symbolic expression whose gradient with respect to the trainable parameters is differenced.
        :param target: Object providing ``get_params``, ``get_param_values`` and ``set_param_values``.
        :param inputs: Sequence of symbolic inputs of the compiled gradient function.
        :param reg_coeff: Stored on the instance as ``self.reg_coeff``; not used in this method.
        :return: No return value. ``self.opt_fun`` is set to a lazy dictionary with the keys
            ``"f_grad"`` and ``"f_Hx_plain"``.
        """
        self.target = target
        self.reg_coeff = reg_coeff

        params = target.get_params(trainable=True)

        constraint_grads = theano.grad(f,
                                       wrt=params,
                                       disconnected_inputs='warn')
        flat_grad = ext.flatten_tensor_variables(constraint_grads)

        def f_Hx_plain(*args):
            # args holds the values for `inputs` followed by one array per parameter (the vector x).
            inputs_ = args[:len(inputs)]
            xs = args[len(inputs):]
            flat_xs = np.concatenate([np.reshape(x, (-1, )) for x in xs])
            param_val = self.target.get_param_values(trainable=True)
            # Step size shrinks as the parameter norm grows; 1e-8 guards against division by zero.
            eps = np.cast['float32'](self.base_eps /
                                     (np.linalg.norm(param_val) + 1e-8))
            flat_grad_dvminus = None
            try:
                self.target.set_param_values(param_val + eps * flat_xs,
                                             trainable=True)
                flat_grad_dvplus = self.opt_fun["f_grad"](*inputs_)
                symmetric = self.symmetric
                if symmetric:
                    self.target.set_param_values(param_val - eps * flat_xs,
                                                 trainable=True)
                    flat_grad_dvminus = self.opt_fun["f_grad"](*inputs_)
            finally:
                # Always put the original parameters back, even when a gradient evaluation
                # raises, so the target is never left in a perturbed state.
                self.target.set_param_values(param_val, trainable=True)
            if symmetric:
                # Central difference.
                hx = (flat_grad_dvplus - flat_grad_dvminus) / (2 * eps)
            else:
                # Forward difference against the gradient at the unperturbed parameters.
                flat_grad_base = self.opt_fun["f_grad"](*inputs_)
                hx = (flat_grad_dvplus - flat_grad_base) / eps
            return hx

        self.opt_fun = ext.lazydict(
            f_grad=lambda: ext.compile_function(
                inputs=inputs,
                outputs=flat_grad,
                log_name="f_grad",
            ),
            f_Hx_plain=lambda: f_Hx_plain,
        )

Exemple #7

0

Afficher le fichier

Fichier : conjugate_gradient_optimizer.py Projet : QuantCollective/maml_rl

    def update_opt(self, f, target, inputs, reg_coeff):
        """
        Prepare finite-difference Hessian-vector products for the expression ``f``.

        :param f: Symbolic expression; its gradient with respect to the trainable parameters is differenced.
        :param target: Object providing ``get_params``, ``get_param_values`` and ``set_param_values``.
        :param inputs: Sequence of symbolic inputs of the compiled gradient function.
        :param reg_coeff: Stored on the instance as ``self.reg_coeff``; not used in this method.
        :return: No return value. ``self.opt_fun`` is set to a lazy dictionary with the keys
            ``"f_grad"`` and ``"f_Hx_plain"``.
        """
        self.target = target
        self.reg_coeff = reg_coeff

        params = target.get_params(trainable=True)

        constraint_grads = theano.grad(
            f, wrt=params, disconnected_inputs='warn')
        flat_grad = ext.flatten_tensor_variables(constraint_grads)

        def f_Hx_plain(*args):
            # The first len(inputs) arguments feed the gradient function; the rest form the vector x.
            inputs_ = args[:len(inputs)]
            xs = args[len(inputs):]
            flat_xs = np.concatenate([np.reshape(x, (-1,)) for x in xs])
            param_val = self.target.get_param_values(trainable=True)
            # Perturbation size is scaled by the inverse parameter norm (1e-8 avoids dividing by zero).
            eps = np.cast['float32'](
                self.base_eps / (np.linalg.norm(param_val) + 1e-8))
            flat_grad_dvminus = None
            try:
                self.target.set_param_values(
                    param_val + eps * flat_xs, trainable=True)
                flat_grad_dvplus = self.opt_fun["f_grad"](*inputs_)
                symmetric = self.symmetric
                if symmetric:
                    self.target.set_param_values(
                        param_val - eps * flat_xs, trainable=True)
                    flat_grad_dvminus = self.opt_fun["f_grad"](*inputs_)
            finally:
                # Restore the unperturbed parameters no matter what happened above, so an
                # exception in the gradient evaluation cannot leave the target modified.
                self.target.set_param_values(param_val, trainable=True)
            if symmetric:
                # Central difference.
                hx = (flat_grad_dvplus - flat_grad_dvminus) / (2 * eps)
            else:
                # Forward difference; the reference gradient is taken at the restored parameters.
                flat_grad_base = self.opt_fun["f_grad"](*inputs_)
                hx = (flat_grad_dvplus - flat_grad_base) / eps
            return hx

        self.opt_fun = ext.lazydict(
            f_grad=lambda: ext.compile_function(
                inputs=inputs,
                outputs=flat_grad,
                log_name="f_grad",
            ),
            f_Hx_plain=lambda: f_Hx_plain,
        )

Exemple #8

0

Afficher le fichier

Fichier : power_gradient.py Projet : liuyan8023/Policy-and-Feature-Extractor

    def init_opt(self):
        """
        Compile the function returning the flattened gradient of the action log-probability.

        Sets ``self.policyParametes``, ``self.polLogGradFunc`` and ``self.gS``.

        :return: An empty dictionary.
        """
        obv_var = self.env.observation_space.new_tensor_variable('obv', extra_dims=0)
        act_var = self.env.action_space.new_tensor_variable('act', extra_dims=0)

        self.policyParametes = self.policy.get_params(trainable=True)
        log_prob = self.policy.action_log_prob_sym(obv_var, act_var)
        po_log_grad = theano.grad(
            log_prob,
            self.policyParametes,
            disconnected_inputs='ignore',
        )
        self.polLogGradFunc = theano.function(
            inputs=[obv_var, act_var],
            outputs=ext.flatten_tensor_variables(po_log_grad),
            allow_input_downcast=True,
        )

        # Accumulator with one entry per policy parameter value, added to support RMSProp.
        n_param_values = len(self.policy.get_param_values())
        self.gS = np.zeros(n_param_values)

        return {}

Exemple #9

0

Afficher le fichier

Fichier : first_order_optimizer.py Projet : ocean1211/ex2

    def update_opt(self,
                   loss,
                   target,
                   inputs,
                   extra_inputs=None,
                   gradients=None,
                   **kwargs):
        """
        Register the loss to minimize and set up lazily compiled loss/gradient functions.

        :param loss: Symbolic expression for the loss function.
        :param target: Object to optimize over; ``get_params(trainable=True)`` is called on it
            when ``gradients`` is not supplied.
        :param inputs: Sequence (list or tuple) of symbolic input variables.
        :param extra_inputs: Optional sequence of additional symbolic inputs, appended after ``inputs``.
        :param gradients: Optional precomputed symbolic gradients; when None they are derived from
            ``loss`` with ``theano.grad``.
        :return: No return value.
        """
        self._target = target

        if gradients is None:
            gradients = theano.grad(loss,
                                    target.get_params(trainable=True),
                                    disconnected_inputs='ignore')
        flat_grad = ext.flatten_tensor_variables(gradients)

        # Normalize both collections to lists: the original default for extra_inputs was a
        # list, so passing `inputs` as a tuple made `inputs + extra_inputs` raise TypeError.
        inputs = list(inputs)
        extra_inputs = [] if extra_inputs is None else list(extra_inputs)

        self._opt_fun = ext.lazydict(
            f_loss=lambda: ext.compile_function(
                inputs + extra_inputs, loss, log_name=self._name + "_f_loss"),
            f_grad=lambda: ext.compile_function(
                inputs=inputs + extra_inputs,
                outputs=flat_grad,
                log_name=self._name + "_f_grad"),
        )

Exemple #10

0

Afficher le fichier

Fichier : lbfgs_optimizer.py Projet : zhmz90/rllab

 def get_opt_output():
     """Return [loss, flattened gradient of loss], both cast to float64.

     ``loss`` and ``target`` are taken from the enclosing scope.
     """
     trainable_params = target.get_params(trainable=True)
     param_grads = theano.grad(loss, trainable_params)
     flat_grad = flatten_tensor_variables(param_grads)
     loss_f64 = loss.astype('float64')
     grad_f64 = flat_grad.astype('float64')
     return [loss_f64, grad_f64]

Exemple #11

0

Afficher le fichier

    def update_opt(self,
                   loss,
                   target,
                   quad_leq_constraint,
                   lin_leq_constraint,
                   inputs,
                   extra_inputs=None,
                   constraint_name_1="quad_constraint",
                   constraint_name_2="lin_constraint",
                   using_surrogate=False,
                   true_linear_leq_constraint=None,
                   precompute=False,
                   attempt_feasible_recovery=False,
                   attempt_infeasible_recovery=False,
                   revert_to_last_safe_point=False,
                   *args,
                   **kwargs):
        """
        Configure the optimizer for a loss subject to one quadratic and one linear constraint.

        :param loss: Symbolic expression for the loss function.
        :param target: A parameterized object to optimize over; ``get_params(trainable=True)`` is called on it.
        :param quad_leq_constraint: A constraint provided as a tuple (f, epsilon), of the form
            f(*inputs) <= epsilon. This constraint will be quadratified.
        :param lin_leq_constraint: A constraint provided as a tuple (f, epsilon), of the form
            f(*inputs) <= epsilon. This constraint will be linearized.
        :param inputs: A list of symbolic variables as inputs, which could be subsampled if needed. It is
            assumed that the first dimension of these inputs corresponds to the number of data points.
        :param extra_inputs: A list of symbolic variables as extra inputs which should not be subsampled.
        :param constraint_name_1: Name stored for the quadratic constraint.
        :param constraint_name_2: Name stored for the linear constraint.
        :param using_surrogate: Flag indicating that ``lin_leq_constraint`` holds a surrogate expression
            (see the background section below).
        :param true_linear_leq_constraint: Expression used to evaluate the linear constraint when
            ``using_surrogate`` is set and ``precompute`` is not.
        :param precompute: Use an 'input' for the linearization constant instead of
            ``true_linear_leq_constraint``. If set, it overrides the surrogate handling. When using
            precompute, the last input is the precomputed linearization constant.
        :param attempt_feasible_recovery: Concerns cases where x=0 is an infeasible point but the problem
            is still feasible. Only stored on the optimizer here.
        :param attempt_infeasible_recovery: Concerns cases where the optimization problem is entirely
            infeasible. Only stored on the optimizer here.
        :param revert_to_last_safe_point: Behavior protocol for the situation where the optimization problem
            is entirely infeasible: reset the parameters to the last point that satisfied the constraint.
            Only stored on the optimizer here.
        :return: No return value.

        Background on ``using_surrogate`` and ``true_linear_leq_constraint``:

        In rllab, when we optimize a policy, we minimize a "surrogate loss" function (or, if you prefer,
        maximize a surrogate return). The surrogate loss function is

                mean( lr * advantage ),

        where 'lr' is the likelihood ratio of the new policy with respect to the old policy,

                lr(s,a) = pi_new(a|s) / pi_old(a|s).

        This surrogate is chosen because its gradient equals the gradient of the true objective function
        when pi_new = pi_old. The quantity we really want to optimize, however, is

                J(pi) = E_{tau ~ pi} [R(tau)].

        Evaluating the surrogate loss at pi_old does not measure J(pi_old). Usually that does not matter,
        because J(pi_old) is never needed. In this optimization procedure, though, a directly analogous
        quantity - the expected safety return - is needed, because its value matters for constraint
        enforcement in the linear approximation.

        ``using_surrogate`` and ``true_linear_leq_constraint`` therefore handle the case where the
        ``lin_leq_constraint`` argument is really a SURROGATE constraint, which gives a good gradient,
        while a different symbolic expression is needed to actually evaluate the linear constraint.
        """

        self.precompute = precompute
        self.attempt_feasible_recovery = attempt_feasible_recovery
        self.attempt_infeasible_recovery = attempt_infeasible_recovery
        self.revert_to_last_safe_point = revert_to_last_safe_point

        inputs = tuple(inputs)
        extra_inputs = tuple() if extra_inputs is None else tuple(extra_inputs)
        all_inputs = inputs + extra_inputs

        constraint_term_1, constraint_value_1 = quad_leq_constraint
        constraint_term_2, constraint_value_2 = lin_leq_constraint

        params = target.get_params(trainable=True)
        flat_grad = ext.flatten_tensor_variables(
            theano.grad(loss, wrt=params, disconnected_inputs='warn'))

        # The gradient must be taken from the expression passed in lin_leq_constraint, i.e.
        # before constraint_term_2 is possibly replaced below.
        flat_lin_constraint_grad = ext.flatten_tensor_variables(
            theano.grad(constraint_term_2, wrt=params, disconnected_inputs='warn'))

        if using_surrogate and not precompute:
            # Evaluate the linear constraint with the true expression rather than the surrogate.
            constraint_term_2 = true_linear_leq_constraint

        self._hvp_approach.update_opt(
            f=constraint_term_1,
            target=target,
            inputs=all_inputs,
            reg_coeff=self._reg_coeff,
        )

        self._target = target
        self._max_quad_constraint_val = constraint_value_1
        self._max_lin_constraint_val = constraint_value_2
        self._constraint_name_1 = constraint_name_1
        self._constraint_name_2 = constraint_name_2

        def deferred(outputs, log_name):
            # Thunk wrapping the compile call; presumably ext.lazydict calls it on first use.
            return lambda: ext.compile_function(
                inputs=all_inputs,
                outputs=outputs,
                log_name=log_name,
            )

        self._opt_fun = ext.lazydict(
            f_loss=deferred(loss, "f_loss"),
            f_grad=deferred(flat_grad, "f_grad"),
            f_quad_constraint=deferred(constraint_term_1, "quad_constraint"),
            f_lin_constraint=deferred(constraint_term_2, "lin_constraint"),
            f_lin_constraint_grad=deferred(flat_lin_constraint_grad, "lin_constraint_grad"),
            f_loss_constraint=deferred(
                [loss, constraint_term_1, constraint_term_2], "f_loss_constraint"),
        )

        self.last_safe_point = None
        self._last_lin_pred_S = 0
        self._last_surr_pred_S = 0

Exemple #12

0

Afficher le fichier

 def get_opt_output(gradients):
     """Return [loss, flattened gradients], both cast to float64.

     When ``gradients`` is None they are derived from ``loss`` with respect to
     the trainable parameters of ``target`` (both taken from the enclosing scope).
     """
     grads = gradients
     if grads is None:
         trainable_params = target.get_params(trainable=True)
         grads = theano.grad(loss, trainable_params)
     flat_grad = flatten_tensor_variables(grads)
     return [expr.astype('float64') for expr in (loss, flat_grad)]

Exemple #13

0

Afficher le fichier

Fichier : penalty_lbfgs_optimizer.py Projet : AtousaTorabi/rllab

 def get_opt_output():
     """Return the penalized loss and its flattened gradient as float64 expressions.

     ``penalized_loss`` and ``target`` are taken from the enclosing scope.
     """
     params = target.get_params(trainable=True)
     grads = theano.grad(penalized_loss, params, disconnected_inputs='ignore')
     flat_grad = flatten_tensor_variables(grads)
     loss_f64 = penalized_loss.astype('float64')
     grad_f64 = flat_grad.astype('float64')
     return [loss_f64, grad_f64]

Exemple #14

0

Afficher le fichier

    def update_opt(self, loss, target, quad_leq_constraint, lin_leq_constraint, inputs, 
                    extra_inputs=None, 
                    constraint_name_1="quad_constraint",
                    constraint_name_2="lin_constraint", 
                    using_surrogate=False,
                    true_linear_leq_constraint=None,
                    precompute=False,
                    attempt_feasible_recovery=False,
                    attempt_infeasible_recovery=False,
                    revert_to_last_safe_point=False,
                    *args, **kwargs):
        """
        Set up the optimizer for a loss with a quadratic and a linear constraint.

        :param loss: Symbolic expression for the loss.
        :param target: Object to optimize over; ``get_params(trainable=True)`` is called on it.
        :param quad_leq_constraint: Pair (expression, limit); the expression is handed to the
            Hessian-vector-product helper.
        :param lin_leq_constraint: Pair (expression, limit); the gradient of the expression is compiled
            as ``f_lin_constraint_grad``.
        :param inputs: Iterable of symbolic inputs; converted to a tuple.
        :param extra_inputs: Optional iterable of further symbolic inputs, appended after ``inputs``.
        :param constraint_name_1: Name stored for the quadratic constraint.
        :param constraint_name_2: Name stored for the linear constraint.
        :param using_surrogate: When true and ``precompute`` is false, ``true_linear_leq_constraint``
            replaces the linear constraint expression for evaluation (its gradient is still taken from
            the expression in ``lin_leq_constraint``).
        :param true_linear_leq_constraint: Expression used for evaluation in the case above.
        :param precompute: Stored on the optimizer; disables the replacement described above.
        :param attempt_feasible_recovery: Stored on the optimizer.
        :param attempt_infeasible_recovery: Stored on the optimizer.
        :param revert_to_last_safe_point: Stored on the optimizer.
        :return: No return value.
        """

        self.precompute = precompute
        self.attempt_feasible_recovery = attempt_feasible_recovery
        self.attempt_infeasible_recovery = attempt_infeasible_recovery
        self.revert_to_last_safe_point = revert_to_last_safe_point

        inputs = tuple(inputs)
        if extra_inputs is not None:
            extra_inputs = tuple(extra_inputs)
        else:
            extra_inputs = tuple()
        combined_inputs = inputs + extra_inputs

        quad_term, quad_limit = quad_leq_constraint
        lin_term, lin_limit = lin_leq_constraint

        params = target.get_params(trainable=True)
        loss_grads = theano.grad(loss, wrt=params, disconnected_inputs='warn')
        flat_grad = ext.flatten_tensor_variables(loss_grads)

        # Differentiate the expression given in lin_leq_constraint before it is possibly swapped out.
        lin_grads = theano.grad(lin_term, wrt=params, disconnected_inputs='warn')
        flat_lin_constraint_grad = ext.flatten_tensor_variables(lin_grads)

        if using_surrogate and not precompute:
            lin_term = true_linear_leq_constraint

        self._hvp_approach.update_opt(f=quad_term, target=target,
                                      inputs=combined_inputs,
                                      reg_coeff=self._reg_coeff)

        self._target = target
        self._max_quad_constraint_val = quad_limit
        self._max_lin_constraint_val = lin_limit
        self._constraint_name_1 = constraint_name_1
        self._constraint_name_2 = constraint_name_2

        def make_thunk(outputs, log_name):
            # Binding the arguments here avoids late-binding surprises in the loop below.
            return lambda: ext.compile_function(
                inputs=combined_inputs,
                outputs=outputs,
                log_name=log_name,
            )

        # (dictionary key, symbolic outputs, log name) for every function to expose.
        compile_specs = (
            ("f_loss", loss, "f_loss"),
            ("f_grad", flat_grad, "f_grad"),
            ("f_quad_constraint", quad_term, "quad_constraint"),
            ("f_lin_constraint", lin_term, "lin_constraint"),
            ("f_lin_constraint_grad", flat_lin_constraint_grad, "lin_constraint_grad"),
            ("f_loss_constraint", [loss, quad_term, lin_term], "f_loss_constraint"),
        )
        thunks = dict(
            (key, make_thunk(outputs, log_name))
            for key, outputs, log_name in compile_specs
        )
        self._opt_fun = ext.lazydict(**thunks)

        self.last_safe_point = None
        self._last_lin_pred_S = 0
        self._last_surr_pred_S = 0

Exemple #15

0

Afficher le fichier

Fichier : lbfgs_optimizer.py Projet : AtousaTorabi/rllab

 def get_opt_output():
     """Return the loss and its flattened gradient, each cast to float64.

     ``loss`` and ``target`` are taken from the enclosing scope.
     """
     grads = theano.grad(loss, target.get_params(trainable=True))
     flat_grad = flatten_tensor_variables(grads)
     return [expr.astype('float64') for expr in (loss, flat_grad)]

Exemple #16

0

Afficher le fichier

    def update_opt(self, loss, target, leq_constraint, inputs, extra_inputs=None, constraint_name="constraint", *args,
                   **kwargs):
        """
        Build the lazily compiled functions needed to optimize ``loss`` under a single constraint.

        :param loss: Symbolic expression for the loss function.
        :param target: A parameterized object to optimize over; ``get_params(trainable=True)`` is called on it.
        :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
        :param inputs: A list of symbolic variables as inputs, which could be subsampled if needed. It is assumed
        that the first dimension of these inputs should correspond to the number of data points
        :param extra_inputs: A list of symbolic variables as extra inputs which should not be subsampled
        :param constraint_name: Name stored on the optimizer for the constraint.
        :return: No return value.
        """

        inputs = tuple(inputs)
        extra_inputs = tuple() if extra_inputs is None else tuple(extra_inputs)

        constraint_term, constraint_value = leq_constraint

        params = target.get_params(trainable=True)
        grads = theano.grad(loss, wrt=params)
        flat_grad = ext.flatten_tensor_variables(grads)

        # Hessian-vector product of the constraint: differentiate sum_i <g_i, x_i> with respect to the
        # parameters, where g_i are the constraint gradients and x_i are placeholder tensors shaped like
        # the parameters.
        constraint_grads = theano.grad(constraint_term, wrt=params)
        xs = tuple([ext.new_tensor_like("%s x" % p.name, p) for p in params])
        # The built-in zip works on both Python 2 and Python 3; itertools.izip exists only on Python 2.
        Hx_plain_splits = TT.grad(
            TT.sum([TT.sum(g * x) for g, x in zip(constraint_grads, xs)]),
            wrt=params,
        )
        Hx_plain = TT.concatenate([TT.flatten(s) for s in Hx_plain_splits])

        self._target = target
        self._max_constraint_val = constraint_value
        self._constraint_name = constraint_name

        if self._debug_nan:
            # Imported only when needed so the debugging mode is not a hard dependency.
            from theano.compile.nanguardmode import NanGuardMode
            mode = NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
        else:
            mode = None

        # Each entry is a thunk; presumably ext.lazydict only calls it when the key is first used.
        self._opt_fun = ext.lazydict(
            f_loss=lambda: ext.compile_function(
                inputs=inputs + extra_inputs,
                outputs=loss,
                log_name="f_loss",
                mode=mode,
            ),
            f_grad=lambda: ext.compile_function(
                inputs=inputs + extra_inputs,
                outputs=flat_grad,
                log_name="f_grad",
                mode=mode,
            ),
            f_Hx_plain=lambda: ext.compile_function(
                inputs=inputs + extra_inputs + xs,
                outputs=Hx_plain,
                log_name="f_Hx_plain",
                mode=mode,
            ),
            f_constraint=lambda: ext.compile_function(
                inputs=inputs + extra_inputs,
                outputs=constraint_term,
                log_name="constraint",
                mode=mode,
            ),
            f_loss_constraint=lambda: ext.compile_function(
                inputs=inputs + extra_inputs,
                outputs=[loss, constraint_term],
                log_name="f_loss_constraint",
                mode=mode,
            ),
        )

Exemple #17

0

Afficher le fichier

Fichier : lbfgs_optimizer.py Projet : QuantCollective/maml_rl

 def get_opt_output(gradients):
     """Return the loss and the flattened gradients as float64 expressions.

     ``gradients`` may be None, in which case the gradient of ``loss`` with
     respect to the trainable parameters of ``target`` is used instead
     (``loss`` and ``target`` are taken from the enclosing scope).
     """
     if gradients is not None:
         grads = gradients
     else:
         grads = theano.grad(loss, target.get_params(trainable=True))
     flat_grad = flatten_tensor_variables(grads)
     loss_f64 = loss.astype('float64')
     grad_f64 = flat_grad.astype('float64')
     return [loss_f64, grad_f64]