Example #1
    def update_opt(self, f, target, inputs, reg_coeff):
        self.target = target
        self.reg_coeff = reg_coeff
        params = target.get_params(trainable=True)

        constraint_grads = tf.gradients(f, xs=params)
        for idx, (grad, param) in enumerate(zip(constraint_grads, params)):
            if grad is None:
                constraint_grads[idx] = tf.zeros_like(param)

        xs = tuple([
            tensor_utils.new_tensor_like(p.name.split(":")[0], p)
            for p in params
        ])

        def Hx_plain():
            Hx_plain_splits = tf.gradients(
                tf.reduce_sum(
                    tf.stack([
                        tf.reduce_sum(g * x)
                        for g, x in zip(constraint_grads, xs)
                    ])), params)
            for idx, (Hx, param) in enumerate(zip(Hx_plain_splits, params)):
                if Hx is None:
                    Hx_plain_splits[idx] = tf.zeros_like(param)
            return tensor_utils.flatten_tensor_variables(Hx_plain_splits)

        self._opt_fun = LazyDict(
            f_Hx_plain=lambda: tensor_utils.compile_function(
                inputs=inputs + xs,
                outputs=Hx_plain(),
                log_name="f_Hx_plain",
            ), )
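
The Hx_plain graph above is Pearlmutter's trick: the Hessian-vector product is the gradient of the inner product between the first gradient and the vector, so the full Hessian is never materialized. Below is a minimal eager-mode sketch of the same identity using tf.GradientTape instead of the graph-mode tf.gradients calls above; the toy function and the variable w are illustrative stand-ins, not part of the original code.

import tensorflow as tf

# Pearlmutter's trick: H @ v == grad_w( grad_w(f) . v ), computed with two nested tapes.
w = tf.Variable([1.0, 2.0, 3.0])
v = tf.constant([0.5, -1.0, 2.0])  # the vector to multiply by the Hessian

with tf.GradientTape() as outer:
    with tf.GradientTape() as inner:
        y = tf.reduce_sum(w ** 2) + tf.reduce_prod(w)  # any scalar function of w
    g = inner.gradient(y, w)   # first gradient, still recorded on the outer tape
    gv = tf.reduce_sum(g * v)  # scalar <g, v>, mirrors the tf.stack/reduce_sum above
hv = outer.gradient(gv, w)     # equals Hessian(y) @ v; no explicit Hessian is formed
print(hv.numpy())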
Example #2
    def update_opt(self, loss, target, inputs, extra_inputs=None, **kwargs):
        """Construct operation graph for the optimizer.

        Args:
            loss (tf.Tensor): Loss objective to minimize.
            target (object): Target object to optimize. The object should
                implement `get_params()` and `get_param_values()`.
            inputs (list[tf.Tensor]): List of input placeholders.
            extra_inputs (list[tf.Tensor]): List of extra input placeholders.
            kwargs (dict): Extra unused keyword arguments. Some optimizers
                have extra input, e.g. KL constraint.

        """
        del kwargs
        with tf.name_scope(self._name):
            self._target = target
            tf_optimizer = make_optimizer(self._tf_optimizer,
                                          **self._learning_rate)
            self._train_op = tf_optimizer.minimize(
                loss, var_list=target.get_params())

            if extra_inputs is None:
                extra_inputs = list()
            self._input_vars = inputs + extra_inputs
            self._opt_fun = LazyDict(
                f_loss=lambda: tensor_utils.compile_function(
                    inputs + extra_inputs, loss), )
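
All of these examples defer graph compilation through a LazyDict. The following is a plausible minimal sketch of what such a helper does, as an assumption about its behavior rather than the garage implementation: each entry stores a zero-argument factory that is only called, and its result cached, on the first lookup.

class LazyDict:
    """Minimal sketch: values are factories evaluated and cached on first access."""

    def __init__(self, **factories):
        self._factories = factories
        self._values = {}

    def __getitem__(self, key):
        if key not in self._values:
            self._values[key] = self._factories[key]()  # compile lazily, exactly once
        return self._values[key]


fns = LazyDict(f_loss=lambda: (lambda x: x * x))
print(fns['f_loss'](3))  # the inner function is only built on this first lookup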
Example #3
    def update_opt(self, loss, target, inputs, extra_inputs=None, **kwargs):
        """
        :param loss: Symbolic expression for the loss function.
        :param target: A parameterized object to optimize over. It should
         implement methods of the
        :class:`garage.core.paramerized.Parameterized` class.
        :param leq_constraint: A constraint provided as a tuple (f, epsilon),
         of the form f(*inputs) <= epsilon.
        :param inputs: A list of symbolic variables as inputs
        :return: No return value.
        """
        with tf.name_scope(self._name,
                           values=[
                               loss,
                               target.get_params(trainable=True), inputs,
                               extra_inputs
                           ]):

            self._target = target

            self._train_op = self._tf_optimizer.minimize(
                loss, var_list=target.get_params(trainable=True))

            # updates = OrderedDict(
            #     [(k, v.astype(k.dtype)) for k, v in updates.iteritems()])

            if extra_inputs is None:
                extra_inputs = list()
            self._input_vars = inputs + extra_inputs
            self._opt_fun = LazyDict(
                f_loss=lambda: tensor_utils.compile_function(
                    inputs + extra_inputs, loss), )
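
Once update_opt has built self._train_op, optimization is just a matter of repeatedly running that op with the input placeholders fed. The following is a minimal self-contained sketch of that pattern in TF1-style graph mode; the placeholder, variable, and data are toy stand-ins, not names from the original code.

import numpy as np
import tensorflow as tf

tf.compat.v1.disable_eager_execution()

x_ph = tf.compat.v1.placeholder(tf.float32, (None, 1), name='x')
w = tf.Variable([[0.0]])
loss = tf.reduce_mean((tf.matmul(x_ph, w) - 2.0 * x_ph) ** 2)
train_op = tf.compat.v1.train.AdamOptimizer(0.1).minimize(loss, var_list=[w])

with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    data = np.random.randn(64, 1).astype(np.float32)
    for _ in range(200):
        sess.run(train_op, feed_dict={x_ph: data})
    print(sess.run(w))  # approaches the target slope of 2.0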
Example #4
    def update_hvp(self, f, target, inputs, reg_coeff, name='PearlmutterHvp'):
        """Build the symbolic graph to compute the Hessian-vector product.

        Args:
            f (tf.Tensor): The function whose Hessian needs to be computed.
            target (garage.tf.policies.Policy): A parameterized object to
                optimize over.
            inputs (tuple[tf.Tensor]): The inputs for function f.
            reg_coeff (float): A small value so that A -> A + reg*I.
            name (str): Name to be used in tf.name_scope.

        """
        self._target = target
        self._reg_coeff = reg_coeff
        params = target.get_params()
        with tf.name_scope(name):
            constraint_grads = tf.gradients(f,
                                            xs=params,
                                            name='gradients_constraint')
            for idx, (grad, param) in enumerate(zip(constraint_grads, params)):
                if grad is None:
                    constraint_grads[idx] = tf.zeros_like(param)

            xs = tuple([
                tensor_utils.new_tensor_like(p.name.split(':')[0], p)
                for p in params
            ])

            def hx_plain():
                """Computes product of Hessian(f) and vector v.

                Returns:
                    tf.Tensor: Symbolic result.

                """
                with tf.name_scope('hx_plain'):
                    with tf.name_scope('hx_function'):
                        hx_f = tf.reduce_sum(
                            tf.stack([
                                tf.reduce_sum(g * x)
                                for g, x in zip(constraint_grads, xs)
                            ]))
                    hx_plain_splits = tf.gradients(hx_f,
                                                   params,
                                                   name='gradients_hx_plain')
                    for idx, (hx,
                              param) in enumerate(zip(hx_plain_splits,
                                                      params)):
                        if hx is None:
                            hx_plain_splits[idx] = tf.zeros_like(param)
                    return tensor_utils.flatten_tensor_variables(
                        hx_plain_splits)

            self._hvp_fun = LazyDict(
                f_hx_plain=lambda: tensor_utils.compile_function(
                    inputs=inputs + xs,
                    outputs=hx_plain(),
                    log_name='f_hx_plain',
                ), )
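
The compiled f_hx_plain is only useful together with a Krylov solver: conjugate gradient can solve (H + reg*I) x = g while touching H exclusively through Hessian-vector products. Below is a minimal NumPy sketch of that consumer; the explicit 2x2 test problem is illustrative only, and `hvp` stands in for the compiled f_hx_plain.

import numpy as np

def conjugate_gradient(hvp, g, reg_coeff=1e-5, iters=10):
    """Solve (H + reg_coeff * I) x = g using only Hessian-vector products."""
    x = np.zeros_like(g)
    r = g.copy()  # residual of the initial guess x = 0
    p = r.copy()
    r_dot = r @ r
    for _ in range(iters):
        Ap = hvp(p) + reg_coeff * p
        alpha = r_dot / (p @ Ap)
        x += alpha * p
        r -= alpha * Ap
        new_r_dot = r @ r
        if new_r_dot < 1e-10:  # residual small enough; stop early
            break
        p = r + (new_r_dot / r_dot) * p
        r_dot = new_r_dot
    return x

H = np.array([[4.0, 1.0], [1.0, 3.0]])
g = np.array([1.0, 2.0])
print(conjugate_gradient(lambda v: H @ v, g))  # close to np.linalg.solve(H, g)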
Example #5
    def update_opt(self,
                   loss,
                   target,
                   leq_constraint,
                   inputs,
                   constraint_name='constraint',
                   name=None,
                   *args,
                   **kwargs):
        """
        :param loss: Symbolic expression for the loss function.
        :param target: A parameterized object to optimize over. It should
         implement methods of the
         :class:`garage.core.parameterized.Parameterized` class.
        :param leq_constraint: A constraint provided as a tuple (f, epsilon),
         of the form f(*inputs) <= epsilon.
        :param inputs: A list of symbolic variables as inputs
        :return: No return value.
        """
        params = target.get_params(trainable=True)
        with tf.name_scope(name, 'PenaltyLbfgsOptimizer',
                           [leq_constraint, loss, params]):
            constraint_term, constraint_value = leq_constraint
            penalty_var = tf.compat.v1.placeholder(tf.float32,
                                                   tuple(),
                                                   name='penalty')
            penalized_loss = loss + penalty_var * constraint_term

            self._target = target
            self._max_constraint_val = constraint_value
            self._constraint_name = constraint_name

            def get_opt_output():
                with tf.name_scope('get_opt_output',
                                   values=[params, penalized_loss]):
                    grads = tf.gradients(penalized_loss, params)
                    for idx, (grad, param) in enumerate(zip(grads, params)):
                        if grad is None:
                            grads[idx] = tf.zeros_like(param)
                    flat_grad = tensor_utils.flatten_tensor_variables(grads)
                    return [
                        tf.cast(penalized_loss, tf.float64),
                        tf.cast(flat_grad, tf.float64),
                    ]

            self._opt_fun = LazyDict(
                f_loss=lambda: tensor_utils.compile_function(
                    inputs, loss, log_name='f_loss'),
                f_constraint=lambda: tensor_utils.compile_function(
                    inputs, constraint_term, log_name='f_constraint'),
                f_penalized_loss=lambda: tensor_utils.compile_function(
                    inputs=inputs + [penalty_var],
                    outputs=[penalized_loss, loss, constraint_term],
                    log_name='f_penalized_loss',
                ),
                f_opt=lambda: tensor_utils.compile_function(
                    inputs=inputs + [penalty_var],
                    outputs=get_opt_output(),
                ))
Example #6
    def update_opt(self,
                   target,
                   leq_constraint,
                   inputs,
                   extra_inputs=None,
                   constraint_name="constraint",
                   *args,
                   **kwargs):
        """Update the internal tensowflow operations.

        Parameters
        ----------
        target :
            A parameterized object to optimize over. It should implement methods of the
            :py:class:`garage.core.paramerized.Parameterized` class.
        leq_constraint : :py:class:'tensorflow.Tensor'
            The variable to be constrained.
        inputs :
            A list of symbolic variables as inputs, which could be subsampled if needed. It is assumed
            that the first dimension of these inputs should correspond to the number of data points.
        extra_inputs :
            A list of symbolic variables as extra inputs which should not be subsampled.
        """

        inputs = tuple(inputs)
        if extra_inputs is None:
            extra_inputs = tuple()
        else:
            extra_inputs = tuple(extra_inputs)

        # constraint_term, constraint_value = leq_constraint
        constraint_term = leq_constraint

        # params = target.get_params(trainable=True)

        self._hvp_approach.update_hvp(f=constraint_term,
                                      target=target,
                                      inputs=inputs + extra_inputs,
                                      reg_coeff=self._reg_coeff)

        self._target = target
        # self._max_constraint_val = constraint_value
        self._max_constraint_val = np.inf
        self._constraint_name = constraint_name

        self._opt_fun = LazyDict(
            f_constraint=lambda: tensor_utils.compile_function(
                inputs=inputs + extra_inputs,
                outputs=constraint_term,
                log_name="constraint",
            ), )
Example #7
    def update_opt(self,
                   loss,
                   target,
                   inputs,
                   extra_inputs=None,
                   name='LbfgsOptimizer',
                   **kwargs):
        """Construct operation graph for the optimizer.

        Args:
            loss (tf.Tensor): Loss objective to minimize.
            target (object): Target object to optimize. The object should
                implement `get_params()` and `get_param_values()`.
            inputs (list[tf.Tensor]): List of input placeholders.
            extra_inputs (list[tf.Tensor]): List of extra input placeholders.
            name (str): Name scope.
            kwargs (dict): Extra unused keyword arguments. Some optimizers
                have extra input, e.g. KL constraint.

        """
        del kwargs
        self._target = target
        params = target.get_params()
        with tf.name_scope(name):

            def get_opt_output():
                """Helper function to construct graph.

                Returns:
                    list[tf.Tensor]: Loss and gradient tensor.

                """
                with tf.name_scope('get_opt_output'):
                    flat_grad = tensor_utils.flatten_tensor_variables(
                        tf.gradients(loss, params))
                    return [
                        tf.cast(loss, tf.float64),
                        tf.cast(flat_grad, tf.float64)
                    ]

            if extra_inputs is None:
                extra_inputs = list()

            self._opt_fun = LazyDict(
                f_loss=lambda: tensor_utils.compile_function(
                    inputs + extra_inputs, loss),
                f_opt=lambda: tensor_utils.compile_function(
                    inputs=inputs + extra_inputs,
                    outputs=get_opt_output(),
                ))
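
get_opt_output casts the loss and flattened gradient to float64 because that is exactly the (value, gradient) pair SciPy's L-BFGS driver expects from its objective callback. Here is a minimal sketch of that consumer, with a toy quadratic standing in for the compiled f_opt; the names are illustrative, not from the original code.

import numpy as np
import scipy.optimize

def f_opt(flat_params):
    """Toy stand-in for the compiled f_opt: returns (loss, flat gradient) as float64."""
    diff = flat_params - 3.0
    return np.sum(diff ** 2), 2.0 * diff

x0 = np.zeros(5)
x_star, loss_star, info = scipy.optimize.fmin_l_bfgs_b(f_opt, x0, maxiter=20)
print(x_star)  # each coordinate converges to 3.0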
Example #8
    def update_opt(self, f, target, inputs, reg_coeff, name=None):
        self.target = target
        self.reg_coeff = reg_coeff
        params = target.get_params(trainable=True)

        with tf.name_scope(name, "FiniteDifferenceHvp",
                           [f, inputs, params, target]):
            constraint_grads = tf.gradients(f,
                                            xs=params,
                                            name="gradients_constraint")
            for idx, (grad, param) in enumerate(zip(constraint_grads, params)):
                if grad is None:
                    constraint_grads[idx] = tf.zeros_like(param)

            flat_grad = tensor_utils.flatten_tensor_variables(constraint_grads)

            def f_hx_plain(*args):
                with tf.name_scope("f_hx_plain", values=[inputs, self.target]):
                    inputs_ = args[:len(inputs)]
                    xs = args[len(inputs):]
                    flat_xs = np.concatenate(
                        [np.reshape(x, (-1, )) for x in xs])
                    param_val = self.target.get_param_values(trainable=True)
                    eps = np.cast['float32'](
                        self.base_eps / (np.linalg.norm(param_val) + 1e-8))
                    self.target.set_param_values(param_val + eps * flat_xs,
                                                 trainable=True)
                    flat_grad_dvplus = self.opt_fun["f_grad"](*inputs_)
                    self.target.set_param_values(param_val, trainable=True)
                    if self.symmetric:
                        self.target.set_param_values(param_val - eps * flat_xs,
                                                     trainable=True)
                        flat_grad_dvminus = self.opt_fun["f_grad"](*inputs_)
                        hx = (flat_grad_dvplus - flat_grad_dvminus) / (2 * eps)
                        self.target.set_param_values(param_val, trainable=True)
                    else:
                        flat_grad = self.opt_fun["f_grad"](*inputs_)
                        hx = (flat_grad_dvplus - flat_grad) / eps
                    return hx

            self.opt_fun = LazyDict(
                f_grad=lambda: tensor_utils.compile_function(
                    inputs=inputs,
                    outputs=flat_grad,
                    log_name="f_grad",
                ),
                f_hx_plain=lambda: f_hx_plain,
            )
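
The finite-difference variant above avoids a second backward pass entirely: it perturbs the flat parameters along the vector and differences the resulting gradients, using the symmetric (central) form when self.symmetric is set. Below is a minimal NumPy sketch of the same approximation on a fixed quadratic; the function and matrices are toy stand-ins.

import numpy as np

H = np.array([[2.0, 0.5], [0.5, 1.0]])  # Hessian of f(theta) = 0.5 * theta @ H @ theta

def grad_f(theta):
    return H @ theta

theta = np.array([1.0, -2.0])
v = np.array([0.3, 0.7])
eps = 1e-5 / (np.linalg.norm(theta) + 1e-8)  # same scaling idea as base_eps above

hv_symmetric = (grad_f(theta + eps * v) - grad_f(theta - eps * v)) / (2 * eps)
hv_one_sided = (grad_f(theta + eps * v) - grad_f(theta)) / eps
print(hv_symmetric, hv_one_sided, H @ v)  # all three agree closely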
Example #9
    def update_opt(self,
                   target,
                   leq_constraint,
                   inputs,
                   extra_inputs=None,
                   constraint_name="constraint",
                   *args,
                   **kwargs):
        """
        :param target: A parameterized object to optimize over. It should implement methods of the
         :class:`garage.core.parameterized.Parameterized` class.
        :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
        :param inputs: A list of symbolic variables as inputs, which could be subsampled if needed. It is assumed
         that the first dimension of these inputs corresponds to the number of data points.
        :param extra_inputs: A list of symbolic variables as extra inputs which should not be subsampled.
        :return: No return value.
        """

        inputs = tuple(inputs)
        if extra_inputs is None:
            extra_inputs = tuple()
        else:
            extra_inputs = tuple(extra_inputs)

        # constraint_term, constraint_value = leq_constraint
        constraint_term = leq_constraint

        # params = target.get_params(trainable=True)

        self._hvp_approach.update_opt(f=constraint_term,
                                      target=target,
                                      inputs=inputs + extra_inputs,
                                      reg_coeff=self._reg_coeff)

        self._target = target
        # self._max_constraint_val = constraint_value
        self._max_constraint_val = np.inf
        self._constraint_name = constraint_name

        self._opt_fun = LazyDict(
            f_constraint=lambda: tensor_utils.compile_function(
                inputs=inputs + extra_inputs,
                outputs=constraint_term,
                log_name="constraint",
            ), )
Example #10
    def update_opt(self, f, target, inputs, reg_coeff, name=None):
        self.target = target
        self.reg_coeff = reg_coeff
        params = target.get_params(trainable=True)
        with tf.name_scope(name, 'PerlmutterHvp', [f, inputs, params]):
            constraint_grads = tf.gradients(f,
                                            xs=params,
                                            name='gradients_constraint')
            for idx, (grad, param) in enumerate(zip(constraint_grads, params)):
                if grad is None:
                    constraint_grads[idx] = tf.zeros_like(param)

            xs = tuple([
                tensor_utils.new_tensor_like(p.name.split(':')[0], p)
                for p in params
            ])

            def hx_plain():
                with tf.name_scope('hx_plain',
                                   values=[constraint_grads, params, xs]):
                    with tf.name_scope('hx_function',
                                       values=[constraint_grads, xs]):
                        hx_f = tf.reduce_sum(
                            tf.stack([
                                tf.reduce_sum(g * x)
                                for g, x in zip(constraint_grads, xs)
                            ]))
                    hx_plain_splits = tf.gradients(hx_f,
                                                   params,
                                                   name='gradients_hx_plain')
                    for idx, (hx,
                              param) in enumerate(zip(hx_plain_splits,
                                                      params)):
                        if hx is None:
                            hx_plain_splits[idx] = tf.zeros_like(param)
                    return tensor_utils.flatten_tensor_variables(
                        hx_plain_splits)

            self.opt_fun = LazyDict(
                f_hx_plain=lambda: tensor_utils.compile_function(
                    inputs=inputs + xs,
                    outputs=hx_plain(),
                    log_name='f_hx_plain',
                ), )
Example #11
    def update_opt(self,
                   loss,
                   target,
                   inputs,
                   extra_inputs=None,
                   name=None,
                   *args,
                   **kwargs):
        """
        :param loss: Symbolic expression for the loss function.
        :param target: A parameterized object to optimize over. It should
         implement methods of the
         :class:`garage.core.parameterized.Parameterized` class.
        :param inputs: A list of symbolic variables as inputs.
        :param extra_inputs: A list of symbolic variables as extra inputs
         which should not be subsampled.
        :return: No return value.
        """
        self._target = target
        params = target.get_params(trainable=True)
        with tf.name_scope(name, 'LbfgsOptimizer',
                           [loss, inputs, params, extra_inputs]):

            def get_opt_output():
                with tf.name_scope('get_opt_output', values=[loss, params]):
                    flat_grad = tensor_utils.flatten_tensor_variables(
                        tf.gradients(loss, params))
                    return [
                        tf.cast(loss, tf.float64),
                        tf.cast(flat_grad, tf.float64)
                    ]

            if extra_inputs is None:
                extra_inputs = list()

            self._opt_fun = LazyDict(
                f_loss=lambda: tensor_utils.compile_function(
                    inputs + extra_inputs, loss),
                f_opt=lambda: tensor_utils.compile_function(
                    inputs=inputs + extra_inputs,
                    outputs=get_opt_output(),
                ))
Example #12
    def update_opt(
        self,
        loss,
        target,
        leq_constraint,
        inputs,
        extra_inputs=None,
        name=None,
        constraint_name='constraint',
    ):
        """Update the optimizer.

        Build the functions for computing loss, gradient, and
        the constraint value.

        Args:
            loss (tf.Tensor): Symbolic expression for the loss function.
            target (garage.tf.policies.Policy): A parameterized object to
                optimize over.
            leq_constraint (tuple[tf.Tensor, float]): A constraint provided
                as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
            inputs (list[tf.Tensor]): A list of symbolic variables as inputs,
                which could be subsampled if needed. It is assumed that the
                first dimension of these inputs should correspond to the
                number of data points.
            extra_inputs (list[tf.Tensor]): A list of symbolic variables as
                extra inputs which should not be subsampled.
            name (str): Name to be passed to tf.name_scope.
            constraint_name (str): A constraint name for the purpose of logging
                and variable names.

        """
        params = target.get_params()
        ns_vals = [loss, target, leq_constraint, inputs, extra_inputs, params]
        with tf.name_scope(name, 'ConjugateGradientOptimizer', ns_vals):
            inputs = tuple(inputs)
            if extra_inputs is None:
                extra_inputs = tuple()
            else:
                extra_inputs = tuple(extra_inputs)

            constraint_term, constraint_value = leq_constraint

            with tf.name_scope('loss_gradients', values=[loss, params]):
                grads = tf.gradients(loss, xs=params)
                for idx, (grad, param) in enumerate(zip(grads, params)):
                    if grad is None:
                        grads[idx] = tf.zeros_like(param)
                flat_grad = tensor_utils.flatten_tensor_variables(grads)

            self._hvp_approach.update_hvp(f=constraint_term,
                                          target=target,
                                          inputs=inputs + extra_inputs,
                                          reg_coeff=self._reg_coeff,
                                          name='update_opt_' + constraint_name)

            self._target = target
            self._max_constraint_val = constraint_value
            self._constraint_name = constraint_name

            self._opt_fun = LazyDict(
                f_loss=lambda: tensor_utils.compile_function(
                    inputs=inputs + extra_inputs,
                    outputs=loss,
                    log_name='f_loss',
                ),
                f_grad=lambda: tensor_utils.compile_function(
                    inputs=inputs + extra_inputs,
                    outputs=flat_grad,
                    log_name='f_grad',
                ),
                f_constraint=lambda: tensor_utils.compile_function(
                    inputs=inputs + extra_inputs,
                    outputs=constraint_term,
                    log_name='constraint',
                ),
                f_loss_constraint=lambda: tensor_utils.compile_function(
                    inputs=inputs + extra_inputs,
                    outputs=[loss, constraint_term],
                    log_name='f_loss_constraint',
                ),
            )
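
The four compiled functions built here (f_loss, f_grad, f_constraint, f_loss_constraint), together with the Hessian-vector product from the HVP approach, are what a TRPO-style optimize() step consumes: compute a conjugate-gradient search direction, scale it against the constraint, then backtrack until the loss improves and the constraint holds. The following is a minimal NumPy sketch of that line search, assuming the step direction was already obtained from a CG solve like the one sketched after Example #4; all names here are illustrative, not the garage implementation.

import numpy as np

def backtracking_step(params, step_dir, hvp, f_loss, f_constraint,
                      max_constraint_val=0.01, backtrack_ratio=0.8,
                      max_backtracks=15):
    """Shrink the step until the loss improves and the constraint is satisfied."""
    # scale so the quadratic model of the constraint is met with equality
    step_size = np.sqrt(2.0 * max_constraint_val /
                        (step_dir @ hvp(step_dir) + 1e-8))
    loss_before = f_loss(params)
    for i in range(max_backtracks):
        candidate = params - backtrack_ratio ** i * step_size * step_dir
        if (f_loss(candidate) < loss_before
                and f_constraint(candidate) <= max_constraint_val):
            return candidate
    return params  # no acceptable step found; fall back to the old parameters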
Example #13
    def update_hvp(self, f, target, inputs, reg_coeff, name=None):
        """Build the symbolic graph to compute the Hessian-vector product.

        Args:
            f (tf.Tensor): The function whose Hessian needs to be computed.
            target (garage.tf.policies.Policy): A parameterized object to
                optimize over.
            inputs (tuple[tf.Tensor]): The inputs for function f.
            reg_coeff (float): A small value so that A -> A + reg*I.
            name (str): Name to be used in tf.name_scope.

        """
        self._target = target
        self._reg_coeff = reg_coeff
        params = target.get_params()
        with tf.name_scope(name, 'FiniteDifferenceHvp',
                           [f, inputs, params, target]):
            constraint_grads = tf.gradients(f,
                                            xs=params,
                                            name='gradients_constraint')
            for idx, (grad, param) in enumerate(zip(constraint_grads, params)):
                if grad is None:
                    constraint_grads[idx] = tf.zeros_like(param)
            flat_grad = tensor_utils.flatten_tensor_variables(constraint_grads)

            def f_hx_plain(*args):
                """Computes product of Hessian(f) and vector v.

                Args:
                    args (tuple[numpy.ndarray]): Contains the inputs of
                        function f and the vector v.

                Returns:
                    tf.Tensor: Symbolic result.

                """
                with tf.name_scope('f_hx_plain', values=[inputs,
                                                         self._target]):
                    inputs_ = args[:len(inputs)]
                    xs = args[len(inputs):]
                    flat_xs = np.concatenate(
                        [np.reshape(x, (-1, )) for x in xs])
                    param_val = self._target.get_param_values()
                    eps = np.cast['float32'](
                        self.base_eps / (np.linalg.norm(param_val) + 1e-8))
                    self._target.set_param_values(param_val + eps * flat_xs)
                    flat_grad_dvplus = self._hvp_fun['f_grad'](*inputs_)
                    self._target.set_param_values(param_val)
                    if self.symmetric:
                        self._target.set_param_values(param_val -
                                                      eps * flat_xs)
                        flat_grad_dvminus = self._hvp_fun['f_grad'](*inputs_)
                        hx = (flat_grad_dvplus - flat_grad_dvminus) / (2 * eps)
                        self._target.set_param_values(param_val)
                    else:
                        flat_grad = self._hvp_fun['f_grad'](*inputs_)
                        hx = (flat_grad_dvplus - flat_grad) / eps
                    return hx

            self._hvp_fun = LazyDict(
                f_grad=lambda: tensor_utils.compile_function(
                    inputs=inputs,
                    outputs=flat_grad,
                    log_name='f_grad',
                ),
                f_hx_plain=lambda: f_hx_plain,
            )
Example #14
    def update_opt(self,
                   loss,
                   target,
                   leq_constraint,
                   inputs,
                   extra_inputs=None,
                   name=None,
                   constraint_name="constraint",
                   *args,
                   **kwargs):
        """
        :param loss: Symbolic expression for the loss function.
        :param target: A parameterized object to optimize over. It should
         implement methods of the
         :class:`garage.core.parameterized.Parameterized` class.
        :param leq_constraint: A constraint provided as a tuple (f, epsilon),
         of the form f(*inputs) <= epsilon.
        :param inputs: A list of symbolic variables as inputs, which could be
         subsampled if needed. It is assumed that the first dimension of these
         inputs corresponds to the number of data points.
        :param extra_inputs: A list of symbolic variables as extra inputs which
         should not be subsampled.
        :return: No return value.
        """
        params = target.get_params(trainable=True)
        with tf.name_scope(
                name, "ConjugateGradientOptimizer",
                [loss, target, leq_constraint, inputs, extra_inputs,
                 params]):  # yapf: disable
            inputs = tuple(inputs)
            if extra_inputs is None:
                extra_inputs = tuple()
            else:
                extra_inputs = tuple(extra_inputs)

            constraint_term, constraint_value = leq_constraint

            with tf.name_scope("loss_gradients", values=[loss, params]):
                grads = tf.gradients(loss, xs=params)
                for idx, (grad, param) in enumerate(zip(grads, params)):
                    if grad is None:
                        grads[idx] = tf.zeros_like(param)
                flat_grad = tensor_utils.flatten_tensor_variables(grads)

            self._hvp_approach.update_opt(
                f=constraint_term,
                target=target,
                inputs=inputs + extra_inputs,
                reg_coeff=self._reg_coeff,
                name="update_opt_" + constraint_name)

            self._target = target
            self._max_constraint_val = constraint_value
            self._constraint_name = constraint_name

            self._opt_fun = LazyDict(
                f_loss=lambda: tensor_utils.compile_function(
                    inputs=inputs + extra_inputs,
                    outputs=loss,
                    log_name="f_loss",
                ),
                f_grad=lambda: tensor_utils.compile_function(
                    inputs=inputs + extra_inputs,
                    outputs=flat_grad,
                    log_name="f_grad",
                ),
                f_constraint=lambda: tensor_utils.compile_function(
                    inputs=inputs + extra_inputs,
                    outputs=constraint_term,
                    log_name="constraint",
                ),
                f_loss_constraint=lambda: tensor_utils.compile_function(
                    inputs=inputs + extra_inputs,
                    outputs=[loss, constraint_term],
                    log_name="f_loss_constraint",
                ),
            )
Example #15
    def update_opt(self,
                   loss,
                   target,
                   leq_constraint,
                   inputs,
                   constraint_name='constraint',
                   name=None,
                   **kwargs):
        """Construct operation graph for the optimizer.

        Args:
            loss (tf.Tensor): Loss objective to minimize.
            target (object): Target object to optimize. The object should
                implement `get_params()` and `get_param_values()`.
            leq_constraint (tuple): It contains a tf.Tensor and a float value.
                The tf.Tensor represents the constraint term, and the float
                value is the constraint value.
            inputs (list[tf.Tensor]): List of input placeholders.
            constraint_name (str): Constraint name for logging.
            name (str): Name scope.
            kwargs (dict): Extra unused keyword arguments. Some optimizers
                have extra input, e.g. KL constraint.

        """
        params = target.get_params()
        with tf.name_scope(name, 'PenaltyLbfgsOptimizer',
                           [leq_constraint, loss, params]):
            constraint_term, constraint_value = leq_constraint
            penalty_var = tf.compat.v1.placeholder(tf.float32,
                                                   tuple(),
                                                   name='penalty')
            penalized_loss = loss + penalty_var * constraint_term

            self._target = target
            self._max_constraint_val = constraint_value
            self._constraint_name = constraint_name

            def get_opt_output():
                """Helper function to construct graph.

                Returns:
                    list[tf.Tensor]: Penalized loss and gradient tensor.

                """
                with tf.name_scope('get_opt_output',
                                   values=[params, penalized_loss]):
                    grads = tf.gradients(penalized_loss, params)
                    for idx, (grad, param) in enumerate(zip(grads, params)):
                        if grad is None:
                            grads[idx] = tf.zeros_like(param)
                    flat_grad = tensor_utils.flatten_tensor_variables(grads)
                    return [
                        tf.cast(penalized_loss, tf.float64),
                        tf.cast(flat_grad, tf.float64),
                    ]

            self._opt_fun = LazyDict(
                f_loss=lambda: tensor_utils.compile_function(
                    inputs, loss, log_name='f_loss'),
                f_constraint=lambda: tensor_utils.compile_function(
                    inputs, constraint_term, log_name='f_constraint'),
                f_penalized_loss=lambda: tensor_utils.compile_function(
                    inputs=inputs + [penalty_var],
                    outputs=[penalized_loss, loss, constraint_term],
                    log_name='f_penalized_loss',
                ),
                f_opt=lambda: tensor_utils.compile_function(
                    inputs=inputs + [penalty_var],
                    outputs=get_opt_output(),
                ))
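
What distinguishes the penalty variant is the outer loop around L-BFGS: the penalized loss is minimized for a trial penalty, the constraint compiled as f_constraint is evaluated at the solution, and the penalty coefficient is scaled until the constraint value falls below the epsilon from leq_constraint. Below is a rough self-contained sketch of that search with SciPy's L-BFGS standing in for the compiled f_opt; the update rule is an assumption about the general pattern, not the garage implementation, and all callback names are hypothetical.

import numpy as np
import scipy.optimize

def penalty_lbfgs(x0, f_loss_grad, f_constraint_grad, epsilon,
                  penalty=1.0, scale=2.0, max_tries=10):
    """Minimize loss subject to constraint <= epsilon via an adaptive penalty."""
    x = np.asarray(x0, dtype=np.float64)
    for _ in range(max_tries):
        def penalized(x_):
            loss, g_loss = f_loss_grad(x_)
            con, g_con = f_constraint_grad(x_)
            return loss + penalty * con, g_loss + penalty * g_con
        x, _, _ = scipy.optimize.fmin_l_bfgs_b(penalized, x, maxiter=20)
        con, _ = f_constraint_grad(x)
        if con <= epsilon:
            return x, penalty  # constraint satisfied at this penalty
        penalty *= scale       # constraint violated: penalize harder and retry
    return x, penalty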