def update_opt(self, f, target, inputs, reg_coeff):
    """Build the symbolic graph for the Hessian-vector product of f.

    Args:
        f: Scalar symbolic function whose Hessian is needed.
        target: A parameterized object exposing ``get_params()``; presumably
            a policy/baseline — confirm against callers.
        inputs: Tuple of input tensors for function f.
        reg_coeff: A small value so that A -> A + reg*I.
    """
    self.target = target
    self.reg_coeff = reg_coeff
    params = target.get_params(trainable=True)

    constraint_grads = tf.gradients(f, xs=params)
    # A None gradient means f does not depend on that parameter; substitute
    # zeros so downstream per-parameter shapes stay consistent.
    for idx, (grad, param) in enumerate(zip(constraint_grads, params)):
        if grad is None:
            constraint_grads[idx] = tf.zeros_like(param)

    # One fresh tensor per parameter: together they form the vector v in
    # the Hessian-vector product H(f) @ v.
    xs = tuple(
        tensor_utils.new_tensor_like(p.name.split(":")[0], p)
        for p in params)

    def hx_plain():
        """Return the flattened symbolic product of Hessian(f) and xs."""
        # grad(sum_i <g_i, x_i>) w.r.t. params is the Pearlmutter trick
        # for computing H(f) @ x without forming H explicitly.
        hx_plain_splits = tf.gradients(
            tf.reduce_sum(
                tf.stack([
                    tf.reduce_sum(g * x)
                    for g, x in zip(constraint_grads, xs)
                ])), params)
        for idx, (hx, param) in enumerate(zip(hx_plain_splits, params)):
            if hx is None:
                hx_plain_splits[idx] = tf.zeros_like(param)
        return tensor_utils.flatten_tensor_variables(hx_plain_splits)

    # Lazily compiled so graph construction cost is paid only on first use.
    self._opt_fun = LazyDict(
        f_Hx_plain=lambda: tensor_utils.compile_function(
            inputs=inputs + xs,
            outputs=hx_plain(),
            log_name="f_Hx_plain",
        ),
    )
def update_hvp(self, f, target, inputs, reg_coeff, name='PearlmutterHvp'):
    """Build the symbolic graph to compute the Hessian-vector product.

    Args:
        f (tf.Tensor): The function whose Hessian needs to be computed.
        target (garage.tf.policies.Policy): A parameterized object to
            optimize over.
        inputs (tuple[tf.Tensor]): The inputs for function f.
        reg_coeff (float): A small value so that A -> A + reg*I.
        name (str): Name to be used in tf.name_scope.
    """
    self._target = target
    self._reg_coeff = reg_coeff
    params = target.get_params()
    with tf.name_scope(name):
        constraint_grads = tf.gradients(f,
                                        xs=params,
                                        name='gradients_constraint')
        # A None gradient means f does not depend on that parameter;
        # substitute zeros so per-parameter shapes stay consistent.
        for idx, (grad, param) in enumerate(zip(constraint_grads, params)):
            if grad is None:
                constraint_grads[idx] = tf.zeros_like(param)

        # One fresh tensor per parameter: the vector v in H(f) @ v.
        xs = tuple([
            tensor_utils.new_tensor_like(p.name.split(':')[0], p)
            for p in params
        ])

        def hx_plain():
            """Computes product of Hessian(f) and vector v.

            Returns:
                tf.Tensor: Symbolic result.
            """
            with tf.name_scope('hx_plain'):
                with tf.name_scope('hx_function'):
                    # Pearlmutter trick: grad(<grad f, v>) == H(f) @ v.
                    # (Fixed: a stray trailing comma previously made hx_f
                    # a 1-tuple rather than a scalar tensor.)
                    hx_f = tf.reduce_sum(
                        tf.stack([
                            tf.reduce_sum(g * x)
                            for g, x in zip(constraint_grads, xs)
                        ]))
                hx_plain_splits = tf.gradients(hx_f,
                                               params,
                                               name='gradients_hx_plain')
                for idx, (hx, param) in enumerate(
                        zip(hx_plain_splits, params)):
                    if hx is None:
                        hx_plain_splits[idx] = tf.zeros_like(param)
                return tensor_utils.flatten_tensor_variables(
                    hx_plain_splits)

        # Lazily compiled; graph construction cost paid on first use.
        self._hvp_fun = LazyDict(
            f_hx_plain=lambda: tensor_utils.compile_function(
                inputs=inputs + xs,
                outputs=hx_plain(),
                log_name='f_hx_plain',
            ),
        )
def update_opt(self, f, target, inputs, reg_coeff, name=None):
    """Build the symbolic graph to compute the Hessian-vector product.

    Args:
        f: Scalar symbolic function whose Hessian is needed.
        target: A parameterized object exposing ``get_params()``.
        inputs: Tuple of input tensors for function f.
        reg_coeff: A small value so that A -> A + reg*I.
        name: Optional name for the enclosing tf.name_scope; defaults to
            "PerlmutterHvp".
    """
    self.target = target
    self.reg_coeff = reg_coeff
    params = target.get_params(trainable=True)
    with tf.name_scope(name, "PerlmutterHvp", [f, inputs, params]):
        constraint_grads = tf.gradients(f,
                                        xs=params,
                                        name="gradients_constraint")
        # A None gradient means f does not depend on that parameter;
        # substitute zeros so per-parameter shapes stay consistent.
        for idx, (grad, param) in enumerate(zip(constraint_grads, params)):
            if grad is None:
                constraint_grads[idx] = tf.zeros_like(param)

        # One fresh tensor per parameter: the vector v in H(f) @ v.
        xs = tuple([
            tensor_utils.new_tensor_like(p.name.split(":")[0], p)
            for p in params
        ])

        def hx_plain():
            """Return the flattened symbolic product of Hessian(f) and xs."""
            with tf.name_scope("hx_plain",
                               values=[constraint_grads, params, xs]):
                with tf.name_scope("hx_function",
                                   values=[constraint_grads, xs]):
                    # Pearlmutter trick: grad(<grad f, v>) == H(f) @ v.
                    # (Fixed: a stray trailing comma previously made hx_f
                    # a 1-tuple rather than a scalar tensor.)
                    hx_f = tf.reduce_sum(
                        tf.stack([
                            tf.reduce_sum(g * x)
                            for g, x in zip(constraint_grads, xs)
                        ]))
                hx_plain_splits = tf.gradients(hx_f,
                                               params,
                                               name="gradients_hx_plain")
                for idx, (hx, param) in enumerate(
                        zip(hx_plain_splits, params)):
                    if hx is None:
                        hx_plain_splits[idx] = tf.zeros_like(param)
                return tensor_utils.flatten_tensor_variables(
                    hx_plain_splits)

        # Lazily compiled; graph construction cost paid on first use.
        self.opt_fun = ext.LazyDict(
            f_hx_plain=lambda: tensor_utils.compile_function(
                inputs=inputs + xs,
                outputs=hx_plain(),
                log_name="f_hx_plain",
            ),
        )