def compute_gradients(
    policy, optimizer: LocalOptimizer, loss: TensorType
) -> ModelGradients:
    """Computes gradients of `loss` w.r.t. the policy model's variables.

    If `grad_clip` is set in the policy's config, the gradients are clipped
    by global norm and any NaN entries (produced when the global norm is
    inf) are replaced by zeros so a destructive loss step is ignored.

    Args:
        policy: The Policy whose model's trainable variables to use.
        optimizer (LocalOptimizer): The tf (local) optimizer to compute the
            gradients with.
        loss (TensorType): The loss tensor to differentiate.

    Returns:
        ModelGradients: List of (possibly clipped) gradient/variable tuples.
    """
    # ModelV2 exposes trainable variables via a method; call it if needed.
    trainable = policy.model.trainable_variables
    if isinstance(policy.model, ModelV2):
        trainable = trainable()
    grads_and_vars = optimizer.compute_gradients(loss, trainable)

    clip_value = policy.config.get("grad_clip")
    if clip_value is None:
        # No clipping requested -> return raw grads/vars.
        return grads_and_vars

    # Clip by global norm. Defuse inf gradients (due to super large losses):
    # an inf global norm turns all grads into NaN, so zero those out.
    raw_grads = [g for g, _ in grads_and_vars]
    clipped, _ = tf.clip_by_global_norm(raw_grads, clip_value)
    policy.grads = [
        None if g is None
        else tf.where(tf.math.is_nan(g), tf.zeros_like(g), g)
        for g in clipped
    ]
    return list(zip(policy.grads, trainable))
def compute_and_clip_gradients(policy: Policy, optimizer: LocalOptimizer,
                               loss: TensorType) -> ModelGradients:
    """Gradients computing function (from loss tensor, using local optimizer).

    Args:
        policy (Policy): The Policy object that generated the loss tensor and
            that holds the given local optimizer.
        optimizer (LocalOptimizer): The tf (local) optimizer object to
            calculate the gradients with.
        loss (TensorType): The loss tensor for which gradients should be
            calculated.

    Returns:
        ModelGradients: List of the possibly clipped gradients- and variable
            tuples.
    """
    # Compute the gradients.
    variables = policy.model.trainable_variables()
    grads_and_vars = optimizer.compute_gradients(loss, variables)

    # Clip by global norm, if necessary.
    if policy.config["grad_clip"] is not None:
        # Defuse inf gradients (due to super large losses).
        grads = [g for (g, v) in grads_and_vars]
        grads, _ = tf.clip_by_global_norm(grads, policy.config["grad_clip"])
        # If the global_norm is inf -> All grads will be NaN. Stabilize this
        # here by setting them to 0.0. This will simply ignore destructive loss
        # calculations.
        # NOTE: `compute_gradients` yields None for variables not reached by
        # the loss; `tf.clip_by_global_norm` passes those through as None.
        # Keep them as None instead of calling `tf.math.is_nan(None)`, which
        # would raise.
        policy.grads = [
            None if g is None
            else tf.where(tf.math.is_nan(g), tf.zeros_like(g), g)
            for g in grads
        ]
        clipped_grads_and_vars = list(zip(policy.grads, variables))
        return clipped_grads_and_vars
    else:
        return grads_and_vars
def compute_gradients_fn(self, optimizer: LocalOptimizer,
                         loss: TensorType) -> ModelGradients:
    """Computes (and optionally clips) gradients for one or more losses.

    Supports the multi-optimizer/multi-loss case when the config key
    `_tf_policy_handles_more_than_one_loss` is True; otherwise pairs the
    single optimizer with the single loss.

    Args:
        optimizer (LocalOptimizer): One tf (local) optimizer, or - in the
            multi-loss case - a list of them (one per loss term).
        loss (TensorType): One loss tensor, or a list of them (one per
            optimizer).

    Returns:
        ModelGradients: The (possibly clipped) gradient/variable tuples
            (a list of lists in the multi-loss case).
    """
    # Hoist the clip value; skip clipping entirely when it is None
    # (`tf.clip_by_global_norm` requires a numeric clip_norm and would
    # raise otherwise).
    grad_clip = self.config["grad_clip"]

    # Supporting more than one loss/optimizer.
    if self.config["_tf_policy_handles_more_than_one_loss"]:
        optimizers = force_list(optimizer)
        losses = force_list(loss)
        assert len(optimizers) == len(losses)
        clipped_grads_and_vars = []
        for optim, loss_ in zip(optimizers, losses):
            grads_and_vars = optim.compute_gradients(
                loss_, self.model.trainable_variables())
            clipped_g_and_v = []
            for g, v in grads_and_vars:
                # Variables not reached by this loss get g=None -> drop them.
                if g is not None:
                    if grad_clip is not None:
                        clipped_g, _ = tf.clip_by_global_norm([g], grad_clip)
                        g = clipped_g[0]
                    clipped_g_and_v.append((g, v))
            clipped_grads_and_vars.append(clipped_g_and_v)
        # Flattened view of all gradients across all loss terms.
        self.grads = [
            g for g_and_v in clipped_grads_and_vars for (g, v) in g_and_v
        ]
    # Only one optimizer and one loss term.
    else:
        grads_and_vars = optimizer.compute_gradients(
            loss, self.model.trainable_variables())
        grads = [g for (g, v) in grads_and_vars]
        if grad_clip is not None:
            self.grads, _ = tf.clip_by_global_norm(grads, grad_clip)
        else:
            self.grads = grads
        clipped_grads_and_vars = list(
            zip(self.grads, self.model.trainable_variables()))

    return clipped_grads_and_vars
def compute_and_clip_gradients(policy: Policy, optimizer: LocalOptimizer,
                               loss: TensorType) -> ModelGradients:
    """Computes gradients from a loss tensor via the given local optimizer.

    Args:
        policy (Policy): The Policy object that generated the loss tensor and
            that holds the given local optimizer.
        optimizer (LocalOptimizer): The tf (local) optimizer object to
            calculate the gradients with.
        loss (TensorType): The loss tensor for which gradients should be
            calculated.

    Returns:
        ModelGradients: List of the possibly clipped gradients- and variable
            tuples.
    """
    # Differentiate the loss w.r.t. the model's trainable variables.
    model_vars = policy.model.trainable_variables()
    grads_and_vars = optimizer.compute_gradients(loss, model_vars)

    clip_value = policy.config["grad_clip"]
    if clip_value is None:
        # No clipping configured -> hand back the raw grads/vars.
        return grads_and_vars

    # Clip all gradients jointly by their global norm and stash them on the
    # policy for later inspection.
    raw_grads = [g for g, _ in grads_and_vars]
    policy.grads, _ = tf.clip_by_global_norm(raw_grads, clip_value)
    return list(zip(policy.grads, model_vars))
def clip_gradients(policy: Policy, optimizer: LocalOptimizer,
                   loss: TensorType) -> ModelGradients:
    """Computes gradients for `loss` and clips them by global norm.

    Args:
        policy (Policy): The Policy holding the model and `grad_clip` config.
        optimizer (LocalOptimizer): The tf (local) optimizer to compute the
            gradients with.
        loss (TensorType): The loss tensor to differentiate.

    Returns:
        ModelGradients: List of clipped gradient/variable tuples.
    """
    grads_and_vars = optimizer.compute_gradients(
        loss, policy.model.trainable_variables())
    # Joint clipping of all gradients by their global norm.
    clipped, _ = tf.clip_by_global_norm(
        [g for g, _ in grads_and_vars], policy.config["grad_clip"])
    return list(zip(clipped, policy.model.trainable_variables()))
def minimize_and_clip(
    optimizer: LocalOptimizer,
    objective: TensorType,
    var_list: List["tf.Variable"],
    clip_val: float = 10.0,
) -> ModelGradients:
    """Computes, then clips gradients using objective, optimizer and var list.

    Ensures the norm of the gradients for each variable is clipped to
    `clip_val`.

    Args:
        optimizer: Either a shim optimizer (tf eager) containing a
            tf.GradientTape under `self.tape` or a tf1 local optimizer
            object.
        objective: The loss tensor to calculate gradients on.
        var_list: The list of tf.Variables to compute gradients over.
        clip_val: The global norm clip value. Will clip around -clip_val and
            +clip_val.

    Returns:
        The resulting model gradients (list or tuples of grads + vars)
        corresponding to the input `var_list`.
    """
    # A clip value <= 0.0 would break all gradients; None means "no clipping".
    assert clip_val is None or clip_val > 0.0, clip_val

    if tf.executing_eagerly():
        # Eager mode: pull the gradients off the shim optimizer's tape.
        eager_grads = list(optimizer.tape.gradient(objective, var_list))
        grads_and_vars = list(zip(eager_grads, var_list))
    else:
        # Graph mode: let the tf1 optimizer compute grads directly.
        grads_and_vars = optimizer.compute_gradients(
            objective, var_list=var_list)

    # Per-variable norm clipping; variables with no gradient are dropped.
    result = []
    for grad, var in grads_and_vars:
        if grad is None:
            continue
        clipped = grad if clip_val is None else tf.clip_by_norm(grad, clip_val)
        result.append((clipped, var))
    return result