def compute_gradients(self,
                      loss,
                      var_list=None,
                      gate_gradients=optimizer.Optimizer.GATE_OP,
                      aggregation_method=None,
                      colocate_gradients_with_ops=False,
                      grad_loss=None):
  """Compute gradients of `loss` for the variables in `var_list`.

  Adds `rho * elastic_difference` to `loss` to control the exploration.

  This is the first part of `minimize()`. It returns a list of
  (gradient, variable) pairs where "gradient" is the gradient for
  "variable". Note that "gradient" can be a `Tensor`, an `IndexedSlices`,
  or `None` if there is no gradient for the given variable.

  Args:
    loss: A Tensor containing the value to minimize.
    var_list: Optional list or tuple of `tf.Variable` to update to
      minimize `loss`. Defaults to the list of variables collected in the
      graph under the key `GraphKeys.TRAINABLE_VARIABLES`.
    gate_gradients: How to gate the computation of gradients. Can be
      `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
    aggregation_method: Specifies the method used to combine gradient
      terms. Valid values are defined in the class `AggregationMethod`.
    colocate_gradients_with_ops: If True, try colocating gradients with
      the corresponding op.
    grad_loss: Optional. A `Tensor` holding the gradient computed for
      `loss`.

  Returns:
    A list of (gradient, variable) pairs. Variable is always present, but
    gradient can be `None`.

  Raises:
    TypeError: If `var_list` contains anything else than `Variable`
      objects.
    ValueError: If some arguments are invalid.
  """
  if not var_list:
    var_list = variables.trainable_variables()

  # Difference between each global variable and its local (center) copy.
  elastic_difference = [
      math_ops.subtract(v, lv)
      for v, lv in zip(variables.trainable_variables(),
                       [self._local_map[var] for var in var_list])
  ]

  # Quadratic penalty pulling the worker back toward the center variables.
  distance_loss = self._rho * math_ops.add_n(
      [gen_nn_ops.l2_loss(ed) for ed in elastic_difference])

  total_loss = loss + distance_loss
  return self._opt.compute_gradients(total_loss, var_list, gate_gradients,
                                     aggregation_method,
                                     colocate_gradients_with_ops, grad_loss)
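# A minimal NumPy sketch of the distance term added above. Since
# l2_loss(x) = sum(x**2) / 2, the penalty equals rho/2 * sum_i ||v_i - c_i||^2.
# The names `elastic_penalty`, `vs`, and `cs` are illustrative only and not
# part of the TensorFlow API; this is a sketch of the idea, not the optimizer.
import numpy as np


def elastic_penalty(variables, center_variables, rho):
  """rho * sum_i l2_loss(v_i - c_i), with l2_loss(x) = sum(x**2) / 2."""
  return rho * sum(np.sum((v - c) ** 2) / 2.0
                   for v, c in zip(variables, center_variables))


# Toy values: two "variables" and their center copies on the parameter server.
vs = [np.array([1.0, 2.0]), np.array([0.5])]
cs = [np.array([0.8, 2.1]), np.array([0.0])]
print(elastic_penalty(vs, cs, rho=0.01))  # small penalty added to the loss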
def global_norm(t_list, name=None):
  """Computes the global norm of multiple tensors.

  Given a tuple or list of tensors `t_list`, this operation returns the
  global norm of the elements in all tensors in `t_list`. The global norm
  is computed as:

  `global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))`

  Any entries in `t_list` that are of type None are ignored.

  Args:
    t_list: A tuple or list of mixed `Tensors`, `IndexedSlices`, or None.
    name: A name for the operation (optional).

  Returns:
    A 0-D (scalar) `Tensor` of type `float`.

  Raises:
    TypeError: If `t_list` is not a sequence.
  """
  if (not isinstance(t_list, collections_abc.Sequence) or
      isinstance(t_list, six.string_types)):
    raise TypeError("`t_list` should be a sequence of tensors. Received "
                    f"{type(t_list)}.")
  t_list = list(t_list)
  with ops.name_scope(name, "global_norm", t_list) as name:
    values = [
        ops.convert_to_tensor(
            t.values if isinstance(t, ops.IndexedSlices) else t,
            name="t_%d" % i) if t is not None else t
        for i, t in enumerate(t_list)
    ]
    half_squared_norms = []
    for v in values:
      if v is not None:
        with ops.colocate_with(v):
          half_squared_norms.append(gen_nn_ops.l2_loss(v))

    half_squared_norm = math_ops.reduce_sum(
        array_ops.stack(half_squared_norms))

    # l2_loss returns sum(t**2)/2, so multiply by 2 before the square root.
    norm = math_ops.sqrt(
        half_squared_norm *
        constant_op.constant(2.0, dtype=half_squared_norm.dtype),
        name="global_norm")

  return norm
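# A small NumPy reference for the formula in the docstring,
# global_norm = sqrt(sum(||t||_2**2)) over non-None entries. The helper name
# `reference_global_norm` is hypothetical and only meant to illustrate why the
# implementation's sqrt(2 * sum(l2_loss(t))) yields the same value.
import numpy as np


def reference_global_norm(t_list):
  """sqrt of the summed squared L2 norms of all non-None entries."""
  squared = [np.sum(np.square(t)) for t in t_list if t is not None]
  return np.sqrt(np.sum(squared))


ts = [np.array([3.0, 4.0]), None, np.array([12.0])]
print(reference_global_norm(ts))  # sqrt(9 + 16 + 144) = 13.0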