Exemple #1
0
def summarize_activation(op):
    """Attach activation-specific summaries to a layer output.

    A histogram of `op` is always added under `<op name>/activation`. When the
    op that produced `op` is a `Relu`, `Softplus` or `Relu6`, the fraction of
    its first input that is below zero is also recorded as a scalar under
    `<op name>/zeros`. For `Relu6`, the fraction of that input above six is
    additionally recorded under `<op name>/sixes`.

    Args:
      op: The tensor to summarize (assumed to be a layer activation).

    Returns:
      The summary op created for the histogram of `op`.
    """
    producer = op.op
    tag_prefix = producer.name

    def _record_fraction(comparison, threshold, suffix):
        """Adds a scalar summary of how often `comparison` holds on the input."""
        # The comparison runs on the op's *input* rather than its output, to
        # avoid floating point equality tests and/or epsilons.
        pre_activation = producer.inputs[0]
        mask = comparison(
            pre_activation,
            standard_ops.cast(threshold, pre_activation.dtype))
        _add_scalar_summary(
            standard_ops.reduce_mean(standard_ops.to_float(mask)),
            '%s/%s' % (tag_prefix, suffix))

    if producer.type in ('Relu', 'Softplus', 'Relu6'):
        _record_fraction(standard_ops.less, 0.0, 'zeros')
    if producer.type == 'Relu6':
        _record_fraction(standard_ops.greater, 6.0, 'sixes')
    return _add_histogram_summary(op, '%s/activation' % tag_prefix)
Exemple #2
0
def summarize_activation(op):
    """Add summaries describing a layer activation tensor.

    For `Relu`, `Softplus` and `Relu6` ops, a scalar summary named
    `<op name>/zeros` records the fraction of the op's first input that is
    negative. For `Relu6`, a second scalar named `<op name>/sixes` records the
    fraction of that input greater than six. A histogram of `op` itself is
    always added under `<op name>/activation`.

    Args:
      op: The tensor to summarize (assumed to be a layer activation).

    Returns:
      The summary op created for the histogram of `op`.
    """
    source_op = op.op

    if source_op.type in ("Relu", "Softplus", "Relu6"):
        # Using inputs to avoid floating point equality and/or epsilons.
        pre_activation = source_op.inputs[0]
        zero = standard_ops.cast(0.0, pre_activation.dtype)
        is_negative = standard_ops.less(pre_activation, zero)
        negative_fraction = standard_ops.reduce_mean(
            standard_ops.to_float(is_negative))
        _add_scalar_summary(negative_fraction, "%s/zeros" % source_op.name)

    if source_op.type == "Relu6":
        pre_activation = source_op.inputs[0]
        six = standard_ops.cast(6.0, pre_activation.dtype)
        is_above_six = standard_ops.greater(pre_activation, six)
        above_six_fraction = standard_ops.reduce_mean(
            standard_ops.to_float(is_above_six))
        _add_scalar_summary(above_six_fraction, "%s/sixes" % source_op.name)

    return _add_histogram_summary(op, "%s/activation" % source_op.name)
 def while_loop_body(iteration, matrix, inactive, old_inactive):
   """Runs a single step of the iterative projection.

   The amount by which each column of `matrix` falls short of summing to one
   is divided by the column's sum of `inactive` (floored at one) and added to
   the entries selected by `inactive`. Entries that are not strictly positive
   afterwards are zeroed, and a fresh 0/1 indicator of the positive entries
   is produced.

   Returns:
     The tuple (iteration + 1, updated matrix, new indicator, `inactive`).
   """
   del old_inactive  # Only the loop condition reads this argument.
   iteration += 1
   column_sums = standard_ops.reduce_sum(matrix, axis=0, keepdims=True)
   shortfall = 1.0 - column_sums
   inactive_counts = standard_ops.reduce_sum(inactive, axis=0, keepdims=True)
   # Floor the divisor at one so that an all-zero column of `inactive` does
   # not divide by zero.
   divisor = standard_ops.maximum(1.0, inactive_counts)
   scale = shortfall / divisor
   correction = scale * inactive
   matrix = matrix + correction
   positive_mask = standard_ops.cast(matrix > 0, matrix.dtype)
   matrix = matrix * positive_mask
   return iteration, matrix, positive_mask, inactive
 def while_loop_body(iteration, matrix, inactive, old_inactive):
   """Advances the projection by one iteration.

   Each column's residual (one minus the column sum) is spread evenly over
   the entries of that column selected by `inactive`. Any entry that is not
   strictly positive afterwards is set to zero.

   Returns:
     (iteration + 1, updated matrix, indicator of its positive entries,
     the `inactive` indicator that was passed in).
   """
   del old_inactive  # Consumed by the loop condition only.
   iteration += 1
   residual = 1.0 - standard_ops.reduce_sum(matrix, axis=0, keepdims=True)
   # At least one, so columns with nothing selected never divide by zero.
   selected_count = standard_ops.maximum(
       1.0, standard_ops.reduce_sum(inactive, axis=0, keepdims=True))
   matrix += (residual / selected_count) * inactive
   still_positive = standard_ops.cast(matrix > 0, matrix.dtype)
   matrix *= still_positive
   return iteration, matrix, still_positive, inactive
 def while_loop_body(iteration, multipliers, inactive, old_inactive):
     """Runs a single step of the iterative projection.

     The difference between `radius` (taken from the enclosing scope) and the
     sum of `multipliers` is divided by the sum of `inactive` (floored at
     one) and capped at zero, so the step can only shrink entries. It is
     added to the entries selected by `inactive`; entries that are not
     strictly positive afterwards are zeroed.

     Returns:
       The tuple (iteration + 1, updated multipliers, 0/1 indicator of their
       positive entries, `inactive`).
     """
     del old_inactive  # Only the loop condition reads this argument.
     iteration += 1
     excess = radius - standard_ops.reduce_sum(multipliers)
     # Floor the divisor at one to avoid dividing by zero when nothing is
     # selected by `inactive`.
     divisor = standard_ops.maximum(1.0, standard_ops.reduce_sum(inactive))
     scale = standard_ops.minimum(0.0, excess / divisor)
     correction = scale * inactive
     multipliers = multipliers + correction
     positive_mask = standard_ops.cast(multipliers > 0, multipliers.dtype)
     multipliers = multipliers * positive_mask
     return iteration, multipliers, positive_mask, inactive
 def while_loop_body(iteration, multipliers, inactive, old_inactive):
   """Advances the projection by one iteration.

   Computes a non-positive per-entry step from how far the sum of
   `multipliers` is from `radius` (a variable of the enclosing scope),
   applies it to the entries selected by `inactive`, and zeroes every entry
   that is not strictly positive afterwards.

   Returns:
     (iteration + 1, updated multipliers, indicator of their positive
     entries, the `inactive` indicator that was passed in).
   """
   del old_inactive  # Consumed by the loop condition only.
   iteration += 1
   total = standard_ops.reduce_sum(multipliers)
   gap = radius - total
   # At least one, so an all-zero `inactive` never divides by zero.
   selected_count = standard_ops.maximum(1.0, standard_ops.reduce_sum(inactive))
   # Capped at zero: the step may decrease entries but never increase them.
   step = standard_ops.minimum(0.0, gap / selected_count)
   multipliers += step * inactive
   still_positive = standard_ops.cast(multipliers > 0, multipliers.dtype)
   multipliers *= still_positive
   return iteration, multipliers, still_positive, inactive
Exemple #7
0
    def minimize(self,
                 minimization_problem,
                 unconstrained_steps=None,
                 global_step=None,
                 var_list=None,
                 gate_gradients=train_optimizer.Optimizer.GATE_OP,
                 aggregation_method=None,
                 colocate_gradients_with_ops=False,
                 name=None,
                 grad_loss=None):
        """Returns an `Operation` for minimizing the constrained problem.

        Dispatches between `minimize_unconstrained` and
        `minimize_constrained`. While global_step < unconstrained_steps the
        unconstrained update runs; once global_step >= unconstrained_steps
        the constrained update runs. If `unconstrained_steps` is None, the
        constrained update is always used.

        Starting with unconstrained updates can be useful because it may be
        best to initialize the constrained optimizer with an approximate
        optimum of the unconstrained problem.

        Args:
          minimization_problem: ConstrainedMinimizationProblem, the problem
            to optimize.
          unconstrained_steps: int, number of steps for which we should
            perform unconstrained updates, before transitioning to
            constrained updates.
          global_step: as in `tf.train.Optimizer`'s `minimize` method.
          var_list: as in `tf.train.Optimizer`'s `minimize` method.
          gate_gradients: as in `tf.train.Optimizer`'s `minimize` method.
          aggregation_method: as in `tf.train.Optimizer`'s `minimize` method.
          colocate_gradients_with_ops: as in `tf.train.Optimizer`'s
            `minimize` method.
          name: as in `tf.train.Optimizer`'s `minimize` method.
          grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

        Returns:
          `Operation`, the train_op.

        Raises:
          ValueError: If unconstrained_steps is provided, but global_step is
            not.
        """
        # Both update flavours take exactly the same keyword arguments.
        shared_kwargs = dict(
            minimization_problem=minimization_problem,
            global_step=global_step,
            var_list=var_list,
            gate_gradients=gate_gradients,
            aggregation_method=aggregation_method,
            colocate_gradients_with_ops=colocate_gradients_with_ops,
            name=name,
            grad_loss=grad_loss)

        def unconstrained_fn():
            """Returns an `Operation` for minimizing the unconstrained problem."""
            return self.minimize_unconstrained(**shared_kwargs)

        def constrained_fn():
            """Returns an `Operation` for minimizing the constrained problem."""
            return self.minimize_constrained(**shared_kwargs)

        # Without a warm-up phase every step is a constrained update.
        if unconstrained_steps is None:
            return constrained_fn()

        if global_step is None:
            raise ValueError(
                "global_step cannot be None if unconstrained_steps is provided"
            )

        # Compare in the dtype of the step threshold.
        steps_tensor = ops.convert_to_tensor(unconstrained_steps)
        still_unconstrained = (
            standard_ops.cast(global_step, steps_tensor.dtype) < steps_tensor)
        return control_flow_ops.cond(still_unconstrained,
                                     true_fn=unconstrained_fn,
                                     false_fn=constrained_fn)
  def _minimize_constrained(self,
                            minimization_problem,
                            global_step=None,
                            var_list=None,
                            gate_gradients=train_optimizer.Optimizer.GATE_OP,
                            aggregation_method=None,
                            colocate_gradients_with_ops=False,
                            name=None,
                            grad_loss=None):
    """Returns an `Operation` for minimizing the constrained problem.

    `self.optimizer` is used to update the model parameters, while the
    constraint/objective weight matrix (the analogue of Lagrange multipliers),
    held in an internal non-trainable state variable, is updated using
    `self.constraint_optimizer` (if it is not None) or `self.optimizer` (if it
    is). Whether the matrix updates are additive or multiplicative depends on
    the derived class.

    After the updates have been applied, the state is passed through
    `self._projection_op` and, if `global_step` is given, `global_step` is
    incremented by one.

    Args:
      minimization_problem: ConstrainedMinimizationProblem, the problem to
        optimize.
      global_step: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      var_list: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      gate_gradients: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      aggregation_method: as in `tf.compat.v1.train.Optimizer`'s `minimize`
        method.
      colocate_gradients_with_ops: as in `tf.compat.v1.train.Optimizer`'s
        `minimize` method.
      name: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      grad_loss: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.

    Raises:
      ValueError: If the minimization_problem tensors have different dtypes.

    Returns:
      `Operation`, the train_op.
    """
    objective = minimization_problem.objective

    constraints = minimization_problem.constraints
    proxy_constraints = minimization_problem.proxy_constraints
    # Without explicit proxy constraints, the constraints act as their own
    # proxies.
    if proxy_constraints is None:
      proxy_constraints = constraints

    # Make sure that the objective, constraints and proxy constraints all have
    # the same dtype.
    if (objective.dtype.base_dtype != constraints.dtype.base_dtype or
        objective.dtype.base_dtype != proxy_constraints.dtype.base_dtype):
      raise ValueError("objective, constraints and proxy_constraints must "
                       "have the same dtype")

    # Flatten both constraints tensors to 1d.
    num_constraints = minimization_problem.num_constraints
    constraints = standard_ops.reshape(constraints, shape=(num_constraints,))
    proxy_constraints = standard_ops.reshape(
        proxy_constraints, shape=(num_constraints,))

    # We use a lambda to initialize the state so that, if this function call is
    # inside the scope of a tf.control_dependencies() block, the dependencies
    # will not be applied to the initializer.
    state = standard_ops.Variable(
        lambda: self._initial_state(num_constraints),
        trainable=False,
        name="swap_regret_optimizer_state")

    # Prepend a zero to the constraints, and the objective to the proxy
    # constraints (assumes `objective` is a scalar, so that both results are 1d
    # with num_constraints + 1 entries -- TODO confirm).
    zero_and_constraints = standard_ops.concat((standard_ops.zeros(
        (1,), dtype=constraints.dtype), constraints),
                                               axis=0)
    objective_and_proxy_constraints = standard_ops.concat(
        (standard_ops.expand_dims(objective, 0), proxy_constraints), axis=0)

    # `self._distribution` is defined elsewhere; presumably it derives a
    # distribution over the num_constraints + 1 entries from the state --
    # verify against the derived class.
    distribution = self._distribution(state)
    # The loss seen by the model parameters: the objective and proxy
    # constraints, weighted by the distribution.
    loss = standard_ops.tensordot(
        standard_ops.cast(distribution, objective_and_proxy_constraints.dtype),
        objective_and_proxy_constraints, 1)
    # The gradient used for the state update: zero_and_constraints as a column
    # times the distribution as a row. Note that this uses the original
    # constraints, whereas the loss uses the proxy constraints.
    matrix_gradient = standard_ops.matmul(
        standard_ops.expand_dims(
            standard_ops.cast(zero_and_constraints, distribution.dtype), 1),
        standard_ops.expand_dims(distribution, 0))

    update_ops = []
    if self.constraint_optimizer is None:
      # If we don't have a separate constraint_optimizer, then we use
      # self.optimizer for both the update of the model parameters, and that of
      # the internal state.
      grads_and_vars = self.optimizer.compute_gradients(
          loss,
          var_list=var_list,
          gate_gradients=gate_gradients,
          aggregation_method=aggregation_method,
          colocate_gradients_with_ops=colocate_gradients_with_ops,
          grad_loss=grad_loss)
      grads_and_vars.append(
          self._constraint_grad_and_var(state, matrix_gradient))
      update_ops.append(
          self.optimizer.apply_gradients(grads_and_vars, name="update"))
    else:
      # If we have a separate constraint_optimizer, then we use self.optimizer
      # for the update of the model parameters, and self.constraint_optimizer
      # for that of the internal state.
      grads_and_vars = self.optimizer.compute_gradients(
          loss,
          var_list=var_list,
          gate_gradients=gate_gradients,
          aggregation_method=aggregation_method,
          colocate_gradients_with_ops=colocate_gradients_with_ops,
          grad_loss=grad_loss)
      matrix_grads_and_vars = [
          self._constraint_grad_and_var(state, matrix_gradient)
      ]

      # Both apply ops below are created under a control dependency on every
      # non-None gradient, so neither update can run before all of the
      # gradients have been computed.
      gradients = [
          gradient for gradient, _ in grads_and_vars + matrix_grads_and_vars
          if gradient is not None
      ]
      with ops.control_dependencies(gradients):
        update_ops.append(
            self.optimizer.apply_gradients(grads_and_vars, name="update"))
        update_ops.append(
            self.constraint_optimizer.apply_gradients(
                matrix_grads_and_vars, name="optimizer_state_update"))

    # The projection (and the global step increment, if any) is created under
    # a control dependency on the update ops, so it runs after them.
    with ops.control_dependencies(update_ops):
      if global_step is None:
        # If we don't have a global step, just project, and we're done.
        return self._projection_op(state, name=name)
      else:
        # If we have a global step, then we need to increment it in addition to
        # projecting.
        projection_op = self._projection_op(state, name="project")
        with ops.colocate_with(global_step):
          global_step_op = state_ops.assign_add(
              global_step, 1, name="global_step_increment")
        return control_flow_ops.group(projection_op, global_step_op, name=name)
  def _minimize_constrained(self,
                            minimization_problem,
                            global_step=None,
                            var_list=None,
                            gate_gradients=train_optimizer.Optimizer.GATE_OP,
                            aggregation_method=None,
                            colocate_gradients_with_ops=False,
                            name=None,
                            grad_loss=None):
    """Returns an `Operation` for minimizing the constrained problem.

    `self.optimizer` is used to update the model parameters, while the
    constraint/objective weight matrix (the analogue of Lagrange multipliers),
    held in an internal non-trainable state variable, is updated using
    `self.constraint_optimizer` (if it is not None) or `self.optimizer` (if it
    is). Whether the matrix updates are additive or multiplicative depends on
    the derived class.

    After the updates have been applied, the state is passed through
    `self._projection_op` and, if `global_step` is given, `global_step` is
    incremented by one.

    Args:
      minimization_problem: ConstrainedMinimizationProblem, the problem to
        optimize.
      global_step: as in `tf.train.Optimizer`'s `minimize` method.
      var_list: as in `tf.train.Optimizer`'s `minimize` method.
      gate_gradients: as in `tf.train.Optimizer`'s `minimize` method.
      aggregation_method: as in `tf.train.Optimizer`'s `minimize` method.
      colocate_gradients_with_ops: as in `tf.train.Optimizer`'s `minimize`
        method.
      name: as in `tf.train.Optimizer`'s `minimize` method.
      grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

    Raises:
      ValueError: If the minimization_problem tensors have different dtypes.

    Returns:
      `Operation`, the train_op.
    """
    objective = minimization_problem.objective

    constraints = minimization_problem.constraints
    proxy_constraints = minimization_problem.proxy_constraints
    # Without explicit proxy constraints, the constraints act as their own
    # proxies.
    if proxy_constraints is None:
      proxy_constraints = constraints

    # Make sure that the objective, constraints and proxy constraints all have
    # the same dtype.
    if (objective.dtype.base_dtype != constraints.dtype.base_dtype or
        objective.dtype.base_dtype != proxy_constraints.dtype.base_dtype):
      raise ValueError("objective, constraints and proxy_constraints must "
                       "have the same dtype")

    # Flatten both constraints tensors to 1d.
    num_constraints = minimization_problem.num_constraints
    constraints = standard_ops.reshape(constraints, shape=(num_constraints,))
    proxy_constraints = standard_ops.reshape(
        proxy_constraints, shape=(num_constraints,))

    # We use a lambda to initialize the state so that, if this function call is
    # inside the scope of a tf.control_dependencies() block, the dependencies
    # will not be applied to the initializer.
    state = standard_ops.Variable(
        lambda: self._initial_state(num_constraints),
        trainable=False,
        name="swap_regret_optimizer_state")

    # Prepend a zero to the constraints, and the objective to the proxy
    # constraints (assumes `objective` is a scalar, so that both results are 1d
    # with num_constraints + 1 entries -- TODO confirm).
    zero_and_constraints = standard_ops.concat(
        (standard_ops.zeros((1,), dtype=constraints.dtype), constraints),
        axis=0)
    objective_and_proxy_constraints = standard_ops.concat(
        (standard_ops.expand_dims(objective, 0), proxy_constraints), axis=0)

    # `self._distribution` is defined elsewhere; presumably it derives a
    # distribution over the num_constraints + 1 entries from the state --
    # verify against the derived class.
    distribution = self._distribution(state)
    # The loss seen by the model parameters: the objective and proxy
    # constraints, weighted by the distribution.
    loss = standard_ops.tensordot(
        standard_ops.cast(distribution, objective_and_proxy_constraints.dtype),
        objective_and_proxy_constraints, 1)
    # The gradient used for the state update: zero_and_constraints as a column
    # times the distribution as a row. Note that this uses the original
    # constraints, whereas the loss uses the proxy constraints.
    matrix_gradient = standard_ops.matmul(
        standard_ops.expand_dims(
            standard_ops.cast(zero_and_constraints, distribution.dtype), 1),
        standard_ops.expand_dims(distribution, 0))

    update_ops = []
    if self.constraint_optimizer is None:
      # If we don't have a separate constraint_optimizer, then we use
      # self.optimizer for both the update of the model parameters, and that of
      # the internal state.
      grads_and_vars = self.optimizer.compute_gradients(
          loss,
          var_list=var_list,
          gate_gradients=gate_gradients,
          aggregation_method=aggregation_method,
          colocate_gradients_with_ops=colocate_gradients_with_ops,
          grad_loss=grad_loss)
      grads_and_vars.append(
          self._constraint_grad_and_var(state, matrix_gradient))
      update_ops.append(
          self.optimizer.apply_gradients(grads_and_vars, name="update"))
    else:
      # If we have a separate constraint_optimizer, then we use self.optimizer
      # for the update of the model parameters, and self.constraint_optimizer
      # for that of the internal state.
      grads_and_vars = self.optimizer.compute_gradients(
          loss,
          var_list=var_list,
          gate_gradients=gate_gradients,
          aggregation_method=aggregation_method,
          colocate_gradients_with_ops=colocate_gradients_with_ops,
          grad_loss=grad_loss)
      matrix_grads_and_vars = [
          self._constraint_grad_and_var(state, matrix_gradient)
      ]

      # Both apply ops below are created under a control dependency on every
      # non-None gradient, so neither update can run before all of the
      # gradients have been computed.
      gradients = [
          gradient for gradient, _ in grads_and_vars + matrix_grads_and_vars
          if gradient is not None
      ]
      with ops.control_dependencies(gradients):
        update_ops.append(
            self.optimizer.apply_gradients(grads_and_vars, name="update"))
        update_ops.append(
            self.constraint_optimizer.apply_gradients(
                matrix_grads_and_vars, name="optimizer_state_update"))

    # The projection (and the global step increment, if any) is created under
    # a control dependency on the update ops, so it runs after them.
    with ops.control_dependencies(update_ops):
      if global_step is None:
        # If we don't have a global step, just project, and we're done.
        return self._projection_op(state, name=name)
      else:
        # If we have a global step, then we need to increment it in addition to
        # projecting.
        projection_op = self._projection_op(state, name="project")
        with ops.colocate_with(global_step):
          global_step_op = state_ops.assign_add(
              global_step, 1, name="global_step_increment")
        return control_flow_ops.group(projection_op, global_step_op, name=name)
  def minimize(self,
               minimization_problem,
               unconstrained_steps=None,
               global_step=None,
               var_list=None,
               gate_gradients=train_optimizer.Optimizer.GATE_OP,
               aggregation_method=None,
               colocate_gradients_with_ops=False,
               name=None,
               grad_loss=None):
    """Returns an `Operation` for minimizing the constrained problem.

    Chooses between `minimize_unconstrained` and `minimize_constrained`: an
    unconstrained update is performed while global_step < unconstrained_steps,
    and a constrained update once global_step >= unconstrained_steps. When
    `unconstrained_steps` is None, the constrained update is always used.

    This exists because it may be best to initialize the constrained optimizer
    with an approximate optimum of the unconstrained problem.

    Args:
      minimization_problem: ConstrainedMinimizationProblem, the problem to
        optimize.
      unconstrained_steps: int, number of steps for which we should perform
        unconstrained updates, before transitioning to constrained updates.
      global_step: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      var_list: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      gate_gradients: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      aggregation_method: as in `tf.compat.v1.train.Optimizer`'s `minimize`
        method.
      colocate_gradients_with_ops: as in `tf.compat.v1.train.Optimizer`'s
        `minimize` method.
      name: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      grad_loss: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.

    Returns:
      `Operation`, the train_op.

    Raises:
      ValueError: If unconstrained_steps is provided, but global_step is not.
    """

    def _update_with(minimize_method):
      """Calls `minimize_method` with the arguments given to `minimize`."""
      return minimize_method(
          minimization_problem=minimization_problem,
          global_step=global_step,
          var_list=var_list,
          gate_gradients=gate_gradients,
          aggregation_method=aggregation_method,
          colocate_gradients_with_ops=colocate_gradients_with_ops,
          name=name,
          grad_loss=grad_loss)

    # No unconstrained phase was requested: always do the constrained update.
    if unconstrained_steps is None:
      return _update_with(self.minimize_constrained)

    if global_step is None:
      raise ValueError(
          "global_step cannot be None if unconstrained_steps is provided")

    # The comparison is done in the dtype of the step threshold.
    threshold = ops.convert_to_tensor(unconstrained_steps)
    before_threshold = (
        standard_ops.cast(global_step, threshold.dtype) < threshold)
    return control_flow_ops.cond(
        before_threshold,
        true_fn=lambda: _update_with(self.minimize_unconstrained),
        false_fn=lambda: _update_with(self.minimize_constrained))