def __init__(self, constraints):
        """Initializes a `ConstantMinimizationProblem`.

        Args:
          constraints: 1d numpy array, the constant constraint violations.
        """
        float_type = dtypes.float32
        # A fake single-parameter objective is created so that optimizers do
        # not fail with a "no variables to optimize" error.
        self._objective = standard_ops.Variable(0.0, dtype=float_type)
        self._constraints = standard_ops.constant(
            constraints, dtype=float_type)
  def _minimize_constrained(self,
                            minimization_problem,
                            global_step=None,
                            var_list=None,
                            gate_gradients=train_optimizer.Optimizer.GATE_OP,
                            aggregation_method=None,
                            colocate_gradients_with_ops=False,
                            name=None,
                            grad_loss=None):
    """Builds the `Operation` that performs one constrained training step.

    The model parameters are updated with `self.optimizer`. The internal state
    (the constraint/objective weight matrix, the analogue of Lagrange
    multipliers) is updated with `self.constraint_optimizer` when that is not
    None, and with `self.optimizer` otherwise. Whether the matrix updates are
    additive or multiplicative depends on the derived class.

    Args:
      minimization_problem: ConstrainedMinimizationProblem, the problem to
        optimize.
      global_step: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      var_list: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      gate_gradients: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      aggregation_method: as in `tf.compat.v1.train.Optimizer`'s `minimize`
        method.
      colocate_gradients_with_ops: as in `tf.compat.v1.train.Optimizer`'s
        `minimize` method.
      name: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      grad_loss: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.

    Raises:
      ValueError: If the objective, constraints and proxy constraints of
        `minimization_problem` do not all have the same base dtype.

    Returns:
      `Operation`, the train_op.
    """
    objective = minimization_problem.objective
    constraints = minimization_problem.constraints
    proxy_constraints = minimization_problem.proxy_constraints
    # Without dedicated proxy constraints, the constraints play both roles.
    if proxy_constraints is None:
      proxy_constraints = constraints

    # All three problem tensors are required to share one base dtype.
    objective_dtype = objective.dtype.base_dtype
    if any(objective_dtype != tensor.dtype.base_dtype
           for tensor in (constraints, proxy_constraints)):
      raise ValueError("objective, constraints and proxy_constraints must "
                       "have the same dtype")

    # Both constraint tensors are flattened to 1d.
    num_constraints = minimization_problem.num_constraints
    flat_shape = (num_constraints,)
    constraints = standard_ops.reshape(constraints, shape=flat_shape)
    proxy_constraints = standard_ops.reshape(
        proxy_constraints, shape=flat_shape)

    def initial_state():
      return self._initial_state(num_constraints)

    # The initial value is passed as a callable so that, if this method is
    # called inside a tf.control_dependencies() block, those dependencies are
    # not applied to the initializer.
    state = standard_ops.Variable(
        initial_state, trainable=False, name="swap_regret_optimizer_state")

    # Prepend a zero to the constraints, and the objective to the proxy
    # constraints, so that both vectors have num_constraints + 1 entries.
    leading_zero = standard_ops.zeros((1,), dtype=constraints.dtype)
    zero_and_constraints = standard_ops.concat(
        (leading_zero, constraints), axis=0)
    objective_1d = standard_ops.expand_dims(objective, 0)
    objective_and_proxy_constraints = standard_ops.concat(
        (objective_1d, proxy_constraints), axis=0)

    distribution = self._distribution(state)

    # The loss is the distribution-weighted combination of the objective and
    # the proxy constraints.
    loss_weights = standard_ops.cast(
        distribution, objective_and_proxy_constraints.dtype)
    loss = standard_ops.tensordot(
        loss_weights, objective_and_proxy_constraints, 1)

    # The state gradient is the outer product of [0, constraints] with the
    # distribution.
    column = standard_ops.expand_dims(
        standard_ops.cast(zero_and_constraints, distribution.dtype), 1)
    row = standard_ops.expand_dims(distribution, 0)
    matrix_gradient = standard_ops.matmul(column, row)

    state_optimizer = self.constraint_optimizer
    grads_and_vars = self.optimizer.compute_gradients(
        loss,
        var_list=var_list,
        gate_gradients=gate_gradients,
        aggregation_method=aggregation_method,
        colocate_gradients_with_ops=colocate_gradients_with_ops,
        grad_loss=grad_loss)
    state_grads_and_vars = [
        self._constraint_grad_and_var(state, matrix_gradient)
    ]

    if state_optimizer is None:
      # No separate constraint optimizer: self.optimizer applies the update of
      # the model parameters and that of the internal state in one go.
      update_ops = [
          self.optimizer.apply_gradients(
              grads_and_vars + state_grads_and_vars, name="update")
      ]
    else:
      # Separate constraint optimizer: self.optimizer updates the model
      # parameters and the constraint optimizer updates the internal state.
      # Both updates are made to wait for every available gradient.
      gradients = [
          gradient for gradient, _ in grads_and_vars + state_grads_and_vars
          if gradient is not None
      ]
      with ops.control_dependencies(gradients):
        model_update = self.optimizer.apply_gradients(
            grads_and_vars, name="update")
        state_update = state_optimizer.apply_gradients(
            state_grads_and_vars, name="optimizer_state_update")
      update_ops = [model_update, state_update]

    with ops.control_dependencies(update_ops):
      # Without a global step, projecting the state is all that is left.
      if global_step is None:
        return self._projection_op(state, name=name)

      # With a global step, it is incremented in addition to projecting.
      projection_op = self._projection_op(state, name="project")
      with ops.colocate_with(global_step):
        global_step_op = state_ops.assign_add(
            global_step, 1, name="global_step_increment")
      return control_flow_ops.group(projection_op, global_step_op, name=name)
Exemple #3
0
    def minimize_constrained(self,
                             minimization_problem,
                             global_step=None,
                             var_list=None,
                             gate_gradients=train_optimizer.Optimizer.GATE_OP,
                             aggregation_method=None,
                             colocate_gradients_with_ops=False,
                             name=None,
                             grad_loss=None):
        """Returns an `Op` for minimizing the constrained problem.

        The model parameters are updated using `self.optimizer`, while the
        internal state behind the Lagrange multipliers is updated using
        `self.constraint_optimizer` (if it is not None) or `self.optimizer`
        (otherwise).

        Args:
          minimization_problem: ConstrainedMinimizationProblem, the problem to
            optimize.
          global_step: as in `tf.train.Optimizer`'s `minimize` method.
          var_list: as in `tf.train.Optimizer`'s `minimize` method.
          gate_gradients: as in `tf.train.Optimizer`'s `minimize` method.
          aggregation_method: as in `tf.train.Optimizer`'s `minimize` method.
          colocate_gradients_with_ops: as in `tf.train.Optimizer`'s `minimize`
            method.
          name: as in `tf.train.Optimizer`'s `minimize` method.
          grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

        Raises:
          ValueError: If the objective, constraints and proxy constraints of
            `minimization_problem` do not all have the same base dtype.

        Returns:
          TensorFlow Op.
        """
        objective = minimization_problem.objective

        constraints = minimization_problem.constraints
        proxy_constraints = minimization_problem.proxy_constraints
        if proxy_constraints is None:
            proxy_constraints = constraints

        # Make sure that the objective, constraints and proxy constraints all
        # have the same dtype, so that a mismatch is reported here with a clear
        # message instead of surfacing later from inside a TensorFlow op.
        objective_dtype = objective.dtype.base_dtype
        if (objective_dtype != constraints.dtype.base_dtype
                or objective_dtype != proxy_constraints.dtype.base_dtype):
            raise ValueError(
                "objective, constraints and proxy_constraints must "
                "have the same dtype")

        # Flatten both constraints tensors to 1d.
        num_constraints = minimization_problem.num_constraints
        constraints = standard_ops.reshape(constraints,
                                           shape=(num_constraints, ))
        proxy_constraints = standard_ops.reshape(proxy_constraints,
                                                 shape=(num_constraints, ))

        # We use a lambda to initialize the state so that, if this function call is
        # inside the scope of a tf.control_dependencies() block, the dependencies
        # will not be applied to the initializer.
        state = standard_ops.Variable(
            lambda: self._initial_state(num_constraints),
            trainable=False,
            name="external_regret_optimizer_state")

        multipliers = self._lagrange_multipliers(state)
        # NOTE(review): the dtype of the multipliers is decided by
        # `_initial_state` / `_lagrange_multipliers`, which are not visible
        # here, and need not match the dtype of the problem tensors. Cast
        # explicitly in both directions; the casts leave the values untouched
        # when the dtypes already agree.
        loss = (objective + standard_ops.tensordot(
            standard_ops.cast(multipliers, proxy_constraints.dtype),
            proxy_constraints, 1))
        multipliers_gradient = standard_ops.cast(constraints,
                                                 multipliers.dtype)

        # The gradients w.r.t. the model parameters are needed regardless of
        # which optimizer ends up updating the internal state.
        grads_and_vars = self.optimizer.compute_gradients(
            loss,
            var_list=var_list,
            gate_gradients=gate_gradients,
            aggregation_method=aggregation_method,
            colocate_gradients_with_ops=colocate_gradients_with_ops,
            grad_loss=grad_loss)
        multiplier_grads_and_vars = [
            self._constraint_grad_and_var(state, multipliers_gradient)
        ]

        update_ops = []
        if self.constraint_optimizer is None:
            # If we don't have a separate constraint_optimizer, then we use
            # self.optimizer for both the update of the model parameters, and
            # that of the internal state.
            update_ops.append(
                self.optimizer.apply_gradients(
                    grads_and_vars + multiplier_grads_and_vars,
                    name="update"))
        else:
            # If we have a separate constraint_optimizer, then we use
            # self.optimizer for the update of the model parameters, and
            # self.constraint_optimizer for that of the internal state. Both
            # updates wait for every available gradient before running.
            gradients = [
                gradient
                for gradient, _ in grads_and_vars + multiplier_grads_and_vars
                if gradient is not None
            ]
            with ops.control_dependencies(gradients):
                update_ops.append(
                    self.optimizer.apply_gradients(grads_and_vars,
                                                   name="update"))
                update_ops.append(
                    self.constraint_optimizer.apply_gradients(
                        multiplier_grads_and_vars,
                        name="optimizer_state_update"))

        with ops.control_dependencies(update_ops):
            if global_step is None:
                # If we don't have a global step, just project, and we're done.
                return self._projection_op(state, name=name)

            # If we have a global step, then we need to increment it in
            # addition to projecting.
            projection_op = self._projection_op(state, name="project")
            with ops.colocate_with(global_step):
                global_step_op = state_ops.assign_add(
                    global_step, 1, name="global_step_increment")
            return control_flow_ops.group(projection_op,
                                          global_step_op,
                                          name=name)
Exemple #4
0
    def _cpu_with_grad_accum(test_wrapper, stages, inputs_fn, input_values,
                             repeat_count, num_batches_to_accumulate,
                             dataset_fn, optimizer):
        """Runs the pipeline stages in a fresh graph, accumulating gradients.

        A new graph and test session are created. Inside a "cpu" variable
        scope, the values passed to the pipeline plus the next example and
        label taken from the dataset are fed through `stages` in sequence;
        the output of the last stage is used as the loss.

        When `optimizer` is truthy, one non-trainable accumulator variable is
        created per trainable variable. Each batch adds its gradients to the
        accumulators, and after `num_batches_to_accumulate` batches the
        accumulated values are applied as gradients. The accumulators are
        reset to zero at the start of every repetition.

        Args:
          test_wrapper: object providing `test_session(graph=...)`.
          stages: iterable of callables; each one receives the unpacked
            outputs of the previous one.
          inputs_fn: callable returning the pipeline inputs, which are also
            used as the feed_dict keys.
          input_values: values fed for the inputs, paired with them by
            position.
          repeat_count: number of accumulate-then-apply repetitions.
          num_batches_to_accumulate: number of batches run per repetition.
          dataset_fn: callable returning the dataset to iterate over.
          optimizer: optimizer providing `compute_gradients` and
            `apply_gradients`; when falsy no training ops are built.

        Returns:
          List with the loss value fetched for every batch that was run.
        """
        graph = ops.Graph()
        with graph.as_default(), test_wrapper.test_session(
                graph=graph) as session:
            dataset = dataset_fn()
            inputs = inputs_fn()
            with variable_scope.variable_scope("cpu",
                                               use_resource=True,
                                               reuse=False):

                def pipeline(*args):
                    # TF2 replacement for: iterator = dataset.make_one_shot_iterator()
                    iterator = compat_v1_data.make_one_shot_iterator(dataset)
                    next_example, next_label = iterator.get_next()
                    stage_io = functional_ops._convert_to_list(args)  # pylint: disable=W0212
                    stage_io.append(next_example)
                    stage_io.append(next_label)
                    # Chain the stages: each consumes the previous outputs.
                    for stage in stages:
                        stage_args = functional_ops._convert_to_list(  # pylint: disable=W0212
                            stage_io)
                        stage_io = stage(*stage_args)
                    return stage_io

                loss = pipeline(*inputs)

                # Defaults used when no optimizer is given.
                train_step = None
                accum_ops = []
                zero_ops = []

                if optimizer:
                    trainable_variables = variables.trainable_variables()
                    # One zero-initialized accumulator per trainable variable.
                    accumulators = [
                        standard_ops.Variable(
                            array_ops.zeros_like(
                                trainable.initialized_value()),
                            trainable=False)
                        for trainable in trainable_variables
                    ]
                    zero_ops = [
                        accumulator.assign(array_ops.zeros_like(accumulator))
                        for accumulator in accumulators
                    ]
                    grads_and_vars = optimizer.compute_gradients(
                        loss, trainable_variables)
                    # Pair every accumulator with the gradient that feeds it
                    # and with the variable it will eventually update.
                    accumulated_grads_and_vars = []
                    for index, grad_and_var in enumerate(grads_and_vars):
                        accumulator = accumulators[index]
                        accum_ops.append(
                            accumulator.assign_add(grad_and_var[0]))
                        accumulated_grads_and_vars.append(
                            (accumulator, grad_and_var[1]))
                    train_step = optimizer.apply_gradients(
                        accumulated_grads_and_vars)

            session.run(variables.global_variables_initializer())
            losses = []
            with ops.device("cpu"):
                for _ in range(repeat_count):
                    session.run(zero_ops)
                    for _ in range(num_batches_to_accumulate):
                        feed = dict(zip(inputs, input_values))
                        batch_loss, _ = session.run([loss, accum_ops],
                                                    feed_dict=feed)
                        losses.append(batch_loss)
                    # Apply the accumulated gradients to update the weights.
                    if train_step:
                        session.run(train_step)
            return losses