def _minimize_constrained(self,
                          minimization_problem,
                          global_step=None,
                          var_list=None,
                          gate_gradients=train_optimizer.Optimizer.GATE_OP,
                          aggregation_method=None,
                          colocate_gradients_with_ops=False,
                          name=None,
                          grad_loss=None):
    """Builds the `Operation` that takes one constrained-minimization step.

    The loss is the dot product of `self._distribution(state)` with the
    vector formed by prepending the objective to the proxy constraints.
    `self.optimizer` applies the gradients of that loss to the model
    parameters. The internal state (the constraint/objective weight matrix,
    the analogue of Lagrange multipliers) is updated by
    `self.constraint_optimizer` when one is set, and by `self.optimizer`
    otherwise. Whether that update is additive or multiplicative is decided
    by the derived class.

    Args:
      minimization_problem: ConstrainedMinimizationProblem, the problem to
        optimize.
      global_step: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
        When given, it is incremented by one alongside the projection.
      var_list: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      gate_gradients: as in `tf.compat.v1.train.Optimizer`'s `minimize`
        method.
      aggregation_method: as in `tf.compat.v1.train.Optimizer`'s `minimize`
        method.
      colocate_gradients_with_ops: as in `tf.compat.v1.train.Optimizer`'s
        `minimize` method.
      name: as in `tf.compat.v1.train.Optimizer`'s `minimize` method. Used
        as the name of the returned op.
      grad_loss: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.

    Raises:
      ValueError: If the objective, constraints and proxy constraints do not
        all share the same base dtype.

    Returns:
      `Operation`, the train_op.
    """
    objective = minimization_problem.objective
    constraints = minimization_problem.constraints
    proxy_constraints = minimization_problem.proxy_constraints
    if proxy_constraints is None:
        # No dedicated proxies: the constraints stand in for them.
        proxy_constraints = constraints

    # The three inputs are concatenated/combined below, so they must agree
    # on their base dtype.
    for other in (constraints, proxy_constraints):
        if objective.dtype.base_dtype != other.dtype.base_dtype:
            raise ValueError("objective, constraints and proxy_constraints "
                             "must have the same dtype")

    # Both constraint tensors are flattened to 1d.
    num_constraints = minimization_problem.num_constraints
    flat_shape = (num_constraints,)
    constraints = standard_ops.reshape(constraints, shape=flat_shape)
    proxy_constraints = standard_ops.reshape(
        proxy_constraints, shape=flat_shape)

    # The initial value is supplied as a callable so that, if this method is
    # called inside a tf.control_dependencies() block, those dependencies
    # are not applied to the initializer.
    state = standard_ops.Variable(
        lambda: self._initial_state(num_constraints),
        trainable=False,
        name="swap_regret_optimizer_state")

    # [0, constraints...] drives the state update, while
    # [objective, proxy_constraints...] drives the model-parameter update.
    leading_zero = standard_ops.zeros((1,), dtype=constraints.dtype)
    zero_and_constraints = standard_ops.concat(
        (leading_zero, constraints), axis=0)
    objective_entry = standard_ops.expand_dims(objective, 0)
    objective_and_proxy_constraints = standard_ops.concat(
        (objective_entry, proxy_constraints), axis=0)

    distribution = self._distribution(state)
    weights = standard_ops.cast(
        distribution, objective_and_proxy_constraints.dtype)
    loss = standard_ops.tensordot(weights, objective_and_proxy_constraints, 1)

    # Gradient for the state: [0, constraints...] as a column times the
    # distribution with a leading axis added.
    column = standard_ops.expand_dims(
        standard_ops.cast(zero_and_constraints, distribution.dtype), 1)
    row = standard_ops.expand_dims(distribution, 0)
    matrix_gradient = standard_ops.matmul(column, row)

    share_optimizer = self.constraint_optimizer is None

    # Both configurations start from the same two pieces: the model
    # gradients and the (gradient, variable) pair for the internal state.
    grads_and_vars = self.optimizer.compute_gradients(
        loss,
        var_list=var_list,
        gate_gradients=gate_gradients,
        aggregation_method=aggregation_method,
        colocate_gradients_with_ops=colocate_gradients_with_ops,
        grad_loss=grad_loss)
    state_grad_and_var = self._constraint_grad_and_var(state, matrix_gradient)

    if share_optimizer:
        # A single optimizer updates the model parameters and the internal
        # state together.
        grads_and_vars.append(state_grad_and_var)
        update_ops = [
            self.optimizer.apply_gradients(grads_and_vars, name="update"),
        ]
    else:
        # self.optimizer updates the model parameters and
        # self.constraint_optimizer updates the internal state. Both updates
        # wait on every gradient.
        matrix_grads_and_vars = [state_grad_and_var]
        gradients = [
            gradient
            for gradient, _ in grads_and_vars + matrix_grads_and_vars
            if gradient is not None
        ]
        with ops.control_dependencies(gradients):
            update_ops = [
                self.optimizer.apply_gradients(grads_and_vars, name="update"),
                self.constraint_optimizer.apply_gradients(
                    matrix_grads_and_vars, name="optimizer_state_update"),
            ]

    with ops.control_dependencies(update_ops):
        if global_step is None:
            # No global step: projecting the state is all that is left.
            return self._projection_op(state, name=name)

        # Otherwise the global step is incremented in addition to projecting.
        projection_op = self._projection_op(state, name="project")
        with ops.colocate_with(global_step):
            global_step_op = state_ops.assign_add(
                global_step, 1, name="global_step_increment")
        return control_flow_ops.group(
            projection_op, global_step_op, name=name)
def minimize_constrained(self,
                         minimization_problem,
                         global_step=None,
                         var_list=None,
                         gate_gradients=train_optimizer.Optimizer.GATE_OP,
                         aggregation_method=None,
                         colocate_gradients_with_ops=False,
                         name=None,
                         grad_loss=None):
    """Returns an `Op` for minimizing the constrained problem.

    The loss is `objective + dot(multipliers, proxy_constraints)`, where the
    multipliers come from `self._lagrange_multipliers(state)`.
    `self.optimizer` applies the gradients of that loss to the model
    parameters. The internal state behind the Lagrange multipliers uses the
    (flattened) constraints as its gradient, and is updated by
    `self.constraint_optimizer` when one is set, or by `self.optimizer`
    otherwise.

    The multipliers are cast to the proxy constraints' dtype for the loss,
    and the constraints are cast to the multipliers' dtype for the state
    gradient, so the problem's tensors need not share the state's dtype.

    Args:
      minimization_problem: ConstrainedMinimizationProblem, the problem to
        optimize.
      global_step: as in `tf.train.Optimizer`'s `minimize` method. When
        given, it is incremented by one alongside the projection.
      var_list: as in `tf.train.Optimizer`'s `minimize` method.
      gate_gradients: as in `tf.train.Optimizer`'s `minimize` method.
      aggregation_method: as in `tf.train.Optimizer`'s `minimize` method.
      colocate_gradients_with_ops: as in `tf.train.Optimizer`'s `minimize`
        method.
      name: as in `tf.train.Optimizer`'s `minimize` method. Used as the name
        of the returned op.
      grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

    Returns:
      TensorFlow Op.
    """
    objective = minimization_problem.objective
    constraints = minimization_problem.constraints
    proxy_constraints = minimization_problem.proxy_constraints
    if proxy_constraints is None:
        # No dedicated proxies: the constraints stand in for them.
        proxy_constraints = constraints

    # Flatten both constraints tensors to 1d.
    num_constraints = minimization_problem.num_constraints
    flat_shape = (num_constraints,)
    constraints = standard_ops.reshape(constraints, shape=flat_shape)
    proxy_constraints = standard_ops.reshape(
        proxy_constraints, shape=flat_shape)

    # We use a lambda to initialize the state so that, if this function call
    # is inside the scope of a tf.control_dependencies() block, the
    # dependencies will not be applied to the initializer.
    state = standard_ops.Variable(
        lambda: self._initial_state(num_constraints),
        trainable=False,
        name="external_regret_optimizer_state")

    multipliers = self._lagrange_multipliers(state)

    # tensordot needs both operands in one dtype; the cast leaves the
    # multipliers' values unchanged when the dtypes already agree.
    loss = objective + standard_ops.tensordot(
        standard_ops.cast(multipliers, proxy_constraints.dtype),
        proxy_constraints, 1)
    # The gradient handed to the state update must be in the multipliers'
    # dtype rather than the problem's dtype.
    multipliers_gradient = standard_ops.cast(constraints, multipliers.dtype)

    share_optimizer = self.constraint_optimizer is None

    # Both configurations start from the same two pieces: the model
    # gradients and the (gradient, variable) pair for the internal state.
    grads_and_vars = self.optimizer.compute_gradients(
        loss,
        var_list=var_list,
        gate_gradients=gate_gradients,
        aggregation_method=aggregation_method,
        colocate_gradients_with_ops=colocate_gradients_with_ops,
        grad_loss=grad_loss)
    state_grad_and_var = self._constraint_grad_and_var(
        state, multipliers_gradient)

    if share_optimizer:
        # If we don't have a separate constraint_optimizer, then we use
        # self.optimizer for both the update of the model parameters, and
        # that of the internal state.
        grads_and_vars.append(state_grad_and_var)
        update_ops = [
            self.optimizer.apply_gradients(grads_and_vars, name="update"),
        ]
    else:
        # If we have a separate constraint_optimizer, then we use
        # self.optimizer for the update of the model parameters, and
        # self.constraint_optimizer for that of the internal state. Both
        # updates wait on every gradient.
        multiplier_grads_and_vars = [state_grad_and_var]
        gradients = [
            gradient
            for gradient, _ in grads_and_vars + multiplier_grads_and_vars
            if gradient is not None
        ]
        with ops.control_dependencies(gradients):
            update_ops = [
                self.optimizer.apply_gradients(grads_and_vars, name="update"),
                self.constraint_optimizer.apply_gradients(
                    multiplier_grads_and_vars,
                    name="optimizer_state_update"),
            ]

    with ops.control_dependencies(update_ops):
        if global_step is None:
            # If we don't have a global step, just project, and we're done.
            return self._projection_op(state, name=name)

        # If we have a global step, then we need to increment it in addition
        # to projecting.
        projection_op = self._projection_op(state, name="project")
        with ops.colocate_with(global_step):
            global_step_op = state_ops.assign_add(
                global_step, 1, name="global_step_increment")
        return control_flow_ops.group(
            projection_op, global_step_op, name=name)
def _minimize_constrained(self,
                          minimization_problem,
                          global_step=None,
                          var_list=None,
                          gate_gradients=train_optimizer.Optimizer.GATE_OP,
                          aggregation_method=None,
                          colocate_gradients_with_ops=False,
                          name=None,
                          grad_loss=None):
    """Creates the train `Operation` for the constrained problem.

    Model parameters are updated by `self.optimizer` using the gradients of
    a loss that weights the objective and proxy constraints by
    `self._distribution(state)`. The constraint/objective weight matrix held
    in the internal state (the analogue of Lagrange multipliers) is updated
    by `self.constraint_optimizer` if it is set, and by `self.optimizer` if
    not. The derived class determines whether the matrix updates are
    additive or multiplicative.

    Args:
      minimization_problem: ConstrainedMinimizationProblem, the problem to
        optimize.
      global_step: as in `tf.train.Optimizer`'s `minimize` method. When
        given, it is incremented by one alongside the projection.
      var_list: as in `tf.train.Optimizer`'s `minimize` method.
      gate_gradients: as in `tf.train.Optimizer`'s `minimize` method.
      aggregation_method: as in `tf.train.Optimizer`'s `minimize` method.
      colocate_gradients_with_ops: as in `tf.train.Optimizer`'s `minimize`
        method.
      name: as in `tf.train.Optimizer`'s `minimize` method. Used as the name
        of the returned op.
      grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

    Raises:
      ValueError: If the minimization_problem tensors have different dtypes.

    Returns:
      `Operation`, the train_op.
    """
    objective = minimization_problem.objective
    constraints = minimization_problem.constraints
    proxy_constraints = minimization_problem.proxy_constraints
    if proxy_constraints is None:
        # Fall back to the constraints when no proxies are supplied.
        proxy_constraints = constraints

    # Reject problems whose objective, constraints and proxy constraints do
    # not all share one base dtype.
    dtype_mismatch = any(
        objective.dtype.base_dtype != tensor.dtype.base_dtype
        for tensor in (constraints, proxy_constraints))
    if dtype_mismatch:
        raise ValueError("objective, constraints and proxy_constraints "
                         "must have the same dtype")

    # Reshape both constraints tensors into 1d vectors.
    num_constraints = minimization_problem.num_constraints
    vector_shape = (num_constraints,)
    constraints = standard_ops.reshape(constraints, shape=vector_shape)
    proxy_constraints = standard_ops.reshape(
        proxy_constraints, shape=vector_shape)

    # A callable initial value keeps the initializer free of any
    # dependencies from an enclosing tf.control_dependencies() block.
    state = standard_ops.Variable(
        lambda: self._initial_state(num_constraints),
        trainable=False,
        name="swap_regret_optimizer_state")

    # A zero is prepended to the constraints, and the objective is prepended
    # to the proxy constraints, so the two vectors line up entry by entry.
    zero_entry = standard_ops.zeros((1,), dtype=constraints.dtype)
    zero_and_constraints = standard_ops.concat(
        (zero_entry, constraints), axis=0)
    objective_entry = standard_ops.expand_dims(objective, 0)
    objective_and_proxy_constraints = standard_ops.concat(
        (objective_entry, proxy_constraints), axis=0)

    distribution = self._distribution(state)
    typed_distribution = standard_ops.cast(
        distribution, objective_and_proxy_constraints.dtype)
    loss = standard_ops.tensordot(
        typed_distribution, objective_and_proxy_constraints, 1)

    # The state gradient is built in the distribution's dtype.
    constraints_column = standard_ops.expand_dims(
        standard_ops.cast(zero_and_constraints, distribution.dtype), 1)
    distribution_row = standard_ops.expand_dims(distribution, 0)
    matrix_gradient = standard_ops.matmul(
        constraints_column, distribution_row)

    single_optimizer = self.constraint_optimizer is None

    # The model gradients and the state's (gradient, variable) pair are
    # needed whichever optimizer ends up applying them.
    grads_and_vars = self.optimizer.compute_gradients(
        loss,
        var_list=var_list,
        gate_gradients=gate_gradients,
        aggregation_method=aggregation_method,
        colocate_gradients_with_ops=colocate_gradients_with_ops,
        grad_loss=grad_loss)
    matrix_grad_and_var = self._constraint_grad_and_var(
        state, matrix_gradient)

    update_ops = []
    if single_optimizer:
        # One apply_gradients call covers the model parameters and the
        # internal state.
        grads_and_vars.append(matrix_grad_and_var)
        update_ops.append(
            self.optimizer.apply_gradients(grads_and_vars, name="update"))
    else:
        # Separate optimizers: each apply_gradients call is gated on all of
        # the gradients having been computed.
        matrix_grads_and_vars = [matrix_grad_and_var]
        gradients = []
        for gradient, _ in grads_and_vars + matrix_grads_and_vars:
            if gradient is not None:
                gradients.append(gradient)
        with ops.control_dependencies(gradients):
            update_ops.append(
                self.optimizer.apply_gradients(grads_and_vars, name="update"))
            update_ops.append(
                self.constraint_optimizer.apply_gradients(
                    matrix_grads_and_vars, name="optimizer_state_update"))

    with ops.control_dependencies(update_ops):
        if global_step is None:
            # Nothing to increment, so the projection is the train op.
            return self._projection_op(state, name=name)

        # Project the state and bump the global step, grouped as one op.
        projection_op = self._projection_op(state, name="project")
        with ops.colocate_with(global_step):
            global_step_op = state_ops.assign_add(
                global_step, 1, name="global_step_increment")
        return control_flow_ops.group(
            projection_op, global_step_op, name=name)
def minimize_constrained(self,
                         minimization_problem,
                         global_step=None,
                         var_list=None,
                         gate_gradients=train_optimizer.Optimizer.GATE_OP,
                         aggregation_method=None,
                         colocate_gradients_with_ops=False,
                         name=None,
                         grad_loss=None):
    """Returns an `Op` for minimizing the constrained problem.

    `self.optimizer` updates the model parameters from the gradients of
    `objective + dot(multipliers, proxy_constraints)`, with the multipliers
    taken from `self._lagrange_multipliers(state)`. The internal state
    behind the Lagrange multipliers receives the (flattened) constraints as
    its gradient and is updated by `self.constraint_optimizer` (if set) or
    `self.optimizer` (if not).

    To tolerate a problem whose tensors use a different dtype than the
    internal state, the multipliers are cast to the proxy constraints' dtype
    when forming the loss, and the constraints are cast to the multipliers'
    dtype when forming the state gradient.

    Args:
      minimization_problem: ConstrainedMinimizationProblem, the problem to
        optimize.
      global_step: as in `tf.train.Optimizer`'s `minimize` method. When
        given, it is incremented by one alongside the projection.
      var_list: as in `tf.train.Optimizer`'s `minimize` method.
      gate_gradients: as in `tf.train.Optimizer`'s `minimize` method.
      aggregation_method: as in `tf.train.Optimizer`'s `minimize` method.
      colocate_gradients_with_ops: as in `tf.train.Optimizer`'s `minimize`
        method.
      name: as in `tf.train.Optimizer`'s `minimize` method. Used as the name
        of the returned op.
      grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

    Returns:
      TensorFlow Op.
    """
    objective = minimization_problem.objective
    constraints = minimization_problem.constraints
    proxy_constraints = minimization_problem.proxy_constraints
    if proxy_constraints is None:
        # Fall back to the constraints when no proxies are supplied.
        proxy_constraints = constraints

    # Flatten both constraints tensors to 1d.
    num_constraints = minimization_problem.num_constraints
    vector_shape = (num_constraints,)
    constraints = standard_ops.reshape(constraints, shape=vector_shape)
    proxy_constraints = standard_ops.reshape(
        proxy_constraints, shape=vector_shape)

    # We use a lambda to initialize the state so that, if this function call
    # is inside the scope of a tf.control_dependencies() block, the
    # dependencies will not be applied to the initializer.
    state = standard_ops.Variable(
        lambda: self._initial_state(num_constraints),
        trainable=False,
        name="external_regret_optimizer_state")

    multipliers = self._lagrange_multipliers(state)

    # Bring the multipliers into the proxy constraints' dtype so that
    # tensordot sees matching operands; values are unchanged when the dtypes
    # already agree.
    typed_multipliers = standard_ops.cast(
        multipliers, proxy_constraints.dtype)
    loss = objective + standard_ops.tensordot(
        typed_multipliers, proxy_constraints, 1)
    # The state update is driven by the constraints, expressed in the
    # multipliers' dtype.
    multipliers_gradient = standard_ops.cast(constraints, multipliers.dtype)

    single_optimizer = self.constraint_optimizer is None

    # The model gradients and the state's (gradient, variable) pair are
    # needed whichever optimizer ends up applying them.
    grads_and_vars = self.optimizer.compute_gradients(
        loss,
        var_list=var_list,
        gate_gradients=gate_gradients,
        aggregation_method=aggregation_method,
        colocate_gradients_with_ops=colocate_gradients_with_ops,
        grad_loss=grad_loss)
    multiplier_grad_and_var = self._constraint_grad_and_var(
        state, multipliers_gradient)

    update_ops = []
    if single_optimizer:
        # Without a separate constraint_optimizer, self.optimizer handles
        # both the model parameters and the internal state.
        grads_and_vars.append(multiplier_grad_and_var)
        update_ops.append(
            self.optimizer.apply_gradients(grads_and_vars, name="update"))
    else:
        # With a separate constraint_optimizer, self.optimizer updates the
        # model parameters and self.constraint_optimizer updates the
        # internal state; both are gated on all of the gradients.
        multiplier_grads_and_vars = [multiplier_grad_and_var]
        gradients = [
            gradient
            for gradient, _ in grads_and_vars + multiplier_grads_and_vars
            if gradient is not None
        ]
        with ops.control_dependencies(gradients):
            update_ops.append(
                self.optimizer.apply_gradients(grads_and_vars, name="update"))
            update_ops.append(
                self.constraint_optimizer.apply_gradients(
                    multiplier_grads_and_vars,
                    name="optimizer_state_update"))

    with ops.control_dependencies(update_ops):
        if global_step is None:
            # If we don't have a global step, just project, and we're done.
            return self._projection_op(state, name=name)

        # If we have a global step, then we need to increment it in addition
        # to projecting.
        projection_op = self._projection_op(state, name="project")
        with ops.colocate_with(global_step):
            global_step_op = state_ops.assign_add(
                global_step, 1, name="global_step_increment")
        return control_flow_ops.group(
            projection_op, global_step_op, name=name)