def summarize_activation(op):
    """Add summaries describing a layer activation tensor.

    A histogram summary of `op` is always requested. When the operation that
    produced `op` has type 'Relu', 'Softplus' or 'Relu6', a scalar summary
    holding the mean fraction of strictly negative inputs is added as well;
    for 'Relu6' a second scalar summary reports the mean fraction of inputs
    greater than 6.

    Args:
      op: The tensor to summarize (assumed to be a layer activation).

    Returns:
      Whatever `_add_histogram_summary` returns for `op` (the summary created
      to summarize `op`).
    """
    producer = op.op

    if producer.type in ('Relu', 'Softplus', 'Relu6'):
        # Threshold the op's *input* rather than its output, which avoids
        # floating point equality tests and/or epsilons.
        pre_activation = producer.inputs[0]
        zero = standard_ops.cast(0.0, pre_activation.dtype)
        is_negative = standard_ops.less(pre_activation, zero)
        negative_fraction = standard_ops.reduce_mean(
            standard_ops.to_float(is_negative))
        _add_scalar_summary(negative_fraction, '%s/zeros' % producer.name)

    if producer.type == 'Relu6':
        pre_activation = producer.inputs[0]
        six = standard_ops.cast(6.0, pre_activation.dtype)
        is_above_six = standard_ops.greater(pre_activation, six)
        above_six_fraction = standard_ops.reduce_mean(
            standard_ops.to_float(is_above_six))
        _add_scalar_summary(above_six_fraction, '%s/sixes' % producer.name)

    return _add_histogram_summary(op, '%s/activation' % producer.name)
def summarize_activation(op):
    """Create the summaries for an activation tensor.

    Every call requests a histogram summary of `op`. If `op` was produced by a
    "Relu", "Softplus" or "Relu6" operation, the mean fraction of inputs below
    zero is also recorded as a scalar summary, and for "Relu6" the mean
    fraction of inputs above six is recorded too.

    Args:
      op: The tensor to summarize (assumed to be a layer activation).

    Returns:
      The result of `_add_histogram_summary` for `op` (the summary created to
      summarize `op`).
    """
    source_op = op.op

    def add_fraction_summary(compare, threshold, tag_format):
        # Compare against the op's input instead of its output so that no
        # floating point equality checks and/or epsilons are required.
        op_input = source_op.inputs[0]
        mask = compare(op_input, standard_ops.cast(threshold, op_input.dtype))
        fraction = standard_ops.reduce_mean(standard_ops.to_float(mask))
        _add_scalar_summary(fraction, tag_format % source_op.name)

    if source_op.type in ("Relu", "Softplus", "Relu6"):
        add_fraction_summary(standard_ops.less, 0.0, "%s/zeros")
    if source_op.type == "Relu6":
        add_fraction_summary(standard_ops.greater, 6.0, "%s/sixes")

    return _add_histogram_summary(op, "%s/activation" % source_op.name)
def while_loop_body(iteration, matrix, inactive, old_inactive):
    """Performs one iteration of the projection.

    Adds, to every entry flagged in `inactive`, an equal share of the amount
    by which the sums over axis 0 of `matrix` differ from 1, then zeroes every
    entry that is not strictly positive.

    Args:
      iteration: loop counter; returned incremented by one.
      matrix: the matrix being projected.
      inactive: mask (same dtype as `matrix`) of entries that receive the
        correction in this iteration.
      old_inactive: unused here; it is only part of the loop state because the
        loop condition needs it.

    Returns:
      Tuple `(iteration, matrix, new_inactive, inactive)` forming the next
      loop state; the incoming `inactive` takes the `old_inactive` slot.
    """
    del old_inactive  # Needed by the condition, but not the body.

    next_iteration = iteration + 1

    # Gap between 1 and the current sums over axis 0, divided by the number of
    # flagged entries (clamped to at least 1 to avoid dividing by zero).
    deficit = 1.0 - standard_ops.reduce_sum(matrix, axis=0, keepdims=True)
    active_count = standard_ops.maximum(
        1.0, standard_ops.reduce_sum(inactive, axis=0, keepdims=True))
    scale = deficit / active_count

    shifted = matrix + (scale * inactive)
    new_inactive = standard_ops.cast(shifted > 0, shifted.dtype)
    projected = shifted * new_inactive

    return (next_iteration, projected, new_inactive, inactive)
def while_loop_body(iteration, matrix, inactive, old_inactive):
    """Performs one iteration of the projection.

    The sums of `matrix` over axis 0 are pushed towards 1 by spreading the
    remaining gap evenly over the entries flagged in `inactive`; afterwards
    all entries that are not strictly positive are set to zero.

    Args:
      iteration: loop counter; returned incremented by one.
      matrix: the matrix being projected.
      inactive: mask (same dtype as `matrix`) selecting the entries that are
        adjusted in this iteration.
      old_inactive: ignored by the body; kept in the signature because the
        loop condition consumes it.

    Returns:
      Tuple `(iteration, matrix, new_inactive, inactive)`: the next loop
      state, with the incoming `inactive` moved into the last slot.
    """
    del old_inactive  # Needed by the condition, but not the body.
    iteration += 1

    # Per-entry correction: (1 - sum over axis 0) / max(1, number flagged).
    gap = 1.0 - standard_ops.reduce_sum(matrix, axis=0, keepdims=True)
    flagged = standard_ops.maximum(
        1.0, standard_ops.reduce_sum(inactive, axis=0, keepdims=True))
    scale = gap / flagged

    matrix += scale * inactive
    # Only strictly positive entries survive; the rest are zeroed below.
    new_inactive = standard_ops.cast(matrix > 0, matrix.dtype)
    matrix *= new_inactive

    return (iteration, matrix, new_inactive, inactive)
def while_loop_body(iteration, multipliers, inactive, old_inactive):
    """Performs one iteration of the projection.

    When the sum of `multipliers` exceeds `radius` (taken from the enclosing
    scope), the excess is removed in equal shares from the entries flagged in
    `inactive`; the correction is never positive. Entries that are not
    strictly positive afterwards are set to zero.

    Args:
      iteration: loop counter; returned incremented by one.
      multipliers: the values being projected.
      inactive: mask (same dtype as `multipliers`) of entries that receive the
        correction in this iteration.
      old_inactive: unused here; it is only part of the loop state because the
        loop condition needs it.

    Returns:
      Tuple `(iteration, multipliers, new_inactive, inactive)` forming the
      next loop state; the incoming `inactive` takes the `old_inactive` slot.
    """
    del old_inactive  # Needed by the condition, but not the body.

    next_iteration = iteration + 1

    # Slack left under the radius, shared among the flagged entries (count
    # clamped to at least 1), and capped at zero so values only ever shrink.
    slack = radius - standard_ops.reduce_sum(multipliers)
    active_count = standard_ops.maximum(1.0, standard_ops.reduce_sum(inactive))
    scale = standard_ops.minimum(0.0, slack / active_count)

    shifted = multipliers + (scale * inactive)
    new_inactive = standard_ops.cast(shifted > 0, shifted.dtype)
    projected = shifted * new_inactive

    return (next_iteration, projected, new_inactive, inactive)
def while_loop_body(iteration, multipliers, inactive, old_inactive):
    """Performs one iteration of the projection.

    Shrinks the entries flagged in `inactive` by an equal share of the amount
    by which the sum of `multipliers` exceeds `radius` (a variable of the
    enclosing scope); nothing is added when the sum is already within the
    radius. All entries that are not strictly positive are then zeroed.

    Args:
      iteration: loop counter; returned incremented by one.
      multipliers: the values being projected.
      inactive: mask (same dtype as `multipliers`) selecting the entries that
        are adjusted in this iteration.
      old_inactive: ignored by the body; kept in the signature because the
        loop condition consumes it.

    Returns:
      Tuple `(iteration, multipliers, new_inactive, inactive)`: the next loop
      state, with the incoming `inactive` moved into the last slot.
    """
    del old_inactive  # Needed by the condition, but not the body.
    iteration += 1

    # min(0, (radius - sum) / max(1, number flagged)): a non-positive shift.
    remaining = radius - standard_ops.reduce_sum(multipliers)
    flagged = standard_ops.maximum(1.0, standard_ops.reduce_sum(inactive))
    scale = standard_ops.minimum(0.0, remaining / flagged)

    multipliers += scale * inactive
    # Only strictly positive entries survive; the rest are zeroed below.
    new_inactive = standard_ops.cast(multipliers > 0, multipliers.dtype)
    multipliers *= new_inactive

    return (iteration, multipliers, new_inactive, inactive)
def minimize(self,
             minimization_problem,
             unconstrained_steps=None,
             global_step=None,
             var_list=None,
             gate_gradients=train_optimizer.Optimizer.GATE_OP,
             aggregation_method=None,
             colocate_gradients_with_ops=False,
             name=None,
             grad_loss=None):
    """Returns an `Operation` for minimizing the constrained problem.

    Dispatches between `minimize_unconstrained` and `minimize_constrained`:
    while global_step < unconstrained_steps an unconstrained update is
    performed, and once global_step >= unconstrained_steps a constrained
    update is performed. Without `unconstrained_steps` the constrained update
    is always used.

    Starting with unconstrained updates can be useful because it may be best
    to initialize the constrained optimizer with an approximate optimum of the
    unconstrained problem.

    Args:
      minimization_problem: ConstrainedMinimizationProblem, the problem to
        optimize.
      unconstrained_steps: int, number of steps for which we should perform
        unconstrained updates, before transitioning to constrained updates.
      global_step: as in `tf.train.Optimizer`'s `minimize` method.
      var_list: as in `tf.train.Optimizer`'s `minimize` method.
      gate_gradients: as in `tf.train.Optimizer`'s `minimize` method.
      aggregation_method: as in `tf.train.Optimizer`'s `minimize` method.
      colocate_gradients_with_ops: as in `tf.train.Optimizer`'s `minimize`
        method.
      name: as in `tf.train.Optimizer`'s `minimize` method.
      grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

    Returns:
      `Operation`, the train_op.

    Raises:
      ValueError: If unconstrained_steps is provided, but global_step is not.
    """
    # Both update flavours receive exactly the same keyword arguments.
    shared_kwargs = dict(
        minimization_problem=minimization_problem,
        global_step=global_step,
        var_list=var_list,
        gate_gradients=gate_gradients,
        aggregation_method=aggregation_method,
        colocate_gradients_with_ops=colocate_gradients_with_ops,
        name=name,
        grad_loss=grad_loss)

    def unconstrained_fn():
        """Returns an `Operation` for minimizing the unconstrained problem."""
        return self.minimize_unconstrained(**shared_kwargs)

    def constrained_fn():
        """Returns an `Operation` for minimizing the constrained problem."""
        return self.minimize_constrained(**shared_kwargs)

    if unconstrained_steps is None:
        return constrained_fn()

    if global_step is None:
        raise ValueError(
            "global_step cannot be None if unconstrained_steps is provided")

    # Compare in the dtype of the step limit so the two operands agree.
    unconstrained_steps_tensor = ops.convert_to_tensor(unconstrained_steps)
    step_in_limit_dtype = standard_ops.cast(
        global_step, unconstrained_steps_tensor.dtype)
    still_unconstrained = step_in_limit_dtype < unconstrained_steps_tensor

    return control_flow_ops.cond(
        still_unconstrained,
        true_fn=unconstrained_fn,
        false_fn=constrained_fn)
def _minimize_constrained(self,
                          minimization_problem,
                          global_step=None,
                          var_list=None,
                          gate_gradients=train_optimizer.Optimizer.GATE_OP,
                          aggregation_method=None,
                          colocate_gradients_with_ops=False,
                          name=None,
                          grad_loss=None):
    """Returns an `Operation` for minimizing the constrained problem.

    The model parameters are updated with `self.optimizer`. The internal state
    variable (the constraint/objective weight matrix, the analogue of Lagrange
    multipliers) is updated with `self.constraint_optimizer` when that is not
    None, and with `self.optimizer` otherwise. The gradient/variable pair for
    the state and the projection applied after the update come from
    `self._constraint_grad_and_var` and `self._projection_op`, so the exact
    form of the state update is decided by those methods.

    Args:
      minimization_problem: ConstrainedMinimizationProblem, the problem to
        optimize.
      global_step: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      var_list: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      gate_gradients: as in `tf.compat.v1.train.Optimizer`'s `minimize`
        method.
      aggregation_method: as in `tf.compat.v1.train.Optimizer`'s `minimize`
        method.
      colocate_gradients_with_ops: as in `tf.compat.v1.train.Optimizer`'s
        `minimize` method.
      name: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      grad_loss: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.

    Returns:
      `Operation`, the train_op.

    Raises:
      ValueError: If the minimization_problem tensors have different dtypes.
    """
    objective = minimization_problem.objective
    constraints = minimization_problem.constraints
    proxy_constraints = minimization_problem.proxy_constraints
    if proxy_constraints is None:
        proxy_constraints = constraints

    # The objective, constraints and proxy constraints must share one dtype.
    for tensor in (constraints, proxy_constraints):
        if objective.dtype.base_dtype != tensor.dtype.base_dtype:
            raise ValueError("objective, constraints and proxy_constraints must "
                             "have the same dtype")

    # Flatten both constraints tensors to 1d.
    num_constraints = minimization_problem.num_constraints
    flat_shape = (num_constraints,)
    constraints = standard_ops.reshape(constraints, shape=flat_shape)
    proxy_constraints = standard_ops.reshape(
        proxy_constraints, shape=flat_shape)

    # The initial value is supplied as a lambda so that, if this function call
    # is inside the scope of a tf.control_dependencies() block, the
    # dependencies will not be applied to the initializer.
    state = standard_ops.Variable(
        lambda: self._initial_state(num_constraints),
        trainable=False,
        name="swap_regret_optimizer_state")

    # [0, constraints...] and [objective, proxy_constraints...].
    leading_zero = standard_ops.zeros((1,), dtype=constraints.dtype)
    zero_and_constraints = standard_ops.concat(
        (leading_zero, constraints), axis=0)
    objective_entry = standard_ops.expand_dims(objective, 0)
    objective_and_proxy_constraints = standard_ops.concat(
        (objective_entry, proxy_constraints), axis=0)

    distribution = self._distribution(state)

    # Loss minimized w.r.t. the model parameters: the distribution-weighted
    # combination of the objective and the proxy constraints.
    weights = standard_ops.cast(
        distribution, objective_and_proxy_constraints.dtype)
    loss = standard_ops.tensordot(weights, objective_and_proxy_constraints, 1)

    # Gradient for the state: outer product of [0, constraints...] with the
    # distribution.
    constraints_column = standard_ops.expand_dims(
        standard_ops.cast(zero_and_constraints, distribution.dtype), 1)
    distribution_row = standard_ops.expand_dims(distribution, 0)
    matrix_gradient = standard_ops.matmul(constraints_column, distribution_row)

    state_optimizer = self.constraint_optimizer

    # Both configurations start from the same parameter gradients and the
    # same gradient/variable pair for the state.
    grads_and_vars = self.optimizer.compute_gradients(
        loss,
        var_list=var_list,
        gate_gradients=gate_gradients,
        aggregation_method=aggregation_method,
        colocate_gradients_with_ops=colocate_gradients_with_ops,
        grad_loss=grad_loss)
    state_grad_and_var = self._constraint_grad_and_var(state, matrix_gradient)

    if state_optimizer is None:
        # No separate constraint optimizer: self.optimizer applies the update
        # of the model parameters and that of the internal state together.
        grads_and_vars.append(state_grad_and_var)
        update_ops = [
            self.optimizer.apply_gradients(grads_and_vars, name="update")
        ]
    else:
        # Separate constraint optimizer: self.optimizer updates the model
        # parameters and self.constraint_optimizer updates the internal state.
        # Both updates are made to depend on every gradient.
        matrix_grads_and_vars = [state_grad_and_var]
        gradients = [
            gradient
            for gradient, _ in grads_and_vars + matrix_grads_and_vars
            if gradient is not None
        ]
        with ops.control_dependencies(gradients):
            update_ops = [
                self.optimizer.apply_gradients(grads_and_vars, name="update"),
                state_optimizer.apply_gradients(
                    matrix_grads_and_vars, name="optimizer_state_update"),
            ]

    with ops.control_dependencies(update_ops):
        if global_step is None:
            # If we don't have a global step, just project, and we're done.
            return self._projection_op(state, name=name)

        # If we have a global step, then we need to increment it in addition
        # to projecting.
        projection_op = self._projection_op(state, name="project")
        with ops.colocate_with(global_step):
            global_step_op = state_ops.assign_add(
                global_step, 1, name="global_step_increment")
        return control_flow_ops.group(projection_op, global_step_op, name=name)
def _minimize_constrained(self,
                          minimization_problem,
                          global_step=None,
                          var_list=None,
                          gate_gradients=train_optimizer.Optimizer.GATE_OP,
                          aggregation_method=None,
                          colocate_gradients_with_ops=False,
                          name=None,
                          grad_loss=None):
    """Returns an `Operation` for minimizing the constrained problem.

    `self.optimizer` updates the model parameters. The internal state variable
    (the constraint/objective weight matrix, the analogue of Lagrange
    multipliers) is updated by `self.constraint_optimizer` if it is not None,
    or by `self.optimizer` if it is. How the state is updated and projected is
    determined by `self._constraint_grad_and_var` and `self._projection_op`.

    Args:
      minimization_problem: ConstrainedMinimizationProblem, the problem to
        optimize.
      global_step: as in `tf.train.Optimizer`'s `minimize` method.
      var_list: as in `tf.train.Optimizer`'s `minimize` method.
      gate_gradients: as in `tf.train.Optimizer`'s `minimize` method.
      aggregation_method: as in `tf.train.Optimizer`'s `minimize` method.
      colocate_gradients_with_ops: as in `tf.train.Optimizer`'s `minimize`
        method.
      name: as in `tf.train.Optimizer`'s `minimize` method.
      grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

    Returns:
      `Operation`, the train_op.

    Raises:
      ValueError: If the minimization_problem tensors have different dtypes.
    """
    objective = minimization_problem.objective
    constraints = minimization_problem.constraints
    proxy_constraints = minimization_problem.proxy_constraints
    if proxy_constraints is None:
        proxy_constraints = constraints

    # Make sure that the objective, constraints and proxy constraints all
    # have the same dtype.
    for candidate in (constraints, proxy_constraints):
        if objective.dtype.base_dtype != candidate.dtype.base_dtype:
            raise ValueError("objective, constraints and proxy_constraints must "
                             "have the same dtype")

    # Flatten both constraints tensors to 1d.
    num_constraints = minimization_problem.num_constraints
    vector_shape = (num_constraints,)
    constraints = standard_ops.reshape(constraints, shape=vector_shape)
    proxy_constraints = standard_ops.reshape(
        proxy_constraints, shape=vector_shape)

    # We use a lambda to initialize the state so that, if this function call
    # is inside the scope of a tf.control_dependencies() block, the
    # dependencies will not be applied to the initializer.
    state = standard_ops.Variable(
        lambda: self._initial_state(num_constraints),
        trainable=False,
        name="swap_regret_optimizer_state")

    # Prepend a zero to the constraints and the objective to the proxy
    # constraints, so both vectors have num_constraints + 1 entries.
    zero_prefix = standard_ops.zeros((1,), dtype=constraints.dtype)
    zero_and_constraints = standard_ops.concat(
        (zero_prefix, constraints), axis=0)
    objective_prefix = standard_ops.expand_dims(objective, 0)
    objective_and_proxy_constraints = standard_ops.concat(
        (objective_prefix, proxy_constraints), axis=0)

    distribution = self._distribution(state)

    # Loss for the model parameters: contraction of the distribution with
    # [objective, proxy_constraints...].
    distribution_for_loss = standard_ops.cast(
        distribution, objective_and_proxy_constraints.dtype)
    loss = standard_ops.tensordot(
        distribution_for_loss, objective_and_proxy_constraints, 1)

    # Gradient for the state: (column of [0, constraints...]) x (row of the
    # distribution).
    column = standard_ops.expand_dims(
        standard_ops.cast(zero_and_constraints, distribution.dtype), 1)
    row = standard_ops.expand_dims(distribution, 0)
    matrix_gradient = standard_ops.matmul(column, row)

    constraint_optimizer = self.constraint_optimizer

    # Parameter gradients and the state's gradient/variable pair are needed
    # regardless of which optimizer ends up applying the state update.
    grads_and_vars = self.optimizer.compute_gradients(
        loss,
        var_list=var_list,
        gate_gradients=gate_gradients,
        aggregation_method=aggregation_method,
        colocate_gradients_with_ops=colocate_gradients_with_ops,
        grad_loss=grad_loss)
    matrix_grad_and_var = self._constraint_grad_and_var(state, matrix_gradient)

    update_ops = []
    if constraint_optimizer is None:
        # If we don't have a separate constraint_optimizer, then we use
        # self.optimizer for both the update of the model parameters, and
        # that of the internal state.
        grads_and_vars.append(matrix_grad_and_var)
        update_ops.append(
            self.optimizer.apply_gradients(grads_and_vars, name="update"))
    else:
        # If we have a separate constraint_optimizer, then we use
        # self.optimizer for the update of the model parameters, and
        # self.constraint_optimizer for that of the internal state. Both
        # updates wait on every gradient.
        matrix_grads_and_vars = [matrix_grad_and_var]
        gradients = [
            pair[0]
            for pair in grads_and_vars + matrix_grads_and_vars
            if pair[0] is not None
        ]
        with ops.control_dependencies(gradients):
            update_ops.append(
                self.optimizer.apply_gradients(grads_and_vars, name="update"))
            update_ops.append(
                constraint_optimizer.apply_gradients(
                    matrix_grads_and_vars, name="optimizer_state_update"))

    with ops.control_dependencies(update_ops):
        if global_step is None:
            # If we don't have a global step, just project, and we're done.
            return self._projection_op(state, name=name)

        # If we have a global step, then we need to increment it in addition
        # to projecting.
        projection_op = self._projection_op(state, name="project")
        with ops.colocate_with(global_step):
            global_step_op = state_ops.assign_add(
                global_step, 1, name="global_step_increment")
        return control_flow_ops.group(projection_op, global_step_op, name=name)
def minimize(self,
             minimization_problem,
             unconstrained_steps=None,
             global_step=None,
             var_list=None,
             gate_gradients=train_optimizer.Optimizer.GATE_OP,
             aggregation_method=None,
             colocate_gradients_with_ops=False,
             name=None,
             grad_loss=None):
    """Returns an `Operation` for minimizing the constrained problem.

    Combines `minimize_unconstrained` and `minimize_constrained`. If
    global_step < unconstrained_steps, an unconstrained update is performed;
    if global_step >= unconstrained_steps, a constrained update is performed.
    When `unconstrained_steps` is None, the constrained update is always used.

    The unconstrained warm-up exists because it may be best to initialize the
    constrained optimizer with an approximate optimum of the unconstrained
    problem.

    Args:
      minimization_problem: ConstrainedMinimizationProblem, the problem to
        optimize.
      unconstrained_steps: int, number of steps for which we should perform
        unconstrained updates, before transitioning to constrained updates.
      global_step: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      var_list: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      gate_gradients: as in `tf.compat.v1.train.Optimizer`'s `minimize`
        method.
      aggregation_method: as in `tf.compat.v1.train.Optimizer`'s `minimize`
        method.
      colocate_gradients_with_ops: as in `tf.compat.v1.train.Optimizer`'s
        `minimize` method.
      name: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      grad_loss: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.

    Returns:
      `Operation`, the train_op.

    Raises:
      ValueError: If unconstrained_steps is provided, but global_step is not.
    """
    # Keyword arguments forwarded unchanged to whichever update is built.
    forwarded = {
        "minimization_problem": minimization_problem,
        "global_step": global_step,
        "var_list": var_list,
        "gate_gradients": gate_gradients,
        "aggregation_method": aggregation_method,
        "colocate_gradients_with_ops": colocate_gradients_with_ops,
        "name": name,
        "grad_loss": grad_loss,
    }

    # Callables (not ops) so that each update is only built when invoked.
    unconstrained_fn = lambda: self.minimize_unconstrained(**forwarded)
    constrained_fn = lambda: self.minimize_constrained(**forwarded)

    if unconstrained_steps is None:
        return constrained_fn()

    if global_step is None:
        raise ValueError(
            "global_step cannot be None if unconstrained_steps is provided")

    # Cast the step counter to the limit's dtype before comparing.
    unconstrained_steps_tensor = ops.convert_to_tensor(unconstrained_steps)
    limit_dtype = unconstrained_steps_tensor.dtype
    in_warm_up = (
        standard_ops.cast(global_step, limit_dtype) < unconstrained_steps_tensor)

    return control_flow_ops.cond(
        in_warm_up, true_fn=unconstrained_fn, false_fn=constrained_fn)