def _invalid_model_fn(features, labels):
  # pylint: disable=unused-argument
  w = variables_lib.Variable(42.0, name='weight')
  update_global_step = variables.get_global_step().assign_add(1)
  with ops.control_dependencies([update_global_step]):
    loss = 100.0 - w
  # Invalid: a model_fn must also return predictions and a train_op.
  return None, loss, None
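# A hedged usage sketch, not part of the original source (assumes `estimator`
# is the tf.contrib.learn estimator module and `input_fn` is some input
# pipeline): fitting with this model_fn is expected to fail, since the
# returned predictions and train_op are both None.
def _example_invalid_fit(input_fn):
  est = estimator.Estimator(model_fn=_invalid_model_fn)
  est.fit(input_fn=input_fn, steps=1)  # expected to raise a ValueError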
def get_mean_baseline(ema_decay=0.99, name=None):
  """Returns a baseline function that tracks an EMA of the mean loss."""

  def mean_baseline(_, loss):
    with ops.name_scope(name, 'MeanBaseline', [loss]):
      reduced_loss = math_ops.reduce_mean(loss)
      ema = training.ExponentialMovingAverage(decay=ema_decay)
      # `apply` expects a list of variables or tensors to track.
      update_op = ema.apply([reduced_loss])
      with ops.control_dependencies([update_op]):
        # TODO(rsepassi): Possibly implement the initialization bias correction
        # term from Adam (section 3 of https://arxiv.org/pdf/1412.6980v8.pdf).
        baseline = ema.average(reduced_loss)
      return baseline

  return mean_baseline
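# A minimal usage sketch, not part of the original source: the factory returns
# a callable taking the stochastic tensor (unused here) and the downstream
# loss; evaluating the result applies one EMA update and yields the current
# baseline. Assumes TF1 graph mode with this module's imports in scope.
def _mean_baseline_example():
  baseline_fn = get_mean_baseline(ema_decay=0.9)
  loss = ops.convert_to_tensor([4.0, 2.0])  # per-example losses, mean is 3.0
  # Running the returned tensor in a session moves the EMA toward 3.0.
  return baseline_fn(None, loss)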
def _invalid_model_fn(features, labels, mode):
  # pylint: disable=unused-argument
  w = variables_lib.Variable(42.0, name='weight')
  loss = 100.0 - w
  update_global_step = variables.get_global_step().assign_add(1)
  with ops.control_dependencies([update_global_step]):
    train_op = w.assign_add(loss / 100.0)
  predictions = loss
  if mode == model_fn.ModeKeys.EVAL:
    # Invalid: a model_fn must return a loss in EVAL mode.
    loss = None
  return predictions, loss, train_op
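# A hedged counterpart for the mode-aware variant above (same assumptions as
# the earlier sketch): training succeeds because loss and train_op are
# present, but evaluation must fail because loss is None in EVAL mode.
def _example_invalid_eval(input_fn):
  est = estimator.Estimator(model_fn=_invalid_model_fn)
  est.fit(input_fn=input_fn, steps=1)       # OK: loss and train_op present
  est.evaluate(input_fn=input_fn, steps=1)  # expected to raise: loss is None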
def create_train_op(total_loss,
                    optimizer,
                    global_step=None,
                    update_ops=None,
                    variables_to_train=None,
                    clip_gradient_norm=0,
                    summarize_gradients=False,
                    gate_gradients=tf_optimizer.Optimizer.GATE_OP,
                    aggregation_method=None,
                    colocate_gradients_with_ops=False):
  """Creates an `Operation` that evaluates the gradients and returns the loss.

  Args:
    total_loss: A `Tensor` representing the total loss.
    optimizer: A tf.Optimizer to use for computing the gradients.
    global_step: A `Tensor` representing the global step variable. If left as
      `None`, then slim.variables.global_step() is used.
    update_ops: An optional list of updates to execute. Note that the
      update_ops that are used are the union of those update_ops passed to the
      function and the value of slim.ops.GetUpdateOps(). Therefore, if
      `update_ops` is None, then the value of slim.ops.GetUpdateOps() is still
      used.
    variables_to_train: An optional list of variables to train. If None, it
      defaults to all tf.trainable_variables().
    clip_gradient_norm: If greater than 0, the gradients are clipped by this
      value.
    summarize_gradients: Whether or not to add summaries for each gradient.
    gate_gradients: How to gate the computation of gradients. See tf.Optimizer.
    aggregation_method: Specifies the method used to combine gradient terms.
      Valid values are defined in the class `AggregationMethod`.
    colocate_gradients_with_ops: Whether or not to try colocating the gradients
      with the ops that generated them.

  Returns:
    A `Tensor` that, when evaluated, computes the gradients and returns the
    total loss value.
  """
  if global_step is None:
    global_step = variables.get_or_create_global_step()

  update_ops = set(update_ops or [])

  # Make sure update_ops are computed before total_loss.
  if update_ops:
    with ops.control_dependencies(update_ops):
      barrier = control_flow_ops.no_op(name='update_barrier')
    total_loss = control_flow_ops.with_dependencies([barrier], total_loss)

  if variables_to_train is None:
    # Default to tf.trainable_variables().
    variables_to_train = tf_variables.trainable_variables()
  else:
    # Make sure that variables_to_train are in tf.trainable_variables().
    for v in variables_to_train:
      assert v in tf_variables.trainable_variables()

  assert variables_to_train

  # Create the gradients. Note that this adds the gradient computation to the
  # current graph.
  grads = optimizer.compute_gradients(
      total_loss,
      variables_to_train,
      gate_gradients=gate_gradients,
      aggregation_method=aggregation_method,
      colocate_gradients_with_ops=colocate_gradients_with_ops)

  # Clip gradients.
  if clip_gradient_norm > 0:
    grads = clip_gradient_norms(grads, clip_gradient_norm)

  # Summarize gradients.
  if summarize_gradients:
    for grad, var in grads:
      if grad is not None:
        if isinstance(grad, ops.IndexedSlices):
          grad_values = grad.values
        else:
          grad_values = grad
        logging_ops.histogram_summary(var.op.name + ':gradient', grad_values)
        logging_ops.histogram_summary(var.op.name + ':gradient_norm',
                                      clip_ops.global_norm([grad_values]))
      else:
        logging.info('Var %s has no gradient', var.op.name)

  # Create gradient updates.
  grad_updates = optimizer.apply_gradients(grads, global_step=global_step)

  # Make sure total_loss is valid.
  total_loss = array_ops.check_numerics(total_loss,
                                        'LossTensor is inf or nan')

  # Ensure the train_tensor computes grad_updates.
  return control_flow_ops.with_dependencies([grad_updates], total_loss)
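# A minimal usage sketch, not part of the original source: builds a tiny
# linear-regression graph (hypothetical model) and wires it through
# create_train_op. Assumes TF1 graph mode; the returned tensor applies the
# gradient update and yields the loss value each time it is run.
def _create_train_op_example():
  import tensorflow as tf
  x = tf.random_normal([8, 4])
  y = tf.zeros([8, 1])
  w = tf.get_variable('w', [4, 1])
  total_loss = tf.losses.mean_squared_error(y, tf.matmul(x, w))
  optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
  # Clipping is optional; clip_gradient_norm=0 (the default) disables it.
  return create_train_op(total_loss, optimizer, clip_gradient_norm=5.0)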