Example #1
def hinge_loss(logits, labels=None, scope=None):
  """Method that returns the loss tensor for hinge loss.

  Args:
    logits: The logits, a float tensor. Note that logits are assumed to be
      unbounded and 0-centered. A value > 0 (resp. < 0) is considered a positive
      (resp. negative) binary prediction.
    labels: The ground truth output tensor. Its shape should match the shape of
      logits. The values of the tensor are expected to be 0.0 or 1.0. Internally
      the {0,1} labels are converted to {-1,1} when calculating the hinge loss.
    scope: The scope for the operations performed in computing the loss.

  Returns:
    An unweighted `Tensor` of same shape as `logits` and `labels` representing
    the loss values across the batch.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match.
  """
  with ops.name_scope(scope, "hinge_loss", [logits, labels]) as scope:
    logits.get_shape().assert_is_compatible_with(labels.get_shape())
    # We first need to convert binary labels to -1/1 labels (as floats).
    labels = math_ops.to_float(labels)
    all_ones = array_ops.ones_like(labels)
    labels = math_ops.subtract(2 * labels, all_ones)
    return nn_ops.relu(
        math_ops.subtract(all_ones, math_ops.multiply(labels, logits)))
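A minimal NumPy sketch of the same arithmetic (an illustration only; the helper name is ours, not part of the TensorFlow source): the {0,1} labels are mapped to {-1,1} and the element-wise loss is max(0, 1 - label * logit).

import numpy as np

def hinge_loss_reference(logits, labels):
  # Map {0, 1} labels to {-1, 1}, then compute max(0, 1 - y * logit).
  signed_labels = 2.0 * labels - 1.0
  return np.maximum(0.0, 1.0 - signed_labels * logits)

# Confident correct predictions incur zero loss; wrong ones grow linearly.
print(hinge_loss_reference(np.array([2.0, -0.5, 0.3]),
                           np.array([1.0, 0.0, 0.0])))  # [0.  0.5 1.3]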
Example #2
  def test_multiple_outputs(self):
    #   -         +
    #  / \y0   y1/ \
    # x    split    z
    #       |
    #       y         (nodes are ops; edges are going up)
    g = ops.Graph()
    with g.as_default():
      x = array_ops.placeholder(dtypes.float32, shape=[1], name='x')
      y = array_ops.placeholder(dtypes.float32, shape=[2], name='y')
      y0, y1 = array_ops.split(y, num_or_size_splits=2, axis=0)
      z = array_ops.placeholder(dtypes.float32, shape=[1], name='z')
      math_ops.add(x, y0)
      math_ops.subtract(y1, z)

    y1_pattern = graph_matcher.OpTypePattern('*')
    minus_pattern = graph_matcher.OpTypePattern('Sub', inputs=[y1_pattern, '*'])
    matcher = graph_matcher.GraphMatcher(minus_pattern)

    match_results = list(matcher.match_graph(g))
    self.assertEqual(1, len(match_results))
    match_result = match_results[0]

    self.assertEqual(y0.op, y1.op)
    self.assertEqual(match_result.get_op(y1_pattern), y1.op)
    self.assertEqual(match_result.get_tensor(y1_pattern), y1)
Example #3
def hinge_loss(labels, logits, weights=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a hinge loss to the training procedure.

  Args:
    labels: The ground truth output tensor. Its shape should match the shape of
      logits. The values of the tensor are expected to be 0.0 or 1.0.
    logits: The logits, a float tensor.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match.
  """
  with ops.name_scope(scope, "hinge_loss", (logits, labels, weights)) as scope:
    logits = math_ops.to_float(logits)
    labels = math_ops.to_float(labels)
    logits.get_shape().assert_is_compatible_with(labels.get_shape())
    # We first need to convert binary labels to -1/1 labels (as floats).
    all_ones = array_ops.ones_like(labels)
    labels = math_ops.subtract(2 * labels, all_ones)
    losses = nn_ops.relu(
        math_ops.subtract(all_ones, math_ops.multiply(labels, logits)))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)
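As a rough sketch of the default reduction (assumed semantics, illustrative only; the helper below is ours): `SUM_BY_NONZERO_WEIGHTS` sums the weighted per-element losses and divides by the number of non-zero weights.

import numpy as np

def sum_by_nonzero_weights(losses, weights):
  # Assumed reduction: sum of weighted losses over the count of non-zero weights.
  weighted = losses * weights
  num_nonzero = np.count_nonzero(np.broadcast_to(weights, losses.shape))
  return weighted.sum() / max(num_nonzero, 1)

losses = np.array([0.0, 0.5, 1.3])
print(sum_by_nonzero_weights(losses, np.array([1.0, 1.0, 0.0])))  # 0.25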
Example #4
def huber_loss(y_true, y_pred, delta=1.0):
  """Computes Huber loss value.

  For each value x in `error = y_pred - y_true`, the following is calculated:

  ```
  0.5 * x^2                  if |x| <= d
  0.5 * d^2 + d * (|x| - d)  if |x| > d
  ```
  where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss

  Args:
    y_true: tensor of true targets.
    y_pred: tensor of predicted targets.
    delta: A float, the point where the Huber loss function changes from a
      quadratic to linear.

  Returns:
    Tensor with one scalar loss entry per sample.
  """
  y_pred = math_ops.cast(y_pred, dtype=K.floatx())
  y_true = math_ops.cast(y_true, dtype=K.floatx())
  error = math_ops.subtract(y_pred, y_true)
  abs_error = math_ops.abs(error)
  quadratic = math_ops.minimum(abs_error, delta)
  linear = math_ops.subtract(abs_error, quadratic)
  return math_ops.add(
      math_ops.multiply(
          ops.convert_to_tensor(0.5, dtype=quadratic.dtype),
          math_ops.multiply(quadratic, quadratic)),
      math_ops.multiply(delta, linear))
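A small NumPy check (illustrative, not part of the source) that the quadratic/linear decomposition used above matches the piecewise definition in the docstring.

import numpy as np

def huber_piecewise(error, delta=1.0):
  # Piecewise form from the docstring.
  abs_error = np.abs(error)
  return np.where(abs_error <= delta,
                  0.5 * error ** 2,
                  0.5 * delta ** 2 + delta * (abs_error - delta))

def huber_decomposed(error, delta=1.0):
  # quadratic = min(|x|, delta), linear = |x| - quadratic, as in the code above.
  abs_error = np.abs(error)
  quadratic = np.minimum(abs_error, delta)
  linear = abs_error - quadratic
  return 0.5 * quadratic ** 2 + delta * linear

errors = np.linspace(-3.0, 3.0, 13)
assert np.allclose(huber_piecewise(errors), huber_decomposed(errors))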
Example #5
def hinge_loss(labels, logits, weights=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES):
  """Adds a hinge loss to the training procedure.

  WARNING: `weights` also supports dimensions of 1, but the broadcasting does
  not work as advertised, you'll wind up with weighted sum instead of weighted
  mean for any but the last dimension. This will be cleaned up soon, so please
  do not rely on the current behavior for anything but the shapes documented for
  `weights` below.

  Args:
    labels: The ground truth output tensor. Its shape should match the shape of
      logits. The values of the tensor are expected to be 0.0 or 1.0.
    logits: The logits, a float tensor.
    weights: Coefficients for the loss. This can be a scalar, a tensor of shape
      `[batch_size]`, or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` of the loss value.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match.
  """
  with ops.name_scope(scope, "hinge_loss", (logits, labels)) as scope:
    logits = math_ops.to_float(logits)
    labels = math_ops.to_float(labels)
    logits.get_shape().assert_is_compatible_with(labels.get_shape())
    # We first need to convert binary labels to -1/1 labels (as floats).
    all_ones = array_ops.ones_like(labels)
    labels = math_ops.subtract(2 * labels, all_ones)
    losses = nn_ops.relu(
        math_ops.subtract(all_ones, math_ops.multiply(labels, logits)))
    return compute_weighted_loss(losses, weights, scope, loss_collection)
Example #6
  def testStripUnusedMultipleInputs(self):
    input_graph_name = "input_graph.pb"
    output_graph_name = "output_graph.pb"

    # We'll create an input graph that multiplies two input nodes.
    with ops.Graph().as_default():
      constant_node1 = constant_op.constant(1.0, name="constant_node1")
      constant_node2 = constant_op.constant(2.0, name="constant_node2")
      input_node1 = math_ops.subtract(constant_node1, 3.0, name="input_node1")
      input_node2 = math_ops.subtract(constant_node2, 5.0, name="input_node2")
      output_node = math_ops.multiply(
          input_node1, input_node2, name="output_node")
      math_ops.add(output_node, 2.0, name="later_node")
      sess = session.Session()
      output = sess.run(output_node)
      self.assertNear(6.0, output, 0.00001)
      graph_io.write_graph(sess.graph, self.get_temp_dir(), input_graph_name)

    # We save out the graph to disk, and then call the const conversion
    # routine.
    input_graph_path = os.path.join(self.get_temp_dir(), input_graph_name)
    input_binary = False
    input_node_names = "input_node1,input_node2"
    input_node_types = [
        dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum
    ]
    output_binary = True
    output_node_names = "output_node"
    output_graph_path = os.path.join(self.get_temp_dir(), output_graph_name)

    strip_unused_lib.strip_unused_from_files(input_graph_path, input_binary,
                                             output_graph_path, output_binary,
                                             input_node_names,
                                             output_node_names,
                                             input_node_types)

    # Now we make sure the variable is now a constant, and that the graph still
    # produces the expected result.
    with ops.Graph().as_default():
      output_graph_def = graph_pb2.GraphDef()
      with open(output_graph_path, "rb") as f:
        output_graph_def.ParseFromString(f.read())
        _ = importer.import_graph_def(output_graph_def, name="")

      self.assertEqual(3, len(output_graph_def.node))
      for node in output_graph_def.node:
        self.assertNotEqual("Add", node.op)
        self.assertNotEqual("Sub", node.op)
        if node.name == input_node_names:
          self.assertTrue("shape" in node.attr)

      with session.Session() as sess:
        input_node1 = sess.graph.get_tensor_by_name("input_node1:0")
        input_node2 = sess.graph.get_tensor_by_name("input_node2:0")
        output_node = sess.graph.get_tensor_by_name("output_node:0")
        output = sess.run(output_node,
                          feed_dict={input_node1: [10.0],
                                     input_node2: [-5.0]})
        self.assertNear(-50.0, output, 0.00001)
Example #7
  def unregularized_loss(self, examples):
    """Add operations to compute the loss (without the regularization loss).

    Args:
      examples: Examples to compute unregularized loss on.

    Returns:
      An Operation that computes mean (unregularized) loss for given set of
      examples.

    Raises:
      ValueError: if examples are not well defined.
    """
    self._assertSpecified([
        'example_labels', 'example_weights', 'sparse_features', 'dense_features'
    ], examples)
    self._assertList(['sparse_features', 'dense_features'], examples)
    with name_scope('sdca/unregularized_loss'):
      predictions = math_ops.cast(
          self._linear_predictions(examples), dtypes.float64)
      labels = math_ops.cast(
          internal_convert_to_tensor(examples['example_labels']),
          dtypes.float64)
      weights = math_ops.cast(
          internal_convert_to_tensor(examples['example_weights']),
          dtypes.float64)

      if self._options['loss_type'] == 'logistic_loss':
        return math_ops.reduce_sum(math_ops.multiply(
            sigmoid_cross_entropy_with_logits(labels=labels,
                                              logits=predictions),
            weights)) / math_ops.reduce_sum(weights)

      if self._options['loss_type'] == 'poisson_loss':
        return math_ops.reduce_sum(math_ops.multiply(
            log_poisson_loss(targets=labels, log_input=predictions),
            weights)) / math_ops.reduce_sum(weights)

      if self._options['loss_type'] in ['hinge_loss', 'smooth_hinge_loss']:
        # hinge_loss = max{0, 1 - y_i w*x} where y_i \in {-1, 1}. So, we need to
        # first convert 0/1 labels into -1/1 labels.
        all_ones = array_ops.ones_like(predictions)
        adjusted_labels = math_ops.subtract(2 * labels, all_ones)
        # Tensor that contains (unweighted) error (hinge loss) per
        # example.
        error = nn_ops.relu(
            math_ops.subtract(all_ones,
                              math_ops.multiply(adjusted_labels, predictions)))
        weighted_error = math_ops.multiply(error, weights)
        return math_ops.reduce_sum(weighted_error) / math_ops.reduce_sum(
            weights)

      # squared loss
      err = math_ops.subtract(labels, predictions)

      weighted_squared_err = math_ops.multiply(math_ops.square(err), weights)
      # SDCA squared loss function is sum(err^2) / (2*sum(weights))
      return (math_ops.reduce_sum(weighted_squared_err) /
              (2.0 * math_ops.reduce_sum(weights)))
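For the squared-loss branch, a plain NumPy sketch of the weighted reduction described in the comment above (illustrative only; the function name is ours): sum(weights * err^2) / (2 * sum(weights)).

import numpy as np

def sdca_squared_loss(labels, predictions, weights):
  # Weighted sum of squared errors over twice the sum of weights.
  err = labels - predictions
  return np.sum(weights * err ** 2) / (2.0 * np.sum(weights))

print(sdca_squared_loss(np.array([1.0, 0.0]),
                        np.array([0.5, 0.5]),
                        np.array([1.0, 1.0])))  # 0.125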
Example #8
def exact_laplacian_kernel(x, y, stddev):
  """Computes exact Laplacian kernel value(s) for tensors x and y using stddev.

  The Laplacian kernel for vectors u, v is defined as follows:
       K(u, v) = exp(-||u-v|| / stddev)
  where the norm is the l1-norm. x, y can be either vectors or matrices. If they
  are vectors, they must have the same dimension. If they are matrices, they
  must have the same number of columns. In the latter case, the method returns
  (as a matrix) K(u, v) values for all pairs (u, v) where u is a row from x and
  v is a row from y.

  Args:
    x: a tensor of rank 1 or 2. Its shape should be either [dim] or [m, dim].
    y: a tensor of rank 1 or 2. Its shape should be either [dim] or [n, dim].
    stddev: The width of the Gaussian kernel.

  Returns:
    A single value (scalar) with shape (1, 1)  if x, y are vectors or a matrix
    of shape (m, n) with entries K(u, v) (where K is the Laplacian kernel) for
    all (u,v) pairs where u, v are rows from x and y respectively.

  Raises:
    InvalidShapeError: if the shapes of x, y are not compatible.
  """
  x_aligned, y_aligned = _align_matrices(x, y)
  diff_l1_norm = math_ops.reduce_sum(
      math_ops.abs(math_ops.subtract(x_aligned, y_aligned)), 2)
  return math_ops.exp(-diff_l1_norm / stddev)
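A hedged NumPy sketch of the same pairwise computation (the explicit broadcasting below stands in for the internal `_align_matrices` helper; the function name is ours).

import numpy as np

def laplacian_kernel(x, y, stddev):
  # Pairwise l1 distances between rows of x (m, dim) and y (n, dim),
  # then K(u, v) = exp(-||u - v||_1 / stddev).
  diff_l1 = np.abs(x[:, None, :] - y[None, :, :]).sum(axis=2)  # (m, n)
  return np.exp(-diff_l1 / stddev)

x = np.array([[0.0, 0.0], [1.0, 1.0]])
y = np.array([[1.0, 0.0]])
print(laplacian_kernel(x, y, stddev=1.0))  # [[exp(-1)], [exp(-1)]]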
Example #9
    def BackwardLoopBody(*args):
      """Backward loop body function."""
      t, dev_t = args[0], args[1]
      (theta, orig_state0, inputs, acc_state, acc_extras, d_theta, d_state1,
       d_inputs, d_acc_state) = _Pack(args[2:], bakloop_sig)

      # The input recurrent state for time step t is previous time step's
      # output, or the original state0 when on time step 0.
      state_from_acc = _Index(acc_state, math_ops.maximum(0, t - 1))
      state0 = functional_ops.If(
          math_ops.equal(t, array_ops.constant(0, dtypes.int32)),
          _Flatten([state_from_acc, orig_state0]), ReturnOrigState0,
          ReturnAccState)
      state0 = nest.pack_sequence_as(orig_state0, state0)

      # The external inputs for time step t.
      inputs_t = _Index(inputs, t)
      # The extras for time step t.
      extras_t = _Index(acc_extras, t)

      d_state1 = _Add(_Index(d_acc_state, t), d_state1)
      (d_theta_t, d_state0, d_inputs_t) = _Pack(
          Bak(*_Flatten([theta, state0, inputs_t, extras_t, d_state1])),
          [self._theta, self._state, self._inputs])
      d_theta = _Add(d_theta, d_theta_t)
      d_inputs = _Update(d_inputs, d_inputs_t, dev_t)
      return [math_ops.subtract(dev_t, 1)] + _Flatten([
          theta, orig_state0, inputs, acc_state, acc_extras, d_theta, d_state0,
          d_inputs, d_acc_state
      ])
Example #10
def mean_squared_error(predictions, labels=None, weights=1.0, scope=None):
  """Adds a Sum-of-Squares loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  [batch_size], then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    predictions: The predicted outputs.
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    weights: Coefficients for the loss. This can be a scalar, a tensor of shape
      [batch_size], or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.
  """
  with ops.name_scope(scope, "mean_squared_error",
                      [predictions, labels, weights]) as scope:
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    losses = math_ops.square(math_ops.subtract(predictions, labels))
    return compute_weighted_loss(losses, weights, scope=scope)
Example #11
 def _Update_global_variables():
   local_vars = [v for g, v in grads_and_vars if g is not None]
   global_center_vars = [self._global_map[var] for var in local_vars]
   local_center_vars = [self._local_map[var] for var in local_vars]
   local_center_vars_update = []
   for lvar, var in zip(local_center_vars, global_center_vars):
     local_center_vars_update.append(lvar.assign(var))
   update_ops = []
   differences = []
   with ops.control_dependencies(local_center_vars_update):
     for v, lv in zip(local_vars, local_center_vars):
       with ops.device(v.device):
         differences.append(math_ops.subtract(v, lv))
     for lvar, diff in zip(local_vars, differences):
       with ops.device(lvar.device):
         update_ops.append(
             state_ops.assign_sub(lvar,
                                  math_ops.multiply(self._moving_rate,
                                                    diff)))
     for var, diff in zip(global_center_vars, differences):
       with ops.device(var.device):
         update_ops.append(
             state_ops.assign_add(var,
                                  math_ops.multiply(self._moving_rate,
                                                    diff)))
     if global_step:
       with ops.colocate_with(global_step):
         update_ops.append(state_ops.assign_add(global_step, 1))
   variable_update = control_flow_ops.group(*(update_ops))
   return variable_update
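A minimal NumPy sketch of one such update step (assumed, for illustration; the helper is ours): the local variable and the global center move toward each other by `moving_rate * (local - center)`.

import numpy as np

def elastic_update(local_var, center_var, moving_rate):
  # diff = local - center; local -= rate * diff; center += rate * diff.
  diff = local_var - center_var
  return local_var - moving_rate * diff, center_var + moving_rate * diff

local, center = np.array([2.0]), np.array([0.0])
print(elastic_update(local, center, moving_rate=0.1))  # (array([1.8]), array([0.2]))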
Example #12
def huber_loss(labels, predictions, weights=1.0, delta=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.WEIGHTED_SUM_BY_NONZERO_WEIGHTS):
  """Adds a Huber Loss term to the training procedure.

  For each value x in `error=labels-predictions`, the following is calculated:

  ```
    0.5 * x^2                  if |x| <= d
    0.5 * d^2 + d * (|x| - d)  if |x| > d
  ```

  where d is `delta`.

  See: https://en.wikipedia.org/wiki/Huber_loss

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  [batch_size], then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    delta: `float`, the point where the huber loss function
      changes from a quadratic to linear.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    A scalar `Tensor` that returns the weighted loss.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.
  """
  with ops.name_scope(scope, "huber_loss",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    error = math_ops.subtract(predictions, labels)
    abs_error = math_ops.abs(error)
    quadratic = math_ops.minimum(abs_error, delta)
    # The following expression is the same in value as
    # tf.maximum(abs_error - delta, 0), but importantly the gradient for the
    # expression when abs_error == delta is 0 (for tf.maximum it would be 1).
    # This is necessary to avoid doubling the gradient, since there is already a
    # nonzero contribution to the gradient from the quadratic term.
    linear = (abs_error - quadratic)
    losses = 0.5 * quadratic**2 + delta * linear
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)
Example #13
def mean_squared_error(labels, predictions, weights=1.0, scope=None,
                       loss_collection=ops.GraphKeys.LOSSES):
  """Adds a Sum-of-Squares loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  [batch_size], then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.
  """
  with ops.name_scope(scope, "mean_squared_error",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    losses = math_ops.square(math_ops.subtract(predictions, labels))
    return compute_weighted_loss(losses, weights, scope, loss_collection)
Example #14
def huber_loss(labels, predictions, weight=1.0, k=1.0, scope=None):
    """Define a huber loss  https://en.wikipedia.org/wiki/Huber_loss
      tensor: tensor to regularize.
      k: value of k in the huber loss
      scope: Optional scope for op_scope.

    Huber loss:
    f(x) = if |x| <= k:
              0.5 * x^2
           else:
              k * |x| - 0.5 * k^2

    Returns:
      the L1 loss op.

    http://concise-bio.readthedocs.io/en/latest/_modules/concise/tf_helper.html
    """
    with ops.name_scope(scope, "absolute_difference",
                        [predictions, labels]) as scope:
        predictions.get_shape().assert_is_compatible_with(labels.get_shape())
        if weight is None:
            raise ValueError("`weight` cannot be None")
        predictions = math_ops.to_float(predictions)
        labels = math_ops.to_float(labels)
        diff = math_ops.subtract(predictions, labels)
        abs_diff = tf.abs(diff)
        losses = tf.where(abs_diff < k,
                          0.5 * tf.square(diff),
                          k * abs_diff - 0.5 * k ** 2)
        return tf.losses.compute_weighted_loss(losses, weight)
Example #15
def normalize_moments(counts, mean_ss, variance_ss, shift, name=None):
  """Calculate the mean and variance of based on the sufficient statistics.

  Args:
    counts: A `Tensor` containing the total count of the data (one value).
    mean_ss: A `Tensor` containing the mean sufficient statistics: the (possibly
      shifted) sum of the elements to average over.
    variance_ss: A `Tensor` containing the variance sufficient statistics: the
      (possibly shifted) squared sum of the data to compute the variance over.
    shift: A `Tensor` containing the value by which the data is shifted for
      numerical stability, or `None` if no shift was performed.
    name: Name used to scope the operations that compute the moments.

  Returns:
    Two `Tensor` objects: `mean` and `variance`.
  """
  with ops.name_scope(name, "normalize", [counts, mean_ss, variance_ss, shift]):
    divisor = math_ops.reciprocal(counts, name="divisor")
    if shift is not None:
      shifted_mean = math_ops.multiply(mean_ss, divisor, name="shifted_mean")
      mean = math_ops.add(shifted_mean, shift, name="mean")
    else:  # no shift.
      shifted_mean = math_ops.multiply(mean_ss, divisor, name="mean")
      mean = shifted_mean
    variance = math_ops.subtract(math_ops.multiply(variance_ss, divisor),
                                 math_ops.square(shifted_mean),
                                 name="variance")
  return (mean, variance)
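A small NumPy check of the shifted-moments identity the function relies on (illustrative only): with `mean_ss = sum(x - shift)` and `variance_ss = sum((x - shift)^2)`, the mean is `mean_ss / counts + shift` and the variance is `variance_ss / counts - (mean_ss / counts)^2`.

import numpy as np

x = np.array([2.0, 4.0, 6.0, 8.0])
shift = 5.0
counts = x.size
mean_ss = np.sum(x - shift)             # shifted sum
variance_ss = np.sum((x - shift) ** 2)  # shifted squared sum

shifted_mean = mean_ss / counts
mean = shifted_mean + shift
variance = variance_ss / counts - shifted_mean ** 2

assert np.isclose(mean, x.mean())
assert np.isclose(variance, x.var())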
Example #16
    def inner_loss(y_true, y_pred):
        delta = math_ops.subtract(y_pred, y_true)
        losses = tf.clip_by_value(delta,
                                  clip_value_min=clip_value_min, clip_value_max=clip_value_max)
        losses = tf.square(losses)

        return losses
Example #17
    def inner_loss(y_true, y_pred):
        delta = math_ops.abs(math_ops.subtract(y_pred, y_true))
        losses = math_ops.square(delta)
        if clip > 0.0:
            losses = tf.where(delta < clip, 0.5 * losses, delta - 0.5)

        return losses
Example #18
 def _model_fn(features, labels, mode):
   _ = labels
   x = features['x']
   y = features['y']
   with ops.name_scope('outputs'):
     predictions = {'sum': math_ops.add(x, y, name='sum'),
                    'product': math_ops.multiply(x, y, name='product'),
                    'difference': math_ops.subtract(x, y, name='difference')}
   if core:
     export_outputs = {k: export_output.PredictOutput({k: v})
                       for k, v in predictions.items()}
     export_outputs[signature_constants.
                    DEFAULT_SERVING_SIGNATURE_DEF_KEY] = export_outputs['sum']
     return model_fn.EstimatorSpec(mode=mode,
                                   predictions=predictions,
                                   export_outputs=export_outputs,
                                   loss=constant_op.constant(0),
                                   train_op=control_flow_ops.no_op())
   else:
     output_alternatives = {k: (constants.ProblemType.UNSPECIFIED, {k: v})
                            for k, v in predictions.items()}
     return contrib_model_fn.ModelFnOps(
         mode=mode,
         predictions=predictions,
         output_alternatives=output_alternatives,
         loss=constant_op.constant(0),
         train_op=control_flow_ops.no_op())
Example #19
def hinge_loss(labels, logits, weights=1.0, scope=None,
               loss_collection=ops.GraphKeys.LOSSES,
               reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds a hinge loss to the training procedure.

  Args:
    labels: The ground truth output tensor. Its shape should match the shape of
      logits. The values of the tensor are expected to be 0.0 or 1.0. Internally
      the {0,1} labels are converted to {-1,1} when calculating the hinge loss.
    logits: The logits, a float tensor. Note that logits are assumed to be
      unbounded and 0-centered. A value > 0 (resp. < 0) is considered a positive
      (resp. negative) binary prediction.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match or
      if `labels` or `logits` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if logits is None:
    raise ValueError("logits must not be None.")
  with ops.name_scope(scope, "hinge_loss", (logits, labels, weights)) as scope:
    logits = math_ops.to_float(logits)
    labels = math_ops.to_float(labels)
    logits.get_shape().assert_is_compatible_with(labels.get_shape())
    # We first need to convert binary labels to -1/1 labels (as floats).
    all_ones = array_ops.ones_like(labels)
    labels = math_ops.subtract(2 * labels, all_ones)
    losses = nn_ops.relu(
        math_ops.subtract(all_ones, math_ops.multiply(labels, logits)))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)
Example #20
def _FoldFusedBatchNorms(graph):
  """Finds fused batch norm layers and folds them into preceding layers.

  Folding only affects the following layers: Conv2D, fully connected, depthwise
  convolution.

  Args:
    graph: Graph to walk and modify.

  Raises:
    ValueError: When batch norm folding fails.
  """
  for match in _FindFusedBatchNorms(graph):
    scope, sep, _ = match.layer_op.name.rpartition('/')
    # Make sure new ops are added to `graph` and put on the same device as
    # `bn_op`. The '/' (i.e. `sep`) ensures that we reuse the existing scope
    # named `scope`. Otherwise, TF creates a unique scope whose name starts with
    # `scope`.
    with graph.as_default(), graph.name_scope(scope + sep), ops.device(
        match.bn_op.device):
      with graph.name_scope(scope + sep + 'BatchNorm_Fold' + sep):
        # new weights = old weights * gamma / sqrt(variance + epsilon)
        # new biases = -mean * gamma / sqrt(variance + epsilon) + beta
        multiplier_tensor = match.gamma_tensor * math_ops.rsqrt(
            match.variance_tensor + match.bn_op.get_attr('epsilon'))
        bias_tensor = math_ops.subtract(
            match.beta_tensor,
            match.mean_tensor * multiplier_tensor,
            name='bias')

        # The shape of depthwise weights is different, so we need to reshape the
        # multiplier_tensor to ensure that the scaled_weight_tensor has the
        # expected shape.
        if match.layer_op.type == 'DepthwiseConv2dNative':
          new_shape = [
              match.weight_tensor.get_shape().as_list()[2],
              match.weight_tensor.get_shape().as_list()[3]
          ]
          multiplier_tensor = array_ops.reshape(
              multiplier_tensor, new_shape, name='scale_reshape')

      # TODO(suharshs): This naming of the following ops needs to carefully
      # follow the naming expected by quantize.py. Generalize the quantize code
      # to not require these delicate naming conventions.
      scaled_weight_tensor = math_ops.multiply(
          match.weight_tensor, multiplier_tensor, name='mul_fold')

      new_layer_tensor = _CloneWithNewOperands(
          match.layer_op, match.input_tensor, scaled_weight_tensor)

      bias_add_tensor = math_ops.add(
          new_layer_tensor, bias_tensor, name='add_fold')

      nodes_modified_count = graph_editor.reroute_ts(bias_add_tensor,
                                                     match.output_tensor)
      if nodes_modified_count != 1:
        raise ValueError(
            'Unexpected inputs to op: %s' % match.output_tensor.name)
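A scalar NumPy check (illustrative, not part of the source) of the folding identities in the comments above: scaling the weight by `gamma / sqrt(variance + epsilon)` and folding the rest into the bias reproduces the batch-norm output.

import numpy as np

x, w = 1.7, 0.3                  # input and original weight
gamma, beta = 1.2, 0.4           # batch norm scale and offset
mean, variance, eps = 0.5, 2.0, 1e-3

multiplier = gamma / np.sqrt(variance + eps)
folded_w = w * multiplier              # new weights
folded_b = beta - mean * multiplier    # new biases

bn_output = gamma * (x * w - mean) / np.sqrt(variance + eps) + beta
folded_output = x * folded_w + folded_b
assert np.isclose(bn_output, folded_output)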
Example #21
def _AtanhGrad(op, grad):
  """Returns grad * 1/ (1 - x^2)."""
  x = op.inputs[0]
  with ops.control_dependencies([grad]):
    x = math_ops.conj(x)
    x2 = math_ops.square(x)
    one = constant_op.constant(1, dtype=grad.dtype)
    inv = math_ops.reciprocal(math_ops.subtract(one, x2))
    return grad * inv
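A quick finite-difference check (illustrative) that d/dx atanh(x) = 1 / (1 - x^2), the factor the gradient above multiplies into `grad`.

import numpy as np

x = np.array([-0.5, 0.0, 0.3, 0.8])
h = 1e-6
numeric = (np.arctanh(x + h) - np.arctanh(x - h)) / (2 * h)
analytic = 1.0 / (1.0 - x ** 2)
assert np.allclose(numeric, analytic, atol=1e-5)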
Example #22
def _AcosGrad(op, grad):
  """Returns grad * -1/sqrt(1-x^2)."""
  x = op.inputs[0]
  with ops.control_dependencies([grad.op]):
    x = math_ops.conj(x)
    x2 = math_ops.square(x)
    one = constant_op.constant(1, dtype=grad.dtype)
    den = math_ops.sqrt(math_ops.subtract(one, x2))
    inv = math_ops.reciprocal(den)
    return -grad * inv
Example #23
  def _objective(self, x):
    """Rosenbrock function. (Carl Edward Rasmussen, 2001-07-21).

    f(x) = sum_{i=1:D-1} 100*(x(i+1) - x(i)^2)^2 + (1-x(i))^2

    Args:
      x: a Variable
    Returns:
      f: a tensor (objective value)
    """

    d = array_ops.size(x)
    s = math_ops.add(
        100 * math_ops.square(
            math_ops.subtract(
                array_ops.strided_slice(x, [1], [d]),
                math_ops.square(array_ops.strided_slice(x, [0], [d - 1])))),
        math_ops.square(
            math_ops.subtract(1.0, array_ops.strided_slice(x, [0], [d - 1]))))
    return math_ops.reduce_sum(s)
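A NumPy version of the same objective (illustrative only), confirming the known minimum f(1, ..., 1) = 0.

import numpy as np

def rosenbrock(x):
  # sum_{i=1..D-1} 100 * (x[i+1] - x[i]^2)^2 + (1 - x[i])^2
  return np.sum(100.0 * (x[1:] - x[:-1] ** 2) ** 2 + (1.0 - x[:-1]) ** 2)

print(rosenbrock(np.array([1.0, 1.0, 1.0])))  # 0.0
print(rosenbrock(np.array([0.0, 0.0, 0.0])))  # 2.0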
Example #24
 def test_logging_trainable(self):
   with ops.Graph().as_default() as g, self.test_session(g):
     var = variables.Variable(constant_op.constant(42.0), name='foo')
     var.initializer.run()
     cof = constant_op.constant(1.0)
     loss = math_ops.subtract(
         math_ops.multiply(var, cof), constant_op.constant(1.0))
     train_step = gradient_descent.GradientDescentOptimizer(0.5).minimize(loss)
     ops.get_default_session().run(train_step)
     self._run_monitor(learn.monitors.LoggingTrainable('foo'))
     self.assertRegexpMatches(str(self.logged_message), var.name)
Example #25
  def compute_gradients(self,
                        loss,
                        var_list=None,
                        gate_gradients=optimizer.Optimizer.GATE_OP,
                        aggregation_method=None,
                        colocate_gradients_with_ops=False,
                        grad_loss=None):
    """Compute gradients of `loss` for the variables in `var_list`.

    Add rho*elastic_difference to loss to control the exploration.
    This is the first part of `minimize()`.  It returns a list
    of (gradient, variable) pairs where "gradient" is the gradient
    for "variable".  Note that "gradient" can be a `Tensor`, an
    `IndexedSlices`, or `None` if there is no gradient for the
    given variable.

    Args:
      loss: A Tensor containing the value to minimize.
      var_list: Optional list or tuple of `tf.Variable` to update to minimize
        `loss`.  Defaults to the list of variables collected in the graph under
        the key `GraphKeys.TRAINABLE_VARIABLES`.
      gate_gradients: How to gate the computation of gradients.  Can be
        `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
      aggregation_method: Specifies the method used to combine gradient terms.
        Valid values are defined in the class `AggregationMethod`.
      colocate_gradients_with_ops: If True, try colocating gradients with the
        corresponding op.
      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.

    Returns:
      A list of (gradient, variable) pairs. Variable is always present, but
      gradient can be `None`.

    Raises:
      TypeError: If `var_list` contains anything else than `Variable` objects.
      ValueError: If some arguments are invalid.
    """
    if not var_list:
      var_list = variables.trainable_variables()

    elastic_difference = [
        math_ops.subtract(v, lv)
        for v, lv in zip(variables.trainable_variables(),
                         [self._local_map[var] for var in var_list])
    ]

    distance_loss = self._rho * math_ops.add_n(
        [gen_nn_ops.l2_loss(ed) for ed in elastic_difference])

    total_loss = loss + distance_loss
    return self._opt.compute_gradients(total_loss, var_list, gate_gradients,
                                       aggregation_method,
                                       colocate_gradients_with_ops, grad_loss)
Example #26
  def call(self, values, denominator):
    """Computes the rate since the last call.

    Args:
      values: Tensor with the per-example value.
      denominator: Measure to take the rate with respect to.

    Returns:
      The rate or 0 if denominator is unchanged since last call.
    """
    if denominator.dtype != dtypes.float64:
      denominator = math_ops.cast(denominator, dtypes.float64)
    if values.dtype != dtypes.float64:
      values = math_ops.cast(values, dtypes.float64)

    state_ops.assign(self.numer, math_ops.subtract(values, self.prev_values))
    state_ops.assign(self.denom,
                     math_ops.subtract(denominator, self.prev_denominator))
    state_ops.assign(self.prev_values, values)
    state_ops.assign(self.prev_denominator, denominator)

    return self._safe_div(self.numer, self.denom, name="safe_rate")
Example #27
  def testFeedTwoHandlesDirectly(self):
    with self.test_session() as sess:
      a = constant_op.constant(10.0)
      b = constant_op.constant(5.0)
      c = math_ops.multiply(a, b)
      d = math_ops.div(a, b)
      e = math_ops.subtract(c, d)

      h_c = sess.run(session_ops.get_session_handle(c))
      h_d = sess.run(session_ops.get_session_handle(d))

      self.assertAllClose(48.0, sess.run(e, feed_dict={c: h_c, d: h_d}))
      self.assertAllClose(-48.0, sess.run(e, feed_dict={c: h_d, d: h_c}))
Example #28
 def _assign_moving_average(self, variable, value, one_minus_decay):
   with ops.name_scope(None, 'AssignMovingAvg',
                       [variable, value, one_minus_decay]) as scope:
     with ops.colocate_with(variable):
       update_delta = math_ops.multiply(
           math_ops.subtract(variable.read_value(), value),
           one_minus_decay)
       if isinstance(variable, resource_variable_ops.ResourceVariable):
         # state_ops.assign_sub does an extra read_variable_op after the
         # assign. We avoid that here.
         return gen_resource_variable_ops.assign_sub_variable_op(
             variable.handle, update_delta, name=scope)
       else:
         return state_ops.assign_sub(variable, update_delta, name=scope)
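A NumPy sketch (illustrative) showing that subtracting `(variable - value) * (1 - decay)`, as above, is the usual exponential moving average `decay * variable + (1 - decay) * value`.

import numpy as np

variable, value, one_minus_decay = 10.0, 4.0, 0.1

update_delta = (variable - value) * one_minus_decay
assign_sub_form = variable - update_delta
ema_form = (1.0 - one_minus_decay) * variable + one_minus_decay * value

assert np.isclose(assign_sub_form, ema_form)  # both 9.4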
Example #29
def absolute_difference(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds an Absolute Difference loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a `Tensor` of
  shape `[batch_size]`, then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of
      `labels` or if the shape of `weights` is invalid or if `labels`
      or `predictions` is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")
  with ops.name_scope(scope, "absolute_difference",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    losses = math_ops.abs(math_ops.subtract(predictions, labels))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)
Example #30
def hinge_loss(logits, labels=None, scope=None):
  """Method that returns the loss tensor for hinge loss.

  Args:
    logits: The logits, a float tensor.
    labels: The ground truth output tensor. Its shape should match the shape of
      logits. The values of the tensor are expected to be 0.0 or 1.0.
    scope: The scope for the operations performed in computing the loss.

  Returns:
    A `Tensor` of same shape as `logits` and `labels` representing the loss
      values across the batch.

  Raises:
    ValueError: If the shapes of `logits` and `labels` don't match.
  """
  with ops.name_scope(scope, "hinge_loss", [logits, labels]) as scope:
    logits.get_shape().assert_is_compatible_with(labels.get_shape())
    # We first need to convert binary labels to -1/1 labels (as floats).
    labels = math_ops.to_float(labels)
    all_ones = array_ops.ones_like(labels)
    labels = math_ops.subtract(2 * labels, all_ones)
    return nn_ops.relu(
        math_ops.subtract(all_ones, math_ops.multiply(labels, logits)))
Example #31
def absolute_difference(
    labels, predictions, weights=1.0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Adds an Absolute Difference loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a `Tensor` of
  shape `[batch_size]`, then the total loss for each sample of the batch is
  rescaled by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which this loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
    shape as `labels`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of
      `labels` or if the shape of `weights` is invalid or if `labels`
      or `predictions` is None.
  """
  if labels is None:
    raise ValueError("labels must not be None.")
  if predictions is None:
    raise ValueError("predictions must not be None.")
  with ops.name_scope(scope, "absolute_difference",
                      (predictions, labels, weights)) as scope:
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
    losses = math_ops.abs(math_ops.subtract(predictions, labels))
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)
Example #32
  def testNextOp(self):
    """Ensures all ops get selected eventually."""
    with tf_ops.Graph().as_default():
      ops = [
          math_ops.add(1, 2),
          math_ops.subtract(1, 2),
          math_ops.reduce_mean([1, 2]),
      ]
      queue = op_queue.OpQueue(ops, seed=0)

      with self.cached_session() as sess:
        # Ensure every inv update op gets selected.
        selected_ops = set([queue.next_op(sess) for _ in ops])
        self.assertEqual(set(ops), set(selected_ops))

        # Ensure additional calls don't create any new ops.
        selected_ops.add(queue.next_op(sess))
        self.assertEqual(set(ops), set(selected_ops))
Example #33
  def setUp(self):
    self.a = variables.Variable(10.0, name="a")
    self.b = variables.Variable(20.0, name="b")

    self.c = math_ops.add(self.a, self.b, name="c")  # Should be 30.0.
    self.d = math_ops.subtract(self.a, self.c, name="d")  # Should be -20.0.
    self.e = math_ops.multiply(self.c, self.d, name="e")  # Should be -600.0.

    self.ph = array_ops.placeholder(dtypes.float32, shape=(2, 2), name="ph")
    self.f = math_ops.multiply(self.e, self.ph, name="f")

    self.opt = gradient_descent.GradientDescentOptimizer(0.1).minimize(
        self.e, name="opt")

    self.sess = session.Session()

    self.sess.run(self.a.initializer)
    self.sess.run(self.b.initializer)
Example #34
def mean_squared_error(labels,
                       predictions,
                       weights=1.0,
                       scope=None,
                       loss_collection=ops.GraphKeys.LOSSES,
                       reduction=Reduction.WEIGHTED_SUM_BY_NONZERO_WEIGHTS):
    """Adds a Sum-of-Squares loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  [batch_size], then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `losses` dimension).
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    A scalar `Tensor` that returns the weighted loss.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.
  """
    with ops.name_scope(scope, "mean_squared_error",
                        (predictions, labels, weights)) as scope:
        predictions = math_ops.to_float(predictions)
        labels = math_ops.to_float(labels)
        predictions.get_shape().assert_is_compatible_with(labels.get_shape())
        losses = math_ops.square(math_ops.subtract(predictions, labels))
        return compute_weighted_loss(losses,
                                     weights,
                                     scope,
                                     loss_collection,
                                     reduction=reduction)
Example #35
    def test_subscribe_tensors_within_control_flow_context(self):
        """Side effect ops are added with the same control flow context."""
        c1 = constant_op.constant(10)
        c2 = constant_op.constant(20)
        x1 = math_ops.add(c1, c2)
        x2 = math_ops.multiply(c1, c2)

        cond = control_flow_ops.cond(
            x1 < x2,
            lambda: math_ops.add(c1, c2, name='then'),
            lambda: math_ops.subtract(c1, c2, name='else'),
            name='cond')

        branch = ops.get_default_graph().get_tensor_by_name('cond/then:0')

        def context(tensor):
            return tensor.op._get_control_flow_context()

        self.assertIs(context(x1), context(x2))
        self.assertIsNot(context(x1), context(branch))

        results = []

        def sub(tensor):
            results.append(tensor)
            return tensor

        tensors = [x1, branch, x2]
        subscriptions = subscribe.subscribe(
            tensors, lambda t: script_ops.py_func(sub, [t], [t.dtype]))

        for tensor, subscription in zip(tensors, subscriptions):
            self.assertIs(context(tensor), context(subscription))

        # Verify that sub(x1) and sub(x2) are in the same context.
        self.assertIs(context(subscriptions[0]), context(subscriptions[2]))

        # Verify that sub(x1) and sub(branch) are not.
        self.assertIsNot(context(subscriptions[0]), context(subscriptions[1]))

        with self.cached_session() as sess:
            self.evaluate(cond)

        self.assertEqual(3, len(results))
Example #36
def get_smoothL1_loss(predictions, labels, object_matches, delta=1.0):
    ''' Calculate smooth L1 loss
    Args:
        predictions (batch size, dimensions)
        labels (batch size, dimensions)
    Returns:
        smooth L1 loss according to https://mohitjainweb.files.wordpress.com/2018/03/smoothl1loss.pdf
    '''
    #assert(predictions.shape == labels.shape)
    predictions = math_ops.to_float(predictions)
    labels = math_ops.to_float(labels)
    # Simpler L1 loss
    #return tf.reduce_mean(math_ops.abs(math_ops.subtract(predictions, labels)))
    l1_losses = tf.reshape(
        object_matches, (-1, object_matches.shape[1])) * math_ops.reduce_sum(
            math_ops.abs(math_ops.subtract(predictions, labels)), axis=-1)
    condition = tf.less(l1_losses, delta)
    smoothL1_loss = tf.where(condition, 0.5 * (l1_losses**2), l1_losses - 0.5)
    return tf.reduce_mean(smoothL1_loss)
Example #37
 def fm_logit_fn(inputs):
     with variable_scope.variable_scope('get_fm_inputs'):
         fm_filed_num = len(column_names)
         fm_inputs = []
         for c in column_names:
             fm_inputs.append(inputs[c])
         net = array_ops.concat(fm_inputs, axis=1)
         embeddings = gen_array_ops.reshape(
             net, (-1, fm_filed_num, fm_embedding_size))  # -1 * F * K
         # according to simplified formula
         summed_squared_emb = math_ops.square(
             math_ops.reduce_sum(embeddings, -2))  # -1 * K
         squared_summed_emb = math_ops.reduce_sum(
             math_ops.square(embeddings), -2)  # -1 * K
         logits = 0.5 * math_ops.reduce_sum(
             math_ops.subtract(summed_squared_emb, squared_summed_emb),
             -1)  # -1
         logits = array_ops.expand_dims(logits, -1)
         return logits
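A small NumPy check (illustrative) of the simplified second-order factorization-machine term used above: 0.5 * sum_k((sum_f v_fk)^2 - sum_f v_fk^2) equals the explicit sum of pairwise dot products between field embeddings.

import numpy as np

emb = np.random.RandomState(0).randn(4, 3)  # F fields, K embedding dims

simplified = 0.5 * np.sum(
    np.square(emb.sum(axis=0)) - np.square(emb).sum(axis=0))
pairwise = sum(emb[i].dot(emb[j])
               for i in range(len(emb)) for j in range(i + 1, len(emb)))

assert np.isclose(simplified, pairwise)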
Example #38
def mean_squared_error(labels,
                       predictions,
                       weights=1.0,
                       scope=None,
                       loss_collection=ops.GraphKeys.LOSSES):
    """Adds a Sum-of-Squares loss to the training procedure.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  [batch_size], then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector. If the shape of
  `weights` matches the shape of `predictions`, then the loss of each
  measurable element of `predictions` is scaled by the corresponding value of
  `weights`.

  WARNING: `weights` also supports dimensions of 1, but the broadcasting does
  not work as advertised, you'll wind up with weighted sum instead of weighted
  mean for any but the last dimension. This will be cleaned up soon, so please
  do not rely on the current behavior for anything but the shapes documented for
  `weights` below.

  Args:
    labels: The ground truth output tensor, same dimensions as 'predictions'.
    predictions: The predicted outputs.
    weights: Coefficients for the loss a scalar, a tensor of shape
      `[batch_size]` or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.
  """
    with ops.name_scope(scope, "mean_squared_error",
                        [predictions, labels, weights]) as scope:
        predictions.get_shape().assert_is_compatible_with(labels.get_shape())
        predictions = math_ops.to_float(predictions)
        labels = math_ops.to_float(labels)
        losses = math_ops.square(math_ops.subtract(predictions, labels))
        return compute_weighted_loss(losses, weights, scope, loss_collection)
Example #39
    def _get_learning_rate(self, step):
        with ops.name_scope_v2(self.name
                               or 'PolynomialDecayWithWarmup') as name:

            initial_learning_rate = ops.convert_to_tensor_v2(
                self.initial_learning_rate, name='initial_learning_rate')
            warmup_steps = ops.convert_to_tensor_v2(self.warmup_steps,
                                                    name='warmup_steps')

            warmup_rate = (initial_learning_rate * step / warmup_steps)

            poly_steps = math_ops.subtract(step, warmup_steps)
            poly_rate = self.poly_rate_scheduler(poly_steps)

            decay_rate = tf.where(step <= warmup_steps,
                                  warmup_rate,
                                  poly_rate,
                                  name=name)
            return decay_rate
Example #40
def focal_loss_alpha(labels=[],
                     logits=[],
                     pos_weights=[],
                     gamma=2.,
                     clips=[],
                     name='focal_loss'):
    """
    Add focal loss weights to the weighted sigmoid cross entropy.
    :return:
    """
    batchsize = labels.get_shape().as_list()[0]
    n_classes = labels.get_shape().as_list()[1]

    with tf.variable_scope(name) as vs:
        # first get a sigmoid to determine the focal loss weights:
        sigmoid_logits = tf.nn.sigmoid(logits)
        # determine the focal loss weights:
        labels = math_ops.to_float(labels)
        sigmoid_logits.get_shape().assert_is_compatible_with(
            labels.get_shape())
        preds = array_ops.where(math_ops.equal(labels, 1.), sigmoid_logits,
                                1. - sigmoid_logits)
        focal_weights = (math_ops.subtract(1., preds))**gamma
        print(focal_weights)

        # clip the weights at E-3 and E3
        up_clip = math_ops.multiply(tf.ones([batchsize, n_classes]), clips[1])
        low_clip = math_ops.multiply(tf.ones([batchsize, n_classes]), clips[0])
        focal_weights = array_ops.where(
            math_ops.greater(focal_weights, clips[1]), up_clip, focal_weights)
        focal_weights = array_ops.where(math_ops.less(focal_weights, clips[0]),
                                        low_clip, focal_weights)
        log_weight = 1. + (pos_weights - 1.) * labels

        # now put them into a weighted sigmoid cross entropy:
        loss = math_ops.multiply(math_ops.add(
            (1. - labels) * logits,
            log_weight * (math_ops.log1p(math_ops.exp(-math_ops.abs(logits))) +
                          nn_ops.relu(-logits))),
                                 focal_weights,
                                 name='sc_entropy')
        return loss
Example #41
def switch_loss(labels,
                predictions,
                weights=1.0,
                c1=0.999,
                c2=1.0,
                c3=1.0,
                switch_value=1.0,
                scope=None,
                loss_collection=ops.GraphKeys.LOSSES,
                reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS):
    """Exponential loss for labels <= switch_value, linear loss for labels > switch_value.
    The exponential branch is defined by `f(x) = b*x^a` with the constraints `f(c1) = c1` and `f(c2) = c3*c2`.
    Thus, for each label `l` and prediction `p` the following is calculated:
    ```
    b*|l-p|^a             if l <= switch_value
    |l-p|                 if l > switch_value
    ```
    """
    def get_exp_params(c1=0.999, c2=1.0, c3=1.0):
        a = math.log(float(c1) / (c2 * c3), c1) / (1 - math.log(float(c3), c1))
        b = c1 / c1**a
        return a, b

    if labels is None:
        raise ValueError("labels must not be None.")
    if predictions is None:
        raise ValueError("predictions must not be None.")
    a, b = get_exp_params(c1, c2, c3)
    with ops.name_scope(scope, "switch_loss",
                        (predictions, labels, weights)) as scope:
        predictions = math_ops.to_float(predictions)
        labels = math_ops.to_float(labels)
        predictions.get_shape().assert_is_compatible_with(labels.get_shape())
        error = math_ops.subtract(predictions, labels)
        abs_error = math_ops.abs(error)
        exp_error = b * (abs_error**a)
        losses = tf.where(labels <= switch_value, exp_error, abs_error)
        return tf.losses.compute_weighted_loss(losses,
                                               weights,
                                               scope,
                                               loss_collection,
                                               reduction=reduction)
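
A minimal usage sketch (hypothetical values, assuming a TF 1.x session since the loss relies on math_ops.to_float and tf.losses):

# Hypothetical usage of switch_loss; the constants below are illustrative.
import tensorflow as tf

labels = tf.constant([0.2, 0.8, 3.0])
predictions = tf.constant([0.1, 1.0, 2.0])
loss = switch_loss(labels, predictions, switch_value=1.0)  # exp branch for labels <= 1.0
with tf.Session() as sess:
    print(sess.run(loss))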
Example #42
0
  def __call__(self, step):
    with tf.name_scope(self.name or "Dilera") as name:
      dtype = tf.dtypes.float32

      initial_learning_rate = tf.convert_to_tensor(self.initial_learning_rate, dtype=dtype, name="initial_learning_rate")

      sigma = math_ops.cast(self.sigma, dtype)
      t_step = math_ops.cast(step, dtype)
      # t_step = math_ops.multiply(t_step, t_step)
      dt = tf.constant(1, dtype=dtype)
      t_step = math_ops.add(t_step, tf.constant(1, dtype=dtype))
      Z_t = tf.random.normal([1], mean=0.0, stddev=1.0, dtype=dtype)

      Z_over_T = math_ops.divide(Z_t[0], t_step)
      Sigma_Z_over_T = math_ops.multiply(sigma, Z_over_T)
      Sigma_Z_sqrtDt_over_T = math_ops.multiply(Sigma_Z_over_T, math_ops.sqrt(dt))

      eta_dT = math_ops.multiply(initial_learning_rate, dt)
      newLearningRate  = math_ops.subtract(eta_dT, Sigma_Z_sqrtDt_over_T, name=name)
      return newLearningRate
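
A hedged sketch of how a schedule with this update rule could be packaged for a Keras optimizer; the class below is a hypothetical stand-in (not the original class) and simplifies dt to 1.

import tensorflow as tf

class DileraSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    # Hypothetical container for the rule above: eta*dt - sigma*Z/t*sqrt(dt), with dt = 1.
    def __init__(self, initial_learning_rate, sigma, name=None):
        self.initial_learning_rate = initial_learning_rate
        self.sigma = sigma
        self.name = name

    def __call__(self, step):
        eta = tf.convert_to_tensor(self.initial_learning_rate, tf.float32)
        sigma = tf.cast(self.sigma, tf.float32)
        t = tf.cast(step, tf.float32) + 1.0
        z = tf.random.normal([], dtype=tf.float32)   # fresh noise each step
        return eta - sigma * z / t

    def get_config(self):
        return {"initial_learning_rate": self.initial_learning_rate,
                "sigma": self.sigma, "name": self.name}

opt = tf.keras.optimizers.SGD(learning_rate=DileraSchedule(0.01, sigma=0.1))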
Example #43
0
def mean_squared_log_loss(
        labels,
        predictions,
        weights=1.0,
        scope=None,
        loss_collection=ops.GraphKeys.LOSSES,
        reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS):
    with ops.name_scope(scope, "mean_squared_norm_loss",
                        (predictions, labels, weights)) as scope:
        predictions = math_ops.to_float(predictions)
        labels = math_ops.to_float(labels)
        predictions.get_shape().assert_is_compatible_with(labels.get_shape())
        error = math_ops.square(
            math_ops.subtract(math_ops.log(tf.maximum(predictions, 1.0)),
                              math_ops.log(tf.maximum(labels, 1.0))))
        return tf.losses.compute_weighted_loss(error,
                                               weights,
                                               scope,
                                               loss_collection,
                                               reduction=reduction)
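
A minimal usage sketch (hypothetical values, TF 1.x session); note that both inputs are floored at 1.0 by the tf.maximum calls, so values below 1 contribute a log of zero.

# Hypothetical usage of mean_squared_log_loss.
import tensorflow as tf

labels = tf.constant([1.0, 10.0, 100.0])
predictions = tf.constant([2.0, 8.0, 120.0])
loss = mean_squared_log_loss(labels, predictions)
with tf.Session() as sess:
    print(sess.run(loss))  # mean of squared differences of natural logs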
Example #44
0
def normalize_for_graph_lstm(tensor):
    """Normalizes Tensor to range [-0.5, 0.5].

    Scales a Tensor uniformly to fit within [-0.5, 0.5]^n. Additionally,
      each dimension is shifted to be centred around [0]^n i.e. the origin,
      in a way that data extends the same distance in positive and negative
      direction. In other words, the mean between maximum and minimum value
      of each dimension is shifted to zero. The undo_scaling op undoes
      scaling, but does not undo shifting. The unnormalize op does both,
      but is currently unused.

    Returns: The normalized Tensor, and an op to undo normalization.

    Example usage:
    ```
    normalized_tensor, undo_scaling = normalize_for_graph_lstm(input_tensor)
    normalized_output_tensor = some_op(normalized_tensor)
    output_tensor = undo_scaling(normalized_output_tensor)
    ```
    """
    # tensor is normalized to range[-0.5, 0.5]
    # this function assumes tensors with shape [ batch_size, number_of_nodes, output_size ]
    assert (len(tensor.shape) == 3)
    # compute maximum and minimum joint position value in each dimension
    max_dim = math_ops.reduce_max(tensor, axis=1, keepdims=True)
    min_dim = math_ops.reduce_min(tensor, axis=1, keepdims=True)
    diff_dim = math_ops.subtract(max_dim, min_dim)
    # get normalizing factor as maximum difference within all dimensions
    max_diff = math_ops.reduce_max(diff_dim, axis=2, keepdims=True)
    normalized_tensor = math_ops.divide(tensor - min_dim - diff_dim / 2,
                                        max_diff)

    # return output rescaled and shifted to original position
    def unnormalize(tensor):
        return math_ops.multiply(tensor, max_diff) + diff_dim / 2 + min_dim

    # return output only rescaled, centered around 0
    def undo_scaling(tensor):
        return math_ops.multiply(tensor, max_diff)

    return normalized_tensor, undo_scaling
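
An illustrative eager-mode check with hypothetical values: the output spans at most [-0.5, 0.5] in every dimension, and undo_scaling restores the original spread without restoring the offset.

# Hypothetical input: 1 batch sample, 3 nodes, 2 coordinate dimensions.
import tensorflow as tf

points = tf.constant([[[0.0, 0.0], [2.0, 1.0], [4.0, 2.0]]])
normalized, undo_scaling = normalize_for_graph_lstm(points)
# normalized[..., 0] is [-0.5, 0.0, 0.5]; the second dimension spans [-0.25, 0.25]
# because both are divided by the same (largest) per-sample range of 4.0.
rescaled = undo_scaling(normalized)   # spreads of 4.0 and 2.0 again, centred at 0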
Example #45
0
def softmax(logits: ragged_tensor.Ragged, axis=None, name=None):
  """Computes softmax activations.

  Used for multi-class predictions. The sum of all outputs generated by softmax
  is 1.

  This function performs the equivalent of

      softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis)

  Example usage:

  >>> softmax = tf.nn.softmax([-1, 0., 1.])
  >>> softmax
  <tf.Tensor: shape=(3,), dtype=float32,
  numpy=array([0.09003057, 0.24472848, 0.66524094], dtype=float32)>
  >>> sum(softmax)
  <tf.Tensor: shape=(), dtype=float32, numpy=1.0>

  Args:
    logits: A non-empty `Tensor`. Must be one of the following types: `half`,
      `float32`, `float64`.
    axis: The dimension softmax would be performed on. The default is -1 which
      indicates the last dimension.
    name: A name for the operation (optional).

  Returns:
    A `Tensor`. Has the same type and shape as `logits`.

  Raises:
    InvalidArgumentError: if `logits` is empty or `axis` is beyond the last
      dimension of `logits`.
  """
  if axis is None:
    axis = -1

  with ops.name_scope(name, 'RaggedSoftmax', [logits]) as name:
    max_input = reduce_max(logits, axis=axis, keepdims=True)
    logits_exp = math_ops.exp(math_ops.subtract(logits, max_input))
    denominator = reduce_sum(logits_exp, axis=axis, keepdims=True)
    return math_ops.divide(logits_exp, denominator)
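
A hedged usage sketch: in current TF releases the ragged dispatch means tf.nn.softmax accepts a RaggedTensor directly and normalizes each ragged row.

# Hypothetical ragged input; each row sums to 1 after softmax.
import tensorflow as tf

rt = tf.ragged.constant([[-1.0, 0.0, 1.0], [0.0, 0.0]])
sm = tf.nn.softmax(rt, axis=-1)
# First row ~ [0.090, 0.245, 0.665] (as in the docstring example); second row is [0.5, 0.5].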
Example #46
0
  def testMultiGPUSessionRun(self):
    local_devices = device_lib.list_local_devices()
    gpu_device_names = []
    for device in local_devices:
      if device.device_type == "GPU":
        gpu_device_names.append(device.name)
    gpu_device_names = sorted(gpu_device_names)

    if len(gpu_device_names) < 2:
      self.skipTest(
          "This test requires at least 2 GPUs, but only %d is available." %
          len(gpu_device_names))

    with session.Session() as sess:
      v = variables.Variable([10.0, 15.0], dtype=dtypes.float32, name="v")
      with ops.device(gpu_device_names[0]):
        u0 = math_ops.add(v, v, name="u0")
      with ops.device(gpu_device_names[1]):
        u1 = math_ops.multiply(v, v, name="u1")
      w = math_ops.subtract(u1, u0, name="w")

      self.evaluate(v.initializer)

      run_options = config_pb2.RunOptions(output_partition_graphs=True)
      debug_utils.watch_graph(run_options, sess.graph,
                              debug_urls="file://" + self._dump_root)
      run_metadata = config_pb2.RunMetadata()
      self.assertAllClose(
          [80.0, 195.0],
          sess.run(w, options=run_options, run_metadata=run_metadata))

      debug_dump_dir = debug_data.DebugDumpDir(
          self._dump_root, partition_graphs=run_metadata.partition_graphs)
      self.assertEqual(3, len(debug_dump_dir.devices()))
      self.assertAllClose(
          [10.0, 15.0], debug_dump_dir.get_tensors("v", 0, "DebugIdentity")[0])
      self.assertAllClose(
          [20.0, 30.0], debug_dump_dir.get_tensors("u0", 0, "DebugIdentity")[0])
      self.assertAllClose(
          [100.0, 225.0],
          debug_dump_dir.get_tensors("u1", 0, "DebugIdentity")[0])
Example #47
0
  def setUp(self):
    self.base_path = os.path.join(test.get_temp_dir(), "no_vars")
    if not os.path.exists(self.base_path):
      os.mkdir(self.base_path)

    # Create a simple graph with a variable, then convert variables to
    # constants and export the graph.
    with ops.Graph().as_default() as g:
      x = array_ops.placeholder(dtypes.float32, name="x")
      w = variables.Variable(3.0)
      y = math_ops.subtract(w * x, 7.0, name="y")  # pylint: disable=unused-variable
      ops.add_to_collection("meta", "this is meta")

      with self.session(graph=g) as session:
        variables.global_variables_initializer().run()
        new_graph_def = graph_util.convert_variables_to_constants(
            session, g.as_graph_def(), ["y"])

      filename = os.path.join(self.base_path, constants.META_GRAPH_DEF_FILENAME)
      saver.export_meta_graph(
          filename, graph_def=new_graph_def, collection_list=["meta"])
Example #48
0
  def _KroneckerProduct(b1, b2):
    """Computes the Kronecker product of two batches of square matrices."""
    b1_shape = array_ops.shape(b1)
    b2_shape = array_ops.shape(b2)
    b1_order = b1_shape[-1]
    b2_order = b2_shape[-1]

    shape_slice_size = [math_ops.subtract(array_ops.size(b1_shape), 2)]
    shape_slice = array_ops.slice(b1_shape, [0],
                                  shape_slice_size)  # Same for both batches
    b1_reshape_shape = array_ops.concat(
        [shape_slice, [b1_order], [1], [b1_order], [1]], 0)
    b2_reshape_shape = array_ops.concat(
        [shape_slice, [1], [b2_order], [1], [b2_order]], 0)

    b1_reshape = array_ops.reshape(b1, b1_reshape_shape)
    b2_reshape = array_ops.reshape(b2, b2_reshape_shape)

    order_prod = b1_order * b2_order
    kprod_shape = array_ops.concat([shape_slice, [order_prod], [order_prod]], 0)
    return array_ops.reshape(b1_reshape * b2_reshape, kprod_shape)
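
An illustrative check, assuming the helper is callable in this scope: the Kronecker product of a batch of 2x2 matrices with a batch of 3x3 identities has shape [batch, 6, 6], and each 3x3 block of the result equals the corresponding entry of b1 times the identity.

# Hypothetical shapes; runs eagerly since only shape/reshape ops are involved.
import tensorflow as tf

b1 = tf.random.normal([4, 2, 2])
b2 = tf.eye(3, batch_shape=[4])
kprod = _KroneckerProduct(b1, b2)    # shape [4, 6, 6]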
Example #49
0
  def setUp(self):
    self.a = variables.VariableV1(10.0, name="a")
    self.b = variables.VariableV1(20.0, name="b")

    self.c = math_ops.add(self.a, self.b, name="c")  # Should be 30.0.
    self.d = math_ops.subtract(self.a, self.c, name="d")  # Should be -20.0.
    self.e = math_ops.multiply(self.c, self.d, name="e")  # Should be -600.0.

    self.ph = array_ops.placeholder(dtypes.float32, shape=(2, 2), name="ph")
    self.f = math_ops.multiply(self.e, self.ph, name="f")

    self.opt = gradient_descent.GradientDescentOptimizer(0.1).minimize(
        self.e, name="opt")

    rewriter_config = rewriter_config_pb2.RewriterConfig(
        disable_model_pruning=True)
    graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_config)
    config = config_pb2.ConfigProto(graph_options=graph_options)
    self.sess = session.Session(config=config)

    self.sess.run(self.a.initializer)
    self.sess.run(self.b.initializer)
Example #50
0
def per_image_standardization(image):
  """Linearly scales `image` to have zero mean and unit norm.

  This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average
  of all values in image, and
  `adjusted_stddev = max(stddev, 1.0/sqrt(image.NumElements()))`.

  `stddev` is the standard deviation of all values in `image`. It is capped
  away from zero to protect against division by 0 when handling uniform images.

  Args:
    image: 3-D tensor of shape `[height, width, channels]`.

  Returns:
    The standardized image with same shape as `image`.

  Raises:
    ValueError: if the shape of 'image' is incompatible with this function.
  """
  image = ops.convert_to_tensor(image, name='image')
  _Check3DImage(image, require_static=False)
  num_pixels = math_ops.reduce_prod(array_ops.shape(image))

  image = math_ops.cast(image, dtype=dtypes.float32)
  image_mean = math_ops.reduce_mean(image)

  variance = (math_ops.reduce_mean(math_ops.square(image)) -
              math_ops.square(image_mean))
  variance = gen_nn_ops.relu(variance)
  stddev = math_ops.sqrt(variance)

  # Apply a minimum normalization that protects us against uniform images.
  min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32))
  pixel_value_scale = math_ops.maximum(stddev, min_stddev)
  pixel_value_offset = image_mean

  image = math_ops.subtract(image, pixel_value_offset)
  image = math_ops.div(image, pixel_value_scale)
  return image
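
A minimal usage sketch; the same behaviour is exposed publicly as tf.image.per_image_standardization.

# Hypothetical 3-D image; after standardization the mean is ~0 and the
# standard deviation is ~1 (unless the image is nearly uniform).
import tensorflow as tf

image = tf.random.uniform([64, 64, 3], maxval=255.0)
standardized = tf.image.per_image_standardization(image)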
Example #51
0
    def sample(self, time, outputs, state, name=None):
        """sample for SampledEmbeddingHelper."""
        del time, state  # unused by sample_fn
        # Outputs are logits, use random_ops.multinomial to sample ids
        if not isinstance(outputs, ops.Tensor):
            raise TypeError("Expected outputs to be a single Tensor, got: %s" %
                            type(outputs))

        outputs2 = math_ops.div(math_ops.exp(outputs), self.temp)
        outputs3 = math_ops.div(
            outputs2, math_ops.reduce_sum(outputs2, axis=1, keep_dims=True))
        outputs4 = math_ops.log(outputs3) - math_ops.log(
            math_ops.subtract(1.0, outputs3))
        sample_ids2 = math_ops.cast(random_ops.multinomial(outputs4, 1),
                                    dtypes.int32)
        sample_ids = array_ops.reshape(sample_ids2, [-1])

        #with open("log.txt","w") as f:
        #  f.write(",".join([str(outputs.shape), str(outputs2.shape), str(outputs3.shape), str(sample_ids2.shape), str(sample_ids.shape),
        #                    str(outputs), str(outputs2), str(outputs3), str(sample_ids2), str(sample_ids),str(self.temp)]))

        return sample_ids
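
For comparison, a simpler and more common temperature-sampling formulation (not the helper's exact math) scales the logits before categorical sampling.

# Hypothetical logits; temperature < 1 sharpens, > 1 flattens the distribution.
import tensorflow as tf

logits = tf.random.normal([4, 100])                 # [batch, vocab]
temperature = 0.8
sample_ids = tf.reshape(
    tf.random.categorical(logits / temperature, num_samples=1), [-1])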
Example #52
0
        def compute_loss(labels, predictions, weights, loss_collection):
            predictions = math_ops.cast(predictions, dtype=dtypes.float32)
            predictions.get_shape().assert_is_compatible_with(
                labels.get_shape())

            diffs = math_ops.subtract(predictions, labels)

            axis = math_ops.range(1, array_ops.rank(diffs))

            sum_squares_diff_per_batch = math_ops.reduce_sum(
                math_ops.square(diffs), axis=axis, keepdims=True)
            num_present_per_batch = _num_present(diffs,
                                                 weights,
                                                 per_batch=True)

            term1 = 2.0 * math_ops.div_no_nan(
                sum_squares_diff_per_batch,
                math_ops.maximum(num_present_per_batch - 1, 0),
                name="value")

            sum_diff = math_ops.reduce_sum(diffs, axis=axis, keepdims=True)
            term2 = 2.0 * math_ops.div_no_nan(
                math_ops.square(sum_diff),
                math_ops.maximum(
                    math_ops.multiply(num_present_per_batch,
                                      num_present_per_batch - 1), 0),
                name="value")

            weighted_losses = math_ops.multiply(term1 - term2, weights)
            loss = math_ops.reduce_sum(weighted_losses)

            mean_loss = array_ops.where(
                math_ops.reduce_sum(num_present_per_batch) > 0,
                loss,
                array_ops.zeros_like(loss),
                name="value")
            util.add_loss(mean_loss, loss_collection)
            return mean_loss
Example #53
0
        def _Update_global_variables():
            local_vars = [v for g, v in grads_and_vars if g is not None]
            global_center_vars = [self._global_map[var] for var in local_vars]
            local_center_vars = [self._local_map[var] for var in local_vars]
            local_center_vars_update = []
            for lvar, var in zip(local_center_vars, global_center_vars):
                local_center_vars_update.append(lvar.assign(var))
            update_ops = []
            differences = []
            with ops.control_dependencies(local_center_vars_update):
                for v, lv in zip(local_vars, local_center_vars):
                    with ops.device(v.device):
                        differences.append(math_ops.subtract(v, lv))
                for lvar, diff in zip(local_vars, differences):
                    with ops.device(lvar.device):
                        flocking_local = tf.minimum(
                            (self._rep / self._dis) * tf.abs(diff) -
                            self._rep + self._att, self._att)
                        update_ops.append(
                            state_ops.assign_sub(
                                lvar, math_ops.multiply(flocking_local, diff)))

                for var, diff in zip(global_center_vars, differences):
                    with ops.device(var.device):
                        flocking_g = tf.minimum(
                            (self._rep / self._dis) * tf.abs(diff) -
                            self._rep + self._att, self._att)
                        update_ops.append(
                            state_ops.assign_add(
                                var,
                                math_ops.multiply(flocking_g, diff) / 4))

                if global_step:
                    with ops.colocate_with(global_step):
                        update_ops.append(state_ops.assign_add(global_step, 1))
            variable_update = control_flow_ops.group(*(update_ops))
            return variable_update
Example #54
0
 def _model_fn(features, labels, mode):
     _ = labels
     x = features['x']
     y = features['y']
     with ops.name_scope('outputs'):
         predictions = {
             'sum': math_ops.add(x, y, name='sum'),
             'product': math_ops.multiply(x, y, name='product'),
             'difference': math_ops.subtract(x, y, name='difference')
         }
     if core:
         export_outputs = {
             k: export_output.PredictOutput({k: v})
             for k, v in predictions.items()
         }
         export_outputs[
             signature_constants.
             DEFAULT_SERVING_SIGNATURE_DEF_KEY] = export_outputs['sum']
         return model_fn.EstimatorSpec(mode=mode,
                                       predictions=predictions,
                                       export_outputs=export_outputs,
                                       loss=constant_op.constant(0),
                                       train_op=control_flow_ops.no_op())
     else:
         output_alternatives = {
             k: (constants.ProblemType.UNSPECIFIED, {
                 k: v
             })
             for k, v in predictions.items()
         }
         return contrib_model_fn.ModelFnOps(
             mode=mode,
             predictions=predictions,
             output_alternatives=output_alternatives,
             loss=constant_op.constant(0),
             train_op=control_flow_ops.no_op())
Example #55
0
def _get_folded_kernel_bias(conv_type, kernel, bias, mu, var, gamma, beta,
                            epsilon):
  """ Get folded kernel and bias
      folded_kernel = kernel * multiplier
                    = kernel * gamma / sigma_bt

      folded_bias = beta - (mu - bias) * multiplier
                  = beta - (mu - bias) * gamma / sigma
  """
  sigma = math_ops.rsqrt(var + epsilon)
  if gamma is not None:
    multiplier = math_ops.mul(gamma, sigma)
  else:
    multiplier = sigma
  if conv_type == 'DepthwiseConv2D':
    new_shape = [kernel.shape[2], kernel.shape[3]]
    depthwise_multiplier = array_ops.reshape(multiplier, new_shape)
    folded_kernel = math_ops.mul(
        depthwise_multiplier, kernel, name='depthwise_kernel')
  else:
    folded_kernel = math_ops.mul(multiplier, kernel, name='kernel')

  folded_bias = math_ops.subtract(beta, (mu - bias) * multiplier, name='bias')
  return folded_kernel, folded_bias
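
A small eager-mode sketch of the folding identity stated in the docstring, written with public tf ops rather than the helper itself; all shapes and values are hypothetical.

import tensorflow as tf

# Illustrative 1x1 Conv2D batch-norm folding check.
kernel = tf.random.normal([1, 1, 3, 8])                 # HWIO
bias, mu = tf.zeros([8]), tf.random.normal([8])
var, eps = tf.ones([8]) * 0.5, 1e-3
gamma, beta = tf.random.normal([8]), tf.random.normal([8])

multiplier = gamma * tf.math.rsqrt(var + eps)           # gamma / sqrt(var + eps)
folded_kernel = kernel * multiplier                     # broadcasts over output channels
folded_bias = beta - (mu - bias) * multiplier

x = tf.random.normal([2, 5, 5, 3])
y_folded = tf.nn.conv2d(x, folded_kernel, 1, 'SAME') + folded_bias
y_bn = tf.nn.batch_normalization(
    tf.nn.conv2d(x, kernel, 1, 'SAME') + bias, mu, var, beta, gamma, eps)
# y_folded and y_bn agree up to floating-point error.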
Example #56
0
    def _resource_apply_dense(self, grad, var, constraint, apply_state):
        update_ops = []

        grad = ops.convert_to_tensor(grad, var.dtype.base_dtype)
        lr = math_ops.cast(self._get_hyper('learning_rate'),
                           var.dtype.base_dtype)

        if self._momentum:
            m = math_ops.cast(self._get_hyper('momentum'),
                              var.dtype.base_dtype)
            momentum_var = self.get_slot(var, 'momentum')
            modified_grad = momentum_var.assign(
                math_ops.add(m * momentum_var, (1 - m) * grad))
        else:
            modified_grad = grad

        v = ops.convert_to_tensor(constraint.lmo(modified_grad),
                                  var.dtype.base_dtype)
        vminvar = math_ops.subtract(v, var)

        if self.rescale is None:
            factor = math_ops.cast(1., var.dtype.base_dtype)
        elif self.rescale == 'diameter':
            factor = math_ops.cast(1. / constraint.get_diameter(),
                                   var.dtype.base_dtype)
        elif self.rescale == 'gradient':
            factor = math_ops.cast(
                tf.norm(modified_grad, ord=2) / tf.norm(vminvar, ord=2),
                var.dtype.base_dtype)
        clipped_lr = math_ops.ClipByValue(t=lr * factor,
                                          clip_value_min=0,
                                          clip_value_max=1)

        update_ops.append(state_ops.assign_add(var, clipped_lr * vminvar))

        return control_flow_ops.group(*update_ops)
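
A hypothetical constraint object illustrating the interface the step above relies on (an lmo() method and get_diameter()); here an L2 ball of a given radius, which is an assumption rather than the original project's constraint class.

import tensorflow as tf

class L2BallConstraint:
    def __init__(self, radius):
        self.radius = radius

    def lmo(self, grad):
        # Linear minimization oracle over {v : ||v|| <= radius}:
        # argmin <grad, v> is -radius * grad / ||grad||.
        return -self.radius * grad / (tf.norm(grad) + 1e-12)

    def get_diameter(self):
        return 2.0 * self.radius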
Example #57
0
 def __rsub__(self, other):
     return math_ops.subtract(other, self)
Example #58
0
 def __sub__(self, other):
     return math_ops.subtract(self, other)
Example #59
0
def _sampled_scattered_embedding_lookup(
    params, values, dimension=None, sampled_candidates=None, hash_key=None,
    name=None):
  """Looks up embeddings using parameter hashing for each value in `values`.

  This method looks up selected embedding dimensions if `sampled_candidates` is
  given, otherwise looks up all dimensions.

  The i-th embedding component of a value v in `values` is found by retrieving
  the weight whose index is a fingerprint of the pair (v,i).
  The concept is explored as "feature hashing" for model compression in this
  paper: http://arxiv.org/pdf/1504.04788.pdf

  Feature hashing has the pleasant effect of allowing us to compute an embedding
  without needing a pre-determined vocabulary, relieving some amount of process
  complexity. It also allows for us to maintain embeddings for possibly
  trillions of features with a fixed amount of memory.

  Note that this is superior to out-of-vocabulary shared "hash buckets" in that
  the embedding is extremely likely to be unique for each token as opposed to
  being shared across probably-colliding tokens. The price is that we must
  compute a hash once for each scalar in the token's embedding as opposed to
  once per token.

  If `params` is a list, it represents a partition of the embedding parameters.
  Each tensor in the list should have the same length, except for the first ones
  which may have an additional element. For instance 10 parameters can be
  partitioned in 4 tensors with length `[3, 3, 2, 2]`.

  Args:
    params: A `Tensor`, `list` of `Tensors`, or `PartitionedVariable`.
      Each tensor must be of rank 1 with fully-defined shape.
    values: `Tensor` of values to be embedded with shape `[d0, ..., dn]`.
    dimension: Embedding dimension. The user must specify either `dimension` or
      `sampled_candidates`.
    sampled_candidates: An optional `Tensor` of slice indices to keep along the
      final dimension with shape `[d0, ..., dn, N]`. If given, `dimension` is
      ignored. If `None`, looks up all candidates.
    hash_key: Specify the hash_key that will be used by the `FingerprintCat64`
      function to combine the crosses fingerprints on SparseFeatureCrossOp
      (optional).
    name: An optional name for this op.

  Returns:
    A `Tensor` with shape `[d0, ..., dn, dimension]`.
    If `sampled_candidates` is given, the output shape is `[d0, ..., dn, N]`

  Raises:
    ValueError: if dimension is not positive or the partition size is invalid.
  """
  if isinstance(params, variables.PartitionedVariable):
    params = list(params)
  if not isinstance(params, list):
    params = [params]

  with ops.name_scope(name, "scattered_embedding_lookup",
                      params + [dimension, values]):
    # Flatten the values
    values_shape = array_ops.shape(values)
    values = array_ops.reshape(values, [-1, 1])

    if sampled_candidates is None:
      if dimension is None:
        raise ValueError(
            "You must specify either dimension or sampled_candidates.")
      if dimension <= 0:
        raise ValueError("Dimension must be >0. Given is %d" % dimension)
      sampled_candidates = array_ops.tile(array_ops.expand_dims(
          math_ops.range(0, dimension), 0), array_ops.shape(values))
    else:
      dimension = array_ops.shape(sampled_candidates)[
          math_ops.subtract(array_ops.rank(sampled_candidates), 1)]
      sampled_candidates_shape = array_ops.shape(sampled_candidates)
      dimension_tensor = array_ops.reshape(dimension, shape=[1,])
      expected_shape = array_ops.concat([values_shape, dimension_tensor], 0)
      with ops.control_dependencies([control_flow_ops.Assert(
          math_ops.reduce_all(math_ops.equal(sampled_candidates_shape,
                                             expected_shape)),
          ["The shape of sampled_candidates: ", sampled_candidates_shape,
           " does not match the shape of values: ", values_shape])]):
        # Flatten sampled_candidates, same way as values are flattened.
        sampled_candidates = array_ops.reshape(sampled_candidates,
                                               [-1, dimension])

    num_partitions = len(params)
    partition_sizes = []
    for p in range(num_partitions):
      shape = params[p].get_shape()
      shape.assert_has_rank(1)
      shape.assert_is_fully_defined()
      partition_sizes.append(shape[0].value)
    num_params = sum(partition_sizes)  # Total number of parameters.

    # Assert the size of each partition.
    for p in range(num_partitions):
      expected_size = (num_params - p - 1) // num_partitions + 1
      if partition_sizes[p] != expected_size:
        raise ValueError("Tensor %d in params has size %d, expected %d." %
                         (p, partition_sizes[p], expected_size))

    # With two values v1 and v2 and 3 dimensions, we will cross
    # [[0, 1, 2], [0, 1, 2]] with [[v1], [v2]].
    tensors_to_cross = [sampled_candidates, values]
    ids = sparse_feature_cross_op.sparse_feature_cross(
        tensors_to_cross, hashed_output=True, num_buckets=num_params,
        hash_key=hash_key)
    ids = sparse_ops.sparse_tensor_to_dense(ids)

    # No need to validate the indices since we have checked the params
    # dimensions and we know the largest id.
    result = embedding_ops.embedding_lookup(
        params, ids, partition_strategy="div")

    return array_ops.reshape(result,
                             array_ops.concat([values_shape, [dimension]], 0))
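
A conceptual sketch of the feature-hashing idea using public ops only (not the contrib op above): the i-th embedding component of a value v is the parameter indexed by a hash of the pair (v, i). The names and sizes below are illustrative.

# Hypothetical vocabulary-free embedding lookup via hashing.
import tensorflow as tf

params = tf.random.normal([1000])                         # flat parameter vector
values = tf.constant(["user_42", "user_7"])               # shape [2]
dimension = 4

idx = tf.strings.as_string(tf.range(dimension))           # ["0", "1", "2", "3"]
pairs = tf.strings.join(
    [tf.tile(values[:, None], [1, dimension]),            # [2, dimension]
     tf.tile(idx[None, :], [2, 1])],                      # [2, dimension]
    separator="_")
ids = tf.strings.to_hash_bucket_fast(pairs, 1000)         # hash of each (v, i) pair
embeddings = tf.gather(params, ids)                       # [2, dimension]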
Example #60
0
def mean_pairwise_squared_error(labels,
                                predictions,
                                weights=1.0,
                                scope=None,
                                loss_collection=ops.GraphKeys.LOSSES):
    """Adds a pairwise-errors-squared loss to the training procedure.

  Unlike `mean_squared_error`, which is a measure of the differences between
  corresponding elements of `predictions` and `labels`,
  `mean_pairwise_squared_error` is a measure of the differences between pairs of
  corresponding elements of `predictions` and `labels`.

  For example, if `labels`=[a, b, c] and `predictions`=[x, y, z], there are
  three pairs of differences, which are summed to compute the loss:
    loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3

  Note that since the inputs are of shape `[batch_size, d0, ... dN]`, the
  corresponding pairs are computed within each batch sample but not across
  samples within a batch. For example, if `predictions` represents a batch of
  16 grayscale images of dimension [batch_size, 100, 200], then the set of pairs
  is drawn from each image, but not across images.

  `weights` acts as a coefficient for the loss. If a scalar is provided, then
  the loss is simply scaled by the given value. If `weights` is a tensor of size
  `[batch_size]`, then the total loss for each sample of the batch is rescaled
  by the corresponding element in the `weights` vector.

  Args:
    labels: The ground truth output tensor, whose shape must match the shape of
      `predictions`.
    predictions: The predicted outputs, a tensor of size
      `[batch_size, d0, .. dN]` where N+1 is the total number of dimensions in
      `predictions`.
    weights: Coefficients for the loss: a scalar, a tensor of shape
      `[batch_size]`, or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` that returns the weighted loss.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `labels` or
      if the shape of `weights` is invalid.  Also if `labels` or `predictions`
      is None.

  @compatibility(eager)
  The `loss_collection` argument is ignored when executing eagerly. Consider
  holding on to the return value or collecting losses via a `tf.keras.Model`.
  @end_compatibility
  """
    if labels is None:
        raise ValueError("labels must not be None.")
    if predictions is None:
        raise ValueError("predictions must not be None.")
    with ops.name_scope(scope, "mean_pairwise_squared_error",
                        (predictions, labels, weights)) as scope:
        weights = math_ops.to_float(weights)
        labels = math_ops.to_float(labels)
        with ops.control_dependencies(
            (weights_broadcast_ops.assert_broadcastable(weights, labels), )):
            predictions = math_ops.to_float(predictions)
            predictions.get_shape().assert_is_compatible_with(
                labels.get_shape())

            diffs = math_ops.subtract(predictions, labels)

            axis = math_ops.range(1, array_ops.rank(diffs))

            sum_squares_diff_per_batch = math_ops.reduce_sum(
                math_ops.square(diffs), axis=axis, keepdims=True)
            num_present_per_batch = _num_present(diffs,
                                                 weights,
                                                 per_batch=True)

            term1 = 2.0 * math_ops.div_no_nan(
                sum_squares_diff_per_batch,
                math_ops.maximum(num_present_per_batch - 1, 0),
                name="value")

            sum_diff = math_ops.reduce_sum(diffs, axis=axis, keepdims=True)
            term2 = 2.0 * math_ops.div_no_nan(
                math_ops.square(sum_diff),
                math_ops.maximum(
                    math_ops.multiply(num_present_per_batch,
                                      num_present_per_batch - 1), 0),
                name="value")

            weighted_losses = math_ops.multiply(term1 - term2, weights)
            loss = math_ops.reduce_sum(weighted_losses)

            mean_loss = array_ops.where(
                math_ops.reduce_sum(num_present_per_batch) > 0,
                loss,
                array_ops.zeros_like(loss),
                name="value")
            util.add_loss(mean_loss, loss_collection)
            return mean_loss
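
A worked numeric example matching the docstring formula (hypothetical values; run under TF 1.x, assuming the module-level helpers such as _num_present and util are available):

# One sample with three elements: the three pairwise differences give
# ((4-8)-(8-1))^2 + ((4-12)-(8-3))^2 + ((8-12)-(1-3))^2 = 121 + 169 + 4 = 294,
# and dividing by the 3 pairs yields 98.0.
import tensorflow as tf

labels = tf.constant([[4.0, 8.0, 12.0]])
predictions = tf.constant([[8.0, 1.0, 3.0]])
loss = mean_pairwise_squared_error(labels, predictions)
with tf.Session() as sess:
    print(sess.run(loss))   # ~98.0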