Example 1
 def forward(self, y_true, y_pred):
     dims = list(range(len(y_pred.get_shape())))
     if self.axis != -1 and self.axis not in dims:
         raise ValueError("Axis out of y_pred's dimensions")
     if len(dims) - F.ndim(y_true) > 1:
         raise ValueError("y_pred's rank should be equal to y_true's"
                          " rank or y_true's rank + 1")
     elif len(dims) - F.ndim(y_true) == 1:
         y_true = array_ops.one_hot(y_true,
                                    depth=y_pred.shape[-1],
                                    dtype=y_pred.dtype)
     if not self.from_logits:
         if isinstance(y_pred, (ops.EagerTensor, variables.Variable)) \
                 or y_pred.op.type != 'Softmax':
             y_pred /= math_ops.reduce_sum(y_pred,
                                           axis=self.axis,
                                           keepdims=True)
             y_pred = clip_ops.clip_by_value(t=y_pred,
                                             clip_value_min=epsilon,
                                             clip_value_max=1 - epsilon)
             return -math_ops.reduce_sum(
                 math_ops.cast(y_true, y_pred.dtype) * math_ops.log(y_pred),
                 axis=self.axis)
         else:
             # When softmax activation function is used for output operation, we
             # use logits from the softmax function directly to compute loss in order
             # to prevent collapsing zero when training.
             # See b/117284466
             assert len(y_pred.op.inputs) == 1
             y_pred = y_pred.op.inputs[0]
     return nn.softmax_cross_entropy_with_logits(labels=y_true,
                                                 logits=y_pred)
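The two branches above mirror a standard identity: cross entropy computed from clipped, re-normalized probabilities matches the fused logits-based op up to the clipping epsilon. A minimal NumPy sketch of that identity (hypothetical helper names, not part of the snippet above):

import numpy as np

def xent_from_probs(y_true, probs, eps=1e-7):
    # Probability path: re-normalize and clip, then -sum(y_true * log(p)).
    probs = probs / probs.sum(axis=-1, keepdims=True)
    probs = np.clip(probs, eps, 1.0 - eps)
    return -(y_true * np.log(probs)).sum(axis=-1)

def xent_from_logits(y_true, logits):
    # Logits path: log-softmax via the log-sum-exp trick, as the fused op does.
    shifted = logits - logits.max(axis=-1, keepdims=True)
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=-1, keepdims=True))
    return -(y_true * log_probs).sum(axis=-1)

logits = np.array([[2.0, 0.5, -1.0]])
y_true = np.array([[1.0, 0.0, 0.0]])
probs = np.exp(logits) / np.exp(logits).sum(axis=-1, keepdims=True)
print(xent_from_probs(y_true, probs))   # both print the same value (~0.241)
print(xent_from_logits(y_true, logits))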
Example 2
def npairs_loss(labels, embeddings_anchor, embeddings_positive,
                reg_lambda=0.002, print_losses=False):
  """Computes the npairs loss.

  Npairs loss expects paired data where a pair is composed of samples from the
  same label and each pair in the minibatch has a different label. The loss
  has two components. The first component is the L2 regularizer on the
  embedding vectors. The second component is the sum of cross entropy loss
  which takes each row of the pair-wise similarity matrix as logits and
  the remapped one-hot labels as labels.

  See: http://www.nec-labs.com/uploads/images/Department-Images/MediaAnalytics/papers/nips16_npairmetriclearning.pdf

  Args:
    labels: 1-D tf.int32 `Tensor` of shape [batch_size/2].
    embeddings_anchor: 2-D Tensor of shape [batch_size/2, embedding_dim] for the
      embedding vectors for the anchor images. Embeddings should not be
      l2 normalized.
    embeddings_positive: 2-D Tensor of shape [batch_size/2, embedding_dim] for the
      embedding vectors for the positive images. Embeddings should not be
      l2 normalized.
    reg_lambda: Float. L2 regularization term on the embedding vectors.
    print_losses: Boolean. Option to print the xent and l2loss.

  Returns:
    npairs_loss: tf.float32 scalar.
  """
  # pylint: enable=line-too-long
  # Add the regularizer on the embedding.
  reg_anchor = math_ops.reduce_mean(
      math_ops.reduce_sum(math_ops.square(embeddings_anchor), 1))
  reg_positive = math_ops.reduce_mean(
      math_ops.reduce_sum(math_ops.square(embeddings_positive), 1))
  l2loss = math_ops.multiply(
      0.25 * reg_lambda, reg_anchor + reg_positive, name='l2loss')

  # Get per pair similarities.
  similarity_matrix = math_ops.matmul(
      embeddings_anchor, embeddings_positive, transpose_a=False,
      transpose_b=True)

  # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
  lshape = array_ops.shape(labels)
  assert lshape.shape == 1
  labels = array_ops.reshape(labels, [lshape[0], 1])

  labels_remapped = math_ops.to_float(
      math_ops.equal(labels, array_ops.transpose(labels)))
  labels_remapped /= math_ops.reduce_sum(labels_remapped, 1, keep_dims=True)

  # Add the softmax loss.
  xent_loss = nn.softmax_cross_entropy_with_logits(
      logits=similarity_matrix, labels=labels_remapped)
  xent_loss = math_ops.reduce_mean(xent_loss, name='xentropy')

  if print_losses:
    xent_loss = logging_ops.Print(
        xent_loss, ['cross entropy:', xent_loss, 'l2loss:', l2loss])

  return l2loss + xent_loss
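The labels_remapped computation above turns the integer labels into a row-normalized equality matrix that serves as soft targets for the softmax loss. A small NumPy sketch of that remapping, using made-up labels:

import numpy as np

labels = np.array([0, 1, 0, 2])  # hypothetical labels for four anchor/positive pairs

# Pairwise equality matrix, then row-normalize so each row is a distribution,
# mirroring the labels_remapped computation above.
adjacency = np.equal(labels[:, None], labels[None, :]).astype(np.float32)
targets = adjacency / adjacency.sum(axis=1, keepdims=True)
print(targets)
# Row 0 splits probability 0.5 / 0.5 between the two samples with label 0.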
Example 3
    def testOptimizerInit(self):
        with ops.Graph().as_default():
            layer_collection = lc.LayerCollection()

            inputs = array_ops.ones((2, 1)) * 2
            weights_val = np.ones((1, 1), dtype=np.float32) * 3.
            weights = variable_scope.get_variable(
                'w', initializer=array_ops.constant(weights_val))
            bias = variable_scope.get_variable(
                'b', initializer=init_ops.zeros_initializer(), shape=(1, 1))
            output = math_ops.matmul(inputs, weights) + bias

            layer_collection.register_fully_connected((weights, bias), inputs,
                                                      output)

            logits = math_ops.tanh(output)
            targets = array_ops.constant([[0.], [1.]])
            output = math_ops.reduce_mean(
                nn.softmax_cross_entropy_with_logits(logits=logits,
                                                     labels=targets))

            layer_collection.register_categorical_predictive_distribution(
                logits)

            optimizer.KfacOptimizer(0.1,
                                    0.2,
                                    0.3,
                                    layer_collection,
                                    momentum=0.5,
                                    momentum_type='regular')
Example 4
def softmax(logit, target, name=None):
  """Calculates the softmax cross-entropy loss, averaged across batches.

  **WARNING:** `logit` must be unscaled, while the `target` should be a
  normalized probability prediction. See
  `tf.nn.softmax_cross_entropy_with_logits` for more details.

  Args:
    logit: Tensor of actual values. Shape must have rank 2, generally
        (batch, num_classes). num_classes must be > 1. For single-class
        regression, use `logistic`. Type must be `tf.float32` or `tf.float64`.
    target: A `Tensor` of shape `[batch_size, dim_1, ..., dim_n]` of
      target values. The shape of the target tensor should match the
      `logit` tensor.
    name: A name for the operation (optional).

  Returns:
    A scalar `tensor` of the softmax cross-entropy loss, averaged across
    batches.

  Raises:
    ValueError: If `logit` and `target` shapes do not match.
  """
  with ops.op_scope([logit, target], name, "softmax_loss") as scope:
    shape = logit.get_shape().with_rank(2)
    if shape.dims[1] and shape.dims[1] < 2:
      raise ValueError(
          "Invalid shape %s; use logistic() instead for only 1 class." %
          shape)
    return _reduce_to_scalar(
        nn.softmax_cross_entropy_with_logits(logit, target), name=scope)
Example 5
def npairs_loss(labels, embeddings_anchor, embeddings_positive,
                reg_lambda=0.002, print_losses=False):
  """Computes the npairs loss.

  Npairs loss expects paired data where a pair is composed of samples from the
  same label and each pair in the minibatch has a different label. The loss
  has two components. The first component is the L2 regularizer on the
  embedding vectors. The second component is the sum of cross entropy loss
  which takes each row of the pair-wise similarity matrix as logits and
  the remapped one-hot labels as labels.

  See: http://www.nec-labs.com/uploads/images/Department-Images/MediaAnalytics/papers/nips16_npairmetriclearning.pdf

  Args:
    labels: 1-D tf.int32 `Tensor` of shape [batch_size/2].
    embeddings_anchor: 2-D Tensor of shape [batch_size/2, embedding_dim] for the
      embedding vectors for the anchor images. Embeddings should not be
      l2 normalized.
    embeddings_positive: 2-D Tensor of shape [batch_size/2, embedding_dim] for the
      embedding vectors for the positive images. Embeddings should not be
      l2 normalized.
    reg_lambda: Float. L2 regularization term on the embedding vectors.
    print_losses: Boolean. Option to print the xent and l2loss.

  Returns:
    npairs_loss: tf.float32 scalar.
  """
  # pylint: enable=line-too-long
  # Add the regularizer on the embedding.
  reg_anchor = math_ops.reduce_mean(
      math_ops.reduce_sum(math_ops.square(embeddings_anchor), 1))
  reg_positive = math_ops.reduce_mean(
      math_ops.reduce_sum(math_ops.square(embeddings_positive), 1))
  l2loss = math_ops.multiply(
      0.25 * reg_lambda, reg_anchor + reg_positive, name='l2loss')

  # Get per pair similarities.
  similarity_matrix = math_ops.matmul(
      embeddings_anchor, embeddings_positive, transpose_a=False,
      transpose_b=True)

  # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
  lshape = array_ops.shape(labels)
  assert lshape.shape == 1
  labels = array_ops.reshape(labels, [lshape[0], 1])

  labels_remapped = math_ops.to_float(
      math_ops.equal(labels, array_ops.transpose(labels)))
  labels_remapped /= math_ops.reduce_sum(labels_remapped, 1, keepdims=True)

  # Add the softmax loss.
  xent_loss = nn.softmax_cross_entropy_with_logits(
      logits=similarity_matrix, labels=labels_remapped)
  xent_loss = math_ops.reduce_mean(xent_loss, name='xentropy')

  if print_losses:
    xent_loss = logging_ops.Print(
        xent_loss, ['cross entropy:', xent_loss, 'l2loss:', l2loss])

  return l2loss + xent_loss
Example 6
def softmax_cross_entropy(onehot_labels,
                          logits,
                          weights=1.0,
                          label_smoothing=0,
                          scope=None,
                          loss_collection=ops.GraphKeys.LOSSES,
                          reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):

    if onehot_labels is None:
        raise ValueError("onehot_labels must not be None.")
    if logits is None:
        raise ValueError("logits must not be None.")
    with ops.name_scope(scope, "softmax_cross_entropy_loss",
                        (logits, onehot_labels, weights)) as scope:
        logits = ops.convert_to_tensor(logits)
        onehot_labels = math_ops.cast(onehot_labels, logits.dtype)
        logits.get_shape().assert_is_compatible_with(onehot_labels.get_shape())

        losses = nn.softmax_cross_entropy_with_logits(labels=onehot_labels,
                                                      logits=logits,
                                                      name="xentropy")
        tf.logging.info('losses %s' % losses)
        tf.logging.info('onehot_labels %s' % onehot_labels)
        return tf.losses.compute_weighted_loss(losses,
                                               weights,
                                               scope,
                                               loss_collection,
                                               reduction=reduction)
Example 7
  def testOptimizerInit(self):
    with ops.Graph().as_default():
      layer_collection = lc.LayerCollection()

      inputs = array_ops.ones((2, 1)) * 2
      weights_val = np.ones((1, 1), dtype=np.float32) * 3.
      weights = variable_scope.get_variable(
          'w', initializer=array_ops.constant(weights_val))
      bias = variable_scope.get_variable(
          'b', initializer=init_ops.zeros_initializer(), shape=(1, 1))
      output = math_ops.matmul(inputs, weights) + bias

      layer_collection.register_fully_connected((weights, bias), inputs, output)

      logits = math_ops.tanh(output)
      targets = array_ops.constant([[0.], [1.]])
      output = math_ops.reduce_mean(
          nn.softmax_cross_entropy_with_logits(logits=logits, labels=targets))

      layer_collection.register_categorical_predictive_distribution(logits)

      optimizer.KfacOptimizer(
          0.1,
          0.2,
          0.3,
          layer_collection,
          momentum=0.5,
          momentum_type='regular')
Example 8
def sequence_classifier(decoding, labels, sampling_decoding=None, name=None):
  """Returns predictions and loss for sequence of predictions.

  Args:
    decoding: List of Tensors with predictions.
    labels: List of Tensors with labels.
    sampling_decoding: Optional. List of Tensors with predictions to be used
      in sampling, e.g. they shouldn't have a dependency on the outputs.
      If not provided, `decoding` is used.
    name: Operation name.

  Returns:
    Predictions and losses tensors.
  """
  with ops.op_scope([decoding, labels], name, "sequence_classifier"):
    predictions, xent_list = [], []
    for i, pred in enumerate(decoding):
      xent_list.append(nn.softmax_cross_entropy_with_logits(
          pred, labels[i],
          name="sequence_loss/xent_raw{0}".format(i)))
      if sampling_decoding:
        predictions.append(nn.softmax(sampling_decoding[i]))
      else:
        predictions.append(nn.softmax(pred))
    xent = math_ops.add_n(xent_list, name="sequence_loss/xent")
    loss = math_ops.reduce_sum(xent, name="sequence_loss")
    return array_ops.expand_concat(1, predictions), loss
Example 9
def softmax_cross_entropy(onehot_labels,
                          logits,
                          weights=1.0,
                          label_smoothing=0,
                          scope=None,
                          loss_collection=ops.GraphKeys.LOSSES,
                          reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
    """Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

  If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes:
      new_onehot_labels = onehot_labels * (1 - label_smoothing)
                          + label_smoothing / num_classes

  Args:
    onehot_labels: `[batch_size, num_classes]` target one-hot-encoded labels.
    logits: [batch_size, num_classes] logits outputs of the network.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `onehot_labels`, and must be broadcastable to `onehot_labels` (i.e., all
      dimensions must be either `1`, or the same as the corresponding `losses`
      dimension).
    label_smoothing: If greater than 0 then smooth the labels.
    scope: the scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
    `NONE`, this has shape `[batch_size]`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of `onehot_labels`
      or if the shape of `weights` is invalid or if `weights` is None.
  """
    with ops.name_scope(scope, "softmax_cross_entropy_loss",
                        (logits, onehot_labels, weights)) as scope:
        logits = ops.convert_to_tensor(logits)
        onehot_labels = math_ops.cast(onehot_labels, logits.dtype)
        logits.get_shape().assert_is_compatible_with(onehot_labels.get_shape())

        if label_smoothing > 0:
            num_classes = math_ops.cast(
                array_ops.shape(onehot_labels)[1], logits.dtype)
            smooth_positives = 1.0 - label_smoothing
            smooth_negatives = label_smoothing / num_classes
            onehot_labels = onehot_labels * smooth_positives + smooth_negatives

        losses = nn.softmax_cross_entropy_with_logits(labels=onehot_labels,
                                                      logits=logits,
                                                      name="xentropy")
        return compute_weighted_loss(losses,
                                     weights,
                                     scope,
                                     loss_collection,
                                     reduction=reduction)
Example 10
 def loop_fn(i):
   with g:
     logits_i = array_ops.gather(logits, i)
     labels_i = array_ops.gather(labels, i)
     loss = nn.softmax_cross_entropy_with_logits(
         labels=labels_i, logits=logits_i)
     total_loss = math_ops.reduce_sum(loss)
   return loss, g.gradient(total_loss, logits_i)
Example 11
 def loop_fn(i):
   with g:
     logits_i = array_ops.gather(logits, i)
     labels_i = array_ops.gather(labels, i)
     loss = nn.softmax_cross_entropy_with_logits(
         labels=labels_i, logits=logits_i)
     total_loss = math_ops.reduce_sum(loss)
   return loss, g.gradient(total_loss, logits_i)
Example 12
def softmax_cross_entropy(
    onehot_labels, logits, weights=1.0, label_smoothing=0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

  If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes:
      new_onehot_labels = onehot_labels * (1 - label_smoothing)
                          + label_smoothing / num_classes

  Args:
    onehot_labels: `[batch_size, num_classes]` target one-hot-encoded labels.
    logits: `[batch_size, num_classes]` logits outputs of the network.
    weights: Optional `Tensor` whose rank is either 0, or rank 1 and is
      broadcastable to the loss which is a `Tensor` of shape `[batch_size]`.
    label_smoothing: If greater than 0 then smooth the labels.
    scope: the scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
    `NONE`, this has shape `[batch_size]`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of `onehot_labels`
      or if the shape of `weights` is invalid or if `weights` is None.  Also if
      `onehot_labels` or `logits` is None.
  """
  if onehot_labels is None:
    raise ValueError("onehot_labels must not be None.")
  if logits is None:
    raise ValueError("logits must not be None.")
  with ops.name_scope(scope, "softmax_cross_entropy_loss",
                      (logits, onehot_labels, weights)) as scope:
    logits = ops.convert_to_tensor(logits)
    onehot_labels = math_ops.cast(onehot_labels, logits.dtype)
    logits.get_shape().assert_is_compatible_with(onehot_labels.get_shape())

    if label_smoothing > 0:
      num_classes = math_ops.cast(
          array_ops.shape(onehot_labels)[1], logits.dtype)
      smooth_positives = 1.0 - label_smoothing
      smooth_negatives = label_smoothing / num_classes
      onehot_labels = onehot_labels * smooth_positives + smooth_negatives

    losses = nn.softmax_cross_entropy_with_logits(labels=onehot_labels,
                                                  logits=logits,
                                                  name="xentropy")
    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)
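The label-smoothing step described in the docstring is a pure element-wise rescaling of the one-hot targets. A short NumPy sketch of the formula, using an illustrative smoothing factor:

import numpy as np

onehot = np.array([[0., 0., 1., 0.]])
label_smoothing = 0.1                      # illustrative value
num_classes = onehot.shape[1]

# new_onehot = onehot * (1 - label_smoothing) + label_smoothing / num_classes
smoothed = onehot * (1.0 - label_smoothing) + label_smoothing / num_classes
print(smoothed)  # [[0.025 0.025 0.925 0.025]] -- rows still sum to 1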
Example 13
def softmax_cross_entropy(onehot_labels,
                          logits,
                          weights=1.0,
                          label_smoothing=0,
                          scope=None,
                          loss_collection=ops.GraphKeys.LOSSES):
    """Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

  WARNING: `weights` also supports dimensions of 1, but the broadcasting does
  not work as advertised, you'll wind up with weighted sum instead of weighted
  mean for any but the last dimension. This will be cleaned up soon, so please
  do not rely on the current behavior for anything but the shapes documented for
  `weights` below.

  If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes:
      new_onehot_labels = onehot_labels * (1 - label_smoothing)
                          + label_smoothing / num_classes

  Args:
    onehot_labels: `[batch_size, num_classes]` target one-hot-encoded labels.
    logits: [batch_size, num_classes] logits outputs of the network.
    weights: Coefficients for the loss. This must be of shape `[]`,
      `[batch_size]` or `[batch_size, num_classes]`.
    label_smoothing: If greater than 0 then smooth the labels.
    scope: the scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` representing the mean loss value.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of `onehot_labels`
      or if the shape of `weights` is invalid or if `weights` is None.
  """
    with ops.name_scope(scope, "softmax_cross_entropy_loss",
                        [logits, onehot_labels, weights]) as scope:
        logits.get_shape().assert_is_compatible_with(onehot_labels.get_shape())

        onehot_labels = math_ops.cast(onehot_labels, logits.dtype)

        if label_smoothing > 0:
            num_classes = math_ops.cast(
                array_ops.shape(onehot_labels)[1], logits.dtype)
            smooth_positives = 1.0 - label_smoothing
            smooth_negatives = label_smoothing / num_classes
            onehot_labels = onehot_labels * smooth_positives + smooth_negatives

        losses = nn.softmax_cross_entropy_with_logits(logits,
                                                      onehot_labels,
                                                      name="xentropy")
        return compute_weighted_loss(losses, weights, scope, loss_collection)
Example 14
def npairs_loss_hash(labels,
                     embeddings_anchor,
                     embeddings_positive,
                     objective,
                     similarity_func,
                     reg_lambda=0.002):
    """Computes the npairs loss with objective
    similarity base
    Args:
        labels - 1D tensor [batch_size/2],
            tf.int32
        embeddings_anchor - 2D tensor [batch_size/2, embedding_dim]
            embedding vectors for anchor images
        embeddings_positive - 2D tensor [batch_size/2, embedding_dim]
            embedding vectors for positive images
        objective -  2D tensor [batch_size/2, embedding_dim]
            should be binary(0 or 1)
        similarity_func - func 
            args : 
                anc - 2D tensor [ndata, embedding_dim]
                pos - 2D tensor [ndata, embedding_dim]
                obj - 2D tensor [ndata, embedding_dim]
                    which is binary
            return :
                2D tensor [ndata, ndata] 
        reg_lambda - float for  L2 regularization term of embedding vectors
    Returns:
        npairs_loss: tf.float32 scalar.
    """
    reg_anchor = math_ops.reduce_mean(
        math_ops.reduce_sum(math_ops.square(embeddings_anchor), 1))
    reg_positive = math_ops.reduce_mean(
        math_ops.reduce_sum(math_ops.square(embeddings_positive), 1))
    l2loss = math_ops.multiply(0.25 * reg_lambda,
                               reg_anchor + reg_positive,
                               name='l2loss')

    similarity_matrix = similarity_func(
        anc=embeddings_anchor, pos=embeddings_positive,
        obj=objective)  # [batch_size/2, batch_size/2]
    # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
    lshape = array_ops.shape(labels)
    assert lshape.shape == 1
    labels = array_ops.reshape(labels, [lshape[0], 1])

    labels_remapped = math_ops.to_float(
        math_ops.equal(labels, array_ops.transpose(labels)))
    labels_remapped /= math_ops.reduce_sum(labels_remapped, 1, keep_dims=True)

    # Add the softmax loss.
    xent_loss = nn.softmax_cross_entropy_with_logits(logits=similarity_matrix,
                                                     labels=labels_remapped)
    xent_loss = math_ops.reduce_mean(xent_loss, name='xentropy')
    return l2loss + xent_loss
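The docstring above only specifies the signature of similarity_func. For illustration, a minimal TensorFlow sketch of a function matching that contract; it uses a plain dot-product similarity and ignores the binary objective, which is an assumption rather than the author's implementation:

import tensorflow as tf

def dot_product_similarity(anc, pos, obj):
    # Illustrative similarity_func matching the documented signature.
    # A real objective-aware version would presumably gate the embeddings
    # with the binary `obj` mask first; here it is ignored for simplicity.
    del obj  # unused in this minimal sketch
    return tf.matmul(anc, pos, transpose_b=True)  # [ndata, ndata]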
Example 15
def softmax_cross_entropy(
    onehot_labels, logits, weights=1.0, label_smoothing=0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES):
  """Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

  WARNING: `weights` also supports dimensions of 1, but the broadcasting does
  not work as advertised, you'll wind up with weighted sum instead of weighted
  mean for any but the last dimension. This will be cleaned up soon, so please
  do not rely on the current behavior for anything but the shapes documented for
  `weights` below.

  If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes:
      new_onehot_labels = onehot_labels * (1 - label_smoothing)
                          + label_smoothing / num_classes

  Args:
    onehot_labels: `[batch_size, num_classes]` target one-hot-encoded labels.
    logits: [batch_size, num_classes] logits outputs of the network.
    weights: Coefficients for the loss. This must be of shape `[]`,
      `[batch_size]` or `[batch_size, num_classes]`.
    label_smoothing: If greater than 0 then smooth the labels.
    scope: the scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` representing the mean loss value.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of `onehot_labels`
      or if the shape of `weights` is invalid or if `weights` is None.
  """
  with ops.name_scope(scope, "softmax_cross_entropy_loss",
                      (logits, onehot_labels, weights)) as scope:
    logits = ops.convert_to_tensor(logits)
    onehot_labels = math_ops.cast(onehot_labels, logits.dtype)
    logits.get_shape().assert_is_compatible_with(onehot_labels.get_shape())

    if label_smoothing > 0:
      num_classes = math_ops.cast(
          array_ops.shape(onehot_labels)[1], logits.dtype)
      smooth_positives = 1.0 - label_smoothing
      smooth_negatives = label_smoothing / num_classes
      onehot_labels = onehot_labels * smooth_positives + smooth_negatives

    losses = nn.softmax_cross_entropy_with_logits(labels=onehot_labels,
                                                  logits=logits,
                                                  name="xentropy")
    return compute_weighted_loss(losses, weights, scope, loss_collection)
Example 16
def softmax_cross_entropy(logits,
                          onehot_labels,
                          weights=_WEIGHT_SENTINEL,
                          label_smoothing=0,
                          scope=None,
                          weight=_WEIGHT_SENTINEL):
    """Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits.

  `weight` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weight` is a
  tensor of size [`batch_size`], then the loss weights apply to each
  corresponding sample.

  If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes:
      new_onehot_labels = onehot_labels * (1 - label_smoothing)
                          + label_smoothing / num_classes

  Args:
    logits: [batch_size, num_classes] logits outputs of the network.
    onehot_labels: [batch_size, num_classes] target one_hot_encoded labels.
    weights: Coefficients for the loss. The tensor must be a scalar or a tensor
      of shape [batch_size].
    label_smoothing: If greater than 0 then smooth the labels.
    scope: the scope for the operations performed in computing the loss.
    weight: Deprecated alias for `weights`.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of `onehot_labels`
      or if the shape of `weight` is invalid or if `weight` is None.
  """
    weights = _weights(weights, weight)
    with ops.name_scope(scope, "softmax_cross_entropy_loss",
                        [logits, onehot_labels, weights]) as scope:
        logits.get_shape().assert_is_compatible_with(onehot_labels.get_shape())

        onehot_labels = math_ops.cast(onehot_labels, logits.dtype)

        if label_smoothing > 0:
            num_classes = math_ops.cast(
                array_ops.shape(onehot_labels)[1], logits.dtype)
            smooth_positives = 1.0 - label_smoothing
            smooth_negatives = label_smoothing / num_classes
            onehot_labels = onehot_labels * smooth_positives + smooth_negatives

        losses = nn.softmax_cross_entropy_with_logits(logits,
                                                      onehot_labels,
                                                      name="xentropy")
        return compute_weighted_loss(losses, weights, scope=scope)
Example 17
def softmax_cross_entropy(
    onehot_labels, logits, weights=1.0, label_smoothing=0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES):
  """Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

  If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes:
      new_onehot_labels = onehot_labels * (1 - label_smoothing)
                          + label_smoothing / num_classes

  Args:
    onehot_labels: `[batch_size, num_classes]` target one-hot-encoded labels.
    logits: [batch_size, num_classes] logits outputs of the network.
    weights: Optional `Tensor` whose rank is either 0, or the same rank as
      `onehot_labels`, and must be broadcastable to `onehot_labels` (i.e., all
      dimensions must be either `1`, or the same as the corresponding `losses`
      dimension).
    label_smoothing: If greater than 0 then smooth the labels.
    scope: the scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.

  Returns:
    A scalar `Tensor` representing the mean loss value.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of `onehot_labels`
      or if the shape of `weights` is invalid or if `weights` is None.
  """
  with ops.name_scope(scope, "softmax_cross_entropy_loss",
                      (logits, onehot_labels, weights)) as scope:
    logits = ops.convert_to_tensor(logits)
    onehot_labels = math_ops.cast(onehot_labels, logits.dtype)
    logits.get_shape().assert_is_compatible_with(onehot_labels.get_shape())

    if label_smoothing > 0:
      num_classes = math_ops.cast(
          array_ops.shape(onehot_labels)[1], logits.dtype)
      smooth_positives = 1.0 - label_smoothing
      smooth_negatives = label_smoothing / num_classes
      onehot_labels = onehot_labels * smooth_positives + smooth_negatives

    losses = nn.softmax_cross_entropy_with_logits(labels=onehot_labels,
                                                  logits=logits,
                                                  name="xentropy")
    return compute_weighted_loss(losses, weights, scope, loss_collection)
Example 18
    def testApplyGradients(self):
        with ops.Graph().as_default(), self.cached_session() as sess:
            layer_collection = lc.LayerCollection()

            inputs = array_ops.ones((2, 1)) * 2
            weights_val = np.ones((1, 1), dtype=np.float32) * 3.
            weights = variable_scope.get_variable(
                'w', initializer=array_ops.constant(weights_val))
            bias = variable_scope.get_variable(
                'b', initializer=init_ops.zeros_initializer(), shape=(1, 1))
            output = math_ops.matmul(inputs, weights) + bias

            layer_collection.register_fully_connected((weights, bias), inputs,
                                                      output)

            logits = math_ops.tanh(output)
            targets = array_ops.constant([[0.], [1.]])
            output = math_ops.reduce_mean(
                nn.softmax_cross_entropy_with_logits(logits=logits,
                                                     labels=targets))

            layer_collection.register_categorical_predictive_distribution(
                logits)

            opt = optimizer.KfacOptimizer(0.1,
                                          0.2,
                                          0.3,
                                          layer_collection,
                                          momentum=0.5,
                                          momentum_type='regular')
            (cov_update_thunks,
             inv_update_thunks) = opt.make_vars_and_create_op_thunks()
            cov_update_ops = tuple(thunk() for thunk in cov_update_thunks)
            inv_update_ops = tuple(thunk() for thunk in inv_update_thunks)

            grads_and_vars = opt.compute_gradients(output, [weights, bias])
            all_vars = [grad_and_var[1] for grad_and_var in grads_and_vars]

            op = opt.apply_gradients(grads_and_vars)

            sess.run(tf_variables.global_variables_initializer())
            old_vars = sess.run(all_vars)
            sess.run(cov_update_ops)
            sess.run(inv_update_ops)
            sess.run(op)
            new_vars = sess.run(all_vars)

            for old_var, new_var in zip(old_vars, new_vars):
                self.assertNotEqual(old_var, new_var)
Example 19
  def testApplyGradients(self):
    with ops.Graph().as_default(), self.test_session() as sess:
      layer_collection = lc.LayerCollection()

      inputs = array_ops.ones((2, 1)) * 2
      weights_val = np.ones((1, 1), dtype=np.float32) * 3.
      weights = variable_scope.get_variable(
          'w', initializer=array_ops.constant(weights_val))
      bias = variable_scope.get_variable(
          'b', initializer=init_ops.zeros_initializer(), shape=(1, 1))
      output = math_ops.matmul(inputs, weights) + bias

      layer_collection.register_fully_connected((weights, bias), inputs, output)

      logits = math_ops.tanh(output)
      targets = array_ops.constant([[0.], [1.]])
      output = math_ops.reduce_mean(
          nn.softmax_cross_entropy_with_logits(logits=logits, labels=targets))

      layer_collection.register_categorical_predictive_distribution(logits)

      opt = optimizer.KfacOptimizer(
          0.1,
          0.2,
          0.3,
          layer_collection,
          momentum=0.5,
          momentum_type='regular')
      (cov_update_thunks,
       inv_update_thunks) = opt.make_vars_and_create_op_thunks()
      cov_update_ops = tuple(thunk() for thunk in cov_update_thunks)
      inv_update_ops = tuple(thunk() for thunk in inv_update_thunks)

      grads_and_vars = opt.compute_gradients(output, [weights, bias])
      all_vars = [grad_and_var[1] for grad_and_var in grads_and_vars]

      op = opt.apply_gradients(grads_and_vars)

      sess.run(tf_variables.global_variables_initializer())
      old_vars = sess.run(all_vars)
      sess.run(cov_update_ops)
      sess.run(inv_update_ops)
      sess.run(op)
      new_vars = sess.run(all_vars)

      for old_var, new_var in zip(old_vars, new_vars):
        self.assertNotEqual(old_var, new_var)
Example 20
def softmax_cross_entropy(logits,
                          onehot_labels,
                          weights=1.0,
                          label_smoothing=0,
                          scope=None):
  """Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of size [`batch_size`], then the loss weights apply to each
  corresponding sample.

  If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes:
      new_onehot_labels = onehot_labels * (1 - label_smoothing)
                          + label_smoothing / num_classes

  Args:
    logits: [batch_size, num_classes] logits outputs of the network.
    onehot_labels: [batch_size, num_classes] one-hot-encoded labels.
    weights: Coefficients for the loss. The tensor must be a scalar or a tensor
      of shape [batch_size].
    label_smoothing: If greater than 0 then smooth the labels.
    scope: the scope for the operations performed in computing the loss.

  Returns:
    A scalar `Tensor` representing the mean loss value.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of `onehot_labels`
      or if the shape of `weights` is invalid or if `weights` is None.
  """
  with ops.name_scope(scope, "softmax_cross_entropy_loss",
                      [logits, onehot_labels, weights]) as scope:
    logits.get_shape().assert_is_compatible_with(onehot_labels.get_shape())

    onehot_labels = math_ops.cast(onehot_labels, logits.dtype)

    if label_smoothing > 0:
      num_classes = math_ops.cast(
          array_ops.shape(onehot_labels)[1], logits.dtype)
      smooth_positives = 1.0 - label_smoothing
      smooth_negatives = label_smoothing / num_classes
      onehot_labels = onehot_labels * smooth_positives + smooth_negatives

    losses = nn.softmax_cross_entropy_with_logits(
        labels=onehot_labels, logits=logits, name="xentropy")
    return compute_weighted_loss(losses, weights, scope=scope)
Example 21
    def testNpairs(self):
        with self.cached_session():
            num_data = 15
            feat_dim = 6
            num_classes = 5
            reg_lambda = 0.02

            embeddings_anchor = np.random.rand(num_data,
                                               feat_dim).astype(np.float32)
            embeddings_positive = np.random.rand(num_data,
                                                 feat_dim).astype(np.float32)

            labels = np.random.randint(0, num_classes,
                                       size=(num_data)).astype(np.float32)
            # Reshape labels to compute adjacency matrix.
            labels_reshaped = np.reshape(labels, (labels.shape[0], 1))

            # Compute the loss in NP
            reg_term = np.mean(np.sum(np.square(embeddings_anchor), 1))
            reg_term += np.mean(np.sum(np.square(embeddings_positive), 1))
            reg_term *= 0.25 * reg_lambda

            similarity_matrix = np.matmul(embeddings_anchor,
                                          embeddings_positive.T)

            labels_remapped = np.equal(labels_reshaped,
                                       labels_reshaped.T).astype(np.float32)
            labels_remapped /= np.sum(labels_remapped, axis=1, keepdims=True)

            xent_loss = math_ops.reduce_mean(
                nn.softmax_cross_entropy_with_logits(
                    logits=ops.convert_to_tensor(similarity_matrix),
                    labels=ops.convert_to_tensor(labels_remapped))).eval()
            loss_np = xent_loss + reg_term

            # Compute the loss in TF
            loss_tf = metric_learning.npairs_loss(
                labels=ops.convert_to_tensor(labels),
                embeddings_anchor=ops.convert_to_tensor(embeddings_anchor),
                embeddings_positive=ops.convert_to_tensor(embeddings_positive),
                reg_lambda=reg_lambda)
            loss_tf = loss_tf.eval()
            self.assertAllClose(loss_np, loss_tf)
Example 22
def npairs_loss_fan(labels,
                    embeddings_anchor,
                    embeddings_positive,
                    reg_lambda=3e-3,
                    print_losses=False,
                    hard_ori=False,
                    HardOrNot=None):
    # pylint: enable=line-too-long
    # Add the regularizer on the embedding.
    reg_anchor = math_ops.reduce_mean(
        math_ops.reduce_sum(math_ops.square(embeddings_anchor), 1))
    reg_positive = math_ops.reduce_mean(
        math_ops.reduce_sum(math_ops.square(embeddings_positive), 1))
    l2loss = math_ops.multiply(0.25 * reg_lambda,
                               reg_anchor + reg_positive,
                               name='l2loss')

    # Get per pair similarities.
    similarity_matrix = math_ops.matmul(embeddings_anchor,
                                        embeddings_positive,
                                        transpose_a=False,
                                        transpose_b=True)

    # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
    lshape = array_ops.shape(labels)
    assert lshape.shape == 1
    labels = array_ops.reshape(labels, [lshape[0], 1])

    labels_remapped = math_ops.to_float(
        math_ops.not_equal(labels, array_ops.transpose(labels)))
    # labels_remapped /= math_ops.reduce_sum(labels_remapped, 1, keepdims=True)

    # Add the softmax loss.
    xent_loss = nn.softmax_cross_entropy_with_logits(logits=similarity_matrix,
                                                     labels=labels_remapped)
    xent_loss = math_ops.reduce_mean(xent_loss, name='xentropy')
    if hard_ori:
        xent_loss = tf.multiply(xent_loss, HardOrNot)
    if print_losses:
        xent_loss = logging_ops.Print(
            xent_loss, ['cross entropy:', xent_loss, 'l2loss:', l2loss])

    return l2loss + xent_loss
Example 23
    def testNpairsMultiLabel(self):
        with self.cached_session():
            num_data = 15
            feat_dim = 6
            num_classes = 10
            reg_lambda = 0.02

            embeddings_anchor = np.random.rand(num_data,
                                               feat_dim).astype(np.float32)
            embeddings_positive = np.random.rand(num_data,
                                                 feat_dim).astype(np.float32)

            labels = np.random.randint(0, 2, (num_data, num_classes))
            # Set the last column to one so that each row has at least one bit set.
            labels[:, -1] = 1

            # Compute the loss in NP
            reg_term = np.mean(np.sum(np.square(embeddings_anchor), 1))
            reg_term += np.mean(np.sum(np.square(embeddings_positive), 1))
            reg_term *= 0.25 * reg_lambda

            similarity_matrix = np.matmul(embeddings_anchor,
                                          embeddings_positive.T)

            labels_remapped = np.dot(labels, labels.T).astype(np.float)
            labels_remapped /= np.sum(labels_remapped, 1, keepdims=True)

            xent_loss = math_ops.reduce_mean(
                nn.softmax_cross_entropy_with_logits(
                    logits=ops.convert_to_tensor(similarity_matrix),
                    labels=ops.convert_to_tensor(labels_remapped))).eval()
            loss_np = xent_loss + reg_term

            # Compute the loss in TF
            loss_tf = metric_learning.npairs_loss_multilabel(
                sparse_labels=convert_to_list_of_sparse_tensor(labels),
                embeddings_anchor=ops.convert_to_tensor(embeddings_anchor),
                embeddings_positive=ops.convert_to_tensor(embeddings_positive),
                reg_lambda=reg_lambda)
            loss_tf = loss_tf.eval()

            self.assertAllClose(loss_np, loss_tf)
Example 24
    def __init__(self,
                 cfg,
                 word_embd,
                 max_ques_len,
                 input_producer,
                 generated=None):
        batch_size = cfg.batch_size
        vocab_size = len(word_embd)
        with tf.variable_scope('disc'):
            word_embd = tf.get_variable(
                'word_embd',
                shape=word_embd.shape,
                initializer=tf.constant_initializer(word_embd))
            if generated:
                self.ques = generated['ques']
                self.ques_len = generated['ques_len']

                # soft embedding_lookup
                ques = tf.reshape(self.ques, [-1, vocab_size])
                ques = tf.matmul(ques, word_embd)
                ques = tf.reshape(ques, [batch_size, -1, cfg.embed_dim])
            else:
                self.ques = tf.placeholder(tf.int32,
                                           shape=[None, max_ques_len],
                                           name='question')
                self.ques_len = tf.placeholder(tf.int32,
                                               shape=[None],
                                               name='question_length')
                ques = embedding_lookup(word_embd, self.ques)
            self.answ = input_producer.answ_disc
            cell = GRUCell(cfg.hidden_size)
            _, state = dynamic_rnn(cell,
                                   ques,
                                   sequence_length=self.ques_len,
                                   dtype=tf.float32)
            output_layer = Dense(vocab_size)
            logits = output_layer(state)
            labels = tf.one_hot(self.answ, vocab_size)
            self.pred = tf.argmax(logits, 1)
            loss = softmax_cross_entropy_with_logits(labels=labels,
                                                     logits=logits)
            self.loss = tf.reduce_mean(loss)
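The "soft embedding_lookup" above replaces an index-based lookup with a matmul against a distribution over the vocabulary, which keeps generated questions differentiable. A short NumPy sketch of why the two agree for a hard one-hot row (made-up sizes, not from the source):

import numpy as np

vocab_size, embed_dim = 5, 3
word_embd = np.arange(vocab_size * embed_dim, dtype=np.float32).reshape(
    vocab_size, embed_dim)

token_id = 2
one_hot = np.eye(vocab_size, dtype=np.float32)[token_id]

# A matmul with a one-hot row selects the same vector as an embedding lookup;
# with a soft distribution it instead returns a convex mix of embeddings.
assert np.allclose(one_hot @ word_embd, word_embd[token_id])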
Example 25
def deprecated_flipped_softmax_cross_entropy_with_logits(
        logits, labels, dim=-1, name=None):
    """Computes softmax cross entropy between `logits` and `labels`.

  This function differs from tf.nn.softmax_cross_entropy_with_logits only in the
  argument order.

  Measures the probability error in discrete classification tasks in which the
  classes are mutually exclusive (each entry is in exactly one class).  For
  example, each CIFAR-10 image is labeled with one and only one label: an image
  can be a dog or a truck, but not both.

  **NOTE:**  While the classes are mutually exclusive, their probabilities
  need not be.  All that is required is that each row of `labels` is
  a valid probability distribution.  If they are not, the computation of the
  gradient will be incorrect.

  If using exclusive `labels` (wherein one and only
  one class is true at a time), see `sparse_softmax_cross_entropy_with_logits`.

  **WARNING:** This op expects unscaled logits, since it performs a `softmax`
  on `logits` internally for efficiency.  Do not call this op with the
  output of `softmax`, as it will produce incorrect results.

  `logits` and `labels` must have the same shape `[batch_size, num_classes]`
  and the same dtype (either `float16`, `float32`, or `float64`).

  Args:
    logits: Unscaled log probabilities.
    labels: Each row `labels[i]` must be a valid probability distribution.
    dim: The class dimension. Defaulted to -1 which is the last dimension.
    name: A name for the operation (optional).

  Returns:
    A 1-D `Tensor` of length `batch_size` of the same type as `logits` with the
    softmax cross entropy loss.
  """
    return nn.softmax_cross_entropy_with_logits(logits=logits,
                                                labels=labels,
                                                dim=dim,
                                                name=name)
Example 26
def deprecated_flipped_softmax_cross_entropy_with_logits(logits,
                                                         labels,
                                                         dim=-1,
                                                         name=None):
  """Computes softmax cross entropy between `logits` and `labels`.

  This function differs from tf.nn.softmax_cross_entropy_with_logits only in the
  argument order.

  Measures the probability error in discrete classification tasks in which the
  classes are mutually exclusive (each entry is in exactly one class).  For
  example, each CIFAR-10 image is labeled with one and only one label: an image
  can be a dog or a truck, but not both.

  **NOTE:**  While the classes are mutually exclusive, their probabilities
  need not be.  All that is required is that each row of `labels` is
  a valid probability distribution.  If they are not, the computation of the
  gradient will be incorrect.

  If using exclusive `labels` (wherein one and only
  one class is true at a time), see `sparse_softmax_cross_entropy_with_logits`.

  **WARNING:** This op expects unscaled logits, since it performs a `softmax`
  on `logits` internally for efficiency.  Do not call this op with the
  output of `softmax`, as it will produce incorrect results.

  `logits` and `labels` must have the same shape `[batch_size, num_classes]`
  and the same dtype (either `float16`, `float32`, or `float64`).

  Args:
    logits: Unscaled log probabilities.
    labels: Each row `labels[i]` must be a valid probability distribution.
    dim: The class dimension. Defaulted to -1 which is the last dimension.
    name: A name for the operation (optional).

  Returns:
    A 1-D `Tensor` of length `batch_size` of the same type as `logits` with the
    softmax cross entropy loss.
  """
  return nn.softmax_cross_entropy_with_logits(
      labels=labels, logits=logits, dim=dim, name=name)
  def testNpairs(self):
    with self.test_session():
      num_data = 15
      feat_dim = 6
      num_classes = 5
      reg_lambda = 0.02

      embeddings_anchor = np.random.rand(num_data, feat_dim).astype(np.float32)
      embeddings_positive = np.random.rand(num_data, feat_dim).astype(
          np.float32)

      labels = np.random.randint(
          0, num_classes, size=(num_data)).astype(np.float32)
      # Reshape labels to compute adjacency matrix.
      labels_reshaped = np.reshape(labels, (labels.shape[0], 1))

      # Compute the loss in NP
      reg_term = np.mean(np.sum(np.square(embeddings_anchor), 1))
      reg_term += np.mean(np.sum(np.square(embeddings_positive), 1))
      reg_term *= 0.25 * reg_lambda

      similarity_matrix = np.matmul(embeddings_anchor, embeddings_positive.T)

      labels_remapped = np.equal(
          labels_reshaped, labels_reshaped.T).astype(np.float32)
      labels_remapped /= np.sum(labels_remapped, axis=1, keepdims=True)

      xent_loss = math_ops.reduce_mean(nn.softmax_cross_entropy_with_logits(
          logits=ops.convert_to_tensor(similarity_matrix),
          labels=ops.convert_to_tensor(labels_remapped))).eval()
      loss_np = xent_loss + reg_term

      # Compute the loss in TF
      loss_tf = metric_loss_ops.npairs_loss(
          labels=ops.convert_to_tensor(labels),
          embeddings_anchor=ops.convert_to_tensor(embeddings_anchor),
          embeddings_positive=ops.convert_to_tensor(embeddings_positive),
          reg_lambda=reg_lambda)
      loss_tf = loss_tf.eval()
      self.assertAllClose(loss_np, loss_tf)
  def testNpairsMultiLabel(self):
    with self.test_session():
      num_data = 15
      feat_dim = 6
      num_classes = 10
      reg_lambda = 0.02

      embeddings_anchor = np.random.rand(num_data, feat_dim).astype(np.float32)
      embeddings_positive = np.random.rand(num_data, feat_dim).astype(
          np.float32)

      labels = np.random.randint(0, 2, (num_data, num_classes))
      # Set the last column to one so that each row has at least one bit set.
      labels[:, -1] = 1

      # Compute the loss in NP
      reg_term = np.mean(np.sum(np.square(embeddings_anchor), 1))
      reg_term += np.mean(np.sum(np.square(embeddings_positive), 1))
      reg_term *= 0.25 * reg_lambda

      similarity_matrix = np.matmul(embeddings_anchor, embeddings_positive.T)

      labels_remapped = np.dot(labels, labels.T).astype(np.float)
      labels_remapped /= np.sum(labels_remapped, 1, keepdims=True)

      xent_loss = math_ops.reduce_mean(nn.softmax_cross_entropy_with_logits(
          logits=ops.convert_to_tensor(similarity_matrix),
          labels=ops.convert_to_tensor(labels_remapped))).eval()
      loss_np = xent_loss + reg_term

      # Compute the loss in TF
      loss_tf = metric_loss_ops.npairs_loss_multilabel(
          sparse_labels=convert_to_list_of_sparse_tensor(labels),
          embeddings_anchor=ops.convert_to_tensor(embeddings_anchor),
          embeddings_positive=ops.convert_to_tensor(embeddings_positive),
          reg_lambda=reg_lambda)
      loss_tf = loss_tf.eval()

      self.assertAllClose(loss_np, loss_tf)
def npairs_loss_multilabel(sparse_labels,
                           embeddings_anchor,
                           embeddings_positive,
                           reg_lambda=0.002,
                           print_losses=False):
    r"""Computes the npairs loss with multilabel data.

  Npairs loss expects paired data where a pair is composed of samples from the
  same label and each pair in the minibatch has a different label. The loss
  has two components. The first component is the L2 regularizer on the
  embedding vectors. The second component is the sum of cross entropy loss
  which takes each row of the pair-wise similarity matrix as logits and
  the remapped one-hot labels as labels. Here, the similarity is defined by the
  dot product between two embedding vectors. S_{i,j} = f(x_i)^T f(x_j)

  To deal with multilabel inputs, we use the count of the label intersection,
  i.e. L_{i,j} = | set_of_labels_for(i) \cap set_of_labels_for(j) |.
  Then we normalize each row of the count-based label matrix so that each row
  sums to one.

  Args:
    sparse_labels: List of 1-D Boolean `SparseTensor` of dense_shape
                   [batch_size/2, num_classes] labels for the anchor-pos pairs.
    embeddings_anchor: 2-D `Tensor` of shape [batch_size/2, embedding_dim] for
      the embedding vectors for the anchor images. Embeddings should not be
      l2 normalized.
    embeddings_positive: 2-D `Tensor` of shape [batch_size/2, embedding_dim] for
      the embedding vectors for the positive images. Embeddings should not be
      l2 normalized.
    reg_lambda: Float. L2 regularization term on the embedding vectors.
    print_losses: Boolean. Option to print the xent and l2loss.

  Returns:
    npairs_loss: tf.float32 scalar.
  Raises:
    TypeError: When the specified sparse_labels is not a `SparseTensor`.
  """
    if False in [
            isinstance(l, sparse_tensor.SparseTensor) for l in sparse_labels
    ]:
        raise TypeError(
            'sparse_labels must be a list of SparseTensors, but got %s' %
            str(sparse_labels))

    with ops.name_scope('NpairsLossMultiLabel'):
        # Add the regularizer on the embedding.
        reg_anchor = math_ops.reduce_mean(
            math_ops.reduce_sum(math_ops.square(embeddings_anchor), 1))
        reg_positive = math_ops.reduce_mean(
            math_ops.reduce_sum(math_ops.square(embeddings_positive), 1))
        l2loss = math_ops.multiply(0.25 * reg_lambda,
                                   reg_anchor + reg_positive,
                                   name='l2loss')

        # Get per pair similarities.
        similarity_matrix = math_ops.matmul(embeddings_anchor,
                                            embeddings_positive,
                                            transpose_a=False,
                                            transpose_b=True)

        # TODO(coreylynch): need to check the sparse values
        # TODO(coreylynch): are composed only of 0's and 1's.

        multilabel_adjacency_matrix = _build_multilabel_adjacency(
            sparse_labels)
        labels_remapped = math_ops.to_float(multilabel_adjacency_matrix)
        labels_remapped /= math_ops.reduce_sum(labels_remapped,
                                               1,
                                               keepdims=True)

        # Add the softmax loss.
        xent_loss = nn.softmax_cross_entropy_with_logits(
            logits=similarity_matrix, labels=labels_remapped)
        xent_loss = math_ops.reduce_mean(xent_loss, name='xentropy')

        if print_losses:
            xent_loss = logging_ops.Print(
                xent_loss, ['cross entropy:', xent_loss, 'l2loss:', l2loss])

        return l2loss + xent_loss
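For the multilabel case, the docstring defines the adjacency as the size of the label-set intersection, which for binary indicator labels is simply a matrix product. A NumPy sketch of building and row-normalizing that matrix, mirroring what _build_multilabel_adjacency plus the normalization above are expected to produce (illustrative labels, not from the source):

import numpy as np

# Binary indicator labels for four examples over three classes (illustrative).
labels = np.array([[1, 0, 1],
                   [0, 1, 0],
                   [1, 0, 0],
                   [0, 1, 1]], dtype=np.float32)

# The intersection count for 0/1 indicator rows is just a dot product.
intersection_counts = labels @ labels.T
targets = intersection_counts / intersection_counts.sum(axis=1, keepdims=True)
print(targets)  # each row sums to one and serves as the soft target row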
def dnn_sampled_softmax_classifier_model_fn(features, target_indices,
                                            mode, params):
  """model_fn that uses candidate sampling.

  Args:
    features: Single Tensor or dict of Tensor (depends on data passed to `fit`)
    target_indices: A single Tensor of shape [batch_size, n_labels] containing
      the target indices.
    mode: Represents whether this is training, evaluation or prediction. See `ModeKeys`.
    params: A dict of hyperparameters that are listed below.
      hidden_units- List of hidden units per layer. All layers are fully
        connected. Ex. `[64, 32]` means first layer has 64 nodes and second one
        has 32.
      feature_columns- An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      n_classes- number of target classes. It must be greater than 2.
      n_samples- number of sample target classes. Needs to be tuned - A good
        starting point could be 2% of n_classes.
      n_labels- number of labels in each example.
      top_k- The number of classes to predict.
      optimizer- An instance of `tf.Optimizer` used to train the model. If
        `None`, will use an Adagrad optimizer.
      dropout- When not `None`, the probability we will drop out a given
        coordinate.
      gradient_clip_norm- A float > 0. If provided, gradients are
        clipped to their global norm with this clipping ratio. See
        tf.clip_by_global_norm for more details.
      num_ps_replicas- The number of parameter server replicas.

  Returns:
    predictions: A single Tensor or a dict of Tensors.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """

  hidden_units = params["hidden_units"]
  feature_columns = params["feature_columns"]
  n_classes = params["n_classes"]
  n_samples = params["n_samples"]
  n_labels = params["n_labels"]
  top_k = params["top_k"]
  optimizer = params["optimizer"]
  dropout = params["dropout"]
  gradient_clip_norm = params["gradient_clip_norm"]
  num_ps_replicas = params["num_ps_replicas"]

  parent_scope = "dnn_ss"

  # Setup the input layer partitioner.
  input_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  # Create the input layer.
  with variable_scope.variable_scope(
      parent_scope + "/input_from_feature_columns",
      features.values(),
      partitioner=input_layer_partitioner) as scope:
    net = layers.input_from_feature_columns(
        features,
        feature_columns,
        weight_collections=[parent_scope],
        scope=scope)

  # Setup the hidden layer partitioner.
  hidden_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas))

  final_hidden_layer_dim = None
  # Create hidden layers using fully_connected.
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        parent_scope + "/hiddenlayer_%d" % layer_id, [net],
        partitioner=hidden_layer_partitioner) as scope:
      net = layers.fully_connected(net,
                                   num_hidden_units,
                                   variables_collections=[parent_scope],
                                   scope=scope)
      final_hidden_layer_dim = num_hidden_units
      # Add dropout if it is enabled.
      if dropout is not None and mode == estimator.ModeKeys.TRAIN:
        net = layers.dropout(net, keep_prob=(1.0 - dropout))

  # Create the weights and biases for the logit layer.
  with variable_scope.variable_scope(
      parent_scope + "/logits", [net],
      partitioner=hidden_layer_partitioner) as scope:
    dtype = net.dtype.base_dtype
    weights_shape = [n_classes, final_hidden_layer_dim]
    weights = variables.model_variable(
        "weights",
        shape=weights_shape,
        dtype=dtype,
        initializer=initializers.xavier_initializer(),
        trainable=True,
        collections=[parent_scope])
    biases = variables.model_variable(
        "biases",
        shape=[n_classes,],
        dtype=dtype,
        initializer=init_ops.zeros_initializer,
        trainable=True,
        collections=[parent_scope])

  if mode == estimator.ModeKeys.TRAIN:
    # Call the candidate sampling APIs and calculate the loss.
    sampled_values = nn.learned_unigram_candidate_sampler(
        true_classes=math_ops.to_int64(target_indices),
        num_true=n_labels,
        num_sampled=n_samples,
        unique=True,
        range_max=n_classes)

    sampled_softmax_loss = nn.sampled_softmax_loss(
        weights=weights,
        biases=biases,
        inputs=net,
        labels=math_ops.to_int64(target_indices),
        num_sampled=n_samples,
        num_classes=n_classes,
        num_true=n_labels,
        sampled_values=sampled_values)

    loss = math_ops.reduce_mean(sampled_softmax_loss, name="loss")

    train_op = optimizers.optimize_loss(
        loss=loss, global_step=contrib_framework.get_global_step(),
        learning_rate=_DEFAULT_LEARNING_RATE,
        optimizer=_get_optimizer(optimizer), clip_gradients=gradient_clip_norm,
        name=parent_scope)
    return None, loss, train_op

  elif mode == estimator.ModeKeys.EVAL:
    logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)),
                         biases)
    predictions = {}
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)
    _, predictions[_TOP_K] = nn.top_k(logits, top_k)

    # Since the targets have multiple labels, setup the target probabilities
    # as 1.0/n_labels for each of the labels.
    target_one_hot = array_ops.one_hot(
        indices=target_indices,
        depth=n_classes,
        on_value=1.0 / n_labels)
    target_one_hot = math_ops.reduce_sum(
        input_tensor=target_one_hot,
        reduction_indices=[1])

    loss = math_ops.reduce_mean(
        nn.softmax_cross_entropy_with_logits(logits, target_one_hot))

    return predictions, loss, None

  elif mode == estimator.ModeKeys.INFER:
    logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)),
                         biases)
    predictions = {}
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)
    _, predictions[_TOP_K] = nn.top_k(logits, top_k)

    return predictions, None, None
Esempio n. 31
0
def dnn_sampled_softmax_classifier_model_fn(features, target_indices,
                                            mode, params):
  """model_fn that uses candidate sampling.

  Args:
    features: Single Tensor or dict of Tensor (depends on data passed to `fit`)
    target_indices: A single Tensor of shape [batch_size, n_labels] containing
      the target indices.
    mode: Represents whether this is training, evaluation or prediction. See
      `ModeKeys`.
    params: A dict of hyperparameters that are listed below.
      hidden_units- List of hidden units per layer. All layers are fully
        connected. Ex. `[64, 32]` means first layer has 64 nodes and second one
        has 32.
      feature_columns- An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      n_classes- number of target classes. It must be greater than 2.
      n_samples- number of sample target classes. Needs to be tuned - A good
        starting point could be 2% of n_classes.
      n_labels- number of labels in each example.
      top_k- The number of classes to predict.
      optimizer- An instance of `tf.Optimizer` used to train the model. If
        `None`, will use an Adagrad optimizer.
      dropout- When not `None`, the probability we will drop out a given
        coordinate.
      gradient_clip_norm- A float > 0. If provided, gradients are
        clipped to their global norm with this clipping ratio. See
        tf.clip_by_global_norm for more details.
      num_ps_replicas- The number of parameter server replicas.

  Returns:
    predictions: A single Tensor or a dict of Tensors.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """

  hidden_units = params["hidden_units"]
  feature_columns = params["feature_columns"]
  n_classes = params["n_classes"]
  n_samples = params["n_samples"]
  n_labels = params["n_labels"]
  top_k = params["top_k"]
  optimizer = params["optimizer"]
  dropout = params["dropout"]
  gradient_clip_norm = params["gradient_clip_norm"]
  num_ps_replicas = params["num_ps_replicas"]

  parent_scope = "dnn_ss"

  # Setup the input layer partitioner.
  input_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  # Create the input layer.
  with variable_scope.variable_scope(
      parent_scope + "/input_from_feature_columns",
      features.values(),
      partitioner=input_layer_partitioner) as scope:
    net = layers.input_from_feature_columns(
        features,
        feature_columns,
        weight_collections=[parent_scope],
        scope=scope)

  # Setup the hidden layer partitioner.
  hidden_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas))

  final_hidden_layer_dim = None
  # Create hidden layers using fully_connected.
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        parent_scope + "/hiddenlayer_%d" % layer_id, [net],
        partitioner=hidden_layer_partitioner) as scope:
      net = layers.fully_connected(net,
                                   num_hidden_units,
                                   variables_collections=[parent_scope],
                                   scope=scope)
      final_hidden_layer_dim = num_hidden_units
      # Add dropout if it is enabled.
      if dropout is not None and mode == estimator.ModeKeys.TRAIN:
        net = layers.dropout(net, keep_prob=(1.0 - dropout))

  # Create the weights and biases for the logit layer.
  with variable_scope.variable_scope(
      parent_scope + "/logits", [net],
      partitioner=hidden_layer_partitioner) as scope:
    dtype = net.dtype.base_dtype
    weights_shape = [n_classes, final_hidden_layer_dim]
    weights = variables.model_variable(
        "weights",
        shape=weights_shape,
        dtype=dtype,
        initializer=initializers.xavier_initializer(),
        trainable=True,
        collections=[parent_scope])
    biases = variables.model_variable(
        "biases",
        shape=[n_classes,],
        dtype=dtype,
        initializer=init_ops.zeros_initializer,
        trainable=True,
        collections=[parent_scope])

  if mode == estimator.ModeKeys.TRAIN:
    # Call the candidate sampling APIs and calculate the loss.
    sampled_values = nn.learned_unigram_candidate_sampler(
        true_classes=math_ops.to_int64(target_indices),
        num_true=n_labels,
        num_sampled=n_samples,
        unique=True,
        range_max=n_classes)

    sampled_softmax_loss = nn.sampled_softmax_loss(
        weights=weights,
        biases=biases,
        inputs=net,
        labels=math_ops.to_int64(target_indices),
        num_sampled=n_samples,
        num_classes=n_classes,
        num_true=n_labels,
        sampled_values=sampled_values)

    loss = math_ops.reduce_mean(sampled_softmax_loss, name="loss")

    train_op = optimizers.optimize_loss(
        loss=loss, global_step=contrib_framework.get_global_step(),
        learning_rate=_DEFAULT_LEARNING_RATE,
        optimizer=_get_optimizer(optimizer), clip_gradients=gradient_clip_norm,
        name=parent_scope)
    return None, loss, train_op

  elif mode == estimator.ModeKeys.EVAL:
    logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)),
                         biases)
    predictions = {}
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)
    _, predictions[_TOP_K] = nn.top_k(logits, top_k)

    # Since the targets have multiple labels, setup the target probabilities
    # as 1.0/n_labels for each of the labels.
    target_one_hot = array_ops.one_hot(
        indices=target_indices,
        depth=n_classes,
        on_value=1.0 / n_labels)
    target_one_hot = math_ops.reduce_sum(
        input_tensor=target_one_hot,
        reduction_indices=[1])

    loss = math_ops.reduce_mean(
        nn.softmax_cross_entropy_with_logits(logits=logits,
                                             labels=target_one_hot))

    return predictions, loss, None

  elif mode == estimator.ModeKeys.INFER:
    logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)),
                         biases)
    predictions = {}
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)
    _, predictions[_TOP_K] = nn.top_k(logits, top_k)

    return predictions, None, None
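
The hyperparameters listed in the docstring arrive through a plain dict. A
hypothetical configuration is sketched below; the values and the single
real-valued feature column are illustrative placeholders, not settings taken
from the original code:

from tensorflow.contrib import layers

# Hypothetical params for dnn_sampled_softmax_classifier_model_fn.
params = {
    "hidden_units": [256, 128],            # two fully connected layers
    "feature_columns": [layers.real_valued_column("x", dimension=10)],
    "n_classes": 50000,                    # large label vocabulary
    "n_samples": 1000,                     # roughly 2% of n_classes
    "n_labels": 3,                         # labels per example
    "top_k": 5,
    "optimizer": None,                     # None falls back to Adagrad
    "dropout": 0.5,
    "gradient_clip_norm": 5.0,
    "num_ps_replicas": 0,
}
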
Esempio n. 32
0
def npairs_loss_multilabel(sparse_labels, embeddings_anchor,
                           embeddings_positive, reg_lambda=0.002,
                           print_losses=False):
  r"""Computes the npairs loss with multilabel data.

  Npairs loss expects paired data where a pair is composed of samples from the
  same label and each pair in the minibatch has a different label. The loss
  has two components. The first component is the L2 regularizer on the
  embedding vectors. The second component is the sum of cross entropy loss
  which takes each row of the pair-wise similarity matrix as logits and
  the remapped one-hot labels as labels. Here, the similarity is defined by the
  dot product between two embedding vectors: S_{i,j} = f(x_i)^T f(x_j).

  To deal with multilabel inputs, we use the count of label intersection,
  i.e. L_{i,j} = | set_of_labels_for(i) \cap set_of_labels_for(j) |.
  Then we normalize each row of the count-based label matrix so that each
  row sums to one.

  Args:
    sparse_labels: List of 1-D Boolean `SparseTensor` of dense_shape
                   [batch_size/2, num_classes] labels for the anchor-pos pairs.
    embeddings_anchor: 2-D `Tensor` of shape [batch_size/2, embedding_dim] for
      the embedding vectors for the anchor images. Embeddings should not be
      l2 normalized.
    embeddings_positive: 2-D `Tensor` of shape [batch_size/2, embedding_dim] for
      the embedding vectors for the positive images. Embeddings should not be
      l2 normalized.
    reg_lambda: Float. L2 regularization term on the embedding vectors.
    print_losses: Boolean. Option to print the xent and l2loss.

  Returns:
    npairs_loss: tf.float32 scalar.
  Raises:
    TypeError: When the specified sparse_labels is not a `SparseTensor`.
  """
  if False in [isinstance(
      l, sparse_tensor.SparseTensor) for l in sparse_labels]:
    raise TypeError(
        'sparse_labels must be a list of SparseTensors, but got %s' % str(
            sparse_labels))

  with ops.name_scope('NpairsLossMultiLabel'):
    # Add the regularizer on the embedding.
    reg_anchor = math_ops.reduce_mean(
        math_ops.reduce_sum(math_ops.square(embeddings_anchor), 1))
    reg_positive = math_ops.reduce_mean(
        math_ops.reduce_sum(math_ops.square(embeddings_positive), 1))
    l2loss = math_ops.multiply(0.25 * reg_lambda,
                               reg_anchor + reg_positive, name='l2loss')

    # Get per pair similarities.
    similarity_matrix = math_ops.matmul(
        embeddings_anchor, embeddings_positive, transpose_a=False,
        transpose_b=True)

    # TODO(coreylynch): need to check the sparse values
    # TODO(coreylynch): are composed only of 0's and 1's.

    multilabel_adjacency_matrix = _build_multilabel_adjacency(sparse_labels)
    labels_remapped = math_ops.to_float(multilabel_adjacency_matrix)
    labels_remapped /= math_ops.reduce_sum(labels_remapped, 1, keepdims=True)

    # Add the softmax loss.
    xent_loss = nn.softmax_cross_entropy_with_logits(
        logits=similarity_matrix, labels=labels_remapped)
    xent_loss = math_ops.reduce_mean(xent_loss, name='xentropy')

    if print_losses:
      xent_loss = logging_ops.Print(
          xent_loss, ['cross entropy:', xent_loss, 'l2loss:', l2loss])

    return l2loss + xent_loss
Esempio n. 33
0
    def __init__(self, input_producer, embed_mat, config, is_train):
        x_enc = input_producer.x_enc
        x_dec = input_producer.x_dec
        y_dec = input_producer.y_dec
        len_enc = input_producer.len_enc
        len_dec = input_producer.len_dec
        self.answer = input_producer.answ_disc

        max_len = input_producer.seq_max_length
        vocab_num = input_producer.vocab_num
        config.update(**dict(max_len=max_len, vocab_num=vocab_num))
        # import ipdb; ipdb.set_trace()
        self.kl_weight = tf.Variable(0.0, name="KL_weight")
        self.input_ids = y_dec

        modeler = CtrlVAEModelingHelper(config, embed_mat)

        with tf.variable_scope("CtrlVAE"):

            ### VAE ############################################################

            # encoder
            x_enc_onehot = tf.one_hot(x_enc, vocab_num)
            out_tuple = modeler.encoder(x_enc_onehot=x_enc_onehot,
                                        len_enc=len_enc)
            (vae_z, vae_mu, vae_logvar) = out_tuple

            # holistic representation
            with tf.device("/cpu:0"):
                vae_c = embedding_lookup(modeler.embed, self.answer)
            vae_c = tf.reshape(vae_c, [config.batch_size, -1])
            vae_represent = tf.concat([vae_z, vae_c], axis=1)

            # decoder
            x_dec_onehot = tf.one_hot(x_dec, config.vocab_num)
            out_tuple = modeler.decoder(initial_state=vae_represent,
                                        x_dec_onehot=x_dec_onehot,
                                        len_dec=len_dec,
                                        is_teacher_forcing=True)

            (vae_outputs, vae_state, vae_outputs_len) = out_tuple  # final
            (self.vae_output, self.vae_sample) = vae_outputs

            ### Generator ######################################################

            # random z and c from the prior
            self.gen_z = tf.random_normal(
                [config.batch_size, config.hidden_size])
            self.gen_c = vae_c
            gen_represent = tf.concat([self.gen_z, self.gen_c], axis=1)

            # generator (decoder)
            x_dec_onehot = tf.one_hot(x_dec, config.vocab_num)
            out_tuple = modeler.decoder(initial_state=gen_represent,
                                        x_dec_onehot=x_dec_onehot,
                                        len_dec=len_dec,
                                        is_teacher_forcing=True,
                                        reuse=True)

            (gen_outputs, gen_state, gen_outputs_len) = out_tuple  # final
            (self.gen_output, self.gen_sample) = gen_outputs
            gen_outputs_onehot = softmax(self.gen_output / ALMOST_ZERO)

            # discriminator (for c code)
            out_tuple = modeler.discriminator(inputs=gen_outputs_onehot,
                                              inputs_length=gen_outputs_len)
            (self.gen_c_output, self.gen_c_sample) = out_tuple

            # encoder again (for z code ; additional discriminator)
            out_tuple = modeler.encoder(x_enc_onehot=gen_outputs_onehot,
                                        len_enc=gen_outputs_len,
                                        reuse=True)
            (gen_z, dis_mu, dis_logvar) = out_tuple

            ### Discriminator ##################################################

            # discriminator (for training)
            x_dis_onehot = tf.one_hot(x_enc, config.vocab_num)
            out_tuple = modeler.discriminator(inputs=x_dis_onehot,
                                              inputs_length=gen_outputs_len,
                                              reuse=True)
            (self.dis_outputs, self.dis_sample) = out_tuple

        ########################################################################
        # get all the variables in this scope
        self.vars = get_variables("CtrlVAE")
        self.enc_vars = get_variables("CtrlVAE/encoder")
        self.gen_vars = get_variables("CtrlVAE/decoder")
        self.dis_vars = get_variables("CtrlVAE/discriminator")
        self.vae_vars = self.enc_vars + self.gen_vars
        ########################################################################
        # compute AE loss (reconstruction)
        len_out = tf.reduce_max(vae_outputs_len)
        targets = y_dec[:, :len_out]
        weights = tf.sequence_mask(vae_outputs_len, dtype=tf.float32)

        softmax_loss = sequence_loss(logits=self.vae_output,
                                     targets=targets,
                                     weights=weights,
                                     average_across_timesteps=False,
                                     average_across_batch=False)

        # NOTE: fix later!
        loss_sum = tf.reduce_sum(softmax_loss, axis=1)
        self.ae_loss = self.ae_loss_mean = tf.reduce_mean(loss_sum, axis=0)
        #self.ae_loss_mean = tf.reduce_mean(softmax_loss)

        # compute KL loss (regularization)
        KL_term = 1 + vae_logvar - tf.pow(vae_mu, 2) - tf.exp(vae_logvar)
        self.kl_loss = -0.5 * tf.reduce_sum(KL_term, reduction_indices=1)
        self.kl_loss_mean = tf.reduce_mean(self.kl_loss)

        # VAE total loss
        self.vae_loss = self.ae_loss + self.kl_weight * self.kl_loss_mean
        ########################################################################
        # c code loss
        answer_labels = tf.one_hot(self.answer, config.vocab_num)
        c_loss = softmax_cross_entropy_with_logits(labels=answer_labels,
                                                   logits=self.gen_c_output)
        self.c_loss = tf.reduce_mean(c_loss)

        # z code loss
        mu_loss = mean_pairwise_squared_error(vae_mu, dis_mu)
        logvar_loss = mean_pairwise_squared_error(vae_logvar, dis_logvar)
        self.z_loss = (mu_loss + logvar_loss) / 2

        # generator total loss
        self.gen_loss = self.c_loss + self.z_loss
        ########################################################################
        # discriminator training loss
        dis_loss = softmax_cross_entropy_with_logits(labels=answer_labels,
                                                     logits=self.dis_outputs)
        self.dis_loss = tf.reduce_mean(dis_loss)
        ########################################################################

        # optimization
        lr = config.learning_rate
        self.vae_lr = tf.Variable(lr, trainable=False, name="vae_lr")
        self.gen_lr = tf.Variable(0.0, trainable=False, name="gen_lr")
        self.dis_lr = tf.Variable(lr, trainable=False, name="dis_lr")

        vae_optim = tf.train.AdamOptimizer(self.vae_lr)
        gen_optim = tf.train.AdamOptimizer(self.gen_lr)
        dis_optim = tf.train.AdamOptimizer(self.dis_lr)

        vae_grads = tf.gradients(self.vae_loss, self.vae_vars)
        gen_grads = tf.gradients(self.gen_loss, self.gen_vars)
        dis_grads = tf.gradients(self.dis_loss, self.dis_vars)

        vae_grads, _ = tf.clip_by_global_norm(vae_grads, config.max_grad_norm)
        gen_grads, _ = tf.clip_by_global_norm(gen_grads, config.max_grad_norm)
        dis_grads, _ = tf.clip_by_global_norm(dis_grads, config.max_grad_norm)

        self.global_step = get_or_create_global_step()
        self.vae_train = vae_optim.apply_gradients(
            zip(vae_grads, self.vae_vars))
        self.gen_train = gen_optim.apply_gradients(
            zip(gen_grads, self.gen_vars))
        self.dis_train = dis_optim.apply_gradients(
            zip(dis_grads, self.dis_vars), self.global_step)

        # learning_rate update
        self.new_gen_lr = tf.placeholder(tf.float32,
                                         shape=[],
                                         name="new_gen_lr")
        self.gen_lr_update = tf.assign(self.gen_lr, self.new_gen_lr)

        # KL weight update
        self.new_kl_weight = tf.placeholder(tf.float32,
                                            shape=[],
                                            name="new_kl")
        self.kl_weight_update = tf.assign(self.kl_weight, self.new_kl_weight)

        # summaries
        tf.summary.scalar("Loss/ae_mean", self.ae_loss_mean)
        tf.summary.scalar("Loss/kl_mean", self.kl_loss_mean)
        tf.summary.scalar("Loss/Total", self.ae_loss_mean + self.kl_loss_mean)
        tf.summary.scalar("Misc/kl_weight", self.kl_weight)
        tf.summary.scalar("Misc/mu_mean", tf.reduce_mean(vae_mu))
        tf.summary.scalar("Misc/logvar_mean", tf.reduce_mean(vae_logvar))
        tf.summary.scalar("Misc/gen_lr", self.gen_lr)
        self.summary_op = tf.summary.merge_all()
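
The KL term above is the closed-form KL divergence between the approximate
posterior N(mu, exp(logvar)) and a standard normal prior. A quick NumPy sketch
with made-up latent statistics evaluates the same expression used for
self.kl_loss:

import numpy as np

# Illustrative posterior parameters for a 4-dimensional latent code.
mu = np.array([0.0, 0.5, -1.0, 2.0])
logvar = np.array([0.0, -0.2, 0.1, 0.5])

# Same formula as the model: KL(N(mu, exp(logvar)) || N(0, I)).
kl = -0.5 * np.sum(1.0 + logvar - mu ** 2 - np.exp(logvar))
print(kl)  # positive here; it is zero only when mu = 0 and logvar = 0

# Sanity check: the prior itself gives zero KL.
assert -0.5 * np.sum(1.0 + 0.0 - 0.0 - np.exp(0.0)) == 0.0
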
Esempio n. 34
0
import tensorflow as tf
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn

#a = tf.constant([1, 2, 3, 4, 5, 6], shape=(2,3))
#a = tf.constant([1, 2, 3, 4, 5, 6])
a = tf.constant([1, 11, 3, 9, 5, 3, 13])
b = array_ops.shape(a)
a = array_ops.reshape(a, [b[0], 1])
b = array_ops.transpose(a)
labels_remapped = math_ops.to_float(math_ops.equal(a, array_ops.transpose(a)))
c = math_ops.reduce_sum(labels_remapped, 1, keepdims=True)

#b = tf.constant([1, 2, 2, 9, 5, 6], shape=(2,3))
# c = math_ops.matmul( a, b, transpose_a=False, transpose_b=True)

#c = math_ops.to_float(math_ops.equal( a, b ))
x = tf.constant([[1.0, 2.0, 2.0, 9.0, 5.0, 6.0, 1.0],
                 [1.0, 2.0, 2.0, 9.0, 5.0, 6.0, 1.0],
                 [1.0, 2.0, 2.0, 9.0, 5.0, 6.0, 1.0],
                 [1.0, 2.0, 2.0, 9.0, 5.0, 6.0, 1.0],
                 [1.0, 2.0, 2.0, 9.0, 5.0, 6.0, 1.0],
                 [1.0, 2.0, 2.0, 9.0, 5.0, 6.0, 1.0],
                 [1.0, 2.0, 2.0, 9.0, 5.0, 6.0, 1.0]])

y = nn.softmax_cross_entropy_with_logits(logits=x, labels=labels_remapped)
with tf.Session() as sess:
    # v = sess.run(a)
    # v = sess.run(labels_remapped)
    # v =  sess.run(b)
    v = sess.run(y)
    print(v)

print("tmp ok")
Esempio n. 35
0
  """
  weights = _weights(weights, weight)
  with ops.name_scope(scope, "softmax_cross_entropy_loss",
                      [logits, onehot_labels, weights]):
    logits.get_shape().assert_is_compatible_with(onehot_labels.get_shape())

    onehot_labels = math_ops.cast(onehot_labels, logits.dtype)

    if label_smoothing > 0:
      num_classes = math_ops.cast(
          array_ops.shape(onehot_labels)[1], logits.dtype)
      smooth_positives = 1.0 - label_smoothing
      smooth_negatives = label_smoothing / num_classes
      onehot_labels = onehot_labels * smooth_positives + smooth_negatives

    losses = nn.softmax_cross_entropy_with_logits(logits=logits, labels=onehot_labels,
                                                  name="xentropy")
    return compute_weighted_loss(losses, weights)


@deprecated_args(
    "2016-11-25", "`weight` is being deprecated, use `weights`", "weight")
def sparse_softmax_cross_entropy(
    logits, labels, weights=_WEIGHT_SENTINEL, scope=None,
    weight=_WEIGHT_SENTINEL):
  """Cross-entropy loss using `tf.nn.sparse_softmax_cross_entropy_with_logits`.

  `weight` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weight` is a
  tensor of size [`batch_size`], then the loss weights apply to each
  corresponding sample.
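
The label-smoothing arithmetic in softmax_cross_entropy above is easy to verify
by hand. A minimal NumPy sketch with illustrative values: for
label_smoothing=0.1 and four classes, the true class ends up at
0.9 + 0.025 = 0.925, every other class at 0.025, and the row still sums to one:

import numpy as np

label_smoothing = 0.1          # illustrative value
num_classes = 4
onehot = np.array([0.0, 1.0, 0.0, 0.0])

smooth_positives = 1.0 - label_smoothing          # 0.9
smooth_negatives = label_smoothing / num_classes  # 0.025
smoothed = onehot * smooth_positives + smooth_negatives

print(smoothed)        # [0.025 0.925 0.025 0.025]
print(smoothed.sum())  # 1.0
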
Esempio n. 36
0
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn

x_ = tf.constant([0.2, 0.8])
x = tf.constant([0.2, 0.8])
y = nn.softmax_cross_entropy_with_logits(logits=x, labels=x_)

y_hat = tf.convert_to_tensor(np.array([[0.5, 1.5, 0.1], [2.2, 1.3, 1.7]]))
y_true = tf.convert_to_tensor(np.array([[0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]))
ent = nn.softmax_cross_entropy_with_logits(logits=y_hat, labels=y_true)

y_hat_softmax = tf.nn.softmax(y_hat)
tmp = y_true * tf.log(y_hat_softmax)
tmp2 = -tf.reduce_sum(tmp, [1])
total_loss = tf.reduce_mean(-tf.reduce_sum(y_true *
                                           tf.log(y_hat_softmax), [1]))

with tf.Session() as sess:
    v = sess.run(ent)
    print(v)
    v2 = sess.run(y_hat_softmax)
    print(v2)
    # v3 = sess.run(total_loss)
    #print(v3)
    v4 = sess.run(tmp)
    print(v4)
    v5 = sess.run(tmp2)
    print(v5)