Example #1
 def testScalar(self):
     with self.session(use_gpu=True):
         with self.assertRaisesRegexp(ValueError,
                                      ".*Logits cannot be scalars*"):
             nn_ops.sparse_softmax_cross_entropy_with_logits(
                 labels=constant_op.constant(0),
                 logits=constant_op.constant(1.0))
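For contrast with the scalar-rejection test above, here is a minimal sketch of the accepted shape contract, written against the public tf.nn API (an assumption about the intended usage; the tests above call the internal nn_ops module): logits of rank >= 2 and integer class-index labels with one dimension fewer.

import tensorflow as tf

logits = tf.constant([[2.0, 1.0, 0.1]])   # [batch=1, num_classes=3]
labels = tf.constant([0])                 # [batch=1], class indices, not one-hot
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
# loss has shape [1]; scalar labels/logits, as in the test above, raise ValueError.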
 def xent_grad(f):
     if not context.executing_eagerly():
         return gradients_impl.gradients(
             nn_ops.sparse_softmax_cross_entropy_with_logits(
                 labels=l, logits=f, name="xent"), [f])[0]
     with backprop_lib.GradientTape() as tape:
         tape.watch(f)
         return tape.gradient(
             nn_ops.sparse_softmax_cross_entropy_with_logits(
                 labels=l, logits=f, name="xent"), [f])[0]
def generate_single_output(encoder_state, attention_states, sequence_length, targets, num_classes, buckets,
                       use_mean_attention=False,
                       softmax_loss_function=None, per_example_loss=False, name=None, use_attention=False):
  all_inputs = targets
  with tf.name_scope(name, "model_with_buckets", all_inputs):
    with variable_scope.variable_scope(variable_scope.get_variable_scope(),
                                       reuse=None):
      (bucket_attention_states, bucket_attn_weights, bucket_attns,
       bucket_outputs) = attention_single_output_decoder(
           encoder_state, attention_states, output_size=num_classes,
           num_heads=1, sequence_length=sequence_length,
           initial_state_attention=True, use_attention=use_attention)

      if softmax_loss_function is None:
        assert len(bucket_outputs) == len(targets) == 1
        # We need to make target an int64 tensor and set its shape.
        bucket_target = array_ops.reshape(math_ops.to_int64(targets[0]), [-1])
        crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(
            labels=bucket_target, logits=bucket_outputs[0])
      else:
        assert len(bucket_outputs) == len(targets) == 1
        crossent = softmax_loss_function(bucket_outputs[0], targets[0])

      batch_size = array_ops.shape(targets[0])[0]
      loss = tf.reduce_sum(crossent) / math_ops.cast(batch_size, dtypes.float32)

  return bucket_outputs, loss
Example #4
        def body(i, prev_c, prev_h, actions, log_probs):
            # pylint: disable=g-long-lambda
            signal = control_flow_ops.cond(
                math_ops.equal(i, 0), lambda: array_ops.tile(
                    device_go_embedding, [self.hparams.num_children, 1]),
                lambda: embedding_ops.embedding_lookup(device_embeddings,
                                                       actions.read(i - 1)))
            if self.hparams.keep_prob is not None:
                signal = nn_ops.dropout(signal,
                                        rate=(1 - self.hparams.keep_prob))
            next_c, next_h = lstm(signal, prev_c, prev_h, w_lstm, forget_bias)
            query = math_ops.matmul(next_h, attn_w_2)
            query = array_ops.reshape(
                query,
                [self.hparams.num_children, 1, self.hparams.hidden_size])
            query = math_ops.tanh(query + attn_mem)
            query = array_ops.reshape(query, [
                self.hparams.num_children * self.num_groups,
                self.hparams.hidden_size
            ])
            query = math_ops.matmul(query, attn_v)
            query = array_ops.reshape(
                query, [self.hparams.num_children, self.num_groups])
            query = nn_ops.softmax(query)
            query = array_ops.reshape(
                query, [self.hparams.num_children, self.num_groups, 1])
            query = math_ops.reduce_sum(attn_mem * query, axis=1)
            query = array_ops.concat([next_h, query], axis=1)
            logits = math_ops.matmul(query, device_softmax)
            logits /= self.hparams.temperature
            if self.hparams.tanh_constant > 0:
                logits = math_ops.tanh(logits) * self.hparams.tanh_constant
            if self.hparams.logits_std_noise > 0:
                num_in_logits = math_ops.cast(array_ops.size(logits),
                                              dtype=dtypes.float32)
                avg_norm = math_ops.divide(linalg_ops.norm(logits),
                                           math_ops.sqrt(num_in_logits))
                logits_noise = random_ops.random_normal(
                    array_ops.shape(logits),
                    stddev=self.hparams.logits_std_noise * avg_norm)
                logits = control_flow_ops.cond(
                    self.global_step > self.hparams.stop_noise_step,
                    lambda: logits, lambda: logits + logits_noise)

            if mode == "sample":
                next_y = random_ops.multinomial(logits,
                                                1,
                                                seed=self.hparams.seed)
            elif mode == "greedy":
                next_y = math_ops.argmax(logits, 1)
            elif mode == "target":
                next_y = array_ops.slice(y, [0, i], [-1, 1])
            else:
                raise NotImplementedError
            next_y = math_ops.cast(next_y, dtypes.int32)
            next_y = array_ops.reshape(next_y, [self.hparams.num_children])
            actions = actions.write(i, next_y)
            log_probs += nn_ops.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=next_y)
            return i + 1, next_c, next_h, actions, log_probs
Example #5
    def __init__(self, args, training=True):
        self.args = args
        if not training:
            args.batch_size = 1
            args.seq_length = 1

        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])

        embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])

        cells = []
        for _ in range(args.rnn_layers):
            cells.append(rnn.BasicLSTMCell(args.rnn_size))
        self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

        dense_layer_w = tf.get_variable("dense_layer_w", [args.rnn_size, args.vocab_size])
        dense_layer_b = tf.get_variable("dense_layer_b", [args.vocab_size])

        inputs = tf.nn.embedding_lookup(embedding, self.input_data)
        inputs = tf.split(inputs, args.seq_length, 1)
        inputs = [tf.squeeze(ip, [1]) for ip in inputs]

        self.initial_state = cell.zero_state(args.batch_size, tf.float32)
        outputs, self.final_state = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, cell)
        output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
        logits = tf.matmul(output, dense_layer_w) + dense_layer_b
        self.probs = tf.nn.softmax(logits)
        self.predicted_output = tf.reshape(tf.argmax(self.probs, 1), [args.batch_size, args.seq_length])

        self.lr = tf.Variable(0.0, trainable=False)
        loss = sparse_softmax_cross_entropy_with_logits(logits=logits, labels=tf.reshape(self.targets, [-1]))
        self.cost = tf.reduce_mean(loss)
        self.optimizer = tf.train.AdamOptimizer(self.lr).minimize(self.cost)
Example #6
def generate_single_output(encoder_state, attention_states, sequence_length, targets, num_classes, buckets,
                       use_mean_attention=False,
                       softmax_loss_function=None, per_example_loss=False, name=None, use_attention=False):
  all_inputs = targets
  with ops.op_scope(all_inputs, name, "model_with_buckets"):
    with variable_scope.variable_scope(variable_scope.get_variable_scope(),
                                       reuse=None):
      (bucket_attention_states, bucket_attn_weights, bucket_attns,
       bucket_outputs) = attention_single_output_decoder(
           encoder_state, attention_states, output_size=num_classes,
           num_heads=1, sequence_length=sequence_length,
           initial_state_attention=True, use_attention=use_attention)

      if softmax_loss_function is None:
        assert len(bucket_outputs) == len(targets) == 1
        # We need to make target an int64 tensor and set its shape.
        bucket_target = array_ops.reshape(math_ops.to_int64(targets[0]), [-1])
        crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(
            logits=bucket_outputs[0], labels=bucket_target)
      else:
        assert len(bucket_outputs) == len(targets) == 1
        crossent = softmax_loss_function(bucket_outputs[0], targets[0])

      batch_size = array_ops.shape(targets[0])[0]
      loss = tf.reduce_sum(crossent) / math_ops.cast(batch_size, dtypes.float32)

  return bucket_outputs, loss
Example #7
def sequence_loss_tensor(
    logits, targets, weights, num_classes, average_across_timesteps=True, softmax_loss_function=None, name=None
):
    """Weighted cross-entropy loss for a sequence of logits (per example).

    """
    #    if (logits.get_shape()[0:2]) != targets.get_shape() \
    #        or (logits.get_shape()[0:2]) != weights.get_shape():
    #        print(logits.get_shape()[0:2])
    #        print(targets.get_shape())
    #        print(weights.get_shape())
    #        raise ValueError("Shapes of logits, weights, and targets must be the "
    #            "same")
    with ops.op_scope([logits, targets, weights], name, "sequence_loss_by_example"):
        probs_flat = tf.reshape(logits, [-1, num_classes])
        targets = tf.reshape(targets, [-1])
        if softmax_loss_function is None:
            crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(probs_flat, targets)
        else:
            crossent = softmax_loss_function(probs_flat, targets)
        crossent = crossent * tf.reshape(weights, [-1])
        crossent = tf.reduce_sum(crossent)
        total_size = math_ops.reduce_sum(weights)
        total_size += 1e-12  # to avoid division by zero
        crossent /= total_size
        return crossent
Example #8
  def _log_prob(self, k):
    k = ops.convert_to_tensor(k, name="k")
    if self.validate_args:
      k = distribution_util.embed_check_integer_casting_closed(
          k, target_dtype=dtypes.int32)

    if self.logits.get_shape()[:-1] == k.get_shape():
      logits = self.logits
    else:
      logits = self.logits * array_ops.ones_like(
          array_ops.expand_dims(k, -1), dtype=self.logits.dtype)
      logits_shape = array_ops.shape(logits)[:-1]
      k *= array_ops.ones(logits_shape, dtype=k.dtype)
      k.set_shape(tensor_shape.TensorShape(logits.get_shape()[:-1]))
      if k.dtype.is_integer:
        pass
      elif k.dtype.is_floating:
        # When `validate_args=True` we've already ensured int/float casting
        # is closed.
        return ops.cast(k, dtype=dtypes.int32)
      else:
        raise TypeError("`value` should have integer `dtype` or "
                        "`self.dtype` ({})".format(self.dtype.base_dtype))
    return -nn_ops.sparse_softmax_cross_entropy_with_logits(labels=k,
                                                            logits=logits)
Example #9
 def testInt32GPU(self):
     if not context.context().num_gpus():
         self.skipTest('No GPUs found')
     with ops.device('gpu:0'):
         xent = nn_ops.sparse_softmax_cross_entropy_with_logits(
             logits=[[0.0, 0.0]], labels=[0])
     self.assertAllClose(xent, [0.69314718])
Example #10
 def MYsequence_loss_by_example(logits, targets, weights,
                              average_across_timesteps=True,
                              softmax_loss_function=None, name=None):
   if len(targets) != len(logits) or len(weights) != len(logits):
     raise ValueError("Lengths of logits, weights, and targets must be the same "
                      "%d, %d, %d." % (len(logits), len(weights), len(targets)))
   with ops.op_scope(logits + targets + weights, name,
                     "sequence_loss_by_example"):
     log_perp_list = []
     for logit, target, weight in zip(logits, targets, weights):
       if softmax_loss_function is None:
         # TODO(irving,ebrevdo): This reshape is needed because
         # sequence_loss_by_example is called with scalars sometimes, which
         # violates our general scalar strictness policy.
         target = array_ops.reshape(target, [-1])
         crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(
             logit, target)
       else:
         crossent = softmax_loss_function(logit, target)
       print(crossent, weight)
       log_perp_list.append(crossent * weight)
       print(log_perp_list)
     log_perps = math_ops.add_n(log_perp_list)
     if average_across_timesteps:
       total_size = math_ops.add_n(weights)
       total_size += 1e-12  # Just to avoid division by 0 for all-0 weights.
       log_perps /= total_size
   return log_perps
Example #11
    def seq2seq_loss(logits, targets, seq_len_target):
        """Calculate the cross entropy loss w.r.t. given target.

        Args:
            logits: A 2-d tensor of shape (TxB)x|V| containing the logit score
                per output symbol.
            targets: 2-d tensor of shape TxB that contains the ground truth
                output symbols.
            seq_len_target: Sequence length of output sequences. Required to
                mask padding symbols in output sequences.
        """
        with ops.name_scope("sequence_loss", [logits, targets]):
            flat_targets = tf.reshape(targets, [-1])
            cost = nn_ops.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=flat_targets)

            # Mask this cost since the output sequence is padded
            batch_major_mask = tf.sequence_mask(seq_len_target,
                                                dtype=tf.float32)
            time_major_mask = tf.transpose(batch_major_mask, [1, 0])
            weights = tf.reshape(time_major_mask, [-1])
            mask_cost = weights * cost

            loss = tf.reshape(mask_cost, tf.shape(targets))
            # Average the loss for each example by the # of timesteps
            cost_per_example = tf.reduce_sum(loss, reduction_indices=0) /\
                tf.cast(seq_len_target, tf.float32)
            # Return the average cost over all examples
            return tf.reduce_mean(cost_per_example)
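A minimal shape sketch of the masking above, using TF 2.x-style eager ops and hypothetical sizes (T=4 timesteps, B=2 examples, |V|=5 symbols); it mirrors the time-major reshape/mask/normalize steps of seq2seq_loss rather than calling it.

import tensorflow as tf

T, B, V = 4, 2, 5
targets = tf.zeros([T, B], dtype=tf.int32)       # time-major ground-truth symbols
logits = tf.zeros([T * B, V])                    # one logit row per (t, b) pair
seq_len_target = tf.constant([3, 4])             # true lengths per example

cost = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=tf.reshape(targets, [-1]), logits=logits)                    # [(T*B)]
mask = tf.transpose(tf.sequence_mask(seq_len_target, maxlen=T, dtype=tf.float32), [1, 0])
masked = tf.reshape(cost, [T, B]) * mask                                # zero out padded steps
loss = tf.reduce_mean(tf.reduce_sum(masked, axis=0) /
                      tf.cast(seq_len_target, tf.float32))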
Example #12
 def testLabelsPlaceholderScalar(self):
     with self.session(use_gpu=True):
         labels = array_ops.placeholder(np.int32)
         y = nn_ops.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                             logits=[[7.]])
         with self.assertRaisesOpError("labels must be 1-D"):
             y.eval(feed_dict={labels: 0})
Example #13
    def testSecondGradient(self):
        with self.session() as sess:
            l = constant_op.constant([3, 0, 1], name="l")
            f = constant_op.constant(
                [0.3, 0.4, 0.1, 1.2, 0.1, 1.9, 0.1, 0.7, 0.8, 0.2, 1.3, 1.3],
                shape=[3, 4],
                dtype=dtypes.float64,
                name="f")
            x = nn_ops.sparse_softmax_cross_entropy_with_logits(labels=l,
                                                                logits=f,
                                                                name="xent")

            gradients = gradients_impl.gradients(x, [f])[0]
            err = gradient_checker.compute_gradient_error(
                f, [3, 4], gradients, [3, 4])

            # Check that the second derivative is calculated
            # (it shows up as a `BatchMatMul` op in the graph because of the
            # implementation of the xentropy gradient).
            op_names = [
                op.op_def.name for op in sess.graph.get_operations()
                if op.op_def
            ]
            self.assertIn("BatchMatMulV2", op_names)

        self.assertLess(err, 5e-8)
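The same second-derivative check can be written in eager mode with nested GradientTapes. This is only a sketch under the assumption of a TF 2.x build where the second derivative of this op is defined (an older test further down this page still expects it to be explicitly disabled).

import tensorflow as tf

labels = tf.constant([3, 0, 1])
logits = tf.Variable(tf.random.normal([3, 4], dtype=tf.float64))
with tf.GradientTape() as outer_tape:
    with tf.GradientTape() as inner_tape:
        loss = tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))
    grad = inner_tape.gradient(loss, logits)        # first derivative, shape [3, 4]
    grad_sum = tf.reduce_sum(grad)
second = outer_tape.gradient(grad_sum, logits)      # second derivative, shape [3, 4]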
Example #14
def MMIloss(logits, targets, weights, lam, gam,
            average_across_timesteps=True,
            softmax_loss_function=None, name=None):
  """MMI sequence loss. `lam` is the lambda value (diversity penalty) and `gam`
  the gamma value (length penalty) of the objective (see section 4.5.1 of Li et al)."""
  if len(targets) != len(logits) or len(weights) != len(logits):
    raise ValueError("Lengths of logits, weights, and targets must be the same "
                     "%d, %d, %d." % (len(logits), len(weights), len(targets)))

  with ops.op_scope(logits + targets + weights, name,
                    "sequence_loss_by_example"):
    log_perp_list = []
    for logit, target, weight in zip(logits, targets, weights):
      if softmax_loss_function is None:
        target = array_ops.reshape(target, [-1])
        crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(
            logit, target)
      else:
        crossent = softmax_loss_function(logit, target)
      log_perp_list.append(crossent * weight)
    log_perps = math_ops.add_n(log_perp_list)
    if average_across_timesteps:
      total_size = math_ops.add_n(weights)
      total_size += 1e-12  # Just to avoid division by 0 for all-0 weights.
      log_perps /= total_size

    # lm_perps (the log-perplexity under the language model) is assumed to be
    # provided by the enclosing scope.
    final_perps = log_perps - lam * lm_perps + gam * len(targets)
    return final_perps
Example #15
 def MYsequence_loss_by_example(logits,
                                targets,
                                weights,
                                average_across_timesteps=True,
                                softmax_loss_function=None,
                                name=None):
     if len(targets) != len(logits) or len(weights) != len(logits):
         raise ValueError(
             "Lengths of logits, weights, and targets must be the same "
             "%d, %d, %d." % (len(logits), len(weights), len(targets)))
     with ops.op_scope(logits + targets + weights, name,
                       "sequence_loss_by_example"):
         log_perp_list = []
         for logit, target, weight in zip(logits, targets, weights):
             if softmax_loss_function is None:
                 # TODO(irving,ebrevdo): This reshape is needed because
                 # sequence_loss_by_example is called with scalars sometimes, which
                 # violates our general scalar strictness policy.
                 target = array_ops.reshape(target, [-1])
                 crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(
                     logit, target)
             else:
                 crossent = softmax_loss_function(logit, target)
             print(crossent, weight)
             log_perp_list.append(crossent * weight)
             print(log_perp_list)
         log_perps = math_ops.add_n(log_perp_list)
         if average_across_timesteps:
             total_size = math_ops.add_n(weights)
             total_size += 1e-12  # Just to avoid division by 0 for all-0 weights.
             log_perps /= total_size
     return log_perps
Example #16
  def log_prob(self, k, name="log_prob"):
    """Log-probability of class `k`.

    Args:
      k: `int32` or `int64` Tensor. Must be broadcastable with a `batch_shape`
        `Tensor`.
      name: A name for this operation (optional).

    Returns:
      The log-probabilities of the classes indexed by `k`
    """
    with ops.name_scope(self.name):
      with ops.op_scope([k, self.logits], name):
        k = ops.convert_to_tensor(k, name="k")

        logits = self.logits * array_ops.ones_like(
            array_ops.expand_dims(k, -1),
            dtype=self.logits.dtype)
        k *= array_ops.ones(
            array_ops.slice(
                array_ops.shape(logits), [0], [array_ops.rank(logits) - 1]),
            dtype=k.dtype)
        k.set_shape(tensor_shape.TensorShape(logits.get_shape()[:-1]))

        return -nn_ops.sparse_softmax_cross_entropy_with_logits(logits, k)
Example #17
def sequence_loss_by_example(logits,
                             targets,
                             weights,
                             average_across_timesteps=True,
                             softmax_loss_function=None,
                             name=None):
    """
    Weighted cross-entropy loss for a sequence of logits (per example).
    """
    if len(targets) != len(logits) or len(weights) != len(logits):
        raise ValueError(
            "Lengths of logits, weights, and targets must be the same "
            "%d, %d, %d." % (len(logits), len(weights), len(targets)))

    with ops.name_scope("sequence_loss_by_example"):
        log_perp_list = []
        for logit, target, weight in zip(logits, targets, weights):
            if softmax_loss_function is None:
                target = array_ops.reshape(target, [-1])
                crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(
                    logits=logit, labels=target)
            else:
                crossent = softmax_loss_function(logit, target)
            log_perp_list.append(crossent * weight)

        log_perps = math_ops.add_n(log_perp_list)
        if average_across_timesteps:
            total_size = math_ops.add_n(weights)
            total_size += 1e-12  # Just to avoid division by 0 for all-0 weights.
            log_perps /= total_size

    return log_perps
Example #18
def sequence_loss_tensor(logits,
                         targets,
                         weights,
                         num_classes,
                         average_across_timesteps=True,
                         softmax_loss_function=None,
                         name=None):
    """Weighted cross-entropy loss for a sequence of logits (per example).

    """
    #    if (logits.get_shape()[0:2]) != targets.get_shape() \
    #        or (logits.get_shape()[0:2]) != weights.get_shape():
    #        print(logits.get_shape()[0:2])
    #        print(targets.get_shape())
    #        print(weights.get_shape())
    #        raise ValueError("Shapes of logits, weights, and targets must be the "
    #            "same")
    with ops.op_scope([logits, targets, weights], name,
                      "sequence_loss_by_example"):
        probs_flat = tf.reshape(logits, [-1, num_classes])
        targets = tf.reshape(targets, [-1])
        if softmax_loss_function is None:
            crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(
                probs_flat, targets)
        else:
            crossent = softmax_loss_function(probs_flat, targets)
        crossent = crossent * tf.reshape(weights, [-1])
        crossent = tf.reduce_sum(crossent)
        total_size = math_ops.reduce_sum(weights)
        total_size += 1e-12  # to avoid division by zero
        crossent /= total_size
        return crossent
Example #19
def sequence_loss_tensor(logits,
                         targets,
                         weights,
                         num_classes,
                         average_across_timesteps=True,
                         softmax_loss_function=None,
                         name=None):
    """Weighted cross-entropy loss for a sequence of logits (per example).
    Takes a 3-D logits tensor, flattens it, and applies the loss in one op, so no per-timestep loop is needed.
    """
    with ops.name_scope(name, "sequence_loss_by_example",
                        [logits, targets, weights]):
        probs_flat = tf.reshape(logits, [-1, num_classes])
        targets = tf.reshape(targets, [-1])
        if softmax_loss_function is None:
            crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(
                probs_flat, targets)
        else:
            crossent = softmax_loss_function(probs_flat, targets)
        crossent = crossent * tf.reshape(weights, [-1])
        crossent = tf.reduce_sum(crossent)
        total_size = math_ops.reduce_sum(weights)
        total_size += 1e-12  # to avoid division by zero
        crossent /= total_size
        return crossent
Example #20
    def _log_prob(self, k):
        k = ops.convert_to_tensor(k, name="k")
        if self.validate_args:
            k = distribution_util.embed_check_integer_casting_closed(
                k, target_dtype=dtypes.int32)

        if self.logits.get_shape()[:-1] == k.get_shape():
            logits = self.logits
        else:
            logits = self.logits * array_ops.ones_like(
                array_ops.expand_dims(k, -1), dtype=self.logits.dtype)
            logits_shape = array_ops.shape(logits)[:-1]
            k *= array_ops.ones(logits_shape, dtype=k.dtype)
            k.set_shape(tensor_shape.TensorShape(logits.get_shape()[:-1]))
            if k.dtype.is_integer:
                pass
            elif k.dtype.is_floating:
                # When `validate_args=True` we've already ensured int/float casting
                # is closed.
                return ops.cast(k, dtype=dtypes.int32)
            else:
                raise TypeError("`value` should have integer `dtype` or "
                                "`self.dtype` ({})".format(
                                    self.dtype.base_dtype))
        return -nn_ops.sparse_softmax_cross_entropy_with_logits(labels=k,
                                                                logits=logits)
Example #21
 def softmax_loss_function(logit,
                           target):  # loss function of seq2seq model
     logit = nn_ops.xw_plus_b(logit, output_projection[0],
                              output_projection[1])
     target = array_ops.reshape(target, [-1])
     return nn_ops.sparse_softmax_cross_entropy_with_logits(
         labels=target, logits=logit)
Example #22
def cross_entropy(labels, logits, name=None):
    """ Computes the cross entropy between the labels and logits
        This is a safe version that adds epsilon to logits to prevent log(0)
    """
    return nn_ops.sparse_softmax_cross_entropy_with_logits(logits=ensure_finite(logits),
                                                           labels=labels,
                                                           name=name)
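ensure_finite is not shown in this snippet; the following is only a hypothetical sketch of such a helper (an assumption, not the project's actual implementation) that replaces non-finite logits before the loss sees them.

import tensorflow as tf

def ensure_finite(t, replacement=0.0):
    # Hypothetical helper: swap NaN/Inf entries for `replacement` so the
    # cross-entropy never sees non-finite logits.
    replacement = tf.fill(tf.shape(t), tf.cast(replacement, t.dtype))
    return tf.where(tf.math.is_finite(t), t, replacement)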
Example #23
    def log_prob(self, k, name="log_prob"):
        """Log-probability of class `k`.

    Args:
      k: `int32` or `int64` Tensor. Must be broadcastable with a `batch_shape`
        `Tensor`.
      name: A name for this operation (optional).

    Returns:
      The log-probabilities of the classes indexed by `k`
    """
        with ops.name_scope(self.name):
            with ops.name_scope(name, values=[k, self.logits]):
                k = ops.convert_to_tensor(k, name="k")

                logits = self.logits * array_ops.ones_like(
                    array_ops.expand_dims(k, -1), dtype=self.logits.dtype)
                k *= array_ops.ones(
                    array_ops.slice(array_ops.shape(logits), [0],
                                    [array_ops.rank(logits) - 1]),
                    dtype=k.dtype)
                k.set_shape(tensor_shape.TensorShape(logits.get_shape()[:-1]))

                return -nn_ops.sparse_softmax_cross_entropy_with_logits(
                    logits, k)
Example #24
 def testLabelsPlaceholderScalar(self):
   with self.test_session(use_gpu=True):
     labels = array_ops.placeholder(np.int32)
     y = nn_ops.sparse_softmax_cross_entropy_with_logits(
         labels=labels, logits=[[7.]])
     with self.assertRaisesOpError("labels must be 1-D"):
       y.eval(feed_dict={labels: 0})
Example #25
 def _log_prob(self, k):
     k = ops.convert_to_tensor(k, name="k")
     logits = self.logits * array_ops.ones_like(array_ops.expand_dims(k, -1), dtype=self.logits.dtype)
     shape = array_ops.slice(array_ops.shape(logits), [0], [array_ops.rank(logits) - 1])
     k *= array_ops.ones(shape, dtype=k.dtype)
     k.set_shape(tensor_shape.TensorShape(logits.get_shape()[:-1]))
     return -nn_ops.sparse_softmax_cross_entropy_with_logits(logits, k)
Example #26
 def testInt32GPU(self):
   if not context.context().num_gpus():
     self.skipTest('No GPUs found')
   with ops.device('gpu:0'):
     xent = nn_ops.sparse_softmax_cross_entropy_with_logits(
         logits=[[0.0, 0.0]], labels=[0])
   self.assertAllClose(xent, [0.69314718])
Example #27
def sequence_loss_by_example(logits,
                             targets,
                             weights,
                             average_across_timesteps=True,
                             softmax_loss_function=None,
                             name=None):
    """Weighted cross-entropy loss for a sequence of logits (per example). see original tensorflow code :
    <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/legacy_seq2seq/python/ops/seq2seq.py#L1057>

    Parameters
    ----------
    logits: List
        List of 2D Tensors of shape [batch_size x num_decoder_symbols].
    targets: List
        List of 1D batch-sized int32 Tensors of the same length as logits.
    weights: List
        List of 1D batch-sized float-Tensors of the same length as logits.
    average_across_timesteps: Boolean
        If set, divide the returned cost by the total label weight.
    softmax_loss_function: None or Function
        Function (labels, logits) -> loss-batch to be used instead of the standard softmax (the default if this is None).
        **Note that to avoid confusion, it is required for the function to accept named arguments.**
    name: None or str
        Optional name for this operation, default: "sequence_loss_by_example".

    Returns
    -------
    1D batch-sized float Tensor: The log-perplexity for each sequence.

    Raises
    ------
    ValueError: If len(logits) is different from len(targets) or len(weights).

    """
    if len(targets) != len(logits) or len(weights) != len(logits):
        raise ValueError(
            "Lengths of logits, weights, and targets must be the same "
            "%d, %d, %d." % (len(logits), len(weights), len(targets)))
    with ops.name_scope(name, "sequence_loss_by_example",
                        logits + targets + weights):
        log_perp_list = []
        for logit, target, weight in zip(logits, targets, weights):
            if softmax_loss_function is None:
                # TODO(irving,ebrevdo): This reshape is needed because
                # sequence_loss_by_example is called with scalars sometimes, which
                # violates our general scalar strictness policy.
                target = array_ops.reshape(target, [-1])
                crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(
                    labels=target, logits=logit)
            else:
                crossent = softmax_loss_function(labels=target, logits=logit)
            log_perp_list.append(crossent * weight)
        log_perps = math_ops.add_n(log_perp_list)
        if average_across_timesteps:
            total_size = math_ops.add_n(weights)
            total_size += 1e-12  # Just to avoid division by 0 for all-0 weights.
            log_perps /= total_size
    return log_perps
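A minimal usage sketch for the list-of-tensors API above (hypothetical sizes: three timesteps, batch of 2, vocabulary of 4), assuming the sequence_loss_by_example definition above is in scope.

import tensorflow as tf

num_steps, batch_size, vocab_size = 3, 2, 4
logits = [tf.zeros([batch_size, vocab_size]) for _ in range(num_steps)]
targets = [tf.zeros([batch_size], dtype=tf.int32) for _ in range(num_steps)]
weights = [tf.ones([batch_size]) for _ in range(num_steps)]   # 1.0 = real token, 0.0 = padding
log_perp = sequence_loss_by_example(logits, targets, weights)  # shape [batch_size]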
 def sampled_loss(logit, target):
     # labels = tf.reshape(labels, [-1, 1])
     logit = nn_ops.xw_plus_b(logit, output_projection[0],
                              output_projection[1])
     # return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels, num_samples,
     #                                   self.target_vocab_size)
     target = array_ops.reshape(target, [-1])
     return nn_ops.sparse_softmax_cross_entropy_with_logits(
         logit, target)
Example #29
    def __init__(self, args, training=True):
        self.args = args
        # When we don't train then we will take in one character at a time and try to predict
        if not training:
            args.batch_size = 1
            args.seq_length = 1
        # Assign the basic type of RNN unit
        if args.mtype == 'rnn':
            cell_fn = rnn.BasicRNNCell
        elif args.mtype == 'gru':
            cell_fn = rnn.GRUCell
        elif args.mtype == 'lstm':
            cell_fn = rnn.BasicLSTMCell
        elif args.mtype == 'nas':
            cell_fn = rnn.NASCell
        else:
            raise Exception("model type not supported: {}".format(args.mtype))

        cells = []
        for _ in range(args.num_layers):
            cell = cell_fn(args.rnn_size)
            cells.append(cell)

        self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

        self.input_data = tf.placeholder(tf.int32,
                                         [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32,
                                      [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        softmax_w = tf.get_variable("softmax_w",
                                    [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])

        embedding = tf.get_variable("embedding",
                                    [args.vocab_size, args.rnn_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)
        inputs = tf.split(inputs, args.seq_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        outputs, last_state = legacy_seq2seq.rnn_decoder(
            inputs, self.initial_state, cell)
        output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])

        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        self.predicted_output = tf.reshape(tf.argmax(self.probs, 1),
                                           [args.batch_size, args.seq_length])

        loss = sparse_softmax_cross_entropy_with_logits(
            logits=[self.logits], labels=[tf.reshape(self.targets, [-1])])

        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        self.optimizer = tf.train.AdamOptimizer(self.lr).minimize(self.cost)
    def body(i, prev_c, prev_h, actions, log_probs):
      # pylint: disable=g-long-lambda
      signal = control_flow_ops.cond(
          math_ops.equal(i, 0),
          lambda: array_ops.tile(device_go_embedding,
                                 [self.hparams.num_children, 1]),
          lambda: embedding_ops.embedding_lookup(device_embeddings,
                                                 actions.read(i - 1))
      )
      if self.hparams.keep_prob is not None:
        signal = nn_ops.dropout(signal, self.hparams.keep_prob)
      next_c, next_h = lstm(signal, prev_c, prev_h, w_lstm, forget_bias)
      query = math_ops.matmul(next_h, attn_w_2)
      query = array_ops.reshape(
          query, [self.hparams.num_children, 1, self.hparams.hidden_size])
      query = math_ops.tanh(query + attn_mem)
      query = array_ops.reshape(query, [
          self.hparams.num_children * self.num_groups, self.hparams.hidden_size
      ])
      query = math_ops.matmul(query, attn_v)
      query = array_ops.reshape(query,
                                [self.hparams.num_children, self.num_groups])
      query = nn_ops.softmax(query)
      query = array_ops.reshape(query,
                                [self.hparams.num_children, self.num_groups, 1])
      query = math_ops.reduce_sum(attn_mem * query, axis=1)
      query = array_ops.concat([next_h, query], axis=1)
      logits = math_ops.matmul(query, device_softmax)
      logits /= self.hparams.temperature
      if self.hparams.tanh_constant > 0:
        logits = math_ops.tanh(logits) * self.hparams.tanh_constant
      if self.hparams.logits_std_noise > 0:
        num_in_logits = math_ops.cast(
            array_ops.size(logits), dtype=dtypes.float32)
        avg_norm = math_ops.divide(
            linalg_ops.norm(logits), math_ops.sqrt(num_in_logits))
        logits_noise = random_ops.random_normal(
            array_ops.shape(logits),
            stddev=self.hparams.logits_std_noise * avg_norm)
        logits = control_flow_ops.cond(
            self.global_step > self.hparams.stop_noise_step, lambda: logits,
            lambda: logits + logits_noise)

      if mode == "sample":
        next_y = random_ops.multinomial(logits, 1, seed=self.hparams.seed)
      elif mode == "greedy":
        next_y = math_ops.argmax(logits, 1)
      elif mode == "target":
        next_y = array_ops.slice(y, [0, i], [-1, 1])
      else:
        raise NotImplementedError
      next_y = math_ops.to_int32(next_y)
      next_y = array_ops.reshape(next_y, [self.hparams.num_children])
      actions = actions.write(i, next_y)
      log_probs += nn_ops.sparse_softmax_cross_entropy_with_logits(
          logits=logits, labels=next_y)
      return i + 1, next_c, next_h, actions, log_probs
 def _log_prob(self, k):
     k = ops.convert_to_tensor(k, name="k")
     logits = self.logits * array_ops.ones_like(
         array_ops.expand_dims(k, -1), dtype=self.logits.dtype)
     shape = array_ops.slice(array_ops.shape(logits), [0],
                             [array_ops.rank(logits) - 1])
     k *= array_ops.ones(shape, dtype=k.dtype)
     k.set_shape(tensor_shape.TensorShape(logits.get_shape()[:-1]))
     return -nn_ops.sparse_softmax_cross_entropy_with_logits(logits, k)
Example #32
def sequence_loss_by_example(logits,
                             targets,
                             weights,
                             average_across_timesteps=True,
                             softmax_loss_function=None,
                             name=None):
    """Weighted cross-entropy loss for a sequence of logits (per example).

  Args:
    logits: [batch_size, num_steps, num_decoder_symbols].
            If softmax_loss_function is not None the shape is [batch_size, num_steps, emb_dim]
            (these are just the outputs from dynamic_rnn).
            If softmax_loss_function is None the input may already be flattened to
            [-1, num_decoder_symbols]; that works as well.
    targets: [batch_size, num_steps]
    weights: [batch_size, num_steps]
    average_across_timesteps: If set, divide the returned cost by the total
      label weight.
    softmax_loss_function: Function (inputs-batch, labels-batch) -> loss-batch
      to be used instead of the standard softmax (the default if this is None).
    name: Optional name for this operation, default: "sequence_loss_by_example".

  Returns:
    1D batch-sized float Tensor: The log-perplexity for each sequence.
  """
    with ops.name_scope(name, "sequence_loss_by_example",
                        [logits, targets, weights]):
        logits_shape = array_ops.shape(logits)
        batch_size = logits_shape[0]
        if softmax_loss_function is None:
            # crossents: [batch_size, num_steps]
            # No reshape needed: sparse_softmax_cross_entropy_with_logits accepts both layouts.
            #num_classes = logits_shape[-1]
            #logits = array_ops.reshape(logits, [-1, num_classes])
            #targets = array_ops.reshape(targets, [-1])
            crossents = nn_ops.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=targets)
            crossents = array_ops.reshape(crossents, [batch_size, -1])
            weights = array_ops.reshape(weights, [batch_size, -1])
        else:
            emb_dim = logits_shape[-1]
            # Reshape needed because, unlike sparse_softmax_cross_entropy_with_logits,
            # tf.nn.sampled_softmax_loss only accepts 2-D [batch_size, dim] logits.
            logits = array_ops.reshape(logits, [-1, emb_dim])
            targets = array_ops.reshape(targets, [-1, 1])
            # crossents: [batch_size * num_steps]
            crossents = softmax_loss_function(logits, targets)
            # crossents: [batch_size, num_steps]
            crossents = array_ops.reshape(crossents, [batch_size, -1])

        log_perps = math_ops.reduce_sum(math_ops.multiply(crossents, weights),
                                        1)

        if average_across_timesteps:
            total_size = math_ops.reduce_sum(weights, 1)
            total_size += 1e-12  # Just to avoid division by 0 for all-0 weights.
            log_perps /= total_size
    return log_perps
Example #33
 def sequence_loss(self):
     with ops.name_scope("sequence_loss_by_example"):
         weights = tf.to_float(tf.sign(tf.abs(self.labels), name="mask"))
         batch_size = array_ops.shape(self.labels)[0]
         crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(
             self.logits, self.labels)
         log_perps = tf.reduce_sum(
             tf.reduce_sum(crossent * weights, reduction_indices=1))
     return log_perps / tf.to_float(batch_size)
Example #34
  def _log_prob(self, k):
    k = ops.convert_to_tensor(k, name="k")
    if self.validate_args:
      k = distribution_util.embed_check_integer_casting_closed(
          k, target_dtype=dtypes.int32)
    k, logits = _broadcast_cat_event_and_params(
        k, self.logits, base_dtype=self.dtype.base_dtype)

    return -nn_ops.sparse_softmax_cross_entropy_with_logits(labels=k,
                                                            logits=logits)
Example #35
    def _log_prob(self, k):
        k = ops.convert_to_tensor(k, name="k")
        if self.validate_args:
            k = distribution_util.embed_check_integer_casting_closed(
                k, target_dtype=dtypes.int32)
        k, logits = _broadcast_cat_event_and_params(
            k, self.logits, base_dtype=self.dtype.base_dtype)

        return -nn_ops.sparse_softmax_cross_entropy_with_logits(labels=k,
                                                                logits=logits)
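A small numerical check (a sketch, not part of the distribution code) of the identity these _log_prob implementations rely on: the negative sparse cross-entropy at class k equals the categorical log-probability log p(k).

import tensorflow as tf

logits = tf.constant([[2.0, 0.5, -1.0]])
k = tf.constant([0])
lhs = -tf.nn.sparse_softmax_cross_entropy_with_logits(labels=k, logits=logits)
rhs = tf.gather(tf.nn.log_softmax(logits, axis=-1), k, axis=-1, batch_dims=1)
# lhs and rhs agree up to floating-point error.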
 def _testHighDim(self, features, labels):
   np_loss, np_backprop = self._npXent(np.array(features), np.array(labels))
   # manually reshape loss
   np_loss = np.reshape(np_loss, np.array(labels).shape)
   with self.test_session(use_gpu=True) as sess:
     loss = nn_ops.sparse_softmax_cross_entropy_with_logits(features, labels)
     backprop = loss.op.inputs[0].op.outputs[1]
     tf_loss, tf_backprop = sess.run([loss, backprop])
   self.assertAllCloseAccordingToType(np_loss, tf_loss)
   self.assertAllCloseAccordingToType(np_backprop, tf_backprop)
Example #37
  def _testHighDim(self, features, labels):
    np_loss, np_backprop = self._npXent(np.array(features), np.array(labels))
    # manually reshape loss
    np_loss = np.reshape(np_loss, np.array(labels).shape)
    tf_loss = nn_ops.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=features)
    if not context.executing_eagerly():
      tf_backprop = tf_loss.op.inputs[0].op.outputs[1]
    else:
      with backprop_lib.GradientTape() as tape:
        features = constant_op.constant(features)
        tape.watch(features)
        tf_backprop = tape.gradient(
            nn_ops.sparse_softmax_cross_entropy_with_logits(
                labels=labels, logits=features), [features])[0]
        tf_backprop = array_ops.reshape(tf_backprop, np_backprop.shape)

    self.assertAllCloseAccordingToType(np_loss, tf_loss)
    self.assertAllCloseAccordingToType(np_backprop, tf_backprop)
Example #38
def sequence_loss_tensor(logits,
                         targets,
                         num_classes,
                         weights=None,
                         average_across_timesteps=False,
                         softmax_loss_function=None,
                         name="sequenceLoss"):
    """
    Weighted cross-entropy loss for a sequence of logits (per example).
    It is a modification of TensorFlow's own sequence_to_sequence_loss.
    TensorFlow's seq2seq loss works with a 2D list instead of a 3D tensors.

    :param tf.Tensor logits: Logits for each class for all samples. [batch_size, (sequence_length), num_classes]
    :param tf.Tensor targets: True classes of samples. [batch_size, (sequence_length)]
    :param int | tf.Tensor num_classes: The total number of classes.
    :param tf.Tensor weights: Weighing of each sample. [batch_size, (sequence_length)]
    :param bool average_across_timesteps: Average loss across time-dimension.
    :param Callable softmax_loss_function: Method used for computing loss.
    :param str name: Name of loss-functions scope.
    :return: tf.Tensor
    """
    if average_across_timesteps:
        raise NotImplementedError(
            "Averaging across time-steps has not been implemented yet. ")

    with tf.variable_scope(name):
        # Flatten logits for using softmax operation, and targets for comparison
        # logits_flat: [batch_size * (sequence_length), num_classes]
        # targets: [batch_size * (sequence_length)]
        logits_flat = tf.reshape(logits, [-1, num_classes])
        targets = tf.reshape(targets, [-1])

        # If a custom loss function is given, then use that. Otherwise default
        # cross_ent: [batch_size * (sequence_length)]
        if softmax_loss_function is None:
            cross_ent = nn_ops.sparse_softmax_cross_entropy_with_logits(
                logits=logits_flat, labels=targets)
        else:
            cross_ent = softmax_loss_function(logits_flat, targets)

        # Weigh cross-entropy if wanted
        if weights is not None:
            cross_ent = cross_ent * tf.reshape(weights, [-1])

        # Cross-entropy sum
        cross_ent = tf.reduce_sum(cross_ent)

        # Divide by total weighting
        # TODO: Couldn't you just normalize the weights first?
        if weights is not None:
            total_size = tf.reduce_sum(weights)
            total_size += 1e-12  # to avoid division by zero
            cross_ent /= total_size

        return cross_ent
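A minimal usage sketch for sequence_loss_tensor above (hypothetical sizes: batch of 2, sequences of length 3, 5 classes, with the last step of the second sequence padded), assuming the function definition above is in scope and a TF 1.x environment, since the function itself uses tf.variable_scope.

import tensorflow as tf

logits = tf.zeros([2, 3, 5])                       # [batch_size, sequence_length, num_classes]
targets = tf.zeros([2, 3], dtype=tf.int32)
weights = tf.constant([[1., 1., 1.],
                       [1., 1., 0.]])              # mask out the padded final step
loss = sequence_loss_tensor(logits, targets, num_classes=5, weights=weights)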
Example #39
 def _testHighDim(self, features, labels):
   np_loss, np_backprop = self._npXent(np.array(features), np.array(labels))
   # manually reshape loss
   np_loss = np.reshape(np_loss, np.array(labels).shape)
   with self.test_session(use_gpu=True) as sess:
     loss = nn_ops.sparse_softmax_cross_entropy_with_logits(
         labels=labels, logits=features)
     backprop = loss.op.inputs[0].op.outputs[1]
     tf_loss, tf_backprop = sess.run([loss, backprop])
   self.assertAllCloseAccordingToType(np_loss, tf_loss)
   self.assertAllCloseAccordingToType(np_backprop, tf_backprop)
Example #40
 def testScalarHandling(self):
   with self.test_session(use_gpu=False) as sess:
     with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                  ".*labels must be 1-D.*"):
       labels = array_ops.placeholder(dtypes.int32, shape=[None, 1])
       logits = array_ops.placeholder(dtypes.float32, shape=[None, 3])
       ce = nn_ops.sparse_softmax_cross_entropy_with_logits(
           labels=array_ops.squeeze(labels), logits=logits)
       labels_v2 = np.zeros((1, 1), dtype=np.int32)
       logits_v2 = np.random.randn(1, 3)
       sess.run([ce], feed_dict={labels: labels_v2, logits: logits_v2})
 def testScalarHandling(self):
   with self.test_session(use_gpu=False) as sess:
     with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                  ".*labels must be 1-D.*"):
       labels = array_ops.placeholder(dtypes.int32, shape=[None, 1])
       logits = array_ops.placeholder(dtypes.float32, shape=[None, 3])
       ce = nn_ops.sparse_softmax_cross_entropy_with_logits(
           logits, array_ops.squeeze(labels))
       labels_v2 = np.zeros((1, 1), dtype=np.int32)
       logits_v2 = np.random.randn(1, 3)
       sess.run([ce], feed_dict={labels: labels_v2, logits: logits_v2})
def _sparse_vs_dense_xent_benchmark_sparse(labels, logits):
  # Using sparse_softmax_cross_entropy_with_logits
  labels = labels.astype(np.int64)
  labels = array_ops.identity(labels)
  logits = array_ops.identity(logits)
  crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(
      logits, labels, name="SequenceLoss/CrossEntropy")
  crossent_sum = math_ops.reduce_sum(crossent)
  grads = gradients_impl.gradients([crossent_sum], [logits])[0]

  return (crossent_sum, grads)
Example #43
def _sparse_vs_dense_xent_benchmark_sparse(labels, logits):
    # Using sparse_softmax_cross_entropy_with_logits
    labels = labels.astype(np.int64)
    labels = array_ops.identity(labels)
    logits = array_ops.identity(logits)
    crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(
        logits, labels, name="SequenceLoss/CrossEntropy")
    crossent_sum = math_ops.reduce_sum(crossent)
    grads = gradients_impl.gradients([crossent_sum], [logits])[0]

    return (crossent_sum, grads)
 def testGradient(self):
   with self.test_session(use_gpu=True):
     l = constant_op.constant([3, 0, 1], name="l")
     f = constant_op.constant(
         [0.1, 0.2, 0.3, 0.4, 0.1, 0.4, 0.9, 1.6, 0.1, 0.8, 2.7, 6.4],
         shape=[3, 4],
         dtype=dtypes.float64,
         name="f")
     x = nn_ops.sparse_softmax_cross_entropy_with_logits(f, l, name="xent")
     err = gradient_checker.compute_gradient_error(f, [3, 4], x, [3])
   print("cross entropy gradient err = ", err)
   self.assertLess(err, 5e-8)
Example #45
 def _log_prob(self, k):
   k = ops.convert_to_tensor(k, name="k")
   if self.logits.get_shape()[:-1] == k.get_shape():
     logits = self.logits
   else:
     logits = self.logits * array_ops.ones_like(
         array_ops.expand_dims(k, -1), dtype=self.logits.dtype)
     logits_shape = array_ops.shape(logits)[:-1]
     k *= array_ops.ones(logits_shape, dtype=k.dtype)
     k.set_shape(tensor_shape.TensorShape(logits.get_shape()[:-1]))
   return -nn_ops.sparse_softmax_cross_entropy_with_logits(labels=k,
                                                           logits=logits)
Example #46
  def log_pmf(self, k, name="log_pmf"):
    """Log-probability of class `k`.

    Args:
      k: `int32` or `int64` Tensor with shape = `self.batch_shape()`.
      name: A name for this operation (optional).

    Returns:
      The log-probabilities of the classes indexed by `k`
    """
    with ops.name_scope(self.name):
      k = ops.convert_to_tensor(k, name="k")
      k.set_shape(self.get_batch_shape())
      return -nn_ops.sparse_softmax_cross_entropy_with_logits(
          self.logits, k, name=name)
  def testSecondGradient(self):
    images_placeholder = array_ops.placeholder(dtypes.float32, shape=(3, 2))
    labels_placeholder = array_ops.placeholder(dtypes.int32, shape=(3))
    weights = variables.Variable(random_ops.truncated_normal([2], stddev=1.0))
    weights_with_zeros = array_ops.stack([array_ops.zeros([2]), weights],
                                         axis=1)
    logits = math_ops.matmul(images_placeholder, weights_with_zeros)
    cross_entropy = nn_ops.sparse_softmax_cross_entropy_with_logits(
        labels=labels_placeholder, logits=logits)
    loss = math_ops.reduce_mean(cross_entropy)

    # Taking the second gradient should fail, since it is not
    # yet supported.
    with self.assertRaisesRegexp(LookupError,
                                 "explicitly disabled"):
      _ = gradients_impl.hessians(loss, [weights])
  def make_grouping_predictions(self, input_layer, reuse=None):
    """model that predicts grouping (grouping_actions).

    Args:
      input_layer: group_input_layer
      reuse: reuse

    Returns:
       grouping_actions: actions
       grouping_log_probs: log probabilities corresponding to actions
    """
    with variable_scope.variable_scope(self.hparams.name, reuse=True):
      # input_layer: tensor of size [1, num_ops, hidden_size]
      w_grouping_ff = variable_scope.get_variable("w_grouping_ff")
      w_grouping_softmax = variable_scope.get_variable("w_grouping_softmax")

    batch_size = array_ops.shape(input_layer)[0]
    embedding_dim = array_ops.shape(input_layer)[2]

    reshaped = array_ops.reshape(input_layer,
                                 [batch_size * self.num_ops, embedding_dim])
    ff_output = math_ops.matmul(reshaped, w_grouping_ff)
    logits = math_ops.matmul(ff_output, w_grouping_softmax)
    if self.hparams.logits_std_noise > 0:
      num_in_logits = math_ops.cast(
          array_ops.size(logits), dtype=dtypes.float32)
      avg_norm = math_ops.divide(
          linalg_ops.norm(logits), math_ops.sqrt(num_in_logits))
      logits_noise = random_ops.random_normal(
          array_ops.shape(logits),
          stddev=self.hparams.logits_std_noise * avg_norm)
      logits = control_flow_ops.cond(
          self.global_step > self.hparams.stop_noise_step, lambda: logits,
          lambda: logits + logits_noise)
    logits = array_ops.reshape(logits,
                               [batch_size * self.num_ops, self.num_groups])
    actions = random_ops.multinomial(logits, 1, seed=self.hparams.seed)
    actions = math_ops.to_int32(actions)
    actions = array_ops.reshape(actions, [batch_size, self.num_ops])
    action_label = array_ops.reshape(actions, [-1])
    log_probs = nn_ops.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=action_label)
    log_probs = array_ops.reshape(log_probs, [batch_size, -1])
    log_probs = math_ops.reduce_sum(log_probs, 1)
    grouping_actions = actions
    grouping_log_probs = log_probs
    return grouping_actions, grouping_log_probs
Example #49
  def loss(self, data, labels):
    """The loss to minimize while training."""

    if self.is_regression:
      diff = self.training_inference_graph(data) - math_ops.to_float(labels)
      mean_squared_error = math_ops.reduce_mean(diff * diff)
      root_mean_squared_error = math_ops.sqrt(mean_squared_error, name="loss")
      loss = root_mean_squared_error
    else:
      loss = math_ops.reduce_mean(
          nn_ops.sparse_softmax_cross_entropy_with_logits(
              self.training_inference_graph(data),
              array_ops.squeeze(math_ops.to_int32(labels))),
          name="loss")
    if self.regularizer:
      loss += layers.apply_regularization(self.regularizer,
                                          variables.trainable_variables())
    return loss
Example #50
def sequence_loss_by_example(logits, targets, weights,
                             average_across_timesteps=True,
                             softmax_loss_function=None, name=None):
  """Weighted cross-entropy loss for a sequence of logits (per example).

  Args:
    logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
    targets: List of 1D batch-sized int32 Tensors of the same length as logits.
    weights: List of 1D batch-sized float-Tensors of the same length as logits.
    average_across_timesteps: If set, divide the returned cost by the total
      label weight.
    softmax_loss_function: Function (inputs-batch, labels-batch) -> loss-batch
      to be used instead of the standard softmax (the default if this is None).
    name: Optional name for this operation, default: "sequence_loss_by_example".

  Returns:
    1D batch-sized float Tensor: The log-perplexity for each sequence.

  Raises:
    ValueError: If len(logits) is different from len(targets) or len(weights).
  """
  if len(targets) != len(logits) or len(weights) != len(logits):
    raise ValueError("Lengths of logits, weights, and targets must be the same "
                     "%d, %d, %d." % (len(logits), len(weights), len(targets)))
  with ops.op_scope(logits + targets + weights, name,
                    "sequence_loss_by_example"):
    log_perp_list = []
    for logit, target, weight in zip(logits, targets, weights):
      if softmax_loss_function is None:
        # TODO(irving,ebrevdo): This reshape is needed because
        # sequence_loss_by_example is called with scalars sometimes, which
        # violates our general scalar strictness policy.
        target = array_ops.reshape(target, [-1])
        crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(
            logit, target)
      else:
        crossent = softmax_loss_function(logit, target)
      log_perp_list.append(crossent * weight)
    log_perps = math_ops.add_n(log_perp_list)
    if average_across_timesteps:
      total_size = math_ops.add_n(weights)
      total_size += 1e-12  # Just to avoid division by 0 for all-0 weights.
      log_perps /= total_size
  return log_perps
Example #51
 def fn(x, y):
   return nn_ops.sparse_softmax_cross_entropy_with_logits(logits=x,
                                                          labels=y)[0]
Example #52
0
def sequence_loss(logits,
                  targets,
                  weights,
                  average_across_timesteps=True,
                  average_across_batch=True,
                  softmax_loss_function=None,
                  name=None):
  """Weighted cross-entropy loss for a sequence of logits.

  Depending on the values of `average_across_timesteps` and
  `average_across_batch`, the return Tensor will have rank 0, 1, or 2 as these
  arguments reduce the cross-entropy at each target, which has shape
  `[batch_size, sequence_length]`, over their respective dimensions. For
  example, if `average_across_timesteps` is `True` and `average_across_batch`
  is `False`, then the return Tensor will have shape `[batch_size]`.

  Args:
    logits: A Tensor of shape
      `[batch_size, sequence_length, num_decoder_symbols]` and dtype float.
      The logits correspond to the prediction across all classes at each
      timestep.
    targets: A Tensor of shape `[batch_size, sequence_length]` and dtype
      int. The target represents the true class at each timestep.
    weights: A Tensor of shape `[batch_size, sequence_length]` and dtype
      float. `weights` constitutes the weighting of each prediction in the
      sequence. When using `weights` as masking, set all valid timesteps to 1
      and all padded timesteps to 0, e.g. a mask returned by `tf.sequence_mask`.
    average_across_timesteps: If set, sum the cost across the sequence
      dimension and divide the cost by the total label weight across timesteps.
    average_across_batch: If set, sum the cost across the batch dimension and
      divide the returned cost by the batch size.
    softmax_loss_function: Function (labels, logits) -> loss-batch
      to be used instead of the standard softmax (the default if this is None).
      **Note that to avoid confusion, it is required for the function to accept
      named arguments.**
    name: Optional name for this operation, defaults to "sequence_loss".

  Returns:
    A float Tensor of rank 0, 1, or 2 depending on the
    `average_across_timesteps` and `average_across_batch` arguments. By default,
    it has rank 0 (scalar) and is the weighted average cross-entropy
    (log-perplexity) per symbol.

  Raises:
    ValueError: logits does not have 3 dimensions or targets does not have 2
                dimensions or weights does not have 2 dimensions.
  """
  if len(logits.get_shape()) != 3:
    raise ValueError("Logits must be a "
                     "[batch_size x sequence_length x logits] tensor")
  if len(targets.get_shape()) != 2:
    raise ValueError("Targets must be a [batch_size x sequence_length] "
                     "tensor")
  if len(weights.get_shape()) != 2:
    raise ValueError("Weights must be a [batch_size x sequence_length] "
                     "tensor")
  with ops.name_scope(name, "sequence_loss", [logits, targets, weights]):
    num_classes = array_ops.shape(logits)[2]
    logits_flat = array_ops.reshape(logits, [-1, num_classes])
    targets = array_ops.reshape(targets, [-1])
    if softmax_loss_function is None:
      crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(
          labels=targets, logits=logits_flat)
    else:
      crossent = softmax_loss_function(labels=targets, logits=logits_flat)
    crossent *= array_ops.reshape(weights, [-1])
    if average_across_timesteps and average_across_batch:
      crossent = math_ops.reduce_sum(crossent)
      total_size = math_ops.reduce_sum(weights)
      total_size += 1e-12  # to avoid division by 0 for all-0 weights
      crossent /= total_size
    else:
      batch_size = array_ops.shape(logits)[0]
      sequence_length = array_ops.shape(logits)[1]
      crossent = array_ops.reshape(crossent, [batch_size, sequence_length])
    if average_across_timesteps and not average_across_batch:
      crossent = math_ops.reduce_sum(crossent, axis=[1])
      total_size = math_ops.reduce_sum(weights, axis=[1])
      total_size += 1e-12  # to avoid division by 0 for all-0 weights
      crossent /= total_size
    if not average_across_timesteps and average_across_batch:
      crossent = math_ops.reduce_sum(crossent, axis=[0])
      total_size = math_ops.reduce_sum(weights, axis=[0])
      total_size += 1e-12  # to avoid division by 0 for all-0 weights
      crossent /= total_size
    return crossent
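For the dense sequence_loss above, a hedged usage sketch showing how the two averaging flags control the rank of the result (batch of 2, sequence length 3, vocabulary of 5; shapes and values are illustrative):

# Illustrative shapes only.
import tensorflow as tf

logits = tf.random.normal([2, 3, 5])
targets = tf.constant([[1, 2, 0],
                       [4, 3, 3]], dtype=tf.int32)
# Mask padding: the second sequence has only two valid steps.
weights = tf.sequence_mask([3, 2], maxlen=3, dtype=tf.float32)

scalar_loss = sequence_loss(logits, targets, weights)           # rank 0
per_example = sequence_loss(logits, targets, weights,
                            average_across_batch=False)         # shape [2]
per_timestep = sequence_loss(logits, targets, weights,
                             average_across_timesteps=False)    # shape [3]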
 def testVector(self):
   with self.session(use_gpu=True):
     loss = nn_ops.sparse_softmax_cross_entropy_with_logits(
         labels=constant_op.constant(0), logits=constant_op.constant([1.0]))
     self.assertAllClose(0.0, self.evaluate(loss))
Example #54
0
def sampled_sparse_softmax_loss(weights,
                                biases,
                                labels,
                                inputs,
                                num_sampled,
                                num_classes,
                                sampled_values=None,
                                remove_accidental_hits=True,
                                partition_strategy="mod",
                                name="sampled_sparse_softmax_loss"):
  """Computes and returns the sampled sparse softmax training loss.

  This is a faster way to train a softmax classifier over a huge number of
  classes.

  This operation is for training only.  It is generally an underestimate of
  the full softmax loss.

  A common use case is to use this method for training, and calculate the full
  softmax loss for evaluation or inference. In this case, you must set
  `partition_strategy="div"` for the two losses to be consistent, as in the
  following example:

  ```python
  if mode == "train":
    loss = tf.nn.sampled_sparse_softmax_loss(
        weights=weights,
        biases=biases,
        labels=labels,
        inputs=inputs,
        ...,
        partition_strategy="div")
  elif mode == "eval":
    logits = tf.matmul(inputs, tf.transpose(weights))
    logits = tf.nn.bias_add(logits, biases)
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.squeeze(labels),
        logits=logits)
  ```

  See our [Candidate Sampling Algorithms Reference](https://www.tensorflow.org/extras/candidate_sampling.pdf)

  Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007)
  ([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math.

  Args:
    weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor`
        objects whose concatenation along dimension 0 has shape
        [num_classes, dim].  The (possibly-sharded) class embeddings.
    biases: A `Tensor` of shape `[num_classes]`.  The class biases.
    labels: A `Tensor` of type `int64` and shape `[batch_size, 1]`.
        The index of the single target class for each row of logits.  Note that
        this format differs from the `labels` argument of
        `nn.sparse_softmax_cross_entropy_with_logits`.
    inputs: A `Tensor` of shape `[batch_size, dim]`.  The forward
        activations of the input network.
    num_sampled: An `int`.  The number of classes to randomly sample per batch.
    num_classes: An `int`. The number of possible classes.
    sampled_values: a tuple of (`sampled_candidates`, `true_expected_count`,
        `sampled_expected_count`) returned by a `*_candidate_sampler` function.
        (if None, we default to `log_uniform_candidate_sampler`)
    remove_accidental_hits:  A `bool`.  Whether to remove "accidental hits"
        where a sampled class equals one of the target classes.  Default is
        True.
    partition_strategy: A string specifying the partitioning strategy, relevant
        if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported.
        Default is `"mod"`. See `tf.nn.embedding_lookup` for more details.
    name: A name for the operation (optional).

  Returns:
    A `batch_size` 1-D tensor of per-example sampled softmax losses.

  """
  logits, _ = nn_impl._compute_sampled_logits(
      weights=weights,
      biases=biases,
      labels=labels,
      inputs=inputs,
      num_sampled=num_sampled,
      num_classes=num_classes,
      num_true=1,
      sampled_values=sampled_values,
      subtract_log_q=True,
      remove_accidental_hits=remove_accidental_hits,
      partition_strategy=partition_strategy,
      name=name)

  # There is only one true label. _compute_sampled_logits puts the true logit
  # at index 0.
  labels = array_ops.zeros([array_ops.shape(logits)[0], 1], dtype=dtypes.int64)

  sampled_losses = nn_ops.sparse_softmax_cross_entropy_with_logits(
      labels=array_ops.squeeze(labels), logits=logits)
  # sampled_losses is a [batch_size] tensor.
  return sampled_losses
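A hedged wiring sketch for the function above; the sizes and variable names are made up, the module aliases from the original snippet are assumed to be importable, and partition_strategy="div" follows the consistency advice in the docstring:

# Illustrative usage only; not from the original code.
import tensorflow as tf

num_classes, dim, batch_size = 10000, 128, 32
weights = tf.Variable(tf.random.normal([num_classes, dim]))   # class embeddings
biases = tf.Variable(tf.zeros([num_classes]))
inputs = tf.random.normal([batch_size, dim])                  # forward activations
labels = tf.random.uniform([batch_size, 1], maxval=num_classes,
                           dtype=tf.int64)                    # note the [batch_size, 1] shape

losses = sampled_sparse_softmax_loss(
    weights=weights, biases=biases, labels=labels, inputs=inputs,
    num_sampled=64, num_classes=num_classes,
    partition_strategy="div")        # keeps the sampled and full losses consistent
loss = tf.reduce_mean(losses)        # scalar training objective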
Example #55
0
 def testInt32GPU(self):
   with ops.device('gpu:0'):
     xent = nn_ops.sparse_softmax_cross_entropy_with_logits(
         logits=[[0.0, 0.0]], labels=[0])
   self.assertAllClose(xent, [0.69314718])
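The expected value in the test above follows directly: with two equal logits the softmax assigns probability 0.5 to each class, so the cross-entropy for label 0 is -log(0.5) = log(2) ≈ 0.69314718. A one-line check:

import math
assert abs(math.log(2.0) - 0.69314718) < 1e-8  # -log(0.5) == log(2)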
Example #56
0
def sequence_loss(logits, targets, weights,
                  average_across_timesteps=True, average_across_batch=True,
                  softmax_loss_function=None, name=None):
  """Weighted cross-entropy loss for a sequence of logits (per example).

  Args:
    logits: A 3D Tensor of shape
      [batch_size x sequence_length x num_decoder_symbols] and dtype float.
      The logits correspond to the prediction across all classes at each
      timestep.
    targets: A 2D Tensor of shape [batch_size x sequence_length] and dtype
      int. The target represents the true class at each timestep.
    weights: A 2D Tensor of shape [batch_size x sequence_length] and dtype
      float. Weights constitutes the weighting of each prediction in the
      sequence. When using weights as masking, set all valid timesteps to 1 and
      all padded timesteps to 0.
    average_across_timesteps: If set, sum the cost across the sequence
      dimension and divide the cost by the total label weight across timesteps.
    average_across_batch: If set, sum the cost across the batch dimension and
      divide the returned cost by the batch size.
    softmax_loss_function: Function (labels-batch, inputs-batch) -> loss-batch
      to be used instead of the standard softmax (the default if this is None).
    name: Optional name for this operation, defaults to "sequence_loss".

  Returns:
    A scalar float Tensor: The average log-perplexity per symbol (weighted).

  Raises:
    ValueError: logits does not have 3 dimensions or targets does not have 2
                dimensions or weights does not have 2 dimensions.
  """
  if len(logits.get_shape()) != 3:
    raise ValueError("Logits must be a "
                     "[batch_size x sequence_length x logits] tensor")
  if len(targets.get_shape()) != 2:
    raise ValueError("Targets must be a [batch_size x sequence_length] "
                     "tensor")
  if len(weights.get_shape()) != 2:
    raise ValueError("Weights must be a [batch_size x sequence_length] "
                     "tensor")
  with ops.name_scope(name, "sequence_loss", [logits, targets, weights]):
    num_classes = array_ops.shape(logits)[2]
    probs_flat = array_ops.reshape(logits, [-1, num_classes])
    targets = array_ops.reshape(targets, [-1])
    if softmax_loss_function is None:
      crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(
        labels=targets, logits=probs_flat)
    else:
      crossent = softmax_loss_function(targets, probs_flat)
    crossent = crossent * array_ops.reshape(weights, [-1])
    if average_across_timesteps and average_across_batch:
      crossent = math_ops.reduce_sum(crossent)
      total_size = math_ops.reduce_sum(weights)
      total_size += 1e-12 # to avoid division by 0 for all-0 weights
      crossent /= total_size
    else:
      batch_size = array_ops.shape(logits)[0]
      sequence_length = array_ops.shape(logits)[1]
      crossent = array_ops.reshape(crossent, [batch_size, sequence_length])
    if average_across_timesteps and not average_across_batch:
      crossent = math_ops.reduce_sum(crossent, axis=[1])
      total_size = math_ops.reduce_sum(weights, axis=[1])
      total_size += 1e-12 # to avoid division by 0 for all-0 weights
      crossent /= total_size
    if not average_across_timesteps and average_across_batch:
      crossent = math_ops.reduce_sum(crossent, axis=[0])
      total_size = math_ops.reduce_sum(weights, axis=[0])
      total_size += 1e-12 # to avoid division by 0 for all-0 weights
      crossent /= total_size
    return crossent
 def testVector(self):
   with self.test_session(use_gpu=True):
     loss = nn_ops.sparse_softmax_cross_entropy_with_logits(
          labels=constant_op.constant(0), logits=constant_op.constant([1.0]))
     self.assertAllClose(0.0, loss.eval())
 def testScalar(self):
   with self.test_session(use_gpu=True):
     with self.assertRaisesRegexp(ValueError, ".*Logits cannot be scalars*"):
       nn_ops.sparse_softmax_cross_entropy_with_logits(
            labels=constant_op.constant(0), logits=constant_op.constant(1.0))
 def testShapeMismatch(self):
   with self.test_session(use_gpu=True):
     with self.assertRaisesRegexp(ValueError, ".*Rank mismatch:*"):
       nn_ops.sparse_softmax_cross_entropy_with_logits(
            labels=[[0, 2]], logits=[[0., 1.], [2., 3.], [2., 3.]])
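The mismatch in the last test: the logits have rank 2 (shape [3, 2]) while the labels also have rank 2 (shape [1, 2]), but the op requires labels of rank exactly one less than the logits, i.e. one class index per row of logits. A corrected call for the same logits, using the nn_ops alias from the tests above and illustrative label values:

# Labels of shape [3] (one class id per logits row); the values are illustrative.
xent = nn_ops.sparse_softmax_cross_entropy_with_logits(
    labels=[0, 1, 1], logits=[[0., 1.], [2., 3.], [2., 3.]])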