def _ComputeSampledLogitsTF(self, weights, biases, hidden_acts, labels,
                              num_sampled, num_classes, num_true, sampled_vals,
                              subtract_log_q, remove_accidental_hits,
                              name="sampled_loss_TF"):
    # Should be called from within a `with test_session():` block
    if isinstance(weights, list):
      weights_tf = [tf.constant(shard) for shard in weights]
    else:
      weights_tf = tf.constant(weights)
    biases_tf = tf.constant(biases)
    hidden_acts_tf = tf.constant(hidden_acts,
                                 shape=(self._batch_size, self._dim))
    labels_tf = tf.constant(labels,
                            dtype=tf.int64,
                            shape=(self._batch_size, num_true))

    pred_logits_tf, pred_labels_tf = _compute_sampled_logits(
        weights_tf,
        biases_tf,
        hidden_acts_tf,
        labels_tf,
        num_sampled,
        num_classes,
        num_true,
        sampled_vals,
        subtract_log_q=subtract_log_q,
        remove_accidental_hits=remove_accidental_hits,
        name=name)
    return pred_logits_tf, pred_labels_tf
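A minimal sketch (not from the original test) of how this helper might be driven; every name not defined here is an assumption, e.g. the inputs are expected to come from a generator such as the `_GenerateTestData` used in the later examples:

# Hypothetical driver for the helper above (TF 1.x graph mode); weights, biases,
# hidden_acts, labels and sampled_vals are assumed to be prepared beforehand.
with self.test_session() as sess:
  logits_tf, labels_tf = self._ComputeSampledLogitsTF(
      weights, biases, hidden_acts, labels,
      num_sampled=4, num_classes=5, num_true=1,
      sampled_vals=sampled_vals,
      subtract_log_q=False, remove_accidental_hits=False)
  got_logits, got_labels = sess.run([logits_tf, labels_tf])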
Example #2
 def testSubtractLogQ(self):
   """With subtract_log_q, no accidental hit removal."""
   np.random.seed(0)
   num_classes = 5
   batch_size = 3
   with self.test_session() as sess:
     for num_true in range(1, 5):
       labels = np.random.randint(
           low=0, high=num_classes, size=batch_size * num_true)
       (weights, biases, hidden_acts, sampled_vals, exp_logits,
        exp_labels) = self._GenerateTestData(
            num_classes=num_classes,
            dim=10,
            batch_size=batch_size,
            num_true=num_true,
            labels=labels,
            sampled=[1, 0, 2, 3],
            subtract_log_q=True)
       logits_tensor, labels_tensor = _compute_sampled_logits(
           weights=constant_op.constant(weights),
           biases=constant_op.constant(biases),
           labels=constant_op.constant(
               labels, dtype=dtypes.int64, shape=(batch_size, num_true)),
           inputs=constant_op.constant(hidden_acts),
           num_sampled=4,
           num_classes=num_classes,
           num_true=num_true,
           sampled_values=sampled_vals,
           subtract_log_q=True,
           remove_accidental_hits=False,
           partition_strategy="div",
           name="sampled_logits_subtract_log_q_num_true_%d" % num_true)
       got_logits, got_labels = sess.run([logits_tensor, labels_tensor])
       self.assertAllClose(exp_logits, got_logits, self._eps)
       self.assertAllClose(exp_labels, got_labels, self._eps)
Example #3
  def testShapes(self):
    np.random.seed(0)
    num_classes = 5
    batch_size = 3

    for num_true in range(1, 5):
      labels = np.random.randint(
          low=0, high=num_classes, size=batch_size * num_true)
      (weights, biases, hidden_acts, sampled_vals, exp_logits,
       exp_labels) = self._GenerateTestData(
           num_classes=num_classes,
           dim=10,
           batch_size=batch_size,
           num_true=num_true,
           labels=labels,
           sampled=[1, 0, 2, 3],
           subtract_log_q=False)
      logits_tensor, labels_tensor = _compute_sampled_logits(
          weights=constant_op.constant(weights),
          biases=constant_op.constant(biases),
          labels=constant_op.constant(
              labels, dtype=dtypes.int64, shape=(batch_size, num_true)),
          inputs=constant_op.constant(hidden_acts),
          num_sampled=4,
          num_classes=num_classes,
          num_true=num_true,
          sampled_values=sampled_vals,
          subtract_log_q=False,
          remove_accidental_hits=False,
          partition_strategy="div",
          name="sampled_logits_basic_num_true_%d" % num_true)
      got_logits, got_labels = self.evaluate([logits_tensor, labels_tensor])
      self.assertEqual(exp_logits.shape, got_logits.shape, self._eps)
      self.assertEqual(exp_labels.shape, got_labels.shape, self._eps)
Example #4
    def ranking_loss(self, labels, inputs):

        logits, labels = _compute_sampled_logits(
            weights=self.w,
            biases=self.b,
            labels=labels,
            inputs=inputs,
            num_sampled=self.num_sampled,
            num_classes=self.num_classes,
            num_true=self.num_true,
            sampled_values=self.sampled_values,
            subtract_log_q=True,
            remove_accidental_hits=self.remove_accidental_hits,
            partition_strategy=self.partition_strategy)
        # prevent backpropagation through labels
        labels = array_ops.stop_gradient(labels, name="labels_stop_gradient")
        # split the true logit from the sampled logits
        true_logit = tf.gather(logits, [0], axis=1)
        sampled_logit = tf.gather(logits,
                                  [i + 1 for i in range(self.num_sampled)],
                                  axis=1)
        # compute top1 loss
        loss = tf.math.sigmoid(sampled_logit - true_logit) + tf.math.sigmoid(
            tf.square(sampled_logit))
        loss = tf.reduce_mean(loss, axis=1, keepdims=True)
        # predicted score
        predict = tf.math.sigmoid(true_logit)
        return tf.concat([loss, predict], axis=1)
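For reference, the loss assembled above is the TOP1 ranking loss over the $N_S$ sampled negatives, with $r_t$ the true logit and $r_s$ a sampled logit:

$L = \frac{1}{N_S} \sum_{s=1}^{N_S} \big( \sigma(r_s - r_t) + \sigma(r_s^2) \big), \qquad \text{prediction} = \sigma(r_t)$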
Example #6
 def neg_loss(self,
          weights,
          biases,
          labels,
          inputs,
          num_sampled,
          num_classes,
          num_true=1,
          sampled_values=None,
          remove_accidental_hits=True,
          partition_strategy="mod",
          name="neg_loss"):
     logits, labels = nn_impl._compute_sampled_logits(
         weights=weights,
         biases=biases,
         labels=labels,
         inputs=inputs,
         num_sampled=num_sampled,
         num_classes=num_classes,
         num_true=num_true,
         sampled_values=sampled_values,
         subtract_log_q=False,
         remove_accidental_hits=remove_accidental_hits,
         partition_strategy=partition_strategy,
         name=name)
     sampled_losses = tf.nn.sigmoid_cross_entropy_with_logits(
         labels=labels, logits=logits, name="sampled_losses")
     # sampled_losses is batch_size x {true_loss, sampled_losses...}
     # We sum out true and sampled losses.
     return nn_impl._sum_rows(sampled_losses)
Example #7
    def init_graph(self):
        # Initialize the feed placeholders. Placeholder names must be unique,
        # and nothing about a placeholder may be changed after it is created.
        self.video_ids_ph = tf.placeholder(tf.int32, shape=[None, None], name='video_ids')
        self.search_id_ph = tf.placeholder(tf.int32, shape=[None], name='search_id')
        self.age_ph = tf.placeholder(tf.float32, shape=[None], name='age')
        self.gender_ph = tf.placeholder(tf.float32, shape=[None], name='gender')
        self.label_ph = tf.placeholder(tf.float32, shape=[None], name='label_ph')

        # Initialize the video embeddings and the search embedding, then concat both embeddings with age and gender.
        video_embedding = tf.get_variable('video_embedding', shape=[self.video_total_num], dtype=tf.float32,
                                          initializer=tf.variance_scaling_initializer())
        video_vecs = tf.nn.embedding_lookup(video_embedding, self.video_ids_ph)
        search_embedding = tf.get_variable(name='search_embedding', shape=[self.search_total_num], dtype=tf.float32,
                                           initializer=tf.variance_scaling_initializer())
        search_vec = tf.nn.embedding_lookup(search_embedding, self.search_id_ph)
        input = tf.concat([tf.reshape(tf.reduce_mean(video_vecs, axis=1), shape=[-1, 1]),
                           tf.reshape(search_vec, shape=[-1, 1]), tf.reshape(self.age_ph, shape=[-1, 1]),
                           tf.reshape(self.gender_ph, shape=[-1, 1])], axis=1)

        # Run the input through several dense layers; the depth is chosen based on mAP.
        for i in range(self.depth):
            input = tf.layers.dense(inputs=input, units=self.units_list[i],
                                    kernel_regularizer=layers.l2_regularizer(0.001), activation=tf.nn.relu,
                                    name='fc{}'.format(i), trainable=self.is_training)
            input = tf.layers.batch_normalization(input, training=self.is_training, name='fc{}_bn'.format(i))
        output = input
        # Initialize the weights and biases for the class embeddings (the classes are the per-video labels, on the order of millions in the paper).
        weights = tf.get_variable('soft_weight', shape=[self.class_distinct, 128],
                                  initializer=tf.variance_scaling_initializer())
        biases = tf.get_variable('soft_bias', shape=[self.class_distinct],
                                 initializer=tf.variance_scaling_initializer())
        if not self.is_training:
            # Compute the predictions.
            self.logits_out = tf.matmul(output, tf.transpose(weights))
        else:
            # labels must be 2-D, while biases are 1-D.
            self.labels = tf.reshape(self.label_ph, shape=[-1, 1])
            # Compute the loss. num_true=1 means each example has exactly one positive class, whose one-hot value is 1.
            self.logits_out, self.labels_out = nn_impl._compute_sampled_logits(weights=weights, biases=biases,
                                                                               labels=self.labels,
                                                                               inputs=input, num_sampled=100,
                                                                               num_classes=self.class_distinct,
                                                                               num_true=1,
                                                                               sampled_values=None,
                                                                               remove_accidental_hits=True,
                                                                               partition_strategy="div",
                                                                               name="sampled_softmax_loss",
                                                                               seed=None)
            labels = array_ops.stop_gradient(self.labels_out, name="labels_stop_gradient")
            sampled_losses = nn_ops.softmax_cross_entropy_with_logits_v2(labels=labels, logits=self.logits_out)
            self.loss = tf.reduce_mean(sampled_losses)
            # Build the gradient descent optimizer.
            gradient_descent_optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
            train_var = tf.trainable_variables()
            clip_gradients, _ = tf.clip_by_global_norm(tf.gradients(self.loss, train_var), 5)
            self.gradient_descent = gradient_descent_optimizer.apply_gradients(zip(clip_gradients, train_var),
                                                                               global_step=self.global_step)
Example #8
 def testAccidentalHitRemoval(self):
     """With accidental hit removal, no subtract_log_q."""
     np.random.seed(0)
     num_classes = 5
     batch_size = 3
     sampled = [1, 0, 2, 3]
     with self.test_session():
         for num_true in range(1, 5):
             labels = np.random.randint(low=0,
                                        high=num_classes,
                                        size=batch_size * num_true)
             (weights, biases, hidden_acts, sampled_vals, _,
              _) = self._GenerateTestData(num_classes=num_classes,
                                          dim=10,
                                          batch_size=batch_size,
                                          num_true=num_true,
                                          labels=labels,
                                          sampled=sampled,
                                          subtract_log_q=False)
             logits_tensor, _ = _compute_sampled_logits(
                 weights=constant_op.constant(weights),
                 biases=constant_op.constant(biases),
                 labels=constant_op.constant(labels,
                                             dtype=dtypes.int64,
                                             shape=(batch_size, num_true)),
                 inputs=constant_op.constant(hidden_acts),
                 num_sampled=len(sampled),
                 num_classes=num_classes,
                 num_true=num_true,
                 sampled_values=sampled_vals,
                 subtract_log_q=False,
                 remove_accidental_hits=True,
                 partition_strategy="div",
                 colocate_logits=False,
                 name="sampled_logits_accidental_hit_removal_num_true_%d" %
                 num_true)
             # Test that the exponentiated logits of accidental hits are near 0.
             # First we need to find the hits in this random test run:
             labels_reshape = labels.reshape((batch_size, num_true))
             got_logits = logits_tensor.eval()
             for row in xrange(batch_size):
                 row_labels = labels_reshape[row, :]
                 for col in xrange(len(sampled)):
                     if sampled[col] in row_labels:
                         # We need to add the num_true_test offset into logits_*
                         self.assertNear(
                             np.exp(got_logits[row, col + num_true]), 0.,
                             self._eps)
Example #9
def sampled_sigmoid_loss(weights, biases, inputs, labels, num_sampled,
                         num_classes, num_true=2,
                         sampled_values=None,
                         remove_accidental_hits=True,
                         partition_strategy="mod",
                         name="sampled_softmax_loss"):
    logits, labels = _compute_sampled_logits(
        weights, biases, inputs, labels, num_sampled, num_classes,
        num_true=num_true,
        sampled_values=sampled_values,
        subtract_log_q=True,
        remove_accidental_hits=remove_accidental_hits,
        partition_strategy=partition_strategy,
        name=name)
    sampled_losses = nn_ops.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels)
    return sampled_losses
Example #10
 def testAccidentalHitRemoval(self):
   """With accidental hit removal, no subtract_log_q."""
   np.random.seed(0)
   num_classes = 5
   batch_size = 3
   sampled = [1, 0, 2, 3]
   with self.test_session():
     for num_true in range(1, 5):
       labels = np.random.randint(
           low=0, high=num_classes, size=batch_size * num_true)
       (weights, biases, hidden_acts, sampled_vals, _,
        _) = self._GenerateTestData(
            num_classes=num_classes,
            dim=10,
            batch_size=batch_size,
            num_true=num_true,
            labels=labels,
            sampled=sampled,
            subtract_log_q=False)
       logits_tensor, _ = _compute_sampled_logits(
           weights=constant_op.constant(weights),
           biases=constant_op.constant(biases),
           labels=constant_op.constant(
               labels, dtype=dtypes.int64, shape=(batch_size, num_true)),
           inputs=constant_op.constant(hidden_acts),
           num_sampled=len(sampled),
           num_classes=num_classes,
           num_true=num_true,
           sampled_values=sampled_vals,
           subtract_log_q=False,
           remove_accidental_hits=True,
           partition_strategy="div",
           colocate_logits=False,
           name="sampled_logits_accidental_hit_removal_num_true_%d" % num_true)
       # Test that the exponentiated logits of accidental hits are near 0.
       # First we need to find the hits in this random test run:
       labels_reshape = labels.reshape((batch_size, num_true))
       got_logits = logits_tensor.eval()
       for row in xrange(batch_size):
         row_labels = labels_reshape[row, :]
         for col in xrange(len(sampled)):
           if sampled[col] in row_labels:
             # We need to add the num_true_test offset into logits_*
             self.assertNear(
                 np.exp(got_logits[row, col + num_true]), 0., self._eps)
Example #11
 def testShardedColocatedLogits(self):
     """Sharded weights and biases and with colocated logit computation."""
     np.random.seed(0)
     num_classes = 5
     batch_size = 3
     with self.test_session() as sess:
         for num_true in range(1, 5):
             labels = np.random.randint(low=0,
                                        high=num_classes,
                                        size=batch_size * num_true)
             (weights, biases, hidden_acts, sampled_vals, exp_logits,
              exp_labels) = self._GenerateTestData(num_classes=num_classes,
                                                   dim=10,
                                                   batch_size=batch_size,
                                                   num_true=num_true,
                                                   labels=labels,
                                                   sampled=[1, 0, 2, 3],
                                                   subtract_log_q=False)
             weight_shards, bias_shards = self._ShardTestEmbeddings(
                 weights, biases, num_shards=3)
             logits_tensor, labels_tensor = _compute_sampled_logits(
                 weights=[
                     constant_op.constant(shard) for shard in weight_shards
                 ],
                 biases=[
                     constant_op.constant(shard) for shard in bias_shards
                 ],
                 labels=constant_op.constant(labels,
                                             dtype=dtypes.int64,
                                             shape=(batch_size, num_true)),
                 inputs=constant_op.constant(hidden_acts),
                 num_sampled=4,
                 num_classes=num_classes,
                 num_true=num_true,
                 sampled_values=sampled_vals,
                 subtract_log_q=False,
                 remove_accidental_hits=False,
                 partition_strategy="div",
                 colocate_logits=True,
                 name="sampled_logits_sharded_colocated_num_true_%d" %
                 num_true)
             got_logits, got_labels = sess.run(
                 [logits_tensor, labels_tensor])
             self.assertAllClose(exp_logits, got_logits, self._eps)
             self.assertAllClose(exp_labels, got_labels, self._eps)
Example #12
    def cost(weights, biases, labels, inputs, num_sampled, num_classes):
        sampled_values = sampler(
            true_classes=labels,
            num_true=1,
            num_sampled=num_sampled,
            unique=True,
            range_max=num_classes,
        )

        logits, labels = _compute_sampled_logits(weights=weights,
                                                 biases=biases,
                                                 labels=labels,
                                                 inputs=inputs,
                                                 num_sampled=num_sampled,
                                                 num_classes=num_classes,
                                                 num_true=1,
                                                 sampled_values=sampled_values,
                                                 subtract_log_q=subtract_log_q)

        return cost_function(labels=labels,
                             logits=logits,
                             num_classes=num_classes)
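The names `sampler`, `subtract_log_q`, and `cost_function` are captured from the enclosing scope, which is not shown; one plausible, purely illustrative binding (assuming `import tensorflow as tf`) is:

# Illustrative bindings only; the original enclosing scope is not shown.
sampler = tf.nn.log_uniform_candidate_sampler  # any *_candidate_sampler works
subtract_log_q = True

def cost_function(labels, logits, num_classes):
    # labels and logits are both [batch_size, num_true + num_sampled];
    # num_classes is unused in this simple variant.
    return tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits))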
Example #13
    def build_noexclusive_sampled_outputs(self, inputs, num_classes):
        with tf.name_scope("output"):
            #weights = tf.get_variable("weights",shape=[num_classes, sum(self.num_filters)],dtype=tf.float32,\
            #        initializer=tf.contrib.layers.xavier_initializer())
            #biases = tf.get_variable("biases",shape=[num_classes], dtype=tf.float32,\
            #        initializer=tf.constant_initializer(0.2))
            weights = tf.get_variable("weights",
                                      shape=[num_classes, self.repr_len],
                                      dtype=tf.float32)
            biases = tf.get_variable("biases",
                                     shape=[num_classes],
                                     dtype=tf.float32)
            tf.summary.histogram('weights', weights)
            tf.summary.histogram('biases', biases)

        # since the number of classes is large, use sampled softmax instead of a dense layer + softmax
        with tf.name_scope("loss"):
            tags_prob = tf.pad(self.input_y_prob,
                               [[0, 0], [0, config.num_sampled]])
            out_logits, out_labels = _compute_sampled_logits(
                weights, biases, self.input_y, inputs,
                config.num_sampled, num_classes, num_true=config.max_tags)
            # TODO: check that out_labels keeps the same order as the input
            weighted_out_labels = out_labels * tags_prob * config.max_tags
            # self.out_labels = weighted_out_labels
            self.loss = tf.reduce_sum(
                tf.nn.softmax_cross_entropy_with_logits(
                    logits=out_logits, labels=weighted_out_labels))

        with tf.name_scope("outputs"):
            logits = tf.nn.softmax(
                tf.matmul(inputs, tf.transpose(weights)) + biases)
            self.output_values, self.ouput_indexs = tf.nn.top_k(
                logits, config.topn)

        with tf.name_scope("score"):
            self.score = self.loss / tf.cast(self.batch_size, tf.float32)
            #self.accuracy = tf.reduce_sum( self.top_prob )

        tf.summary.scalar('loss', self.loss)
Example #14
 def testShardedColocatedLogits(self):
   """Sharded weights and biases and with colocated logit computation."""
   np.random.seed(0)
   num_classes = 5
   batch_size = 3
   with self.test_session() as sess:
     for num_true in range(1, 5):
       labels = np.random.randint(
           low=0, high=num_classes, size=batch_size * num_true)
       (weights, biases, hidden_acts, sampled_vals, exp_logits,
        exp_labels) = self._GenerateTestData(
            num_classes=num_classes,
            dim=10,
            batch_size=batch_size,
            num_true=num_true,
            labels=labels,
            sampled=[1, 0, 2, 3],
            subtract_log_q=False)
       weight_shards, bias_shards = self._ShardTestEmbeddings(
           weights, biases, num_shards=3)
       logits_tensor, labels_tensor = _compute_sampled_logits(
           weights=[constant_op.constant(shard) for shard in weight_shards],
           biases=[constant_op.constant(shard) for shard in bias_shards],
           labels=constant_op.constant(
               labels, dtype=dtypes.int64, shape=(batch_size, num_true)),
           inputs=constant_op.constant(hidden_acts),
           num_sampled=4,
           num_classes=num_classes,
           num_true=num_true,
           sampled_values=sampled_vals,
           subtract_log_q=False,
           remove_accidental_hits=False,
           partition_strategy="div",
           colocate_logits=True,
           name="sampled_logits_sharded_colocated_num_true_%d" % num_true)
       got_logits, got_labels = sess.run([logits_tensor, labels_tensor])
       self.assertAllClose(exp_logits, got_logits, self._eps)
       self.assertAllClose(exp_labels, got_labels, self._eps)
Example #15
def sampled_sparse_softmax_loss(weights,
                                biases,
                                labels,
                                inputs,
                                num_sampled,
                                num_classes,
                                sampled_values=None,
                                remove_accidental_hits=True,
                                partition_strategy="mod",
                                name="sampled_sparse_softmax_loss"):
  """Computes and returns the sampled sparse softmax training loss.

  This is a faster way to train a softmax classifier over a huge number of
  classes.

  This operation is for training only.  It is generally an underestimate of
  the full softmax loss.

  A common use case is to use this method for training, and calculate the full
  softmax loss for evaluation or inference. In this case, you must set
  `partition_strategy="div"` for the two losses to be consistent, as in the
  following example:

  ```python
  if mode == "train":
    loss = tf.nn.sampled_sparse_softmax_loss(
        weights=weights,
        biases=biases,
        labels=labels,
        inputs=inputs,
        ...,
        partition_strategy="div")
  elif mode == "eval":
    logits = tf.matmul(inputs, tf.transpose(weights))
    logits = tf.nn.bias_add(logits, biases)
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.squeeze(labels),
        logits=logits)
  ```

  See our [Candidate Sampling Algorithms Reference]
  (https://www.tensorflow.org/extras/candidate_sampling.pdf)

  Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007)
  ([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math.

  Args:
    weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor`
        objects whose concatenation along dimension 0 has shape
        [num_classes, dim].  The (possibly-sharded) class embeddings.
    biases: A `Tensor` of shape `[num_classes]`.  The class biases.
    labels: A `Tensor` of type `int64` and shape `[batch_size, 1]`.
        The index of the single target class for each row of logits.  Note that
        this format differs from the `labels` argument of
        `nn.sparse_softmax_cross_entropy_with_logits`.
    inputs: A `Tensor` of shape `[batch_size, dim]`.  The forward
        activations of the input network.
    num_sampled: An `int`.  The number of classes to randomly sample per batch.
    num_classes: An `int`. The number of possible classes.
    sampled_values: a tuple of (`sampled_candidates`, `true_expected_count`,
        `sampled_expected_count`) returned by a `*_candidate_sampler` function.
        (if None, we default to `log_uniform_candidate_sampler`)
    remove_accidental_hits:  A `bool`.  whether to remove "accidental hits"
        where a sampled class equals one of the target classes.  Default is
        True.
    partition_strategy: A string specifying the partitioning strategy, relevant
        if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported.
        Default is `"mod"`. See `tf.nn.embedding_lookup` for more details.
    name: A name for the operation (optional).

  Returns:
    A `batch_size` 1-D tensor of per-example sampled softmax losses.

  """
  logits, _ = nn_impl._compute_sampled_logits(
      weights=weights,
      biases=biases,
      labels=labels,
      inputs=inputs,
      num_sampled=num_sampled,
      num_classes=num_classes,
      num_true=1,
      sampled_values=sampled_values,
      subtract_log_q=True,
      remove_accidental_hits=remove_accidental_hits,
      partition_strategy=partition_strategy,
      name=name)

  # There is only one true label. _compute_sampled_logits puts the true logit
  # at index 0.
  labels = array_ops.zeros([array_ops.shape(logits)[0], 1], dtype=dtypes.int64)

  sampled_losses = nn_ops.sparse_softmax_cross_entropy_with_logits(
      labels=array_ops.squeeze(labels), logits=logits)
  # sampled_losses is a [batch_size] tensor.
  return sampled_losses
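A minimal usage sketch for the function above (hypothetical shapes, TF 1.x graph mode; assumes `import tensorflow as tf` in addition to the `nn_impl`, `array_ops`, `nn_ops`, and `dtypes` modules it relies on):

# Hypothetical setup: 1,000 classes, 64-dim inputs, batch of 32.
softmax_w = tf.get_variable("softmax_w", [1000, 64])  # [num_classes, dim]
softmax_b = tf.get_variable("softmax_b", [1000])      # [num_classes]
inputs = tf.placeholder(tf.float32, [32, 64])         # [batch_size, dim]
labels = tf.placeholder(tf.int64, [32, 1])            # [batch_size, 1]

per_example_loss = sampled_sparse_softmax_loss(
    weights=softmax_w,
    biases=softmax_b,
    labels=labels,
    inputs=inputs,
    num_sampled=20,
    num_classes=1000,
    partition_strategy="div")
train_loss = tf.reduce_mean(per_example_loss)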
Example #16
def sampled_sparse_softmax_loss(weights,
                                biases,
                                labels,
                                inputs,
                                num_sampled,
                                num_classes,
                                sampled_values=None,
                                remove_accidental_hits=True,
                                partition_strategy="mod",
                                name="sampled_sparse_softmax_loss"):
    """Computes and returns the sampled sparse softmax training loss.

  This is a faster way to train a softmax classifier over a huge number of
  classes.

  This operation is for training only.  It is generally an underestimate of
  the full softmax loss.

  A common use case is to use this method for training, and calculate the full
  softmax loss for evaluation or inference. In this case, you must set
  `partition_strategy="div"` for the two losses to be consistent, as in the
  following example:

  ```python
  if mode == "train":
    loss = tf.nn.sampled_sparse_softmax_loss(
        weights=weights,
        biases=biases,
        labels=labels,
        inputs=inputs,
        ...,
        partition_strategy="div")
  elif mode == "eval":
    logits = tf.matmul(inputs, tf.transpose(weights))
    logits = tf.nn.bias_add(logits, biases)
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.squeeze(labels),
        logits=logits)
  ```

  See our [Candidate Sampling Algorithms Reference]
  (https://www.tensorflow.org/extras/candidate_sampling.pdf)

  Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007)
  ([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math.

  Args:
    weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor`
        objects whose concatenation along dimension 0 has shape
        [num_classes, dim].  The (possibly-sharded) class embeddings.
    biases: A `Tensor` of shape `[num_classes]`.  The class biases.
    labels: A `Tensor` of type `int64` and shape `[batch_size, 1]`.
        The index of the single target class for each row of logits.  Note that
        this format differs from the `labels` argument of
        `nn.sparse_softmax_cross_entropy_with_logits`.
    inputs: A `Tensor` of shape `[batch_size, dim]`.  The forward
        activations of the input network.
    num_sampled: An `int`.  The number of classes to randomly sample per batch.
    num_classes: An `int`. The number of possible classes.
    sampled_values: a tuple of (`sampled_candidates`, `true_expected_count`,
        `sampled_expected_count`) returned by a `*_candidate_sampler` function.
        (if None, we default to `log_uniform_candidate_sampler`)
    remove_accidental_hits:  A `bool`.  whether to remove "accidental hits"
        where a sampled class equals one of the target classes.  Default is
        True.
    partition_strategy: A string specifying the partitioning strategy, relevant
        if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported.
        Default is `"mod"`. See `tf.nn.embedding_lookup` for more details.
    name: A name for the operation (optional).

  Returns:
    A `batch_size` 1-D tensor of per-example sampled softmax losses.

  """
    logits, _ = nn_impl._compute_sampled_logits(
        weights=weights,
        biases=biases,
        labels=labels,
        inputs=inputs,
        num_sampled=num_sampled,
        num_classes=num_classes,
        num_true=1,
        sampled_values=sampled_values,
        subtract_log_q=True,
        remove_accidental_hits=remove_accidental_hits,
        partition_strategy=partition_strategy,
        name=name)

    # There is only one true label. _compute_sampled_logits puts the true logit
    # at index 0.
    labels = array_ops.zeros([array_ops.shape(logits)[0], 1],
                             dtype=dtypes.int64)

    sampled_losses = nn_ops.sparse_softmax_cross_entropy_with_logits(
        labels=array_ops.squeeze(labels), logits=logits)
    # sampled_losses is a [batch_size] tensor.
    return sampled_losses
Example #17
def TFNCELoss(X, target_word, L):
    tf.compat.v1.disable_eager_execution()
    in_embed = tf.compat.v1.placeholder(tf.float32, shape=X.shape)
    in_bias = tf.compat.v1.placeholder(tf.float32, shape=L.b.flatten().shape)
    in_weights = tf.compat.v1.placeholder(tf.float32,
                                          shape=L.W.transpose().shape)
    in_target_word = tf.compat.v1.placeholder(tf.int64)
    in_neg_samples = tf.compat.v1.placeholder(tf.int32)
    in_target_prob = tf.compat.v1.placeholder(tf.float32)
    in_neg_samp_prob = tf.compat.v1.placeholder(tf.float32)

    feed = {
        in_embed: X,
        in_weights: L.W.transpose(),
        in_target_word: target_word,
        in_bias: L.b.flatten(),
        in_neg_samples: L.derived_variables["noise_samples"][0],
        in_target_prob: L.derived_variables["noise_samples"][1],
        in_neg_samp_prob: L.derived_variables["noise_samples"][2],
    }

    nce_unreduced = tf.nn.nce_loss(
        weights=in_weights,
        biases=in_bias,
        labels=in_target_word,
        inputs=in_embed,
        sampled_values=(in_neg_samples, in_target_prob, in_neg_samp_prob),
        num_sampled=L.num_negative_samples,
        num_classes=L.n_classes,
    )

    loss = tf.reduce_sum(nce_unreduced)
    dLdW = tf.gradients(loss, [in_weights])[0]
    dLdb = tf.gradients(loss, [in_bias])[0]
    dLdX = tf.gradients(loss, [in_embed])[0]

    sampled_logits, sampled_labels = _compute_sampled_logits(
        weights=in_weights,
        biases=in_bias,
        labels=in_target_word,
        inputs=in_embed,
        sampled_values=(in_neg_samples, in_target_prob, in_neg_samp_prob),
        num_sampled=L.num_negative_samples,
        num_classes=L.n_classes,
        num_true=1,
        subtract_log_q=True,
    )

    sampled_losses = sigmoid_cross_entropy_with_logits(labels=sampled_labels,
                                                       logits=sampled_logits)

    with tf.compat.v1.Session() as session:
        session.run(tf.compat.v1.global_variables_initializer())
        (
            _final_loss,
            _nce_unreduced,
            _dLdW,
            _dLdb,
            _dLdX,
            _sampled_logits,
            _sampled_labels,
            _sampled_losses,
        ) = session.run(
            [
                loss,
                nce_unreduced,
                dLdW,
                dLdb,
                dLdX,
                sampled_logits,
                sampled_labels,
                sampled_losses,
            ],
            feed_dict=feed,
        )
    tf.compat.v1.reset_default_graph()

    return {
        "final_loss": _final_loss,
        "nce_unreduced": _nce_unreduced,
        "dLdW": _dLdW,
        "dLdb": _dLdb,
        "dLdX": _dLdX,
        "out_logits": _sampled_logits,
        "out_labels": _sampled_labels,
        "sampled_loss": _sampled_losses,
    }
Example #18
    def __init__(self, global_config, sample, info_input_embeddings,
                 is_training):
        """
        Define the model structure and initialize its parameters.
        :param global_config:
        """
        self.model_config = global_config

        self.feature_value_map = {}
        for i, feature in enumerate(global_config.feature_index_type_map):
            self.feature_value_map[feature] = sample[i]

        def _get_tensor_list(name, array, shard_num):
            tensor_list = []
            array_list = data_utils.shard_array(array, shard_num)
            for i, array in enumerate(array_list):
                tensor_list.append(
                    tf.get_variable('{}_sharded{}'.format(name, i),
                                    initializer=array,
                                    caching_device='/cpu:0'))
            return tensor_list

        # cpu_ts = tf.get_variable("cpu_arr", initializer=arr, dtype=tf.float32, caching_device='/cpu:0')
        self.feature_embeddings_map = {
            "info_input_embeddings":
            _get_tensor_list('info_input_embeddings', info_input_embeddings,
                             3),
            "info_output_embeddings":
            tf.get_variable(
                'info_output_embeddings',
                info_input_embeddings.shape,
                tf.float32,
                initializer=tf.random_normal_initializer(),
                caching_device='/cpu:0',
                # TODO: a fixed size is hard-coded here for now to save time: 15,000,000 * 50 dims * 8 bytes.
                # FIXME: if the partitioner here is changed, the embedding_lookup and nce_loss below must both switch to 'div'.
                # partitioner=tf.fixed_size_partitioner(3)
            )
        }
        for attribute in global_config.attribute_dim_map:
            self.feature_embeddings_map[attribute] = tf.get_variable(
                attribute + "_embeddings", [
                    global_config.attribute_dim_map[attribute],
                    global_config.default_attribute_embedding_size
                ],
                tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())

        func_dict = {
            "single": lambda x: x,
            "multi": lambda x: tf.reduce_sum(x, 1),
            "sequence": lambda x: tf.reduce_sum(x, 1)
        }
        self.before_fcn_embeddings_list = []
        for feature in global_config.feature_index_type_map:
            index, feature_nature, feature_type, attribute = global_config.feature_index_type_map[
                feature]
            if feature_nature == "label":
                continue
            tensor = tf.nn.embedding_lookup(
                self.feature_embeddings_map[attribute],
                self.feature_value_map[feature],
                'div' if attribute == "info_input_embeddings" else 'mod')
            self.before_fcn_embeddings_list.append(
                func_dict[feature_nature](tensor))

        self.so_called_raw_user_embedding = tf.concat(
            self.before_fcn_embeddings_list, 1, "so_called_raw_user_embedding")

        temp_hidden_vector = self.so_called_raw_user_embedding
        pre_layer_size = self.so_called_raw_user_embedding.shape[1]
        self.hidden_vector_list = []
        for i, hidden_layer_size in enumerate(
                global_config.full_connection_layer_list):
            temp_hidden_layer = tf.get_variable(
                "hidden_layer_{}".format(i),
                [pre_layer_size, hidden_layer_size],
                tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())
            temp_hidden_layer_bias = tf.get_variable(
                "hidden_layer_bias_{}".format(i), [1, hidden_layer_size],
                tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())
            temp_hidden_vector = tf.nn.relu(
                tf.add(tf.matmul(temp_hidden_vector, temp_hidden_layer),
                       temp_hidden_layer_bias))
            pre_layer_size = hidden_layer_size
            self.hidden_vector_list.append(temp_hidden_vector)
        self.so_called_user_embedding = temp_hidden_vector

        # Note the sizes here. TODO: so_called_user_embedding and info_output_embeddings must agree in dimension.
        self.logits, self.labels = _compute_sampled_logits(
            # class embeddings: random init, or something else? [input_embedding_size, vocabulary size]
            weights=self.feature_embeddings_map["info_output_embeddings"],
            biases=tf.get_variable(
                "nce_classes_bias",
                info_input_embeddings.shape[0],
                tf.float32,
                initializer=tf.random_normal_initializer()),  # [vocabulary size]
            inputs=self.so_called_user_embedding,  # [batch_size, input_embedding_size]
            labels=tf.expand_dims(self.feature_value_map["info_id"], -1),  # [batch_size, true_size]
            num_sampled=10,  # number of negative samples
            num_classes=info_input_embeddings.shape[0],  # vocabulary size
            num_true=1,
            partition_strategy='mod',  # 'div'
            name="nce_loss")

        sigmoid_cross_entropy_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=self.labels,
            logits=self.logits,
            name="sigmoid_cross_entropy_loss")
        # TODO: check whether there is a problem here.
        self.loss = tf.reduce_mean(tf.reduce_sum(sigmoid_cross_entropy_loss,
                                                 1))
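For comparison, roughly the same objective can be expressed through the public API. This is a sketch, not the author's code; `nce_classes_bias` stands in for the bias variable that the original creates inline inside the `_compute_sampled_logits` call:

# Sketch only: tf.nn.nce_loss wraps _compute_sampled_logits (subtract_log_q=True)
# followed by sigmoid cross-entropy and a per-example sum over the
# true + sampled columns, so reduce_mean gives a comparable scalar loss.
nce_classes_bias = tf.get_variable(
    "nce_classes_bias_public", info_input_embeddings.shape[0], tf.float32,
    initializer=tf.random_normal_initializer())
per_example_loss = tf.nn.nce_loss(
    weights=self.feature_embeddings_map["info_output_embeddings"],
    biases=nce_classes_bias,
    labels=tf.expand_dims(self.feature_value_map["info_id"], -1),
    inputs=self.so_called_user_embedding,
    num_sampled=10,
    num_classes=info_input_embeddings.shape[0],
    num_true=1,
    partition_strategy='mod')
loss = tf.reduce_mean(per_example_loss)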