def _ComputeSampledLogitsTF(self, weights, biases, hidden_acts, labels,
                            num_sampled, num_classes, num_true, sampled_vals,
                            subtract_log_q, remove_accidental_hits,
                            name="sampled_loss_TF"):
    # Should be called from within a `with test_session():` block.
    if isinstance(weights, list):
        weights_tf = [tf.constant(shard) for shard in weights]
    else:
        weights_tf = tf.constant(weights)
    biases_tf = tf.constant(biases)
    hidden_acts_tf = tf.constant(
        hidden_acts, shape=(self._batch_size, self._dim))
    labels_tf = tf.constant(
        labels, dtype=tf.int64, shape=(self._batch_size, num_true))
    pred_logits_tf, pred_labels_tf = _compute_sampled_logits(
        weights_tf, biases_tf, hidden_acts_tf, labels_tf, num_sampled,
        num_classes, num_true, sampled_vals,
        subtract_log_q=subtract_log_q,
        remove_accidental_hits=remove_accidental_hits,
        name=name)
    return pred_logits_tf, pred_labels_tf
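A minimal sketch of how this helper might be invoked from a test method. `_GenerateTestData` and the constants below are assumptions modeled on the other tests in this section, not code from the source.

def testHelperSketch(self):
    # Hypothetical test using the helper above; data generation and shapes
    # follow the conventions of the surrounding tests.
    np.random.seed(0)
    labels = np.random.randint(low=0, high=5, size=self._batch_size)
    (weights, biases, hidden_acts, sampled_vals, exp_logits,
     exp_labels) = self._GenerateTestData(
         num_classes=5, dim=self._dim, batch_size=self._batch_size,
         num_true=1, labels=labels, sampled=[1, 0, 2, 3],
         subtract_log_q=False)
    with self.test_session() as sess:
        logits_tf, labels_tf = self._ComputeSampledLogitsTF(
            weights, biases, hidden_acts, labels,
            num_sampled=4, num_classes=5, num_true=1,
            sampled_vals=sampled_vals,
            subtract_log_q=False, remove_accidental_hits=False)
        got_logits, got_labels = sess.run([logits_tf, labels_tf])
    self.assertAllClose(exp_logits, got_logits, self._eps)
    self.assertAllClose(exp_labels, got_labels, self._eps)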
def testSubtractLogQ(self):
    """With subtract_log_q, no accidental hit removal."""
    np.random.seed(0)
    num_classes = 5
    batch_size = 3
    with self.test_session() as sess:
        for num_true in range(1, 5):
            labels = np.random.randint(
                low=0, high=num_classes, size=batch_size * num_true)
            (weights, biases, hidden_acts, sampled_vals, exp_logits,
             exp_labels) = self._GenerateTestData(
                 num_classes=num_classes,
                 dim=10,
                 batch_size=batch_size,
                 num_true=num_true,
                 labels=labels,
                 sampled=[1, 0, 2, 3],
                 subtract_log_q=True)
            logits_tensor, labels_tensor = _compute_sampled_logits(
                weights=constant_op.constant(weights),
                biases=constant_op.constant(biases),
                labels=constant_op.constant(
                    labels, dtype=dtypes.int64,
                    shape=(batch_size, num_true)),
                inputs=constant_op.constant(hidden_acts),
                num_sampled=4,
                num_classes=num_classes,
                num_true=num_true,
                sampled_values=sampled_vals,
                subtract_log_q=True,
                remove_accidental_hits=False,
                partition_strategy="div",
                name="sampled_logits_subtract_log_q_num_true_%d" % num_true)
            got_logits, got_labels = sess.run([logits_tensor, labels_tensor])
            self.assertAllClose(exp_logits, got_logits, self._eps)
            self.assertAllClose(exp_labels, got_labels, self._eps)
def testShapes(self):
    np.random.seed(0)
    num_classes = 5
    batch_size = 3
    for num_true in range(1, 5):
        labels = np.random.randint(
            low=0, high=num_classes, size=batch_size * num_true)
        (weights, biases, hidden_acts, sampled_vals, exp_logits,
         exp_labels) = self._GenerateTestData(
             num_classes=num_classes,
             dim=10,
             batch_size=batch_size,
             num_true=num_true,
             labels=labels,
             sampled=[1, 0, 2, 3],
             subtract_log_q=False)
        logits_tensor, labels_tensor = _compute_sampled_logits(
            weights=constant_op.constant(weights),
            biases=constant_op.constant(biases),
            labels=constant_op.constant(
                labels, dtype=dtypes.int64, shape=(batch_size, num_true)),
            inputs=constant_op.constant(hidden_acts),
            num_sampled=4,
            num_classes=num_classes,
            num_true=num_true,
            sampled_values=sampled_vals,
            subtract_log_q=False,
            remove_accidental_hits=False,
            partition_strategy="div",
            name="sampled_logits_basic_num_true_%d" % num_true)
        got_logits, got_labels = self.evaluate([logits_tensor, labels_tensor])
        self.assertEqual(exp_logits.shape, got_logits.shape)
        self.assertEqual(exp_labels.shape, got_labels.shape)
def ranking_loss(self, labels, inputs):
    logits, labels = _compute_sampled_logits(
        weights=self.w,
        biases=self.b,
        labels=labels,
        inputs=inputs,
        num_sampled=self.num_sampled,
        num_classes=self.num_classes,
        num_true=self.num_true,
        sampled_values=self.sampled_values,
        subtract_log_q=True,
        remove_accidental_hits=self.remove_accidental_hits,
        partition_strategy=self.partition_strategy)
    # Prevent backpropagation through the labels.
    labels = array_ops.stop_gradient(labels, name="labels_stop_gradient")
    # Split the true logit (column 0) from the sampled logits.
    true_logit = tf.gather(logits, [0], axis=1)
    sampled_logit = tf.gather(
        logits, [i + 1 for i in range(self.num_sampled)], axis=1)
    # Compute the TOP1 loss: sigmoid of the rank margin plus a regularization
    # term that pushes sampled scores toward zero.
    loss = tf.math.sigmoid(sampled_logit - true_logit) + tf.math.sigmoid(
        tf.square(sampled_logit))
    loss = tf.reduce_mean(loss, axis=1, keepdims=True)
    # Predicted score for the true class.
    predict = tf.math.sigmoid(true_logit)
    return tf.concat([loss, predict], axis=1)
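To make the TOP1 arithmetic above concrete, here is a hedged NumPy illustration with made-up logits; the column layout (true logit first, then sampled logits) matches what `_compute_sampled_logits` produces.

import numpy as np

# Toy logits for one example: column 0 is the true class, columns 1..3 are
# the sampled negatives.
logits = np.array([[2.0, 0.5, -1.0, 0.0]])
true_logit = logits[:, :1]      # shape (1, 1)
sampled_logit = logits[:, 1:]   # shape (1, 3)

sigmoid = lambda x: 1.0 / (1.0 + np.exp(-x))
# TOP1: rank the true item above each sample, and regularize sample scores.
per_sample = sigmoid(sampled_logit - true_logit) + sigmoid(sampled_logit ** 2)
loss = per_sample.mean(axis=1, keepdims=True)   # shape (1, 1)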
def neg_loss(self, weights, biases, labels, inputs, num_sampled, num_classes,
             num_true=1, sampled_values=None, remove_accidental_hits=True,
             partition_strategy="mod", name="neg_loss"):
    logits, labels = nn_impl._compute_sampled_logits(
        weights=weights,
        biases=biases,
        labels=labels,
        inputs=inputs,
        num_sampled=num_sampled,
        num_classes=num_classes,
        num_true=num_true,
        sampled_values=sampled_values,
        subtract_log_q=False,
        remove_accidental_hits=remove_accidental_hits,
        partition_strategy=partition_strategy,
        name=name)
    sampled_losses = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=labels, logits=logits, name="sampled_losses")
    # sampled_losses is batch_size x {true_loss, sampled_losses...};
    # we sum out the true and sampled losses.
    return nn_impl._sum_rows(sampled_losses)
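A minimal usage sketch, assuming `neg_loss` is bound to a model object `model` and run in TF 1.x graph mode. The variable names and shapes here are illustrative assumptions, not from the source.

import tensorflow as tf

vocab_size, dim, batch_size = 10000, 128, 64
weights = tf.get_variable("neg_w", [vocab_size, dim], tf.float32)
biases = tf.get_variable("neg_b", [vocab_size], tf.float32)
labels = tf.placeholder(tf.int64, [batch_size, 1])
inputs = tf.placeholder(tf.float32, [batch_size, dim])

# Per-example negative-sampling losses, averaged into a scalar training loss.
per_example = model.neg_loss(weights, biases, labels, inputs,
                             num_sampled=16, num_classes=vocab_size)
loss = tf.reduce_mean(per_example)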
def init_graph(self):
    # Initialize the feed placeholders. Placeholder names must be unique and
    # must not be changed afterwards.
    self.video_ids_ph = tf.placeholder(
        tf.int32, shape=[None, None], name='video_ids')
    self.search_id_ph = tf.placeholder(
        tf.int32, shape=[None], name='search_id')
    self.age_ph = tf.placeholder(tf.float32, shape=[None], name='age')
    self.gender_ph = tf.placeholder(tf.float32, shape=[None], name='gender')
    self.label_ph = tf.placeholder(tf.float32, shape=[None], name='label_ph')
    # Initialize the video and search embeddings, then concatenate both
    # embeddings with age and gender.
    video_embedding = tf.get_variable(
        'video_embedding', shape=[self.video_total_num], dtype=tf.float32,
        initializer=tf.variance_scaling_initializer())
    video_vecs = tf.nn.embedding_lookup(video_embedding, self.video_ids_ph)
    search_embedding = tf.get_variable(
        name='search_embedding', shape=[self.search_total_num],
        dtype=tf.float32, initializer=tf.variance_scaling_initializer())
    search_vec = tf.nn.embedding_lookup(search_embedding, self.search_id_ph)
    input = tf.concat(
        [tf.reshape(tf.reduce_mean(video_vecs, axis=1), shape=[-1, 1]),
         tf.reshape(search_vec, shape=[-1, 1]),
         tf.reshape(self.age_ph, shape=[-1, 1]),
         tf.reshape(self.gender_ph, shape=[-1, 1])], axis=1)
    # Pass through the fully connected stack; the depth is tuned by mAP.
    for i in range(self.depth):
        input = tf.layers.dense(
            inputs=input, units=self.units_list[i],
            kernel_regularizer=layers.l2_regularizer(0.001),
            activation=tf.nn.relu, name='fc{}'.format(i),
            trainable=self.is_training)
        input = tf.layers.batch_normalization(
            input, training=self.is_training, name='fc{}_bn'.format(i))
    output = input
    # Initialize the weights and biases of the class embeddings (one class
    # per video label; on the order of millions in the paper).
    weights = tf.get_variable(
        'soft_weight', shape=[self.class_distinct, 128],
        initializer=tf.variance_scaling_initializer())
    biases = tf.get_variable(
        'soft_bias', shape=[self.class_distinct],
        initializer=tf.variance_scaling_initializer())
    if not self.is_training:
        # Compute prediction scores.
        self.logits_out = tf.matmul(output, tf.transpose(weights))
    else:
        # labels must be 2-D, while biases are 1-D.
        self.labels = tf.reshape(self.label_ph, shape=[-1, 1])
        # Compute the loss; num_true=1 means each row has one positive
        # example (its one-hot value is 1).
        self.logits_out, self.labels_out = nn_impl._compute_sampled_logits(
            weights=weights, biases=biases, labels=self.labels, inputs=input,
            num_sampled=100, num_classes=self.class_distinct, num_true=1,
            sampled_values=None, remove_accidental_hits=True,
            partition_strategy="div", name="sampled_softmax_loss", seed=None)
        labels = array_ops.stop_gradient(
            self.labels_out, name="labels_stop_gradient")
        sampled_losses = nn_ops.softmax_cross_entropy_with_logits_v2(
            labels=labels, logits=self.logits_out)
        self.loss = tf.reduce_mean(sampled_losses)
        # Build the gradient-descent optimizer with global-norm clipping.
        gradient_descent_optimizer = tf.train.GradientDescentOptimizer(
            self.learning_rate)
        train_var = tf.trainable_variables()
        clip_gradients, _ = tf.clip_by_global_norm(
            tf.gradients(self.loss, train_var), 5)
        self.gradient_descent = gradient_descent_optimizer.apply_gradients(
            zip(clip_gradients, train_var), global_step=self.global_step)
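A hedged sketch of one training step against the graph above. `model` and the `batch_*` arrays are hypothetical stand-ins for an instance of this class and a minibatch loader; only the placeholder names come from `init_graph`.

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # One gradient-descent step; the feed keys match the placeholders
    # defined in init_graph.
    _, loss_val = sess.run(
        [model.gradient_descent, model.loss],
        feed_dict={model.video_ids_ph: batch_video_ids,
                   model.search_id_ph: batch_search_ids,
                   model.age_ph: batch_ages,
                   model.gender_ph: batch_genders,
                   model.label_ph: batch_labels})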
def testAccidentalHitRemoval(self):
    """With accidental hit removal, no subtract_log_q."""
    np.random.seed(0)
    num_classes = 5
    batch_size = 3
    sampled = [1, 0, 2, 3]
    with self.test_session():
        for num_true in range(1, 5):
            labels = np.random.randint(
                low=0, high=num_classes, size=batch_size * num_true)
            (weights, biases, hidden_acts, sampled_vals, _,
             _) = self._GenerateTestData(
                 num_classes=num_classes,
                 dim=10,
                 batch_size=batch_size,
                 num_true=num_true,
                 labels=labels,
                 sampled=sampled,
                 subtract_log_q=False)
            logits_tensor, _ = _compute_sampled_logits(
                weights=constant_op.constant(weights),
                biases=constant_op.constant(biases),
                labels=constant_op.constant(
                    labels, dtype=dtypes.int64,
                    shape=(batch_size, num_true)),
                inputs=constant_op.constant(hidden_acts),
                num_sampled=len(sampled),
                num_classes=num_classes,
                num_true=num_true,
                sampled_values=sampled_vals,
                subtract_log_q=False,
                remove_accidental_hits=True,
                partition_strategy="div",
                colocate_logits=False,
                name="sampled_logits_accidental_hit_removal_num_true_%d" %
                num_true)
            # Test that the exponentiated logits of accidental hits are
            # near 0. First we need to find the hits in this random test run:
            labels_reshape = labels.reshape((batch_size, num_true))
            got_logits = logits_tensor.eval()
            for row in xrange(batch_size):
                row_labels = labels_reshape[row, :]
                for col in xrange(len(sampled)):
                    if sampled[col] in row_labels:
                        # Add the num_true offset into the logits columns,
                        # since the first num_true columns hold true logits.
                        self.assertNear(
                            np.exp(got_logits[row, col + num_true]), 0.,
                            self._eps)
def sampled_sigmoid_loss(weights, biases, inputs, labels, num_sampled,
                         num_classes, num_true=2, sampled_values=None,
                         remove_accidental_hits=True,
                         partition_strategy="mod",
                         name="sampled_sigmoid_loss"):
    logits, labels = _compute_sampled_logits(
        weights=weights,
        biases=biases,
        labels=labels,
        inputs=inputs,
        num_sampled=num_sampled,
        num_classes=num_classes,
        num_true=num_true,
        sampled_values=sampled_values,
        subtract_log_q=True,
        remove_accidental_hits=remove_accidental_hits,
        partition_strategy=partition_strategy,
        name=name)
    # Per-logit sigmoid cross-entropy of shape
    # [batch_size, num_true + num_sampled].
    sampled_losses = nn_ops.sigmoid_cross_entropy_with_logits(
        logits=logits, labels=labels)
    return sampled_losses
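A hedged usage sketch: the function returns per-logit losses, so a caller typically sums over the logit axis and averages over the batch. `vocab_size` and the input tensors below are illustrative assumptions.

# Reduce the [batch_size, num_true + num_sampled] losses to a scalar.
per_logit = sampled_sigmoid_loss(
    weights, biases, inputs, labels,
    num_sampled=64, num_classes=vocab_size, num_true=2)
loss = tf.reduce_mean(tf.reduce_sum(per_logit, axis=1))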
def testShardedColocatedLogits(self):
    """Sharded weights and biases and with colocated logit computation."""
    np.random.seed(0)
    num_classes = 5
    batch_size = 3
    with self.test_session() as sess:
        for num_true in range(1, 5):
            labels = np.random.randint(
                low=0, high=num_classes, size=batch_size * num_true)
            (weights, biases, hidden_acts, sampled_vals, exp_logits,
             exp_labels) = self._GenerateTestData(
                 num_classes=num_classes,
                 dim=10,
                 batch_size=batch_size,
                 num_true=num_true,
                 labels=labels,
                 sampled=[1, 0, 2, 3],
                 subtract_log_q=False)
            weight_shards, bias_shards = self._ShardTestEmbeddings(
                weights, biases, num_shards=3)
            logits_tensor, labels_tensor = _compute_sampled_logits(
                weights=[constant_op.constant(shard)
                         for shard in weight_shards],
                biases=[constant_op.constant(shard)
                        for shard in bias_shards],
                labels=constant_op.constant(
                    labels, dtype=dtypes.int64,
                    shape=(batch_size, num_true)),
                inputs=constant_op.constant(hidden_acts),
                num_sampled=4,
                num_classes=num_classes,
                num_true=num_true,
                sampled_values=sampled_vals,
                subtract_log_q=False,
                remove_accidental_hits=False,
                partition_strategy="div",
                colocate_logits=True,
                name="sampled_logits_sharded_colocated_num_true_%d" %
                num_true)
            got_logits, got_labels = sess.run([logits_tensor, labels_tensor])
            self.assertAllClose(exp_logits, got_logits, self._eps)
            self.assertAllClose(exp_labels, got_labels, self._eps)
def cost(weights, biases, labels, inputs, num_sampled, num_classes,
         sampler, subtract_log_q, cost_function):
    # `sampler`, `subtract_log_q`, and `cost_function` were free names in the
    # original snippet; they are passed in here so the function is
    # self-contained.
    sampled_values = sampler(
        true_classes=labels,
        num_true=1,
        num_sampled=num_sampled,
        unique=True,
        range_max=num_classes,
    )
    logits, labels = _compute_sampled_logits(
        weights=weights,
        biases=biases,
        labels=labels,
        inputs=inputs,
        num_sampled=num_sampled,
        num_classes=num_classes,
        num_true=1,
        sampled_values=sampled_values,
        subtract_log_q=subtract_log_q)
    return cost_function(labels=labels, logits=logits,
                         num_classes=num_classes)
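A hedged example of wiring `cost` to concrete pieces: a log-uniform candidate sampler and a sampled-softmax-style cross-entropy. `softmax_cost`, `vocab_size`, and the input tensors are illustrative assumptions; `labels` is assumed to be int64 of shape [batch_size, 1].

def softmax_cost(labels, logits, num_classes):
    # Treat the returned soft labels as fixed targets.
    labels = tf.stop_gradient(labels)
    return tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=labels, logits=logits))

loss = cost(weights, biases, labels, inputs,
            num_sampled=32, num_classes=vocab_size,
            sampler=tf.nn.log_uniform_candidate_sampler,
            subtract_log_q=True, cost_function=softmax_cost)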
def build_noexclusive_sampled_outputs(self, inputs, num_classes):
    with tf.name_scope("output"):
        # weights = tf.get_variable("weights",
        #     shape=[num_classes, sum(self.num_filters)], dtype=tf.float32,
        #     initializer=tf.contrib.layers.xavier_initializer())
        # biases = tf.get_variable("biases", shape=[num_classes],
        #     dtype=tf.float32, initializer=tf.constant_initializer(0.2))
        weights = tf.get_variable(
            "weights", shape=[num_classes, self.repr_len], dtype=tf.float32)
        biases = tf.get_variable(
            "biases", shape=[num_classes], dtype=tf.float32)
        tf.summary.histogram('weights', weights)
        tf.summary.histogram('biases', biases)
    # As the number of classes is large, use sampled softmax instead of a
    # dense layer plus full softmax.
    with tf.name_scope("loss"):
        tags_prob = tf.pad(self.input_y_prob,
                           [[0, 0], [0, config.num_sampled]])
        out_logits, out_labels = _compute_sampled_logits(
            weights, biases, self.input_y, inputs,
            config.num_sampled, num_classes, num_true=config.max_tags)
        # TODO: check that out_labels keeps the same order as input_y.
        weighted_out_labels = out_labels * tags_prob * config.max_tags
        # self.out_labels = weighted_out_labels
        self.loss = tf.reduce_sum(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=out_logits, labels=weighted_out_labels))
    with tf.name_scope("outputs"):
        logits = tf.nn.softmax(
            tf.matmul(inputs, tf.transpose(weights)) + biases)
        self.output_values, self.output_indexes = tf.nn.top_k(
            logits, config.topn)
    with tf.name_scope("score"):
        self.score = self.loss / tf.cast(self.batch_size, tf.float32)
        # self.accuracy = tf.reduce_sum(self.top_prob)
        tf.summary.scalar('loss', self.loss)
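To clarify the label re-weighting above: `_compute_sampled_logits` spreads each row's label mass as 1/num_true over the true columns, so multiplying by the padded tag probabilities and by `max_tags` recovers the per-tag probabilities. A hedged NumPy illustration with made-up numbers:

import numpy as np

max_tags, num_sampled = 2, 3
# out_labels from _compute_sampled_logits: 1/num_true on each true column,
# zeros on the num_sampled columns.
out_labels = np.array([[0.5, 0.5, 0.0, 0.0, 0.0]])
# input_y_prob padded with zeros over the sampled columns.
tags_prob = np.array([[0.8, 0.2, 0.0, 0.0, 0.0]])
weighted = out_labels * tags_prob * max_tags
# -> [[0.8, 0.2, 0.0, 0.0, 0.0]]: the original per-tag probabilities.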
def sampled_sparse_softmax_loss(weights, biases, labels, inputs, num_sampled,
                                num_classes, sampled_values=None,
                                remove_accidental_hits=True,
                                partition_strategy="mod",
                                name="sampled_sparse_softmax_loss"):
    """Computes and returns the sampled sparse softmax training loss.

    This is a faster way to train a softmax classifier over a huge number of
    classes.

    This operation is for training only. It is generally an underestimate of
    the full softmax loss.

    A common use case is to use this method for training, and calculate the
    full softmax loss for evaluation or inference. In this case, you must set
    `partition_strategy="div"` for the two losses to be consistent, as in the
    following example:

    ```python
    if mode == "train":
      loss = tf.nn.sampled_sparse_softmax_loss(
          weights=weights,
          biases=biases,
          labels=labels,
          inputs=inputs,
          ...,
          partition_strategy="div")
    elif mode == "eval":
      logits = tf.matmul(inputs, tf.transpose(weights))
      logits = tf.nn.bias_add(logits, biases)
      loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
          labels=tf.squeeze(labels),
          logits=logits)
    ```

    See our [Candidate Sampling Algorithms Reference]
    (https://www.tensorflow.org/extras/candidate_sampling.pdf)

    Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007)
    ([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math.

    Args:
      weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor`
        objects whose concatenation along dimension 0 has shape
        [num_classes, dim]. The (possibly-sharded) class embeddings.
      biases: A `Tensor` of shape `[num_classes]`. The class biases.
      labels: A `Tensor` of type `int64` and shape `[batch_size, 1]`. The
        index of the single target class for each row of logits. Note that
        this format differs from the `labels` argument of
        `nn.sparse_softmax_cross_entropy_with_logits`.
      inputs: A `Tensor` of shape `[batch_size, dim]`. The forward activations
        of the input network.
      num_sampled: An `int`. The number of classes to randomly sample per
        batch.
      num_classes: An `int`. The number of possible classes.
      sampled_values: a tuple of (`sampled_candidates`, `true_expected_count`,
        `sampled_expected_count`) returned by a `*_candidate_sampler`
        function. (if None, we default to `log_uniform_candidate_sampler`)
      remove_accidental_hits: A `bool`. Whether to remove "accidental hits"
        where a sampled class equals one of the target classes. Default is
        True.
      partition_strategy: A string specifying the partitioning strategy,
        relevant if `len(weights) > 1`. Currently `"div"` and `"mod"` are
        supported. Default is `"mod"`. See `tf.nn.embedding_lookup` for more
        details.
      name: A name for the operation (optional).

    Returns:
      A `batch_size` 1-D tensor of per-example sampled softmax losses.
    """
    logits, _ = nn_impl._compute_sampled_logits(
        weights=weights,
        biases=biases,
        labels=labels,
        inputs=inputs,
        num_sampled=num_sampled,
        num_classes=num_classes,
        num_true=1,
        sampled_values=sampled_values,
        subtract_log_q=True,
        remove_accidental_hits=remove_accidental_hits,
        partition_strategy=partition_strategy,
        name=name)

    # There is only one true label. _compute_sampled_logits puts the true
    # logit at index 0.
    labels = array_ops.zeros([array_ops.shape(logits)[0], 1],
                             dtype=dtypes.int64)

    sampled_losses = nn_ops.sparse_softmax_cross_entropy_with_logits(
        labels=array_ops.squeeze(labels), logits=logits)
    # sampled_losses is a [batch_size] tensor.
    return sampled_losses
def TFNCELoss(X, target_word, L):
    tf.compat.v1.disable_eager_execution()
    in_embed = tf.compat.v1.placeholder(tf.float32, shape=X.shape)
    in_bias = tf.compat.v1.placeholder(tf.float32, shape=L.b.flatten().shape)
    in_weights = tf.compat.v1.placeholder(
        tf.float32, shape=L.W.transpose().shape)
    in_target_word = tf.compat.v1.placeholder(tf.int64)
    in_neg_samples = tf.compat.v1.placeholder(tf.int32)
    in_target_prob = tf.compat.v1.placeholder(tf.float32)
    in_neg_samp_prob = tf.compat.v1.placeholder(tf.float32)

    feed = {
        in_embed: X,
        in_weights: L.W.transpose(),
        in_target_word: target_word,
        in_bias: L.b.flatten(),
        in_neg_samples: L.derived_variables["noise_samples"][0],
        in_target_prob: L.derived_variables["noise_samples"][1],
        in_neg_samp_prob: L.derived_variables["noise_samples"][2],
    }

    nce_unreduced = tf.nn.nce_loss(
        weights=in_weights,
        biases=in_bias,
        labels=in_target_word,
        inputs=in_embed,
        sampled_values=(in_neg_samples, in_target_prob, in_neg_samp_prob),
        num_sampled=L.num_negative_samples,
        num_classes=L.n_classes,
    )

    loss = tf.reduce_sum(nce_unreduced)
    dLdW = tf.gradients(loss, [in_weights])[0]
    dLdb = tf.gradients(loss, [in_bias])[0]
    dLdX = tf.gradients(loss, [in_embed])[0]

    sampled_logits, sampled_labels = _compute_sampled_logits(
        weights=in_weights,
        biases=in_bias,
        labels=in_target_word,
        inputs=in_embed,
        sampled_values=(in_neg_samples, in_target_prob, in_neg_samp_prob),
        num_sampled=L.num_negative_samples,
        num_classes=L.n_classes,
        num_true=1,
        subtract_log_q=True,
    )
    sampled_losses = sigmoid_cross_entropy_with_logits(
        labels=sampled_labels, logits=sampled_logits)

    with tf.compat.v1.Session() as session:
        session.run(tf.compat.v1.global_variables_initializer())
        (
            _final_loss,
            _nce_unreduced,
            _dLdW,
            _dLdb,
            _dLdX,
            _sampled_logits,
            _sampled_labels,
            _sampled_losses,
        ) = session.run(
            [
                loss,
                nce_unreduced,
                dLdW,
                dLdb,
                dLdX,
                sampled_logits,
                sampled_labels,
                sampled_losses,
            ],
            feed_dict=feed,
        )
    tf.compat.v1.reset_default_graph()
    return {
        "final_loss": _final_loss,
        "nce_unreduced": _nce_unreduced,
        "dLdW": _dLdW,
        "dLdb": _dLdb,
        "dLdX": _dLdX,
        "out_logits": _sampled_logits,
        "out_labels": _sampled_labels,
        "sampled_loss": _sampled_losses,
    }
def __init__(self, global_config, sample, info_input_embeddings, is_training):
    """Define the model structure and initialize the parameters.

    :param global_config:
    """
    self.model_config = global_config
    self.feature_value_map = {}
    for i, feature in enumerate(global_config.feature_index_type_map):
        self.feature_value_map[feature] = sample[i]

    def _get_tensor_list(name, array, shard_num):
        tensor_list = []
        array_list = data_utils.shard_array(array, shard_num)
        for i, array in enumerate(array_list):
            tensor_list.append(
                tf.get_variable('{}_sharded{}'.format(name, i),
                                initializer=array,
                                caching_device='/cpu:0'))
        return tensor_list

    # cpu_ts = tf.get_variable("cpu_arr", initializer=arr, dtype=tf.float32,
    #                          caching_device='/cpu:0')
    self.feature_embeddings_map = {
        "info_input_embeddings": _get_tensor_list(
            'info_input_embeddings', info_input_embeddings, 3),
        "info_output_embeddings": tf.get_variable(
            'info_output_embeddings',
            info_input_embeddings.shape,
            tf.float32,
            initializer=tf.random_normal_initializer(),
            caching_device='/cpu:0',
            # TODO: a fixed value is hard-coded here for now to save time:
            # 15,000,000 * 50 dims * 8 bytes.
            # FIXME: if the partitioner here is changed, note that the
            # embedding_lookup and nce_loss below must both switch to 'div'.
            # partitioner=tf.fixed_size_partitioner(3)
        )
    }
    for attribute in global_config.attribute_dim_map:
        self.feature_embeddings_map[attribute] = tf.get_variable(
            attribute + "_embeddings",
            [global_config.attribute_dim_map[attribute],
             global_config.default_attribute_embedding_size],
            tf.float32,
            initializer=tf.contrib.layers.xavier_initializer())
    func_dict = {
        "single": lambda x: x,
        "multi": lambda x: tf.reduce_sum(x, 1),
        "sequence": lambda x: tf.reduce_sum(x, 1)
    }
    self.before_fcn_embeddings_list = []
    for feature in global_config.feature_index_type_map:
        index, feature_nature, feature_type, attribute = \
            global_config.feature_index_type_map[feature]
        if feature_nature == "label":
            continue
        tensor = tf.nn.embedding_lookup(
            self.feature_embeddings_map[attribute],
            self.feature_value_map[feature],
            'div' if attribute == "info_input_embeddings" else 'mod')
        self.before_fcn_embeddings_list.append(
            func_dict[feature_nature](tensor))
    self.so_called_raw_user_embedding = tf.concat(
        self.before_fcn_embeddings_list, 1, "so_called_raw_user_embedding")
    temp_hidden_vector = self.so_called_raw_user_embedding
    pre_layer_size = self.so_called_raw_user_embedding.shape[1]
    self.hidden_vector_list = []
    for i, hidden_layer_size in enumerate(
            global_config.full_connection_layer_list):
        temp_hidden_layer = tf.get_variable(
            "hidden_layer_{}".format(i),
            [pre_layer_size, hidden_layer_size],
            tf.float32,
            initializer=tf.contrib.layers.xavier_initializer())
        temp_hidden_layer_bias = tf.get_variable(
            "hidden_layer_bias_{}".format(i),
            [1, hidden_layer_size],
            tf.float32,
            initializer=tf.contrib.layers.xavier_initializer())
        temp_hidden_vector = tf.nn.relu(
            tf.add(tf.matmul(temp_hidden_vector, temp_hidden_layer),
                   temp_hidden_layer_bias))
        pre_layer_size = hidden_layer_size
        self.hidden_vector_list.append(temp_hidden_vector)
    self.so_called_user_embedding = temp_hidden_vector
    # Note the sizes here. TODO: so_called_user_embedding and
    # info_output_embeddings must have matching dimensions.
    self.logits, self.labels = _compute_sampled_logits(
        # Randomly initialized output embeddings:
        # [vocabulary_size, input_embedding_size].
        weights=self.feature_embeddings_map["info_output_embeddings"],
        # Class biases: [vocabulary_size].
        biases=tf.get_variable(
            "nce_classes_bias",
            info_input_embeddings.shape[0],
            tf.float32,
            initializer=tf.random_normal_initializer()),
        # [batch_size, input_embedding_size]
        inputs=self.so_called_user_embedding,
        # [batch_size, true_size]
        labels=tf.expand_dims(self.feature_value_map["info_id"], -1),
        num_sampled=10,  # number of negative samples
        num_classes=info_input_embeddings.shape[0],  # vocabulary size
        num_true=1,
        partition_strategy='mod',  # 'div'
        name="nce_loss")
    sigmoid_cross_entropy_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=self.labels,
        logits=self.logits,
        name="sigmoid_cross_entropy_loss")
    # TODO: check whether this is correct.
    self.loss = tf.reduce_mean(tf.reduce_sum(sigmoid_cross_entropy_loss, 1))