def testSampledSoftmaxLoss(self):
  # A simple test to verify the numerics.

  def _SoftmaxCrossEntropyWithLogits(logits, targets):
    # logits, targets: float arrays of the same shape.
    assert logits.shape == targets.shape
    stable_exp_logits = np.exp(
        logits - np.amax(logits, axis=1, keepdims=True))
    pred = stable_exp_logits / np.sum(stable_exp_logits, 1, keepdims=True)
    return -np.sum(targets * np.log(pred + 1.0e-20), axis=1)

  np.random.seed(0)
  num_classes = 5
  batch_size = 3
  labels = [0, 1, 2]
  (weights, biases, hidden_acts, sampled_vals, exp_logits,
   exp_labels) = self._GenerateTestData(
       num_classes=num_classes,
       dim=10,
       batch_size=batch_size,
       num_true=1,
       labels=labels,
       sampled=[1, 0, 2, 3],
       subtract_log_q=True)
  exp_sampled_softmax_loss = _SoftmaxCrossEntropyWithLogits(
      exp_logits, exp_labels)

  with self.test_session():
    got_sampled_softmax_loss = nn_impl.sampled_softmax_loss(
        weights=constant_op.constant(weights),
        biases=constant_op.constant(biases),
        labels=constant_op.constant(labels, shape=(batch_size, 1)),
        inputs=constant_op.constant(hidden_acts),
        num_sampled=4,
        num_classes=num_classes,
        num_true=1,
        sampled_values=sampled_vals,
        remove_accidental_hits=False,
        partition_strategy="div")

    self.assertAllClose(exp_sampled_softmax_loss,
                        got_sampled_softmax_loss.eval(), 1e-4)

    # Test with sharded weights and sharded biases.
    weight_shards, bias_shards = self._ShardTestEmbeddings(
        weights, biases, num_shards=3)
    got_sampled_softmax_loss = nn_impl.sampled_softmax_loss(
        weights=[constant_op.constant(shard) for shard in weight_shards],
        biases=[constant_op.constant(shard) for shard in bias_shards],
        labels=constant_op.constant(labels, shape=(batch_size, 1)),
        inputs=constant_op.constant(hidden_acts),
        num_sampled=4,
        num_classes=num_classes,
        num_true=1,
        sampled_values=sampled_vals,
        remove_accidental_hits=False,
        partition_strategy="div")

    self.assertAllClose(exp_sampled_softmax_loss,
                        got_sampled_softmax_loss.eval(), 1e-4)
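The expected logits compared against above come from _GenerateTestData with subtract_log_q=True, whose body is not shown here. A minimal NumPy sketch of the standard sampled-softmax construction it presumably follows (the helper name _sampled_logits_np and its arguments are illustrative, not part of the test): each row's candidate set is its true label followed by the shared sampled ids, each candidate's logit is the dot product of the hidden activation with that class's weight row plus its bias, and the log of the sampler's expected count is subtracted from every logit.

import numpy as np

def _sampled_logits_np(weights, biases, hidden_acts, labels, sampled,
                       true_exp, sampled_exp):
  # Gather parameters for each row's true class and for the shared sampled classes.
  true_w, true_b = weights[labels], biases[labels]          # [batch, dim], [batch]
  sampled_w, sampled_b = weights[sampled], biases[sampled]  # [num_sampled, dim], [num_sampled]
  # Per-row logit of the true class and logits of the sampled classes.
  true_logits = np.sum(hidden_acts * true_w, axis=1) + true_b  # [batch]
  sampled_logits = hidden_acts.dot(sampled_w.T) + sampled_b    # [batch, num_sampled]
  # subtract_log_q=True: remove the log expected counts returned by the sampler.
  true_logits -= np.log(true_exp[:, 0])
  sampled_logits -= np.log(sampled_exp)
  logits = np.concatenate([true_logits[:, None], sampled_logits], axis=1)
  # One-hot targets over the candidate columns; column 0 holds the true class.
  targets = np.zeros_like(logits)
  targets[:, 0] = 1.0
  return logits, targets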
def SampledLoss(labels, inputs):
  labels = array_ops.reshape(labels, [-1, 1])
  return nn_impl.sampled_softmax_loss(
      weights=w_t,
      biases=b,
      labels=labels,
      inputs=inputs,
      num_sampled=8,
      num_classes=classes)
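SampledLoss closes over w_t, b, and classes, which are defined elsewhere in the test (w_t is the transposed output projection of shape [num_classes, dim], b its bias vector, classes the vocabulary size). A minimal standalone sketch of calling it through the public tf.nn.sampled_softmax_loss wrapper, with placeholder shapes chosen only for illustration:

import numpy as np
import tensorflow as tf

classes, dim, batch = 10, 4, 3
w_t = tf.constant(np.random.randn(classes, dim).astype(np.float32))  # [num_classes, dim]
b = tf.constant(np.zeros(classes, dtype=np.float32))                 # [num_classes]

def SampledLoss(labels, inputs):
  labels = tf.reshape(labels, [-1, 1])
  return tf.nn.sampled_softmax_loss(
      weights=w_t, biases=b, labels=labels, inputs=inputs,
      num_sampled=8, num_classes=classes)

labels = tf.constant([1, 3, 7], dtype=tf.int64)                       # [batch]
inputs = tf.constant(np.random.randn(batch, dim).astype(np.float32))  # [batch, dim]
loss = SampledLoss(labels, inputs)                                     # [batch] per-example losses

In graph mode the loss tensor would still need to be evaluated inside a session; the snippet only constructs it.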
def testSampledSoftmaxLossBf16(self):
  # A simple test to verify the numerics for bfloat16.

  def _SoftmaxCrossEntropyWithLogits(logits, targets):
    # logits, targets: float arrays of the same shape.
    assert logits.shape == targets.shape
    stable_exp_logits = np.exp(
        logits - np.amax(logits, axis=1, keepdims=True))
    pred = stable_exp_logits / np.sum(stable_exp_logits, 1, keepdims=True)
    return -np.sum(targets * np.log(pred + 1.0e-20), axis=1)

  np.random.seed(0)
  num_classes = 5
  batch_size = 3
  labels = [0, 1, 2]
  sampled = [1, 0, 2, 3]
  (weights, biases, hidden_acts, _, exp_logits,
   exp_labels) = self._GenerateTestData(
       num_classes=num_classes,
       dim=10,
       batch_size=batch_size,
       num_true=1,
       labels=labels,
       sampled=sampled,
       subtract_log_q=True)
  exp_sampled_softmax_loss = _SoftmaxCrossEntropyWithLogits(
      exp_logits, exp_labels)

  true_exp_bf16 = np.full(
      [batch_size, 1], fill_value=0.5, dtype=dtypes.bfloat16.as_numpy_dtype)
  sampled_exp_bf16 = np.full(
      [len(sampled)], fill_value=0.5, dtype=dtypes.bfloat16.as_numpy_dtype)
  sampled_vals_bf16 = (sampled, true_exp_bf16, sampled_exp_bf16)

  got_sampled_softmax_loss = math_ops.cast(
      nn_impl.sampled_softmax_loss(
          weights=constant_op.constant(weights, dtype=dtypes.bfloat16),
          biases=constant_op.constant(biases, dtype=dtypes.bfloat16),
          labels=constant_op.constant(
              labels, shape=(batch_size, 1), dtype=dtypes.bfloat16),
          inputs=constant_op.constant(hidden_acts, dtype=dtypes.bfloat16),
          num_sampled=4,
          num_classes=num_classes,
          num_true=1,
          sampled_values=sampled_vals_bf16,
          remove_accidental_hits=False,
          partition_strategy="div"), dtypes.float32)

  self.assertAllClose(exp_sampled_softmax_loss,
                      self.evaluate(got_sampled_softmax_loss), 1e-1)
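The tolerance here is 1e-1 rather than the 1e-4 used in the float32 tests because bfloat16 keeps only 8 bits of significand, roughly 2-3 significant decimal digits. A quick illustration of the rounding using the public tf.cast (the printed value is approximate):

import tensorflow as tf

x = tf.constant(1.234567, dtype=tf.float32)
y = tf.cast(tf.cast(x, tf.bfloat16), tf.float32)  # round-trip through bfloat16
# y is roughly 1.234375: only the first few significant digits survive.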
def testSampledSoftmaxLoss(self):
  # A simple test to verify the numerics.

  def _SoftmaxCrossEntropyWithLogits(logits, targets):
    # logits, targets: float arrays of the same shape.
    assert logits.shape == targets.shape
    stable_exp_logits = np.exp(
        logits - np.amax(logits, axis=1, keepdims=True))
    pred = stable_exp_logits / np.sum(stable_exp_logits, 1, keepdims=True)
    return -np.sum(targets * np.log(pred + 1.0e-20), axis=1)

  weights, biases, hidden_acts, sharded_weights, sharded_biases = (
      self._GenerateTestInputs())
  labels = [0, 1, 2]
  true_w, true_b = weights[labels], biases[labels]
  sampled = [1, 0, 2, 3]
  num_sampled = len(sampled)
  true_exp = np.full([self._batch_size, 1], fill_value=0.5, dtype=np.float32)
  sampled_exp = np.full([num_sampled], fill_value=0.5, dtype=np.float32)
  sampled_w, sampled_b = weights[sampled], biases[sampled]
  test_sampled_vals = (sampled, true_exp, sampled_exp)

  with self.test_session():
    logits_np, labels_np = self._ComputeSampledLogitsNP(
        true_w,
        true_b,
        sampled_w,
        sampled_b,
        hidden_acts,
        true_expected=true_exp,
        sampled_expected=sampled_exp)
    sampled_softmax_loss_np = _SoftmaxCrossEntropyWithLogits(
        logits_np, labels_np)

    labels_tf = constant_op.constant(labels, shape=(self._batch_size, 1))
    weights_tf = constant_op.constant(weights)
    biases_tf = constant_op.constant(biases)
    inputs_tf = constant_op.constant(hidden_acts)

    sampled_softmax_loss_tf = nn_impl.sampled_softmax_loss(
        weights=weights_tf,
        biases=biases_tf,
        labels=labels_tf,
        inputs=inputs_tf,
        num_sampled=num_sampled,
        num_classes=self._num_classes,
        num_true=1,
        sampled_values=test_sampled_vals,
        remove_accidental_hits=False,
        partition_strategy="div")

    self.assertAllClose(sampled_softmax_loss_np,
                        sampled_softmax_loss_tf.eval(), 1e-4)

    # Test with sharded weights and sharded biases.
    sampled_softmax_loss_tf = nn_impl.sampled_softmax_loss(
        weights=sharded_weights,
        biases=sharded_biases,
        labels=labels_tf,
        inputs=inputs_tf,
        num_sampled=num_sampled,
        num_classes=self._num_classes,
        num_true=1,
        sampled_values=test_sampled_vals,
        remove_accidental_hits=False,
        partition_strategy="div")

    self.assertAllClose(sampled_softmax_loss_np,
                        sampled_softmax_loss_tf.eval(), 1e-4)