    def train_output(self, encoder_output, Y, teacher_probs, reuse):
        """Calculate loss and accuracy."""
        with tf.variable_scope(self.decoder_scope, reuse=reuse):
            logits = dense(encoder_output, self._config.dst_vocab_size, use_bias=False,
                           name="dst_embedding" if self._config.tie_embedding_and_softmax else "softmax",
                           reuse=True if self._config.tie_embedding_and_softmax else None)  # 2D to 3D
            preds = tf.to_int32(tf.argmax(logits, axis=-1))

            mask = tf.to_float(tf.not_equal(Y, 0))

            # Token-level accuracy
            acc = tf.reduce_sum(tf.to_float(tf.equal(preds, Y)) * mask) / tf.reduce_sum(mask)
            if not tf.get_variable_scope().reuse:
                tf.summary.scalar('accuracy', acc)

            if teacher_probs is not None:
                # Knowledge distillation
                loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=teacher_probs)
            else:
                # Smoothed loss
                loss = common_layers.smoothing_cross_entropy(logits=logits, labels=Y,
                                                             vocab_size=self._config.dst_vocab_size,
                                                             confidence=1 - self._config.train.label_smoothing)
            loss = tf.reduce_sum(loss * mask) / tf.reduce_sum(mask)

            return loss
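
Both branches above produce a per-token loss that is then averaged over non-padding positions. The smoothed branch relies on `common_layers.smoothing_cross_entropy`, which implements label smoothing: the target distribution puts `confidence` mass on the gold token and spreads the remaining mass uniformly over the rest of the vocabulary. A minimal sketch in the style of the tensor2tensor helper (the actual function in this repo may differ in detail):

    def smoothing_cross_entropy_sketch(logits, labels, vocab_size, confidence):
        # Probability mass assigned to every non-gold token.
        low_confidence = (1.0 - confidence) / tf.to_float(vocab_size - 1)
        soft_targets = tf.one_hot(tf.cast(labels, tf.int32), depth=vocab_size,
                                  on_value=confidence, off_value=low_confidence)
        xentropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,
                                                              labels=soft_targets)
        # Subtract the entropy of the smoothed targets so a perfect prediction gives ~0 loss.
        normalizing = -(confidence * tf.log(confidence) +
                        tf.to_float(vocab_size - 1) * low_confidence *
                        tf.log(low_confidence + 1e-20))
        return xentropy - normalizing
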
Example #2
    def train_output(self, decoder_output, Y, X, reuse):
        """Calculate loss and accuracy."""
        with tf.variable_scope(self.decoder_scope, reuse=reuse):
            logits_gen = dense(
                decoder_output,
                self._config.dst_vocab_size,
                use_bias=False,
                name="dst_embedding"
                if self._config.tie_embedding_and_softmax else "softmax",
                reuse=True if self._config.tie_embedding_and_softmax else
                None)  # 2D to 3D

            preds_gen = tf.to_int32(tf.argmax(logits_gen, axis=-1))
            mask = tf.to_float(tf.not_equal(Y, 0))
            acc_gen = tf.reduce_sum(
                tf.to_float(tf.equal(preds_gen, Y)) *
                mask) / tf.reduce_sum(mask)

            # Smoothed loss
            loss_gen = common_layers.smoothing_cross_entropy(
                logits=logits_gen,
                labels=Y,
                vocab_size=self._config.dst_vocab_size,
                confidence=1 - self._config.train.label_smoothing)
            mean_loss_gen = tf.reduce_sum(
                loss_gen * mask) / (tf.reduce_sum(mask))

        return acc_gen, mean_loss_gen
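
Accuracy and loss here are averaged only over real tokens: the padding id is assumed to be 0, so `mask` zeroes out padded positions in both the numerator and the denominator. A small illustration with hypothetical values:

    Y = tf.constant([[5, 7, 0, 0]])                        # two real tokens followed by two PADs
    per_token_loss = tf.constant([[1.2, 0.8, 3.0, 3.0]])   # hypothetical per-position losses
    mask = tf.to_float(tf.not_equal(Y, 0))                 # [[1., 1., 0., 0.]]
    mean_loss = tf.reduce_sum(per_token_loss * mask) / tf.reduce_sum(mask)  # (1.2 + 0.8) / 2 = 1.0
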
Example #3
    def train_output(self, decoder_output, Y, reuse, decoder_scope):
        """Calculate loss and accuracy."""
        with tf.variable_scope(decoder_scope, reuse=reuse):
            if self._config.is_lsoftmax is None:
                self._config.is_lsoftmax = False
            if not self._config.is_lsoftmax:
                logits = dense(decoder_output, self._config.dst_vocab_size, use_bias=False,
                               name="dst_embedding" if self._config.tie_embedding_and_softmax else "softmax",
                               reuse=True if self._config.tie_embedding_and_softmax else None)
            else:
                with tf.variable_scope("dst_embedding" if self._config.tie_embedding_and_softmax else "softmax",
                                       "dense", reuse=reuse):
                    input_size = decoder_output.get_shape().as_list()[-1]
                    inputs_shape = tf.unstack(tf.shape(decoder_output))
                    decoder_output_tmp = tf.reshape(decoder_output, [-1, input_size])
                    Y_tmp = tf.reshape(Y, [-1])
                    with tf.variable_scope(tf.get_variable_scope(),
                                           reuse=True if self._config.tie_embedding_and_softmax else None):
                        weights = tf.get_variable("kernel", [self._config.dst_vocab_size, input_size])
                        weights = tf.transpose(weights)
                        logits = lsoftmax(decoder_output_tmp, weights, Y_tmp)
                        logits = tf.reshape(logits, inputs_shape[:-1] + [self._config.dst_vocab_size])

            preds = tf.to_int32(tf.argmax(logits, axis=-1))
            mask = tf.to_float(tf.not_equal(Y, 0))
            acc = tf.reduce_sum(tf.to_float(tf.equal(preds, Y)) * mask) / tf.reduce_sum(mask)

            # Smoothed loss
            loss = common_layers.smoothing_cross_entropy(logits=logits, labels=Y,
                                                         vocab_size=self._config.dst_vocab_size,
                                                         confidence=1 - self._config.train.label_smoothing)
            mean_loss = tf.reduce_sum(loss * mask) / (tf.reduce_sum(mask))

        return acc, mean_loss
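
Example #3 adds an optional large-margin softmax path. Because `lsoftmax` operates on 2D inputs, the decoder output is flattened to `[batch * length, hidden]`, projected with the (possibly tied) embedding kernel, and the resulting logits are reshaped back to `[batch, length, vocab]`. The same flatten/project/reshape pattern, sketched with a plain matmul in place of `lsoftmax` (shapes and the "kernel" variable name are assumed from the code above):

    def project_to_vocab(decoder_output, vocab_size):
        """Flatten, project with a [vocab, hidden] kernel, and reshape back (plain-softmax sketch)."""
        hidden = decoder_output.get_shape().as_list()[-1]
        dyn_shape = tf.unstack(tf.shape(decoder_output))          # e.g. [batch, length, hidden]
        flat = tf.reshape(decoder_output, [-1, hidden])           # [batch * length, hidden]
        weights = tf.get_variable("kernel", [vocab_size, hidden])  # the tied embedding kernel when reused
        flat_logits = tf.matmul(flat, weights, transpose_b=True)   # [batch * length, vocab]
        return tf.reshape(flat_logits, dyn_shape[:-1] + [vocab_size])
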
Example #4
    def train_output_label(self, decoder_output, Y, X, Z, reuse):
        """Calculate loss and accuracy."""
        with tf.variable_scope(self.decoder_scope, reuse=reuse):
            logits_gen = dense(
                decoder_output,
                self._config.dst_vocab_size,
                use_bias=True,
                name="dst_embedding"
                if self._config.tie_embedding_and_softmax else "softmax",
                reuse=True if self._config.tie_embedding_and_softmax else
                None)  # 2D to 3D

            logits_cop = dense(decoder_output,
                               self._config.lbl_vocab_size,
                               use_bias=True,
                               name="lbl_embedding",
                               reuse=reuse)
            p_copy = tf.sigmoid(logits_cop)

            loss_copy = common_layers.smoothing_cross_entropy(
                logits=logits_cop,
                labels=Z,
                vocab_size=self._config.lbl_vocab_size,
                confidence=1 - self._config.train.label_smoothing)

            mask = tf.to_float(tf.not_equal(Z, 0))
            preds_cop = tf.to_int32(tf.argmax(logits_cop, axis=-1))
            acc_cop = tf.reduce_sum(
                tf.to_float(tf.equal(preds_cop, Z)) *
                mask) / tf.reduce_sum(mask)
            mean_loss_cop = tf.reduce_sum(
                loss_copy * mask) / (tf.reduce_sum(mask))

            # calculate accuracy of edit label
            Z_edit = tf.to_int32(tf.equal(Z, 2))
            preds_cop_edit = tf.to_int32(tf.equal(preds_cop, 2))
            mask_edit = tf.to_float(tf.not_equal(Z_edit, 0))
            acc_cop_edit = tf.reduce_sum(
                tf.to_float(tf.equal(preds_cop_edit, Z_edit)) *
                mask_edit) / tf.reduce_sum(mask_edit)

            preds_gen = tf.to_int32(tf.argmax(logits_gen, axis=-1))
            mask = tf.to_float(tf.not_equal(Y, 0))
            acc_gen = tf.reduce_sum(
                tf.to_float(tf.equal(preds_gen, Y)) *
                mask) / tf.reduce_sum(mask)

            # Smoothed loss
            loss_gen = common_layers.smoothing_cross_entropy(
                logits=logits_gen,
                labels=Y,
                vocab_size=self._config.dst_vocab_size,
                confidence=1 - self._config.train.label_smoothing)
            if self._config.del_penalty_coef:
                penalty_mask = tf.to_float(
                    tf.equal(preds_cop,
                             2)) * self._config.del_penalty_coef + tf.to_float(
                                 tf.not_equal(preds_cop, 2))
                loss_gen = loss_gen * penalty_mask

            mean_loss_gen = tf.reduce_sum(
                loss_gen * mask) / (tf.reduce_sum(mask))

            if self._config.label_loss_ratio:
                mean_loss = tf.add(
                    self._config.label_loss_ratio * mean_loss_cop,
                    (1.0 - self._config.label_loss_ratio) * mean_loss_gen)
            else:
                mean_loss = mean_loss_gen

        return acc_cop, acc_gen, acc_cop_edit, mean_loss
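
Example #4 trains two heads jointly: an edit-label classifier (`logits_cop` over `lbl_vocab_size` labels) and the usual token generator. When `del_penalty_coef` is set, the generation loss at positions whose predicted edit label is 2 (assumed here to be the delete/edit label, as in the accuracy computation above) is rescaled by that coefficient; when `label_loss_ratio` is set, the final loss is a convex combination of the two masked means. A small illustration of the penalty mask with hypothetical values:

    preds_cop = tf.constant([[1, 2, 1, 2]])    # hypothetical predicted edit labels
    del_penalty_coef = 2.0
    penalty_mask = (tf.to_float(tf.equal(preds_cop, 2)) * del_penalty_coef +
                    tf.to_float(tf.not_equal(preds_cop, 2)))  # [[1., 2., 1., 2.]]
    # Combined objective, as in the code above:
    # mean_loss = label_loss_ratio * mean_loss_cop + (1.0 - label_loss_ratio) * mean_loss_gen
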