Example #1
    def ce_loss(self, logits, labels, len_logits, len_labels):
        """
        Compute the cross-entropy optimization loss.
        Inputs are batch-major; returns a batch-shaped (per-utterance) loss.
        """
        l = tf.reduce_min([tf.shape(logits)[1], tf.shape(labels)[1]])
        with tf.name_scope('CE_loss'):
            crossent = smoothing_cross_entropy(logits=logits[:, :l, :],
                                               labels=labels[:, :l],
                                               vocab_size=self.args.dim_output,
                                               confidence=1.0)

            mask = tf.sequence_mask(len_labels, maxlen=l, dtype=logits.dtype)
            mask2 = tf.sequence_mask(len_logits, maxlen=l, dtype=logits.dtype)
            mask *= mask2
            # use reduce_sum, not reduce_mean: reduce_mean would average over the
            # padded time axis instead of over the number of valid tokens
            loss = tf.reduce_sum(crossent * mask, -1)

            if self.args.model.decoder2.confidence_penalty > 0:  # utt-level
                cp_loss = self.args.model.decoder2.confidence_penalty * \
                            confidence_penalty(logits, len_logits)
                loss += cp_loss

            if self.args.model.token_level_ocd:  # token-level
                loss /= tf.reduce_sum(mask, -1)

        return loss
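Both smoothing_cross_entropy and confidence_penalty are project utilities that are not shown in this listing. A minimal sketch of what they could look like, assuming a tensor2tensor-style label-smoothed cross-entropy and an entropy-based utterance-level penalty (the names, signatures, and exact reductions are assumptions):

import tensorflow as tf

def smoothing_cross_entropy(logits, labels, vocab_size, confidence):
    # Hypothetical sketch: label-smoothed cross-entropy; the true token gets
    # `confidence` mass and the remainder is spread uniformly over the vocabulary.
    low_confidence = (1.0 - confidence) / (vocab_size - 1)
    soft_targets = tf.one_hot(labels, depth=vocab_size,
                              on_value=confidence, off_value=low_confidence)
    return tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=soft_targets, logits=logits)            # [batch, time]

def confidence_penalty(logits, len_logits):
    # Hypothetical sketch: negative entropy of the model distribution,
    # summed over the valid frames of each utterance (utterance-level penalty).
    probs = tf.nn.softmax(logits)
    neg_entropy = tf.reduce_sum(probs * tf.log(probs + 1e-10), -1)
    mask = tf.sequence_mask(len_logits, maxlen=tf.shape(logits)[1],
                            dtype=logits.dtype)
    return tf.reduce_sum(neg_entropy * mask, -1)       # [batch]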
Example #2
    def ocd_loss(self, logits, len_logits, labels, decoded, len_decoded):
        """
        The logits length equals the sample_id length.
        Returns a batch-shaped loss; if `len_logits` is all zeros, the loss is 0.
        """
        from tfModels.OptimalDistill import OCD

        optimal_distributions, optimal_targets = OCD(
            hyp=decoded, ref=labels, vocab_size=self.args.dim_output)

        crossent = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=optimal_distributions, logits=logits)

        pad_mask = tf.sequence_mask(len_logits,
                                    maxlen=tf.shape(logits)[1],
                                    dtype=logits.dtype)

        loss = tf.reduce_sum(crossent * pad_mask, -1)  # utt-level

        if self.args.model.decoder2.confidence_penalty > 0:  # utt-level
            cp_loss = self.args.model.decoder2.confidence_penalty * \
                        confidence_penalty(logits, len_decoded)
            loss += cp_loss

        if self.args.model.token_level_ocd:  # token-level
            loss /= tf.reduce_sum(pad_mask, -1)

        return loss
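The OCD helper (optimal completion distillation) supplies a soft target distribution for every decoding step, and tf.nn.softmax_cross_entropy_with_logits_v2 scores the step logits against those soft labels. A self-contained toy call showing the expected shapes (all values illustrative):

import tensorflow as tf

# one utterance, two decoding steps, a 3-token vocabulary
soft_targets = tf.constant([[[0.5, 0.5, 0.0],
                             [0.0, 1.0, 0.0]]])    # [batch, time, vocab]
step_logits = tf.constant([[[2.0, 2.0, -1.0],
                            [0.0, 3.0,  0.0]]])    # [batch, time, vocab]
crossent = tf.nn.softmax_cross_entropy_with_logits_v2(
    labels=soft_targets, logits=step_logits)       # [batch, time]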
Example #3
    def build_single_graph(self, id_gpu, name_gpu, tensors_input):
        with tf.device(lambda op: choose_device(op, name_gpu, self.center_device)):

            inputs = tensors_input.feature_splits[id_gpu]
            len_inputs = tensors_input.len_fea_splits[id_gpu]
            inputs.set_shape([None, None, self.size_embedding])

            if self.type == 'LSTM':
                from tfSeq2SeqModels.decoders.lm_decoder import LM_Decoder
                self.decoder = LM_Decoder(self.args, self.is_train, self.embed_table_decoder)
                logits = self.decoder(inputs, len_inputs)
            elif self.type == 'SelfAttention':
                from tfSeq2SeqModels.decoders.self_attention_lm_decoder import SelfAttentionDecoder
                self.decoder = SelfAttentionDecoder(self.args, self.is_train, self.embed_table_decoder)
                # from tfSeq2SeqModels.decoders.self_attention_lm_decoder_lh import SelfAttentionDecoder_lh
                # decoder = SelfAttentionDecoder_lh(self.args, self.is_train, self.embed_table_decoder)
                logits = self.decoder(inputs, len_inputs)

            len_logits = tensors_input.len_label_splits[id_gpu]

            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=tensors_input.label_splits[id_gpu],
                logits=logits)
            loss *= tf.sequence_mask(
                tensors_input.len_label_splits[id_gpu],
                maxlen=tf.shape(logits)[1],
                dtype=logits.dtype)
            if self.args.model.confidence_penalty:
                ls_loss = self.args.model.confidence_penalty * confidence_penalty(logits, len_logits)
                ls_loss = tf.reduce_mean(ls_loss)
                loss += ls_loss

            # from tfModels.tensor2tensor.common_layers import padded_cross_entropy, weights_nonzero
            #
            # mask = tf.sequence_mask(
            #     tensors_input.len_label_splits[id_gpu],
            #     maxlen=tf.shape(logits)[1],
            #     dtype=logits.dtype)
            # batch_mask = tf.tile(tf.expand_dims(mask, -1), [1, 1, tf.shape(logits)[-1]])
            # loss, _ = padded_cross_entropy(
            #     logits* batch_mask,
            #     tensors_input.label_splits[id_gpu],
            #     0.0,
            #     weights_fn=weights_nonzero,
            #     reduce_sum=False)
            # loss = tf.Print(loss, [weight_sum], message='weight_sum', summarize=1000)

            if self.is_train:
                with tf.name_scope("gradients"):
                    gradients = self.optimizer.compute_gradients(loss)

        self.__class__.num_Model += 1
        logging.info('\tbuild {} on {} successfully! total model number: {}'.format(
            self.__class__.__name__, name_gpu, self.__class__.num_Model))

        if self.is_train:
            return loss, gradients
        else:
            return loss
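choose_device is imported from the project's utilities and used as the device function passed to tf.device. A hedged sketch of such a placement function, assuming variables are pinned to a central parameter device while all other ops run on the tower's GPU (the op-type test is an assumption):

def choose_device(op, device_for_ops, center_device):
    # Hypothetical sketch: place variable ops on the central device,
    # everything else on the GPU assigned to this model replica.
    if op.type.startswith('Variable'):   # e.g. 'VariableV2'
        return center_device
    return device_for_ops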
Example #4
    def rna_loss(self, logits, len_logits, labels, len_labels, encoded=None, len_encoded=None):
        with tf.name_scope("ctc_loss"):
            labels_sparse = dense_sequence_to_sparse(
                labels,
                len_labels)
            loss = tf.nn.ctc_loss(
                labels_sparse,
                logits,
                sequence_length=len_logits,
                ctc_merge_repeated=False,
                ignore_longer_outputs_than_inputs=True,
                time_major=False)

        if self.args.model.decoder.confidence_penalty:
            ls_loss = self.args.model.decoder.confidence_penalty * \
                        confidence_penalty(logits, len_logits)
            loss += ls_loss

        return loss
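dense_sequence_to_sparse is a project helper that converts padded dense labels into the SparseTensor that tf.nn.ctc_loss expects; it is not shown in this listing. A minimal sketch under that assumption:

import tensorflow as tf

def dense_sequence_to_sparse(seqs, len_seqs):
    # Hypothetical sketch: keep only the valid (unpadded) positions of the
    # dense [batch, time] label matrix and pack them into a SparseTensor.
    mask = tf.sequence_mask(len_seqs, maxlen=tf.shape(seqs)[1])
    indices = tf.where(mask)                        # int64, [n_valid, 2]
    values = tf.gather_nd(seqs, indices)
    dense_shape = tf.cast(tf.shape(seqs), tf.int64)
    return tf.SparseTensor(indices, values, dense_shape)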
Example #5
    def ctc_loss(self, logits, len_logits, labels, len_labels):
        """
        Note on "No valid path found": this warning can occur when the
        activations for the target labels are zero.
        Returns a batch-shaped loss.
        """
        with tf.name_scope("ctc_loss"):
            labels_sparse = dense_sequence_to_sparse(labels, len_labels)
            loss = tf.nn.ctc_loss(
                labels_sparse,
                logits,
                sequence_length=len_logits,
                ctc_merge_repeated=self.args.model.avg_repeated,
                ignore_longer_outputs_than_inputs=True,
                time_major=False)

        if self.args.model.decoder.confidence_penalty:
            ls_loss = self.args.model.decoder.confidence_penalty * \
                        confidence_penalty(logits, len_logits)
            loss += ls_loss

        return loss
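As a usage reference, tf.nn.ctc_loss with time_major=False takes logits of shape [batch, max_time, num_classes] (the blank conventionally being the last class here) plus sparse labels. A toy call reusing the dense_sequence_to_sparse sketch above (sizes and values are illustrative):

import tensorflow as tf

batch, max_time, vocab = 2, 50, 30                  # blank id would be vocab - 1
logits = tf.random_normal([batch, max_time, vocab])
len_logits = tf.constant([50, 42], dtype=tf.int32)
labels = tf.constant([[3, 7, 7, 2, 0],
                      [5, 1, 0, 0, 0]], dtype=tf.int32)
len_labels = tf.constant([4, 2], dtype=tf.int32)

labels_sparse = dense_sequence_to_sparse(labels, len_labels)
loss = tf.nn.ctc_loss(labels_sparse, logits,
                      sequence_length=len_logits,
                      ignore_longer_outputs_than_inputs=True,
                      time_major=False)             # batch-shaped loss: [2]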
Example #6
    def build_single_graph(self, id_gpu, name_gpu, tensors_input):
        tf.get_variable_scope().set_initializer(
            tf.variance_scaling_initializer(1.0,
                                            mode="fan_avg",
                                            distribution="uniform"))
        with tf.device(
                lambda op: choose_device(op, name_gpu, self.center_device)):
            # create encoder obj
            encoder = self.gen_encoder(is_train=self.is_train, args=self.args)
            decoder = self.gen_decoder(is_train=self.is_train,
                                       embed_table=None,
                                       global_step=self.global_step,
                                       args=self.args)

            features = tensors_input.feature_splits[id_gpu]
            # use the encoder to encode the input sequence
            hidden_output, len_hidden_output = encoder(
                features=features,
                len_feas=tensors_input.len_fea_splits[id_gpu])

            logits, align, len_logits = decoder(hidden_output,
                                                len_hidden_output)

            if self.is_train:
                loss = self.ctc_loss(
                    logits=logits,
                    len_logits=len_logits,
                    labels=tensors_input.label_splits[id_gpu],
                    len_labels=tensors_input.len_label_splits[id_gpu])

                if self.args.model.balance_training:
                    # drop utterances whose per-token loss does not exceed the threshold
                    token_loss = loss / tf.to_float(len_logits)
                    mask = tf.to_float(
                        tf.greater(token_loss,
                                   self.args.model.balance_training))
                    loss *= mask

                if self.args.model.confidence_penalty:
                    cp_loss = self.args.model.decoder.confidence_penalty * confidence_penalty(
                        logits, len_logits)
                    assert cp_loss.get_shape().ndims == 1
                    loss += cp_loss

                if self.args.model.constrain_repeated:
                    from tfModels.CTCShrink import repeated_constrain_loss

                    loss_constrain = repeated_constrain_loss(
                        distribution_acoustic=logits,
                        hidden=hidden_output,
                        len_acoustic=len_hidden_output,
                        blank_id=self.args.dim_output - 1)
                    loss += self.args.model.constrain_repeated * loss_constrain

                with tf.name_scope("gradients"):
                    assert loss.get_shape().ndims == 1
                    loss = tf.reduce_mean(loss)
                    gradients = self.optimizer.compute_gradients(loss)

        self.__class__.num_Model += 1
        logging.info(
            '\tbuild {} on {} successfully! total model number: {}'.format(
                self.__class__.__name__, name_gpu, self.__class__.num_Model))

        if self.is_train:
            return loss, gradients, [align, tensors_input.label_splits[id_gpu]]
        else:
            return logits, len_logits
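The balance_training branch above normalizes each utterance's CTC loss by its output length and zeroes out utterances whose per-token loss does not exceed the threshold, so already-easy utterances stop contributing gradients. A small standalone illustration of that masking (the threshold value is illustrative):

import tensorflow as tf

utt_loss = tf.constant([120.0, 8.0, 45.0])    # per-utterance CTC losses
len_logits = tf.constant([60, 40, 50])
threshold = 0.5                               # stands in for args.model.balance_training

token_loss = utt_loss / tf.to_float(len_logits)           # [2.0, 0.2, 0.9]
keep = tf.to_float(tf.greater(token_loss, threshold))     # [1.0, 0.0, 1.0]
balanced_loss = utt_loss * keep                           # second utterance is dropped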
Example #7
    def build_single_graph(self, id_gpu, name_gpu, tensors_input):
        tf.get_variable_scope().set_initializer(
            tf.variance_scaling_initializer(1.0,
                                            mode="fan_avg",
                                            distribution="uniform"))
        with tf.device(
                lambda op: choose_device(op, name_gpu, self.center_device)):
            self.encoder = self.gen_encoder(is_train=self.is_train,
                                            args=self.args)
            self.fc_decoder = self.gen_decoder(is_train=self.is_train,
                                               embed_table=None,
                                               global_step=self.global_step,
                                               args=self.args,
                                               name='decoder')
            self.decoder = decoder = self.gen_decoder2(
                is_train=self.is_train,
                embed_table=self.embedding_tabel,
                global_step=self.global_step,
                args=self.args,
                name='decoder2')

            hidden_output, len_hidden_output = self.encoder(
                features=tensors_input.feature_splits[id_gpu],
                len_feas=tensors_input.len_fea_splits[id_gpu])
            logits_acoustic, alignment, len_acoustic = self.fc_decoder(
                hidden_output, len_hidden_output)
            logits_acoustic = tf.stop_gradient(logits_acoustic)
            len_acoustic = tf.stop_gradient(len_acoustic)

            distribution_acoustic = tf.nn.softmax(logits_acoustic)

            # choose whether to shrink the hidden output or the acoustic distribution
            if not self.args.model.shrink_hidden:
                hidden_output = distribution_acoustic

            blank_id = self.args.dim_ctc_output - 1 if self.args.dim_ctc_output else self.args.dim_output - 1

            hidden_shrunk, len_no_blank = acoustic_hidden_shrink_tf(
                distribution_acoustic=distribution_acoustic,
                hidden=hidden_output,
                len_acoustic=len_acoustic,
                blank_id=blank_id,
                num_post=self.args.model.num_post,
                frame_expand=self.args.model.frame_expand)

            if (not self.is_train) and (self.args.beam_size > 1):
                # inference phase
                with tf.variable_scope(decoder.name or 'decoder'):
                    logits, decoded, len_decoded = decoder.beam_decode_rerank(
                        hidden_shrunk, len_no_blank)
            else:
                # training phase
                logits, decoded, len_decoded = decoder(hidden_shrunk,
                                                       len_no_blank)

            if self.is_train:
                if self.args.model.use_ce_loss:
                    loss = self.ce_loss(
                        logits=logits,
                        labels=tensors_input.label_splits[id_gpu],
                        len_logits=len_acoustic,
                        len_labels=tensors_input.len_label_splits[id_gpu])
                else:
                    loss = self.ocd_loss(
                        logits=logits,
                        len_logits=len_decoded,
                        labels=tensors_input.label_splits[id_gpu],
                        decoded=decoded,
                        len_decoded=len_decoded)

                if self.args.model.confidence_penalty > 0:  # utt-level
                    cp_loss = self.args.model.confidence_penalty * \
                        confidence_penalty(logits, len_decoded) / tf.to_float(len_decoded)

                    loss += cp_loss

                if self.args.model.musk_update:
                    self.idx_update = self.deserve_idx(
                        decoded, len_decoded,
                        tensors_input.label_splits[id_gpu],
                        tensors_input.len_label_splits[id_gpu])
                    loss = tf.reshape(tf.gather(loss, self.idx_update), [-1])
                l2_loss = tf.add_n(
                    [tf.nn.l2_loss(v) for v in self.decoder.params])
                with tf.name_scope("gradients"):
                    loss = tf.reduce_mean(loss)
                    gradients = self.optimizer.compute_gradients(loss)

        self.__class__.num_Model += 1
        logging.info(
                '\tbuild {} on {} successfully! total model number: {}'.format(
                self.__class__.__name__, name_gpu, self.__class__.num_Model))

        if self.is_train:
            return loss, gradients, \
            [decoded, tensors_input.label_splits[id_gpu], l2_loss]
            # return loss, gradients, tf.no_op()
        else:
            return logits, len_decoded, decoded
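acoustic_hidden_shrink_tf comes from the project's CTC utilities and is not shown in this listing; it removes blank-dominated frames before the second decoder runs. A simplified, hypothetical sketch of the core idea, ignoring the num_post and frame_expand options and simply dropping every frame whose argmax is the blank token:

import tensorflow as tf

def shrink_blank_frames(distribution_acoustic, hidden, len_acoustic, blank_id):
    # Hypothetical, simplified sketch: drop every frame whose argmax is the
    # blank token, repack the kept frames to the left and recompute lengths.
    argmax = tf.argmax(distribution_acoustic, -1, output_type=tf.int32)
    valid = tf.logical_and(
        tf.not_equal(argmax, blank_id),
        tf.sequence_mask(len_acoustic, maxlen=tf.shape(hidden)[1]))
    len_no_blank = tf.reduce_sum(tf.to_int32(valid), -1)       # [batch]
    max_len = tf.reduce_max(len_no_blank)

    def shrink_one(args):
        h, keep = args
        kept = tf.boolean_mask(h, keep)                        # [n_kept, dim]
        pad = tf.zeros([max_len - tf.shape(kept)[0], tf.shape(h)[-1]], h.dtype)
        return tf.concat([kept, pad], 0)                       # [max_len, dim]

    hidden_shrunk = tf.map_fn(shrink_one, (hidden, valid), dtype=hidden.dtype)
    return hidden_shrunk, len_no_blank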