Example 1
    def build_single_graph(self, id_gpu, name_gpu, tensors_input):
        with tf.device(lambda op: choose_device(op, name_gpu, self.center_device)):

            inputs = tensors_input.feature_splits[id_gpu]
            len_inputs = tensors_input.len_fea_splits[id_gpu]
            inputs.set_shape([None, None, self.size_embedding])

            if self.type == 'LSTM':
                from tfSeq2SeqModels.decoders.lm_decoder import LM_Decoder
                self.decoder = LM_Decoder(self.args, self.is_train, self.embed_table_decoder)
                logits = self.decoder(inputs, len_inputs)
            elif self.type == 'SelfAttention':
                from tfSeq2SeqModels.decoders.self_attention_lm_decoder import SelfAttentionDecoder
                self.decoder = SelfAttentionDecoder(self.args, self.is_train, self.embed_table_decoder)
                # from tfSeq2SeqModels.decoders.self_attention_lm_decoder_lh import SelfAttentionDecoder_lh
                # decoder = SelfAttentionDecoder_lh(self.args, self.is_train, self.embed_table_decoder)
                logits = self.decoder(inputs, len_inputs)
            else:
                raise NotImplementedError(
                    'unknown decoder type: {}'.format(self.type))

            len_logits = tensors_input.len_label_splits[id_gpu]

            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=tensors_input.label_splits[id_gpu],
                logits=logits)
            loss *= tf.sequence_mask(
                tensors_input.len_label_splits[id_gpu],
                maxlen=tf.shape(logits)[1],
                dtype=logits.dtype)
            if self.args.model.confidence_penalty:
                ls_loss = self.args.model.confidence_penalty * confidence_penalty(logits, len_logits)
                ls_loss = tf.reduce_mean(ls_loss)
                loss += ls_loss

            # from tfModels.tensor2tensor.common_layers import padded_cross_entropy, weights_nonzero
            #
            # mask = tf.sequence_mask(
            #     tensors_input.len_label_splits[id_gpu],
            #     maxlen=tf.shape(logits)[1],
            #     dtype=logits.dtype)
            # batch_mask = tf.tile(tf.expand_dims(mask, -1), [1, 1, tf.shape(logits)[-1]])
            # loss, _ = padded_cross_entropy(
            #     logits* batch_mask,
            #     tensors_input.label_splits[id_gpu],
            #     0.0,
            #     weights_fn=weights_nonzero,
            #     reduce_sum=False)
            # loss = tf.Print(loss, [weight_sum], message='weight_sum', summarize=1000)

            if self.is_train:
                with tf.name_scope("gradients"):
                    gradients = self.optimizer.compute_gradients(loss)

        self.__class__.num_Model += 1
        logging.info('\tbuild {} on {} successfully! total model number: {}'.format(
            self.__class__.__name__, name_gpu, self.__class__.num_Model))

        if self.is_train:
            return loss, gradients
        else:
            return loss
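
The loss above is the standard masked token-level cross-entropy: per-token losses from tf.nn.sparse_softmax_cross_entropy_with_logits are zeroed out beyond each sequence's true length with tf.sequence_mask. A minimal, self-contained sketch of the same pattern (TF1-style; the function name and shapes are illustrative, not part of the repository):

import tensorflow as tf

def masked_token_cross_entropy(logits, labels, len_labels):
    """logits: [batch, time, vocab]; labels: [batch, time]; len_labels: [batch].
    Returns the per-token loss with padded positions zeroed, shape [batch, time]."""
    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)
    mask = tf.sequence_mask(
        len_labels, maxlen=tf.shape(logits)[1], dtype=logits.dtype)
    return ce * mask

Example 1 keeps the loss unreduced; tf.gradients then treats the [batch, time] tensor as if its elements were summed.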
Example 2
    def build_single_graph(self, id_gpu, name_gpu, tensors_input):

        with tf.device(lambda op: choose_device(op, name_gpu, self.center_device)):
            encoder = self.gen_encoder(
                is_train=self.is_train,
                embed_table=self.embed_table_encoder,
                args=self.args)
            decoder = self.gen_decoder(
                is_train=self.is_train,
                embed_table=self.embed_table_decoder,
                global_step=self.global_step,
                args=self.args)
            self.schedule = decoder.schedule

            encoded, len_encoded = encoder(
                features=tensors_input.feature_splits[id_gpu],
                len_feas=tensors_input.len_fea_splits[id_gpu])

            decoder_input = decoder.build_input(
                id_gpu=id_gpu,
                tensors_input=tensors_input)
            # during inference, decoder_input.input_labels and len_labels are None
            decoder.build_helper(
                type=self.helper_type,
                labels=decoder_input.input_labels,
                len_labels=decoder_input.len_labels,
                batch_size=tf.shape(len_encoded)[0])

            logits, preds, len_decoded = decoder(encoded, len_encoded)

            if self.is_train:
                if self.args.model.loss_type == 'OCD':
                    # logits = tf.Print(logits, [tensors_input.len_label_splits[id_gpu][0]], message='label length: ', summarize=1000)
                    # logits = tf.Print(logits, [tf.shape(logits[0])], message='logits shape: ', summarize=1000)
                    loss, (optimal_targets, optimal_distributions) = self.ocd_loss(
                        logits=logits,
                        len_logits=len_decoded,
                        labels=tensors_input.label_splits[id_gpu],
                        preds=preds)
                elif self.args.model.loss_type == 'CE':
                    loss = self.ce_loss(
                        logits=logits,
                        labels=decoder_input.output_labels[:, :tf.shape(logits)[1]],
                        len_labels=decoder_input.len_labels)
                elif self.args.model.loss_type == 'Premium_CE':
                    table_targets_distributions = tf.nn.softmax(tf.constant(self.args.table_targets))
                    loss = self.premium_ce_loss(
                        logits=logits,
                        labels=tensors_input.label_splits[id_gpu],
                        table_targets_distributions=table_targets_distributions,
                        len_labels=tensors_input.len_label_splits[id_gpu])
                else:
                    raise NotImplementedError(
                        'unknown loss type: {}'.format(self.args.model.loss_type))

                with tf.name_scope("gradients"):
                    assert loss.get_shape().ndims == 1
                    loss = tf.reduce_mean(loss)
                    gradients = self.optimizer.compute_gradients(loss)

        self.__class__.num_Model += 1
        logging.info('\tbuild {} on {} successfully! total model number: {}'.format(
            self.__class__.__name__, name_gpu, self.__class__.num_Model))

        if self.is_train:
            # the commented tf.no_op() variant below can replace the debug list when it is not needed
            # return loss, gradients, tf.no_op()
            return loss, gradients, [len_decoded, preds, tensors_input.label_splits[id_gpu]]
        else:
            return logits, len_decoded, preds
Example 3
    def build_single_graph(self, id_gpu, name_gpu, tensors_input):
        tf.get_variable_scope().set_initializer(
            tf.variance_scaling_initializer(1.0,
                                            mode="fan_avg",
                                            distribution="uniform"))
        with tf.device(
                lambda op: choose_device(op, name_gpu, self.center_device)):
            # create encoder obj
            encoder = self.gen_encoder(is_train=self.is_train, args=self.args)
            decoder = self.gen_decoder(is_train=self.is_train,
                                       embed_table=None,
                                       global_step=self.global_step,
                                       args=self.args)

            features = tensors_input.feature_splits[id_gpu]
            # use the encoder to encode the input sequence
            hidden_output, len_hidden_output = encoder(
                features=features,
                len_feas=tensors_input.len_fea_splits[id_gpu])

            logits, align, len_logits = decoder(hidden_output,
                                                len_hidden_output)

            if self.is_train:
                loss = self.ctc_loss(
                    logits=logits,
                    len_logits=len_logits,
                    labels=tensors_input.label_splits[id_gpu],
                    len_labels=tensors_input.len_label_splits[id_gpu])

                if self.args.model.balance_training:
                    token_loss = loss / tf.to_float(len_logits)
                    mask = tf.to_float(
                        tf.greater(token_loss,
                                   self.args.model.balance_training))
                    loss *= mask

                if self.args.model.confidence_penalty:
                    cp_loss = self.args.model.decoder.confidence_penalty * confidence_penalty(
                        logits, len_logits)
                    assert cp_loss.get_shape().ndims == 1
                    loss += cp_loss

                if self.args.model.constrain_repeated:
                    from tfModels.CTCShrink import repeated_constrain_loss

                    loss_constrain = repeated_constrain_loss(
                        distribution_acoustic=logits,
                        hidden=hidden_output,
                        len_acoustic=len_hidden_output,
                        blank_id=self.args.dim_output - 1)
                    loss += self.args.model.constrain_repeated * loss_constrain

                with tf.name_scope("gradients"):
                    assert loss.get_shape().ndims == 1
                    loss = tf.reduce_mean(loss)
                    gradients = self.optimizer.compute_gradients(loss)

        self.__class__.num_Model += 1
        logging.info(
            '\tbuild {} on {} successfully! total model number: {}'.format(
                self.__class__.__name__, name_gpu, self.__class__.num_Model))

        if self.is_train:
            return loss, gradients, [align, tensors_input.label_splits[id_gpu]]
        else:
            return logits, len_logits
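
The self.ctc_loss used above is not shown. Assuming it follows the standard tf.nn.ctc_loss convention, where the blank label is the last class (matching blank_id = self.args.dim_output - 1 used elsewhere in these examples) and one loss value is returned per utterance (matching the ndims == 1 assert), a plausible batch-major wrapper looks like this; the helper names are illustrative:

import tensorflow as tf

def dense_to_sparse(labels, len_labels):
    """Convert padded int32 labels [batch, time] into the SparseTensor that tf.nn.ctc_loss expects."""
    mask = tf.sequence_mask(len_labels, maxlen=tf.shape(labels)[1])
    indices = tf.where(mask)
    values = tf.boolean_mask(labels, mask)
    shape = tf.cast(tf.shape(labels), tf.int64)
    return tf.SparseTensor(indices, values, shape)

def ctc_loss(logits, len_logits, labels, len_labels):
    """logits: [batch, time, vocab]. Returns one CTC loss value per utterance, shape [batch]."""
    sparse_labels = dense_to_sparse(labels, len_labels)
    return tf.nn.ctc_loss(
        labels=sparse_labels,
        inputs=logits,
        sequence_length=len_logits,
        time_major=False,
        ignore_longer_outputs_than_inputs=True)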
Example 4
    def build_single_graph(self, id_gpu, name_gpu, tensors_input):
        tf.get_variable_scope().set_initializer(
            tf.variance_scaling_initializer(1.0,
                                            mode="fan_avg",
                                            distribution="uniform"))
        with tf.device(
                lambda op: choose_device(op, name_gpu, self.center_device)):
            # build ctc model
            decoded_ctc, _, distribution_ctc = self.ctc_model.list_run

            from tfModels.CTCShrink import feature_shrink_tf
            blank_id = self.args.dim_output - 1
            feature_shrunk, len_shrunk = feature_shrink_tf(
                distribution=distribution_ctc,
                feature=tensors_input.feature_splits[id_gpu],
                len_feature=tensors_input.len_fea_splits[id_gpu],
                blank_id=blank_id,
                frame_expand=self.args.model.frame_expand)

            feature_shrunk = tf.stop_gradient(feature_shrunk)
            len_shrunk = tf.stop_gradient(len_shrunk)

            # build sequence labellings model
            self.encoder = self.gen_encoder(is_train=self.is_train,
                                            args=self.args)
            self.decoder = self.gen_decoder(is_train=self.is_train,
                                            embed_table=self.embedding_tabel,
                                            global_step=self.global_step,
                                            args=self.args,
                                            name='decoder')
            self.schedule = self.decoder.schedule

            hidden_output, len_hidden_output = self.encoder(
                features=feature_shrunk, len_feas=len_shrunk)

            if (not self.is_train) and (self.args.beam_size > 1):
                # inference phase
                with tf.variable_scope(self.decoder.name or 'decoder'):
                    if self.args.dirs.lm_checkpoint:
                        logging.info('beam search with language model ...')
                        logits, decoded, len_decoded = self.decoder.beam_decode_rerank(
                            hidden_output, len_hidden_output)
                    else:
                        logging.info('beam search ...')
                        logits, decoded, len_decoded = self.decoder.beam_decode(
                            hidden_output, len_hidden_output)
            else:
                # training phase
                logging.info('greedy search ...')
                logits, decoded, len_decoded = self.decoder(
                    hidden_output, len_hidden_output)

            if self.is_train:
                if self.args.model.decoder_loss == 'CE':
                    loss = self.ce_loss(
                        logits=logits,
                        labels=tensors_input.label_splits[id_gpu],
                        len_logits=len_decoded,
                        len_labels=tensors_input.len_label_splits[id_gpu])
                elif self.args.model.decoder_loss == 'OCD':
                    loss = self.ocd_loss(
                        logits=logits,
                        len_logits=len_decoded,
                        labels=tensors_input.label_splits[id_gpu],
                        decoded=decoded,
                        len_decoded=len_decoded)
                else:
                    raise NotImplementedError(
                        'unknown decoder loss: {}'.format(self.args.model.decoder_loss))

                with tf.name_scope("gradients"):
                    assert loss.get_shape().ndims == 1
                    loss = tf.reduce_mean(loss)
                    gradients = self.optimizer.compute_gradients(loss)

        self.__class__.num_Model += 1
        logging.info(
                '\tbuild {} on {} successfully! total model number: {}'.format(
                self.__class__.__name__, name_gpu, self.__class__.num_Model))

        if self.is_train:
            return loss, gradients, \
            [decoded, tensors_input.label_splits[id_gpu], loss]
            # return loss, gradients, tf.no_op()
        else:
            return logits, len_decoded, decoded
Example 5
    def build_single_graph(self, id_gpu, name_gpu, tensors_input):

        with tf.device(
                lambda op: choose_device(op, name_gpu, self.center_device)):
            encoder = self.gen_encoder(is_train=self.is_train,
                                       embed_table=None,
                                       args=self.args)
            decoder = self.gen_decoder(is_train=self.is_train,
                                       embed_table=self.embedding_tabel,
                                       global_step=self.global_step,
                                       args=self.args)

            with tf.variable_scope(encoder.name or 'encoder'):
                encoded, len_encoded = encoder(
                    features=tensors_input.feature_splits[id_gpu],
                    len_feas=tensors_input.len_fea_splits[id_gpu])

            with tf.variable_scope(decoder.name or 'decoder'):
                decoder_input = decoder.build_input(
                    id_gpu=id_gpu, tensors_input=tensors_input)

                if (not self.is_train) or (self.args.model.training_type
                                           == 'self-learning'):
                    '''
                    training_type:
                        - self-learning: logits depend only on the model's own predictions
                        - teacher-forcing: logits depend on the ground-truth labels during training
                    '''
                    # inference phase
                    if self.args.beam_size > 1:
                        logging.info('beam search with language model ...')
                        results, preds, len_decoded = decoder.beam_decode_rerank(
                            encoded, len_encoded)
                    else:
                        logging.info('greedy search ...')
                        results, preds, len_decoded = decoder.decoder_with_caching(
                            encoded, len_encoded)
                else:
                    logging.info('teacher-forcing training ...')
                    decoder_input_labels = decoder_input.input_labels * tf.sequence_mask(
                        decoder_input.len_labels,
                        maxlen=tf.shape(decoder_input.input_labels)[1],
                        dtype=tf.int32)
                    logits, preds = decoder.decode(
                        encoded=encoded,
                        len_encoded=len_encoded,
                        decoder_input=decoder_input_labels)

            if self.is_train:
                if self.args.model.loss_type == 'OCD':
                    """
                    constrain the max decode length for ocd training since model
                    will decode to that long at beginning. Recommend 30.
                    """
                    logits = results
                    loss, _ = self.ocd_loss(logits=logits,
                                            len_logits=len_decoded,
                                            labels=decoder_input.output_labels,
                                            preds=preds)

                elif self.args.model.loss_type == 'beam_OCD':
                    logits, preds, len_decoded, _, _ = results
                    batch = tf.shape(logits)[0]
                    beam_size = self.args.beam_size
                    batch_x_beam = batch * beam_size
                    logits = tf.reshape(
                        logits, [batch_x_beam, -1, self.args.dim_output])
                    len_decoded = tf.reshape(len_decoded, [-1])
                    preds = tf.reshape(preds, [batch_x_beam, -1])
                    labels = tf.reshape(
                        tf.tile(decoder_input.output_labels[:, None, :],
                                [1, beam_size, 1]), [batch_x_beam, -1])
                    # logits = tf.Print(logits, [batch_x_beam, tf.shape(logits), tf.shape(preds), tf.shape(labels), tf.shape(len_decoded)], message='batch_x_beam, logits, preds, labels, len_decoded: ', summarize=1000)
                    loss, _ = self.ocd_loss(logits=logits,
                                            len_logits=len_decoded,
                                            labels=labels,
                                            preds=preds)

                elif self.args.model.loss_type == 'CE':
                    loss = self.ce_loss(logits=logits,
                                        labels=decoder_input.output_labels,
                                        len_labels=decoder_input.len_labels)

                elif self.args.model.loss_type == 'Premium_CE':
                    table_targets_distributions = tf.nn.softmax(
                        tf.constant(self.args.table_targets))
                    loss = self.premium_ce_loss(
                        logits=logits,
                        labels=decoder_input.output_labels,
                        table_targets_distributions=table_targets_distributions,
                        len_labels=decoder_input.len_labels)
                else:
                    raise NotImplementedError('NOT found loss type: {}'.format(
                        self.args.model.loss_type))

                with tf.name_scope("gradients"):
                    assert loss.get_shape().ndims == 1
                    loss = tf.reduce_mean(loss)
                    gradients = self.optimizer.compute_gradients(loss)

        self.__class__.num_Model += 1
        logging.info(
                '\tbuild {} on {} successfully! total model number: {}'.format(
                self.__class__.__name__, name_gpu, self.__class__.num_Model))

        if self.is_train:
            # a tf.no_op() can replace the debug list below when it is not needed
            return loss, gradients, [preds, tensors_input.label_splits[id_gpu]]
        else:
            return results, len_decoded, preds
Example 6
    def build_single_graph(self, id_gpu, name_gpu, tensors_input):
        tf.get_variable_scope().set_initializer(tf.variance_scaling_initializer(
            1.0, mode="fan_avg", distribution="uniform"))
        with tf.device(lambda op: choose_device(op, name_gpu, self.center_device)):
            encoder = self.gen_encoder(
                is_train=self.is_train,
                args=self.args)
            self.decoder = decoder = self.gen_decoder(
                is_train=self.is_train,
                embed_table=self.embed_table_decoder,
                global_step=self.global_step,
                args=self.args)
            self.schedule = decoder.schedule

            encoded, len_encoded = encoder(
                features=tensors_input.feature_splits[id_gpu],
                len_feas=tensors_input.len_fea_splits[id_gpu])

            if (not self.is_train) and (self.args.beam_size > 1):
                with tf.variable_scope(decoder.name or 'decoder'):
                    # fake logits!
                    decoded, logits = decoder.beam_decode_rerank(encoded, len_encoded)
            else:
                logits, decoded, len_decoded = decoder(encoded, len_encoded)

            if self.is_train:
                loss = 0
                if self.args.rna_train:
                    rna_loss = self.rna_loss(
                        logits=logits,
                        len_logits=len_encoded,
                        labels=tensors_input.label_splits[id_gpu],
                        len_labels=tensors_input.len_label_splits[id_gpu],
                        encoded=encoded,
                        len_encoded=len_encoded)
                    loss += rna_loss
                if self.args.OCD_train > 0:
                    ocd_loss = self.args.OCD_train * self.ocd_loss(
                        logits=logits,
                        len_logits=len_decoded,
                        labels=tensors_input.label_splits[id_gpu],
                        decoded=decoded,
                        len_decoded=len_decoded)
                    assert ocd_loss.get_shape().ndims == 1
                    loss += ocd_loss
                else:
                    ocd_loss = tf.constant(0)

                with tf.name_scope("gradients"):
                    loss = tf.reduce_mean(loss)
                    gradients = self.optimizer.compute_gradients(loss)

        self.__class__.num_Model += 1
        logging.info('\tbuild {} on {} successfully! total model number: {}'.format(
            self.__class__.__name__, name_gpu, self.__class__.num_Model))

        if self.is_train:
            return loss, gradients, [decoded, tensors_input.label_splits[id_gpu], ocd_loss]
            # return loss, gradients, tf.no_op()
        else:
            return logits, len_decoded, decoded
Example 7
    def build_single_graph(self, id_gpu, name_gpu, tensors_input):
        tf.get_variable_scope().set_initializer(
            tf.variance_scaling_initializer(1.0,
                                            mode="fan_avg",
                                            distribution="uniform"))
        with tf.device(
                lambda op: choose_device(op, name_gpu, self.center_device)):
            self.encoder = self.gen_encoder(is_train=self.is_train,
                                            args=self.args)
            self.fc_decoder = self.gen_decoder(is_train=self.is_train,
                                               embed_table=None,
                                               global_step=self.global_step,
                                               args=self.args,
                                               name='decoder')
            self.decoder = decoder = self.gen_decoder2(
                is_train=self.is_train,
                embed_table=self.embedding_tabel,
                global_step=self.global_step,
                args=self.args,
                name='decoder2')
            self.schedule = decoder.schedule

            hidden_output, len_hidden_output = self.encoder(
                features=tensors_input.feature_splits[id_gpu],
                len_feas=tensors_input.len_fea_splits[id_gpu])

            acoustic, alignment, len_acoustic = self.fc_decoder(
                hidden_output, len_hidden_output)

            if not self.args.model.train_encoder:
                acoustic = tf.stop_gradient(acoustic)
                len_acoustic = tf.stop_gradient(len_acoustic)
            # used to guide the shrinking of the hidden_output
            distribution_acoustic = tf.nn.softmax(acoustic)

            blank_id = self.args.dim_output - 1
            if self.args.model.true_end2end:
                from tfModels.CTCShrink import acoustic_hidden_shrink_v3
                hidden_shrunk, len_no_blank = acoustic_hidden_shrink_v3(
                    distribution_acoustic, hidden_output, len_acoustic,
                    blank_id, self.args.model.frame_expand)
            else:
                from tfModels.CTCShrink import acoustic_hidden_shrink_tf
                hidden_shrunk, len_no_blank = acoustic_hidden_shrink_tf(
                    distribution_acoustic=distribution_acoustic,
                    hidden=hidden_output,
                    len_acoustic=len_acoustic,
                    blank_id=blank_id,
                    frame_expand=self.args.model.frame_expand)

            if (not self.is_train) and (self.args.beam_size > 1):
                # inference phase
                with tf.variable_scope(decoder.name or 'decoder'):
                    if self.args.dirs.lm_checkpoint:
                        logging.info('beam search with language model ...')
                        logits, decoded, len_decoded = decoder.beam_decode_rerank(
                            hidden_shrunk, len_no_blank)
                    else:
                        logging.info('beam search ...')
                        logits, decoded, len_decoded = decoder.beam_decode(
                            hidden_shrunk, len_no_blank)
            else:
                # training phase
                logging.info('greedy search ...')
                logits, decoded, len_decoded = decoder(hidden_shrunk,
                                                       len_no_blank)

            if self.is_train:
                if self.args.model.decoder_loss == 'CE':
                    ocd_loss = self.ce_loss(
                        logits=logits,
                        labels=tensors_input.label_splits[id_gpu],
                        len_logits=len_acoustic,
                        len_labels=tensors_input.len_label_splits[id_gpu])
                elif self.args.model.decoder_loss == 'OCD':
                    ocd_loss = self.ocd_loss(
                        logits=logits,
                        len_logits=len_decoded,
                        labels=tensors_input.label_splits[id_gpu],
                        decoded=decoded,
                        len_decoded=len_decoded)
                elif self.args.model.decoder_loss == 'Premium_CE':

                    table_targets_distributions = tf.nn.softmax(
                        tf.constant(self.args.table_targets))

                    ocd_loss = self.premium_ce_loss(
                        logits=logits,
                        labels=tensors_input.label_splits[id_gpu],
                        table_targets_distributions=table_targets_distributions,
                        len_logits=len_decoded,
                        len_labels=tensors_input.len_label_splits[id_gpu])
                elif self.args.model.decoder_loss == 'LM_CE':
                    ocd_loss = self.lm_ce_loss(
                        logits=logits,
                        len_logits=len_decoded,
                        labels=tensors_input.label_splits[id_gpu],
                        decoded=decoded,
                        len_decoded=len_decoded)
                else:
                    raise NotImplementedError(
                        'unknown decoder loss: {}'.format(self.args.model.decoder_loss))

                if self.args.model.train_encoder:
                    ctc_loss = self.ctc_loss(
                        logits=acoustic,
                        len_logits=len_acoustic,
                        labels=tensors_input.label_splits[id_gpu],
                        len_labels=tensors_input.len_label_splits[id_gpu])
                else:
                    ctc_loss = tf.constant(0.0)
                loss = self.schedule * ocd_loss + (1 -
                                                   self.schedule) * ctc_loss

                with tf.name_scope("gradients"):
                    assert loss.get_shape().ndims == 1
                    loss = tf.reduce_mean(loss)
                    gradients = self.optimizer.compute_gradients(loss)

        self.__class__.num_Model += 1
        logging.info(
                '\tbuild {} on {} successfully! total model number: {}'.format(
                self.__class__.__name__, name_gpu, self.__class__.num_Model))

        if self.is_train:
            return loss, gradients, \
            [decoded, tensors_input.label_splits[id_gpu], distribution_acoustic, len_acoustic, len_no_blank, hidden_shrunk, ctc_loss, ocd_loss]
            # return loss, gradients, tf.no_op()
        else:
            return logits, len_decoded, decoded
Example 8
    def build_single_graph(self, id_gpu, name_gpu, tensors_input):
        tf.get_variable_scope().set_initializer(
            tf.variance_scaling_initializer(1.0,
                                            mode="fan_avg",
                                            distribution="uniform"))
        with tf.device(
                lambda op: choose_device(op, name_gpu, self.center_device)):
            self.encoder = self.gen_encoder(is_train=self.is_train,
                                            args=self.args)
            self.fc_decoder = self.gen_decoder(is_train=self.is_train,
                                               embed_table=None,
                                               global_step=self.global_step,
                                               args=self.args,
                                               name='decoder')
            self.decoder = decoder = self.gen_decoder2(
                is_train=self.is_train,
                embed_table=self.embedding_tabel,
                global_step=self.global_step,
                args=self.args,
                name='decoder2')

            hidden_output, len_hidden_output = self.encoder(
                features=tensors_input.feature_splits[id_gpu],
                len_feas=tensors_input.len_fea_splits[id_gpu])
            logits_acoustic, alignment, len_acoustic = self.fc_decoder(
                hidden_output, len_hidden_output)
            logits_acoustic = tf.stop_gradient(logits_acoustic)
            len_acoustic = tf.stop_gradient(len_acoustic)

            distribution_acoustic = tf.nn.softmax(logits_acoustic)

            # whether to shrink the hidden output or the acoustic distribution
            if not self.args.model.shrink_hidden:
                hidden_output = distribution_acoustic

            blank_id = self.args.dim_ctc_output - 1 if self.args.dim_ctc_output else self.args.dim_output - 1

            hidden_shrunk, len_no_blank = acoustic_hidden_shrink_tf(
                distribution_acoustic=distribution_acoustic,
                hidden=hidden_output,
                len_acoustic=len_acoustic,
                blank_id=blank_id,
                num_post=self.args.model.num_post,
                frame_expand=self.args.model.frame_expand)

            if (not self.is_train) and (self.args.beam_size > 1):
                # inference phase
                with tf.variable_scope(decoder.name or 'decoder'):
                    logits, decoded, len_decoded = decoder.beam_decode_rerank(
                        hidden_shrunk, len_no_blank)
            else:
                # training phase
                logits, decoded, len_decoded = decoder(hidden_shrunk,
                                                       len_no_blank)

            if self.is_train:
                if self.args.model.use_ce_loss:
                    loss = self.ce_loss(
                        logits=logits,
                        labels=tensors_input.label_splits[id_gpu],
                        len_logits=len_acoustic,
                        len_labels=tensors_input.len_label_splits[id_gpu])
                else:
                    loss = self.ocd_loss(
                        logits=logits,
                        len_logits=len_decoded,
                        labels=tensors_input.label_splits[id_gpu],
                        decoded=decoded)

                if self.args.model.confidence_penalty > 0:  # utt-level
                    cp_loss = self.args.model.confidence_penalty * \
                        confidence_penalty(logits, len_decoded)/len_decoded

                    loss += cp_loss

                if self.args.model.musk_update:
                    self.idx_update = self.deserve_idx(
                        decoded, len_decoded,
                        tensors_input.label_splits[id_gpu],
                        tensors_input.len_label_splits[id_gpu])
                    loss = tf.reshape(tf.gather(loss, self.idx_update), [-1])
                l2_loss = tf.add_n(
                    [tf.nn.l2_loss(v) for v in self.decoder.params])
                with tf.name_scope("gradients"):
                    loss = tf.reduce_mean(loss)
                    gradients = self.optimizer.compute_gradients(loss)

        self.__class__.num_Model += 1
        logging.info(
                '\tbuild {} on {} successfully! total model number: {}'.format(
                self.__class__.__name__, name_gpu, self.__class__.num_Model))

        if self.is_train:
            return loss, gradients, \
            [decoded, tensors_input.label_splits[id_gpu], l2_loss]
            # return loss, gradients, tf.no_op()
        else:
            return logits, len_decoded, decoded
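
Examples 1, 3, and 8 regularize with confidence_penalty(logits, len_logits); Example 8 additionally divides it by len_decoded for utterance-level normalization. The function is not shown here; one plausible form, following the entropy-based confidence penalty of Pereyra et al. (2017) and returning one value per utterance (consistent with the ndims == 1 assert in Example 3), is sketched below. This is an assumption, not the repository's exact implementation:

import tensorflow as tf

def confidence_penalty(logits, len_logits):
    """Penalize over-confident (low-entropy) output distributions.
    logits: [batch, time, vocab]; len_logits: [batch]. Returns [batch]."""
    probs = tf.nn.softmax(logits)
    # negative entropy per frame, [batch, time]
    neg_entropy = tf.reduce_sum(probs * tf.log(probs + 1e-10), axis=-1)
    mask = tf.sequence_mask(
        len_logits, maxlen=tf.shape(logits)[1], dtype=logits.dtype)
    return tf.reduce_sum(neg_entropy * mask, axis=-1)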
Example 9
    def build_single_graph(self, id_gpu, name_gpu, tensors_input):
        tf.get_variable_scope().set_initializer(tf.variance_scaling_initializer(
            1.0, mode="fan_avg", distribution="uniform"))
        with tf.device(lambda op: choose_device(op, name_gpu, self.center_device)):
            batch_size = 1200 if self.is_train else 3
            state_agent_init = self.agent.zero_state(batch_size)
            state_lm_init = self.env.lm.zero_state(batch_size)
            rewards_lm_init = tf.zeros([batch_size, 0])
            actions_init = tf.zeros([batch_size, 0], dtype=tf.int32)
            logits_init = tf.zeros([batch_size, 0, self.args.dim_output])

            def step(i, state_agent, state_lm, rewards_lm, actions, logits):
                # generate env state
                state_env = tf.concat(state_lm[-1], -1)

                # agent takes action
                cur_logit, next_state_agent = self.agent.forward(state_env, state_agent)
                policy = tf.nn.softmax(cur_logit, name='actor_prob')

                action = tf.cond(
                    tf.less(self.choose, 0.8),
                    lambda: tf.distributions.Categorical(probs=policy).sample(),
                    lambda: tf.distributions.Categorical(logits=tf.ones_like(cur_logit)).sample())

                logits = tf.concat([logits, cur_logit[:, None, :]], 1)

                # the env transitions its state and returns the reward
                next_state_lm, reward_lm, info = self.env.step(action, state_lm)
                rewards_lm = tf.concat([rewards_lm, reward_lm[:, None]], 1)
                actions = tf.concat([actions, action[:, None]], 1)

                return i+1, next_state_agent, next_state_lm, rewards_lm, actions, logits

            _, _, _, rewards_lm, actions, logits = tf.while_loop(
                cond=lambda i, *_: tf.less(i, 20),
                body=step,
                loop_vars=[0, state_agent_init, state_lm_init, rewards_lm_init, actions_init, logits_init],
                shape_invariants=[tf.TensorShape([]),
                                  nest.map_structure(lambda t: tf.TensorShape(t.shape), state_agent_init),
                                  nest.map_structure(lambda t: tf.TensorShape(t.shape), state_lm_init),
                                  tf.TensorShape([None, None]),
                                  tf.TensorShape([None, None]),
                                  tf.TensorShape([None, None, self.args.dim_output])]
                )

            if self.is_train:
                rewards = rewards_lm
                rewards_discounted = self.discount(self.discount_rate, rewards)
                rewards_discounted = tf.stop_gradient(rewards_discounted)

                crossent = smoothing_cross_entropy(
                    logits=logits,
                    labels=actions,
                    vocab_size=self.args.dim_output,
                    confidence=1.0)
                loss = crossent * rewards_discounted

                loss = tf.reduce_mean(loss)
                gradients = self.optimizer.compute_gradients(loss)

            self.__class__.num_Model += 1
            logging.info('\tbuild {} on {} successfully! total model number: {}'.format(
                self.__class__.__name__, name_gpu, self.__class__.num_Model))

            if self.is_train:
                return loss, gradients, \
                [tf.reduce_sum(rewards_lm, -1), actions]
            else:
                return actions, rewards_lm
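
Examples 9 and 10 turn the per-step rewards into returns with self.discount(self.discount_rate, rewards) before weighting the policy cross-entropy, i.e. a REINFORCE-style objective. The discount helper is not shown; a sketch of a standard right-to-left discounted cumulative sum over the time axis (illustrative, not the repository's implementation):

import tensorflow as tf

def discount(discount_rate, rewards):
    """rewards: [batch, time]. Returns discounted returns G_t = r_t + rate * G_{t+1}, same shape."""
    # reverse along time so tf.scan accumulates from the last step backwards
    rewards_tm = tf.reverse(tf.transpose(rewards), axis=[0])  # [time, batch]
    returns = tf.scan(
        lambda acc, r: r + discount_rate * acc,
        rewards_tm,
        initializer=tf.zeros_like(rewards_tm[0]))
    return tf.transpose(tf.reverse(returns, axis=[0]))        # back to [batch, time]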
Example 10
    def build_single_graph(self, id_gpu, name_gpu, tensors_input):
        tf.get_variable_scope().set_initializer(
            tf.variance_scaling_initializer(1.0,
                                            mode="fan_avg",
                                            distribution="uniform"))
        with tf.device(
                lambda op: choose_device(op, name_gpu, self.center_device)):
            batch_size = tf.shape(tensors_input.len_fea_splits[id_gpu])[0]
            state_agent_init = self.agent.zero_state(batch_size)
            state_lm_init = self.env.lm.zero_state(batch_size)
            rewards_lm_init = tf.zeros([batch_size, 0])
            actions_init = tf.zeros([batch_size, 0], dtype=tf.int32)
            logits_init = tf.zeros([batch_size, 0, self.args.dim_output])

            frames, len_frames = self.processor.process(
                inputs=tensors_input.feature_splits[id_gpu],
                len_inputs=tensors_input.len_fea_splits[id_gpu])

            # frames = tf.stop_gradient(frames)

            def step(i, state_agent, state_lm, rewards_lm, actions, logits):
                # generate env state
                state_ac = frames[:, i, :]
                state_env = tf.concat(
                    [state_ac, tf.concat(state_lm[-1], -1)], 1)

                # agent takes action
                cur_logit, next_state_agent = self.agent.forward(
                    state_env, state_agent)
                policy = tf.nn.softmax(cur_logit, name='actor_prob')
                logits = tf.concat([logits, cur_logit[:, None, :]], 1)
                action = tf.distributions.Categorical(probs=policy).sample()

                # the env transitions its state and returns the reward
                next_state_lm, reward_lm, info = self.env.step(
                    action, state_lm)
                rewards_lm = tf.concat([rewards_lm, reward_lm[:, None]], 1)
                actions = tf.concat([actions, action[:, None]], 1)

                return i + 1, next_state_agent, next_state_lm, rewards_lm, actions, logits

            _, _, _, rewards_lm, actions, logits = tf.while_loop(
                cond=lambda i, *_: tf.less(i,
                                           tf.shape(frames)[1]),
                body=step,
                loop_vars=[
                    0, state_agent_init, state_lm_init, rewards_lm_init,
                    actions_init, logits_init
                ],
                shape_invariants=[
                    tf.TensorShape([]),
                    nest.map_structure(lambda t: tf.TensorShape(t.shape),
                                       state_agent_init),
                    nest.map_structure(lambda t: tf.TensorShape(t.shape),
                                       state_lm_init),
                    tf.TensorShape([None, None]),
                    tf.TensorShape([None, None]),
                    tf.TensorShape([None, None, self.args.dim_output])
                ])

            pad_musk = tf.sequence_mask(len_frames,
                                        maxlen=tf.shape(frames)[1],
                                        dtype=tf.float32)
            rewards_lm *= pad_musk

            if self.is_train:
                q_value = Qvalue(actions, tensors_input.label_splits[id_gpu])
                # rewards_ac: the temporal-difference Q value of each step
                rewards_ac = tf.to_float(q_value[:, 1:] - q_value[:, :-1])
                rewards_lm = tf.zeros_like(rewards_ac)
                rewards_ac *= pad_musk
                rewards = rewards_ac + rewards_lm
                # rewards = tf.Print(rewards, [tf.reduce_sum(rewards_lm, -1)], message='rewards_lm', summarize=1000)
                rewards_discounted = self.discount(self.discount_rate, rewards)
                rewards_discounted = tf.stop_gradient(rewards_discounted)

                crossent = smoothing_cross_entropy(
                    logits=logits,
                    labels=actions,
                    vocab_size=self.args.dim_output,
                    confidence=1.0)
                # crossent = tf.Print(crossent, [tf.reduce_sum(crossent)], message='crossent: ', summarize=1000)
                loss = crossent * rewards_discounted * pad_musk

                loss = tf.reduce_mean(loss)
                gradients = self.optimizer.compute_gradients(loss)

            self.__class__.num_Model += 1
            logging.info(
                '\tbuild {} on {} successfully! total model number: {}'.format(
                    self.__class__.__name__, name_gpu,
                    self.__class__.num_Model))

            if self.is_train:
                return loss, gradients, \
                [tf.reduce_sum(rewards_ac, -1), tf.reduce_sum(rewards_lm, -1), actions]
            else:
                return actions, rewards_lm
Example 11
    def build_single_graph(self, id_gpu, name_gpu, tensors_input):
        """
        Used to build both the inference model and the training model, conditioned on self.is_train.
        """
        # build model in one device
        num_cell_units = self.args.model.num_cell_units
        cell_type = self.args.model.cell_type
        dropout = self.args.model.dropout
        forget_bias = self.args.model.forget_bias
        use_residual = self.args.model.use_residual

        hidden_output = tensors_input.feature_splits[id_gpu]
        with tf.device(lambda op: choose_device(op, name_gpu, self.center_device)):
            for i in range(self.args.model.num_lstm_layers):
                # build one layer: build block, connect block
                single_cell = build_cell(
                    num_units=num_cell_units,
                    num_layers=1,
                    is_train=self.is_train,
                    cell_type=cell_type,
                    dropout=dropout,
                    forget_bias=forget_bias,
                    use_residual=use_residual)
                hidden_output, _ = cell_forward(
                    cell=single_cell,
                    inputs=hidden_output,
                    index_layer=i)
                hidden_output = fully_connected(
                    inputs=hidden_output,
                    num_outputs=num_cell_units,
                    activation_fn=tf.nn.tanh,
                    scope='wx_b'+str(i))
                if self.args.model.use_layernorm:
                    hidden_output = layer_norm(hidden_output)

            logits = fully_connected(inputs=hidden_output,
                                     num_outputs=self.args.dim_output,
                                     activation_fn=tf.identity,
                                     scope='fully_connected')

            # Accuracy
            with tf.name_scope("label_accuracy"):
                correct = tf.nn.in_top_k(logits, tf.reshape(tensors_input.label_splits[id_gpu], [-1]), 1)
                correct = tf.multiply(tf.cast(correct, tf.float32), tf.reshape(tensors_input.mask_splits[id_gpu], [-1]))
                label_accuracy = tf.reduce_sum(correct)
            # Cross entropy loss
            with tf.name_scope("CE_loss"):
                cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=tf.reshape(tensors_input.label_splits[id_gpu], [-1]),
                    logits=logits)
                cross_entropy = tf.multiply(cross_entropy, tf.reshape(tensors_input.mask_splits[id_gpu], [-1]))
                cross_entropy_loss = tf.reduce_sum(cross_entropy) / tf.reduce_sum(tensors_input.mask_splits[id_gpu])
                loss = cross_entropy_loss

            if self.is_train:
                with tf.name_scope("gradients"):
                    gradients = self.optimizer.compute_gradients(loss)

        logging.info('\tbuild {} on {} successfully! total model number: {}'.format(
            self.__class__.__name__, name_gpu, self.__class__.num_Instances))

        return (loss, gradients) if self.is_train else logits
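
Every example builds its graph under tf.device(lambda op: choose_device(op, name_gpu, self.center_device)), but choose_device itself does not appear here. A common shape for such a device function, pinning variables to the center (parameter) device and all other ops to the worker GPU, is sketched below; this is an assumption about the helper, not its actual source:

def choose_device(op, worker_device, center_device):
    """Device function for tf.device: keep variable ops on the center device,
    run every other op on the worker GPU that is building this replica."""
    if op.type.startswith('Variable') or op.type == 'VarHandleOp':
        return center_device
    return worker_device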